diff -Naur libmbfl-1.0.1/configure.in libmbfl/configure.in --- libmbfl-1.0.1/configure.in 2006-01-21 05:04:54.000000000 +0100 +++ libmbfl/configure.in 2008-08-22 12:09:11.000000000 +0200 @@ -1,6 +1,6 @@ # Process this file with autoconf to produce a configure script. AC_INIT(mbfl/mbfilter.c) -AM_INIT_AUTOMAKE(libmbfl, 1.0.1) +AM_INIT_AUTOMAKE(libmbfl, 1.0.2) AC_CONFIG_SRCDIR(mbfl/mbfilter.c) AM_CONFIG_HEADER(config.h) diff -Naur libmbfl-1.0.1/filters/Makefile.am libmbfl/filters/Makefile.am --- libmbfl-1.0.1/filters/Makefile.am 2005-03-22 21:30:21.000000000 +0100 +++ libmbfl/filters/Makefile.am 2008-07-05 08:52:04.000000000 +0200 @@ -2,7 +2,136 @@ noinst_LTLIBRARIES=libmbfl_filters.la INCLUDES=-I../mbfl libmbfl_filters_la_LDFLAGS=-version-info $(SHLIB_VERSION) -libmbfl_filters_la_SOURCES=mbfilter_cp936.c mbfilter_hz.c mbfilter_euc_tw.c mbfilter_big5.c mbfilter_euc_jp.c mbfilter_jis.c mbfilter_iso8859_1.c mbfilter_iso8859_2.c mbfilter_cp1252.c mbfilter_cp1251.c mbfilter_ascii.c mbfilter_iso8859_3.c mbfilter_iso8859_4.c mbfilter_iso8859_5.c mbfilter_iso8859_6.c mbfilter_iso8859_7.c mbfilter_iso8859_8.c mbfilter_iso8859_9.c mbfilter_iso8859_10.c mbfilter_iso8859_13.c mbfilter_iso8859_14.c mbfilter_iso8859_15.c mbfilter_iso8859_16.c mbfilter_htmlent.c mbfilter_byte2.c mbfilter_byte4.c mbfilter_uuencode.c mbfilter_base64.c mbfilter_sjis.c mbfilter_7bit.c mbfilter_qprint.c mbfilter_ucs4.c mbfilter_ucs2.c mbfilter_utf32.c mbfilter_utf16.c mbfilter_utf8.c mbfilter_utf7.c mbfilter_utf7imap.c mbfilter_euc_jp_win.c mbfilter_cp932.c mbfilter_euc_cn.c mbfilter_euc_kr.c mbfilter_uhc.c mbfilter_iso2022_kr.c mbfilter_cp866.c mbfilter_koi8r.c mbfilter_armscii8.c html_entities.c cp932_table.h html_entities.h mbfilter_7bit.h mbfilter_ascii.h mbfilter_base64.h mbfilter_big5.h mbfilter_byte2.h mbfilter_byte4.h mbfilter_cp1251.h mbfilter_cp1252.h mbfilter_cp866.h mbfilter_cp932.h mbfilter_cp936.h mbfilter_euc_cn.h mbfilter_euc_jp.h mbfilter_euc_jp_win.h mbfilter_euc_kr.h 
mbfilter_euc_tw.h mbfilter_htmlent.h mbfilter_hz.h mbfilter_iso2022_kr.h mbfilter_iso8859_1.h mbfilter_iso8859_10.h mbfilter_iso8859_13.h mbfilter_iso8859_14.h mbfilter_iso8859_15.h mbfilter_iso8859_16.h mbfilter_iso8859_2.h mbfilter_iso8859_3.h mbfilter_iso8859_4.h mbfilter_iso8859_5.h mbfilter_iso8859_6.h mbfilter_iso8859_7.h mbfilter_iso8859_8.h mbfilter_iso8859_9.h mbfilter_jis.h mbfilter_koi8r.h mbfilter_armscii8.h mbfilter_qprint.h mbfilter_sjis.h mbfilter_ucs2.h mbfilter_ucs4.h mbfilter_uhc.h mbfilter_utf16.h mbfilter_utf32.h mbfilter_utf7.h mbfilter_utf7imap.h mbfilter_utf8.h mbfilter_uuencode.h unicode_prop.h unicode_table_big5.h unicode_table_cns11643.h unicode_table_cp1251.h unicode_table_cp1252.h unicode_table_cp866.h unicode_table_cp932_ext.h unicode_table_cp936.h unicode_table_iso8859_10.h unicode_table_iso8859_13.h unicode_table_iso8859_14.h unicode_table_iso8859_15.h unicode_table_iso8859_16.h unicode_table_iso8859_2.h unicode_table_iso8859_3.h unicode_table_iso8859_4.h unicode_table_iso8859_5.h unicode_table_iso8859_6.h unicode_table_iso8859_7.h unicode_table_iso8859_8.h unicode_table_iso8859_9.h unicode_table_jis.h unicode_table_koi8r.h unicode_table_armscii8.h unicode_table_uhc.h +libmbfl_filters_la_SOURCES=mbfilter_cp936.c \ + mbfilter_hz.c \ + mbfilter_euc_tw.c \ + mbfilter_big5.c \ + mbfilter_euc_jp.c \ + mbfilter_jis.c \ + mbfilter_iso8859_1.c \ + mbfilter_iso8859_2.c \ + mbfilter_cp1254.c \ + mbfilter_cp1252.c \ + mbfilter_cp1251.c \ + mbfilter_ascii.c \ + mbfilter_iso8859_3.c \ + mbfilter_iso8859_4.c \ + mbfilter_iso8859_5.c \ + mbfilter_iso8859_6.c \ + mbfilter_iso8859_7.c \ + mbfilter_iso8859_8.c \ + mbfilter_iso8859_9.c \ + mbfilter_iso8859_10.c \ + mbfilter_iso8859_13.c \ + mbfilter_iso8859_14.c \ + mbfilter_iso8859_15.c \ + mbfilter_iso8859_16.c \ + mbfilter_htmlent.c \ + mbfilter_byte2.c \ + mbfilter_byte4.c \ + mbfilter_uuencode.c \ + mbfilter_base64.c \ + mbfilter_sjis.c \ + mbfilter_7bit.c \ + mbfilter_qprint.c \ + mbfilter_ucs4.c 
\ + mbfilter_ucs2.c \ + mbfilter_utf32.c \ + mbfilter_utf16.c \ + mbfilter_utf8.c \ + mbfilter_utf7.c \ + mbfilter_utf7imap.c \ + mbfilter_euc_jp_win.c \ + mbfilter_cp932.c \ + mbfilter_cp51932.c \ + mbfilter_euc_cn.c \ + mbfilter_euc_kr.c \ + mbfilter_uhc.c \ + mbfilter_iso2022_kr.c \ + mbfilter_cp866.c \ + mbfilter_koi8r.c \ + mbfilter_koi8u.c \ + mbfilter_armscii8.c \ + html_entities.c \ + cp932_table.h \ + html_entities.h \ + mbfilter_7bit.h \ + mbfilter_ascii.h \ + mbfilter_base64.h \ + mbfilter_big5.h \ + mbfilter_byte2.h \ + mbfilter_byte4.h \ + mbfilter_cp1251.h \ + mbfilter_cp1252.h \ + mbfilter_cp1254.h \ + mbfilter_cp866.h \ + mbfilter_cp932.h \ + mbfilter_cp936.h \ + mbfilter_euc_cn.h \ + mbfilter_euc_jp.h \ + mbfilter_euc_jp_win.h \ + mbfilter_euc_kr.h \ + mbfilter_euc_tw.h \ + mbfilter_htmlent.h \ + mbfilter_hz.h \ + mbfilter_iso2022_kr.h \ + mbfilter_iso8859_1.h \ + mbfilter_iso8859_10.h \ + mbfilter_iso8859_13.h \ + mbfilter_iso8859_14.h \ + mbfilter_iso8859_15.h \ + mbfilter_iso8859_16.h \ + mbfilter_iso8859_2.h \ + mbfilter_iso8859_3.h \ + mbfilter_iso8859_4.h \ + mbfilter_iso8859_5.h \ + mbfilter_iso8859_6.h \ + mbfilter_iso8859_7.h \ + mbfilter_iso8859_8.h \ + mbfilter_iso8859_9.h \ + mbfilter_jis.h \ + mbfilter_koi8r.h \ + mbfilter_koi8u.h \ + mbfilter_armscii8.h \ + mbfilter_qprint.h \ + mbfilter_sjis.h \ + mbfilter_ucs2.h \ + mbfilter_ucs4.h \ + mbfilter_uhc.h \ + mbfilter_utf16.h \ + mbfilter_utf32.h \ + mbfilter_utf7.h \ + mbfilter_utf7imap.h \ + mbfilter_utf8.h \ + mbfilter_uuencode.h \ + mbfilter_cp51932.h \ + unicode_prop.h \ + unicode_table_big5.h \ + unicode_table_cns11643.h \ + unicode_table_cp1251.h \ + unicode_table_cp1252.h \ + unicode_table_cp1254.h \ + unicode_table_cp866.h \ + unicode_table_cp932_ext.h \ + unicode_table_cp936.h \ + unicode_table_iso8859_10.h \ + unicode_table_iso8859_13.h \ + unicode_table_iso8859_14.h \ + unicode_table_iso8859_15.h \ + unicode_table_iso8859_16.h \ + unicode_table_iso8859_2.h \ + 
unicode_table_iso8859_3.h \ + unicode_table_iso8859_4.h \ + unicode_table_iso8859_5.h \ + unicode_table_iso8859_6.h \ + unicode_table_iso8859_7.h \ + unicode_table_iso8859_8.h \ + unicode_table_iso8859_9.h \ + unicode_table_jis.h \ + unicode_table_koi8r.h \ + unicode_table_koi8u.h \ + unicode_table_armscii8.h \ + unicode_table_uhc.h mbfilter_iso8859_2.c: unicode_table_iso8859_2.h diff -Naur libmbfl-1.0.1/filters/mbfilter_cp1254.c libmbfl/filters/mbfilter_cp1254.c --- libmbfl-1.0.1/filters/mbfilter_cp1254.c 1970-01-01 01:00:00.000000000 +0100 +++ libmbfl/filters/mbfilter_cp1254.c 2008-07-05 09:36:24.000000000 +0200 @@ -0,0 +1,157 @@ +/* + * "streamable kanji code filter and converter" + * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. + * + * LICENSE NOTICES + * + * This file is part of "streamable kanji code filter and converter", + * which is distributed under the terms of GNU Lesser General Public + * License (version 2) as published by the Free Software Foundation. + * + * This software is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with "streamable kanji code filter and converter"; + * if not, write to the Free Software Foundation, Inc., 59 Temple Place, + * Suite 330, Boston, MA 02111-1307 USA + * + * The author of this part: Haluk AKIN <halukakin@gmail.com> + * + */ +/* + * The source code included in this files was separated from mbfilter_ru.c + * by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002. 
+ * + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "mbfilter.h" +#include "mbfilter_cp1254.h" +#include "unicode_table_cp1254.h" + +static int mbfl_filt_ident_cp1254(int c, mbfl_identify_filter *filter); + +static const char *mbfl_encoding_cp1254_aliases[] = {"CP1254", "CP-1254", "WINDOWS-1254", NULL}; + +const mbfl_encoding mbfl_encoding_cp1254 = { + mbfl_no_encoding_cp1254, + "Windows-1254", + "Windows-1254", + (const char *(*)[])&mbfl_encoding_cp1254_aliases, + NULL, + MBFL_ENCTYPE_SBCS +}; + +const struct mbfl_identify_vtbl vtbl_identify_cp1254 = { + mbfl_no_encoding_cp1254, + mbfl_filt_ident_common_ctor, + mbfl_filt_ident_common_dtor, + mbfl_filt_ident_cp1254 +}; + +const struct mbfl_convert_vtbl vtbl_cp1254_wchar = { + mbfl_no_encoding_cp1254, + mbfl_no_encoding_wchar, + mbfl_filt_conv_common_ctor, + mbfl_filt_conv_common_dtor, + mbfl_filt_conv_cp1254_wchar, + mbfl_filt_conv_common_flush +}; + +const struct mbfl_convert_vtbl vtbl_wchar_cp1254 = { + mbfl_no_encoding_wchar, + mbfl_no_encoding_cp1254, + mbfl_filt_conv_common_ctor, + mbfl_filt_conv_common_dtor, + mbfl_filt_conv_wchar_cp1254, + mbfl_filt_conv_common_flush +}; + +#define CK(statement) do { if ((statement) < 0) return (-1); } while (0) /* makes the enclosing function return -1 when the wrapped call yields a negative value */ + +/* + * wchar => cp1254. Values below 0x80 pass through unchanged; anything else is searched for in cp1254_ucs_table, scanning from the last entry to the first. Input with no mapping is handed to mbfl_filt_conv_illegal_output unless illegal_mode is MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE. Returns c, or -1 when an output call fails. + */ +int +mbfl_filt_conv_wchar_cp1254(int c, mbfl_convert_filter *filter) +{ + int s, n; + + if (c < 0x80) { + s = c; + } else { + s = -1; + n = cp1254_ucs_table_len-1; + while (n >= 0) { + if (c == cp1254_ucs_table[n] && c != 0xfffe) { /* NOTE(review): presumably 0xfffe marks unassigned table slots - verify against unicode_table_cp1254.h */ + s = cp1254_ucs_table_min + n; + break; + } + n--; + } + if (s <= 0 && (c & ~MBFL_WCSPLANE_MASK) == MBFL_WCSPLANE_CP1254) { + s = c & MBFL_WCSPLANE_MASK; /* input carried the CP1254 plane tag: keep only the masked bits */ + } + } + + if (s >= 0) { + CK((*filter->output_function)(s, filter->data)); + } else { + if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) { + CK(mbfl_filt_conv_illegal_output(c, filter)); + } + } + + return c; +} + +/* + * cp1254 => wchar. Values from 0 up to cp1254_ucs_table_min map to themselves and values from there up to 0xff are looked up in cp1254_ucs_table; any other input is emitted tagged with MBFL_WCSGROUP_THROUGH. Returns c, or -1 when the output call fails. + */ +int +mbfl_filt_conv_cp1254_wchar(int c, mbfl_convert_filter *filter) +{ + 
int s; + + if (c >= 0 && c < cp1254_ucs_table_min) { + s = c; + } else if (c >= cp1254_ucs_table_min && c < 0x100) { + s = cp1254_ucs_table[c - cp1254_ucs_table_min]; + if (s <= 0) { /* no table mapping: tag the byte with the CP1254 plane instead */ + s = c; + s &= MBFL_WCSPLANE_MASK; + s |= MBFL_WCSPLANE_CP1254; + } + } else { + s = c; + s &= MBFL_WCSGROUP_MASK; + s |= MBFL_WCSGROUP_THROUGH; + } + + CK((*filter->output_function)(s, filter->data)); + + return c; +} + +/* We only distinguish the MS extensions to ISO-8859-1. + * Actually, this is pretty much a NO-OP, since the identification + * system doesn't allow us to discriminate between a positive match, + * a possible match and a definite non-match. + * The problem here is that cp1254 looks like SJIS for certain chars. + * */ +static int mbfl_filt_ident_cp1254(int c, mbfl_identify_filter *filter) +{ + if (c >= 0x80 && c < 0xff) /* NOTE(review): 0xff and every byte below 0x80 take the else branch and set flag to 1 - confirm this is intended */ + filter->flag = 0; + else + filter->flag = 1; /* not it */ + return c; +} + + diff -Naur libmbfl-1.0.1/filters/mbfilter_cp1254.h libmbfl/filters/mbfilter_cp1254.h --- libmbfl-1.0.1/filters/mbfilter_cp1254.h 1970-01-01 01:00:00.000000000 +0100 +++ libmbfl/filters/mbfilter_cp1254.h 2008-07-05 08:52:04.000000000 +0200 @@ -0,0 +1,43 @@ +/* + * "streamable kanji code filter and converter" + * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. + * + * LICENSE NOTICES + * + * This file is part of "streamable kanji code filter and converter", + * which is distributed under the terms of GNU Lesser General Public + * License (version 2) as published by the Free Software Foundation. + * + * This software is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with "streamable kanji code filter and converter"; + * if not, write to the Free Software Foundation, Inc., 59 Temple Place, + * Suite 330, Boston, MA 02111-1307 USA + * + * The author of this part: Haluk AKIN <halukakin@gmail.com> + * + */ +/* + * the source code included in this files was separated from mbfilter.c + * by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002. + * + */ + +#ifndef MBFL_MBFILTER_CP1254_H +#define MBFL_MBFILTER_CP1254_H + +#include "mbfilter.h" + +extern const mbfl_encoding mbfl_encoding_cp1254; +extern const struct mbfl_identify_vtbl vtbl_identify_cp1254; +extern const struct mbfl_convert_vtbl vtbl_cp1254_wchar; +extern const struct mbfl_convert_vtbl vtbl_wchar_cp1254; + +int mbfl_filt_conv_wchar_cp1254(int c, mbfl_convert_filter *filter); +int mbfl_filt_conv_cp1254_wchar(int c, mbfl_convert_filter *filter); + +#endif /* MBFL_MBFILTER_CP1254_H */ diff -Naur libmbfl-1.0.1/filters/mbfilter_cp51932.c libmbfl/filters/mbfilter_cp51932.c --- libmbfl-1.0.1/filters/mbfilter_cp51932.c 1970-01-01 01:00:00.000000000 +0100 +++ libmbfl/filters/mbfilter_cp51932.c 2006-11-16 18:33:39.000000000 +0100 @@ -0,0 +1,360 @@ +/* + * "streamable kanji code filter and converter" + * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. + * + * LICENSE NOTICES + * + * This file is part of "streamable kanji code filter and converter", + * which is distributed under the terms of GNU Lesser General Public + * License (version 2) as published by the Free Software Foundation. + * + * This software is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with "streamable kanji code filter and converter"; + * if not, write to the Free Software Foundation, Inc., 59 Temple Place, + * Suite 330, Boston, MA 02111-1307 USA + * + * The author of this file: + * + */ +/* + * The source code included in this file was separated from mbfilter_ja.c + * by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002. + * + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "mbfilter.h" +#include "mbfilter_cp51932.h" + +#include "unicode_table_cp932_ext.h" +#include "unicode_table_jis.h" +#include "cp932_table.h" + +static int mbfl_filt_ident_cp51932(int c, mbfl_identify_filter *filter); + +static const unsigned char mblen_table_eucjp[] = { /* 256 entries indexed by byte value: 2 for 0x8E and 0xA1-0xFE, 1 for every other byte */ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1 +}; + + +static const char *mbfl_encoding_cp51932_aliases[] = {"cp51932", NULL}; + +const struct mbfl_identify_vtbl vtbl_identify_cp51932 = { + mbfl_no_encoding_cp51932, + mbfl_filt_ident_common_ctor, + mbfl_filt_ident_common_dtor, + mbfl_filt_ident_cp51932 +}; + +const mbfl_encoding mbfl_encoding_cp51932 = { + mbfl_no_encoding_cp51932, + "CP51932", + "CP51932", + (const char 
*(*)[])&mbfl_encoding_cp51932_aliases, + mblen_table_eucjp, + MBFL_ENCTYPE_MBCS +}; + +const struct mbfl_convert_vtbl vtbl_cp51932_wchar = { + mbfl_no_encoding_cp51932, + mbfl_no_encoding_wchar, + mbfl_filt_conv_common_ctor, + mbfl_filt_conv_common_dtor, + mbfl_filt_conv_cp51932_wchar, + mbfl_filt_conv_common_flush +}; + +const struct mbfl_convert_vtbl vtbl_wchar_cp51932 = { + mbfl_no_encoding_wchar, + mbfl_no_encoding_cp51932, + mbfl_filt_conv_common_ctor, + mbfl_filt_conv_common_dtor, + mbfl_filt_conv_wchar_cp51932, + mbfl_filt_conv_common_flush +}; + +#define CK(statement) do { if ((statement) < 0) return (-1); } while (0) /* makes the enclosing function return -1 when the wrapped call yields a negative value */ + +#define sjistoidx(c1, c2) \ + (((c1) > 0x9f) \ + ? (((c1) - 0xc1) * 188 + (c2) - (((c2) > 0x7e) ? 0x41 : 0x40)) \ + : (((c1) - 0x81) * 188 + (c2) - (((c2) > 0x7e) ? 0x41 : 0x40))) +#define idxtoeuc1(c) (((c) / 94) + 0xa1) +#define idxtoeuc2(c) (((c) % 94) + 0xa1) + +/* + * cp51932 => wchar. A three-state decoder keyed on filter->status: 0 is the start of a character, 1 means a lead byte in 0xa1-0xfe is held in filter->cache, 2 means 0x8e was seen. Returns c, or -1 when an output call fails. + */ +int +mbfl_filt_conv_cp51932_wchar(int c, mbfl_convert_filter *filter) +{ + int c1, s, w; + + switch (filter->status) { + case 0: + if (c >= 0 && c < 0x80) { /* latin */ + CK((*filter->output_function)(c, filter->data)); + } else if (c > 0xa0 && c < 0xff) { /* CP932 first char */ + filter->status = 1; + filter->cache = c; + } else if (c == 0x8e) { /* kana first char */ + filter->status = 2; + } else { + w = c & MBFL_WCSGROUP_MASK; + w |= MBFL_WCSGROUP_THROUGH; + CK((*filter->output_function)(w, filter->data)); + } + break; + + case 1: /* got first half */ + filter->status = 0; + c1 = filter->cache; + if (c > 0xa0 && c < 0xff) { + w = 0; + s = (c1 - 0xa1)*94 + c - 0xa1; /* linear cell index, 94 cells per lead byte */ + if (s <= 137) { /* a few low cells are overridden before any table lookup */ + if (s == 31) { + w = 0xff3c; /* FULLWIDTH REVERSE SOLIDUS */ + } else if (s == 32) { + w = 0xff5e; /* FULLWIDTH TILDE */ + } else if (s == 33) { + w = 0x2225; /* PARALLEL TO */ + } else if (s == 60) { + w = 0xff0d; /* FULLWIDTH HYPHEN-MINUS */ + } else if (s == 80) { + w = 0xffe0; /* FULLWIDTH CENT SIGN */ + } else if (s == 81) { + w = 0xffe1; /* FULLWIDTH POUND 
SIGN */ + } else if (s == 137) { + w = 0xffe2; /* FULLWIDTH NOT SIGN */ + } + } + if (w == 0) { + if (s >= cp932ext1_ucs_table_min && s < cp932ext1_ucs_table_max) { /* vendor ext1 (13ku) */ + w = cp932ext1_ucs_table[s - cp932ext1_ucs_table_min]; + } else if (s >= 0 && s < jisx0208_ucs_table_size) { /* X 0208 */ + w = jisx0208_ucs_table[s]; + } else if (s >= cp932ext2_ucs_table_min && s < cp932ext2_ucs_table_max) { /* vendor ext2 (89ku - 92ku) */ + w = cp932ext2_ucs_table[s - cp932ext2_ucs_table_min]; + } + } + if (w <= 0) { /* no mapping found: emit the 7-bit byte pair tagged with the WINCP932 plane */ + w = ((c1 & 0x7f) << 8) | (c & 0x7f); + w &= MBFL_WCSPLANE_MASK; + w |= MBFL_WCSPLANE_WINCP932; + } + CK((*filter->output_function)(w, filter->data)); + } else if ((c >= 0 && c < 0x21) || c == 0x7f) { /* CTLs */ + CK((*filter->output_function)(c, filter->data)); + } else { + w = (c1 << 8) | c; + w &= MBFL_WCSGROUP_MASK; + w |= MBFL_WCSGROUP_THROUGH; + CK((*filter->output_function)(w, filter->data)); + } + break; + + case 2: /* got 0x8e, X0201 kana */ + filter->status = 0; + if (c > 0xa0 && c < 0xe0) { + w = 0xfec0 + c; /* 0xa1-0xdf become 0xff61-0xff9f */ + CK((*filter->output_function)(w, filter->data)); + } else if ((c >= 0 && c < 0x21) || c == 0x7f) { /* CTLs */ + CK((*filter->output_function)(c, filter->data)); + } else { + w = 0x8e00 | c; + w &= MBFL_WCSGROUP_MASK; + w |= MBFL_WCSGROUP_THROUGH; + CK((*filter->output_function)(w, filter->data)); + } + break; + + default: + filter->status = 0; + break; + } + + return c; +} + +int +cp932ext3_to_cp51932(int c) /* c holds a lead byte above bit 7 and a trail byte in the low 8 bits; its sjistoidx index is relocated for the ranges starting at 0xfa40, 0xfa55 and 0xfa5c and returned as an idxtoeuc1/idxtoeuc2 byte pair. NOTE(review): external linkage, but no caller in this file and no declaration in mbfilter_cp51932.h - confirm whether it should be static */ +{ + int idx; + + idx = sjistoidx(c >> 8, c & 0xff); + if (idx >= sjistoidx(0xfa, 0x5c)) + idx -= sjistoidx(0xfa, 0x5c) - sjistoidx(0xed, 0x40); + else if (idx >= sjistoidx(0xfa, 0x55)) + idx -= sjistoidx(0xfa, 0x55) - sjistoidx(0xee, 0xfa); + else if (idx >= sjistoidx(0xfa, 0x40)) + idx -= sjistoidx(0xfa, 0x40) - sjistoidx(0xee, 0xef); + return idxtoeuc1(idx) << 8 | idxtoeuc2(idx); +} + +/* + * wchar => cp51932. Looks c up in the ucs_*_jis tables, then falls back to plane-tagged values, a fixed list of symbol overrides and linear scans of cp932ext1_ucs_table and cp932ext2_ucs_table. Input with no mapping is handed to mbfl_filt_conv_illegal_output unless illegal_mode is MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE. Returns c, or -1 when an output call fails. + */ +int +mbfl_filt_conv_wchar_cp51932(int c, mbfl_convert_filter *filter) +{ + int c1, c2, s1; + + s1 = 0; + if 
(c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max) { + s1 = ucs_a1_jis_table[c - ucs_a1_jis_table_min]; + } else if (c >= ucs_a2_jis_table_min && c < ucs_a2_jis_table_max) { + s1 = ucs_a2_jis_table[c - ucs_a2_jis_table_min]; + } else if (c >= ucs_i_jis_table_min && c < ucs_i_jis_table_max) { + s1 = ucs_i_jis_table[c - ucs_i_jis_table_min]; + } else if (c >= ucs_r_jis_table_min && c < ucs_r_jis_table_max) { + s1 = ucs_r_jis_table[c - ucs_r_jis_table_min]; + } + if (s1 >= 0x8080) s1 = -1; /* we don't support JIS X0213 */ + if (s1 <= 0) { + c1 = c & ~MBFL_WCSPLANE_MASK; + if (c1 == MBFL_WCSPLANE_WINCP932) { + s1 = c & MBFL_WCSPLANE_MASK; + if (s1 >= ((85 + 0x20) << 8)) { /* 85ku - 120ku */ + s1 = -1; + } + } else if (c1 == MBFL_WCSPLANE_JIS0208) { + s1 = c & MBFL_WCSPLANE_MASK; + if ((s1 >= ((85 + 0x20) << 8) && /* 85ku - 94ku */ + s1 <= ((88 + 0x20) << 8)) ||/* IBM extension */ + (s1 >= ((93 + 0x20) << 8) && /* 89ku - 92ku */ + s1 <= ((94 + 0x20) << 8))) { /* NOTE(review): the bounds tested here are rows 85-88 and 93-94, which differ from the row ranges named in the comments on this condition - confirm */ + s1 = -1; + } + } else if (c == 0xa5) { /* YEN SIGN */ + s1 = 0x216f; /* FULLWIDTH YEN SIGN */ + } else if (c == 0x203e) { /* OVER LINE */ + s1 = 0x2131; /* FULLWIDTH MACRON */ + } else if (c == 0xff3c) { /* FULLWIDTH REVERSE SOLIDUS */ + s1 = 0x2140; + } else if (c == 0xff5e) { /* FULLWIDTH TILDE */ + s1 = 0x2141; + } else if (c == 0x2225) { /* PARALLEL TO */ + s1 = 0x2142; + } else if (c == 0xff0d) { /* FULLWIDTH HYPHEN-MINUS */ + s1 = 0x215d; + } else if (c == 0xffe0) { /* FULLWIDTH CENT SIGN */ + s1 = 0x2171; + } else if (c == 0xffe1) { /* FULLWIDTH POUND SIGN */ + s1 = 0x2172; + } else if (c == 0xffe2) { /* FULLWIDTH NOT SIGN */ + s1 = 0x224c; + } else { + s1 = -1; + c1 = 0; + c2 = cp932ext1_ucs_table_max - cp932ext1_ucs_table_min; + while (c1 < c2) { /* CP932 vendor ext1 (13ku) */ + if (c == cp932ext1_ucs_table[c1]) { + s1 = ((c1/94 + 0x2d) << 8) + (c1%94 + 0x21); + break; + } + c1++; + } + if (s1 < 0) { + c1 = 0; + c2 = cp932ext2_ucs_table_max - cp932ext2_ucs_table_min; + while (c1 < c2) { /* CP932 
vendor ext2 (89ku - 92ku) */ + if (c == cp932ext2_ucs_table[c1]) { + s1 = ((c1/94 + 0x79) << 8) +(c1%94 + 0x21); + break; + } + c1++; + } + } + } + if (c == 0) { /* a zero input is always emitted as a zero byte */ + s1 = 0; + } else if (s1 <= 0) { + s1 = -1; + } + } + + if (s1 >= 0) { + if (s1 < 0x80) { /* latin */ + CK((*filter->output_function)(s1, filter->data)); + } else if (s1 < 0x100) { /* kana */ + CK((*filter->output_function)(0x8e, filter->data)); + CK((*filter->output_function)(s1, filter->data)); + } else if (s1 < 0x8080) { /* X 0208 */ + CK((*filter->output_function)(((s1 >> 8) & 0xff) | 0x80, filter->data)); + CK((*filter->output_function)((s1 & 0xff) | 0x80, filter->data)); + } else { + if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) { + CK(mbfl_filt_conv_illegal_output(c, filter)); + } + } + } else { + if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) { + CK(mbfl_filt_conv_illegal_output(c, filter)); + } + } + + return c; +} + +static int mbfl_filt_ident_cp51932(int c, mbfl_identify_filter *filter) /* sets filter->flag to 1 when a byte cannot start or continue a sequence in the current state; the flag is never cleared here. Always returns c. */ +{ + switch (filter->status) { + case 0: /* latin */ + if (c >= 0 && c < 0x80) { /* ok */ + ; + } else if (c > 0xa0 && c < 0xff) { /* kanji first char */ + filter->status = 1; + } else if (c == 0x8e) { /* kana first char */ + filter->status = 2; + } else { /* bad */ + filter->flag = 1; + } + break; + + case 1: /* got first half */ + if (c < 0xa1 || c > 0xfe) { /* bad */ + filter->flag = 1; + } + filter->status = 0; + break; + + case 2: /* got 0x8e */ + if (c < 0xa1 || c > 0xdf) { /* bad */ + filter->flag = 1; + } + filter->status = 0; + break; + + default: + filter->status = 0; + break; + } + + return c; +} + + diff -Naur libmbfl-1.0.1/filters/mbfilter_cp51932.h libmbfl/filters/mbfilter_cp51932.h --- libmbfl-1.0.1/filters/mbfilter_cp51932.h 1970-01-01 01:00:00.000000000 +0100 +++ libmbfl/filters/mbfilter_cp51932.h 2006-11-03 02:11:35.000000000 +0100 @@ -0,0 +1,43 @@ +/* + * "streamable kanji code filter and converter" + * Copyright (c) 1998-2002 HappySize, Inc. 
All rights reserved. + * + * LICENSE NOTICES + * + * This file is part of "streamable kanji code filter and converter", + * which is distributed under the terms of GNU Lesser General Public + * License (version 2) as published by the Free Software Foundation. + * + * This software is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with "streamable kanji code filter and converter"; + * if not, write to the Free Software Foundation, Inc., 59 Temple Place, + * Suite 330, Boston, MA 02111-1307 USA + * + * The author of this file: + * + */ +/* + * The source code included in this files was separated from mbfilter_ja.h + * by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002. + * + */ + +#ifndef MBFL_MBFILTER_CP51932_H +#define MBFL_MBFILTER_CP51932_H + +#include "mbfilter.h" + +extern const mbfl_encoding mbfl_encoding_cp51932; +extern const struct mbfl_identify_vtbl vtbl_identify_cp51932; +extern const struct mbfl_convert_vtbl vtbl_cp51932_wchar; +extern const struct mbfl_convert_vtbl vtbl_wchar_cp51932; + +int mbfl_filt_conv_cp51932_wchar(int c, mbfl_convert_filter *filter); +int mbfl_filt_conv_wchar_cp51932(int c, mbfl_convert_filter *filter); + +#endif /* MBFL_MBFILTER_CP51932_H */ diff -Naur libmbfl-1.0.1/filters/mbfilter_euc_jp_win.c libmbfl/filters/mbfilter_euc_jp_win.c --- libmbfl-1.0.1/filters/mbfilter_euc_jp_win.c 2002-12-20 19:17:31.000000000 +0100 +++ libmbfl/filters/mbfilter_euc_jp_win.c 2007-01-29 23:53:47.000000000 +0100 @@ -60,7 +60,8 @@ }; -static const char *mbfl_encoding_eucjp_win_aliases[] = {"eucJP-open", NULL}; +static const char *mbfl_encoding_eucjp_win_aliases[] = {"eucJP-open", + "eucJP-ms", NULL}; const struct mbfl_identify_vtbl vtbl_identify_eucjpwin = { 
mbfl_no_encoding_eucjp_win, @@ -203,6 +204,9 @@ s = (c1 - 0xa1)*94 + c - 0xa1; if (s >= 0 && s < jisx0212_ucs_table_size) { w = jisx0212_ucs_table[s]; + if (w == 0x007e) { + w = 0xff5e; /* FULLWIDTH TILDE */ + } } else if (s >= (82*94) && s < (84*94)) { /* vender ext3 (83ku - 84ku) <-> CP932 (115ku -120ku) */ s = (c1<< 8) | c; w = 0; @@ -221,6 +225,9 @@ } else { w = 0; } + if (w == 0x00A6) { + w = 0xFFE4; /* FULLWIDTH BROKEN BAR */ + } if (w <= 0) { w = ((c1 & 0x7f) << 8) | (c & 0x7f); w &= MBFL_WCSPLANE_MASK; @@ -273,6 +280,9 @@ c2 = s1%94 + 0xa1; s1 = (c1 << 8) | c2; } + if (s1 == 0xa2f1) { + s1 = 0x2d62; /* NUMERO SIGN */ + } if (s1 <= 0) { c1 = c & ~MBFL_WCSPLANE_MASK; if (c1 == MBFL_WCSPLANE_WINCP932) { @@ -310,6 +320,8 @@ s1 = 0x2172; } else if (c == 0xffe2) { /* FULLWIDTH NOT SIGN */ s1 = 0x224c; + } else if (c == 0xff5e) { /* FULLWIDTH TILDE */ + s1 = 0x2141; } else { s1 = -1; c1 = 0; diff -Naur libmbfl-1.0.1/filters/mbfilter_iso2022_jp_ms.c libmbfl/filters/mbfilter_iso2022_jp_ms.c --- libmbfl-1.0.1/filters/mbfilter_iso2022_jp_ms.c 1970-01-01 01:00:00.000000000 +0100 +++ libmbfl/filters/mbfilter_iso2022_jp_ms.c 2007-03-06 22:04:27.000000000 +0100 @@ -0,0 +1,522 @@ +/* + * "streamable kanji code filter and converter" + * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. + * + * LICENSE NOTICES + * + * This file is part of "streamable kanji code filter and converter", + * which is distributed under the terms of GNU Lesser General Public + * License (version 2) as published by the Free Software Foundation. + * + * This software is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with "streamable kanji code filter and converter"; + * if not, write to the Free Software Foundation, Inc., 59 Temple Place, + * Suite 330, Boston, MA 02111-1307 USA + * + * The author of this file: + * + */ +/* + * The source code included in this file was separated from mbfilter_ja.c + * by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002. + * + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "mbfilter.h" +#include "mbfilter_iso2022_jp_ms.h" + +#include "unicode_table_cp932_ext.h" +#include "unicode_table_jis.h" +#include "cp932_table.h" + +static int mbfl_filt_ident_2022jpms(int c, mbfl_identify_filter *filter); + +static const char *mbfl_encoding_2022jpms_aliases[] = {"ISO2022JPMS", NULL}; + +const mbfl_encoding mbfl_encoding_2022jpms = { + mbfl_no_encoding_2022jpms, + "ISO-2022-JP-MS", + "ISO-2022-JP", + (const char *(*)[])&mbfl_encoding_2022jpms_aliases, + NULL, + MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_SHFTCODE +}; + +const struct mbfl_identify_vtbl vtbl_identify_2022jpms = { + mbfl_no_encoding_2022jpms, + mbfl_filt_ident_common_ctor, + mbfl_filt_ident_common_dtor, + mbfl_filt_ident_2022jpms +}; + +const struct mbfl_convert_vtbl vtbl_2022jpms_wchar = { + mbfl_no_encoding_2022jpms, + mbfl_no_encoding_wchar, + mbfl_filt_conv_common_ctor, + mbfl_filt_conv_common_dtor, + mbfl_filt_conv_2022jpms_wchar, + mbfl_filt_conv_common_flush +}; + +const struct mbfl_convert_vtbl vtbl_wchar_2022jpms = { + mbfl_no_encoding_wchar, + mbfl_no_encoding_2022jpms, + mbfl_filt_conv_common_ctor, + mbfl_filt_conv_common_dtor, + mbfl_filt_conv_wchar_2022jpms, + mbfl_filt_conv_any_2022jpms_flush +}; + +#define CK(statement) do { if ((statement) < 0) return (-1); } while (0) /* makes the enclosing function return -1 when the wrapped call yields a negative value */ + +#define sjistoidx(c1, c2) \ + (((c1) > 0x9f) \ + ? (((c1) - 0xc1) * 188 + (c2) - (((c2) > 0x7e) ? 0x41 : 0x40)) \ + : (((c1) - 0x81) * 188 + (c2) - (((c2) > 0x7e) ? 
0x41 : 0x40))) +#define idxtojis1(c) (((c) / 94) + 0x21) +#define idxtojis2(c) (((c) % 94) + 0x21) + +/* + * ISO-2022-JP-MS => wchar. The bits of filter->status above the low 4 select the active character set (see the case comments below); the low 4 bits track progress through a two-byte character or an escape sequence. Returns c, or -1 when an output call fails. + */ +int +mbfl_filt_conv_2022jpms_wchar(int c, mbfl_convert_filter *filter) +{ + int c1, s, w; + +retry: /* re-entered after an unrecognized escape sequence has been written out, so that c is processed again with the low status bits cleared */ + switch (filter->status & 0xf) { +/* case 0x00: ASCII */ +/* case 0x10: X 0201 latin */ +/* case 0x20: X 0201 kana */ +/* case 0x80: X 0208 */ +/* case 0xa0: UDC */ + case 0: + if (c == 0x1b) { + filter->status += 2; + } else if (filter->status == 0x20 && c > 0x20 && c < 0x60) { /* kana */ + CK((*filter->output_function)(0xff40 + c, filter->data)); + } else if ((filter->status == 0x80 || filter->status == 0xa0) && c > 0x20 && c < 0x80) { /* kanji first char */ + filter->cache = c; + filter->status += 1; + } else if (c >= 0 && c < 0x80) { /* latin, CTLs */ + CK((*filter->output_function)(c, filter->data)); + } else if (c > 0xa0 && c < 0xe0) { /* GR kana */ + CK((*filter->output_function)(0xfec0 + c, filter->data)); + } else { + w = c & MBFL_WCSGROUP_MASK; + w |= MBFL_WCSGROUP_THROUGH; + CK((*filter->output_function)(w, filter->data)); + } + break; + +/* case 0x81: X 0208 second char */ +/* case 0xa1: UDC second char */ + case 1: + w = 0; + filter->status &= ~0xf; + c1 = filter->cache; + if (c > 0x20 && c < 0x7f) { + s = (c1 - 0x21)*94 + c - 0x21; /* linear cell index, 94 cells per first byte */ + if (filter->status == 0x80) { + if (s <= 137) { + if (s == 31) { + w = 0xff3c; /* FULLWIDTH REVERSE SOLIDUS */ + } else if (s == 32) { + w = 0xff5e; /* FULLWIDTH TILDE */ + } else if (s == 33) { + w = 0x2225; /* PARALLEL TO */ + } else if (s == 60) { + w = 0xff0d; /* FULLWIDTH HYPHEN-MINUS */ + } else if (s == 80) { + w = 0xffe0; /* FULLWIDTH CENT SIGN */ + } else if (s == 81) { + w = 0xffe1; /* FULLWIDTH POUND SIGN */ + } else if (s == 137) { + w = 0xffe2; /* FULLWIDTH NOT SIGN */ + } + } + if (w == 0) { + if (s >= cp932ext1_ucs_table_min && s < cp932ext1_ucs_table_max) { /* vendor ext1 (13ku) */ + w = cp932ext1_ucs_table[s - cp932ext1_ucs_table_min]; + } else if (s >= 0 && s < 
jisx0208_ucs_table_size) { + w = jisx0208_ucs_table[s]; + } else if (s >= cp932ext2_ucs_table_min && s < cp932ext2_ucs_table_max) { /* vendor ext2 (89ku - 92ku) */ + w = cp932ext2_ucs_table[s - cp932ext2_ucs_table_min]; + } else { + w = 0; + } + } + if (w <= 0) { + w = (c1 << 8) | c; + w &= MBFL_WCSPLANE_MASK; + w |= MBFL_WCSPLANE_JIS0208; + } + CK((*filter->output_function)(w, filter->data)); + } else { + if (c1 > 0x20 && c1 < 0x35) { + w = 0xe000 + (c1 - 0x21)*94 + c - 0x21; /* first bytes 0x21-0x34 map linearly upwards from 0xe000 */ + } + if (w <= 0) { + w = (((c1 - 0x21) + 0x7f) << 8) | c; + w &= MBFL_WCSPLANE_MASK; + w |= MBFL_WCSPLANE_JIS0208; + } + CK((*filter->output_function)(w, filter->data)); + } + } else if (c == 0x1b) { + filter->status += 2; + } else if ((c >= 0 && c < 0x21) || c == 0x7f) { /* CTLs */ + CK((*filter->output_function)(c, filter->data)); + } else { + w = (c1 << 8) | c; + w &= MBFL_WCSGROUP_MASK; + w |= MBFL_WCSGROUP_THROUGH; + CK((*filter->output_function)(w, filter->data)); + } + break; + + /* ESC */ +/* case 0x02: */ +/* case 0x12: */ +/* case 0x22: */ +/* case 0x82: */ +/* case 0xa2: */ + case 2: + if (c == 0x24) { /* '$' */ + filter->status++; + } else if (c == 0x28) { /* '(' */ + filter->status += 3; + } else { + filter->status &= ~0xf; + CK((*filter->output_function)(0x1b, filter->data)); + goto retry; + } + break; + + /* ESC $ */ +/* case 0x03: */ +/* case 0x13: */ +/* case 0x23: */ +/* case 0x83: */ +/* case 0xa3: */ + case 3: + if (c == 0x40 || c == 0x42) { /* '@' or 'B' */ + filter->status = 0x80; + } else if (c == 0x28) { /* '(' */ + filter->status++; + } else { + filter->status &= ~0xf; + CK((*filter->output_function)(0x1b, filter->data)); + CK((*filter->output_function)(0x24, filter->data)); + goto retry; + } + break; + + /* ESC $ ( */ +/* case 0x04: */ +/* case 0x14: */ +/* case 0x24: */ +/* case 0x84: */ +/* case 0xa4: */ + case 4: + if (c == 0x40 || c == 0x42) { /* '@' or 'B' */ + filter->status = 0x80; + } else if (c == 0x3f) { /* '?' 
*/ + filter->status = 0xa0; + } else { + filter->status &= ~0xf; + CK((*filter->output_function)(0x1b, filter->data)); + CK((*filter->output_function)(0x24, filter->data)); + CK((*filter->output_function)(0x28, filter->data)); + goto retry; + } + break; + + /* ESC ( */ +/* case 0x05: */ +/* case 0x15: */ +/* case 0x25: */ +/* case 0x85: */ +/* case 0xa5: */ + case 5: + if (c == 0x42) { /* 'B' */ + filter->status = 0; + } else if (c == 0x4a) { /* 'J' */ + filter->status = 0; + } else if (c == 0x49) { /* 'I' */ + filter->status = 0x20; + } else { + filter->status &= ~0xf; + CK((*filter->output_function)(0x1b, filter->data)); + CK((*filter->output_function)(0x28, filter->data)); + goto retry; + } + break; + + default: + filter->status = 0; + break; + } + + return c; +} + +static int +cp932ext3_cp932ext2_jis(int c) /* c is an offset added to sjistoidx(0xfa, 0x40); the resulting index is relocated for the ranges starting at 0xfa40, 0xfa55 and 0xfa5c and returned as an idxtojis1/idxtojis2 byte pair */ +{ + int idx; + + idx = sjistoidx(0xfa, 0x40) + c; + if (idx >= sjistoidx(0xfa, 0x5c)) + idx -= sjistoidx(0xfa, 0x5c) - sjistoidx(0xed, 0x40); + else if (idx >= sjistoidx(0xfa, 0x55)) + idx -= sjistoidx(0xfa, 0x55) - sjistoidx(0xee, 0xfa); + else if (idx >= sjistoidx(0xfa, 0x40)) + idx -= sjistoidx(0xfa, 0x40) - sjistoidx(0xee, 0xef); + return idxtojis1(idx) << 8 | idxtojis2(idx); +} + +/* + * wchar => ISO-2022-JP-MS + */ +int +mbfl_filt_conv_wchar_2022jpms(int c, mbfl_convert_filter *filter) +{ + int c1, c2, s1, s2; + + s1 = 0; + s2 = 0; + if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max) { + s1 = ucs_a1_jis_table[c - ucs_a1_jis_table_min]; + } else if (c >= ucs_a2_jis_table_min && c < ucs_a2_jis_table_max) { + s1 = ucs_a2_jis_table[c - ucs_a2_jis_table_min]; + } else if (c >= ucs_i_jis_table_min && c < ucs_i_jis_table_max) { + s1 = ucs_i_jis_table[c - ucs_i_jis_table_min]; + } else if (c >= ucs_r_jis_table_min && c < ucs_r_jis_table_max) { + s1 = ucs_r_jis_table[c - ucs_r_jis_table_min]; + } else if (c >= 0xe000 && c < (0xe000 + 20*94)) { /* user (95ku - 114ku) */ + s1 = c - 0xe000; + c1 = s1/94 + 0x7f; + c2 = s1%94 + 0x21; + s1 = (c1 << 8) | c2; 
+ } + if (s1 <= 0) { + c1 = c & ~MBFL_WCSPLANE_MASK; + if (c1 == MBFL_WCSPLANE_WINCP932) { + s1 = c & MBFL_WCSPLANE_MASK; + s2 = 1; + } else if (c1 == MBFL_WCSPLANE_JIS0208) { + s1 = c & MBFL_WCSPLANE_MASK; + } else if (c1 == MBFL_WCSPLANE_JIS0212) { + s1 = c & MBFL_WCSPLANE_MASK; + s1 |= 0x8080; + } else if (c == 0xa5) { /* YEN SIGN */ + s1 = 0x216f; /* FULLWIDTH YEN SIGN */ + } else if (c == 0x203e) { /* OVER LINE */ + s1 = 0x2131; /* FULLWIDTH MACRON */ + } else if (c == 0xff3c) { /* FULLWIDTH REVERSE SOLIDUS */ + s1 = 0x2140; + } else if (c == 0xff5e) { /* FULLWIDTH TILDE */ + s1 = 0x2141; + } else if (c == 0x2225) { /* PARALLEL TO */ + s1 = 0x2142; + } else if (c == 0xff0d) { /* FULLWIDTH HYPHEN-MINUS */ + s1 = 0x215d; + } else if (c == 0xffe0) { /* FULLWIDTH CENT SIGN */ + s1 = 0x2171; + } else if (c == 0xffe1) { /* FULLWIDTH POUND SIGN */ + s1 = 0x2172; + } else if (c == 0xffe2) { /* FULLWIDTH NOT SIGN */ + s1 = 0x224c; + } + } + if ((s1 <= 0) || (s1 >= 0xa1a1 && s2 == 0)) { /* not found or X 0212 */ + s1 = -1; + c1 = 0; + c2 = cp932ext1_ucs_table_max - cp932ext1_ucs_table_min; + while (c1 < c2) { /* CP932 vendor ext1 (13ku) */ + if (c == cp932ext1_ucs_table[c1]) { + s1 = ((c1/94 + 0x2d) << 8) + (c1%94 + 0x21); + break; + } + c1++; + } + if (s1 <= 0) { + c1 = 0; + c2 = cp932ext3_ucs_table_max - cp932ext3_ucs_table_min; + while (c1 < c2) { /* CP932 vendor ext3 (115ku - 119ku) */ + if (c == cp932ext3_ucs_table[c1]) { + s1 = cp932ext3_cp932ext2_jis(c1); + break; + } + c1++; + } + } + if (c == 0) { + s1 = 0; + } else if (s1 <= 0) { + s1 = -1; + } + } + if (s1 >= 0) { + if (s1 < 0x80) { /* latin */ + if ((filter->status & 0xff00) != 0) { + CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ + CK((*filter->output_function)(0x28, filter->data)); /* '(' */ + CK((*filter->output_function)(0x42, filter->data)); /* 'B' */ + } + CK((*filter->output_function)(s1, filter->data)); + filter->status = 0; + } else if (s1 > 0xa0 && s1 < 0xe0) { /* kana */ + if 
((filter->status & 0xff00) != 0x100) { + CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ + CK((*filter->output_function)(0x28, filter->data)); /* '(' */ + CK((*filter->output_function)(0x49, filter->data)); /* 'I' */ + } + filter->status = 0x100; + CK((*filter->output_function)(s1 & 0x7f, filter->data)); + } else if (s1 < 0x7e7f) { /* X 0208 */ + if ((filter->status & 0xff00) != 0x200) { + CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ + CK((*filter->output_function)(0x24, filter->data)); /* '$' */ + CK((*filter->output_function)(0x42, filter->data)); /* 'B' */ + } + filter->status = 0x200; + CK((*filter->output_function)((s1 >> 8) & 0xff, filter->data)); + CK((*filter->output_function)(s1 & 0x7f, filter->data)); + } else if (s1 < 0x927f) { /* UDC */ + if ((filter->status & 0xff00) != 0x800) { + CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ + CK((*filter->output_function)(0x24, filter->data)); /* '$' */ + CK((*filter->output_function)(0x28, filter->data)); /* '(' */ + CK((*filter->output_function)(0x3f, filter->data)); /* '?' 
*/ + } + filter->status = 0x800; + CK((*filter->output_function)(((s1 >> 8) - 0x5e) & 0x7f, filter->data)); + CK((*filter->output_function)(s1 & 0x7f, filter->data)); + } + } else { + if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) { + CK(mbfl_filt_conv_illegal_output(c, filter)); + } + } + + return c; +} + +int +mbfl_filt_conv_any_2022jpms_flush(mbfl_convert_filter *filter) +{ + /* back to latin */ + if ((filter->status & 0xff00) != 0) { + CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ + CK((*filter->output_function)(0x28, filter->data)); /* '(' */ + CK((*filter->output_function)(0x42, filter->data)); /* 'B' */ + } + filter->status &= 0xff; + return 0; +} + +static int mbfl_filt_ident_2022jpms(int c, mbfl_identify_filter *filter) +{ +retry: + switch (filter->status & 0xf) { +/* case 0x00: ASCII */ +/* case 0x10: X 0201 latin */ +/* case 0x20: X 0201 kana */ +/* case 0x80: X 0208 */ +/* case 0xa0: X UDC */ + case 0: + if (c == 0x1b) { + filter->status += 2; + } else if ((filter->status == 0x80 || filter->status == 0xa0) && c > 0x20 && c < 0x80) { /* kanji first char */ + filter->status += 1; + } else if (c >= 0 && c < 0x80) { /* latin, CTLs */ + ; + } else { + filter->flag = 1; /* bad */ + } + break; + +/* case 0x81: X 0208 second char */ +/* case 0xa1: UDC second char */ + case 1: + filter->status &= ~0xf; + if (c == 0x1b) { + goto retry; + } else if (c < 0x21 || c > 0x7e) { /* bad */ + filter->flag = 1; + } + break; + + /* ESC */ + case 2: + if (c == 0x24) { /* '$' */ + filter->status++; + } else if (c == 0x28) { /* '(' */ + filter->status += 3; + } else { + filter->flag = 1; /* bad */ + filter->status &= ~0xf; + goto retry; + } + break; + + /* ESC $ */ + case 3: + if (c == 0x40 || c == 0x42) { /* '@' or 'B' */ + filter->status = 0x80; + } else if (c == 0x28) { /* '(' */ + filter->status++; + } else { + filter->flag = 1; /* bad */ + filter->status &= ~0xf; + goto retry; + } + break; + + /* ESC $ ( */ + case 4: + if (c == 0x40 || c 
== 0x42) { /* '@' or 'B' */ + filter->status = 0x80; + } else if (c == 0x3f) { /* '?' */ + filter->status = 0xa0; + } else { + filter->flag = 1; /* bad */ + filter->status &= ~0xf; + goto retry; + } + break; + + /* ESC ( */ + case 5: + if (c == 0x42) { /* 'B' */ + filter->status = 0; + } else if (c == 0x4a) { /* 'J' */ + filter->status = 0; + } else if (c == 0x49) { /* 'I' */ + filter->status = 0x20; + } else { + filter->flag = 1; /* bad */ + filter->status &= ~0xf; + goto retry; + } + break; + + default: + filter->status = 0; + break; + } + + return c; +} diff -Naur libmbfl-1.0.1/filters/mbfilter_iso2022_jp_ms.h libmbfl/filters/mbfilter_iso2022_jp_ms.h --- libmbfl-1.0.1/filters/mbfilter_iso2022_jp_ms.h 1970-01-01 01:00:00.000000000 +0100 +++ libmbfl/filters/mbfilter_iso2022_jp_ms.h 2007-01-29 23:53:47.000000000 +0100 @@ -0,0 +1,44 @@ +/* + * "streamable kanji code filter and converter" + * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. + * + * LICENSE NOTICES + * + * This file is part of "streamable kanji code filter and converter", + * which is distributed under the terms of GNU Lesser General Public + * License (version 2) as published by the Free Software Foundation. + * + * This software is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with "streamable kanji code filter and converter"; + * if not, write to the Free Software Foundation, Inc., 59 Temple Place, + * Suite 330, Boston, MA 02111-1307 USA + * + * The author of this file: + * + */ +/* + * The source code included in this files was separated from mbfilter_ja.c + * by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002. 
+ * + */ + +#ifndef MBFL_MBFILTER_ISO2022_JP_MS_H +#define MBFL_MBFILTER_ISO2022_JP_MS_H + +#include "mbfilter.h" + +extern const mbfl_encoding mbfl_encoding_2022jpms; +extern const struct mbfl_identify_vtbl vtbl_identify_2022jpms; +extern const struct mbfl_convert_vtbl vtbl_2022jpms_wchar; +extern const struct mbfl_convert_vtbl vtbl_wchar_2022jpms; + +int mbfl_filt_conv_2022jpms_wchar(int c, mbfl_convert_filter *filter); +int mbfl_filt_conv_wchar_2022jpms(int c, mbfl_convert_filter *filter); +int mbfl_filt_conv_any_2022jpms_flush(mbfl_convert_filter *filter); + +#endif /* MBFL_MBFILTER_ISO2022_JP_MS_H */ diff -Naur libmbfl-1.0.1/filters/mbfilter_koi8u.c libmbfl/filters/mbfilter_koi8u.c --- libmbfl-1.0.1/filters/mbfilter_koi8u.c 1970-01-01 01:00:00.000000000 +0100 +++ libmbfl/filters/mbfilter_koi8u.c 2008-07-05 08:52:04.000000000 +0200 @@ -0,0 +1,146 @@ +/* + * "streamable kanji code filter and converter" + * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. + * + * LICENSE NOTICES + * + * This file is part of "streamable kanji code filter and converter", + * which is distributed under the terms of GNU Lesser General Public + * License (version 2) as published by the Free Software Foundation. + * + * This software is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with "streamable kanji code filter and converter"; + * if not, write to the Free Software Foundation, Inc., 59 Temple Place, + * Suite 330, Boston, MA 02111-1307 USA + * + * The author of this part: Maksym Veremeyenko <verem@m1.tv> + * + * Based on mbfilter_koi8r.c code + * + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "mbfilter.h" +#include "mbfilter_koi8u.h" +#include "unicode_table_koi8u.h" + +static int mbfl_filt_ident_koi8u(int c, mbfl_identify_filter *filter); + +static const char *mbfl_encoding_koi8u_aliases[] = {"KOI8-U", "KOI8U", NULL}; + +const mbfl_encoding mbfl_encoding_koi8u = { + mbfl_no_encoding_koi8u, + "KOI8-U", + "KOI8-U", + (const char *(*)[])&mbfl_encoding_koi8u_aliases, + NULL, + MBFL_ENCTYPE_SBCS +}; + +const struct mbfl_identify_vtbl vtbl_identify_koi8u = { + mbfl_no_encoding_koi8u, + mbfl_filt_ident_common_ctor, + mbfl_filt_ident_common_dtor, + mbfl_filt_ident_koi8u +}; + +const struct mbfl_convert_vtbl vtbl_wchar_koi8u = { + mbfl_no_encoding_wchar, + mbfl_no_encoding_koi8u, + mbfl_filt_conv_common_ctor, + mbfl_filt_conv_common_dtor, + mbfl_filt_conv_wchar_koi8u, + mbfl_filt_conv_common_flush +}; + +const struct mbfl_convert_vtbl vtbl_koi8u_wchar = { + mbfl_no_encoding_koi8u, + mbfl_no_encoding_wchar, + mbfl_filt_conv_common_ctor, + mbfl_filt_conv_common_dtor, + mbfl_filt_conv_koi8u_wchar, + mbfl_filt_conv_common_flush +}; + +#define CK(statement) do { if ((statement) < 0) return (-1); } while (0) + +/* + * koi8u => wchar + */ +int +mbfl_filt_conv_koi8u_wchar(int c, mbfl_convert_filter *filter) +{ + int s; + + if (c >= 0 && c < koi8u_ucs_table_min) { + s = c; + } else if (c >= koi8u_ucs_table_min && c < 0x100) { + s = koi8u_ucs_table[c - koi8u_ucs_table_min]; + if (s <= 0) { + s = c; + s &= MBFL_WCSPLANE_MASK; + s |= MBFL_WCSPLANE_KOI8U; + } + } else { + s = c; + s &= MBFL_WCSGROUP_MASK; + s |= MBFL_WCSGROUP_THROUGH; + } + + 
CK((*filter->output_function)(s, filter->data)); + + return c; +} + +/* + * wchar => koi8u: emit the KOI8-U byte for c through filter->output_function, or hand c to the illegal-character path when nothing maps + */ +int +mbfl_filt_conv_wchar_koi8u(int c, mbfl_convert_filter *filter) +{ + int s, n; + + if (c < 0x80) { /* below 0x80 the byte value is c itself; a negative c fails the s >= 0 test further down */ + s = c; + } else { + s = -1; + n = koi8u_ucs_table_len-1; /* linear scan of koi8u_ucs_table from its last entry down */ + while (n >= 0) { + if (c == koi8u_ucs_table[n]) { + s = koi8u_ucs_table_min + n; /* table index back to byte value */ + break; + } + n--; + } + if (s <= 0 && (c & ~MBFL_WCSPLANE_MASK) == MBFL_WCSPLANE_KOI8U) { /* no table hit, but c carries the KOI8U plane tag: keep only the MBFL_WCSPLANE_MASK bits */ + s = c & MBFL_WCSPLANE_MASK; + } + } + + if (s >= 0) { + CK((*filter->output_function)(s, filter->data)); + } else { + if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) { /* unmappable input is dropped silently when the mode is NONE */ + CK(mbfl_filt_conv_illegal_output(c, filter)); + } + } + + return c; /* the input value is returned; CK() returns -1 early when a callee reports a negative result */ +} + +static int mbfl_filt_ident_koi8u(int c, mbfl_identify_filter *filter) +{ + if (c >= 0x80 && c < 0xff) /* NOTE(review): only 0x80..0xfe clear the flag; 0xff and everything below 0x80 set it, confirm this is intended */ + filter->flag = 0; + else + filter->flag = 1; /* not it */ + return c; +} diff -Naur libmbfl-1.0.1/filters/mbfilter_koi8u.h libmbfl/filters/mbfilter_koi8u.h --- libmbfl-1.0.1/filters/mbfilter_koi8u.h 1970-01-01 01:00:00.000000000 +0100 +++ libmbfl/filters/mbfilter_koi8u.h 2008-07-05 08:52:04.000000000 +0200 @@ -0,0 +1,47 @@ +/* + * "streamable kanji code filter and converter" + * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. + * + * LICENSE NOTICES + * + * This file is part of "streamable kanji code filter and converter", + * which is distributed under the terms of GNU Lesser General Public + * License (version 2) as published by the Free Software Foundation. + * + * This software is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with "streamable kanji code filter and converter"; + * if not, write to the Free Software Foundation, Inc., 59 Temple Place, + * Suite 330, Boston, MA 02111-1307 USA + * + * The author of this part: Maksym Veremeyenko <verem@m1.tv> + * + * Based on mbfilter_koi8r.h code + * + */ + +#ifndef MBFL_MBFILTER_KOI8U_H +#define MBFL_MBFILTER_KOI8U_H + +#include "mbfilter.h" + +extern const mbfl_encoding mbfl_encoding_koi8u; +extern const struct mbfl_identify_vtbl vtbl_identify_koi8u; +extern const struct mbfl_convert_vtbl vtbl_wchar_koi8u; +extern const struct mbfl_convert_vtbl vtbl_koi8u_wchar; + +int mbfl_filt_conv_koi8u_wchar(int c, mbfl_convert_filter *filter); +int mbfl_filt_conv_wchar_koi8u(int c, mbfl_convert_filter *filter); + +#endif /* MBFL_MBFILTER_KOI8U_H */ + +/* + * Local variables: + * tab-width: 4 + * c-basic-offset: 4 + * End: + */ diff -Naur libmbfl-1.0.1/filters/mk_sb_tbl.awk libmbfl/filters/mk_sb_tbl.awk --- libmbfl-1.0.1/filters/mk_sb_tbl.awk 2005-02-21 08:53:17.000000000 +0100 +++ libmbfl/filters/mk_sb_tbl.awk 2005-02-21 08:57:08.000000000 +0100 @@ -1,6 +1,6 @@ #!/usr/bin/awk -f # -# $Id: mk_sb_tbl.awk,v 1.1.2.3 2005/02/21 07:53:17 moriyoshi Exp $ +# $Id: mk_sb_tbl.awk,v 1.2 2005/02/21 07:57:08 moriyoshi Exp $ # # Description: a script that generates a single byte code set to Unicode # mapping table. diff -Naur libmbfl-1.0.1/filters/unicode_table_cp1254.h libmbfl/filters/unicode_table_cp1254.h --- libmbfl-1.0.1/filters/unicode_table_cp1254.h 1970-01-01 01:00:00.000000000 +0100 +++ libmbfl/filters/unicode_table_cp1254.h 2008-07-05 09:30:01.000000000 +0200 @@ -0,0 +1,51 @@ +/* + * "streamable kanji code filter and converter" + * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. 
+ * + * LICENSE NOTICES + * + * This file is part of "streamable kanji code filter and converter", + * which is distributed under the terms of GNU Lesser General Public + * License (version 2) as published by the Free Software Foundation. + * + * This software is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with "streamable kanji code filter and converter"; + * if not, write to the Free Software Foundation, Inc., 59 Temple Place, + * Suite 330, Boston, MA 02111-1307 USA + * + * The authors of this file: PHP3 internationalization team + * You can contact the primary author 金本 茂 <sgk@happysize.co.jp>. + * + */ + +#ifndef UNICODE_TABLE_CP1254_H +#define UNICODE_TABLE_CP1254_H +/* cp1254 to Unicode table: entry i holds the code point for byte cp1254_ucs_table_min (0x80) plus i; 0xfffe looks like the no-mapping placeholder, confirm in mbfilter_cp1254.c. The guard define sits on the line right after the ifndef (formerly blank) so the hunk line count is unchanged. */ +static const unsigned short cp1254_ucs_table[] = { + 0x20ac, 0xfffe, 0x201a, 0x0192, 0x201e, 0x2026, 0x2020, 0x2021, + 0x02c6, 0x2030, 0x0160, 0x2039, 0x0152, 0xfffe, 0xfffe, 0xfffe, + 0xfffe, 0x2018, 0x2019, 0x201c, 0x201d, 0x2022, 0x2013, 0x2014, + 0x02dc, 0x2122, 0x0161, 0x203a, 0x0153, 0xfffe, 0xfffe, 0x0178, + 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7, + 0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af, + 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7, + 0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf, + 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7, + 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf, + 0x011e, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7, + 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0130, 0x015e, 0x00df, + 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7, + 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef, + 0x011f, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 
0x00f7, + 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0131, 0x015f, 0x00ff +}; +static const int cp1254_ucs_table_min = 0x80; +static const int cp1254_ucs_table_len = (sizeof (cp1254_ucs_table) / sizeof (unsigned short)); +static const int cp1254_ucs_table_max = 0x80 + (sizeof (cp1254_ucs_table) / sizeof (unsigned short)); + +#endif /* UNICODE_TABLE_CP1254_H */ diff -Naur libmbfl-1.0.1/filters/unicode_table_koi8u.h libmbfl/filters/unicode_table_koi8u.h --- libmbfl-1.0.1/filters/unicode_table_koi8u.h 1970-01-01 01:00:00.000000000 +0100 +++ libmbfl/filters/unicode_table_koi8u.h 2008-07-05 08:52:04.000000000 +0200 @@ -0,0 +1,166 @@ +/* + * "streamable kanji code filter and converter" + * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. + * + * LICENSE NOTICES + * + * This file is part of "streamable kanji code filter and converter", + * which is distributed under the terms of GNU Lesser General Public + * License (version 2) as published by the Free Software Foundation. + * + * This software is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with "streamable kanji code filter and converter"; + * if not, write to the Free Software Foundation, Inc., 59 Temple Place, + * Suite 330, Boston, MA 02111-1307 USA + * + * The author of this part: Maksym Veremeyenko <verem@m1.tv> + * + */ + +#ifndef UNICODE_TABLE_KOI8U_H +#define UNICODE_TABLE_KOI8U_H + +/* KOI8-U (RFC2319) to Unicode */ +static const unsigned short koi8u_ucs_table[] = { + 0x2500, /* BOX DRAWINGS LIGHT HORIZONTAL */ + 0x2502, /* BOX DRAWINGS LIGHT VERTICAL */ + 0x250C, /* BOX DRAWINGS LIGHT DOWN AND RIGHT */ + 0x2510, /* BOX DRAWINGS LIGHT DOWN AND LEFT */ + 0x2514, /* BOX DRAWINGS LIGHT UP AND RIGHT */ + 0x2518, /* BOX DRAWINGS LIGHT UP AND LEFT */ + 0x251C, /* BOX DRAWINGS LIGHT VERTICAL AND RIGHT */ + 0x2524, /* BOX DRAWINGS LIGHT VERTICAL AND LEFT */ + 0x252C, /* BOX DRAWINGS LIGHT DOWN AND HORIZONTAL */ + 0x2534, /* BOX DRAWINGS LIGHT UP AND HORIZONTAL */ + 0x253C, /* BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL */ + 0x2580, /* UPPER HALF BLOCK */ + 0x2584, /* LOWER HALF BLOCK */ + 0x2588, /* FULL BLOCK */ + 0x258C, /* LEFT HALF BLOCK */ + 0x2590, /* RIGHT HALF BLOCK */ + 0x2591, /* LIGHT SHADE */ + 0x2592, /* MEDIUM SHADE */ + 0x2593, /* DARK SHADE */ + 0x2320, /* TOP HALF INTEGRAL */ + 0x25A0, /* BLACK SQUARE */ + 0x2219, /* BULLET OPERATOR */ + 0x221A, /* SQUARE ROOT */ + 0x2248, /* ALMOST EQUAL TO */ + 0x2264, /* LESS THAN OR EQUAL TO */ + 0x2265, /* GREATER THAN OR EQUAL TO */ + 0x00A0, /* NO-BREAK SPACE */ + 0x2321, /* BOTTOM HALF INTEGRAL */ + 0x00B0, /* DEGREE SIGN */ + 0x00B2, /* SUPERSCRIPT TWO */ + 0x00B7, /* MIDDLE DOT */ + 0x00F7, /* DIVISION SIGN */ + 0x2550, /* BOX DRAWINGS DOUBLE HORIZONTAL */ + 0x2551, /* BOX DRAWINGS DOUBLE VERTICAL */ + 0x2552, /* BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE */ + 0x0451, /* CYRILLIC SMALL LETTER IO */ + 0x0454, /* CYRILLIC SMALL LETTER UKRAINIAN IE */ + 0x2554, /* BOX DRAWINGS DOUBLE DOWN AND RIGHT */ + 
0x0456, /* CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I */ + 0x0457, /* CYRILLIC SMALL LETTER YI (UKRAINIAN) */ + 0x2557, /* BOX DRAWINGS DOUBLE DOWN AND LEFT */ + 0x2558, /* BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE */ + 0x2559, /* BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE */ + 0x255A, /* BOX DRAWINGS DOUBLE UP AND RIGHT */ + 0x255B, /* BOX DRAWINGS UP SINGLE AND LEFT DOUBLE */ + 0x0491, /* CYRILLIC SMALL LETTER GHE WITH UPTURN */ + 0x255D, /* BOX DRAWINGS DOUBLE UP AND LEFT */ + 0x255E, /* BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE */ + 0x255F, /* BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE */ + 0x2560, /* BOX DRAWINGS DOUBLE VERTICAL AND RIGHT */ + 0x2561, /* BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE */ + 0x0401, /* CYRILLIC CAPITAL LETTER IO */ + 0x0404, /* CYRILLIC CAPITAL LETTER UKRAINIAN IE */ + 0x2563, /* BOX DRAWINGS DOUBLE VERTICAL AND LEFT */ + 0x0406, /* CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I */ + 0x0407, /* CYRILLIC CAPITAL LETTER YI (UKRAINIAN) */ + 0x2566, /* BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL */ + 0x2567, /* BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE */ + 0x2568, /* BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE */ + 0x2569, /* BOX DRAWINGS DOUBLE UP AND HORIZONTAL */ + 0x256A, /* BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE */ + 0x0490, /* CYRILLIC CAPITAL LETTER GHE WITH UPTURN */ + 0x256C, /* BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL */ + 0x00A9, /* COPYRIGHT SIGN */ + 0x044E, /* CYRILLIC SMALL LETTER YU */ + 0x0430, /* CYRILLIC SMALL LETTER A */ + 0x0431, /* CYRILLIC SMALL LETTER BE */ + 0x0446, /* CYRILLIC SMALL LETTER TSE */ + 0x0434, /* CYRILLIC SMALL LETTER DE */ + 0x0435, /* CYRILLIC SMALL LETTER IE */ + 0x0444, /* CYRILLIC SMALL LETTER EF */ + 0x0433, /* CYRILLIC SMALL LETTER GHE */ + 0x0445, /* CYRILLIC SMALL LETTER KHA */ + 0x0438, /* CYRILLIC SMALL LETTER I */ + 0x0439, /* CYRILLIC SMALL LETTER SHORT I */ + 0x043A, /* CYRILLIC SMALL LETTER KA */ + 0x043B, /* CYRILLIC SMALL LETTER EL */ + 0x043C, /* CYRILLIC 
SMALL LETTER EM */ + 0x043D, /* CYRILLIC SMALL LETTER EN */ + 0x043E, /* CYRILLIC SMALL LETTER O */ + 0x043F, /* CYRILLIC SMALL LETTER PE */ + 0x044F, /* CYRILLIC SMALL LETTER YA */ + 0x0440, /* CYRILLIC SMALL LETTER ER */ + 0x0441, /* CYRILLIC SMALL LETTER ES */ + 0x0442, /* CYRILLIC SMALL LETTER TE */ + 0x0443, /* CYRILLIC SMALL LETTER U */ + 0x0436, /* CYRILLIC SMALL LETTER ZHE */ + 0x0432, /* CYRILLIC SMALL LETTER VE */ + 0x044C, /* CYRILLIC SMALL LETTER SOFT SIGN */ + 0x044B, /* CYRILLIC SMALL LETTER YERU */ + 0x0437, /* CYRILLIC SMALL LETTER ZE */ + 0x0448, /* CYRILLIC SMALL LETTER SHA */ + 0x044D, /* CYRILLIC SMALL LETTER E */ + 0x0449, /* CYRILLIC SMALL LETTER SHCHA */ + 0x0447, /* CYRILLIC SMALL LETTER CHE */ + 0x044A, /* CYRILLIC SMALL LETTER HARD SIGN */ + 0x042E, /* CYRILLIC CAPITAL LETTER YU */ + 0x0410, /* CYRILLIC CAPITAL LETTER A */ + 0x0411, /* CYRILLIC CAPITAL LETTER BE */ + 0x0426, /* CYRILLIC CAPITAL LETTER TSE */ + 0x0414, /* CYRILLIC CAPITAL LETTER DE */ + 0x0415, /* CYRILLIC CAPITAL LETTER IE */ + 0x0424, /* CYRILLIC CAPITAL LETTER EF */ + 0x0413, /* CYRILLIC CAPITAL LETTER GHE */ + 0x0425, /* CYRILLIC CAPITAL LETTER KHA */ + 0x0418, /* CYRILLIC CAPITAL LETTER I */ + 0x0419, /* CYRILLIC CAPITAL LETTER SHORT I */ + 0x041A, /* CYRILLIC CAPITAL LETTER KA */ + 0x041B, /* CYRILLIC CAPITAL LETTER EL */ + 0x041C, /* CYRILLIC CAPITAL LETTER EM */ + 0x041D, /* CYRILLIC CAPITAL LETTER EN */ + 0x041E, /* CYRILLIC CAPITAL LETTER O */ + 0x041F, /* CYRILLIC CAPITAL LETTER PE */ + 0x042F, /* CYRILLIC CAPITAL LETTER YA */ + 0x0420, /* CYRILLIC CAPITAL LETTER ER */ + 0x0421, /* CYRILLIC CAPITAL LETTER ES */ + 0x0422, /* CYRILLIC CAPITAL LETTER TE */ + 0x0423, /* CYRILLIC CAPITAL LETTER U */ + 0x0416, /* CYRILLIC CAPITAL LETTER ZHE */ + 0x0412, /* CYRILLIC CAPITAL LETTER VE */ + 0x042C, /* CYRILLIC CAPITAL LETTER SOFT SIGN */ + 0x042B, /* CYRILLIC CAPITAL LETTER YERU */ + 0x0417, /* CYRILLIC CAPITAL LETTER ZE */ + 0x0428, /* CYRILLIC CAPITAL LETTER SHA */ + 
0x042D, /* CYRILLIC CAPITAL LETTER E */ + 0x0429, /* CYRILLIC CAPITAL LETTER SHCHA */ + 0x0427, /* CYRILLIC CAPITAL LETTER CHE */ + 0x042A /* CYRILLIC CAPITAL LETTER HARD SIGN */ +}; +static const int koi8u_ucs_table_min = 0x80; +static const int koi8u_ucs_table_len = (sizeof (koi8u_ucs_table) / sizeof (unsigned short)); +static const int koi8u_ucs_table_max = 0x80 + (sizeof (koi8u_ucs_table) / sizeof (unsigned short)); + + + +#endif /* UNICODE_TABLE_KOI8U_H */ + diff -Naur libmbfl-1.0.1/mbfl/eaw_table.h libmbfl/mbfl/eaw_table.h --- libmbfl-1.0.1/mbfl/eaw_table.h 2006-01-21 05:16:47.000000000 +0100 +++ libmbfl/mbfl/eaw_table.h 2004-10-08 16:40:46.000000000 +0200 @@ -2,16 +2,35 @@ int begin; int end; } mbfl_eaw_table[] = { - { 0x0000, 0x0000 }, - { 0x0000, 0x0000 }, - { 0x0000, 0x0000 }, - { 0x0000, 0x0000 }, - { 0x0000, 0x0000 }, - { 0x0000, 0x0000 }, - { 0x0000, 0x0000 }, - { 0x0000, 0x0000 }, - { 0x0000, 0x0000 }, - { 0x0000, 0x0000 }, - { 0x0000, 0x0000 }, - { 0x0000, 0x0000 } + { 0x1100, 0x1159 }, + { 0x115f, 0x115f }, + { 0x2329, 0x232a }, + { 0x2e80, 0x2e99 }, + { 0x2e9b, 0x2ef3 }, + { 0x2f00, 0x2fd5 }, + { 0x2ff0, 0x2ffb }, + { 0x3000, 0x303e }, + { 0x3041, 0x3096 }, + { 0x3099, 0x30ff }, + { 0x3105, 0x312c }, + { 0x3131, 0x318e }, + { 0x3190, 0x31b7 }, + { 0x31f0, 0x321e }, + { 0x3220, 0x3243 }, + { 0x3250, 0x327d }, + { 0x327f, 0x32fe }, + { 0x3300, 0x4db5 }, + { 0x4e00, 0x9fa5 }, + { 0xa000, 0xa48c }, + { 0xa490, 0xa4c6 }, + { 0xac00, 0xd7a3 }, + { 0xf900, 0xfa2d }, + { 0xfa30, 0xfa6a }, + { 0xfe30, 0xfe52 }, + { 0xfe54, 0xfe66 }, + { 0xfe68, 0xfe6b }, + { 0xff01, 0xff60 }, + { 0xffe0, 0xffe6 }, + { 0x20000, 0x2fffd }, + { 0x30000, 0x3fffd } }; diff -Naur libmbfl-1.0.1/mbfl/Makefile.am libmbfl/mbfl/Makefile.am --- libmbfl-1.0.1/mbfl/Makefile.am 2004-06-29 14:13:37.000000000 +0200 +++ libmbfl/mbfl/Makefile.am 2008-07-05 08:52:04.000000000 +0200 @@ -1,12 +1,37 @@ EXTRA_DIST=Makefile.bcc32 mk_eaw_tbl.awk lib_LTLIBRARIES=libmbfl.la 
-libmbfl_la_SOURCES=mbfilter.c mbfl_string.c mbfl_language.c mbfl_encoding.c mbfl_convert.c mbfl_ident.c mbfl_memory_device.c mbfl_allocators.c mbfl_filter_output.c mbfilter_pass.c mbfilter_wchar.c mbfilter_8bit.c eaw_table.h +libmbfl_la_SOURCES=mbfilter.c \ + mbfl_string.c \ + mbfl_language.c \ + mbfl_encoding.c \ + mbfl_convert.c \ + mbfl_ident.c \ + mbfl_memory_device.c \ + mbfl_allocators.c \ + mbfl_filter_output.c \ + mbfilter_pass.c \ + mbfilter_wchar.c \ + mbfilter_8bit.c \ + eaw_table.h libmbfl_filters_la=../filters/libmbfl_filters.la libmbfl_nls_la=../nls/libmbfl_nls.la libmbfl_la_LIBADD=$(libmbfl_filters_la) $(libmbfl_nls_la) libmbfl_la_LDFLAGS=-version-info $(SHLIB_VERSION) libmbfl_includedir=$(includedir)/mbfl -libmbfl_include_HEADERS=mbfilter.h mbfl_consts.h mbfl_encoding.h mbfl_language.h mbfl_string.h mbfl_convert.h mbfl_ident.h mbfl_memory_device.h mbfl_allocators.h mbfl_defs.h mbfl_filter_output.h mbfilter_pass.h mbfilter_wchar.h mbfilter_8bit.h +libmbfl_include_HEADERS=mbfilter.h \ + mbfl_consts.h \ + mbfl_encoding.h \ + mbfl_language.h \ + mbfl_string.h \ + mbfl_convert.h \ + mbfl_ident.h \ + mbfl_memory_device.h \ + mbfl_allocators.h \ + mbfl_defs.h \ + mbfl_filter_output.h \ + mbfilter_pass.h \ + mbfilter_wchar.h \ + mbfilter_8bit.h mbfilter.c: eaw_table.h diff -Naur libmbfl-1.0.1/mbfl/mbfilter_8bit.h libmbfl/mbfl/mbfilter_8bit.h --- libmbfl-1.0.1/mbfl/mbfilter_8bit.h 2004-02-04 05:17:51.000000000 +0100 +++ libmbfl/mbfl/mbfilter_8bit.h 2008-07-05 08:52:04.000000000 +0200 @@ -34,6 +34,6 @@ #include "mbfl_defs.h" #include "mbfilter.h" -MBFLAPI extern const mbfl_encoding mbfl_encoding_8bit; +extern const mbfl_encoding mbfl_encoding_8bit; #endif /* MBFL_MBFILTER_8BIT_H */ diff -Naur libmbfl-1.0.1/mbfl/mbfilter.c libmbfl/mbfl/mbfilter.c --- libmbfl-1.0.1/mbfl/mbfilter.c 2006-01-21 04:35:09.000000000 +0100 +++ libmbfl/mbfl/mbfilter.c 2007-08-19 03:58:39.000000000 +0200 @@ -331,6 +331,24 @@ return mbfl_memory_device_result(&convd->device, result); } 
+int mbfl_buffer_illegalchars(mbfl_buffer_converter *convd) +{ + int num_illegalchars = 0; /* total of num_illegalchar over whichever of filter1 and filter2 exist */ + + if (convd == NULL) { + return 0; + } + + if (convd->filter1 != NULL) { + num_illegalchars += convd->filter1->num_illegalchar; + } + + if (convd->filter2 != NULL) { + num_illegalchars += convd->filter2->num_illegalchar; + } + + return (num_illegalchars); +} /* * encoding detector @@ -371,9 +389,9 @@ } identd->filter_list_size = num; - /* set strict flag */ - identd->strict = strict; - + /* set strict flag */ + identd->strict = strict; + return identd; } @@ -444,25 +462,24 @@ while (n >= 0) { filter = identd->filter_list[n]; if (!filter->flag) { - if (identd->strict && filter->status) { - continue; - } - encoding = filter->encoding->no_encoding; + /* strict mode: pass over a filter whose status is non-zero, but always fall through to the decrement below (a bare continue inside this while loop never terminates) */ + if (!identd->strict || !filter->status) { + encoding = filter->encoding->no_encoding; + } } n--; } - - /* fallback judge */ - if (encoding == mbfl_no_encoding_invalid) { - n = identd->filter_list_size - 1; - while (n >= 0) { - filter = identd->filter_list[n]; - if (!filter->flag) { - encoding = filter->encoding->no_encoding; - } - n--; - } - } + + if (encoding == mbfl_no_encoding_invalid) { + n = identd->filter_list_size - 1; + while (n >= 0) { + filter = identd->filter_list[n]; + if (!filter->flag) { + encoding = filter->encoding->no_encoding; + } + n--; + } + } } return encoding; @@ -594,25 +611,25 @@ for (i = 0; i < num; i++) { filter = &flist[i]; if (!filter->flag) { - if (strict && filter->status) { - continue; - } + if (strict && filter->status) { + continue; + } encoding = filter->encoding; break; } } - /* fall-back judge */ - if (!encoding) { - for (i = 0; i < num; i++) { - filter = &flist[i]; - if (!filter->flag) { - encoding = filter->encoding; - break; - } - } - } - + /* fall-back judge */ + if (!encoding) { + for (i = 0; i < num; i++) { + filter = &flist[i]; + if (!filter->flag) { + encoding = filter->encoding; + break; + } + } + } + /* cleanup */ /* dtors should be called in reverse order */ i = num; while (--i >= 0) { @@ -841,7 +858,7 @@ int offset, 
int reverse) { - int n, result; + int n, result, negative_offset = 0; unsigned char *p; mbfl_convert_filter *filter; struct collector_strpos_data pc; @@ -887,6 +904,12 @@ mbfl_wchar_device_clear(&pc.needle); return -4; } + + if (offset < 0) { + negative_offset = -offset-1; + offset = 0; + } + pc.start = offset; pc.output = 0; pc.needle_pos = 0; @@ -895,7 +918,7 @@ /* feed data */ p = haystack->val; - n = haystack->len; + n = haystack->len - negative_offset; if (p != NULL) { while (n > 0) { if ((*filter->filter_function)(*p++, filter) < 0) { diff -Naur libmbfl-1.0.1/mbfl/mbfilter.h libmbfl/mbfl/mbfilter.h --- libmbfl-1.0.1/mbfl/mbfilter.h 2006-01-21 04:36:56.000000000 +0100 +++ libmbfl/mbfl/mbfilter.h 2008-07-05 08:52:04.000000000 +0200 @@ -99,11 +99,19 @@ #include "mbfl_ident.h" /* + * version information + */ +#define MBFL_VERSION_MAJOR 1 +#define MBFL_VERSION_MINOR 0 +#define MBFL_VERSION_TEENY 2 + +/* * convert filter */ #define MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE 0 #define MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR 1 #define MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG 2 +#define MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY 3 /* * buffering converter @@ -129,6 +137,7 @@ MBFLAPI extern mbfl_string * mbfl_buffer_converter_getbuffer(mbfl_buffer_converter *convd, mbfl_string *result); MBFLAPI extern mbfl_string * mbfl_buffer_converter_result(mbfl_buffer_converter *convd, mbfl_string *result); MBFLAPI extern mbfl_string * mbfl_buffer_converter_feed_result(mbfl_buffer_converter *convd, mbfl_string *string, mbfl_string *result); +MBFLAPI extern int mbfl_buffer_illegalchars(mbfl_buffer_converter *convd); /* * encoding detector diff -Naur libmbfl-1.0.1/mbfl/mbfilter_pass.h libmbfl/mbfl/mbfilter_pass.h --- libmbfl-1.0.1/mbfl/mbfilter_pass.h 2004-02-04 05:17:51.000000000 +0100 +++ libmbfl/mbfl/mbfilter_pass.h 2008-07-05 08:52:04.000000000 +0200 @@ -33,8 +33,8 @@ #include "mbfl_defs.h" #include "mbfilter.h" -MBFLAPI extern const mbfl_encoding mbfl_encoding_pass; -MBFLAPI extern const struct 
mbfl_convert_vtbl vtbl_pass; +extern const mbfl_encoding mbfl_encoding_pass; +extern const struct mbfl_convert_vtbl vtbl_pass; MBFLAPI extern int mbfl_filt_conv_pass(int c, mbfl_convert_filter *filter); diff -Naur libmbfl-1.0.1/mbfl/mbfilter_wchar.h libmbfl/mbfl/mbfilter_wchar.h --- libmbfl-1.0.1/mbfl/mbfilter_wchar.h 2004-02-04 05:17:51.000000000 +0100 +++ libmbfl/mbfl/mbfilter_wchar.h 2008-07-05 08:52:04.000000000 +0200 @@ -34,6 +34,6 @@ #include "mbfl_defs.h" #include "mbfilter.h" -MBFLAPI extern const mbfl_encoding mbfl_encoding_wchar; +extern const mbfl_encoding mbfl_encoding_wchar; #endif /* MBFL_MBFILTER_WCHAR_H */ diff -Naur libmbfl-1.0.1/mbfl/mbfl_consts.h libmbfl/mbfl/mbfl_consts.h --- libmbfl-1.0.1/mbfl/mbfl_consts.h 2005-03-22 20:41:14.000000000 +0100 +++ libmbfl/mbfl/mbfl_consts.h 2008-07-05 08:52:04.000000000 +0200 @@ -72,10 +72,12 @@ #define MBFL_WCSPLANE_CNS11643 0x70f50000 /* 2121h - 9898h */ #define MBFL_WCSPLANE_UHC 0x70f60000 /* 8141h - fefeh */ #define MBFL_WCSPLANE_CP1251 0x70f70000 -#define MBFL_WCSPLANE_CP866 0x70f80000 +#define MBFL_WCSPLANE_CP866 0x70f80000 #define MBFL_WCSPLANE_KOI8R 0x70f90000 #define MBFL_WCSPLANE_8859_16 0x70fa0000 /* 00h - FFh */ #define MBFL_WCSPLANE_ARMSCII8 0x70fb0000 +#define MBFL_WCSPLANE_KOI8U 0x70fc0000 +#define MBFL_WCSPLANE_CP1254 0x70fd0000 /* 00h - FFh */ #define MBFL_WCSGROUP_MASK 0xffffff #define MBFL_WCSGROUP_UCS4MAX 0x70000000 #define MBFL_WCSGROUP_WCHARMAX 0x78000000 diff -Naur libmbfl-1.0.1/mbfl/mbfl_convert.c libmbfl/mbfl/mbfl_convert.c --- libmbfl-1.0.1/mbfl/mbfl_convert.c 2005-03-22 20:41:14.000000000 +0100 +++ libmbfl/mbfl/mbfl_convert.c 2008-07-31 19:37:12.000000000 +0200 @@ -51,16 +51,20 @@ #include "filters/mbfilter_euc_kr.h" #include "filters/mbfilter_iso2022_kr.h" #include "filters/mbfilter_sjis.h" +#include "filters/mbfilter_cp51932.h" #include "filters/mbfilter_jis.h" +#include "filters/mbfilter_iso2022_jp_ms.h" #include "filters/mbfilter_euc_jp.h" #include "filters/mbfilter_euc_jp_win.h" 
#include "filters/mbfilter_ascii.h" #include "filters/mbfilter_koi8r.h" +#include "filters/mbfilter_koi8u.h" #include "filters/mbfilter_cp866.h" #include "filters/mbfilter_cp932.h" #include "filters/mbfilter_cp936.h" #include "filters/mbfilter_cp1251.h" #include "filters/mbfilter_cp1252.h" +#include "filters/mbfilter_cp1254.h" #include "filters/mbfilter_iso8859_1.h" #include "filters/mbfilter_iso8859_2.h" #include "filters/mbfilter_iso8859_3.h" @@ -104,10 +108,14 @@ &vtbl_wchar_eucjp, &vtbl_sjis_wchar, &vtbl_wchar_sjis, + &vtbl_cp51932_wchar, + &vtbl_wchar_cp51932, &vtbl_jis_wchar, &vtbl_wchar_jis, &vtbl_2022jp_wchar, &vtbl_wchar_2022jp, + &vtbl_2022jpms_wchar, + &vtbl_wchar_2022jpms, &vtbl_eucjpwin_wchar, &vtbl_wchar_eucjpwin, &vtbl_sjiswin_wchar, @@ -134,8 +142,12 @@ &vtbl_wchar_cp866, &vtbl_koi8r_wchar, &vtbl_wchar_koi8r, + &vtbl_koi8u_wchar, + &vtbl_wchar_koi8u, &vtbl_cp1252_wchar, &vtbl_wchar_cp1252, + &vtbl_cp1254_wchar, + &vtbl_wchar_cp1254, &vtbl_ascii_wchar, &vtbl_wchar_ascii, &vtbl_8859_1_wchar, @@ -250,6 +262,7 @@ filter->data = data; filter->illegal_mode = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR; filter->illegal_substchar = 0x3f; /* '?' 
*/ + filter->num_illegalchar = 0; /* setup the function table */ mbfl_convert_filter_reset_vtbl(filter); @@ -317,6 +330,7 @@ dist->to = src->to; dist->illegal_mode = src->illegal_mode; dist->illegal_substchar = src->illegal_substchar; + dist->num_illegalchar = src->num_illegalchar; } int mbfl_convert_filter_devcat(mbfl_convert_filter *filter, mbfl_memory_device *src) @@ -349,22 +363,6 @@ return 0; } -#if 0 -static int -mbfl_convert_filter_strncat(mbfl_convert_filter *filter, const unsigned char *p, - int n) -{ - while (n > 0) { - if ((*filter->filter_function)(*p++, filter) < 0) { - return -1; - } - n--; - } - - return n; -} -#endif - /* illegal character output function for conv-filter */ int mbfl_filt_conv_illegal_output(int c, mbfl_convert_filter *filter) @@ -428,11 +426,43 @@ } } break; + case MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY: + if (c >= 0) { + if (c < MBFL_WCSGROUP_UCS4MAX) { /* unicode */ + ret = mbfl_convert_filter_strcat(filter, (const unsigned char *)"&#x"); + if (ret < 0) + break; + + m = 0; + r = 28; + while (r >= 0) { + n = (c >> r) & 0xf; + if (n || m) { + m = 1; + ret = (*filter->filter_function)(mbfl_hexchar_table[n], filter); + if (ret < 0) { + break; + } + } + r -= 4; + } + if (ret < 0) { + break; + } + if (m == 0) { + ret = (*filter->filter_function)(mbfl_hexchar_table[0], filter); + } + ret = mbfl_convert_filter_strcat(filter, (const unsigned char *)";"); + } else { + ret = (*filter->filter_function)(filter->illegal_substchar, filter); + } + } + break; default: break; } filter->illegal_mode = mode_backup; - + filter->num_illegalchar++; return ret; } @@ -446,8 +476,8 @@ to == mbfl_no_encoding_7bit) { from = mbfl_no_encoding_8bit; } else if (from == mbfl_no_encoding_base64 || - from == mbfl_no_encoding_qprint || - from == mbfl_no_encoding_uuencode) { + from == mbfl_no_encoding_qprint || + from == mbfl_no_encoding_uuencode) { to = mbfl_no_encoding_8bit; } diff -Naur libmbfl-1.0.1/mbfl/mbfl_convert.h libmbfl/mbfl/mbfl_convert.h --- 
libmbfl-1.0.1/mbfl/mbfl_convert.h 2005-02-21 11:09:40.000000000 +0100 +++ libmbfl/mbfl/mbfl_convert.h 2006-03-21 02:58:27.000000000 +0100 @@ -51,6 +51,7 @@ const mbfl_encoding *to; int illegal_mode; int illegal_substchar; + int num_illegalchar; void *opaque; }; diff -Naur libmbfl-1.0.1/mbfl/mbfl_defs.h libmbfl/mbfl/mbfl_defs.h --- libmbfl-1.0.1/mbfl/mbfl_defs.h 2003-08-25 03:15:33.000000000 +0200 +++ libmbfl/mbfl/mbfl_defs.h 2008-07-20 20:26:02.000000000 +0200 @@ -44,9 +44,13 @@ #define MBFLAPI __declspec(dllexport) #else #define MBFLAPI __declspec(dllimport) -#endif +#endif /* MBFL_DLL_EXPORT */ +#else +#if defined(__GNUC__) && __GNUC__ >= 4 +#define MBFLAPI __attribute__((visibility("default"))) #else #define MBFLAPI -#endif +#endif /* defined(__GNUC__) && __GNUC__ >= 4 */ +#endif /* WIN32 */ #endif /* MBFL_DEFS_H */ diff -Naur libmbfl-1.0.1/mbfl/mbfl_encoding.c libmbfl/mbfl/mbfl_encoding.c --- libmbfl-1.0.1/mbfl/mbfl_encoding.c 2006-01-21 04:39:51.000000000 +0100 +++ libmbfl/mbfl/mbfl_encoding.c 2008-07-05 08:52:04.000000000 +0200 @@ -57,16 +57,20 @@ #include "filters/mbfilter_euc_kr.h" #include "filters/mbfilter_iso2022_kr.h" #include "filters/mbfilter_sjis.h" +#include "filters/mbfilter_cp51932.h" #include "filters/mbfilter_jis.h" +#include "filters/mbfilter_iso2022_jp_ms.h" #include "filters/mbfilter_euc_jp.h" #include "filters/mbfilter_euc_jp_win.h" #include "filters/mbfilter_ascii.h" #include "filters/mbfilter_koi8r.h" +#include "filters/mbfilter_koi8u.h" #include "filters/mbfilter_cp866.h" #include "filters/mbfilter_cp932.h" #include "filters/mbfilter_cp936.h" #include "filters/mbfilter_cp1251.h" #include "filters/mbfilter_cp1252.h" +#include "filters/mbfilter_cp1254.h" #include "filters/mbfilter_iso8859_1.h" #include "filters/mbfilter_iso8859_2.h" #include "filters/mbfilter_iso8859_3.h" @@ -149,9 +153,12 @@ &mbfl_encoding_sjis, &mbfl_encoding_eucjp_win, &mbfl_encoding_sjis_win, + &mbfl_encoding_cp51932, &mbfl_encoding_jis, &mbfl_encoding_2022jp, + 
&mbfl_encoding_2022jpms, &mbfl_encoding_cp1252, + &mbfl_encoding_cp1254, &mbfl_encoding_8859_1, &mbfl_encoding_8859_2, &mbfl_encoding_8859_3, @@ -177,6 +184,7 @@ &mbfl_encoding_cp1251, &mbfl_encoding_cp866, &mbfl_encoding_koi8r, + &mbfl_encoding_koi8u, &mbfl_encoding_armscii8, NULL }; @@ -199,7 +207,7 @@ } } - /* serch MIME charset name */ + /* search MIME charset name */ i = 0; while ((encoding = mbfl_encoding_ptr_list[i++]) != NULL) { if (encoding->mime_name != NULL) { @@ -209,7 +217,7 @@ } } - /* serch aliases */ + /* search aliases */ i = 0; while ((encoding = mbfl_encoding_ptr_list[i++]) != NULL) { if (encoding->aliases != NULL) { diff -Naur libmbfl-1.0.1/mbfl/mbfl_encoding.h libmbfl/mbfl/mbfl_encoding.h --- libmbfl-1.0.1/mbfl/mbfl_encoding.h 2005-03-22 20:41:14.000000000 +0100 +++ libmbfl/mbfl/mbfl_encoding.h 2008-07-05 08:52:04.000000000 +0200 @@ -70,9 +70,12 @@ mbfl_no_encoding_eucjp_win, mbfl_no_encoding_sjis_win, mbfl_no_encoding_sjis_mac, + mbfl_no_encoding_cp51932, mbfl_no_encoding_jis, mbfl_no_encoding_2022jp, + mbfl_no_encoding_2022jpms, mbfl_no_encoding_cp1252, + mbfl_no_encoding_cp1254, mbfl_no_encoding_8859_1, mbfl_no_encoding_8859_2, mbfl_no_encoding_8859_3, @@ -97,6 +100,7 @@ mbfl_no_encoding_cp1251, mbfl_no_encoding_cp866, mbfl_no_encoding_koi8r, + mbfl_no_encoding_koi8u, mbfl_no_encoding_8859_16, mbfl_no_encoding_armscii8, mbfl_no_encoding_charset_max diff -Naur libmbfl-1.0.1/mbfl/mbfl_ident.c libmbfl/mbfl/mbfl_ident.c --- libmbfl-1.0.1/mbfl/mbfl_ident.c 2005-03-22 20:41:14.000000000 +0100 +++ libmbfl/mbfl/mbfl_ident.c 2008-07-05 08:52:04.000000000 +0200 @@ -51,15 +51,19 @@ #include "filters/mbfilter_iso2022_kr.h" #include "filters/mbfilter_sjis.h" #include "filters/mbfilter_jis.h" +#include "filters/mbfilter_iso2022_jp_ms.h" #include "filters/mbfilter_euc_jp.h" #include "filters/mbfilter_euc_jp_win.h" #include "filters/mbfilter_ascii.h" #include "filters/mbfilter_koi8r.h" +#include "filters/mbfilter_koi8u.h" #include "filters/mbfilter_cp866.h" 
#include "filters/mbfilter_cp932.h" #include "filters/mbfilter_cp936.h" #include "filters/mbfilter_cp1251.h" #include "filters/mbfilter_cp1252.h" +#include "filters/mbfilter_cp1254.h" +#include "filters/mbfilter_cp51932.h" #include "filters/mbfilter_iso8859_1.h" #include "filters/mbfilter_iso8859_2.h" #include "filters/mbfilter_iso8859_3.h" @@ -106,6 +110,8 @@ &vtbl_identify_sjiswin, &vtbl_identify_jis, &vtbl_identify_2022jp, + &vtbl_identify_2022jpms, + &vtbl_identify_cp51932, &vtbl_identify_euccn, &vtbl_identify_cp936, &vtbl_identify_hz, @@ -117,7 +123,9 @@ &vtbl_identify_cp1251, &vtbl_identify_cp866, &vtbl_identify_koi8r, + &vtbl_identify_koi8u, &vtbl_identify_cp1252, + &vtbl_identify_cp1254, &vtbl_identify_8859_1, &vtbl_identify_8859_2, &vtbl_identify_8859_3, diff -Naur libmbfl-1.0.1/mbfl/mbfl_language.c libmbfl/mbfl/mbfl_language.c --- libmbfl-1.0.1/mbfl/mbfl_language.c 2006-01-21 04:40:18.000000000 +0100 +++ libmbfl/mbfl/mbfl_language.c 2008-07-05 08:52:04.000000000 +0200 @@ -57,6 +57,7 @@ #include "nls/nls_uni.h" #include "nls/nls_de.h" #include "nls/nls_ru.h" +#include "nls/nls_ua.h" #include "nls/nls_en.h" #include "nls/nls_hy.h" #include "nls/nls_tr.h" @@ -77,6 +78,7 @@ &mbfl_language_english, &mbfl_language_german, &mbfl_language_russian, + &mbfl_language_ukrainian, &mbfl_language_armenian, &mbfl_language_turkish, &mbfl_language_neutral, diff -Naur libmbfl-1.0.1/mbfl/mbfl_language.h libmbfl/mbfl/mbfl_language.h --- libmbfl-1.0.1/mbfl/mbfl_language.h 2006-01-21 04:40:34.000000000 +0100 +++ libmbfl/mbfl/mbfl_language.h 2008-07-05 08:52:04.000000000 +0200 @@ -57,6 +57,7 @@ mbfl_no_language_simplified_chinese, /* zh-cn */ mbfl_no_language_traditional_chinese, /* zh-tw */ mbfl_no_language_russian, /* ru */ + mbfl_no_language_ukrainian, /* ua */ mbfl_no_language_armenian, /* hy */ mbfl_no_language_turkish, /* tr */ mbfl_no_language_max diff -Naur libmbfl-1.0.1/mbfl/mk_eaw_tbl.awk libmbfl/mbfl/mk_eaw_tbl.awk --- libmbfl-1.0.1/mbfl/mk_eaw_tbl.awk 2006-01-21 
05:02:50.000000000 +0100 +++ libmbfl/mbfl/mk_eaw_tbl.awk 2005-02-20 23:18:08.000000000 +0100 @@ -1,6 +1,6 @@ #!/usr/bin/awk -f # -# $Id: mk_eaw_tbl.awk,v 1.1.2.3 2006/01/21 04:02:50 hirokawa Exp $ +# $Id: mk_eaw_tbl.awk,v 1.2 2005/02/20 22:18:08 moriyoshi Exp $ # # Description: a script to generate east asian width table. # diff -Naur libmbfl-1.0.1/nls/Makefile.am libmbfl/nls/Makefile.am --- libmbfl-1.0.1/nls/Makefile.am 2005-03-22 20:41:14.000000000 +0100 +++ libmbfl/nls/Makefile.am 2008-07-05 08:52:04.000000000 +0200 @@ -2,4 +2,25 @@ noinst_LTLIBRARIES=libmbfl_nls.la INCLUDES=-I../mbfl libmbfl_nls_la_LDFLAGS=-version-info $(SHLIB_VERSION) -libmbfl_nls_la_SOURCES=nls_ja.c nls_de.c nls_en.c nls_hy.c nls_kr.c nls_ru.c nls_zh.c nls_uni.c nls_neutral.c nls_ja.h nls_de.h nls_en.h nls_hy.h nls_kr.h nls_ru.h nls_zh.h nls_uni.h nls_neutral.h +libmbfl_nls_la_SOURCES=nls_ja.c \ + nls_de.c \ + nls_en.c \ + nls_hy.c \ + nls_tr.c \ + nls_kr.c \ + nls_ru.c \ + nls_ua.c \ + nls_zh.c \ + nls_uni.c \ + nls_neutral.c \ + nls_ja.h \ + nls_de.h \ + nls_en.h \ + nls_hy.h \ + nls_tr.h \ + nls_kr.h \ + nls_ru.h \ + nls_ua.h \ + nls_zh.h \ + nls_uni.h \ + nls_neutral.h diff -Naur libmbfl-1.0.1/nls/nls_hy.c libmbfl/nls/nls_hy.c --- libmbfl-1.0.1/nls/nls_hy.c 2005-03-22 20:41:14.000000000 +0100 +++ libmbfl/nls/nls_hy.c 2008-02-16 09:32:26.000000000 +0100 @@ -11,7 +11,7 @@ const mbfl_language mbfl_language_armenian = { mbfl_no_language_armenian , - "Armenian ", + "Armenian", "hy", NULL, mbfl_no_encoding_armscii8, diff -Naur libmbfl-1.0.1/nls/nls_ru.c libmbfl/nls/nls_ru.c --- libmbfl-1.0.1/nls/nls_ru.c 2002-12-24 19:28:44.000000000 +0100 +++ libmbfl/nls/nls_ru.c 2008-07-05 08:52:04.000000000 +0200 @@ -1,20 +1,20 @@ -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#ifdef HAVE_STDDEF_H -#include <stddef.h> -#endif - -#include "mbfilter.h" -#include "nls_ru.h" - -const mbfl_language mbfl_language_russian = { - mbfl_no_language_russian, - "Russian", - "ru", - NULL, - mbfl_no_encoding_koi8r, 
- mbfl_no_encoding_qprint, - mbfl_no_encoding_8bit -}; +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#ifdef HAVE_STDDEF_H +#include <stddef.h> +#endif + +#include "mbfilter.h" +#include "nls_ru.h" + +const mbfl_language mbfl_language_russian = { + mbfl_no_language_russian, + "Russian", + "ru", + NULL, + mbfl_no_encoding_koi8r, + mbfl_no_encoding_qprint, + mbfl_no_encoding_8bit +}; diff -Naur libmbfl-1.0.1/nls/nls_ru.h libmbfl/nls/nls_ru.h --- libmbfl-1.0.1/nls/nls_ru.h 2002-12-07 20:20:44.000000000 +0100 +++ libmbfl/nls/nls_ru.h 2007-09-18 23:33:29.000000000 +0200 @@ -1,9 +1,9 @@ -#ifndef MBFL_NLS_RU_H -#define MBFL_NLS_RU_H - -#include "mbfilter.h" -#include "nls_ru.h" - -extern const mbfl_language mbfl_language_russian; - -#endif /* MBFL_NLS_RU_H */ +#ifndef MBFL_NLS_RU_H +#define MBFL_NLS_RU_H + +#include "mbfilter.h" +#include "nls_ru.h" + +extern const mbfl_language mbfl_language_russian; + +#endif /* MBFL_NLS_RU_H */ diff -Naur libmbfl-1.0.1/nls/nls_tr.c libmbfl/nls/nls_tr.c --- libmbfl-1.0.1/nls/nls_tr.c 1970-01-01 01:00:00.000000000 +0100 +++ libmbfl/nls/nls_tr.c 2005-12-23 16:18:52.000000000 +0100 @@ -0,0 +1,21 @@ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#ifdef HAVE_STDDEF_H +#include <stddef.h> +#endif + +#include "mbfilter.h" +#include "nls_tr.h" + +const mbfl_language mbfl_language_turkish = { + mbfl_no_language_turkish, + "Turkish", + "tr", + NULL, + mbfl_no_encoding_8859_9, + mbfl_no_encoding_qprint, + mbfl_no_encoding_8bit +}; + diff -Naur libmbfl-1.0.1/nls/nls_tr.h libmbfl/nls/nls_tr.h --- libmbfl-1.0.1/nls/nls_tr.h 1970-01-01 01:00:00.000000000 +0100 +++ libmbfl/nls/nls_tr.h 2005-12-23 16:18:52.000000000 +0100 @@ -0,0 +1,8 @@ +#ifndef MBFL_NLS_TR_H +#define MBFL_NLS_TR_H + +#include "mbfilter.h" + +extern const mbfl_language mbfl_language_turkish; + +#endif /* MBFL_NLS_TR_H */ diff -Naur libmbfl-1.0.1/nls/nls_ua.c libmbfl/nls/nls_ua.c --- libmbfl-1.0.1/nls/nls_ua.c 1970-01-01 01:00:00.000000000 +0100 +++ libmbfl/nls/nls_ua.c 
2008-07-05 08:52:04.000000000 +0200 @@ -0,0 +1,22 @@ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + + +#ifdef HAVE_STDDEF_H +#include <stddef.h> +#endif + + +#include "mbfilter.h" +#include "nls_ua.h" + +const mbfl_language mbfl_language_ukrainian = { + mbfl_no_language_ukrainian, + "Ukrainian", + "ua", + NULL, + mbfl_no_encoding_koi8u, + mbfl_no_encoding_qprint, + mbfl_no_encoding_8bit +}; diff -Naur libmbfl-1.0.1/nls/nls_ua.h libmbfl/nls/nls_ua.h --- libmbfl-1.0.1/nls/nls_ua.h 1970-01-01 01:00:00.000000000 +0100 +++ libmbfl/nls/nls_ua.h 2008-07-05 08:52:04.000000000 +0200 @@ -0,0 +1,9 @@ +#ifndef MBFL_NLS_UA_H +#define MBFL_NLS_UA_H + +#include "mbfilter.h" +#include "nls_ua.h" + +extern const mbfl_language mbfl_language_ukrainian; + +#endif /* MBFL_NLS_UA_H */