<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN"> <html><head><meta http-equiv="Content-Type" content="text/html;charset=iso-8859-1"> <title>utf8transliterator.cpp Source File</title> <link href="doxygen.css" rel="stylesheet" type="text/css"> </head><body> <!-- Generated by Doxygen 1.2.15 --> <center> <a class="qindex" href="index.html">Main Page</a> <a class="qindex" href="namespaces.html">Namespace List</a> <a class="qindex" href="hierarchy.html">Class Hierarchy</a> <a class="qindex" href="classes.html">Alphabetical List</a> <a class="qindex" href="annotated.html">Compound List</a> <a class="qindex" href="files.html">File List</a> <a class="qindex" href="functions.html">Compound Members</a> </center> <hr><h1>utf8transliterator.cpp</h1><div class="fragment"><pre>00001 <font class="comment">/******************************************************************************</font> 00002 <font class="comment">*</font> 00003 <font class="comment">* utf8transliterators - SWFilter descendant to transliterate between</font> 00004 <font class="comment">* ICU-supported scripts.</font> 00005 <font class="comment">*/</font> 00006 00007 <font class="preprocessor">#ifdef _ICU_</font> 00008 <font class="preprocessor"></font> 00009 <font class="preprocessor">#include &lt;stdlib.h&gt;</font> 00010 <font class="preprocessor">#include &lt;string.h&gt;</font> 00011 00012 <font class="preprocessor">#ifdef __GNUC__</font> 00013 <font class="preprocessor"></font><font class="preprocessor">#include &lt;unixstr.h&gt;</font> 00014 <font class="preprocessor">#endif</font> 00015 <font class="preprocessor"></font> 00016 <font class="preprocessor">#include &lt;utf8transliterator.h&gt;</font> 00017 00018 <font class="keyword">const</font> <font class="keywordtype">char</font> UTF8Transliterator::optionstring[NUMTARGETSCRIPTS][16] = { 00019 <font class="stringliteral">"Off"</font>, 00020 <font class="stringliteral">"Latin"</font>, 00021 <font class="stringliteral">"Basic Latin"</font>, 00022 <font 
class="stringliteral">"Beta"</font>, 00023 <font class="stringliteral">"BGreek"</font>, 00024 <font class="comment">/*</font> 00025 <font class="comment"> "Greek",</font> 00026 <font class="comment"> "Hebrew",</font> 00027 <font class="comment"> "Cyrillic",</font> 00028 <font class="comment"> "Arabic",</font> 00029 <font class="comment"> "Syriac",</font> 00030 <font class="comment"> "Katakana",</font> 00031 <font class="comment"> "Hiragana",</font> 00032 <font class="comment"> "Jamo",</font> 00033 <font class="comment"> "Hangul",</font> 00034 <font class="comment"> "Devanagari",</font> 00035 <font class="comment"> "Tamil",</font> 00036 <font class="comment"> "Bengali",</font> 00037 <font class="comment"> "Gurmukhi",</font> 00038 <font class="comment"> "Gujarati",</font> 00039 <font class="comment"> "Oriya",</font> 00040 <font class="comment"> "Telugu",</font> 00041 <font class="comment"> "Kannada",</font> 00042 <font class="comment"> "Malayalam",</font> 00043 <font class="comment"> "Thai",</font> 00044 <font class="comment"> "Georgian",</font> 00045 <font class="comment"> "Armenian",</font> 00046 <font class="comment"> "Ethiopic",</font> 00047 <font class="comment"> "Gothic",</font> 00048 <font class="comment"> "Ugaritic",</font> 00049 <font class="comment"> "Coptic"</font> 00050 <font class="comment"> */</font> 00051 }; 00052 00053 <font class="keyword">const</font> <font class="keywordtype">char</font> UTF8Transliterator::optName[] = <font class="stringliteral">"Transliteration"</font>; 00054 <font class="keyword">const</font> <font class="keywordtype">char</font> UTF8Transliterator::optTip[] = <font class="stringliteral">"Transliterates between scripts"</font>; 00055 00056 UTF8Transliterator::UTF8Transliterator() { 00057 option = 0; 00058 <font class="keywordtype">unsigned</font> <font class="keywordtype">long</font> i; 00059 <font class="keywordflow">for</font> (i = 0; i < NUMTARGETSCRIPTS; i++) { 00060 options.push_back(optionstring[i]); 00061 } 00062 } 00063 
00064 <font class="keywordtype">void</font> UTF8Transliterator::setOptionValue(<font class="keyword">const</font> <font class="keywordtype">char</font> *ival) 00065 { 00066 <font class="keywordtype">unsigned</font> <font class="keywordtype">char</font> i = option = NUMTARGETSCRIPTS; 00067 <font class="keywordflow">while</font> (i && stricmp(ival, optionstring[i])) { 00068 i--; 00069 option = i; 00070 } 00071 } 00072 00073 <font class="keyword">const</font> <font class="keywordtype">char</font> *UTF8Transliterator::getOptionValue() 00074 { 00075 <font class="keywordflow">return</font> (NUMTARGETSCRIPTS > option) ? optionstring[option] : 0; 00076 } 00077 00078 <font class="keywordtype">char</font> UTF8Transliterator::ProcessText(<font class="keywordtype">char</font> *text, <font class="keywordtype">int</font> maxlen, <font class="keyword">const</font> <a class="code" href="class_s_w_key.html">SWKey</a> *key, <font class="keyword">const</font> <a class="code" href="class_s_w_module.html">SWModule</a> *module) 00079 { 00080 <font class="keywordflow">if</font> (option) { <font class="comment">// if we want transliteration</font> 00081 <font class="keywordtype">unsigned</font> <font class="keywordtype">long</font> i, j; 00082 UErrorCode err = U_ZERO_ERROR; 00083 UConverter * conv = NULL; 00084 conv = ucnv_open(<font class="stringliteral">"UTF-8"</font>, &err); 00085 00086 <font class="keywordtype">bool</font> compat = <font class="keyword">false</font>; 00087 <font class="keywordtype">bool</font> noNFC = <font class="keyword">false</font>; 00088 00089 <font class="keywordflow">if</font> (option == SE_JAMO) { 00090 noNFC = <font class="keyword">true</font>; 00091 } 00092 00093 <font class="comment">// Convert UTF-8 string to UTF-16 (UChars)</font> 00094 j = strlen(text); 00095 int32_t len = (j * 2) + 1; 00096 UChar *source = <font class="keyword">new</font> UChar[len]; 00097 err = U_ZERO_ERROR; 00098 len = ucnv_toUChars(conv, source, len, text, j, &err); 00099 source[len] 
= 0; 00100 00101 <font class="comment">// Figure out which scripts are used in the string</font> 00102 <font class="keywordtype">unsigned</font> <font class="keywordtype">char</font> scripts[NUMSCRIPTS]; 00103 00104 <font class="keywordflow">for</font> (i = 0; i < NUMSCRIPTS; i++) { 00105 scripts[i] = <font class="keyword">false</font>; 00106 } 00107 00108 <font class="keywordflow">for</font> (i = 0; i < len; i++) { 00109 j = ublock_getCode(source[i]); 00110 <font class="keywordflow">switch</font> (j) { 00111 <font class="keywordflow">case</font> UBLOCK_BASIC_LATIN: scripts[SE_LATIN] = <font class="keyword">true</font>; <font class="keywordflow">break</font>; 00112 <font class="keywordflow">case</font> UBLOCK_GREEK: scripts[SE_GREEK] = <font class="keyword">true</font>; <font class="keywordflow">break</font>; 00113 <font class="keywordflow">case</font> UBLOCK_HEBREW: scripts[SE_HEBREW] = <font class="keyword">true</font>; <font class="keywordflow">break</font>; 00114 <font class="keywordflow">case</font> UBLOCK_CYRILLIC: scripts[SE_CYRILLIC] = <font class="keyword">true</font>; <font class="keywordflow">break</font>; 00115 <font class="keywordflow">case</font> UBLOCK_ARABIC: scripts[SE_ARABIC] = <font class="keyword">true</font>; <font class="keywordflow">break</font>; 00116 <font class="keywordflow">case</font> UBLOCK_SYRIAC: scripts[SE_SYRIAC] = <font class="keyword">true</font>; <font class="keywordflow">break</font>; 00117 <font class="keywordflow">case</font> UBLOCK_KATAKANA: scripts[SE_KATAKANA] = <font class="keyword">true</font>; <font class="keywordflow">break</font>; 00118 <font class="keywordflow">case</font> UBLOCK_HIRAGANA: scripts[SE_HIRAGANA] = <font class="keyword">true</font>; <font class="keywordflow">break</font>; 00119 <font class="keywordflow">case</font> UBLOCK_HANGUL_SYLLABLES: scripts[SE_HANGUL] = <font class="keyword">true</font>; <font class="keywordflow">break</font>; 00120 <font class="keywordflow">case</font> UBLOCK_HANGUL_JAMO: 
scripts[SE_JAMO] = <font class="keyword">true</font>; <font class="keywordflow">break</font>; 00121 <font class="keywordflow">case</font> UBLOCK_DEVANAGARI: scripts[SE_DEVANAGARI] = <font class="keyword">true</font>; <font class="keywordflow">break</font>; 00122 <font class="keywordflow">case</font> UBLOCK_TAMIL: scripts[SE_TAMIL] = <font class="keyword">true</font>; <font class="keywordflow">break</font>; 00123 <font class="keywordflow">case</font> UBLOCK_BENGALI: scripts[SE_BENGALI] = <font class="keyword">true</font>; <font class="keywordflow">break</font>; 00124 <font class="keywordflow">case</font> UBLOCK_GURMUKHI: scripts[SE_GURMUKHI] = <font class="keyword">true</font>; <font class="keywordflow">break</font>; 00125 <font class="keywordflow">case</font> UBLOCK_GUJARATI: scripts[SE_GUJARATI] = <font class="keyword">true</font>; <font class="keywordflow">break</font>; 00126 <font class="keywordflow">case</font> UBLOCK_ORIYA: scripts[SE_ORIYA] = <font class="keyword">true</font>; <font class="keywordflow">break</font>; 00127 <font class="keywordflow">case</font> UBLOCK_TELUGU: scripts[SE_TELUGU] = <font class="keyword">true</font>; <font class="keywordflow">break</font>; 00128 <font class="keywordflow">case</font> UBLOCK_KANNADA: scripts[SE_KANNADA] = <font class="keyword">true</font>; <font class="keywordflow">break</font>; 00129 <font class="keywordflow">case</font> UBLOCK_MALAYALAM: scripts[SE_MALAYALAM] = <font class="keyword">true</font>; <font class="keywordflow">break</font>; 00130 <font class="keywordflow">case</font> UBLOCK_THAI: scripts[SE_THAI] = <font class="keyword">true</font>; <font class="keywordflow">break</font>; 00131 <font class="keywordflow">case</font> UBLOCK_GEORGIAN: scripts[SE_GEORGIAN] = <font class="keyword">true</font>; <font class="keywordflow">break</font>; 00132 <font class="keywordflow">case</font> UBLOCK_ARMENIAN: scripts[SE_ARMENIAN] = <font class="keyword">true</font>; <font class="keywordflow">break</font>; 00133 <font 
class="keywordflow">case</font> UBLOCK_ETHIOPIC: scripts[SE_ETHIOPIC] = <font class="keyword">true</font>; <font class="keywordflow">break</font>; 00134 <font class="keywordflow">case</font> UBLOCK_GOTHIC: scripts[SE_GOTHIC] = <font class="keyword">true</font>; <font class="keywordflow">break</font>; 00135 <font class="comment">// needs Unicode 3.2? or 4.0? support from ICU</font> 00136 <font class="comment">//case UBLOCK_UGARITIC: scripts[SE_UGARITIC] = true; break;</font> 00137 <font class="keywordflow">case</font> UBLOCK_CJK_RADICALS_SUPPLEMENT: 00138 <font class="keywordflow">case</font> UBLOCK_KANGXI_RADICALS: 00139 <font class="keywordflow">case</font> UBLOCK_IDEOGRAPHIC_DESCRIPTION_CHARACTERS: 00140 <font class="keywordflow">case</font> UBLOCK_CJK_SYMBOLS_AND_PUNCTUATION: 00141 <font class="keywordflow">case</font> UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A: 00142 <font class="keywordflow">case</font> UBLOCK_CJK_UNIFIED_IDEOGRAPHS: 00143 scripts[SE_HAN] = <font class="keyword">true</font>; 00144 <font class="keywordflow">break</font>; 00145 <font class="keywordflow">case</font> UBLOCK_CJK_COMPATIBILITY: 00146 <font class="keywordflow">case</font> UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS: 00147 <font class="keywordflow">case</font> UBLOCK_CJK_COMPATIBILITY_FORMS: 00148 scripts[SE_HAN] = <font class="keyword">true</font>; 00149 compat = <font class="keyword">true</font>; 00150 <font class="keywordflow">break</font>; 00151 <font class="keywordflow">case</font> UBLOCK_HANGUL_COMPATIBILITY_JAMO: 00152 scripts[SE_HANGUL] = <font class="keyword">true</font>; 00153 compat = <font class="keyword">true</font>; 00154 <font class="keywordflow">break</font>; 00155 00156 <font class="keywordflow">default</font>: scripts[SE_LATIN] = <font class="keyword">true</font>; 00157 } 00158 } 00159 scripts[option] = <font class="keyword">false</font>; <font class="comment">//turn off the reflexive transliteration</font> 00160 00161 <font class="comment">//return if we have no 
transliteration to do for this text</font> 00162 j = 0; 00163 <font class="keywordflow">for</font> (i = 0; !j && i < NUMSCRIPTS; i++) { 00164 <font class="keywordflow">if</font> (scripts[i]) j++; 00165 } 00166 <font class="keywordflow">if</font> (!j) { 00167 ucnv_close(conv); 00168 <font class="keywordflow">return</font> 0; 00169 } 00170 00171 UnicodeString id; 00172 <font class="keywordflow">if</font> (compat) { 00173 id = UnicodeString(<font class="stringliteral">"NFKD"</font>); 00174 } 00175 <font class="keywordflow">else</font> { 00176 id = UnicodeString(<font class="stringliteral">"NFD"</font>); 00177 } 00178 00179 <font class="comment">//Simple X to Latin transliterators</font> 00180 <font class="keywordflow">if</font> (scripts[SE_GREEK]) { 00181 <font class="keywordflow">if</font> (option == SE_BETA) 00182 id += UnicodeString(<font class="stringliteral">";Greek-Beta"</font>); 00183 <font class="keywordflow">else</font> <font class="keywordflow">if</font> (option == SE_BGREEK) 00184 id += UnicodeString(<font class="stringliteral">";Greek-BGreek"</font>); 00185 <font class="keywordflow">else</font> { 00186 <font class="keywordflow">if</font> (!strnicmp (((<a class="code" href="class_s_w_module.html">SWModule</a>*)module)->Lang(), <font class="stringliteral">"cop"</font>, 3)) { 00187 id += UnicodeString(<font class="stringliteral">";Coptic-Latin"</font>); 00188 } 00189 <font class="keywordflow">else</font> { 00190 id += UnicodeString(<font class="stringliteral">";Greek-Latin"</font>); 00191 } 00192 scripts[SE_LATIN] = <font class="keyword">true</font>; 00193 } 00194 } 00195 <font class="keywordflow">if</font> (scripts[SE_HEBREW]) { 00196 <font class="keywordflow">if</font> (option == SE_BETA) 00197 id += UnicodeString(<font class="stringliteral">";Hebrew-CCAT"</font>); 00198 <font class="keywordflow">else</font> <font class="keywordflow">if</font> (option == SE_SYRIAC) 00199 id += UnicodeString(<font class="stringliteral">";Hebrew-Syriac"</font>); 00200 <font 
class="keywordflow">else</font> { 00201 id += UnicodeString(<font class="stringliteral">";Hebrew-Latin"</font>); 00202 scripts[SE_LATIN] = <font class="keyword">true</font>; 00203 } 00204 } 00205 <font class="keywordflow">if</font> (scripts[SE_CYRILLIC]) { 00206 id += UnicodeString(<font class="stringliteral">";Cyrillic-Latin"</font>); 00207 scripts[SE_LATIN] = <font class="keyword">true</font>; 00208 } 00209 <font class="keywordflow">if</font> (scripts[SE_ARABIC]) { 00210 id += UnicodeString(<font class="stringliteral">";Arabic-Latin"</font>); 00211 scripts[SE_LATIN] = <font class="keyword">true</font>; 00212 } 00213 <font class="keywordflow">if</font> (scripts[SE_SYRIAC]) { 00214 <font class="keywordflow">if</font> (option == SE_BETA) 00215 id += UnicodeString(<font class="stringliteral">";Syriac-CCAT"</font>); 00216 <font class="keywordflow">else</font> <font class="keywordflow">if</font> (option == SE_HEBREW) 00217 id += UnicodeString(<font class="stringliteral">";Syriac-Hebrew"</font>); 00218 <font class="keywordflow">else</font> { 00219 id += UnicodeString(<font class="stringliteral">";Syriac-Latin"</font>); 00220 scripts[SE_LATIN] = <font class="keyword">true</font>; 00221 } 00222 } 00223 <font class="keywordflow">if</font> (scripts[SE_THAI]) { 00224 id += UnicodeString(<font class="stringliteral">";Thai-Latin"</font>); 00225 scripts[SE_LATIN] = <font class="keyword">true</font>; 00226 } 00227 <font class="keywordflow">if</font> (scripts[SE_GEORGIAN]) { 00228 id += UnicodeString(<font class="stringliteral">";Georgian-Latin"</font>); 00229 scripts[SE_LATIN] = <font class="keyword">true</font>; 00230 } 00231 <font class="keywordflow">if</font> (scripts[SE_ARMENIAN]) { 00232 id += UnicodeString(<font class="stringliteral">";Armenian-Latin"</font>); 00233 scripts[SE_LATIN] = <font class="keyword">true</font>; 00234 } 00235 <font class="keywordflow">if</font> (scripts[SE_ETHIOPIC]) { 00236 id += UnicodeString(<font class="stringliteral">";Ethiopic-Latin"</font>); 
00237 scripts[SE_LATIN] = <font class="keyword">true</font>; 00238 } 00239 <font class="keywordflow">if</font> (scripts[SE_GOTHIC]) { 00240 id += UnicodeString(<font class="stringliteral">";Gothic-Latin"</font>); 00241 scripts[SE_LATIN] = <font class="keyword">true</font>; 00242 } 00243 <font class="keywordflow">if</font> (scripts[SE_UGARITIC]) { 00244 id += UnicodeString(<font class="stringliteral">";Ugaritic-Latin"</font>); 00245 scripts[SE_LATIN] = <font class="keyword">true</font>; 00246 } 00247 <font class="keywordflow">if</font> (scripts[SE_HAN]) { 00248 <font class="keywordflow">if</font> (!strnicmp (((<a class="code" href="class_s_w_module.html">SWModule</a>*)module)->Lang(), <font class="stringliteral">"ja"</font>, 2)) { 00249 id += UnicodeString(<font class="stringliteral">";Kanji-OnRomaji"</font>); 00250 } 00251 <font class="keywordflow">else</font> { 00252 id += UnicodeString(<font class="stringliteral">";Han-Pinyin"</font>); 00253 } 00254 scripts[SE_LATIN] = <font class="keyword">true</font>; 00255 } 00256 00257 <font class="comment">// Inter-Kana and Kana to Latin transliterators</font> 00258 <font class="keywordflow">if</font> (option == SE_HIRAGANA && scripts[SE_KATAKANA]) { 00259 id += UnicodeString(<font class="stringliteral">";Katakana-Hiragana"</font>); 00260 scripts[SE_HIRAGANA] = <font class="keyword">true</font>; 00261 } 00262 <font class="keywordflow">else</font> <font class="keywordflow">if</font> (option == SE_KATAKANA && scripts[SE_HIRAGANA]) { 00263 id += UnicodeString(<font class="stringliteral">";Hiragana-Katakana"</font>); 00264 scripts[SE_KATAKANA] = <font class="keyword">true</font>; 00265 } 00266 <font class="keywordflow">else</font> { 00267 <font class="keywordflow">if</font> (scripts[SE_KATAKANA]) { 00268 id += UnicodeString(<font class="stringliteral">";Katakana-Latin"</font>); 00269 scripts[SE_LATIN] = <font class="keyword">true</font>; 00270 } 00271 <font class="keywordflow">if</font> (scripts[SE_HIRAGANA]) { 00272 id += 
UnicodeString(<font class="stringliteral">";Hiragana-Latin"</font>); 00273 scripts[SE_LATIN] = <font class="keyword">true</font>; 00274 } 00275 } 00276 00277 <font class="comment">// Inter-Korean and Korean to Latin transliterators</font> 00278 <font class="keywordflow">if</font> (option == SE_HANGUL && scripts[SE_JAMO]) { 00279 noNFC = <font class="keyword">false</font>; 00280 scripts[SE_HANGUL] = <font class="keyword">true</font>; 00281 } 00282 <font class="keywordflow">else</font> <font class="keywordflow">if</font> (option == SE_JAMO && scripts[SE_HANGUL]) { 00283 noNFC = <font class="keyword">true</font>; 00284 scripts[SE_JAMO] = <font class="keyword">true</font>; 00285 } 00286 <font class="keywordflow">else</font> { 00287 <font class="keywordflow">if</font> (scripts[SE_HANGUL]) { 00288 id += UnicodeString(<font class="stringliteral">";Hangul-Latin"</font>); 00289 scripts[SE_LATIN] = <font class="keyword">true</font>; 00290 } 00291 <font class="keywordflow">if</font> (scripts[SE_JAMO]) { 00292 id += UnicodeString(<font class="stringliteral">";Jamo-Latin"</font>); 00293 scripts[SE_LATIN] = <font class="keyword">true</font>; 00294 } 00295 } 00296 00297 <font class="comment">// Indic-Latin</font> 00298 <font class="keywordflow">if</font> (option < SE_DEVANAGARI || option > SE_MALAYALAM) { 00299 <font class="comment">// Indic to Latin</font> 00300 <font class="keywordflow">if</font> (scripts[SE_TAMIL]) { 00301 id += UnicodeString(<font class="stringliteral">";Tamil-Latin"</font>); 00302 scripts[SE_LATIN] = <font class="keyword">true</font>; 00303 } 00304 <font class="keywordflow">if</font> (scripts[SE_BENGALI]) { 00305 id += UnicodeString(<font class="stringliteral">";Bengali-Latin"</font>); 00306 scripts[SE_LATIN] = <font class="keyword">true</font>; 00307 } 00308 <font class="keywordflow">if</font> (scripts[SE_GURMUKHI]) { 00309 id += UnicodeString(<font class="stringliteral">";Gurmukhi-Latin"</font>); 00310 scripts[SE_LATIN] = <font class="keyword">true</font>; 
00311 } 00312 <font class="keywordflow">if</font> (scripts[SE_GUJARATI]) { 00313 id += UnicodeString(<font class="stringliteral">";Gujarati-Latin"</font>); 00314 scripts[SE_LATIN] = <font class="keyword">true</font>; 00315 } 00316 <font class="keywordflow">if</font> (scripts[SE_ORIYA]) { 00317 id += UnicodeString(<font class="stringliteral">";Oriya-Latin"</font>); 00318 scripts[SE_LATIN] = <font class="keyword">true</font>; 00319 } 00320 <font class="keywordflow">if</font> (scripts[SE_TELUGU]) { 00321 id += UnicodeString(<font class="stringliteral">";Telugu-Latin"</font>); 00322 scripts[SE_LATIN] = <font class="keyword">true</font>; 00323 } 00324 <font class="keywordflow">if</font> (scripts[SE_KANNADA]) { 00325 id += UnicodeString(<font class="stringliteral">";Kannada-Latin"</font>); 00326 scripts[SE_LATIN] = <font class="keyword">true</font>; 00327 } 00328 <font class="keywordflow">if</font> (scripts[SE_MALAYALAM]) { 00329 id += UnicodeString(<font class="stringliteral">";Malayalam-Latin"</font>); 00330 scripts[SE_LATIN] = <font class="keyword">true</font>; 00331 } 00332 } 00333 <font class="keywordflow">else</font> { 00334 <font class="keywordflow">if</font> (scripts[SE_LATIN]) { 00335 id += UnicodeString(<font class="stringliteral">";Latin-InterIndic"</font>); 00336 } 00337 <font class="keywordflow">if</font> (scripts[SE_DEVANAGARI]) { 00338 id += UnicodeString(<font class="stringliteral">";Devanagari-InterIndic"</font>); 00339 } 00340 <font class="keywordflow">if</font> (scripts[SE_TAMIL]) { 00341 id += UnicodeString(<font class="stringliteral">";Tamil-InterIndic"</font>); 00342 } 00343 <font class="keywordflow">if</font> (scripts[SE_BENGALI]) { 00344 id += UnicodeString(<font class="stringliteral">";Bengali-InterIndic"</font>); 00345 } 00346 <font class="keywordflow">if</font> (scripts[SE_GURMUKHI]) { 00347 id += UnicodeString(<font class="stringliteral">";Gurmurkhi-InterIndic"</font>); 00348 } 00349 <font class="keywordflow">if</font> (scripts[SE_GUJARATI]) { 
00350 id += UnicodeString(<font class="stringliteral">";Gujarati-InterIndic"</font>); 00351 } 00352 <font class="keywordflow">if</font> (scripts[SE_ORIYA]) { 00353 id += UnicodeString(<font class="stringliteral">";Oriya-InterIndic"</font>); 00354 } 00355 <font class="keywordflow">if</font> (scripts[SE_TELUGU]) { 00356 id += UnicodeString(<font class="stringliteral">";Telugu-InterIndic"</font>); 00357 } 00358 <font class="keywordflow">if</font> (scripts[SE_KANNADA]) { 00359 id += UnicodeString(<font class="stringliteral">";Kannada-InterIndic"</font>); 00360 } 00361 <font class="keywordflow">if</font> (scripts[SE_MALAYALAM]) { 00362 id += UnicodeString(<font class="stringliteral">";Malayalam-InterIndic"</font>); 00363 } 00364 00365 <font class="keywordflow">switch</font>(option) { 00366 <font class="keywordflow">case</font> SE_DEVANAGARI: 00367 id += UnicodeString(<font class="stringliteral">";InterIndic-Devanagari"</font>); 00368 <font class="keywordflow">break</font>; 00369 <font class="keywordflow">case</font> SE_TAMIL: 00370 id += UnicodeString(<font class="stringliteral">";InterIndic-Tamil"</font>); 00371 <font class="keywordflow">break</font>; 00372 <font class="keywordflow">case</font> SE_BENGALI: 00373 id += UnicodeString(<font class="stringliteral">";InterIndic-Bengali"</font>); 00374 <font class="keywordflow">break</font>; 00375 <font class="keywordflow">case</font> SE_GURMUKHI: 00376 id += UnicodeString(<font class="stringliteral">";InterIndic-Gurmukhi"</font>); 00377 <font class="keywordflow">break</font>; 00378 <font class="keywordflow">case</font> SE_GUJARATI: 00379 id += UnicodeString(<font class="stringliteral">";InterIndic-Gujarati"</font>); 00380 <font class="keywordflow">break</font>; 00381 <font class="keywordflow">case</font> SE_ORIYA: 00382 id += UnicodeString(<font class="stringliteral">";InterIndic-Oriya"</font>); 00383 <font class="keywordflow">break</font>; 00384 <font class="keywordflow">case</font> SE_TELUGU: 00385 id += 
UnicodeString(<font class="stringliteral">";InterIndic-Telugu"</font>); 00386 <font class="keywordflow">break</font>; 00387 <font class="keywordflow">case</font> SE_KANNADA: 00388 id += UnicodeString(<font class="stringliteral">";InterIndic-Kannada"</font>); 00389 <font class="keywordflow">break</font>; 00390 <font class="keywordflow">case</font> SE_MALAYALAM: 00391 id += UnicodeString(<font class="stringliteral">";InterIndic-Malayalam"</font>); 00392 <font class="keywordflow">break</font>; 00393 <font class="keywordflow">default</font>: 00394 id += UnicodeString(<font class="stringliteral">";InterIndic-Latin"</font>); 00395 scripts[SE_LATIN] = <font class="keyword">true</font>; 00396 <font class="keywordflow">break</font>; 00397 } 00398 } 00399 00400 <font class="keywordflow">if</font> (scripts[SE_LATIN]) { 00401 <font class="keywordflow">switch</font> (option) { 00402 <font class="keywordflow">case</font> SE_GREEK: 00403 id += UnicodeString(<font class="stringliteral">";Latin-Greek"</font>); 00404 <font class="keywordflow">break</font>; 00405 <font class="keywordflow">case</font> SE_HEBREW: 00406 id += UnicodeString(<font class="stringliteral">";Latin-Hebrew"</font>); 00407 <font class="keywordflow">break</font>; 00408 <font class="keywordflow">case</font> SE_CYRILLIC: 00409 id += UnicodeString(<font class="stringliteral">";Latin-Cyrillic"</font>); 00410 <font class="keywordflow">break</font>; 00411 <font class="keywordflow">case</font> SE_ARABIC: 00412 id += UnicodeString(<font class="stringliteral">";Latin-Arabic"</font>); 00413 <font class="keywordflow">break</font>; 00414 <font class="keywordflow">case</font> SE_SYRIAC: 00415 id += UnicodeString(<font class="stringliteral">";Latin-Syriac"</font>); 00416 <font class="keywordflow">break</font>; 00417 <font class="keywordflow">case</font> SE_THAI: 00418 id += UnicodeString(<font class="stringliteral">";Latin-Thai"</font>); 00419 <font class="keywordflow">break</font>; 00420 <font class="keywordflow">case</font> 
SE_GEORGIAN: 00421 id += UnicodeString(<font class="stringliteral">";Latin-Georgian"</font>); 00422 <font class="keywordflow">break</font>; 00423 <font class="keywordflow">case</font> SE_ARMENIAN: 00424 id += UnicodeString(<font class="stringliteral">";Latin-Armenian"</font>); 00425 <font class="keywordflow">break</font>; 00426 <font class="keywordflow">case</font> SE_ETHIOPIC: 00427 id += UnicodeString(<font class="stringliteral">";Latin-Ethiopic"</font>); 00428 <font class="keywordflow">break</font>; 00429 <font class="keywordflow">case</font> SE_GOTHIC: 00430 id += UnicodeString(<font class="stringliteral">";Latin-Gothic"</font>); 00431 <font class="keywordflow">break</font>; 00432 <font class="keywordflow">case</font> SE_UGARITIC: 00433 id += UnicodeString(<font class="stringliteral">";Latin-Ugaritic"</font>); 00434 <font class="keywordflow">break</font>; 00435 <font class="keywordflow">case</font> SE_COPTIC: 00436 id += UnicodeString(<font class="stringliteral">";Latin-Coptic"</font>); 00437 <font class="keywordflow">break</font>; 00438 <font class="keywordflow">case</font> SE_KATAKANA: 00439 id += UnicodeString(<font class="stringliteral">";Latin-Katakana"</font>); 00440 <font class="keywordflow">break</font>; 00441 <font class="keywordflow">case</font> SE_HIRAGANA: 00442 id += UnicodeString(<font class="stringliteral">";Latin-Hiragana"</font>); 00443 <font class="keywordflow">break</font>; 00444 <font class="keywordflow">case</font> SE_JAMO: 00445 id += UnicodeString(<font class="stringliteral">";Latin-Jamo"</font>); 00446 <font class="keywordflow">break</font>; 00447 <font class="keywordflow">case</font> SE_HANGUL: 00448 id += UnicodeString(<font class="stringliteral">";Latin-Hangul"</font>); 00449 <font class="keywordflow">break</font>; 00450 } 00451 } 00452 00453 <font class="keywordflow">if</font> (option == SE_BASICLATIN) { 00454 id += UnicodeString(<font class="stringliteral">";Any-Latin1"</font>); 00455 } 00456 00457 <font 
class="keywordflow">if</font> (noNFC) { 00458 id += UnicodeString(<font class="stringliteral">";NFD"</font>); 00459 } <font class="keywordflow">else</font> { 00460 id += UnicodeString(<font class="stringliteral">";NFC"</font>); 00461 } 00462 00463 UParseError perr; 00464 00465 err = U_ZERO_ERROR; 00466 Transliterator * trans = Transliterator::createInstance(id, UTRANS_FORWARD, perr, err); 00467 <font class="keywordflow">if</font> (trans) { 00468 UnicodeString target = UnicodeString(source); 00469 trans->transliterate(target); 00470 len = ucnv_fromUChars(conv, text, maxlen, target.getBuffer(), target.length(), &err); 00471 <font class="keywordflow">if</font> (len < maxlen) *(text + len) = 0; 00472 <font class="keywordflow">else</font> *(text + maxlen) = 0; 00473 <font class="keyword">delete</font> trans; 00474 } 00475 ucnv_close(conv); 00476 } 00477 <font class="keywordflow">return</font> 0; 00478 } 00479 <font class="preprocessor">#endif</font> </pre></div><hr><address align="right"><small>Generated on Thu Jun 20 22:13:01 2002 for The Sword Project by <a href="http://www.doxygen.org/index.html"> <img src="doxygen.png" alt="doxygen" align="middle" border=0 width=110 height=53></a>1.2.15 </small></address> </body> </html>