<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
<html lang="en">
<head>
<meta http-equiv="Content-Type" content="text/html;charset=iso-8859-1">
<title>scsuutf8.cpp Source File</title>
<link href="doxygen.css" rel="stylesheet" type="text/css">
</head>
<body>
<!-- Generated by Doxygen 1.2.15 -->
<!--
  Source-browser page for scsuutf8.cpp.
  Layout: quick-index navigation bar, page heading, then a line-numbered,
  syntax-highlighted listing inside <pre>, then the generator footer.
  The presentational <center>/<font class="..."> elements are kept as
  generated because the shared doxygen.css stylesheet is external to this
  page and may select on those element names.
-->
<center>
<a class="qindex" href="index.html">Main Page</a>
<a class="qindex" href="namespaces.html">Namespace List</a>
<a class="qindex" href="hierarchy.html">Class Hierarchy</a>
<a class="qindex" href="classes.html">Alphabetical List</a>
<a class="qindex" href="annotated.html">Compound List</a>
<a class="qindex" href="files.html">File List</a>
<a class="qindex" href="functions.html">Compound Members</a>
</center>
<hr><h1>scsuutf8.cpp</h1>
<!--
  Listing rules: one numbered source line per physical line (newlines are
  significant inside <pre>), and every literal "<", ">" and "&" of the C++
  text is written as a character reference so that constructs such as
  #include <stdlib.h> are displayed instead of being parsed as tags.
  The listing text itself mirrors the source file and is reproduced verbatim.
-->
<div class="fragment"><pre>00001 <font class="comment">/******************************************************************************</font>
00002 <font class="comment"> *</font>
00003 <font class="comment"> * SCSUUTF8 - SWFilter decendant to convert a SCSU character to UTF-8</font>
00004 <font class="comment"> *</font>
00005 <font class="comment"> */</font>
00006
00007
00008 <font class="comment">/* This class is based on:</font>
00009 <font class="comment"> * http://czyborra.com/scsu/scsu.c written by Roman Czyborra@dds.nl</font>
00010 <font class="comment"> * on Andrea's balcony in North Amsterdam on 1998-08-04</font>
00011 <font class="comment"> * Thanks to Richard Verhoeven &lt;rcb5@win.tue.nl&gt; for his suggestion</font>
00012 <font class="comment"> * to correct the haphazard "if" after UQU to "else if" on 1998-10-01</font>
00013 <font class="comment"> * </font>
00014 <font class="comment"> * This is a deflator to UTF-8 output for input compressed in SCSU,</font>
00015 <font class="comment"> * the (Reuters) Standard Compression Scheme for Unicode as described</font>
00016 <font class="comment"> * in http://www.unicode.org/unicode/reports/tr6.html</font>
00017 <font class="comment"> */</font>
00018
00019 <font class="preprocessor">#include &lt;stdlib.h&gt;</font>
00020 <font class="preprocessor">#include &lt;stdio.h&gt;</font>
00021 <font class="preprocessor">#include &lt;swmodule.h&gt;</font>
00022
00023 <font class="preprocessor">#include &lt;scsuutf8.h&gt;</font>
00024
00025 SCSUUTF8::SCSUUTF8() {
00026 }
00027
00028
00029 <font class="keywordtype">unsigned</font> <font class="keywordtype">char</font>* SCSUUTF8::UTF8Output(<font class="keywordtype">unsigned</font> <font class="keywordtype">long</font> uchar, <font class="keywordtype">unsigned</font> <font class="keywordtype">char</font>* text)
00030 {
00031   <font class="comment">/* join UTF-16 surrogates without any pairing sanity checks */</font>
00032
00033   <font class="keyword">static</font> <font class="keywordtype">int</font> d;
00034
00035   <font class="keywordflow">if</font> (uchar &gt;= 0xd800 &amp;&amp; uchar &lt;= 0xdbff) { d = uchar &amp; 0x3f; <font class="keywordflow">return</font> text; }
00036   <font class="keywordflow">if</font> (uchar &gt;= 0xdc00 &amp;&amp; uchar &lt;= 0xdfff) { uchar = uchar + 0x2400 + d * 0x400; }
00037
00038   <font class="comment">/* output one character as UTF-8 multibyte sequence */</font>
00039
00040   <font class="keywordflow">if</font> (uchar &lt; 0x80) {
00041     *text++ = c;
00042   }
00043   <font class="keywordflow">else</font> <font class="keywordflow">if</font> (uchar &lt; 0x800) {
00044     *text++ = 0xc0 | uchar &gt;&gt; 6;
00045     *text++ = 0x80 | uchar &amp; 0x3f;
00046   }
00047   <font class="keywordflow">else</font> <font class="keywordflow">if</font> (uchar &lt; 0x10000) {
00048     *text++ = 0xe0 | uchar &gt;&gt; 12;
00049     *text++ = 0x80 | uchar &gt;&gt; 6 &amp; 0x3f;
00050     *text++ = 0x80 | uchar &amp; 0x3f;
00051   }
00052   <font class="keywordflow">else</font> <font class="keywordflow">if</font> (uchar &lt; 0x200000) {
00053     *text++ = 0xf0 | uchar &gt;&gt; 18;
00054     *text++ = 0x80 | uchar &gt;&gt; 12 &amp; 0x3f;
00055     *text++ = 0x80 | uchar &gt;&gt; 6 &amp; 0x3f;
00056     *text++ = 0x80 | uchar &amp; 0x3f;
00057   }
00058
00059   <font class="keywordflow">return</font> text;
00060 }
00061
00062 <font class="keywordtype">char</font> SCSUUTF8::ProcessText(<font class="keywordtype">char</font> *text, <font class="keywordtype">int</font> len, <font class="keyword">const</font> <a class="code" href="class_s_w_key.html">SWKey</a> *key, <font class="keyword">const</font> <a class="code" href="class_s_w_module.html">SWModule</a> *module)
00063 {
00064   <font class="keywordtype">unsigned</font> <font class="keywordtype">char</font> *to, *from;
00065   <font class="keywordtype">unsigned</font> <font class="keywordtype">long</font> buflen = len * FILTERPAD;
00066   <font class="keywordtype">char</font> active = 0, mode = 0;
00067
00068   <font class="keyword">static</font> <font class="keywordtype">unsigned</font> <font class="keywordtype">short</font> start[8] = {0x0000,0x0080,0x0100,0x0300,0x2000,0x2080,0x2100,0x3000};
00069   <font class="keyword">static</font> <font class="keywordtype">unsigned</font> <font class="keywordtype">short</font> slide[8] = {0x0080,0x00C0,0x0400,0x0600,0x0900,0x3040,0x30A0,0xFF00};
00070   <font class="keyword">static</font> <font class="keywordtype">unsigned</font> <font class="keywordtype">short</font> win[256] = {
00071     0x0000, 0x0080, 0x0100, 0x0180, 0x0200, 0x0280, 0x0300, 0x0380,
00072     0x0400, 0x0480, 0x0500, 0x0580, 0x0600, 0x0680, 0x0700, 0x0780,
00073     0x0800, 0x0880, 0x0900, 0x0980, 0x0A00, 0x0A80, 0x0B00, 0x0B80,
00074     0x0C00, 0x0C80, 0x0D00, 0x0D80, 0x0E00, 0x0E80, 0x0F00, 0x0F80,
00075     0x1000, 0x1080, 0x1100, 0x1180, 0x1200, 0x1280, 0x1300, 0x1380,
00076     0x1400, 0x1480, 0x1500, 0x1580, 0x1600, 0x1680, 0x1700, 0x1780,
00077     0x1800, 0x1880, 0x1900, 0x1980, 0x1A00, 0x1A80, 0x1B00, 0x1B80,
00078     0x1C00, 0x1C80, 0x1D00, 0x1D80, 0x1E00, 0x1E80, 0x1F00, 0x1F80,
00079     0x2000, 0x2080, 0x2100, 0x2180, 0x2200, 0x2280, 0x2300, 0x2380,
00080     0x2400, 0x2480, 0x2500, 0x2580, 0x2600, 0x2680, 0x2700, 0x2780,
00081     0x2800, 0x2880, 0x2900, 0x2980, 0x2A00, 0x2A80, 0x2B00, 0x2B80,
00082     0x2C00, 0x2C80, 0x2D00, 0x2D80, 0x2E00, 0x2E80, 0x2F00, 0x2F80,
00083     0x3000, 0x3080, 0x3100, 0x3180, 0x3200, 0x3280, 0x3300, 0x3800,
00084     0xE000, 0xE080, 0xE100, 0xE180, 0xE200, 0xE280, 0xE300, 0xE380,
00085     0xE400, 0xE480, 0xE500, 0xE580, 0xE600, 0xE680, 0xE700, 0xE780,
00086     0xE800, 0xE880, 0xE900, 0xE980, 0xEA00, 0xEA80, 0xEB00, 0xEB80,
00087     0xEC00, 0xEC80, 0xED00, 0xED80, 0xEE00, 0xEE80, 0xEF00, 0xEF80,
00088     0xF000, 0xF080, 0xF100, 0xF180, 0xF200, 0xF280, 0xF300, 0xF380,
00089     0xF400, 0xF480, 0xF500, 0xF580, 0xF600, 0xF680, 0xF700, 0xF780,
00090     0xF800, 0xF880, 0xF900, 0xF980, 0xFA00, 0xFA80, 0xFB00, 0xFB80,
00091     0xFC00, 0xFC80, 0xFD00, 0xFD80, 0xFE00, 0xFE80, 0xFF00, 0xFF80,
00092     0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
00093     0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
00094     0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
00095     0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
00096     0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
00097     0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
00098     0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
00099     0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
00100     0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
00101     0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
00102     0x0000, 0x00C0, 0x0250, 0x0370, 0x0530, 0x3040, 0x30A0, 0xFF60
00103   };
00104
00105   <font class="keywordflow">if</font> (!len)
00106     <font class="keywordflow">return</font> 0;
00107
00108   memmove(&amp;text[buflen - len], text, len);
00109   from = (<font class="keywordtype">unsigned</font> <font class="keywordtype">char</font>*)&amp;text[buflen - len];
00110   to = (<font class="keywordtype">unsigned</font> <font class="keywordtype">char</font> *)text;
00111
00112   <font class="comment">// -------------------------------</font>
00113
00114   <font class="keywordflow">for</font> (<font class="keywordtype">int</font> i = 0; i &lt; len;) {
00115
00116
00117     <font class="keywordflow">if</font> (i &gt;= len) <font class="keywordflow">break</font>;
00118     c = from[i++];
00119
00120     <font class="keywordflow">if</font> (c &gt;= 0x80)
00121     {
00122       to = UTF8Output (c - 0x80 + slide[active], to);
00123     }
00124     <font class="keywordflow">else</font> <font class="keywordflow">if</font> (c &gt;= 0x20 &amp;&amp; c &lt;= 0x7F)
00125     {
00126       to = UTF8Output (c, to);
00127     }
00128     <font class="keywordflow">else</font> <font class="keywordflow">if</font> (c == 0x0 || c == 0x9 || c == 0xA || c == 0xC || c == 0xD)
00129     {
00130       to = UTF8Output (c, to);
00131     }
00132     <font class="keywordflow">else</font> <font class="keywordflow">if</font> (c &gt;= 0x1 &amp;&amp; c &lt;= 0x8) <font class="comment">/* SQn */</font>
00133     {
00134       <font class="keywordflow">if</font> (i &gt;= len) <font class="keywordflow">break</font>;
00135       <font class="comment">/* single quote */</font> d = from[i++];
00136
00137       to = UTF8Output (d &lt; 0x80 ? d + start [c - 0x1] :
00138                        d - 0x80 + slide [c - 0x1], to);
00139     }
00140     <font class="keywordflow">else</font> <font class="keywordflow">if</font> (c &gt;= 0x10 &amp;&amp; c &lt;= 0x17) <font class="comment">/* SCn */</font>
00141     {
00142       <font class="comment">/* change window */</font> active = c - 0x10;
00143     }
00144     <font class="keywordflow">else</font> <font class="keywordflow">if</font> (c &gt;= 0x18 &amp;&amp; c &lt;= 0x1F) <font class="comment">/* SDn */</font>
00145     {
00146       <font class="comment">/* define window */</font> active = c - 0x18;
00147       <font class="keywordflow">if</font> (i &gt;= len) <font class="keywordflow">break</font>;
00148       slide [active] = win [from[i++]];
00149     }
00150     <font class="keywordflow">else</font> <font class="keywordflow">if</font> (c == 0xB) <font class="comment">/* SDX */</font>
00151     {
00152       <font class="keywordflow">if</font> (i &gt;= len) <font class="keywordflow">break</font>;
00153       c = from[i++];
00154
00155       <font class="keywordflow">if</font> (i &gt;= len) <font class="keywordflow">break</font>;
00156       d = from[i++];
00157
00158       slide [active = c&gt;&gt;5] = 0x10000 + (((c &amp; 0x1F) &lt;&lt; 8 | d) &lt;&lt; 7);
00159     }
00160     <font class="keywordflow">else</font> <font class="keywordflow">if</font> (c == 0xE) <font class="comment">/* SQU */</font>
00161     {
00162       <font class="keywordflow">if</font> (i &gt;= len) <font class="keywordflow">break</font>;
00163       <font class="comment">/* SQU */</font> c = from[i++];
00164
00165       <font class="keywordflow">if</font> (i &gt;= len) <font class="keywordflow">break</font>;
00166       to = UTF8Output (c &lt;&lt; 8 | from[i++], to);
00167     }
00168     <font class="keywordflow">else</font> <font class="keywordflow">if</font> (c == 0xF) <font class="comment">/* SCU */</font>
00169     {
00170       <font class="comment">/* change to Unicode mode */</font> mode = 1;
00171
00172       <font class="keywordflow">while</font> (mode)
00173       {
00174         <font class="keywordflow">if</font> (i &gt;= len) <font class="keywordflow">break</font>;
00175         c = from[i++];
00176
00177         <font class="keywordflow">if</font> (c &lt;= 0xDF || c &gt;= 0xF3)
00178         {
00179           <font class="keywordflow">if</font> (i &gt;= len) <font class="keywordflow">break</font>;
00180           to = UTF8Output (c &lt;&lt; 8 | from[i++], to);
00181         }
00182         <font class="keywordflow">else</font> <font class="keywordflow">if</font> (c == 0xF0) <font class="comment">/* UQU */</font>
00183         {
00184           <font class="keywordflow">if</font> (i &gt;= len) <font class="keywordflow">break</font>;
00185           c = from[i++];
00186
00187           <font class="keywordflow">if</font> (i &gt;= len) <font class="keywordflow">break</font>;
00188           to = UTF8Output (c &lt;&lt; 8 | from[i++], to);
00189         }
00190         <font class="keywordflow">else</font> <font class="keywordflow">if</font> (c &gt;= 0xE0 &amp;&amp; c &lt;= 0xE7) <font class="comment">/* UCn */</font>
00191         {
00192           active = c - 0xE0; mode = 0;
00193         }
00194         <font class="keywordflow">else</font> <font class="keywordflow">if</font> (c &gt;= 0xE8 &amp;&amp; c &lt;= 0xEF) <font class="comment">/* UDn */</font>
00195         {
00196           <font class="keywordflow">if</font> (i &gt;= len) <font class="keywordflow">break</font>;
00197           slide [active=c-0xE8] = win [from[i++]]; mode = 0;
00198         }
00199         <font class="keywordflow">else</font> <font class="keywordflow">if</font> (c == 0xF1) <font class="comment">/* UDX */</font>
00200         {
00201           <font class="keywordflow">if</font> (i &gt;= len) <font class="keywordflow">break</font>;
00202           c = from[i++];
00203
00204           <font class="keywordflow">if</font> (i &gt;= len) <font class="keywordflow">break</font>;
00205           d = from[i++];
00206
00207           slide [active = c&gt;&gt;5] =
00208             0x10000 + (((c &amp; 0x1F) &lt;&lt; 8 | d) &lt;&lt; 7); mode = 0;
00209         }
00210       }
00211     }
00212
00213
00214   }
00215
00216   *to++ = 0;
00217   *to = 0;
00218   <font class="keywordflow">return</font> 0;
00219 }
00220
</pre></div>
<hr><address align="right"><small>Generated on Thu Jun 20 22:13:00 2002 for The Sword Project by
<a href="http://www.doxygen.org/index.html">
<img src="doxygen.png" alt="doxygen" align="middle" border="0" width="110" height="53"></a>1.2.15
</small></address>
</body>
</html>