Sophie

Sophie

distrib > Mandriva > 9.1 > ppc > by-pkgid > a74ec78bdb789d910d054e3918f3f007 > files > 466

libsword1-devel-1.5.5-2mdk.ppc.rpm

<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
<html><head><meta http-equiv="Content-Type" content="text/html;charset=iso-8859-1">
<title>scsuutf8.cpp Source File</title>
<link href="doxygen.css" rel="stylesheet" type="text/css">
</head><body>
<!-- Generated by Doxygen 1.2.15 -->
<center>
<a class="qindex" href="index.html">Main Page</a> &nbsp; <a class="qindex" href="namespaces.html">Namespace List</a> &nbsp; <a class="qindex" href="hierarchy.html">Class Hierarchy</a> &nbsp; <a class="qindex" href="classes.html">Alphabetical List</a> &nbsp; <a class="qindex" href="annotated.html">Compound List</a> &nbsp; <a class="qindex" href="files.html">File List</a> &nbsp; <a class="qindex" href="functions.html">Compound Members</a> &nbsp; </center>
<hr><h1>scsuutf8.cpp</h1><div class="fragment"><pre>00001 <font class="comment">/******************************************************************************</font>
00002 <font class="comment"> *</font>
00003 <font class="comment"> * SCSUUTF8 -   SWFilter descendant to convert a SCSU character to UTF-8</font>
00004 <font class="comment"> *</font>
00005 <font class="comment"> */</font>
00006 
00007 
00008 <font class="comment">/* This class is based on:</font>
00009 <font class="comment"> * http://czyborra.com/scsu/scsu.c written by Roman Czyborra@dds.nl</font>
00010 <font class="comment"> * on Andrea's balcony in North Amsterdam on 1998-08-04</font>
00011 <font class="comment"> * Thanks to Richard Verhoeven &lt;rcb5@win.tue.nl&gt; for his suggestion</font>
00012 <font class="comment"> * to correct the haphazard "if" after UQU to "else if" on 1998-10-01</font>
00013 <font class="comment"> * </font>
00014 <font class="comment"> * This is a deflator to UTF-8 output for input compressed in SCSU,</font>
00015 <font class="comment"> * the (Reuters) Standard Compression Scheme for Unicode as described</font>
00016 <font class="comment"> * in http://www.unicode.org/unicode/reports/tr6.html</font>
00017 <font class="comment"> */</font>
00018 
00019 <font class="preprocessor">#include &lt;stdlib.h&gt;</font>
00020 <font class="preprocessor">#include &lt;stdio.h&gt;</font>
00021 <font class="preprocessor">#include &lt;swmodule.h&gt;</font>
00022 
00023 <font class="preprocessor">#include &lt;scsuutf8.h&gt;</font>
00024 
00025 SCSUUTF8::SCSUUTF8()  <font class="comment">// constructor with an empty body</font>
00026 {}
00027 
00028 
00029 <font class="keywordtype">unsigned</font> <font class="keywordtype">char</font>* SCSUUTF8::UTF8Output(<font class="keywordtype">unsigned</font> <font class="keywordtype">long</font> uchar, <font class="keywordtype">unsigned</font> <font class="keywordtype">char</font>* text)
00030 {
00031   <font class="comment">/* Appends uchar at text as a UTF-8 sequence and returns the advanced write pointer. */</font>
00032   <font class="comment">/* UTF-16 surrogates are joined without any pairing sanity checks; d keeps the pending high half between calls. */</font>
00033   <font class="keyword">static</font> <font class="keywordtype">int</font> d;
00034   <font class="comment">/* a high surrogate writes nothing yet; all 10 of its payload bits are kept */</font>
00035   <font class="keywordflow">if</font> (uchar &gt;= 0xd800 &amp;&amp; uchar &lt;= 0xdbff) { d = uchar &amp; 0x3ff; <font class="keywordflow">return</font> text;  }
00036   <font class="keywordflow">if</font> (uchar &gt;= 0xdc00 &amp;&amp; uchar &lt;= 0xdfff) { uchar = uchar + 0x2400 + d * 0x400; }
00037   
00038   <font class="comment">/* output one character as UTF-8 multibyte sequence */</font>
00039   <font class="comment">/* values of 0x200000 and above match no branch and write nothing */</font>
00040   <font class="keywordflow">if</font> (uchar &lt; 0x80) {
00041     *text++ = uchar;  <font class="comment">/* one byte: the code point itself */</font>
00042   }
00043   <font class="keywordflow">else</font> <font class="keywordflow">if</font> (uchar &lt; 0x800) { 
00044     *text++ = 0xc0 | (uchar &gt;&gt; 6); 
00045     *text++ = 0x80 | (uchar &amp; 0x3f);
00046   }
00047   <font class="keywordflow">else</font> <font class="keywordflow">if</font> (uchar &lt; 0x10000) {
00048     *text++ = 0xe0 | (uchar &gt;&gt; 12); 
00049     *text++ = 0x80 | ((uchar &gt;&gt; 6) &amp; 0x3f);
00050     *text++ = 0x80 | (uchar &amp; 0x3f);
00051   }
00052   <font class="keywordflow">else</font> <font class="keywordflow">if</font> (uchar &lt; 0x200000) {
00053     *text++ = 0xf0 | (uchar &gt;&gt; 18);
00054     *text++ = 0x80 | ((uchar &gt;&gt; 12) &amp; 0x3f); 
00055     *text++ = 0x80 | ((uchar &gt;&gt; 6) &amp; 0x3f); 
00056     *text++ = 0x80 | (uchar &amp; 0x3f);
00057   }  
00058   
00059   <font class="keywordflow">return</font> text;
00060 }
00061 <font class="comment">/* Decodes len bytes of SCSU held in text to UTF-8 in place, followed by two NUL bytes; key and module are unused; always returns 0. */</font>
00062 <font class="keywordtype">char</font> SCSUUTF8::ProcessText(<font class="keywordtype">char</font> *text, <font class="keywordtype">int</font> len, <font class="keyword">const</font> <a class="code" href="class_s_w_key.html">SWKey</a> *key, <font class="keyword">const</font> <a class="code" href="class_s_w_module.html">SWModule</a> *module)
00063 {
00064   <font class="keywordtype">unsigned</font> <font class="keywordtype">char</font> *to, *from;
00065   <font class="keywordtype">unsigned</font> <font class="keywordtype">long</font> buflen = len * FILTERPAD;
00066   <font class="keywordtype">char</font> active = 0, mode = 0;
00067   <font class="comment">/* slide[] is re-created on every call and is wide enough for the SDX/UDX offsets above 0xFFFF */</font>
00068   <font class="keyword">static</font> <font class="keyword">const</font> <font class="keywordtype">unsigned</font> <font class="keywordtype">short</font> start[8] = {0x0000,0x0080,0x0100,0x0300,0x2000,0x2080,0x2100,0x3000};
00069   <font class="keywordtype">unsigned</font> <font class="keywordtype">long</font> slide[8] = {0x0080,0x00C0,0x0400,0x0600,0x0900,0x3040,0x30A0,0xFF00};
00070   <font class="keyword">static</font> <font class="keyword">const</font> <font class="keywordtype">unsigned</font> <font class="keywordtype">short</font> win[256]   = {
00071     0x0000, 0x0080, 0x0100, 0x0180, 0x0200, 0x0280, 0x0300, 0x0380,
00072     0x0400, 0x0480, 0x0500, 0x0580, 0x0600, 0x0680, 0x0700, 0x0780,
00073     0x0800, 0x0880, 0x0900, 0x0980, 0x0A00, 0x0A80, 0x0B00, 0x0B80,
00074     0x0C00, 0x0C80, 0x0D00, 0x0D80, 0x0E00, 0x0E80, 0x0F00, 0x0F80,
00075     0x1000, 0x1080, 0x1100, 0x1180, 0x1200, 0x1280, 0x1300, 0x1380,
00076     0x1400, 0x1480, 0x1500, 0x1580, 0x1600, 0x1680, 0x1700, 0x1780,
00077     0x1800, 0x1880, 0x1900, 0x1980, 0x1A00, 0x1A80, 0x1B00, 0x1B80,
00078     0x1C00, 0x1C80, 0x1D00, 0x1D80, 0x1E00, 0x1E80, 0x1F00, 0x1F80,
00079     0x2000, 0x2080, 0x2100, 0x2180, 0x2200, 0x2280, 0x2300, 0x2380,
00080     0x2400, 0x2480, 0x2500, 0x2580, 0x2600, 0x2680, 0x2700, 0x2780,
00081     0x2800, 0x2880, 0x2900, 0x2980, 0x2A00, 0x2A80, 0x2B00, 0x2B80,
00082     0x2C00, 0x2C80, 0x2D00, 0x2D80, 0x2E00, 0x2E80, 0x2F00, 0x2F80,
00083     0x3000, 0x3080, 0x3100, 0x3180, 0x3200, 0x3280, 0x3300, 0x3380,
00084     0xE000, 0xE080, 0xE100, 0xE180, 0xE200, 0xE280, 0xE300, 0xE380,
00085     0xE400, 0xE480, 0xE500, 0xE580, 0xE600, 0xE680, 0xE700, 0xE780,
00086     0xE800, 0xE880, 0xE900, 0xE980, 0xEA00, 0xEA80, 0xEB00, 0xEB80,
00087     0xEC00, 0xEC80, 0xED00, 0xED80, 0xEE00, 0xEE80, 0xEF00, 0xEF80,
00088     0xF000, 0xF080, 0xF100, 0xF180, 0xF200, 0xF280, 0xF300, 0xF380,
00089     0xF400, 0xF480, 0xF500, 0xF580, 0xF600, 0xF680, 0xF700, 0xF780,
00090     0xF800, 0xF880, 0xF900, 0xF980, 0xFA00, 0xFA80, 0xFB00, 0xFB80,
00091     0xFC00, 0xFC80, 0xFD00, 0xFD80, 0xFE00, 0xFE80, 0xFF00, 0xFF80,
00092     0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
00093     0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
00094     0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
00095     0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
00096     0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
00097     0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
00098     0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
00099     0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
00100     0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
00101     0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
00102     0x0000, 0x00C0, 0x0250, 0x0370, 0x0530, 0x3040, 0x30A0, 0xFF60
00103   };
00104   <font class="comment">/* NOTE(review): c and d are not declared in this function - presumably SCSUUTF8 members; confirm in scsuutf8.h */</font>
00105   <font class="keywordflow">if</font> (!len)
00106         <font class="keywordflow">return</font> 0;
00107   <font class="comment">/* park the input at the tail so output can grow from the front; assumes text holds len * FILTERPAD bytes - TODO confirm */</font>
00108   memmove(&amp;text[buflen - len], text, len);
00109   from = (<font class="keywordtype">unsigned</font> <font class="keywordtype">char</font>*)&amp;text[buflen - len];
00110   to = (<font class="keywordtype">unsigned</font> <font class="keywordtype">char</font> *)text;
00111 
00112   <font class="comment">// -------------------------------</font>
00113 
00114   <font class="keywordflow">for</font> (<font class="keywordtype">int</font> i = 0; i &lt; len;) {
00115       <font class="comment">/* single-byte mode: every pass consumes one tag or one character */</font>
00116       <font class="comment">/* a tag whose argument bytes are missing ends the conversion */</font>
00117 
00118       c = from[i++];
00119 
00120       <font class="keywordflow">if</font> (c &gt;= 0x80)
00121         {
00122           to = UTF8Output (c - 0x80 + slide[active], to);
00123         }
00124       <font class="keywordflow">else</font> <font class="keywordflow">if</font> (c &gt;= 0x20 &amp;&amp; c &lt;= 0x7F)
00125         {
00126           to = UTF8Output (c, to);
00127         }
00128       <font class="keywordflow">else</font> <font class="keywordflow">if</font> (c == 0x0 || c == 0x9 || c == 0xA || c == 0xC || c == 0xD)
00129         {
00130           to = UTF8Output (c, to);
00131         }
00132       <font class="keywordflow">else</font> <font class="keywordflow">if</font> (c &gt;= 0x1 &amp;&amp; c &lt;= 0x8) <font class="comment">/* SQn */</font>
00133         {
00134           <font class="keywordflow">if</font> (i &gt;= len) <font class="keywordflow">break</font>;
00135           <font class="comment">/* single quote */</font> d = from[i++];
00136 
00137           to = UTF8Output (d &lt; 0x80 ? d + start [c - 0x1] :
00138                   d - 0x80 + slide [c - 0x1], to);
00139         }
00140       <font class="keywordflow">else</font> <font class="keywordflow">if</font> (c &gt;= 0x10 &amp;&amp; c &lt;= 0x17) <font class="comment">/* SCn */</font>
00141         {
00142           <font class="comment">/* change window */</font> active = c - 0x10;
00143         }
00144       <font class="keywordflow">else</font> <font class="keywordflow">if</font> (c &gt;= 0x18 &amp;&amp; c &lt;= 0x1F) <font class="comment">/* SDn */</font>
00145         {
00146           <font class="comment">/* define window */</font> active = c - 0x18;
00147           <font class="keywordflow">if</font> (i &gt;= len) <font class="keywordflow">break</font>;
00148           slide [active] = win [from[i++]];
00149         }
00150       <font class="keywordflow">else</font> <font class="keywordflow">if</font> (c == 0xB) <font class="comment">/* SDX */</font>
00151         {
00152           <font class="keywordflow">if</font> (i &gt;= len) <font class="keywordflow">break</font>;
00153           c = from[i++];
00154 
00155           <font class="keywordflow">if</font> (i &gt;= len) <font class="keywordflow">break</font>;
00156           d = from[i++];
00157 
00158           slide [active = c&gt;&gt;5] = 0x10000 + (((c &amp; 0x1F) &lt;&lt; 8 | d) &lt;&lt; 7);
00159         }
00160       <font class="keywordflow">else</font> <font class="keywordflow">if</font> (c == 0xE) <font class="comment">/* SQU */</font>
00161         {
00162           <font class="keywordflow">if</font> (i &gt;= len) <font class="keywordflow">break</font>;
00163           <font class="comment">/* SQU */</font> c = from[i++];
00164 
00165           <font class="keywordflow">if</font> (i &gt;= len) <font class="keywordflow">break</font>;
00166           to = UTF8Output (c &lt;&lt; 8 | from[i++], to);
00167         }
00168       <font class="keywordflow">else</font> <font class="keywordflow">if</font> (c == 0xF) <font class="comment">/* SCU */</font>
00169         {
00170           <font class="comment">/* change to Unicode mode */</font> mode = 1;
00171 
00172           <font class="keywordflow">while</font> (mode)
00173             {
00174               <font class="keywordflow">if</font> (i &gt;= len) <font class="keywordflow">break</font>;
00175               c = from[i++];
00176 
00177               <font class="keywordflow">if</font> (c &lt;= 0xDF || c &gt;= 0xF3)
00178                 {
00179                   <font class="keywordflow">if</font> (i &gt;= len) <font class="keywordflow">break</font>;
00180                   to = UTF8Output (c &lt;&lt; 8 | from[i++], to);
00181                 }
00182               <font class="keywordflow">else</font> <font class="keywordflow">if</font> (c == 0xF0) <font class="comment">/* UQU */</font>
00183                 {
00184                   <font class="keywordflow">if</font> (i &gt;= len) <font class="keywordflow">break</font>;
00185                   c = from[i++];
00186 
00187                   <font class="keywordflow">if</font> (i &gt;= len) <font class="keywordflow">break</font>;
00188                   to = UTF8Output (c &lt;&lt; 8 | from[i++], to);
00189                 }
00190               <font class="keywordflow">else</font> <font class="keywordflow">if</font> (c &gt;= 0xE0 &amp;&amp; c &lt;= 0xE7) <font class="comment">/* UCn */</font>
00191                 {
00192                   active = c - 0xE0; mode = 0;
00193                 }
00194               <font class="keywordflow">else</font> <font class="keywordflow">if</font> (c &gt;= 0xE8 &amp;&amp; c &lt;= 0xEF) <font class="comment">/* UDn */</font>
00195                 {
00196                   <font class="keywordflow">if</font> (i &gt;= len) <font class="keywordflow">break</font>;
00197                   slide [active=c-0xE8] = win [from[i++]]; mode = 0;
00198                 }
00199               <font class="keywordflow">else</font> <font class="keywordflow">if</font> (c == 0xF1) <font class="comment">/* UDX */</font>
00200                 {
00201                   <font class="keywordflow">if</font> (i &gt;= len) <font class="keywordflow">break</font>;
00202                   c = from[i++];
00203 
00204                   <font class="keywordflow">if</font> (i &gt;= len) <font class="keywordflow">break</font>;
00205                   d = from[i++];
00206 
00207                   slide [active = c&gt;&gt;5] =
00208                     0x10000 + (((c &amp; 0x1F) &lt;&lt; 8 | d) &lt;&lt; 7); mode = 0;
00209                 }
00210             }
00211         }
00212 
00213 
00214   }
00215   <font class="comment">/* terminate the output with two NUL bytes */</font>
00216   *to++ = 0;
00217   *to = 0;
00218   <font class="keywordflow">return</font> 0;
00219 }
00220 
</pre></div><hr><address align="right"><small>Generated on Thu Jun 20 22:13:00 2002 for The Sword Project by
<a href="http://www.doxygen.org/index.html">
<img src="doxygen.png" alt="doxygen" align="middle" border=0 
width=110 height=53></a>1.2.15 </small></address>
</body>
</html>