Sophie

Sophie

distrib > Mandriva > 9.1 > ppc > by-pkgid > a74ec78bdb789d910d054e3918f3f007 > files > 417

libsword1-devel-1.5.5-2mdk.ppc.rpm

<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
<html><head><meta http-equiv="Content-Type" content="text/html;charset=iso-8859-1">
<title>lzsscomprs.cpp Source File</title>
<link href="doxygen.css" rel="stylesheet" type="text/css">
</head><body>
<!-- Generated by Doxygen 1.2.15 -->
<center>
<a class="qindex" href="index.html">Main Page</a> &nbsp; <a class="qindex" href="namespaces.html">Namespace List</a> &nbsp; <a class="qindex" href="hierarchy.html">Class Hierarchy</a> &nbsp; <a class="qindex" href="classes.html">Alphabetical List</a> &nbsp; <a class="qindex" href="annotated.html">Compound List</a> &nbsp; <a class="qindex" href="files.html">File List</a> &nbsp; <a class="qindex" href="functions.html">Compound Members</a> &nbsp; </center>
<hr><h1>lzsscomprs.cpp</h1><div class="fragment"><pre>00001 <font class="comment">/******************************************************************************</font>
00002 <font class="comment"> *  lzsscomprs.cpp   - code for class 'LZSSCompress'- a driver class that</font>
00003 <font class="comment"> *                      provides LZSS compression</font>
00004 <font class="comment"> */</font>
00005 
00006 <font class="preprocessor">#include &lt;string.h&gt;</font>
00007 <font class="preprocessor">#include &lt;stdlib.h&gt;</font>
00008 <font class="preprocessor">#include &lt;lzsscomprs.h&gt;</font>
00009 
00010 
00011 <font class="comment">/******************************************************************************</font>
00012 <font class="comment"> * LZSSCompress Statics</font>
00013 <font class="comment"> */</font>
00014 
00015 <font class="comment">// m_ring_buffer is a text buffer.  It contains "nodes" of</font>
00016 <font class="comment">// uncompressed text that can be indexed by position.  That is,</font>
00017 <font class="comment">// a substring of the ring buffer can be indexed by a position</font>
00018 <font class="comment">// and a length.  When decoding, the compressed text may contain</font>
00019 <font class="comment">// a position in the ring buffer and a count of the number of</font>
00020 <font class="comment">// bytes from the ring buffer that are to be moved into the</font>
00021 <font class="comment">// uncompressed buffer.  </font>
00022 <font class="comment">//</font>
00023 <font class="comment">// This ring buffer is not maintained as part of the compressed</font>
00024 <font class="comment">// text.  Instead, it is reconstructed dynamically.  That is,</font>
00025 <font class="comment">// it starts out empty and gets built as the text is decompressed.</font>
00026 <font class="comment">//</font>
00027 <font class="comment">// The ring buffer contain N bytes, with an additional F - 1 bytes</font>
00028 <font class="comment">// to facilitate string comparison.</font>
00029 
00030 <font class="keywordtype">unsigned</font> <font class="keywordtype">char</font> LZSSCompress::m_ring_buffer[N + F - 1];
00031 
00032 <font class="comment">// m_match_position and m_match_length are set by InsertNode().</font>
00033 <font class="comment">//</font>
00034 <font class="comment">// These variables indicate the position in the ring buffer </font>
00035 <font class="comment">// and the number of characters at that position that match</font>
00036 <font class="comment">// a given string.</font>
00037 
00038 <font class="keywordtype">short</font> <font class="keywordtype">int</font> LZSSCompress::m_match_position;
00039 <font class="keywordtype">short</font> <font class="keywordtype">int</font> LZSSCompress::m_match_length;
00040 
00041 <font class="comment">// m_lson, m_rson, and m_dad are the Japanese way of referring to</font>
00042 <font class="comment">// a tree structure.  The dad is the parent and it has a right and</font>
00043 <font class="comment">// left son (child).</font>
00044 <font class="comment">//</font>
00045 <font class="comment">// For i = 0 to N-1, m_rson[i] and m_lson[i] will be the right </font>
00046 <font class="comment">// and left children of node i.  </font>
00047 <font class="comment">//</font>
00048 <font class="comment">// For i = 0 to N-1, m_dad[i] is the parent of node i.</font>
00049 <font class="comment">//</font>
00050 <font class="comment">// For i = 0 to 255, rson[N + i + 1] is the root of the tree for </font>
00051 <font class="comment">// strings that begin with the character i.  Note that this requires </font>
00052 <font class="comment">// one byte characters.</font>
00053 <font class="comment">//</font>
00054 <font class="comment">// These nodes store values of 0...(N-1).  Memory requirements</font>
00055 <font class="comment">// can be reduces by using 2-byte integers instead of full 4-byte</font>
00056 <font class="comment">// integers (for 32-bit applications).  Therefore, these are </font>
00057 <font class="comment">// defined as "short ints."</font>
00058 
00059 <font class="keywordtype">short</font> <font class="keywordtype">int</font> LZSSCompress::m_lson[N + 1];
00060 <font class="keywordtype">short</font> <font class="keywordtype">int</font> LZSSCompress::m_rson[N + 257];
00061 <font class="keywordtype">short</font> <font class="keywordtype">int</font> LZSSCompress::m_dad[N + 1];
00062 
00063 
00064 <font class="comment">/******************************************************************************</font>
00065 <font class="comment"> * LZSSCompress Constructor - Initializes data for instance of LZSSCompress</font>
00066 <font class="comment"> *</font>
00067 <font class="comment"> */</font>
00068 
00069 LZSSCompress::LZSSCompress() : SWCompress() {
00070 }
00071 
00072 
00073 <font class="comment">/******************************************************************************</font>
00074 <font class="comment"> * LZSSCompress Destructor - Cleans up instance of LZSSCompress</font>
00075 <font class="comment"> */</font>
00076 
00077 LZSSCompress::~LZSSCompress() {
00078 }
00079 
00080 
00081 <font class="comment">/******************************************************************************</font>
00082 <font class="comment"> * LZSSCompress::InitTree       - This function initializes the tree nodes to</font>
00083 <font class="comment"> *                                                      "empty" states. </font>
00084 <font class="comment"> */</font>
00085 
00086 <font class="keywordtype">void</font> LZSSCompress::InitTree(<font class="keywordtype">void</font>) {
00087         <font class="keywordtype">int</font>  i;
00088 
00089         <font class="comment">// For i = 0 to N - 1, m_rson[i] and m_lson[i] will be the right</font>
00090         <font class="comment">// and left children of node i.  These nodes need not be</font>
00091         <font class="comment">// initialized.  However, for debugging purposes, it is nice to</font>
00092         <font class="comment">// have them initialized.  Since this is only used for compression</font>
00093         <font class="comment">// (not decompression), I don't mind spending the time to do it.</font>
00094         <font class="comment">//</font>
00095         <font class="comment">// For the same range of i, m_dad[i] is the parent of node i.</font>
00096         <font class="comment">// These are initialized to a known value that can represent</font>
00097         <font class="comment">// a "not used" state.</font>
00098         
00099         <font class="keywordflow">for</font> (i = 0; i &lt; N; i++) {
00100                 m_lson[i] = NOT_USED;
00101                 m_rson[i] = NOT_USED;
00102                 m_dad[i] = NOT_USED;
00103         }
00104 
00105         <font class="comment">// For i = 0 to 255, m_rson[N + i + 1] is the root of the tree</font>
00106         <font class="comment">// for strings that begin with the character i.  This is why</font>
00107         <font class="comment">// the right child array is larger than the left child array.</font>
00108         <font class="comment">// These are also initialzied to a "not used" state.</font>
00109         <font class="comment">//</font>
00110         <font class="comment">// Note that there are 256 of these, one for each of the possible</font>
00111         <font class="comment">// 256 characters.</font>
00112 
00113         <font class="keywordflow">for</font> (i = N + 1; i &lt;= (N + 256); i++) {
00114                 m_rson[i] = NOT_USED;
00115         }
00116 }
00117 
00118 
00119 <font class="comment">/******************************************************************************</font>
00120 <font class="comment"> * LZSSCompress::InsertNode     - This function inserts a string from the ring</font>
00121 <font class="comment"> *                                                      buffer into one of the trees.  It loads the</font>
00122 <font class="comment"> *                                                      match position and length member variables</font>
00123 <font class="comment"> *                                                      for the longest match.</font>
00124 <font class="comment"> *      </font>
00125 <font class="comment"> *                                                      The string to be inserted is identified by</font>
00126 <font class="comment"> *                                                      the parameter Pos, A full F bytes are</font>
00127 <font class="comment"> *                                                      inserted.  So,</font>
00128 <font class="comment"> *                                                      m_ring_buffer[Pos ... Pos+F-1]</font>
00129 <font class="comment"> *                                                      are inserted.</font>
00130 <font class="comment"> *</font>
00131 <font class="comment"> *                                                      If the matched length is exactly F, then an</font>
00132 <font class="comment"> *                                                      old node is removed in favor of the new one</font>
00133 <font class="comment"> *                                                      (because the old one will be deleted</font>
00134 <font class="comment"> *                                                      sooner).</font>
00135 <font class="comment"> *</font>
00136 <font class="comment"> *                                                      Note that Pos plays a dual role.  It is</font>
00137 <font class="comment"> *                                                      used as both a position in the ring buffer</font>
00138 <font class="comment"> *                                                      and also as a tree node.</font>
00139 <font class="comment"> *                                                      m_ring_buffer[Pos] defines a character that</font>
00140 <font class="comment"> *                                                      is used to identify a tree node.</font>
00141 <font class="comment"> *</font>
00142 <font class="comment"> * ENT: pos     - position in the buffer</font>
00143 <font class="comment"> */</font>
00144 
00145 <font class="keywordtype">void</font> LZSSCompress::InsertNode(<font class="keywordtype">short</font> <font class="keywordtype">int</font> Pos)
00146 {
00147         <font class="keywordtype">short</font> <font class="keywordtype">int</font> i;
00148         <font class="keywordtype">short</font> <font class="keywordtype">int</font> p;
00149         <font class="keywordtype">int</font> cmp;
00150         <font class="keywordtype">unsigned</font> <font class="keywordtype">char</font> * key;
00151 
00152 <font class="comment">/*</font>
00153 <font class="comment">        ASSERT(Pos &gt;= 0);</font>
00154 <font class="comment">        ASSERT(Pos &lt; N);</font>
00155 <font class="comment">*/</font>
00156 
00157         cmp = 1;
00158         key = &amp;(m_ring_buffer[Pos]);
00159 
00160         <font class="comment">// The last 256 entries in m_rson contain the root nodes for</font>
00161         <font class="comment">// strings that begin with a letter.  Get an index for the</font>
00162         <font class="comment">// first letter in this string.</font>
00163 
00164         p = (<font class="keywordtype">short</font> <font class="keywordtype">int</font>) (N + 1 + key[0]);
00165 
00166         <font class="comment">// Set the left and right tree nodes for this position to "not</font>
00167         <font class="comment">// used."</font>
00168 
00169         m_lson[Pos] = NOT_USED;
00170         m_rson[Pos] = NOT_USED;
00171 
00172         <font class="comment">// Haven't matched anything yet.</font>
00173 
00174         m_match_length = 0;
00175 
00176         <font class="keywordflow">for</font> ( ; ; ) {
00177                 <font class="keywordflow">if</font> (cmp &gt;= 0) {
00178                         <font class="keywordflow">if</font> (m_rson[p] != NOT_USED) {
00179                                 p = m_rson[p];
00180                         }
00181                         <font class="keywordflow">else</font> {
00182                                 m_rson[p] = Pos;
00183                                 m_dad[Pos] = p;
00184                                 <font class="keywordflow">return</font>;
00185                         }
00186                 }
00187                 <font class="keywordflow">else</font> {
00188                         <font class="keywordflow">if</font> (m_lson[p] != NOT_USED) {
00189                                 p = m_lson[p];
00190                         }
00191                         <font class="keywordflow">else</font> {
00192                                 m_lson[p] = Pos;
00193                                 m_dad[Pos] = p;
00194                                 <font class="keywordflow">return</font>;
00195                         }
00196                 }
00197 
00198                 <font class="comment">// Should we go to the right or the left to look for the</font>
00199                 <font class="comment">// next match?</font>
00200 
00201                 <font class="keywordflow">for</font> (i = 1; i &lt; F; i++) {
00202                         cmp = key[i] - m_ring_buffer[p + i];
00203                         <font class="keywordflow">if</font> (cmp != 0)
00204                                 <font class="keywordflow">break</font>;
00205                 }
00206 
00207                 <font class="keywordflow">if</font> (i &gt; m_match_length) {
00208                         m_match_position = p;
00209                         m_match_length = i;
00210 
00211                         <font class="keywordflow">if</font> (i &gt;= F)
00212                                 <font class="keywordflow">break</font>;
00213                 }
00214         }
00215 
00216         m_dad[Pos] = m_dad[p];
00217         m_lson[Pos] = m_lson[p];
00218         m_rson[Pos] = m_rson[p];
00219 
00220         m_dad[ m_lson[p] ] = Pos;
00221         m_dad[ m_rson[p] ] = Pos;
00222 
00223         <font class="keywordflow">if</font> (m_rson[ m_dad[p] ] == p) {
00224                 m_rson[ m_dad[p] ] = Pos;
00225         }
00226         <font class="keywordflow">else</font> {
00227                 m_lson[ m_dad[p] ] = Pos;
00228         }
00229 
00230         <font class="comment">// Remove "p"</font>
00231 
00232         m_dad[p] = NOT_USED;
00233 }
00234 
00235 
00236 <font class="comment">/******************************************************************************</font>
00237 <font class="comment"> * LZSSCompress::DeleteNode     - This function removes the node "Node" from the</font>
00238 <font class="comment"> *                                                      tree.</font>
00239 <font class="comment"> *</font>
00240 <font class="comment"> * ENT: node    - node to be removed</font>
00241 <font class="comment"> */</font>
00242 
00243 <font class="keywordtype">void</font> LZSSCompress::DeleteNode(<font class="keywordtype">short</font> <font class="keywordtype">int</font> Node)
00244 {
00245         <font class="keywordtype">short</font> <font class="keywordtype">int</font>  q;
00246 
00247 <font class="comment">/*</font>
00248 <font class="comment">        ASSERT(Node &gt;= 0);</font>
00249 <font class="comment">        ASSERT(Node &lt; (N+1));</font>
00250 <font class="comment">*/</font>
00251 
00252         <font class="keywordflow">if</font> (m_dad[Node] == NOT_USED) { <font class="comment">// not in tree, nothing to do</font>
00253                 <font class="keywordflow">return</font>;
00254         }
00255 
00256         <font class="keywordflow">if</font> (m_rson[Node] == NOT_USED) {
00257                 q = m_lson[Node];
00258         }
00259         <font class="keywordflow">else</font> <font class="keywordflow">if</font> (m_lson[Node] == NOT_USED) {
00260                 q = m_rson[Node];
00261         }
00262         <font class="keywordflow">else</font> {
00263                 q = m_lson[Node];
00264                 <font class="keywordflow">if</font> (m_rson[q] != NOT_USED) {
00265                         <font class="keywordflow">do</font> {
00266                                 q = m_rson[q];
00267                         } <font class="keywordflow">while</font> (m_rson[q] != NOT_USED);
00268 
00269                         m_rson[ m_dad[q] ] = m_lson[q];
00270                         m_dad[ m_lson[q] ] = m_dad[q];
00271                         m_lson[q] = m_lson[Node];
00272                         m_dad[ m_lson[Node] ] = q;
00273                 }
00274 
00275                 m_rson[q] = m_rson[Node];
00276                 m_dad[ m_rson[Node] ] = q;
00277         }
00278 
00279         m_dad[q] = m_dad[Node];
00280 
00281         <font class="keywordflow">if</font> (m_rson[ m_dad[Node] ] == Node) {
00282                 m_rson[ m_dad[Node] ] = q;
00283         }
00284         <font class="keywordflow">else</font> {
00285                 m_lson[ m_dad[Node] ] = q;
00286         }
00287 
00288         m_dad[Node] = NOT_USED;
00289 }
00290 
00291 
00292 <font class="comment">/******************************************************************************</font>
00293 <font class="comment"> * LZSSCompress::Encode - This function "encodes" the input stream into the</font>
00294 <font class="comment"> *                                              output stream.</font>
00295 <font class="comment"> *                                              The GetChars() and SendChars() functions are</font>
00296 <font class="comment"> *                                              used to separate this method from the actual</font>
00297 <font class="comment"> *                                              i/o.</font>
00298 <font class="comment"> *              NOTE:                   must set zlen for parent class to know length of</font>
00299 <font class="comment"> *                                              compressed buffer.</font>
00300 <font class="comment"> */</font>
00301 
00302 <font class="keywordtype">void</font> LZSSCompress::Encode(<font class="keywordtype">void</font>)
00303 {
00304         <font class="keywordtype">short</font> <font class="keywordtype">int</font> i;                                            <font class="comment">// an iterator</font>
00305         <font class="keywordtype">short</font> <font class="keywordtype">int</font> r;                                            <font class="comment">// node number in the binary tree</font>
00306         <font class="keywordtype">short</font> <font class="keywordtype">int</font> s;                                            <font class="comment">// position in the ring buffer</font>
00307         <font class="keywordtype">unsigned</font> <font class="keywordtype">short</font> <font class="keywordtype">int</font> len;                  <font class="comment">// len of initial string</font>
00308         <font class="keywordtype">short</font> <font class="keywordtype">int</font> last_match_length;            <font class="comment">// length of last match</font>
00309         <font class="keywordtype">short</font> <font class="keywordtype">int</font> code_buf_pos;                  <font class="comment">// position in the output buffer</font>
00310         <font class="keywordtype">unsigned</font> <font class="keywordtype">char</font> code_buf[17];              <font class="comment">// the output buffer</font>
00311         <font class="keywordtype">unsigned</font> <font class="keywordtype">char</font> mask;                              <font class="comment">// bit mask for byte 0 of out buf</font>
00312         <font class="keywordtype">unsigned</font> <font class="keywordtype">char</font> c;                                        <font class="comment">// character read from string</font>
00313 
00314         <font class="comment">// Start with a clean tree.</font>
00315 
00316         InitTree();
00317         direct = 0;     <font class="comment">// set direction needed by parent [Get|Send]Chars()</font>
00318 
00319         <font class="comment">// code_buf[0] works as eight flags.  A "1" represents that the</font>
00320         <font class="comment">// unit is an unencoded letter (1 byte), and a "0" represents</font>
00321         <font class="comment">// that the next unit is a &lt;position,length&gt; pair (2 bytes).</font>
00322         <font class="comment">//</font>
00323         <font class="comment">// code_buf[1..16] stores eight units of code.  Since the best</font>
00324         <font class="comment">// we can do is store eight &lt;position,length&gt; pairs, at most 16 </font>
00325         <font class="comment">// bytes are needed to store this.</font>
00326         <font class="comment">//</font>
00327         <font class="comment">// This is why the maximum size of the code buffer is 17 bytes.</font>
00328 
00329         code_buf[0] = 0;
00330         code_buf_pos = 1;
00331 
00332         <font class="comment">// Mask iterates over the 8 bits in the code buffer.  The first</font>
00333         <font class="comment">// character ends up being stored in the low bit.</font>
00334         <font class="comment">//</font>
00335         <font class="comment">//  bit   8   7   6   5   4   3   2   1</font>
00336         <font class="comment">//              |                                                  |</font>
00337         <font class="comment">//              |                        first sequence in code buffer</font>
00338         <font class="comment">//              |</font>
00339         <font class="comment">//        last sequence in code buffer          </font>
00340 
00341         mask = 1;
00342 
00343         s = 0;
00344         r = (<font class="keywordtype">short</font> <font class="keywordtype">int</font>) N - (<font class="keywordtype">short</font> <font class="keywordtype">int</font>) F;
00345 
00346         <font class="comment">// Initialize the ring buffer with spaces...</font>
00347 
00348         <font class="comment">// Note that the last F bytes of the ring buffer are not filled.</font>
00349         <font class="comment">// This is because those F bytes will be filled in immediately</font>
00350         <font class="comment">// with bytes from the input stream.</font>
00351 
00352         memset(m_ring_buffer, <font class="charliteral">' '</font>, N - F);
00353         
00354         <font class="comment">// Read F bytes into the last F bytes of the ring buffer.</font>
00355         <font class="comment">//</font>
00356         <font class="comment">// This function loads the buffer with X characters and returns</font>
00357         <font class="comment">// the actual amount loaded.</font>
00358 
00359         len = GetChars((<font class="keywordtype">char</font> *) &amp;(m_ring_buffer[r]), F);
00360 
00361         <font class="comment">// Make sure there is something to be compressed.</font>
00362 
00363         <font class="keywordflow">if</font> (len == 0)
00364                 <font class="keywordflow">return</font>;
00365 
00366         <font class="comment">// Insert the F strings, each of which begins with one or more</font>
00367         <font class="comment">// 'space' characters.  Note the order in which these strings</font>
00368         <font class="comment">// are inserted.  This way, degenerate trees will be less likely</font>
00369         <font class="comment">// to occur.</font>
00370 
00371         <font class="keywordflow">for</font> (i = 1; i &lt;= F; i++) {
00372                 InsertNode((<font class="keywordtype">short</font> <font class="keywordtype">int</font>) (r - i));
00373         }
00374 
00375         <font class="comment">// Finally, insert the whole string just read.  The</font>
00376         <font class="comment">// member variables match_length and match_position are set.</font>
00377 
00378         InsertNode(r);
00379 
00380         <font class="comment">// Now that we're preloaded, continue till done.</font>
00381 
00382         <font class="keywordflow">do</font> {
00383 
00384                 <font class="comment">// m_match_length may be spuriously long near the end of</font>
00385                 <font class="comment">// text.</font>
00386 
00387                 <font class="keywordflow">if</font> (m_match_length &gt; len) {
00388                         m_match_length = len;
00389                 }
00390 
00391                 <font class="comment">// Is it cheaper to store this as a single character?  If so,</font>
00392                 <font class="comment">// make it so.</font>
00393 
00394                 <font class="keywordflow">if</font> (m_match_length &lt; THRESHOLD) {
00395                         <font class="comment">// Send one character.  Remember that code_buf[0] is the</font>
00396                         <font class="comment">// set of flags for the next eight items.</font>
00397 
00398                         m_match_length = 1;      
00399                         code_buf[0] |= mask;  
00400                         code_buf[code_buf_pos++] = m_ring_buffer[r];
00401                 }
00402 
00403                 <font class="comment">// Otherwise, we do indeed have a string that can be stored</font>
00404                 <font class="comment">// compressed to save space.</font>
00405 
00406                 <font class="keywordflow">else</font> {
00407                         <font class="comment">// The next 16 bits need to contain the position (12 bits)</font>
00408                         <font class="comment">// and the length (4 bits).</font>
00409 
00410                         code_buf[code_buf_pos++] = (<font class="keywordtype">unsigned</font> <font class="keywordtype">char</font>) m_match_position;
00411                         code_buf[code_buf_pos++] = (<font class="keywordtype">unsigned</font> <font class="keywordtype">char</font>) (
00412                                 ((m_match_position &gt;&gt; 4) &amp; 0xf0) | 
00413                                 (m_match_length - THRESHOLD) );
00414                 }
00415 
00416                 <font class="comment">// Shift the mask one bit to the left so that it will be ready</font>
00417                 <font class="comment">// to store the new bit.</font>
00418 
00419                 mask = (<font class="keywordtype">unsigned</font> <font class="keywordtype">char</font>) (mask &lt;&lt; 1);
00420 
00421                 <font class="comment">// If the mask is now 0, then we know that we have a full set</font>
00422                 <font class="comment">// of flags and items in the code buffer.  These need to be</font>
00423                 <font class="comment">// output.</font>
00424 
00425                 <font class="keywordflow">if</font> (!mask) {
00426                         <font class="comment">// code_buf is the buffer of characters to be output.</font>
00427                         <font class="comment">// code_buf_pos is the number of characters it contains.</font>
00428 
00429                         SendChars((<font class="keywordtype">char</font> *) code_buf, code_buf_pos);
00430 
00431                         <font class="comment">// Reset for next buffer...</font>
00432 
00433                         code_buf[0] = 0;
00434                         code_buf_pos = 1;
00435                         mask = 1;
00436                 }
00437 
00438                 last_match_length = m_match_length;
00439 
00440                 <font class="comment">// Delete old strings and read new bytes...</font>
00441 
00442                 <font class="keywordflow">for</font> (i = 0; i &lt; last_match_length; i++) {
00443                         <font class="comment">// Get next character...</font>
00444 
00445                         <font class="keywordflow">if</font> (GetChars((<font class="keywordtype">char</font> *) &amp;c, 1) != 1)
00446                                 <font class="keywordflow">break</font>;
00447 
00448                         <font class="comment">// Delete "old strings"</font>
00449 
00450                         DeleteNode(s);
00451 
00452                         <font class="comment">// Put this character into the ring buffer.</font>
00453                         <font class="comment">//                </font>
00454                         <font class="comment">// The original comment here says "If the position is near</font>
00455                         <font class="comment">// the end of the buffer, extend the buffer to make</font>
00456                         <font class="comment">// string comparison easier."</font>
00457                         <font class="comment">//</font>
00458                         <font class="comment">// That's a little misleading, because the "end" of the </font>
00459                         <font class="comment">// buffer is really what we consider to be the "beginning"</font>
00460                         <font class="comment">// of the buffer, that is, positions 0 through F.</font>
00461                         <font class="comment">//</font>
00462                         <font class="comment">// The idea is that the front end of the buffer is duplicated</font>
00463                         <font class="comment">// into the back end so that when you're looking at characters</font>
00464                         <font class="comment">// at the back end of the buffer, you can index ahead (beyond</font>
00465                         <font class="comment">// the normal end of the buffer) and see the characters</font>
00466                         <font class="comment">// that are at the front end of the buffer wihtout having</font>
00467                         <font class="comment">// to adjust the index.</font>
00468                         <font class="comment">//</font>
00469                         <font class="comment">// That is...</font>
00470                         <font class="comment">//</font>
00471                         <font class="comment">//        1234xxxxxxxxxxxxxxxxxxxxxxxxxxxxx1234</font>
00472                         <font class="comment">//        |                                                        |  |</font>
00473                         <font class="comment">//        position 0              end of buffer  |</font>
00474                         <font class="comment">//                                                                               |</font>
00475                         <font class="comment">//                                duplicate of front of buffer</font>
00476 
00477                         m_ring_buffer[s] = c;
00478 
00479                         <font class="keywordflow">if</font> (s &lt; F - 1) {
00480                                 m_ring_buffer[s + N] = c;
00481                         }
00482 
00483                         <font class="comment">// Increment the position, and wrap around when we're at</font>
00484                         <font class="comment">// the end.  Note that this relies on N being a power of 2.</font>
00485 
00486                         s = (<font class="keywordtype">short</font> <font class="keywordtype">int</font>) ( (s + 1) &amp; (N - 1) );
00487                         r = (<font class="keywordtype">short</font> <font class="keywordtype">int</font>) ( (r + 1) &amp; (N - 1) );
00488 
00489                         <font class="comment">// Register the string that is found in </font>
00490                         <font class="comment">// m_ring_buffer[r..r+F-1].</font>
00491 
00492                         InsertNode(r);
00493                 }
00494 
00495                 <font class="comment">// If we didn't quit because we hit the last_match_length,</font>
00496                 <font class="comment">// then we must have quit because we ran out of characters</font>
00497                 <font class="comment">// to process.</font>
00498 
00499                 <font class="keywordflow">while</font> (i++ &lt; last_match_length) {                                                         
00500                         DeleteNode(s);
00501 
00502                         s = (<font class="keywordtype">short</font> <font class="keywordtype">int</font>) ( (s + 1) &amp; (N - 1) );
00503                         r = (<font class="keywordtype">short</font> <font class="keywordtype">int</font>) ( (r + 1) &amp; (N - 1) );
00504 
00505                         <font class="comment">// Note that len hitting 0 is the key that causes the</font>
00506                         <font class="comment">// do...while() to terminate.  This is the only place</font>
00507                         <font class="comment">// within the loop that len is modified.</font>
00508                         <font class="comment">//</font>
00509                         <font class="comment">// Its original value is F (or a number less than F for</font>
00510                         <font class="comment">// short strings).</font>
00511 
00512                         <font class="keywordflow">if</font> (--len) {
00513                                 InsertNode(r);     <font class="comment">/* buffer may not be empty. */</font>
00514                         }
00515                 }
00516 
00517                 <font class="comment">// End of do...while() loop.  Continue processing until there</font>
00518                 <font class="comment">// are no more characters to be compressed.  The variable</font>
00519                 <font class="comment">// "len" is used to signal this condition.</font>
00520         } <font class="keywordflow">while</font> (len &gt; 0);
00521 
00522         <font class="comment">// There could still be something in the output buffer.  Send it</font>
00523         <font class="comment">// now.</font>
00524 
00525         <font class="keywordflow">if</font> (code_buf_pos &gt; 1) {
00526                 <font class="comment">// code_buf is the encoded string to send.</font>
00527                 <font class="comment">// code_buf_ptr is the number of characters.</font>
00528 
00529                 SendChars((<font class="keywordtype">char</font> *) code_buf, code_buf_pos);
00530         }
00531 
00532 
00533         <font class="comment">// must set zlen for parent class to know length of compressed buffer</font>
00534         zlen = zpos;
00535 }
00536 
00537 
00538 <font class="comment">/******************************************************************************</font>
00539 <font class="comment"> * LZSSCompress::Decode - This function "decodes" the input stream into the</font>
00540 <font class="comment"> *                                              output stream.</font>
00541 <font class="comment"> *                                              The GetChars() and SendChars() functions are</font>
00542 <font class="comment"> *                                              used to separate this method from the actual</font>
00543 <font class="comment"> *                                              i/o.</font>
00544 <font class="comment"> */</font>
00545 
00546 <font class="keywordtype">void</font> LZSSCompress::Decode(<font class="keywordtype">void</font>)
00547 {
00548         <font class="keywordtype">int</font> k;
00549         <font class="keywordtype">int</font> r;                                                    <font class="comment">// node number</font>
00550         <font class="keywordtype">unsigned</font> <font class="keywordtype">char</font> c[F];                              <font class="comment">// an array of chars</font>
00551         <font class="keywordtype">unsigned</font> <font class="keywordtype">char</font> flags;                            <font class="comment">// 8 bits of flags</font>
00552         <font class="keywordtype">int</font> flag_count;                                  <font class="comment">// which flag we're on</font>
00553         <font class="keywordtype">short</font> <font class="keywordtype">int</font> pos;                                    <font class="comment">// position in the ring buffer</font>
00554         <font class="keywordtype">short</font> <font class="keywordtype">int</font> len;                                    <font class="comment">// number of chars in ring buffer</font>
00555         <font class="keywordtype">unsigned</font> <font class="keywordtype">long</font> totalLen = 0;
00556 
00557         direct = 1;     <font class="comment">// set direction needed by parent [Get|Send]Chars()</font>
00558 
00559         <font class="comment">// Initialize the ring buffer with a common string.</font>
00560         <font class="comment">//</font>
00561         <font class="comment">// Note that the last F bytes of the ring buffer are not filled.</font>
00562 
00563         memset(m_ring_buffer, <font class="charliteral">' '</font>, N - F);
00564         
00565         r = N - F;
00566 
00567         flags = (char) 0;
00568         flag_count = 0;
00569 
00570         <font class="keywordflow">for</font> ( ; ; ) {
00571 
00572                 <font class="comment">// If there are more bits of interest in this flag, then</font>
00573                 <font class="comment">// shift that next interesting bit into the 1's position.</font>
00574                 <font class="comment">//</font>
00575                 <font class="comment">// If this flag has been exhausted, the next byte must </font>
00576                 <font class="comment">// be a flag.</font>
00577 
00578                 <font class="keywordflow">if</font> (flag_count &gt; 0) {
00579                         flags = (<font class="keywordtype">unsigned</font> <font class="keywordtype">char</font>) (flags &gt;&gt; 1);
00580                         flag_count--;
00581                 }
00582                 <font class="keywordflow">else</font> {
00583                         <font class="comment">// Next byte must be a flag.</font>
00584 
00585                         <font class="keywordflow">if</font> (GetChars((<font class="keywordtype">char</font> *) &amp;flags, 1) != 1)
00586                                 <font class="keywordflow">break</font>;
00587 
00588                         <font class="comment">// Set the flag counter.  While at first it might appear</font>
00589                         <font class="comment">// that this should be an 8 since there are 8 bits in the</font>
00590                         <font class="comment">// flag, it should really be a 7 because the shift must</font>
00591                         <font class="comment">// be performed 7 times in order to see all 8 bits.</font>
00592 
00593                         flag_count = 7;
00594                 }
00595 
00596                 <font class="comment">// If the low order bit of the flag is now set, then we know</font>
00597                 <font class="comment">// that the next byte is a single, unencoded character.</font>
00598 
00599                 <font class="keywordflow">if</font> (flags &amp; 1) {
00600                         <font class="keywordflow">if</font> (GetChars((<font class="keywordtype">char</font> *) c, 1) != 1)
00601                                 <font class="keywordflow">break</font>;
00602 
00603                         <font class="keywordflow">if</font> (SendChars((<font class="keywordtype">char</font> *) c, 1) != 1) {
00604                                 totalLen++;
00605                                 <font class="keywordflow">break</font>;
00606                         }
00607 
00608                         <font class="comment">// Add to buffer, and increment to next spot. Wrap at end.</font>
00609 
00610                         m_ring_buffer[r] = c[0];
00611                         r = (<font class="keywordtype">short</font> <font class="keywordtype">int</font>) ( (r + 1) &amp; (N - 1) );
00612                 }
00613 
00614                 <font class="comment">// Otherwise, we know that the next two bytes are a</font>
00615                 <font class="comment">// &lt;position,length&gt; pair.  The position is in 12 bits and</font>
00616                 <font class="comment">// the length is in 4 bits.</font>
00617 
00618                 <font class="keywordflow">else</font> {
00619                         <font class="comment">// Original code:</font>
00620                         <font class="comment">//  if ((i = getc(infile)) == EOF)</font>
00621                         <font class="comment">//        break;</font>
00622                         <font class="comment">//  if ((j = getc(infile)) == EOF)</font>
00623                         <font class="comment">//        break;</font>
00624                         <font class="comment">//  i |= ((j &amp; 0xf0) &lt;&lt; 4);     </font>
00625                         <font class="comment">//  j = (j &amp; 0x0f) + THRESHOLD;</font>
00626                         <font class="comment">//</font>
00627                         <font class="comment">// I've modified this to only make one input call, and</font>
00628                         <font class="comment">// have changed the variable names to something more</font>
00629                         <font class="comment">// obvious.</font>
00630 
00631                         <font class="keywordflow">if</font> (GetChars((<font class="keywordtype">char</font> *) c, 2) != 2)
00632                                 <font class="keywordflow">break</font>;
00633 
00634                         <font class="comment">// Convert these two characters into the position and</font>
00635                         <font class="comment">// length.  Note that the length is always at least</font>
00636                         <font class="comment">// THRESHOLD, which is why we're able to get a length</font>
00637                         <font class="comment">// of 18 out of only 4 bits.</font>
00638 
00639                         pos = (<font class="keywordtype">short</font> <font class="keywordtype">int</font>) ( c[0] | ((c[1] &amp; 0xf0) &lt;&lt; 4) );
00640 
00641                         len = (<font class="keywordtype">short</font> <font class="keywordtype">int</font>) ( (c[1] &amp; 0x0f) + THRESHOLD );
00642 
00643                         <font class="comment">// There are now "len" characters at position "pos" in</font>
00644                         <font class="comment">// the ring buffer that can be pulled out.  Note that</font>
00645                         <font class="comment">// len is never more than F.</font>
00646 
00647                         <font class="keywordflow">for</font> (k = 0; k &lt; len; k++) {
00648                                 c[k] = m_ring_buffer[(pos + k) &amp; (N - 1)];
00649 
00650                                 <font class="comment">// Add to buffer, and increment to next spot. Wrap at end.</font>
00651 
00652                                 m_ring_buffer[r] = c[k];
00653                                 r = (<font class="keywordtype">short</font> <font class="keywordtype">int</font>) ( (r + 1) &amp; (N - 1) );
00654                         }
00655 
00656                         <font class="comment">// Add the "len" :characters to the output stream.</font>
00657 
00658                         <font class="keywordflow">if</font> (SendChars((<font class="keywordtype">char</font> *) c, len) != (<font class="keywordtype">unsigned</font> <font class="keywordtype">int</font>)len) {
00659                                 totalLen += len;
00660                                 <font class="keywordflow">break</font>;
00661                         }
00662                 }
00663         }
00664         slen = totalLen;
00665 }
</pre></div><hr><address align="right"><small>Generated on Thu Jun 20 22:12:59 2002 for The Sword Project by
<a href="http://www.doxygen.org/index.html">
<img src="doxygen.png" alt="doxygen" align="middle" border=0 
width=110 height=53></a>1.2.15 </small></address>
</body>
</html>