<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN"> <html><head><meta http-equiv="Content-Type" content="text/html;charset=iso-8859-1"> <title>lzsscomprs.cpp Source File</title> <link href="doxygen.css" rel="stylesheet" type="text/css"> </head><body> <!-- Generated by Doxygen 1.2.15 --> <center> <a class="qindex" href="index.html">Main Page</a> <a class="qindex" href="namespaces.html">Namespace List</a> <a class="qindex" href="hierarchy.html">Class Hierarchy</a> <a class="qindex" href="classes.html">Alphabetical List</a> <a class="qindex" href="annotated.html">Compound List</a> <a class="qindex" href="files.html">File List</a> <a class="qindex" href="functions.html">Compound Members</a> </center> <hr><h1>lzsscomprs.cpp</h1><div class="fragment"><pre>00001 <font class="comment">/******************************************************************************</font> 00002 <font class="comment"> * lzsscomprs.cpp - code for class 'LZSSCompress'- a driver class that</font> 00003 <font class="comment"> * provides LZSS compression</font> 00004 <font class="comment"> */</font> 00005 00006 <font class="preprocessor">#include <string.h></font> 00007 <font class="preprocessor">#include <stdlib.h></font> 00008 <font class="preprocessor">#include <lzsscomprs.h></font> 00009 00010 00011 <font class="comment">/******************************************************************************</font> 00012 <font class="comment"> * LZSSCompress Statics</font> 00013 <font class="comment"> */</font> 00014 00015 <font class="comment">// m_ring_buffer is a text buffer. It contains "nodes" of</font> 00016 <font class="comment">// uncompressed text that can be indexed by position. That is,</font> 00017 <font class="comment">// a substring of the ring buffer can be indexed by a position</font> 00018 <font class="comment">// and a length. When decoding, the compressed text may contain</font> 00019 <font class="comment">// a position in the ring buffer and a count of the number of</font> 00020 <font class="comment">// bytes from the ring buffer that are to be moved into the</font> 00021 <font class="comment">// uncompressed buffer. </font> 00022 <font class="comment">//</font> 00023 <font class="comment">// This ring buffer is not maintained as part of the compressed</font> 00024 <font class="comment">// text. Instead, it is reconstructed dynamically. That is,</font> 00025 <font class="comment">// it starts out empty and gets built as the text is decompressed.</font> 00026 <font class="comment">//</font> 00027 <font class="comment">// The ring buffer contain N bytes, with an additional F - 1 bytes</font> 00028 <font class="comment">// to facilitate string comparison.</font> 00029 00030 <font class="keywordtype">unsigned</font> <font class="keywordtype">char</font> LZSSCompress::m_ring_buffer[N + F - 1]; 00031 00032 <font class="comment">// m_match_position and m_match_length are set by InsertNode().</font> 00033 <font class="comment">//</font> 00034 <font class="comment">// These variables indicate the position in the ring buffer </font> 00035 <font class="comment">// and the number of characters at that position that match</font> 00036 <font class="comment">// a given string.</font> 00037 00038 <font class="keywordtype">short</font> <font class="keywordtype">int</font> LZSSCompress::m_match_position; 00039 <font class="keywordtype">short</font> <font class="keywordtype">int</font> LZSSCompress::m_match_length; 00040 00041 <font class="comment">// m_lson, m_rson, and m_dad are the Japanese way of referring to</font> 00042 <font class="comment">// a tree structure. The dad is the parent and it has a right and</font> 00043 <font class="comment">// left son (child).</font> 00044 <font class="comment">//</font> 00045 <font class="comment">// For i = 0 to N-1, m_rson[i] and m_lson[i] will be the right </font> 00046 <font class="comment">// and left children of node i. </font> 00047 <font class="comment">//</font> 00048 <font class="comment">// For i = 0 to N-1, m_dad[i] is the parent of node i.</font> 00049 <font class="comment">//</font> 00050 <font class="comment">// For i = 0 to 255, rson[N + i + 1] is the root of the tree for </font> 00051 <font class="comment">// strings that begin with the character i. Note that this requires </font> 00052 <font class="comment">// one byte characters.</font> 00053 <font class="comment">//</font> 00054 <font class="comment">// These nodes store values of 0...(N-1). Memory requirements</font> 00055 <font class="comment">// can be reduces by using 2-byte integers instead of full 4-byte</font> 00056 <font class="comment">// integers (for 32-bit applications). Therefore, these are </font> 00057 <font class="comment">// defined as "short ints."</font> 00058 00059 <font class="keywordtype">short</font> <font class="keywordtype">int</font> LZSSCompress::m_lson[N + 1]; 00060 <font class="keywordtype">short</font> <font class="keywordtype">int</font> LZSSCompress::m_rson[N + 257]; 00061 <font class="keywordtype">short</font> <font class="keywordtype">int</font> LZSSCompress::m_dad[N + 1]; 00062 00063 00064 <font class="comment">/******************************************************************************</font> 00065 <font class="comment"> * LZSSCompress Constructor - Initializes data for instance of LZSSCompress</font> 00066 <font class="comment"> *</font> 00067 <font class="comment"> */</font> 00068 00069 LZSSCompress::LZSSCompress() : SWCompress() { 00070 } 00071 00072 00073 <font class="comment">/******************************************************************************</font> 00074 <font class="comment"> * LZSSCompress Destructor - Cleans up instance of LZSSCompress</font> 00075 <font class="comment"> */</font> 00076 00077 LZSSCompress::~LZSSCompress() { 00078 } 00079 00080 00081 <font class="comment">/******************************************************************************</font> 00082 <font class="comment"> * LZSSCompress::InitTree - This function initializes the tree nodes to</font> 00083 <font class="comment"> * "empty" states. </font> 00084 <font class="comment"> */</font> 00085 00086 <font class="keywordtype">void</font> LZSSCompress::InitTree(<font class="keywordtype">void</font>) { 00087 <font class="keywordtype">int</font> i; 00088 00089 <font class="comment">// For i = 0 to N - 1, m_rson[i] and m_lson[i] will be the right</font> 00090 <font class="comment">// and left children of node i. These nodes need not be</font> 00091 <font class="comment">// initialized. However, for debugging purposes, it is nice to</font> 00092 <font class="comment">// have them initialized. Since this is only used for compression</font> 00093 <font class="comment">// (not decompression), I don't mind spending the time to do it.</font> 00094 <font class="comment">//</font> 00095 <font class="comment">// For the same range of i, m_dad[i] is the parent of node i.</font> 00096 <font class="comment">// These are initialized to a known value that can represent</font> 00097 <font class="comment">// a "not used" state.</font> 00098 00099 <font class="keywordflow">for</font> (i = 0; i < N; i++) { 00100 m_lson[i] = NOT_USED; 00101 m_rson[i] = NOT_USED; 00102 m_dad[i] = NOT_USED; 00103 } 00104 00105 <font class="comment">// For i = 0 to 255, m_rson[N + i + 1] is the root of the tree</font> 00106 <font class="comment">// for strings that begin with the character i. This is why</font> 00107 <font class="comment">// the right child array is larger than the left child array.</font> 00108 <font class="comment">// These are also initialzied to a "not used" state.</font> 00109 <font class="comment">//</font> 00110 <font class="comment">// Note that there are 256 of these, one for each of the possible</font> 00111 <font class="comment">// 256 characters.</font> 00112 00113 <font class="keywordflow">for</font> (i = N + 1; i <= (N + 256); i++) { 00114 m_rson[i] = NOT_USED; 00115 } 00116 } 00117 00118 00119 <font class="comment">/******************************************************************************</font> 00120 <font class="comment"> * LZSSCompress::InsertNode - This function inserts a string from the ring</font> 00121 <font class="comment"> * buffer into one of the trees. It loads the</font> 00122 <font class="comment"> * match position and length member variables</font> 00123 <font class="comment"> * for the longest match.</font> 00124 <font class="comment"> * </font> 00125 <font class="comment"> * The string to be inserted is identified by</font> 00126 <font class="comment"> * the parameter Pos, A full F bytes are</font> 00127 <font class="comment"> * inserted. So,</font> 00128 <font class="comment"> * m_ring_buffer[Pos ... Pos+F-1]</font> 00129 <font class="comment"> * are inserted.</font> 00130 <font class="comment"> *</font> 00131 <font class="comment"> * If the matched length is exactly F, then an</font> 00132 <font class="comment"> * old node is removed in favor of the new one</font> 00133 <font class="comment"> * (because the old one will be deleted</font> 00134 <font class="comment"> * sooner).</font> 00135 <font class="comment"> *</font> 00136 <font class="comment"> * Note that Pos plays a dual role. It is</font> 00137 <font class="comment"> * used as both a position in the ring buffer</font> 00138 <font class="comment"> * and also as a tree node.</font> 00139 <font class="comment"> * m_ring_buffer[Pos] defines a character that</font> 00140 <font class="comment"> * is used to identify a tree node.</font> 00141 <font class="comment"> *</font> 00142 <font class="comment"> * ENT: pos - position in the buffer</font> 00143 <font class="comment"> */</font> 00144 00145 <font class="keywordtype">void</font> LZSSCompress::InsertNode(<font class="keywordtype">short</font> <font class="keywordtype">int</font> Pos) 00146 { 00147 <font class="keywordtype">short</font> <font class="keywordtype">int</font> i; 00148 <font class="keywordtype">short</font> <font class="keywordtype">int</font> p; 00149 <font class="keywordtype">int</font> cmp; 00150 <font class="keywordtype">unsigned</font> <font class="keywordtype">char</font> * key; 00151 00152 <font class="comment">/*</font> 00153 <font class="comment"> ASSERT(Pos >= 0);</font> 00154 <font class="comment"> ASSERT(Pos < N);</font> 00155 <font class="comment">*/</font> 00156 00157 cmp = 1; 00158 key = &(m_ring_buffer[Pos]); 00159 00160 <font class="comment">// The last 256 entries in m_rson contain the root nodes for</font> 00161 <font class="comment">// strings that begin with a letter. Get an index for the</font> 00162 <font class="comment">// first letter in this string.</font> 00163 00164 p = (<font class="keywordtype">short</font> <font class="keywordtype">int</font>) (N + 1 + key[0]); 00165 00166 <font class="comment">// Set the left and right tree nodes for this position to "not</font> 00167 <font class="comment">// used."</font> 00168 00169 m_lson[Pos] = NOT_USED; 00170 m_rson[Pos] = NOT_USED; 00171 00172 <font class="comment">// Haven't matched anything yet.</font> 00173 00174 m_match_length = 0; 00175 00176 <font class="keywordflow">for</font> ( ; ; ) { 00177 <font class="keywordflow">if</font> (cmp >= 0) { 00178 <font class="keywordflow">if</font> (m_rson[p] != NOT_USED) { 00179 p = m_rson[p]; 00180 } 00181 <font class="keywordflow">else</font> { 00182 m_rson[p] = Pos; 00183 m_dad[Pos] = p; 00184 <font class="keywordflow">return</font>; 00185 } 00186 } 00187 <font class="keywordflow">else</font> { 00188 <font class="keywordflow">if</font> (m_lson[p] != NOT_USED) { 00189 p = m_lson[p]; 00190 } 00191 <font class="keywordflow">else</font> { 00192 m_lson[p] = Pos; 00193 m_dad[Pos] = p; 00194 <font class="keywordflow">return</font>; 00195 } 00196 } 00197 00198 <font class="comment">// Should we go to the right or the left to look for the</font> 00199 <font class="comment">// next match?</font> 00200 00201 <font class="keywordflow">for</font> (i = 1; i < F; i++) { 00202 cmp = key[i] - m_ring_buffer[p + i]; 00203 <font class="keywordflow">if</font> (cmp != 0) 00204 <font class="keywordflow">break</font>; 00205 } 00206 00207 <font class="keywordflow">if</font> (i > m_match_length) { 00208 m_match_position = p; 00209 m_match_length = i; 00210 00211 <font class="keywordflow">if</font> (i >= F) 00212 <font class="keywordflow">break</font>; 00213 } 00214 } 00215 00216 m_dad[Pos] = m_dad[p]; 00217 m_lson[Pos] = m_lson[p]; 00218 m_rson[Pos] = m_rson[p]; 00219 00220 m_dad[ m_lson[p] ] = Pos; 00221 m_dad[ m_rson[p] ] = Pos; 00222 00223 <font class="keywordflow">if</font> (m_rson[ m_dad[p] ] == p) { 00224 m_rson[ m_dad[p] ] = Pos; 00225 } 00226 <font class="keywordflow">else</font> { 00227 m_lson[ m_dad[p] ] = Pos; 00228 } 00229 00230 <font class="comment">// Remove "p"</font> 00231 00232 m_dad[p] = NOT_USED; 00233 } 00234 00235 00236 <font class="comment">/******************************************************************************</font> 00237 <font class="comment"> * LZSSCompress::DeleteNode - This function removes the node "Node" from the</font> 00238 <font class="comment"> * tree.</font> 00239 <font class="comment"> *</font> 00240 <font class="comment"> * ENT: node - node to be removed</font> 00241 <font class="comment"> */</font> 00242 00243 <font class="keywordtype">void</font> LZSSCompress::DeleteNode(<font class="keywordtype">short</font> <font class="keywordtype">int</font> Node) 00244 { 00245 <font class="keywordtype">short</font> <font class="keywordtype">int</font> q; 00246 00247 <font class="comment">/*</font> 00248 <font class="comment"> ASSERT(Node >= 0);</font> 00249 <font class="comment"> ASSERT(Node < (N+1));</font> 00250 <font class="comment">*/</font> 00251 00252 <font class="keywordflow">if</font> (m_dad[Node] == NOT_USED) { <font class="comment">// not in tree, nothing to do</font> 00253 <font class="keywordflow">return</font>; 00254 } 00255 00256 <font class="keywordflow">if</font> (m_rson[Node] == NOT_USED) { 00257 q = m_lson[Node]; 00258 } 00259 <font class="keywordflow">else</font> <font class="keywordflow">if</font> (m_lson[Node] == NOT_USED) { 00260 q = m_rson[Node]; 00261 } 00262 <font class="keywordflow">else</font> { 00263 q = m_lson[Node]; 00264 <font class="keywordflow">if</font> (m_rson[q] != NOT_USED) { 00265 <font class="keywordflow">do</font> { 00266 q = m_rson[q]; 00267 } <font class="keywordflow">while</font> (m_rson[q] != NOT_USED); 00268 00269 m_rson[ m_dad[q] ] = m_lson[q]; 00270 m_dad[ m_lson[q] ] = m_dad[q]; 00271 m_lson[q] = m_lson[Node]; 00272 m_dad[ m_lson[Node] ] = q; 00273 } 00274 00275 m_rson[q] = m_rson[Node]; 00276 m_dad[ m_rson[Node] ] = q; 00277 } 00278 00279 m_dad[q] = m_dad[Node]; 00280 00281 <font class="keywordflow">if</font> (m_rson[ m_dad[Node] ] == Node) { 00282 m_rson[ m_dad[Node] ] = q; 00283 } 00284 <font class="keywordflow">else</font> { 00285 m_lson[ m_dad[Node] ] = q; 00286 } 00287 00288 m_dad[Node] = NOT_USED; 00289 } 00290 00291 00292 <font class="comment">/******************************************************************************</font> 00293 <font class="comment"> * LZSSCompress::Encode - This function "encodes" the input stream into the</font> 00294 <font class="comment"> * output stream.</font> 00295 <font class="comment"> * The GetChars() and SendChars() functions are</font> 00296 <font class="comment"> * used to separate this method from the actual</font> 00297 <font class="comment"> * i/o.</font> 00298 <font class="comment"> * NOTE: must set zlen for parent class to know length of</font> 00299 <font class="comment"> * compressed buffer.</font> 00300 <font class="comment"> */</font> 00301 00302 <font class="keywordtype">void</font> LZSSCompress::Encode(<font class="keywordtype">void</font>) 00303 { 00304 <font class="keywordtype">short</font> <font class="keywordtype">int</font> i; <font class="comment">// an iterator</font> 00305 <font class="keywordtype">short</font> <font class="keywordtype">int</font> r; <font class="comment">// node number in the binary tree</font> 00306 <font class="keywordtype">short</font> <font class="keywordtype">int</font> s; <font class="comment">// position in the ring buffer</font> 00307 <font class="keywordtype">unsigned</font> <font class="keywordtype">short</font> <font class="keywordtype">int</font> len; <font class="comment">// len of initial string</font> 00308 <font class="keywordtype">short</font> <font class="keywordtype">int</font> last_match_length; <font class="comment">// length of last match</font> 00309 <font class="keywordtype">short</font> <font class="keywordtype">int</font> code_buf_pos; <font class="comment">// position in the output buffer</font> 00310 <font class="keywordtype">unsigned</font> <font class="keywordtype">char</font> code_buf[17]; <font class="comment">// the output buffer</font> 00311 <font class="keywordtype">unsigned</font> <font class="keywordtype">char</font> mask; <font class="comment">// bit mask for byte 0 of out buf</font> 00312 <font class="keywordtype">unsigned</font> <font class="keywordtype">char</font> c; <font class="comment">// character read from string</font> 00313 00314 <font class="comment">// Start with a clean tree.</font> 00315 00316 InitTree(); 00317 direct = 0; <font class="comment">// set direction needed by parent [Get|Send]Chars()</font> 00318 00319 <font class="comment">// code_buf[0] works as eight flags. A "1" represents that the</font> 00320 <font class="comment">// unit is an unencoded letter (1 byte), and a "0" represents</font> 00321 <font class="comment">// that the next unit is a <position,length> pair (2 bytes).</font> 00322 <font class="comment">//</font> 00323 <font class="comment">// code_buf[1..16] stores eight units of code. Since the best</font> 00324 <font class="comment">// we can do is store eight <position,length> pairs, at most 16 </font> 00325 <font class="comment">// bytes are needed to store this.</font> 00326 <font class="comment">//</font> 00327 <font class="comment">// This is why the maximum size of the code buffer is 17 bytes.</font> 00328 00329 code_buf[0] = 0; 00330 code_buf_pos = 1; 00331 00332 <font class="comment">// Mask iterates over the 8 bits in the code buffer. The first</font> 00333 <font class="comment">// character ends up being stored in the low bit.</font> 00334 <font class="comment">//</font> 00335 <font class="comment">// bit 8 7 6 5 4 3 2 1</font> 00336 <font class="comment">// | |</font> 00337 <font class="comment">// | first sequence in code buffer</font> 00338 <font class="comment">// |</font> 00339 <font class="comment">// last sequence in code buffer </font> 00340 00341 mask = 1; 00342 00343 s = 0; 00344 r = (<font class="keywordtype">short</font> <font class="keywordtype">int</font>) N - (<font class="keywordtype">short</font> <font class="keywordtype">int</font>) F; 00345 00346 <font class="comment">// Initialize the ring buffer with spaces...</font> 00347 00348 <font class="comment">// Note that the last F bytes of the ring buffer are not filled.</font> 00349 <font class="comment">// This is because those F bytes will be filled in immediately</font> 00350 <font class="comment">// with bytes from the input stream.</font> 00351 00352 memset(m_ring_buffer, <font class="charliteral">' '</font>, N - F); 00353 00354 <font class="comment">// Read F bytes into the last F bytes of the ring buffer.</font> 00355 <font class="comment">//</font> 00356 <font class="comment">// This function loads the buffer with X characters and returns</font> 00357 <font class="comment">// the actual amount loaded.</font> 00358 00359 len = GetChars((<font class="keywordtype">char</font> *) &(m_ring_buffer[r]), F); 00360 00361 <font class="comment">// Make sure there is something to be compressed.</font> 00362 00363 <font class="keywordflow">if</font> (len == 0) 00364 <font class="keywordflow">return</font>; 00365 00366 <font class="comment">// Insert the F strings, each of which begins with one or more</font> 00367 <font class="comment">// 'space' characters. Note the order in which these strings</font> 00368 <font class="comment">// are inserted. This way, degenerate trees will be less likely</font> 00369 <font class="comment">// to occur.</font> 00370 00371 <font class="keywordflow">for</font> (i = 1; i <= F; i++) { 00372 InsertNode((<font class="keywordtype">short</font> <font class="keywordtype">int</font>) (r - i)); 00373 } 00374 00375 <font class="comment">// Finally, insert the whole string just read. The</font> 00376 <font class="comment">// member variables match_length and match_position are set.</font> 00377 00378 InsertNode(r); 00379 00380 <font class="comment">// Now that we're preloaded, continue till done.</font> 00381 00382 <font class="keywordflow">do</font> { 00383 00384 <font class="comment">// m_match_length may be spuriously long near the end of</font> 00385 <font class="comment">// text.</font> 00386 00387 <font class="keywordflow">if</font> (m_match_length > len) { 00388 m_match_length = len; 00389 } 00390 00391 <font class="comment">// Is it cheaper to store this as a single character? If so,</font> 00392 <font class="comment">// make it so.</font> 00393 00394 <font class="keywordflow">if</font> (m_match_length < THRESHOLD) { 00395 <font class="comment">// Send one character. Remember that code_buf[0] is the</font> 00396 <font class="comment">// set of flags for the next eight items.</font> 00397 00398 m_match_length = 1; 00399 code_buf[0] |= mask; 00400 code_buf[code_buf_pos++] = m_ring_buffer[r]; 00401 } 00402 00403 <font class="comment">// Otherwise, we do indeed have a string that can be stored</font> 00404 <font class="comment">// compressed to save space.</font> 00405 00406 <font class="keywordflow">else</font> { 00407 <font class="comment">// The next 16 bits need to contain the position (12 bits)</font> 00408 <font class="comment">// and the length (4 bits).</font> 00409 00410 code_buf[code_buf_pos++] = (<font class="keywordtype">unsigned</font> <font class="keywordtype">char</font>) m_match_position; 00411 code_buf[code_buf_pos++] = (<font class="keywordtype">unsigned</font> <font class="keywordtype">char</font>) ( 00412 ((m_match_position >> 4) & 0xf0) | 00413 (m_match_length - THRESHOLD) ); 00414 } 00415 00416 <font class="comment">// Shift the mask one bit to the left so that it will be ready</font> 00417 <font class="comment">// to store the new bit.</font> 00418 00419 mask = (<font class="keywordtype">unsigned</font> <font class="keywordtype">char</font>) (mask << 1); 00420 00421 <font class="comment">// If the mask is now 0, then we know that we have a full set</font> 00422 <font class="comment">// of flags and items in the code buffer. These need to be</font> 00423 <font class="comment">// output.</font> 00424 00425 <font class="keywordflow">if</font> (!mask) { 00426 <font class="comment">// code_buf is the buffer of characters to be output.</font> 00427 <font class="comment">// code_buf_pos is the number of characters it contains.</font> 00428 00429 SendChars((<font class="keywordtype">char</font> *) code_buf, code_buf_pos); 00430 00431 <font class="comment">// Reset for next buffer...</font> 00432 00433 code_buf[0] = 0; 00434 code_buf_pos = 1; 00435 mask = 1; 00436 } 00437 00438 last_match_length = m_match_length; 00439 00440 <font class="comment">// Delete old strings and read new bytes...</font> 00441 00442 <font class="keywordflow">for</font> (i = 0; i < last_match_length; i++) { 00443 <font class="comment">// Get next character...</font> 00444 00445 <font class="keywordflow">if</font> (GetChars((<font class="keywordtype">char</font> *) &c, 1) != 1) 00446 <font class="keywordflow">break</font>; 00447 00448 <font class="comment">// Delete "old strings"</font> 00449 00450 DeleteNode(s); 00451 00452 <font class="comment">// Put this character into the ring buffer.</font> 00453 <font class="comment">// </font> 00454 <font class="comment">// The original comment here says "If the position is near</font> 00455 <font class="comment">// the end of the buffer, extend the buffer to make</font> 00456 <font class="comment">// string comparison easier."</font> 00457 <font class="comment">//</font> 00458 <font class="comment">// That's a little misleading, because the "end" of the </font> 00459 <font class="comment">// buffer is really what we consider to be the "beginning"</font> 00460 <font class="comment">// of the buffer, that is, positions 0 through F.</font> 00461 <font class="comment">//</font> 00462 <font class="comment">// The idea is that the front end of the buffer is duplicated</font> 00463 <font class="comment">// into the back end so that when you're looking at characters</font> 00464 <font class="comment">// at the back end of the buffer, you can index ahead (beyond</font> 00465 <font class="comment">// the normal end of the buffer) and see the characters</font> 00466 <font class="comment">// that are at the front end of the buffer wihtout having</font> 00467 <font class="comment">// to adjust the index.</font> 00468 <font class="comment">//</font> 00469 <font class="comment">// That is...</font> 00470 <font class="comment">//</font> 00471 <font class="comment">// 1234xxxxxxxxxxxxxxxxxxxxxxxxxxxxx1234</font> 00472 <font class="comment">// | | |</font> 00473 <font class="comment">// position 0 end of buffer |</font> 00474 <font class="comment">// |</font> 00475 <font class="comment">// duplicate of front of buffer</font> 00476 00477 m_ring_buffer[s] = c; 00478 00479 <font class="keywordflow">if</font> (s < F - 1) { 00480 m_ring_buffer[s + N] = c; 00481 } 00482 00483 <font class="comment">// Increment the position, and wrap around when we're at</font> 00484 <font class="comment">// the end. Note that this relies on N being a power of 2.</font> 00485 00486 s = (<font class="keywordtype">short</font> <font class="keywordtype">int</font>) ( (s + 1) & (N - 1) ); 00487 r = (<font class="keywordtype">short</font> <font class="keywordtype">int</font>) ( (r + 1) & (N - 1) ); 00488 00489 <font class="comment">// Register the string that is found in </font> 00490 <font class="comment">// m_ring_buffer[r..r+F-1].</font> 00491 00492 InsertNode(r); 00493 } 00494 00495 <font class="comment">// If we didn't quit because we hit the last_match_length,</font> 00496 <font class="comment">// then we must have quit because we ran out of characters</font> 00497 <font class="comment">// to process.</font> 00498 00499 <font class="keywordflow">while</font> (i++ < last_match_length) { 00500 DeleteNode(s); 00501 00502 s = (<font class="keywordtype">short</font> <font class="keywordtype">int</font>) ( (s + 1) & (N - 1) ); 00503 r = (<font class="keywordtype">short</font> <font class="keywordtype">int</font>) ( (r + 1) & (N - 1) ); 00504 00505 <font class="comment">// Note that len hitting 0 is the key that causes the</font> 00506 <font class="comment">// do...while() to terminate. This is the only place</font> 00507 <font class="comment">// within the loop that len is modified.</font> 00508 <font class="comment">//</font> 00509 <font class="comment">// Its original value is F (or a number less than F for</font> 00510 <font class="comment">// short strings).</font> 00511 00512 <font class="keywordflow">if</font> (--len) { 00513 InsertNode(r); <font class="comment">/* buffer may not be empty. */</font> 00514 } 00515 } 00516 00517 <font class="comment">// End of do...while() loop. Continue processing until there</font> 00518 <font class="comment">// are no more characters to be compressed. The variable</font> 00519 <font class="comment">// "len" is used to signal this condition.</font> 00520 } <font class="keywordflow">while</font> (len > 0); 00521 00522 <font class="comment">// There could still be something in the output buffer. Send it</font> 00523 <font class="comment">// now.</font> 00524 00525 <font class="keywordflow">if</font> (code_buf_pos > 1) { 00526 <font class="comment">// code_buf is the encoded string to send.</font> 00527 <font class="comment">// code_buf_ptr is the number of characters.</font> 00528 00529 SendChars((<font class="keywordtype">char</font> *) code_buf, code_buf_pos); 00530 } 00531 00532 00533 <font class="comment">// must set zlen for parent class to know length of compressed buffer</font> 00534 zlen = zpos; 00535 } 00536 00537 00538 <font class="comment">/******************************************************************************</font> 00539 <font class="comment"> * LZSSCompress::Decode - This function "decodes" the input stream into the</font> 00540 <font class="comment"> * output stream.</font> 00541 <font class="comment"> * The GetChars() and SendChars() functions are</font> 00542 <font class="comment"> * used to separate this method from the actual</font> 00543 <font class="comment"> * i/o.</font> 00544 <font class="comment"> */</font> 00545 00546 <font class="keywordtype">void</font> LZSSCompress::Decode(<font class="keywordtype">void</font>) 00547 { 00548 <font class="keywordtype">int</font> k; 00549 <font class="keywordtype">int</font> r; <font class="comment">// node number</font> 00550 <font class="keywordtype">unsigned</font> <font class="keywordtype">char</font> c[F]; <font class="comment">// an array of chars</font> 00551 <font class="keywordtype">unsigned</font> <font class="keywordtype">char</font> flags; <font class="comment">// 8 bits of flags</font> 00552 <font class="keywordtype">int</font> flag_count; <font class="comment">// which flag we're on</font> 00553 <font class="keywordtype">short</font> <font class="keywordtype">int</font> pos; <font class="comment">// position in the ring buffer</font> 00554 <font class="keywordtype">short</font> <font class="keywordtype">int</font> len; <font class="comment">// number of chars in ring buffer</font> 00555 <font class="keywordtype">unsigned</font> <font class="keywordtype">long</font> totalLen = 0; 00556 00557 direct = 1; <font class="comment">// set direction needed by parent [Get|Send]Chars()</font> 00558 00559 <font class="comment">// Initialize the ring buffer with a common string.</font> 00560 <font class="comment">//</font> 00561 <font class="comment">// Note that the last F bytes of the ring buffer are not filled.</font> 00562 00563 memset(m_ring_buffer, <font class="charliteral">' '</font>, N - F); 00564 00565 r = N - F; 00566 00567 flags = (char) 0; 00568 flag_count = 0; 00569 00570 <font class="keywordflow">for</font> ( ; ; ) { 00571 00572 <font class="comment">// If there are more bits of interest in this flag, then</font> 00573 <font class="comment">// shift that next interesting bit into the 1's position.</font> 00574 <font class="comment">//</font> 00575 <font class="comment">// If this flag has been exhausted, the next byte must </font> 00576 <font class="comment">// be a flag.</font> 00577 00578 <font class="keywordflow">if</font> (flag_count > 0) { 00579 flags = (<font class="keywordtype">unsigned</font> <font class="keywordtype">char</font>) (flags >> 1); 00580 flag_count--; 00581 } 00582 <font class="keywordflow">else</font> { 00583 <font class="comment">// Next byte must be a flag.</font> 00584 00585 <font class="keywordflow">if</font> (GetChars((<font class="keywordtype">char</font> *) &flags, 1) != 1) 00586 <font class="keywordflow">break</font>; 00587 00588 <font class="comment">// Set the flag counter. While at first it might appear</font> 00589 <font class="comment">// that this should be an 8 since there are 8 bits in the</font> 00590 <font class="comment">// flag, it should really be a 7 because the shift must</font> 00591 <font class="comment">// be performed 7 times in order to see all 8 bits.</font> 00592 00593 flag_count = 7; 00594 } 00595 00596 <font class="comment">// If the low order bit of the flag is now set, then we know</font> 00597 <font class="comment">// that the next byte is a single, unencoded character.</font> 00598 00599 <font class="keywordflow">if</font> (flags & 1) { 00600 <font class="keywordflow">if</font> (GetChars((<font class="keywordtype">char</font> *) c, 1) != 1) 00601 <font class="keywordflow">break</font>; 00602 00603 <font class="keywordflow">if</font> (SendChars((<font class="keywordtype">char</font> *) c, 1) != 1) { 00604 totalLen++; 00605 <font class="keywordflow">break</font>; 00606 } 00607 00608 <font class="comment">// Add to buffer, and increment to next spot. Wrap at end.</font> 00609 00610 m_ring_buffer[r] = c[0]; 00611 r = (<font class="keywordtype">short</font> <font class="keywordtype">int</font>) ( (r + 1) & (N - 1) ); 00612 } 00613 00614 <font class="comment">// Otherwise, we know that the next two bytes are a</font> 00615 <font class="comment">// <position,length> pair. The position is in 12 bits and</font> 00616 <font class="comment">// the length is in 4 bits.</font> 00617 00618 <font class="keywordflow">else</font> { 00619 <font class="comment">// Original code:</font> 00620 <font class="comment">// if ((i = getc(infile)) == EOF)</font> 00621 <font class="comment">// break;</font> 00622 <font class="comment">// if ((j = getc(infile)) == EOF)</font> 00623 <font class="comment">// break;</font> 00624 <font class="comment">// i |= ((j & 0xf0) << 4); </font> 00625 <font class="comment">// j = (j & 0x0f) + THRESHOLD;</font> 00626 <font class="comment">//</font> 00627 <font class="comment">// I've modified this to only make one input call, and</font> 00628 <font class="comment">// have changed the variable names to something more</font> 00629 <font class="comment">// obvious.</font> 00630 00631 <font class="keywordflow">if</font> (GetChars((<font class="keywordtype">char</font> *) c, 2) != 2) 00632 <font class="keywordflow">break</font>; 00633 00634 <font class="comment">// Convert these two characters into the position and</font> 00635 <font class="comment">// length. Note that the length is always at least</font> 00636 <font class="comment">// THRESHOLD, which is why we're able to get a length</font> 00637 <font class="comment">// of 18 out of only 4 bits.</font> 00638 00639 pos = (<font class="keywordtype">short</font> <font class="keywordtype">int</font>) ( c[0] | ((c[1] & 0xf0) << 4) ); 00640 00641 len = (<font class="keywordtype">short</font> <font class="keywordtype">int</font>) ( (c[1] & 0x0f) + THRESHOLD ); 00642 00643 <font class="comment">// There are now "len" characters at position "pos" in</font> 00644 <font class="comment">// the ring buffer that can be pulled out. Note that</font> 00645 <font class="comment">// len is never more than F.</font> 00646 00647 <font class="keywordflow">for</font> (k = 0; k < len; k++) { 00648 c[k] = m_ring_buffer[(pos + k) & (N - 1)]; 00649 00650 <font class="comment">// Add to buffer, and increment to next spot. Wrap at end.</font> 00651 00652 m_ring_buffer[r] = c[k]; 00653 r = (<font class="keywordtype">short</font> <font class="keywordtype">int</font>) ( (r + 1) & (N - 1) ); 00654 } 00655 00656 <font class="comment">// Add the "len" :characters to the output stream.</font> 00657 00658 <font class="keywordflow">if</font> (SendChars((<font class="keywordtype">char</font> *) c, len) != (<font class="keywordtype">unsigned</font> <font class="keywordtype">int</font>)len) { 00659 totalLen += len; 00660 <font class="keywordflow">break</font>; 00661 } 00662 } 00663 } 00664 slen = totalLen; 00665 } </pre></div><hr><address align="right"><small>Generated on Thu Jun 20 22:12:59 2002 for The Sword Project by <a href="http://www.doxygen.org/index.html"> <img src="doxygen.png" alt="doxygen" align="middle" border=0 width=110 height=53></a>1.2.15 </small></address> </body> </html>