<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> <html xmlns="http://www.w3.org/1999/xhtml"> <head> <meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/> <title>SphinxBase: src/libsphinxbase/lm/ngram_model.c Source File</title> <link href="tabs.css" rel="stylesheet" type="text/css"/> <link href="navtree.css" rel="stylesheet" type="text/css"/> <script type="text/javascript" src="jquery.js"></script> <script type="text/javascript" src="navtree.js"></script> <script type="text/javascript" src="resize.js"></script> <script type="text/javascript"> $(document).ready(initResizable); </script> <link href="doxygen.css" rel="stylesheet" type="text/css"/> </head> <body> <!-- Generated by Doxygen 1.7.3 --> <div id="top"> <div id="titlearea"> <table cellspacing="0" cellpadding="0"> <tbody> <tr style="height: 56px;"> <td style="padding-left: 0.5em;"> <div id="projectname">SphinxBase <span id="projectnumber">0.6</span></div> </td> </tr> </tbody> </table> </div> <div id="navrow1" class="tabs"> <ul class="tablist"> <li><a href="index.html"><span>Main Page</span></a></li> <li><a href="pages.html"><span>Related Pages</span></a></li> <li><a href="annotated.html"><span>Data Structures</span></a></li> <li class="current"><a href="files.html"><span>Files</span></a></li> </ul> </div> <div id="navrow2" class="tabs2"> <ul class="tablist"> <li><a href="files.html"><span>File List</span></a></li> <li><a href="globals.html"><span>Globals</span></a></li> </ul> </div> </div> <div id="side-nav" class="ui-resizable side-nav-resizable"> <div id="nav-tree"> <div id="nav-tree-contents"> </div> </div> <div id="splitbar" style="-moz-user-select:none;" class="ui-resizable-handle"> </div> </div> <script type="text/javascript"> initNavTree('ngram__model_8c.html',''); </script> <div id="doc-content"> <div class="header"> <div class="headertitle"> <h1>src/libsphinxbase/lm/ngram_model.c</h1> </div> </div> <div 
class="contents"> <div class="fragment"><pre class="fragment"><a name="l00001"></a>00001 <span class="comment">/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */</span> <a name="l00002"></a>00002 <span class="comment">/* ====================================================================</span> <a name="l00003"></a>00003 <span class="comment"> * Copyright (c) 1999-2007 Carnegie Mellon University. All rights</span> <a name="l00004"></a>00004 <span class="comment"> * reserved.</span> <a name="l00005"></a>00005 <span class="comment"> *</span> <a name="l00006"></a>00006 <span class="comment"> * Redistribution and use in source and binary forms, with or without</span> <a name="l00007"></a>00007 <span class="comment"> * modification, are permitted provided that the following conditions</span> <a name="l00008"></a>00008 <span class="comment"> * are met:</span> <a name="l00009"></a>00009 <span class="comment"> *</span> <a name="l00010"></a>00010 <span class="comment"> * 1. Redistributions of source code must retain the above copyright</span> <a name="l00011"></a>00011 <span class="comment"> * notice, this list of conditions and the following disclaimer. </span> <a name="l00012"></a>00012 <span class="comment"> *</span> <a name="l00013"></a>00013 <span class="comment"> * 2. 
Redistributions in binary form must reproduce the above copyright</span> <a name="l00014"></a>00014 <span class="comment"> * notice, this list of conditions and the following disclaimer in</span> <a name="l00015"></a>00015 <span class="comment"> * the documentation and/or other materials provided with the</span> <a name="l00016"></a>00016 <span class="comment"> * distribution.</span> <a name="l00017"></a>00017 <span class="comment"> *</span> <a name="l00018"></a>00018 <span class="comment"> * This work was supported in part by funding from the Defense Advanced </span> <a name="l00019"></a>00019 <span class="comment"> * Research Projects Agency and the National Science Foundation of the </span> <a name="l00020"></a>00020 <span class="comment"> * United States of America, and the CMU Sphinx Speech Consortium.</span> <a name="l00021"></a>00021 <span class="comment"> *</span> <a name="l00022"></a>00022 <span class="comment"> * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND </span> <a name="l00023"></a>00023 <span class="comment"> * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, </span> <a name="l00024"></a>00024 <span class="comment"> * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR</span> <a name="l00025"></a>00025 <span class="comment"> * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY</span> <a name="l00026"></a>00026 <span class="comment"> * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,</span> <a name="l00027"></a>00027 <span class="comment"> * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT </span> <a name="l00028"></a>00028 <span class="comment"> * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, </span> <a name="l00029"></a>00029 <span class="comment"> * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY </span> <a name="l00030"></a>00030 <span class="comment"> * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT </span> <a name="l00031"></a>00031 <span class="comment"> * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE </span> <a name="l00032"></a>00032 <span class="comment"> * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.</span> <a name="l00033"></a>00033 <span class="comment"> *</span> <a name="l00034"></a>00034 <span class="comment"> * ====================================================================</span> <a name="l00035"></a>00035 <span class="comment"> *</span> <a name="l00036"></a>00036 <span class="comment"> */</span> <a name="l00037"></a>00037 <span class="comment">/*</span> <a name="l00038"></a>00038 <span class="comment"> * \file ngram_model.c N-Gram language models.</span> <a name="l00039"></a>00039 <span class="comment"> *</span> <a name="l00040"></a>00040 <span class="comment"> * Author: David Huggins-Daines, much code taken from sphinx3/src/libs3decoder/liblm</span> <a name="l00041"></a>00041 <span class="comment"> */</span> <a name="l00042"></a>00042 <a name="l00043"></a>00043 <span class="preprocessor">#include &lt;config.h&gt;</span> <a name="l00044"></a>00044 <a name="l00045"></a>00045 <span class="preprocessor">#include &lt;string.h&gt;</span> <a name="l00046"></a>00046 <span class="preprocessor">#include &lt;assert.h&gt;</span> <a name="l00047"></a>00047 
<a name="l00048"></a>00048 <span class="preprocessor">#ifdef HAVE_ICONV</span> <a name="l00049"></a>00049 <span class="preprocessor"></span><span class="preprocessor">#include &lt;iconv.h&gt;</span> <a name="l00050"></a>00050 <span class="preprocessor">#endif </span> <a name="l00051"></a>00051 <span class="preprocessor"></span> <a name="l00052"></a>00052 <span class="preprocessor">#include "sphinxbase/ngram_model.h"</span> <a name="l00053"></a>00053 <span class="preprocessor">#include "sphinxbase/ckd_alloc.h"</span> <a name="l00054"></a>00054 <span class="preprocessor">#include "sphinxbase/filename.h"</span> <a name="l00055"></a>00055 <span class="preprocessor">#include "sphinxbase/pio.h"</span> <a name="l00056"></a>00056 <span class="preprocessor">#include "sphinxbase/err.h"</span> <a name="l00057"></a>00057 <span class="preprocessor">#include "sphinxbase/logmath.h"</span> <a name="l00058"></a>00058 <span class="preprocessor">#include "sphinxbase/strfuncs.h"</span> <a name="l00059"></a>00059 <span class="preprocessor">#include "sphinxbase/case.h"</span> <a name="l00060"></a>00060 <a name="l00061"></a>00061 <span class="preprocessor">#include "ngram_model_internal.h"</span> <a name="l00062"></a>00062 <a name="l00063"></a>00063 <a class="code" href="ngram__model_8h.html#a0e7d02703c48237b2afea436392dcb82" title="File types for N-Gram files.">ngram_file_type_t</a> <a name="l00064"></a><a class="code" href="ngram__model_8h.html#ad758d3f491d501bdec2ada8088e9b656">00064</a> <a class="code" href="ngram__model_8h.html#ad758d3f491d501bdec2ada8088e9b656" title="Guess the file type for an N-Gram model from the filename.">ngram_file_name_to_type</a>(<span class="keyword">const</span> <span class="keywordtype">char</span> *file_name) <a name="l00065"></a>00065 { <a name="l00066"></a>00066 <span class="keyword">const</span> <span class="keywordtype">char</span> *ext; <a name="l00067"></a>00067 <a name="l00068"></a>00068 ext = strrchr(file_name, <span class="charliteral">'.'</span>); 
<a name="l00069"></a>00069 <span class="keywordflow">if</span> (ext == NULL) { <a name="l00070"></a>00070 <span class="keywordflow">return</span> <a class="code" href="ngram__model_8h.html#a406c0d64c15a9d1749d07c8ab6e0ae74a2cf596c8b0c63a3a0ba1fe33326cc796" title="Not a valid file type.">NGRAM_INVALID</a>; <a name="l00071"></a>00071 } <a name="l00072"></a>00072 <span class="keywordflow">if</span> (0 == <a class="code" href="case_8h.html#ad276a997bd6709d986aa6e1e4e06c210" title="(FIXME! The implementation is incorrect!) Case insensitive string compare.">strcmp_nocase</a>(ext, <span class="stringliteral">".gz"</span>)) { <a name="l00073"></a>00073 <span class="keywordflow">while</span> (--ext >= file_name) { <a name="l00074"></a>00074 <span class="keywordflow">if</span> (*ext == <span class="charliteral">'.'</span>) <span class="keywordflow">break</span>; <a name="l00075"></a>00075 } <a name="l00076"></a>00076 <span class="keywordflow">if</span> (ext &lt; file_name) { <a name="l00077"></a>00077 <span class="keywordflow">return</span> <a class="code" href="ngram__model_8h.html#a406c0d64c15a9d1749d07c8ab6e0ae74a2cf596c8b0c63a3a0ba1fe33326cc796" title="Not a valid file type.">NGRAM_INVALID</a>; <a name="l00078"></a>00078 } <a name="l00079"></a>00079 } <a name="l00080"></a>00080 <span class="keywordflow">else</span> <span class="keywordflow">if</span> (0 == <a class="code" href="case_8h.html#ad276a997bd6709d986aa6e1e4e06c210" title="(FIXME! The implementation is incorrect!) 
Case insensitive string compare.">strcmp_nocase</a>(ext, <span class="stringliteral">".bz2"</span>)) { <a name="l00081"></a>00081 <span class="keywordflow">while</span> (--ext >= file_name) { <a name="l00082"></a>00082 <span class="keywordflow">if</span> (*ext == <span class="charliteral">'.'</span>) <span class="keywordflow">break</span>; <a name="l00083"></a>00083 } <a name="l00084"></a>00084 <span class="keywordflow">if</span> (ext &lt; file_name) { <a name="l00085"></a>00085 <span class="keywordflow">return</span> <a class="code" href="ngram__model_8h.html#a406c0d64c15a9d1749d07c8ab6e0ae74a2cf596c8b0c63a3a0ba1fe33326cc796" title="Not a valid file type.">NGRAM_INVALID</a>; <a name="l00086"></a>00086 } <a name="l00087"></a>00087 } <a name="l00088"></a>00088 <span class="comment">/* We use strncmp because there might be a .gz on the end. */</span> <a name="l00089"></a>00089 <span class="keywordflow">if</span> (0 == <a class="code" href="case_8h.html#ae7c2a7f29d72b9516a947f5c69a043db" title="Like strcmp_nocase() but with a maximum length.">strncmp_nocase</a>(ext, <span class="stringliteral">".ARPA"</span>, 5)) <a name="l00090"></a>00090 <span class="keywordflow">return</span> <a class="code" href="ngram__model_8h.html#a406c0d64c15a9d1749d07c8ab6e0ae74a77ef2341d826b3cc8b836e3af1efba99" title="ARPABO text format (the standard).">NGRAM_ARPA</a>; <a name="l00091"></a>00091 <span class="keywordflow">if</span> (0 == <a class="code" href="case_8h.html#ae7c2a7f29d72b9516a947f5c69a043db" title="Like strcmp_nocase() but with a maximum length.">strncmp_nocase</a>(ext, <span class="stringliteral">".DMP"</span>, 4)) <a name="l00092"></a>00092 <span class="keywordflow">return</span> <a class="code" href="ngram__model_8h.html#a406c0d64c15a9d1749d07c8ab6e0ae74addfc3620d0fbc6f05f7f8e455245dd92" title="Sphinx .DMP format.">NGRAM_DMP</a>; <a name="l00093"></a>00093 <span class="keywordflow">return</span> <a class="code" 
href="ngram__model_8h.html#a406c0d64c15a9d1749d07c8ab6e0ae74a2cf596c8b0c63a3a0ba1fe33326cc796" title="Not a valid file type.">NGRAM_INVALID</a>; <a name="l00094"></a>00094 } <a name="l00095"></a>00095 <a name="l00096"></a>00096 <a class="code" href="ngram__model_8h.html#a0e7d02703c48237b2afea436392dcb82" title="File types for N-Gram files.">ngram_file_type_t</a> <a name="l00097"></a><a class="code" href="ngram__model_8h.html#affb3d938049e58c191e72858a7191e9c">00097</a> <a class="code" href="ngram__model_8h.html#affb3d938049e58c191e72858a7191e9c" title="Get the N-Gram file type from a string.">ngram_str_to_type</a>(<span class="keyword">const</span> <span class="keywordtype">char</span> *str_name) <a name="l00098"></a>00098 { <a name="l00099"></a>00099 <span class="keywordflow">if</span> (0 == <a class="code" href="case_8h.html#ad276a997bd6709d986aa6e1e4e06c210" title="(FIXME! The implementation is incorrect!) Case insensitive string compare.">strcmp_nocase</a>(str_name, <span class="stringliteral">"arpa"</span>)) <a name="l00100"></a>00100 <span class="keywordflow">return</span> <a class="code" href="ngram__model_8h.html#a406c0d64c15a9d1749d07c8ab6e0ae74a77ef2341d826b3cc8b836e3af1efba99" title="ARPABO text format (the standard).">NGRAM_ARPA</a>; <a name="l00101"></a>00101 <span class="keywordflow">if</span> (0 == <a class="code" href="case_8h.html#ad276a997bd6709d986aa6e1e4e06c210" title="(FIXME! The implementation is incorrect!) 
Case insensitive string compare.">strcmp_nocase</a>(str_name, <span class="stringliteral">"dmp"</span>)) <a name="l00102"></a>00102 <span class="keywordflow">return</span> <a class="code" href="ngram__model_8h.html#a406c0d64c15a9d1749d07c8ab6e0ae74addfc3620d0fbc6f05f7f8e455245dd92" title="Sphinx .DMP format.">NGRAM_DMP</a>; <a name="l00103"></a>00103 <span class="keywordflow">return</span> <a class="code" href="ngram__model_8h.html#a406c0d64c15a9d1749d07c8ab6e0ae74a2cf596c8b0c63a3a0ba1fe33326cc796" title="Not a valid file type.">NGRAM_INVALID</a>; <a name="l00104"></a>00104 } <a name="l00105"></a>00105 <a name="l00106"></a>00106 <span class="keywordtype">char</span> <span class="keyword">const</span> * <a name="l00107"></a><a class="code" href="ngram__model_8h.html#a992b8a8d7f623e2662ae94991f6aec55">00107</a> <a class="code" href="ngram__model_8h.html#a992b8a8d7f623e2662ae94991f6aec55" title="Get the canonical name for an N-Gram file type.">ngram_type_to_str</a>(<span class="keywordtype">int</span> type) <a name="l00108"></a>00108 { <a name="l00109"></a>00109 <span class="keywordflow">switch</span> (type) { <a name="l00110"></a>00110 <span class="keywordflow">case</span> <a class="code" href="ngram__model_8h.html#a406c0d64c15a9d1749d07c8ab6e0ae74a77ef2341d826b3cc8b836e3af1efba99" title="ARPABO text format (the standard).">NGRAM_ARPA</a>: <a name="l00111"></a>00111 <span class="keywordflow">return</span> <span class="stringliteral">"arpa"</span>; <a name="l00112"></a>00112 <span class="keywordflow">case</span> <a class="code" href="ngram__model_8h.html#a406c0d64c15a9d1749d07c8ab6e0ae74addfc3620d0fbc6f05f7f8e455245dd92" title="Sphinx .DMP format.">NGRAM_DMP</a>: <a name="l00113"></a>00113 <span class="keywordflow">return</span> <span class="stringliteral">"dmp"</span>; <a name="l00114"></a>00114 <span class="keywordflow">default</span>: <a name="l00115"></a>00115 <span class="keywordflow">return</span> NULL; <a name="l00116"></a>00116 } <a name="l00117"></a>00117 } 
<a name="l00118"></a>00118 <a name="l00119"></a>00119 <a name="l00120"></a>00120 <a class="code" href="structngram__model__s.html" title="Common implementation of ngram_model_t.">ngram_model_t</a> * <a name="l00121"></a><a class="code" href="ngram__model_8h.html#ab0c840f2bdfc38cea08bb70054f76624">00121</a> <a class="code" href="ngram__model_8h.html#ab0c840f2bdfc38cea08bb70054f76624" title="Read an N-Gram model from a file on disk.">ngram_model_read</a>(<a class="code" href="structcmd__ln__t.html" title="Opaque structure used to hold the results of command-line parsing.">cmd_ln_t</a> *config, <a name="l00122"></a>00122 <span class="keyword">const</span> <span class="keywordtype">char</span> *file_name, <a name="l00123"></a>00123 <a class="code" href="ngram__model_8h.html#a0e7d02703c48237b2afea436392dcb82" title="File types for N-Gram files.">ngram_file_type_t</a> file_type, <a name="l00124"></a>00124 <a class="code" href="structlogmath__s.html">logmath_t</a> *lmath) <a name="l00125"></a>00125 { <a name="l00126"></a>00126 <a class="code" href="structngram__model__s.html" title="Common implementation of ngram_model_t.">ngram_model_t</a> *model = NULL; <a name="l00127"></a>00127 <a name="l00128"></a>00128 <span class="keywordflow">switch</span> (file_type) { <a name="l00129"></a>00129 <span class="keywordflow">case</span> <a class="code" href="ngram__model_8h.html#a406c0d64c15a9d1749d07c8ab6e0ae74a441701bf8ae0a2b79716feb31b5f257a" title="Determine file type automatically.">NGRAM_AUTO</a>: { <a name="l00130"></a>00130 <span class="keywordflow">if</span> ((model = ngram_model_arpa_read(config, file_name, lmath)) != NULL) <a name="l00131"></a>00131 <span class="keywordflow">break</span>; <a name="l00132"></a>00132 <span class="keywordflow">if</span> ((model = ngram_model_dmp_read(config, file_name, lmath)) != NULL) <a name="l00133"></a>00133 <span class="keywordflow">break</span>; <a name="l00134"></a>00134 <span class="keywordflow">return</span> NULL; <a 
name="l00135"></a>00135 } <a name="l00136"></a>00136 <span class="keywordflow">case</span> <a class="code" href="ngram__model_8h.html#a406c0d64c15a9d1749d07c8ab6e0ae74a77ef2341d826b3cc8b836e3af1efba99" title="ARPABO text format (the standard).">NGRAM_ARPA</a>: <a name="l00137"></a>00137 model = ngram_model_arpa_read(config, file_name, lmath); <a name="l00138"></a>00138 <span class="keywordflow">break</span>; <a name="l00139"></a>00139 <span class="keywordflow">case</span> <a class="code" href="ngram__model_8h.html#a406c0d64c15a9d1749d07c8ab6e0ae74addfc3620d0fbc6f05f7f8e455245dd92" title="Sphinx .DMP format.">NGRAM_DMP</a>: <a name="l00140"></a>00140 model = ngram_model_dmp_read(config, file_name, lmath); <a name="l00141"></a>00141 <span class="keywordflow">break</span>; <a name="l00142"></a>00142 <span class="keywordflow">default</span>: <a name="l00143"></a>00143 <a class="code" href="err_8h.html#a5f7b2f58f5a663a6bdd51f197ae21993" title="Print error message to standard error stream.">E_ERROR</a>(<span class="stringliteral">"language model file type not supported\n"</span>); <a name="l00144"></a>00144 <span class="keywordflow">return</span> NULL; <a name="l00145"></a>00145 } <a name="l00146"></a>00146 <a name="l00147"></a>00147 <span class="comment">/* Now set weights based on config if present. 
*/</span> <a name="l00148"></a>00148 <span class="keywordflow">if</span> (config) { <a name="l00149"></a>00149 float32 lw = 1.0; <a name="l00150"></a>00150 float32 wip = 1.0; <a name="l00151"></a>00151 float32 uw = 1.0; <a name="l00152"></a>00152 <a name="l00153"></a>00153 <span class="keywordflow">if</span> (<a class="code" href="cmd__ln_8h.html#ab4ad5ae130e3c2c042590b41768635e3" title="Re-entrant version of cmd_ln_exists().">cmd_ln_exists_r</a>(config, <span class="stringliteral">"-lw"</span>)) <a name="l00154"></a>00154 lw = cmd_ln_float32_r(config, <span class="stringliteral">"-lw"</span>); <a name="l00155"></a>00155 <span class="keywordflow">if</span> (<a class="code" href="cmd__ln_8h.html#ab4ad5ae130e3c2c042590b41768635e3" title="Re-entrant version of cmd_ln_exists().">cmd_ln_exists_r</a>(config, <span class="stringliteral">"-wip"</span>)) <a name="l00156"></a>00156 wip = cmd_ln_float32_r(config, <span class="stringliteral">"-wip"</span>); <a name="l00157"></a>00157 <span class="keywordflow">if</span> (<a class="code" href="cmd__ln_8h.html#ab4ad5ae130e3c2c042590b41768635e3" title="Re-entrant version of cmd_ln_exists().">cmd_ln_exists_r</a>(config, <span class="stringliteral">"-uw"</span>)) <a name="l00158"></a>00158 uw = cmd_ln_float32_r(config, <span class="stringliteral">"-uw"</span>); <a name="l00159"></a>00159 <a name="l00160"></a>00160 <a class="code" href="ngram__model_8h.html#aa4b8d7c1f3d873b8458c0cfee13af4da" title="Apply a language weight, insertion penalty, and unigram weight to a language model.">ngram_model_apply_weights</a>(model, lw, wip, uw); <a name="l00161"></a>00161 } <a name="l00162"></a>00162 <a name="l00163"></a>00163 <span class="keywordflow">return</span> model; <a name="l00164"></a>00164 } <a name="l00165"></a>00165 <a name="l00166"></a>00166 <span class="keywordtype">int</span> <a name="l00167"></a><a class="code" href="ngram__model_8h.html#ac8ff04e1bccbef23bde9e81fb61c57fe">00167</a> <a class="code" 
href="ngram__model_8h.html#ac8ff04e1bccbef23bde9e81fb61c57fe" title="Write an N-Gram model to disk.">ngram_model_write</a>(<a class="code" href="structngram__model__s.html" title="Common implementation of ngram_model_t.">ngram_model_t</a> *model, <span class="keyword">const</span> <span class="keywordtype">char</span> *file_name, <a name="l00168"></a>00168 <a class="code" href="ngram__model_8h.html#a0e7d02703c48237b2afea436392dcb82" title="File types for N-Gram files.">ngram_file_type_t</a> file_type) <a name="l00169"></a>00169 { <a name="l00170"></a>00170 <span class="keywordflow">switch</span> (file_type) { <a name="l00171"></a>00171 <span class="keywordflow">case</span> <a class="code" href="ngram__model_8h.html#a406c0d64c15a9d1749d07c8ab6e0ae74a441701bf8ae0a2b79716feb31b5f257a" title="Determine file type automatically.">NGRAM_AUTO</a>: { <a name="l00172"></a>00172 file_type = <a class="code" href="ngram__model_8h.html#ad758d3f491d501bdec2ada8088e9b656" title="Guess the file type for an N-Gram model from the filename.">ngram_file_name_to_type</a>(file_name); <a name="l00173"></a>00173 <span class="comment">/* Default to ARPA (catches .lm and other things) */</span> <a name="l00174"></a>00174 <span class="keywordflow">if</span> (file_type == <a class="code" href="ngram__model_8h.html#a406c0d64c15a9d1749d07c8ab6e0ae74a2cf596c8b0c63a3a0ba1fe33326cc796" title="Not a valid file type.">NGRAM_INVALID</a>) <a name="l00175"></a>00175 file_type = <a class="code" href="ngram__model_8h.html#a406c0d64c15a9d1749d07c8ab6e0ae74a77ef2341d826b3cc8b836e3af1efba99" title="ARPABO text format (the standard).">NGRAM_ARPA</a>; <a name="l00176"></a>00176 <span class="keywordflow">return</span> <a class="code" href="ngram__model_8h.html#ac8ff04e1bccbef23bde9e81fb61c57fe" title="Write an N-Gram model to disk.">ngram_model_write</a>(model, file_name, file_type); <a name="l00177"></a>00177 } <a name="l00178"></a>00178 <span class="keywordflow">case</span> <a class="code" 
href="ngram__model_8h.html#a406c0d64c15a9d1749d07c8ab6e0ae74a77ef2341d826b3cc8b836e3af1efba99" title="ARPABO text format (the standard).">NGRAM_ARPA</a>: <a name="l00179"></a>00179 <span class="keywordflow">return</span> ngram_model_arpa_write(model, file_name); <a name="l00180"></a>00180 <span class="keywordflow">case</span> <a class="code" href="ngram__model_8h.html#a406c0d64c15a9d1749d07c8ab6e0ae74addfc3620d0fbc6f05f7f8e455245dd92" title="Sphinx .DMP format.">NGRAM_DMP</a>: <a name="l00181"></a>00181 <span class="keywordflow">return</span> ngram_model_dmp_write(model, file_name); <a name="l00182"></a>00182 <span class="keywordflow">default</span>: <a name="l00183"></a>00183 <a class="code" href="err_8h.html#a5f7b2f58f5a663a6bdd51f197ae21993" title="Print error message to standard error stream.">E_ERROR</a>(<span class="stringliteral">"language model file type not supported\n"</span>); <a name="l00184"></a>00184 <span class="keywordflow">return</span> -1; <a name="l00185"></a>00185 } <a name="l00186"></a>00186 <a class="code" href="err_8h.html#a5f7b2f58f5a663a6bdd51f197ae21993" title="Print error message to standard error stream.">E_ERROR</a>(<span class="stringliteral">"language model file type not supported\n"</span>); <a name="l00187"></a>00187 <span class="keywordflow">return</span> -1; <a name="l00188"></a>00188 } <a name="l00189"></a>00189 <a name="l00190"></a>00190 int32 <a name="l00191"></a>00191 ngram_model_init(<a class="code" href="structngram__model__s.html" title="Common implementation of ngram_model_t.">ngram_model_t</a> *base, <a name="l00192"></a>00192 <a class="code" href="structngram__funcs__s.html" title="Implementation-specific functions for operating on ngram_model_t objects.">ngram_funcs_t</a> *funcs, <a name="l00193"></a>00193 <a class="code" href="structlogmath__s.html">logmath_t</a> *lmath, <a name="l00194"></a>00194 int32 n, int32 n_unigram) <a name="l00195"></a>00195 { <a name="l00196"></a>00196 base-><a class="code" 
href="structngram__model__s.html#a3b14986e4dc40ccec1f7e206b7f41d06" title="Reference count.">refcount</a> = 1; <a name="l00197"></a>00197 base-><a class="code" href="structngram__model__s.html#ad3d9d8ad9773f958a89534220eda6fb9" title="Implementation-specific methods.">funcs</a> = funcs; <a name="l00198"></a>00198 base-><a class="code" href="structngram__model__s.html#a3c87bc1b678662a2c8930b3b8c33a80f" title="This is an n-gram model (1, 2, 3, ...).">n</a> = n; <a name="l00199"></a>00199 <span class="comment">/* If this was previously initialized... */</span> <a name="l00200"></a>00200 <span class="keywordflow">if</span> (base-><a class="code" href="structngram__model__s.html#a9dcba9b49cc1cd189b257e5838da0eee" title="Counts for 1, 2, 3, ...">n_counts</a> == NULL) <a name="l00201"></a>00201 base-><a class="code" href="structngram__model__s.html#a9dcba9b49cc1cd189b257e5838da0eee" title="Counts for 1, 2, 3, ...">n_counts</a> = <a class="code" href="ckd__alloc_8h.html#aa00ef21903bc4f8a972488417adc8d2e" title="Macros to simplify the use of above functions.">ckd_calloc</a>(3, <span class="keyword">sizeof</span>(*base-><a class="code" href="structngram__model__s.html#a9dcba9b49cc1cd189b257e5838da0eee" title="Counts for 1, 2, 3, ...">n_counts</a>)); <a name="l00202"></a>00202 <span class="comment">/* Don't reset weights if logmath object hasn't changed. */</span> <a name="l00203"></a>00203 <span class="keywordflow">if</span> (base-><a class="code" href="structngram__model__s.html#a2ca373109c651ac998b33153eb38fd95" title="Log-math object.">lmath</a> != lmath) { <a name="l00204"></a>00204 <span class="comment">/* Set default values for weights. 
*/</span> <a name="l00205"></a>00205 base-><a class="code" href="structngram__model__s.html#a76ea0c65b23de80091e7c602bdb43bde" title="Language model scaling factor.">lw</a> = 1.0; <a name="l00206"></a>00206 base-><a class="code" href="structngram__model__s.html#a3d6bf5632760a16e52cb881d7010d774" title="Log of word insertion penalty.">log_wip</a> = 0; <span class="comment">/* i.e. 1.0 */</span> <a name="l00207"></a>00207 base-><a class="code" href="structngram__model__s.html#a6f0ec7b8b9d13d590bbe4b59df573abc" title="Log of unigram weight.">log_uw</a> = 0; <span class="comment">/* i.e. 1.0 */</span> <a name="l00208"></a>00208 base-><a class="code" href="structngram__model__s.html#a616bf871a67f9cedce17d6b589ee33ea" title="Log of uniform (0-gram) probability.">log_uniform</a> = <a class="code" href="logmath_8h.html#aebb4711268322fa7aec31e5798fe7e90" title="Convert linear floating point number to integer log in base B.">logmath_log</a>(lmath, 1.0 / n_unigram); <a name="l00209"></a>00209 base-><a class="code" href="structngram__model__s.html#aa38c5fdecaefd9a2f43b69f26ae492c1" title="Log of uniform weight (i.e.">log_uniform_weight</a> = <a class="code" href="logmath_8h.html#a1c160c28a9e7d25923f391773b1028c0" title="Get the smallest possible value represented in this base.">logmath_get_zero</a>(lmath); <a name="l00210"></a>00210 base-><a class="code" href="structngram__model__s.html#a65425a599c4bcc4dda809d81149b8fc0" title="Zero probability, cached here for quick lookup.">log_zero</a> = <a class="code" href="logmath_8h.html#a1c160c28a9e7d25923f391773b1028c0" title="Get the smallest possible value represented in this base.">logmath_get_zero</a>(lmath); <a name="l00211"></a>00211 base-><a class="code" href="structngram__model__s.html#a2ca373109c651ac998b33153eb38fd95" title="Log-math object.">lmath</a> = lmath; <a name="l00212"></a>00212 } <a name="l00213"></a>00213 <span class="comment">/* Allocate or reallocate space for word strings. 
*/</span> <a name="l00214"></a>00214 <span class="keywordflow">if</span> (base-><a class="code" href="structngram__model__s.html#ae625e779e340845f03fb3da164e93039" title="Unigram names.">word_str</a>) { <a name="l00215"></a>00215 <span class="comment">/* Free all previous word strings if they were allocated. */</span> <a name="l00216"></a>00216 <span class="keywordflow">if</span> (base-><a class="code" href="structngram__model__s.html#a78a3253febced2cae4732044da466ee6" title="Are word strings writable?">writable</a>) { <a name="l00217"></a>00217 int32 i; <a name="l00218"></a>00218 <span class="keywordflow">for</span> (i = 0; i &lt; base-><a class="code" href="structngram__model__s.html#a74f85927ef0d5513a1e6c02d13864be3" title="Number of actual word strings (NOT the same as the number of unigrams, due to class words)...">n_words</a>; ++i) { <a name="l00219"></a>00219 <a class="code" href="ckd__alloc_8h.html#a31c6b405558620ac37599737b5722fbf" title="Test and free a 1-D array.">ckd_free</a>(base-><a class="code" href="structngram__model__s.html#ae625e779e340845f03fb3da164e93039" title="Unigram names.">word_str</a>[i]); <a name="l00220"></a>00220 base-><a class="code" href="structngram__model__s.html#ae625e779e340845f03fb3da164e93039" title="Unigram names.">word_str</a>[i] = NULL; <a name="l00221"></a>00221 } <a name="l00222"></a>00222 } <a name="l00223"></a>00223 base-><a class="code" href="structngram__model__s.html#ae625e779e340845f03fb3da164e93039" title="Unigram names.">word_str</a> = <a class="code" href="ckd__alloc_8h.html#afd496738b3e114bd494c5a0955f1bfb3" title="Macro for __ckd_realloc__.">ckd_realloc</a>(base-><a class="code" href="structngram__model__s.html#ae625e779e340845f03fb3da164e93039" title="Unigram names.">word_str</a>, n_unigram * <span class="keyword">sizeof</span>(<span class="keywordtype">char</span> *)); <a name="l00224"></a>00224 } <a name="l00225"></a>00225 <span class="keywordflow">else</span> <a name="l00226"></a>00226 base-><a class="code" 
href="structngram__model__s.html#ae625e779e340845f03fb3da164e93039" title="Unigram names.">word_str</a> = <a class="code" href="ckd__alloc_8h.html#aa00ef21903bc4f8a972488417adc8d2e" title="Macros to simplify the use of above functions.">ckd_calloc</a>(n_unigram, <span class="keyword">sizeof</span>(<span class="keywordtype">char</span> *)); <a name="l00227"></a>00227 <span class="comment">/* NOTE: They are no longer case-insensitive since we are allowing</span> <a name="l00228"></a>00228 <span class="comment"> * other encodings for word strings. Beware. */</span> <a name="l00229"></a>00229 <span class="keywordflow">if</span> (base-><a class="code" href="structngram__model__s.html#a75567419a8002ef6e916c81f5d9ee9ed" title="Mapping of unigram names to word IDs.">wid</a>) <a name="l00230"></a>00230 <a class="code" href="hash__table_8h.html#acab374d21e25009d397642e3465308c7" title="Delete all entries from a hash_table.">hash_table_empty</a>(base-><a class="code" href="structngram__model__s.html#a75567419a8002ef6e916c81f5d9ee9ed" title="Mapping of unigram names to word IDs.">wid</a>); <a name="l00231"></a>00231 <span class="keywordflow">else</span> <a name="l00232"></a>00232 base-><a class="code" href="structngram__model__s.html#a75567419a8002ef6e916c81f5d9ee9ed" title="Mapping of unigram names to word IDs.">wid</a> = <a class="code" href="hash__table_8h.html#a56d93e8c03e066b77377ac6eab50cfae" title="Allocate a new hash table for a given expected size.">hash_table_new</a>(n_unigram, FALSE); <a name="l00233"></a>00233 base-><a class="code" href="structngram__model__s.html#a9dcba9b49cc1cd189b257e5838da0eee" title="Counts for 1, 2, 3, ...">n_counts</a>[0] = base-><a class="code" href="structngram__model__s.html#a3e41109b30668bdfc077614c1ef49960" title="Number of allocated word strings (for new word addition)">n_1g_alloc</a> = base-><a class="code" href="structngram__model__s.html#a74f85927ef0d5513a1e6c02d13864be3" title="Number of actual word strings (NOT the same as the 
number of unigrams, due to class words)...">n_words</a> = n_unigram; <a name="l00234"></a>00234 <a name="l00235"></a>00235 <span class="keywordflow">return</span> 0; <a name="l00236"></a>00236 } <a name="l00237"></a>00237 <a name="l00238"></a>00238 <a class="code" href="structngram__model__s.html" title="Common implementation of ngram_model_t.">ngram_model_t</a> * <a name="l00239"></a><a class="code" href="ngram__model_8h.html#a046e6ff8cd8787e412400534a9649a81">00239</a> <a class="code" href="ngram__model_8h.html#a046e6ff8cd8787e412400534a9649a81" title="Retain ownership of an N-Gram model.">ngram_model_retain</a>(<a class="code" href="structngram__model__s.html" title="Common implementation of ngram_model_t.">ngram_model_t</a> *model) <a name="l00240"></a>00240 { <a name="l00241"></a>00241 ++model-><a class="code" href="structngram__model__s.html#a3b14986e4dc40ccec1f7e206b7f41d06" title="Reference count.">refcount</a>; <a name="l00242"></a>00242 <span class="keywordflow">return</span> model; <a name="l00243"></a>00243 } <a name="l00244"></a>00244 <a name="l00245"></a>00245 <a name="l00246"></a>00246 <span class="keywordtype">void</span> <a name="l00247"></a><a class="code" href="ngram__model_8h.html#a8304f56d82278824b0a94c254d7235aa">00247</a> <a class="code" href="ngram__model_8h.html#a8304f56d82278824b0a94c254d7235aa" title="Flush any cached N-Gram information.">ngram_model_flush</a>(<a class="code" href="structngram__model__s.html" title="Common implementation of ngram_model_t.">ngram_model_t</a> *model) <a name="l00248"></a>00248 { <a name="l00249"></a>00249 <span class="keywordflow">if</span> (model-><a class="code" href="structngram__model__s.html#ad3d9d8ad9773f958a89534220eda6fb9" title="Implementation-specific methods.">funcs</a> && model-><a class="code" href="structngram__model__s.html#ad3d9d8ad9773f958a89534220eda6fb9" title="Implementation-specific methods.">funcs</a>-><a class="code" href="structngram__funcs__s.html#a7abf2864db9c8e8d1d5909ea92144ffe" 
title="Implementation-specific function for purging N-Gram cache.">flush</a>) <a name="l00250"></a>00250 (*model-><a class="code" href="structngram__model__s.html#ad3d9d8ad9773f958a89534220eda6fb9" title="Implementation-specific methods.">funcs</a>-><a class="code" href="structngram__funcs__s.html#a7abf2864db9c8e8d1d5909ea92144ffe" title="Implementation-specific function for purging N-Gram cache.">flush</a>)(model); <a name="l00251"></a>00251 } <a name="l00252"></a>00252 <a name="l00253"></a>00253 <span class="keywordtype">int</span> <a name="l00254"></a><a class="code" href="ngram__model_8h.html#aec73d28e7285e539a0b44a7ac0cbe489">00254</a> <a class="code" href="ngram__model_8h.html#aec73d28e7285e539a0b44a7ac0cbe489" title="Release memory associated with an N-Gram model.">ngram_model_free</a>(<a class="code" href="structngram__model__s.html" title="Common implementation of ngram_model_t.">ngram_model_t</a> *model) <a name="l00255"></a>00255 { <a name="l00256"></a>00256 <span class="keywordtype">int</span> i; <a name="l00257"></a>00257 <a name="l00258"></a>00258 <span class="keywordflow">if</span> (model == NULL) <a name="l00259"></a>00259 <span class="keywordflow">return</span> 0; <a name="l00260"></a>00260 <span class="keywordflow">if</span> (--model-><a class="code" href="structngram__model__s.html#a3b14986e4dc40ccec1f7e206b7f41d06" title="Reference count.">refcount</a> > 0) <a name="l00261"></a>00261 <span class="keywordflow">return</span> model-><a class="code" href="structngram__model__s.html#a3b14986e4dc40ccec1f7e206b7f41d06" title="Reference count.">refcount</a>; <a name="l00262"></a>00262 <span class="keywordflow">if</span> (model-><a class="code" href="structngram__model__s.html#ad3d9d8ad9773f958a89534220eda6fb9" title="Implementation-specific methods.">funcs</a> && model-><a class="code" href="structngram__model__s.html#ad3d9d8ad9773f958a89534220eda6fb9" title="Implementation-specific methods.">funcs</a>-><a class="code" 
href="structngram__funcs__s.html#a4211130880131f38e16022985816952f" title="Implementation-specific function for freeing an ngram_model_t.">free</a>) <a name="l00263"></a>00263 (*model-><a class="code" href="structngram__model__s.html#ad3d9d8ad9773f958a89534220eda6fb9" title="Implementation-specific methods.">funcs</a>-><a class="code" href="structngram__funcs__s.html#a4211130880131f38e16022985816952f" title="Implementation-specific function for freeing an ngram_model_t.">free</a>)(model); <a name="l00264"></a>00264 <span class="keywordflow">if</span> (model-><a class="code" href="structngram__model__s.html#a78a3253febced2cae4732044da466ee6" title="Are word strings writable?">writable</a>) { <a name="l00265"></a>00265 <span class="comment">/* Free all words. */</span> <a name="l00266"></a>00266 <span class="keywordflow">for</span> (i = 0; i < model-><a class="code" href="structngram__model__s.html#a74f85927ef0d5513a1e6c02d13864be3" title="Number of actual word strings (NOT the same as the number of unigrams, due to class words)...">n_words</a>; ++i) { <a name="l00267"></a>00267 <a class="code" href="ckd__alloc_8h.html#a31c6b405558620ac37599737b5722fbf" title="Test and free a 1-D array.">ckd_free</a>(model-><a class="code" href="structngram__model__s.html#ae625e779e340845f03fb3da164e93039" title="Unigram names.">word_str</a>[i]); <a name="l00268"></a>00268 } <a name="l00269"></a>00269 } <a name="l00270"></a>00270 <span class="keywordflow">else</span> { <a name="l00271"></a>00271 <span class="comment">/* Free all class words. 
*/</span> <a name="l00272"></a>00272 <span class="keywordflow">for</span> (i = 0; i < model-><a class="code" href="structngram__model__s.html#adeb914f8e9f011a5c960f5ee9cd33919" title="Number of classes (maximum 128)">n_classes</a>; ++i) { <a name="l00273"></a>00273 <a class="code" href="structngram__class__s.html" title="Implementation of ngram_class_t.">ngram_class_t</a> *lmclass; <a name="l00274"></a>00274 int32 j; <a name="l00275"></a>00275 <a name="l00276"></a>00276 lmclass = model-><a class="code" href="structngram__model__s.html#a39384af3e6b53591d433436db011ac8d" title="Word class definitions.">classes</a>[i]; <a name="l00277"></a>00277 <span class="keywordflow">for</span> (j = 0; j < lmclass-><a class="code" href="structngram__class__s.html#af13562cbc44647435f315b18df5688dc" title="Number of base words for this class.">n_words</a>; ++j) { <a name="l00278"></a>00278 <a class="code" href="ckd__alloc_8h.html#a31c6b405558620ac37599737b5722fbf" title="Test and free a 1-D array.">ckd_free</a>(model-><a class="code" href="structngram__model__s.html#ae625e779e340845f03fb3da164e93039" title="Unigram names.">word_str</a>[lmclass-><a class="code" href="structngram__class__s.html#a370c88602c7c1f7e3ff1a767c027f5cb" title="Starting base word ID for this class&#39; words.">start_wid</a> + j]); <a name="l00279"></a>00279 } <a name="l00280"></a>00280 <span class="keywordflow">for</span> (j = 0; j < lmclass-><a class="code" href="structngram__class__s.html#a7f450019eb6dc2e31b18eb3ab6009920" title="Number of buckets in nword_hash (power of 2)">n_hash</a>; ++j) { <a name="l00281"></a>00281 <span class="keywordflow">if</span> (lmclass->nword_hash[j].<a class="code" href="structngram__class__s_1_1ngram__hash__s.html#ad0178b5a86ec23ce790b6b7cb64db0b9" title="Word ID of this bucket.">wid</a> != -1) { <a name="l00282"></a>00282 <a class="code" href="ckd__alloc_8h.html#a31c6b405558620ac37599737b5722fbf" title="Test and free a 1-D array.">ckd_free</a>(model-><a class="code" 
href="structngram__model__s.html#ae625e779e340845f03fb3da164e93039" title="Unigram names.">word_str</a>[lmclass->nword_hash[j].<a class="code" href="structngram__class__s_1_1ngram__hash__s.html#ad0178b5a86ec23ce790b6b7cb64db0b9" title="Word ID of this bucket.">wid</a>]); <a name="l00283"></a>00283 } <a name="l00284"></a>00284 } <a name="l00285"></a>00285 } <a name="l00286"></a>00286 } <a name="l00287"></a>00287 <span class="keywordflow">for</span> (i = 0; i < model-><a class="code" href="structngram__model__s.html#adeb914f8e9f011a5c960f5ee9cd33919" title="Number of classes (maximum 128)">n_classes</a>; ++i) { <a name="l00288"></a>00288 ngram_class_free(model-><a class="code" href="structngram__model__s.html#a39384af3e6b53591d433436db011ac8d" title="Word class definitions.">classes</a>[i]); <a name="l00289"></a>00289 } <a name="l00290"></a>00290 <a class="code" href="ckd__alloc_8h.html#a31c6b405558620ac37599737b5722fbf" title="Test and free a 1-D array.">ckd_free</a>(model-><a class="code" href="structngram__model__s.html#a39384af3e6b53591d433436db011ac8d" title="Word class definitions.">classes</a>); <a name="l00291"></a>00291 <a class="code" href="hash__table_8h.html#a0a588c22946f8cc16328973035ed19e3" title="Free the specified hash table; the caller is responsible for freeing the key strings pointed to by th...">hash_table_free</a>(model-><a class="code" href="structngram__model__s.html#a75567419a8002ef6e916c81f5d9ee9ed" title="Mapping of unigram names to word IDs.">wid</a>); <a name="l00292"></a>00292 <a class="code" href="ckd__alloc_8h.html#a31c6b405558620ac37599737b5722fbf" title="Test and free a 1-D array.">ckd_free</a>(model-><a class="code" href="structngram__model__s.html#ae625e779e340845f03fb3da164e93039" title="Unigram names.">word_str</a>); <a name="l00293"></a>00293 <a class="code" href="ckd__alloc_8h.html#a31c6b405558620ac37599737b5722fbf" title="Test and free a 1-D array.">ckd_free</a>(model-><a class="code" 
href="structngram__model__s.html#a9dcba9b49cc1cd189b257e5838da0eee" title="Counts for 1, 2, 3, ...">n_counts</a>); <a name="l00294"></a>00294 <a class="code" href="ckd__alloc_8h.html#a31c6b405558620ac37599737b5722fbf" title="Test and free a 1-D array.">ckd_free</a>(model); <a name="l00295"></a>00295 <span class="keywordflow">return</span> 0; <a name="l00296"></a>00296 } <a name="l00297"></a>00297 <a name="l00298"></a>00298 <span class="keywordtype">int</span> <a name="l00299"></a><a class="code" href="ngram__model_8h.html#a41b938a2c4b129dc0df37b2312d65506">00299</a> <a class="code" href="ngram__model_8h.html#a41b938a2c4b129dc0df37b2312d65506" title="Case-fold word strings in an N-Gram model.">ngram_model_casefold</a>(<a class="code" href="structngram__model__s.html" title="Common implementation of ngram_model_t.">ngram_model_t</a> *model, <span class="keywordtype">int</span> kase) <a name="l00300"></a>00300 { <a name="l00301"></a>00301 <span class="keywordtype">int</span> writable, i; <a name="l00302"></a>00302 <a class="code" href="structhash__table__t.html">hash_table_t</a> *new_wid; <a name="l00303"></a>00303 <a name="l00304"></a>00304 <span class="comment">/* Were word strings already allocated? */</span> <a name="l00305"></a>00305 writable = model-><a class="code" href="structngram__model__s.html#a78a3253febced2cae4732044da466ee6" title="Are word strings writable?">writable</a>; <a name="l00306"></a>00306 <span class="comment">/* Either way, we are going to allocate some word strings. */</span> <a name="l00307"></a>00307 model-><a class="code" href="structngram__model__s.html#a78a3253febced2cae4732044da466ee6" title="Are word strings writable?">writable</a> = TRUE; <a name="l00308"></a>00308 <a name="l00309"></a>00309 <span class="comment">/* And, don't forget, we need to rebuild the word to unigram ID</span> <a name="l00310"></a>00310 <span class="comment"> * mapping. 
*/</span> <a name="l00311"></a>00311 new_wid = <a class="code" href="hash__table_8h.html#a56d93e8c03e066b77377ac6eab50cfae" title="Allocate a new hash table for a given expected size.">hash_table_new</a>(model-><a class="code" href="structngram__model__s.html#a74f85927ef0d5513a1e6c02d13864be3" title="Number of actual word strings (NOT the same as the number of unigrams, due to class words)...">n_words</a>, FALSE); <a name="l00312"></a>00312 <span class="keywordflow">for</span> (i = 0; i &lt; model-><a class="code" href="structngram__model__s.html#a74f85927ef0d5513a1e6c02d13864be3" title="Number of actual word strings (NOT the same as the number of unigrams, due to class words)...">n_words</a>; ++i) { <a name="l00313"></a>00313 <span class="keywordtype">char</span> *outstr; <a name="l00314"></a>00314 <span class="keywordflow">if</span> (writable) { <a name="l00315"></a>00315 outstr = model-><a class="code" href="structngram__model__s.html#ae625e779e340845f03fb3da164e93039" title="Unigram names.">word_str</a>[i]; <a name="l00316"></a>00316 } <a name="l00317"></a>00317 <span class="keywordflow">else</span> { <a name="l00318"></a>00318 outstr = <a class="code" href="ckd__alloc_8h.html#ad313f92478859f9e4ea99d0f6e78c393" title="Macro for __ckd_salloc__.">ckd_salloc</a>(model-><a class="code" href="structngram__model__s.html#ae625e779e340845f03fb3da164e93039" title="Unigram names.">word_str</a>[i]); <a name="l00319"></a>00319 } <a name="l00320"></a>00320 <span class="comment">/* Don't case-fold &lt;tags&gt; or [classes] */</span> <a name="l00321"></a>00321 <span class="keywordflow">if</span> (outstr[0] == <span class="charliteral">'&lt;'</span> || outstr[0] == <span class="charliteral">'['</span>) { <a name="l00322"></a>00322 } <a name="l00323"></a>00323 <span class="keywordflow">else</span> { <a name="l00324"></a>00324 <span class="keywordflow">switch</span> (kase) { <a name="l00325"></a>00325 <span class="keywordflow">case</span> NGRAM_UPPER: <a name="l00326"></a>00326 <a
class="code" href="case_8h.html#a79d99e36b7c2c36dcc6f7f0df746384e" title="Convert str to all upper case.">ucase</a>(outstr); <a name="l00327"></a>00327 <span class="keywordflow">break</span>; <a name="l00328"></a>00328 <span class="keywordflow">case</span> NGRAM_LOWER: <a name="l00329"></a>00329 <a class="code" href="case_8h.html#ac0e30dac40f15762f39270f65bd8cdba" title="Convert str to all lower case.">lcase</a>(outstr); <a name="l00330"></a>00330 <span class="keywordflow">break</span>; <a name="l00331"></a>00331 <span class="keywordflow">default</span>: <a name="l00332"></a>00332 ; <a name="l00333"></a>00333 } <a name="l00334"></a>00334 } <a name="l00335"></a>00335 model-><a class="code" href="structngram__model__s.html#ae625e779e340845f03fb3da164e93039" title="Unigram names.">word_str</a>[i] = outstr; <a name="l00336"></a>00336 <a name="l00337"></a>00337 <span class="comment">/* Now update the hash table. We might have terrible</span> <a name="l00338"></a>00338 <span class="comment"> * collisions here, so warn about them. */</span> <a name="l00339"></a>00339 <span class="keywordflow">if</span> (<a class="code" href="hash__table_8h.html#a393c56322e54607a48e6bc61169d92bf" title="Add a 32-bit integer value to a hash table.">hash_table_enter_int32</a>(new_wid, model-><a class="code" href="structngram__model__s.html#ae625e779e340845f03fb3da164e93039" title="Unigram names.">word_str</a>[i], i) != i) { <a name="l00340"></a>00340 <a class="code" href="err_8h.html#a6a794bec721b555ac1f2167f9e12f662" title="Print warning information to standard error stream.">E_WARN</a>(<span class="stringliteral">"Duplicate word in dictionary after conversion: %s\n"</span>, <a name="l00341"></a>00341 model-><a class="code" href="structngram__model__s.html#ae625e779e340845f03fb3da164e93039" title="Unigram names.">word_str</a>[i]); <a name="l00342"></a>00342 } <a name="l00343"></a>00343 } <a name="l00344"></a>00344 <span class="comment">/* Swap out the hash table. 
*/</span> <a name="l00345"></a>00345 <a class="code" href="hash__table_8h.html#a0a588c22946f8cc16328973035ed19e3" title="Free the specified hash table; the caller is responsible for freeing the key strings pointed to by th...">hash_table_free</a>(model-><a class="code" href="structngram__model__s.html#a75567419a8002ef6e916c81f5d9ee9ed" title="Mapping of unigram names to word IDs.">wid</a>); <a name="l00346"></a>00346 model-><a class="code" href="structngram__model__s.html#a75567419a8002ef6e916c81f5d9ee9ed" title="Mapping of unigram names to word IDs.">wid</a> = new_wid; <a name="l00347"></a>00347 <span class="keywordflow">return</span> 0; <a name="l00348"></a>00348 } <a name="l00349"></a>00349 <a name="l00350"></a>00350 <span class="preprocessor">#ifdef HAVE_ICONV</span> <a name="l00351"></a>00351 <span class="preprocessor"></span><span class="keywordtype">int</span> <a name="l00352"></a>00352 <a class="code" href="ngram__model_8h.html#aac2b3fd054597c2fdfbb59db97d72ec0" title="Re-encode word strings in an N-Gram model.">ngram_model_recode</a>(<a class="code" href="structngram__model__s.html" title="Common implementation of ngram_model_t.">ngram_model_t</a> *model, <span class="keyword">const</span> <span class="keywordtype">char</span> *from, <span class="keyword">const</span> <span class="keywordtype">char</span> *to) <a name="l00353"></a>00353 { <a name="l00354"></a>00354 iconv_t ic; <a name="l00355"></a>00355 <span class="keywordtype">char</span> *outbuf; <a name="l00356"></a>00356 <span class="keywordtype">size_t</span> maxlen; <a name="l00357"></a>00357 <span class="keywordtype">int</span> i, writable; <a name="l00358"></a>00358 <a class="code" href="structhash__table__t.html">hash_table_t</a> *new_wid; <a name="l00359"></a>00359 <a name="l00360"></a>00360 <span class="comment">/* FIXME: Need to do a special case thing for the GB-HEX encoding</span> <a name="l00361"></a>00361 <span class="comment"> * used in Sphinx3 Mandarin models. 
*/</span> <a name="l00362"></a>00362 <span class="keywordflow">if</span> ((ic = iconv_open(to, from)) == (iconv_t)-1) { <a name="l00363"></a>00363 <a class="code" href="err_8h.html#a54ffbfe898d74595c586a1f48f32ef03" title="Print error text; Call perror(&quot;&quot;);.">E_ERROR_SYSTEM</a>(<span class="stringliteral">"iconv_open() failed"</span>); <a name="l00364"></a>00364 <span class="keywordflow">return</span> -1; <a name="l00365"></a>00365 } <a name="l00366"></a>00366 <span class="comment">/* iconv(3) is a piece of crap and won't accept a NULL out buffer,</span> <a name="l00367"></a>00367 <span class="comment"> * unlike wcstombs(3). So we have to either call it over and over</span> <a name="l00368"></a>00368 <span class="comment"> * again until our buffer is big enough, or call it with a huge</span> <a name="l00369"></a>00369 <span class="comment"> * buffer and then copy things back to the output. We will use a</span> <a name="l00370"></a>00370 <span class="comment"> * mix of these two approaches here. 
We'll keep a single big</span> <a name="l00371"></a>00371 <span class="comment"> * buffer around, and expand it as necessary.</span> <a name="l00372"></a>00372 <span class="comment"> */</span> <a name="l00373"></a>00373 maxlen = 0; <a name="l00374"></a>00374 <span class="keywordflow">for</span> (i = 0; i < model-><a class="code" href="structngram__model__s.html#a74f85927ef0d5513a1e6c02d13864be3" title="Number of actual word strings (NOT the same as the number of unigrams, due to class words)...">n_words</a>; ++i) { <a name="l00375"></a>00375 <span class="keywordflow">if</span> (strlen(model-><a class="code" href="structngram__model__s.html#ae625e779e340845f03fb3da164e93039" title="Unigram names.">word_str</a>[i]) > maxlen) <a name="l00376"></a>00376 maxlen = strlen(model-><a class="code" href="structngram__model__s.html#ae625e779e340845f03fb3da164e93039" title="Unigram names.">word_str</a>[i]); <a name="l00377"></a>00377 } <a name="l00378"></a>00378 <span class="comment">/* Were word strings already allocated? */</span> <a name="l00379"></a>00379 writable = model-><a class="code" href="structngram__model__s.html#a78a3253febced2cae4732044da466ee6" title="Are word strings writable?">writable</a>; <a name="l00380"></a>00380 <span class="comment">/* Either way, we are going to allocate some word strings. */</span> <a name="l00381"></a>00381 model-><a class="code" href="structngram__model__s.html#a78a3253febced2cae4732044da466ee6" title="Are word strings writable?">writable</a> = TRUE; <a name="l00382"></a>00382 <span class="comment">/* Really should be big enough except for pathological cases. 
*/</span> <a name="l00383"></a>00383 maxlen = maxlen * <span class="keyword">sizeof</span>(int) + 15; <a name="l00384"></a>00384 outbuf = <a class="code" href="ckd__alloc_8h.html#aa00ef21903bc4f8a972488417adc8d2e" title="Macros to simplify the use of above functions.">ckd_calloc</a>(maxlen, 1); <a name="l00385"></a>00385 <span class="comment">/* And, don't forget, we need to rebuild the word to unigram ID</span> <a name="l00386"></a>00386 <span class="comment"> * mapping. */</span> <a name="l00387"></a>00387 new_wid = <a class="code" href="hash__table_8h.html#a56d93e8c03e066b77377ac6eab50cfae" title="Allocate a new hash table for a given expected size.">hash_table_new</a>(model-><a class="code" href="structngram__model__s.html#a74f85927ef0d5513a1e6c02d13864be3" title="Number of actual word strings (NOT the same as the number of unigrams, due to class words)...">n_words</a>, FALSE); <a name="l00388"></a>00388 <span class="keywordflow">for</span> (i = 0; i < model-><a class="code" href="structngram__model__s.html#a74f85927ef0d5513a1e6c02d13864be3" title="Number of actual word strings (NOT the same as the number of unigrams, due to class words)...">n_words</a>; ++i) { <a name="l00389"></a>00389 ICONV_CONST <span class="keywordtype">char</span> *in; <a name="l00390"></a>00390 <span class="keywordtype">char</span> *out; <a name="l00391"></a>00391 <span class="keywordtype">size_t</span> inleft, outleft, result; <a name="l00392"></a>00392 <a name="l00393"></a>00393 start_conversion: <a name="l00394"></a>00394 in = (ICONV_CONST <span class="keywordtype">char</span> *)model-><a class="code" href="structngram__model__s.html#ae625e779e340845f03fb3da164e93039" title="Unigram names.">word_str</a>[i]; <a name="l00395"></a>00395 <span class="comment">/* Yes, this assumes that we don't have any NUL bytes. 
*/</span> <a name="l00396"></a>00396 inleft = strlen(in); <a name="l00397"></a>00397 out = outbuf; <a name="l00398"></a>00398 outleft = maxlen; <a name="l00399"></a>00399 <a name="l00400"></a>00400 <span class="keywordflow">while</span> ((result = iconv(ic, &in, &inleft, &out, &outleft)) == (size_t)-1) { <a name="l00401"></a>00401 <span class="keywordflow">if</span> (errno != E2BIG) { <a name="l00402"></a>00402 <span class="comment">/* FIXME: if we already converted any words, then they</span> <a name="l00403"></a>00403 <span class="comment"> * are going to be in an inconsistent state. */</span> <a name="l00404"></a>00404 <a class="code" href="err_8h.html#a54ffbfe898d74595c586a1f48f32ef03" title="Print error text; Call perror(&quot;&quot;);.">E_ERROR_SYSTEM</a>(<span class="stringliteral">"iconv() failed"</span>); <a name="l00405"></a>00405 <a class="code" href="ckd__alloc_8h.html#a31c6b405558620ac37599737b5722fbf" title="Test and free a 1-D array.">ckd_free</a>(outbuf); <a name="l00406"></a>00406 <a class="code" href="hash__table_8h.html#a0a588c22946f8cc16328973035ed19e3" title="Free the specified hash table; the caller is responsible for freeing the key strings pointed to by th...">hash_table_free</a>(new_wid); <a name="l00407"></a>00407 <span class="keywordflow">return</span> -1; <a name="l00408"></a>00408 } <a name="l00409"></a>00409 <span class="comment">/* Reset the internal state of conversion. */</span> <a name="l00410"></a>00410 iconv(ic, NULL, NULL, NULL, NULL); <a name="l00411"></a>00411 <span class="comment">/* Make everything bigger. */</span> <a name="l00412"></a>00412 maxlen *= 2; <a name="l00413"></a>00413 out = outbuf = <a class="code" href="ckd__alloc_8h.html#afd496738b3e114bd494c5a0955f1bfb3" title="Macro for __ckd_realloc__.">ckd_realloc</a>(outbuf, maxlen); <a name="l00414"></a>00414 <span class="comment">/* Reset the input pointers. 
*/</span> <a name="l00415"></a>00415 in = (ICONV_CONST <span class="keywordtype">char</span> *)model-><a class="code" href="structngram__model__s.html#ae625e779e340845f03fb3da164e93039" title="Unigram names.">word_str</a>[i]; <a name="l00416"></a>00416 inleft = strlen(in); <a name="l00417"></a>00417 } <a name="l00418"></a>00418 <a name="l00419"></a>00419 <span class="comment">/* Now flush a shift-out sequence, if any. */</span> <a name="l00420"></a>00420 <span class="keywordflow">if</span> ((result = iconv(ic, NULL, NULL, &out, &outleft)) == (size_t)-1) { <a name="l00421"></a>00421 <span class="keywordflow">if</span> (errno != E2BIG) { <a name="l00422"></a>00422 <span class="comment">/* FIXME: if we already converted any words, then they</span> <a name="l00423"></a>00423 <span class="comment"> * are going to be in an inconsistent state. */</span> <a name="l00424"></a>00424 <a class="code" href="err_8h.html#a54ffbfe898d74595c586a1f48f32ef03" title="Print error text; Call perror(&quot;&quot;);.">E_ERROR_SYSTEM</a>(<span class="stringliteral">"iconv() failed (state reset sequence)"</span>); <a name="l00425"></a>00425 <a class="code" href="ckd__alloc_8h.html#a31c6b405558620ac37599737b5722fbf" title="Test and free a 1-D array.">ckd_free</a>(outbuf); <a name="l00426"></a>00426 <a class="code" href="hash__table_8h.html#a0a588c22946f8cc16328973035ed19e3" title="Free the specified hash table; the caller is responsible for freeing the key strings pointed to by th...">hash_table_free</a>(new_wid); <a name="l00427"></a>00427 <span class="keywordflow">return</span> -1; <a name="l00428"></a>00428 } <a name="l00429"></a>00429 <span class="comment">/* Reset the internal state of conversion. */</span> <a name="l00430"></a>00430 iconv(ic, NULL, NULL, NULL, NULL); <a name="l00431"></a>00431 <span class="comment">/* Make everything bigger. 
*/</span> <a name="l00432"></a>00432 maxlen *= 2; <a name="l00433"></a>00433 outbuf = <a class="code" href="ckd__alloc_8h.html#afd496738b3e114bd494c5a0955f1bfb3" title="Macro for __ckd_realloc__.">ckd_realloc</a>(outbuf, maxlen); <a name="l00434"></a>00434 <span class="comment">/* Be very evil. */</span> <a name="l00435"></a>00435 <span class="keywordflow">goto</span> start_conversion; <a name="l00436"></a>00436 } <a name="l00437"></a>00437 <a name="l00438"></a>00438 result = maxlen - outleft; <a name="l00439"></a>00439 <span class="comment">/* Okay, that was hard, now let's go shopping. */</span> <a name="l00440"></a>00440 <span class="keywordflow">if</span> (writable) { <a name="l00441"></a>00441 <span class="comment">/* Grow or shrink the output string as necessary. */</span> <a name="l00442"></a>00442 model-><a class="code" href="structngram__model__s.html#ae625e779e340845f03fb3da164e93039" title="Unigram names.">word_str</a>[i] = <a class="code" href="ckd__alloc_8h.html#afd496738b3e114bd494c5a0955f1bfb3" title="Macro for __ckd_realloc__.">ckd_realloc</a>(model-><a class="code" href="structngram__model__s.html#ae625e779e340845f03fb3da164e93039" title="Unigram names.">word_str</a>[i], result + 1); <a name="l00443"></a>00443 model-><a class="code" href="structngram__model__s.html#ae625e779e340845f03fb3da164e93039" title="Unigram names.">word_str</a>[i][result] = <span class="charliteral">'\0'</span>; <a name="l00444"></a>00444 } <a name="l00445"></a>00445 <span class="keywordflow">else</span> { <a name="l00446"></a>00446 <span class="comment">/* It actually was not allocated previously, so do that now. 
*/</span> <a name="l00447"></a>00447 model-><a class="code" href="structngram__model__s.html#ae625e779e340845f03fb3da164e93039" title="Unigram names.">word_str</a>[i] = <a class="code" href="ckd__alloc_8h.html#aa00ef21903bc4f8a972488417adc8d2e" title="Macros to simplify the use of above functions.">ckd_calloc</a>(result + 1, 1); <a name="l00448"></a>00448 } <a name="l00449"></a>00449 <span class="comment">/* Copy the new thing in. */</span> <a name="l00450"></a>00450 memcpy(model-><a class="code" href="structngram__model__s.html#ae625e779e340845f03fb3da164e93039" title="Unigram names.">word_str</a>[i], outbuf, result); <a name="l00451"></a>00451 <a name="l00452"></a>00452 <span class="comment">/* Now update the hash table. We might have terrible</span> <a name="l00453"></a>00453 <span class="comment"> * collisions if a non-reversible conversion was requested,</span> <a name="l00454"></a>00454 <span class="comment"> * so warn about them. */</span> <a name="l00455"></a>00455 <span class="keywordflow">if</span> (<a class="code" href="hash__table_8h.html#a393c56322e54607a48e6bc61169d92bf" title="Add a 32-bit integer value to a hash table.">hash_table_enter_int32</a>(new_wid, model-><a class="code" href="structngram__model__s.html#ae625e779e340845f03fb3da164e93039" title="Unigram names.">word_str</a>[i], i) != i) { <a name="l00456"></a>00456 <a class="code" href="err_8h.html#a6a794bec721b555ac1f2167f9e12f662" title="Print warning information to standard error stream.">E_WARN</a>(<span class="stringliteral">"Duplicate word in dictionary after conversion: %s\n"</span>, <a name="l00457"></a>00457 model-><a class="code" href="structngram__model__s.html#ae625e779e340845f03fb3da164e93039" title="Unigram names.">word_str</a>[i]); <a name="l00458"></a>00458 } <a name="l00459"></a>00459 } <a name="l00460"></a>00460 <a class="code" href="ckd__alloc_8h.html#a31c6b405558620ac37599737b5722fbf" title="Test and free a 1-D array.">ckd_free</a>(outbuf); <a name="l00461"></a>00461
iconv_close(ic); <a name="l00462"></a>00462 <span class="comment">/* Swap out the hash table. */</span> <a name="l00463"></a>00463 <a class="code" href="hash__table_8h.html#a0a588c22946f8cc16328973035ed19e3" title="Free the specified hash table; the caller is responsible for freeing the key strings pointed to by th...">hash_table_free</a>(model-><a class="code" href="structngram__model__s.html#a75567419a8002ef6e916c81f5d9ee9ed" title="Mapping of unigram names to word IDs.">wid</a>); <a name="l00464"></a>00464 model-><a class="code" href="structngram__model__s.html#a75567419a8002ef6e916c81f5d9ee9ed" title="Mapping of unigram names to word IDs.">wid</a> = new_wid; <a name="l00465"></a>00465 <a name="l00466"></a>00466 <span class="keywordflow">return</span> 0; <a name="l00467"></a>00467 } <a name="l00468"></a>00468 <span class="preprocessor">#else </span><span class="comment">/* !HAVE_ICONV */</span> <a name="l00469"></a>00469 <span class="keywordtype">int</span> <a name="l00470"></a><a class="code" href="ngram__model_8h.html#aac2b3fd054597c2fdfbb59db97d72ec0">00470</a> <a class="code" href="ngram__model_8h.html#aac2b3fd054597c2fdfbb59db97d72ec0" title="Re-encode word strings in an N-Gram model.">ngram_model_recode</a>(<a class="code" href="structngram__model__s.html" title="Common implementation of ngram_model_t.">ngram_model_t</a> *model, <span class="keyword">const</span> <span class="keywordtype">char</span> *from, <span class="keyword">const</span> <span class="keywordtype">char</span> *to) <a name="l00471"></a>00471 { <a name="l00472"></a>00472 <span class="keywordflow">return</span> -1; <a name="l00473"></a>00473 } <a name="l00474"></a>00474 <span class="preprocessor">#endif </span><span class="comment">/* !HAVE_ICONV */</span> <a name="l00475"></a>00475 <a name="l00476"></a>00476 <span class="keywordtype">int</span> <a name="l00477"></a><a class="code" href="ngram__model_8h.html#aa4b8d7c1f3d873b8458c0cfee13af4da">00477</a> <a class="code" 
href="ngram__model_8h.html#aa4b8d7c1f3d873b8458c0cfee13af4da" title="Apply a language weight, insertion penalty, and unigram weight to a language model.">ngram_model_apply_weights</a>(<a class="code" href="structngram__model__s.html" title="Common implementation of ngram_model_t.">ngram_model_t</a> *model, <a name="l00478"></a>00478 float32 lw, float32 wip, float32 uw) <a name="l00479"></a>00479 { <a name="l00480"></a>00480 <span class="keywordflow">return</span> (*model-><a class="code" href="structngram__model__s.html#ad3d9d8ad9773f958a89534220eda6fb9" title="Implementation-specific methods.">funcs</a>-><a class="code" href="structngram__funcs__s.html#abfd7e53c672aef5a34ec5114ec475916" title="Implementation-specific function for applying language model weights.">apply_weights</a>)(model, lw, wip, uw); <a name="l00481"></a>00481 } <a name="l00482"></a>00482 <a name="l00483"></a>00483 float32 <a name="l00484"></a><a class="code" href="ngram__model_8h.html#a30170c1307e065e1f7159a04f75df304">00484</a> <a class="code" href="ngram__model_8h.html#a30170c1307e065e1f7159a04f75df304" title="Get the current weights from a language model.">ngram_model_get_weights</a>(<a class="code" href="structngram__model__s.html" title="Common implementation of ngram_model_t.">ngram_model_t</a> *model, int32 *out_log_wip, <a name="l00485"></a>00485 int32 *out_log_uw) <a name="l00486"></a>00486 { <a name="l00487"></a>00487 <span class="keywordflow">if</span> (out_log_wip) *out_log_wip = model-><a class="code" href="structngram__model__s.html#a3d6bf5632760a16e52cb881d7010d774" title="Log of word insertion penalty.">log_wip</a>; <a name="l00488"></a>00488 <span class="keywordflow">if</span> (out_log_uw) *out_log_uw = model-><a class="code" href="structngram__model__s.html#a6f0ec7b8b9d13d590bbe4b59df573abc" title="Log of unigram weight.">log_uw</a>; <a name="l00489"></a>00489 <span class="keywordflow">return</span> model-><a class="code" 
href="structngram__model__s.html#a76ea0c65b23de80091e7c602bdb43bde" title="Language model scaling factor.">lw</a>; <a name="l00490"></a>00490 } <a name="l00491"></a>00491 <a name="l00492"></a>00492 <a name="l00493"></a>00493 int32 <a name="l00494"></a><a class="code" href="ngram__model_8h.html#a6ac5799e78ea4ad82a11e2439016471e">00494</a> <a class="code" href="ngram__model_8h.html#a6ac5799e78ea4ad82a11e2439016471e" title="Quick general N-Gram score lookup.">ngram_ng_score</a>(<a class="code" href="structngram__model__s.html" title="Common implementation of ngram_model_t.">ngram_model_t</a> *model, int32 wid, int32 *history, <a name="l00495"></a>00495 int32 n_hist, int32 *n_used) <a name="l00496"></a>00496 { <a name="l00497"></a>00497 int32 score, class_weight = 0; <a name="l00498"></a>00498 <span class="keywordtype">int</span> i; <a name="l00499"></a>00499 <a name="l00500"></a>00500 <span class="comment">/* Closed vocabulary, OOV word probability is zero */</span> <a name="l00501"></a>00501 <span class="keywordflow">if</span> (wid == <a class="code" href="ngram__model_8h.html#a3d4b3dddd0ff67e13d30c9bf053d01ab" title="Impossible word ID.">NGRAM_INVALID_WID</a>) <a name="l00502"></a>00502 <span class="keywordflow">return</span> model-><a class="code" href="structngram__model__s.html#a65425a599c4bcc4dda809d81149b8fc0" title="Zero probability, cached here for quick lookup.">log_zero</a>; <a name="l00503"></a>00503 <a name="l00504"></a>00504 <span class="comment">/* "Declassify" wid and history */</span> <a name="l00505"></a>00505 <span class="keywordflow">if</span> (NGRAM_IS_CLASSWID(wid)) { <a name="l00506"></a>00506 <a class="code" href="structngram__class__s.html" title="Implementation of ngram_class_t.">ngram_class_t</a> *lmclass = model-><a class="code" href="structngram__model__s.html#a39384af3e6b53591d433436db011ac8d" title="Word class definitions.">classes</a>[NGRAM_CLASSID(wid)]; <a name="l00507"></a>00507 <a name="l00508"></a>00508 class_weight = 
ngram_class_prob(lmclass, wid); <a name="l00509"></a>00509 <span class="keywordflow">if</span> (class_weight == 1) <span class="comment">/* Meaning, not found in class. */</span> <a name="l00510"></a>00510 <span class="keywordflow">return</span> model-><a class="code" href="structngram__model__s.html#a65425a599c4bcc4dda809d81149b8fc0" title="Zero probability, cached here for quick lookup.">log_zero</a>; <a name="l00511"></a>00511 wid = lmclass-><a class="code" href="structngram__class__s.html#ab5f3cc0142c9fd91b3c3d0e59906b556" title="Base word ID for this class tag.">tag_wid</a>; <a name="l00512"></a>00512 } <a name="l00513"></a>00513 <span class="keywordflow">for</span> (i = 0; i < n_hist; ++i) { <a name="l00514"></a>00514 <span class="keywordflow">if</span> (history[i] != <a class="code" href="ngram__model_8h.html#a3d4b3dddd0ff67e13d30c9bf053d01ab" title="Impossible word ID.">NGRAM_INVALID_WID</a> && NGRAM_IS_CLASSWID(history[i])) <a name="l00515"></a>00515 history[i] = model-><a class="code" href="structngram__model__s.html#a39384af3e6b53591d433436db011ac8d" title="Word class definitions.">classes</a>[NGRAM_CLASSID(history[i])]-><a class="code" href="structngram__class__s.html#ab5f3cc0142c9fd91b3c3d0e59906b556" title="Base word ID for this class tag.">tag_wid</a>; <a name="l00516"></a>00516 } <a name="l00517"></a>00517 score = (*model-><a class="code" href="structngram__model__s.html#ad3d9d8ad9773f958a89534220eda6fb9" title="Implementation-specific methods.">funcs</a>-><a class="code" href="structngram__funcs__s.html#a81b0c7948179c2572fb274401b82278e" title="Implementation-specific function for querying language model score.">score</a>)(model, wid, history, n_hist, n_used); <a name="l00518"></a>00518 <a name="l00519"></a>00519 <span class="comment">/* Multiply by unigram in-class weight. 
*/</span> <a name="l00520"></a>00520 <span class="keywordflow">return</span> score + class_weight; <a name="l00521"></a>00521 } <a name="l00522"></a>00522 <a name="l00523"></a>00523 int32 <a name="l00524"></a><a class="code" href="ngram__model_8h.html#a9c23d79885af400e17ac2a1b7169660d">00524</a> <a class="code" href="ngram__model_8h.html#a9c23d79885af400e17ac2a1b7169660d" title="Get the score (scaled, interpolated log-probability) for a general N-Gram.">ngram_score</a>(<a class="code" href="structngram__model__s.html" title="Common implementation of ngram_model_t.">ngram_model_t</a> *model, <span class="keyword">const</span> <span class="keywordtype">char</span> *word, ...) <a name="l00525"></a>00525 { <a name="l00526"></a>00526 va_list history; <a name="l00527"></a>00527 <span class="keyword">const</span> <span class="keywordtype">char</span> *hword; <a name="l00528"></a>00528 int32 *histid; <a name="l00529"></a>00529 int32 n_hist; <a name="l00530"></a>00530 int32 n_used; <a name="l00531"></a>00531 int32 prob; <a name="l00532"></a>00532 <a name="l00533"></a>00533 va_start(history, word); <a name="l00534"></a>00534 n_hist = 0; <a name="l00535"></a>00535 <span class="keywordflow">while</span> ((hword = va_arg(history, <span class="keyword">const</span> <span class="keywordtype">char</span> *)) != NULL) <a name="l00536"></a>00536 ++n_hist; <a name="l00537"></a>00537 va_end(history); <a name="l00538"></a>00538 <a name="l00539"></a>00539 histid = <a class="code" href="ckd__alloc_8h.html#aa00ef21903bc4f8a972488417adc8d2e" title="Macros to simplify the use of above functions.">ckd_calloc</a>(n_hist, <span class="keyword">sizeof</span>(*histid)); <a name="l00540"></a>00540 va_start(history, word); <a name="l00541"></a>00541 n_hist = 0; <a name="l00542"></a>00542 <span class="keywordflow">while</span> ((hword = va_arg(history, <span class="keyword">const</span> <span class="keywordtype">char</span> *)) != NULL) { <a name="l00543"></a>00543 histid[n_hist] = <a class="code" 
href="ngram__model_8h.html#ad03d4355d4ea659815dc25bce8d83880" title="Look up numerical word ID.">ngram_wid</a>(model, hword); <a name="l00544"></a>00544 ++n_hist; <a name="l00545"></a>00545 } <a name="l00546"></a>00546 va_end(history); <a name="l00547"></a>00547 <a name="l00548"></a>00548 prob = <a class="code" href="ngram__model_8h.html#a6ac5799e78ea4ad82a11e2439016471e" title="Quick general N-Gram score lookup.">ngram_ng_score</a>(model, <a class="code" href="ngram__model_8h.html#ad03d4355d4ea659815dc25bce8d83880" title="Look up numerical word ID.">ngram_wid</a>(model, word), <a name="l00549"></a>00549 histid, n_hist, &n_used); <a name="l00550"></a>00550 <a class="code" href="ckd__alloc_8h.html#a31c6b405558620ac37599737b5722fbf" title="Test and free a 1-D array.">ckd_free</a>(histid); <a name="l00551"></a>00551 <span class="keywordflow">return</span> prob; <a name="l00552"></a>00552 } <a name="l00553"></a>00553 <a name="l00554"></a>00554 int32 <a name="l00555"></a><a class="code" href="ngram__model_8h.html#a0f8ad53c1a7cab528113b74aad00f15a">00555</a> <a class="code" href="ngram__model_8h.html#a0f8ad53c1a7cab528113b74aad00f15a" title="Quick trigram score lookup.">ngram_tg_score</a>(<a class="code" href="structngram__model__s.html" title="Common implementation of ngram_model_t.">ngram_model_t</a> *model, int32 w3, int32 w2, int32 w1, int32 *n_used) <a name="l00556"></a>00556 { <a name="l00557"></a>00557 int32 hist[2]; <a name="l00558"></a>00558 hist[0] = w2; <a name="l00559"></a>00559 hist[1] = w1; <a name="l00560"></a>00560 <span class="keywordflow">return</span> <a class="code" href="ngram__model_8h.html#a6ac5799e78ea4ad82a11e2439016471e" title="Quick general N-Gram score lookup.">ngram_ng_score</a>(model, w3, hist, 2, n_used); <a name="l00561"></a>00561 } <a name="l00562"></a>00562 <a name="l00563"></a>00563 int32 <a name="l00564"></a><a class="code" href="ngram__model_8h.html#a8c06698fbcb0ef82420088ace045ae81">00564</a> <a class="code" 
href="ngram__model_8h.html#a8c06698fbcb0ef82420088ace045ae81" title="Quick bigram score lookup.">ngram_bg_score</a>(<a class="code" href="structngram__model__s.html" title="Common implementation of ngram_model_t.">ngram_model_t</a> *model, int32 w2, int32 w1, int32 *n_used) <a name="l00565"></a>00565 { <a name="l00566"></a>00566 <span class="keywordflow">return</span> <a class="code" href="ngram__model_8h.html#a6ac5799e78ea4ad82a11e2439016471e" title="Quick general N-Gram score lookup.">ngram_ng_score</a>(model, w2, &w1, 1, n_used); <a name="l00567"></a>00567 } <a name="l00568"></a>00568 <a name="l00569"></a>00569 int32 <a name="l00570"></a><a class="code" href="ngram__model_8h.html#a218d8d140b93d3d8008f8933f9e04ec6">00570</a> <a class="code" href="ngram__model_8h.html#a218d8d140b93d3d8008f8933f9e04ec6" title="Quick &quot;raw&quot; probability lookup for a general N-Gram.">ngram_ng_prob</a>(<a class="code" href="structngram__model__s.html" title="Common implementation of ngram_model_t.">ngram_model_t</a> *model, int32 wid, int32 *history, <a name="l00571"></a>00571 int32 n_hist, int32 *n_used) <a name="l00572"></a>00572 { <a name="l00573"></a>00573 int32 prob, class_weight = 0; <a name="l00574"></a>00574 <span class="keywordtype">int</span> i; <a name="l00575"></a>00575 <a name="l00576"></a>00576 <span class="comment">/* Closed vocabulary, OOV word probability is zero */</span> <a name="l00577"></a>00577 <span class="keywordflow">if</span> (wid == <a class="code" href="ngram__model_8h.html#a3d4b3dddd0ff67e13d30c9bf053d01ab" title="Impossible word ID.">NGRAM_INVALID_WID</a>) <a name="l00578"></a>00578 <span class="keywordflow">return</span> model-><a class="code" href="structngram__model__s.html#a65425a599c4bcc4dda809d81149b8fc0" title="Zero probability, cached here for quick lookup.">log_zero</a>; <a name="l00579"></a>00579 <a name="l00580"></a>00580 <span class="comment">/* "Declassify" wid and history */</span> <a name="l00581"></a>00581 <span 
class="keywordflow">if</span> (NGRAM_IS_CLASSWID(wid)) { <a name="l00582"></a>00582 <a class="code" href="structngram__class__s.html" title="Implementation of ngram_class_t.">ngram_class_t</a> *lmclass = model-><a class="code" href="structngram__model__s.html#a39384af3e6b53591d433436db011ac8d" title="Word class definitions.">classes</a>[NGRAM_CLASSID(wid)]; <a name="l00583"></a>00583 <a name="l00584"></a>00584 class_weight = ngram_class_prob(lmclass, wid); <a name="l00585"></a>00585 <span class="keywordflow">if</span> (class_weight == 1) <span class="comment">/* Meaning, not found in class. */</span> <a name="l00586"></a>00586 <span class="keywordflow">return</span> class_weight; <a name="l00587"></a>00587 wid = lmclass-><a class="code" href="structngram__class__s.html#ab5f3cc0142c9fd91b3c3d0e59906b556" title="Base word ID for this class tag.">tag_wid</a>; <a name="l00588"></a>00588 } <a name="l00589"></a>00589 <span class="keywordflow">for</span> (i = 0; i < n_hist; ++i) { <a name="l00590"></a>00590 <span class="keywordflow">if</span> (history[i] != <a class="code" href="ngram__model_8h.html#a3d4b3dddd0ff67e13d30c9bf053d01ab" title="Impossible word ID.">NGRAM_INVALID_WID</a> && NGRAM_IS_CLASSWID(history[i])) <a name="l00591"></a>00591 history[i] = model-><a class="code" href="structngram__model__s.html#a39384af3e6b53591d433436db011ac8d" title="Word class definitions.">classes</a>[NGRAM_CLASSID(history[i])]-><a class="code" href="structngram__class__s.html#ab5f3cc0142c9fd91b3c3d0e59906b556" title="Base word ID for this class tag.">tag_wid</a>; <a name="l00592"></a>00592 } <a name="l00593"></a>00593 prob = (*model-><a class="code" href="structngram__model__s.html#ad3d9d8ad9773f958a89534220eda6fb9" title="Implementation-specific methods.">funcs</a>-><a class="code" href="structngram__funcs__s.html#a2a64c66491914168bd830237cc93b16c" title="Implementation-specific function for querying raw language model probability.">raw_score</a>)(model, wid, history, <a 
name="l00594"></a>00594 n_hist, n_used); <a name="l00595"></a>00595 <span class="comment">/* Multiply by unigram in-class weight. */</span> <a name="l00596"></a>00596 <span class="keywordflow">return</span> prob + class_weight; <a name="l00597"></a>00597 } <a name="l00598"></a>00598 <a name="l00599"></a>00599 int32 <a name="l00600"></a><a class="code" href="ngram__model_8h.html#a68cfda3f503e1a4a87f08aa5a3a5ea88">00600</a> <a class="code" href="ngram__model_8h.html#a68cfda3f503e1a4a87f08aa5a3a5ea88" title="Get the &quot;raw&quot; log-probability for a general N-Gram.">ngram_prob</a>(<a class="code" href="structngram__model__s.html" title="Common implementation of ngram_model_t.">ngram_model_t</a> *model, <span class="keyword">const</span> <span class="keywordtype">char</span> *word, ...) <a name="l00601"></a>00601 { <a name="l00602"></a>00602 va_list history; <a name="l00603"></a>00603 <span class="keyword">const</span> <span class="keywordtype">char</span> *hword; <a name="l00604"></a>00604 int32 *histid; <a name="l00605"></a>00605 int32 n_hist; <a name="l00606"></a>00606 int32 n_used; <a name="l00607"></a>00607 int32 prob; <a name="l00608"></a>00608 <a name="l00609"></a>00609 va_start(history, word); <a name="l00610"></a>00610 n_hist = 0; <a name="l00611"></a>00611 <span class="keywordflow">while</span> ((hword = va_arg(history, <span class="keyword">const</span> <span class="keywordtype">char</span> *)) != NULL) <a name="l00612"></a>00612 ++n_hist; <a name="l00613"></a>00613 va_end(history); <a name="l00614"></a>00614 <a name="l00615"></a>00615 histid = <a class="code" href="ckd__alloc_8h.html#aa00ef21903bc4f8a972488417adc8d2e" title="Macros to simplify the use of above functions.">ckd_calloc</a>(n_hist, <span class="keyword">sizeof</span>(*histid)); <a name="l00616"></a>00616 va_start(history, word); <a name="l00617"></a>00617 n_hist = 0; <a name="l00618"></a>00618 <span class="keywordflow">while</span> ((hword = va_arg(history, <span 
class="keyword">const</span> <span class="keywordtype">char</span> *)) != NULL) { <a name="l00619"></a>00619 histid[n_hist] = <a class="code" href="ngram__model_8h.html#ad03d4355d4ea659815dc25bce8d83880" title="Look up numerical word ID.">ngram_wid</a>(model, hword); <a name="l00620"></a>00620 ++n_hist; <a name="l00621"></a>00621 } <a name="l00622"></a>00622 va_end(history); <a name="l00623"></a>00623 <a name="l00624"></a>00624 prob = <a class="code" href="ngram__model_8h.html#a218d8d140b93d3d8008f8933f9e04ec6" title="Quick &quot;raw&quot; probability lookup for a general N-Gram.">ngram_ng_prob</a>(model, <a class="code" href="ngram__model_8h.html#ad03d4355d4ea659815dc25bce8d83880" title="Look up numerical word ID.">ngram_wid</a>(model, word), <a name="l00625"></a>00625 histid, n_hist, &n_used); <a name="l00626"></a>00626 <a class="code" href="ckd__alloc_8h.html#a31c6b405558620ac37599737b5722fbf" title="Test and free a 1-D array.">ckd_free</a>(histid); <a name="l00627"></a>00627 <span class="keywordflow">return</span> prob; <a name="l00628"></a>00628 } <a name="l00629"></a>00629 <a name="l00630"></a>00630 int32 <a name="l00631"></a><a class="code" href="ngram__model_8h.html#ae0cf3a94128927e2be6422d2de34f49b">00631</a> <a class="code" href="ngram__model_8h.html#ae0cf3a94128927e2be6422d2de34f49b" title="Convert score to &quot;raw&quot; log-probability.">ngram_score_to_prob</a>(<a class="code" href="structngram__model__s.html" title="Common implementation of ngram_model_t.">ngram_model_t</a> *base, int32 score) <a name="l00632"></a>00632 { <a name="l00633"></a>00633 int32 prob; <a name="l00634"></a>00634 <a name="l00635"></a>00635 <span class="comment">/* Undo insertion penalty. */</span> <a name="l00636"></a>00636 prob = score - base-><a class="code" href="structngram__model__s.html#a3d6bf5632760a16e52cb881d7010d774" title="Log of word insertion penalty.">log_wip</a>; <a name="l00637"></a>00637 <span class="comment">/* Undo language weight. 
*/</span> <a name="l00638"></a>00638 prob = (int32)(prob / base-><a class="code" href="structngram__model__s.html#a76ea0c65b23de80091e7c602bdb43bde" title="Language model scaling factor.">lw</a>); <a name="l00639"></a>00639 <a name="l00640"></a>00640 <span class="keywordflow">return</span> prob; <a name="l00641"></a>00641 } <a name="l00642"></a>00642 <a name="l00643"></a>00643 int32 <a name="l00644"></a><a class="code" href="ngram__model_8h.html#a1469e9e1c8516a77c9ac1e248a61ef4e">00644</a> <a class="code" href="ngram__model_8h.html#a1469e9e1c8516a77c9ac1e248a61ef4e" title="Get the unknown word ID for a language model.">ngram_unknown_wid</a>(<a class="code" href="structngram__model__s.html" title="Common implementation of ngram_model_t.">ngram_model_t</a> *model) <a name="l00645"></a>00645 { <a name="l00646"></a>00646 int32 val; <a name="l00647"></a>00647 <a name="l00648"></a>00648 <span class="comment">/* FIXME: This could be memoized for speed if necessary. */</span> <a name="l00649"></a>00649 <span class="comment">/* Look up &lt;UNK&gt;, if not found return NGRAM_INVALID_WID. 
*/</span> <a name="l00650"></a>00650 <span class="keywordflow">if</span> (<a class="code" href="hash__table_8h.html#acaf27e8e7e336faf6653649937c42ed8" title="Look up a 32-bit integer value in a hash table.">hash_table_lookup_int32</a>(model-><a class="code" href="structngram__model__s.html#a75567419a8002ef6e916c81f5d9ee9ed" title="Mapping of unigram names to word IDs.">wid</a>, <span class="stringliteral">"&lt;UNK&gt;"</span>, &amp;val) == -1) <a name="l00651"></a>00651 <span class="keywordflow">return</span> <a class="code" href="ngram__model_8h.html#a3d4b3dddd0ff67e13d30c9bf053d01ab" title="Impossible word ID.">NGRAM_INVALID_WID</a>; <a name="l00652"></a>00652 <span class="keywordflow">else</span> <a name="l00653"></a>00653 <span class="keywordflow">return</span> val; <a name="l00654"></a>00654 } <a name="l00655"></a>00655 <a name="l00656"></a>00656 int32 <a name="l00657"></a><a class="code" href="ngram__model_8h.html#ae401a87ad55ae64f286dcd83170f7100">00657</a> <a class="code" href="ngram__model_8h.html#ae401a87ad55ae64f286dcd83170f7100" title="Get the &quot;zero&quot; log-probability value for a language model.">ngram_zero</a>(<a class="code" href="structngram__model__s.html" title="Common implementation of ngram_model_t.">ngram_model_t</a> *model) <a name="l00658"></a>00658 { <a name="l00659"></a>00659 <span class="keywordflow">return</span> model-><a class="code" href="structngram__model__s.html#a65425a599c4bcc4dda809d81149b8fc0" title="Zero probability, cached here for quick lookup.">log_zero</a>; <a name="l00660"></a>00660 } <a name="l00661"></a>00661 <a name="l00662"></a>00662 int32 <a name="l00663"></a><a class="code" href="ngram__model_8h.html#a462d374099a4fe8b3c3195b5e2013545">00663</a> <a class="code" href="ngram__model_8h.html#a462d374099a4fe8b3c3195b5e2013545" title="Get the order of the N-gram model (i.e.">ngram_model_get_size</a>(<a class="code" href="structngram__model__s.html" title="Common implementation of ngram_model_t.">ngram_model_t</a> *model) <a 
name="l00664"></a>00664 { <a name="l00665"></a>00665 <span class="keywordflow">if</span> (model != NULL) <a name="l00666"></a>00666 <span class="keywordflow">return</span> model-><a class="code" href="structngram__model__s.html#a3c87bc1b678662a2c8930b3b8c33a80f" title="This is an n-gram model (1, 2, 3, ...).">n</a>; <a name="l00667"></a>00667 <span class="keywordflow">return</span> 0; <a name="l00668"></a>00668 } <a name="l00669"></a>00669 <a name="l00670"></a>00670 int32 <span class="keyword">const</span> * <a name="l00671"></a><a class="code" href="ngram__model_8h.html#a187531aafc1f11214c9d061f75eae194">00671</a> <a class="code" href="ngram__model_8h.html#a187531aafc1f11214c9d061f75eae194" title="Get the counts of the various N-grams in the model.">ngram_model_get_counts</a>(<a class="code" href="structngram__model__s.html" title="Common implementation of ngram_model_t.">ngram_model_t</a> *model) <a name="l00672"></a>00672 { <a name="l00673"></a>00673 <span class="keywordflow">if</span> (model != NULL) <a name="l00674"></a>00674 <span class="keywordflow">return</span> model-><a class="code" href="structngram__model__s.html#a9dcba9b49cc1cd189b257e5838da0eee" title="Counts for 1, 2, 3, ...">n_counts</a>; <a name="l00675"></a>00675 <span class="keywordflow">return</span> NULL; <a name="l00676"></a>00676 } <a name="l00677"></a>00677 <a name="l00678"></a>00678 <span class="keywordtype">void</span> <a name="l00679"></a>00679 ngram_iter_init(<a class="code" href="structngram__iter__s.html" title="Base iterator structure for N-grams.">ngram_iter_t</a> *itor, <a class="code" href="structngram__model__s.html" title="Common implementation of ngram_model_t.">ngram_model_t</a> *model, <a name="l00680"></a>00680 <span class="keywordtype">int</span> m, <span class="keywordtype">int</span> successor) <a name="l00681"></a>00681 { <a name="l00682"></a>00682 itor->model = model; <a name="l00683"></a>00683 itor-><a class="code" 
href="structngram__iter__s.html#a1362ebe2dcf1c441929c845fc97958cb" title="Scratch space for word IDs.">wids</a> = <a class="code" href="ckd__alloc_8h.html#aa00ef21903bc4f8a972488417adc8d2e" title="Macros to simplify the use of above functions.">ckd_calloc</a>(model-><a class="code" href="structngram__model__s.html#a3c87bc1b678662a2c8930b3b8c33a80f" title="This is an n-gram model (1, 2, 3, ...).">n</a>, <span class="keyword">sizeof</span>(*itor-><a class="code" href="structngram__iter__s.html#a1362ebe2dcf1c441929c845fc97958cb" title="Scratch space for word IDs.">wids</a>)); <a name="l00684"></a>00684 itor-><a class="code" href="structngram__iter__s.html#a80bce30ba5b180f85ca84d7288715858" title="Order of history.">m</a> = m; <a name="l00685"></a>00685 itor-><a class="code" href="structngram__iter__s.html#a7802a7883b1cc246b1bff629cd2cfb08" title="Is this a successor iterator?">successor</a> = successor; <a name="l00686"></a>00686 } <a name="l00687"></a>00687 <a name="l00688"></a>00688 <a class="code" href="structngram__iter__s.html" title="Base iterator structure for N-grams.">ngram_iter_t</a> * <a name="l00689"></a><a class="code" href="ngram__model_8h.html#a12683dda2253dc45680102f02fbdb1e2">00689</a> <a class="code" href="ngram__model_8h.html#a12683dda2253dc45680102f02fbdb1e2" title="Iterate over all M-grams.">ngram_model_mgrams</a>(<a class="code" href="structngram__model__s.html" title="Common implementation of ngram_model_t.">ngram_model_t</a> *model, <span class="keywordtype">int</span> m) <a name="l00690"></a>00690 { <a name="l00691"></a>00691 <a class="code" href="structngram__iter__s.html" title="Base iterator structure for N-grams.">ngram_iter_t</a> *itor; <a name="l00692"></a>00692 <span class="comment">/* The fact that m=n-1 is not exactly obvious. Prevent accidents. 
*/</span> <a name="l00693"></a>00693 <span class="keywordflow">if</span> (m >= model-><a class="code" href="structngram__model__s.html#a3c87bc1b678662a2c8930b3b8c33a80f" title="This is an n-gram model (1, 2, 3, ...).">n</a>) <a name="l00694"></a>00694 <span class="keywordflow">return</span> NULL; <a name="l00695"></a>00695 <span class="keywordflow">if</span> (model-><a class="code" href="structngram__model__s.html#ad3d9d8ad9773f958a89534220eda6fb9" title="Implementation-specific methods.">funcs</a>-><a class="code" href="structngram__funcs__s.html#a3a264fcbdbe8b4b342c533af090cfe8f" title="Implementation-specific function for iterating.">mgrams</a> == NULL) <a name="l00696"></a>00696 <span class="keywordflow">return</span> NULL; <a name="l00697"></a>00697 itor = (*model-><a class="code" href="structngram__model__s.html#ad3d9d8ad9773f958a89534220eda6fb9" title="Implementation-specific methods.">funcs</a>-><a class="code" href="structngram__funcs__s.html#a3a264fcbdbe8b4b342c533af090cfe8f" title="Implementation-specific function for iterating.">mgrams</a>)(model, m); <a name="l00698"></a>00698 <span class="keywordflow">return</span> itor; <a name="l00699"></a>00699 } <a name="l00700"></a>00700 <a name="l00701"></a>00701 <a class="code" href="structngram__iter__s.html" title="Base iterator structure for N-grams.">ngram_iter_t</a> * <a name="l00702"></a><a class="code" href="ngram__model_8h.html#ad988e0f41447ee404050caf7c7bb7b63">00702</a> <a class="code" href="ngram__model_8h.html#ad988e0f41447ee404050caf7c7bb7b63" title="Get an iterator over M-grams pointing to the specified M-gram.">ngram_iter</a>(<a class="code" href="structngram__model__s.html" title="Common implementation of ngram_model_t.">ngram_model_t</a> *model, <span class="keyword">const</span> <span class="keywordtype">char</span> *word, ...) 
<a name="l00703"></a>00703 { <a name="l00704"></a>00704 va_list history; <a name="l00705"></a>00705 <span class="keyword">const</span> <span class="keywordtype">char</span> *hword; <a name="l00706"></a>00706 int32 *histid; <a name="l00707"></a>00707 int32 n_hist; <a name="l00708"></a>00708 <a class="code" href="structngram__iter__s.html" title="Base iterator structure for N-grams.">ngram_iter_t</a> *itor; <a name="l00709"></a>00709 <a name="l00710"></a>00710 va_start(history, word); <a name="l00711"></a>00711 n_hist = 0; <a name="l00712"></a>00712 <span class="keywordflow">while</span> ((hword = va_arg(history, <span class="keyword">const</span> <span class="keywordtype">char</span> *)) != NULL) <a name="l00713"></a>00713 ++n_hist; <a name="l00714"></a>00714 va_end(history); <a name="l00715"></a>00715 <a name="l00716"></a>00716 histid = <a class="code" href="ckd__alloc_8h.html#aa00ef21903bc4f8a972488417adc8d2e" title="Macros to simplify the use of above functions.">ckd_calloc</a>(n_hist, <span class="keyword">sizeof</span>(*histid)); <a name="l00717"></a>00717 va_start(history, word); <a name="l00718"></a>00718 n_hist = 0; <a name="l00719"></a>00719 <span class="keywordflow">while</span> ((hword = va_arg(history, <span class="keyword">const</span> <span class="keywordtype">char</span> *)) != NULL) { <a name="l00720"></a>00720 histid[n_hist] = <a class="code" href="ngram__model_8h.html#ad03d4355d4ea659815dc25bce8d83880" title="Look up numerical word ID.">ngram_wid</a>(model, hword); <a name="l00721"></a>00721 ++n_hist; <a name="l00722"></a>00722 } <a name="l00723"></a>00723 va_end(history); <a name="l00724"></a>00724 <a name="l00725"></a>00725 itor = <a class="code" href="ngram__model_8h.html#a461c6e5914ce463422dfeaeee377e024" title="Get an iterator over M-grams pointing to the specified M-gram.">ngram_ng_iter</a>(model, <a class="code" href="ngram__model_8h.html#ad03d4355d4ea659815dc25bce8d83880" title="Look up numerical word ID.">ngram_wid</a>(model, word), 
histid, n_hist); <a name="l00726"></a>00726 <a class="code" href="ckd__alloc_8h.html#a31c6b405558620ac37599737b5722fbf" title="Test and free a 1-D array.">ckd_free</a>(histid); <a name="l00727"></a>00727 <span class="keywordflow">return</span> itor; <a name="l00728"></a>00728 } <a name="l00729"></a>00729 <a name="l00730"></a>00730 <a class="code" href="structngram__iter__s.html" title="Base iterator structure for N-grams.">ngram_iter_t</a> * <a name="l00731"></a><a class="code" href="ngram__model_8h.html#a461c6e5914ce463422dfeaeee377e024">00731</a> <a class="code" href="ngram__model_8h.html#a461c6e5914ce463422dfeaeee377e024" title="Get an iterator over M-grams pointing to the specified M-gram.">ngram_ng_iter</a>(<a class="code" href="structngram__model__s.html" title="Common implementation of ngram_model_t.">ngram_model_t</a> *model, int32 wid, int32 *history, int32 n_hist) <a name="l00732"></a>00732 { <a name="l00733"></a>00733 <span class="keywordflow">if</span> (n_hist >= model-><a class="code" href="structngram__model__s.html#a3c87bc1b678662a2c8930b3b8c33a80f" title="This is an n-gram model (1, 2, 3, ...).">n</a>) <a name="l00734"></a>00734 <span class="keywordflow">return</span> NULL; <a name="l00735"></a>00735 <span class="keywordflow">if</span> (model-><a class="code" href="structngram__model__s.html#ad3d9d8ad9773f958a89534220eda6fb9" title="Implementation-specific methods.">funcs</a>-><a class="code" href="structngram__funcs__s.html#a8148f132a986660ff500e46f9b2ea1c1" title="Implementation-specific function for iterating.">iter</a> == NULL) <a name="l00736"></a>00736 <span class="keywordflow">return</span> NULL; <a name="l00737"></a>00737 <span class="keywordflow">return</span> (*model-><a class="code" href="structngram__model__s.html#ad3d9d8ad9773f958a89534220eda6fb9" title="Implementation-specific methods.">funcs</a>-><a class="code" href="structngram__funcs__s.html#a8148f132a986660ff500e46f9b2ea1c1" title="Implementation-specific function for 
iterating.">iter</a>)(model, wid, history, n_hist); <a name="l00738"></a>00738 } <a name="l00739"></a>00739 <a name="l00740"></a>00740 <a class="code" href="structngram__iter__s.html" title="Base iterator structure for N-grams.">ngram_iter_t</a> * <a name="l00741"></a><a class="code" href="ngram__model_8h.html#ae85f41e2defc5b65b12026d29cd4fdaa">00741</a> <a class="code" href="ngram__model_8h.html#ae85f41e2defc5b65b12026d29cd4fdaa" title="Iterate over all M-gram successors of an M-1-gram.">ngram_iter_successors</a>(<a class="code" href="structngram__iter__s.html" title="Base iterator structure for N-grams.">ngram_iter_t</a> *itor) <a name="l00742"></a>00742 { <a name="l00743"></a>00743 <span class="comment">/* Stop when we are at the highest order N-Gram. */</span> <a name="l00744"></a>00744 <span class="keywordflow">if</span> (itor-><a class="code" href="structngram__iter__s.html#a80bce30ba5b180f85ca84d7288715858" title="Order of history.">m</a> == itor->model-><a class="code" href="structngram__model__s.html#a3c87bc1b678662a2c8930b3b8c33a80f" title="This is an n-gram model (1, 2, 3, ...).">n</a> - 1) <a name="l00745"></a>00745 <span class="keywordflow">return</span> NULL; <a name="l00746"></a>00746 <span class="keywordflow">return</span> (*itor->model-><a class="code" href="structngram__model__s.html#ad3d9d8ad9773f958a89534220eda6fb9" title="Implementation-specific methods.">funcs</a>-><a class="code" href="structngram__funcs__s.html#af83b5620eb7bc37984ad522b66e266ad" title="Implementation-specific function for iterating.">successors</a>)(itor); <a name="l00747"></a>00747 } <a name="l00748"></a>00748 <a name="l00749"></a>00749 int32 <span class="keyword">const</span> * <a name="l00750"></a><a class="code" href="ngram__model_8h.html#a240c738781daa226a2fc13395dbdb514">00750</a> <a class="code" href="ngram__model_8h.html#a240c738781daa226a2fc13395dbdb514" title="Get information from the current M-gram in an iterator.">ngram_iter_get</a>(<a class="code" 
href="structngram__iter__s.html" title="Base iterator structure for N-grams.">ngram_iter_t</a> *itor, <a name="l00751"></a>00751 int32 *out_score, <a name="l00752"></a>00752 int32 *out_bowt) <a name="l00753"></a>00753 { <a name="l00754"></a>00754 <span class="keywordflow">return</span> (*itor->model-><a class="code" href="structngram__model__s.html#ad3d9d8ad9773f958a89534220eda6fb9" title="Implementation-specific methods.">funcs</a>-><a class="code" href="structngram__funcs__s.html#ac515e6ad9a1afbda6b74e8204fd460b5" title="Implementation-specific function for iterating.">iter_get</a>)(itor, out_score, out_bowt); <a name="l00755"></a>00755 } <a name="l00756"></a>00756 <a name="l00757"></a>00757 <a class="code" href="structngram__iter__s.html" title="Base iterator structure for N-grams.">ngram_iter_t</a> * <a name="l00758"></a><a class="code" href="ngram__model_8h.html#a3a2b285c01393b3ebddaec1fefed11a4">00758</a> <a class="code" href="ngram__model_8h.html#a3a2b285c01393b3ebddaec1fefed11a4" title="Advance an M-gram iterator.">ngram_iter_next</a>(<a class="code" href="structngram__iter__s.html" title="Base iterator structure for N-grams.">ngram_iter_t</a> *itor) <a name="l00759"></a>00759 { <a name="l00760"></a>00760 <span class="keywordflow">return</span> (*itor->model-><a class="code" href="structngram__model__s.html#ad3d9d8ad9773f958a89534220eda6fb9" title="Implementation-specific methods.">funcs</a>-><a class="code" href="structngram__funcs__s.html#a57777056985f8adc3494e859c1102f3e" title="Implementation-specific function for iterating.">iter_next</a>)(itor); <a name="l00761"></a>00761 } <a name="l00762"></a>00762 <a name="l00763"></a>00763 <span class="keywordtype">void</span> <a name="l00764"></a><a class="code" href="ngram__model_8h.html#ac9f746c8a5db78ef8b2fb7c312be4a22">00764</a> <a class="code" href="ngram__model_8h.html#ac9f746c8a5db78ef8b2fb7c312be4a22" title="Terminate an M-gram iterator.">ngram_iter_free</a>(<a class="code" 
href="structngram__iter__s.html" title="Base iterator structure for N-grams.">ngram_iter_t</a> *itor) <a name="l00765"></a>00765 { <a name="l00766"></a>00766 <a class="code" href="ckd__alloc_8h.html#a31c6b405558620ac37599737b5722fbf" title="Test and free a 1-D array.">ckd_free</a>(itor-><a class="code" href="structngram__iter__s.html#a1362ebe2dcf1c441929c845fc97958cb" title="Scratch space for word IDs.">wids</a>); <a name="l00767"></a>00767 (*itor->model-><a class="code" href="structngram__model__s.html#ad3d9d8ad9773f958a89534220eda6fb9" title="Implementation-specific methods.">funcs</a>-><a class="code" href="structngram__funcs__s.html#ad7031e996c16516cf7f52e7a4adee4bb" title="Implementation-specific function for iterating.">iter_free</a>)(itor); <a name="l00768"></a>00768 } <a name="l00769"></a>00769 <a name="l00770"></a>00770 int32 <a name="l00771"></a><a class="code" href="ngram__model_8h.html#ad03d4355d4ea659815dc25bce8d83880">00771</a> <a class="code" href="ngram__model_8h.html#ad03d4355d4ea659815dc25bce8d83880" title="Look up numerical word ID.">ngram_wid</a>(<a class="code" href="structngram__model__s.html" title="Common implementation of ngram_model_t.">ngram_model_t</a> *model, <span class="keyword">const</span> <span class="keywordtype">char</span> *word) <a name="l00772"></a>00772 { <a name="l00773"></a>00773 int32 val; <a name="l00774"></a>00774 <a name="l00775"></a>00775 <span class="keywordflow">if</span> (<a class="code" href="hash__table_8h.html#acaf27e8e7e336faf6653649937c42ed8" title="Look up a 32-bit integer value in a hash table.">hash_table_lookup_int32</a>(model-><a class="code" href="structngram__model__s.html#a75567419a8002ef6e916c81f5d9ee9ed" title="Mapping of unigram names to word IDs.">wid</a>, word, &val) == -1) <a name="l00776"></a>00776 <span class="keywordflow">return</span> <a class="code" href="ngram__model_8h.html#a1469e9e1c8516a77c9ac1e248a61ef4e" title="Get the unknown word ID for a language model.">ngram_unknown_wid</a>(model); 
<a name="l00777"></a>00777 <span class="keywordflow">else</span> <a name="l00778"></a>00778 <span class="keywordflow">return</span> val; <a name="l00779"></a>00779 } <a name="l00780"></a>00780 <a name="l00781"></a>00781 <span class="keyword">const</span> <span class="keywordtype">char</span> * <a name="l00782"></a><a class="code" href="ngram__model_8h.html#a96e36290a005c03464ea6c637ccde2f5">00782</a> <a class="code" href="ngram__model_8h.html#a96e36290a005c03464ea6c637ccde2f5" title="Look up word string for numerical word ID.">ngram_word</a>(<a class="code" href="structngram__model__s.html" title="Common implementation of ngram_model_t.">ngram_model_t</a> *model, int32 wid) <a name="l00783"></a>00783 { <a name="l00784"></a>00784 <span class="comment">/* Remove any class tag */</span> <a name="l00785"></a>00785 wid = NGRAM_BASEWID(wid); <a name="l00786"></a>00786 <span class="keywordflow">if</span> (wid >= model-><a class="code" href="structngram__model__s.html#a74f85927ef0d5513a1e6c02d13864be3" title="Number of actual word strings (NOT the same as the number of unigrams, due to class words)...">n_words</a>) <a name="l00787"></a>00787 <span class="keywordflow">return</span> NULL; <a name="l00788"></a>00788 <span class="keywordflow">return</span> model-><a class="code" href="structngram__model__s.html#ae625e779e340845f03fb3da164e93039" title="Unigram names.">word_str</a>[wid]; <a name="l00789"></a>00789 } <a name="l00790"></a>00790 <a name="l00794"></a>00794 int32 <a name="l00795"></a>00795 ngram_add_word_internal(<a class="code" href="structngram__model__s.html" title="Common implementation of ngram_model_t.">ngram_model_t</a> *model, <a name="l00796"></a>00796 <span class="keyword">const</span> <span class="keywordtype">char</span> *word, <a name="l00797"></a>00797 int32 classid) <a name="l00798"></a>00798 { <a name="l00799"></a>00799 <span class="keywordtype">void</span> *dummy; <a name="l00800"></a>00800 int32 wid; <a name="l00801"></a>00801 <a 
name="l00802"></a>00802 <span class="comment">/* Take the next available word ID */</span> <a name="l00803"></a>00803 wid = model-><a class="code" href="structngram__model__s.html#a74f85927ef0d5513a1e6c02d13864be3" title="Number of actual word strings (NOT the same as the number of unigrams, due to class words)...">n_words</a>; <a name="l00804"></a>00804 <span class="keywordflow">if</span> (classid >= 0) { <a name="l00805"></a>00805 wid = NGRAM_CLASSWID(wid, classid); <a name="l00806"></a>00806 } <a name="l00807"></a>00807 <span class="comment">/* Check for hash collisions. */</span> <a name="l00808"></a>00808 <span class="keywordflow">if</span> (<a class="code" href="hash__table_8h.html#a9a1e5ed410eb96f514b00fdce770fbd7" title="Look up a key in a hash table and optionally return the associated value.">hash_table_lookup</a>(model-><a class="code" href="structngram__model__s.html#a75567419a8002ef6e916c81f5d9ee9ed" title="Mapping of unigram names to word IDs.">wid</a>, word, &dummy) == 0) { <a name="l00809"></a>00809 <a class="code" href="err_8h.html#a5f7b2f58f5a663a6bdd51f197ae21993" title="Print error message to standard error stream.">E_ERROR</a>(<span class="stringliteral">"Duplicate definition of word %s\n"</span>, word); <a name="l00810"></a>00810 <span class="keywordflow">return</span> <a class="code" href="ngram__model_8h.html#a3d4b3dddd0ff67e13d30c9bf053d01ab" title="Impossible word ID.">NGRAM_INVALID_WID</a>; <a name="l00811"></a>00811 } <a name="l00812"></a>00812 <span class="comment">/* Reallocate word_str if necessary. 
*/</span> <a name="l00813"></a>00813 <span class="keywordflow">if</span> (model-><a class="code" href="structngram__model__s.html#a74f85927ef0d5513a1e6c02d13864be3" title="Number of actual word strings (NOT the same as the number of unigrams, due to class words)...">n_words</a> >= model-><a class="code" href="structngram__model__s.html#a3e41109b30668bdfc077614c1ef49960" title="Number of allocated word strings (for new word addition)">n_1g_alloc</a>) { <a name="l00814"></a>00814 model-><a class="code" href="structngram__model__s.html#a3e41109b30668bdfc077614c1ef49960" title="Number of allocated word strings (for new word addition)">n_1g_alloc</a> += UG_ALLOC_STEP; <a name="l00815"></a>00815 model-><a class="code" href="structngram__model__s.html#ae625e779e340845f03fb3da164e93039" title="Unigram names.">word_str</a> = <a class="code" href="ckd__alloc_8h.html#afd496738b3e114bd494c5a0955f1bfb3" title="Macro for __ckd_realloc__.">ckd_realloc</a>(model-><a class="code" href="structngram__model__s.html#ae625e779e340845f03fb3da164e93039" title="Unigram names.">word_str</a>, <a name="l00816"></a>00816 <span class="keyword">sizeof</span>(*model-><a class="code" href="structngram__model__s.html#ae625e779e340845f03fb3da164e93039" title="Unigram names.">word_str</a>) * model-><a class="code" href="structngram__model__s.html#a3e41109b30668bdfc077614c1ef49960" title="Number of allocated word strings (for new word addition)">n_1g_alloc</a>); <a name="l00817"></a>00817 } <a name="l00818"></a>00818 <span class="comment">/* Add the word string in the appropriate manner. */</span> <a name="l00819"></a>00819 <span class="comment">/* Class words are always dynamically allocated. 
*/</span> <a name="l00820"></a>00820 model-><a class="code" href="structngram__model__s.html#ae625e779e340845f03fb3da164e93039" title="Unigram names.">word_str</a>[model-><a class="code" href="structngram__model__s.html#a74f85927ef0d5513a1e6c02d13864be3" title="Number of actual word strings (NOT the same as the number of unigrams, due to class words)...">n_words</a>] = <a class="code" href="ckd__alloc_8h.html#ad313f92478859f9e4ea99d0f6e78c393" title="Macro for __ckd_salloc__.">ckd_salloc</a>(word); <a name="l00821"></a>00821 <span class="comment">/* Now enter it into the hash table. */</span> <a name="l00822"></a>00822 <span class="keywordflow">if</span> (<a class="code" href="hash__table_8h.html#a393c56322e54607a48e6bc61169d92bf" title="Add a 32-bit integer value to a hash table.">hash_table_enter_int32</a>(model-><a class="code" href="structngram__model__s.html#a75567419a8002ef6e916c81f5d9ee9ed" title="Mapping of unigram names to word IDs.">wid</a>, model-><a class="code" href="structngram__model__s.html#ae625e779e340845f03fb3da164e93039" title="Unigram names.">word_str</a>[model-><a class="code" href="structngram__model__s.html#a74f85927ef0d5513a1e6c02d13864be3" title="Number of actual word strings (NOT the same as the number of unigrams, due to class words)...">n_words</a>], wid) != wid) { <a name="l00823"></a>00823 <a class="code" href="err_8h.html#a5f7b2f58f5a663a6bdd51f197ae21993" title="Print error message to standard error stream.">E_ERROR</a>(<span class="stringliteral">"Hash insertion failed for word %s => %p (should not happen)\n"</span>, <a name="l00824"></a>00824 model-><a class="code" href="structngram__model__s.html#ae625e779e340845f03fb3da164e93039" title="Unigram names.">word_str</a>[model-><a class="code" href="structngram__model__s.html#a74f85927ef0d5513a1e6c02d13864be3" title="Number of actual word strings (NOT the same as the number of unigrams, due to class words)...">n_words</a>], (<span class="keywordtype">void</span> *)(<span 
class="keywordtype">long</span>)(wid)); <a name="l00825"></a>00825 } <a name="l00826"></a>00826 <span class="comment">/* Increment number of words. */</span> <a name="l00827"></a>00827 ++model-><a class="code" href="structngram__model__s.html#a74f85927ef0d5513a1e6c02d13864be3" title="Number of actual word strings (NOT the same as the number of unigrams, due to class words)...">n_words</a>; <a name="l00828"></a>00828 <span class="keywordflow">return</span> wid; <a name="l00829"></a>00829 } <a name="l00830"></a>00830 <a name="l00831"></a>00831 int32 <a name="l00832"></a><a class="code" href="ngram__model_8h.html#a38e034d58ee0d6a4c0dd6f852cb71c3c">00832</a> <a class="code" href="ngram__model_8h.html#a38e034d58ee0d6a4c0dd6f852cb71c3c" title="Add a word (unigram) to the language model.">ngram_model_add_word</a>(<a class="code" href="structngram__model__s.html" title="Common implementation of ngram_model_t.">ngram_model_t</a> *model, <a name="l00833"></a>00833 <span class="keyword">const</span> <span class="keywordtype">char</span> *word, float32 weight) <a name="l00834"></a>00834 { <a name="l00835"></a>00835 int32 wid, prob = model-><a class="code" href="structngram__model__s.html#a65425a599c4bcc4dda809d81149b8fc0" title="Zero probability, cached here for quick lookup.">log_zero</a>; <a name="l00836"></a>00836 <a name="l00837"></a>00837 wid = ngram_add_word_internal(model, word, -1); <a name="l00838"></a>00838 <span class="keywordflow">if</span> (wid == <a class="code" href="ngram__model_8h.html#a3d4b3dddd0ff67e13d30c9bf053d01ab" title="Impossible word ID.">NGRAM_INVALID_WID</a>) <a name="l00839"></a>00839 <span class="keywordflow">return</span> wid; <a name="l00840"></a>00840 <a name="l00841"></a>00841 <span class="comment">/* Do what needs to be done to add the word to the unigram. 
*/</span> <a name="l00842"></a>00842 <span class="keywordflow">if</span> (model-><a class="code" href="structngram__model__s.html#ad3d9d8ad9773f958a89534220eda6fb9" title="Implementation-specific methods.">funcs</a> && model-><a class="code" href="structngram__model__s.html#ad3d9d8ad9773f958a89534220eda6fb9" title="Implementation-specific methods.">funcs</a>-><a class="code" href="structngram__funcs__s.html#a6d553c95c7f4da4993f2b9df757ac016" title="Implementation-specific function for adding unigrams.">add_ug</a>) <a name="l00843"></a>00843 prob = (*model-><a class="code" href="structngram__model__s.html#ad3d9d8ad9773f958a89534220eda6fb9" title="Implementation-specific methods.">funcs</a>-><a class="code" href="structngram__funcs__s.html#a6d553c95c7f4da4993f2b9df757ac016" title="Implementation-specific function for adding unigrams.">add_ug</a>)(model, wid, <a class="code" href="logmath_8h.html#aebb4711268322fa7aec31e5798fe7e90" title="Convert linear floating point number to integer log in base B.">logmath_log</a>(model-><a class="code" href="structngram__model__s.html#a2ca373109c651ac998b33153eb38fd95" title="Log-math object.">lmath</a>, weight)); <a name="l00844"></a>00844 <span class="keywordflow">if</span> (prob == 0) { <a name="l00845"></a>00845 <span class="keywordflow">if</span> (model-><a class="code" href="structngram__model__s.html#a78a3253febced2cae4732044da466ee6" title="Are word strings writable?">writable</a>) <a name="l00846"></a>00846 <a class="code" href="ckd__alloc_8h.html#a31c6b405558620ac37599737b5722fbf" title="Test and free a 1-D array.">ckd_free</a>(model-><a class="code" href="structngram__model__s.html#ae625e779e340845f03fb3da164e93039" title="Unigram names.">word_str</a>[wid]); <a name="l00847"></a>00847 <span class="keywordflow">return</span> -1; <a name="l00848"></a>00848 } <a name="l00849"></a>00849 <span class="keywordflow">return</span> wid; <a name="l00850"></a>00850 } <a name="l00851"></a>00851 <a name="l00852"></a>00852 <a 
class="code" href="structngram__class__s.html" title="Implementation of ngram_class_t.">ngram_class_t</a> * <a name="l00853"></a>00853 ngram_class_new(<a class="code" href="structngram__model__s.html" title="Common implementation of ngram_model_t.">ngram_model_t</a> *model, int32 tag_wid, int32 start_wid, <a class="code" href="structgnode__s.html" title="A node in a generic list.">glist_t</a> classwords) <a name="l00854"></a>00854 { <a name="l00855"></a>00855 <a class="code" href="structngram__class__s.html" title="Implementation of ngram_class_t.">ngram_class_t</a> *lmclass; <a name="l00856"></a>00856 <a class="code" href="structgnode__s.html" title="A node in a generic list.">gnode_t</a> *gn; <a name="l00857"></a>00857 float32 tprob; <a name="l00858"></a>00858 <span class="keywordtype">int</span> i; <a name="l00859"></a>00859 <a name="l00860"></a>00860 lmclass = <a class="code" href="ckd__alloc_8h.html#aa00ef21903bc4f8a972488417adc8d2e" title="Macros to simplify the use of above functions.">ckd_calloc</a>(1, <span class="keyword">sizeof</span>(*lmclass)); <a name="l00861"></a>00861 lmclass-><a class="code" href="structngram__class__s.html#ab5f3cc0142c9fd91b3c3d0e59906b556" title="Base word ID for this class tag.">tag_wid</a> = tag_wid; <a name="l00862"></a>00862 <span class="comment">/* wid_base is the wid (minus class tag) of the first word in the list. 
*/</span> <a name="l00863"></a>00863 lmclass-><a class="code" href="structngram__class__s.html#a370c88602c7c1f7e3ff1a767c027f5cb" title="Starting base word ID for this class&#39; words.">start_wid</a> = start_wid; <a name="l00864"></a>00864 lmclass-><a class="code" href="structngram__class__s.html#af13562cbc44647435f315b18df5688dc" title="Number of base words for this class.">n_words</a> = <a class="code" href="glist_8h.html#aeb046e39c540d2f5f792119ea0d24c48" title="Count the number of element in a given link list.">glist_count</a>(classwords); <a name="l00865"></a>00865 lmclass-><a class="code" href="structngram__class__s.html#a50077f48f135f1c666745a21574e4205" title="Probability table for base words.">prob1</a> = <a class="code" href="ckd__alloc_8h.html#aa00ef21903bc4f8a972488417adc8d2e" title="Macros to simplify the use of above functions.">ckd_calloc</a>(lmclass-><a class="code" href="structngram__class__s.html#af13562cbc44647435f315b18df5688dc" title="Number of base words for this class.">n_words</a>, <span class="keyword">sizeof</span>(*lmclass-><a class="code" href="structngram__class__s.html#a50077f48f135f1c666745a21574e4205" title="Probability table for base words.">prob1</a>)); <a name="l00866"></a>00866 lmclass->nword_hash = NULL; <a name="l00867"></a>00867 lmclass-><a class="code" href="structngram__class__s.html#a7f450019eb6dc2e31b18eb3ab6009920" title="Number of buckets in nword_hash (power of 2)">n_hash</a> = 0; <a name="l00868"></a>00868 tprob = 0.0; <a name="l00869"></a>00869 <span class="keywordflow">for</span> (gn = classwords; gn; gn = gnode_next(gn)) { <a name="l00870"></a>00870 tprob += gnode_float32(gn); <a name="l00871"></a>00871 } <a name="l00872"></a>00872 <span class="keywordflow">if</span> (tprob > 1.1 || tprob < 0.9) { <a name="l00873"></a>00873 <a class="code" href="err_8h.html#a6a794bec721b555ac1f2167f9e12f662" title="Print warning information to standard error stream.">E_WARN</a>(<span class="stringliteral">"Total class probability 
is %f, will normalize\n"</span>, tprob); <a name="l00874"></a>00874 <span class="keywordflow">for</span> (gn = classwords; gn; gn = gnode_next(gn)) { <a name="l00875"></a>00875 gn->data.fl /= tprob; <a name="l00876"></a>00876 } <a name="l00877"></a>00877 } <a name="l00878"></a>00878 <span class="keywordflow">for</span> (i = 0, gn = classwords; gn; ++i, gn = gnode_next(gn)) { <a name="l00879"></a>00879 lmclass-><a class="code" href="structngram__class__s.html#a50077f48f135f1c666745a21574e4205" title="Probability table for base words.">prob1</a>[i] = <a class="code" href="logmath_8h.html#aebb4711268322fa7aec31e5798fe7e90" title="Convert linear floating point number to integer log in base B.">logmath_log</a>(model-><a class="code" href="structngram__model__s.html#a2ca373109c651ac998b33153eb38fd95" title="Log-math object.">lmath</a>, gnode_float32(gn)); <a name="l00880"></a>00880 } <a name="l00881"></a>00881 <a name="l00882"></a>00882 <span class="keywordflow">return</span> lmclass; <a name="l00883"></a>00883 } <a name="l00884"></a>00884 <a name="l00885"></a>00885 int32 <a name="l00886"></a>00886 ngram_class_add_word(<a class="code" href="structngram__class__s.html" title="Implementation of ngram_class_t.">ngram_class_t</a> *lmclass, int32 wid, int32 lweight) <a name="l00887"></a>00887 { <a name="l00888"></a>00888 int32 hash; <a name="l00889"></a>00889 <a name="l00890"></a>00890 <span class="keywordflow">if</span> (lmclass->nword_hash == NULL) { <a name="l00891"></a>00891 <span class="comment">/* Initialize everything in it to -1 */</span> <a name="l00892"></a>00892 lmclass->nword_hash = <a class="code" href="ckd__alloc_8h.html#a8e89a31c3c70710a8e023a177084bff2" title="Macro for __ckd_malloc__.">ckd_malloc</a>(NGRAM_HASH_SIZE * <span class="keyword">sizeof</span>(*lmclass->nword_hash)); <a name="l00893"></a>00893 memset(lmclass->nword_hash, 0xff, NGRAM_HASH_SIZE * <span class="keyword">sizeof</span>(*lmclass->nword_hash)); <a name="l00894"></a>00894 lmclass-><a 
class="code" href="structngram__class__s.html#a7f450019eb6dc2e31b18eb3ab6009920" title="Number of buckets in nword_hash (power of 2)">n_hash</a> = NGRAM_HASH_SIZE; <a name="l00895"></a>00895 lmclass-><a class="code" href="structngram__class__s.html#a79438cd582363800bc05da31a9ca49d6" title="Number of words in nword_hash.">n_hash_inuse</a> = 0; <a name="l00896"></a>00896 } <a name="l00897"></a>00897 <span class="comment">/* Stupidest possible hash function. This will work pretty well</span> <a name="l00898"></a>00898 <span class="comment"> * when this function is called repeatedly with contiguous word</span> <a name="l00899"></a>00899 <span class="comment"> * IDs, though... */</span> <a name="l00900"></a>00900 hash = wid & (lmclass-><a class="code" href="structngram__class__s.html#a7f450019eb6dc2e31b18eb3ab6009920" title="Number of buckets in nword_hash (power of 2)">n_hash</a> - 1); <a name="l00901"></a>00901 <span class="keywordflow">if</span> (lmclass->nword_hash[hash].<a class="code" href="structngram__class__s_1_1ngram__hash__s.html#ad0178b5a86ec23ce790b6b7cb64db0b9" title="Word ID of this bucket.">wid</a> == -1) { <a name="l00902"></a>00902 <span class="comment">/* Good, no collision. 
*/</span> <a name="l00903"></a>00903 lmclass->nword_hash[hash].<a class="code" href="structngram__class__s_1_1ngram__hash__s.html#ad0178b5a86ec23ce790b6b7cb64db0b9" title="Word ID of this bucket.">wid</a> = wid; <a name="l00904"></a>00904 lmclass->nword_hash[hash].<a class="code" href="structngram__class__s_1_1ngram__hash__s.html#a6ac4c01b0c8d29f770f4780e38ab0923" title="Probability for this word.">prob1</a> = lweight; <a name="l00905"></a>00905 ++lmclass-><a class="code" href="structngram__class__s.html#a79438cd582363800bc05da31a9ca49d6" title="Number of words in nword_hash.">n_hash_inuse</a>; <a name="l00906"></a>00906 <span class="keywordflow">return</span> hash; <a name="l00907"></a>00907 } <a name="l00908"></a>00908 <span class="keywordflow">else</span> { <a name="l00909"></a>00909 int32 next; <a name="l00910"></a>00910 <span class="comment">/* Collision... Find the end of the hash chain. */</span> <a name="l00911"></a>00911 <span class="keywordflow">while</span> (lmclass->nword_hash[hash].<a class="code" href="structngram__class__s_1_1ngram__hash__s.html#a88ede5798cadc2bece12c49fa038b090" title="Index of next bucket (or -1 for no collision)">next</a> != -1) <a name="l00912"></a>00912 hash = lmclass->nword_hash[hash].<a class="code" href="structngram__class__s_1_1ngram__hash__s.html#a88ede5798cadc2bece12c49fa038b090" title="Index of next bucket (or -1 for no collision)">next</a>; <a name="l00913"></a>00913 assert(hash != -1); <a name="l00914"></a>00914 <span class="comment">/* Does we has any more bukkit? */</span> <a name="l00915"></a>00915 <span class="keywordflow">if</span> (lmclass-><a class="code" href="structngram__class__s.html#a79438cd582363800bc05da31a9ca49d6" title="Number of words in nword_hash.">n_hash_inuse</a> == lmclass-><a class="code" href="structngram__class__s.html#a7f450019eb6dc2e31b18eb3ab6009920" title="Number of buckets in nword_hash (power of 2)">n_hash</a>) { <a name="l00916"></a>00916 <span class="comment">/* Oh noes! 
Ok, we makes more. */</span> <a name="l00917"></a>00917 lmclass->nword_hash = <a class="code" href="ckd__alloc_8h.html#afd496738b3e114bd494c5a0955f1bfb3" title="Macro for __ckd_realloc__.">ckd_realloc</a>(lmclass->nword_hash, <a name="l00918"></a>00918 lmclass-><a class="code" href="structngram__class__s.html#a7f450019eb6dc2e31b18eb3ab6009920" title="Number of buckets in nword_hash (power of 2)">n_hash</a> * 2 * <span class="keyword">sizeof</span>(*lmclass->nword_hash)); <a name="l00919"></a>00919 memset(lmclass->nword_hash + lmclass-><a class="code" href="structngram__class__s.html#a7f450019eb6dc2e31b18eb3ab6009920" title="Number of buckets in nword_hash (power of 2)">n_hash</a>, <a name="l00920"></a>00920 0xff, lmclass-><a class="code" href="structngram__class__s.html#a7f450019eb6dc2e31b18eb3ab6009920" title="Number of buckets in nword_hash (power of 2)">n_hash</a> * <span class="keyword">sizeof</span>(*lmclass->nword_hash)); <a name="l00921"></a>00921 <span class="comment">/* Just use the next allocated one (easy) */</span> <a name="l00922"></a>00922 next = lmclass-><a class="code" href="structngram__class__s.html#a7f450019eb6dc2e31b18eb3ab6009920" title="Number of buckets in nword_hash (power of 2)">n_hash</a>; <a name="l00923"></a>00923 lmclass-><a class="code" href="structngram__class__s.html#a7f450019eb6dc2e31b18eb3ab6009920" title="Number of buckets in nword_hash (power of 2)">n_hash</a> *= 2; <a name="l00924"></a>00924 } <a name="l00925"></a>00925 <span class="keywordflow">else</span> { <a name="l00926"></a>00926 <span class="comment">/* Look for any available bucket. We hope this doesn't happen. 
*/</span> <a name="l00927"></a>00927 <span class="keywordflow">for</span> (next = 0; next < lmclass-><a class="code" href="structngram__class__s.html#a7f450019eb6dc2e31b18eb3ab6009920" title="Number of buckets in nword_hash (power of 2)">n_hash</a>; ++next) <a name="l00928"></a>00928 <span class="keywordflow">if</span> (lmclass->nword_hash[next].<a class="code" href="structngram__class__s_1_1ngram__hash__s.html#ad0178b5a86ec23ce790b6b7cb64db0b9" title="Word ID of this bucket.">wid</a> == -1) <a name="l00929"></a>00929 <span class="keywordflow">break</span>; <a name="l00930"></a>00930 <span class="comment">/* This should absolutely not happen. */</span> <a name="l00931"></a>00931 assert(next != lmclass-><a class="code" href="structngram__class__s.html#a7f450019eb6dc2e31b18eb3ab6009920" title="Number of buckets in nword_hash (power of 2)">n_hash</a>); <a name="l00932"></a>00932 } <a name="l00933"></a>00933 lmclass->nword_hash[next].<a class="code" href="structngram__class__s_1_1ngram__hash__s.html#ad0178b5a86ec23ce790b6b7cb64db0b9" title="Word ID of this bucket.">wid</a> = wid; <a name="l00934"></a>00934 lmclass->nword_hash[next].<a class="code" href="structngram__class__s_1_1ngram__hash__s.html#a6ac4c01b0c8d29f770f4780e38ab0923" title="Probability for this word.">prob1</a> = lweight; <a name="l00935"></a>00935 lmclass->nword_hash[hash].<a class="code" href="structngram__class__s_1_1ngram__hash__s.html#a88ede5798cadc2bece12c49fa038b090" title="Index of next bucket (or -1 for no collision)">next</a> = next; <a name="l00936"></a>00936 ++lmclass-><a class="code" href="structngram__class__s.html#a79438cd582363800bc05da31a9ca49d6" title="Number of words in nword_hash.">n_hash_inuse</a>; <a name="l00937"></a>00937 <span class="keywordflow">return</span> next; <a name="l00938"></a>00938 } <a name="l00939"></a>00939 } <a name="l00940"></a>00940 <a name="l00941"></a>00941 <span class="keywordtype">void</span> <a name="l00942"></a>00942 ngram_class_free(<a class="code" 
href="structngram__class__s.html" title="Implementation of ngram_class_t.">ngram_class_t</a> *lmclass) <a name="l00943"></a>00943 { <a name="l00944"></a>00944 <a class="code" href="ckd__alloc_8h.html#a31c6b405558620ac37599737b5722fbf" title="Test and free a 1-D array.">ckd_free</a>(lmclass->nword_hash); <a name="l00945"></a>00945 <a class="code" href="ckd__alloc_8h.html#a31c6b405558620ac37599737b5722fbf" title="Test and free a 1-D array.">ckd_free</a>(lmclass-><a class="code" href="structngram__class__s.html#a50077f48f135f1c666745a21574e4205" title="Probability table for base words.">prob1</a>); <a name="l00946"></a>00946 <a class="code" href="ckd__alloc_8h.html#a31c6b405558620ac37599737b5722fbf" title="Test and free a 1-D array.">ckd_free</a>(lmclass); <a name="l00947"></a>00947 } <a name="l00948"></a>00948 <a name="l00949"></a>00949 int32 <a name="l00950"></a><a class="code" href="ngram__model_8h.html#a39eabb4994cf99c4bc2116e12af0c9f1">00950</a> <a class="code" href="ngram__model_8h.html#a39eabb4994cf99c4bc2116e12af0c9f1" title="Add a word to a class in a language model.">ngram_model_add_class_word</a>(<a class="code" href="structngram__model__s.html" title="Common implementation of ngram_model_t.">ngram_model_t</a> *model, <a name="l00951"></a>00951 <span class="keyword">const</span> <span class="keywordtype">char</span> *classname, <a name="l00952"></a>00952 <span class="keyword">const</span> <span class="keywordtype">char</span> *word, <a name="l00953"></a>00953 float32 weight) <a name="l00954"></a>00954 { <a name="l00955"></a>00955 <a class="code" href="structngram__class__s.html" title="Implementation of ngram_class_t.">ngram_class_t</a> *lmclass; <a name="l00956"></a>00956 int32 classid, tag_wid, wid, i, scale; <a name="l00957"></a>00957 float32 fprob; <a name="l00958"></a>00958 <a name="l00959"></a>00959 <span class="comment">/* Find the class corresponding to classname. 
Linear search</span> <a name="l00960"></a>00960 <span class="comment"> * probably okay here since there won't be very many classes, and</span> <a name="l00961"></a>00961 <span class="comment"> * this doesn't have to be fast. */</span> <a name="l00962"></a>00962 tag_wid = <a class="code" href="ngram__model_8h.html#ad03d4355d4ea659815dc25bce8d83880" title="Look up numerical word ID.">ngram_wid</a>(model, classname); <a name="l00963"></a>00963 <span class="keywordflow">if</span> (tag_wid == <a class="code" href="ngram__model_8h.html#a3d4b3dddd0ff67e13d30c9bf053d01ab" title="Impossible word ID.">NGRAM_INVALID_WID</a>) { <a name="l00964"></a>00964 <a class="code" href="err_8h.html#a5f7b2f58f5a663a6bdd51f197ae21993" title="Print error message to standard error stream.">E_ERROR</a>(<span class="stringliteral">"No such word or class tag: %s\n"</span>, classname); <a name="l00965"></a>00965 <span class="keywordflow">return</span> tag_wid; <a name="l00966"></a>00966 } <a name="l00967"></a>00967 <span class="keywordflow">for</span> (classid = 0; classid < model-><a class="code" href="structngram__model__s.html#adeb914f8e9f011a5c960f5ee9cd33919" title="Number of classes (maximum 128)">n_classes</a>; ++classid) { <a name="l00968"></a>00968 <span class="keywordflow">if</span> (model-><a class="code" href="structngram__model__s.html#a39384af3e6b53591d433436db011ac8d" title="Word class definitions.">classes</a>[classid]-><a class="code" href="structngram__class__s.html#ab5f3cc0142c9fd91b3c3d0e59906b556" title="Base word ID for this class tag.">tag_wid</a> == tag_wid) <a name="l00969"></a>00969 <span class="keywordflow">break</span>; <a name="l00970"></a>00970 } <a name="l00971"></a>00971 <span class="comment">/* Hmm, no such class. It's probably not a good idea to create one. 
*/</span> <a name="l00972"></a>00972 <span class="keywordflow">if</span> (classid == model-><a class="code" href="structngram__model__s.html#adeb914f8e9f011a5c960f5ee9cd33919" title="Number of classes (maximum 128)">n_classes</a>) { <a name="l00973"></a>00973 <a class="code" href="err_8h.html#a5f7b2f58f5a663a6bdd51f197ae21993" title="Print error message to standard error stream.">E_ERROR</a>(<span class="stringliteral">"Word %s is not a class tag (call ngram_model_add_class() first)\n"</span>, classname); <a name="l00974"></a>00974 <span class="keywordflow">return</span> <a class="code" href="ngram__model_8h.html#a3d4b3dddd0ff67e13d30c9bf053d01ab" title="Impossible word ID.">NGRAM_INVALID_WID</a>; <a name="l00975"></a>00975 } <a name="l00976"></a>00976 lmclass = model-><a class="code" href="structngram__model__s.html#a39384af3e6b53591d433436db011ac8d" title="Word class definitions.">classes</a>[classid]; <a name="l00977"></a>00977 <a name="l00978"></a>00978 <span class="comment">/* Add this word to the model's set of words. */</span> <a name="l00979"></a>00979 wid = ngram_add_word_internal(model, word, classid); <a name="l00980"></a>00980 <span class="keywordflow">if</span> (wid == <a class="code" href="ngram__model_8h.html#a3d4b3dddd0ff67e13d30c9bf053d01ab" title="Impossible word ID.">NGRAM_INVALID_WID</a>) <a name="l00981"></a>00981 <span class="keywordflow">return</span> wid; <a name="l00982"></a>00982 <a name="l00983"></a>00983 <span class="comment">/* This is the fixed probability of the new word. */</span> <a name="l00984"></a>00984 fprob = weight * 1.0f / (lmclass-><a class="code" href="structngram__class__s.html#af13562cbc44647435f315b18df5688dc" title="Number of base words for this class.">n_words</a> + lmclass-><a class="code" href="structngram__class__s.html#a79438cd582363800bc05da31a9ca49d6" title="Number of words in nword_hash.">n_hash_inuse</a> + 1); <a name="l00985"></a>00985 <span class="comment">/* Now normalize everything else to fit it in. 
This is</span> <a name="l00986"></a>00986 <span class="comment"> * accomplished by simply scaling all the other probabilities</span> <a name="l00987"></a>00987 <span class="comment"> * by (1-fprob). */</span> <a name="l00988"></a>00988 scale = <a class="code" href="logmath_8h.html#aebb4711268322fa7aec31e5798fe7e90" title="Convert linear floating point number to integer log in base B.">logmath_log</a>(model-><a class="code" href="structngram__model__s.html#a2ca373109c651ac998b33153eb38fd95" title="Log-math object.">lmath</a>, 1.0 - fprob); <a name="l00989"></a>00989 <span class="keywordflow">for</span> (i = 0; i < lmclass-><a class="code" href="structngram__class__s.html#af13562cbc44647435f315b18df5688dc" title="Number of base words for this class.">n_words</a>; ++i) <a name="l00990"></a>00990 lmclass-><a class="code" href="structngram__class__s.html#a50077f48f135f1c666745a21574e4205" title="Probability table for base words.">prob1</a>[i] += scale; <a name="l00991"></a>00991 <span class="keywordflow">for</span> (i = 0; i < lmclass-><a class="code" href="structngram__class__s.html#a7f450019eb6dc2e31b18eb3ab6009920" title="Number of buckets in nword_hash (power of 2)">n_hash</a>; ++i) <a name="l00992"></a>00992 <span class="keywordflow">if</span> (lmclass->nword_hash[i].<a class="code" href="structngram__class__s_1_1ngram__hash__s.html#ad0178b5a86ec23ce790b6b7cb64db0b9" title="Word ID of this bucket.">wid</a> != -1) <a name="l00993"></a>00993 lmclass->nword_hash[i].<a class="code" href="structngram__class__s_1_1ngram__hash__s.html#a6ac4c01b0c8d29f770f4780e38ab0923" title="Probability for this word.">prob1</a> += scale; <a name="l00994"></a>00994 <a name="l00995"></a>00995 <span class="comment">/* Now add it to the class hash table. 
*/</span> <a name="l00996"></a>00996 <span class="keywordflow">return</span> ngram_class_add_word(lmclass, wid, <a class="code" href="logmath_8h.html#aebb4711268322fa7aec31e5798fe7e90" title="Convert linear floating point number to integer log in base B.">logmath_log</a>(model-><a class="code" href="structngram__model__s.html#a2ca373109c651ac998b33153eb38fd95" title="Log-math object.">lmath</a>, fprob)); <a name="l00997"></a>00997 } <a name="l00998"></a>00998 <a name="l00999"></a>00999 int32 <a name="l01000"></a><a class="code" href="ngram__model_8h.html#a0481b03a2bf03ef0a197da0a1f7d1caa">01000</a> <a class="code" href="ngram__model_8h.html#a0481b03a2bf03ef0a197da0a1f7d1caa" title="Add a new class to a language model.">ngram_model_add_class</a>(<a class="code" href="structngram__model__s.html" title="Common implementation of ngram_model_t.">ngram_model_t</a> *model, <a name="l01001"></a>01001 <span class="keyword">const</span> <span class="keywordtype">char</span> *classname, <a name="l01002"></a>01002 float32 classweight, <a name="l01003"></a>01003 <span class="keywordtype">char</span> **words, <a name="l01004"></a>01004 <span class="keyword">const</span> float32 *weights, <a name="l01005"></a>01005 int32 n_words) <a name="l01006"></a>01006 { <a name="l01007"></a>01007 <a class="code" href="structngram__class__s.html" title="Implementation of ngram_class_t.">ngram_class_t</a> *lmclass; <a name="l01008"></a>01008 <a class="code" href="structgnode__s.html" title="A node in a generic list.">glist_t</a> classwords = NULL; <a name="l01009"></a>01009 int32 i, start_wid = -1; <a name="l01010"></a>01010 int32 classid, tag_wid; <a name="l01011"></a>01011 <a name="l01012"></a>01012 <span class="comment">/* Check if classname already exists in model. 
If not, add it.*/</span> <a name="l01013"></a>01013 <span class="keywordflow">if</span> ((tag_wid = <a class="code" href="ngram__model_8h.html#ad03d4355d4ea659815dc25bce8d83880" title="Look up numerical word ID.">ngram_wid</a>(model, classname)) == <a class="code" href="ngram__model_8h.html#a1469e9e1c8516a77c9ac1e248a61ef4e" title="Get the unknown word ID for a language model.">ngram_unknown_wid</a>(model)) { <a name="l01014"></a>01014 tag_wid = <a class="code" href="ngram__model_8h.html#a38e034d58ee0d6a4c0dd6f852cb71c3c" title="Add a word (unigram) to the language model.">ngram_model_add_word</a>(model, classname, classweight); <a name="l01015"></a>01015 <span class="keywordflow">if</span> (tag_wid == <a class="code" href="ngram__model_8h.html#a3d4b3dddd0ff67e13d30c9bf053d01ab" title="Impossible word ID.">NGRAM_INVALID_WID</a>) <a name="l01016"></a>01016 <span class="keywordflow">return</span> -1; <a name="l01017"></a>01017 } <a name="l01018"></a>01018 <a name="l01019"></a>01019 <span class="keywordflow">if</span> (model-><a class="code" href="structngram__model__s.html#adeb914f8e9f011a5c960f5ee9cd33919" title="Number of classes (maximum 128)">n_classes</a> == 128) { <a name="l01020"></a>01020 <a class="code" href="err_8h.html#a5f7b2f58f5a663a6bdd51f197ae21993" title="Print error message to standard error stream.">E_ERROR</a>(<span class="stringliteral">"Number of classes cannot exceed 128 (sorry)\n"</span>); <a name="l01021"></a>01021 <span class="keywordflow">return</span> -1; <a name="l01022"></a>01022 } <a name="l01023"></a>01023 classid = model-><a class="code" href="structngram__model__s.html#adeb914f8e9f011a5c960f5ee9cd33919" title="Number of classes (maximum 128)">n_classes</a>; <a name="l01024"></a>01024 <span class="keywordflow">for</span> (i = 0; i < n_words; ++i) { <a name="l01025"></a>01025 int32 wid; <a name="l01026"></a>01026 <a name="l01027"></a>01027 wid = ngram_add_word_internal(model, words[i], classid); <a name="l01028"></a>01028 <span 
class="keywordflow">if</span> (wid == <a class="code" href="ngram__model_8h.html#a3d4b3dddd0ff67e13d30c9bf053d01ab" title="Impossible word ID.">NGRAM_INVALID_WID</a>) <a name="l01029"></a>01029 <span class="keywordflow">return</span> -1; <a name="l01030"></a>01030 <span class="keywordflow">if</span> (start_wid == -1) <a name="l01031"></a>01031 start_wid = NGRAM_BASEWID(wid); <a name="l01032"></a>01032 classwords = <a class="code" href="glist_8h.html#a4fc4db2fbebd7b659554227d411f6737" title="Create and prepend a new list node containing a single-precision float.">glist_add_float32</a>(classwords, weights[i]); <a name="l01033"></a>01033 } <a name="l01034"></a>01034 classwords = <a class="code" href="glist_8h.html#a399a2a093c6c4ce1012762e4c25c8185" title="Reverse the order of the given glist.">glist_reverse</a>(classwords); <a name="l01035"></a>01035 lmclass = ngram_class_new(model, tag_wid, start_wid, classwords); <a name="l01036"></a>01036 <a class="code" href="glist_8h.html#a45380e15d2c33afc554fd60a8828580c" title="Free the given generic list; user-defined data contained within is not automatically freed...">glist_free</a>(classwords); <a name="l01037"></a>01037 <span class="keywordflow">if</span> (lmclass == NULL) <a name="l01038"></a>01038 <span class="keywordflow">return</span> -1; <a name="l01039"></a>01039 <a name="l01040"></a>01040 ++model-><a class="code" href="structngram__model__s.html#adeb914f8e9f011a5c960f5ee9cd33919" title="Number of classes (maximum 128)">n_classes</a>; <a name="l01041"></a>01041 <span class="keywordflow">if</span> (model-><a class="code" href="structngram__model__s.html#a39384af3e6b53591d433436db011ac8d" title="Word class definitions.">classes</a> == NULL) <a name="l01042"></a>01042 model-><a class="code" href="structngram__model__s.html#a39384af3e6b53591d433436db011ac8d" title="Word class definitions.">classes</a> = <a class="code" href="ckd__alloc_8h.html#aa00ef21903bc4f8a972488417adc8d2e" title="Macros to simplify the use of above 
functions.">ckd_calloc</a>(1, <span class="keyword">sizeof</span>(*model-><a class="code" href="structngram__model__s.html#a39384af3e6b53591d433436db011ac8d" title="Word class definitions.">classes</a>)); <a name="l01043"></a>01043 <span class="keywordflow">else</span> <a name="l01044"></a>01044 model-><a class="code" href="structngram__model__s.html#a39384af3e6b53591d433436db011ac8d" title="Word class definitions.">classes</a> = <a class="code" href="ckd__alloc_8h.html#afd496738b3e114bd494c5a0955f1bfb3" title="Macro for __ckd_realloc__.">ckd_realloc</a>(model-><a class="code" href="structngram__model__s.html#a39384af3e6b53591d433436db011ac8d" title="Word class definitions.">classes</a>, <a name="l01045"></a>01045 model-><a class="code" href="structngram__model__s.html#adeb914f8e9f011a5c960f5ee9cd33919" title="Number of classes (maximum 128)">n_classes</a> * <span class="keyword">sizeof</span>(*model-><a class="code" href="structngram__model__s.html#a39384af3e6b53591d433436db011ac8d" title="Word class definitions.">classes</a>)); <a name="l01046"></a>01046 model-><a class="code" href="structngram__model__s.html#a39384af3e6b53591d433436db011ac8d" title="Word class definitions.">classes</a>[classid] = lmclass; <a name="l01047"></a>01047 <span class="keywordflow">return</span> classid; <a name="l01048"></a>01048 } <a name="l01049"></a>01049 <a name="l01050"></a>01050 int32 <a name="l01051"></a>01051 ngram_class_prob(<a class="code" href="structngram__class__s.html" title="Implementation of ngram_class_t.">ngram_class_t</a> *lmclass, int32 wid) <a name="l01052"></a>01052 { <a name="l01053"></a>01053 int32 base_wid = NGRAM_BASEWID(wid); <a name="l01054"></a>01054 <a name="l01055"></a>01055 <span class="keywordflow">if</span> (base_wid < lmclass->start_wid <a name="l01056"></a>01056 || base_wid > lmclass-><a class="code" href="structngram__class__s.html#a370c88602c7c1f7e3ff1a767c027f5cb" title="Starting base word ID for this class&#39; words.">start_wid</a> + lmclass-><a 
class="code" href="structngram__class__s.html#af13562cbc44647435f315b18df5688dc" title="Number of base words for this class.">n_words</a>) { <a name="l01057"></a>01057 int32 hash; <a name="l01058"></a>01058 <a name="l01059"></a>01059 <span class="comment">/* Look it up in the hash table. */</span> <a name="l01060"></a>01060 hash = wid & (lmclass-><a class="code" href="structngram__class__s.html#a7f450019eb6dc2e31b18eb3ab6009920" title="Number of buckets in nword_hash (power of 2)">n_hash</a> - 1); <a name="l01061"></a>01061 <span class="keywordflow">while</span> (hash != -1 && lmclass->nword_hash[hash].<a class="code" href="structngram__class__s_1_1ngram__hash__s.html#ad0178b5a86ec23ce790b6b7cb64db0b9" title="Word ID of this bucket.">wid</a> != wid) <a name="l01062"></a>01062 hash = lmclass->nword_hash[hash].<a class="code" href="structngram__class__s_1_1ngram__hash__s.html#a88ede5798cadc2bece12c49fa038b090" title="Index of next bucket (or -1 for no collision)">next</a>; <a name="l01063"></a>01063 <span class="keywordflow">if</span> (hash == -1) <a name="l01064"></a>01064 <span class="keywordflow">return</span> 1; <a name="l01065"></a>01065 <span class="keywordflow">return</span> lmclass->nword_hash[hash].<a class="code" href="structngram__class__s_1_1ngram__hash__s.html#a6ac4c01b0c8d29f770f4780e38ab0923" title="Probability for this word.">prob1</a>; <a name="l01066"></a>01066 } <a name="l01067"></a>01067 <span class="keywordflow">else</span> { <a name="l01068"></a>01068 <span class="keywordflow">return</span> lmclass-><a class="code" href="structngram__class__s.html#a50077f48f135f1c666745a21574e4205" title="Probability table for base words.">prob1</a>[base_wid - lmclass-><a class="code" href="structngram__class__s.html#a370c88602c7c1f7e3ff1a767c027f5cb" title="Starting base word ID for this class&#39; words.">start_wid</a>]; <a name="l01069"></a>01069 } <a name="l01070"></a>01070 } <a name="l01071"></a>01071 <a name="l01072"></a>01072 int32 <a 
name="l01073"></a>01073 read_classdef_file(<a class="code" href="structhash__table__t.html">hash_table_t</a> *classes, <span class="keyword">const</span> <span class="keywordtype">char</span> *file_name) <a name="l01074"></a>01074 { <a name="l01075"></a>01075 FILE *fp; <a name="l01076"></a>01076 int32 is_pipe; <a name="l01077"></a>01077 <span class="keywordtype">int</span> inclass; <a name="l01078"></a>01078 int32 rv = -1; <a name="l01079"></a>01079 <a class="code" href="structgnode__s.html" title="A node in a generic list.">gnode_t</a> *gn; <a name="l01080"></a>01080 <a class="code" href="structgnode__s.html" title="A node in a generic list.">glist_t</a> classwords = NULL; <a name="l01081"></a>01081 <a class="code" href="structgnode__s.html" title="A node in a generic list.">glist_t</a> classprobs = NULL; <a name="l01082"></a>01082 <span class="keywordtype">char</span> *classname = NULL; <a name="l01083"></a>01083 <a name="l01084"></a>01084 <span class="keywordflow">if</span> ((fp = <a class="code" href="pio_8h.html#aa3d71506049eb49cf03eff1b89ef281f" title="Like fopen, but use popen and zcat if it is determined that &quot;file&quot; is compressed (i...">fopen_comp</a>(file_name, <span class="stringliteral">"r"</span>, &is_pipe)) == NULL) { <a name="l01085"></a>01085 <a class="code" href="err_8h.html#a5f7b2f58f5a663a6bdd51f197ae21993" title="Print error message to standard error stream.">E_ERROR</a>(<span class="stringliteral">"File %s not found\n"</span>, file_name); <a name="l01086"></a>01086 <span class="keywordflow">return</span> -1; <a name="l01087"></a>01087 } <a name="l01088"></a>01088 <a name="l01089"></a>01089 inclass = FALSE; <a name="l01090"></a>01090 <span class="keywordflow">while</span> (!feof(fp)) { <a name="l01091"></a>01091 <span class="keywordtype">char</span> line[512]; <a name="l01092"></a>01092 <span class="keywordtype">char</span> *wptr[2]; <a name="l01093"></a>01093 <span class="keywordtype">int</span> n_words; <a name="l01094"></a>01094 <a 
name="l01095"></a>01095 <span class="keywordflow">if</span> (fgets(line, <span class="keyword">sizeof</span>(line), fp) == NULL) <a name="l01096"></a>01096 <span class="keywordflow">break</span>; <a name="l01097"></a>01097 <a name="l01098"></a>01098 n_words = <a class="code" href="strfuncs_8h.html#a5b520fdebcca599db86faaf75a82173f" title="Convert a line to an array of &quot;words&quot;, based on whitespace separators.">str2words</a>(line, wptr, 2); <a name="l01099"></a>01099 <span class="keywordflow">if</span> (n_words <= 0) <a name="l01100"></a>01100 <span class="keywordflow">continue</span>; <a name="l01101"></a>01101 <a name="l01102"></a>01102 <span class="keywordflow">if</span> (inclass) { <a name="l01103"></a>01103 <span class="comment">/* Look for an end of class marker. */</span> <a name="l01104"></a>01104 <span class="keywordflow">if</span> (n_words == 2 && 0 == strcmp(wptr[0], <span class="stringliteral">"END"</span>)) { <a name="l01105"></a>01105 <a class="code" href="structclassdef__s.html" title="One class definition from a classdef file.">classdef_t</a> *classdef; <a name="l01106"></a>01106 <a class="code" href="structgnode__s.html" title="A node in a generic list.">gnode_t</a> *word, *weight; <a name="l01107"></a>01107 int32 i; <a name="l01108"></a>01108 <a name="l01109"></a>01109 <span class="keywordflow">if</span> (classname == NULL || 0 != strcmp(wptr[1], classname)) <a name="l01110"></a>01110 <span class="keywordflow">goto</span> error_out; <a name="l01111"></a>01111 inclass = FALSE; <a name="l01112"></a>01112 <a name="l01113"></a>01113 <span class="comment">/* Construct a class from the list of words collected. 
*/</span> <a name="l01114"></a>01114 classdef = <a class="code" href="ckd__alloc_8h.html#aa00ef21903bc4f8a972488417adc8d2e" title="Macros to simplify the use of above functions.">ckd_calloc</a>(1, <span class="keyword">sizeof</span>(*classdef)); <a name="l01115"></a>01115 classwords = <a class="code" href="glist_8h.html#a399a2a093c6c4ce1012762e4c25c8185" title="Reverse the order of the given glist.">glist_reverse</a>(classwords); <a name="l01116"></a>01116 classprobs = <a class="code" href="glist_8h.html#a399a2a093c6c4ce1012762e4c25c8185" title="Reverse the order of the given glist.">glist_reverse</a>(classprobs); <a name="l01117"></a>01117 classdef->n_words = <a class="code" href="glist_8h.html#aeb046e39c540d2f5f792119ea0d24c48" title="Count the number of element in a given link list.">glist_count</a>(classwords); <a name="l01118"></a>01118 classdef->words = <a class="code" href="ckd__alloc_8h.html#aa00ef21903bc4f8a972488417adc8d2e" title="Macros to simplify the use of above functions.">ckd_calloc</a>(classdef->n_words, <a name="l01119"></a>01119 <span class="keyword">sizeof</span>(*classdef->words)); <a name="l01120"></a>01120 classdef->weights = <a class="code" href="ckd__alloc_8h.html#aa00ef21903bc4f8a972488417adc8d2e" title="Macros to simplify the use of above functions.">ckd_calloc</a>(classdef->n_words, <a name="l01121"></a>01121 <span class="keyword">sizeof</span>(*classdef->weights)); <a name="l01122"></a>01122 word = classwords; <a name="l01123"></a>01123 weight = classprobs; <a name="l01124"></a>01124 <span class="keywordflow">for</span> (i = 0; i < classdef->n_words; ++i) { <a name="l01125"></a>01125 classdef->words[i] = <a class="code" href="glist_8h.html#ace56682f14d84cc456c805d26fd86734" title="Head of a list of gnodes.">gnode_ptr</a>(word); <a name="l01126"></a>01126 classdef->weights[i] = gnode_float32(weight); <a name="l01127"></a>01127 word = gnode_next(word); <a name="l01128"></a>01128 weight = gnode_next(weight); <a name="l01129"></a>01129 } <a 
name="l01130"></a>01130 <a name="l01131"></a>01131 <span class="comment">/* Add this class to the hash table. */</span> <a name="l01132"></a>01132 <span class="keywordflow">if</span> (<a class="code" href="hash__table_8h.html#aebfe63c3869c271b125a8413ee384412" title="Try to add a new entry with given key and associated value to hash table h.">hash_table_enter</a>(classes, classname, classdef) != classdef) { <a name="l01133"></a>01133 classdef_free(classdef); <a name="l01134"></a>01134 <span class="keywordflow">goto</span> error_out; <a name="l01135"></a>01135 } <a name="l01136"></a>01136 <a name="l01137"></a>01137 <span class="comment">/* Reset everything. */</span> <a name="l01138"></a>01138 <a class="code" href="glist_8h.html#a45380e15d2c33afc554fd60a8828580c" title="Free the given generic list; user-defined data contained within is not automatically freed...">glist_free</a>(classwords); <a name="l01139"></a>01139 <a class="code" href="glist_8h.html#a45380e15d2c33afc554fd60a8828580c" title="Free the given generic list; user-defined data contained within is not automatically freed...">glist_free</a>(classprobs); <a name="l01140"></a>01140 classwords = NULL; <a name="l01141"></a>01141 classprobs = NULL; <a name="l01142"></a>01142 classname = NULL; <a name="l01143"></a>01143 } <a name="l01144"></a>01144 <span class="keywordflow">else</span> { <a name="l01145"></a>01145 float32 fprob; <a name="l01146"></a>01146 <a name="l01147"></a>01147 <span class="keywordflow">if</span> (n_words == 2) <a name="l01148"></a>01148 fprob = (float32)<a class="code" href="strfuncs_8h.html#ab708351fe7308551632a782bfad75a1e" title="Locale independent version of atof().">atof_c</a>(wptr[1]); <a name="l01149"></a>01149 <span class="keywordflow">else</span> <a name="l01150"></a>01150 fprob = 1.0f; <a name="l01151"></a>01151 <span class="comment">/* Add it to the list of words for this class. 
*/</span> <a name="l01152"></a>01152 classwords = <a class="code" href="glist_8h.html#a77a9c20b7df5a289477af405ab778377" title="Create and prepend a new list node, with the given user-defined data, at the HEAD of the given generi...">glist_add_ptr</a>(classwords, <a class="code" href="ckd__alloc_8h.html#ad313f92478859f9e4ea99d0f6e78c393" title="Macro for __ckd_salloc__.">ckd_salloc</a>(wptr[0])); <a name="l01153"></a>01153 classprobs = <a class="code" href="glist_8h.html#a4fc4db2fbebd7b659554227d411f6737" title="Create and prepend a new list node containing a single-precision float.">glist_add_float32</a>(classprobs, fprob); <a name="l01154"></a>01154 } <a name="l01155"></a>01155 } <a name="l01156"></a>01156 <span class="keywordflow">else</span> { <a name="l01157"></a>01157 <span class="comment">/* Start a new LM class if the LMCLASS marker is seen */</span> <a name="l01158"></a>01158 <span class="keywordflow">if</span> (n_words == 2 && 0 == strcmp(wptr[0], <span class="stringliteral">"LMCLASS"</span>)) { <a name="l01159"></a>01159 <span class="keywordflow">if</span> (inclass) <a name="l01160"></a>01160 <span class="keywordflow">goto</span> error_out; <a name="l01161"></a>01161 inclass = TRUE; <a name="l01162"></a>01162 classname = <a class="code" href="ckd__alloc_8h.html#ad313f92478859f9e4ea99d0f6e78c393" title="Macro for __ckd_salloc__.">ckd_salloc</a>(wptr[1]); <a name="l01163"></a>01163 } <a name="l01164"></a>01164 <span class="comment">/* Otherwise, just ignore whatever junk we got */</span> <a name="l01165"></a>01165 } <a name="l01166"></a>01166 } <a name="l01167"></a>01167 rv = 0; <span class="comment">/* Success. */</span> <a name="l01168"></a>01168 <a name="l01169"></a>01169 error_out: <a name="l01170"></a>01170 <span class="comment">/* Free all the stuff we might have allocated. 
*/</span> <a name="l01171"></a>01171 <a class="code" href="pio_8h.html#a87592c3a2d0a00eed9eda014950beb65" title="Close a file opened using fopen_comp.">fclose_comp</a>(fp, is_pipe); <a name="l01172"></a>01172 <span class="keywordflow">for</span> (gn = classwords; gn; gn = gnode_next(gn)) <a name="l01173"></a>01173 <a class="code" href="ckd__alloc_8h.html#a31c6b405558620ac37599737b5722fbf" title="Test and free a 1-D array.">ckd_free</a>(<a class="code" href="glist_8h.html#ace56682f14d84cc456c805d26fd86734" title="Head of a list of gnodes.">gnode_ptr</a>(gn)); <a name="l01174"></a>01174 <a class="code" href="glist_8h.html#a45380e15d2c33afc554fd60a8828580c" title="Free the given generic list; user-defined data contained within is not automatically freed...">glist_free</a>(classwords); <a name="l01175"></a>01175 <a class="code" href="glist_8h.html#a45380e15d2c33afc554fd60a8828580c" title="Free the given generic list; user-defined data contained within is not automatically freed...">glist_free</a>(classprobs); <a name="l01176"></a>01176 <a class="code" href="ckd__alloc_8h.html#a31c6b405558620ac37599737b5722fbf" title="Test and free a 1-D array.">ckd_free</a>(classname); <a name="l01177"></a>01177 <a name="l01178"></a>01178 <span class="keywordflow">return</span> rv; <a name="l01179"></a>01179 } <a name="l01180"></a>01180 <a name="l01181"></a>01181 <span class="keywordtype">void</span> <a name="l01182"></a>01182 classdef_free(<a class="code" href="structclassdef__s.html" title="One class definition from a classdef file.">classdef_t</a> *classdef) <a name="l01183"></a>01183 { <a name="l01184"></a>01184 int32 i; <a name="l01185"></a>01185 <span class="keywordflow">for</span> (i = 0; i < classdef->n_words; ++i) <a name="l01186"></a>01186 <a class="code" href="ckd__alloc_8h.html#a31c6b405558620ac37599737b5722fbf" title="Test and free a 1-D array.">ckd_free</a>(classdef->words[i]); <a name="l01187"></a>01187 <a class="code" 
href="ckd__alloc_8h.html#a31c6b405558620ac37599737b5722fbf" title="Test and free a 1-D array.">ckd_free</a>(classdef->words); <a name="l01188"></a>01188 <a class="code" href="ckd__alloc_8h.html#a31c6b405558620ac37599737b5722fbf" title="Test and free a 1-D array.">ckd_free</a>(classdef->weights); <a name="l01189"></a>01189 <a class="code" href="ckd__alloc_8h.html#a31c6b405558620ac37599737b5722fbf" title="Test and free a 1-D array.">ckd_free</a>(classdef); <a name="l01190"></a>01190 } <a name="l01191"></a>01191 <a name="l01192"></a>01192 <a name="l01193"></a>01193 int32 <a name="l01194"></a><a class="code" href="ngram__model_8h.html#a9b2a86c23543158754373c5456fe890d">01194</a> <a class="code" href="ngram__model_8h.html#a9b2a86c23543158754373c5456fe890d" title="Read a class definition file and add classes to a language model.">ngram_model_read_classdef</a>(<a class="code" href="structngram__model__s.html" title="Common implementation of ngram_model_t.">ngram_model_t</a> *model, <a name="l01195"></a>01195 <span class="keyword">const</span> <span class="keywordtype">char</span> *file_name) <a name="l01196"></a>01196 { <a name="l01197"></a>01197 <a class="code" href="structhash__table__t.html">hash_table_t</a> *classes; <a name="l01198"></a>01198 <a class="code" href="structgnode__s.html" title="A node in a generic list.">glist_t</a> hl = NULL; <a name="l01199"></a>01199 <a class="code" href="structgnode__s.html" title="A node in a generic list.">gnode_t</a> *gn; <a name="l01200"></a>01200 int32 rv = -1; <a name="l01201"></a>01201 <a name="l01202"></a>01202 classes = <a class="code" href="hash__table_8h.html#a56d93e8c03e066b77377ac6eab50cfae" title="Allocate a new hash table for a given expected size.">hash_table_new</a>(0, FALSE); <a name="l01203"></a>01203 <span class="keywordflow">if</span> (read_classdef_file(classes, file_name) < 0) { <a name="l01204"></a>01204 <a class="code" href="hash__table_8h.html#a0a588c22946f8cc16328973035ed19e3" title="Free the specified 
hash table; the caller is responsible for freeing the key strings pointed to by th...">hash_table_free</a>(classes); <a name="l01205"></a>01205 <span class="keywordflow">return</span> -1; <a name="l01206"></a>01206 } <a name="l01207"></a>01207 <a name="l01208"></a>01208 <span class="comment">/* Create a new class in the language model for each classdef. */</span> <a name="l01209"></a>01209 hl = <a class="code" href="hash__table_8h.html#a61f59389f05d8871003da4692a9c2acc" title="Build a glist of valid hash_entry_t pointers from the given hash table.">hash_table_tolist</a>(classes, NULL); <a name="l01210"></a>01210 <span class="keywordflow">for</span> (gn = hl; gn; gn = gnode_next(gn)) { <a name="l01211"></a>01211 <a class="code" href="structhash__entry__s.html" title="A note by ARCHAN at 20050510: Technically what we use is so-called &quot;hash table with buckets&quot;...">hash_entry_t</a> *he = <a class="code" href="glist_8h.html#ace56682f14d84cc456c805d26fd86734" title="Head of a list of gnodes.">gnode_ptr</a>(gn); <a name="l01212"></a>01212 <a class="code" href="structclassdef__s.html" title="One class definition from a classdef file.">classdef_t</a> *classdef = he-><a class="code" href="structhash__entry__s.html#a0d57012963084fed93886681108aa636" title="Key-length; the key string does not have to be a C-style NULL terminated string; it can have arbitrar...">val</a>; <a name="l01213"></a>01213 <a name="l01214"></a>01214 <span class="keywordflow">if</span> (<a class="code" href="ngram__model_8h.html#a0481b03a2bf03ef0a197da0a1f7d1caa" title="Add a new class to a language model.">ngram_model_add_class</a>(model, he->key, 1.0, <a name="l01215"></a>01215 classdef->words, <a name="l01216"></a>01216 classdef->weights, <a name="l01217"></a>01217 classdef->n_words) < 0) <a name="l01218"></a>01218 <span class="keywordflow">goto</span> error_out; <a name="l01219"></a>01219 } <a name="l01220"></a>01220 rv = 0; <a name="l01221"></a>01221 <a name="l01222"></a>01222 error_out: <a 
name="l01223"></a>01223 <span class="keywordflow">for</span> (gn = hl; gn; gn = gnode_next(gn)) { <a name="l01224"></a>01224 <a class="code" href="structhash__entry__s.html" title="A note by ARCHAN at 20050510: Technically what we use is so-called &quot;hash table with buckets&quot;...">hash_entry_t</a> *he = <a class="code" href="glist_8h.html#ace56682f14d84cc456c805d26fd86734" title="Head of a list of gnodes.">gnode_ptr</a>(gn); <a name="l01225"></a>01225 <a class="code" href="ckd__alloc_8h.html#a31c6b405558620ac37599737b5722fbf" title="Test and free a 1-D array.">ckd_free</a>((<span class="keywordtype">char</span> *)he->key); <a name="l01226"></a>01226 classdef_free(he-><a class="code" href="structhash__entry__s.html#a0d57012963084fed93886681108aa636" title="Key-length; the key string does not have to be a C-style NULL terminated string; it can have arbitrar...">val</a>); <a name="l01227"></a>01227 } <a name="l01228"></a>01228 <a class="code" href="glist_8h.html#a45380e15d2c33afc554fd60a8828580c" title="Free the given generic list; user-defined data contained within is not automatically freed...">glist_free</a>(hl); <a name="l01229"></a>01229 <a class="code" href="hash__table_8h.html#a0a588c22946f8cc16328973035ed19e3" title="Free the specified hash table; the caller is responsible for freeing the key strings pointed to by th...">hash_table_free</a>(classes); <a name="l01230"></a>01230 <span class="keywordflow">return</span> rv; <a name="l01231"></a>01231 } </pre></div></div> </div> <div id="nav-path" class="navpath"> <ul> <li class="navelem"><b>ngram_model.c</b> </li> <li class="footer">Generated on Tue Apr 19 2011 for SphinxBase by&#160; <a href="http://www.doxygen.org/index.html"> <img class="footer" src="doxygen.png" alt="doxygen"/></a> 1.7.3 </li> </ul> </div> </body> </html>