Sophie

Sophie

distrib > Fedora > 14 > x86_64 > media > updates > by-pkgid > 0b420d0fce195cf4115dc6a3be5c2da2 > files > 293

sphinxbase-devel-0.7-1.fc14.i686.rpm

<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="Content-Type" content="text/html;charset=UTF-8"/>
<title>SphinxBase: src/libsphinxbase/lm/ngram_model.c Source File</title>
<link href="tabs.css" rel="stylesheet" type="text/css"/>
<link href="navtree.css" rel="stylesheet" type="text/css"/>
<script type="text/javascript" src="jquery.js"></script>
<script type="text/javascript" src="navtree.js"></script>
<script type="text/javascript" src="resize.js"></script>
<script type="text/javascript">
$(document).ready(initResizable);
</script>
<link href="doxygen.css" rel="stylesheet" type="text/css"/>
</head>
<body>
<!-- Generated by Doxygen 1.7.3 -->
<div id="top">
<div id="titlearea">
<table cellspacing="0" cellpadding="0">
 <tbody>
 <tr style="height: 56px;">
  <td style="padding-left: 0.5em;">
   <div id="projectname">SphinxBase&#160;<span id="projectnumber">0.6</span></div>
  </td>
 </tr>
 </tbody>
</table>
</div>
  <div id="navrow1" class="tabs">
    <ul class="tablist">
      <li><a href="index.html"><span>Main&#160;Page</span></a></li>
      <li><a href="pages.html"><span>Related&#160;Pages</span></a></li>
      <li><a href="annotated.html"><span>Data&#160;Structures</span></a></li>
      <li class="current"><a href="files.html"><span>Files</span></a></li>
    </ul>
  </div>
  <div id="navrow2" class="tabs2">
    <ul class="tablist">
      <li><a href="files.html"><span>File&#160;List</span></a></li>
      <li><a href="globals.html"><span>Globals</span></a></li>
    </ul>
  </div>
</div>
<div id="side-nav" class="ui-resizable side-nav-resizable">
  <div id="nav-tree">
    <div id="nav-tree-contents">
    </div>
  </div>
  <div id="splitbar" style="-moz-user-select:none;" 
       class="ui-resizable-handle">
  </div>
</div>
<script type="text/javascript">
  initNavTree('ngram__model_8c.html','');
</script>
<div id="doc-content">
<div class="header">
  <div class="headertitle">
<h1>src/libsphinxbase/lm/ngram_model.c</h1>  </div>
</div>
<div class="contents">
<div class="fragment"><pre class="fragment"><a name="l00001"></a>00001 <span class="comment">/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */</span>
<a name="l00002"></a>00002 <span class="comment">/* ====================================================================</span>
<a name="l00003"></a>00003 <span class="comment"> * Copyright (c) 1999-2007 Carnegie Mellon University.  All rights</span>
<a name="l00004"></a>00004 <span class="comment"> * reserved.</span>
<a name="l00005"></a>00005 <span class="comment"> *</span>
<a name="l00006"></a>00006 <span class="comment"> * Redistribution and use in source and binary forms, with or without</span>
<a name="l00007"></a>00007 <span class="comment"> * modification, are permitted provided that the following conditions</span>
<a name="l00008"></a>00008 <span class="comment"> * are met:</span>
<a name="l00009"></a>00009 <span class="comment"> *</span>
<a name="l00010"></a>00010 <span class="comment"> * 1. Redistributions of source code must retain the above copyright</span>
<a name="l00011"></a>00011 <span class="comment"> *    notice, this list of conditions and the following disclaimer. </span>
<a name="l00012"></a>00012 <span class="comment"> *</span>
<a name="l00013"></a>00013 <span class="comment"> * 2. Redistributions in binary form must reproduce the above copyright</span>
<a name="l00014"></a>00014 <span class="comment"> *    notice, this list of conditions and the following disclaimer in</span>
<a name="l00015"></a>00015 <span class="comment"> *    the documentation and/or other materials provided with the</span>
<a name="l00016"></a>00016 <span class="comment"> *    distribution.</span>
<a name="l00017"></a>00017 <span class="comment"> *</span>
<a name="l00018"></a>00018 <span class="comment"> * This work was supported in part by funding from the Defense Advanced </span>
<a name="l00019"></a>00019 <span class="comment"> * Research Projects Agency and the National Science Foundation of the </span>
<a name="l00020"></a>00020 <span class="comment"> * United States of America, and the CMU Sphinx Speech Consortium.</span>
<a name="l00021"></a>00021 <span class="comment"> *</span>
<a name="l00022"></a>00022 <span class="comment"> * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS&#39;&#39; AND </span>
<a name="l00023"></a>00023 <span class="comment"> * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, </span>
<a name="l00024"></a>00024 <span class="comment"> * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR</span>
<a name="l00025"></a>00025 <span class="comment"> * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY</span>
<a name="l00026"></a>00026 <span class="comment"> * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,</span>
<a name="l00027"></a>00027 <span class="comment"> * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT </span>
<a name="l00028"></a>00028 <span class="comment"> * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, </span>
<a name="l00029"></a>00029 <span class="comment"> * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY </span>
<a name="l00030"></a>00030 <span class="comment"> * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT </span>
<a name="l00031"></a>00031 <span class="comment"> * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE </span>
<a name="l00032"></a>00032 <span class="comment"> * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.</span>
<a name="l00033"></a>00033 <span class="comment"> *</span>
<a name="l00034"></a>00034 <span class="comment"> * ====================================================================</span>
<a name="l00035"></a>00035 <span class="comment"> *</span>
<a name="l00036"></a>00036 <span class="comment"> */</span>
<a name="l00037"></a>00037 <span class="comment">/*</span>
<a name="l00038"></a>00038 <span class="comment"> * \file ngram_model.c N-Gram language models.</span>
<a name="l00039"></a>00039 <span class="comment"> *</span>
<a name="l00040"></a>00040 <span class="comment"> * Author: David Huggins-Daines, much code taken from sphinx3/src/libs3decoder/liblm</span>
<a name="l00041"></a>00041 <span class="comment"> */</span>
<a name="l00042"></a>00042 
<a name="l00043"></a>00043 <span class="preprocessor">#include &lt;config.h&gt;</span>
<a name="l00044"></a>00044 
<a name="l00045"></a>00045 <span class="preprocessor">#include &lt;string.h&gt;</span>
<a name="l00046"></a>00046 <span class="preprocessor">#include &lt;assert.h&gt;</span>
<a name="l00047"></a>00047 
<a name="l00048"></a>00048 <span class="preprocessor">#ifdef HAVE_ICONV</span>
<a name="l00049"></a>00049 <span class="preprocessor"></span><span class="preprocessor">#include &lt;iconv.h&gt;</span>
<a name="l00050"></a>00050 <span class="preprocessor">#endif </span>
<a name="l00051"></a>00051 <span class="preprocessor"></span>
<a name="l00052"></a>00052 <span class="preprocessor">#include &quot;sphinxbase/ngram_model.h&quot;</span>
<a name="l00053"></a>00053 <span class="preprocessor">#include &quot;sphinxbase/ckd_alloc.h&quot;</span>
<a name="l00054"></a>00054 <span class="preprocessor">#include &quot;sphinxbase/filename.h&quot;</span>
<a name="l00055"></a>00055 <span class="preprocessor">#include &quot;sphinxbase/pio.h&quot;</span>
<a name="l00056"></a>00056 <span class="preprocessor">#include &quot;sphinxbase/err.h&quot;</span>
<a name="l00057"></a>00057 <span class="preprocessor">#include &quot;sphinxbase/logmath.h&quot;</span>
<a name="l00058"></a>00058 <span class="preprocessor">#include &quot;sphinxbase/strfuncs.h&quot;</span>
<a name="l00059"></a>00059 <span class="preprocessor">#include &quot;sphinxbase/case.h&quot;</span>
<a name="l00060"></a>00060 
<a name="l00061"></a>00061 <span class="preprocessor">#include &quot;ngram_model_internal.h&quot;</span>
<a name="l00062"></a>00062 
<a name="l00063"></a>00063 <a class="code" href="ngram__model_8h.html#a0e7d02703c48237b2afea436392dcb82" title="File types for N-Gram files.">ngram_file_type_t</a>
<a name="l00064"></a><a class="code" href="ngram__model_8h.html#ad758d3f491d501bdec2ada8088e9b656">00064</a> <a class="code" href="ngram__model_8h.html#ad758d3f491d501bdec2ada8088e9b656" title="Guess the file type for an N-Gram model from the filename.">ngram_file_name_to_type</a>(<span class="keyword">const</span> <span class="keywordtype">char</span> *file_name)
<a name="l00065"></a>00065 {
<a name="l00066"></a>00066     <span class="keyword">const</span> <span class="keywordtype">char</span> *ext;
<a name="l00067"></a>00067 
<a name="l00068"></a>00068     ext = strrchr(file_name, <span class="charliteral">&#39;.&#39;</span>);
<a name="l00069"></a>00069     <span class="keywordflow">if</span> (ext == NULL) {
<a name="l00070"></a>00070         <span class="keywordflow">return</span> <a class="code" href="ngram__model_8h.html#a406c0d64c15a9d1749d07c8ab6e0ae74a2cf596c8b0c63a3a0ba1fe33326cc796" title="Not a valid file type.">NGRAM_INVALID</a>;
<a name="l00071"></a>00071     }
<a name="l00072"></a>00072     <span class="keywordflow">if</span> (0 == <a class="code" href="case_8h.html#ad276a997bd6709d986aa6e1e4e06c210" title="(FIXME! The implementation is incorrect!) Case insensitive string compare.">strcmp_nocase</a>(ext, <span class="stringliteral">&quot;.gz&quot;</span>)) {
<a name="l00073"></a>00073         <span class="keywordflow">while</span> (--ext &gt;= file_name) {
<a name="l00074"></a>00074             <span class="keywordflow">if</span> (*ext == <span class="charliteral">&#39;.&#39;</span>) <span class="keywordflow">break</span>;
<a name="l00075"></a>00075         }
<a name="l00076"></a>00076         <span class="keywordflow">if</span> (ext &lt; file_name) {
<a name="l00077"></a>00077             <span class="keywordflow">return</span> <a class="code" href="ngram__model_8h.html#a406c0d64c15a9d1749d07c8ab6e0ae74a2cf596c8b0c63a3a0ba1fe33326cc796" title="Not a valid file type.">NGRAM_INVALID</a>;
<a name="l00078"></a>00078          }
<a name="l00079"></a>00079      }
<a name="l00080"></a>00080      <span class="keywordflow">else</span> <span class="keywordflow">if</span> (0 == <a class="code" href="case_8h.html#ad276a997bd6709d986aa6e1e4e06c210" title="(FIXME! The implementation is incorrect!) Case insensitive string compare.">strcmp_nocase</a>(ext, <span class="stringliteral">&quot;.bz2&quot;</span>)) {
<a name="l00081"></a>00081          <span class="keywordflow">while</span> (--ext &gt;= file_name) {
<a name="l00082"></a>00082              <span class="keywordflow">if</span> (*ext == <span class="charliteral">&#39;.&#39;</span>) <span class="keywordflow">break</span>;
<a name="l00083"></a>00083          }
<a name="l00084"></a>00084          <span class="keywordflow">if</span> (ext &lt; file_name) {
<a name="l00085"></a>00085              <span class="keywordflow">return</span> <a class="code" href="ngram__model_8h.html#a406c0d64c15a9d1749d07c8ab6e0ae74a2cf596c8b0c63a3a0ba1fe33326cc796" title="Not a valid file type.">NGRAM_INVALID</a>;
<a name="l00086"></a>00086          }
<a name="l00087"></a>00087      }
<a name="l00088"></a>00088      <span class="comment">/* We use strncmp because there might be a .gz on the end. */</span>
<a name="l00089"></a>00089      <span class="keywordflow">if</span> (0 == <a class="code" href="case_8h.html#ae7c2a7f29d72b9516a947f5c69a043db" title="Like strcmp_nocase() but with a maximum length.">strncmp_nocase</a>(ext, <span class="stringliteral">&quot;.ARPA&quot;</span>, 5))
<a name="l00090"></a>00090          <span class="keywordflow">return</span> <a class="code" href="ngram__model_8h.html#a406c0d64c15a9d1749d07c8ab6e0ae74a77ef2341d826b3cc8b836e3af1efba99" title="ARPABO text format (the standard).">NGRAM_ARPA</a>;
<a name="l00091"></a>00091      <span class="keywordflow">if</span> (0 == <a class="code" href="case_8h.html#ae7c2a7f29d72b9516a947f5c69a043db" title="Like strcmp_nocase() but with a maximum length.">strncmp_nocase</a>(ext, <span class="stringliteral">&quot;.DMP&quot;</span>, 4))
<a name="l00092"></a>00092          <span class="keywordflow">return</span> <a class="code" href="ngram__model_8h.html#a406c0d64c15a9d1749d07c8ab6e0ae74addfc3620d0fbc6f05f7f8e455245dd92" title="Sphinx .DMP format.">NGRAM_DMP</a>;
<a name="l00093"></a>00093      <span class="keywordflow">return</span> <a class="code" href="ngram__model_8h.html#a406c0d64c15a9d1749d07c8ab6e0ae74a2cf596c8b0c63a3a0ba1fe33326cc796" title="Not a valid file type.">NGRAM_INVALID</a>;
<a name="l00094"></a>00094  }
<a name="l00095"></a>00095 
<a name="l00096"></a>00096 <a class="code" href="ngram__model_8h.html#a0e7d02703c48237b2afea436392dcb82" title="File types for N-Gram files.">ngram_file_type_t</a>
<a name="l00097"></a><a class="code" href="ngram__model_8h.html#affb3d938049e58c191e72858a7191e9c">00097</a> <a class="code" href="ngram__model_8h.html#affb3d938049e58c191e72858a7191e9c" title="Get the N-Gram file type from a string.">ngram_str_to_type</a>(<span class="keyword">const</span> <span class="keywordtype">char</span> *str_name)
<a name="l00098"></a>00098 {
<a name="l00099"></a>00099     <span class="keywordflow">if</span> (0 == <a class="code" href="case_8h.html#ad276a997bd6709d986aa6e1e4e06c210" title="(FIXME! The implementation is incorrect!) Case insensitive string compare.">strcmp_nocase</a>(str_name, <span class="stringliteral">&quot;arpa&quot;</span>))
<a name="l00100"></a>00100         <span class="keywordflow">return</span> <a class="code" href="ngram__model_8h.html#a406c0d64c15a9d1749d07c8ab6e0ae74a77ef2341d826b3cc8b836e3af1efba99" title="ARPABO text format (the standard).">NGRAM_ARPA</a>;
<a name="l00101"></a>00101     <span class="keywordflow">if</span> (0 == <a class="code" href="case_8h.html#ad276a997bd6709d986aa6e1e4e06c210" title="(FIXME! The implementation is incorrect!) Case insensitive string compare.">strcmp_nocase</a>(str_name, <span class="stringliteral">&quot;dmp&quot;</span>))
<a name="l00102"></a>00102         <span class="keywordflow">return</span> <a class="code" href="ngram__model_8h.html#a406c0d64c15a9d1749d07c8ab6e0ae74addfc3620d0fbc6f05f7f8e455245dd92" title="Sphinx .DMP format.">NGRAM_DMP</a>;
<a name="l00103"></a>00103     <span class="keywordflow">return</span> <a class="code" href="ngram__model_8h.html#a406c0d64c15a9d1749d07c8ab6e0ae74a2cf596c8b0c63a3a0ba1fe33326cc796" title="Not a valid file type.">NGRAM_INVALID</a>;
<a name="l00104"></a>00104 }
<a name="l00105"></a>00105 
<a name="l00106"></a>00106 <span class="keywordtype">char</span> <span class="keyword">const</span> *
<a name="l00107"></a><a class="code" href="ngram__model_8h.html#a992b8a8d7f623e2662ae94991f6aec55">00107</a> <a class="code" href="ngram__model_8h.html#a992b8a8d7f623e2662ae94991f6aec55" title="Get the canonical name for an N-Gram file type.">ngram_type_to_str</a>(<span class="keywordtype">int</span> type)
<a name="l00108"></a>00108 {
<a name="l00109"></a>00109     <span class="keywordflow">switch</span> (type) {
<a name="l00110"></a>00110     <span class="keywordflow">case</span> <a class="code" href="ngram__model_8h.html#a406c0d64c15a9d1749d07c8ab6e0ae74a77ef2341d826b3cc8b836e3af1efba99" title="ARPABO text format (the standard).">NGRAM_ARPA</a>:
<a name="l00111"></a>00111         <span class="keywordflow">return</span> <span class="stringliteral">&quot;arpa&quot;</span>;
<a name="l00112"></a>00112     <span class="keywordflow">case</span> <a class="code" href="ngram__model_8h.html#a406c0d64c15a9d1749d07c8ab6e0ae74addfc3620d0fbc6f05f7f8e455245dd92" title="Sphinx .DMP format.">NGRAM_DMP</a>:
<a name="l00113"></a>00113         <span class="keywordflow">return</span> <span class="stringliteral">&quot;dmp&quot;</span>;
<a name="l00114"></a>00114     <span class="keywordflow">default</span>:
<a name="l00115"></a>00115         <span class="keywordflow">return</span> NULL;
<a name="l00116"></a>00116     }
<a name="l00117"></a>00117 }
<a name="l00118"></a>00118 
<a name="l00119"></a>00119 
<a name="l00120"></a>00120  <a class="code" href="structngram__model__s.html" title="Common implementation of ngram_model_t.">ngram_model_t</a> *
<a name="l00121"></a><a class="code" href="ngram__model_8h.html#ab0c840f2bdfc38cea08bb70054f76624">00121</a>  <a class="code" href="ngram__model_8h.html#ab0c840f2bdfc38cea08bb70054f76624" title="Read an N-Gram model from a file on disk.">ngram_model_read</a>(<a class="code" href="structcmd__ln__t.html" title="Opaque structure used to hold the results of command-line parsing.">cmd_ln_t</a> *config,
<a name="l00122"></a>00122                   <span class="keyword">const</span> <span class="keywordtype">char</span> *file_name,
<a name="l00123"></a>00123                   <a class="code" href="ngram__model_8h.html#a0e7d02703c48237b2afea436392dcb82" title="File types for N-Gram files.">ngram_file_type_t</a> file_type,
<a name="l00124"></a>00124                   <a class="code" href="structlogmath__s.html">logmath_t</a> *lmath)
<a name="l00125"></a>00125  {
<a name="l00126"></a>00126      <a class="code" href="structngram__model__s.html" title="Common implementation of ngram_model_t.">ngram_model_t</a> *model = NULL;
<a name="l00127"></a>00127 
<a name="l00128"></a>00128      <span class="keywordflow">switch</span> (file_type) {
<a name="l00129"></a>00129      <span class="keywordflow">case</span> <a class="code" href="ngram__model_8h.html#a406c0d64c15a9d1749d07c8ab6e0ae74a441701bf8ae0a2b79716feb31b5f257a" title="Determine file type automatically.">NGRAM_AUTO</a>: {
<a name="l00130"></a>00130          <span class="keywordflow">if</span> ((model = ngram_model_arpa_read(config, file_name, lmath)) != NULL)
<a name="l00131"></a>00131              <span class="keywordflow">break</span>;
<a name="l00132"></a>00132          <span class="keywordflow">if</span> ((model = ngram_model_dmp_read(config, file_name, lmath)) != NULL)
<a name="l00133"></a>00133              <span class="keywordflow">break</span>;
<a name="l00134"></a>00134          <span class="keywordflow">return</span> NULL;
<a name="l00135"></a>00135      }
<a name="l00136"></a>00136      <span class="keywordflow">case</span> <a class="code" href="ngram__model_8h.html#a406c0d64c15a9d1749d07c8ab6e0ae74a77ef2341d826b3cc8b836e3af1efba99" title="ARPABO text format (the standard).">NGRAM_ARPA</a>:
<a name="l00137"></a>00137          model = ngram_model_arpa_read(config, file_name, lmath);
<a name="l00138"></a>00138          <span class="keywordflow">break</span>;
<a name="l00139"></a>00139      <span class="keywordflow">case</span> <a class="code" href="ngram__model_8h.html#a406c0d64c15a9d1749d07c8ab6e0ae74addfc3620d0fbc6f05f7f8e455245dd92" title="Sphinx .DMP format.">NGRAM_DMP</a>:
<a name="l00140"></a>00140          model = ngram_model_dmp_read(config, file_name, lmath);
<a name="l00141"></a>00141          <span class="keywordflow">break</span>;
<a name="l00142"></a>00142      <span class="keywordflow">default</span>:
<a name="l00143"></a>00143          <a class="code" href="err_8h.html#a5f7b2f58f5a663a6bdd51f197ae21993" title="Print error message to standard error stream.">E_ERROR</a>(<span class="stringliteral">&quot;language model file type not supported\n&quot;</span>);
<a name="l00144"></a>00144          <span class="keywordflow">return</span> NULL;
<a name="l00145"></a>00145      }
<a name="l00146"></a>00146 
<a name="l00147"></a>00147      <span class="comment">/* Now set weights based on config if present. */</span>
<a name="l00148"></a>00148      <span class="keywordflow">if</span> (config) {
<a name="l00149"></a>00149          float32 lw = 1.0;
<a name="l00150"></a>00150          float32 wip = 1.0;
<a name="l00151"></a>00151          float32 uw = 1.0;
<a name="l00152"></a>00152 
<a name="l00153"></a>00153          <span class="keywordflow">if</span> (<a class="code" href="cmd__ln_8h.html#ab4ad5ae130e3c2c042590b41768635e3" title="Re-entrant version of cmd_ln_exists().">cmd_ln_exists_r</a>(config, <span class="stringliteral">&quot;-lw&quot;</span>))
<a name="l00154"></a>00154              lw = cmd_ln_float32_r(config, <span class="stringliteral">&quot;-lw&quot;</span>);
<a name="l00155"></a>00155          <span class="keywordflow">if</span> (<a class="code" href="cmd__ln_8h.html#ab4ad5ae130e3c2c042590b41768635e3" title="Re-entrant version of cmd_ln_exists().">cmd_ln_exists_r</a>(config, <span class="stringliteral">&quot;-wip&quot;</span>))
<a name="l00156"></a>00156              wip = cmd_ln_float32_r(config, <span class="stringliteral">&quot;-wip&quot;</span>);
<a name="l00157"></a>00157          <span class="keywordflow">if</span> (<a class="code" href="cmd__ln_8h.html#ab4ad5ae130e3c2c042590b41768635e3" title="Re-entrant version of cmd_ln_exists().">cmd_ln_exists_r</a>(config, <span class="stringliteral">&quot;-uw&quot;</span>))
<a name="l00158"></a>00158              uw = cmd_ln_float32_r(config, <span class="stringliteral">&quot;-uw&quot;</span>);
<a name="l00159"></a>00159 
<a name="l00160"></a>00160          <a class="code" href="ngram__model_8h.html#aa4b8d7c1f3d873b8458c0cfee13af4da" title="Apply a language weight, insertion penalty, and unigram weight to a language model.">ngram_model_apply_weights</a>(model, lw, wip, uw);
<a name="l00161"></a>00161      }
<a name="l00162"></a>00162 
<a name="l00163"></a>00163      <span class="keywordflow">return</span> model;
<a name="l00164"></a>00164  }
<a name="l00165"></a>00165 
<a name="l00166"></a>00166  <span class="keywordtype">int</span>
<a name="l00167"></a><a class="code" href="ngram__model_8h.html#ac8ff04e1bccbef23bde9e81fb61c57fe">00167</a>  <a class="code" href="ngram__model_8h.html#ac8ff04e1bccbef23bde9e81fb61c57fe" title="Write an N-Gram model to disk.">ngram_model_write</a>(<a class="code" href="structngram__model__s.html" title="Common implementation of ngram_model_t.">ngram_model_t</a> *model, <span class="keyword">const</span> <span class="keywordtype">char</span> *file_name,
<a name="l00168"></a>00168                    <a class="code" href="ngram__model_8h.html#a0e7d02703c48237b2afea436392dcb82" title="File types for N-Gram files.">ngram_file_type_t</a> file_type)
<a name="l00169"></a>00169  {
<a name="l00170"></a>00170      <span class="keywordflow">switch</span> (file_type) {
<a name="l00171"></a>00171      <span class="keywordflow">case</span> <a class="code" href="ngram__model_8h.html#a406c0d64c15a9d1749d07c8ab6e0ae74a441701bf8ae0a2b79716feb31b5f257a" title="Determine file type automatically.">NGRAM_AUTO</a>: {
<a name="l00172"></a>00172          file_type = <a class="code" href="ngram__model_8h.html#ad758d3f491d501bdec2ada8088e9b656" title="Guess the file type for an N-Gram model from the filename.">ngram_file_name_to_type</a>(file_name);
<a name="l00173"></a>00173          <span class="comment">/* Default to ARPA (catches .lm and other things) */</span>
<a name="l00174"></a>00174          <span class="keywordflow">if</span> (file_type == <a class="code" href="ngram__model_8h.html#a406c0d64c15a9d1749d07c8ab6e0ae74a2cf596c8b0c63a3a0ba1fe33326cc796" title="Not a valid file type.">NGRAM_INVALID</a>)
<a name="l00175"></a>00175              file_type = <a class="code" href="ngram__model_8h.html#a406c0d64c15a9d1749d07c8ab6e0ae74a77ef2341d826b3cc8b836e3af1efba99" title="ARPABO text format (the standard).">NGRAM_ARPA</a>;
<a name="l00176"></a>00176          <span class="keywordflow">return</span> <a class="code" href="ngram__model_8h.html#ac8ff04e1bccbef23bde9e81fb61c57fe" title="Write an N-Gram model to disk.">ngram_model_write</a>(model, file_name, file_type);
<a name="l00177"></a>00177      }
<a name="l00178"></a>00178      <span class="keywordflow">case</span> <a class="code" href="ngram__model_8h.html#a406c0d64c15a9d1749d07c8ab6e0ae74a77ef2341d826b3cc8b836e3af1efba99" title="ARPABO text format (the standard).">NGRAM_ARPA</a>:
<a name="l00179"></a>00179          <span class="keywordflow">return</span> ngram_model_arpa_write(model, file_name);
<a name="l00180"></a>00180      <span class="keywordflow">case</span> <a class="code" href="ngram__model_8h.html#a406c0d64c15a9d1749d07c8ab6e0ae74addfc3620d0fbc6f05f7f8e455245dd92" title="Sphinx .DMP format.">NGRAM_DMP</a>:
<a name="l00181"></a>00181          <span class="keywordflow">return</span> ngram_model_dmp_write(model, file_name);
<a name="l00182"></a>00182      <span class="keywordflow">default</span>:
<a name="l00183"></a>00183          <a class="code" href="err_8h.html#a5f7b2f58f5a663a6bdd51f197ae21993" title="Print error message to standard error stream.">E_ERROR</a>(<span class="stringliteral">&quot;language model file type not supported\n&quot;</span>);
<a name="l00184"></a>00184          <span class="keywordflow">return</span> -1;
<a name="l00185"></a>00185      }
<a name="l00186"></a>00186      <a class="code" href="err_8h.html#a5f7b2f58f5a663a6bdd51f197ae21993" title="Print error message to standard error stream.">E_ERROR</a>(<span class="stringliteral">&quot;language model file type not supported\n&quot;</span>);
<a name="l00187"></a>00187      <span class="keywordflow">return</span> -1;
<a name="l00188"></a>00188  }
<a name="l00189"></a>00189 
<a name="l00190"></a>00190  int32
<a name="l00191"></a>00191  ngram_model_init(<a class="code" href="structngram__model__s.html" title="Common implementation of ngram_model_t.">ngram_model_t</a> *base,
<a name="l00192"></a>00192                   <a class="code" href="structngram__funcs__s.html" title="Implementation-specific functions for operating on ngram_model_t objects.">ngram_funcs_t</a> *funcs,
<a name="l00193"></a>00193                   <a class="code" href="structlogmath__s.html">logmath_t</a> *lmath,
<a name="l00194"></a>00194                   int32 n, int32 n_unigram)
<a name="l00195"></a>00195  {
<a name="l00196"></a>00196      base-&gt;<a class="code" href="structngram__model__s.html#a3b14986e4dc40ccec1f7e206b7f41d06" title="Reference count.">refcount</a> = 1;
<a name="l00197"></a>00197      base-&gt;<a class="code" href="structngram__model__s.html#ad3d9d8ad9773f958a89534220eda6fb9" title="Implementation-specific methods.">funcs</a> = funcs;
<a name="l00198"></a>00198      base-&gt;<a class="code" href="structngram__model__s.html#a3c87bc1b678662a2c8930b3b8c33a80f" title="This is an n-gram model (1, 2, 3, ...).">n</a> = n;
<a name="l00199"></a>00199      <span class="comment">/* Allocate the N-Gram counts array unless a previous initialization already did. */</span>
<a name="l00200"></a>00200     <span class="keywordflow">if</span> (base-&gt;<a class="code" href="structngram__model__s.html#a9dcba9b49cc1cd189b257e5838da0eee" title="Counts for 1, 2, 3, ...">n_counts</a> == NULL)
<a name="l00201"></a>00201         base-&gt;<a class="code" href="structngram__model__s.html#a9dcba9b49cc1cd189b257e5838da0eee" title="Counts for 1, 2, 3, ...">n_counts</a> = <a class="code" href="ckd__alloc_8h.html#aa00ef21903bc4f8a972488417adc8d2e" title="Macros to simplify the use of above functions.">ckd_calloc</a>(3, <span class="keyword">sizeof</span>(*base-&gt;<a class="code" href="structngram__model__s.html#a9dcba9b49cc1cd189b257e5838da0eee" title="Counts for 1, 2, 3, ...">n_counts</a>));
<a name="l00202"></a>00202     <span class="comment">/* Don&#39;t reset weights if logmath object hasn&#39;t changed. */</span>
<a name="l00203"></a>00203     <span class="keywordflow">if</span> (base-&gt;<a class="code" href="structngram__model__s.html#a2ca373109c651ac998b33153eb38fd95" title="Log-math object.">lmath</a> != lmath) {
<a name="l00204"></a>00204         <span class="comment">/* Set default values for weights. */</span>
<a name="l00205"></a>00205         base-&gt;<a class="code" href="structngram__model__s.html#a76ea0c65b23de80091e7c602bdb43bde" title="Language model scaling factor.">lw</a> = 1.0;
<a name="l00206"></a>00206         base-&gt;<a class="code" href="structngram__model__s.html#a3d6bf5632760a16e52cb881d7010d774" title="Log of word insertion penalty.">log_wip</a> = 0; <span class="comment">/* i.e. 1.0 */</span>
<a name="l00207"></a>00207         base-&gt;<a class="code" href="structngram__model__s.html#a6f0ec7b8b9d13d590bbe4b59df573abc" title="Log of unigram weight.">log_uw</a> = 0;  <span class="comment">/* i.e. 1.0 */</span>
<a name="l00208"></a>00208         base-&gt;<a class="code" href="structngram__model__s.html#a616bf871a67f9cedce17d6b589ee33ea" title="Log of uniform (0-gram) probability.">log_uniform</a> = <a class="code" href="logmath_8h.html#aebb4711268322fa7aec31e5798fe7e90" title="Convert linear floating point number to integer log in base B.">logmath_log</a>(lmath, 1.0 / n_unigram);
<a name="l00209"></a>00209         base-&gt;<a class="code" href="structngram__model__s.html#aa38c5fdecaefd9a2f43b69f26ae492c1" title="Log of uniform weight (i.e.">log_uniform_weight</a> = <a class="code" href="logmath_8h.html#a1c160c28a9e7d25923f391773b1028c0" title="Get the smallest possible value represented in this base.">logmath_get_zero</a>(lmath);
<a name="l00210"></a>00210         base-&gt;<a class="code" href="structngram__model__s.html#a65425a599c4bcc4dda809d81149b8fc0" title="Zero probability, cached here for quick lookup.">log_zero</a> = <a class="code" href="logmath_8h.html#a1c160c28a9e7d25923f391773b1028c0" title="Get the smallest possible value represented in this base.">logmath_get_zero</a>(lmath);
<a name="l00211"></a>00211         base-&gt;<a class="code" href="structngram__model__s.html#a2ca373109c651ac998b33153eb38fd95" title="Log-math object.">lmath</a> = lmath;
<a name="l00212"></a>00212     }
<a name="l00213"></a>00213     <span class="comment">/* Allocate or reallocate space for word strings. */</span>
<a name="l00214"></a>00214     <span class="keywordflow">if</span> (base-&gt;<a class="code" href="structngram__model__s.html#ae625e779e340845f03fb3da164e93039" title="Unigram names.">word_str</a>) {
<a name="l00215"></a>00215         <span class="comment">/* Free all previous word strings if they were allocated. */</span>
<a name="l00216"></a>00216         <span class="keywordflow">if</span> (base-&gt;<a class="code" href="structngram__model__s.html#a78a3253febced2cae4732044da466ee6" title="Are word strings writable?">writable</a>) {
<a name="l00217"></a>00217             int32 i;
<a name="l00218"></a>00218             <span class="keywordflow">for</span> (i = 0; i &lt; base-&gt;<a class="code" href="structngram__model__s.html#a74f85927ef0d5513a1e6c02d13864be3" title="Number of actual word strings (NOT the same as the number of unigrams, due to class words)...">n_words</a>; ++i) {
<a name="l00219"></a>00219                 <a class="code" href="ckd__alloc_8h.html#a31c6b405558620ac37599737b5722fbf" title="Test and free a 1-D array.">ckd_free</a>(base-&gt;<a class="code" href="structngram__model__s.html#ae625e779e340845f03fb3da164e93039" title="Unigram names.">word_str</a>[i]);
<a name="l00220"></a>00220                 base-&gt;<a class="code" href="structngram__model__s.html#ae625e779e340845f03fb3da164e93039" title="Unigram names.">word_str</a>[i] = NULL;
<a name="l00221"></a>00221             }
<a name="l00222"></a>00222         }
<a name="l00223"></a>00223         base-&gt;<a class="code" href="structngram__model__s.html#ae625e779e340845f03fb3da164e93039" title="Unigram names.">word_str</a> = <a class="code" href="ckd__alloc_8h.html#afd496738b3e114bd494c5a0955f1bfb3" title="Macro for __ckd_realloc__.">ckd_realloc</a>(base-&gt;<a class="code" href="structngram__model__s.html#ae625e779e340845f03fb3da164e93039" title="Unigram names.">word_str</a>, n_unigram * <span class="keyword">sizeof</span>(<span class="keywordtype">char</span> *));
<a name="l00224"></a>00224     }
<a name="l00225"></a>00225     <span class="keywordflow">else</span>
<a name="l00226"></a>00226         base-&gt;<a class="code" href="structngram__model__s.html#ae625e779e340845f03fb3da164e93039" title="Unigram names.">word_str</a> = <a class="code" href="ckd__alloc_8h.html#aa00ef21903bc4f8a972488417adc8d2e" title="Macros to simplify the use of above functions.">ckd_calloc</a>(n_unigram, <span class="keyword">sizeof</span>(<span class="keywordtype">char</span> *));
<a name="l00227"></a>00227     <span class="comment">/* NOTE: Word-to-ID lookups are no longer case-insensitive since we are allowing</span>
<a name="l00228"></a>00228 <span class="comment">     * other encodings for word strings.  Beware. */</span>
<a name="l00229"></a>00229     <span class="keywordflow">if</span> (base-&gt;<a class="code" href="structngram__model__s.html#a75567419a8002ef6e916c81f5d9ee9ed" title="Mapping of unigram names to word IDs.">wid</a>)
<a name="l00230"></a>00230         <a class="code" href="hash__table_8h.html#acab374d21e25009d397642e3465308c7" title="Delete all entries from a hash_table.">hash_table_empty</a>(base-&gt;<a class="code" href="structngram__model__s.html#a75567419a8002ef6e916c81f5d9ee9ed" title="Mapping of unigram names to word IDs.">wid</a>);
<a name="l00231"></a>00231     <span class="keywordflow">else</span>
<a name="l00232"></a>00232         base-&gt;<a class="code" href="structngram__model__s.html#a75567419a8002ef6e916c81f5d9ee9ed" title="Mapping of unigram names to word IDs.">wid</a> = <a class="code" href="hash__table_8h.html#a56d93e8c03e066b77377ac6eab50cfae" title="Allocate a new hash table for a given expected size.">hash_table_new</a>(n_unigram, FALSE);
<a name="l00233"></a>00233     base-&gt;<a class="code" href="structngram__model__s.html#a9dcba9b49cc1cd189b257e5838da0eee" title="Counts for 1, 2, 3, ...">n_counts</a>[0] = base-&gt;<a class="code" href="structngram__model__s.html#a3e41109b30668bdfc077614c1ef49960" title="Number of allocated word strings (for new word addition)">n_1g_alloc</a> = base-&gt;<a class="code" href="structngram__model__s.html#a74f85927ef0d5513a1e6c02d13864be3" title="Number of actual word strings (NOT the same as the number of unigrams, due to class words)...">n_words</a> = n_unigram;
<a name="l00234"></a>00234 
<a name="l00235"></a>00235     <span class="keywordflow">return</span> 0;
<a name="l00236"></a>00236 }
<a name="l00237"></a>00237 
<a name="l00238"></a>00238 <a class="code" href="structngram__model__s.html" title="Common implementation of ngram_model_t.">ngram_model_t</a> *
<a name="l00239"></a><a class="code" href="ngram__model_8h.html#a046e6ff8cd8787e412400534a9649a81">00239</a> <a class="code" href="ngram__model_8h.html#a046e6ff8cd8787e412400534a9649a81" title="Retain ownership of an N-Gram model.">ngram_model_retain</a>(<a class="code" href="structngram__model__s.html" title="Common implementation of ngram_model_t.">ngram_model_t</a> *model)
<a name="l00240"></a>00240 {
<a name="l00241"></a>00241     ++model-&gt;<a class="code" href="structngram__model__s.html#a3b14986e4dc40ccec1f7e206b7f41d06" title="Reference count.">refcount</a>;
<a name="l00242"></a>00242     <span class="keywordflow">return</span> model;
<a name="l00243"></a>00243 }
<a name="l00244"></a>00244 
<a name="l00245"></a>00245 
<a name="l00246"></a>00246 <span class="keywordtype">void</span>
<a name="l00247"></a><a class="code" href="ngram__model_8h.html#a8304f56d82278824b0a94c254d7235aa">00247</a> <a class="code" href="ngram__model_8h.html#a8304f56d82278824b0a94c254d7235aa" title="Flush any cached N-Gram information.">ngram_model_flush</a>(<a class="code" href="structngram__model__s.html" title="Common implementation of ngram_model_t.">ngram_model_t</a> *model)
<a name="l00248"></a>00248 {
<a name="l00249"></a>00249     <span class="keywordflow">if</span> (model-&gt;<a class="code" href="structngram__model__s.html#ad3d9d8ad9773f958a89534220eda6fb9" title="Implementation-specific methods.">funcs</a> &amp;&amp; model-&gt;<a class="code" href="structngram__model__s.html#ad3d9d8ad9773f958a89534220eda6fb9" title="Implementation-specific methods.">funcs</a>-&gt;<a class="code" href="structngram__funcs__s.html#a7abf2864db9c8e8d1d5909ea92144ffe" title="Implementation-specific function for purging N-Gram cache.">flush</a>)
<a name="l00250"></a>00250         (*model-&gt;<a class="code" href="structngram__model__s.html#ad3d9d8ad9773f958a89534220eda6fb9" title="Implementation-specific methods.">funcs</a>-&gt;<a class="code" href="structngram__funcs__s.html#a7abf2864db9c8e8d1d5909ea92144ffe" title="Implementation-specific function for purging N-Gram cache.">flush</a>)(model);
<a name="l00251"></a>00251 }
<a name="l00252"></a>00252 
<a name="l00253"></a>00253 <span class="keywordtype">int</span>
<a name="l00254"></a><a class="code" href="ngram__model_8h.html#aec73d28e7285e539a0b44a7ac0cbe489">00254</a> <a class="code" href="ngram__model_8h.html#aec73d28e7285e539a0b44a7ac0cbe489" title="Release memory associated with an N-Gram model.">ngram_model_free</a>(<a class="code" href="structngram__model__s.html" title="Common implementation of ngram_model_t.">ngram_model_t</a> *model)
<a name="l00255"></a>00255 {
<a name="l00256"></a>00256     <span class="keywordtype">int</span> i;
<a name="l00257"></a>00257 
<a name="l00258"></a>00258     <span class="keywordflow">if</span> (model == NULL)
<a name="l00259"></a>00259         <span class="keywordflow">return</span> 0;
<a name="l00260"></a>00260     <span class="keywordflow">if</span> (--model-&gt;<a class="code" href="structngram__model__s.html#a3b14986e4dc40ccec1f7e206b7f41d06" title="Reference count.">refcount</a> &gt; 0)
<a name="l00261"></a>00261         <span class="keywordflow">return</span> model-&gt;<a class="code" href="structngram__model__s.html#a3b14986e4dc40ccec1f7e206b7f41d06" title="Reference count.">refcount</a>;
<a name="l00262"></a>00262     <span class="keywordflow">if</span> (model-&gt;<a class="code" href="structngram__model__s.html#ad3d9d8ad9773f958a89534220eda6fb9" title="Implementation-specific methods.">funcs</a> &amp;&amp; model-&gt;<a class="code" href="structngram__model__s.html#ad3d9d8ad9773f958a89534220eda6fb9" title="Implementation-specific methods.">funcs</a>-&gt;<a class="code" href="structngram__funcs__s.html#a4211130880131f38e16022985816952f" title="Implementation-specific function for freeing an ngram_model_t.">free</a>)
<a name="l00263"></a>00263         (*model-&gt;<a class="code" href="structngram__model__s.html#ad3d9d8ad9773f958a89534220eda6fb9" title="Implementation-specific methods.">funcs</a>-&gt;<a class="code" href="structngram__funcs__s.html#a4211130880131f38e16022985816952f" title="Implementation-specific function for freeing an ngram_model_t.">free</a>)(model);
<a name="l00264"></a>00264     <span class="keywordflow">if</span> (model-&gt;<a class="code" href="structngram__model__s.html#a78a3253febced2cae4732044da466ee6" title="Are word strings writable?">writable</a>) {
<a name="l00265"></a>00265         <span class="comment">/* Free all words. */</span>
<a name="l00266"></a>00266         <span class="keywordflow">for</span> (i = 0; i &lt; model-&gt;<a class="code" href="structngram__model__s.html#a74f85927ef0d5513a1e6c02d13864be3" title="Number of actual word strings (NOT the same as the number of unigrams, due to class words)...">n_words</a>; ++i) {
<a name="l00267"></a>00267             <a class="code" href="ckd__alloc_8h.html#a31c6b405558620ac37599737b5722fbf" title="Test and free a 1-D array.">ckd_free</a>(model-&gt;<a class="code" href="structngram__model__s.html#ae625e779e340845f03fb3da164e93039" title="Unigram names.">word_str</a>[i]);
<a name="l00268"></a>00268         }
<a name="l00269"></a>00269     }
<a name="l00270"></a>00270     <span class="keywordflow">else</span> {
<a name="l00271"></a>00271         <span class="comment">/* Free all class words. */</span>
<a name="l00272"></a>00272         <span class="keywordflow">for</span> (i = 0; i &lt; model-&gt;<a class="code" href="structngram__model__s.html#adeb914f8e9f011a5c960f5ee9cd33919" title="Number of classes (maximum 128)">n_classes</a>; ++i) {
<a name="l00273"></a>00273             <a class="code" href="structngram__class__s.html" title="Implementation of ngram_class_t.">ngram_class_t</a> *lmclass;
<a name="l00274"></a>00274             int32 j;
<a name="l00275"></a>00275 
<a name="l00276"></a>00276             lmclass = model-&gt;<a class="code" href="structngram__model__s.html#a39384af3e6b53591d433436db011ac8d" title="Word class definitions.">classes</a>[i];
<a name="l00277"></a>00277             <span class="keywordflow">for</span> (j = 0; j &lt; lmclass-&gt;<a class="code" href="structngram__class__s.html#af13562cbc44647435f315b18df5688dc" title="Number of base words for this class.">n_words</a>; ++j) {
<a name="l00278"></a>00278                 <a class="code" href="ckd__alloc_8h.html#a31c6b405558620ac37599737b5722fbf" title="Test and free a 1-D array.">ckd_free</a>(model-&gt;<a class="code" href="structngram__model__s.html#ae625e779e340845f03fb3da164e93039" title="Unigram names.">word_str</a>[lmclass-&gt;<a class="code" href="structngram__class__s.html#a370c88602c7c1f7e3ff1a767c027f5cb" title="Starting base word ID for this class&#39; words.">start_wid</a> + j]);
<a name="l00279"></a>00279             }
<a name="l00280"></a>00280             <span class="keywordflow">for</span> (j = 0; j &lt; lmclass-&gt;<a class="code" href="structngram__class__s.html#a7f450019eb6dc2e31b18eb3ab6009920" title="Number of buckets in nword_hash (power of 2)">n_hash</a>; ++j) {
<a name="l00281"></a>00281                 <span class="keywordflow">if</span> (lmclass-&gt;nword_hash[j].<a class="code" href="structngram__class__s_1_1ngram__hash__s.html#ad0178b5a86ec23ce790b6b7cb64db0b9" title="Word ID of this bucket.">wid</a> != -1) {
<a name="l00282"></a>00282                     <a class="code" href="ckd__alloc_8h.html#a31c6b405558620ac37599737b5722fbf" title="Test and free a 1-D array.">ckd_free</a>(model-&gt;<a class="code" href="structngram__model__s.html#ae625e779e340845f03fb3da164e93039" title="Unigram names.">word_str</a>[lmclass-&gt;nword_hash[j].<a class="code" href="structngram__class__s_1_1ngram__hash__s.html#ad0178b5a86ec23ce790b6b7cb64db0b9" title="Word ID of this bucket.">wid</a>]);
<a name="l00283"></a>00283                 }
<a name="l00284"></a>00284             }
<a name="l00285"></a>00285         }
<a name="l00286"></a>00286     }
<a name="l00287"></a>00287     <span class="keywordflow">for</span> (i = 0; i &lt; model-&gt;<a class="code" href="structngram__model__s.html#adeb914f8e9f011a5c960f5ee9cd33919" title="Number of classes (maximum 128)">n_classes</a>; ++i) {
<a name="l00288"></a>00288         ngram_class_free(model-&gt;<a class="code" href="structngram__model__s.html#a39384af3e6b53591d433436db011ac8d" title="Word class definitions.">classes</a>[i]);
<a name="l00289"></a>00289     }
<a name="l00290"></a>00290     <a class="code" href="ckd__alloc_8h.html#a31c6b405558620ac37599737b5722fbf" title="Test and free a 1-D array.">ckd_free</a>(model-&gt;<a class="code" href="structngram__model__s.html#a39384af3e6b53591d433436db011ac8d" title="Word class definitions.">classes</a>);
<a name="l00291"></a>00291     <a class="code" href="hash__table_8h.html#a0a588c22946f8cc16328973035ed19e3" title="Free the specified hash table; the caller is responsible for freeing the key strings pointed to by th...">hash_table_free</a>(model-&gt;<a class="code" href="structngram__model__s.html#a75567419a8002ef6e916c81f5d9ee9ed" title="Mapping of unigram names to word IDs.">wid</a>);
<a name="l00292"></a>00292     <a class="code" href="ckd__alloc_8h.html#a31c6b405558620ac37599737b5722fbf" title="Test and free a 1-D array.">ckd_free</a>(model-&gt;<a class="code" href="structngram__model__s.html#ae625e779e340845f03fb3da164e93039" title="Unigram names.">word_str</a>);
<a name="l00293"></a>00293     <a class="code" href="ckd__alloc_8h.html#a31c6b405558620ac37599737b5722fbf" title="Test and free a 1-D array.">ckd_free</a>(model-&gt;<a class="code" href="structngram__model__s.html#a9dcba9b49cc1cd189b257e5838da0eee" title="Counts for 1, 2, 3, ...">n_counts</a>);
<a name="l00294"></a>00294     <a class="code" href="ckd__alloc_8h.html#a31c6b405558620ac37599737b5722fbf" title="Test and free a 1-D array.">ckd_free</a>(model);
<a name="l00295"></a>00295     <span class="keywordflow">return</span> 0;
<a name="l00296"></a>00296 }
<a name="l00297"></a>00297 
<a name="l00298"></a>00298 <span class="keywordtype">int</span>
<a name="l00299"></a><a class="code" href="ngram__model_8h.html#a41b938a2c4b129dc0df37b2312d65506">00299</a> <a class="code" href="ngram__model_8h.html#a41b938a2c4b129dc0df37b2312d65506" title="Case-fold word strings in an N-Gram model.">ngram_model_casefold</a>(<a class="code" href="structngram__model__s.html" title="Common implementation of ngram_model_t.">ngram_model_t</a> *model, <span class="keywordtype">int</span> kase)
<a name="l00300"></a>00300 {
<a name="l00301"></a>00301     <span class="keywordtype">int</span> writable, i;
<a name="l00302"></a>00302     <a class="code" href="structhash__table__t.html">hash_table_t</a> *new_wid;
<a name="l00303"></a>00303 
<a name="l00304"></a>00304     <span class="comment">/* Were word strings already allocated? */</span>
<a name="l00305"></a>00305     writable = model-&gt;<a class="code" href="structngram__model__s.html#a78a3253febced2cae4732044da466ee6" title="Are word strings writable?">writable</a>;
<a name="l00306"></a>00306     <span class="comment">/* Either way, we are going to allocate some word strings. */</span>
<a name="l00307"></a>00307     model-&gt;<a class="code" href="structngram__model__s.html#a78a3253febced2cae4732044da466ee6" title="Are word strings writable?">writable</a> = TRUE;
<a name="l00308"></a>00308 
<a name="l00309"></a>00309     <span class="comment">/* And, don&#39;t forget, we need to rebuild the word to unigram ID</span>
<a name="l00310"></a>00310 <span class="comment">     * mapping. */</span>
<a name="l00311"></a>00311     new_wid = <a class="code" href="hash__table_8h.html#a56d93e8c03e066b77377ac6eab50cfae" title="Allocate a new hash table for a given expected size.">hash_table_new</a>(model-&gt;<a class="code" href="structngram__model__s.html#a74f85927ef0d5513a1e6c02d13864be3" title="Number of actual word strings (NOT the same as the number of unigrams, due to class words)...">n_words</a>, FALSE);
<a name="l00312"></a>00312     <span class="keywordflow">for</span> (i = 0; i &lt; model-&gt;<a class="code" href="structngram__model__s.html#a74f85927ef0d5513a1e6c02d13864be3" title="Number of actual word strings (NOT the same as the number of unigrams, due to class words)...">n_words</a>; ++i) {
<a name="l00313"></a>00313         <span class="keywordtype">char</span> *outstr;
<a name="l00314"></a>00314         <span class="keywordflow">if</span> (writable) {
<a name="l00315"></a>00315             outstr = model-&gt;<a class="code" href="structngram__model__s.html#ae625e779e340845f03fb3da164e93039" title="Unigram names.">word_str</a>[i];
<a name="l00316"></a>00316         }
<a name="l00317"></a>00317         <span class="keywordflow">else</span> {
<a name="l00318"></a>00318             outstr = <a class="code" href="ckd__alloc_8h.html#ad313f92478859f9e4ea99d0f6e78c393" title="Macro for __ckd_salloc__.">ckd_salloc</a>(model-&gt;<a class="code" href="structngram__model__s.html#ae625e779e340845f03fb3da164e93039" title="Unigram names.">word_str</a>[i]);
<a name="l00319"></a>00319         }
<a name="l00320"></a>00320         <span class="comment">/* Don&#39;t case-fold &lt;tags&gt; or [classes] */</span>
<a name="l00321"></a>00321         <span class="keywordflow">if</span> (outstr[0] == <span class="charliteral">&#39;&lt;&#39;</span> || outstr[0] == <span class="charliteral">&#39;[&#39;</span>) {
<a name="l00322"></a>00322         }
<a name="l00323"></a>00323         <span class="keywordflow">else</span> {
<a name="l00324"></a>00324             <span class="keywordflow">switch</span> (kase) {
<a name="l00325"></a>00325             <span class="keywordflow">case</span> NGRAM_UPPER:
<a name="l00326"></a>00326                 <a class="code" href="case_8h.html#a79d99e36b7c2c36dcc6f7f0df746384e" title="Convert str to all upper case.">ucase</a>(outstr);
<a name="l00327"></a>00327                 <span class="keywordflow">break</span>;
<a name="l00328"></a>00328             <span class="keywordflow">case</span> NGRAM_LOWER:
<a name="l00329"></a>00329                 <a class="code" href="case_8h.html#ac0e30dac40f15762f39270f65bd8cdba" title="Convert str to all lower case.">lcase</a>(outstr);
<a name="l00330"></a>00330                 <span class="keywordflow">break</span>;
<a name="l00331"></a>00331             <span class="keywordflow">default</span>:
<a name="l00332"></a>00332                 ;
<a name="l00333"></a>00333             }
<a name="l00334"></a>00334         }
<a name="l00335"></a>00335         model-&gt;<a class="code" href="structngram__model__s.html#ae625e779e340845f03fb3da164e93039" title="Unigram names.">word_str</a>[i] = outstr;
<a name="l00336"></a>00336 
<a name="l00337"></a>00337         <span class="comment">/* Now update the hash table.  We might have terrible</span>
<a name="l00338"></a>00338 <span class="comment">         * collisions here, so warn about them. */</span>
<a name="l00339"></a>00339         <span class="keywordflow">if</span> (<a class="code" href="hash__table_8h.html#a393c56322e54607a48e6bc61169d92bf" title="Add a 32-bit integer value to a hash table.">hash_table_enter_int32</a>(new_wid, model-&gt;<a class="code" href="structngram__model__s.html#ae625e779e340845f03fb3da164e93039" title="Unigram names.">word_str</a>[i], i) != i) {
<a name="l00340"></a>00340             <a class="code" href="err_8h.html#a6a794bec721b555ac1f2167f9e12f662" title="Print warning information to standard error stream.">E_WARN</a>(<span class="stringliteral">&quot;Duplicate word in dictionary after conversion: %s\n&quot;</span>,
<a name="l00341"></a>00341                    model-&gt;<a class="code" href="structngram__model__s.html#ae625e779e340845f03fb3da164e93039" title="Unigram names.">word_str</a>[i]);
<a name="l00342"></a>00342         }
<a name="l00343"></a>00343     }
<a name="l00344"></a>00344     <span class="comment">/* Swap out the hash table. */</span>
<a name="l00345"></a>00345     <a class="code" href="hash__table_8h.html#a0a588c22946f8cc16328973035ed19e3" title="Free the specified hash table; the caller is responsible for freeing the key strings pointed to by th...">hash_table_free</a>(model-&gt;<a class="code" href="structngram__model__s.html#a75567419a8002ef6e916c81f5d9ee9ed" title="Mapping of unigram names to word IDs.">wid</a>);
<a name="l00346"></a>00346     model-&gt;<a class="code" href="structngram__model__s.html#a75567419a8002ef6e916c81f5d9ee9ed" title="Mapping of unigram names to word IDs.">wid</a> = new_wid;
<a name="l00347"></a>00347     <span class="keywordflow">return</span> 0;
<a name="l00348"></a>00348 }
<a name="l00349"></a>00349 
<a name="l00350"></a>00350 <span class="preprocessor">#ifdef HAVE_ICONV</span>
<a name="l00351"></a>00351 <span class="preprocessor"></span><span class="keywordtype">int</span>
<a name="l00352"></a>00352 <a class="code" href="ngram__model_8h.html#aac2b3fd054597c2fdfbb59db97d72ec0" title="Re-encode word strings in an N-Gram model.">ngram_model_recode</a>(<a class="code" href="structngram__model__s.html" title="Common implementation of ngram_model_t.">ngram_model_t</a> *model, <span class="keyword">const</span> <span class="keywordtype">char</span> *from, <span class="keyword">const</span> <span class="keywordtype">char</span> *to)
<a name="l00353"></a>00353 {
<a name="l00354"></a>00354     iconv_t ic;
<a name="l00355"></a>00355     <span class="keywordtype">char</span> *outbuf;
<a name="l00356"></a>00356     <span class="keywordtype">size_t</span> maxlen;
<a name="l00357"></a>00357     <span class="keywordtype">int</span> i, writable;
<a name="l00358"></a>00358     <a class="code" href="structhash__table__t.html">hash_table_t</a> *new_wid;
<a name="l00359"></a>00359 
<a name="l00360"></a>00360     <span class="comment">/* FIXME: Need to do a special case thing for the GB-HEX encoding</span>
<a name="l00361"></a>00361 <span class="comment">     * used in Sphinx3 Mandarin models. */</span>
<a name="l00362"></a>00362     <span class="keywordflow">if</span> ((ic = iconv_open(to, from)) == (iconv_t)-1) {
<a name="l00363"></a>00363         <a class="code" href="err_8h.html#a54ffbfe898d74595c586a1f48f32ef03" title="Print error text; Call perror(&quot;&quot;);.">E_ERROR_SYSTEM</a>(<span class="stringliteral">&quot;iconv_open() failed&quot;</span>);
<a name="l00364"></a>00364         <span class="keywordflow">return</span> -1;
<a name="l00365"></a>00365     }
<a name="l00366"></a>00366     <span class="comment">/* iconv(3) won&#39;t accept a NULL out buffer,</span>
<a name="l00367"></a>00367 <span class="comment">     * unlike wcstombs(3). So we have to either call it over and over</span>
<a name="l00368"></a>00368 <span class="comment">     * again until our buffer is big enough, or call it with a huge</span>
<a name="l00369"></a>00369 <span class="comment">     * buffer and then copy things back to the output.  We will use a</span>
<a name="l00370"></a>00370 <span class="comment">     * mix of these two approaches here.  We&#39;ll keep a single big</span>
<a name="l00371"></a>00371 <span class="comment">     * buffer around, and expand it as necessary.</span>
<a name="l00372"></a>00372 <span class="comment">     */</span>
<a name="l00373"></a>00373     maxlen = 0;
<a name="l00374"></a>00374     <span class="keywordflow">for</span> (i = 0; i &lt; model-&gt;<a class="code" href="structngram__model__s.html#a74f85927ef0d5513a1e6c02d13864be3" title="Number of actual word strings (NOT the same as the number of unigrams, due to class words)...">n_words</a>; ++i) {
<a name="l00375"></a>00375         <span class="keywordflow">if</span> (strlen(model-&gt;<a class="code" href="structngram__model__s.html#ae625e779e340845f03fb3da164e93039" title="Unigram names.">word_str</a>[i]) &gt; maxlen)
<a name="l00376"></a>00376             maxlen = strlen(model-&gt;<a class="code" href="structngram__model__s.html#ae625e779e340845f03fb3da164e93039" title="Unigram names.">word_str</a>[i]);
<a name="l00377"></a>00377     }
<a name="l00378"></a>00378     <span class="comment">/* Were word strings already allocated? */</span>
<a name="l00379"></a>00379     writable = model-&gt;<a class="code" href="structngram__model__s.html#a78a3253febced2cae4732044da466ee6" title="Are word strings writable?">writable</a>;
<a name="l00380"></a>00380     <span class="comment">/* Either way, we are going to allocate some word strings. */</span>
<a name="l00381"></a>00381     model-&gt;<a class="code" href="structngram__model__s.html#a78a3253febced2cae4732044da466ee6" title="Are word strings writable?">writable</a> = TRUE;
<a name="l00382"></a>00382     <span class="comment">/* Really should be big enough except for pathological cases. */</span>
<a name="l00383"></a>00383     maxlen = maxlen * <span class="keyword">sizeof</span>(int) + 15;
<a name="l00384"></a>00384     outbuf = <a class="code" href="ckd__alloc_8h.html#aa00ef21903bc4f8a972488417adc8d2e" title="Macros to simplify the use of above functions.">ckd_calloc</a>(maxlen, 1);
<a name="l00385"></a>00385     <span class="comment">/* And, don&#39;t forget, we need to rebuild the word to unigram ID</span>
<a name="l00386"></a>00386 <span class="comment">     * mapping. */</span>
<a name="l00387"></a>00387     new_wid = <a class="code" href="hash__table_8h.html#a56d93e8c03e066b77377ac6eab50cfae" title="Allocate a new hash table for a given expected size.">hash_table_new</a>(model-&gt;<a class="code" href="structngram__model__s.html#a74f85927ef0d5513a1e6c02d13864be3" title="Number of actual word strings (NOT the same as the number of unigrams, due to class words)...">n_words</a>, FALSE);
<a name="l00388"></a>00388     <span class="keywordflow">for</span> (i = 0; i &lt; model-&gt;<a class="code" href="structngram__model__s.html#a74f85927ef0d5513a1e6c02d13864be3" title="Number of actual word strings (NOT the same as the number of unigrams, due to class words)...">n_words</a>; ++i) {
<a name="l00389"></a>00389         ICONV_CONST <span class="keywordtype">char</span> *in;
<a name="l00390"></a>00390         <span class="keywordtype">char</span> *out;
<a name="l00391"></a>00391         <span class="keywordtype">size_t</span> inleft, outleft, result;
<a name="l00392"></a>00392 
<a name="l00393"></a>00393     start_conversion:
<a name="l00394"></a>00394         in = (ICONV_CONST <span class="keywordtype">char</span> *)model-&gt;<a class="code" href="structngram__model__s.html#ae625e779e340845f03fb3da164e93039" title="Unigram names.">word_str</a>[i];
<a name="l00395"></a>00395         <span class="comment">/* Yes, this assumes that we don&#39;t have any NUL bytes. */</span>
<a name="l00396"></a>00396         inleft = strlen(in);
<a name="l00397"></a>00397         out = outbuf;
<a name="l00398"></a>00398         outleft = maxlen;
<a name="l00399"></a>00399 
<a name="l00400"></a>00400         <span class="keywordflow">while</span> ((result = iconv(ic, &amp;in, &amp;inleft, &amp;out, &amp;outleft)) == (size_t)-1) {
<a name="l00401"></a>00401             <span class="keywordflow">if</span> (errno != E2BIG) {
<a name="l00402"></a>00402                 <span class="comment">/* FIXME: if we already converted any words, then they</span>
<a name="l00403"></a>00403 <span class="comment">                 * are going to be in an inconsistent state. */</span>
<a name="l00404"></a>00404                 <a class="code" href="err_8h.html#a54ffbfe898d74595c586a1f48f32ef03" title="Print error text; Call perror(&quot;&quot;);.">E_ERROR_SYSTEM</a>(<span class="stringliteral">&quot;iconv() failed&quot;</span>);
<a name="l00405"></a>00405                 <a class="code" href="ckd__alloc_8h.html#a31c6b405558620ac37599737b5722fbf" title="Test and free a 1-D array.">ckd_free</a>(outbuf);
<a name="l00406"></a>00406                 <a class="code" href="hash__table_8h.html#a0a588c22946f8cc16328973035ed19e3" title="Free the specified hash table; the caller is responsible for freeing the key strings pointed to by th...">hash_table_free</a>(new_wid);
<a name="l00407"></a>00407                 <span class="keywordflow">return</span> -1;
<a name="l00408"></a>00408             }
<a name="l00409"></a>00409             <span class="comment">/* Reset the internal state of conversion. */</span>
<a name="l00410"></a>00410             iconv(ic, NULL, NULL, NULL, NULL);
<a name="l00411"></a>00411             <span class="comment">/* Make everything bigger. */</span>
<a name="l00412"></a>00412             maxlen *= 2;
<a name="l00413"></a>00413             out = outbuf = <a class="code" href="ckd__alloc_8h.html#afd496738b3e114bd494c5a0955f1bfb3" title="Macro for __ckd_realloc__.">ckd_realloc</a>(outbuf, maxlen);
<a name="l00414"></a>00414             <span class="comment">/* Reset the input pointers. */</span>
<a name="l00415"></a>00415             in = (ICONV_CONST <span class="keywordtype">char</span> *)model-&gt;<a class="code" href="structngram__model__s.html#ae625e779e340845f03fb3da164e93039" title="Unigram names.">word_str</a>[i];
<a name="l00416"></a>00416             inleft = strlen(in);
<a name="l00417"></a>00417         }
<a name="l00418"></a>00418 
<a name="l00419"></a>00419         <span class="comment">/* Now flush a shift-out sequence, if any. */</span>
<a name="l00420"></a>00420         <span class="keywordflow">if</span> ((result = iconv(ic, NULL, NULL, &amp;out, &amp;outleft)) == (size_t)-1) {
<a name="l00421"></a>00421             <span class="keywordflow">if</span> (errno != E2BIG) {
<a name="l00422"></a>00422                 <span class="comment">/* FIXME: if we already converted any words, then they</span>
<a name="l00423"></a>00423 <span class="comment">                 * are going to be in an inconsistent state. */</span>
<a name="l00424"></a>00424                 <a class="code" href="err_8h.html#a54ffbfe898d74595c586a1f48f32ef03" title="Print error text; Call perror(&quot;&quot;);.">E_ERROR_SYSTEM</a>(<span class="stringliteral">&quot;iconv() failed (state reset sequence)&quot;</span>);
<a name="l00425"></a>00425                 <a class="code" href="ckd__alloc_8h.html#a31c6b405558620ac37599737b5722fbf" title="Test and free a 1-D array.">ckd_free</a>(outbuf);
<a name="l00426"></a>00426                 <a class="code" href="hash__table_8h.html#a0a588c22946f8cc16328973035ed19e3" title="Free the specified hash table; the caller is responsible for freeing the key strings pointed to by th...">hash_table_free</a>(new_wid);
<a name="l00427"></a>00427                 <span class="keywordflow">return</span> -1;
<a name="l00428"></a>00428             }
<a name="l00429"></a>00429             <span class="comment">/* Reset the internal state of conversion. */</span>
<a name="l00430"></a>00430             iconv(ic, NULL, NULL, NULL, NULL);
<a name="l00431"></a>00431             <span class="comment">/* Make everything bigger. */</span>
<a name="l00432"></a>00432             maxlen *= 2;
<a name="l00433"></a>00433             outbuf = <a class="code" href="ckd__alloc_8h.html#afd496738b3e114bd494c5a0955f1bfb3" title="Macro for __ckd_realloc__.">ckd_realloc</a>(outbuf, maxlen);
<a name="l00434"></a>00434             <span class="comment">/* Start this word over with the larger buffer. */</span>
<a name="l00435"></a>00435             <span class="keywordflow">goto</span> start_conversion;
<a name="l00436"></a>00436         }
<a name="l00437"></a>00437 
<a name="l00438"></a>00438         result = maxlen - outleft;
<a name="l00439"></a>00439         <span class="comment">/* Conversion succeeded; store the converted string in the model. */</span>
<a name="l00440"></a>00440         <span class="keywordflow">if</span> (writable) {
<a name="l00441"></a>00441             <span class="comment">/* Grow or shrink the output string as necessary. */</span>
<a name="l00442"></a>00442             model-&gt;<a class="code" href="structngram__model__s.html#ae625e779e340845f03fb3da164e93039" title="Unigram names.">word_str</a>[i] = <a class="code" href="ckd__alloc_8h.html#afd496738b3e114bd494c5a0955f1bfb3" title="Macro for __ckd_realloc__.">ckd_realloc</a>(model-&gt;<a class="code" href="structngram__model__s.html#ae625e779e340845f03fb3da164e93039" title="Unigram names.">word_str</a>[i], result + 1);
<a name="l00443"></a>00443             model-&gt;<a class="code" href="structngram__model__s.html#ae625e779e340845f03fb3da164e93039" title="Unigram names.">word_str</a>[i][result] = <span class="charliteral">&#39;\0&#39;</span>;
<a name="l00444"></a>00444         }
<a name="l00445"></a>00445         <span class="keywordflow">else</span> {
<a name="l00446"></a>00446             <span class="comment">/* It actually was not allocated previously, so do that now. */</span>
<a name="l00447"></a>00447             model-&gt;<a class="code" href="structngram__model__s.html#ae625e779e340845f03fb3da164e93039" title="Unigram names.">word_str</a>[i] = <a class="code" href="ckd__alloc_8h.html#aa00ef21903bc4f8a972488417adc8d2e" title="Macros to simplify the use of above functions.">ckd_calloc</a>(result + 1, 1);
<a name="l00448"></a>00448         }
<a name="l00449"></a>00449         <span class="comment">/* Copy the new thing in. */</span>
<a name="l00450"></a>00450         memcpy(model-&gt;<a class="code" href="structngram__model__s.html#ae625e779e340845f03fb3da164e93039" title="Unigram names.">word_str</a>[i], outbuf, result);
<a name="l00451"></a>00451 
<a name="l00452"></a>00452         <span class="comment">/* Now update the hash table.  We might have terrible</span>
<a name="l00453"></a>00453 <span class="comment">         * collisions if a non-reversible conversion was requested,</span>
<a name="l00454"></a>00454 <span class="comment">         * so warn about them. */</span>
<a name="l00455"></a>00455         <span class="keywordflow">if</span> (<a class="code" href="hash__table_8h.html#a393c56322e54607a48e6bc61169d92bf" title="Add a 32-bit integer value to a hash table.">hash_table_enter_int32</a>(new_wid, model-&gt;<a class="code" href="structngram__model__s.html#ae625e779e340845f03fb3da164e93039" title="Unigram names.">word_str</a>[i], i) != i) {
<a name="l00456"></a>00456             <a class="code" href="err_8h.html#a6a794bec721b555ac1f2167f9e12f662" title="Print warning information to standard error stream.">E_WARN</a>(<span class="stringliteral">&quot;Duplicate word in dictionary after conversion: %s\n&quot;</span>,
<a name="l00457"></a>00457                    model-&gt;<a class="code" href="structngram__model__s.html#ae625e779e340845f03fb3da164e93039" title="Unigram names.">word_str</a>[i]);
<a name="l00458"></a>00458         }
<a name="l00459"></a>00459     }
<a name="l00460"></a>00460     <a class="code" href="ckd__alloc_8h.html#a31c6b405558620ac37599737b5722fbf" title="Test and free a 1-D array.">ckd_free</a>(outbuf);
<a name="l00461"></a>00461     iconv_close(ic);
<a name="l00462"></a>00462     <span class="comment">/* Swap out the hash table. */</span>
<a name="l00463"></a>00463     <a class="code" href="hash__table_8h.html#a0a588c22946f8cc16328973035ed19e3" title="Free the specified hash table; the caller is responsible for freeing the key strings pointed to by th...">hash_table_free</a>(model-&gt;<a class="code" href="structngram__model__s.html#a75567419a8002ef6e916c81f5d9ee9ed" title="Mapping of unigram names to word IDs.">wid</a>);
<a name="l00464"></a>00464     model-&gt;<a class="code" href="structngram__model__s.html#a75567419a8002ef6e916c81f5d9ee9ed" title="Mapping of unigram names to word IDs.">wid</a> = new_wid;
<a name="l00465"></a>00465 
<a name="l00466"></a>00466     <span class="keywordflow">return</span> 0;
<a name="l00467"></a>00467 }
<a name="l00468"></a>00468 <span class="preprocessor">#else </span><span class="comment">/* !HAVE_ICONV */</span>
<a name="l00469"></a>00469 <span class="keywordtype">int</span>
<a name="l00470"></a><a class="code" href="ngram__model_8h.html#aac2b3fd054597c2fdfbb59db97d72ec0">00470</a> <a class="code" href="ngram__model_8h.html#aac2b3fd054597c2fdfbb59db97d72ec0" title="Re-encode word strings in an N-Gram model.">ngram_model_recode</a>(<a class="code" href="structngram__model__s.html" title="Common implementation of ngram_model_t.">ngram_model_t</a> *model, <span class="keyword">const</span> <span class="keywordtype">char</span> *from, <span class="keyword">const</span> <span class="keywordtype">char</span> *to)
<a name="l00471"></a>00471 {
<a name="l00472"></a>00472     <span class="keywordflow">return</span> -1;
<a name="l00473"></a>00473 }
<a name="l00474"></a>00474 <span class="preprocessor">#endif </span><span class="comment">/* !HAVE_ICONV */</span>
<a name="l00475"></a>00475 
<a name="l00476"></a>00476 <span class="keywordtype">int</span>
<a name="l00477"></a><a class="code" href="ngram__model_8h.html#aa4b8d7c1f3d873b8458c0cfee13af4da">00477</a> <a class="code" href="ngram__model_8h.html#aa4b8d7c1f3d873b8458c0cfee13af4da" title="Apply a language weight, insertion penalty, and unigram weight to a language model.">ngram_model_apply_weights</a>(<a class="code" href="structngram__model__s.html" title="Common implementation of ngram_model_t.">ngram_model_t</a> *model,
<a name="l00478"></a>00478                           float32 lw, float32 wip, float32 uw)
<a name="l00479"></a>00479 {
<a name="l00480"></a>00480     <span class="keywordflow">return</span> (*model-&gt;<a class="code" href="structngram__model__s.html#ad3d9d8ad9773f958a89534220eda6fb9" title="Implementation-specific methods.">funcs</a>-&gt;<a class="code" href="structngram__funcs__s.html#abfd7e53c672aef5a34ec5114ec475916" title="Implementation-specific function for applying language model weights.">apply_weights</a>)(model, lw, wip, uw);
<a name="l00481"></a>00481 }
<a name="l00482"></a>00482 
<a name="l00483"></a>00483 float32
<a name="l00484"></a><a class="code" href="ngram__model_8h.html#a30170c1307e065e1f7159a04f75df304">00484</a> <a class="code" href="ngram__model_8h.html#a30170c1307e065e1f7159a04f75df304" title="Get the current weights from a language model.">ngram_model_get_weights</a>(<a class="code" href="structngram__model__s.html" title="Common implementation of ngram_model_t.">ngram_model_t</a> *model, int32 *out_log_wip,
<a name="l00485"></a>00485                         int32 *out_log_uw)
<a name="l00486"></a>00486 {
<a name="l00487"></a>00487     <span class="keywordflow">if</span> (out_log_wip) *out_log_wip = model-&gt;<a class="code" href="structngram__model__s.html#a3d6bf5632760a16e52cb881d7010d774" title="Log of word insertion penalty.">log_wip</a>;
<a name="l00488"></a>00488     <span class="keywordflow">if</span> (out_log_uw) *out_log_uw = model-&gt;<a class="code" href="structngram__model__s.html#a6f0ec7b8b9d13d590bbe4b59df573abc" title="Log of unigram weight.">log_uw</a>;
<a name="l00489"></a>00489     <span class="keywordflow">return</span> model-&gt;<a class="code" href="structngram__model__s.html#a76ea0c65b23de80091e7c602bdb43bde" title="Language model scaling factor.">lw</a>;
<a name="l00490"></a>00490 }
<a name="l00491"></a>00491 
<a name="l00492"></a>00492 
<a name="l00493"></a>00493 int32
<a name="l00494"></a><a class="code" href="ngram__model_8h.html#a6ac5799e78ea4ad82a11e2439016471e">00494</a> <a class="code" href="ngram__model_8h.html#a6ac5799e78ea4ad82a11e2439016471e" title="Quick general N-Gram score lookup.">ngram_ng_score</a>(<a class="code" href="structngram__model__s.html" title="Common implementation of ngram_model_t.">ngram_model_t</a> *model, int32 wid, int32 *history,
<a name="l00495"></a>00495                int32 n_hist, int32 *n_used)
<a name="l00496"></a>00496 {
<a name="l00497"></a>00497     int32 score, class_weight = 0;
<a name="l00498"></a>00498     <span class="keywordtype">int</span> i;
<a name="l00499"></a>00499 
<a name="l00500"></a>00500     <span class="comment">/* Closed vocabulary, OOV word probability is zero */</span>
<a name="l00501"></a>00501     <span class="keywordflow">if</span> (wid == <a class="code" href="ngram__model_8h.html#a3d4b3dddd0ff67e13d30c9bf053d01ab" title="Impossible word ID.">NGRAM_INVALID_WID</a>)
<a name="l00502"></a>00502         <span class="keywordflow">return</span> model-&gt;<a class="code" href="structngram__model__s.html#a65425a599c4bcc4dda809d81149b8fc0" title="Zero probability, cached here for quick lookup.">log_zero</a>;
<a name="l00503"></a>00503 
<a name="l00504"></a>00504     <span class="comment">/* &quot;Declassify&quot; wid and history */</span>
<a name="l00505"></a>00505     <span class="keywordflow">if</span> (NGRAM_IS_CLASSWID(wid)) {
<a name="l00506"></a>00506         <a class="code" href="structngram__class__s.html" title="Implementation of ngram_class_t.">ngram_class_t</a> *lmclass = model-&gt;<a class="code" href="structngram__model__s.html#a39384af3e6b53591d433436db011ac8d" title="Word class definitions.">classes</a>[NGRAM_CLASSID(wid)];
<a name="l00507"></a>00507 
<a name="l00508"></a>00508         class_weight = ngram_class_prob(lmclass, wid);
<a name="l00509"></a>00509         <span class="keywordflow">if</span> (class_weight == 1) <span class="comment">/* Meaning, not found in class. */</span>
<a name="l00510"></a>00510             <span class="keywordflow">return</span> model-&gt;<a class="code" href="structngram__model__s.html#a65425a599c4bcc4dda809d81149b8fc0" title="Zero probability, cached here for quick lookup.">log_zero</a>;
<a name="l00511"></a>00511         wid = lmclass-&gt;<a class="code" href="structngram__class__s.html#ab5f3cc0142c9fd91b3c3d0e59906b556" title="Base word ID for this class tag.">tag_wid</a>;
<a name="l00512"></a>00512     }
<a name="l00513"></a>00513     <span class="keywordflow">for</span> (i = 0; i &lt; n_hist; ++i) {
<a name="l00514"></a>00514         <span class="keywordflow">if</span> (history[i] != <a class="code" href="ngram__model_8h.html#a3d4b3dddd0ff67e13d30c9bf053d01ab" title="Impossible word ID.">NGRAM_INVALID_WID</a> &amp;&amp; NGRAM_IS_CLASSWID(history[i]))
<a name="l00515"></a>00515             history[i] = model-&gt;<a class="code" href="structngram__model__s.html#a39384af3e6b53591d433436db011ac8d" title="Word class definitions.">classes</a>[NGRAM_CLASSID(history[i])]-&gt;<a class="code" href="structngram__class__s.html#ab5f3cc0142c9fd91b3c3d0e59906b556" title="Base word ID for this class tag.">tag_wid</a>;
<a name="l00516"></a>00516     }
<a name="l00517"></a>00517     score = (*model-&gt;<a class="code" href="structngram__model__s.html#ad3d9d8ad9773f958a89534220eda6fb9" title="Implementation-specific methods.">funcs</a>-&gt;<a class="code" href="structngram__funcs__s.html#a81b0c7948179c2572fb274401b82278e" title="Implementation-specific function for querying language model score.">score</a>)(model, wid, history, n_hist, n_used);
<a name="l00518"></a>00518 
<a name="l00519"></a>00519     <span class="comment">/* Multiply by unigram in-class weight. */</span>
<a name="l00520"></a>00520     <span class="keywordflow">return</span> score + class_weight;
<a name="l00521"></a>00521 }
<a name="l00522"></a>00522 
<a name="l00523"></a>00523 int32
<a name="l00524"></a><a class="code" href="ngram__model_8h.html#a9c23d79885af400e17ac2a1b7169660d">00524</a> <a class="code" href="ngram__model_8h.html#a9c23d79885af400e17ac2a1b7169660d" title="Get the score (scaled, interpolated log-probability) for a general N-Gram.">ngram_score</a>(<a class="code" href="structngram__model__s.html" title="Common implementation of ngram_model_t.">ngram_model_t</a> *model, <span class="keyword">const</span> <span class="keywordtype">char</span> *word, ...)
<a name="l00525"></a>00525 {
<a name="l00526"></a>00526     va_list history;
<a name="l00527"></a>00527     <span class="keyword">const</span> <span class="keywordtype">char</span> *hword;
<a name="l00528"></a>00528     int32 *histid;
<a name="l00529"></a>00529     int32 n_hist;
<a name="l00530"></a>00530     int32 n_used;
<a name="l00531"></a>00531     int32 prob;
<a name="l00532"></a>00532 
<a name="l00533"></a>00533     va_start(history, word);
<a name="l00534"></a>00534     n_hist = 0;
<a name="l00535"></a>00535     <span class="keywordflow">while</span> ((hword = va_arg(history, <span class="keyword">const</span> <span class="keywordtype">char</span> *)) != NULL)
<a name="l00536"></a>00536         ++n_hist;
<a name="l00537"></a>00537     va_end(history);
<a name="l00538"></a>00538 
<a name="l00539"></a>00539     histid = <a class="code" href="ckd__alloc_8h.html#aa00ef21903bc4f8a972488417adc8d2e" title="Macros to simplify the use of above functions.">ckd_calloc</a>(n_hist, <span class="keyword">sizeof</span>(*histid));
<a name="l00540"></a>00540     va_start(history, word);
<a name="l00541"></a>00541     n_hist = 0;
<a name="l00542"></a>00542     <span class="keywordflow">while</span> ((hword = va_arg(history, <span class="keyword">const</span> <span class="keywordtype">char</span> *)) != NULL) {
<a name="l00543"></a>00543         histid[n_hist] = <a class="code" href="ngram__model_8h.html#ad03d4355d4ea659815dc25bce8d83880" title="Look up numerical word ID.">ngram_wid</a>(model, hword);
<a name="l00544"></a>00544         ++n_hist;
<a name="l00545"></a>00545     }
<a name="l00546"></a>00546     va_end(history);
<a name="l00547"></a>00547 
<a name="l00548"></a>00548     prob = <a class="code" href="ngram__model_8h.html#a6ac5799e78ea4ad82a11e2439016471e" title="Quick general N-Gram score lookup.">ngram_ng_score</a>(model, <a class="code" href="ngram__model_8h.html#ad03d4355d4ea659815dc25bce8d83880" title="Look up numerical word ID.">ngram_wid</a>(model, word),
<a name="l00549"></a>00549                           histid, n_hist, &amp;n_used);
<a name="l00550"></a>00550     <a class="code" href="ckd__alloc_8h.html#a31c6b405558620ac37599737b5722fbf" title="Test and free a 1-D array.">ckd_free</a>(histid);
<a name="l00551"></a>00551     <span class="keywordflow">return</span> prob;
<a name="l00552"></a>00552 }
<a name="l00553"></a>00553 
<a name="l00554"></a>00554 int32
<a name="l00555"></a><a class="code" href="ngram__model_8h.html#a0f8ad53c1a7cab528113b74aad00f15a">00555</a> <a class="code" href="ngram__model_8h.html#a0f8ad53c1a7cab528113b74aad00f15a" title="Quick trigram score lookup.">ngram_tg_score</a>(<a class="code" href="structngram__model__s.html" title="Common implementation of ngram_model_t.">ngram_model_t</a> *model, int32 w3, int32 w2, int32 w1, int32 *n_used)
<a name="l00556"></a>00556 {
<a name="l00557"></a>00557     int32 hist[2];
<a name="l00558"></a>00558     hist[0] = w2;
<a name="l00559"></a>00559     hist[1] = w1;
<a name="l00560"></a>00560     <span class="keywordflow">return</span> <a class="code" href="ngram__model_8h.html#a6ac5799e78ea4ad82a11e2439016471e" title="Quick general N-Gram score lookup.">ngram_ng_score</a>(model, w3, hist, 2, n_used);
<a name="l00561"></a>00561 }
<a name="l00562"></a>00562 
<a name="l00563"></a>00563 int32
<a name="l00564"></a><a class="code" href="ngram__model_8h.html#a8c06698fbcb0ef82420088ace045ae81">00564</a> <a class="code" href="ngram__model_8h.html#a8c06698fbcb0ef82420088ace045ae81" title="Quick bigram score lookup.">ngram_bg_score</a>(<a class="code" href="structngram__model__s.html" title="Common implementation of ngram_model_t.">ngram_model_t</a> *model, int32 w2, int32 w1, int32 *n_used)
<a name="l00565"></a>00565 {
<a name="l00566"></a>00566     <span class="keywordflow">return</span> <a class="code" href="ngram__model_8h.html#a6ac5799e78ea4ad82a11e2439016471e" title="Quick general N-Gram score lookup.">ngram_ng_score</a>(model, w2, &amp;w1, 1, n_used);
<a name="l00567"></a>00567 }
<a name="l00568"></a>00568 
<a name="l00569"></a>00569 int32
<a name="l00570"></a><a class="code" href="ngram__model_8h.html#a218d8d140b93d3d8008f8933f9e04ec6">00570</a> <a class="code" href="ngram__model_8h.html#a218d8d140b93d3d8008f8933f9e04ec6" title="Quick &quot;raw&quot; probability lookup for a general N-Gram.">ngram_ng_prob</a>(<a class="code" href="structngram__model__s.html" title="Common implementation of ngram_model_t.">ngram_model_t</a> *model, int32 wid, int32 *history,
<a name="l00571"></a>00571               int32 n_hist, int32 *n_used)
<a name="l00572"></a>00572 {
<a name="l00573"></a>00573     int32 prob, class_weight = 0;
<a name="l00574"></a>00574     <span class="keywordtype">int</span> i;
<a name="l00575"></a>00575 
<a name="l00576"></a>00576     <span class="comment">/* Closed vocabulary, OOV word probability is zero */</span>
<a name="l00577"></a>00577     <span class="keywordflow">if</span> (wid == <a class="code" href="ngram__model_8h.html#a3d4b3dddd0ff67e13d30c9bf053d01ab" title="Impossible word ID.">NGRAM_INVALID_WID</a>)
<a name="l00578"></a>00578         <span class="keywordflow">return</span> model-&gt;<a class="code" href="structngram__model__s.html#a65425a599c4bcc4dda809d81149b8fc0" title="Zero probability, cached here for quick lookup.">log_zero</a>;
<a name="l00579"></a>00579 
<a name="l00580"></a>00580     <span class="comment">/* &quot;Declassify&quot; wid and history */</span>
<a name="l00581"></a>00581     <span class="keywordflow">if</span> (NGRAM_IS_CLASSWID(wid)) {
<a name="l00582"></a>00582         <a class="code" href="structngram__class__s.html" title="Implementation of ngram_class_t.">ngram_class_t</a> *lmclass = model-&gt;<a class="code" href="structngram__model__s.html#a39384af3e6b53591d433436db011ac8d" title="Word class definitions.">classes</a>[NGRAM_CLASSID(wid)];
<a name="l00583"></a>00583 
<a name="l00584"></a>00584         class_weight = ngram_class_prob(lmclass, wid);
<a name="l00585"></a>00585         <span class="keywordflow">if</span> (class_weight == 1) <span class="comment">/* Meaning, not found in class. */</span>
<a name="l00586"></a>00586             <span class="keywordflow">return</span> class_weight;
<a name="l00587"></a>00587         wid = lmclass-&gt;<a class="code" href="structngram__class__s.html#ab5f3cc0142c9fd91b3c3d0e59906b556" title="Base word ID for this class tag.">tag_wid</a>;
<a name="l00588"></a>00588     }
<a name="l00589"></a>00589     <span class="keywordflow">for</span> (i = 0; i &lt; n_hist; ++i) {
<a name="l00590"></a>00590         <span class="keywordflow">if</span> (history[i] != <a class="code" href="ngram__model_8h.html#a3d4b3dddd0ff67e13d30c9bf053d01ab" title="Impossible word ID.">NGRAM_INVALID_WID</a> &amp;&amp; NGRAM_IS_CLASSWID(history[i]))
<a name="l00591"></a>00591             history[i] = model-&gt;<a class="code" href="structngram__model__s.html#a39384af3e6b53591d433436db011ac8d" title="Word class definitions.">classes</a>[NGRAM_CLASSID(history[i])]-&gt;<a class="code" href="structngram__class__s.html#ab5f3cc0142c9fd91b3c3d0e59906b556" title="Base word ID for this class tag.">tag_wid</a>;
<a name="l00592"></a>00592     }
<a name="l00593"></a>00593     prob = (*model-&gt;<a class="code" href="structngram__model__s.html#ad3d9d8ad9773f958a89534220eda6fb9" title="Implementation-specific methods.">funcs</a>-&gt;<a class="code" href="structngram__funcs__s.html#a2a64c66491914168bd830237cc93b16c" title="Implementation-specific function for querying raw language model probability.">raw_score</a>)(model, wid, history,
<a name="l00594"></a>00594                                       n_hist, n_used);
<a name="l00595"></a>00595     <span class="comment">/* Multiply by unigram in-class weight. */</span>
<a name="l00596"></a>00596     <span class="keywordflow">return</span> prob + class_weight;
<a name="l00597"></a>00597 }
<a name="l00598"></a>00598 
<a name="l00599"></a>00599 int32
<a name="l00600"></a><a class="code" href="ngram__model_8h.html#a68cfda3f503e1a4a87f08aa5a3a5ea88">00600</a> <a class="code" href="ngram__model_8h.html#a68cfda3f503e1a4a87f08aa5a3a5ea88" title="Get the &quot;raw&quot; log-probability for a general N-Gram.">ngram_prob</a>(<a class="code" href="structngram__model__s.html" title="Common implementation of ngram_model_t.">ngram_model_t</a> *model, <span class="keyword">const</span> <span class="keywordtype">char</span> *word, ...)
<a name="l00601"></a>00601 {
<a name="l00602"></a>00602     va_list history;
<a name="l00603"></a>00603     <span class="keyword">const</span> <span class="keywordtype">char</span> *hword;
<a name="l00604"></a>00604     int32 *histid;
<a name="l00605"></a>00605     int32 n_hist;
<a name="l00606"></a>00606     int32 n_used;
<a name="l00607"></a>00607     int32 prob;
<a name="l00608"></a>00608 
<a name="l00609"></a>00609     va_start(history, word);
<a name="l00610"></a>00610     n_hist = 0;
<a name="l00611"></a>00611     <span class="keywordflow">while</span> ((hword = va_arg(history, <span class="keyword">const</span> <span class="keywordtype">char</span> *)) != NULL)
<a name="l00612"></a>00612         ++n_hist;
<a name="l00613"></a>00613     va_end(history);
<a name="l00614"></a>00614 
<a name="l00615"></a>00615     histid = <a class="code" href="ckd__alloc_8h.html#aa00ef21903bc4f8a972488417adc8d2e" title="Macros to simplify the use of above functions.">ckd_calloc</a>(n_hist, <span class="keyword">sizeof</span>(*histid));
<a name="l00616"></a>00616     va_start(history, word);
<a name="l00617"></a>00617     n_hist = 0;
<a name="l00618"></a>00618     <span class="keywordflow">while</span> ((hword = va_arg(history, <span class="keyword">const</span> <span class="keywordtype">char</span> *)) != NULL) {
<a name="l00619"></a>00619         histid[n_hist] = <a class="code" href="ngram__model_8h.html#ad03d4355d4ea659815dc25bce8d83880" title="Look up numerical word ID.">ngram_wid</a>(model, hword);
<a name="l00620"></a>00620         ++n_hist;
<a name="l00621"></a>00621     }
<a name="l00622"></a>00622     va_end(history);
<a name="l00623"></a>00623 
<a name="l00624"></a>00624     prob = <a class="code" href="ngram__model_8h.html#a218d8d140b93d3d8008f8933f9e04ec6" title="Quick &quot;raw&quot; probability lookup for a general N-Gram.">ngram_ng_prob</a>(model, <a class="code" href="ngram__model_8h.html#ad03d4355d4ea659815dc25bce8d83880" title="Look up numerical word ID.">ngram_wid</a>(model, word),
<a name="l00625"></a>00625                          histid, n_hist, &amp;n_used);
<a name="l00626"></a>00626     <a class="code" href="ckd__alloc_8h.html#a31c6b405558620ac37599737b5722fbf" title="Test and free a 1-D array.">ckd_free</a>(histid);
<a name="l00627"></a>00627     <span class="keywordflow">return</span> prob;
<a name="l00628"></a>00628 }
<a name="l00629"></a>00629 
<a name="l00630"></a>00630 int32
<a name="l00631"></a><a class="code" href="ngram__model_8h.html#ae0cf3a94128927e2be6422d2de34f49b">00631</a> <a class="code" href="ngram__model_8h.html#ae0cf3a94128927e2be6422d2de34f49b" title="Convert score to &quot;raw&quot; log-probability.">ngram_score_to_prob</a>(<a class="code" href="structngram__model__s.html" title="Common implementation of ngram_model_t.">ngram_model_t</a> *base, int32 score)
<a name="l00632"></a>00632 {
<a name="l00633"></a>00633     int32 prob;
<a name="l00634"></a>00634 
<a name="l00635"></a>00635     <span class="comment">/* Undo insertion penalty. */</span>
<a name="l00636"></a>00636     prob = score - base-&gt;<a class="code" href="structngram__model__s.html#a3d6bf5632760a16e52cb881d7010d774" title="Log of word insertion penalty.">log_wip</a>;
<a name="l00637"></a>00637     <span class="comment">/* Undo language weight. */</span>
<a name="l00638"></a>00638     prob = (int32)(prob / base-&gt;<a class="code" href="structngram__model__s.html#a76ea0c65b23de80091e7c602bdb43bde" title="Language model scaling factor.">lw</a>);
<a name="l00639"></a>00639 
<a name="l00640"></a>00640     <span class="keywordflow">return</span> prob;
<a name="l00641"></a>00641 }
<a name="l00642"></a>00642 
<a name="l00643"></a>00643 int32
<a name="l00644"></a><a class="code" href="ngram__model_8h.html#a1469e9e1c8516a77c9ac1e248a61ef4e">00644</a> <a class="code" href="ngram__model_8h.html#a1469e9e1c8516a77c9ac1e248a61ef4e" title="Get the unknown word ID for a language model.">ngram_unknown_wid</a>(<a class="code" href="structngram__model__s.html" title="Common implementation of ngram_model_t.">ngram_model_t</a> *model)
<a name="l00645"></a>00645 {
<a name="l00646"></a>00646     int32 val;
<a name="l00647"></a>00647 
<a name="l00648"></a>00648     <span class="comment">/* FIXME: This could be memoized for speed if necessary. */</span>
<a name="l00649"></a>00649     <span class="comment">/* Look up &lt;UNK&gt;, if not found return NGRAM_INVALID_WID. */</span>
<a name="l00650"></a>00650     <span class="keywordflow">if</span> (<a class="code" href="hash__table_8h.html#acaf27e8e7e336faf6653649937c42ed8" title="Look up a 32-bit integer value in a hash table.">hash_table_lookup_int32</a>(model-&gt;<a class="code" href="structngram__model__s.html#a75567419a8002ef6e916c81f5d9ee9ed" title="Mapping of unigram names to word IDs.">wid</a>, <span class="stringliteral">&quot;&lt;UNK&gt;&quot;</span>, &amp;val) == -1)
<a name="l00651"></a>00651         <span class="keywordflow">return</span> <a class="code" href="ngram__model_8h.html#a3d4b3dddd0ff67e13d30c9bf053d01ab" title="Impossible word ID.">NGRAM_INVALID_WID</a>;
<a name="l00652"></a>00652     <span class="keywordflow">else</span>
<a name="l00653"></a>00653         <span class="keywordflow">return</span> val;
<a name="l00654"></a>00654 }
<a name="l00655"></a>00655 
<a name="l00656"></a>00656 int32
<a name="l00657"></a><a class="code" href="ngram__model_8h.html#ae401a87ad55ae64f286dcd83170f7100">00657</a> <a class="code" href="ngram__model_8h.html#ae401a87ad55ae64f286dcd83170f7100" title="Get the &quot;zero&quot; log-probability value for a language model.">ngram_zero</a>(<a class="code" href="structngram__model__s.html" title="Common implementation of ngram_model_t.">ngram_model_t</a> *model)
<a name="l00658"></a>00658 {
<a name="l00659"></a>00659     <span class="keywordflow">return</span> model-&gt;<a class="code" href="structngram__model__s.html#a65425a599c4bcc4dda809d81149b8fc0" title="Zero probability, cached here for quick lookup.">log_zero</a>;
<a name="l00660"></a>00660 }
<a name="l00661"></a>00661 
<a name="l00662"></a>00662 int32
<a name="l00663"></a><a class="code" href="ngram__model_8h.html#a462d374099a4fe8b3c3195b5e2013545">00663</a> <a class="code" href="ngram__model_8h.html#a462d374099a4fe8b3c3195b5e2013545" title="Get the order (N) of the N-gram model.">ngram_model_get_size</a>(<a class="code" href="structngram__model__s.html" title="Common implementation of ngram_model_t.">ngram_model_t</a> *model)
<a name="l00664"></a>00664 {
<a name="l00665"></a>00665   <span class="keywordflow">if</span> (model != NULL)
<a name="l00666"></a>00666     <span class="keywordflow">return</span> model-&gt;<a class="code" href="structngram__model__s.html#a3c87bc1b678662a2c8930b3b8c33a80f" title="This is an n-gram model (1, 2, 3, ...).">n</a>;
<a name="l00667"></a>00667   <span class="keywordflow">return</span> 0;
<a name="l00668"></a>00668 }
<a name="l00669"></a>00669 
<a name="l00670"></a>00670 int32 <span class="keyword">const</span> *
<a name="l00671"></a><a class="code" href="ngram__model_8h.html#a187531aafc1f11214c9d061f75eae194">00671</a> <a class="code" href="ngram__model_8h.html#a187531aafc1f11214c9d061f75eae194" title="Get the counts of the various N-grams in the model.">ngram_model_get_counts</a>(<a class="code" href="structngram__model__s.html" title="Common implementation of ngram_model_t.">ngram_model_t</a> *model)
<a name="l00672"></a>00672 {
<a name="l00673"></a>00673   <span class="keywordflow">if</span> (model != NULL)
<a name="l00674"></a>00674     <span class="keywordflow">return</span> model-&gt;<a class="code" href="structngram__model__s.html#a9dcba9b49cc1cd189b257e5838da0eee" title="Counts for 1, 2, 3, ...">n_counts</a>;
<a name="l00675"></a>00675   <span class="keywordflow">return</span> NULL;
<a name="l00676"></a>00676 }
<a name="l00677"></a>00677 
<a name="l00678"></a>00678 <span class="keywordtype">void</span>
<a name="l00679"></a>00679 ngram_iter_init(<a class="code" href="structngram__iter__s.html" title="Base iterator structure for N-grams.">ngram_iter_t</a> *itor, <a class="code" href="structngram__model__s.html" title="Common implementation of ngram_model_t.">ngram_model_t</a> *model,
<a name="l00680"></a>00680                 <span class="keywordtype">int</span> m, <span class="keywordtype">int</span> successor)
<a name="l00681"></a>00681 {
<a name="l00682"></a>00682     itor-&gt;model = model;
<a name="l00683"></a>00683     itor-&gt;<a class="code" href="structngram__iter__s.html#a1362ebe2dcf1c441929c845fc97958cb" title="Scratch space for word IDs.">wids</a> = <a class="code" href="ckd__alloc_8h.html#aa00ef21903bc4f8a972488417adc8d2e" title="Macros to simplify the use of above functions.">ckd_calloc</a>(model-&gt;<a class="code" href="structngram__model__s.html#a3c87bc1b678662a2c8930b3b8c33a80f" title="This is an n-gram model (1, 2, 3, ...).">n</a>, <span class="keyword">sizeof</span>(*itor-&gt;<a class="code" href="structngram__iter__s.html#a1362ebe2dcf1c441929c845fc97958cb" title="Scratch space for word IDs.">wids</a>));
<a name="l00684"></a>00684     itor-&gt;<a class="code" href="structngram__iter__s.html#a80bce30ba5b180f85ca84d7288715858" title="Order of history.">m</a> = m;
<a name="l00685"></a>00685     itor-&gt;<a class="code" href="structngram__iter__s.html#a7802a7883b1cc246b1bff629cd2cfb08" title="Is this a successor iterator?">successor</a> = successor;
<a name="l00686"></a>00686 }
<a name="l00687"></a>00687 
<a name="l00688"></a>00688 <a class="code" href="structngram__iter__s.html" title="Base iterator structure for N-grams.">ngram_iter_t</a> *
<a name="l00689"></a><a class="code" href="ngram__model_8h.html#a12683dda2253dc45680102f02fbdb1e2">00689</a> <a class="code" href="ngram__model_8h.html#a12683dda2253dc45680102f02fbdb1e2" title="Iterate over all M-grams.">ngram_model_mgrams</a>(<a class="code" href="structngram__model__s.html" title="Common implementation of ngram_model_t.">ngram_model_t</a> *model, <span class="keywordtype">int</span> m)
<a name="l00690"></a>00690 {
<a name="l00691"></a>00691     <a class="code" href="structngram__iter__s.html" title="Base iterator structure for N-grams.">ngram_iter_t</a> *itor;
<a name="l00692"></a>00692     <span class="comment">/* The fact that m=n-1 is not exactly obvious.  Prevent accidents. */</span>
<a name="l00693"></a>00693     <span class="keywordflow">if</span> (m &gt;= model-&gt;<a class="code" href="structngram__model__s.html#a3c87bc1b678662a2c8930b3b8c33a80f" title="This is an n-gram model (1, 2, 3, ...).">n</a>)
<a name="l00694"></a>00694         <span class="keywordflow">return</span> NULL;
<a name="l00695"></a>00695     <span class="keywordflow">if</span> (model-&gt;<a class="code" href="structngram__model__s.html#ad3d9d8ad9773f958a89534220eda6fb9" title="Implementation-specific methods.">funcs</a>-&gt;<a class="code" href="structngram__funcs__s.html#a3a264fcbdbe8b4b342c533af090cfe8f" title="Implementation-specific function for iterating.">mgrams</a> == NULL)
<a name="l00696"></a>00696         <span class="keywordflow">return</span> NULL;
<a name="l00697"></a>00697     itor = (*model-&gt;<a class="code" href="structngram__model__s.html#ad3d9d8ad9773f958a89534220eda6fb9" title="Implementation-specific methods.">funcs</a>-&gt;<a class="code" href="structngram__funcs__s.html#a3a264fcbdbe8b4b342c533af090cfe8f" title="Implementation-specific function for iterating.">mgrams</a>)(model, m);
<a name="l00698"></a>00698     <span class="keywordflow">return</span> itor;
<a name="l00699"></a>00699 }
<a name="l00700"></a>00700 
<a name="l00701"></a>00701 <a class="code" href="structngram__iter__s.html" title="Base iterator structure for N-grams.">ngram_iter_t</a> *
<a name="l00702"></a><a class="code" href="ngram__model_8h.html#ad988e0f41447ee404050caf7c7bb7b63">00702</a> <a class="code" href="ngram__model_8h.html#ad988e0f41447ee404050caf7c7bb7b63" title="Get an iterator over M-grams pointing to the specified M-gram.">ngram_iter</a>(<a class="code" href="structngram__model__s.html" title="Common implementation of ngram_model_t.">ngram_model_t</a> *model, <span class="keyword">const</span> <span class="keywordtype">char</span> *word, ...)
<a name="l00703"></a>00703 {
<a name="l00704"></a>00704     va_list history;
<a name="l00705"></a>00705     <span class="keyword">const</span> <span class="keywordtype">char</span> *hword;
<a name="l00706"></a>00706     int32 *histid;
<a name="l00707"></a>00707     int32 n_hist;
<a name="l00708"></a>00708     <a class="code" href="structngram__iter__s.html" title="Base iterator structure for N-grams.">ngram_iter_t</a> *itor;
<a name="l00709"></a>00709 
<a name="l00710"></a>00710     va_start(history, word);
<a name="l00711"></a>00711     n_hist = 0;
<a name="l00712"></a>00712     <span class="keywordflow">while</span> ((hword = va_arg(history, <span class="keyword">const</span> <span class="keywordtype">char</span> *)) != NULL)
<a name="l00713"></a>00713         ++n_hist;
<a name="l00714"></a>00714     va_end(history);
<a name="l00715"></a>00715 
<a name="l00716"></a>00716     histid = <a class="code" href="ckd__alloc_8h.html#aa00ef21903bc4f8a972488417adc8d2e" title="Macros to simplify the use of above functions.">ckd_calloc</a>(n_hist, <span class="keyword">sizeof</span>(*histid));
<a name="l00717"></a>00717     va_start(history, word);
<a name="l00718"></a>00718     n_hist = 0;
<a name="l00719"></a>00719     <span class="keywordflow">while</span> ((hword = va_arg(history, <span class="keyword">const</span> <span class="keywordtype">char</span> *)) != NULL) {
<a name="l00720"></a>00720         histid[n_hist] = <a class="code" href="ngram__model_8h.html#ad03d4355d4ea659815dc25bce8d83880" title="Look up numerical word ID.">ngram_wid</a>(model, hword);
<a name="l00721"></a>00721         ++n_hist;
<a name="l00722"></a>00722     }
<a name="l00723"></a>00723     va_end(history);
<a name="l00724"></a>00724 
<a name="l00725"></a>00725     itor = <a class="code" href="ngram__model_8h.html#a461c6e5914ce463422dfeaeee377e024" title="Get an iterator over M-grams pointing to the specified M-gram.">ngram_ng_iter</a>(model, <a class="code" href="ngram__model_8h.html#ad03d4355d4ea659815dc25bce8d83880" title="Look up numerical word ID.">ngram_wid</a>(model, word), histid, n_hist);
<a name="l00726"></a>00726     <a class="code" href="ckd__alloc_8h.html#a31c6b405558620ac37599737b5722fbf" title="Test and free a 1-D array.">ckd_free</a>(histid);
<a name="l00727"></a>00727     <span class="keywordflow">return</span> itor;
<a name="l00728"></a>00728 }
<a name="l00729"></a>00729 
<a name="l00730"></a>00730 <a class="code" href="structngram__iter__s.html" title="Base iterator structure for N-grams.">ngram_iter_t</a> *
<a name="l00731"></a><a class="code" href="ngram__model_8h.html#a461c6e5914ce463422dfeaeee377e024">00731</a> <a class="code" href="ngram__model_8h.html#a461c6e5914ce463422dfeaeee377e024" title="Get an iterator over M-grams pointing to the specified M-gram.">ngram_ng_iter</a>(<a class="code" href="structngram__model__s.html" title="Common implementation of ngram_model_t.">ngram_model_t</a> *model, int32 wid, int32 *history, int32 n_hist)
<a name="l00732"></a>00732 {
<a name="l00733"></a>00733     <span class="keywordflow">if</span> (n_hist &gt;= model-&gt;<a class="code" href="structngram__model__s.html#a3c87bc1b678662a2c8930b3b8c33a80f" title="This is an n-gram model (1, 2, 3, ...).">n</a>)
<a name="l00734"></a>00734         <span class="keywordflow">return</span> NULL;
<a name="l00735"></a>00735     <span class="keywordflow">if</span> (model-&gt;<a class="code" href="structngram__model__s.html#ad3d9d8ad9773f958a89534220eda6fb9" title="Implementation-specific methods.">funcs</a>-&gt;<a class="code" href="structngram__funcs__s.html#a8148f132a986660ff500e46f9b2ea1c1" title="Implementation-specific function for iterating.">iter</a> == NULL)
<a name="l00736"></a>00736         <span class="keywordflow">return</span> NULL;
<a name="l00737"></a>00737     <span class="keywordflow">return</span> (*model-&gt;<a class="code" href="structngram__model__s.html#ad3d9d8ad9773f958a89534220eda6fb9" title="Implementation-specific methods.">funcs</a>-&gt;<a class="code" href="structngram__funcs__s.html#a8148f132a986660ff500e46f9b2ea1c1" title="Implementation-specific function for iterating.">iter</a>)(model, wid, history, n_hist);
<a name="l00738"></a>00738 }
<a name="l00739"></a>00739 
<a name="l00740"></a>00740 <a class="code" href="structngram__iter__s.html" title="Base iterator structure for N-grams.">ngram_iter_t</a> *
<a name="l00741"></a><a class="code" href="ngram__model_8h.html#ae85f41e2defc5b65b12026d29cd4fdaa">00741</a> <a class="code" href="ngram__model_8h.html#ae85f41e2defc5b65b12026d29cd4fdaa" title="Iterate over all M-gram successors of an M-1-gram.">ngram_iter_successors</a>(<a class="code" href="structngram__iter__s.html" title="Base iterator structure for N-grams.">ngram_iter_t</a> *itor)
<a name="l00742"></a>00742 {
<a name="l00743"></a>00743     <span class="comment">/* Stop when we are at the highest order N-Gram. */</span>
<a name="l00744"></a>00744     <span class="keywordflow">if</span> (itor-&gt;<a class="code" href="structngram__iter__s.html#a80bce30ba5b180f85ca84d7288715858" title="Order of history.">m</a> == itor-&gt;model-&gt;<a class="code" href="structngram__model__s.html#a3c87bc1b678662a2c8930b3b8c33a80f" title="This is an n-gram model (1, 2, 3, ...).">n</a> - 1)
<a name="l00745"></a>00745         <span class="keywordflow">return</span> NULL;
<a name="l00746"></a>00746     <span class="keywordflow">return</span> (*itor-&gt;model-&gt;<a class="code" href="structngram__model__s.html#ad3d9d8ad9773f958a89534220eda6fb9" title="Implementation-specific methods.">funcs</a>-&gt;<a class="code" href="structngram__funcs__s.html#af83b5620eb7bc37984ad522b66e266ad" title="Implementation-specific function for iterating.">successors</a>)(itor);
<a name="l00747"></a>00747 }
<a name="l00748"></a>00748 
<a name="l00749"></a>00749 int32 <span class="keyword">const</span> *
<a name="l00750"></a><a class="code" href="ngram__model_8h.html#a240c738781daa226a2fc13395dbdb514">00750</a> <a class="code" href="ngram__model_8h.html#a240c738781daa226a2fc13395dbdb514" title="Get information from the current M-gram in an iterator.">ngram_iter_get</a>(<a class="code" href="structngram__iter__s.html" title="Base iterator structure for N-grams.">ngram_iter_t</a> *itor,
<a name="l00751"></a>00751                int32 *out_score,
<a name="l00752"></a>00752                int32 *out_bowt)
<a name="l00753"></a>00753 {
<a name="l00754"></a>00754     <span class="keywordflow">return</span> (*itor-&gt;model-&gt;<a class="code" href="structngram__model__s.html#ad3d9d8ad9773f958a89534220eda6fb9" title="Implementation-specific methods.">funcs</a>-&gt;<a class="code" href="structngram__funcs__s.html#ac515e6ad9a1afbda6b74e8204fd460b5" title="Implementation-specific function for iterating.">iter_get</a>)(itor, out_score, out_bowt);
<a name="l00755"></a>00755 }
<a name="l00756"></a>00756 
<a name="l00757"></a>00757 <a class="code" href="structngram__iter__s.html" title="Base iterator structure for N-grams.">ngram_iter_t</a> *
<a name="l00758"></a><a class="code" href="ngram__model_8h.html#a3a2b285c01393b3ebddaec1fefed11a4">00758</a> <a class="code" href="ngram__model_8h.html#a3a2b285c01393b3ebddaec1fefed11a4" title="Advance an M-gram iterator.">ngram_iter_next</a>(<a class="code" href="structngram__iter__s.html" title="Base iterator structure for N-grams.">ngram_iter_t</a> *itor)
<a name="l00759"></a>00759 {
<a name="l00760"></a>00760     <span class="keywordflow">return</span> (*itor-&gt;model-&gt;<a class="code" href="structngram__model__s.html#ad3d9d8ad9773f958a89534220eda6fb9" title="Implementation-specific methods.">funcs</a>-&gt;<a class="code" href="structngram__funcs__s.html#a57777056985f8adc3494e859c1102f3e" title="Implementation-specific function for iterating.">iter_next</a>)(itor);
<a name="l00761"></a>00761 }
<a name="l00762"></a>00762 
<a name="l00763"></a>00763 <span class="keywordtype">void</span>
<a name="l00764"></a><a class="code" href="ngram__model_8h.html#ac9f746c8a5db78ef8b2fb7c312be4a22">00764</a> <a class="code" href="ngram__model_8h.html#ac9f746c8a5db78ef8b2fb7c312be4a22" title="Terminate an M-gram iterator.">ngram_iter_free</a>(<a class="code" href="structngram__iter__s.html" title="Base iterator structure for N-grams.">ngram_iter_t</a> *itor)
<a name="l00765"></a>00765 {
<a name="l00766"></a>00766     <a class="code" href="ckd__alloc_8h.html#a31c6b405558620ac37599737b5722fbf" title="Test and free a 1-D array.">ckd_free</a>(itor-&gt;<a class="code" href="structngram__iter__s.html#a1362ebe2dcf1c441929c845fc97958cb" title="Scratch space for word IDs.">wids</a>);
<a name="l00767"></a>00767     (*itor-&gt;model-&gt;<a class="code" href="structngram__model__s.html#ad3d9d8ad9773f958a89534220eda6fb9" title="Implementation-specific methods.">funcs</a>-&gt;<a class="code" href="structngram__funcs__s.html#ad7031e996c16516cf7f52e7a4adee4bb" title="Implementation-specific function for iterating.">iter_free</a>)(itor);
<a name="l00768"></a>00768 }
<a name="l00769"></a>00769 
<a name="l00770"></a>00770 int32
<a name="l00771"></a><a class="code" href="ngram__model_8h.html#ad03d4355d4ea659815dc25bce8d83880">00771</a> <a class="code" href="ngram__model_8h.html#ad03d4355d4ea659815dc25bce8d83880" title="Look up numerical word ID.">ngram_wid</a>(<a class="code" href="structngram__model__s.html" title="Common implementation of ngram_model_t.">ngram_model_t</a> *model, <span class="keyword">const</span> <span class="keywordtype">char</span> *word)
<a name="l00772"></a>00772 {
<a name="l00773"></a>00773     int32 val;
<a name="l00774"></a>00774 
<a name="l00775"></a>00775     <span class="keywordflow">if</span> (<a class="code" href="hash__table_8h.html#acaf27e8e7e336faf6653649937c42ed8" title="Look up a 32-bit integer value in a hash table.">hash_table_lookup_int32</a>(model-&gt;<a class="code" href="structngram__model__s.html#a75567419a8002ef6e916c81f5d9ee9ed" title="Mapping of unigram names to word IDs.">wid</a>, word, &amp;val) == -1)
<a name="l00776"></a>00776         <span class="keywordflow">return</span> <a class="code" href="ngram__model_8h.html#a1469e9e1c8516a77c9ac1e248a61ef4e" title="Get the unknown word ID for a language model.">ngram_unknown_wid</a>(model);
<a name="l00777"></a>00777     <span class="keywordflow">else</span>
<a name="l00778"></a>00778         <span class="keywordflow">return</span> val;
<a name="l00779"></a>00779 }
<a name="l00780"></a>00780 
<a name="l00781"></a>00781 <span class="keyword">const</span> <span class="keywordtype">char</span> *
<a name="l00782"></a><a class="code" href="ngram__model_8h.html#a96e36290a005c03464ea6c637ccde2f5">00782</a> <a class="code" href="ngram__model_8h.html#a96e36290a005c03464ea6c637ccde2f5" title="Look up word string for numerical word ID.">ngram_word</a>(<a class="code" href="structngram__model__s.html" title="Common implementation of ngram_model_t.">ngram_model_t</a> *model, int32 wid)
<a name="l00783"></a>00783 {
<a name="l00784"></a>00784     <span class="comment">/* Remove any class tag */</span>
<a name="l00785"></a>00785     wid = NGRAM_BASEWID(wid);
<a name="l00786"></a>00786     <span class="keywordflow">if</span> (wid &gt;= model-&gt;<a class="code" href="structngram__model__s.html#a74f85927ef0d5513a1e6c02d13864be3" title="Number of actual word strings (NOT the same as the number of unigrams, due to class words)...">n_words</a>)
<a name="l00787"></a>00787         <span class="keywordflow">return</span> NULL;
<a name="l00788"></a>00788     <span class="keywordflow">return</span> model-&gt;<a class="code" href="structngram__model__s.html#ae625e779e340845f03fb3da164e93039" title="Unigram names.">word_str</a>[wid];
<a name="l00789"></a>00789 }
<a name="l00790"></a>00790 
<a name="l00794"></a>00794 int32
<a name="l00795"></a>00795 ngram_add_word_internal(<a class="code" href="structngram__model__s.html" title="Common implementation of ngram_model_t.">ngram_model_t</a> *model,
<a name="l00796"></a>00796                         <span class="keyword">const</span> <span class="keywordtype">char</span> *word,
<a name="l00797"></a>00797                         int32 classid)
<a name="l00798"></a>00798 {
<a name="l00799"></a>00799     <span class="keywordtype">void</span> *dummy;
<a name="l00800"></a>00800     int32 wid;
<a name="l00801"></a>00801 
<a name="l00802"></a>00802     <span class="comment">/* Take the next available word ID */</span>
<a name="l00803"></a>00803     wid = model-&gt;<a class="code" href="structngram__model__s.html#a74f85927ef0d5513a1e6c02d13864be3" title="Number of actual word strings (NOT the same as the number of unigrams, due to class words)...">n_words</a>;
<a name="l00804"></a>00804     <span class="keywordflow">if</span> (classid &gt;= 0) {
<a name="l00805"></a>00805         wid = NGRAM_CLASSWID(wid, classid);
<a name="l00806"></a>00806     }
<a name="l00807"></a>00807     <span class="comment">/* Reject words that are already defined in the vocabulary. */</span>
<a name="l00808"></a>00808     <span class="keywordflow">if</span> (<a class="code" href="hash__table_8h.html#a9a1e5ed410eb96f514b00fdce770fbd7" title="Look up a key in a hash table and optionally return the associated value.">hash_table_lookup</a>(model-&gt;<a class="code" href="structngram__model__s.html#a75567419a8002ef6e916c81f5d9ee9ed" title="Mapping of unigram names to word IDs.">wid</a>, word, &amp;dummy) == 0) {
<a name="l00809"></a>00809         <a class="code" href="err_8h.html#a5f7b2f58f5a663a6bdd51f197ae21993" title="Print error message to standard error stream.">E_ERROR</a>(<span class="stringliteral">&quot;Duplicate definition of word %s\n&quot;</span>, word);
<a name="l00810"></a>00810         <span class="keywordflow">return</span> <a class="code" href="ngram__model_8h.html#a3d4b3dddd0ff67e13d30c9bf053d01ab" title="Impossible word ID.">NGRAM_INVALID_WID</a>;
<a name="l00811"></a>00811     }
<a name="l00812"></a>00812     <span class="comment">/* Reallocate word_str if necessary. */</span>
<a name="l00813"></a>00813     <span class="keywordflow">if</span> (model-&gt;<a class="code" href="structngram__model__s.html#a74f85927ef0d5513a1e6c02d13864be3" title="Number of actual word strings (NOT the same as the number of unigrams, due to class words)...">n_words</a> &gt;= model-&gt;<a class="code" href="structngram__model__s.html#a3e41109b30668bdfc077614c1ef49960" title="Number of allocated word strings (for new word addition)">n_1g_alloc</a>) {
<a name="l00814"></a>00814         model-&gt;<a class="code" href="structngram__model__s.html#a3e41109b30668bdfc077614c1ef49960" title="Number of allocated word strings (for new word addition)">n_1g_alloc</a> += UG_ALLOC_STEP;
<a name="l00815"></a>00815         model-&gt;<a class="code" href="structngram__model__s.html#ae625e779e340845f03fb3da164e93039" title="Unigram names.">word_str</a> = <a class="code" href="ckd__alloc_8h.html#afd496738b3e114bd494c5a0955f1bfb3" title="Macro for __ckd_realloc__.">ckd_realloc</a>(model-&gt;<a class="code" href="structngram__model__s.html#ae625e779e340845f03fb3da164e93039" title="Unigram names.">word_str</a>,
<a name="l00816"></a>00816                                       <span class="keyword">sizeof</span>(*model-&gt;<a class="code" href="structngram__model__s.html#ae625e779e340845f03fb3da164e93039" title="Unigram names.">word_str</a>) * model-&gt;<a class="code" href="structngram__model__s.html#a3e41109b30668bdfc077614c1ef49960" title="Number of allocated word strings (for new word addition)">n_1g_alloc</a>);
<a name="l00817"></a>00817     }
<a name="l00818"></a>00818     <span class="comment">/* Add the word string in the appropriate manner. */</span>
<a name="l00819"></a>00819     <span class="comment">/* Class words are always dynamically allocated. */</span>
<a name="l00820"></a>00820     model-&gt;<a class="code" href="structngram__model__s.html#ae625e779e340845f03fb3da164e93039" title="Unigram names.">word_str</a>[model-&gt;<a class="code" href="structngram__model__s.html#a74f85927ef0d5513a1e6c02d13864be3" title="Number of actual word strings (NOT the same as the number of unigrams, due to class words)...">n_words</a>] = <a class="code" href="ckd__alloc_8h.html#ad313f92478859f9e4ea99d0f6e78c393" title="Macro for __ckd_salloc__.">ckd_salloc</a>(word);
<a name="l00821"></a>00821     <span class="comment">/* Now enter it into the hash table. */</span>
<a name="l00822"></a>00822     <span class="keywordflow">if</span> (<a class="code" href="hash__table_8h.html#a393c56322e54607a48e6bc61169d92bf" title="Add a 32-bit integer value to a hash table.">hash_table_enter_int32</a>(model-&gt;<a class="code" href="structngram__model__s.html#a75567419a8002ef6e916c81f5d9ee9ed" title="Mapping of unigram names to word IDs.">wid</a>, model-&gt;<a class="code" href="structngram__model__s.html#ae625e779e340845f03fb3da164e93039" title="Unigram names.">word_str</a>[model-&gt;<a class="code" href="structngram__model__s.html#a74f85927ef0d5513a1e6c02d13864be3" title="Number of actual word strings (NOT the same as the number of unigrams, due to class words)...">n_words</a>], wid) != wid) {
<a name="l00823"></a>00823         <a class="code" href="err_8h.html#a5f7b2f58f5a663a6bdd51f197ae21993" title="Print error message to standard error stream.">E_ERROR</a>(<span class="stringliteral">&quot;Hash insertion failed for word %s =&gt; %p (should not happen)\n&quot;</span>,
<a name="l00824"></a>00824                 model-&gt;<a class="code" href="structngram__model__s.html#ae625e779e340845f03fb3da164e93039" title="Unigram names.">word_str</a>[model-&gt;<a class="code" href="structngram__model__s.html#a74f85927ef0d5513a1e6c02d13864be3" title="Number of actual word strings (NOT the same as the number of unigrams, due to class words)...">n_words</a>], (<span class="keywordtype">void</span> *)(<span class="keywordtype">long</span>)(wid));
<a name="l00825"></a>00825     }
<a name="l00826"></a>00826     <span class="comment">/* Increment number of words. */</span>
<a name="l00827"></a>00827     ++model-&gt;<a class="code" href="structngram__model__s.html#a74f85927ef0d5513a1e6c02d13864be3" title="Number of actual word strings (NOT the same as the number of unigrams, due to class words)...">n_words</a>;
<a name="l00828"></a>00828     <span class="keywordflow">return</span> wid;
<a name="l00829"></a>00829 }
<a name="l00830"></a>00830 
<a name="l00831"></a>00831 int32
<a name="l00832"></a><a class="code" href="ngram__model_8h.html#a38e034d58ee0d6a4c0dd6f852cb71c3c">00832</a> <a class="code" href="ngram__model_8h.html#a38e034d58ee0d6a4c0dd6f852cb71c3c" title="Add a word (unigram) to the language model.">ngram_model_add_word</a>(<a class="code" href="structngram__model__s.html" title="Common implementation of ngram_model_t.">ngram_model_t</a> *model,
<a name="l00833"></a>00833                      <span class="keyword">const</span> <span class="keywordtype">char</span> *word, float32 weight)
<a name="l00834"></a>00834 {
<a name="l00835"></a>00835     int32 wid, prob = model-&gt;<a class="code" href="structngram__model__s.html#a65425a599c4bcc4dda809d81149b8fc0" title="Zero probability, cached here for quick lookup.">log_zero</a>;
<a name="l00836"></a>00836 
<a name="l00837"></a>00837     wid = ngram_add_word_internal(model, word, -1);
<a name="l00838"></a>00838     <span class="keywordflow">if</span> (wid == <a class="code" href="ngram__model_8h.html#a3d4b3dddd0ff67e13d30c9bf053d01ab" title="Impossible word ID.">NGRAM_INVALID_WID</a>)
<a name="l00839"></a>00839         <span class="keywordflow">return</span> wid;
<a name="l00840"></a>00840 
<a name="l00841"></a>00841     <span class="comment">/* Do what needs to be done to add the word to the unigram. */</span>
<a name="l00842"></a>00842     <span class="keywordflow">if</span> (model-&gt;<a class="code" href="structngram__model__s.html#ad3d9d8ad9773f958a89534220eda6fb9" title="Implementation-specific methods.">funcs</a> &amp;&amp; model-&gt;<a class="code" href="structngram__model__s.html#ad3d9d8ad9773f958a89534220eda6fb9" title="Implementation-specific methods.">funcs</a>-&gt;<a class="code" href="structngram__funcs__s.html#a6d553c95c7f4da4993f2b9df757ac016" title="Implementation-specific function for adding unigrams.">add_ug</a>)
<a name="l00843"></a>00843         prob = (*model-&gt;<a class="code" href="structngram__model__s.html#ad3d9d8ad9773f958a89534220eda6fb9" title="Implementation-specific methods.">funcs</a>-&gt;<a class="code" href="structngram__funcs__s.html#a6d553c95c7f4da4993f2b9df757ac016" title="Implementation-specific function for adding unigrams.">add_ug</a>)(model, wid, <a class="code" href="logmath_8h.html#aebb4711268322fa7aec31e5798fe7e90" title="Convert linear floating point number to integer log in base B.">logmath_log</a>(model-&gt;<a class="code" href="structngram__model__s.html#a2ca373109c651ac998b33153eb38fd95" title="Log-math object.">lmath</a>, weight));
<a name="l00844"></a>00844     <span class="keywordflow">if</span> (prob == 0) {
<a name="l00845"></a>00845         <span class="keywordflow">if</span> (model-&gt;<a class="code" href="structngram__model__s.html#a78a3253febced2cae4732044da466ee6" title="Are word strings writable?">writable</a>)
<a name="l00846"></a>00846             <a class="code" href="ckd__alloc_8h.html#a31c6b405558620ac37599737b5722fbf" title="Test and free a 1-D array.">ckd_free</a>(model-&gt;<a class="code" href="structngram__model__s.html#ae625e779e340845f03fb3da164e93039" title="Unigram names.">word_str</a>[wid]);
<a name="l00847"></a>00847         <span class="keywordflow">return</span> -1;
<a name="l00848"></a>00848     }
<a name="l00849"></a>00849     <span class="keywordflow">return</span> wid;
<a name="l00850"></a>00850 }
<a name="l00851"></a>00851 
<a name="l00852"></a>00852 <a class="code" href="structngram__class__s.html" title="Implementation of ngram_class_t.">ngram_class_t</a> *
<a name="l00853"></a>00853 ngram_class_new(<a class="code" href="structngram__model__s.html" title="Common implementation of ngram_model_t.">ngram_model_t</a> *model, int32 tag_wid, int32 start_wid, <a class="code" href="structgnode__s.html" title="A node in a generic list.">glist_t</a> classwords)
<a name="l00854"></a>00854 {
<a name="l00855"></a>00855     <a class="code" href="structngram__class__s.html" title="Implementation of ngram_class_t.">ngram_class_t</a> *lmclass;
<a name="l00856"></a>00856     <a class="code" href="structgnode__s.html" title="A node in a generic list.">gnode_t</a> *gn;
<a name="l00857"></a>00857     float32 tprob;
<a name="l00858"></a>00858     <span class="keywordtype">int</span> i;
<a name="l00859"></a>00859 
<a name="l00860"></a>00860     lmclass = <a class="code" href="ckd__alloc_8h.html#aa00ef21903bc4f8a972488417adc8d2e" title="Macros to simplify the use of above functions.">ckd_calloc</a>(1, <span class="keyword">sizeof</span>(*lmclass));
<a name="l00861"></a>00861     lmclass-&gt;<a class="code" href="structngram__class__s.html#ab5f3cc0142c9fd91b3c3d0e59906b556" title="Base word ID for this class tag.">tag_wid</a> = tag_wid;
<a name="l00862"></a>00862     <span class="comment">/* start_wid is the wid (minus class tag) of the first word in the list. */</span>
<a name="l00863"></a>00863     lmclass-&gt;<a class="code" href="structngram__class__s.html#a370c88602c7c1f7e3ff1a767c027f5cb" title="Starting base word ID for this class&#39; words.">start_wid</a> = start_wid;
<a name="l00864"></a>00864     lmclass-&gt;<a class="code" href="structngram__class__s.html#af13562cbc44647435f315b18df5688dc" title="Number of base words for this class.">n_words</a> = <a class="code" href="glist_8h.html#aeb046e39c540d2f5f792119ea0d24c48" title="Count the number of elements in a given linked list.">glist_count</a>(classwords);
<a name="l00865"></a>00865     lmclass-&gt;<a class="code" href="structngram__class__s.html#a50077f48f135f1c666745a21574e4205" title="Probability table for base words.">prob1</a> = <a class="code" href="ckd__alloc_8h.html#aa00ef21903bc4f8a972488417adc8d2e" title="Macros to simplify the use of above functions.">ckd_calloc</a>(lmclass-&gt;<a class="code" href="structngram__class__s.html#af13562cbc44647435f315b18df5688dc" title="Number of base words for this class.">n_words</a>, <span class="keyword">sizeof</span>(*lmclass-&gt;<a class="code" href="structngram__class__s.html#a50077f48f135f1c666745a21574e4205" title="Probability table for base words.">prob1</a>));
<a name="l00866"></a>00866     lmclass-&gt;nword_hash = NULL;
<a name="l00867"></a>00867     lmclass-&gt;<a class="code" href="structngram__class__s.html#a7f450019eb6dc2e31b18eb3ab6009920" title="Number of buckets in nword_hash (power of 2)">n_hash</a> = 0;
<a name="l00868"></a>00868     tprob = 0.0;
<a name="l00869"></a>00869     <span class="keywordflow">for</span> (gn = classwords; gn; gn = gnode_next(gn)) {
<a name="l00870"></a>00870         tprob += gnode_float32(gn);
<a name="l00871"></a>00871     }
<a name="l00872"></a>00872     <span class="keywordflow">if</span> (tprob &gt; 1.1 || tprob &lt; 0.9) {
<a name="l00873"></a>00873         <a class="code" href="err_8h.html#a6a794bec721b555ac1f2167f9e12f662" title="Print warning information to standard error stream.">E_WARN</a>(<span class="stringliteral">&quot;Total class probability is %f, will normalize\n&quot;</span>, tprob);
<a name="l00874"></a>00874         <span class="keywordflow">for</span> (gn = classwords; gn; gn = gnode_next(gn)) {
<a name="l00875"></a>00875             gn-&gt;data.fl /= tprob;
<a name="l00876"></a>00876         }
<a name="l00877"></a>00877     }
<a name="l00878"></a>00878     <span class="keywordflow">for</span> (i = 0, gn = classwords; gn; ++i, gn = gnode_next(gn)) {
<a name="l00879"></a>00879         lmclass-&gt;<a class="code" href="structngram__class__s.html#a50077f48f135f1c666745a21574e4205" title="Probability table for base words.">prob1</a>[i] = <a class="code" href="logmath_8h.html#aebb4711268322fa7aec31e5798fe7e90" title="Convert linear floating point number to integer log in base B.">logmath_log</a>(model-&gt;<a class="code" href="structngram__model__s.html#a2ca373109c651ac998b33153eb38fd95" title="Log-math object.">lmath</a>, gnode_float32(gn));
<a name="l00880"></a>00880     }
<a name="l00881"></a>00881 
<a name="l00882"></a>00882     <span class="keywordflow">return</span> lmclass;
<a name="l00883"></a>00883 }
<a name="l00884"></a>00884 
<a name="l00885"></a>00885 int32
<a name="l00886"></a>00886 ngram_class_add_word(<a class="code" href="structngram__class__s.html" title="Implementation of ngram_class_t.">ngram_class_t</a> *lmclass, int32 wid, int32 lweight)
<a name="l00887"></a>00887 {
<a name="l00888"></a>00888     int32 hash;
<a name="l00889"></a>00889 
<a name="l00890"></a>00890     <span class="keywordflow">if</span> (lmclass-&gt;nword_hash == NULL) {
<a name="l00891"></a>00891         <span class="comment">/* Initialize everything in it to -1 */</span>
<a name="l00892"></a>00892         lmclass-&gt;nword_hash = <a class="code" href="ckd__alloc_8h.html#a8e89a31c3c70710a8e023a177084bff2" title="Macro for __ckd_malloc__.">ckd_malloc</a>(NGRAM_HASH_SIZE * <span class="keyword">sizeof</span>(*lmclass-&gt;nword_hash));
<a name="l00893"></a>00893         memset(lmclass-&gt;nword_hash, 0xff, NGRAM_HASH_SIZE * <span class="keyword">sizeof</span>(*lmclass-&gt;nword_hash));
<a name="l00894"></a>00894         lmclass-&gt;<a class="code" href="structngram__class__s.html#a7f450019eb6dc2e31b18eb3ab6009920" title="Number of buckets in nword_hash (power of 2)">n_hash</a> = NGRAM_HASH_SIZE;
<a name="l00895"></a>00895         lmclass-&gt;<a class="code" href="structngram__class__s.html#a79438cd582363800bc05da31a9ca49d6" title="Number of words in nword_hash.">n_hash_inuse</a> = 0;
<a name="l00896"></a>00896     }
<a name="l00897"></a>00897     <span class="comment">/* Stupidest possible hash function.  This will work pretty well</span>
<a name="l00898"></a>00898 <span class="comment">     * when this function is called repeatedly with contiguous word</span>
<a name="l00899"></a>00899 <span class="comment">     * IDs, though... */</span>
<a name="l00900"></a>00900     hash = wid &amp; (lmclass-&gt;<a class="code" href="structngram__class__s.html#a7f450019eb6dc2e31b18eb3ab6009920" title="Number of buckets in nword_hash (power of 2)">n_hash</a> - 1);
<a name="l00901"></a>00901     <span class="keywordflow">if</span> (lmclass-&gt;nword_hash[hash].<a class="code" href="structngram__class__s_1_1ngram__hash__s.html#ad0178b5a86ec23ce790b6b7cb64db0b9" title="Word ID of this bucket.">wid</a> == -1) {
<a name="l00902"></a>00902         <span class="comment">/* Good, no collision. */</span>
<a name="l00903"></a>00903         lmclass-&gt;nword_hash[hash].<a class="code" href="structngram__class__s_1_1ngram__hash__s.html#ad0178b5a86ec23ce790b6b7cb64db0b9" title="Word ID of this bucket.">wid</a> = wid;
<a name="l00904"></a>00904         lmclass-&gt;nword_hash[hash].<a class="code" href="structngram__class__s_1_1ngram__hash__s.html#a6ac4c01b0c8d29f770f4780e38ab0923" title="Probability for this word.">prob1</a> = lweight;
<a name="l00905"></a>00905         ++lmclass-&gt;<a class="code" href="structngram__class__s.html#a79438cd582363800bc05da31a9ca49d6" title="Number of words in nword_hash.">n_hash_inuse</a>;
<a name="l00906"></a>00906         <span class="keywordflow">return</span> hash;
<a name="l00907"></a>00907     }
<a name="l00908"></a>00908     <span class="keywordflow">else</span> {
<a name="l00909"></a>00909         int32 next; 
<a name="l00910"></a>00910         <span class="comment">/* Collision... Find the end of the hash chain. */</span>
<a name="l00911"></a>00911         <span class="keywordflow">while</span> (lmclass-&gt;nword_hash[hash].<a class="code" href="structngram__class__s_1_1ngram__hash__s.html#a88ede5798cadc2bece12c49fa038b090" title="Index of next bucket (or -1 for no collision)">next</a> != -1)
<a name="l00912"></a>00912             hash = lmclass-&gt;nword_hash[hash].<a class="code" href="structngram__class__s_1_1ngram__hash__s.html#a88ede5798cadc2bece12c49fa038b090" title="Index of next bucket (or -1 for no collision)">next</a>;
<a name="l00913"></a>00913         assert(hash != -1);
<a name="l00914"></a>00914         <span class="comment">/* Is every bucket in the table already in use? */</span>
<a name="l00915"></a>00915         <span class="keywordflow">if</span> (lmclass-&gt;<a class="code" href="structngram__class__s.html#a79438cd582363800bc05da31a9ca49d6" title="Number of words in nword_hash.">n_hash_inuse</a> == lmclass-&gt;<a class="code" href="structngram__class__s.html#a7f450019eb6dc2e31b18eb3ab6009920" title="Number of buckets in nword_hash (power of 2)">n_hash</a>) {
<a name="l00916"></a>00916             <span class="comment">/* Table is full: double its size and mark the new buckets as empty. */</span>
<a name="l00917"></a>00917             lmclass-&gt;nword_hash = <a class="code" href="ckd__alloc_8h.html#afd496738b3e114bd494c5a0955f1bfb3" title="Macro for __ckd_realloc__.">ckd_realloc</a>(lmclass-&gt;nword_hash, 
<a name="l00918"></a>00918                                               lmclass-&gt;<a class="code" href="structngram__class__s.html#a7f450019eb6dc2e31b18eb3ab6009920" title="Number of buckets in nword_hash (power of 2)">n_hash</a> * 2 * <span class="keyword">sizeof</span>(*lmclass-&gt;nword_hash));
<a name="l00919"></a>00919             memset(lmclass-&gt;nword_hash + lmclass-&gt;<a class="code" href="structngram__class__s.html#a7f450019eb6dc2e31b18eb3ab6009920" title="Number of buckets in nword_hash (power of 2)">n_hash</a>,
<a name="l00920"></a>00920                    0xff, lmclass-&gt;<a class="code" href="structngram__class__s.html#a7f450019eb6dc2e31b18eb3ab6009920" title="Number of buckets in nword_hash (power of 2)">n_hash</a> * <span class="keyword">sizeof</span>(*lmclass-&gt;nword_hash));
<a name="l00921"></a>00921             <span class="comment">/* Just use the next allocated one (easy) */</span>
<a name="l00922"></a>00922             next = lmclass-&gt;<a class="code" href="structngram__class__s.html#a7f450019eb6dc2e31b18eb3ab6009920" title="Number of buckets in nword_hash (power of 2)">n_hash</a>;
<a name="l00923"></a>00923             lmclass-&gt;<a class="code" href="structngram__class__s.html#a7f450019eb6dc2e31b18eb3ab6009920" title="Number of buckets in nword_hash (power of 2)">n_hash</a> *= 2;
<a name="l00924"></a>00924         }
<a name="l00925"></a>00925         <span class="keywordflow">else</span> {
<a name="l00926"></a>00926             <span class="comment">/* Look for any available bucket.  We hope this doesn&#39;t happen. */</span>
<a name="l00927"></a>00927             <span class="keywordflow">for</span> (next = 0; next &lt; lmclass-&gt;<a class="code" href="structngram__class__s.html#a7f450019eb6dc2e31b18eb3ab6009920" title="Number of buckets in nword_hash (power of 2)">n_hash</a>; ++next)
<a name="l00928"></a>00928                 <span class="keywordflow">if</span> (lmclass-&gt;nword_hash[next].<a class="code" href="structngram__class__s_1_1ngram__hash__s.html#ad0178b5a86ec23ce790b6b7cb64db0b9" title="Word ID of this bucket.">wid</a> == -1)
<a name="l00929"></a>00929                     <span class="keywordflow">break</span>;
<a name="l00930"></a>00930             <span class="comment">/* This should absolutely not happen. */</span>
<a name="l00931"></a>00931             assert(next != lmclass-&gt;<a class="code" href="structngram__class__s.html#a7f450019eb6dc2e31b18eb3ab6009920" title="Number of buckets in nword_hash (power of 2)">n_hash</a>);
<a name="l00932"></a>00932         }
<a name="l00933"></a>00933         lmclass-&gt;nword_hash[next].<a class="code" href="structngram__class__s_1_1ngram__hash__s.html#ad0178b5a86ec23ce790b6b7cb64db0b9" title="Word ID of this bucket.">wid</a> = wid;
<a name="l00934"></a>00934         lmclass-&gt;nword_hash[next].<a class="code" href="structngram__class__s_1_1ngram__hash__s.html#a6ac4c01b0c8d29f770f4780e38ab0923" title="Probability for this word.">prob1</a> = lweight;
<a name="l00935"></a>00935         lmclass-&gt;nword_hash[hash].<a class="code" href="structngram__class__s_1_1ngram__hash__s.html#a88ede5798cadc2bece12c49fa038b090" title="Index of next bucket (or -1 for no collision)">next</a> = next;
<a name="l00936"></a>00936         ++lmclass-&gt;<a class="code" href="structngram__class__s.html#a79438cd582363800bc05da31a9ca49d6" title="Number of words in nword_hash.">n_hash_inuse</a>;
<a name="l00937"></a>00937         <span class="keywordflow">return</span> next;
<a name="l00938"></a>00938     }
<a name="l00939"></a>00939 }
<a name="l00940"></a>00940 
<a name="l00941"></a>00941 <span class="keywordtype">void</span>
<a name="l00942"></a>00942 ngram_class_free(<a class="code" href="structngram__class__s.html" title="Implementation of ngram_class_t.">ngram_class_t</a> *lmclass)
<a name="l00943"></a>00943 {
<a name="l00944"></a>00944     <a class="code" href="ckd__alloc_8h.html#a31c6b405558620ac37599737b5722fbf" title="Test and free a 1-D array.">ckd_free</a>(lmclass-&gt;nword_hash);
<a name="l00945"></a>00945     <a class="code" href="ckd__alloc_8h.html#a31c6b405558620ac37599737b5722fbf" title="Test and free a 1-D array.">ckd_free</a>(lmclass-&gt;<a class="code" href="structngram__class__s.html#a50077f48f135f1c666745a21574e4205" title="Probability table for base words.">prob1</a>);
<a name="l00946"></a>00946     <a class="code" href="ckd__alloc_8h.html#a31c6b405558620ac37599737b5722fbf" title="Test and free a 1-D array.">ckd_free</a>(lmclass);
<a name="l00947"></a>00947 }
<a name="l00948"></a>00948 
<a name="l00949"></a>00949 int32
<a name="l00950"></a><a class="code" href="ngram__model_8h.html#a39eabb4994cf99c4bc2116e12af0c9f1">00950</a> <a class="code" href="ngram__model_8h.html#a39eabb4994cf99c4bc2116e12af0c9f1" title="Add a word to a class in a language model.">ngram_model_add_class_word</a>(<a class="code" href="structngram__model__s.html" title="Common implementation of ngram_model_t.">ngram_model_t</a> *model,
<a name="l00951"></a>00951                            <span class="keyword">const</span> <span class="keywordtype">char</span> *classname,
<a name="l00952"></a>00952                            <span class="keyword">const</span> <span class="keywordtype">char</span> *word,
<a name="l00953"></a>00953                            float32 weight)
<a name="l00954"></a>00954 {
<a name="l00955"></a>00955     <a class="code" href="structngram__class__s.html" title="Implementation of ngram_class_t.">ngram_class_t</a> *lmclass;
<a name="l00956"></a>00956     int32 classid, tag_wid, wid, i, scale;
<a name="l00957"></a>00957     float32 fprob;
<a name="l00958"></a>00958 
<a name="l00959"></a>00959     <span class="comment">/* Find the class corresponding to classname.  Linear search</span>
<a name="l00960"></a>00960 <span class="comment">     * probably okay here since there won&#39;t be very many classes, and</span>
<a name="l00961"></a>00961 <span class="comment">     * this doesn&#39;t have to be fast. */</span>
<a name="l00962"></a>00962     tag_wid = <a class="code" href="ngram__model_8h.html#ad03d4355d4ea659815dc25bce8d83880" title="Look up numerical word ID.">ngram_wid</a>(model, classname);
<a name="l00963"></a>00963     <span class="keywordflow">if</span> (tag_wid == <a class="code" href="ngram__model_8h.html#a3d4b3dddd0ff67e13d30c9bf053d01ab" title="Impossible word ID.">NGRAM_INVALID_WID</a>) {
<a name="l00964"></a>00964         <a class="code" href="err_8h.html#a5f7b2f58f5a663a6bdd51f197ae21993" title="Print error message to standard error stream.">E_ERROR</a>(<span class="stringliteral">&quot;No such word or class tag: %s\n&quot;</span>, classname);
<a name="l00965"></a>00965         <span class="keywordflow">return</span> tag_wid;
<a name="l00966"></a>00966     }
<a name="l00967"></a>00967     <span class="keywordflow">for</span> (classid = 0; classid &lt; model-&gt;<a class="code" href="structngram__model__s.html#adeb914f8e9f011a5c960f5ee9cd33919" title="Number of classes (maximum 128)">n_classes</a>; ++classid) {
<a name="l00968"></a>00968         <span class="keywordflow">if</span> (model-&gt;<a class="code" href="structngram__model__s.html#a39384af3e6b53591d433436db011ac8d" title="Word class definitions.">classes</a>[classid]-&gt;<a class="code" href="structngram__class__s.html#ab5f3cc0142c9fd91b3c3d0e59906b556" title="Base word ID for this class tag.">tag_wid</a> == tag_wid)
<a name="l00969"></a>00969             <span class="keywordflow">break</span>;
<a name="l00970"></a>00970     }
<a name="l00971"></a>00971     <span class="comment">/* Hmm, no such class.  It&#39;s probably not a good idea to create one. */</span>
<a name="l00972"></a>00972     <span class="keywordflow">if</span> (classid == model-&gt;<a class="code" href="structngram__model__s.html#adeb914f8e9f011a5c960f5ee9cd33919" title="Number of classes (maximum 128)">n_classes</a>) {
<a name="l00973"></a>00973         <a class="code" href="err_8h.html#a5f7b2f58f5a663a6bdd51f197ae21993" title="Print error message to standard error stream.">E_ERROR</a>(<span class="stringliteral">&quot;Word %s is not a class tag (call ngram_model_add_class() first)\n&quot;</span>, classname);
<a name="l00974"></a>00974         <span class="keywordflow">return</span> <a class="code" href="ngram__model_8h.html#a3d4b3dddd0ff67e13d30c9bf053d01ab" title="Impossible word ID.">NGRAM_INVALID_WID</a>;
<a name="l00975"></a>00975     }
<a name="l00976"></a>00976     lmclass = model-&gt;<a class="code" href="structngram__model__s.html#a39384af3e6b53591d433436db011ac8d" title="Word class definitions.">classes</a>[classid];
<a name="l00977"></a>00977 
<a name="l00978"></a>00978     <span class="comment">/* Add this word to the model&#39;s set of words. */</span>
<a name="l00979"></a>00979     wid = ngram_add_word_internal(model, word, classid);
<a name="l00980"></a>00980     <span class="keywordflow">if</span> (wid == <a class="code" href="ngram__model_8h.html#a3d4b3dddd0ff67e13d30c9bf053d01ab" title="Impossible word ID.">NGRAM_INVALID_WID</a>)
<a name="l00981"></a>00981         <span class="keywordflow">return</span> wid;
<a name="l00982"></a>00982 
<a name="l00983"></a>00983     <span class="comment">/* This is the fixed probability of the new word. */</span>
<a name="l00984"></a>00984     fprob = weight * 1.0f / (lmclass-&gt;<a class="code" href="structngram__class__s.html#af13562cbc44647435f315b18df5688dc" title="Number of base words for this class.">n_words</a> + lmclass-&gt;<a class="code" href="structngram__class__s.html#a79438cd582363800bc05da31a9ca49d6" title="Number of words in nword_hash.">n_hash_inuse</a> + 1);
<a name="l00985"></a>00985     <span class="comment">/* Now normalize everything else to fit it in.  This is</span>
<a name="l00986"></a>00986 <span class="comment">     * accomplished by simply scaling all the other probabilities</span>
<a name="l00987"></a>00987 <span class="comment">     * by (1-fprob). */</span>
<a name="l00988"></a>00988     scale = <a class="code" href="logmath_8h.html#aebb4711268322fa7aec31e5798fe7e90" title="Convert linear floating point number to integer log in base B.">logmath_log</a>(model-&gt;<a class="code" href="structngram__model__s.html#a2ca373109c651ac998b33153eb38fd95" title="Log-math object.">lmath</a>, 1.0 - fprob);
<a name="l00989"></a>00989     <span class="keywordflow">for</span> (i = 0; i &lt; lmclass-&gt;<a class="code" href="structngram__class__s.html#af13562cbc44647435f315b18df5688dc" title="Number of base words for this class.">n_words</a>; ++i)
<a name="l00990"></a>00990         lmclass-&gt;<a class="code" href="structngram__class__s.html#a50077f48f135f1c666745a21574e4205" title="Probability table for base words.">prob1</a>[i] += scale;
<a name="l00991"></a>00991     for (i = 0; i &lt; lmclass-&gt;<a class="code" href="structngram__class__s.html#a7f450019eb6dc2e31b18eb3ab6009920" title="Number of buckets in nword_hash (power of 2)">n_hash</a>; ++i)
<a name="l00992"></a>00992         <span class="keywordflow">if</span> (lmclass-&gt;nword_hash[i].<a class="code" href="structngram__class__s_1_1ngram__hash__s.html#ad0178b5a86ec23ce790b6b7cb64db0b9" title="Word ID of this bucket.">wid</a> != -1)
<a name="l00993"></a>00993             lmclass-&gt;nword_hash[i].<a class="code" href="structngram__class__s_1_1ngram__hash__s.html#a6ac4c01b0c8d29f770f4780e38ab0923" title="Probability for this word.">prob1</a> += scale;
<a name="l00994"></a>00994 
<a name="l00995"></a>00995     <span class="comment">/* Now add it to the class hash table. */</span>
<a name="l00996"></a>00996     <span class="keywordflow">return</span> ngram_class_add_word(lmclass, wid, <a class="code" href="logmath_8h.html#aebb4711268322fa7aec31e5798fe7e90" title="Convert linear floating point number to integer log in base B.">logmath_log</a>(model-&gt;<a class="code" href="structngram__model__s.html#a2ca373109c651ac998b33153eb38fd95" title="Log-math object.">lmath</a>, fprob));
<a name="l00997"></a>00997 }
<a name="l00998"></a>00998 
<a name="l00999"></a>00999 int32
<a name="l01000"></a><a class="code" href="ngram__model_8h.html#a0481b03a2bf03ef0a197da0a1f7d1caa">01000</a> <a class="code" href="ngram__model_8h.html#a0481b03a2bf03ef0a197da0a1f7d1caa" title="Add a new class to a language model.">ngram_model_add_class</a>(<a class="code" href="structngram__model__s.html" title="Common implementation of ngram_model_t.">ngram_model_t</a> *model,
<a name="l01001"></a>01001                       <span class="keyword">const</span> <span class="keywordtype">char</span> *classname,
<a name="l01002"></a>01002                       float32 classweight,
<a name="l01003"></a>01003                       <span class="keywordtype">char</span> **words,
<a name="l01004"></a>01004                       <span class="keyword">const</span> float32 *weights,
<a name="l01005"></a>01005                       int32 n_words)
<a name="l01006"></a>01006 {
<a name="l01007"></a>01007     <a class="code" href="structngram__class__s.html" title="Implementation of ngram_class_t.">ngram_class_t</a> *lmclass;
<a name="l01008"></a>01008     <a class="code" href="structgnode__s.html" title="A node in a generic list.">glist_t</a> classwords = NULL;
<a name="l01009"></a>01009     int32 i, start_wid = -1;
<a name="l01010"></a>01010     int32 classid, tag_wid;
<a name="l01011"></a>01011 
<a name="l01012"></a>01012     <span class="comment">/* Check if classname already exists in model.  If not, add it.*/</span>
<a name="l01013"></a>01013     <span class="keywordflow">if</span> ((tag_wid = <a class="code" href="ngram__model_8h.html#ad03d4355d4ea659815dc25bce8d83880" title="Look up numerical word ID.">ngram_wid</a>(model, classname)) == <a class="code" href="ngram__model_8h.html#a1469e9e1c8516a77c9ac1e248a61ef4e" title="Get the unknown word ID for a language model.">ngram_unknown_wid</a>(model)) {
<a name="l01014"></a>01014         tag_wid = <a class="code" href="ngram__model_8h.html#a38e034d58ee0d6a4c0dd6f852cb71c3c" title="Add a word (unigram) to the language model.">ngram_model_add_word</a>(model, classname, classweight);
<a name="l01015"></a>01015         <span class="keywordflow">if</span> (tag_wid == <a class="code" href="ngram__model_8h.html#a3d4b3dddd0ff67e13d30c9bf053d01ab" title="Impossible word ID.">NGRAM_INVALID_WID</a>)
<a name="l01016"></a>01016             <span class="keywordflow">return</span> -1;
<a name="l01017"></a>01017     }
<a name="l01018"></a>01018 
<a name="l01019"></a>01019     <span class="keywordflow">if</span> (model-&gt;<a class="code" href="structngram__model__s.html#adeb914f8e9f011a5c960f5ee9cd33919" title="Number of classes (maximum 128)">n_classes</a> == 128) {
<a name="l01020"></a>01020         <a class="code" href="err_8h.html#a5f7b2f58f5a663a6bdd51f197ae21993" title="Print error message to standard error stream.">E_ERROR</a>(<span class="stringliteral">&quot;Number of classes cannot exceed 128 (sorry)\n&quot;</span>);
<a name="l01021"></a>01021         <span class="keywordflow">return</span> -1;
<a name="l01022"></a>01022     }
<a name="l01023"></a>01023     classid = model-&gt;<a class="code" href="structngram__model__s.html#adeb914f8e9f011a5c960f5ee9cd33919" title="Number of classes (maximum 128)">n_classes</a>;
<a name="l01024"></a>01024     <span class="keywordflow">for</span> (i = 0; i &lt; n_words; ++i) {
<a name="l01025"></a>01025         int32 wid;
<a name="l01026"></a>01026 
<a name="l01027"></a>01027         wid = ngram_add_word_internal(model, words[i], classid);
<a name="l01028"></a>01028         <span class="keywordflow">if</span> (wid == <a class="code" href="ngram__model_8h.html#a3d4b3dddd0ff67e13d30c9bf053d01ab" title="Impossible word ID.">NGRAM_INVALID_WID</a>)
<a name="l01029"></a>01029             <span class="keywordflow">return</span> -1;
<a name="l01030"></a>01030         <span class="keywordflow">if</span> (start_wid == -1)
<a name="l01031"></a>01031             start_wid = NGRAM_BASEWID(wid);
<a name="l01032"></a>01032         classwords = <a class="code" href="glist_8h.html#a4fc4db2fbebd7b659554227d411f6737" title="Create and prepend a new list node containing a single-precision float.">glist_add_float32</a>(classwords, weights[i]);
<a name="l01033"></a>01033     }
<a name="l01034"></a>01034     classwords = <a class="code" href="glist_8h.html#a399a2a093c6c4ce1012762e4c25c8185" title="Reverse the order of the given glist.">glist_reverse</a>(classwords);
<a name="l01035"></a>01035     lmclass = ngram_class_new(model, tag_wid, start_wid, classwords);
<a name="l01036"></a>01036     <a class="code" href="glist_8h.html#a45380e15d2c33afc554fd60a8828580c" title="Free the given generic list; user-defined data contained within is not automatically freed...">glist_free</a>(classwords);
<a name="l01037"></a>01037     <span class="keywordflow">if</span> (lmclass == NULL)
<a name="l01038"></a>01038         <span class="keywordflow">return</span> -1;
<a name="l01039"></a>01039 
<a name="l01040"></a>01040     ++model-&gt;<a class="code" href="structngram__model__s.html#adeb914f8e9f011a5c960f5ee9cd33919" title="Number of classes (maximum 128)">n_classes</a>;
<a name="l01041"></a>01041     <span class="keywordflow">if</span> (model-&gt;<a class="code" href="structngram__model__s.html#a39384af3e6b53591d433436db011ac8d" title="Word class definitions.">classes</a> == NULL)
<a name="l01042"></a>01042         model-&gt;<a class="code" href="structngram__model__s.html#a39384af3e6b53591d433436db011ac8d" title="Word class definitions.">classes</a> = <a class="code" href="ckd__alloc_8h.html#aa00ef21903bc4f8a972488417adc8d2e" title="Macros to simplify the use of above functions.">ckd_calloc</a>(1, <span class="keyword">sizeof</span>(*model-&gt;<a class="code" href="structngram__model__s.html#a39384af3e6b53591d433436db011ac8d" title="Word class definitions.">classes</a>));
<a name="l01043"></a>01043     <span class="keywordflow">else</span>
<a name="l01044"></a>01044         model-&gt;<a class="code" href="structngram__model__s.html#a39384af3e6b53591d433436db011ac8d" title="Word class definitions.">classes</a> = <a class="code" href="ckd__alloc_8h.html#afd496738b3e114bd494c5a0955f1bfb3" title="Macro for __ckd_realloc__.">ckd_realloc</a>(model-&gt;<a class="code" href="structngram__model__s.html#a39384af3e6b53591d433436db011ac8d" title="Word class definitions.">classes</a>,
<a name="l01045"></a>01045                                      model-&gt;<a class="code" href="structngram__model__s.html#adeb914f8e9f011a5c960f5ee9cd33919" title="Number of classes (maximum 128)">n_classes</a> * <span class="keyword">sizeof</span>(*model-&gt;<a class="code" href="structngram__model__s.html#a39384af3e6b53591d433436db011ac8d" title="Word class definitions.">classes</a>));
<a name="l01046"></a>01046     model-&gt;<a class="code" href="structngram__model__s.html#a39384af3e6b53591d433436db011ac8d" title="Word class definitions.">classes</a>[classid] = lmclass;
<a name="l01047"></a>01047     <span class="keywordflow">return</span> classid;
<a name="l01048"></a>01048 }
<a name="l01049"></a>01049 
<a name="l01050"></a>01050 int32
<a name="l01051"></a>01051 ngram_class_prob(<a class="code" href="structngram__class__s.html" title="Implementation of ngram_class_t.">ngram_class_t</a> *lmclass, int32 wid)
<a name="l01052"></a>01052 {
<a name="l01053"></a>01053     int32 base_wid = NGRAM_BASEWID(wid);
<a name="l01054"></a>01054 
<a name="l01055"></a>01055     <span class="keywordflow">if</span> (base_wid &lt; lmclass-&gt;start_wid
<a name="l01056"></a>01056         || base_wid &gt; lmclass-&gt;<a class="code" href="structngram__class__s.html#a370c88602c7c1f7e3ff1a767c027f5cb" title="Starting base word ID for this class&amp;#39; words.">start_wid</a> + lmclass-&gt;<a class="code" href="structngram__class__s.html#af13562cbc44647435f315b18df5688dc" title="Number of base words for this class.">n_words</a>) {
<a name="l01057"></a>01057         int32 hash;
<a name="l01058"></a>01058 
<a name="l01059"></a>01059         <span class="comment">/* Look it up in the hash table. */</span>
<a name="l01060"></a>01060         hash = wid &amp; (lmclass-&gt;<a class="code" href="structngram__class__s.html#a7f450019eb6dc2e31b18eb3ab6009920" title="Number of buckets in nword_hash (power of 2)">n_hash</a> - 1);
<a name="l01061"></a>01061         <span class="keywordflow">while</span> (hash != -1 &amp;&amp; lmclass-&gt;nword_hash[hash].<a class="code" href="structngram__class__s_1_1ngram__hash__s.html#ad0178b5a86ec23ce790b6b7cb64db0b9" title="Word ID of this bucket.">wid</a> != wid)
<a name="l01062"></a>01062             hash = lmclass-&gt;nword_hash[hash].<a class="code" href="structngram__class__s_1_1ngram__hash__s.html#a88ede5798cadc2bece12c49fa038b090" title="Index of next bucket (or -1 for no collision)">next</a>;
<a name="l01063"></a>01063         <span class="keywordflow">if</span> (hash == -1)
<a name="l01064"></a>01064             <span class="keywordflow">return</span> 1;
<a name="l01065"></a>01065         <span class="keywordflow">return</span> lmclass-&gt;nword_hash[hash].<a class="code" href="structngram__class__s_1_1ngram__hash__s.html#a6ac4c01b0c8d29f770f4780e38ab0923" title="Probability for this word.">prob1</a>;
<a name="l01066"></a>01066     }
<a name="l01067"></a>01067     <span class="keywordflow">else</span> {
<a name="l01068"></a>01068         <span class="keywordflow">return</span> lmclass-&gt;<a class="code" href="structngram__class__s.html#a50077f48f135f1c666745a21574e4205" title="Probability table for base words.">prob1</a>[base_wid - lmclass-&gt;<a class="code" href="structngram__class__s.html#a370c88602c7c1f7e3ff1a767c027f5cb" title="Starting base word ID for this class&amp;#39; words.">start_wid</a>];
<a name="l01069"></a>01069     }
<a name="l01070"></a>01070 }
<a name="l01071"></a>01071 
<a name="l01072"></a>01072 int32
<a name="l01073"></a>01073 read_classdef_file(<a class="code" href="structhash__table__t.html">hash_table_t</a> *classes, <span class="keyword">const</span> <span class="keywordtype">char</span> *file_name)
<a name="l01074"></a>01074 {
<a name="l01075"></a>01075     FILE *fp;
<a name="l01076"></a>01076     int32 is_pipe;
<a name="l01077"></a>01077     <span class="keywordtype">int</span> inclass;  
<a name="l01078"></a>01078     int32 rv = -1;
<a name="l01079"></a>01079     <a class="code" href="structgnode__s.html" title="A node in a generic list.">gnode_t</a> *gn;
<a name="l01080"></a>01080     <a class="code" href="structgnode__s.html" title="A node in a generic list.">glist_t</a> classwords = NULL;
<a name="l01081"></a>01081     <a class="code" href="structgnode__s.html" title="A node in a generic list.">glist_t</a> classprobs = NULL;
<a name="l01082"></a>01082     <span class="keywordtype">char</span> *classname = NULL;
<a name="l01083"></a>01083 
<a name="l01084"></a>01084     <span class="keywordflow">if</span> ((fp = <a class="code" href="pio_8h.html#aa3d71506049eb49cf03eff1b89ef281f" title="Like fopen, but use popen and zcat if it is determined that &amp;quot;file&amp;quot; is compressed (i...">fopen_comp</a>(file_name, <span class="stringliteral">&quot;r&quot;</span>, &amp;is_pipe)) == NULL) {
<a name="l01085"></a>01085         <a class="code" href="err_8h.html#a5f7b2f58f5a663a6bdd51f197ae21993" title="Print error message to standard error stream.">E_ERROR</a>(<span class="stringliteral">&quot;File %s not found\n&quot;</span>, file_name);
<a name="l01086"></a>01086         <span class="keywordflow">return</span> -1;
<a name="l01087"></a>01087     }
<a name="l01088"></a>01088 
<a name="l01089"></a>01089     inclass = FALSE;
<a name="l01090"></a>01090     <span class="keywordflow">while</span> (!feof(fp)) {
<a name="l01091"></a>01091         <span class="keywordtype">char</span> line[512];
<a name="l01092"></a>01092         <span class="keywordtype">char</span> *wptr[2];
<a name="l01093"></a>01093         <span class="keywordtype">int</span> n_words;
<a name="l01094"></a>01094 
<a name="l01095"></a>01095         <span class="keywordflow">if</span> (fgets(line, <span class="keyword">sizeof</span>(line), fp) == NULL)
<a name="l01096"></a>01096             <span class="keywordflow">break</span>;
<a name="l01097"></a>01097 
<a name="l01098"></a>01098         n_words = <a class="code" href="strfuncs_8h.html#a5b520fdebcca599db86faaf75a82173f" title="Convert a line to an array of &amp;quot;words&amp;quot;, based on whitespace separators.">str2words</a>(line, wptr, 2);
<a name="l01099"></a>01099         <span class="keywordflow">if</span> (n_words &lt;= 0)
<a name="l01100"></a>01100             <span class="keywordflow">continue</span>;
<a name="l01101"></a>01101 
<a name="l01102"></a>01102         <span class="keywordflow">if</span> (inclass) {
<a name="l01103"></a>01103             <span class="comment">/* Look for an end of class marker. */</span>
<a name="l01104"></a>01104             <span class="keywordflow">if</span> (n_words == 2 &amp;&amp; 0 == strcmp(wptr[0], <span class="stringliteral">&quot;END&quot;</span>)) {
<a name="l01105"></a>01105                 <a class="code" href="structclassdef__s.html" title="One class definition from a classdef file.">classdef_t</a> *classdef;
<a name="l01106"></a>01106                 <a class="code" href="structgnode__s.html" title="A node in a generic list.">gnode_t</a> *word, *weight;
<a name="l01107"></a>01107                 int32 i;
<a name="l01108"></a>01108 
<a name="l01109"></a>01109                 <span class="keywordflow">if</span> (classname == NULL || 0 != strcmp(wptr[1], classname))
<a name="l01110"></a>01110                     <span class="keywordflow">goto</span> error_out;
<a name="l01111"></a>01111                 inclass = FALSE;
<a name="l01112"></a>01112 
<a name="l01113"></a>01113                 <span class="comment">/* Construct a class from the list of words collected. */</span>
<a name="l01114"></a>01114                 classdef = <a class="code" href="ckd__alloc_8h.html#aa00ef21903bc4f8a972488417adc8d2e" title="Macros to simplify the use of above functions.">ckd_calloc</a>(1, <span class="keyword">sizeof</span>(*classdef));
<a name="l01115"></a>01115                 classwords = <a class="code" href="glist_8h.html#a399a2a093c6c4ce1012762e4c25c8185" title="Reverse the order of the given glist.">glist_reverse</a>(classwords);
<a name="l01116"></a>01116                 classprobs = <a class="code" href="glist_8h.html#a399a2a093c6c4ce1012762e4c25c8185" title="Reverse the order of the given glist.">glist_reverse</a>(classprobs);
<a name="l01117"></a>01117                 classdef-&gt;n_words = <a class="code" href="glist_8h.html#aeb046e39c540d2f5f792119ea0d24c48" title="Count the number of element in a given link list.">glist_count</a>(classwords);
<a name="l01118"></a>01118                 classdef-&gt;words = <a class="code" href="ckd__alloc_8h.html#aa00ef21903bc4f8a972488417adc8d2e" title="Macros to simplify the use of above functions.">ckd_calloc</a>(classdef-&gt;n_words,
<a name="l01119"></a>01119                                              <span class="keyword">sizeof</span>(*classdef-&gt;words));
<a name="l01120"></a>01120                 classdef-&gt;weights = <a class="code" href="ckd__alloc_8h.html#aa00ef21903bc4f8a972488417adc8d2e" title="Macros to simplify the use of above functions.">ckd_calloc</a>(classdef-&gt;n_words,
<a name="l01121"></a>01121                                                <span class="keyword">sizeof</span>(*classdef-&gt;weights));
<a name="l01122"></a>01122                 word = classwords;
<a name="l01123"></a>01123                 weight = classprobs;
<a name="l01124"></a>01124                 <span class="keywordflow">for</span> (i = 0; i &lt; classdef-&gt;n_words; ++i) {
<a name="l01125"></a>01125                     classdef-&gt;words[i] = <a class="code" href="glist_8h.html#ace56682f14d84cc456c805d26fd86734" title="Head of a list of gnodes.">gnode_ptr</a>(word);
<a name="l01126"></a>01126                     classdef-&gt;weights[i] = gnode_float32(weight);
<a name="l01127"></a>01127                     word = gnode_next(word);
<a name="l01128"></a>01128                     weight = gnode_next(weight);
<a name="l01129"></a>01129                 }
<a name="l01130"></a>01130                 
<a name="l01131"></a>01131                 <span class="comment">/* Add this class to the hash table. */</span>
<a name="l01132"></a>01132                 <span class="keywordflow">if</span> (<a class="code" href="hash__table_8h.html#aebfe63c3869c271b125a8413ee384412" title="Try to add a new entry with given key and associated value to hash table h.">hash_table_enter</a>(classes, classname, classdef) != classdef) {
<a name="l01133"></a>01133                     classdef_free(classdef);
<a name="l01134"></a>01134                     <span class="keywordflow">goto</span> error_out;
<a name="l01135"></a>01135                 }
<a name="l01136"></a>01136 
<a name="l01137"></a>01137                 <span class="comment">/* Reset everything. */</span>
<a name="l01138"></a>01138                 <a class="code" href="glist_8h.html#a45380e15d2c33afc554fd60a8828580c" title="Free the given generic list; user-defined data contained within is not automatically freed...">glist_free</a>(classwords);
<a name="l01139"></a>01139                 <a class="code" href="glist_8h.html#a45380e15d2c33afc554fd60a8828580c" title="Free the given generic list; user-defined data contained within is not automatically freed...">glist_free</a>(classprobs);
<a name="l01140"></a>01140                 classwords = NULL;
<a name="l01141"></a>01141                 classprobs = NULL;
<a name="l01142"></a>01142                 classname = NULL;
<a name="l01143"></a>01143             }
<a name="l01144"></a>01144             <span class="keywordflow">else</span> {
<a name="l01145"></a>01145                 float32 fprob;
<a name="l01146"></a>01146 
<a name="l01147"></a>01147                 <span class="keywordflow">if</span> (n_words == 2)
<a name="l01148"></a>01148                     fprob = (float32)<a class="code" href="strfuncs_8h.html#ab708351fe7308551632a782bfad75a1e" title="Locale independent version of atof().">atof_c</a>(wptr[1]);
<a name="l01149"></a>01149                 <span class="keywordflow">else</span>
<a name="l01150"></a>01150                     fprob = 1.0f;
<a name="l01151"></a>01151                 <span class="comment">/* Add it to the list of words for this class. */</span>
<a name="l01152"></a>01152                 classwords = <a class="code" href="glist_8h.html#a77a9c20b7df5a289477af405ab778377" title="Create and prepend a new list node, with the given user-defined data, at the HEAD of the given generi...">glist_add_ptr</a>(classwords, <a class="code" href="ckd__alloc_8h.html#ad313f92478859f9e4ea99d0f6e78c393" title="Macro for __ckd_salloc__.">ckd_salloc</a>(wptr[0]));
<a name="l01153"></a>01153                 classprobs = <a class="code" href="glist_8h.html#a4fc4db2fbebd7b659554227d411f6737" title="Create and prepend a new list node containing a single-precision float.">glist_add_float32</a>(classprobs, fprob);
<a name="l01154"></a>01154             }
<a name="l01155"></a>01155         }
<a name="l01156"></a>01156         <span class="keywordflow">else</span> {
<a name="l01157"></a>01157             <span class="comment">/* Start a new LM class if the LMCLASS marker is seen */</span>
<a name="l01158"></a>01158             <span class="keywordflow">if</span> (n_words == 2 &amp;&amp; 0 == strcmp(wptr[0], <span class="stringliteral">&quot;LMCLASS&quot;</span>)) {
<a name="l01159"></a>01159                 <span class="keywordflow">if</span> (inclass)
<a name="l01160"></a>01160                     <span class="keywordflow">goto</span> error_out;
<a name="l01161"></a>01161                 inclass = TRUE;
<a name="l01162"></a>01162                 classname = <a class="code" href="ckd__alloc_8h.html#ad313f92478859f9e4ea99d0f6e78c393" title="Macro for __ckd_salloc__.">ckd_salloc</a>(wptr[1]);
<a name="l01163"></a>01163             }
<a name="l01164"></a>01164             <span class="comment">/* Otherwise, just ignore whatever junk we got */</span>
<a name="l01165"></a>01165         }
<a name="l01166"></a>01166     }
<a name="l01167"></a>01167     rv = 0; <span class="comment">/* Success. */</span>
<a name="l01168"></a>01168 
<a name="l01169"></a>01169 error_out:
<a name="l01170"></a>01170     <span class="comment">/* Free all the stuff we might have allocated. */</span>
<a name="l01171"></a>01171     <a class="code" href="pio_8h.html#a87592c3a2d0a00eed9eda014950beb65" title="Close a file opened using fopen_comp.">fclose_comp</a>(fp, is_pipe);
<a name="l01172"></a>01172     <span class="keywordflow">for</span> (gn = classwords; gn; gn = gnode_next(gn))
<a name="l01173"></a>01173         <a class="code" href="ckd__alloc_8h.html#a31c6b405558620ac37599737b5722fbf" title="Test and free a 1-D array.">ckd_free</a>(<a class="code" href="glist_8h.html#ace56682f14d84cc456c805d26fd86734" title="Head of a list of gnodes.">gnode_ptr</a>(gn));
<a name="l01174"></a>01174     <a class="code" href="glist_8h.html#a45380e15d2c33afc554fd60a8828580c" title="Free the given generic list; user-defined data contained within is not automatically freed...">glist_free</a>(classwords);
<a name="l01175"></a>01175     <a class="code" href="glist_8h.html#a45380e15d2c33afc554fd60a8828580c" title="Free the given generic list; user-defined data contained within is not automatically freed...">glist_free</a>(classprobs);
<a name="l01176"></a>01176     <a class="code" href="ckd__alloc_8h.html#a31c6b405558620ac37599737b5722fbf" title="Test and free a 1-D array.">ckd_free</a>(classname);
<a name="l01177"></a>01177 
<a name="l01178"></a>01178     <span class="keywordflow">return</span> rv;
<a name="l01179"></a>01179 }
<a name="l01180"></a>01180 
<a name="l01181"></a>01181 <span class="keywordtype">void</span>
<a name="l01182"></a>01182 classdef_free(<a class="code" href="structclassdef__s.html" title="One class definition from a classdef file.">classdef_t</a> *classdef)
<a name="l01183"></a>01183 {
<a name="l01184"></a>01184     int32 i;
<a name="l01185"></a>01185     <span class="keywordflow">for</span> (i = 0; i &lt; classdef-&gt;n_words; ++i)
<a name="l01186"></a>01186         <a class="code" href="ckd__alloc_8h.html#a31c6b405558620ac37599737b5722fbf" title="Test and free a 1-D array.">ckd_free</a>(classdef-&gt;words[i]);
<a name="l01187"></a>01187     <a class="code" href="ckd__alloc_8h.html#a31c6b405558620ac37599737b5722fbf" title="Test and free a 1-D array.">ckd_free</a>(classdef-&gt;words);
<a name="l01188"></a>01188     <a class="code" href="ckd__alloc_8h.html#a31c6b405558620ac37599737b5722fbf" title="Test and free a 1-D array.">ckd_free</a>(classdef-&gt;weights);
<a name="l01189"></a>01189     <a class="code" href="ckd__alloc_8h.html#a31c6b405558620ac37599737b5722fbf" title="Test and free a 1-D array.">ckd_free</a>(classdef);
<a name="l01190"></a>01190 }
<a name="l01191"></a>01191 
<a name="l01192"></a>01192 
<a name="l01193"></a>01193 int32
<a name="l01194"></a><a class="code" href="ngram__model_8h.html#a9b2a86c23543158754373c5456fe890d">01194</a> <a class="code" href="ngram__model_8h.html#a9b2a86c23543158754373c5456fe890d" title="Read a class definition file and add classes to a language model.">ngram_model_read_classdef</a>(<a class="code" href="structngram__model__s.html" title="Common implementation of ngram_model_t.">ngram_model_t</a> *model,
<a name="l01195"></a>01195                           <span class="keyword">const</span> <span class="keywordtype">char</span> *file_name)
<a name="l01196"></a>01196 {
<a name="l01197"></a>01197     <a class="code" href="structhash__table__t.html">hash_table_t</a> *classes;
<a name="l01198"></a>01198     <a class="code" href="structgnode__s.html" title="A node in a generic list.">glist_t</a> hl = NULL;
<a name="l01199"></a>01199     <a class="code" href="structgnode__s.html" title="A node in a generic list.">gnode_t</a> *gn;
<a name="l01200"></a>01200     int32 rv = -1;
<a name="l01201"></a>01201 
<a name="l01202"></a>01202     classes = <a class="code" href="hash__table_8h.html#a56d93e8c03e066b77377ac6eab50cfae" title="Allocate a new hash table for a given expected size.">hash_table_new</a>(0, FALSE);
<a name="l01203"></a>01203     <span class="keywordflow">if</span> (read_classdef_file(classes, file_name) &lt; 0) {
<a name="l01204"></a>01204         <a class="code" href="hash__table_8h.html#a0a588c22946f8cc16328973035ed19e3" title="Free the specified hash table; the caller is responsible for freeing the key strings pointed to by th...">hash_table_free</a>(classes);
<a name="l01205"></a>01205         <span class="keywordflow">return</span> -1;
<a name="l01206"></a>01206     }
<a name="l01207"></a>01207     
<a name="l01208"></a>01208     <span class="comment">/* Create a new class in the language model for each classdef. */</span>
<a name="l01209"></a>01209     hl = <a class="code" href="hash__table_8h.html#a61f59389f05d8871003da4692a9c2acc" title="Build a glist of valid hash_entry_t pointers from the given hash table.">hash_table_tolist</a>(classes, NULL);
<a name="l01210"></a>01210     <span class="keywordflow">for</span> (gn = hl; gn; gn = gnode_next(gn)) {
<a name="l01211"></a>01211         <a class="code" href="structhash__entry__s.html" title="A note by ARCHAN at 20050510: Technically what we use is so-called &quot;hash table with buckets&quot;...">hash_entry_t</a> *he = <a class="code" href="glist_8h.html#ace56682f14d84cc456c805d26fd86734" title="Head of a list of gnodes.">gnode_ptr</a>(gn);
<a name="l01212"></a>01212         <a class="code" href="structclassdef__s.html" title="One class definition from a classdef file.">classdef_t</a> *classdef = he-&gt;<a class="code" href="structhash__entry__s.html#a0d57012963084fed93886681108aa636" title="Key-length; the key string does not have to be a C-style NULL terminated string; it can have arbitrar...">val</a>;
<a name="l01213"></a>01213 
<a name="l01214"></a>01214         <span class="keywordflow">if</span> (<a class="code" href="ngram__model_8h.html#a0481b03a2bf03ef0a197da0a1f7d1caa" title="Add a new class to a language model.">ngram_model_add_class</a>(model, he-&gt;key, 1.0,
<a name="l01215"></a>01215                                   classdef-&gt;words,
<a name="l01216"></a>01216                                   classdef-&gt;weights,
<a name="l01217"></a>01217                                   classdef-&gt;n_words) &lt; 0)
<a name="l01218"></a>01218             <span class="keywordflow">goto</span> error_out;
<a name="l01219"></a>01219     }
<a name="l01220"></a>01220     rv = 0;
<a name="l01221"></a>01221 
<a name="l01222"></a>01222 error_out:
<a name="l01223"></a>01223     <span class="keywordflow">for</span> (gn = hl; gn; gn = gnode_next(gn)) {
<a name="l01224"></a>01224         <a class="code" href="structhash__entry__s.html" title="A note by ARCHAN at 20050510: Technically what we use is so-called &quot;hash table with buckets&quot;...">hash_entry_t</a> *he = <a class="code" href="glist_8h.html#ace56682f14d84cc456c805d26fd86734" title="Head of a list of gnodes.">gnode_ptr</a>(gn);
<a name="l01225"></a>01225         <a class="code" href="ckd__alloc_8h.html#a31c6b405558620ac37599737b5722fbf" title="Test and free a 1-D array.">ckd_free</a>((<span class="keywordtype">char</span> *)he-&gt;key);
<a name="l01226"></a>01226         classdef_free(he-&gt;<a class="code" href="structhash__entry__s.html#a0d57012963084fed93886681108aa636" title="Key-length; the key string does not have to be a C-style NULL terminated string; it can have arbitrar...">val</a>);
<a name="l01227"></a>01227     }
<a name="l01228"></a>01228     <a class="code" href="glist_8h.html#a45380e15d2c33afc554fd60a8828580c" title="Free the given generic list; user-defined data contained within is not automatically freed...">glist_free</a>(hl);
<a name="l01229"></a>01229     <a class="code" href="hash__table_8h.html#a0a588c22946f8cc16328973035ed19e3" title="Free the specified hash table; the caller is responsible for freeing the key strings pointed to by th...">hash_table_free</a>(classes);
<a name="l01230"></a>01230     <span class="keywordflow">return</span> rv;
<a name="l01231"></a>01231 }
</pre></div></div>
</div>
  <div id="nav-path" class="navpath">
    <ul>
      <li class="navelem"><b>ngram_model.c</b>      </li>
      <li class="footer">Generated on Tue Apr 19 2011 for SphinxBase by&#160;
<a href="http://www.doxygen.org/index.html">
<img class="footer" src="doxygen.png" alt="doxygen"/></a> 1.7.3 </li>
    </ul>
  </div>

</body>
</html>