<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> <html xmlns="http://www.w3.org/1999/xhtml"> <head> <meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/> <title>SphinxBase: src/libsphinxbase/lm/lm3g_model.c Source File</title> <link href="tabs.css" rel="stylesheet" type="text/css"/> <link href="navtree.css" rel="stylesheet" type="text/css"/> <script type="text/javascript" src="jquery.js"></script> <script type="text/javascript" src="navtree.js"></script> <script type="text/javascript" src="resize.js"></script> <script type="text/javascript"> $(document).ready(initResizable); </script> <link href="doxygen.css" rel="stylesheet" type="text/css"/> </head> <body> <!-- Generated by Doxygen 1.7.3 --> <div id="top"> <div id="titlearea"> <table cellspacing="0" cellpadding="0"> <tbody> <tr style="height: 56px;"> <td style="padding-left: 0.5em;"> <div id="projectname">SphinxBase <span id="projectnumber">0.6</span></div> </td> </tr> </tbody> </table> </div> <div id="navrow1" class="tabs"> <ul class="tablist"> <li><a href="index.html"><span>Main Page</span></a></li> <li><a href="pages.html"><span>Related Pages</span></a></li> <li><a href="annotated.html"><span>Data Structures</span></a></li> <li class="current"><a href="files.html"><span>Files</span></a></li> </ul> </div> <div id="navrow2" class="tabs2"> <ul class="tablist"> <li><a href="files.html"><span>File List</span></a></li> <li><a href="globals.html"><span>Globals</span></a></li> </ul> </div> </div> <div id="side-nav" class="ui-resizable side-nav-resizable"> <div id="nav-tree"> <div id="nav-tree-contents"> </div> </div> <div id="splitbar" style="-moz-user-select:none;" class="ui-resizable-handle"> </div> </div> <script type="text/javascript"> initNavTree('lm3g__model_8c.html',''); </script> <div id="doc-content"> <div class="header"> <div class="headertitle"> <h1>src/libsphinxbase/lm/lm3g_model.c</h1> </div> </div> <div 
class="contents"> <div class="fragment"><pre class="fragment"><a name="l00001"></a>00001 <span class="comment">/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */</span> <a name="l00002"></a>00002 <span class="comment">/* ====================================================================</span> <a name="l00003"></a>00003 <span class="comment"> * Copyright (c) 1999-2007 Carnegie Mellon University. All rights</span> <a name="l00004"></a>00004 <span class="comment"> * reserved.</span> <a name="l00005"></a>00005 <span class="comment"> *</span> <a name="l00006"></a>00006 <span class="comment"> * Redistribution and use in source and binary forms, with or without</span> <a name="l00007"></a>00007 <span class="comment"> * modification, are permitted provided that the following conditions</span> <a name="l00008"></a>00008 <span class="comment"> * are met:</span> <a name="l00009"></a>00009 <span class="comment"> *</span> <a name="l00010"></a>00010 <span class="comment"> * 1. Redistributions of source code must retain the above copyright</span> <a name="l00011"></a>00011 <span class="comment"> * notice, this list of conditions and the following disclaimer. </span> <a name="l00012"></a>00012 <span class="comment"> *</span> <a name="l00013"></a>00013 <span class="comment"> * 2. 
Redistributions in binary form must reproduce the above copyright</span> <a name="l00014"></a>00014 <span class="comment"> * notice, this list of conditions and the following disclaimer in</span> <a name="l00015"></a>00015 <span class="comment"> * the documentation and/or other materials provided with the</span> <a name="l00016"></a>00016 <span class="comment"> * distribution.</span> <a name="l00017"></a>00017 <span class="comment"> *</span> <a name="l00018"></a>00018 <span class="comment"> * This work was supported in part by funding from the Defense Advanced </span> <a name="l00019"></a>00019 <span class="comment"> * Research Projects Agency and the National Science Foundation of the </span> <a name="l00020"></a>00020 <span class="comment"> * United States of America, and the CMU Sphinx Speech Consortium.</span> <a name="l00021"></a>00021 <span class="comment"> *</span> <a name="l00022"></a>00022 <span class="comment"> * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND </span> <a name="l00023"></a>00023 <span class="comment"> * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, </span> <a name="l00024"></a>00024 <span class="comment"> * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR</span> <a name="l00025"></a>00025 <span class="comment"> * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY</span> <a name="l00026"></a>00026 <span class="comment"> * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,</span> <a name="l00027"></a>00027 <span class="comment"> * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT </span> <a name="l00028"></a>00028 <span class="comment"> * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, </span> <a name="l00029"></a>00029 <span class="comment"> * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY </span> <a name="l00030"></a>00030 <span class="comment"> * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT </span> <a name="l00031"></a>00031 <span class="comment"> * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE </span> <a name="l00032"></a>00032 <span class="comment"> * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.</span> <a name="l00033"></a>00033 <span class="comment"> *</span> <a name="l00034"></a>00034 <span class="comment"> * ====================================================================</span> <a name="l00035"></a>00035 <span class="comment"> *</span> <a name="l00036"></a>00036 <span class="comment"> */</span> <a name="l00037"></a>00037 <span class="comment">/*</span> <a name="l00038"></a>00038 <span class="comment"> * \file lm3g_model.c Core Sphinx 3-gram code used in</span> <a name="l00039"></a>00039 <span class="comment"> * DMP/DMP32/ARPA (for now) model code.</span> <a name="l00040"></a>00040 <span class="comment"> *</span> <a name="l00041"></a>00041 <span class="comment"> * Author: A cast of thousands, probably.</span> <a name="l00042"></a>00042 <span class="comment"> */</span> <a name="l00043"></a>00043 <span class="preprocessor">#include &lt;string.h&gt;</span> <a name="l00044"></a>00044 <span class="preprocessor">#include &lt;assert.h&gt;</span> <a name="l00045"></a>00045 <span class="preprocessor">#include &lt;limits.h&gt;</span> <a 
name="l00046"></a>00046 <a name="l00047"></a>00047 <span class="preprocessor">#include "sphinxbase/listelem_alloc.h"</span> <a name="l00048"></a>00048 <span class="preprocessor">#include "sphinxbase/ckd_alloc.h"</span> <a name="l00049"></a>00049 <span class="preprocessor">#include "sphinxbase/err.h"</span> <a name="l00050"></a>00050 <a name="l00051"></a>00051 <span class="preprocessor">#include "lm3g_model.h"</span> <a name="l00052"></a>00052 <a name="l00053"></a>00053 <span class="keywordtype">void</span> <a name="l00054"></a>00054 lm3g_tginfo_free(<a class="code" href="structngram__model__s.html" title="Common implementation of ngram_model_t.">ngram_model_t</a> *base, <a class="code" href="structlm3g__model__s.html" title="Common internal structure for Sphinx 3-gram models.">lm3g_model_t</a> *lm3g) <a name="l00055"></a>00055 { <a name="l00056"></a>00056 <span class="keywordflow">if</span> (lm3g-><a class="code" href="structlm3g__model__s.html#a9be0c8197334e3ef632e9e3abdad6a4f" title="tginfo[lw2] is head of linked list of trigram information for some cached subset of bigrams (*...">tginfo</a> == NULL) <a name="l00057"></a>00057 <span class="keywordflow">return</span>; <a name="l00058"></a>00058 <a class="code" href="listelem__alloc_8h.html#a94c02e93a0abaa2bd79636cbac6cced2" title="Finalize and release all memory associated with a list element allocator.">listelem_alloc_free</a>(lm3g-><a class="code" href="structlm3g__model__s.html#a2c96ba8aa34632e4d42908f9384d1b01" title="List element allocator for tginfo.">le</a>); <a name="l00059"></a>00059 <a class="code" href="ckd__alloc_8h.html#a31c6b405558620ac37599737b5722fbf" title="Test and free a 1-D array.">ckd_free</a>(lm3g-><a class="code" href="structlm3g__model__s.html#a9be0c8197334e3ef632e9e3abdad6a4f" title="tginfo[lw2] is head of linked list of trigram information for some cached subset of bigrams (*...">tginfo</a>); <a name="l00060"></a>00060 } <a name="l00061"></a>00061 <a name="l00062"></a>00062 <span 
class="keywordtype">void</span> <a name="l00063"></a>00063 lm3g_tginfo_reset(<a class="code" href="structngram__model__s.html" title="Common implementation of ngram_model_t.">ngram_model_t</a> *base, <a class="code" href="structlm3g__model__s.html" title="Common internal structure for Sphinx 3-gram models.">lm3g_model_t</a> *lm3g) <a name="l00064"></a>00064 { <a name="l00065"></a>00065 <span class="keywordflow">if</span> (lm3g-><a class="code" href="structlm3g__model__s.html#a9be0c8197334e3ef632e9e3abdad6a4f" title="tginfo[lw2] is head of linked list of trigram information for some cached subset of bigrams (*...">tginfo</a> == NULL) <a name="l00066"></a>00066 <span class="keywordflow">return</span>; <a name="l00067"></a>00067 <a class="code" href="listelem__alloc_8h.html#a94c02e93a0abaa2bd79636cbac6cced2" title="Finalize and release all memory associated with a list element allocator.">listelem_alloc_free</a>(lm3g-><a class="code" href="structlm3g__model__s.html#a2c96ba8aa34632e4d42908f9384d1b01" title="List element allocator for tginfo.">le</a>); <a name="l00068"></a>00068 memset(lm3g-><a class="code" href="structlm3g__model__s.html#a9be0c8197334e3ef632e9e3abdad6a4f" title="tginfo[lw2] is head of linked list of trigram information for some cached subset of bigrams (*...">tginfo</a>, 0, base-><a class="code" href="structngram__model__s.html#a9dcba9b49cc1cd189b257e5838da0eee" title="Counts for 1, 2, 3, ...">n_counts</a>[0] * <span class="keyword">sizeof</span>(<a class="code" href="structtginfo__s.html" title="Trigram information cache.">tginfo_t</a> *)); <a name="l00069"></a>00069 lm3g-><a class="code" href="structlm3g__model__s.html#a2c96ba8aa34632e4d42908f9384d1b01" title="List element allocator for tginfo.">le</a> = <a class="code" href="listelem__alloc_8h.html#a4b08b49eaa74cbe9a3c95170cee78de7" title="Initialize and return a list element allocator.">listelem_alloc_init</a>(<span class="keyword">sizeof</span>(<a class="code" href="structtginfo__s.html" 
title="Trigram information cache.">tginfo_t</a>)); <a name="l00070"></a>00070 } <a name="l00071"></a>00071 <a name="l00072"></a>00072 <span class="keywordtype">void</span> <a name="l00073"></a>00073 lm3g_apply_weights(<a class="code" href="structngram__model__s.html" title="Common implementation of ngram_model_t.">ngram_model_t</a> *base, <a name="l00074"></a>00074 <a class="code" href="structlm3g__model__s.html" title="Common internal structure for Sphinx 3-gram models.">lm3g_model_t</a> *lm3g, <a name="l00075"></a>00075 float32 lw, float32 wip, float32 uw) <a name="l00076"></a>00076 { <a name="l00077"></a>00077 int32 log_wip, log_uw, log_uniform_weight; <a name="l00078"></a>00078 <span class="keywordtype">int</span> i; <a name="l00079"></a>00079 <a name="l00080"></a>00080 <span class="comment">/* Precalculate some log values we will like. */</span> <a name="l00081"></a>00081 log_wip = <a class="code" href="logmath_8h.html#aebb4711268322fa7aec31e5798fe7e90" title="Convert linear floating point number to integer log in base B.">logmath_log</a>(base-><a class="code" href="structngram__model__s.html#a2ca373109c651ac998b33153eb38fd95" title="Log-math object.">lmath</a>, wip); <a name="l00082"></a>00082 log_uw = <a class="code" href="logmath_8h.html#aebb4711268322fa7aec31e5798fe7e90" title="Convert linear floating point number to integer log in base B.">logmath_log</a>(base-><a class="code" href="structngram__model__s.html#a2ca373109c651ac998b33153eb38fd95" title="Log-math object.">lmath</a>, uw); <a name="l00083"></a>00083 log_uniform_weight = <a class="code" href="logmath_8h.html#aebb4711268322fa7aec31e5798fe7e90" title="Convert linear floating point number to integer log in base B.">logmath_log</a>(base-><a class="code" href="structngram__model__s.html#a2ca373109c651ac998b33153eb38fd95" title="Log-math object.">lmath</a>, 1.0 - uw); <a name="l00084"></a>00084 <a name="l00085"></a>00085 <span class="keywordflow">for</span> (i = 0; i < base-><a class="code" 
href="structngram__model__s.html#a9dcba9b49cc1cd189b257e5838da0eee" title="Counts for 1, 2, 3, ...">n_counts</a>[0]; ++i) { <a name="l00086"></a>00086 int32 prob1, bo_wt, n_used; <a name="l00087"></a>00087 <a name="l00088"></a>00088 <span class="comment">/* Backoff weights just get scaled by the lw. */</span> <a name="l00089"></a>00089 bo_wt = (int32)(lm3g->unigrams[i].<a class="code" href="structunigram__s.html#ad33b4af5b40a8d13ffae932bab003df6" title="Unigram backoff weight.">bo_wt1</a>.l / base-><a class="code" href="structngram__model__s.html#a76ea0c65b23de80091e7c602bdb43bde" title="Language model scaling factor.">lw</a>); <a name="l00090"></a>00090 <span class="comment">/* Unscaling unigram probs is a bit more complicated, so punt</span> <a name="l00091"></a>00091 <span class="comment"> * it back to the general code. */</span> <a name="l00092"></a>00092 prob1 = <a class="code" href="ngram__model_8h.html#a218d8d140b93d3d8008f8933f9e04ec6" title="Quick &quot;raw&quot; probability lookup for a general N-Gram.">ngram_ng_prob</a>(base, i, NULL, 0, &amp;n_used); <a name="l00093"></a>00093 <span class="comment">/* Now compute the new scaled probabilities. 
*/</span> <a name="l00094"></a>00094 lm3g->unigrams[i].<a class="code" href="structunigram__s.html#ad33b4af5b40a8d13ffae932bab003df6" title="Unigram backoff weight.">bo_wt1</a>.l = (int32)(bo_wt * lw); <a name="l00095"></a>00095 <span class="keywordflow">if</span> (strcmp(base-><a class="code" href="structngram__model__s.html#ae625e779e340845f03fb3da164e93039" title="Unigram names.">word_str</a>[i], <span class="stringliteral">"&lt;s&gt;"</span>) == 0) { <span class="comment">/* FIXME: configurable start_sym */</span> <a name="l00096"></a>00096 <span class="comment">/* Apply language weight and WIP */</span> <a name="l00097"></a>00097 lm3g->unigrams[i].<a class="code" href="structunigram__s.html#a488db9623272838a933cd4b768409fea" title="Unigram probability.">prob1</a>.l = (int32)(prob1 * lw) + log_wip; <a name="l00098"></a>00098 } <a name="l00099"></a>00099 <span class="keywordflow">else</span> { <a name="l00100"></a>00100 <span class="comment">/* Interpolate unigram probability with uniform. 
*/</span> <a name="l00101"></a>00101 prob1 += log_uw; <a name="l00102"></a>00102 prob1 = <a class="code" href="logmath_8h.html#a5eb70928578b0115c9c7ac2765396a06" title="Add two values in log space (i.e.">logmath_add</a>(base-><a class="code" href="structngram__model__s.html#a2ca373109c651ac998b33153eb38fd95" title="Log-math object.">lmath</a>, prob1, base-><a class="code" href="structngram__model__s.html#a616bf871a67f9cedce17d6b589ee33ea" title="Log of uniform (0-gram) probability.">log_uniform</a> + log_uniform_weight); <a name="l00103"></a>00103 <span class="comment">/* Apply language weight and WIP */</span> <a name="l00104"></a>00104 lm3g->unigrams[i].<a class="code" href="structunigram__s.html#a488db9623272838a933cd4b768409fea" title="Unigram probability.">prob1</a>.l = (int32)(prob1 * lw) + log_wip; <a name="l00105"></a>00105 } <a name="l00106"></a>00106 } <a name="l00107"></a>00107 <a name="l00108"></a>00108 <span class="keywordflow">for</span> (i = 0; i < lm3g-><a class="code" href="structlm3g__model__s.html#a273e6ea4c39d1a563cc59f00b4b6ee98" title="prob2 size">n_prob2</a>; ++i) { <a name="l00109"></a>00109 int32 prob2; <a name="l00110"></a>00110 <span class="comment">/* Can't just punt this back to general code since it is quantized. 
*/</span> <a name="l00111"></a>00111 prob2 = (int32)((lm3g-><a class="code" href="structlm3g__model__s.html#a34acf8c1ffaa4bb712ce1196eea59678" title="Table of actual bigram probs.">prob2</a>[i].l - base-><a class="code" href="structngram__model__s.html#a3d6bf5632760a16e52cb881d7010d774" title="Log of word insertion penalty.">log_wip</a>) / base-><a class="code" href="structngram__model__s.html#a76ea0c65b23de80091e7c602bdb43bde" title="Language model scaling factor.">lw</a>); <a name="l00112"></a>00112 lm3g-><a class="code" href="structlm3g__model__s.html#a34acf8c1ffaa4bb712ce1196eea59678" title="Table of actual bigram probs.">prob2</a>[i].l = (int32)(prob2 * lw) + log_wip; <a name="l00113"></a>00113 } <a name="l00114"></a>00114 <a name="l00115"></a>00115 <span class="keywordflow">if</span> (base-><a class="code" href="structngram__model__s.html#a3c87bc1b678662a2c8930b3b8c33a80f" title="This is an n-gram model (1, 2, 3, ...).">n</a> > 2) { <a name="l00116"></a>00116 <span class="keywordflow">for</span> (i = 0; i < lm3g-><a class="code" href="structlm3g__model__s.html#ad4bbdd65d13712fe653afe8b1de9b096" title="bo_wt2 size">n_bo_wt2</a>; ++i) { <a name="l00117"></a>00117 lm3g-><a class="code" href="structlm3g__model__s.html#a7ee629aa1b8e88529127cf4da470d80f" title="Table of actual bigram backoff weights.">bo_wt2</a>[i].l = (int32)(lm3g-><a class="code" href="structlm3g__model__s.html#a7ee629aa1b8e88529127cf4da470d80f" title="Table of actual bigram backoff weights.">bo_wt2</a>[i].l / base-><a class="code" href="structngram__model__s.html#a76ea0c65b23de80091e7c602bdb43bde" title="Language model scaling factor.">lw</a> * lw); <a name="l00118"></a>00118 } <a name="l00119"></a>00119 <span class="keywordflow">for</span> (i = 0; i < lm3g-><a class="code" href="structlm3g__model__s.html#add28369f51e657ee54deed5291c84d09" title="prob3 size">n_prob3</a>; i++) { <a name="l00120"></a>00120 int32 prob3; <a name="l00121"></a>00121 <span class="comment">/* Can't just punt this back 
to general code since it is quantized. */</span> <a name="l00122"></a>00122 prob3 = (int32)((lm3g-><a class="code" href="structlm3g__model__s.html#adc9ed2ad1f2daefdd40713a9dd371673" title="Table of actual trigram probs.">prob3</a>[i].l - base-><a class="code" href="structngram__model__s.html#a3d6bf5632760a16e52cb881d7010d774" title="Log of word insertion penalty.">log_wip</a>) / base-><a class="code" href="structngram__model__s.html#a76ea0c65b23de80091e7c602bdb43bde" title="Language model scaling factor.">lw</a>); <a name="l00123"></a>00123 lm3g-><a class="code" href="structlm3g__model__s.html#adc9ed2ad1f2daefdd40713a9dd371673" title="Table of actual trigram probs.">prob3</a>[i].l = (int32)(prob3 * lw) + log_wip; <a name="l00124"></a>00124 } <a name="l00125"></a>00125 } <a name="l00126"></a>00126 <a name="l00127"></a>00127 <span class="comment">/* Store updated values in the model. */</span> <a name="l00128"></a>00128 base-><a class="code" href="structngram__model__s.html#a3d6bf5632760a16e52cb881d7010d774" title="Log of word insertion penalty.">log_wip</a> = log_wip; <a name="l00129"></a>00129 base-><a class="code" href="structngram__model__s.html#a6f0ec7b8b9d13d590bbe4b59df573abc" title="Log of unigram weight.">log_uw</a> = log_uw; <a name="l00130"></a>00130 base-><a class="code" href="structngram__model__s.html#aa38c5fdecaefd9a2f43b69f26ae492c1" title="Log of uniform weight (i.e.">log_uniform_weight</a> = log_uniform_weight; <a name="l00131"></a>00131 base-><a class="code" href="structngram__model__s.html#a76ea0c65b23de80091e7c602bdb43bde" title="Language model scaling factor.">lw</a> = lw; <a name="l00132"></a>00132 } <a name="l00133"></a>00133 <a name="l00134"></a>00134 int32 <a name="l00135"></a>00135 lm3g_add_ug(<a class="code" href="structngram__model__s.html" title="Common implementation of ngram_model_t.">ngram_model_t</a> *base, <a name="l00136"></a>00136 <a class="code" href="structlm3g__model__s.html" title="Common internal structure for Sphinx 3-gram 
models.">lm3g_model_t</a> *lm3g, int32 wid, int32 lweight) <a name="l00137"></a>00137 { <a name="l00138"></a>00138 int32 score; <a name="l00139"></a>00139 <a name="l00140"></a>00140 <span class="comment">/* This would be very bad if this happened! */</span> <a name="l00141"></a>00141 assert(!NGRAM_IS_CLASSWID(wid)); <a name="l00142"></a>00142 <a name="l00143"></a>00143 <span class="comment">/* Reallocate unigram array. */</span> <a name="l00144"></a>00144 lm3g->unigrams = <a class="code" href="ckd__alloc_8h.html#afd496738b3e114bd494c5a0955f1bfb3" title="Macro for __ckd_realloc__.">ckd_realloc</a>(lm3g->unigrams, <a name="l00145"></a>00145 <span class="keyword">sizeof</span>(*lm3g->unigrams) * base-><a class="code" href="structngram__model__s.html#a3e41109b30668bdfc077614c1ef49960" title="Number of allocated word strings (for new word addition)">n_1g_alloc</a>); <a name="l00146"></a>00146 memset(lm3g->unigrams + base-><a class="code" href="structngram__model__s.html#a9dcba9b49cc1cd189b257e5838da0eee" title="Counts for 1, 2, 3, ...">n_counts</a>[0], 0, <a name="l00147"></a>00147 (base-><a class="code" href="structngram__model__s.html#a3e41109b30668bdfc077614c1ef49960" title="Number of allocated word strings (for new word addition)">n_1g_alloc</a> - base-><a class="code" href="structngram__model__s.html#a9dcba9b49cc1cd189b257e5838da0eee" title="Counts for 1, 2, 3, ...">n_counts</a>[0]) * <span class="keyword">sizeof</span>(*lm3g->unigrams)); <a name="l00148"></a>00148 <span class="comment">/* Reallocate tginfo array. 
*/</span> <a name="l00149"></a>00149 lm3g-><a class="code" href="structlm3g__model__s.html#a9be0c8197334e3ef632e9e3abdad6a4f" title="tginfo[lw2] is head of linked list of trigram information for some cached subset of bigrams (*...">tginfo</a> = <a class="code" href="ckd__alloc_8h.html#afd496738b3e114bd494c5a0955f1bfb3" title="Macro for __ckd_realloc__.">ckd_realloc</a>(lm3g-><a class="code" href="structlm3g__model__s.html#a9be0c8197334e3ef632e9e3abdad6a4f" title="tginfo[lw2] is head of linked list of trigram information for some cached subset of bigrams (*...">tginfo</a>, <a name="l00150"></a>00150 <span class="keyword">sizeof</span>(*lm3g-><a class="code" href="structlm3g__model__s.html#a9be0c8197334e3ef632e9e3abdad6a4f" title="tginfo[lw2] is head of linked list of trigram information for some cached subset of bigrams (*...">tginfo</a>) * base-><a class="code" href="structngram__model__s.html#a3e41109b30668bdfc077614c1ef49960" title="Number of allocated word strings (for new word addition)">n_1g_alloc</a>); <a name="l00151"></a>00151 memset(lm3g-><a class="code" href="structlm3g__model__s.html#a9be0c8197334e3ef632e9e3abdad6a4f" title="tginfo[lw2] is head of linked list of trigram information for some cached subset of bigrams (*...">tginfo</a> + base-><a class="code" href="structngram__model__s.html#a9dcba9b49cc1cd189b257e5838da0eee" title="Counts for 1, 2, 3, ...">n_counts</a>[0], 0, <a name="l00152"></a>00152 (base-><a class="code" href="structngram__model__s.html#a3e41109b30668bdfc077614c1ef49960" title="Number of allocated word strings (for new word addition)">n_1g_alloc</a> - base-><a class="code" href="structngram__model__s.html#a9dcba9b49cc1cd189b257e5838da0eee" title="Counts for 1, 2, 3, ...">n_counts</a>[0]) * <span class="keyword">sizeof</span>(*lm3g-><a class="code" href="structlm3g__model__s.html#a9be0c8197334e3ef632e9e3abdad6a4f" title="tginfo[lw2] is head of linked list of trigram information for some cached subset of bigrams (*...">tginfo</a>)); <a 
name="l00153"></a>00153 <span class="comment">/* FIXME: we really ought to update base->log_uniform *and*</span> <a name="l00154"></a>00154 <span class="comment"> * renormalize all the other unigrams. This is really slow, so I</span> <a name="l00155"></a>00155 <span class="comment"> * will probably just provide a function to renormalize after</span> <a name="l00156"></a>00156 <span class="comment"> * adding unigrams, for anyone who really cares. */</span> <a name="l00157"></a>00157 <span class="comment">/* This could be simplified but then we couldn't do it in logmath */</span> <a name="l00158"></a>00158 score = lweight + base-><a class="code" href="structngram__model__s.html#a616bf871a67f9cedce17d6b589ee33ea" title="Log of uniform (0-gram) probability.">log_uniform</a> + base-><a class="code" href="structngram__model__s.html#a6f0ec7b8b9d13d590bbe4b59df573abc" title="Log of unigram weight.">log_uw</a>; <a name="l00159"></a>00159 score = <a class="code" href="logmath_8h.html#a5eb70928578b0115c9c7ac2765396a06" title="Add two values in log space (i.e.">logmath_add</a>(base-><a class="code" href="structngram__model__s.html#a2ca373109c651ac998b33153eb38fd95" title="Log-math object.">lmath</a>, score, <a name="l00160"></a>00160 base-><a class="code" href="structngram__model__s.html#a616bf871a67f9cedce17d6b589ee33ea" title="Log of uniform (0-gram) probability.">log_uniform</a> + base-><a class="code" href="structngram__model__s.html#aa38c5fdecaefd9a2f43b69f26ae492c1" title="Log of uniform weight (i.e.">log_uniform_weight</a>); <a name="l00161"></a>00161 lm3g->unigrams[wid].<a class="code" href="structunigram__s.html#a488db9623272838a933cd4b768409fea" title="Unigram probability.">prob1</a>.l = score; <a name="l00162"></a>00162 <span class="comment">/* This unigram by definition doesn't participate in any bigrams,</span> <a name="l00163"></a>00163 <span class="comment"> * so its backoff weight and bigram pointer are both undefined. 
*/</span> <a name="l00164"></a>00164 lm3g->unigrams[wid].<a class="code" href="structunigram__s.html#ad33b4af5b40a8d13ffae932bab003df6" title="Unigram backoff weight.">bo_wt1</a>.l = 0; <a name="l00165"></a>00165 lm3g->unigrams[wid].<a class="code" href="structunigram__s.html#ae148f631c0d9851b14bb9cb31c0c061d" title="Index of 1st entry in lm_t.bigrams[].">bigrams</a> = 0; <a name="l00166"></a>00166 <span class="comment">/* Finally, increase the unigram count */</span> <a name="l00167"></a>00167 ++base-><a class="code" href="structngram__model__s.html#a9dcba9b49cc1cd189b257e5838da0eee" title="Counts for 1, 2, 3, ...">n_counts</a>[0]; <a name="l00168"></a>00168 <span class="comment">/* FIXME: Note that this can actually be quite bogus due to the</span> <a name="l00169"></a>00169 <span class="comment"> * presence of class words. If wid falls outside the unigram</span> <a name="l00170"></a>00170 <span class="comment"> * count, increase it to compensate, at the cost of no longer</span> <a name="l00171"></a>00171 <span class="comment"> * really knowing how many unigrams we have :( */</span> <a name="l00172"></a>00172 <span class="keywordflow">if</span> (wid >= base-><a class="code" href="structngram__model__s.html#a9dcba9b49cc1cd189b257e5838da0eee" title="Counts for 1, 2, 3, ...">n_counts</a>[0]) <a name="l00173"></a>00173 base-><a class="code" href="structngram__model__s.html#a9dcba9b49cc1cd189b257e5838da0eee" title="Counts for 1, 2, 3, ...">n_counts</a>[0] = wid + 1; <a name="l00174"></a>00174 <a name="l00175"></a>00175 <span class="keywordflow">return</span> score; <a name="l00176"></a>00176 } <a name="l00177"></a>00177 <a name="l00178"></a>00178 <span class="keywordtype">void</span> <a name="l00179"></a>00179 init_sorted_list(<a class="code" href="structsorted__list__t.html" title="The sorted list.">sorted_list_t</a> * l) <a name="l00180"></a>00180 { <a name="l00181"></a>00181 <span class="comment">/* FIXME FIXME FIXME: Fixed size array!??! 
*/</span> <a name="l00182"></a>00182 l->list = <a class="code" href="ckd__alloc_8h.html#aa00ef21903bc4f8a972488417adc8d2e" title="Macros to simplify the use of above functions.">ckd_calloc</a>(MAX_SORTED_ENTRIES, <a name="l00183"></a>00183 <span class="keyword">sizeof</span>(<a class="code" href="structsorted__entry__s.html" title="Bigram probs and bo-wts, and trigram probs are kept in separate tables rather than within the bigram_...">sorted_entry_t</a>)); <a name="l00184"></a>00184 l->list[0].<a class="code" href="structsorted__entry__s.html#a7bfd6c0c9c7240695a5909044177122f" title="value being kept in this node">val</a>.l = INT_MIN; <a name="l00185"></a>00185 l->list[0].<a class="code" href="structsorted__entry__s.html#a186102da5e815345b92b1cc8895e5eb5" title="index of another entry.">lower</a> = 0; <a name="l00186"></a>00186 l->list[0].<a class="code" href="structsorted__entry__s.html#af3d7fdd1865a965689906a2b4c12c641" title="index of another entry.">higher</a> = 0; <a name="l00187"></a>00187 l-><a class="code" href="structsorted__list__t.html#aa7468ec9a2fe7c61d2bc76ba43c575ce" title="first free element in list">free</a> = 1; <a name="l00188"></a>00188 } <a name="l00189"></a>00189 <a name="l00190"></a>00190 <span class="keywordtype">void</span> <a name="l00191"></a>00191 free_sorted_list(<a class="code" href="structsorted__list__t.html" title="The sorted list.">sorted_list_t</a> * l) <a name="l00192"></a>00192 { <a name="l00193"></a>00193 free(l->list); <a name="l00194"></a>00194 } <a name="l00195"></a>00195 <a name="l00196"></a>00196 <a class="code" href="unionlmprob__t.html" title="Type used to store language model probabilities.">lmprob_t</a> * <a name="l00197"></a>00197 vals_in_sorted_list(<a class="code" href="structsorted__list__t.html" title="The sorted list.">sorted_list_t</a> * l) <a name="l00198"></a>00198 { <a name="l00199"></a>00199 <a class="code" href="unionlmprob__t.html" title="Type used to store language model probabilities.">lmprob_t</a> 
*vals; <a name="l00200"></a>00200 int32 i; <a name="l00201"></a>00201 <a name="l00202"></a>00202 vals = <a class="code" href="ckd__alloc_8h.html#aa00ef21903bc4f8a972488417adc8d2e" title="Macros to simplify the use of above functions.">ckd_calloc</a>(l-><a class="code" href="structsorted__list__t.html#aa7468ec9a2fe7c61d2bc76ba43c575ce" title="first free element in list">free</a>, <span class="keyword">sizeof</span>(<a class="code" href="unionlmprob__t.html" title="Type used to store language model probabilities.">lmprob_t</a>)); <a name="l00203"></a>00203 <span class="keywordflow">for</span> (i = 0; i &lt; l-><a class="code" href="structsorted__list__t.html#aa7468ec9a2fe7c61d2bc76ba43c575ce" title="first free element in list">free</a>; i++) <a name="l00204"></a>00204 vals[i] = l->list[i].<a class="code" href="structsorted__entry__s.html#a7bfd6c0c9c7240695a5909044177122f" title="value being kept in this node">val</a>; <a name="l00205"></a>00205 return (vals); <a name="l00206"></a>00206 } <a name="l00207"></a>00207 <!-- Listing of sorted_id(): walks the sorted list as a binary tree through the lower/higher links (0 means no child), returns the index of the entry whose val.l equals *val, appends a new entry at the free index when none exists, and returns the current index with a warning once the free index reaches MAX_SORTED_ENTRIES. --><a name="l00208"></a>00208 int32 <a name="l00209"></a>00209 sorted_id(<a class="code" href="structsorted__list__t.html" title="The sorted list.">sorted_list_t</a> * l, int32 *val) <a name="l00210"></a>00210 { <a name="l00211"></a>00211 int32 i = 0; <a name="l00212"></a>00212 <a name="l00213"></a>00213 <span class="keywordflow">for</span> (;;) { <a name="l00214"></a>00214 <span class="keywordflow">if</span> (*val == l->list[i].<a class="code" href="structsorted__entry__s.html#a7bfd6c0c9c7240695a5909044177122f" title="value being kept in this node">val</a>.l) <a name="l00215"></a>00215 <span class="keywordflow">return</span> (i); <a name="l00216"></a>00216 <span class="keywordflow">if</span> (*val &lt; l->list[i].val.l) { <a name="l00217"></a>00217 <span class="keywordflow">if</span> (l->list[i].<a class="code" href="structsorted__entry__s.html#a186102da5e815345b92b1cc8895e5eb5" title="index of another entry.">lower</a> == 0) { <a name="l00218"></a>00218 <span
class="keywordflow">if</span> (l-><a class="code" href="structsorted__list__t.html#aa7468ec9a2fe7c61d2bc76ba43c575ce" title="first free element in list">free</a> >= MAX_SORTED_ENTRIES) { <a name="l00219"></a>00219 <span class="comment">/* Make the best of a bad situation. */</span> <a name="l00220"></a>00220 <a class="code" href="err_8h.html#a6a794bec721b555ac1f2167f9e12f662" title="Print warning information to standard error stream.">E_WARN</a>(<span class="stringliteral">"sorted list overflow (%d => %d)\n"</span>, <a name="l00221"></a>00221 *val, l->list[i].<a class="code" href="structsorted__entry__s.html#a7bfd6c0c9c7240695a5909044177122f" title="value being kept in this node">val</a>.l); <a name="l00222"></a>00222 <span class="keywordflow">return</span> i; <a name="l00223"></a>00223 } <a name="l00224"></a>00224 <a name="l00225"></a>00225 l->list[i].<a class="code" href="structsorted__entry__s.html#a186102da5e815345b92b1cc8895e5eb5" title="index of another entry.">lower</a> = l-><a class="code" href="structsorted__list__t.html#aa7468ec9a2fe7c61d2bc76ba43c575ce" title="first free element in list">free</a>; <a name="l00226"></a>00226 (l-><a class="code" href="structsorted__list__t.html#aa7468ec9a2fe7c61d2bc76ba43c575ce" title="first free element in list">free</a>)++; <a name="l00227"></a>00227 i = l->list[i].<a class="code" href="structsorted__entry__s.html#a186102da5e815345b92b1cc8895e5eb5" title="index of another entry.">lower</a>; <a name="l00228"></a>00228 l->list[i].<a class="code" href="structsorted__entry__s.html#a7bfd6c0c9c7240695a5909044177122f" title="value being kept in this node">val</a>.l = *val; <a name="l00229"></a>00229 <span class="keywordflow">return</span> (i); <a name="l00230"></a>00230 } <a name="l00231"></a>00231 <span class="keywordflow">else</span> <a name="l00232"></a>00232 i = l->list[i].<a class="code" href="structsorted__entry__s.html#a186102da5e815345b92b1cc8895e5eb5" title="index of another entry.">lower</a>; <a
name="l00233"></a>00233 } <a name="l00234"></a>00234 <span class="keywordflow">else</span> { <a name="l00235"></a>00235 <span class="keywordflow">if</span> (l->list[i].<a class="code" href="structsorted__entry__s.html#af3d7fdd1865a965689906a2b4c12c641" title="index of another entry.">higher</a> == 0) { <a name="l00236"></a>00236 <span class="keywordflow">if</span> (l-><a class="code" href="structsorted__list__t.html#aa7468ec9a2fe7c61d2bc76ba43c575ce" title="first free element in list">free</a> >= MAX_SORTED_ENTRIES) { <a name="l00237"></a>00237 <span class="comment">/* Make the best of a bad situation. */</span> <a name="l00238"></a>00238 <a class="code" href="err_8h.html#a6a794bec721b555ac1f2167f9e12f662" title="Print warning information to standard error stream.">E_WARN</a>(<span class="stringliteral">"sorted list overflow (%d => %d)\n"</span>, <!-- Review fix: pass val.l, as the lower branch on listing line 00221 does, so the second %d receives the integer member rather than the whole val object. Mirror this change in lm3g_model.c before regenerating this page. --><a name="l00239"></a>00239 *val, l->list[i].<a class="code" href="structsorted__entry__s.html#a7bfd6c0c9c7240695a5909044177122f" title="value being kept in this node">val</a>.l); <a name="l00240"></a>00240 <span class="keywordflow">return</span> i; <a name="l00241"></a>00241 } <a name="l00242"></a>00242 <a name="l00243"></a>00243 l->list[i].<a class="code" href="structsorted__entry__s.html#af3d7fdd1865a965689906a2b4c12c641" title="index of another entry.">higher</a> = l-><a class="code" href="structsorted__list__t.html#aa7468ec9a2fe7c61d2bc76ba43c575ce" title="first free element in list">free</a>; <a name="l00244"></a>00244 (l-><a class="code" href="structsorted__list__t.html#aa7468ec9a2fe7c61d2bc76ba43c575ce" title="first free element in list">free</a>)++; <a name="l00245"></a>00245 i = l->list[i].<a class="code" href="structsorted__entry__s.html#af3d7fdd1865a965689906a2b4c12c641" title="index of another entry.">higher</a>; <a name="l00246"></a>00246 l->list[i].<a class="code" href="structsorted__entry__s.html#a7bfd6c0c9c7240695a5909044177122f" title="value being kept in this node">val</a>.l = *val; <a
name="l00247"></a>00247 <span class="keywordflow">return</span> (i); <a name="l00248"></a>00248 } <a name="l00249"></a>00249 <span class="keywordflow">else</span> <a name="l00250"></a>00250 i = l->list[i].<a class="code" href="structsorted__entry__s.html#af3d7fdd1865a965689906a2b4c12c641" title="index of another entry.">higher</a>; <a name="l00251"></a>00251 } <a name="l00252"></a>00252 } <a name="l00253"></a>00253 } <a name="l00254"></a>00254 </pre></div></div> </div> <!-- Breadcrumb for the current file plus the generator footer. --> <div id="nav-path" class="navpath"> <ul> <li class="navelem"><b>lm3g_model.c</b> </li> <li class="footer">Generated on Tue Apr 19 2011 for SphinxBase by  <a href="https://www.doxygen.org/index.html"> <img class="footer" src="doxygen.png" alt="doxygen"/></a> 1.7.3 </li> </ul> </div> </body> </html>