Sophie

Sophie

distrib > Fedora > 14 > x86_64 > media > updates > by-pkgid > 0b420d0fce195cf4115dc6a3be5c2da2 > files > 332

sphinxbase-devel-0.7-1.fc14.i686.rpm

<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
<title>SphinxBase: src/sphinx_lmtools/sphinx_lm_eval.c Source File</title>
<link href="tabs.css" rel="stylesheet" type="text/css"/>
<link href="navtree.css" rel="stylesheet" type="text/css"/>
<script type="text/javascript" src="jquery.js"></script>
<script type="text/javascript" src="navtree.js"></script>
<script type="text/javascript" src="resize.js"></script>
<script type="text/javascript">
$(document).ready(initResizable);
</script>
<link href="doxygen.css" rel="stylesheet" type="text/css"/>
</head>
<body>
<!-- Generated by Doxygen 1.7.3 -->
<div id="top">
<div id="titlearea">
<table cellspacing="0" cellpadding="0">
 <tbody>
 <tr style="height: 56px;">
  <td style="padding-left: 0.5em;">
   <div id="projectname">SphinxBase&#160;<span id="projectnumber">0.6</span></div>
  </td>
 </tr>
 </tbody>
</table>
</div>
  <div id="navrow1" class="tabs">
    <ul class="tablist">
      <li><a href="index.html"><span>Main&#160;Page</span></a></li>
      <li><a href="pages.html"><span>Related&#160;Pages</span></a></li>
      <li><a href="annotated.html"><span>Data&#160;Structures</span></a></li>
      <li class="current"><a href="files.html"><span>Files</span></a></li>
    </ul>
  </div>
  <div id="navrow2" class="tabs2">
    <ul class="tablist">
      <li><a href="files.html"><span>File&#160;List</span></a></li>
      <li><a href="globals.html"><span>Globals</span></a></li>
    </ul>
  </div>
</div>
<div id="side-nav" class="ui-resizable side-nav-resizable">
  <div id="nav-tree">
    <div id="nav-tree-contents">
    </div>
  </div>
  <div id="splitbar" style="-moz-user-select:none;" 
       class="ui-resizable-handle">
  </div>
</div>
<script type="text/javascript">
  initNavTree('sphinx__lm__eval_8c.html','');
</script>
<div id="doc-content">
<div class="header">
  <div class="headertitle">
<h1>src/sphinx_lmtools/sphinx_lm_eval.c</h1>  </div>
</div>
<div class="contents">
<a href="sphinx__lm__eval_8c.html">Go to the documentation of this file.</a><div class="fragment"><pre class="fragment"><a name="l00001"></a>00001 <span class="comment">/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */</span>
<a name="l00002"></a>00002 <span class="comment">/* ====================================================================</span>
<a name="l00003"></a>00003 <span class="comment"> * Copyright (c) 2008 Carnegie Mellon University.  All rights </span>
<a name="l00004"></a>00004 <span class="comment"> * reserved.</span>
<a name="l00005"></a>00005 <span class="comment"> *</span>
<a name="l00006"></a>00006 <span class="comment"> * Redistribution and use in source and binary forms, with or without</span>
<a name="l00007"></a>00007 <span class="comment"> * modification, are permitted provided that the following conditions</span>
<a name="l00008"></a>00008 <span class="comment"> * are met:</span>
<a name="l00009"></a>00009 <span class="comment"> *</span>
<a name="l00010"></a>00010 <span class="comment"> * 1. Redistributions of source code must retain the above copyright</span>
<a name="l00011"></a>00011 <span class="comment"> *    notice, this list of conditions and the following disclaimer. </span>
<a name="l00012"></a>00012 <span class="comment"> *</span>
<a name="l00013"></a>00013 <span class="comment"> * 2. Redistributions in binary form must reproduce the above copyright</span>
<a name="l00014"></a>00014 <span class="comment"> *    notice, this list of conditions and the following disclaimer in</span>
<a name="l00015"></a>00015 <span class="comment"> *    the documentation and/or other materials provided with the</span>
<a name="l00016"></a>00016 <span class="comment"> *    distribution.</span>
<a name="l00017"></a>00017 <span class="comment"> *</span>
<a name="l00018"></a>00018 <span class="comment"> * This work was supported in part by funding from the Defense Advanced </span>
<a name="l00019"></a>00019 <span class="comment"> * Research Projects Agency and the National Science Foundation of the </span>
<a name="l00020"></a>00020 <span class="comment"> * United States of America, and the CMU Sphinx Speech Consortium.</span>
<a name="l00021"></a>00021 <span class="comment"> *</span>
<a name="l00022"></a>00022 <span class="comment"> * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS&#39;&#39; AND </span>
<a name="l00023"></a>00023 <span class="comment"> * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, </span>
<a name="l00024"></a>00024 <span class="comment"> * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR</span>
<a name="l00025"></a>00025 <span class="comment"> * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY</span>
<a name="l00026"></a>00026 <span class="comment"> * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,</span>
<a name="l00027"></a>00027 <span class="comment"> * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT </span>
<a name="l00028"></a>00028 <span class="comment"> * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, </span>
<a name="l00029"></a>00029 <span class="comment"> * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY </span>
<a name="l00030"></a>00030 <span class="comment"> * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT </span>
<a name="l00031"></a>00031 <span class="comment"> * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE </span>
<a name="l00032"></a>00032 <span class="comment"> * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.</span>
<a name="l00033"></a>00033 <span class="comment"> *</span>
<a name="l00034"></a>00034 <span class="comment"> * ====================================================================</span>
<a name="l00035"></a>00035 <span class="comment"> *</span>
<a name="l00036"></a>00036 <span class="comment"> */</span>
<a name="l00041"></a>00041 <span class="preprocessor">#include &lt;sphinxbase/logmath.h&gt;</span>
<a name="l00042"></a>00042 <span class="preprocessor">#include &lt;sphinxbase/ngram_model.h&gt;</span>
<a name="l00043"></a>00043 <span class="preprocessor">#include &lt;sphinxbase/cmd_ln.h&gt;</span>
<a name="l00044"></a>00044 <span class="preprocessor">#include &lt;sphinxbase/ckd_alloc.h&gt;</span>
<a name="l00045"></a>00045 <span class="preprocessor">#include &lt;sphinxbase/err.h&gt;</span>
<a name="l00046"></a>00046 <span class="preprocessor">#include &lt;sphinxbase/pio.h&gt;</span>
<a name="l00047"></a>00047 <span class="preprocessor">#include &lt;sphinxbase/strfuncs.h&gt;</span>
<a name="l00048"></a>00048 
<a name="l00049"></a>00049 <span class="preprocessor">#include &lt;stdio.h&gt;</span>
<a name="l00050"></a>00050 <span class="preprocessor">#include &lt;string.h&gt;</span>
<a name="l00051"></a>00051 <span class="preprocessor">#include &lt;math.h&gt;</span>
<a name="l00052"></a>00052 
      <span class="comment">/* Command-line options understood by this tool.  Each entry lists the</span>
      <span class="comment"> * option name, its value type, its default (NULL when there is none)</span>
      <span class="comment"> * and its help text; an all-NULL entry terminates the table. */</span>
<a name="l00053"></a>00053 <span class="keyword">static</span> <span class="keyword">const</span> <a class="code" href="structarg__t.html" title="Argument definition structure.">arg_t</a> defn[] = {
<a name="l00054"></a>00054   { <span class="stringliteral">&quot;-help&quot;</span>,
<a name="l00055"></a>00055     <a class="code" href="cmd__ln_8h.html#ac7d08ff59bb6905c3375162e75913e88" title="Boolean (true/false) argument (optional).">ARG_BOOLEAN</a>,
<a name="l00056"></a>00056     <span class="stringliteral">&quot;no&quot;</span>,
<a name="l00057"></a>00057     <span class="stringliteral">&quot;Shows the usage of the tool&quot;</span>},
<a name="l00058"></a>00058 
<a name="l00059"></a>00059   { <span class="stringliteral">&quot;-logbase&quot;</span>,
<a name="l00060"></a>00060     <a class="code" href="cmd__ln_8h.html#aceb617c8572cf5ad6257b35e6d8919e7">ARG_FLOAT64</a>,
<a name="l00061"></a>00061     <span class="stringliteral">&quot;1.0001&quot;</span>,
<a name="l00062"></a>00062     <span class="stringliteral">&quot;Base in which all log-likelihoods calculated&quot;</span> },
<a name="l00063"></a>00063 
<a name="l00064"></a>00064   { <span class="stringliteral">&quot;-lm&quot;</span>,
<a name="l00065"></a>00065     <a class="code" href="cmd__ln_8h.html#a4de5ed5fcf59a18b24bc9f6449cc9356" title="String argument (optional).">ARG_STRING</a>,
<a name="l00066"></a>00066     NULL,
<a name="l00067"></a>00067     <span class="stringliteral">&quot;Language model file&quot;</span>},
<a name="l00068"></a>00068 
<a name="l00069"></a>00069   { <span class="stringliteral">&quot;-probdef&quot;</span>,
<a name="l00070"></a>00070     <a class="code" href="cmd__ln_8h.html#a4de5ed5fcf59a18b24bc9f6449cc9356" title="String argument (optional).">ARG_STRING</a>,
<a name="l00071"></a>00071     NULL,
<a name="l00072"></a>00072     <span class="stringliteral">&quot;Probability definition file for classes in LM&quot;</span>},
<a name="l00073"></a>00073 
<a name="l00074"></a>00074   { <span class="stringliteral">&quot;-lmctlfn&quot;</span>,
<a name="l00075"></a>00075     <a class="code" href="cmd__ln_8h.html#a4de5ed5fcf59a18b24bc9f6449cc9356" title="String argument (optional).">ARG_STRING</a>,
<a name="l00076"></a>00076     NULL,
<a name="l00077"></a>00077     <span class="stringliteral">&quot;Control file listing a set of language models&quot;</span>},
<a name="l00078"></a>00078 
<a name="l00079"></a>00079   { <span class="stringliteral">&quot;-lmname&quot;</span>,
<a name="l00080"></a>00080     <a class="code" href="cmd__ln_8h.html#a4de5ed5fcf59a18b24bc9f6449cc9356" title="String argument (optional).">ARG_STRING</a>,
<a name="l00081"></a>00081     NULL,
<a name="l00082"></a>00082     <span class="stringliteral">&quot;Name of language model in -lmctlfn to use for all utterances&quot;</span> },
<a name="l00083"></a>00083 
<a name="l00084"></a>00084   { <span class="stringliteral">&quot;-lsn&quot;</span>,
<a name="l00085"></a>00085     <a class="code" href="cmd__ln_8h.html#a4de5ed5fcf59a18b24bc9f6449cc9356" title="String argument (optional).">ARG_STRING</a>,
<a name="l00086"></a>00086     NULL,
<a name="l00087"></a>00087     <span class="stringliteral">&quot;Transcription file to evaluate&quot;</span>},
<a name="l00088"></a>00088 
<a name="l00089"></a>00089   { <span class="stringliteral">&quot;-text&quot;</span>,
<a name="l00090"></a>00090     <a class="code" href="cmd__ln_8h.html#a4de5ed5fcf59a18b24bc9f6449cc9356" title="String argument (optional).">ARG_STRING</a>,
          NULL,
<a name="l00091"></a>00091     <span class="stringliteral">&quot;Text string to evaluate&quot;</span>},
<a name="l00092"></a>00092 
<a name="l00093"></a>00093   { <span class="stringliteral">&quot;-mmap&quot;</span>,
<a name="l00094"></a>00094     <a class="code" href="cmd__ln_8h.html#ac7d08ff59bb6905c3375162e75913e88" title="Boolean (true/false) argument (optional).">ARG_BOOLEAN</a>,
<a name="l00095"></a>00095     <span class="stringliteral">&quot;no&quot;</span>,
<a name="l00096"></a>00096     <span class="stringliteral">&quot;Use memory-mapped I/O for reading binary LM files&quot;</span>},
<a name="l00097"></a>00097 
<a name="l00098"></a>00098   { <span class="stringliteral">&quot;-lw&quot;</span>,
<a name="l00099"></a>00099     <a class="code" href="cmd__ln_8h.html#ad9b1952e4f1def9ee6a88791375b3901">ARG_FLOAT32</a>,
<a name="l00100"></a>00100     <span class="stringliteral">&quot;1.0&quot;</span>,
<a name="l00101"></a>00101     <span class="stringliteral">&quot;Language model weight&quot;</span> },
<a name="l00102"></a>00102 
<a name="l00103"></a>00103   { <span class="stringliteral">&quot;-wip&quot;</span>,
<a name="l00104"></a>00104     <a class="code" href="cmd__ln_8h.html#ad9b1952e4f1def9ee6a88791375b3901">ARG_FLOAT32</a>,
<a name="l00105"></a>00105     <span class="stringliteral">&quot;1.0&quot;</span>,
<a name="l00106"></a>00106     <span class="stringliteral">&quot;Word insertion probability&quot;</span> },
<a name="l00107"></a>00107 
<a name="l00108"></a>00108   { <span class="stringliteral">&quot;-uw&quot;</span>,
<a name="l00109"></a>00109     <a class="code" href="cmd__ln_8h.html#ad9b1952e4f1def9ee6a88791375b3901">ARG_FLOAT32</a>,
<a name="l00110"></a>00110     <span class="stringliteral">&quot;1.0&quot;</span>,
<a name="l00111"></a>00111     <span class="stringliteral">&quot;Unigram probability weight (interpolated with uniform distribution)&quot;</span>},
<a name="l00112"></a>00112 
<a name="l00113"></a>00113   { <span class="stringliteral">&quot;-verbose&quot;</span>,
<a name="l00114"></a>00114     <a class="code" href="cmd__ln_8h.html#ac7d08ff59bb6905c3375162e75913e88" title="Boolean (true/false) argument (optional).">ARG_BOOLEAN</a>,
<a name="l00115"></a>00115     <span class="stringliteral">&quot;no&quot;</span>,
<a name="l00116"></a>00116     <span class="stringliteral">&quot;Print details of perplexity calculation&quot;</span> },
<a name="l00117"></a>00117 
<a name="l00118"></a>00118   <span class="comment">/* FIXME: Support -lmstartsym, -lmendsym, -lmctlfn, -ctl_lm */</span>
<a name="l00119"></a>00119   { NULL, 0, NULL, NULL }
<a name="l00120"></a>00120 };
<a name="l00121"></a>00121 
      <span class="comment">/* Non-zero when -verbose was given (set in main()); makes calc_entropy()</span>
      <span class="comment"> * print the score of every N-gram it evaluates. */</span>
<a name="l00122"></a>00122 <span class="keyword">static</span> <span class="keywordtype">int</span> verbose;
<a name="l00123"></a>00123 
      <span class="comment">/*</span>
      <span class="comment"> * Score the n words in `words` against lm.</span>
      <span class="comment"> *</span>
      <span class="comment"> * Occurrences of &quot;&lt;s&gt;&quot; are counted as context cues, and words the model</span>
      <span class="comment"> * maps to NGRAM_INVALID_WID or to its unknown-word ID are counted as OOVs;</span>
      <span class="comment"> * neither kind is scored.  Every other word is scored with</span>
      <span class="comment"> * ngram_ng_score(), using the words that precede it as history.</span>
      <span class="comment"> *</span>
      <span class="comment"> * out_n_ccs, out_n_oovs and out_lm_score may be NULL.  Otherwise they</span>
      <span class="comment"> * receive the context-cue count, the OOV count and the sum of the</span>
      <span class="comment"> * scores; all three are zeroed up front so that they hold defined values</span>
      <span class="comment"> * on every return path.</span>
      <span class="comment"> *</span>
      <span class="comment"> * Returns the negated score sum divided (integer division) by the number</span>
      <span class="comment"> * of scored words, or 0 when no word was scored.</span>
      <span class="comment"> */</span>
<a name="l00124"></a>00124 <span class="keyword">static</span> <span class="keywordtype">int</span>
<a name="l00125"></a>00125 calc_entropy(<a class="code" href="structngram__model__s.html" title="Common implementation of ngram_model_t.">ngram_model_t</a> *lm, <span class="keywordtype">char</span> **words, int32 n,
<a name="l00126"></a>00126              int32 *out_n_ccs, int32 *out_n_oovs, int32 *out_lm_score)
<a name="l00127"></a>00127 {
<a name="l00128"></a>00128         int32 *wids;
<a name="l00129"></a>00129         int32 startwid;
<a name="l00130"></a>00130         int32 i, ch, nccs, noovs, unk;
<a name="l00131"></a>00131 
              <span class="comment">/* Give every output a defined value before any early return. */</span>
              <span class="keywordflow">if</span> (out_n_ccs) *out_n_ccs = 0;
              <span class="keywordflow">if</span> (out_n_oovs) *out_n_oovs = 0;
              <span class="keywordflow">if</span> (out_lm_score) *out_lm_score = 0;

<a name="l00132"></a>00132         <span class="keywordflow">if</span> (n == 0)
<a name="l00133"></a>00133             <span class="keywordflow">return</span> 0;
<a name="l00134"></a>00134 
<a name="l00135"></a>00135         unk = <a class="code" href="ngram__model_8h.html#a1469e9e1c8516a77c9ac1e248a61ef4e" title="Get the unknown word ID for a language model.">ngram_unknown_wid</a>(lm);
<a name="l00136"></a>00136 
<a name="l00137"></a>00137         <span class="comment">/* Reverse this array into an array of word IDs. */</span>
<a name="l00138"></a>00138         wids = <a class="code" href="ckd__alloc_8h.html#aa00ef21903bc4f8a972488417adc8d2e" title="Macros to simplify the use of above functions.">ckd_calloc</a>(n, <span class="keyword">sizeof</span>(*wids));
<a name="l00139"></a>00139         <span class="keywordflow">for</span> (i = 0; i &lt; n; ++i)
<a name="l00140"></a>00140                 wids[n-i-1] = <a class="code" href="ngram__model_8h.html#ad03d4355d4ea659815dc25bce8d83880" title="Look up numerical word ID.">ngram_wid</a>(lm, words[i]);
<a name="l00141"></a>00141         <span class="comment">/* Skip &lt;s&gt; as it&#39;s a context cue (HACK, this should be configurable). */</span>
<a name="l00142"></a>00142         startwid = <a class="code" href="ngram__model_8h.html#ad03d4355d4ea659815dc25bce8d83880" title="Look up numerical word ID.">ngram_wid</a>(lm, <span class="stringliteral">&quot;&lt;s&gt;&quot;</span>);
<a name="l00143"></a>00143 
<a name="l00144"></a>00144         <span class="comment">/* Now evaluate the list of words in reverse using the</span>
<a name="l00145"></a>00145 <span class="comment">         * remainder of the array as the history. */</span>
<a name="l00146"></a>00146         ch = noovs = nccs = 0;
<a name="l00147"></a>00147         <span class="keywordflow">for</span> (i = 0; i &lt; n; ++i) {
<a name="l00148"></a>00148                 int32 n_used;
<a name="l00149"></a>00149                 int32 prob;
<a name="l00150"></a>00150 
<a name="l00151"></a>00151                 <span class="comment">/* Skip &lt;s&gt; as it&#39;s a context cue (HACK, this should be configurable). */</span>
<a name="l00152"></a>00152                 <span class="keywordflow">if</span> (wids[i] == startwid) {
<a name="l00153"></a>00153                         ++nccs;
<a name="l00154"></a>00154                         <span class="keywordflow">continue</span>;
<a name="l00155"></a>00155                 }
<a name="l00156"></a>00156                 <span class="comment">/* Skip and count OOVs. */</span>
<a name="l00157"></a>00157                 <span class="keywordflow">if</span> (wids[i] == <a class="code" href="ngram__model_8h.html#a3d4b3dddd0ff67e13d30c9bf053d01ab" title="Impossible word ID.">NGRAM_INVALID_WID</a> || wids[i] == unk) {
<a name="l00158"></a>00158                         ++noovs;
<a name="l00159"></a>00159                         <span class="keywordflow">continue</span>;
<a name="l00160"></a>00160                 }
<a name="l00161"></a>00161                 <span class="comment">/* Sum up information for each N-gram */</span>
<a name="l00162"></a>00162                 prob = <a class="code" href="ngram__model_8h.html#a6ac5799e78ea4ad82a11e2439016471e" title="Quick general N-Gram score lookup.">ngram_ng_score</a>(lm,
<a name="l00163"></a>00163                                       wids[i], wids + i + 1,
<a name="l00164"></a>00164                                       n - i - 1, &amp;n_used);
<a name="l00165"></a>00165                 <span class="keywordflow">if</span> (verbose) {
<a name="l00166"></a>00166                     <span class="keywordtype">int</span> m;
<a name="l00167"></a>00167                     printf(<span class="stringliteral">&quot;log P(%s|&quot;</span>, <a class="code" href="ngram__model_8h.html#a96e36290a005c03464ea6c637ccde2f5" title="Look up word string for numerical word ID.">ngram_word</a>(lm, wids[i]));
<a name="l00168"></a>00168                     m = i + <a class="code" href="ngram__model_8h.html#a462d374099a4fe8b3c3195b5e2013545" title="Get the order of the N-gram model (i.e.">ngram_model_get_size</a>(lm) - 1;
<a name="l00169"></a>00169                     <span class="keywordflow">if</span> (m &gt;= n)
<a name="l00170"></a>00170                         m = n - 1;
<a name="l00171"></a>00171                     <span class="keywordflow">while</span> (m &gt; i) {
<a name="l00172"></a>00172                         printf(<span class="stringliteral">&quot;%s &quot;</span>, <a class="code" href="ngram__model_8h.html#a96e36290a005c03464ea6c637ccde2f5" title="Look up word string for numerical word ID.">ngram_word</a>(lm, wids[m--]));
<a name="l00173"></a>00173                     }
<a name="l00174"></a>00174                     printf(<span class="stringliteral">&quot;) = %d\n&quot;</span>, prob);
<a name="l00175"></a>00175                 }
<a name="l00176"></a>00176                 ch -= prob;
<a name="l00177"></a>00177         }
<a name="l00178"></a>00178 
              <span class="comment">/* The word ID array is only needed while scoring. */</span>
              <a class="code" href="ckd__alloc_8h.html#a31c6b405558620ac37599737b5722fbf" title="Test and free a 1-D array.">ckd_free</a>(wids);

<a name="l00179"></a>00179         <span class="keywordflow">if</span> (out_n_ccs) *out_n_ccs = nccs;
<a name="l00180"></a>00180         <span class="keywordflow">if</span> (out_n_oovs) *out_n_oovs = noovs;
<a name="l00181"></a>00181 
<a name="l00182"></a>00182         <span class="comment">/* Calculate cross-entropy CH = - 1/N sum log P(W|H) */</span>
<a name="l00183"></a>00183         n -= (nccs + noovs);
<a name="l00184"></a>00184         <span class="keywordflow">if</span> (n &lt;= 0)
<a name="l00185"></a>00185             <span class="keywordflow">return</span> 0;
<a name="l00186"></a>00186         <span class="keywordflow">if</span> (out_lm_score)
<a name="l00187"></a>00187             *out_lm_score = -ch;
<a name="l00188"></a>00188         <span class="keywordflow">return</span> ch / n;
<a name="l00189"></a>00189 }
<a name="l00190"></a>00190 
      <span class="comment">/*</span>
      <span class="comment"> * Evaluate the language model on the transcription file lsnfn, one</span>
      <span class="comment"> * utterance per line, and print the results to standard output.</span>
      <span class="comment"> *</span>
      <span class="comment"> * Each line is split into words; a final token of the form &quot;(...)&quot; is</span>
      <span class="comment"> * treated as an utterance ID and dropped.  The per-line results of</span>
      <span class="comment"> * calc_entropy() are accumulated, then the cross-entropy in bits, the</span>
      <span class="comment"> * perplexity, the total LM score and the word, OOV and context-cue</span>
      <span class="comment"> * counts are printed.  Failure to open lsnfn is fatal.</span>
      <span class="comment"> */</span>
<a name="l00191"></a>00191 <span class="keyword">static</span> <span class="keywordtype">void</span>
<a name="l00192"></a>00192 evaluate_file(<a class="code" href="structngram__model__s.html" title="Common implementation of ngram_model_t.">ngram_model_t</a> *lm, <a class="code" href="structlogmath__s.html">logmath_t</a> *lmath, <span class="keyword">const</span> <span class="keywordtype">char</span> *lsnfn)
<a name="l00193"></a>00193 {
<a name="l00194"></a>00194         FILE *fh;
<a name="l00195"></a>00195         <a class="code" href="structlineiter__t.html" title="Line iterator for files.">lineiter_t</a> *litor;
<a name="l00196"></a>00196         int32 nccs, noovs, nwords, lscr;
<a name="l00197"></a>00197         float64 ch, log_to_log2;
<a name="l00198"></a>00198 
<a name="l00199"></a>00199         <span class="keywordflow">if</span> ((fh = fopen(lsnfn, <span class="stringliteral">&quot;r&quot;</span>)) == NULL)
<a name="l00200"></a>00200                 <a class="code" href="err_8h.html#a5229a1d58f5f5e69963a8d038ff5bc3e" title="Print error text; Call perror(&amp;quot;&amp;quot;); exit(errno);.">E_FATAL_SYSTEM</a>(<span class="stringliteral">&quot;failed to open transcript file %s&quot;</span>, lsnfn);
<a name="l00201"></a>00201 
<a name="l00202"></a>00202         <span class="comment">/* We have to keep ch in floating-point to avoid overflows, so</span>
<a name="l00203"></a>00203 <span class="comment">         * we might as well use log2. */</span>
<a name="l00204"></a>00204         log_to_log2 = log(<a class="code" href="logmath_8h.html#a6114206ec0321d7015c42fc7b81cb83e" title="Get the log base.">logmath_get_base</a>(lmath)) / log(2);
<a name="l00205"></a>00205         nccs = noovs = nwords = lscr = 0;
<a name="l00206"></a>00206         ch = 0.0;
<a name="l00207"></a>00207         <span class="keywordflow">for</span> (litor = <a class="code" href="pio_8h.html#a22d0125ab198f02f8bbe543417d99566" title="Start reading lines from a file.">lineiter_start</a>(fh); litor; litor = <a class="code" href="pio_8h.html#aff8df0b6928746d61b3520555263f71e" title="Move to the next line in the file.">lineiter_next</a>(litor)) {
<a name="l00208"></a>00208                 <span class="keywordtype">char</span> **words;
<a name="l00209"></a>00209                 int32 n, tmp_ch, tmp_noovs, tmp_nccs, tmp_lscr;
<a name="l00210"></a>00210 
<a name="l00211"></a>00211                 n = <a class="code" href="strfuncs_8h.html#a5b520fdebcca599db86faaf75a82173f" title="Convert a line to an array of &amp;quot;words&amp;quot;, based on whitespace separators.">str2words</a>(litor-&gt;buf, NULL, 0);
<a name="l00212"></a>00212                 <span class="keywordflow">if</span> (n &lt; 0)
<a name="l00213"></a>00213                         <a class="code" href="err_8h.html#a1a4495946ab2449d61108fe829a94613" title="Exit with non-zero status after error message.">E_FATAL</a>(<span class="stringliteral">&quot;str2words(line, NULL, 0) = %d, should not happen\n&quot;</span>, n);
<a name="l00214"></a>00214                 <span class="keywordflow">if</span> (n == 0) <span class="comment">/* Do nothing! */</span>
<a name="l00215"></a>00215                         <span class="keywordflow">continue</span>;
<a name="l00216"></a>00216                 words = <a class="code" href="ckd__alloc_8h.html#aa00ef21903bc4f8a972488417adc8d2e" title="Macros to simplify the use of above functions.">ckd_calloc</a>(n, <span class="keyword">sizeof</span>(*words));
<a name="l00217"></a>00217                 <a class="code" href="strfuncs_8h.html#a5b520fdebcca599db86faaf75a82173f" title="Convert a line to an array of &amp;quot;words&amp;quot;, based on whitespace separators.">str2words</a>(litor-&gt;buf, words, n);
<a name="l00218"></a>00218 
<a name="l00219"></a>00219                 <span class="comment">/* Remove any utterance ID (FIXME: has to be a single &quot;word&quot;) */</span>
<a name="l00220"></a>00220                 <span class="keywordflow">if</span> (words[n-1][0] == <span class="charliteral">&#39;(&#39;</span>
<a name="l00221"></a>00221                     &amp;&amp; words[n-1][strlen(words[n-1])-1] == <span class="charliteral">&#39;)&#39;</span>)
<a name="l00222"></a>00222                         n = n - 1;
<a name="l00223"></a>00223 
                      <span class="comment">/* Start from defined values in case calc_entropy() leaves an</span>
                      <span class="comment"> * output untouched (e.g. a line holding only an utterance ID). */</span>
                      tmp_nccs = tmp_noovs = tmp_lscr = 0;
<a name="l00224"></a>00224                 tmp_ch = calc_entropy(lm, words, n, &amp;tmp_nccs,
<a name="l00225"></a>00225                                       &amp;tmp_noovs, &amp;tmp_lscr);
<a name="l00226"></a>00226 
<a name="l00227"></a>00227                 ch += (float64) tmp_ch * (n - tmp_nccs - tmp_noovs) * log_to_log2;
<a name="l00228"></a>00228                 nccs += tmp_nccs;
<a name="l00229"></a>00229                 noovs += tmp_noovs;
<a name="l00230"></a>00230                 lscr += tmp_lscr;
<a name="l00231"></a>00231                 nwords += n;
<a name="l00232"></a>00232                 
<a name="l00233"></a>00233                 <a class="code" href="ckd__alloc_8h.html#a31c6b405558620ac37599737b5722fbf" title="Test and free a 1-D array.">ckd_free</a>(words);
<a name="l00234"></a>00234         }
              fclose(fh);
<a name="l00235"></a>00235 
              <span class="comment">/* Leave ch at 0 rather than dividing by zero when nothing was scored. */</span>
<a name="l00236"></a>00236         <span class="keywordflow">if</span> (nwords - nccs - noovs &gt; 0)
                      ch /= (nwords - nccs - noovs);
<a name="l00237"></a>00237         printf(<span class="stringliteral">&quot;cross-entropy: %f bits\n&quot;</span>, ch);
<a name="l00238"></a>00238 
<a name="l00239"></a>00239         <span class="comment">/* Calculate perplexity pplx = exp CH */</span>
<a name="l00240"></a>00240         printf(<span class="stringliteral">&quot;perplexity: %f\n&quot;</span>, pow(2.0, ch));
<a name="l00241"></a>00241         printf(<span class="stringliteral">&quot;lm score: %d\n&quot;</span>, lscr);
<a name="l00242"></a>00242 
<a name="l00243"></a>00243         <span class="comment">/* Report OOVs and CCs */</span>
<a name="l00244"></a>00244         printf(<span class="stringliteral">&quot;%d words evaluated\n&quot;</span>, nwords);
<a name="l00245"></a>00245         printf(<span class="stringliteral">&quot;%d OOVs (%.2f%%), %d context cues removed\n&quot;</span>,
<a name="l00246"></a>00246                noovs, nwords ? (<span class="keywordtype">double</span>)noovs / nwords * 100 : 0.0, nccs);
<a name="l00247"></a>00247 }
<a name="l00248"></a>00248 
      <span class="comment">/*</span>
      <span class="comment"> * Evaluate the language model on the single string `text` and print the</span>
      <span class="comment"> * cross-entropy in bits, the perplexity, the LM score and the word, OOV</span>
      <span class="comment"> * and context-cue counts to standard output.  A string containing no</span>
      <span class="comment"> * words produces no output.</span>
      <span class="comment"> */</span>
<a name="l00249"></a>00249 <span class="keyword">static</span> <span class="keywordtype">void</span>
<a name="l00250"></a>00250 evaluate_string(<a class="code" href="structngram__model__s.html" title="Common implementation of ngram_model_t.">ngram_model_t</a> *lm, <a class="code" href="structlogmath__s.html">logmath_t</a> *lmath, <span class="keyword">const</span> <span class="keywordtype">char</span> *text)
<a name="l00251"></a>00251 {
<a name="l00252"></a>00252         <span class="keywordtype">char</span> *textfoo;
<a name="l00253"></a>00253         <span class="keywordtype">char</span> **words;
<a name="l00254"></a>00254         int32 n, ch, noovs, nccs, lscr;
<a name="l00255"></a>00255 
<a name="l00256"></a>00256         <span class="comment">/* Split it into an array of strings. */</span>
<a name="l00257"></a>00257         textfoo = <a class="code" href="ckd__alloc_8h.html#ad313f92478859f9e4ea99d0f6e78c393" title="Macro for __ckd_salloc__.">ckd_salloc</a>(text);
<a name="l00258"></a>00258         n = <a class="code" href="strfuncs_8h.html#a5b520fdebcca599db86faaf75a82173f" title="Convert a line to an array of &amp;quot;words&amp;quot;, based on whitespace separators.">str2words</a>(textfoo, NULL, 0);
<a name="l00259"></a>00259         <span class="keywordflow">if</span> (n &lt; 0)
<a name="l00260"></a>00260                 <a class="code" href="err_8h.html#a1a4495946ab2449d61108fe829a94613" title="Exit with non-zero status after error message.">E_FATAL</a>(<span class="stringliteral">&quot;str2words(textfoo, NULL, 0) = %d, should not happen\n&quot;</span>, n);
<a name="l00261"></a>00261         <span class="keywordflow">if</span> (n == 0) { <span class="comment">/* Nothing to evaluate; release the buffer first. */</span>
                      <a class="code" href="ckd__alloc_8h.html#a31c6b405558620ac37599737b5722fbf" title="Test and free a 1-D array.">ckd_free</a>(textfoo);
<a name="l00262"></a>00262                 <span class="keywordflow">return</span>;
              }
<a name="l00263"></a>00263         words = <a class="code" href="ckd__alloc_8h.html#aa00ef21903bc4f8a972488417adc8d2e" title="Macros to simplify the use of above functions.">ckd_calloc</a>(n, <span class="keyword">sizeof</span>(*words));
<a name="l00264"></a>00264         <a class="code" href="strfuncs_8h.html#a5b520fdebcca599db86faaf75a82173f" title="Convert a line to an array of &amp;quot;words&amp;quot;, based on whitespace separators.">str2words</a>(textfoo, words, n);
<a name="l00265"></a>00265 
              <span class="comment">/* Start from defined values in case calc_entropy() leaves an</span>
              <span class="comment"> * output untouched (e.g. when no word ends up being scored). */</span>
              nccs = noovs = lscr = 0;
<a name="l00266"></a>00266         ch = calc_entropy(lm, words, n, &amp;nccs, &amp;noovs, &amp;lscr);
<a name="l00267"></a>00267 
<a name="l00268"></a>00268         printf(<span class="stringliteral">&quot;input: %s\n&quot;</span>, text);
<a name="l00269"></a>00269         printf(<span class="stringliteral">&quot;cross-entropy: %f bits\n&quot;</span>,
<a name="l00270"></a>00270                ch * log(<a class="code" href="logmath_8h.html#a6114206ec0321d7015c42fc7b81cb83e" title="Get the log base.">logmath_get_base</a>(lmath)) / log(2));
<a name="l00271"></a>00271 
<a name="l00272"></a>00272         <span class="comment">/* Calculate perplexity pplx = exp CH */</span>
<a name="l00273"></a>00273         printf(<span class="stringliteral">&quot;perplexity: %f\n&quot;</span>, <a class="code" href="logmath_8h.html#ae8b0a168e29e448c0d6de66dc46e099e" title="Convert integer log in base B to linear floating point.">logmath_exp</a>(lmath, ch));
<a name="l00274"></a>00274         printf(<span class="stringliteral">&quot;lm score: %d\n&quot;</span>, lscr);
<a name="l00275"></a>00275 
<a name="l00276"></a>00276         <span class="comment">/* Report OOVs and CCs */</span>
<a name="l00277"></a>00277         printf(<span class="stringliteral">&quot;%d words evaluated\n&quot;</span>, n);
<a name="l00278"></a>00278         printf(<span class="stringliteral">&quot;%d OOVs, %d context cues removed\n&quot;</span>,
<a name="l00279"></a>00279               noovs, nccs);
<a name="l00280"></a>00280 
<a name="l00281"></a>00281         <a class="code" href="ckd__alloc_8h.html#a31c6b405558620ac37599737b5722fbf" title="Test and free a 1-D array.">ckd_free</a>(textfoo);
<a name="l00282"></a>00282         <a class="code" href="ckd__alloc_8h.html#a31c6b405558620ac37599737b5722fbf" title="Test and free a 1-D array.">ckd_free</a>(words);
<a name="l00283"></a>00283 }
<a name="l00284"></a>00284 
<a name="l00285"></a>00285 <span class="keywordtype">int</span>
<a name="l00286"></a>00286 main(<span class="keywordtype">int</span> argc, <span class="keywordtype">char</span> *argv[])
<a name="l00287"></a>00287 {
<a name="l00288"></a>00288         <a class="code" href="structcmd__ln__t.html" title="Opaque structure used to hold the results of command-line parsing.">cmd_ln_t</a> *<a class="code" href="structsphinx__wave2feat__s.html#a484d308befbde315664da8520ebc410d" title="Configuration parameters.">config</a>;
<a name="l00289"></a>00289         <a class="code" href="structngram__model__s.html" title="Common implementation of ngram_model_t.">ngram_model_t</a> *lm = NULL;
<a name="l00290"></a>00290         <a class="code" href="structlogmath__s.html">logmath_t</a> *lmath;
<a name="l00291"></a>00291         <span class="keyword">const</span> <span class="keywordtype">char</span> *lmfn, *probdefn, *lsnfn, *text;
<a name="l00292"></a>00292 
<a name="l00293"></a>00293         <span class="keywordflow">if</span> ((config = <a class="code" href="cmd__ln_8h.html#aa5a3a9e49198d8fd0dd3424fb880b6b6" title="Parse a list of strings into argumetns.">cmd_ln_parse_r</a>(NULL, defn, argc, argv, TRUE)) == NULL)
<a name="l00294"></a>00294                 <span class="keywordflow">return</span> 1;
<a name="l00295"></a>00295 
<a name="l00296"></a>00296         verbose = <a class="code" href="cmd__ln_8h.html#a159e691c95089689cf9a8f85a67830a6" title="Retrieve a boolean value from a command-line object.">cmd_ln_boolean_r</a>(config, <span class="stringliteral">&quot;-verbose&quot;</span>);
<a name="l00297"></a>00297 
<a name="l00298"></a>00298         <span class="comment">/* Create log math object. */</span>
<a name="l00299"></a>00299         <span class="keywordflow">if</span> ((lmath = logmath_init
<a name="l00300"></a>00300              (cmd_ln_float64_r(config, <span class="stringliteral">&quot;-logbase&quot;</span>), 0, 0)) == NULL) {
<a name="l00301"></a>00301                 <a class="code" href="err_8h.html#a1a4495946ab2449d61108fe829a94613" title="Exit with non-zero status after error message.">E_FATAL</a>(<span class="stringliteral">&quot;Failed to initialize log math\n&quot;</span>);
<a name="l00302"></a>00302         }
<a name="l00303"></a>00303 
<a name="l00304"></a>00304         <span class="comment">/* Load the language model. */</span>
<a name="l00305"></a>00305         lmfn = <a class="code" href="cmd__ln_8h.html#af0aa15288e06fc8271298e4fa7cdc91a" title="Retrieve a string from a command-line object.">cmd_ln_str_r</a>(config, <span class="stringliteral">&quot;-lm&quot;</span>);
<a name="l00306"></a>00306         <span class="keywordflow">if</span> (lmfn == NULL
<a name="l00307"></a>00307             || (lm = <a class="code" href="ngram__model_8h.html#ab0c840f2bdfc38cea08bb70054f76624" title="Read an N-Gram model from a file on disk.">ngram_model_read</a>(config, lmfn,
<a name="l00308"></a>00308                                       <a class="code" href="ngram__model_8h.html#a406c0d64c15a9d1749d07c8ab6e0ae74a441701bf8ae0a2b79716feb31b5f257a" title="Determine file type automatically.">NGRAM_AUTO</a>, lmath)) == NULL) {
<a name="l00309"></a>00309                 <a class="code" href="err_8h.html#a1a4495946ab2449d61108fe829a94613" title="Exit with non-zero status after error message.">E_FATAL</a>(<span class="stringliteral">&quot;Failed to load language model from %s\n&quot;</span>,
<a name="l00310"></a>00310                         <a class="code" href="cmd__ln_8h.html#af0aa15288e06fc8271298e4fa7cdc91a" title="Retrieve a string from a command-line object.">cmd_ln_str_r</a>(config, <span class="stringliteral">&quot;-lm&quot;</span>));
<a name="l00311"></a>00311         }
<a name="l00312"></a>00312         <span class="keywordflow">if</span> ((probdefn = <a class="code" href="cmd__ln_8h.html#af0aa15288e06fc8271298e4fa7cdc91a" title="Retrieve a string from a command-line object.">cmd_ln_str_r</a>(config, <span class="stringliteral">&quot;-probdef&quot;</span>)) != NULL)
<a name="l00313"></a>00313             <a class="code" href="ngram__model_8h.html#a9b2a86c23543158754373c5456fe890d" title="Read a class definition file and add classes to a language model.">ngram_model_read_classdef</a>(lm, probdefn);
<a name="l00314"></a>00314         <a class="code" href="ngram__model_8h.html#aa4b8d7c1f3d873b8458c0cfee13af4da" title="Apply a language weight, insertion penalty, and unigram weight to a language model.">ngram_model_apply_weights</a>(lm,
<a name="l00315"></a>00315                                   cmd_ln_float32_r(config, <span class="stringliteral">&quot;-lw&quot;</span>),
<a name="l00316"></a>00316                                   cmd_ln_float32_r(config, <span class="stringliteral">&quot;-wip&quot;</span>),
<a name="l00317"></a>00317                                   cmd_ln_float32_r(config, <span class="stringliteral">&quot;-uw&quot;</span>));
<a name="l00318"></a>00318 
<a name="l00319"></a>00319         <span class="comment">/* Now evaluate some text. */</span>
<a name="l00320"></a>00320         lsnfn = <a class="code" href="cmd__ln_8h.html#af0aa15288e06fc8271298e4fa7cdc91a" title="Retrieve a string from a command-line object.">cmd_ln_str_r</a>(config, <span class="stringliteral">&quot;-lsn&quot;</span>);
<a name="l00321"></a>00321         text = <a class="code" href="cmd__ln_8h.html#af0aa15288e06fc8271298e4fa7cdc91a" title="Retrieve a string from a command-line object.">cmd_ln_str_r</a>(config, <span class="stringliteral">&quot;-text&quot;</span>);
<a name="l00322"></a>00322         <span class="keywordflow">if</span> (lsnfn) {
<a name="l00323"></a>00323                 evaluate_file(lm, lmath, lsnfn);
<a name="l00324"></a>00324         }
<a name="l00325"></a>00325         <span class="keywordflow">else</span> <span class="keywordflow">if</span> (text) {
<a name="l00326"></a>00326                 evaluate_string(lm, lmath, text);
<a name="l00327"></a>00327         }
<a name="l00328"></a>00328 
<a name="l00329"></a>00329         <span class="keywordflow">return</span> 0;
<a name="l00330"></a>00330 }
</pre></div></div>
</div>
  <div id="nav-path" class="navpath">
    <ul>
      <li class="navelem"><a class="el" href="sphinx__lm__eval_8c.html">sphinx_lm_eval.c</a>      </li>
      <li class="footer">Generated on Tue Apr 19 2011 for SphinxBase by&#160;
<a href="http://www.doxygen.org/index.html">
<img class="footer" src="doxygen.png" alt="doxygen"/></a> 1.7.3 </li>
    </ul>
  </div>

</body>
</html>