<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> <html xmlns="http://www.w3.org/1999/xhtml"> <head> <meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/> <title>SphinxBase: src/libsphinxbase/fe/fe_internal.h Source File</title> <link href="tabs.css" rel="stylesheet" type="text/css"/> <link href="navtree.css" rel="stylesheet" type="text/css"/> <script type="text/javascript" src="jquery.js"></script> <script type="text/javascript" src="navtree.js"></script> <script type="text/javascript" src="resize.js"></script> <script type="text/javascript"> $(document).ready(initResizable); </script> <link href="doxygen.css" rel="stylesheet" type="text/css"/> </head> <body> <!-- Generated by Doxygen 1.7.3 --> <div id="top"> <div id="titlearea"> <table cellspacing="0" cellpadding="0"> <tbody> <tr style="height: 56px;"> <td style="padding-left: 0.5em;"> <div id="projectname">SphinxBase <span id="projectnumber">0.6</span></div> </td> </tr> </tbody> </table> </div> <div id="navrow1" class="tabs"> <ul class="tablist"> <li><a href="index.html"><span>Main Page</span></a></li> <li><a href="pages.html"><span>Related Pages</span></a></li> <li><a href="annotated.html"><span>Data Structures</span></a></li> <li class="current"><a href="files.html"><span>Files</span></a></li> </ul> </div> <div id="navrow2" class="tabs2"> <ul class="tablist"> <li><a href="files.html"><span>File List</span></a></li> <li><a href="globals.html"><span>Globals</span></a></li> </ul> </div> </div> <div id="side-nav" class="ui-resizable side-nav-resizable"> <div id="nav-tree"> <div id="nav-tree-contents"> </div> </div> <div id="splitbar" style="-moz-user-select:none;" class="ui-resizable-handle"> </div> </div> <script type="text/javascript"> initNavTree('fe__internal_8h.html',''); </script> <div id="doc-content"> <div class="header"> <div class="headertitle"> <h1>src/libsphinxbase/fe/fe_internal.h</h1> </div> </div> <div 
class="contents"> <div class="fragment"><pre class="fragment"><a name="l00001"></a>00001 <span class="comment">/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */</span> <a name="l00002"></a>00002 <span class="comment">/* ====================================================================</span> <a name="l00003"></a>00003 <span class="comment"> * Copyright (c) 1996-2004 Carnegie Mellon University. All rights</span> <a name="l00004"></a>00004 <span class="comment"> * reserved.</span> <a name="l00005"></a>00005 <span class="comment"> *</span> <a name="l00006"></a>00006 <span class="comment"> * Redistribution and use in source and binary forms, with or without</span> <a name="l00007"></a>00007 <span class="comment"> * modification, are permitted provided that the following conditions</span> <a name="l00008"></a>00008 <span class="comment"> * are met:</span> <a name="l00009"></a>00009 <span class="comment"> *</span> <a name="l00010"></a>00010 <span class="comment"> * 1. Redistributions of source code must retain the above copyright</span> <a name="l00011"></a>00011 <span class="comment"> * notice, this list of conditions and the following disclaimer. </span> <a name="l00012"></a>00012 <span class="comment"> *</span> <a name="l00013"></a>00013 <span class="comment"> * 2. 
Redistributions in binary form must reproduce the above copyright</span> <a name="l00014"></a>00014 <span class="comment"> * notice, this list of conditions and the following disclaimer in</span> <a name="l00015"></a>00015 <span class="comment"> * the documentation and/or other materials provided with the</span> <a name="l00016"></a>00016 <span class="comment"> * distribution.</span> <a name="l00017"></a>00017 <span class="comment"> *</span> <a name="l00018"></a>00018 <span class="comment"> * This work was supported in part by funding from the Defense Advanced </span> <a name="l00019"></a>00019 <span class="comment"> * Research Projects Agency and the National Science Foundation of the </span> <a name="l00020"></a>00020 <span class="comment"> * United States of America, and the CMU Sphinx Speech Consortium.</span> <a name="l00021"></a>00021 <span class="comment"> *</span> <a name="l00022"></a>00022 <span class="comment"> * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND </span> <a name="l00023"></a>00023 <span class="comment"> * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, </span> <a name="l00024"></a>00024 <span class="comment"> * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR</span> <a name="l00025"></a>00025 <span class="comment"> * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY</span> <a name="l00026"></a>00026 <span class="comment"> * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,</span> <a name="l00027"></a>00027 <span class="comment"> * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT </span> <a name="l00028"></a>00028 <span class="comment"> * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, </span> <a name="l00029"></a>00029 <span class="comment"> * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY </span> <a name="l00030"></a>00030 <span class="comment"> * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT </span> <a name="l00031"></a>00031 <span class="comment"> * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE </span> <a name="l00032"></a>00032 <span class="comment"> * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.</span> <a name="l00033"></a>00033 <span class="comment"> *</span> <a name="l00034"></a>00034 <span class="comment"> * ====================================================================</span> <a name="l00035"></a>00035 <span class="comment"> *</span> <a name="l00036"></a>00036 <span class="comment"> */</span> <a name="l00037"></a>00037 <a name="l00038"></a>00038 <span class="preprocessor">#ifndef __FE_INTERNAL_H__</span> <a name="l00039"></a>00039 <span class="preprocessor"></span><span class="preprocessor">#define __FE_INTERNAL_H__</span> <a name="l00040"></a>00040 <span class="preprocessor"></span> <a name="l00041"></a>00041 <span class="preprocessor">#ifdef HAVE_CONFIG_H</span> <a name="l00042"></a>00042 <span class="preprocessor"></span><span class="preprocessor">#include &lt;config.h&gt;</span> <a name="l00043"></a>00043 <span class="preprocessor">#endif</span> <a name="l00044"></a>00044 <span class="preprocessor"></span> <a name="l00045"></a>00045 <span class="preprocessor">#include "sphinxbase/fe.h"</span> <a name="l00046"></a>00046 <span 
class="preprocessor">#include "sphinxbase/fixpoint.h"</span> <a name="l00047"></a>00047 <a name="l00048"></a>00048 <span class="preprocessor">#ifdef __cplusplus</span> <a name="l00049"></a>00049 <span class="preprocessor"></span><span class="keyword">extern</span> <span class="stringliteral">"C"</span> { <a name="l00050"></a>00050 <span class="preprocessor">#endif</span> <a name="l00051"></a>00051 <span class="preprocessor"></span><span class="preprocessor">#if 0</span> <a name="l00052"></a>00052 <span class="preprocessor"></span><span class="comment">/* Fool Emacs. */</span> <a name="l00053"></a>00053 } <a name="l00054"></a>00054 <span class="preprocessor">#endif</span> <a name="l00055"></a>00055 <span class="preprocessor"></span> <a name="l00056"></a>00056 <span class="preprocessor">#ifdef FIXED16</span> <a name="l00057"></a>00057 <span class="preprocessor"></span><span class="comment">/* Q15 format */</span> <a name="l00058"></a>00058 <span class="keyword">typedef</span> int16 frame_t; <a name="l00059"></a>00059 <span class="keyword">typedef</span> int16 window_t; <a name="l00060"></a>00060 <span class="keyword">typedef</span> int32 powspec_t; <a name="l00061"></a>00061 <span class="keyword">typedef</span> <span class="keyword">struct </span>{ int16 r, i; } <a class="code" href="structcomplex.html">complex</a>; <a name="l00062"></a>00062 <span class="preprocessor">#elif defined(FIXED_POINT)</span> <a name="l00063"></a>00063 <span class="preprocessor"></span><span class="keyword">typedef</span> fixed32 frame_t; <a name="l00064"></a>00064 <span class="keyword">typedef</span> int32 powspec_t; <a name="l00065"></a>00065 <span class="keyword">typedef</span> fixed32 window_t; <a name="l00066"></a>00066 <span class="keyword">typedef</span> <span class="keyword">struct </span>{ fixed32 r, i; } <a class="code" href="structcomplex.html">complex</a>; <a name="l00067"></a>00067 <span class="preprocessor">#else </span><span class="comment">/* FIXED_POINT */</span> <a 
name="l00068"></a>00068 <span class="keyword">typedef</span> float64 frame_t; <a name="l00069"></a>00069 <span class="keyword">typedef</span> float64 powspec_t; <a name="l00070"></a>00070 <span class="keyword">typedef</span> float64 window_t; <a name="l00071"></a>00071 <span class="keyword">typedef</span> <span class="keyword">struct </span>{ float64 r, i; } <a class="code" href="structcomplex.html">complex</a>; <a name="l00072"></a>00072 <span class="preprocessor">#endif </span><span class="comment">/* FIXED_POINT */</span> <a name="l00073"></a>00073 <a name="l00074"></a>00074 <span class="comment">/* Values for the 'logspec' field. */</span> <a name="l00075"></a>00075 <span class="keyword">enum</span> { <a name="l00076"></a>00076 RAW_LOG_SPEC = 1, <a name="l00077"></a>00077 SMOOTH_LOG_SPEC = 2 <a name="l00078"></a>00078 }; <a name="l00079"></a>00079 <a name="l00080"></a>00080 <span class="comment">/* Values for the 'transform' field. */</span> <a name="l00081"></a>00081 <span class="keyword">enum</span> { <a name="l00082"></a>00082 LEGACY_DCT = 0, <a name="l00083"></a>00083 DCT_II = 1, <a name="l00084"></a>00084 DCT_HTK = 2 <a name="l00085"></a>00085 }; <a name="l00086"></a>00086 <a name="l00087"></a>00087 <span class="keyword">typedef</span> <span class="keyword">struct </span><a class="code" href="structmelfb__s.html" title="Base Struct to hold all structure for MFCC computation.">melfb_s</a> <a class="code" href="structmelfb__s.html" title="Base Struct to hold all structure for MFCC computation.">melfb_t</a>; <a name="l00089"></a><a class="code" href="structmelfb__s.html">00089</a> <span class="keyword">struct </span><a class="code" href="structmelfb__s.html" title="Base Struct to hold all structure for MFCC computation.">melfb_s</a> { <a name="l00090"></a>00090 float32 sampling_rate; <a name="l00091"></a>00091 int32 num_cepstra; <a name="l00092"></a>00092 int32 num_filters; <a name="l00093"></a>00093 int32 fft_size; <a name="l00094"></a>00094 float32 
lower_filt_freq; <a name="l00095"></a>00095 float32 upper_filt_freq; <a name="l00096"></a>00096 <span class="comment">/* DCT coefficients. */</span> <a name="l00097"></a>00097 mfcc_t **mel_cosine; <a name="l00098"></a>00098 <span class="comment">/* Filter coefficients. */</span> <a name="l00099"></a>00099 mfcc_t *filt_coeffs; <a name="l00100"></a>00100 int16 *spec_start; <a name="l00101"></a>00101 int16 *filt_start; <a name="l00102"></a>00102 int16 *filt_width; <a name="l00103"></a>00103 <span class="comment">/* Luxury mobile home. */</span> <a name="l00104"></a>00104 int32 doublewide; <a name="l00105"></a>00105 <span class="keywordtype">char</span> <span class="keyword">const</span> *warp_type; <a name="l00106"></a>00106 <span class="keywordtype">char</span> <span class="keyword">const</span> *warp_params; <a name="l00107"></a>00107 uint32 warp_id; <a name="l00108"></a>00108 <span class="comment">/* Precomputed normalization constants for unitary DCT-II/DCT-III */</span> <a name="l00109"></a>00109 mfcc_t sqrt_inv_n, sqrt_inv_2n; <a name="l00110"></a>00110 <span class="comment">/* Value and coefficients for HTK-style liftering */</span> <a name="l00111"></a>00111 int32 lifter_val; <a name="l00112"></a>00112 mfcc_t *lifter; <a name="l00113"></a>00113 <span class="comment">/* Normalize filters to unit area */</span> <a name="l00114"></a>00114 int32 unit_area; <a name="l00115"></a>00115 <span class="comment">/* Round filter frequencies to DFT points (hurts accuracy, but is</span> <a name="l00116"></a>00116 <span class="comment"> useful for legacy purposes) */</span> <a name="l00117"></a>00117 int32 round_filters; <a name="l00118"></a>00118 }; <a name="l00119"></a>00119 <a name="l00120"></a>00120 <span class="comment">/* sqrt(1/2), also used for unitary DCT-II/DCT-III */</span> <a name="l00121"></a>00121 <span class="preprocessor">#define SQRT_HALF FLOAT2MFCC(0.707106781186548)</span> <a name="l00122"></a>00122 <span class="preprocessor"></span> <a name="l00124"></a><a 
class="code" href="structfe__s.html">00124</a> <span class="keyword">struct </span><a class="code" href="structfe__s.html" title="Structure for the front-end computation.">fe_s</a> { <a name="l00125"></a>00125 <a class="code" href="structcmd__ln__t.html" title="Opaque structure used to hold the results of command-line parsing.">cmd_ln_t</a> *config; <a name="l00126"></a>00126 <span class="keywordtype">int</span> refcount; <a name="l00127"></a>00127 <a name="l00128"></a>00128 float32 sampling_rate; <a name="l00129"></a>00129 int16 frame_rate; <a name="l00130"></a>00130 int16 frame_shift; <a name="l00131"></a>00131 <a name="l00132"></a>00132 float32 window_length; <a name="l00133"></a>00133 int16 frame_size; <a name="l00134"></a>00134 int16 fft_size; <a name="l00135"></a>00135 <a name="l00136"></a>00136 uint8 fft_order; <a name="l00137"></a>00137 uint8 feature_dimension; <a name="l00138"></a>00138 uint8 num_cepstra; <a name="l00139"></a>00139 uint8 remove_dc; <a name="l00140"></a>00140 uint8 log_spec; <a name="l00141"></a>00141 uint8 swap; <a name="l00142"></a>00142 uint8 dither; <a name="l00143"></a>00143 uint8 transform; <a name="l00144"></a>00144 <a name="l00145"></a>00145 float32 pre_emphasis_alpha; <a name="l00146"></a>00146 int32 seed; <a name="l00147"></a>00147 <a name="l00148"></a>00148 int16 frame_counter; <a name="l00149"></a>00149 uint8 start_flag; <a name="l00150"></a>00150 uint8 reserved; <a name="l00151"></a>00151 <a name="l00152"></a>00152 <span class="comment">/* Twiddle factors for FFT. */</span> <a name="l00153"></a>00153 frame_t *ccc, *sss; <a name="l00154"></a>00154 <span class="comment">/* Mel filter parameters. */</span> <a name="l00155"></a>00155 <a class="code" href="structmelfb__s.html" title="Base Struct to hold all structure for MFCC computation.">melfb_t</a> *mel_fb; <a name="l00156"></a>00156 <span class="comment">/* Half of a Hamming Window. 
*/</span> <a name="l00157"></a>00157 window_t *hamming_window; <a name="l00158"></a>00158 <a name="l00159"></a>00159 <span class="comment">/* Temporary buffers for processing. */</span> <a name="l00160"></a>00160 <span class="comment">/* FIXME: too many of these. */</span> <a name="l00161"></a>00161 int16 *spch; <a name="l00162"></a>00162 frame_t *frame; <a name="l00163"></a>00163 powspec_t *spec, *mfspec; <a name="l00164"></a>00164 int16 *overflow_samps; <a name="l00165"></a>00165 int16 num_overflow_samps; <a name="l00166"></a>00166 int16 prior; <a name="l00167"></a>00167 }; <a name="l00168"></a>00168 <a name="l00169"></a>00169 <span class="preprocessor">#define BB_SAMPLING_RATE 16000</span> <a name="l00170"></a>00170 <span class="preprocessor"></span><span class="preprocessor">#define DEFAULT_BB_FFT_SIZE 512</span> <a name="l00171"></a>00171 <span class="preprocessor"></span><span class="preprocessor">#define DEFAULT_BB_FRAME_SHIFT 160</span> <a name="l00172"></a>00172 <span class="preprocessor"></span><span class="preprocessor">#define DEFAULT_BB_NUM_FILTERS 40</span> <a name="l00173"></a>00173 <span class="preprocessor"></span><span class="preprocessor">#define DEFAULT_BB_LOWER_FILT_FREQ 133.33334</span> <a name="l00174"></a>00174 <span class="preprocessor"></span><span class="preprocessor">#define DEFAULT_BB_UPPER_FILT_FREQ 6855.4976</span> <a name="l00175"></a>00175 <span class="preprocessor"></span> <a name="l00176"></a>00176 <span class="preprocessor">#define NB_SAMPLING_RATE 8000</span> <a name="l00177"></a>00177 <span class="preprocessor"></span><span class="preprocessor">#define DEFAULT_NB_FFT_SIZE 256</span> <a name="l00178"></a>00178 <span class="preprocessor"></span><span class="preprocessor">#define DEFAULT_NB_FRAME_SHIFT 80</span> <a name="l00179"></a>00179 <span class="preprocessor"></span><span class="preprocessor">#define DEFAULT_NB_NUM_FILTERS 31</span> <a name="l00180"></a>00180 <span class="preprocessor"></span><span 
class="preprocessor">#define DEFAULT_NB_LOWER_FILT_FREQ 200</span> <a name="l00181"></a>00181 <span class="preprocessor"></span><span class="preprocessor">#define DEFAULT_NB_UPPER_FILT_FREQ 3500</span> <a name="l00182"></a>00182 <span class="preprocessor"></span> <a name="l00183"></a>00183 <span class="keywordtype">void</span> fe_init_dither(int32 seed); <a name="l00184"></a>00184 <a name="l00185"></a>00185 <span class="comment">/* Apply 1/2 bit noise to a buffer of audio. */</span> <a name="l00186"></a>00186 int32 fe_dither(int16 *buffer, int32 nsamps); <a name="l00187"></a>00187 <a name="l00188"></a>00188 <span class="comment">/* Load a frame of data into the fe. */</span> <a name="l00189"></a>00189 <span class="keywordtype">int</span> fe_read_frame(<a class="code" href="structfe__s.html" title="Structure for the front-end computation.">fe_t</a> *fe, int16 <span class="keyword">const</span> *in, int32 len); <a name="l00190"></a>00190 <a name="l00191"></a>00191 <span class="comment">/* Shift the input buffer back and read more data. */</span> <a name="l00192"></a>00192 <span class="keywordtype">int</span> fe_shift_frame(<a class="code" href="structfe__s.html" title="Structure for the front-end computation.">fe_t</a> *fe, int16 <span class="keyword">const</span> *in, int32 len); <a name="l00193"></a>00193 <a name="l00194"></a>00194 <span class="comment">/* Process a frame of data into features. */</span> <a name="l00195"></a>00195 int32 fe_write_frame(<a class="code" href="structfe__s.html" title="Structure for the front-end computation.">fe_t</a> *fe, mfcc_t *fea); <a name="l00196"></a>00196 <a name="l00197"></a>00197 <span class="comment">/* Initialization functions. 
*/</span> <a name="l00198"></a>00198 int32 fe_build_melfilters(<a class="code" href="structmelfb__s.html" title="Base Struct to hold all structure for MFCC computation.">melfb_t</a> *MEL_FB); <a name="l00199"></a>00199 int32 fe_compute_melcosine(<a class="code" href="structmelfb__s.html" title="Base Struct to hold all structure for MFCC computation.">melfb_t</a> *MEL_FB); <a name="l00200"></a>00200 <span class="keywordtype">void</span> fe_create_hamming(window_t *in, int32 in_len); <a name="l00201"></a>00201 <span class="keywordtype">void</span> fe_create_twiddle(<a class="code" href="structfe__s.html" title="Structure for the front-end computation.">fe_t</a> *fe); <a name="l00202"></a>00202 <a name="l00203"></a>00203 <span class="comment">/* Miscellaneous processing functions. */</span> <a name="l00204"></a>00204 <span class="keywordtype">void</span> fe_spec2cep(<a class="code" href="structfe__s.html" title="Structure for the front-end computation.">fe_t</a> * fe, <span class="keyword">const</span> powspec_t * mflogspec, mfcc_t * mfcep); <a name="l00205"></a>00205 <span class="keywordtype">void</span> fe_dct2(<a class="code" href="structfe__s.html" title="Structure for the front-end computation.">fe_t</a> *fe, <span class="keyword">const</span> powspec_t *mflogspec, mfcc_t *mfcep, <span class="keywordtype">int</span> htk); <a name="l00206"></a>00206 <span class="keywordtype">void</span> fe_dct3(<a class="code" href="structfe__s.html" title="Structure for the front-end computation.">fe_t</a> *fe, <span class="keyword">const</span> mfcc_t *mfcep, powspec_t *mflogspec); <a name="l00207"></a>00207 <a name="l00208"></a>00208 <span class="preprocessor">#ifdef __cplusplus</span> <a name="l00209"></a>00209 <span class="preprocessor"></span>} <a name="l00210"></a>00210 <span class="preprocessor">#endif</span> <a name="l00211"></a>00211 <span class="preprocessor"></span> <a name="l00212"></a>00212 <span class="preprocessor">#endif </span><span class="comment">/* 
__FE_INTERNAL_H__ */</span> </pre></div></div> </div> <div id="nav-path" class="navpath"> <ul> <li class="navelem"><b>fe_internal.h</b> </li> <li class="footer">Generated on Tue Apr 19 2011 for SphinxBase by  <a href="http://www.doxygen.org/index.html"> <img class="footer" src="doxygen.png" alt="doxygen"/></a> 1.7.3 </li> </ul> </div> </body> </html>