Sophie

Sophie

distrib > Mandriva > 2011.0 > i586 > media > contrib-release-debug > by-pkgid > a3f7190ce7be4b857b18774d441886bf > files > 25

cvoicecontrol-debug-0.9-0.alpha.7mdv2011.0.i586.rpm

/***************************************************************************
                          cvoicecontrol.h  -  a simple speech recognizer
                             -------------------
    begin                : Sat Feb 12 2000
    copyright            : (C) 2000 by Daniel Kiecza
    email                : daniel@kiecza.de
 ***************************************************************************/

/***************************************************************************
 *                                                                         *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of the GNU General Public License as published by  *
 *   the Free Software Foundation; either version 2 of the License, or     *
 *   (at your option) any later version.                                   *
 *                                                                         *
 ***************************************************************************/

#ifndef CVOICECONTROL_H
#define CVOICECONTROL_H

/********************************************************************************
 * Symmetric Sakoe & Chiba warping function:
 ********************************************************************************
 *
 *
 *     |-------|-------|-------|-------|
 *     |       |       |       |       |   The element x in the DTW matrix
 *     |       |       |       |       |   is calculated as the minimum of
 *     |       |       |       |       |   the three paths shown in the
 *     |-------|-------|-------|-------|   diagram to the left:
 *     |i-2/j  |i-1/j  |i/j    |       |
 *     |       |   o-------x   |       |    P1: M[i-2][j-1] +
 *     |      P1__/    |__/|   |       |         2 x Dist(sample[j], last_frame) +
 *     |-----__/-----__/---|---|-------|         Dist(sample[j], frame)
 *     |    /  |    /P2|   |   |       |
 *     |   o   |   o   |   o   |       |    P2: M[i-1][j-1] +
 *     |       |       |__/    |       |         2 x Dist(sample[j], frame)
 *     |-------|-----__/P3-----|-------|
 *     |       |    /  |       |       |    P3: M[i-1][j-2] +
 *     |       |   o   |       |       |         2 x Dist(sample[j-1], frame) +
 *     |       |       |i/j-2  |       |         Dist(sample[j], frame)
 *     |-------|-------|-------|-------|
 *
 */

/********************************************************************************
 * recognizer variables
 ********************************************************************************
 *
 *
 * width of the diagonal window in the DTW matrix that the
 *  evaluation is limited to.
 * This is used to reduce the number of DTW matrix elements
 *  that need to be calculated
 *            _________________________
 *           |                  *     .|    \
 *           |               *     .   |     |
 *           |            *     .     *|     |
 *           |         *<-w->.     *   |     |
 * reference |      *     .     *      |      > DTW-Matrix
 *           |   *     .     *         |     |
 *           |*     .<-w->*            |     |
 *           |   .     *               |     |
 *           |._____*__________________|    /
 *                    sample
 *
 *       Legend:  w = adjust_window_width
 */
/*
 * Width of the diagonal evaluation window (the `w` in the diagram above).
 *
 * NOTE(review): declared without `extern`, so every translation unit that
 * includes this header gets its own tentative definition. That only links
 * when the toolchain merges common symbols (-fcommon); GCC 10 and later
 * default to -fno-common. Consider `extern` here plus a single definition
 * in one .c file.
 */
int adjust_window_width;

/*
 * strictly speaking, time-alignment in the DTW matrix starts
 *  in the bottom left corner.
 * This value allows alignment to start up to `sloppy_corner`
 *  elements away from the corner
 *            _____ ... ____            _____ ... ____
 *          r|o             |          |o             |    \
 *          e|o             |          |o             |     |
 *          f|o             |          |o             |     |
 *          e|o             | -------> |o             |     |
 * sloppy_  r|o     ...     | sloppy_  |o     ...     |      > DTW-Matrix
 *  corner  e|o             |  corner  |o             |     |
 *  = 0     n|o             |  = 3     |x             |     |
 *          c|o             |          |x             |     |
 *          e|xoooo ... oooo|          |xxxoo ... oooo|    /
 *                sample                    sample
 *
 *    Legend: x = distance value,  o = (infinity)
 */
/*
 * Number of elements away from the bottom-left corner at which time
 * alignment is still allowed to start (see the diagram above).
 *
 * NOTE(review): declared without `extern`, so every translation unit that
 * includes this header gets its own tentative definition. That only links
 * when the toolchain merges common symbols (-fcommon); GCC 10 and later
 * default to -fno-common. Consider `extern` here plus a single definition
 * in one .c file.
 */
int sloppy_corner;

/*****
  time alignment scores must stay below this value;
  otherwise the corresponding samples are ignored in
  the further evaluation process

  NOTE(review): declared without `extern`, so every translation
  unit that includes this header gets its own tentative definition.
  That only links when the toolchain merges common symbols
  (-fcommon); GCC 10 and later default to -fno-common. Consider
  `extern` here plus a single definition in one .c file.
  *****/
float score_threshold;

/*****
  a (very high) float value that is considered "infinity"
  (presumably the value behind the "o = (infinity)" entries of
  the DTW matrix described for sloppy_corner -- TODO confirm)

  NOTE(review): declared without `extern`, so every translation
  unit that includes this header gets its own tentative definition.
  That only links when the toolchain merges common symbols
  (-fcommon); GCC 10 and later default to -fno-common. Consider
  `extern` here plus a single definition in one .c file.
  *****/
float float_max;

/***** these macros are used in the DTW warping function
  to calculate the maximum/minimum of two or three
  variables

  Every argument is wrapped in parentheses so that an argument
  containing a low-precedence operator (e.g. `x | 1`) is compared
  as a whole instead of being torn apart by `>` / `<`.

  The arguments may still be evaluated more than once: do not
  pass expressions with side effects (i++, calls that modify
  state, ...).

  On a tie, MAX2 and MIN2 yield their second argument.
  *****/
#define MAX2(a,b) (((a)>(b))?(a):(b))
#define MAX3(a,b,c) (((a)>(b))?(MAX2((a),(c))):(MAX2((b),(c))))
#define MIN2(a,b) (((a)<(b))?(a):(b))
#define MIN3(a,b,c) (((a)<(b))?(MIN2((a),(c))):(MIN2((b),(c))))

/********************************************************************************
 * preprocessing variables
 ********************************************************************************/

/* (none) */

/********************************************************************************
 * recording variables
 ********************************************************************************
 *
 * Recording is started when the microphone level exceeds a
 * certain threshold.
 * Some phones (like 's') produce a relatively small level
 * and are therefore likely to be cut from the beginning of
 * an utterance.
 * To overcome this, the following strategy is used:
 * in a circular 'prefetch' buffer incoming chunks of audio
 * data are stored constantly:
 *
 *      _____     _____     _____     _____     _____
 *     |     |   |     |   |     |   |     |   |     |
 *     |  1  |-->|  2  |-->|  3  |-->|  4  |-->|  5  |--
 *     |_____|   |_____|   |_____|   |_____|   |_____|  |
 *       / \                                            |
 *        |_____________________________________________|
 *
 *
 * The first chunk is put in box '0', the next chunk in box '1'
 * ... the fifth chunk is put in box '4', the sixth chunk is
 * put in box '0'; generally the n-th chunk is put in
 * box '(n-1) modulo 5'.
 * As a result, this circular buffer always contains the five
 * last chunks of the acoustic history.
 * When the actual recording is started, we can extract these
 * five chunks from the buffer and prepend them (in the proper
 * order) to the recorded data.
 *
 * prefetch        circular buffer
 * prefetch_N      number of boxes
 * prefetch_pos    current position/box in buffer
 */

#endif