SphinxBase 0.6
feat.h
Go to the documentation of this file.
1/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2/* ====================================================================
3 * Copyright (c) 1999-2004 Carnegie Mellon University. All rights
4 * reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 *
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 *
18 * This work was supported in part by funding from the Defense Advanced
19 * Research Projects Agency and the National Science Foundation of the
20 * United States of America, and the CMU Sphinx Speech Consortium.
21 *
22 * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
23 * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
24 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
26 * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33 *
34 * ====================================================================
35 *
36 */
37/*
38 * feat.h -- Cepstral features computation.
39 *
40 * **********************************************
41 * CMU ARPA Speech Project
42 *
43 * Copyright (c) 1999 Carnegie Mellon University.
44 * ALL RIGHTS RESERVED.
45 * **********************************************
46 *
47 * HISTORY
48 * $Log$
49 * Revision 1.1 2006/04/05 20:27:30 dhdfu
50 * A Great Reorganization of header files and executables
51 *
52 * Revision 1.17 2006/02/23 03:59:40 arthchan2003
53 * Merged from branch SPHINX3_5_2_RCI_IRII_BRANCH: a, Free buffers correctly. b, Fixed dox-doc.
54 *
55 * Revision 1.16.4.1 2005/07/05 06:25:08 arthchan2003
56 * Fixed dox-doc.
57 *
58 * Revision 1.16 2005/06/22 03:29:35 arthchan2003
59 * Makefile.am s for all subdirectory of libs3decoder/
60 *
61 * Revision 1.5 2005/06/13 04:02:56 archan
62 * Fixed most doxygen-style documentation under libs3decoder.
63 *
64 * Revision 1.4 2005/04/21 23:50:26 archan
65 * Some more refactoring on the how reporting of structures inside kbcore_t is done, it is now 50% nice. Also added class-based LM test case into test-decode.sh.in. At this moment, everything in search mode 5 is already done. It is time to test the idea whether the search can really be used.
66 *
67 * Revision 1.3 2005/03/30 01:22:46 archan
68 * Fixed mistakes in last updates. Add
69 *
70 *
71 * 20.Apr.2001 RAH (rhoughton@mediasite.com, ricky.houghton@cs.cmu.edu)
72 * Adding feat_free() to free allocated memory
73 *
74 * 04-Jan-1999 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University
75 * Started.
76 */
77
78
79#ifndef _S3_FEAT_H_
80#define _S3_FEAT_H_
81
82#include <stdio.h>
83
84/* Win32/WinCE DLL gunk */
85#include <sphinxbase/sphinxbase_export.h>
87#include <sphinxbase/fe.h>
88#include <sphinxbase/cmn.h>
89#include <sphinxbase/agc.h>
90
91#ifdef __cplusplus
92extern "C" {
93#endif
94#if 0
95/* Fool Emacs. */
96}
97#endif
98
/* NOTE(review): presumably the allocation block size (in frames) for
 * live-mode buffers; its use is not visible in this header -- confirm. */
#define LIVEBUFBLOCKSIZE 256
/* NOTE(review): presumably the maximum number of frames per utterance -- confirm. */
#define S3_MAX_FRAMES 15000 /* RAH, I believe this is still too large, but better than before */
105
/**
 * Option-table entries for the cepstra-to-feature conversion options.
 *
 * Expands to a comma-separated list of ten brace-enclosed initializers of the
 * form { option name, argument type, default value string, help text }, with
 * no trailing comma, so it can be spliced into a larger initializer list.
 *
 * ARG_STRING, ARG_INT32, ARG_BOOLEAN and ARG_FLOAT32 are not defined by this
 * header; the including code must provide them (presumably through
 * sphinxbase/cmd_ln.h -- confirm).
 */
#define cepstral_to_feature_command_line_macro() \
    { "-feat", ARG_STRING, "1s_c_d_dd", \
      "Feature stream type, depends on the acoustic model" }, \
    { "-ceplen", ARG_INT32, "13", \
      "Number of components in the input feature vector" }, \
    { "-cmn", ARG_STRING, "current", \
      "Cepstral mean normalization scheme ('current', 'prior', or 'none')" }, \
    { "-cmninit", ARG_STRING, "8.0", \
      "Initial values (comma-separated) for cepstral mean when 'prior' is used" }, \
    { "-varnorm", ARG_BOOLEAN, "no", \
      "Variance normalize each utterance (only if CMN == current)" }, \
    { "-agc", ARG_STRING, "none", \
      "Automatic gain control for c0 ('max', 'emax', 'noise', or 'none')" }, \
    { "-agcthresh", ARG_FLOAT32, "2.0", \
      "Initial threshold for automatic gain control" }, \
    { "-lda", ARG_STRING, NULL, \
      "File containing transformation matrix to be applied to features (single-stream features only)" }, \
    { "-ldadim", ARG_INT32, "0", \
      "Dimensionality of output of feature transformation (0 to use entire matrix)" }, \
    { "-svspec", ARG_STRING, NULL, \
      "Subvector specification (e.g., 24,0-11/25,12-23/26-38 or 0-12/13-25/26-38)" }
147
155typedef struct feat_s {
157 char *name;
158 int32 cepsize;
159 int32 n_stream;
160 uint32 *stream_len;
161 int32 window_size;
163 int32 n_sv;
164 uint32 *sv_len;
165 int32 **subvecs;
166 mfcc_t *sv_buf;
167 int32 sv_dim;
170 int32 varnorm;
186 void (*compute_feat)(struct feat_s *fcb, mfcc_t **input, mfcc_t **feat);
192 mfcc_t **cepbuf;
193 mfcc_t **tmpcepbuf;
194 int32 bufpos;
195 int32 curpos;
197 mfcc_t ***lda;
198 uint32 n_lda;
199 uint32 out_dim;
200} feat_t;
201
/*
 * Accessor macros for feat_t. Each takes a pointer to the structure; the
 * argument is fully parenthesized so that expressions such as &arr[0] or
 * p + 1 expand correctly. Note that several macros evaluate f more than
 * once, so arguments must not have side effects.
 */

/** Printable name of the feature type. */
#define feat_name(f) ((f)->name)
/** Size of the input speech vector. */
#define feat_cepsize(f) ((f)->cepsize)
/** Number of extra frames around an input frame needed to compute a feature. */
#define feat_window_size(f) ((f)->window_size)
/** Number of feature streams. */
#define feat_n_stream(f) ((f)->n_stream)
/** Vector length of feature stream i. */
#define feat_stream_len(f,i) ((f)->stream_len[i])
/** First dimension: number of subvectors if n_sv is non-zero, else number of streams. */
#define feat_dimension1(f) ((f)->n_sv ? (f)->n_sv : (f)->n_stream)
/** Second dimension for index i: out_dim when an LDA transform is set,
 *  else the subvector length when sv_len is set, else the stream length. */
#define feat_dimension2(f,i) ((f)->lda ? (f)->out_dim : ((f)->sv_len ? (f)->sv_len[i] : (f)->stream_len[i]))
/** Output dimensionality (out_dim). */
#define feat_dimension(f) ((f)->out_dim)
/** Pointer to the array of lengths matching feat_dimension2(): the address of
 *  out_dim when an LDA transform is set, else sv_len if set, else stream_len. */
#define feat_stream_lengths(f) ((f)->lda ? (&(f)->out_dim) : (f)->sv_len ? (f)->sv_len : (f)->stream_len)
242
265SPHINXBASE_EXPORT
266int32 **parse_subvecs(char const *str);
267
271SPHINXBASE_EXPORT
272void subvecs_free(int32 **subvecs);
273
274
287SPHINXBASE_EXPORT
288mfcc_t ***feat_array_alloc(feat_t *fcb,
290 int32 nfr
291 );
292
296SPHINXBASE_EXPORT
297mfcc_t ***feat_array_realloc(feat_t *fcb,
299 mfcc_t ***old_feat,
300 int32 ofr,
301 int32 nfr
302 );
303
307SPHINXBASE_EXPORT
308void feat_array_free(mfcc_t ***feat);
309
310
326SPHINXBASE_EXPORT
327feat_t *feat_init(char const *type,
331 int32 varnorm,
336 int32 breport,
337 int32 cepsize
340 );
341
346SPHINXBASE_EXPORT
347int32 feat_read_lda(feat_t *feat,
348 const char *ldafile,
349 int32 dim
350 );
351
355SPHINXBASE_EXPORT
356void feat_lda_transform(feat_t *fcb,
357 mfcc_t ***inout_feat,
358 uint32 nfr
359 );
360
379SPHINXBASE_EXPORT
380int feat_set_subvecs(feat_t *fcb, int32 **subvecs);
381
385SPHINXBASE_EXPORT
386void feat_print(feat_t *fcb,
387 mfcc_t ***feat,
388 int32 nfr,
389 FILE *fp
390 );
391
392
409SPHINXBASE_EXPORT
410int32 feat_s2mfc2feat(feat_t *fcb,
411 const char *file,
412 const char *dir,
414 const char *cepext,
417 int32 sf, int32 ef, /* Start/End frames
418 within file to be read. Use
419 0,-1 to process entire
420 file */
421 mfcc_t ***feat,
423 int32 maxfr
427 );
428
429
458SPHINXBASE_EXPORT
459int32 feat_s2mfc2feat_live(feat_t *fcb,
460 mfcc_t **uttcep,
461 int32 *inout_ncep,
463 int32 beginutt,
464 int32 endutt,
465 mfcc_t ***ofeat
468 );
469
470
476SPHINXBASE_EXPORT
478
484SPHINXBASE_EXPORT
485int feat_free(feat_t *f
486 );
487
491SPHINXBASE_EXPORT
492void feat_report(feat_t *f
493 );
494#ifdef __cplusplus
495}
496#endif
497
498
499#endif
routine that implements automatic gain control
enum agc_type_e agc_type_t
Types of acoustic gain control to apply to the features.
Apply Cepstral Mean Normalization (CMN) to the set of input mfc frames.
enum cmn_type_e cmn_type_t
Types of cepstral mean normalization to apply to the features.
SPHINXBASE_EXPORT void cmn(cmn_t *cmn, mfcc_t **mfc, int32 varnorm, int32 n_frame)
CMN for the whole sentence.
Definition cmn.c:150
SPHINXBASE_EXPORT mfcc_t *** feat_array_alloc(feat_t *fcb, int32 nfr)
Allocate an array to hold several frames worth of feature vectors.
Definition feat.c:356
SPHINXBASE_EXPORT feat_t * feat_init(char const *type, cmn_type_t cmn, int32 varnorm, agc_type_t agc, int32 breport, int32 cepsize)
Initialize feature module to use the selected type of feature stream.
Definition feat.c:705
SPHINXBASE_EXPORT int32 ** parse_subvecs(char const *str)
Parse subvector specification string.
Definition feat.c:169
SPHINXBASE_EXPORT int32 feat_s2mfc2feat_live(feat_t *fcb, mfcc_t **uttcep, int32 *inout_ncep, int32 beginutt, int32 endutt, mfcc_t ***ofeat)
Feature computation routine for live mode decoder.
Definition feat.c:1307
SPHINXBASE_EXPORT void feat_lda_transform(feat_t *fcb, mfcc_t ***inout_feat, uint32 nfr)
Transform a block of features using the feature module's LDA transform.
Definition lda.c:139
SPHINXBASE_EXPORT int32 feat_read_lda(feat_t *feat, const char *ldafile, int32 dim)
Add an LDA transformation to the feature module from a file.
Definition lda.c:61
SPHINXBASE_EXPORT mfcc_t *** feat_array_realloc(feat_t *fcb, mfcc_t ***old_feat, int32 ofr, int32 nfr)
Reallocate the array of features.
Definition feat.c:389
SPHINXBASE_EXPORT int32 feat_s2mfc2feat(feat_t *fcb, const char *file, const char *dir, const char *cepext, int32 sf, int32 ef, mfcc_t ***feat, int32 maxfr)
Read a specified MFC file (or given segment within it), perform CMN/AGC as indicated by fcb,...
Definition feat.c:1169
SPHINXBASE_EXPORT int feat_free(feat_t *f)
Release resource associated with feat_t.
Definition feat.c:1430
SPHINXBASE_EXPORT int feat_set_subvecs(feat_t *fcb, int32 **subvecs)
Add a subvector specification to the feature module.
Definition feat.c:277
SPHINXBASE_EXPORT void subvecs_free(int32 **subvecs)
Free array of subvector specs.
Definition feat.c:267
SPHINXBASE_EXPORT void feat_array_free(mfcc_t ***feat)
Free a buffer allocated with feat_array_alloc()
Definition feat.c:418
SPHINXBASE_EXPORT void feat_report(feat_t *f)
Report the feat_t data structure.
Definition feat.c:1461
SPHINXBASE_EXPORT feat_t * feat_retain(feat_t *f)
Retain ownership of feat_t.
Definition feat.c:1423
SPHINXBASE_EXPORT void feat_print(feat_t *fcb, mfcc_t ***feat, int32 nfr, FILE *fp)
Print the given block of feature vectors to the given FILE.
Definition feat.c:895
Basic type definitions used in Sphinx.
Structure holding data for doing AGC.
Definition agc.h:113
wrapper of operation of the cepstral mean normalization.
Definition cmn.h:128
Definition feat.h:154
int32 window_size
Number of extra frames around given input frame needed to compute corresponding output feature (so to...
Definition feat.h:160
int32 ** subvecs
Subvector specification (or NULL for none)
Definition feat.h:164
mfcc_t ** tmpcepbuf
Array of pointers into cepbuf to handle border cases.
Definition feat.h:192
uint32 n_lda
Number of linear transformations in lda.
Definition feat.h:197
mfcc_t *** lda
Array of linear transformations (for LDA, MLLT, or whatever)
Definition feat.h:196
agc_t * agc_struct
Structure that stores the temporary variables for acoustic gain control.
Definition feat.h:188
int32 sv_dim
Total dimensionality of subvector (length of sv_buf)
Definition feat.h:166
cmn_type_t cmn
Type of CMN to be performed on each utterance.
Definition feat.h:168
agc_type_t agc
Type of AGC to be performed on each utterance.
Definition feat.h:171
int32 n_stream
Number of feature streams; e.g., 4 in Sphinx-II.
Definition feat.h:158
int refcount
Reference count.
Definition feat.h:155
uint32 * stream_len
Vector length of each feature stream.
Definition feat.h:159
int32 varnorm
Whether variance normalization is to be performed on each utt; Irrelevant if no CMN is performed.
Definition feat.h:169
mfcc_t ** cepbuf
Circular buffer of MFCC frames for live feature computation.
Definition feat.h:191
int32 bufpos
Write index in cepbuf.
Definition feat.h:193
void(* compute_feat)(struct feat_s *fcb, mfcc_t **input, mfcc_t **feat)
Feature computation function.
Definition feat.h:185
char * name
Printable name for this feature type.
Definition feat.h:156
uint32 out_dim
Output dimensionality.
Definition feat.h:198
uint32 * sv_len
Vector length of each subvector.
Definition feat.h:163
int32 curpos
Read index in cepbuf.
Definition feat.h:194
int32 cepsize
Size of input speech vector (typically, a cepstrum vector)
Definition feat.h:157
cmn_t * cmn_struct
Structure that stores the temporary variables for cepstral means normalization.
Definition feat.h:186
mfcc_t * sv_buf
Temporary copy buffer for subvector projection.
Definition feat.h:165
int32 n_sv
Number of subvectors.
Definition feat.h:162
Structure for describing a speech feature type.