SphinxBase 0.6
fe.h
/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
/* ====================================================================
 * Copyright (c) 1996-2004 Carnegie Mellon University. All rights
 * reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in
 * the documentation and/or other materials provided with the
 * distribution.
 *
 * This work was supported in part by funding from the Defense Advanced
 * Research Projects Agency and the National Science Foundation of the
 * United States of America, and the CMU Sphinx Speech Consortium.
 *
 * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
 * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
 * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * ====================================================================
 *
 */
37
/*
 * fe.h
 *
 * $Log: fe.h,v $
 * Revision 1.11 2005/02/05 02:15:02 egouvea
 * Removed fe_process(), never used
 *
 * Revision 1.10 2004/12/10 16:48:55 rkm
 * Added continuous density acoustic model handling
 *
 *
 */
50
/*
 * Windows compatibility shims: on WIN32 builds other than GNUWINCE, route the
 * srand48()/lrand48() names to the C standard library's srand()/rand().
 *
 * These definitions sit ahead of the include guard in this file, so they are
 * re-evaluated on every inclusion; the repeated definitions are
 * token-identical, which the preprocessor accepts.
 *
 * NOTE(review): rand() only guarantees values up to RAND_MAX, which may be
 * far smaller than the range lrand48() provides -- confirm that callers do
 * not depend on the full range.
 */
#if defined(WIN32) && !defined(GNUWINCE)
#define srand48(x) srand(x)
#define lrand48() rand()
#endif
55
56#ifndef _NEW_FE_H_
57#define _NEW_FE_H_
58
59/* Win32/WinCE DLL gunk */
60#include <sphinxbase/sphinxbase_export.h>
61
62#include <sphinxbase/cmd_ln.h>
63#include <sphinxbase/fixpoint.h>
64
65#ifdef __cplusplus
66extern "C" {
67#endif
68#if 0
69/* Fool Emacs. */
70}
71#endif
72
/*
 * NATIVE_ENDIAN: byte order of the build target, spelled as a string
 * ("big" when WORDS_BIGENDIAN is defined, "little" otherwise).
 * It is the default value of the "-input_endian" option declared in
 * waveform_to_cepstral_command_line_macro().
 */
#ifdef WORDS_BIGENDIAN
#define NATIVE_ENDIAN "big"
#else
#define NATIVE_ENDIAN "little"
#endif
78
/*
 * Default front-end parameters.
 *
 * Most of these are fed through ARG_STRINGIFY() in
 * waveform_to_cepstral_command_line_macro() to become the textual default of
 * an option, so they must stay plain macro literals: wrapping a value in
 * parentheses or turning it into an enum constant would change the generated
 * default string.
 * NOTE(review): units (Hz, seconds, samples) are not stated anywhere in this
 * header -- confirm against the implementation before relying on them.
 */

/** Default for "-samprate" (sampling rate). */
#define DEFAULT_SAMPLING_RATE 16000
/** Default for "-frate" (frame rate). */
#define DEFAULT_FRAME_RATE 100
/**
 * Default frame shift. Not referenced by the option table in this header;
 * numerically equal to DEFAULT_SAMPLING_RATE / DEFAULT_FRAME_RATE.
 */
#define DEFAULT_FRAME_SHIFT 160
/** Default for "-wlen" (Hamming window length). */
#define DEFAULT_WINDOW_LENGTH 0.025625
/** Default for "-nfft" (size of FFT). */
#define DEFAULT_FFT_SIZE 512
/** Default for "-ncep" (number of cepstral coefficients). */
#define DEFAULT_NUM_CEPSTRA 13
/** Default for "-nfilt" (number of filter banks). */
#define DEFAULT_NUM_FILTERS 40
/** Default for "-lowerf" (lower edge of filters). */
#define DEFAULT_LOWER_FILT_FREQ 133.33334
/** Default for "-upperf" (upper edge of filters). */
#define DEFAULT_UPPER_FILT_FREQ 6855.4976
/** Default for "-alpha" (preemphasis parameter). */
#define DEFAULT_PRE_EMPHASIS_ALPHA 0.97
/** Default for "-warp_type" (warping function type). */
#define DEFAULT_WARP_TYPE "inverse_linear"
/**
 * Default for "-seed". The option's help text says a value below zero lets
 * the front end pick its own seed. Intentionally NOT parenthesised: it is
 * passed to ARG_STRINGIFY(), which presumably must yield "-1".
 */
#define SEED -1
104
/**
 * Option table fragment for the waveform-to-cepstra front end.
 *
 * Expands to a comma-separated list of brace-enclosed initialisers, one per
 * option, each holding four fields in this order: option name, ARG_* type
 * tag, default value as a string (or NULL for none), and help text.
 * The last entry has no trailing comma, so the caller can follow the macro
 * with further entries or a terminator of its own.
 *
 * The expansion relies on names that must be visible at the point of use:
 * ARG_BOOLEAN, ARG_STRING, ARG_FLOAT32, ARG_INT32, ARG_STRINGIFY, NULL,
 * NATIVE_ENDIAN, SEED and the DEFAULT_* constants.
 * NOTE(review): the initialisers are presumably meant for arg_t elements --
 * confirm against cmd_ln.h.
 */
#define waveform_to_cepstral_command_line_macro() \
    { "-logspec", \
      ARG_BOOLEAN, \
      "no", \
      "Write out logspectral files instead of cepstra" }, \
    \
    { "-smoothspec", \
      ARG_BOOLEAN, \
      "no", \
      "Write out cepstral-smoothed logspectral files" }, \
    \
    { "-transform", \
      ARG_STRING, \
      "legacy", \
      "Which type of transform to use to calculate cepstra (legacy, dct, or htk)" }, \
    \
    { "-alpha", \
      ARG_FLOAT32, \
      ARG_STRINGIFY(DEFAULT_PRE_EMPHASIS_ALPHA), \
      "Preemphasis parameter" }, \
    \
    { "-samprate", \
      ARG_FLOAT32, \
      ARG_STRINGIFY(DEFAULT_SAMPLING_RATE), \
      "Sampling rate" }, \
    \
    { "-frate", \
      ARG_INT32, \
      ARG_STRINGIFY(DEFAULT_FRAME_RATE), \
      "Frame rate" }, \
    \
    { "-wlen", \
      ARG_FLOAT32, \
      ARG_STRINGIFY(DEFAULT_WINDOW_LENGTH), \
      "Hamming window length" }, \
    \
    { "-nfft", \
      ARG_INT32, \
      ARG_STRINGIFY(DEFAULT_FFT_SIZE), \
      "Size of FFT" }, \
    \
    { "-nfilt", \
      ARG_INT32, \
      ARG_STRINGIFY(DEFAULT_NUM_FILTERS), \
      "Number of filter banks" }, \
    \
    { "-lowerf", \
      ARG_FLOAT32, \
      ARG_STRINGIFY(DEFAULT_LOWER_FILT_FREQ), \
      "Lower edge of filters" }, \
    \
    { "-upperf", \
      ARG_FLOAT32, \
      ARG_STRINGIFY(DEFAULT_UPPER_FILT_FREQ), \
      "Upper edge of filters" }, \
    \
    { "-unit_area", \
      ARG_BOOLEAN, \
      "yes", \
      "Normalize mel filters to unit area" }, \
    \
    { "-round_filters", \
      ARG_BOOLEAN, \
      "yes", \
      "Round mel filter frequencies to DFT points" }, \
    \
    { "-ncep", \
      ARG_INT32, \
      ARG_STRINGIFY(DEFAULT_NUM_CEPSTRA), \
      "Number of cep coefficients" }, \
    \
    { "-doublebw", \
      ARG_BOOLEAN, \
      "no", \
      "Use double bandwidth filters (same center freq)" }, \
    \
    { "-lifter", \
      ARG_INT32, \
      "0", \
      "Length of sin-curve for liftering, or 0 for no liftering." }, \
    \
    { "-input_endian", \
      ARG_STRING, \
      NATIVE_ENDIAN, \
      "Endianness of input data, big or little, ignored if NIST or MS Wav" }, \
    \
    { "-warp_type", \
      ARG_STRING, \
      DEFAULT_WARP_TYPE, \
      "Warping function type (or shape)" }, \
    \
    { "-warp_params", \
      ARG_STRING, \
      NULL, \
      "Parameters defining the warping function" }, \
    \
    { "-dither", \
      ARG_BOOLEAN, \
      "no", \
      "Add 1/2-bit noise" }, \
    \
    { "-seed", \
      ARG_INT32, \
      ARG_STRINGIFY(SEED), \
      "Seed for random number generator; if less than zero, pick our own" }, \
    \
    { "-remove_dc", \
      ARG_BOOLEAN, \
      "no", \
      "Remove DC offset from each frame" }, \
    \
    { "-verbose", \
      ARG_BOOLEAN, \
      "no", \
      "Show input filenames" }
221
222#ifdef FIXED_POINT
224typedef fixed32 mfcc_t;
225
227#define FLOAT2MFCC(x) FLOAT2FIX(x)
229#define MFCC2FLOAT(x) FIX2FLOAT(x)
231#define MFCCMUL(a,b) FIXMUL(a,b)
232#define MFCCLN(x,in,out) FIXLN_ANY(x,in,out)
233#else /* !FIXED_POINT */
234
236typedef float32 mfcc_t;
238#define FLOAT2MFCC(x) (x)
240#define MFCC2FLOAT(x) (x)
242#define MFCCMUL(a,b) ((a)*(b))
243#define MFCCLN(x,in,out) log(x)
244#endif /* !FIXED_POINT */
245
/**
 * Structure for the front-end computation.
 *
 * Only the tag is declared here, so the type is opaque to users of this
 * header and is handled exclusively through fe_t pointers.
 */
typedef struct fe_s fe_t;
250
/**
 * Status codes for the front end.
 *
 * Zero means success; every failure code is negative. Two names share the
 * value 0 (FE_SUCCESS and FE_OUTPUT_FILE_SUCCESS), so they cannot be told
 * apart at run time.
 * NOTE(review): which functions return which code is not visible in this
 * header -- confirm against the implementation.
 */
enum fe_error_e {
    FE_SUCCESS                 = 0,
    FE_OUTPUT_FILE_SUCCESS     = 0,
    FE_CONTROL_FILE_ERROR      = -1,
    FE_START_ERROR             = -2,
    FE_UNKNOWN_SINGLE_OR_BATCH = -3,
    FE_INPUT_FILE_OPEN_ERROR   = -4,
    FE_INPUT_FILE_READ_ERROR   = -5,
    FE_MEM_ALLOC_ERROR         = -6,
    FE_OUTPUT_FILE_WRITE_ERROR = -7,
    FE_OUTPUT_FILE_OPEN_ERROR  = -8,
    FE_ZERO_ENERGY_ERROR       = -9,
    FE_INVALID_PARAM_ERROR     = -10
};
268
276SPHINXBASE_EXPORT
277fe_t* fe_init_auto(void);
278
286SPHINXBASE_EXPORT
287arg_t const *fe_get_args(void);
288
299SPHINXBASE_EXPORT
300fe_t *fe_init_auto_r(cmd_ln_t *config);
301
309SPHINXBASE_EXPORT
310const cmd_ln_t *fe_get_config(fe_t *fe);
311
316SPHINXBASE_EXPORT
317int fe_start_utt(fe_t *fe);
318
331SPHINXBASE_EXPORT
332int fe_get_output_size(fe_t *fe);
333
346SPHINXBASE_EXPORT
347void fe_get_input_size(fe_t *fe, int *out_frame_shift,
348 int *out_frame_size);
349
364SPHINXBASE_EXPORT
365int fe_end_utt(fe_t *fe, mfcc_t *out_cepvector, int32 *out_nframes);
366
372SPHINXBASE_EXPORT
373fe_t *fe_retain(fe_t *fe);
374
382SPHINXBASE_EXPORT
383int fe_free(fe_t *fe);
384
393SPHINXBASE_EXPORT
394int fe_process_frame(fe_t *fe, int16 const *spch,
395 int32 nsamps, mfcc_t *out_cep);
396
444SPHINXBASE_EXPORT
445int fe_process_frames(fe_t *fe,
446 int16 const **inout_spch,
447 size_t *inout_nsamps,
448 mfcc_t **buf_cep,
449 int32 *inout_nframes);
450
466SPHINXBASE_EXPORT
467int fe_process_utt(fe_t *fe,
468 int16 const *spch,
469 size_t nsamps,
470 mfcc_t ***cep_block,
471 int32 *nframes
472 );
473
477SPHINXBASE_EXPORT
478void fe_free_2d(void *arr);
479
483SPHINXBASE_EXPORT
484int fe_mfcc_to_float(fe_t *fe,
485 mfcc_t **input,
486 float32 **output,
487 int32 nframes);
488
492SPHINXBASE_EXPORT
493int fe_float_to_mfcc(fe_t *fe,
494 float32 **input,
495 mfcc_t **output,
496 int32 nframes);
497
521SPHINXBASE_EXPORT
522int fe_logspec_to_mfcc(fe_t *fe,
523 const mfcc_t *fr_spec,
524 mfcc_t *fr_cep
525 );
526
535SPHINXBASE_EXPORT
536int fe_logspec_dct2(fe_t *fe,
537 const mfcc_t *fr_spec,
538 mfcc_t *fr_cep
539 );
540
549SPHINXBASE_EXPORT
550int fe_mfcc_dct3(fe_t *fe,
551 const mfcc_t *fr_cep,
552 mfcc_t *fr_spec
553 );
554
555#ifdef __cplusplus
556}
557#endif
558
559
560#endif
/*
 * Command-line and other configuration parsing and handling.
 * Argument definition structure.
 * Opaque structure used to hold the results of command-line parsing.
 * Structure for the front-end computation.
 */