SphinxBase 0.6
fe_internal.h
1/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2/* ====================================================================
3 * Copyright (c) 1996-2004 Carnegie Mellon University. All rights
4 * reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 *
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 *
18 * This work was supported in part by funding from the Defense Advanced
19 * Research Projects Agency and the National Science Foundation of the
20 * United States of America, and the CMU Sphinx Speech Consortium.
21 *
22 * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
23 * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
24 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
26 * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33 *
34 * ====================================================================
35 *
36 */
37
38#ifndef __FE_INTERNAL_H__
39#define __FE_INTERNAL_H__
40
41#ifdef HAVE_CONFIG_H
42#include <config.h>
43#endif
44
45#include "sphinxbase/fe.h"
46#include "sphinxbase/fixpoint.h"
47
48#ifdef __cplusplus
49extern "C" {
50#endif
51#if 0
52/* Fool Emacs. */
53}
54#endif
55
56#ifdef FIXED16
57/* Q15 format */
58typedef int16 frame_t;
59typedef int16 window_t;
60typedef int32 powspec_t;
61typedef struct { int16 r, i; } complex;
62#elif defined(FIXED_POINT)
63typedef fixed32 frame_t;
64typedef int32 powspec_t;
65typedef fixed32 window_t;
66typedef struct { fixed32 r, i; } complex;
67#else /* FIXED_POINT */
68typedef float64 frame_t;
69typedef float64 powspec_t;
70typedef float64 window_t;
71typedef struct { float64 r, i; } complex;
72#endif /* FIXED_POINT */
73
/* Values for the 'log_spec' field of struct fe_s. */
enum {
    RAW_LOG_SPEC = 1,
    SMOOTH_LOG_SPEC = 2
};
79
/* Values for the 'transform' field of struct fe_s. */
enum {
    LEGACY_DCT = 0,
    DCT_II = 1,
    DCT_HTK = 2
};
86
87typedef struct melfb_s melfb_t;
89struct melfb_s {
90 float32 sampling_rate;
91 int32 num_cepstra;
92 int32 num_filters;
93 int32 fft_size;
94 float32 lower_filt_freq;
95 float32 upper_filt_freq;
96 /* DCT coefficients. */
97 mfcc_t **mel_cosine;
98 /* Filter coefficients. */
99 mfcc_t *filt_coeffs;
100 int16 *spec_start;
101 int16 *filt_start;
102 int16 *filt_width;
103 /* Luxury mobile home. */
104 int32 doublewide;
105 char const *warp_type;
106 char const *warp_params;
107 uint32 warp_id;
108 /* Precomputed normalization constants for unitary DCT-II/DCT-III */
109 mfcc_t sqrt_inv_n, sqrt_inv_2n;
110 /* Value and coefficients for HTK-style liftering */
111 int32 lifter_val;
112 mfcc_t *lifter;
113 /* Normalize filters to unit area */
114 int32 unit_area;
115 /* Round filter frequencies to DFT points (hurts accuracy, but is
116 useful for legacy purposes) */
117 int32 round_filters;
118};
119
/* sqrt(1/2), also used for unitary DCT-II/DCT-III */
#define SQRT_HALF FLOAT2MFCC(0.707106781186548)
122
124struct fe_s {
125 cmd_ln_t *config;
126 int refcount;
127
128 float32 sampling_rate;
129 int16 frame_rate;
130 int16 frame_shift;
131
132 float32 window_length;
133 int16 frame_size;
134 int16 fft_size;
135
136 uint8 fft_order;
137 uint8 feature_dimension;
138 uint8 num_cepstra;
139 uint8 remove_dc;
140 uint8 log_spec;
141 uint8 swap;
142 uint8 dither;
143 uint8 transform;
144
145 float32 pre_emphasis_alpha;
146 int32 seed;
147
148 int16 frame_counter;
149 uint8 start_flag;
150 uint8 reserved;
151
152 /* Twiddle factors for FFT. */
153 frame_t *ccc, *sss;
154 /* Mel filter parameters. */
155 melfb_t *mel_fb;
156 /* Half of a Hamming Window. */
157 window_t *hamming_window;
158
159 /* Temporary buffers for processing. */
160 /* FIXME: too many of these. */
161 int16 *spch;
162 frame_t *frame;
163 powspec_t *spec, *mfspec;
164 int16 *overflow_samps;
165 int16 num_overflow_samps;
166 int16 prior;
167};
168
/*
 * Defaults paired with BB_SAMPLING_RATE.
 * NOTE(review): "BB" presumably stands for broadband -- confirm.
 */
#define BB_SAMPLING_RATE 16000
#define DEFAULT_BB_FFT_SIZE 512
#define DEFAULT_BB_FRAME_SHIFT 160
#define DEFAULT_BB_NUM_FILTERS 40
#define DEFAULT_BB_LOWER_FILT_FREQ 133.33334
#define DEFAULT_BB_UPPER_FILT_FREQ 6855.4976
175
/*
 * Defaults paired with NB_SAMPLING_RATE.
 * NOTE(review): "NB" presumably stands for narrowband -- confirm.
 */
#define NB_SAMPLING_RATE 8000
#define DEFAULT_NB_FFT_SIZE 256
#define DEFAULT_NB_FRAME_SHIFT 80
#define DEFAULT_NB_NUM_FILTERS 31
#define DEFAULT_NB_LOWER_FILT_FREQ 200
#define DEFAULT_NB_UPPER_FILT_FREQ 3500
182
183void fe_init_dither(int32 seed);
184
185/* Apply 1/2 bit noise to a buffer of audio. */
186int32 fe_dither(int16 *buffer, int32 nsamps);
187
188/* Load a frame of data into the fe. */
189int fe_read_frame(fe_t *fe, int16 const *in, int32 len);
190
191/* Shift the input buffer back and read more data. */
192int fe_shift_frame(fe_t *fe, int16 const *in, int32 len);
193
194/* Process a frame of data into features. */
195int32 fe_write_frame(fe_t *fe, mfcc_t *fea);
196
197/* Initialization functions. */
198int32 fe_build_melfilters(melfb_t *MEL_FB);
199int32 fe_compute_melcosine(melfb_t *MEL_FB);
200void fe_create_hamming(window_t *in, int32 in_len);
201void fe_create_twiddle(fe_t *fe);
202
203/* Miscellaneous processing functions. */
204void fe_spec2cep(fe_t * fe, const powspec_t * mflogspec, mfcc_t * mfcep);
205void fe_dct2(fe_t *fe, const powspec_t *mflogspec, mfcc_t *mfcep, int htk);
206void fe_dct3(fe_t *fe, const mfcc_t *mfcep, powspec_t *mflogspec);
207
208#ifdef __cplusplus
209}
210#endif
211
212#endif /* __FE_INTERNAL_H__ */
Opaque structure used to hold the results of command-line parsing.
Definition f2c.h:16
Structure for the front-end computation.
Base Struct to hold all structure for MFCC computation.
Definition fe_internal.h:89