SphinxBase 0.6
cont_ad.h
Go to the documentation of this file.
1/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2/* ====================================================================
3 * Copyright (c) 1999-2001 Carnegie Mellon University. All rights
4 * reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 *
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 *
18 * This work was supported in part by funding from the Defense Advanced
19 * Research Projects Agency and the National Science Foundation of the
20 * United States of America, and the CMU Sphinx Speech Consortium.
21 *
22 * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
23 * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
24 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
26 * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33 *
34 * ====================================================================
35 *
36 */
37/*
38 * cont_ad.h -- Continuous A/D listening and silence filtering module.
39 *
40 * **********************************************
41 * CMU ARPA Speech Project
42 *
43 * Copyright (c) 1996 Carnegie Mellon University.
44 * ALL RIGHTS RESERVED.
45 * **********************************************
46 *
47 * HISTORY
48 *
49 * 13-Jul-98 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University
50 * Added spf and adbufsize to cont_ad_t in order to support variable
51 * frame sizes depending on audio sampling rate.
52 *
53 * 30-Jun-98 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University
54 * Added FILE* argument to cont_ad_powhist_dump().
55 *
56 * 16-Jan-98 Paul Placeway (pwp@cs.cmu.edu) at Carnegie Mellon University
57 * Changed to use dB instead of the weird power measure.
58 * Added most system parameters to cont_ad_t instead of hardwiring
59 * them in cont_ad.c.
60 * Added cont_ad_set_params() and cont_ad_get_params().
61 *
62 * 28-Jul-96 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University
63 * Added cont_ad_t.siglvl.
64 *
65 * 27-Jun-96 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University
66 * Added the option for cont_ad_read to return -1 on EOF.
67 *
68 * 21-Jun-96 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University
69 * Added cont_ad_set_thresh().
70 *
71 * 20-Jun-96 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University
72 * Separated thresholds for speech and silence.
73 *
74 * 17-Jun-96 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University
75 * Created, based loosely on Steve Reed's original implementation.
76 */
77
78
79#ifndef _CONT_AD_H_
80#define _CONT_AD_H_
81
82/* Win32/WinCE DLL gunk */
83#include <sphinxbase/sphinxbase_export.h>
85#include <sphinxbase/ad.h>
86
114#include <stdio.h>
115
116
117#ifdef __cplusplus
118extern "C" {
119#endif
120#if 0
121/* Fool Emacs. */
122}
123#endif
124
125/* States of continuous listening module; cont_ad_t.state holds one of these
   after a cont_ad_read call. */
126#define CONT_AD_STATE_SIL 0
127#define CONT_AD_STATE_SPEECH 1
128
129
/* (For internal use.) One speech (non-silence) segment that has not yet been
   consumed; segments are chained through `next`. */
135typedef struct spseg_s {
136 int32 startfrm; /* Frame-id in adbuf of start of this segment. */
137 int32 nfrm; /* Number of frames in segment (may wrap around adbuf). */
138 struct spseg_s *next; /* Next speech segment (with some intervening silence). */
139} spseg_t;
140
141
/* Continuous listening module or object.
 *
 * NOTE(review): this listing omits several members that the member index at
 * the end of the page places inside this struct (ad, prev_sample, noise_level,
 * auto_thresh, delta_speech, speech_onset, thresh_speech, thresh_sil,
 * thresh_update, tail_state, win_startfrm, win_validfrm, spseg_head,
 * spseg_tail, n_calib_frame). Do not rely on the layout shown here; confirm
 * against the real header.
 */
151typedef struct {
152 /* Function to be called for obtaining A/D data (see prototype for ad_read in ad.h) */
153 int32 (*adfunc)(ad_rec_t *ad, int16 *buf, int32 max);
156 int32 rawmode; /* Pass all input data through, without filtering silence. */
158 int16 *adbuf; /* Circular buffer for maintaining A/D data read until consumed. */
160 /* **************************************************************************
161 * state, read_ts, and siglvl are provided for READ-ONLY use by client
162 * applications, and are updated by calls to cont_ad_read() (see below). All
163 * other variables should be left alone.
164 */
165 int32 state; /* State of data returned by most recent cont_ad_read call;
                CONT_AD_STATE_SIL or CONT_AD_STATE_SPEECH. */
167 int32 read_ts; /* Absolute timestamp (full description is truncated in the member index). */
171 int32 seglen; /* NOTE(review): description is truncated in the member index ("Total no."). */
175 int32 siglvl; /* Max signal level for the data consumed by the most recent
                 cont_ad_read call (dB range: 0-99). */
178 /* ************************************************************************ */
179
180 int32 sps; /* Samples/sec; moved from ad->sps to break dependence on ad. */
183 int32 eof; /* Whether the source ad device has encountered EOF. */
185 int32 spf; /* Samples/frame; audio level is analyzed within frames. */
186 int32 adbufsize; /* Buffer size (number of samples). */
188 int32 headfrm; /* Frame number in adbuf with unconsumed A/D data. */
189 int32 n_frm; /* Number of complete frames of unconsumed A/D data in adbuf. */
190 int32 n_sample; /* Number of samples of unconsumed data in adbuf. */
191 int32 tot_frm; /* Total number of frames of A/D data read, including consumed ones. */
194 int32 *pow_hist; /* Histogram of frame power, moving window, decayed. */
195 char *frm_pow; /* Frame power. */
198 int32 delta_sil; /* Max silence power/frame ABOVE noise level. */
200 int32 min_noise; /* Noise lower than this we ignore. */
201 int32 max_noise; /* Noise higher than this signals an error. */
202 int32 winsize; /* How many frames to look at for speech detection. */
204 int32 sil_onset; /* End speech on >= these many frames out of winsize, of <= delta_sil. */
205 int32 leader; /* Pad beginning of speech with this many extra frames. */
206 int32 trailer; /* Pad end of speech with this many extra frames. */
213 float32 adapt_rate; /* Linear interpolation constant for rate at which noise level
                         is adapted to each estimate; range: 0-1. */
222 int32 n_other; /* If in SILENCE state, number of frames in analysis window considered
                  to be speech (remainder of description truncated in the member index). */
227 FILE *rawfp; /* If non-NULL, raw audio input data processed by cont_ad is dumped to this file. */
231 FILE *logfp; /* If non-NULL, write detailed logs of this object's progress to the file. */
237} cont_ad_t;
238
239
/* NOTE(review): listing line 256 (return type, function name and first
   parameter) is missing from this extract. Presumably this is cont_ad_init()
   or cont_ad_init_rawmode() from the function index; both take
   (ad_rec_t *ad, adfunc) and return cont_ad_t *. Confirm against the real header. */
255SPHINXBASE_EXPORT
257 int32 (*adfunc)(ad_rec_t *ad, int16 *buf, int32 max)
261 );
262
/* NOTE(review): listing line 270 (return type, function name and first
   parameter) is missing from this extract. Presumably this is
   cont_ad_init_rawmode() or cont_ad_init() from the function index.
   Confirm against the real header. */
269SPHINXBASE_EXPORT
271 int32 (*adfunc)(ad_rec_t *ad, int16 *buf, int32 max));
272
273
/* Read raw audio data into the silence filter. Calls to this function update
   the state, read_ts and siglvl members of the cont_ad_t object. */
302SPHINXBASE_EXPORT
303int32 cont_ad_read (cont_ad_t *r,
304 int16 *buf,
307 int32 max
310 );
311
/* NOTE(review): the declaration on listing line 316 is missing from this
   extract. Presumably one of the single-argument functions from the function
   index (cont_ad_buffer_space or cont_ad_calib_size). Confirm against the real header. */
315SPHINXBASE_EXPORT
317
/* Calibrate the silence filter. */
330SPHINXBASE_EXPORT
331int32 cont_ad_calib (cont_ad_t *cont
332 );
333
/* Calibrate the silence filter without an audio device. */
345SPHINXBASE_EXPORT
346int32 cont_ad_calib_loop (cont_ad_t *r, int16 *buf, int32 max);
347
/* NOTE(review): the declaration on listing line 360 is missing from this
   extract. Presumably one of the single-argument functions from the function
   index (cont_ad_calib_size or cont_ad_buffer_space). Confirm against the real header. */
359SPHINXBASE_EXPORT
361
/* Set silence and speech threshold parameters. */
374SPHINXBASE_EXPORT
375int32 cont_ad_set_thresh (cont_ad_t *cont,
376 int32 sil,
377 int32 sp
378 );
379
380
/* Set the changeable parameters. The parameter names match the like-named
   entries for cont_ad_t in the member index. */
388SPHINXBASE_EXPORT
389int32 cont_ad_set_params (cont_ad_t *r, int32 delta_sil, int32 delta_speech,
390 int32 min_noise, int32 max_noise,
391 int32 winsize, int32 speech_onset, int32 sil_onset,
392 int32 leader, int32 trailer,
393 float32 adapt_rate);
394
/* Get the changeable parameters (PWP 1/14/98). Takes pointer counterparts of
   the cont_ad_set_params arguments. */
402SPHINXBASE_EXPORT
403int32 cont_ad_get_params (cont_ad_t *r, int32 *delta_sil, int32 *delta_speech,
404 int32 *min_noise, int32 *max_noise,
405 int32 *winsize, int32 *speech_onset, int32 *sil_onset,
406 int32 *leader, int32 *trailer,
407 float32 *adapt_rate);
408
/* Reset, discarding any accumulated speech segments. */
413SPHINXBASE_EXPORT
414int32 cont_ad_reset (cont_ad_t *cont); /* In: Object pointer from cont_ad_init */
415
416
/* Close the continuous listening object. */
420SPHINXBASE_EXPORT
421int32 cont_ad_close (cont_ad_t *cont); /* In: Object pointer from cont_ad_init */
422
423
/* Dump the power histogram. */
427SPHINXBASE_EXPORT
428void cont_ad_powhist_dump (FILE *fp, cont_ad_t *cont);
429
430
/* Detach the given continuous listening module from the associated audio device. */
435SPHINXBASE_EXPORT
436int32 cont_ad_detach (cont_ad_t *c);
437
438
/* Attach the continuous listening module to the given audio device/function. */
444SPHINXBASE_EXPORT
445int32 cont_ad_attach (cont_ad_t *c, ad_rec_t *a, int32 (*func)(ad_rec_t *, int16 *, int32));
446
447
/* Set a file for dumping raw audio input. */
459SPHINXBASE_EXPORT
460int32 cont_ad_set_rawfp (cont_ad_t *c, /* The cont_ad object being addressed */
461 FILE *fp); /* File to which raw audio data is to
462 be dumped; NULL to stop dumping. */
463
/* Set the file to which cont_ad logs its progress. */
471SPHINXBASE_EXPORT
472int32 cont_ad_set_logfp (cont_ad_t *c, /* The cont_ad object being addressed */
473 FILE *fp); /* File to which logs are written;
474 NULL to stop logging. */
475
/* Set the silence and speech thresholds. */
484SPHINXBASE_EXPORT
485int32 cont_set_thresh(cont_ad_t *r, int32 silence, int32 speech);
486
487#ifdef __cplusplus
488}
489#endif
490
491
492#endif
generic live audio interface for recording and playback
SPHINXBASE_EXPORT int32 cont_ad_reset(cont_ad_t *cont)
Reset, discarding any accumulated speech segments.
SPHINXBASE_EXPORT int32 cont_ad_set_logfp(cont_ad_t *c, FILE *fp)
Set the file to which cont_ad logs its progress.
SPHINXBASE_EXPORT void cont_ad_powhist_dump(FILE *fp, cont_ad_t *cont)
Dump the power histogram.
SPHINXBASE_EXPORT int32 cont_ad_close(cont_ad_t *cont)
Close the continuous listening object.
SPHINXBASE_EXPORT int32 cont_set_thresh(cont_ad_t *r, int32 silence, int32 speech)
Set the silence and speech thresholds.
SPHINXBASE_EXPORT int32 cont_ad_attach(cont_ad_t *c, ad_rec_t *a, int32(*func)(ad_rec_t *, int16 *, int32))
Attach the continuous listening module to the given audio device/function.
SPHINXBASE_EXPORT int32 cont_ad_set_rawfp(cont_ad_t *c, FILE *fp)
Set a file for dumping raw audio input.
SPHINXBASE_EXPORT int32 cont_ad_buffer_space(cont_ad_t *r)
Get the maximum number of samples which can be passed into cont_ad_read().
SPHINXBASE_EXPORT int32 cont_ad_calib_size(cont_ad_t *r)
Get the number of samples required to calibrate the silence filter.
SPHINXBASE_EXPORT int32 cont_ad_calib(cont_ad_t *cont)
Calibrate the silence filter.
SPHINXBASE_EXPORT int32 cont_ad_calib_loop(cont_ad_t *r, int16 *buf, int32 max)
Calibrate the silence filter without an audio device.
SPHINXBASE_EXPORT int32 cont_ad_detach(cont_ad_t *c)
Detach the given continuous listening module from the associated audio device.
SPHINXBASE_EXPORT int32 cont_ad_set_params(cont_ad_t *r, int32 delta_sil, int32 delta_speech, int32 min_noise, int32 max_noise, int32 winsize, int32 speech_onset, int32 sil_onset, int32 leader, int32 trailer, float32 adapt_rate)
Set the changeable parameters.
SPHINXBASE_EXPORT cont_ad_t * cont_ad_init_rawmode(ad_rec_t *ad, int32(*adfunc)(ad_rec_t *ad, int16 *buf, int32 max))
Initializes a continuous listening object which simply passes data through (!)
SPHINXBASE_EXPORT int32 cont_ad_read(cont_ad_t *r, int16 *buf, int32 max)
Read raw audio data into the silence filter.
SPHINXBASE_EXPORT int32 cont_ad_get_params(cont_ad_t *r, int32 *delta_sil, int32 *delta_speech, int32 *min_noise, int32 *max_noise, int32 *winsize, int32 *speech_onset, int32 *sil_onset, int32 *leader, int32 *trailer, float32 *adapt_rate)
PWP 1/14/98 – get the changeable params.
SPHINXBASE_EXPORT cont_ad_t * cont_ad_init(ad_rec_t *ad, int32(*adfunc)(ad_rec_t *ad, int16 *buf, int32 max))
Initialize a continuous listening/silence filtering object.
SPHINXBASE_EXPORT int32 cont_ad_set_thresh(cont_ad_t *cont, int32 sil, int32 sp)
Set silence and speech threshold parameters.
Basic type definitions used in Sphinx.
Definition ad.h:255
Continuous listening module or object.
Definition cont_ad.h:151
int32 * pow_hist
Histogram of frame power, moving window, decayed.
Definition cont_ad.h:194
int32 thresh_speech
Frame considered to be speech if power >= thresh_speech (for transitioning from SILENCE to SPEECH sta...
Definition cont_ad.h:208
int32 leader
pad beginning of speech with this many extra frames
Definition cont_ad.h:205
int32 sps
Samples/sec; moved from ad->sps to break dependence on ad by N.
Definition cont_ad.h:180
int32 win_validfrm
Number of frames currently available from win_startfrm for analysis.
Definition cont_ad.h:221
char * frm_pow
Frame power.
Definition cont_ad.h:195
int32 speech_onset
start speech on >= these many frames out of winsize, of >= delta_speech
Definition cont_ad.h:203
int32 thresh_update
Number of frames before next update to pow_hist/thresholds.
Definition cont_ad.h:212
int32 seglen
Total no.
Definition cont_ad.h:171
int32 tot_frm
Total number of frames of A/D data read, including consumed ones.
Definition cont_ad.h:191
int32 prev_sample
For pre-emphasis filter.
Definition cont_ad.h:187
int32 siglvl
Max signal level for the data consumed by the most recent cont_ad_read call (dB range: 0-99).
Definition cont_ad.h:175
int32 spf
Samples/frame; audio level is analyzed within frames.
Definition cont_ad.h:185
int32 adbufsize
Buffer size (Number of samples)
Definition cont_ad.h:186
int32 state
State of data returned by most recent cont_ad_read call; CONT_AD_STATE_SIL or CONT_AD_STATE_SPEECH.
Definition cont_ad.h:165
int32 n_other
If in SILENCE state, number of frames in analysis window considered to be speech; otherwise number of...
Definition cont_ad.h:222
int16 * adbuf
Circular buffer for maintaining A/D data read until consumed.
Definition cont_ad.h:158
int32 delta_sil
Max silence power/frame ABOVE noise level.
Definition cont_ad.h:198
int32 win_startfrm
Where next analysis window begins.
Definition cont_ad.h:220
int32 rawmode
Pass all input data through, without filtering silence.
Definition cont_ad.h:156
int32 n_sample
Number of samples of unconsumed data in adbuf.
Definition cont_ad.h:190
int32 read_ts
Absolute timestamp (total no.
Definition cont_ad.h:167
int32 winsize
how many frames to look at for speech det
Definition cont_ad.h:202
int32 auto_thresh
Do automatic threshold adjustment or not.
Definition cont_ad.h:197
int32 sil_onset
end speech on >= these many frames out of winsize, of <= delta_sil
Definition cont_ad.h:204
int32 headfrm
Frame number in adbuf with unconsumed A/D data.
Definition cont_ad.h:188
int32 trailer
pad end of speech with this many extra frms
Definition cont_ad.h:206
ad_rec_t * ad
A/D device argument for adfunc.
Definition cont_ad.h:154
int32 n_frm
Number of complete frames of unconsumed A/D data in adbuf.
Definition cont_ad.h:189
spseg_t * spseg_head
First of unconsumed speech segments.
Definition cont_ad.h:224
float32 adapt_rate
Linear interpolation constant for rate at which noise level adapted to each estimate; range: 0-1; 0=>...
Definition cont_ad.h:213
int32 min_noise
noise lower than this we ignore
Definition cont_ad.h:200
int32 n_calib_frame
Number of frames of calibration data seen so far.
Definition cont_ad.h:236
spseg_t * spseg_tail
Last of unconsumed speech segments.
Definition cont_ad.h:225
FILE * logfp
If non-NULL, write detailed logs of this object's progress to the file.
Definition cont_ad.h:231
int32 thresh_sil
Frame considered to be silence if power <= thresh_sil (for transitioning from SPEECH to SILENCE state...
Definition cont_ad.h:210
int32 tail_state
State at the end of its internal buffer (internal use): CONT_AD_STATE_SIL or CONT_AD_STATE_SPEECH.
Definition cont_ad.h:217
int32 max_noise
noise higher than this signals an error
Definition cont_ad.h:201
int32 noise_level
PWP: what we claim as the "current" noise level.
Definition cont_ad.h:192
int32 eof
Whether the source ad device has encountered EOF.
Definition cont_ad.h:183
FILE * rawfp
If non-NULL, raw audio input data processed by cont_ad is dumped to this file.
Definition cont_ad.h:227
int32 delta_speech
Min speech power/frame ABOVE noise level.
Definition cont_ad.h:199
int32 startfrm
Frame-id in adbuf (see below) of start of this segment.
Definition cont_ad.h:136
int32 nfrm
Number of frames in segment (may wrap around adbuf)
Definition cont_ad.h:137
struct spseg_s * next
Next speech segment (with some intervening silence)
Definition cont_ad.h:138
(FOR INTERNAL USE ) Data structure for maintaining speech (non-silence) segments not yet consumed by ...