83#include <sphinxbase/sphinxbase_export.h>
126#define CONT_AD_STATE_SIL 0
127#define CONT_AD_STATE_SPEECH 1
153 int32 (*adfunc)(
ad_rec_t *ad, int16 *buf, int32 max);
257 int32 (*adfunc)(
ad_rec_t *ad, int16 *buf, int32 max)
271 int32 (*adfunc)(
ad_rec_t *ad, int16 *buf, int32 max));
390 int32 min_noise, int32 max_noise,
391 int32 winsize, int32 speech_onset, int32 sil_onset,
392 int32 leader, int32 trailer,
404 int32 *min_noise, int32 *max_noise,
405 int32 *winsize, int32 *speech_onset, int32 *sil_onset,
406 int32 *leader, int32 *trailer,
407 float32 *adapt_rate);
generic live audio interface for recording and playback
SPHINXBASE_EXPORT int32 cont_ad_reset(cont_ad_t *cont)
Reset, discarding any accumulated speech segments.
SPHINXBASE_EXPORT int32 cont_ad_set_logfp(cont_ad_t *c, FILE *fp)
Set the file to which cont_ad logs its progress.
SPHINXBASE_EXPORT void cont_ad_powhist_dump(FILE *fp, cont_ad_t *cont)
Dump the power histogram.
SPHINXBASE_EXPORT int32 cont_ad_close(cont_ad_t *cont)
Close the continuous listening object.
SPHINXBASE_EXPORT int32 cont_set_thresh(cont_ad_t *r, int32 silence, int32 speech)
Set the silence and speech thresholds.
SPHINXBASE_EXPORT int32 cont_ad_attach(cont_ad_t *c, ad_rec_t *a, int32(*func)(ad_rec_t *, int16 *, int32))
Attach the continuous listening module to the given audio device/function.
SPHINXBASE_EXPORT int32 cont_ad_set_rawfp(cont_ad_t *c, FILE *fp)
Set a file for dumping raw audio input.
SPHINXBASE_EXPORT int32 cont_ad_buffer_space(cont_ad_t *r)
Get the maximum number of samples which can be passed into cont_ad_read().
SPHINXBASE_EXPORT int32 cont_ad_calib_size(cont_ad_t *r)
Get the number of samples required to calibrate the silence filter.
SPHINXBASE_EXPORT int32 cont_ad_calib(cont_ad_t *cont)
Calibrate the silence filter.
SPHINXBASE_EXPORT int32 cont_ad_calib_loop(cont_ad_t *r, int16 *buf, int32 max)
Calibrate the silence filter without an audio device.
SPHINXBASE_EXPORT int32 cont_ad_detach(cont_ad_t *c)
Detach the given continuous listening module from the associated audio device.
SPHINXBASE_EXPORT int32 cont_ad_set_params(cont_ad_t *r, int32 delta_sil, int32 delta_speech, int32 min_noise, int32 max_noise, int32 winsize, int32 speech_onset, int32 sil_onset, int32 leader, int32 trailer, float32 adapt_rate)
Set the changeable parameters.
SPHINXBASE_EXPORT cont_ad_t * cont_ad_init_rawmode(ad_rec_t *ad, int32(*adfunc)(ad_rec_t *ad, int16 *buf, int32 max))
Initializes a continuous listening object which simply passes data through (!)
SPHINXBASE_EXPORT int32 cont_ad_read(cont_ad_t *r, int16 *buf, int32 max)
Read raw audio data into the silence filter.
SPHINXBASE_EXPORT int32 cont_ad_get_params(cont_ad_t *r, int32 *delta_sil, int32 *delta_speech, int32 *min_noise, int32 *max_noise, int32 *winsize, int32 *speech_onset, int32 *sil_onset, int32 *leader, int32 *trailer, float32 *adapt_rate)
PWP 1/14/98 – get the changeable params.
SPHINXBASE_EXPORT cont_ad_t * cont_ad_init(ad_rec_t *ad, int32(*adfunc)(ad_rec_t *ad, int16 *buf, int32 max))
Initialize a continuous listening/silence filtering object.
SPHINXBASE_EXPORT int32 cont_ad_set_thresh(cont_ad_t *cont, int32 sil, int32 sp)
Set silence and speech threshold parameters.
Basic type definitions used in Sphinx.
Continuous listening module or object.
int32 * pow_hist
Histogram of frame power, moving window, decayed.
int32 thresh_speech
Frame considered to be speech if power >= thresh_speech (for transitioning from SILENCE to SPEECH sta...
int32 leader
Pad beginning of speech with this many extra frames.
int32 sps
Samples/sec; moved from ad->sps to break dependence on ad by N.
int32 win_validfrm
Number of frames currently available from win_startfrm for analysis.
char * frm_pow
Frame power.
int32 speech_onset
start speech on >= these many frames out of winsize, of >= delta_speech
int32 thresh_update
Number of frames before next update to pow_hist/thresholds.
int32 tot_frm
Total number of frames of A/D data read, including consumed ones.
int32 prev_sample
For pre-emphasis filter.
int32 siglvl
Max signal level for the data consumed by the most recent cont_ad_read call (dB range: 0-99).
int32 spf
Samples/frame; audio level is analyzed within frames.
int32 adbufsize
Buffer size (Number of samples)
int32 state
State of data returned by most recent cont_ad_read call; CONT_AD_STATE_SIL or CONT_AD_STATE_SPEECH.
int32 n_other
If in SILENCE state, number of frames in analysis window considered to be speech; otherwise number of...
int16 * adbuf
Circular buffer for maintaining A/D data read until consumed.
int32 delta_sil
Max silence power/frame ABOVE noise level.
int32 win_startfrm
Where next analysis window begins.
int32 rawmode
Pass all input data through, without filtering silence.
int32 n_sample
Number of samples of unconsumed data in adbuf.
int32 read_ts
Absolute timestamp (total no.
int32 winsize
How many frames to look at for speech detection.
int32 auto_thresh
Do automatic threshold adjustment or not.
int32 sil_onset
end speech on >= these many frames out of winsize, of <= delta_sil
int32 headfrm
Frame number in adbuf with unconsumed A/D data.
int32 trailer
Pad end of speech with this many extra frames.
ad_rec_t * ad
A/D device argument for adfunc.
int32 n_frm
Number of complete frames of unconsumed A/D data in adbuf.
spseg_t * spseg_head
First of unconsumed speech segments.
float32 adapt_rate
Linear interpolation constant for rate at which noise level adapted to each estimate; range: 0-1; 0=>...
int32 min_noise
noise lower than this we ignore
int32 n_calib_frame
Number of frames of calibration data seen so far.
spseg_t * spseg_tail
Last of unconsumed speech segments.
FILE * logfp
If non-NULL, write detailed logs of this object's progress to the file.
int32 thresh_sil
Frame considered to be silence if power <= thresh_sil (for transitioning from SPEECH to SILENCE state...
int32 tail_state
State at the end of its internal buffer (internal use): CONT_AD_STATE_SIL or CONT_AD_STATE_SPEECH.
int32 max_noise
noise higher than this signals an error
int32 noise_level
PWP: what we claim as the "current" noise level.
int32 eof
Whether the source ad device has encountered EOF.
FILE * rawfp
If non-NULL, raw audio input data processed by cont_ad is dumped to this file.
int32 delta_speech
Min speech power/frame ABOVE noise level.
int32 startfrm
Frame-id in adbuf (see below) of start of this segment.
int32 nfrm
Number of frames in segment (may wrap around adbuf)
struct spseg_s * next
Next speech segment (with some intervening silence)
(FOR INTERNAL USE ) Data structure for maintaining speech (non-silence) segments not yet consumed by ...