SphinxBase 0.6
cont_adseg.c
1/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2/* ====================================================================
3 * Copyright (c) 1999-2001 Carnegie Mellon University. All rights
4 * reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 *
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 *
18 * This work was supported in part by funding from the Defense Advanced
19 * Research Projects Agency and the National Science Foundation of the
20 * United States of America, and the CMU Sphinx Speech Consortium.
21 *
22 * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
23 * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
24 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
26 * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33 *
34 * ====================================================================
35 *
36 */
37/*
38 * cont_adseg.c -- Continuously listen and segment input speech into utterances.
39 *
40 * HISTORY
41 *
42 * 27-Jun-96 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University
43 * Created.
44 */
45
46#include <stdio.h>
47#include <stdlib.h>
48#include <string.h>
49#include <assert.h>
50#include <math.h>
51
53#include <sphinxbase/ad.h>
54#include <sphinxbase/cont_ad.h>
55#include <sphinxbase/err.h>
56
57/*
58 * Segment raw A/D input data into utterances whenever silence region of given
59 * duration is encountered.
60 * Utterances are written to files named 0001.raw, 0002.raw, 0003.raw, etc.
61 */
62int
63main(int32 argc, char **argv)
64{
65 ad_rec_t *ad;
66 cont_ad_t *cont;
67 int32 k, uttno, ts, uttlen, sps, endsilsamples;
68 float endsil;
69 int16 buf[4096];
70 FILE *fp;
71 char file[1024];
72
73 if ((argc != 3) ||
74 (sscanf(argv[1], "%d", &sps) != 1) ||
75 (sscanf(argv[2], "%f", &endsil) != 1) || (endsil <= 0.0)) {
76 E_FATAL("Usage: %s <sampling-rate> <utt-end-sil(sec)>\n", argv[0]);
77 }
78
79 /* Convert desired min. inter-utterance silence duration to #samples */
80 endsilsamples = (int32) (endsil * sps);
81
82 /* Open raw A/D device */
83 if ((ad = ad_open_sps(sps)) == NULL)
84 E_FATAL("ad_open_sps(%d) failed\n", sps);
85
86 /* Associate new continuous listening module with opened raw A/D device */
87 if ((cont = cont_ad_init(ad, ad_read)) == NULL)
88 E_FATAL("cont_ad_init failed\n");
89
90 /* Calibrate continuous listening for background noise/silence level */
91 printf("Calibrating ...");
92 fflush(stdout);
93 ad_start_rec(ad);
94 if (cont_ad_calib(cont) < 0)
95 printf(" failed\n");
96 else
97 printf(" done\n");
98
99 /* Forever listen for utterances */
100 printf("You may speak now\n");
101 fflush(stdout);
102 uttno = 0;
103 for (;;) {
104 /* Wait for beginning of next utterance; for non-silence data */
105 while ((k = cont_ad_read(cont, buf, 4096)) == 0);
106 if (k < 0)
107 E_FATAL("cont_ad_read failed\n");
108
109 /* Non-silence data received; open and write to new logging file */
110 uttno++;
111 sprintf(file, "%04d.raw", uttno);
112 if ((fp = fopen(file, "wb")) == NULL)
113 E_FATAL_SYSTEM("Failed to open '%s' for reading", file);
114 fwrite(buf, sizeof(int16), k, fp);
115 uttlen = k;
116 printf("Utterance %04d, logging to %s\n", uttno, file);
117
118 /* Note current timestamp */
119 ts = cont->read_ts;
120
121 /* Read utterance data until a gap of at least 1 sec observed */
122 for (;;) {
123 if ((k = cont_ad_read(cont, buf, 4096)) < 0)
124 E_FATAL("cont_ad_read failed\n");
125 if (k == 0) {
126 /*
127 * No speech data available; check current timestamp. End of
128 * utterance if no non-silence data been read for at least 1 sec.
129 */
130 if ((cont->read_ts - ts) > endsilsamples)
131 break;
132 }
133 else {
134 /* Note timestamp at the end of most recently read speech data */
135 ts = cont->read_ts;
136 uttlen += k;
137 fwrite(buf, sizeof(int16), k, fp);
138 }
139 }
140 fclose(fp);
141
142 printf("\tUtterance %04d = %d samples (%.1fsec)\n\n",
143 uttno, uttlen, (double) uttlen / (double) sps);
144 }
145
146 ad_stop_rec(ad);
147 cont_ad_close(cont);
148 ad_close(ad);
149 return 0;
150}
generic live audio interface for recording and playback
SPHINXBASE_EXPORT ad_rec_t * ad_open_sps(int32 samples_per_sec)
Open the default audio device with a given sampling rate.
Definition ad_alsa.c:290
Continuous A/D listening and silence filtering module.
SPHINXBASE_EXPORT int32 cont_ad_close(cont_ad_t *cont)
Close the continuous listening object.
SPHINXBASE_EXPORT int32 cont_ad_calib(cont_ad_t *cont)
Calibrate the silence filter.
SPHINXBASE_EXPORT int32 cont_ad_read(cont_ad_t *r, int16 *buf, int32 max)
Read raw audio data into the silence filter.
SPHINXBASE_EXPORT cont_ad_t * cont_ad_init(ad_rec_t *ad, int32(*adfunc)(ad_rec_t *ad, int16 *buf, int32 max))
Initialize a continuous listening/silence filtering object.
Implementation of logging routines.
#define E_FATAL
Exit with non-zero status after error message.
Definition err.h:127
#define E_FATAL_SYSTEM
Print error text; Call perror(""); exit(errno);.
Definition err.h:132
Basic type definitions used in Sphinx.
Definition ad.h:255
Continuous listening module or object.
Definition cont_ad.h:151
int32 read_ts
Absolute timestamp (total number of raw samples consumed up to the most recent cont_ad_read call).
Definition cont_ad.h:167