SphinxBase 0.6
feat.c
1/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2/* ====================================================================
3 * Copyright (c) 1999-2004 Carnegie Mellon University. All rights
4 * reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 *
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 *
18 * This work was supported in part by funding from the Defense Advanced
19 * Research Projects Agency and the National Science Foundation of the
20 * United States of America, and the CMU Sphinx Speech Consortium.
21 *
22 * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
23 * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
24 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
26 * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33 *
34 * ====================================================================
35 *
36 */
37/*
38 * feat.c -- Feature vector description and cepstra->feature computation.
39 *
40 * **********************************************
41 * CMU ARPA Speech Project
42 *
43 * Copyright (c) 1996 Carnegie Mellon University.
44 * ALL RIGHTS RESERVED.
45 * **********************************************
46 *
47 * HISTORY
48 * $Log$
49 * Revision 1.22 2006/02/23 03:59:40 arthchan2003
50 * Merged from branch SPHINX3_5_2_RCI_IRII_BRANCH: a, Free buffers correctly. b, Fixed dox-doc.
51 *
52 * Revision 1.21.4.3 2005/10/17 04:45:57 arthchan2003
53 * Free stuffs in cmn and feat corectly.
54 *
55 * Revision 1.21.4.2 2005/09/26 02:19:57 arthchan2003
56 * Add message to show the directory which the feature is searched for.
57 *
58 * Revision 1.21.4.1 2005/07/03 22:55:50 arthchan2003
59 * More correct deallocation in feat.c. The cmn deallocation is still not correct at this point.
60 *
61 * Revision 1.21 2005/06/22 03:29:35 arthchan2003
62 * Makefile.am s for all subdirectory of libs3decoder/
63 *
64 * Revision 1.4 2005/04/21 23:50:26 archan
65 * Some more refactoring on the how reporting of structures inside kbcore_t is done, it is now 50% nice. Also added class-based LM test case into test-decode.sh.in. At this moment, everything in search mode 5 is already done. It is time to test the idea whether the search can really be used.
66 *
67 * Revision 1.3 2005/03/30 01:22:46 archan
68 * Fixed mistakes in last updates. Add
69 *
70 *
71 * 20.Apr.2001 RAH (rhoughton@mediasite.com, ricky.houghton@cs.cmu.edu)
72 * Adding feat_free() to free allocated memory
73 *
74 * 02-Jan-2001 Rita Singh (rsingh@cs.cmu.edu) at Carnegie Mellon University
75 * Modified feat_s2mfc2feat_block() to handle empty buffers at
76 * the end of an utterance
77 *
78 * 30-Dec-2000 Rita Singh (rsingh@cs.cmu.edu) at Carnegie Mellon University
79 * Added feat_s2mfc2feat_block() to allow feature computation
80 * from sequences of blocks of cepstral vectors
81 *
82 * 12-Jun-98 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University
83 * Major changes to accommodate arbitrary feature input types. Added
84 * feat_read(), moved various cep2feat functions from other files into
85 * this one. Also, made this module object-oriented with the feat_t type.
86 * Changed definition of s2mfc_read to let the caller manage MFC buffers.
87 *
88 * 03-Oct-96 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University
89 * Added unistd.h include.
90 *
91 * 02-Oct-96 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University
92 * Added check for sf argument to s2mfc_read being within file size.
93 *
94 * 18-Sep-96 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University
95 * Added sf, ef parameters to s2mfc_read().
96 *
97 * 10-Jan-96 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University
98 * Added feat_cepsize().
99 * Added different feature-handling (s2_4x, s3_1x39 at this point).
100 * Moved feature-dependent functions to feature-dependent files.
101 *
102 * 09-Jan-96 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University
103 * Moved constant declarations from feat.h into here.
104 *
105 * 04-Nov-95 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University
106 * Created.
107 */
108
109
110/*
111 * This module encapsulates different feature streams used by the Sphinx group. New
112 * stream types can be added by augmenting feat_init() and providing an accompanying
113 * compute_feat function. It also provides a "generic" feature vector definition for
114 * handling "arbitrary" speech input feature types (see the last section in feat_init()).
115 * In this case the speech input data should already be feature vectors; no computation,
116 * such as MFC->feature conversion, is available or needed.
117 */
118
119#include <assert.h>
120#include <string.h>
121#ifdef HAVE_CONFIG_H
122#include <config.h>
123#endif
124
125#ifdef _MSC_VER
126#pragma warning (disable: 4244 4996)
127#endif
128
129#include "sphinxbase/fe.h"
130#include "sphinxbase/feat.h"
131#include "sphinxbase/bio.h"
132#include "sphinxbase/pio.h"
133#include "sphinxbase/cmn.h"
134#include "sphinxbase/agc.h"
135#include "sphinxbase/err.h"
136#include "sphinxbase/ckd_alloc.h"
137#include "sphinxbase/prim_type.h"
138#include "sphinxbase/glist.h"
139
140#define FEAT_VERSION "1.0"
141#define FEAT_DCEP_WIN 2
142
143#ifdef DUMP_FEATURES
144static void
145cep_dump_dbg(feat_t *fcb, mfcc_t **mfc, int32 nfr, const char *text)
146{
147 int32 i, j;
148
149 E_INFO("%s\n", text);
150 for (i = 0; i < nfr; i++) {
151 for (j = 0; j < fcb->cepsize; j++) {
152 fprintf(stderr, "%f ", MFCC2FLOAT(mfc[i][j]));
153 }
154 fprintf(stderr, "\n");
155 }
156}
157static void
158feat_print_dbg(feat_t *fcb, mfcc_t ***feat, int32 nfr, const char *text)
159{
160 E_INFO("%s\n", text);
161 feat_print(fcb, feat, nfr, stderr);
162}
163#else /* !DUMP_FEATURES */
164#define cep_dump_dbg(fcb,mfc,nfr,text)
165#define feat_print_dbg(fcb,mfc,nfr,text)
166#endif
167
168int32 **
169parse_subvecs(char const *str)
170{
171 char const *strp;
172 int32 n, n2, l;
173 glist_t dimlist; /* List of dimensions in one subvector */
174 glist_t veclist; /* List of dimlists (subvectors) */
175 int32 **subvec;
176 gnode_t *gn, *gn2;
177
178 veclist = NULL;
179
180 strp = str;
181 for (;;) {
182 dimlist = NULL;
183
184 for (;;) {
185 if (sscanf(strp, "%d%n", &n, &l) != 1)
186 E_FATAL("'%s': Couldn't read int32 @pos %d\n", str,
187 strp - str);
188 strp += l;
189
190 if (*strp == '-') {
191 strp++;
192
193 if (sscanf(strp, "%d%n", &n2, &l) != 1)
194 E_FATAL("'%s': Couldn't read int32 @pos %d\n", str,
195 strp - str);
196 strp += l;
197 }
198 else
199 n2 = n;
200
201 if ((n < 0) || (n > n2))
202 E_FATAL("'%s': Bad subrange spec ending @pos %d\n", str,
203 strp - str);
204
205 for (; n <= n2; n++) {
206 gnode_t *gn;
207 for (gn = dimlist; gn; gn = gnode_next(gn))
208 if (gnode_int32(gn) == n)
209 break;
210 if (gn != NULL)
211 E_FATAL("'%s': Duplicate dimension ending @pos %d\n",
212 str, strp - str);
213
214 dimlist = glist_add_int32(dimlist, n);
215 }
216
217 if ((*strp == '\0') || (*strp == '/'))
218 break;
219
220 if (*strp != ',')
221 E_FATAL("'%s': Bad delimiter @pos %d\n", str, strp - str);
222
223 strp++;
224 }
225
226 veclist = glist_add_ptr(veclist, (void *) dimlist);
227
228 if (*strp == '\0')
229 break;
230
231 assert(*strp == '/');
232 strp++;
233 }
234
235 /* Convert the glists to arrays; remember the glists are in reverse order of the input! */
236 n = glist_count(veclist); /* #Subvectors */
237 subvec = (int32 **) ckd_calloc(n + 1, sizeof(int32 *)); /* +1 for sentinel */
238 subvec[n] = NULL; /* sentinel */
239
240 for (--n, gn = veclist; (n >= 0) && gn; gn = gnode_next(gn), --n) {
241 gn2 = (glist_t) gnode_ptr(gn);
242
243 n2 = glist_count(gn2); /* Length of this subvector */
244 if (n2 <= 0)
245 E_FATAL("'%s': 0-length subvector\n", str);
246
247 subvec[n] = (int32 *) ckd_calloc(n2 + 1, sizeof(int32)); /* +1 for sentinel */
248 subvec[n][n2] = -1; /* sentinel */
249
250 for (--n2; (n2 >= 0) && gn2; gn2 = gnode_next(gn2), --n2)
251 subvec[n][n2] = gnode_int32(gn2);
252 assert((n2 < 0) && (!gn2));
253 }
254 assert((n < 0) && (!gn));
255
256 /* Free the glists */
257 for (gn = veclist; gn; gn = gnode_next(gn)) {
258 gn2 = (glist_t) gnode_ptr(gn);
259 glist_free(gn2);
260 }
261 glist_free(veclist);
262
263 return subvec;
264}
265
266void
267subvecs_free(int32 **subvecs)
268{
269 int32 **sv;
270
271 for (sv = subvecs; sv && *sv; ++sv)
272 ckd_free(*sv);
273 ckd_free(subvecs);
274}
275
276int
277feat_set_subvecs(feat_t *fcb, int32 **subvecs)
278{
279 int32 **sv;
280 int32 n_sv, n_dim, i;
281
282 if (subvecs == NULL) {
283 subvecs_free(fcb->subvecs);
284 ckd_free(fcb->sv_buf);
285 ckd_free(fcb->sv_len);
286 fcb->n_sv = 0;
287 fcb->subvecs = NULL;
288 fcb->sv_len = NULL;
289 fcb->sv_buf = NULL;
290 fcb->sv_dim = 0;
291 return 0;
292 }
293
294 if (fcb->n_stream != 1) {
295 E_ERROR("Subvector specifications require single-stream features!");
296 return -1;
297 }
298
299 n_sv = 0;
300 n_dim = 0;
301 for (sv = subvecs; sv && *sv; ++sv) {
302 int32 *d;
303
304 for (d = *sv; d && *d != -1; ++d) {
305 ++n_dim;
306 }
307 ++n_sv;
308 }
309 if (n_dim > feat_dimension(fcb)) {
310 E_ERROR("Total dimensionality of subvector specification %d "
311 "> feature dimensionality %d\n", n_dim, feat_dimension(fcb));
312 return -1;
313 }
314
315 fcb->n_sv = n_sv;
316 fcb->subvecs = subvecs;
317 fcb->sv_len = ckd_calloc(n_sv, sizeof(*fcb->sv_len));
318 fcb->sv_buf = ckd_calloc(n_dim, sizeof(*fcb->sv_buf));
319 fcb->sv_dim = n_dim;
320 for (i = 0; i < n_sv; ++i) {
321 int32 *d;
322 for (d = subvecs[i]; d && *d != -1; ++d) {
323 ++fcb->sv_len[i];
324 }
325 }
326
327 return 0;
328}
329
333static void
334feat_subvec_project(feat_t *fcb, mfcc_t ***inout_feat, uint32 nfr)
335{
336 uint32 i;
337
338 if (fcb->subvecs == NULL)
339 return;
340 for (i = 0; i < nfr; ++i) {
341 mfcc_t *out;
342 int32 j;
343
344 out = fcb->sv_buf;
345 for (j = 0; j < fcb->n_sv; ++j) {
346 int32 *d;
347 for (d = fcb->subvecs[j]; d && *d != -1; ++d) {
348 *out++ = inout_feat[i][0][*d];
349 }
350 }
351 memcpy(inout_feat[i][0], fcb->sv_buf, fcb->sv_dim * sizeof(*fcb->sv_buf));
352 }
353}
354
355mfcc_t ***
356feat_array_alloc(feat_t * fcb, int32 nfr)
357{
358 int32 i, j, k;
359 mfcc_t *data, *d, ***feat;
360
361 assert(fcb);
362 assert(nfr > 0);
363 assert(feat_dimension(fcb) > 0);
364
365 /* Make sure to use the dimensionality of the features *before*
366 LDA and subvector projection. */
367 k = 0;
368 for (i = 0; i < fcb->n_stream; ++i)
369 k += fcb->stream_len[i];
370 assert(k >= feat_dimension(fcb));
371 assert(k >= fcb->sv_dim);
372
373 feat =
374 (mfcc_t ***) ckd_calloc_2d(nfr, feat_dimension1(fcb), sizeof(mfcc_t *));
375 data = (mfcc_t *) ckd_calloc(nfr * k, sizeof(mfcc_t));
376
377 for (i = 0; i < nfr; i++) {
378 d = data + i * k;
379 for (j = 0; j < feat_dimension1(fcb); j++) {
380 feat[i][j] = d;
381 d += feat_dimension2(fcb, j);
382 }
383 }
384
385 return feat;
386}
387
388mfcc_t ***
389feat_array_realloc(feat_t *fcb, mfcc_t ***old_feat, int32 ofr, int32 nfr)
390{
391 int32 i, k, cf;
392 mfcc_t*** new_feat;
393
394 assert(fcb);
395 assert(nfr > 0);
396 assert(ofr > 0);
397 assert(feat_dimension(fcb) > 0);
398
399 /* Make sure to use the dimensionality of the features *before*
400 LDA and subvector projection. */
401 k = 0;
402 for (i = 0; i < fcb->n_stream; ++i)
403 k += fcb->stream_len[i];
404 assert(k >= feat_dimension(fcb));
405 assert(k >= fcb->sv_dim);
406
407 new_feat = feat_array_alloc(fcb, nfr);
408
409 cf = (nfr < ofr) ? nfr : ofr;
410 memcpy(new_feat[0][0], old_feat[0][0], cf * k * sizeof(mfcc_t));
411
412 feat_array_free(old_feat);
413
414 return new_feat;
415}
416
417void
418feat_array_free(mfcc_t ***feat)
419{
420 ckd_free(feat[0][0]);
421 ckd_free_2d((void **)feat);
422}
423
424static void
425feat_s2_4x_cep2feat(feat_t * fcb, mfcc_t ** mfc, mfcc_t ** feat)
426{
427 mfcc_t *f;
428 mfcc_t *w, *_w;
429 mfcc_t *w1, *w_1, *_w1, *_w_1;
430 mfcc_t d1, d2;
431 int32 i, j;
432
433 assert(fcb);
434 assert(feat_cepsize(fcb) == 13);
435 assert(feat_n_stream(fcb) == 4);
436 assert(feat_stream_len(fcb, 0) == 12);
437 assert(feat_stream_len(fcb, 1) == 24);
438 assert(feat_stream_len(fcb, 2) == 3);
439 assert(feat_stream_len(fcb, 3) == 12);
440 assert(feat_window_size(fcb) == 4);
441
442 /* CEP; skip C0 */
443 memcpy(feat[0], mfc[0] + 1, (feat_cepsize(fcb) - 1) * sizeof(mfcc_t));
444
445 /*
446 * DCEP(SHORT): mfc[2] - mfc[-2]
447 * DCEP(LONG): mfc[4] - mfc[-4]
448 */
449 w = mfc[2] + 1; /* +1 to skip C0 */
450 _w = mfc[-2] + 1;
451
452 f = feat[1];
453 for (i = 0; i < feat_cepsize(fcb) - 1; i++) /* Short-term */
454 f[i] = w[i] - _w[i];
455
456 w = mfc[4] + 1; /* +1 to skip C0 */
457 _w = mfc[-4] + 1;
458
459 for (j = 0; j < feat_cepsize(fcb) - 1; i++, j++) /* Long-term */
460 f[i] = w[j] - _w[j];
461
462 /* D2CEP: (mfc[3] - mfc[-1]) - (mfc[1] - mfc[-3]) */
463 w1 = mfc[3] + 1; /* Final +1 to skip C0 */
464 _w1 = mfc[-1] + 1;
465 w_1 = mfc[1] + 1;
466 _w_1 = mfc[-3] + 1;
467
468 f = feat[3];
469 for (i = 0; i < feat_cepsize(fcb) - 1; i++) {
470 d1 = w1[i] - _w1[i];
471 d2 = w_1[i] - _w_1[i];
472
473 f[i] = d1 - d2;
474 }
475
476 /* POW: C0, DC0, D2C0; differences computed as above for rest of cep */
477 f = feat[2];
478 f[0] = mfc[0][0];
479 f[1] = mfc[2][0] - mfc[-2][0];
480
481 d1 = mfc[3][0] - mfc[-1][0];
482 d2 = mfc[1][0] - mfc[-3][0];
483 f[2] = d1 - d2;
484}
485
486
487static void
488feat_s3_1x39_cep2feat(feat_t * fcb, mfcc_t ** mfc, mfcc_t ** feat)
489{
490 mfcc_t *f;
491 mfcc_t *w, *_w;
492 mfcc_t *w1, *w_1, *_w1, *_w_1;
493 mfcc_t d1, d2;
494 int32 i;
495
496 assert(fcb);
497 assert(feat_cepsize(fcb) == 13);
498 assert(feat_n_stream(fcb) == 1);
499 assert(feat_stream_len(fcb, 0) == 39);
500 assert(feat_window_size(fcb) == 3);
501
502 /* CEP; skip C0 */
503 memcpy(feat[0], mfc[0] + 1, (feat_cepsize(fcb) - 1) * sizeof(mfcc_t));
504 /*
505 * DCEP: mfc[2] - mfc[-2];
506 */
507 f = feat[0] + feat_cepsize(fcb) - 1;
508 w = mfc[2] + 1; /* +1 to skip C0 */
509 _w = mfc[-2] + 1;
510
511 for (i = 0; i < feat_cepsize(fcb) - 1; i++)
512 f[i] = w[i] - _w[i];
513
514 /* POW: C0, DC0, D2C0 */
515 f += feat_cepsize(fcb) - 1;
516
517 f[0] = mfc[0][0];
518 f[1] = mfc[2][0] - mfc[-2][0];
519
520 d1 = mfc[3][0] - mfc[-1][0];
521 d2 = mfc[1][0] - mfc[-3][0];
522 f[2] = d1 - d2;
523
524 /* D2CEP: (mfc[3] - mfc[-1]) - (mfc[1] - mfc[-3]) */
525 f += 3;
526
527 w1 = mfc[3] + 1; /* Final +1 to skip C0 */
528 _w1 = mfc[-1] + 1;
529 w_1 = mfc[1] + 1;
530 _w_1 = mfc[-3] + 1;
531
532 for (i = 0; i < feat_cepsize(fcb) - 1; i++) {
533 d1 = w1[i] - _w1[i];
534 d2 = w_1[i] - _w_1[i];
535
536 f[i] = d1 - d2;
537 }
538}
539
540
541static void
542feat_s3_cep(feat_t * fcb, mfcc_t ** mfc, mfcc_t ** feat)
543{
544 assert(fcb);
545 assert(feat_n_stream(fcb) == 1);
546 assert(feat_window_size(fcb) == 0);
547
548 /* CEP */
549 memcpy(feat[0], mfc[0], feat_cepsize(fcb) * sizeof(mfcc_t));
550}
551
552static void
553feat_s3_cep_dcep(feat_t * fcb, mfcc_t ** mfc, mfcc_t ** feat)
554{
555 mfcc_t *f;
556 mfcc_t *w, *_w;
557 int32 i;
558
559 assert(fcb);
560 assert(feat_n_stream(fcb) == 1);
561 assert(feat_stream_len(fcb, 0) == feat_cepsize(fcb) * 2);
562 assert(feat_window_size(fcb) == 2);
563
564 /* CEP */
565 memcpy(feat[0], mfc[0], feat_cepsize(fcb) * sizeof(mfcc_t));
566
567 /*
568 * DCEP: mfc[2] - mfc[-2];
569 */
570 f = feat[0] + feat_cepsize(fcb);
571 w = mfc[2];
572 _w = mfc[-2];
573
574 for (i = 0; i < feat_cepsize(fcb); i++)
575 f[i] = w[i] - _w[i];
576}
577
578static void
579feat_1s_c_d_dd_cep2feat(feat_t * fcb, mfcc_t ** mfc, mfcc_t ** feat)
580{
581 mfcc_t *f;
582 mfcc_t *w, *_w;
583 mfcc_t *w1, *w_1, *_w1, *_w_1;
584 mfcc_t d1, d2;
585 int32 i;
586
587 assert(fcb);
588 assert(feat_n_stream(fcb) == 1);
589 assert(feat_stream_len(fcb, 0) == feat_cepsize(fcb) * 3);
590 assert(feat_window_size(fcb) == FEAT_DCEP_WIN + 1);
591
592 /* CEP */
593 memcpy(feat[0], mfc[0], feat_cepsize(fcb) * sizeof(mfcc_t));
594
595 /*
596 * DCEP: mfc[w] - mfc[-w], where w = FEAT_DCEP_WIN;
597 */
598 f = feat[0] + feat_cepsize(fcb);
599 w = mfc[FEAT_DCEP_WIN];
600 _w = mfc[-FEAT_DCEP_WIN];
601
602 for (i = 0; i < feat_cepsize(fcb); i++)
603 f[i] = w[i] - _w[i];
604
605 /*
606 * D2CEP: (mfc[w+1] - mfc[-w+1]) - (mfc[w-1] - mfc[-w-1]),
607 * where w = FEAT_DCEP_WIN
608 */
609 f += feat_cepsize(fcb);
610
611 w1 = mfc[FEAT_DCEP_WIN + 1];
612 _w1 = mfc[-FEAT_DCEP_WIN + 1];
613 w_1 = mfc[FEAT_DCEP_WIN - 1];
614 _w_1 = mfc[-FEAT_DCEP_WIN - 1];
615
616 for (i = 0; i < feat_cepsize(fcb); i++) {
617 d1 = w1[i] - _w1[i];
618 d2 = w_1[i] - _w_1[i];
619
620 f[i] = d1 - d2;
621 }
622}
623
624static void
625feat_1s_c_d_ld_dd_cep2feat(feat_t * fcb, mfcc_t ** mfc, mfcc_t ** feat)
626{
627 mfcc_t *f;
628 mfcc_t *w, *_w;
629 mfcc_t *w1, *w_1, *_w1, *_w_1;
630 mfcc_t d1, d2;
631 int32 i;
632
633 assert(fcb);
634 assert(feat_n_stream(fcb) == 1);
635 assert(feat_stream_len(fcb, 0) == feat_cepsize(fcb) * 4);
636 assert(feat_window_size(fcb) == FEAT_DCEP_WIN * 2);
637
638 /* CEP */
639 memcpy(feat[0], mfc[0], feat_cepsize(fcb) * sizeof(mfcc_t));
640
641 /*
642 * DCEP: mfc[w] - mfc[-w], where w = FEAT_DCEP_WIN;
643 */
644 f = feat[0] + feat_cepsize(fcb);
645 w = mfc[FEAT_DCEP_WIN];
646 _w = mfc[-FEAT_DCEP_WIN];
647
648 for (i = 0; i < feat_cepsize(fcb); i++)
649 f[i] = w[i] - _w[i];
650
651 /*
652 * LDCEP: mfc[w] - mfc[-w], where w = FEAT_DCEP_WIN * 2;
653 */
654 f += feat_cepsize(fcb);
655 w = mfc[FEAT_DCEP_WIN * 2];
656 _w = mfc[-FEAT_DCEP_WIN * 2];
657
658 for (i = 0; i < feat_cepsize(fcb); i++)
659 f[i] = w[i] - _w[i];
660
661 /*
662 * D2CEP: (mfc[w+1] - mfc[-w+1]) - (mfc[w-1] - mfc[-w-1]),
663 * where w = FEAT_DCEP_WIN
664 */
665 f += feat_cepsize(fcb);
666
667 w1 = mfc[FEAT_DCEP_WIN + 1];
668 _w1 = mfc[-FEAT_DCEP_WIN + 1];
669 w_1 = mfc[FEAT_DCEP_WIN - 1];
670 _w_1 = mfc[-FEAT_DCEP_WIN - 1];
671
672 for (i = 0; i < feat_cepsize(fcb); i++) {
673 d1 = w1[i] - _w1[i];
674 d2 = w_1[i] - _w_1[i];
675
676 f[i] = d1 - d2;
677 }
678}
679
680static void
681feat_copy(feat_t * fcb, mfcc_t ** mfc, mfcc_t ** feat)
682{
683 int32 win, i, j;
684
685 win = feat_window_size(fcb);
686
687 /* Concatenate input features */
688 for (i = -win; i <= win; ++i) {
689 uint32 spos = 0;
690
691 for (j = 0; j < feat_n_stream(fcb); ++j) {
692 uint32 stream_len;
693
694 /* Unscale the stream length by the window. */
695 stream_len = feat_stream_len(fcb, j) / (2 * win + 1);
696 memcpy(feat[j] + ((i + win) * stream_len),
697 mfc[i] + spos,
698 stream_len * sizeof(mfcc_t));
699 spos += stream_len;
700 }
701 }
702}
703
704feat_t *
705feat_init(char const *type, cmn_type_t cmn, int32 varnorm,
706 agc_type_t agc, int32 breport, int32 cepsize)
707{
708 feat_t *fcb;
709
710 if (cepsize == 0)
711 cepsize = 13;
712 if (breport)
713 E_INFO
714 ("Initializing feature stream to type: '%s', ceplen=%d, CMN='%s', VARNORM='%s', AGC='%s'\n",
715 type, cepsize, cmn_type_str[cmn], varnorm ? "yes" : "no", agc_type_str[agc]);
716
717 fcb = (feat_t *) ckd_calloc(1, sizeof(feat_t));
718 fcb->refcount = 1;
719 fcb->name = (char *) ckd_salloc(type);
720 if (strcmp(type, "s2_4x") == 0) {
721 /* Sphinx-II format 4-stream feature (Hack!! hardwired constants below) */
722 if (cepsize != 13) {
723 E_ERROR("s2_4x features require cepsize == 13\n");
724 ckd_free(fcb);
725 return NULL;
726 }
727 fcb->cepsize = 13;
728 fcb->n_stream = 4;
729 fcb->stream_len = (uint32 *) ckd_calloc(4, sizeof(uint32));
730 fcb->stream_len[0] = 12;
731 fcb->stream_len[1] = 24;
732 fcb->stream_len[2] = 3;
733 fcb->stream_len[3] = 12;
734 fcb->out_dim = 51;
735 fcb->window_size = 4;
736 fcb->compute_feat = feat_s2_4x_cep2feat;
737 }
738 else if ((strcmp(type, "s3_1x39") == 0) || (strcmp(type, "1s_12c_12d_3p_12dd") == 0)) {
739 /* 1-stream cep/dcep/pow/ddcep (Hack!! hardwired constants below) */
740 if (cepsize != 13) {
741 E_ERROR("s2_4x features require cepsize == 13\n");
742 ckd_free(fcb);
743 return NULL;
744 }
745 fcb->cepsize = 13;
746 fcb->n_stream = 1;
747 fcb->stream_len = (uint32 *) ckd_calloc(1, sizeof(uint32));
748 fcb->stream_len[0] = 39;
749 fcb->out_dim = 39;
750 fcb->window_size = 3;
751 fcb->compute_feat = feat_s3_1x39_cep2feat;
752 }
753 else if (strncmp(type, "1s_c_d_dd", 9) == 0) {
754 fcb->cepsize = cepsize;
755 fcb->n_stream = 1;
756 fcb->stream_len = (uint32 *) ckd_calloc(1, sizeof(uint32));
757 fcb->stream_len[0] = cepsize * 3;
758 fcb->out_dim = cepsize * 3;
759 fcb->window_size = FEAT_DCEP_WIN + 1; /* ddcep needs the extra 1 */
760 fcb->compute_feat = feat_1s_c_d_dd_cep2feat;
761 }
762 else if (strncmp(type, "1s_c_d_ld_dd", 12) == 0) {
763 fcb->cepsize = cepsize;
764 fcb->n_stream = 1;
765 fcb->stream_len = (uint32 *) ckd_calloc(1, sizeof(uint32));
766 fcb->stream_len[0] = cepsize * 4;
767 fcb->out_dim = cepsize * 4;
768 fcb->window_size = FEAT_DCEP_WIN * 2;
769 fcb->compute_feat = feat_1s_c_d_ld_dd_cep2feat;
770 }
771 else if (strncmp(type, "cep_dcep", 8) == 0 || strncmp(type, "1s_c_d", 6) == 0) {
772 /* 1-stream cep/dcep */
773 fcb->cepsize = cepsize;
774 fcb->n_stream = 1;
775 fcb->stream_len = (uint32 *) ckd_calloc(1, sizeof(uint32));
776 fcb->stream_len[0] = feat_cepsize(fcb) * 2;
777 fcb->out_dim = fcb->stream_len[0];
778 fcb->window_size = 2;
779 fcb->compute_feat = feat_s3_cep_dcep;
780 }
781 else if (strncmp(type, "cep", 3) == 0 || strncmp(type, "1s_c", 4) == 0) {
782 /* 1-stream cep */
783 fcb->cepsize = cepsize;
784 fcb->n_stream = 1;
785 fcb->stream_len = (uint32 *) ckd_calloc(1, sizeof(uint32));
786 fcb->stream_len[0] = feat_cepsize(fcb);
787 fcb->out_dim = fcb->stream_len[0];
788 fcb->window_size = 0;
789 fcb->compute_feat = feat_s3_cep;
790 }
791 else if (strncmp(type, "1s_3c", 5) == 0 || strncmp(type, "1s_4c", 5) == 0) {
792 /* 1-stream cep with frames concatenated, so called cepwin features */
793 if (strncmp(type, "1s_3c", 5) == 0)
794 fcb->window_size = 3;
795 else
796 fcb->window_size = 4;
797
798 fcb->cepsize = cepsize;
799 fcb->n_stream = 1;
800 fcb->stream_len = (uint32 *) ckd_calloc(1, sizeof(uint32));
801 fcb->stream_len[0] = feat_cepsize(fcb) * (2 * fcb->window_size + 1);
802 fcb->out_dim = fcb->stream_len[0];
803 fcb->compute_feat = feat_copy;
804 }
805 else {
806 int32 i, l, k;
807 char *strp;
808 char *mtype = ckd_salloc(type);
809 char *wd = ckd_salloc(type);
810 /*
811 * Generic definition: Format should be %d,%d,%d,...,%d (i.e.,
812 * comma separated list of feature stream widths; #items =
813 * #streams). An optional window size (frames will be
814 * concatenated) is also allowed, which can be specified with
815 * a colon after the list of feature streams.
816 */
817 l = strlen(mtype);
818 k = 0;
819 for (i = 1; i < l - 1; i++) {
820 if (mtype[i] == ',') {
821 mtype[i] = ' ';
822 k++;
823 }
824 else if (mtype[i] == ':') {
825 mtype[i] = '\0';
826 fcb->window_size = atoi(mtype + i + 1);
827 break;
828 }
829 }
830 k++; /* Presumably there are (#commas+1) streams */
831 fcb->n_stream = k;
832 fcb->stream_len = (uint32 *) ckd_calloc(k, sizeof(uint32));
833
834 /* Scan individual feature stream lengths */
835 strp = mtype;
836 i = 0;
837 fcb->out_dim = 0;
838 fcb->cepsize = 0;
839 while (sscanf(strp, "%s%n", wd, &l) == 1) {
840 strp += l;
841 if ((i >= fcb->n_stream)
842 || (sscanf(wd, "%d", &(fcb->stream_len[i])) != 1)
843 || (fcb->stream_len[i] <= 0))
844 E_FATAL("Bad feature type argument\n");
845 /* Input size before windowing */
846 fcb->cepsize += fcb->stream_len[i];
847 if (fcb->window_size > 0)
848 fcb->stream_len[i] *= (fcb->window_size * 2 + 1);
849 /* Output size after windowing */
850 fcb->out_dim += fcb->stream_len[i];
851 i++;
852 }
853 if (i != fcb->n_stream)
854 E_FATAL("Bad feature type argument\n");
855 if (fcb->cepsize != cepsize)
856 E_FATAL("Bad feature type argument\n");
857
858 /* Input is already the feature stream */
859 fcb->compute_feat = feat_copy;
860 ckd_free(mtype);
861 ckd_free(wd);
862 }
863
864 if (cmn != CMN_NONE)
865 fcb->cmn_struct = cmn_init(feat_cepsize(fcb));
866 fcb->cmn = cmn;
867 fcb->varnorm = varnorm;
868 if (agc != AGC_NONE) {
869 fcb->agc_struct = agc_init();
870 /*
871 * No need to check if agc is set to EMAX; agc_emax_set() changes only emax related things
872 * Moreover, if agc is not NONE and block mode is used, feat_agc() SILENTLY
873 * switches to EMAX
874 */
875 /* HACK: hardwired initial estimates based on use of CMN (from Sphinx2) */
876 agc_emax_set(fcb->agc_struct, (cmn != CMN_NONE) ? 5.0 : 10.0);
877 }
878 fcb->agc = agc;
879 /*
880 * Make sure this buffer is large enough to be used in feat_s2mfc2feat_block_utt()
881 */
882 fcb->cepbuf = (mfcc_t **) ckd_calloc_2d((LIVEBUFBLOCKSIZE < feat_window_size(fcb) * 2) ? feat_window_size(fcb) * 2 : LIVEBUFBLOCKSIZE,
883 feat_cepsize(fcb),
884 sizeof(mfcc_t));
885 /* This one is actually just an array of pointers to "flatten out"
886 * wraparounds. */
887 fcb->tmpcepbuf = ckd_calloc(2 * feat_window_size(fcb) + 1,
888 sizeof(*fcb->tmpcepbuf));
889
890 return fcb;
891}
892
893
894void
895feat_print(feat_t * fcb, mfcc_t *** feat, int32 nfr, FILE * fp)
896{
897 int32 i, j, k;
898
899 for (i = 0; i < nfr; i++) {
900 fprintf(fp, "%8d:\n", i);
901
902 for (j = 0; j < feat_dimension1(fcb); j++) {
903 fprintf(fp, "\t%2d:", j);
904
905 for (k = 0; k < feat_dimension2(fcb, j); k++)
906 fprintf(fp, " %8.4f", MFCC2FLOAT(feat[i][j][k]));
907 fprintf(fp, "\n");
908 }
909 }
910
911 fflush(fp);
912}
913
914static void
915feat_cmn(feat_t *fcb, mfcc_t **mfc, int32 nfr, int32 beginutt, int32 endutt)
916{
917 cmn_type_t cmn_type = fcb->cmn;
918
919 if (!(beginutt && endutt)
920 && cmn_type != CMN_NONE) /* Only cmn_prior in block computation mode. */
921 cmn_type = CMN_PRIOR;
922
923 switch (cmn_type) {
924 case CMN_CURRENT:
925 cmn(fcb->cmn_struct, mfc, fcb->varnorm, nfr);
926 break;
927 case CMN_PRIOR:
928 cmn_prior(fcb->cmn_struct, mfc, fcb->varnorm, nfr);
929 if (endutt)
930 cmn_prior_update(fcb->cmn_struct);
931 break;
932 default:
933 ;
934 }
935 cep_dump_dbg(fcb, mfc, nfr, "After CMN");
936}
937
938static void
939feat_agc(feat_t *fcb, mfcc_t **mfc, int32 nfr, int32 beginutt, int32 endutt)
940{
941 agc_type_t agc_type = fcb->agc;
942
943 if (!(beginutt && endutt)
944 && agc_type != AGC_NONE) /* Only agc_emax in block computation mode. */
945 agc_type = AGC_EMAX;
946
947 switch (agc_type) {
948 case AGC_MAX:
949 agc_max(fcb->agc_struct, mfc, nfr);
950 break;
951 case AGC_EMAX:
952 agc_emax(fcb->agc_struct, mfc, nfr);
953 if (endutt)
954 agc_emax_update(fcb->agc_struct);
955 break;
956 case AGC_NOISE:
957 agc_noise(fcb->agc_struct, mfc, nfr);
958 break;
959 default:
960 ;
961 }
962 cep_dump_dbg(fcb, mfc, nfr, "After AGC");
963}
964
965static void
966feat_compute_utt(feat_t *fcb, mfcc_t **mfc, int32 nfr, int32 win, mfcc_t ***feat)
967{
968 int32 i;
969
970 cep_dump_dbg(fcb, mfc, nfr, "Incoming features (after padding)");
971
972 /* Create feature vectors */
973 for (i = win; i < nfr - win; i++) {
974 fcb->compute_feat(fcb, mfc + i, feat[i - win]);
975 }
976
977 feat_print_dbg(fcb, feat, nfr - win * 2, "After dynamic feature computation");
978
979 if (fcb->lda) {
980 feat_lda_transform(fcb, feat, nfr - win * 2);
981 feat_print_dbg(fcb, feat, nfr - win * 2, "After LDA");
982 }
983
984 if (fcb->subvecs) {
985 feat_subvec_project(fcb, feat, nfr - win * 2);
986 feat_print_dbg(fcb, feat, nfr - win * 2, "After subvector projection");
987 }
988}
989
990
1003static int32
1004feat_s2mfc_read_norm_pad(feat_t *fcb, char *file, int32 win,
1005 int32 sf, int32 ef,
1006 mfcc_t ***out_mfc,
1007 int32 maxfr,
1008 int32 cepsize)
1009{
1010 FILE *fp;
1011 int32 n_float32;
1012 float32 *float_feat;
1013 struct stat statbuf;
1014 int32 i, n, byterev;
1015 int32 start_pad, end_pad;
1016 mfcc_t **mfc;
1017
1018 /* Initialize the output pointer to NULL, so that any attempts to
1019 free() it if we fail before allocating it will not segfault! */
1020 if (out_mfc)
1021 *out_mfc = NULL;
1022 E_INFO("Reading mfc file: '%s'[%d..%d]\n", file, sf, ef);
1023 if (ef >= 0 && ef <= sf) {
1024 E_ERROR("%s: End frame (%d) <= Start frame (%d)\n", file, ef, sf);
1025 return -1;
1026 }
1027
1028 /* Find filesize; HACK!! To get around intermittent NFS failures, use stat_retry */
1029 if ((stat_retry(file, &statbuf) < 0)
1030 || ((fp = fopen(file, "rb")) == NULL)) {
1031 E_ERROR("Failed to open file '%s' for reading: %s\n", file, strerror(errno));
1032 return -1;
1033 }
1034
1035 /* Read #floats in header */
1036 if (fread_retry(&n_float32, sizeof(int32), 1, fp) != 1) {
1037 E_ERROR("%s: fread(#floats) failed\n", file);
1038 fclose(fp);
1039 return -1;
1040 }
1041
1042 /* Check if n_float32 matches file size */
1043 byterev = 0;
1044 if ((int32) (n_float32 * sizeof(float32) + 4) != (int32) statbuf.st_size) { /* RAH, typecast both sides to remove compile warning */
1045 n = n_float32;
1046 SWAP_INT32(&n);
1047
1048 if ((int32) (n * sizeof(float32) + 4) != (int32) (statbuf.st_size)) { /* RAH, typecast both sides to remove compile warning */
1049 E_ERROR
1050 ("%s: Header size field: %d(%08x); filesize: %d(%08x)\n",
1051 file, n_float32, n_float32, statbuf.st_size,
1052 statbuf.st_size);
1053 fclose(fp);
1054 return -1;
1055 }
1056
1057 n_float32 = n;
1058 byterev = 1;
1059 }
1060 if (n_float32 <= 0) {
1061 E_ERROR("%s: Header size field (#floats) = %d\n", file, n_float32);
1062 fclose(fp);
1063 return -1;
1064 }
1065
1066 /* Convert n to #frames of input */
1067 n = n_float32 / cepsize;
1068 if (n * cepsize != n_float32) {
1069 E_ERROR("Header size field: %d; not multiple of %d\n", n_float32,
1070 cepsize);
1071 fclose(fp);
1072 return -1;
1073 }
1074
1075 /* Check start and end frames */
1076 if (sf > 0) {
1077 if (sf >= n) {
1078 E_ERROR("%s: Start frame (%d) beyond file size (%d)\n", file,
1079 sf, n);
1080 fclose(fp);
1081 return -1;
1082 }
1083 }
1084 if (ef < 0)
1085 ef = n-1;
1086 else if (ef >= n) {
1087 E_WARN("%s: End frame (%d) beyond file size (%d), will truncate\n",
1088 file, ef, n);
1089 ef = n-1;
1090 }
1091
1092 /* Add window to start and end frames */
1093 sf -= win;
1094 ef += win;
1095 if (sf < 0) {
1096 start_pad = -sf;
1097 sf = 0;
1098 }
1099 else
1100 start_pad = 0;
1101 if (ef >= n) {
1102 end_pad = ef - n + 1;
1103 ef = n - 1;
1104 }
1105 else
1106 end_pad = 0;
1107
1108 /* Limit n if indicated by [sf..ef] */
1109 if ((ef - sf + 1) < n)
1110 n = (ef - sf + 1);
1111 if (maxfr > 0 && n + start_pad + end_pad > maxfr) {
1112 E_ERROR("%s: Maximum output size(%d frames) < actual #frames(%d)\n",
1113 file, maxfr, n + start_pad + end_pad);
1114 fclose(fp);
1115 return -1;
1116 }
1117
1118 /* If no output buffer was supplied, then skip the actual data reading. */
1119 if (out_mfc != NULL) {
1120 /* Position at desired start frame and read actual MFC data */
1121 mfc = (mfcc_t **)ckd_calloc_2d(n + start_pad + end_pad, cepsize, sizeof(mfcc_t));
1122 if (sf > 0)
1123 fseek(fp, sf * cepsize * sizeof(float32), SEEK_CUR);
1124 n_float32 = n * cepsize;
1125#ifdef FIXED_POINT
1126 float_feat = ckd_calloc(n_float32, sizeof(float32));
1127#else
1128 float_feat = mfc[start_pad];
1129#endif
1130 if (fread_retry(float_feat, sizeof(float32), n_float32, fp) != n_float32) {
1131 E_ERROR("%s: fread(%dx%d) (MFC data) failed\n", file, n, cepsize);
1132 ckd_free_2d(mfc);
1133 fclose(fp);
1134 return -1;
1135 }
1136 if (byterev) {
1137 for (i = 0; i < n_float32; i++) {
1138 SWAP_FLOAT32(&float_feat[i]);
1139 }
1140 }
1141#ifdef FIXED_POINT
1142 for (i = 0; i < n_float32; ++i) {
1143 mfc[start_pad][i] = FLOAT2MFCC(float_feat[i]);
1144 }
1145 ckd_free(float_feat);
1146#endif
1147
1148 /* Normalize */
1149 feat_cmn(fcb, mfc + start_pad, n, 1, 1);
1150 feat_agc(fcb, mfc + start_pad, n, 1, 1);
1151
1152 /* Replicate start and end frames if necessary. */
1153 for (i = 0; i < start_pad; ++i)
1154 memcpy(mfc[i], mfc[start_pad], cepsize * sizeof(mfcc_t));
1155 for (i = 0; i < end_pad; ++i)
1156 memcpy(mfc[start_pad + n + i], mfc[start_pad + n - 1],
1157 cepsize * sizeof(mfcc_t));
1158
1159 *out_mfc = mfc;
1160 }
1161
1162 fclose(fp);
1163 return n + start_pad + end_pad;
1164}
1165
1166
1167
1168int32
1169feat_s2mfc2feat(feat_t * fcb, const char *file, const char *dir, const char *cepext,
1170 int32 sf, int32 ef, mfcc_t *** feat, int32 maxfr)
1171{
1172 char *path;
1173 char *ps = "/";
1174 int32 win, nfr;
1175 int32 file_length, cepext_length, path_length = 0;
1176 mfcc_t **mfc;
1177
1178 if (fcb->cepsize <= 0) {
1179 E_ERROR("Bad cepsize: %d\n", fcb->cepsize);
1180 return -1;
1181 }
1182
1183 if (cepext == NULL)
1184 cepext = "";
1185
1186 /*
1187 * Create mfc filename, combining file, dir and extension if
1188 * necessary
1189 */
1190
1191 /*
1192 * First we decide about the path. If dir is defined, then use
1193 * it. Otherwise assume the filename already contains the path.
1194 */
1195 if (dir == NULL) {
1196 dir = "";
1197 ps = "";
1198 /*
1199 * This is not true but some 3rd party apps
1200 * may parse the output explicitly checking for this line
1201 */
1202 E_INFO("At directory . (current directory)\n");
1203 }
1204 else {
1205 E_INFO("At directory %s\n", dir);
1206 /*
1207 * Do not forget the path separator!
1208 */
1209 path_length += strlen(dir) + 1;
1210 }
1211
1212 /*
1213 * Include cepext, if it's not already part of the filename.
1214 */
1215 file_length = strlen(file);
1216 cepext_length = strlen(cepext);
1217 if ((file_length > cepext_length)
1218 && (strcmp(file + file_length - cepext_length, cepext) == 0)) {
1219 cepext = "";
1220 cepext_length = 0;
1221 }
1222
1223 /*
1224 * Do not forget the '\0'
1225 */
1226 path_length += file_length + cepext_length + 1;
1227 path = (char*) ckd_calloc(path_length, sizeof(char));
1228
1229#ifdef HAVE_SNPRINTF
1230 /*
1231 * Paranoia is our best friend...
1232 */
1233 while ((file_length = snprintf(path, path_length, "%s%s%s%s", dir, ps, file, cepext)) > path_length) {
1234 path_length = file_length;
1235 path = (char*) ckd_realloc(path, path_length * sizeof(char));
1236 }
1237#else
1238 sprintf(path, "%s%s%s%s", dir, ps, file, cepext);
1239#endif
1240
1241 win = feat_window_size(fcb);
1242 /* Pad maxfr with win, so we read enough raw feature data to
1243 * calculate the requisite number of dynamic features. */
1244 if (maxfr >= 0)
1245 maxfr += win * 2;
1246
1247 if (feat != NULL) {
1248 /* Read mfc file including window or padding if necessary. */
1249 nfr = feat_s2mfc_read_norm_pad(fcb, path, win, sf, ef, &mfc, maxfr, fcb->cepsize);
1250 ckd_free(path);
1251 if (nfr < 0) {
1252 ckd_free_2d((void **) mfc);
1253 return -1;
1254 }
1255
1256 /* Actually compute the features */
1257 feat_compute_utt(fcb, mfc, nfr, win, feat);
1258
1259 ckd_free_2d((void **) mfc);
1260 }
1261 else {
1262 /* Just calculate the number of frames we would need. */
1263 nfr = feat_s2mfc_read_norm_pad(fcb, path, win, sf, ef, NULL, maxfr, fcb->cepsize);
1264 ckd_free(path);
1265 if (nfr < 0)
1266 return nfr;
1267 }
1268
1269
1270 return (nfr - win * 2);
1271}
1272
1273static int32
1274feat_s2mfc2feat_block_utt(feat_t * fcb, mfcc_t ** uttcep,
1275 int32 nfr, mfcc_t *** ofeat)
1276{
1277 mfcc_t **cepbuf;
1278 int32 i, win, cepsize;
1279
1280 win = feat_window_size(fcb);
1281 cepsize = feat_cepsize(fcb);
1282
1283 /* Copy and pad out the utterance (this requires that the
1284 * feature computation functions always access the buffer via
1285 * the frame pointers, which they do) */
1286 cepbuf = ckd_calloc(nfr + win * 2, sizeof(mfcc_t *));
1287 memcpy(cepbuf + win, uttcep, nfr * sizeof(mfcc_t *));
1288
1289 /* Do normalization before we interpolate on the boundary */
1290 feat_cmn(fcb, cepbuf + win, nfr, 1, 1);
1291 feat_agc(fcb, cepbuf + win, nfr, 1, 1);
1292
1293 /* Now interpolate */
1294 for (i = 0; i < win; ++i) {
1295 cepbuf[i] = fcb->cepbuf[i];
1296 memcpy(cepbuf[i], uttcep[0], cepsize * sizeof(mfcc_t));
1297 cepbuf[nfr + win + i] = fcb->cepbuf[win + i];
1298 memcpy(cepbuf[nfr + win + i], uttcep[nfr - 1], cepsize * sizeof(mfcc_t));
1299 }
1300 /* Compute as usual. */
1301 feat_compute_utt(fcb, cepbuf, nfr + win * 2, win, ofeat);
1302 ckd_free(cepbuf);
1303 return nfr;
1304}
1305
1306int32
1307feat_s2mfc2feat_live(feat_t * fcb, mfcc_t ** uttcep, int32 *inout_ncep,
1308 int32 beginutt, int32 endutt, mfcc_t *** ofeat)
1309{
1310 int32 win, cepsize, nbufcep;
1311 int32 i, j, nfeatvec;
1312 int32 zero = 0;
1313
1314 /* Avoid having to check this everywhere. */
1315 if (inout_ncep == NULL) inout_ncep = &zero;
1316
1317 /* Special case for entire utterances. */
1318 if (beginutt && endutt && *inout_ncep > 0)
1319 return feat_s2mfc2feat_block_utt(fcb, uttcep, *inout_ncep, ofeat);
1320
1321 win = feat_window_size(fcb);
1322 cepsize = feat_cepsize(fcb);
1323
1324 /* Empty the input buffer on start of utterance. */
1325 if (beginutt)
1326 fcb->bufpos = fcb->curpos;
1327
1328 /* Calculate how much data is in the buffer already. */
1329 nbufcep = fcb->bufpos - fcb->curpos;
1330 if (nbufcep < 0)
1331 nbufcep = fcb->bufpos + LIVEBUFBLOCKSIZE - fcb->curpos;
1332 /* Add any data that we have to replicate. */
1333 if (beginutt && *inout_ncep > 0)
1334 nbufcep += win;
1335 if (endutt)
1336 nbufcep += win;
1337
1338 /* Only consume as much input as will fit in the buffer. */
1339 if (nbufcep + *inout_ncep > LIVEBUFBLOCKSIZE) {
1340 /* We also can't overwrite the trailing window, hence the
1341 * reason why win is subtracted here. */
1342 *inout_ncep = LIVEBUFBLOCKSIZE - nbufcep - win;
1343 /* Cancel end of utterance processing. */
1344 endutt = FALSE;
1345 }
1346
1347 /* FIXME: Don't modify the input! */
1348 feat_cmn(fcb, uttcep, *inout_ncep, beginutt, endutt);
1349 feat_agc(fcb, uttcep, *inout_ncep, beginutt, endutt);
1350
1351 /* Replicate first frame into the first win frames if we're at the
1352 * beginning of the utterance and there was some actual input to
1353 * deal with. (FIXME: Not entirely sure why that condition) */
1354 if (beginutt && *inout_ncep > 0) {
1355 for (i = 0; i < win; i++) {
1356 memcpy(fcb->cepbuf[fcb->bufpos++], uttcep[0],
1357 cepsize * sizeof(mfcc_t));
1358 fcb->bufpos %= LIVEBUFBLOCKSIZE;
1359 }
1360 /* Move the current pointer past this data. */
1361 fcb->curpos = fcb->bufpos;
1362 nbufcep -= win;
1363 }
1364
1365 /* Copy in frame data to the circular buffer. */
1366 for (i = 0; i < *inout_ncep; ++i) {
1367 memcpy(fcb->cepbuf[fcb->bufpos++], uttcep[i],
1368 cepsize * sizeof(mfcc_t));
1369 fcb->bufpos %= LIVEBUFBLOCKSIZE;
1370 ++nbufcep;
1371 }
1372
1373 /* Replicate last frame into the last win frames if we're at the
1374 * end of the utterance (even if there was no input, so we can
1375 * flush the output). */
1376 if (endutt) {
1377 int32 tpos; /* Index of last input frame. */
1378 if (fcb->bufpos == 0)
1379 tpos = LIVEBUFBLOCKSIZE - 1;
1380 else
1381 tpos = fcb->bufpos - 1;
1382 for (i = 0; i < win; ++i) {
1383 memcpy(fcb->cepbuf[fcb->bufpos++], fcb->cepbuf[tpos],
1384 cepsize * sizeof(mfcc_t));
1385 fcb->bufpos %= LIVEBUFBLOCKSIZE;
1386 }
1387 }
1388
1389 /* We have to leave the trailing window of frames. */
1390 nfeatvec = nbufcep - win;
1391 if (nfeatvec <= 0)
1392 return 0; /* Do nothing. */
1393
1394 for (i = 0; i < nfeatvec; ++i) {
1395 /* Handle wraparound cases. */
1396 if (fcb->curpos - win < 0 || fcb->curpos + win >= LIVEBUFBLOCKSIZE) {
1397 /* Use tmpcepbuf for this case. Actually, we just need the pointers. */
1398 for (j = -win; j <= win; ++j) {
1399 int32 tmppos =
1400 (fcb->curpos + j + LIVEBUFBLOCKSIZE) % LIVEBUFBLOCKSIZE;
1401 fcb->tmpcepbuf[win + j] = fcb->cepbuf[tmppos];
1402 }
1403 fcb->compute_feat(fcb, fcb->tmpcepbuf + win, ofeat[i]);
1404 }
1405 else {
1406 fcb->compute_feat(fcb, fcb->cepbuf + fcb->curpos, ofeat[i]);
1407 }
1408 /* Move the read pointer forward. */
1409 ++fcb->curpos;
1410 fcb->curpos %= LIVEBUFBLOCKSIZE;
1411 }
1412
1413 if (fcb->lda)
1414 feat_lda_transform(fcb, ofeat, nfeatvec);
1415
1416 if (fcb->subvecs)
1417 feat_subvec_project(fcb, ofeat, nfeatvec);
1418
1419 return nfeatvec;
1420}
1421
1422feat_t *
1424{
1425 ++f->refcount;
1426 return f;
1427}
1428
1429int
1431{
1432 if (f == NULL)
1433 return 0;
1434 if (--f->refcount > 0)
1435 return f->refcount;
1436
1437 if (f->cepbuf)
1438 ckd_free_2d((void **) f->cepbuf);
1439 ckd_free(f->tmpcepbuf);
1440
1441 if (f->name) {
1442 ckd_free((void *) f->name);
1443 }
1444 if (f->lda)
1445 ckd_free_3d((void ***) f->lda);
1446
1447 ckd_free(f->stream_len);
1448 ckd_free(f->sv_len);
1449 ckd_free(f->sv_buf);
1450 subvecs_free(f->subvecs);
1451
1452 cmn_free(f->cmn_struct);
1453 agc_free(f->agc_struct);
1454
1455 ckd_free(f);
1456 return 0;
1457}
1458
1459
1460void
1462{
1463 int i;
1464 E_INFO_NOFN("Initialization of feat_t, report:\n");
1465 E_INFO_NOFN("Feature type = %s\n", f->name);
1466 E_INFO_NOFN("Cepstral size = %d\n", f->cepsize);
1467 E_INFO_NOFN("Number of streams = %d\n", f->n_stream);
1468 for (i = 0; i < f->n_stream; i++) {
1469 E_INFO_NOFN("Vector size of stream[%d]: %d\n", i,
1470 f->stream_len[i]);
1471 }
1472 E_INFO_NOFN("Number of subvectors = %d\n", f->n_sv);
1473 for (i = 0; i < f->n_sv; i++) {
1474 int32 *sv;
1475
1476 E_INFO_NOFN("Components of subvector[%d]:", i);
1477 for (sv = f->subvecs[i]; sv && *sv != -1; ++sv)
1478 E_INFOCONT(" %d", *sv);
1479 E_INFOCONT("\n");
1480 }
1481 E_INFO_NOFN("Whether CMN is used = %d\n", f->cmn);
1482 E_INFO_NOFN("Whether AGC is used = %d\n", f->agc);
1483 E_INFO_NOFN("Whether variance is normalized = %d\n", f->varnorm);
1484 E_INFO_NOFN("\n");
1485}
routine that implements automatic gain control
SPHINXBASE_EXPORT void agc_max(agc_t *agc, mfcc_t **mfc, int32 n_frame)
Apply AGC to the given mfc vectors (normalize all C0 mfc coefficients in the given input such that th...
Definition agc.c:109
SPHINXBASE_EXPORT void agc_emax_set(agc_t *agc, float32 m)
Set the current AGC maximum estimate.
Definition agc.c:129
enum agc_type_e agc_type_t
Types of acoustic gain control to apply to the features.
SPHINXBASE_EXPORT void agc_free(agc_t *agc)
Free AGC structure.
Definition agc.c:100
SPHINXBASE_EXPORT const char * agc_type_str[]
String representations of agc_type_t values.
Definition agc.c:70
SPHINXBASE_EXPORT void agc_emax(agc_t *agc, mfcc_t **mfc, int32 n_frame)
Apply AGC to the given block of MFC vectors.
Definition agc.c:142
SPHINXBASE_EXPORT agc_t * agc_init(void)
Initialize AGC structure with default values.
Definition agc.c:91
SPHINXBASE_EXPORT void agc_noise(agc_t *agc, mfcc_t **mfc, int32 n_frame)
Apply AGC using noise threshold to the given block of MFC vectors.
Definition agc.c:180
SPHINXBASE_EXPORT void agc_emax_update(agc_t *agc)
Update AGC parameters for next utterance.
Definition agc.c:159
Cross platform binary IO to process files in sphinx3 format.
Sphinx's memory allocation/deallocation routines.
SPHINXBASE_EXPORT void ckd_free(void *ptr)
Test and free a 1-D array.
Definition ckd_alloc.c:241
SPHINXBASE_EXPORT void ckd_free_3d(void *ptr)
Free a 3-D array (ptr) previously allocated by ckd_calloc_3d.
Definition ckd_alloc.c:294
SPHINXBASE_EXPORT void ckd_free_2d(void *ptr)
Free a 2-D array (ptr) previously allocated by ckd_calloc_2d.
Definition ckd_alloc.c:252
#define ckd_calloc_2d(d1, d2, sz)
Macro for ckd_calloc_2d
Definition ckd_alloc.h:270
#define ckd_calloc(n, sz)
Macros to simplify the use of above functions.
Definition ckd_alloc.h:248
#define ckd_salloc(ptr)
Macro for ckd_salloc
Definition ckd_alloc.h:264
#define ckd_realloc(ptr, sz)
Macro for ckd_realloc
Definition ckd_alloc.h:258
Apply Cepstral Mean Normalization (CMN) to the set of input mfc frames.
enum cmn_type_e cmn_type_t
Types of cepstral mean normalization to apply to the features.
SPHINXBASE_EXPORT void cmn_prior(cmn_t *cmn, mfcc_t **incep, int32 varnorm, int32 nfr)
CMN for one block of data, using prior mean.
Definition cmn_prior.c:146
SPHINXBASE_EXPORT void cmn_prior_update(cmn_t *cmn)
Update prior mean based on observed data.
Definition cmn_prior.c:113
SPHINXBASE_EXPORT void cmn(cmn_t *cmn, mfcc_t **mfc, int32 varnorm, int32 n_frame)
CMN for the whole sentence.
Definition cmn.c:150
SPHINXBASE_EXPORT const char * cmn_type_str[]
String representations of cmn_type_t values.
Definition cmn.c:110
Implementation of logging routines.
#define E_FATAL
Exit with non-zero status after error message.
Definition err.h:127
#define E_ERROR
Print error message to standard error stream.
Definition err.h:169
#define E_WARN
Print warning information to standard error stream.
Definition err.h:164
#define E_INFO
Print logging information to standard error stream.
Definition err.h:147
#define E_INFO_NOFN
Print logging information without filename.
Definition err.h:158
#define E_INFOCONT
Print logging information without header, to standard error stream.
Definition err.h:153
compute the dynamic coefficients from the cepstral vector.
SPHINXBASE_EXPORT mfcc_t *** feat_array_alloc(feat_t *fcb, int32 nfr)
Allocate an array to hold several frames worth of feature vectors.
Definition feat.c:356
SPHINXBASE_EXPORT feat_t * feat_init(char const *type, cmn_type_t cmn, int32 varnorm, agc_type_t agc, int32 breport, int32 cepsize)
Initialize feature module to use the selected type of feature stream.
Definition feat.c:705
#define feat_n_stream(f)
Number of feature streams.
Definition feat.h:218
#define feat_stream_len(f, i)
Length of feature stream i.
Definition feat.h:224
SPHINXBASE_EXPORT int32 ** parse_subvecs(char const *str)
Parse subvector specification string.
Definition feat.c:169
#define feat_dimension2(f, i)
Dimensionality of stream/subvector i in feature output.
Definition feat.h:232
#define feat_dimension(f)
Total dimensionality of feature output.
Definition feat.h:236
SPHINXBASE_EXPORT int32 feat_s2mfc2feat_live(feat_t *fcb, mfcc_t **uttcep, int32 *inout_ncep, int32 beginutt, int32 endutt, mfcc_t ***ofeat)
Feature computation routine for live mode decoder.
Definition feat.c:1307
#define feat_window_size(f)
Size of dynamic feature window.
Definition feat.h:212
SPHINXBASE_EXPORT void feat_lda_transform(feat_t *fcb, mfcc_t ***inout_feat, uint32 nfr)
Transform a block of features using the feature module's LDA transform.
Definition lda.c:139
SPHINXBASE_EXPORT mfcc_t *** feat_array_realloc(feat_t *fcb, mfcc_t ***old_feat, int32 ofr, int32 nfr)
Reallocate the array of features.
Definition feat.c:389
SPHINXBASE_EXPORT int32 feat_s2mfc2feat(feat_t *fcb, const char *file, const char *dir, const char *cepext, int32 sf, int32 ef, mfcc_t ***feat, int32 maxfr)
Read a specified MFC file (or given segment within it), perform CMN/AGC as indicated by fcb,...
Definition feat.c:1169
SPHINXBASE_EXPORT int feat_free(feat_t *f)
Release resource associated with feat_t.
Definition feat.c:1430
SPHINXBASE_EXPORT int feat_set_subvecs(feat_t *fcb, int32 **subvecs)
Add a subvector specification to the feature module.
Definition feat.c:277
#define feat_dimension1(f)
Number of streams or subvectors in feature output.
Definition feat.h:228
#define feat_cepsize(f)
Input dimensionality of feature.
Definition feat.h:208
SPHINXBASE_EXPORT void subvecs_free(int32 **subvecs)
Free array of subvector specs.
Definition feat.c:267
SPHINXBASE_EXPORT void feat_array_free(mfcc_t ***feat)
Free a buffer allocated with feat_array_alloc()
Definition feat.c:418
SPHINXBASE_EXPORT void feat_report(feat_t *f)
Report the feat_t data structure.
Definition feat.c:1461
SPHINXBASE_EXPORT feat_t * feat_retain(feat_t *f)
Retain ownership of feat_t.
Definition feat.c:1423
SPHINXBASE_EXPORT void feat_print(feat_t *fcb, mfcc_t ***feat, int32 nfr, FILE *fp)
Print the given block of feature vectors to the given FILE.
Definition feat.c:895
Generic linked-lists maintenance.
SPHINXBASE_EXPORT glist_t glist_add_int32(glist_t g, int32 val)
Create and prepend a new list node containing an integer.
Definition glist.c:86
SPHINXBASE_EXPORT void glist_free(glist_t g)
Free the given generic list; user-defined data contained within is not automatically freed.
Definition glist.c:133
SPHINXBASE_EXPORT glist_t glist_add_ptr(glist_t g, void *ptr)
Create and prepend a new list node, with the given user-defined data, at the HEAD of the given generi...
Definition glist.c:74
#define gnode_ptr(g)
Head of a list of gnodes.
Definition glist.h:109
SPHINXBASE_EXPORT int32 glist_count(glist_t g)
Count the number of elements in a given linked list.
Definition glist.c:145
file IO related operations.
SPHINXBASE_EXPORT int32 stat_retry(const char *file, struct stat *statbuf)
There is no bitstream decoder, because a stream abstraction is too slow.
Definition pio.c:480
SPHINXBASE_EXPORT int32 fread_retry(void *pointer, int32 size, int32 num_items, FILE *stream)
NFS file reads seem to fail now and then.
Definition pio.c:398
Basic type definitions used in Sphinx.
Structure for describing a speech feature type.
A node in a generic list.
Definition glist.h:100