SphinxBase 0.6
fsg_model.c
1/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2/* ====================================================================
3 * Copyright (c) 1999-2004 Carnegie Mellon University. All rights
4 * reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 *
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 *
18 *
19 * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
20 * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
21 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
23 * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 *
31 * ====================================================================
32 *
33 */
34
35/* System headers. */
36#ifdef _WIN32_WCE
37/*MC in a debug build it's implicitly included by assert.h
38 but you need this in a release build */
39#include <windows.h>
40#else
41#include <time.h>
42#endif /* _WIN32_WCE */
43#include <stdio.h>
44#include <string.h>
45#include <assert.h>
46
47/* SphinxBase headers. */
48#include "sphinxbase/err.h"
49#include "sphinxbase/pio.h"
52#include "sphinxbase/strfuncs.h"
54#include "sphinxbase/fsg_model.h"
55
64 hash_table_t *null_trans; /* Null transitions keyed by state. */
65 hash_table_t *trans; /* Lists of non-null transitions keyed by state. */
66};
67
72 hash_iter_t *itor, *null_itor;
73 gnode_t *gn;
74};
75
/*
 * Keywords of the textual FSG file format.  Most declarations have a
 * one-letter spelling and a long spelling; the reader accepts either.
 */
#define FSG_MODEL_BEGIN_DECL        "FSG_BEGIN"
#define FSG_MODEL_END_DECL          "FSG_END"
/* Number of states. */
#define FSG_MODEL_N_DECL            "N"
#define FSG_MODEL_NUM_STATES_DECL   "NUM_STATES"
/* Start state. */
#define FSG_MODEL_S_DECL            "S"
#define FSG_MODEL_START_STATE_DECL  "START_STATE"
/* Final state. */
#define FSG_MODEL_F_DECL            "F"
#define FSG_MODEL_FINAL_STATE_DECL  "FINAL_STATE"
/* Transition. */
#define FSG_MODEL_T_DECL            "T"
#define FSG_MODEL_TRANSITION_DECL   "TRANSITION"
/* A line whose first character is this one is skipped as a comment. */
#define FSG_MODEL_COMMENT_CHAR      '#'
87
88
89static int32
90nextline_str2words(FILE * fp, int32 * lineno,
91 char **lineptr, char ***wordptr)
92{
93 for (;;) {
94 size_t len;
95 int32 n;
96
97 ckd_free(*lineptr);
98 if ((*lineptr = fread_line(fp, &len)) == NULL)
99 return -1;
100
101 (*lineno)++;
102
103 if ((*lineptr)[0] == FSG_MODEL_COMMENT_CHAR)
104 continue; /* Skip comment lines */
105
106 n = str2words(*lineptr, NULL, 0);
107 if (n == 0)
108 continue; /* Skip blank lines */
109
110 /* Abuse of realloc(), but this doesn't have to be fast. */
111 if (*wordptr == NULL)
112 *wordptr = ckd_calloc(n, sizeof(**wordptr));
113 else
114 *wordptr = ckd_realloc(*wordptr, n * sizeof(**wordptr));
115 return str2words(*lineptr, *wordptr, n);
116 }
117}
118
119void
120fsg_model_trans_add(fsg_model_t * fsg,
121 int32 from, int32 to, int32 logp, int32 wid)
122{
123 fsg_link_t *link;
124 glist_t gl;
125 gnode_t *gn;
126
127 if (fsg->trans[from].trans == NULL)
128 fsg->trans[from].trans = hash_table_new(5, HASH_CASE_YES);
129
130 /* Check for duplicate link (i.e., link already exists with label=wid) */
131 for (gn = gl = fsg_model_trans(fsg, from, to); gn; gn = gnode_next(gn)) {
132 link = (fsg_link_t *) gnode_ptr(gn);
133 if (link->wid == wid) {
134 if (link->logs2prob < logp)
135 link->logs2prob = logp;
136 return;
137 }
138 }
139
140 /* Create transition object */
141 link = listelem_malloc(fsg->link_alloc);
142 link->from_state = from;
143 link->to_state = to;
144 link->logs2prob = logp;
145 link->wid = wid;
146
147 /* Add it to the list of transitions and update the hash table */
148 gl = glist_add_ptr(gl, (void *) link);
149 hash_table_replace_bkey(fsg->trans[from].trans,
150 (char const *) &link->to_state,
151 sizeof(link->to_state), gl);
152}
153
154int32
155fsg_model_tag_trans_add(fsg_model_t * fsg, int32 from, int32 to,
156 int32 logp, int32 wid)
157{
158 fsg_link_t *link, *link2;
159
160 /* Check for transition probability */
161 if (logp > 0) {
162 E_FATAL("Null transition prob must be <= 1.0 (state %d -> %d)\n",
163 from, to);
164 }
165
166 /* Self-loop null transitions (with prob <= 1.0) are redundant */
167 if (from == to)
168 return -1;
169
170 if (fsg->trans[from].null_trans == NULL)
171 fsg->trans[from].null_trans = hash_table_new(5, HASH_CASE_YES);
172
173 /* Check for a duplicate link; if found, keep the higher prob */
174 link = fsg_model_null_trans(fsg, from, to);
175 if (link) {
176 if (link->logs2prob < logp) {
177 link->logs2prob = logp;
178 return 0;
179 }
180 else
181 return -1;
182 }
183
184 /* Create null transition object */
185 link = listelem_malloc(fsg->link_alloc);
186 link->from_state = from;
187 link->to_state = to;
188 link->logs2prob = logp;
189 link->wid = -1;
190
191 link2 = (fsg_link_t *)
192 hash_table_enter_bkey(fsg->trans[from].null_trans,
193 (char const *) &link->to_state,
194 sizeof(link->to_state), link);
195 assert(link == link2);
196
197 return 1;
198}
199
200int32
201fsg_model_null_trans_add(fsg_model_t * fsg, int32 from, int32 to,
202 int32 logp)
203{
204 return fsg_model_tag_trans_add(fsg, from, to, logp, -1);
205}
206
208fsg_model_null_trans_closure(fsg_model_t * fsg, glist_t nulls)
209{
210 gnode_t *gn1;
211 int updated;
212 fsg_link_t *tl1, *tl2;
213 int32 k, n;
214
215 E_INFO("Computing transitive closure for null transitions\n");
216
217 if (nulls == NULL) {
218 fsg_link_t *null;
219 int i, j;
220
221 for (i = 0; i < fsg->n_state; ++i) {
222 for (j = 0; j < fsg->n_state; ++j) {
223 if ((null = fsg_model_null_trans(fsg, i, j)))
224 nulls = glist_add_ptr(nulls, null);
225 }
226 }
227 }
228
229 /*
230 * Probably not the most efficient closure implementation, in general, but
231 * probably reasonably efficient for a sparse null transition matrix.
232 */
233 n = 0;
234 do {
235 updated = FALSE;
236
237 for (gn1 = nulls; gn1; gn1 = gnode_next(gn1)) {
238 hash_iter_t *itor;
239
240 tl1 = (fsg_link_t *) gnode_ptr(gn1);
241 assert(tl1->wid < 0);
242
243 if (fsg->trans[tl1->to_state].null_trans == NULL)
244 continue;
245
246 for (itor = hash_table_iter(fsg->trans[tl1->to_state].null_trans);
247 itor; itor = hash_table_iter_next(itor)) {
248
249 tl2 = (fsg_link_t *) hash_entry_val(itor->ent);
250
251 k = fsg_model_null_trans_add(fsg,
252 tl1->from_state,
253 tl2->to_state,
254 tl1->logs2prob +
255 tl2->logs2prob);
256 if (k >= 0) {
257 updated = TRUE;
258 if (k > 0) {
259 nulls = glist_add_ptr(nulls, (void *)
260 fsg_model_null_trans
261 (fsg, tl1->from_state,
262 tl2->to_state));
263 n++;
264 }
265 }
266 }
267 }
268 } while (updated);
269
270 E_INFO("%d null transitions added\n", n);
271
272 return nulls;
273}
274
276fsg_model_trans(fsg_model_t * fsg, int32 i, int32 j)
277{
278 void *val;
279
280 if (fsg->trans[i].trans == NULL)
281 return NULL;
282 if (hash_table_lookup_bkey(fsg->trans[i].trans, (char const *) &j,
283 sizeof(j), &val) < 0)
284 return NULL;
285 return (glist_t) val;
286}
287
289fsg_model_null_trans(fsg_model_t * fsg, int32 i, int32 j)
290{
291 void *val;
292
293 if (fsg->trans[i].null_trans == NULL)
294 return NULL;
295 if (hash_table_lookup_bkey(fsg->trans[i].null_trans, (char const *) &j,
296 sizeof(j), &val) < 0)
297 return NULL;
298 return (fsg_link_t *) val;
299}
300
302fsg_model_arcs(fsg_model_t * fsg, int32 i)
303{
304 fsg_arciter_t *itor;
305
306 if (fsg->trans[i].trans == NULL && fsg->trans[i].null_trans == NULL)
307 return NULL;
308 itor = ckd_calloc(1, sizeof(*itor));
309 if (fsg->trans[i].null_trans)
310 itor->null_itor = hash_table_iter(fsg->trans[i].null_trans);
311 if (fsg->trans[i].trans)
312 itor->itor = hash_table_iter(fsg->trans[i].trans);
313 if (itor->itor != NULL)
314 itor->gn = hash_entry_val(itor->itor->ent);
315 return itor;
316}
317
319fsg_arciter_get(fsg_arciter_t * itor)
320{
321 /* Iterate over non-null arcs first. */
322 if (itor->gn)
323 return (fsg_link_t *) gnode_ptr(itor->gn);
324 else if (itor->null_itor)
325 return (fsg_link_t *) hash_entry_val(itor->null_itor->ent);
326 else
327 return NULL;
328}
329
331fsg_arciter_next(fsg_arciter_t * itor)
332{
333 /* Iterate over non-null arcs first. */
334 if (itor->gn) {
335 itor->gn = gnode_next(itor->gn);
336 /* Move to the next destination arc. */
337 if (itor->gn == NULL) {
338 itor->itor = hash_table_iter_next(itor->itor);
339 if (itor->itor != NULL)
340 itor->gn = hash_entry_val(itor->itor->ent);
341 else if (itor->null_itor == NULL)
342 goto stop_iteration;
343 }
344 }
345 else {
346 if (itor->null_itor == NULL)
347 goto stop_iteration;
348 itor->null_itor = hash_table_iter_next(itor->null_itor);
349 if (itor->null_itor == NULL)
350 goto stop_iteration;
351 }
352 return itor;
353 stop_iteration:
354 fsg_arciter_free(itor);
355 return NULL;
356
357}
358
359void
360fsg_arciter_free(fsg_arciter_t * itor)
361{
362 if (itor == NULL)
363 return;
364 hash_table_iter_free(itor->null_itor);
365 hash_table_iter_free(itor->itor);
366 ckd_free(itor);
367}
368
369int
370fsg_model_word_id(fsg_model_t * fsg, char const *word)
371{
372 int wid;
373
374 /* Search for an existing word matching this. */
375 for (wid = 0; wid < fsg->n_word; ++wid) {
376 if (0 == strcmp(fsg->vocab[wid], word))
377 break;
378 }
379 /* If not found, add this to the vocab. */
380 if (wid == fsg->n_word)
381 return -1;
382 return wid;
383}
384
385int
386fsg_model_word_add(fsg_model_t * fsg, char const *word)
387{
388 int wid, old_size;
389
390 /* Search for an existing word matching this. */
391 wid = fsg_model_word_id(fsg, word);
392 /* If not found, add this to the vocab. */
393 if (wid == -1) {
394 wid = fsg->n_word;
395 if (fsg->n_word == fsg->n_word_alloc) {
396 old_size = fsg->n_word_alloc;
397 fsg->n_word_alloc += 10;
398 fsg->vocab = ckd_realloc(fsg->vocab,
399 fsg->n_word_alloc *
400 sizeof(*fsg->vocab));
401 if (fsg->silwords)
402 fsg->silwords =
403 bitvec_realloc(fsg->silwords, old_size, fsg->n_word_alloc);
404 if (fsg->altwords)
405 fsg->altwords =
406 bitvec_realloc(fsg->altwords, old_size, fsg->n_word_alloc);
407 }
408 ++fsg->n_word;
409 fsg->vocab[wid] = ckd_salloc(word);
410 }
411 return wid;
412}
413
414int
415fsg_model_add_silence(fsg_model_t * fsg, char const *silword,
416 int state, float32 silprob)
417{
418 int32 logsilp;
419 int n_trans, silwid, src;
420
421 E_INFO("Adding silence transitions for %s to FSG\n", silword);
422
423 silwid = fsg_model_word_add(fsg, silword);
424 logsilp = (int32) (logmath_log(fsg->lmath, silprob) * fsg->lw);
425 if (fsg->silwords == NULL)
427 bitvec_set(fsg->silwords, silwid);
428
429 n_trans = 0;
430 if (state == -1) {
431 for (src = 0; src < fsg->n_state; src++) {
432 fsg_model_trans_add(fsg, src, src, logsilp, silwid);
433 ++n_trans;
434 }
435 }
436 else {
437 fsg_model_trans_add(fsg, state, state, logsilp, silwid);
438 ++n_trans;
439 }
440
441 E_INFO("Added %d silence word transitions\n", n_trans);
442 return n_trans;
443}
444
445int
446fsg_model_add_alt(fsg_model_t * fsg, char const *baseword,
447 char const *altword)
448{
449 int i, basewid, altwid;
450 int ntrans;
451
452 /* FIXME: This will get slow, eventually... */
453 for (basewid = 0; basewid < fsg->n_word; ++basewid)
454 if (0 == strcmp(fsg->vocab[basewid], baseword))
455 break;
456 if (basewid == fsg->n_word) {
457 E_ERROR("Base word %s not present in FSG vocabulary!\n", baseword);
458 return -1;
459 }
460 altwid = fsg_model_word_add(fsg, altword);
461 if (fsg->altwords == NULL)
463 bitvec_set(fsg->altwords, altwid);
464
465 E_DEBUG(2, ("Adding alternate word transitions (%s,%s) to FSG\n",
466 baseword, altword));
467
468 /* Look for all transitions involving baseword and duplicate them. */
469 /* FIXME: This will also get slow, eventually... */
470 ntrans = 0;
471 for (i = 0; i < fsg->n_state; ++i) {
472 hash_iter_t *itor;
473 if (fsg->trans[i].trans == NULL)
474 continue;
475 for (itor = hash_table_iter(fsg->trans[i].trans); itor;
476 itor = hash_table_iter_next(itor)) {
477 glist_t trans;
478 gnode_t *gn;
479
480 trans = hash_entry_val(itor->ent);
481 for (gn = trans; gn; gn = gnode_next(gn)) {
482 fsg_link_t *fl = gnode_ptr(gn);
483 if (fl->wid == basewid) {
484 fsg_link_t *link;
485
486 /* Create transition object */
487 link = listelem_malloc(fsg->link_alloc);
488 link->from_state = fl->from_state;
489 link->to_state = fl->to_state;
490 link->logs2prob = fl->logs2prob; /* FIXME!!!??? */
491 link->wid = altwid;
492
493 trans = glist_add_ptr(trans, (void *) link);
494 ++ntrans;
495 }
496 }
497 hash_entry_val(itor->ent) = trans;
498 }
499 }
500
501 E_DEBUG(2, ("Added %d alternate word transitions\n", ntrans));
502 return ntrans;
503}
504
505
507fsg_model_init(char const *name, logmath_t * lmath, float32 lw,
508 int32 n_state)
509{
510 fsg_model_t *fsg;
511
512 /* Allocate basic stuff. */
513 fsg = ckd_calloc(1, sizeof(*fsg));
514 fsg->refcount = 1;
516 fsg->lmath = lmath;
517 fsg->name = name ? ckd_salloc(name) : NULL;
518 fsg->n_state = n_state;
519 fsg->lw = lw;
520
521 fsg->trans = ckd_calloc(fsg->n_state, sizeof(*fsg->trans));
522
523 return fsg;
524}
525
527fsg_model_read(FILE * fp, logmath_t * lmath, float32 lw)
528{
529 fsg_model_t *fsg;
530 hash_table_t *vocab;
531 hash_iter_t *itor;
532 int32 lastwid;
533 char **wordptr;
534 char *lineptr;
535 char *fsgname;
536 int32 lineno;
537 int32 n, i, j;
538 int n_state, n_trans, n_null_trans;
539 glist_t nulls;
540 float32 p;
541
542 lineno = 0;
543 vocab = hash_table_new(32, FALSE);
544 wordptr = NULL;
545 lineptr = NULL;
546 nulls = NULL;
547 fsgname = NULL;
548 fsg = NULL;
549
550 /* Scan upto FSG_BEGIN header */
551 for (;;) {
552 n = nextline_str2words(fp, &lineno, &lineptr, &wordptr);
553 if (n < 0) {
554 E_ERROR("%s declaration missing\n", FSG_MODEL_BEGIN_DECL);
555 goto parse_error;
556 }
557
558 if ((strcmp(wordptr[0], FSG_MODEL_BEGIN_DECL) == 0)) {
559 if (n > 2) {
560 E_ERROR("Line[%d]: malformed FSG_BEGIN declaration\n",
561 lineno);
562 goto parse_error;
563 }
564 break;
565 }
566 }
567 /* Save FSG name, or it will get clobbered below :(.
568 * If name is missing, try the default.
569 */
570 if (n == 2) {
571 fsgname = ckd_salloc(wordptr[1]);
572 }
573 else {
574 E_WARN("FSG name is missing\n");
575 fsgname = ckd_salloc("unknown");
576 }
577
578 /* Read #states */
579 n = nextline_str2words(fp, &lineno, &lineptr, &wordptr);
580 if ((n != 2)
581 || ((strcmp(wordptr[0], FSG_MODEL_N_DECL) != 0)
582 && (strcmp(wordptr[0], FSG_MODEL_NUM_STATES_DECL) != 0))
583 || (sscanf(wordptr[1], "%d", &n_state) != 1)
584 || (n_state <= 0)) {
585 E_ERROR
586 ("Line[%d]: #states declaration line missing or malformed\n",
587 lineno);
588 goto parse_error;
589 }
590
591 /* Now create the FSG. */
592 fsg = fsg_model_init(fsgname, lmath, lw, n_state);
593 ckd_free(fsgname);
594 fsgname = NULL;
595
596 /* Read start state */
597 n = nextline_str2words(fp, &lineno, &lineptr, &wordptr);
598 if ((n != 2)
599 || ((strcmp(wordptr[0], FSG_MODEL_S_DECL) != 0)
600 && (strcmp(wordptr[0], FSG_MODEL_START_STATE_DECL) != 0))
601 || (sscanf(wordptr[1], "%d", &(fsg->start_state)) != 1)
602 || (fsg->start_state < 0)
603 || (fsg->start_state >= fsg->n_state)) {
604 E_ERROR
605 ("Line[%d]: start state declaration line missing or malformed\n",
606 lineno);
607 goto parse_error;
608 }
609
610 /* Read final state */
611 n = nextline_str2words(fp, &lineno, &lineptr, &wordptr);
612 if ((n != 2)
613 || ((strcmp(wordptr[0], FSG_MODEL_F_DECL) != 0)
614 && (strcmp(wordptr[0], FSG_MODEL_FINAL_STATE_DECL) != 0))
615 || (sscanf(wordptr[1], "%d", &(fsg->final_state)) != 1)
616 || (fsg->final_state < 0)
617 || (fsg->final_state >= fsg->n_state)) {
618 E_ERROR
619 ("Line[%d]: final state declaration line missing or malformed\n",
620 lineno);
621 goto parse_error;
622 }
623
624 /* Read transitions */
625 lastwid = 0;
626 n_trans = n_null_trans = 0;
627 for (;;) {
628 int32 wid, tprob;
629
630 n = nextline_str2words(fp, &lineno, &lineptr, &wordptr);
631 if (n <= 0) {
632 E_ERROR("Line[%d]: transition or FSG_END statement expected\n",
633 lineno);
634 goto parse_error;
635 }
636
637 if ((strcmp(wordptr[0], FSG_MODEL_END_DECL) == 0)) {
638 break;
639 }
640
641 if ((strcmp(wordptr[0], FSG_MODEL_T_DECL) == 0)
642 || (strcmp(wordptr[0], FSG_MODEL_TRANSITION_DECL) == 0)) {
643
644
645 if (((n != 4) && (n != 5))
646 || (sscanf(wordptr[1], "%d", &i) != 1)
647 || (sscanf(wordptr[2], "%d", &j) != 1)
648 || (i < 0) || (i >= fsg->n_state)
649 || (j < 0) || (j >= fsg->n_state)) {
650 E_ERROR
651 ("Line[%d]: transition spec malformed; Expecting: from-state to-state trans-prob [word]\n",
652 lineno);
653 goto parse_error;
654 }
655
656 p = atof_c(wordptr[3]);
657 if ((p <= 0.0) || (p > 1.0)) {
658 E_ERROR
659 ("Line[%d]: transition spec malformed; Expecting float as transition probability\n",
660 lineno);
661 goto parse_error;
662 }
663 }
664 else {
665 E_ERROR("Line[%d]: transition or FSG_END statement expected\n",
666 lineno);
667 goto parse_error;
668 }
669
670 tprob = (int32) (logmath_log(lmath, p) * fsg->lw);
671 /* Add word to "dictionary". */
672 if (n > 4) {
673 if (hash_table_lookup_int32(vocab, wordptr[4], &wid) < 0) {
674 (void) hash_table_enter_int32(vocab,
675 ckd_salloc(wordptr[4]),
676 lastwid);
677 wid = lastwid;
678 ++lastwid;
679 }
680 fsg_model_trans_add(fsg, i, j, tprob, wid);
681 ++n_trans;
682 }
683 else {
684 if (fsg_model_null_trans_add(fsg, i, j, tprob) == 1) {
685 ++n_null_trans;
686 nulls =
687 glist_add_ptr(nulls, fsg_model_null_trans(fsg, i, j));
688 }
689 }
690 }
691
692 E_INFO("FSG: %d states, %d unique words, %d transitions (%d null)\n",
693 fsg->n_state, hash_table_inuse(vocab), n_trans, n_null_trans);
694
695
696 /* Now create a string table from the "dictionary" */
697 fsg->n_word = hash_table_inuse(vocab);
698 fsg->n_word_alloc = fsg->n_word + 10; /* Pad it a bit. */
699 fsg->vocab = ckd_calloc(fsg->n_word_alloc, sizeof(*fsg->vocab));
700 for (itor = hash_table_iter(vocab); itor;
701 itor = hash_table_iter_next(itor)) {
702 char const *word = hash_entry_key(itor->ent);
703 int32 wid = (int32) (long) hash_entry_val(itor->ent);
704 fsg->vocab[wid] = (char *) word;
705 }
706 hash_table_free(vocab);
707
708 /* Do transitive closure on null transitions */
709 nulls = fsg_model_null_trans_closure(fsg, nulls);
710 glist_free(nulls);
711
712 ckd_free(lineptr);
713 ckd_free(wordptr);
714
715 return fsg;
716
717 parse_error:
718 for (itor = hash_table_iter(vocab); itor;
719 itor = hash_table_iter_next(itor))
720 ckd_free((char *) hash_entry_key(itor->ent));
721 glist_free(nulls);
722 hash_table_free(vocab);
723 ckd_free(fsgname);
724 ckd_free(lineptr);
725 ckd_free(wordptr);
726 fsg_model_free(fsg);
727 return NULL;
728}
729
730
732fsg_model_readfile(const char *file, logmath_t * lmath, float32 lw)
733{
734 FILE *fp;
735 fsg_model_t *fsg;
736
737 if ((fp = fopen(file, "r")) == NULL) {
738 E_ERROR_SYSTEM("Failed to open FSG file '%s' for reading", file);
739 return NULL;
740 }
741 fsg = fsg_model_read(fp, lmath, lw);
742 fclose(fp);
743 return fsg;
744}
745
747fsg_model_retain(fsg_model_t * fsg)
748{
749 ++fsg->refcount;
750 return fsg;
751}
752
753static void
754trans_list_free(fsg_model_t * fsg, int32 i)
755{
756 hash_iter_t *itor;
757
758 /* FIXME (maybe): FSG links will all get freed when we call
759 * listelem_alloc_free() so don't bother freeing them explicitly
760 * here. */
761 if (fsg->trans[i].trans) {
762 for (itor = hash_table_iter(fsg->trans[i].trans);
763 itor; itor = hash_table_iter_next(itor)) {
764 glist_t gl = (glist_t) hash_entry_val(itor->ent);
765 glist_free(gl);
766 }
767 }
768 hash_table_free(fsg->trans[i].trans);
769 hash_table_free(fsg->trans[i].null_trans);
770}
771
772int
773fsg_model_free(fsg_model_t * fsg)
774{
775 int i;
776
777 if (fsg == NULL)
778 return 0;
779
780 if (--fsg->refcount > 0)
781 return fsg->refcount;
782
783 for (i = 0; i < fsg->n_word; ++i)
784 ckd_free(fsg->vocab[i]);
785 for (i = 0; i < fsg->n_state; ++i)
786 trans_list_free(fsg, i);
787 ckd_free(fsg->trans);
788 ckd_free(fsg->vocab);
790 bitvec_free(fsg->silwords);
791 bitvec_free(fsg->altwords);
792 ckd_free(fsg->name);
793 ckd_free(fsg);
794 return 0;
795}
796
797
798void
799fsg_model_write(fsg_model_t * fsg, FILE * fp)
800{
801 int32 i;
802
803 fprintf(fp, "%s %s\n", FSG_MODEL_BEGIN_DECL,
804 fsg->name ? fsg->name : "");
805 fprintf(fp, "%s %d\n", FSG_MODEL_NUM_STATES_DECL, fsg->n_state);
806 fprintf(fp, "%s %d\n", FSG_MODEL_START_STATE_DECL, fsg->start_state);
807 fprintf(fp, "%s %d\n", FSG_MODEL_FINAL_STATE_DECL, fsg->final_state);
808
809 for (i = 0; i < fsg->n_state; i++) {
810 fsg_arciter_t *itor;
811
812 for (itor = fsg_model_arcs(fsg, i); itor;
813 itor = fsg_arciter_next(itor)) {
814 fsg_link_t *tl = fsg_arciter_get(itor);
815
816 fprintf(fp, "%s %d %d %f %s\n", FSG_MODEL_TRANSITION_DECL,
817 tl->from_state, tl->to_state,
818 logmath_exp(fsg->lmath,
819 (int32) (tl->logs2prob / fsg->lw)),
820 (tl->wid < 0) ? "" : fsg_model_word_str(fsg, tl->wid));
821 }
822 }
823
824 fprintf(fp, "%s\n", FSG_MODEL_END_DECL);
825
826 fflush(fp);
827}
828
829void
830fsg_model_writefile(fsg_model_t * fsg, char const *file)
831{
832 FILE *fp;
833
834 assert(fsg);
835
836 E_INFO("Writing FSG file '%s'\n", file);
837
838 if ((fp = fopen(file, "w")) == NULL) {
839 E_ERROR_SYSTEM("Failed to open FSG file '%s' for reading", file);
840 return;
841 }
842
843 fsg_model_write(fsg, fp);
844
845 fclose(fp);
846}
847
848static void
849fsg_model_write_fsm_trans(fsg_model_t * fsg, int i, FILE * fp)
850{
851 fsg_arciter_t *itor;
852
853 for (itor = fsg_model_arcs(fsg, i); itor;
854 itor = fsg_arciter_next(itor)) {
855 fsg_link_t *tl = fsg_arciter_get(itor);
856 fprintf(fp, "%d %d %s %f\n",
857 tl->from_state, tl->to_state,
858 (tl->wid < 0) ? "<eps>" : fsg_model_word_str(fsg, tl->wid),
859 -logmath_log_to_ln(fsg->lmath, tl->logs2prob / fsg->lw));
860 }
861}
862
863void
864fsg_model_write_fsm(fsg_model_t * fsg, FILE * fp)
865{
866 int i;
867
868 /* Write transitions from initial state first. */
869 fsg_model_write_fsm_trans(fsg, fsg_model_start_state(fsg), fp);
870
871 /* Other states. */
872 for (i = 0; i < fsg->n_state; i++) {
873 if (i == fsg_model_start_state(fsg))
874 continue;
875 fsg_model_write_fsm_trans(fsg, i, fp);
876 }
877
878 /* Final state. */
879 fprintf(fp, "%d 0\n", fsg_model_final_state(fsg));
880
881 fflush(fp);
882}
883
884void
885fsg_model_writefile_fsm(fsg_model_t * fsg, char const *file)
886{
887 FILE *fp;
888
889 assert(fsg);
890
891 E_INFO("Writing FSM file '%s'\n", file);
892
893 if ((fp = fopen(file, "w")) == NULL) {
894 E_ERROR_SYSTEM("Failed to open fsm file '%s' for writing", file);
895 return;
896 }
897
898 fsg_model_write_fsm(fsg, fp);
899
900 fclose(fp);
901}
902
903void
904fsg_model_write_symtab(fsg_model_t * fsg, FILE * file)
905{
906 int i;
907
908 fprintf(file, "<eps> 0\n");
909 for (i = 0; i < fsg_model_n_word(fsg); ++i) {
910 fprintf(file, "%s %d\n", fsg_model_word_str(fsg, i), i + 1);
911 }
912 fflush(file);
913}
914
915void
916fsg_model_writefile_symtab(fsg_model_t * fsg, char const *file)
917{
918 FILE *fp;
919
920 assert(fsg);
921
922 E_INFO("Writing FSM symbol table '%s'\n", file);
923
924 if ((fp = fopen(file, "w")) == NULL) {
925 E_ERROR("Failed to open symbol table '%s' for writing", file);
926 return;
927 }
928
929 fsg_model_write_symtab(fsg, fp);
930
931 fclose(fp);
932}
#define bitvec_free(v)
Free a bit vector.
Definition bitvec.h:87
SPHINXBASE_EXPORT bitvec_t * bitvec_realloc(bitvec_t *vec, size_t old_len, size_t new_len)
Resize a bit vector, clear the remaining bits.
Definition bitvec.c:64
#define bitvec_alloc(n)
Allocate a bit vector, all bits are clear.
Definition bitvec.h:75
#define bitvec_set(v, b)
Set the b-th bit of bit vector v.
Definition bitvec.h:95
Sphinx's memory allocation/deallocation routines.
SPHINXBASE_EXPORT void ckd_free(void *ptr)
Test and free a 1-D array.
Definition ckd_alloc.c:241
#define ckd_calloc(n, sz)
Macros to simplify the use of above functions.
Definition ckd_alloc.h:248
#define ckd_salloc(ptr)
Macro for ckd_salloc
Definition ckd_alloc.h:264
#define ckd_realloc(ptr, sz)
Macro for ckd_realloc
Definition ckd_alloc.h:258
Implementation of logging routines.
#define E_FATAL
Exit with non-zero status after error message.
Definition err.h:127
#define E_ERROR_SYSTEM
Print error text; Call perror("");.
Definition err.h:142
#define E_ERROR
Print error message to standard error stream.
Definition err.h:169
#define E_WARN
Print warning information to standard error stream.
Definition err.h:164
#define E_INFO
Print logging information to standard error stream.
Definition err.h:147
#define E_DEBUG(level, x)
Print debugging information to standard error stream.
Definition err.h:212
SPHINXBASE_EXPORT void glist_free(glist_t g)
Free the given generic list; user-defined data contained within is not automatically freed.
Definition glist.c:133
SPHINXBASE_EXPORT glist_t glist_add_ptr(glist_t g, void *ptr)
Create and prepend a new list node, with the given user-defined data, at the HEAD of the given generi...
Definition glist.c:74
#define gnode_ptr(g)
Head of a list of gnodes.
Definition glist.h:109
Hash table implementation.
SPHINXBASE_EXPORT void hash_table_free(hash_table_t *h)
Free the specified hash table; the caller is responsible for freeing the key strings pointed to by th...
Definition hash_table.c:695
SPHINXBASE_EXPORT hash_iter_t * hash_table_iter_next(hash_iter_t *itor)
Get the next key-value pair in iteration.
Definition hash_table.c:663
#define hash_table_enter_int32(h, k, v)
Add a 32-bit integer value to a hash table.
Definition hash_table.h:228
SPHINXBASE_EXPORT void * hash_table_enter_bkey(hash_table_t *h, const char *key, size_t len, void *val)
Like hash_table_enter, but with an explicitly specified key length, instead of a NULL-terminated,...
Definition hash_table.c:542
SPHINXBASE_EXPORT void * hash_table_replace_bkey(hash_table_t *h, const char *key, size_t len, void *val)
Like hash_table_replace, but with an explicitly specified key length, instead of a NULL-terminated,...
Definition hash_table.c:555
SPHINXBASE_EXPORT void hash_table_iter_free(hash_iter_t *itor)
Delete an unfinished iterator.
Definition hash_table.c:689
SPHINXBASE_EXPORT int32 hash_table_lookup_bkey(hash_table_t *h, const char *key, size_t len, void **val)
Like hash_lookup, but with an explicitly specified key length, instead of a NULL-terminated,...
Definition hash_table.c:344
#define hash_entry_val(e)
Access macros.
Definition hash_table.h:175
SPHINXBASE_EXPORT int32 hash_table_lookup_int32(hash_table_t *h, const char *key, int32 *val)
Look up a 32-bit integer value in a hash table.
Definition hash_table.c:329
SPHINXBASE_EXPORT hash_iter_t * hash_table_iter(hash_table_t *h)
Start iterating over key-value pairs in a hash table.
Definition hash_table.c:653
SPHINXBASE_EXPORT hash_table_t * hash_table_new(int32 size, int32 casearg)
Allocate a new hash table for a given expected size.
Definition hash_table.c:158
SPHINXBASE_EXPORT void listelem_alloc_free(listelem_alloc_t *le)
Finalize and release all memory associated with a list element allocator.
SPHINXBASE_EXPORT listelem_alloc_t * listelem_alloc_init(size_t elemsize)
Initialize and return a list element allocator.
#define listelem_malloc(le)
Allocate a list element and return pointer to it.
SPHINXBASE_EXPORT float64 logmath_log_to_ln(logmath_t *lmath, int logb_p)
Convert integer log in base B to natural log (in floating point).
Definition logmath.c:468
SPHINXBASE_EXPORT float64 logmath_exp(logmath_t *lmath, int logb_p)
Convert integer log in base B to linear floating point.
Definition logmath.c:456
SPHINXBASE_EXPORT int logmath_log(logmath_t *lmath, float64 p)
Convert linear floating point number to integer log in base B.
Definition logmath.c:447
file IO related operations.
SPHINXBASE_EXPORT char * fread_line(FILE *stream, size_t *out_len)
Read a line of arbitrary length from a file and return it as a newly allocated string.
Definition pio.c:367
Basic type definitions used in Sphinx.
Miscellaneous useful string functions.
SPHINXBASE_EXPORT int32 str2words(char *line, char **wptr, int32 n_wptr)
Convert a line to an array of "words", based on whitespace separators.
Definition strfuncs.c:115
SPHINXBASE_EXPORT double atof_c(char const *str)
Locale independent version of atof().
Definition strfuncs.c:56
Implementation of arc iterator.
Definition fsg_model.c:71
Word level FSG definition.
Definition fsg_model.h:91
int32 n_word_alloc
Number of words allocated in vocab.
Definition fsg_model.h:95
int32 start_state
Must be in the range [0..n_state-1].
Definition fsg_model.h:101
char ** vocab
Vocabulary for this FSG.
Definition fsg_model.h:96
int32 n_state
number of states in FSG
Definition fsg_model.h:100
int32 n_word
Number of unique words in this FSG.
Definition fsg_model.h:94
logmath_t * lmath
Pointer to log math computation object.
Definition fsg_model.h:99
char * name
A unique string identifier for this FSG.
Definition fsg_model.h:93
bitvec_t * silwords
Indicates which words are silence/fillers.
Definition fsg_model.h:97
listelem_alloc_t * link_alloc
Allocator for FSG links.
Definition fsg_model.h:106
trans_list_t * trans
Transitions out of each state, if any.
Definition fsg_model.h:105
int32 final_state
Must be in the range [0..n_state-1].
Definition fsg_model.h:102
bitvec_t * altwords
Indicates which words are pronunciation alternates.
Definition fsg_model.h:98
float32 lw
Language weight that's been applied to transition logprobs.
Definition fsg_model.h:103
int refcount
Reference count.
Definition fsg_model.h:92
A node in a generic list.
Definition glist.h:100
hash_entry_t * ent
Current entry in that table.
Definition hash_table.h:170
Adjacency list (opaque) for a state in an FSG.
Definition fsg_model.c:63