SphinxBase 0.6
lm3g_model.h
1/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2/* ====================================================================
3 * Copyright (c) 1999-2007 Carnegie Mellon University. All rights
4 * reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 *
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 *
18 * This work was supported in part by funding from the Defense Advanced
19 * Research Projects Agency and the National Science Foundation of the
20 * United States of America, and the CMU Sphinx Speech Consortium.
21 *
22 * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
23 * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
24 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
26 * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33 *
34 * ====================================================================
35 *
36 */
37/*
38 * \file lm3g_model.h Core Sphinx 3-gram code used in
39 * DMP/DMP32/ARPA (for now) model code.
40 *
41 * Author: A cast of thousands, probably.
42 */
43
44#ifndef __NGRAM_MODEL_LM3G_H__
45#define __NGRAM_MODEL_LM3G_H__
46
48
49#include "ngram_model_internal.h"
50
/*
 * Type used to store language model probabilities.
 *
 * The two members share the same storage (this is a union), so a stored
 * value can be read either as a float32 or as an int32.
 * NOTE(review): which member is the valid one at a given point is not
 * visible in this header -- confirm against the code that fills the tables.
 */
54typedef union {
55 float32 f; /* floating-point view of the stored value */
56 int32 l; /* 32-bit integer view of the stored value */
57} lmprob_t;
58
68typedef struct sorted_entry_s {
70 uint32 lower;
73 uint32 higher;
77
82typedef struct {
83 sorted_entry_t *list;
84 int32 free;
85 int32 size;
87
96
/*
 * Forward declarations only: struct bigram_s and struct trigram_s are not
 * defined in this listing, so code here can only hold pointers to them.
 */
100typedef struct bigram_s bigram_t; /* Bigram structure. */
104typedef struct trigram_s trigram_t; /* Trigram structure. */
105
106
107/*
108 * To conserve space, bigram info is kept in many tables. Since the number
109 * of distinct values << #bigrams, these table indices can be 16-bit values.
110 * prob2 and bo_wt2 are such indices, but storing the trigram index is harder.
111 * It is supposed to be the index of the first trigram entry for each bigram.
112 * But such an index cannot be represented in 16-bits, hence the following
113 * segmentation scheme: Partition bigrams into segments of BG_SEG_SZ
114 * consecutive entries, such that #trigrams in each segment <= 2**16 (the
115 * corresponding trigram segment). The bigram_t.trigrams value is then a
116 * 16-bit relative index within the trigram segment. A separate table--
117 * tseg_base (lm3g_model_t.tseg_base)--has the index of the 1st trigram for each bigram segment.
118 */
/* Number of consecutive bigram entries grouped into one segment. */
119#define BG_SEG_SZ 512 /* chosen so that #trigram/segment <= 2**16 */
/*
 * log2(BG_SEG_SZ): 2**9 == 512, so (i >> LOG_BG_SEG_SZ) is the segment
 * number of bigram i (the index used into tseg_base). Must be kept in
 * sync with BG_SEG_SZ if either value is changed.
 */
120#define LOG_BG_SEG_SZ 9
121
/*
 * Trigram information cache.
 *
 * One node describes the trigrams that follow a bigram (lw1,lw2). Nodes are
 * chained through `next` into a singly linked list of entries that share
 * the same parent lw2.
 *
 * NOTE(review): the member index for this header also lists a
 * `trigram_t *tg` member ("Trigrams for lw1,lw2", original line 135) that
 * does not appear in this listing -- confirm against the original header
 * before relying on this layout.
 */
129typedef struct tginfo_s {
130 int32 w1; /* lw1 component of bigram lw1,lw2 */
132 int32 n_tg; /* number of trigrams for parent bigram lw1,lw2 */
133 int32 bowt; /* trigram backoff weight for lw1,lw2 */
134 int32 used; /* whether used since last lm_reset */
136 struct tginfo_s *next; /* next lw1 with same parent lw2; NULL if none */
137} tginfo_t;
138
158
/*
 * Operates on the trigram-information (tginfo) cache of a 3-gram model.
 * NOTE(review): presumably releases the cache's storage; the implementation
 * is not visible here -- confirm, including whether `lm3g` itself is freed.
 *
 * base: generic n-gram model this lm3g model belongs to.
 * lm3g: common internal 3-gram model structure.
 */
159void lm3g_tginfo_free(ngram_model_t *base, lm3g_model_t *lm3g);
/*
 * Operates on the trigram-information (tginfo) cache of a 3-gram model.
 * NOTE(review): presumably discards or recycles cache entries based on
 * their `used` flag ("whether used since last lm_reset") -- confirm against
 * the implementation, which is not visible here.
 *
 * base: generic n-gram model this lm3g model belongs to.
 * lm3g: common internal 3-gram model structure.
 */
160void lm3g_tginfo_reset(ngram_model_t *base, lm3g_model_t *lm3g);
/*
 * Applies three floating-point weights to a 3-gram model.
 * NOTE(review): lw, wip and uw presumably stand for language weight, word
 * insertion penalty and unigram weight -- the names are not expanded
 * anywhere in this header; confirm against the implementation.
 *
 * base: generic n-gram model this lm3g model belongs to.
 * lm3g: common internal 3-gram model structure.
 * lw, wip, uw: weights to apply (see note above).
 */
161void lm3g_apply_weights(ngram_model_t *base,
162 lm3g_model_t *lm3g,
163 float32 lw, float32 wip, float32 uw);
/*
 * Adds a unigram to a 3-gram model.
 * NOTE(review): the meaning of the int32 return value and the scale/units
 * of `lweight` are not visible in this header -- confirm against the
 * implementation before relying on either.
 *
 * base: generic n-gram model this lm3g model belongs to.
 * lm3g: common internal 3-gram model structure.
 * wid: word ID of the unigram.
 * lweight: weight for the new unigram (see note above).
 */
164int32 lm3g_add_ug(ngram_model_t *base,
165 lm3g_model_t *lm3g, int32 wid, int32 lweight);
166
167
/*
 * Initializes the sorted list `l`.
 * NOTE(review): presumably allocates l->list and sets up l->free -- the
 * implementation is not visible here; confirm, and pair with
 * free_sorted_list() if so.
 */
172void init_sorted_list(sorted_list_t *l);
/*
 * Releases the sorted list `l`.
 * NOTE(review): presumably frees only the storage owned by the list, not
 * the sorted_list_t object itself -- confirm against the implementation.
 */
173void free_sorted_list(sorted_list_t *l);
/*
 * Returns a pointer to lmprob_t values taken from the sorted list `l`.
 * NOTE(review): presumably a newly allocated array that the caller owns,
 * ordered by value -- ownership, length and ordering are not visible here;
 * confirm against the implementation.
 */
174lmprob_t *vals_in_sorted_list(sorted_list_t *l);
/*
 * Maps the value pointed to by `val` to an int32 ID within sorted list `l`.
 * NOTE(review): presumably looks the value up and inserts it when absent,
 * returning its entry index -- confirm against the implementation, which
 * is not visible here. `val` is passed as int32 * even though list entries
 * hold lmprob_t values; presumably the integer view of the union is used.
 */
175int32 sorted_id(sorted_list_t * l, int32 *val);
176
177#endif /* __NGRAM_MODEL_LM3G_H__ */
Fast memory allocator for uniformly sized objects.
Bigram structure.
Fast linked list allocator.
Common internal structure for Sphinx 3-gram models.
Definition lm3g_model.h:142
int32 n_prob2
prob2 size
Definition lm3g_model.h:147
listelem_alloc_t * le
List element allocator for tginfo.
Definition lm3g_model.h:156
lmprob_t * prob2
Table of actual bigram probs.
Definition lm3g_model.h:146
int32 * tseg_base
tseg_base[i>>LOG_BG_SEG_SZ] = index of 1st trigram for bigram segment (i>>LOG_BG_SEG_SZ)
Definition lm3g_model.h:152
lmprob_t * bo_wt2
Table of actual bigram backoff weights.
Definition lm3g_model.h:148
tginfo_t ** tginfo
tginfo[lw2] is head of linked list of trigram information for some cached subset of bigrams (*,...
Definition lm3g_model.h:154
int32 n_bo_wt2
bo_wt2 size
Definition lm3g_model.h:149
lmprob_t * prob3
Table of actual trigram probs.
Definition lm3g_model.h:150
int32 n_prob3
prob3 size
Definition lm3g_model.h:151
Common implementation of ngram_model_t.
Bigram probs and bo-wts, and trigram probs are kept in separate tables rather than within the bigram_...
Definition lm3g_model.h:68
uint32 higher
index of another entry.
Definition lm3g_model.h:73
uint32 lower
index of another entry.
Definition lm3g_model.h:70
lmprob_t val
value being kept in this node
Definition lm3g_model.h:69
The sorted list.
Definition lm3g_model.h:82
int32 free
first free element in list
Definition lm3g_model.h:84
Trigram information cache.
Definition lm3g_model.h:129
struct tginfo_s * next
Next lw1 with same parent lw2; NULL if none.
Definition lm3g_model.h:136
trigram_t * tg
Trigrams for lw1,lw2.
Definition lm3g_model.h:135
int32 used
whether used since last lm_reset
Definition lm3g_model.h:134
int32 n_tg
number tg for parent bigram lw1,lw2
Definition lm3g_model.h:132
int32 w1
lw1 component of bigram lw1,lw2.
Definition lm3g_model.h:130
int32 bowt
tg bowt for lw1,lw2
Definition lm3g_model.h:133
Trigram structure.
Unigram structure (common among all lm3g implementations)
Definition lm3g_model.h:91
lmprob_t prob1
Unigram probability.
Definition lm3g_model.h:92
lmprob_t bo_wt1
Unigram backoff weight.
Definition lm3g_model.h:93
int32 bigrams
Index of 1st entry in lm_t.bigrams[].
Definition lm3g_model.h:94
Type used to store language model probabilities.
Definition lm3g_model.h:54