SphinxBase 0.6
pio.c
1/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2/* ====================================================================
3 * Copyright (c) 1999-2004 Carnegie Mellon University. All rights
4 * reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 *
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 *
18 * This work was supported in part by funding from the Defense Advanced
19 * Research Projects Agency and the National Science Foundation of the
20 * United States of America, and the CMU Sphinx Speech Consortium.
21 *
22 * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
23 * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
24 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
26 * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33 *
34 * ====================================================================
35 *
36 */
37
38#include <config.h>
39
40#include <stdio.h>
41#include <stdlib.h>
42#include <string.h>
43#ifdef HAVE_UNISTD_H
44#include <unistd.h>
45#endif
46#ifdef HAVE_SYS_STAT_H
47#include <sys/stat.h>
48#endif
49#ifdef HAVE_SYS_TYPES_H
50#include <sys/types.h>
51#endif
52#include <assert.h>
53
54#include "sphinxbase/pio.h"
55#include "sphinxbase/filename.h"
56#include "sphinxbase/err.h"
57#include "sphinxbase/strfuncs.h"
59
60#ifndef EXEEXT
61#define EXEEXT ""
62#endif
63
/* Compression schemes that can be recognised from a file name's suffix. */
enum {
    COMP_NONE = 0,              /* no recognised compression suffix */
    COMP_COMPRESS = 1,          /* compress(1) */
    COMP_GZIP = 2,              /* gzip */
    COMP_BZIP2 = 3              /* bzip2 */
};
70
71static void
72guess_comptype(char const *file, int32 *ispipe, int32 *isgz)
73{
74 int k;
75
76 k = strlen(file);
77 *ispipe = 0;
78 *isgz = COMP_NONE;
79 if ((k > 2)
80 && ((strcmp(file + k - 2, ".Z") == 0)
81 || (strcmp(file + k - 2, ".z") == 0))) {
82 *ispipe = 1;
83 *isgz = COMP_COMPRESS;
84 }
85 else if ((k > 3) && ((strcmp(file + k - 3, ".gz") == 0)
86 || (strcmp(file + k - 3, ".GZ") == 0))) {
87 *ispipe = 1;
88 *isgz = COMP_GZIP;
89 }
90 else if ((k > 4) && ((strcmp(file + k - 4, ".bz2") == 0)
91 || (strcmp(file + k - 4, ".BZ2") == 0))) {
92 *ispipe = 1;
93 *isgz = COMP_BZIP2;
94 }
95}
96
97FILE *
98fopen_comp(const char *file, const char *mode, int32 * ispipe)
99{
100 FILE *fp;
101
102#ifndef HAVE_POPEN
103 *ispipe = 0; /* No popen() on WinCE */
104#else /* HAVE_POPEN */
105 int32 isgz;
106 guess_comptype(file, ispipe, &isgz);
107#endif /* HAVE_POPEN */
108
109 if (*ispipe) {
110#ifndef HAVE_POPEN
111 /* Shouldn't get here, anyway */
112 E_FATAL("No popen() on WinCE\n");
113#else
114 if (strcmp(mode, "r") == 0) {
115 char *command;
116 switch (isgz) {
117 case COMP_GZIP:
118 command = string_join("gunzip" EXEEXT, " -c ", file, NULL);
119 break;
120 case COMP_COMPRESS:
121 command = string_join("zcat" EXEEXT, " ", file, NULL);
122 break;
123 case COMP_BZIP2:
124 command = string_join("bunzip2" EXEEXT, " -c ", file, NULL);
125 break;
126 default:
127 command = NULL; /* Make compiler happy. */
128 E_FATAL("Unknown compression type %d\n", isgz);
129 }
130 if ((fp = popen(command, mode)) == NULL) {
131 E_ERROR_SYSTEM("Failed to open a pipe for a command '%s' mode '%s'", command, mode);
132 ckd_free(command);
133 return NULL;
134 }
135 ckd_free(command);
136 }
137 else if (strcmp(mode, "w") == 0) {
138 char *command;
139 switch (isgz) {
140 case COMP_GZIP:
141 command = string_join("gzip" EXEEXT, " > ", file, NULL);
142 break;
143 case COMP_COMPRESS:
144 command = string_join("compress" EXEEXT, " -c > ", file, NULL);
145 break;
146 case COMP_BZIP2:
147 command = string_join("bzip2" EXEEXT, " > ", file, NULL);
148 break;
149 default:
150 command = NULL; /* Make compiler happy. */
151 E_FATAL("Unknown compression type %d\n", isgz);
152 }
153 if ((fp = popen(command, mode)) == NULL) {
154 E_ERROR_SYSTEM("Failed to open a pipe for a command '%s' mode '%s'", command, mode);
155 ckd_free(command);
156 return NULL;
157 }
158 ckd_free(command);
159 }
160 else {
161 E_ERROR("Compressed file operation for mode %s is not supported", mode);
162 return NULL;
163 }
164#endif /* HAVE_POPEN */
165 }
166 else {
167 fp = fopen(file, mode);
168 }
169
170 return (fp);
171}
172
173
174void
175fclose_comp(FILE * fp, int32 ispipe)
176{
177 if (ispipe) {
178#ifdef HAVE_POPEN
179#if defined(_WIN32) && (!defined(__SYMBIAN32__))
180 _pclose(fp);
181#else
182 pclose(fp);
183#endif
184#endif
185 }
186 else
187 fclose(fp);
188}
189
190
191FILE *
192fopen_compchk(const char *file, int32 * ispipe)
193{
194#ifndef HAVE_POPEN
195 *ispipe = 0; /* No popen() on WinCE */
196 /* And therefore the rest of this function is useless. */
197 return (fopen_comp(file, "r", ispipe));
198#else /* HAVE_POPEN */
199 int32 isgz;
200 FILE *fh;
201
202 /* First just try to fopen_comp() it */
203 if ((fh = fopen_comp(file, "r", ispipe)) != NULL)
204 return fh;
205 else {
206 char *tmpfile;
207 int k;
208
209 /* File doesn't exist; try other compressed/uncompressed form, as appropriate */
210 guess_comptype(file, ispipe, &isgz);
211 k = strlen(file);
212 tmpfile = ckd_calloc(k+5, 1);
213 strcpy(tmpfile, file);
214 switch (isgz) {
215 case COMP_GZIP:
216 tmpfile[k - 3] = '\0';
217 break;
218 case COMP_BZIP2:
219 tmpfile[k - 4] = '\0';
220 break;
221 case COMP_COMPRESS:
222 tmpfile[k - 2] = '\0';
223 break;
224 case COMP_NONE:
225 strcpy(tmpfile + k, ".gz");
226 if ((fh = fopen_comp(tmpfile, "r", ispipe)) != NULL) {
227 E_WARN("Using %s instead of %s\n", tmpfile, file);
228 ckd_free(tmpfile);
229 return fh;
230 }
231 strcpy(tmpfile + k, ".bz2");
232 if ((fh = fopen_comp(tmpfile, "r", ispipe)) != NULL) {
233 E_WARN("Using %s instead of %s\n", tmpfile, file);
234 ckd_free(tmpfile);
235 return fh;
236 }
237 strcpy(tmpfile + k, ".Z");
238 if ((fh = fopen_comp(tmpfile, "r", ispipe)) != NULL) {
239 E_WARN("Using %s instead of %s\n", tmpfile, file);
240 ckd_free(tmpfile);
241 return fh;
242 }
243 ckd_free(tmpfile);
244 return NULL;
245 }
246 E_WARN("Using %s instead of %s\n", tmpfile, file);
247 fh = fopen_comp(tmpfile, "r", ispipe);
248 ckd_free(tmpfile);
249 return NULL;
250 }
251#endif /* HAVE_POPEN */
252}
253
256{
257 lineiter_t *li;
258
259 li = ckd_calloc(1, sizeof(*li));
260 li->buf = ckd_malloc(128);
261 li->buf[0] = '\0';
262 li->bsiz = 128;
263 li->len = 0;
264 li->fh = fh;
265
266 li = lineiter_next(li);
267
268 /* Strip the UTF-8 BOM */
269
270 if (li && 0 == strncmp(li->buf, "\xef\xbb\xbf", 3)) {
271 memmove(li->buf, li->buf + 3, strlen(li->buf + 1));
272 li->len -= 3;
273 }
274
275 return li;
276}
277
280{
281 lineiter_t *li;
282
283 li = lineiter_start(fh);
284
285 if (li == NULL)
286 return li;
287
288 li->clean = TRUE;
289
290 if (li->buf && li->buf[0] == '#') {
291 li = lineiter_next(li);
292 } else {
293 string_trim(li->buf, STRING_BOTH);
294 }
295
296 return li;
297}
298
299
300static lineiter_t *
301lineiter_next_plain(lineiter_t *li)
302{
303 /* We are reading the next line */
304 li->lineno++;
305
306 /* Read a line and check for EOF. */
307 if (fgets(li->buf, li->bsiz, li->fh) == NULL) {
308 lineiter_free(li);
309 return NULL;
310 }
311 /* If we managed to read the whole thing, then we are done
312 * (this will be by far the most common result). */
313 li->len = strlen(li->buf);
314 if (li->len < li->bsiz - 1 || li->buf[li->len - 1] == '\n')
315 return li;
316
317 /* Otherwise we have to reallocate and keep going. */
318 while (1) {
319 li->bsiz *= 2;
320 li->buf = ckd_realloc(li->buf, li->bsiz);
321 /* If we get an EOF, we are obviously done. */
322 if (fgets(li->buf + li->len, li->bsiz - li->len, li->fh) == NULL) {
323 li->len += strlen(li->buf + li->len);
324 return li;
325 }
326 li->len += strlen(li->buf + li->len);
327 /* If we managed to read the whole thing, then we are done. */
328 if (li->len < li->bsiz - 1 || li->buf[li->len - 1] == '\n')
329 return li;
330 }
331
332 /* Shouldn't get here. */
333 return li;
334}
335
336
339{
340 if (!li->clean)
341 return lineiter_next_plain(li);
342
343 for (li = lineiter_next_plain(li); li; li = lineiter_next_plain(li)) {
344 if (li->buf && li->buf[0] != '#') {
345 li->buf = string_trim(li->buf, STRING_BOTH);
346 break;
347 }
348 }
349 return li;
350}
351
353{
354 return li->lineno;
355}
356
357void
359{
360 if (li == NULL)
361 return;
362 ckd_free(li->buf);
363 ckd_free(li);
364}
365
/*
 * Read one line of arbitrary length from `stream`.
 *
 * The line is assembled from 128-byte fgets() chunks and keeps its
 * trailing newline, if any.  When `out_len` is non-NULL it receives the
 * number of characters in the result, not counting the terminator.
 *
 * Returns a newly allocated string that the caller must release with
 * ckd_free(), or NULL if nothing could be read.
 */
char *
fread_line(FILE *stream, size_t *out_len)
{
    char chunk[128];
    char *line = NULL;
    size_t total = 0;

    while (fgets(chunk, sizeof(chunk), stream) != NULL) {
        size_t got = strlen(chunk);

        /* Grow the result to hold this chunk and its terminator. */
        if (line == NULL)
            line = ckd_malloc(got + 1);
        else
            line = ckd_realloc(line, total + got + 1);

        memcpy(line + total, chunk, got + 1);
        total += got;

        /* Stop on a short read or end of line. */
        if (got < sizeof(chunk) - 1 || chunk[got - 1] == '\n')
            break;
    }

    if (out_len)
        *out_len = total;
    return line;
}
394
395#define FREAD_RETRY_COUNT 60
396
397int32
398fread_retry(void *pointer, int32 size, int32 num_items, FILE * stream)
399{
400 char *data;
401 uint32 n_items_read;
402 uint32 n_items_rem;
403 uint32 n_retry_rem;
404 int32 loc;
405
406 n_retry_rem = FREAD_RETRY_COUNT;
407
408 data = pointer;
409 loc = 0;
410 n_items_rem = num_items;
411
412 do {
413 n_items_read = fread(&data[loc], size, n_items_rem, stream);
414
415 n_items_rem -= n_items_read;
416
417 if (n_items_rem > 0) {
418 /* an incomplete read occurred */
419
420 if (n_retry_rem == 0)
421 return -1;
422
423 if (n_retry_rem == FREAD_RETRY_COUNT) {
424 E_ERROR_SYSTEM("fread() failed; retrying...\n");
425 }
426
427 --n_retry_rem;
428
429 loc += n_items_read * size;
430#ifdef HAVE_UNISTD_H
431 sleep(1);
432#endif
433 }
434 } while (n_items_rem > 0);
435
436 return num_items;
437}
438
439
440/* Silvio Moioli: updated to use Unicode */
441#ifdef _WIN32_WCE /* No stat() on WinCE */
442int32
443stat_retry(const char *file, struct stat * statbuf)
444{
445 WIN32_FIND_DATAW file_data;
446 HANDLE *h;
447 wchar_t *wfile;
448 size_t len;
449
450 len = mbstowcs(NULL, file, 0) + 1;
451 wfile = ckd_calloc(len, sizeof(*wfile));
452 mbstowcs(wfile, file, len);
453 if ((h = FindFirstFileW(wfile, &file_data)) == INVALID_HANDLE_VALUE) {
454 ckd_free(wfile);
455 return -1;
456 }
457 ckd_free(wfile);
458 memset(statbuf, 0, sizeof(statbuf));
459 statbuf->st_mtime = file_data.ftLastWriteTime.dwLowDateTime;
460 statbuf->st_size = file_data.nFileSizeLow;
461 FindClose(h);
462
463 return 0;
464}
465
466
467int32
468stat_mtime(const char *file)
469{
470 struct stat statbuf;
471
472 if (stat_retry(file, &statbuf) != 0)
473 return -1;
474
475 return ((int32) statbuf.st_mtime);
476}
477#else
478#define STAT_RETRY_COUNT 10
479int32
480stat_retry(const char *file, struct stat * statbuf)
481{
482 int32 i;
483
484
485
486 for (i = 0; i < STAT_RETRY_COUNT; i++) {
487
488#ifndef HAVE_SYS_STAT_H
489 FILE *fp;
490
491 if ((fp=(FILE *)fopen(file, "r"))!= 0)
492 {
493 fseek( fp, 0, SEEK_END);
494 statbuf->st_size = ftell( fp );
495 fclose(fp);
496 return 0;
497 }
498
499#else /* HAVE_SYS_STAT_H */
500 if (stat(file, statbuf) == 0)
501 return 0;
502#endif
503 if (i == 0) {
504 E_ERROR_SYSTEM("Failed to stat file '%s'; retrying...", file);
505 }
506#ifdef HAVE_UNISTD_H
507 sleep(1);
508#endif
509 }
510
511 return -1;
512}
513
514int32
515stat_mtime(const char *file)
516{
517 struct stat statbuf;
518
519#ifdef HAVE_SYS_STAT_H
520 if (stat(file, &statbuf) != 0)
521 return -1;
522#else /* HAVE_SYS_STAT_H */
523 if (stat_retry(file, &statbuf) != 0)
524 return -1;
525#endif /* HAVE_SYS_STAT_H */
526
527 return ((int32) statbuf.st_mtime);
528}
529#endif /* !_WIN32_WCE */
530
532 FILE *fh;
533 unsigned char buf, bbits;
534 int16 refcount;
535};
536
539{
540 bit_encode_t *be;
541
542 be = ckd_calloc(1, sizeof(*be));
543 be->refcount = 1;
544 be->fh = outfh;
545 return be;
546}
547
550{
551 ++be->refcount;
552 return be;
553}
554
555int
557{
558 if (be == NULL)
559 return 0;
560 if (--be->refcount > 0)
561 return be->refcount;
562 ckd_free(be);
563
564 return 0;
565}
566
567int
568bit_encode_write(bit_encode_t *be, unsigned char const *bits, int nbits)
569{
570 int tbits;
571
572 tbits = nbits + be->bbits;
573 if (tbits < 8) {
574 /* Append to buffer. */
575 be->buf |= ((bits[0] >> (8 - nbits)) << (8 - tbits));
576 }
577 else {
578 int i = 0;
579 while (tbits >= 8) {
580 /* Shift bits out of the buffer and splice with high-order bits */
581 fputc(be->buf | ((bits[i]) >> be->bbits), be->fh);
582 /* Put low-order bits back into buffer */
583 be->buf = (bits[i] << (8 - be->bbits)) & 0xff;
584 tbits -= 8;
585 ++i;
586 }
587 }
588 /* tbits contains remaining number of bits. */
589 be->bbits = tbits;
590
591 return nbits;
592}
593
594int
595bit_encode_write_cw(bit_encode_t *be, uint32 codeword, int nbits)
596{
597 unsigned char bits[4];
598 codeword <<= (32 - nbits);
599 bits[0] = (codeword >> 24) & 0xff;
600 bits[1] = (codeword >> 16) & 0xff;
601 bits[2] = (codeword >> 8) & 0xff;
602 bits[3] = codeword & 0xff;
603 return bit_encode_write(be, bits, nbits);
604}
605
606int
608{
609 if (be->bbits) {
610 fputc(be->buf, be->fh);
611 be->bbits = 0;
612 }
613 return 0;
614}
615
616#if defined(HAVE_SYS_STAT_H) && !defined(__MINGW32__) /* Unix, Cygwin, doesn't work on MINGW */
/*
 * Create directory `path`, creating missing parent directories first.
 *
 * Returns 0 when the directory was created or already exists, -1 when
 * `path` is empty or creation failed.
 */
int
build_directory(const char *path)
{
    char *dirname;

    /* Utterly failed... */
    if (strlen(path) == 0)
        return -1;

    /* Utterly succeeded... */
    if (mkdir(path, 0777) == 0)
        return 0;

    /* Or, it already exists... */
    if (errno == EEXIST)
        return 0;

    /* Anything other than a missing parent is a real error. */
    if (errno != ENOENT) {
        E_ERROR_SYSTEM("Failed to create %s", path);
        return -1;
    }

    /* A parent is missing: build it, then try again. */
    dirname = ckd_salloc(path);
    path2dirname(path, dirname);
    build_directory(dirname);
    ckd_free(dirname);
    return mkdir(path, 0777);
}
643#elif defined(_WIN32)
644/* FIXME: Implement this. */
/*
 * Placeholder for Windows builds: directory creation is not implemented.
 * Logs an error and returns -1 without looking at `path`.
 */
int
build_directory(const char *path)
{
    (void) path;                /* unused */
    E_ERROR("build_directory() unimplemented on your platform!\n");
    return -1;
}
651#else
/*
 * Placeholder for platforms with no directory-creation support here.
 * Logs an error and returns -1 without looking at `path`.
 */
int
build_directory(const char *path)
{
    (void) path;                /* unused */
    E_ERROR("build_directory() unimplemented on your platform!\n");
    return -1;
}
658#endif
Sphinx's memory allocation/deallocation routines.
SPHINXBASE_EXPORT void ckd_free(void *ptr)
Test and free a 1-D array.
Definition ckd_alloc.c:241
#define ckd_malloc(sz)
Macro for ckd_malloc
Definition ckd_alloc.h:253
#define ckd_calloc(n, sz)
Macros to simplify the use of above functions.
Definition ckd_alloc.h:248
#define ckd_salloc(ptr)
Macro for ckd_salloc
Definition ckd_alloc.h:264
#define ckd_realloc(ptr, sz)
Macro for ckd_realloc
Definition ckd_alloc.h:258
Implementation of logging routines.
#define E_FATAL
Exit with non-zero status after error message.
Definition err.h:127
#define E_ERROR_SYSTEM
Print error text; Call perror("");.
Definition err.h:142
#define E_ERROR
Print error message to standard error stream.
Definition err.h:169
#define E_WARN
Print warning information to standard error stream.
Definition err.h:164
File names related operation.
SPHINXBASE_EXPORT void path2dirname(const char *path, char *dir)
Strip off filename from the given path and copy the directory name into dir Caller must have allocate...
Definition filename.c:90
file IO related operations.
SPHINXBASE_EXPORT int32 stat_retry(const char *file, struct stat *statbuf)
There is no bitstream decoder, because a stream abstraction is too slow.
Definition pio.c:480
SPHINXBASE_EXPORT int32 stat_mtime(const char *file)
Return time of last modification for the given file, or -1 if stat fails.
Definition pio.c:515
int bit_encode_flush(bit_encode_t *be)
Flush any unwritten bits, zero-padding if necessary.
Definition pio.c:607
SPHINXBASE_EXPORT void lineiter_free(lineiter_t *li)
Stop reading lines from a file.
Definition pio.c:358
int bit_encode_write(bit_encode_t *be, unsigned char const *bits, int nbits)
Write bits to encoder.
Definition pio.c:568
SPHINXBASE_EXPORT int32 fread_retry(void *pointer, int32 size, int32 num_items, FILE *stream)
NFS file reads seem to fail now and then.
Definition pio.c:398
SPHINXBASE_EXPORT int build_directory(const char *path)
Create a directory and all of its parent directories, as needed.
Definition pio.c:653
SPHINXBASE_EXPORT int lineiter_lineno(lineiter_t *li)
Returns current line number.
Definition pio.c:352
int bit_encode_free(bit_encode_t *be)
Release pointer to a bit encoder.
Definition pio.c:556
SPHINXBASE_EXPORT void fclose_comp(FILE *fp, int32 ispipe)
Close a file opened using fopen_comp.
Definition pio.c:175
SPHINXBASE_EXPORT FILE * fopen_compchk(const char *file, int32 *ispipe)
Open a file for reading, but if file not present try to open compressed version (if file is uncompres...
Definition pio.c:192
SPHINXBASE_EXPORT lineiter_t * lineiter_start_clean(FILE *fh)
Start reading lines from a file, skip comments and trim lines.
Definition pio.c:279
bit_encode_t * bit_encode_retain(bit_encode_t *be)
Retain pointer to a bit encoder.
Definition pio.c:549
bit_encode_t * bit_encode_attach(FILE *outfh)
Attach bitstream encoder to a file.
Definition pio.c:538
SPHINXBASE_EXPORT FILE * fopen_comp(const char *file, const char *mode, int32 *ispipe)
Like fopen, but use popen and zcat if it is determined that "file" is compressed (i....
Definition pio.c:98
SPHINXBASE_EXPORT lineiter_t * lineiter_start(FILE *fh)
Start reading lines from a file.
Definition pio.c:255
int bit_encode_write_cw(bit_encode_t *be, uint32 codeword, int nbits)
Write lowest-order bits of codeword to encoder.
Definition pio.c:595
SPHINXBASE_EXPORT char * fread_line(FILE *stream, size_t *out_len)
Read a line of arbitrary length from a file and return it as a newly allocated string.
Definition pio.c:367
SPHINXBASE_EXPORT lineiter_t * lineiter_next(lineiter_t *li)
Move to the next line in the file.
Definition pio.c:338
Miscellaneous useful string functions.
SPHINXBASE_EXPORT char * string_trim(char *string, enum string_edge_e which)
Remove whitespace from a string, modifying it in-place.
Definition strfuncs.c:89
SPHINXBASE_EXPORT char * string_join(const char *base,...)
Concatenate a NULL-terminated argument list of strings, returning a newly allocated string.
Definition strfuncs.c:62
@ STRING_BOTH
Both ends of string.
Definition strfuncs.h:73
Line iterator for files.
Definition pio.h:177