From 264a42dbf5dc21db57ea33cd6af6d0b79653d308 Mon Sep 17 00:00:00 2001
From: David Huggins-Daines <dhdaines@gmail.com>
Date: Thu, 15 Sep 2022 11:21:00 -0400
Subject: [PATCH] feat: Support input files to pocketsphinx main

---
 doxygen/pocketsphinx.1        |  24 +--
 programs/CMakeLists.txt       |   2 +-
 programs/pocketsphinx_main.c  |  91 ++++++++++--
 programs/pocketsphinx_pitch.c | 198 +-----------------------
 programs/soundfiles.c         | 273 ++++++++++++++++++++++++++++++++++
 programs/soundfiles.h         |  49 ++++++
 6 files changed, 412 insertions(+), 225 deletions(-)
 create mode 100644 programs/soundfiles.c
 create mode 100644 programs/soundfiles.h

diff --git a/doxygen/pocketsphinx.1 b/doxygen/pocketsphinx.1
index 3e475b752..a01c2fb41 100644
--- a/doxygen/pocketsphinx.1
+++ b/doxygen/pocketsphinx.1
@@ -7,22 +7,24 @@ pocketsphinx \- Run speech recognition on audio data
 [ \fBlive\fR |
 \fBsingle\fR |
 \fBsoxflags\fR ]
+[ \fIinputs\fR... ]
 .SH DESCRIPTION
 .PP
 The ‘\f[CR]pocketsphinx\fP’ command-line program reads single-channel
-16-bit PCM audio from standard input and attemps to recognize speech
-in it using the default acoustic and language model. It accepts a
-large number of options which you probably don't care about, and a
-\fIcommand\fP which defaults to ‘\f[CR]live\fP’. The commands are as
-follows:
+16-bit PCM audio from standard input or one or more input files, and
+attempts to recognize speech in it using the default acoustic and
+language model. The input files can be raw audio, WAV, or NIST Sphere
+files, though some of these may not be recognized properly.  It
+accepts a large number of options which you probably don't care about,
+and a \fIcommand\fP which defaults to ‘\f[CR]live\fP’. The commands
+are as follows:
 .TP
 .B live
-Detect speech segments in standard input, run
-recognition on them (using those options you don't care about), and
-write the results to standard output in line-delimited JSON. I
-realize this isn't the prettiest format, but it sure beats XML. Each
-line contains a JSON object with these fields, which have short names
-to make the lines more readable:
+Detect speech segments in input files, run recognition on them (using
+those options you don't care about), and write the results to standard
+output in line-delimited JSON. I realize this isn't the prettiest
+format, but it sure beats XML. Each line contains a JSON object with
+these fields, which have short names to make the lines more readable:
 .IP
 "b": Start time in seconds, from the beginning of the stream
 .IP
diff --git a/programs/CMakeLists.txt b/programs/CMakeLists.txt
index 49fa2a282..85c22791b 100644
--- a/programs/CMakeLists.txt
+++ b/programs/CMakeLists.txt
@@ -8,7 +8,7 @@ set(POCKETSPHINX_PROGRAMS
   pocketsphinx_pitch
   )
 foreach(PROGRAM ${POCKETSPHINX_PROGRAMS})
-  add_executable(${PROGRAM} ${PROGRAM}.c)
+  add_executable(${PROGRAM} ${PROGRAM}.c soundfiles.c)
   target_link_libraries(${PROGRAM} pocketsphinx)
   target_include_directories(
     ${PROGRAM} PRIVATE ${CMAKE_SOURCE_DIR}/src
diff --git a/programs/pocketsphinx_main.c b/programs/pocketsphinx_main.c
index b01560d10..809a18522 100644
--- a/programs/pocketsphinx_main.c
+++ b/programs/pocketsphinx_main.c
@@ -48,6 +48,7 @@
 
 #include "util/ckd_alloc.h"
 #include "pocketsphinx_internal.h"
+#include "soundfiles.h"
 
 static int global_done = 0;
 static void
@@ -163,7 +164,7 @@ output_hyp(ps_endpointer_t *ep, ps_decoder_t *decoder)
 }
 
 static int
-live(ps_config_t *config)
+live(ps_config_t *config, FILE *infile)
 {
     ps_decoder_t *decoder = NULL;
     ps_endpointer_t *ep = NULL;
@@ -192,7 +193,7 @@ live(ps_config_t *config)
         int prev_in_speech = ps_endpointer_in_speech(ep);
         size_t len, end_samples;
         if ((len = fread(frame, sizeof(frame[0]),
-                         frame_size, stdin)) != frame_size) {
+                         frame_size, infile)) != frame_size) {
             if (len > 0) {
                 speech = ps_endpointer_end_stream(ep, frame,
                                                   frame_size,
@@ -236,7 +237,7 @@ live(ps_config_t *config)
 }
 
 static int
-single(ps_config_t *config)
+single(ps_config_t *config, FILE *infile)
 {
     ps_decoder_t *decoder = NULL;
     short *data, *ptr;
@@ -259,9 +260,9 @@ single(ps_config_t *config)
             data = ckd_realloc(data, data_size * sizeof(*data));
             ptr = data + len;
         }
-        len = fread(ptr, sizeof(*ptr), block_size, stdin);
+        len = fread(ptr, sizeof(*ptr), block_size, infile);
         if (len == 0) {
-            if (feof(stdin))
+            if (feof(infile))
                 break;
             else
                 E_ERROR_SYSTEM("Failed to read %d bytes\n",
@@ -345,7 +346,7 @@ soxflags(ps_config_t *config)
     return 0;
 }
 
-static const char *
+static char *
 find_command(int *argc, char **argv)
 {
     int i;
@@ -362,28 +363,86 @@ find_command(int *argc, char **argv)
     return "live";
 }
 
+/* Pull the non-option arguments out of argv and park them at its tail,
+ * shrinking *argc to match.  Returns the first parked slot (NULL if none)
+ * and stores the count in *ninputs; the inputs end up in reverse order. */
+static char **
+find_inputs(int *argc, char **argv, int *ninputs)
+{
+    char **tail = NULL;
+    int pos = 1;
+    for (*ninputs = 0; pos < *argc;) {
+        char *cur = argv[pos];
+        if (cur == NULL || cur[0] == '\0' || cur[0] == '-') {
+            pos += 2; /* Assume every option is followed by a value. */
+            continue;
+        }
+        /* Close the gap, then reuse the slot freed at the old end. */
+        memmove(argv + pos, argv + pos + 1,
+                (*argc - pos - 1) * sizeof(*argv));
+        tail = &argv[--*argc];
+        *tail = cur;
+        ++*ninputs;
+    }
+    return tail;
+}
+
+/* Run func on every input file in command-line order, or on stdin when
+ * ninputs is 0.  Files that fail to open or parse are skipped.  Returns
+ * func's status for stdin; else 0, or the status of the last failure. */
+static int
+process_inputs(int (*func)(ps_config_t *, FILE *),
+               ps_config_t *config,
+               char **inputs, int ninputs)
+{
+    int rv = 0;
+    int idx;
+
+    if (ninputs == 0)
+        return func(config, stdin);
+    /* find_inputs() stored them reversed, so walk the array backwards. */
+    for (idx = ninputs - 1; idx >= 0; --idx) {
+        const char *path = inputs[idx];
+        FILE *fh = fopen(path, "rb");
+        int status;
+
+        if (fh == NULL) {
+            E_ERROR_SYSTEM("Failed to open %s for reading", path);
+            rv = -1;
+            continue;
+        }
+        /* Consume any WAV/NIST header so func starts at the samples. */
+        status = read_file_header(path, fh, config);
+        if (status >= 0 && (status = func(config, fh)) < 0)
+            E_ERROR("Recognition failed on %s\n", path);
+        if (status < 0)
+            rv = status;
+        fclose(fh);
+    }
+    return rv;
+}
+
 int
 main(int argc, char *argv[])
 {
     ps_config_t *config;
-    const char *command;
-    int rv;
+    char *command;
+    char **inputs;
+    int rv, ninputs;
 
     command = find_command(&argc, argv);
+    inputs = find_inputs(&argc, argv, &ninputs);
     if ((config = ps_config_parse_args(NULL, argc, argv)) == NULL) {
         cmd_ln_log_help_r(NULL, ps_args());
         return 1;
     }
     ps_default_search_args(config);
-    if (0 == strcmp(command, "soxflags")) {
+    if (0 == strcmp(command, "soxflags"))
         rv = soxflags(config);
-    }
-    else if (0 == strcmp(command, "live")) {
-        rv = live(config);
-    }
-    else if (0 == strcmp(command, "single")) {
-        rv = single(config);
-    }
+    else if (0 == strcmp(command, "live"))
+        rv = process_inputs(live, config, inputs, ninputs);
+    else if (0 == strcmp(command, "single"))
+        rv = process_inputs(single, config, inputs, ninputs);
     else {
         E_ERROR("Unknown command \"%s\"\n", command);
         return 1;
diff --git a/programs/pocketsphinx_pitch.c b/programs/pocketsphinx_pitch.c
index 779ab4f62..b275cab8d 100644
--- a/programs/pocketsphinx_pitch.c
+++ b/programs/pocketsphinx_pitch.c
@@ -52,6 +52,7 @@
 #include "fe/yin.h"
 
 #include "pocketsphinx_internal.h"
+#include "soundfiles.h"
 
 static arg_t defn[] = {
   { "i",
@@ -183,203 +184,6 @@ main(int argc, char *argv[])
     return 0;
 }
 
-static int
-guess_file_type(char const *file, FILE *infh, cmd_ln_t *config)
-{
-    char header[4];
-
-    fseek(infh, 0, SEEK_SET);
-    if (fread(header, 1, 4, infh) != 4) {
-        E_ERROR_SYSTEM("Failed to read 4 byte header");
-        return -1;
-    }
-    if (0 == memcmp(header, "RIFF", 4)) {
-        E_INFO("%s appears to be a WAV file\n", file);
-        ps_config_set_bool(config, "mswav", TRUE);
-        ps_config_set_bool(config, "nist", FALSE);
-        ps_config_set_bool(config, "raw", FALSE);
-    }
-    else if (0 == memcmp(header, "NIST", 4)) {
-        E_INFO("%s appears to be a NIST SPHERE file\n", file);
-        ps_config_set_bool(config, "mswav", FALSE);
-        ps_config_set_bool(config, "nist", TRUE);
-        ps_config_set_bool(config, "raw", FALSE);
-    }
-    else {
-        E_INFO("%s appears to be raw data\n", file);
-        ps_config_set_bool(config, "mswav", FALSE);
-        ps_config_set_bool(config, "nist", FALSE);
-        ps_config_set_bool(config, "raw", TRUE);
-    }
-    fseek(infh, 0, SEEK_SET);
-    return 0;
-}
-
-#define TRY_FREAD(ptr, size, nmemb, stream)                             \
-    if (fread(ptr, size, nmemb, stream) != (nmemb)) {                   \
-        E_ERROR_SYSTEM("Failed to read %d bytes", size * nmemb);       \
-        goto error_out;                                                 \
-    }
-
-static int
-read_riff_header(FILE *infh, cmd_ln_t *config)
-{
-    char id[4];
-    int32 intval, header_len;
-    int16 shortval;
-
-    /* RIFF files are little-endian by definition. */
-    ps_config_set_str(config, "input_endian", "little");
-
-    /* Read in all the header chunks and etcetera. */
-    TRY_FREAD(id, 1, 4, infh);
-    /* Total file length (we don't care) */
-    TRY_FREAD(&intval, 4, 1, infh);
-    /* 'WAVE' */
-    TRY_FREAD(id, 1, 4, infh);
-    if (0 != memcmp(id, "WAVE", 4)) {
-        E_ERROR("This is not a WAVE file\n");
-        goto error_out;
-    }
-    /* 'fmt ' */
-    TRY_FREAD(id, 1, 4, infh);
-    if (0 != memcmp(id, "fmt ", 4)) {
-        E_ERROR("Format chunk missing\n");
-        goto error_out;
-    }
-    /* Length of 'fmt ' chunk */
-    TRY_FREAD(&intval, 4, 1, infh);
-    SWAP_LE_32(&intval);
-    header_len = intval;
-
-    /* Data format. */
-    TRY_FREAD(&shortval, 2, 1, infh);
-    SWAP_LE_16(&shortval);
-    if (shortval != 1) { /* PCM */
-        E_ERROR("WAVE file is not in PCM format\n");
-        goto error_out;
-    }
-
-    /* Number of channels. */
-    TRY_FREAD(&shortval, 2, 1, infh);
-    SWAP_LE_16(&shortval);
-    if (shortval != 1) { /* PCM */
-        E_ERROR("WAVE file is not single channel\n");
-        goto error_out;
-    }
-
-    /* Sampling rate (finally!) */
-    TRY_FREAD(&intval, 4, 1, infh);
-    SWAP_LE_32(&intval);
-    if (ps_config_int(config, "samprate") == 0)
-        ps_config_set_int(config, "samprate", intval);
-    else if (ps_config_int(config, "samprate") != intval) {
-        E_WARN("WAVE file sampling rate %d != -samprate %d\n",
-               intval, ps_config_int(config, "samprate"));
-    }
-
-    /* Average bytes per second (we don't care) */
-    TRY_FREAD(&intval, 4, 1, infh);
-
-    /* Block alignment (we don't care) */
-    TRY_FREAD(&shortval, 2, 1, infh);
-
-    /* Bits per sample (must be 16) */
-    TRY_FREAD(&shortval, 2, 1, infh);
-    SWAP_LE_16(&shortval);
-    if (shortval != 16) {
-        E_ERROR("WAVE file is not 16-bit\n");
-        goto error_out;
-    }
-
-    /* Any extra parameters. */
-    if (header_len > 16)
-        fseek(infh, header_len - 16, SEEK_CUR);
-
-    /* Now skip to the 'data' chunk. */
-    while (1) {
-        TRY_FREAD(id, 1, 4, infh);
-        if (0 == memcmp(id, "data", 4)) {
-            /* Total number of bytes of data (we don't care). */
-            TRY_FREAD(&intval, 4, 1, infh);
-            break;
-        }
-        else {
-            /* Some other stuff... */
-            /* Number of bytes of ... whatever */
-            TRY_FREAD(&intval, 4, 1, infh);
-            SWAP_LE_32(&intval);
-            fseek(infh, intval, SEEK_CUR);
-        }
-    }
-
-    /* We are ready to rumble. */
-    return 0;
-error_out:
-    return -1;
-}
-
-static int
-read_nist_header(FILE *infh, cmd_ln_t *config)
-{
-    char hdr[1024];
-    char *line, *c;
-
-    TRY_FREAD(hdr, 1, 1024, infh);
-    hdr[1023] = '\0';
-
-    /* Roughly parse it to find the sampling rate and byte order
-     * (don't bother with other stuff) */
-    if ((line = strstr(hdr, "sample_rate")) == NULL) {
-        E_ERROR("No sampling rate in NIST header!\n");
-        goto error_out;
-    }
-    c = strchr(line, '\n');
-    if (c) *c = '\0';
-    c = strrchr(line, ' ');
-    if (c == NULL) {
-        E_ERROR("Could not find sampling rate!\n");
-        goto error_out;
-    }
-    ++c;
-    if (ps_config_int(config, "samprate") == 0)
-        ps_config_set_int(config, "samprate", atoi(c));
-    else if (ps_config_int(config, "samprate") != atoi(c)) {
-        E_WARN("NIST file sampling rate %d != -samprate %d\n",
-               atoi(c), ps_config_int(config, "samprate"));
-    }
-
-    if (line + strlen(line) < hdr + 1023)
-        line[strlen(line)] = ' ';
-    if ((line = strstr(hdr, "sample_byte_format")) == NULL) {
-        E_ERROR("No sample byte format in NIST header!\n");
-        goto error_out;
-    }
-    c = strchr(line, '\n');
-    if (c) *c = '\0';
-    c = strrchr(line, ' ');
-    if (c == NULL) {
-        E_ERROR("Could not find sample byte order!\n");
-        goto error_out;
-    }
-    ++c;
-    if (0 == memcmp(c, "01", 2)) {
-        ps_config_set_str(config, "input_endian", "little");
-    }
-    else if (0 == memcmp(c, "10", 2)) {
-        ps_config_set_str(config, "input_endian", "big");
-    }
-    else {
-        E_ERROR("Unknown byte order %s\n", c);
-        goto error_out;
-    }
-
-    /* We are ready to rumble. */
-    return 0;
-error_out:
-    return -1;
-}
-
 static int
 extract_pitch(const char *in, const char *out, cmd_ln_t *config)
 {
diff --git a/programs/soundfiles.c b/programs/soundfiles.c
new file mode 100644
index 000000000..b310ad2d9
--- /dev/null
+++ b/programs/soundfiles.c
@@ -0,0 +1,273 @@
+/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
+/* ====================================================================
+ * Copyright (c) 2008 Carnegie Mellon University.  All rights 
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer. 
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * This work was supported in part by funding from the Defense Advanced 
+ * Research Projects Agency and the National Science Foundation of the 
+ * United States of America, and the CMU Sphinx Speech Consortium.
+ *
+ * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 
+ * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
+ * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * ====================================================================
+ *
+ */
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <string.h>
+
+#include "util/byteorder.h"
+#include "soundfiles.h"
+
+/* Sniff the four-byte magic number at the start of infh and record the
+ * guess in the "mswav"/"nist"/"raw" flags, provided ps_config_typeof()
+ * reports a nonzero type for "mswav".  On success the stream is rewound
+ * and 0 is returned; -1 means the magic number could not be read. */
+int
+guess_file_type(char const *file, FILE *infh, ps_config_t *config)
+{
+    char magic[4];
+    int is_wav, is_nist;
+
+    fseek(infh, 0, SEEK_SET);
+    if (fread(magic, 1, 4, infh) != 4) {
+        E_ERROR_SYSTEM("Failed to read 4 byte header");
+        return -1;
+    }
+    is_wav = (0 == memcmp(magic, "RIFF", 4));
+    is_nist = !is_wav && (0 == memcmp(magic, "NIST", 4));
+
+    if (is_wav)
+        E_INFO("%s appears to be a WAV file\n", file);
+    else if (is_nist)
+        E_INFO("%s appears to be a NIST SPHERE file\n", file);
+    else
+        E_INFO("%s appears to be raw data\n", file);
+
+    /* Exactly one of the three flags ends up TRUE. */
+    if (ps_config_typeof(config, "mswav") != 0) {
+        ps_config_set_bool(config, "mswav", is_wav ? TRUE : FALSE);
+        ps_config_set_bool(config, "nist", is_nist ? TRUE : FALSE);
+        ps_config_set_bool(config, "raw",
+                           (is_wav || is_nist) ? FALSE : TRUE);
+    }
+
+    /* Rewind so the next reader starts at byte zero. */
+    fseek(infh, 0, SEEK_SET);
+    return 0;
+}
+
+/* fread() exactly nmemb items, or log the failure and jump to error_out. */
+#define TRY_FREAD(ptr, size, nmemb, stream) do {                        \
+    if (fread((ptr), (size), (nmemb), (stream)) != (nmemb)) {           \
+        E_ERROR_SYSTEM("Failed to read %d bytes", (size) * (nmemb));    \
+        goto error_out; } } while (0)
+
+/* Identify the container format of infh from its four-byte magic number
+ * and hand off to the matching header parser, which leaves the stream
+ * just past the header.  Raw data is left rewound.  Returns 0 or -1. */
+int
+read_file_header(const char *file, FILE *infh, ps_config_t *config)
+{
+    char magic[4];
+    fseek(infh, 0, SEEK_SET);
+    TRY_FREAD(magic, 1, 4, infh);
+    fseek(infh, 0, SEEK_SET);
+
+    if (0 == memcmp(magic, "RIFF", 4)) {
+        E_INFO("%s appears to be a WAV file\n", file);
+        return read_riff_header(infh, config);
+    }
+    if (0 == memcmp(magic, "NIST", 4)) {
+        E_INFO("%s appears to be a NIST SPHERE file\n", file);
+        return read_nist_header(infh, config);
+    }
+    E_INFO("%s appears to be raw data\n", file);
+    return 0;
+error_out:
+    return -1;
+}
+
+/* Parse a RIFF/WAVE header from the current position of infh, requiring
+ * 16-bit mono PCM and adopting its rate when "samprate" is 0.  On success
+ * the stream is left at the first byte of sample data.  Returns 0 or -1. */
+int
+read_riff_header(FILE *infh, ps_config_t *config)
+{
+    char id[4];
+    int32 intval, header_len;
+    int16 shortval;
+
+    /* RIFF files are little-endian by definition. */
+    ps_config_set_str(config, "input_endian", "little");
+
+    /* Read in all the header chunks and etcetera. */
+    TRY_FREAD(id, 1, 4, infh);
+    /* Total file length (we don't care) */
+    TRY_FREAD(&intval, 4, 1, infh);
+    /* 'WAVE' */
+    TRY_FREAD(id, 1, 4, infh);
+    if (0 != memcmp(id, "WAVE", 4)) {
+        E_ERROR("This is not a WAVE file\n");
+        goto error_out;
+    }
+    /* 'fmt ' */
+    TRY_FREAD(id, 1, 4, infh);
+    if (0 != memcmp(id, "fmt ", 4)) {
+        E_ERROR("Format chunk missing\n");
+        goto error_out;
+    }
+    /* Length of 'fmt ' chunk */
+    TRY_FREAD(&intval, 4, 1, infh);
+    SWAP_LE_32(&intval);
+    header_len = intval;
+
+    /* Data format. */
+    TRY_FREAD(&shortval, 2, 1, infh);
+    SWAP_LE_16(&shortval);
+    if (shortval != 1) { /* PCM */
+        E_ERROR("WAVE file is not in PCM format\n");
+        goto error_out;
+    }
+
+    /* Number of channels. */
+    TRY_FREAD(&shortval, 2, 1, infh);
+    SWAP_LE_16(&shortval);
+    if (shortval != 1) { /* mono */
+        E_ERROR("WAVE file is not single channel\n");
+        goto error_out;
+    }
+
+    /* Sampling rate (finally!) */
+    TRY_FREAD(&intval, 4, 1, infh);
+    SWAP_LE_32(&intval);
+    if (ps_config_int(config, "samprate") == 0)
+        ps_config_set_int(config, "samprate", intval);
+    else if (ps_config_int(config, "samprate") != intval) {
+        E_WARN("WAVE file sampling rate %d != -samprate %d\n",
+               intval, ps_config_int(config, "samprate"));
+    }
+
+    /* Average bytes per second and block alignment (we don't care) */
+    TRY_FREAD(&intval, 4, 1, infh);
+    TRY_FREAD(&shortval, 2, 1, infh);
+
+    /* Bits per sample (must be 16) */
+    TRY_FREAD(&shortval, 2, 1, infh);
+    SWAP_LE_16(&shortval);
+    if (shortval != 16) {
+        E_ERROR("WAVE file is not 16-bit\n");
+        goto error_out;
+    }
+
+    /* Any extra parameters (odd-sized chunks carry one pad byte). */
+    if (header_len > 16)
+        fseek(infh, header_len - 16 + (header_len & 1), SEEK_CUR);
+
+    /* Now skip to the 'data' chunk. */
+    while (1) {
+        TRY_FREAD(id, 1, 4, infh);
+        /* Chunk length (only needed to skip non-data chunks). */
+        TRY_FREAD(&intval, 4, 1, infh);
+        if (0 == memcmp(id, "data", 4))
+            break;
+        SWAP_LE_32(&intval);
+        if (intval < 0 || fseek(infh, intval, SEEK_CUR) != 0) {
+            E_ERROR("Bad chunk size in WAVE file\n");
+            goto error_out;
+        }
+        if (intval & 1) /* RIFF pads odd-sized chunks to even length */
+            fseek(infh, 1, SEEK_CUR);
+    }
+
+    /* We are ready to rumble. */
+    return 0;
+error_out:
+    return -1;
+}
+
+/* Read 1024 header bytes from infh and copy the NIST sample_rate and
+ * sample_byte_format fields into "samprate" (only if currently 0) and
+ * "input_endian".  Returns 0 on success, -1 on a read or parse failure. */
+int
+read_nist_header(FILE *infh, ps_config_t *config)
+{
+    char hdr[1024];
+    char *line, *c;
+
+    TRY_FREAD(hdr, 1, 1024, infh);
+    hdr[1023] = '\0';
+
+    /* Roughly parse it for sampling rate and byte order, nothing else. */
+    if ((line = strstr(hdr, "sample_rate")) == NULL) {
+        E_ERROR("No sampling rate in NIST header!\n");
+        goto error_out;
+    }
+    c = strchr(line, '\n');
+    if (c) *c = '\0';
+    c = strrchr(line, ' ');
+    if (c == NULL) {
+        E_ERROR("Could not find sampling rate!\n");
+        goto error_out;
+    }
+    ++c;
+    if (ps_config_int(config, "samprate") == 0)
+        ps_config_set_int(config, "samprate", atoi(c));
+    else if (ps_config_int(config, "samprate") != atoi(c)) {
+        E_WARN("NIST file sampling rate %d != -samprate %d\n",
+               atoi(c), ps_config_int(config, "samprate"));
+    }
+    /* Undo the terminator so the whole header is searchable again. */
+    if (line + strlen(line) < hdr + 1023)
+        line[strlen(line)] = ' ';
+    if ((line = strstr(hdr, "sample_byte_format")) == NULL) {
+        E_ERROR("No sample byte format in NIST header!\n");
+        goto error_out;
+    }
+    c = strchr(line, '\n');
+    if (c) *c = '\0';
+    c = strrchr(line, ' ');
+    if (c == NULL) {
+        E_ERROR("Could not find sample byte order!\n");
+        goto error_out;
+    }
+    ++c;
+    /* strncmp, not memcmp: c may be shorter than two characters. */
+    if (0 == strncmp(c, "01", 2))
+        ps_config_set_str(config, "input_endian", "little");
+    else if (0 == strncmp(c, "10", 2))
+        ps_config_set_str(config, "input_endian", "big");
+    else {
+        E_ERROR("Unknown byte order %s\n", c);
+        goto error_out;
+    }
+    /* We are ready to rumble. */
+    return 0;
+error_out:
+    return -1;
+}
diff --git a/programs/soundfiles.h b/programs/soundfiles.h
new file mode 100644
index 000000000..75f37b108
--- /dev/null
+++ b/programs/soundfiles.h
@@ -0,0 +1,49 @@
+/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
+/* ====================================================================
+ * Copyright (c) 2008 Carnegie Mellon University.  All rights 
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer. 
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * This work was supported in part by funding from the Defense Advanced 
+ * Research Projects Agency and the National Science Foundation of the 
+ * United States of America, and the CMU Sphinx Speech Consortium.
+ *
+ * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 
+ * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
+ * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * ====================================================================
+ *
+ */
+
+#ifndef __SOUNDFILES_H__
+#define __SOUNDFILES_H__
+
+#include <pocketsphinx.h>
+#include <stdio.h>
+/* Sound file header helpers: each returns 0 on success and -1 on failure. */
+int guess_file_type(const char *file, FILE *infh, ps_config_t *config);
+int read_riff_header(FILE *infh, ps_config_t *config);
+int read_nist_header(FILE *infh, ps_config_t *config);
+int read_file_header(const char *file, FILE *infh, ps_config_t *config);
+
+#endif /* __SOUNDFILES_H__ */