diff --git a/README.md b/README.md index 06299b1fa..3ceebbc06 100644 --- a/README.md +++ b/README.md @@ -56,7 +56,8 @@ The `pocketsphinx` command-line program reads single-channel 16-bit PCM audio from standard input or one or more files, and attemps to recognize speech in it using the default acoustic and language model. It accepts a large number of options which you probably don't care -about, and a *command* which defaults to `live`. +about, a *command* which defaults to `live`, and one or more inputs +(except in `align` mode), or `-` to read from standard input. If you have a single-channel WAV file called "speech.wav" and you want to recognize speech in it, you can try doing this (the results may not @@ -86,12 +87,39 @@ The commands are as follows: - `t`: Full text of recognition result - `w`: List of segments (usually words), each of which in turn contains the `b`, `d`, `p`, and `t` fields, for start, end, - probability, and the text of the word. In the future we may - also support hierarchical results in which case `w` could be - present. + probability, and the text of the word. If `-phone_align yes` + has been passed, then a `w` field will be present containing + phone segmentations, in the same format. - `single`: Recognize each input as a single utterance, and write a JSON object in the same format described above. + + - `align`: Align a single input file (or `-` for standard input) to + a word sequence, and write a JSON object in the same format + described above. The first positional argument is the input, and + all subsequent ones are concatenated to make the text, to avoid + surprises if you forget to quote it. You are responsible for + normalizing the text to remove punctuation, uppercase, centipedes, + etc. For example: + + pocketsphinx align goforward.wav "go forward ten meters" + + By default, only word-level alignment is done. 
To get phone + alignments, pass `-phone_align yes` in the flags, e.g.: + + pocketsphinx -phone_align yes align audio.wav $text + + This will make not particularly readable output, but you can use + [jq](https://stedolan.github.io/jq/) to clean it up. For example, + you can get just the word names and start times like this: + + pocketsphinx align audio.wav $text | jq '.w[]|[.t,.b]' + + Or you could get the phone names and durations like this: + + pocketsphinx -phone_align yes align audio.wav $text | jq '.w[]|.w[]|[.t,.d]' + + There are many, many other possibilities, of course. - `soxflags`: Return arguments to `sox` which will create the appropriate input format. Note that because the `sox` @@ -99,16 +127,16 @@ The commands are as follows: filename or `-d` (which tells `sox` to read from the microphone). You can run live recognition like this: - sox -d $(pocketsphinx soxflags) | pocketsphinx + sox -d $(pocketsphinx soxflags) | pocketsphinx - or decode from a file named "audio.mp3" like this: - sox audio.mp3 $(pocketsphinx soxflags) | pocketsphinx + sox audio.mp3 $(pocketsphinx soxflags) | pocketsphinx - By default only errors are printed to standard error, but if you want more information you can pass `-loglevel INFO`. Partial results are not printed, maybe they will be in the future, but don't hold your -breath. Force-alignment is likely to be supported soon, however. +breath. 
Programming ----------- diff --git a/cython/_pocketsphinx.pxd b/cython/_pocketsphinx.pxd index 3b8eacc6c..8f6a7b585 100644 --- a/cython/_pocketsphinx.pxd +++ b/cython/_pocketsphinx.pxd @@ -422,7 +422,9 @@ cdef extern from "pocketsphinx/search.h": int ps_add_keyphrase(ps_decoder_t *ps, const char *name, const char *keyphrase) int ps_add_allphone(ps_decoder_t *ps, const char *name, ngram_model_t *lm) int ps_add_allphone_file(ps_decoder_t *ps, const char *name, const char *path) - int ps_add_align(ps_decoder_t *ps, const char *name, const char *words) + int ps_set_align_text(ps_decoder_t *ps, const char *words) + int ps_set_alignment(ps_decoder_t *ps, ps_alignment_t *al) + ps_alignment_t *ps_get_alignment(ps_decoder_t *ps) cdef extern from "pocketsphinx/vad.h": ctypedef struct ps_vad_t: @@ -472,3 +474,37 @@ cdef extern from "pocketsphinx/endpointer.h": int ps_endpointer_in_speech(ps_endpointer_t *ep) double ps_endpointer_speech_start(ps_endpointer_t *ep) double ps_endpointer_speech_end(ps_endpointer_t *ep) + +cdef extern from "pocketsphinx/alignment.h": + ctypedef struct ps_alignment_t: + pass + ctypedef struct ps_alignment_iter_t: + pass + ctypedef struct pid_struct: + short cipid + unsigned short ssid + int tmat + ctypedef union id_union: + int wid + pid_struct pid + unsigned short senid + ctypedef struct ps_alignment_entry_t: + int start + int duration + int score + id_union id + int parent + int child + ps_alignment_t *ps_alignment_retain(ps_alignment_t *al) + int ps_alignment_free(ps_alignment_t *al) + int ps_alignment_n_words(ps_alignment_t *al) + int ps_alignment_n_phones(ps_alignment_t *al) + int ps_alignment_n_states(ps_alignment_t *al) + ps_alignment_iter_t *ps_alignment_words(ps_alignment_t *al) + ps_alignment_iter_t *ps_alignment_phones(ps_alignment_t *al) + ps_alignment_iter_t *ps_alignment_states(ps_alignment_t *al) + ps_alignment_iter_t *ps_alignment_iter_next(ps_alignment_iter_t *itor) + ps_alignment_iter_t 
def set_align_text(self, text):
    """Force-align a word sequence, switching to alignment mode at once.

    In contrast to the `add_*` family, the search module created here
    is activated immediately; nothing is registered under a name for
    later activation.

    No text normalization is performed for you.  After this call,
    decode as usual and read the word segmentation from the normal
    segment iterator.  Phone- and state-level timings need a second
    pass: see `set_alignment` and `get_alignment`.

    Args:
        text(str): Whitespace-separated words to align.  Every word
                   must exist in the dictionary.
    Raises:
        RuntimeError: If the decoder rejects the text.
    """
    cdef bytes words = text.encode("utf-8")
    if ps_set_align_text(self._ps, words) < 0:
        raise RuntimeError("Failed to set up alignment of %s" % (text))

def set_alignment(self, Alignment alignment = None):
    """Prepare *and* activate the sub-word (phone/state) alignment pass.

    Ordinary decoding and word-level alignment do not keep track of
    phone or state boundaries.  To obtain them, decode once, call
    this method, then decode the same audio again:

        decoder.set_align_text("hello world")
        decoder.start_utt()
        decoder.process_raw(data, full_utt=True)
        decoder.end_utt()
        decoder.set_alignment()
        decoder.start_utt()
        decoder.process_raw(data, full_utt=True)
        decoder.end_utt()
        for word in decoder.get_alignment():
            for phone in word:
                for state in phone:
                    print(word, phone, state)

    When used after N-Gram or FSG decoding, the default search
    module can be restored afterwards by calling activate_search()
    with no argument.

    Args:
        alignment(Alignment): Optional pre-built `Alignment`.  When
            omitted, NULL is handed to the decoder, which builds the
            alignment from the current hypothesis.
    Raises:
        RuntimeError: If the current hypothesis cannot be aligned
            (for instance with keyphrase or allphone search).
    """
    cdef ps_alignment_t *al = NULL
    if alignment is not None:
        al = alignment._al
    if ps_set_alignment(self._ps, al) < 0:
        raise RuntimeError("Failed to set up sub-word alignment")

def get_alignment(self):
    """Return the decoder's current sub-word alignment, if there is one.

    An object is returned as soon as `set_alignment` has been called,
    but its phone and state durations are only meaningful once the
    second decoding pass has been run.

    Returns:
        Alignment - a new wrapper holding its own reference to the
        decoder's alignment, or None when the decoder is not in
        sub-word alignment mode.
    """
    cdef ps_alignment_t *current = ps_get_alignment(self._ps)
    if current != NULL:
        # The decoder owns the pointer; retain it so the wrapper's
        # __dealloc__ releases only its own reference.
        return Alignment.create_from_ptr(ps_alignment_retain(current))
    return None
cdef class AlignmentEntry:
    """One entry (word, phone, or state) of a sub-word alignment.

    `start`, `duration`, `score` and `name` are copied out of the C
    iterator when the entry is created, so they stay valid forever.
    Iterating over an entry yields its children (phones of a word,
    states of a phone) as further `AlignmentEntry` objects.

    The entry only *borrows* the C iterator it was built from.  That
    iterator is advanced or freed by the generator that produced the
    entry, so children must be iterated while the entry is the current
    item of its parent loop, not afterwards.
    """
    cdef public int start
    cdef public int duration
    cdef public int score
    cdef public str name
    # Borrowed, NOT retained: owned by the generator that yielded us.
    cdef ps_alignment_iter_t *itor

    @staticmethod
    cdef create_from_iter(ps_alignment_iter_t *itor):
        """Snapshot the segment the iterator currently points at."""
        cdef AlignmentEntry self
        cdef const char *name
        self = AlignmentEntry.__new__(AlignmentEntry)
        self.score = ps_alignment_iter_seg(itor, &self.start, &self.duration)
        # The C command-line front end guards against a NULL name, so
        # do the same here rather than decoding a NULL pointer.
        name = ps_alignment_iter_name(itor)
        if name != NULL:
            self.name = name.decode('utf-8')
        else:
            self.name = ""
        self.itor = itor
        return self

    def __iter__(self):
        """Iterate over the children of this entry, if any."""
        cdef ps_alignment_iter_t *itor
        if self.itor == NULL:
            # Entry was not created from an iterator: nothing to walk.
            return
        itor = ps_alignment_iter_children(self.itor)
        try:
            while itor != NULL:
                c = AlignmentEntry.create_from_iter(itor)
                yield c
                # Frees the iterator and returns NULL at the end.
                itor = ps_alignment_iter_next(itor)
        finally:
            # Reached with a live iterator only when iteration was
            # abandoned early (break, exception, generator closed).
            if itor != NULL:
                ps_alignment_iter_free(itor)

cdef class Alignment:
    """Sub-word alignment.

    Wraps a reference to a `ps_alignment_t`, released when this object
    is deallocated.  For the moment this is read-only.
    """
    cdef ps_alignment_t *_al

    @staticmethod
    cdef create_from_ptr(ps_alignment_t *al):
        """Wrap `al`, taking over the caller's reference to it."""
        cdef Alignment self = Alignment.__new__(Alignment)
        self._al = al
        return self

    def __dealloc__(self):
        if self._al != NULL:
            ps_alignment_free(self._al)

    def __iter__(self):
        return self.words()

    def words(self):
        """Iterate over words in the alignment."""
        cdef ps_alignment_iter_t *itor
        if self._al == NULL:
            return
        itor = ps_alignment_words(self._al)
        try:
            while itor != NULL:
                w = AlignmentEntry.create_from_iter(itor)
                yield w
                itor = ps_alignment_iter_next(itor)
        finally:
            # Release the iterator if the caller stopped short.
            if itor != NULL:
                ps_alignment_iter_free(itor)

    def phones(self):
        """Iterate over phones in the alignment."""
        cdef ps_alignment_iter_t *itor
        if self._al == NULL:
            return
        itor = ps_alignment_phones(self._al)
        try:
            while itor != NULL:
                p = AlignmentEntry.create_from_iter(itor)
                yield p
                itor = ps_alignment_iter_next(itor)
        finally:
            # Release the iterator if the caller stopped short.
            if itor != NULL:
                ps_alignment_iter_free(itor)

    def states(self):
        """Iterate over states in the alignment."""
        cdef ps_alignment_iter_t *itor
        if self._al == NULL:
            return
        itor = ps_alignment_states(self._al)
        try:
            while itor != NULL:
                s = AlignmentEntry.create_from_iter(itor)
                yield s
                itor = ps_alignment_iter_next(itor)
        finally:
            # Release the iterator if the caller stopped short.
            if itor != NULL:
                ps_alignment_iter_free(itor)
b/doxygen/pocketsphinx.1 @@ -1,4 +1,4 @@ -.TH POCKETSPHINX 1 "2016-04-01" +.TH POCKETSPHINX 1 "2022-09-27" .SH NAME pocketsphinx \- Run speech recognition on audio data .SH SYNOPSIS @@ -8,17 +8,17 @@ pocketsphinx \- Run speech recognition on audio data \fBsingle\fR | \fBhelp\fR | \fBsoxflags\fR ] -[ \fIinputs\fR... ] +\fIINPUTS\fR... .SH DESCRIPTION .PP The ‘\f[CR]pocketsphinx\fP’ command-line program reads single-channel -16-bit PCM audio from standard input or one or more input files, and -attemps to recognize speech in it using the default acoustic and -language model. The input files can be raw audio, WAV, or NIST Sphere -files, though some of these may not be recognized properly. It -accepts a large number of options which you probably don't care about, -and a \fIcommand\fP which defaults to ‘\f[CR]live\fP’. The commands -are as follows: +16-bit PCM audio from one or more input files (or ‘\f[CR]-\fP’ to read from +standard input), and attempts to recognize speech in it using the +default acoustic and language model. The input files can be raw audio, +WAV, or NIST Sphere files, though some of these may not be recognized +properly. It accepts a large number of options which you probably +don't care about, and a \fIcommand\fP which defaults to +‘\f[CR]live\fP’. The commands are as follows: .TP .B live Detect speech segments in input files, run recognition on them (using @@ -45,6 +45,43 @@ be present. .B single Recognize the input as a single utterance, and write a JSON object in the same format described above. .TP +.B align + +Align a single input file (or ‘\f[CR]-\fP’ for standard input) to a word +sequence, and write a JSON object in the same format described above. +The first positional argument is the input, and all subsequent ones +are concatenated to make the text, to avoid surprises if you forget to +quote it. You are responsible for normalizing the text to remove +punctuation, uppercase, centipedes, etc. 
For example: + +.EX + pocketsphinx align goforward.wav "go forward ten meters" +.EE + +By default, only word-level alignment is done. To get phone +alignments, pass `-phone_align yes` in the flags, e.g.: + +.EX + pocketsphinx -phone_align yes align audio.wav $text +.EE + +This will make not particularly readable output, but you can use +.B jq +(https://stedolan.github.io/jq/) to clean it up. For example, +you can get just the word names and start times like this: + +.EX + pocketsphinx align audio.wav $text | jq '.w[]|[.t,.b]' +.EE + +Or you could get the phone names and durations like this: + +.EX + pocketsphinx -phone_align yes align audio.wav $text | jq '.w[]|.w[]|[.t,.d]' +.EE + +There are many, many other possibilities, of course. +.TP .B help Print a usage and help text with a list of possible arguments. .TP @@ -54,12 +91,15 @@ input format. Note that because the ‘\f[CR]sox\fP’ command-line is slightly quirky these must always come \fIafter\fP the filename or ‘\f[CR]-d\fP’ (which tells ‘\f[CR]sox\fP’ to read from the microphone). You can run live recognition like this: + .EX - sox -d $(pocketsphinx soxflags) | pocketsphinx + sox -d $(pocketsphinx soxflags) | pocketsphinx - .EE + or decode from a file named "audio.mp3" like this: + .EX -sox audio.mp3 $(pocketsphinx soxflags) | pocketsphinx + sox audio.mp3 $(pocketsphinx soxflags) | pocketsphinx - .EE .PP By default only errors are printed to standard error, but if you want more information you can pass ‘\f[CR]-loglevel INFO\fP’. Partial results are not printed, maybe they will be in the future, but don't hold your breath. Force-alignment is likely to be supported soon, however. 
diff --git a/doxygen/pocketsphinx.1.in b/doxygen/pocketsphinx.1.in index 034989441..2e1443ead 100644 --- a/doxygen/pocketsphinx.1.in +++ b/doxygen/pocketsphinx.1.in @@ -1,4 +1,4 @@ -.TH POCKETSPHINX 1 "2016-04-01" +.TH POCKETSPHINX 1 "2022-09-27" .SH NAME pocketsphinx \- Run speech recognition on audio data .SH SYNOPSIS @@ -8,17 +8,17 @@ pocketsphinx \- Run speech recognition on audio data \fBsingle\fR | \fBhelp\fR | \fBsoxflags\fR ] -[ \fIinputs\fR... ] +\fIINPUTS\fR... .SH DESCRIPTION .PP The ‘\f[CR]pocketsphinx\fP’ command-line program reads single-channel -16-bit PCM audio from standard input or one or more input files, and -attemps to recognize speech in it using the default acoustic and -language model. The input files can be raw audio, WAV, or NIST Sphere -files, though some of these may not be recognized properly. It -accepts a large number of options which you probably don't care about, -and a \fIcommand\fP which defaults to ‘\f[CR]live\fP’. The commands -are as follows: +16-bit PCM audio from one or more input files (or ‘\f[CR]-\fP’ to read from +standard input), and attempts to recognize speech in it using the +default acoustic and language model. The input files can be raw audio, +WAV, or NIST Sphere files, though some of these may not be recognized +properly. It accepts a large number of options which you probably +don't care about, and a \fIcommand\fP which defaults to +‘\f[CR]live\fP’. The commands are as follows: .TP .B live Detect speech segments in input files, run recognition on them (using @@ -45,6 +45,43 @@ be present. .B single Recognize the input as a single utterance, and write a JSON object in the same format described above. .TP +.B align + +Align a single input file (or ‘\f[CR]-\fP’ for standard input) to a word +sequence, and write a JSON object in the same format described above. +The first positional argument is the input, and all subsequent ones +are concatenated to make the text, to avoid surprises if you forget to +quote it. 
You are responsible for normalizing the text to remove +punctuation, uppercase, centipedes, etc. For example: + +.EX + pocketsphinx align goforward.wav "go forward ten meters" +.EE + +By default, only word-level alignment is done. To get phone +alignments, pass `-phone_align yes` in the flags, e.g.: + +.EX + pocketsphinx -phone_align yes align audio.wav $text +.EE + +This will make not particularly readable output, but you can use +.B jq +(https://stedolan.github.io/jq/) to clean it up. For example, +you can get just the word names and start times like this: + +.EX + pocketsphinx align audio.wav $text | jq '.w[]|[.t,.b]' +.EE + +Or you could get the phone names and durations like this: + +.EX + pocketsphinx -phone_align yes align audio.wav $text | jq '.w[]|.w[]|[.t,.d]' +.EE + +There are many, many other possibilities, of course. +.TP .B help Print a usage and help text with a list of possible arguments. .TP @@ -54,12 +91,15 @@ input format. Note that because the ‘\f[CR]sox\fP’ command-line is slightly quirky these must always come \fIafter\fP the filename or ‘\f[CR]-d\fP’ (which tells ‘\f[CR]sox\fP’ to read from the microphone). You can run live recognition like this: + .EX - sox -d $(pocketsphinx soxflags) | pocketsphinx + sox -d $(pocketsphinx soxflags) | pocketsphinx - .EE + or decode from a file named "audio.mp3" like this: + .EX -sox audio.mp3 $(pocketsphinx soxflags) | pocketsphinx + sox audio.mp3 $(pocketsphinx soxflags) | pocketsphinx - .EE .PP By default only errors are printed to standard error, but if you want more information you can pass ‘\f[CR]-loglevel INFO\fP’. Partial results are not printed, maybe they will be in the future, but don't hold your breath. Force-alignment is likely to be supported soon, however. 
diff --git a/include/pocketsphinx.h b/include/pocketsphinx.h index 52a9ac94d..c17c1897a 100644 --- a/include/pocketsphinx.h +++ b/include/pocketsphinx.h @@ -54,6 +54,7 @@ #include #include #include +#include #include /* Namum manglium ii domum */ diff --git a/include/pocketsphinx/alignment.h b/include/pocketsphinx/alignment.h new file mode 100644 index 000000000..397095ba4 --- /dev/null +++ b/include/pocketsphinx/alignment.h @@ -0,0 +1,139 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 2010 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ + +/** + * @file alignment.h + * @brief Multi-level alignment structure + */ + +#ifndef __PS_ALIGNMENT_H__ +#define __PS_ALIGNMENT_H__ + +#include + +#ifdef __cplusplus +extern "C" { +#endif +#if 0 +} +#endif + +/** + * Value indicating no parent or child for an entry. + */ +#define PS_ALIGNMENT_NONE -1 + +/** + * @struct ps_alignment_t + * @brief Multi-level alignment (words, phones, states) over an utterance. + */ +typedef struct ps_alignment_s ps_alignment_t; + +/** + * @struct ps_alignment_iter_t + * @brief Iterator over entries in an alignment. + */ +typedef struct ps_alignment_iter_s ps_alignment_iter_t; + +/** + * Retain an alignment + */ +ps_alignment_t *ps_alignment_retain(ps_alignment_t *al); + +/** + * Release an alignment + */ +int ps_alignment_free(ps_alignment_t *al); + +/** + * Iterate over the alignment starting at the first word. + */ +ps_alignment_iter_t *ps_alignment_words(ps_alignment_t *al); + +/** + * Iterate over the alignment starting at the first phone. + */ +ps_alignment_iter_t *ps_alignment_phones(ps_alignment_t *al); + +/** + * Iterate over the alignment starting at the first state. + */ +ps_alignment_iter_t *ps_alignment_states(ps_alignment_t *al); + +/** + * Get the human-readable name of the current segment for an alignment. 
+ * + * @return Name of this segment as a string (word, phone, or state + * number). This pointer is owned by the iterator, do not free it + * yourself. + */ +const char *ps_alignment_iter_name(ps_alignment_iter_t *itor); + +/** + * Get the timing and score information for the current segment of an alignment. + * + * @param start Output pointer for start frame + * @param duration Output pointer for duration + * @return Acoustic score for this segment + */ +int ps_alignment_iter_seg(ps_alignment_iter_t *itor, int *start, int *duration); + +/** + * Move an alignment iterator forward. + * + * If the end of the alignment is reached, this will free the iterator + * and return NULL. + */ +ps_alignment_iter_t *ps_alignment_iter_next(ps_alignment_iter_t *itor); + +/** + * Iterate over the children of the current alignment entry. + * + * If there are no child nodes, NULL is returned. + */ +ps_alignment_iter_t *ps_alignment_iter_children(ps_alignment_iter_t *itor); + +/** + * Release an iterator before completing all iterations. + */ +int ps_alignment_iter_free(ps_alignment_iter_t *itor); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* __PS_ALIGNMENT_H__ */ diff --git a/include/pocketsphinx/search.h b/include/pocketsphinx/search.h index 6c9c65696..8a4641e92 100644 --- a/include/pocketsphinx/search.h +++ b/include/pocketsphinx/search.h @@ -75,6 +75,7 @@ #define __PS_SEARCH_H__ #include +#include #ifdef __cplusplus extern "C" { @@ -94,9 +95,10 @@ typedef struct ps_decoder_s ps_decoder_t; /** * Actives search with the provided name. * - * Activates search with the provided name. The search must be added before - * using either ps_add_fsg(), ps_add_lm() or ps_add_kws(). - * + * @param name Name of search module to activate. This must have been + * previously added by either ps_add_fsg(), ps_add_lm(), or + * ps_add_kws(). If NULL, it will re-activate the default search, + * which is useful when running second-pass alignment, for instance. 
* @return 0 on success, -1 on failure */ POCKETSPHINX_EXPORT @@ -295,22 +297,69 @@ POCKETSPHINX_EXPORT int ps_add_allphone_file(ps_decoder_t *ps, const char *name, const char *path); /** - * Adds new search based on forced alignment. + * Set up decoder to force-align a word sequence. + * + * Unlike the `ps_add_*` functions, this activates the search module + * immediately, since force-alignment is nearly always a single shot. + * Currently "under the hood" this is an FSG search but you shouldn't + * depend on that. * - * Convenient method to and create a forced aligner for a piece of - * text. Note that this is currently less than useful, as it depends - * on the word sequence exactly matching the input, including - * alternate pronunciations and silences. + * Decoding proceeds as normal, though only this word sequence will be + * recognized, with silences and alternate pronunciations inserted. + * Word alignments are available with ps_seg_iter(). To obtain + * phoneme or state segmentations, you must subsequently call + * ps_set_alignment() and re-run decoding. It's tough son, but it's life. * * @param ps Decoder - * @param name Name for this search (could be anything, such as an utterance - * label or the name of the input file) * @param words String containing whitespace-separated words for alignment. - * These words are assumed to exist in the current dictionary. + * These words are assumed to exist in the current dictionary. * */ POCKETSPHINX_EXPORT -int ps_add_align(ps_decoder_t *ps, const char *name, const char *words); +int ps_set_align_text(ps_decoder_t *ps, const char *words); + +/** + * Set up decoder to run phone and state-level alignment. + * + * Unlike the `ps_add_*` functions, this activates the search module + * immediately, since force-alignment is nearly always a single shot. + * + * To align, run or re-run decoding as usual, then call + * ps_get_alignment() to get the resulting alignment. 
Note that if + * you call this function *before* rerunning decoding, you can obtain + * the phone and state sequence, but the durations will be invalid + * (phones and states will inherit the parent word's duration). + * + * @param ps Decoder object. + * @param al Usually NULL, which means to construct an alignment from + * the current search hypothesis (this does not work with + * allphone or keyword spotting). You can also pass a + * ps_alignment_t here if you have one. The search will + * retain but not copy it, so after running decoding it will + * be updated with new durations. You can set starts and + * durations for words or phones (not states) to constrain + * the alignment. + * @return 0 for success, -1 for error (if there is no search + * hypothesis, or it cannot be aligned due to missing word + * IDs) + */ +POCKETSPHINX_EXPORT +int ps_set_alignment(ps_decoder_t *ps, ps_alignment_t *al); + +/** + * Get the alignment associated with the current search module. + * + * As noted above, if decoding has not been run, this will contain + * invalid durations, but that may still be useful if you just want to + * know the state sequence. + * + * @return Current alignment, or NULL if none. This pointer is owned + * by the decoder, so you must call ps_alignment_retain() on + * it if you wish to keep it outside the lifetime of the + * decoder. + */ +POCKETSPHINX_EXPORT +ps_alignment_t *ps_get_alignment(ps_decoder_t *ps); #ifdef __cplusplus } diff --git a/programs/pocketsphinx_batch.c b/programs/pocketsphinx_batch.c index e75347c29..4ea6a8ecb 100644 --- a/programs/pocketsphinx_batch.c +++ b/programs/pocketsphinx_batch.c @@ -383,15 +383,11 @@ process_alignctl_line(ps_decoder_t *ps, cmd_ln_t *config, char const *fname) E_ERROR_SYSTEM("Failed to close transcript file %s", path); goto error_out; } - /* Always use the same name so that we don't leak memory (hopefully). 
*/ - if (ps_add_align(ps, "align", text)) { + if (ps_set_align_text(ps, text)) { err = -1; goto error_out; } - E_INFO("Force-aligning with transcript from: %s\n", fname); - if (ps_activate_search(ps, "align")) - err = -1; error_out: ckd_free(path); diff --git a/programs/pocketsphinx_main.c b/programs/pocketsphinx_main.c index 6b190e4f0..3d016eeb9 100644 --- a/programs/pocketsphinx_main.c +++ b/programs/pocketsphinx_main.c @@ -47,9 +47,28 @@ #include #include "util/ckd_alloc.h" +#include "config_macro.h" #include "pocketsphinx_internal.h" +#include "ps_alignment_internal.h" #include "soundfiles.h" +/* Le sigh. Didn't want to have to do this. */ +static const arg_t ps_main_args_def[] = { + POCKETSPHINX_OPTIONS, + { "phone_align", + ARG_BOOLEAN, + "no", + "Run a second pass to align phones and print their durations " + "(DOES NOT WORK IN LIVE MODE)." }, + { "state_align", + ARG_BOOLEAN, + "no", + "Run a second pass to align phones and states and print their durations " + "(Implies -phone_align) " + "(DOES NOT WORK IN LIVE MODE)." 
}, + CMDLN_EMPTY_OPTION +}; + static int global_done = 0; static void catch_sig(int signum) @@ -109,15 +128,136 @@ format_seg(char *outptr, int len, ps_seg_t *seg, return len; } +static int +format_align_iter(char *outptr, int maxlen, + ps_alignment_iter_t *itor, double utt_start, int frate, logmath_t *lmath) +{ + int start, duration, score; + double prob, st, dur; + const char *word; + + score = ps_alignment_iter_seg(itor, &start, &duration); + st = utt_start + (double)start / frate; + dur = (double)duration / frate; + prob = logmath_exp(lmath, score); + word = ps_alignment_iter_name(itor); + if (word == NULL) + word = ""; + + return snprintf(outptr, maxlen, HYP_FORMAT, st, dur, prob, word); +} + +static int +format_seg_align(char *outptr, int maxlen, + ps_alignment_iter_t *itor, + double utt_start, int frate, + logmath_t *lmath, int state_align) +{ + ps_alignment_iter_t *pitor; + int len = 0, hyplen; + + hyplen = format_align_iter(outptr, maxlen, + itor, utt_start, frate, lmath); + len += hyplen; + if (outptr) + outptr += hyplen; + if (maxlen) + maxlen -= hyplen; + + len += 6; /* "w":,[ */ + if (outptr) { + memcpy(outptr, ",\"w\":[", 6); + outptr += 6; + } + if (maxlen) + maxlen -= 6; + + pitor = ps_alignment_iter_children(itor); + while (pitor != NULL) { + hyplen = format_align_iter(outptr, maxlen, + pitor, utt_start, frate, lmath); + len += hyplen; + if (outptr) + outptr += hyplen; + if (maxlen) + maxlen -= hyplen; + + /* FIXME: refactor with recursion, someday */ + if (state_align) { + ps_alignment_iter_t *sitor = ps_alignment_iter_children(pitor); + len += 6; /* "w":,[ */ + if (outptr) { + memcpy(outptr, ",\"w\":[", 6); + outptr += 6; + } + if (maxlen) + maxlen -= 6; + while (sitor != NULL) { + hyplen = format_align_iter(outptr, maxlen, + sitor, utt_start, frate, lmath); + len += hyplen; + if (outptr) + outptr += hyplen; + if (maxlen) + maxlen -= hyplen; + + len++; /* } */ + if (outptr) + *outptr++ = '}'; + if (maxlen) + maxlen--; + sitor = 
ps_alignment_iter_next(sitor); + if (sitor != NULL) { + len++; + if (outptr) + *outptr++ = ','; + if (maxlen) + maxlen--; + } + } + len++; + if (outptr) + *outptr++ = ']'; + if (maxlen) + maxlen--; + } + + len++; /* } */ + if (outptr) + *outptr++ = '}'; + if (maxlen) + maxlen--; + pitor = ps_alignment_iter_next(pitor); + if (pitor != NULL) { + len++; + if (outptr) + *outptr++ = ','; + if (maxlen) + maxlen--; + } + } + + len += 2; + if (outptr) { + *outptr++ = ']'; + *outptr++ = '}'; + *outptr = '\0'; + } + if (maxlen) + maxlen--; + + return len; +} + static void -output_hyp(ps_endpointer_t *ep, ps_decoder_t *decoder) +output_hyp(ps_endpointer_t *ep, ps_decoder_t *decoder, ps_alignment_t *alignment) { logmath_t *lmath; char *hyp_json, *ptr; - ps_seg_t *itor; int frate; int maxlen, len; double st; + int state_align = ps_config_bool(decoder->config, "state_align"); maxlen = format_hyp(NULL, 0, ep, decoder); maxlen += 6; /* "w":,[ */ @@ -127,9 +267,21 @@ output_hyp(ps_endpointer_t *ep, ps_decoder_t *decoder) st = 0.0; else st = ps_endpointer_speech_start(ep); - for (itor = ps_seg_iter(decoder); itor; itor = ps_seg_next(itor)) { - maxlen += format_seg(NULL, 0, itor, st, frate, lmath); - maxlen++; /* , or ] at end */ + if (alignment) { + ps_alignment_iter_t *itor; + for (itor = ps_alignment_words(alignment); + itor; itor = ps_alignment_iter_next(itor)) { + maxlen += format_seg_align(NULL, 0, itor, st, frate, + lmath, state_align); + maxlen++; /* , or ] at end */ + } + } + else { + ps_seg_t *itor; + for (itor = ps_seg_iter(decoder); itor; itor = ps_seg_next(itor)) { + maxlen += format_seg(NULL, 0, itor, st, frate, lmath); + maxlen++; /* , or ] at end */ + } } maxlen++; /* final } */ maxlen++; /* trailing \0 */ @@ -140,18 +292,34 @@ output_hyp(ps_endpointer_t *ep, ps_decoder_t *decoder) ptr += len; maxlen -= len; - assert(maxlen > 2); - strcpy(ptr, ",\"w\":["); + assert(maxlen > 6); + memcpy(ptr, ",\"w\":[", 6); ptr += 6; maxlen -= 6; - for (itor = ps_seg_iter(decoder); 
itor; itor = ps_seg_next(itor)) { - assert(maxlen > 0); - len = format_seg(ptr, maxlen, itor, st, frate, lmath); - ptr += len; - maxlen -= len; - *ptr++ = ','; - maxlen--; + if (alignment) { + ps_alignment_iter_t *itor; + for (itor = ps_alignment_words(alignment); itor; + itor = ps_alignment_iter_next(itor)) { + assert(maxlen > 0); + len = format_seg_align(ptr, maxlen, itor, st, frate, lmath, + state_align); + ptr += len; + maxlen -= len; + *ptr++ = ','; + maxlen--; + } + } + else { + ps_seg_t *itor; + for (itor = ps_seg_iter(decoder); itor; itor = ps_seg_next(itor)) { + assert(maxlen > 0); + len = format_seg(ptr, maxlen, itor, st, frate, lmath); + ptr += len; + maxlen -= len; + *ptr++ = ','; + maxlen--; + } } --ptr; *ptr++ = ']'; @@ -217,7 +385,9 @@ live(ps_config_t *config, FILE *infile) E_INFO("Speech end at %.2f\n", ps_endpointer_speech_end(ep)); ps_end_utt(decoder); - output_hyp(ep, decoder); + if (ps_config_bool(decoder->config, "phone_align")) + E_WARN("Subword alignment not yet supported in live mode\n"); + output_hyp(ep, decoder, NULL); } } } @@ -237,16 +407,13 @@ live(ps_config_t *config, FILE *infile) } static int -single(ps_config_t *config, FILE *infile) +decode_single(ps_decoder_t *decoder, FILE *infile) { - ps_decoder_t *decoder = NULL; - short *data, *ptr; + ps_alignment_t *alignment = NULL; size_t data_size, block_size; + short *data, *ptr; + int rv = 0; - if ((decoder = ps_init(config)) == NULL) { - E_FATAL("PocketSphinx decoder init failed\n"); - goto error_out; - } data_size = 65536; block_size = 2048; ptr = data = ckd_calloc(data_size, sizeof(*data)); @@ -264,29 +431,125 @@ single(ps_config_t *config, FILE *infile) if (len == 0) { if (feof(infile)) break; - else + else { E_ERROR_SYSTEM("Failed to read %d bytes\n", sizeof(*ptr) * block_size); + rv = -1; + goto error_out; + } } ptr += len; } - ps_start_utt(decoder); - if (ps_process_raw(decoder, data, ptr - data, FALSE, TRUE) < 0) { + if ((rv = ps_start_utt(decoder)) < 0) + goto error_out; + if 
((rv = ps_process_raw(decoder, data, ptr - data, FALSE, TRUE)) < 0) { E_ERROR("ps_process_raw() failed\n"); goto error_out; } - ps_end_utt(decoder); - output_hyp(NULL, decoder); + if ((rv = ps_end_utt(decoder)) < 0) + goto error_out; + if (ps_config_bool(decoder->config, "phone_align")) { + if (ps_set_alignment(decoder, NULL) < 0) + goto error_out; + if ((rv = ps_start_utt(decoder)) < 0) + goto error_out; + if ((rv = ps_process_raw(decoder, data, ptr - data, FALSE, TRUE)) < 0) { + E_ERROR("ps_process_raw() failed\n"); + goto error_out; + } + if ((rv = ps_end_utt(decoder)) < 0) + goto error_out; + if ((alignment = ps_get_alignment(decoder)) == NULL) + goto error_out; + ps_activate_search(decoder, NULL); + } + output_hyp(NULL, decoder, alignment); + /* Fall through intentionally */ +error_out: ckd_free(data); + return rv; +} + +static int +single(ps_config_t *config, FILE *infile) +{ + ps_decoder_t *decoder; + int rv = 0; + + if ((decoder = ps_init(config)) == NULL) { + E_FATAL("PocketSphinx decoder init failed\n"); + return -1; + } + rv = decode_single(decoder, infile); ps_free(decoder); - return 0; + return rv; +} + +static char * +string_array_join(char **strings, int nstrings) +{ + char *joined, *ptr; + int i, *len, jlen; + + len = ckd_malloc(nstrings * sizeof(*len)); + for (jlen = i = 0; i < nstrings; ++i) { + len[i] = strlen(strings[i]); + jlen += len[i] + 1; + } + ptr = joined = ckd_malloc(jlen); + for (i = 0; i < nstrings; ++i) { + memcpy(ptr, strings[i], len[i]); + ptr += len[i]; + *ptr++ = ' '; + } + *--ptr = '\0'; + ckd_free(len); + return joined; +} + +static int +align(ps_config_t *config, char **inputs, int ninputs) +{ + int rv = 0, is_stdin = FALSE; + ps_decoder_t *decoder = NULL; + char *text = NULL; + FILE *fh = NULL; + if (ninputs < 2) { + E_ERROR("Usage: pocketsphinx align INFILE TEXT...\n"); + return -1; + } + /* Please do not use bestpath for alignment. 
*/ + ps_config_set_bool(config, "bestpath", FALSE); + ps_config_set_str(config, "lm", NULL); + if (0 == strcmp(inputs[0], "-")) { + is_stdin = TRUE; + fh = stdin; + } + else if ((fh = fopen(inputs[0], "rb")) == NULL) { + E_ERROR_SYSTEM("Failed to open %s for input", inputs[0]); + goto error_out; + } + if ((rv = read_file_header(inputs[0], fh, config)) < 0) + goto error_out; + if ((decoder = ps_init(config)) == NULL) { + E_FATAL("PocketSphinx decoder init failed\n"); + rv = -1; + goto error_out; + } + text = string_array_join(inputs + 1, ninputs - 1); + if ((rv = ps_set_align_text(decoder, text)) < 0) + goto error_out; + rv = decode_single(decoder, fh); + /* Fall through intentionally. */ error_out: - if (data) - ckd_free(data); + if (fh && !is_stdin) + fclose(fh); + if (text) + ckd_free(text); if (decoder) ps_free(decoder); - return -1; + return rv; } #if 0 @@ -372,7 +635,9 @@ find_inputs(int *argc, char **argv, int *ninputs) while (i < *argc) { char *arg = argv[i]; /* Bubble-bogo-bobo-backward-sort them to the end of argv. */ - if (arg && arg[0] && arg[0] != '-') { + if (arg && arg[0] + /* "-" on its own is an input, otherwise, - starts args. */ + && (arg[0] != '-' || arg[1] == '\0')) { memmove(&argv[i], &argv[i + 1], (*argc - i - 1) * sizeof(argv[i])); @@ -384,6 +649,13 @@ find_inputs(int *argc, char **argv, int *ninputs) else i += 2; } + /* Now reverse them. I won't be passing Google's coding interview + any time soon, not that it matters in this particular case. 
*/ + for (i = 0; i < *ninputs / 2; ++i) { + char *tmp = inputs[i]; + inputs[i] = inputs[*ninputs - i - 1]; + inputs[*ninputs - i - 1] = tmp; + } return inputs; } @@ -399,11 +671,16 @@ process_inputs(int (*func)(ps_config_t *, FILE *), else { int i, rv_one; for (i = 0; i < ninputs; ++i) { - /* They come to us in reverse order */ - char *file = inputs[ninputs - i - 1]; - FILE *fh = fopen(file, "rb"); - if (fh == NULL) { - E_ERROR_SYSTEM("Failed to open %s for reading", file); + char *file = inputs[i]; + int is_stdin = FALSE; + FILE *fh; + + if (0 == strcmp(file, "-")) { + is_stdin = TRUE; + fh = stdin; + } + else if ((fh = fopen(file, "rb")) == NULL) { + E_ERROR_SYSTEM("Failed to open %s for input", file); rv = -1; continue; } @@ -416,12 +693,21 @@ process_inputs(int (*func)(ps_config_t *, FILE *), rv = rv_one; E_ERROR("Recognition failed on %s\n", file); } - fclose(fh); + if (!is_stdin) + fclose(fh); } } return rv; } +void +usage(char *name) +{ + fprintf(stderr, "Usage: %s [soxflags | help | live | single | align] INPUTS...\n", name); + err_set_loglevel(ERR_INFO); + cmd_ln_log_help_r(NULL, ps_args()); +} + int main(int argc, char *argv[]) { @@ -432,22 +718,24 @@ main(int argc, char *argv[]) command = find_command(&argc, argv); inputs = find_inputs(&argc, argv, &ninputs); - if ((config = ps_config_parse_args(NULL, argc, argv)) == NULL) { - cmd_ln_log_help_r(NULL, ps_args()); + if ((ninputs == 0 && 0 != strcmp(command, "soxflags")) + || (config = ps_config_parse_args(ps_main_args_def, argc, argv)) == NULL) { + usage(argv[0]); return 1; } ps_default_search_args(config); + if (ps_config_bool(config, "state_align")) + ps_config_set_bool(config, "phone_align", TRUE); if (0 == strcmp(command, "soxflags")) rv = soxflags(config); else if (0 == strcmp(command, "live")) rv = process_inputs(live, config, inputs, ninputs); else if (0 == strcmp(command, "single")) rv = process_inputs(single, config, inputs, ninputs); + else if (0 == strcmp(command, "align")) + rv = align(config, 
inputs, ninputs); else if (0 == strcmp(command, "help")) { - fprintf(stderr, "Usage: %s [soxflags | help | live | single] [INPUTS...]\n", - argv[0]); - err_set_loglevel(ERR_INFO); - cmd_ln_log_help_r(NULL, ps_args()); + usage(argv[0]); } else { E_ERROR("Unknown command \"%s\"\n", command); diff --git a/src/allphone_search.c b/src/allphone_search.c index fbc416f32..079018e10 100644 --- a/src/allphone_search.c +++ b/src/allphone_search.c @@ -81,7 +81,8 @@ allphone_search_fill_iter(ps_seg_t *seg, phseg_t *phseg) seg->ef = phseg->ef; seg->ascr = phseg->score; seg->lscr = phseg->tscore; - seg->word = bin_mdef_ciphone_str(ps_search_acmod(seg->search)->mdef, phseg->ci); + seg->text = bin_mdef_ciphone_str(ps_search_acmod(seg->search)->mdef, phseg->ci); + seg->wid = BAD_S3WID; } static ps_seg_t * diff --git a/src/fsg_search.c b/src/fsg_search.c index 588ce7393..62fc83db0 100644 --- a/src/fsg_search.c +++ b/src/fsg_search.c @@ -474,8 +474,8 @@ fsg_search_pnode_exit(fsg_search_t *fsgs, fsg_pnode_t * pnode) if (fsg_model_is_filler(fsgs->fsg, wid) /* FIXME: This might be slow due to repeated calls to dict_to_id(). */ || (dict_is_single_phone(ps_search_dict(fsgs), - dict_wordid(ps_search_dict(fsgs), - fsg_model_word_str(fsgs->fsg, wid))))) { + dict_wordid(ps_search_dict(fsgs), + fsg_model_word_str(fsgs->fsg, wid))))) { /* Create a dummy context structure that applies to all right contexts */ fsg_pnode_add_all_ctxt(&ctxt); @@ -1068,12 +1068,13 @@ fsg_seg_bp2itor(ps_seg_t *seg, fsg_hist_entry_t *hist_entry) if ((bp = fsg_hist_entry_pred(hist_entry)) >= 0) ph = fsg_history_entry_get(fsgs->history, bp); - seg->word = fsg_model_word_str(fsgs->fsg, hist_entry->fsglink->wid); + seg->text = fsg_model_word_str(fsgs->fsg, hist_entry->fsglink->wid); + /* Make sure to convert the word IDs!!! */ + seg->wid = dict_wordid(seg->search->dict, seg->text); seg->ef = fsg_hist_entry_frame(hist_entry); seg->sf = ph ? 
fsg_hist_entry_frame(ph) + 1 : 0; /* This is kind of silly but it happens for null transitions. */ if (seg->sf > seg->ef) seg->sf = seg->ef; - seg->prob = 0; /* Bogus value... */ /* "Language model" score = transition probability. */ seg->lback = 1; seg->lscr = fsg_link_logs2prob(hist_entry->fsglink) >> SENSCR_SHIFT; @@ -1083,6 +1084,7 @@ fsg_seg_bp2itor(ps_seg_t *seg, fsg_hist_entry_t *hist_entry) } else seg->ascr = hist_entry->score - seg->lscr; + seg->prob = seg->lscr + seg->ascr; /* Somewhat approximate value... */ } @@ -1256,7 +1258,7 @@ find_start_node(fsg_search_t *fsgs, ps_lattice_t *dag) /* Look for all nodes starting in frame zero with some exits. */ for (node = dag->nodes; node; node = node->next) { if (node->sf == 0 && node->exits) { - E_INFO("Start node %s.%d:%d:%d\n", + E_INFO("Start node candidate %s.%d:%d:%d\n", fsg_model_word_str(fsgs->fsg, node->wid), node->sf, node->fef, node->lef); start = glist_add_ptr(start, node); @@ -1295,7 +1297,7 @@ find_end_node(fsg_search_t *fsgs, ps_lattice_t *dag) /* Look for all nodes ending in last frame with some entries. 
*/ for (node = dag->nodes; node; node = node->next) { if (node->lef == dag->n_frames - 1 && node->entries) { - E_INFO("End node %s.%d:%d:%d (%d)\n", + E_INFO("End node candidate %s.%d:%d:%d (%d)\n", fsg_model_word_str(fsgs->fsg, node->wid), node->sf, node->fef, node->lef, node->info.best_exit); end = glist_add_ptr(end, node); diff --git a/src/kws_search.c b/src/kws_search.c index 5ab4b76db..8527fb9f7 100644 --- a/src/kws_search.c +++ b/src/kws_search.c @@ -84,7 +84,8 @@ kws_seg_fill(kws_seg_t *itor) { kws_detection_t* detection = (kws_detection_t*)gnode_ptr(itor->detection); - itor->base.word = detection->keyphrase; + itor->base.text = detection->keyphrase; + itor->base.wid = BAD_S3WID; itor->base.sf = detection->sf; itor->base.ef = detection->ef; itor->base.prob = detection->prob; diff --git a/src/ngram_search.c b/src/ngram_search.c index 14c7c9ca3..f6f40204b 100644 --- a/src/ngram_search.c +++ b/src/ngram_search.c @@ -891,7 +891,8 @@ ngram_search_bp2itor(ps_seg_t *seg, int bp) be = &ngs->bp_table[bp]; pbe = be->bp == -1 ? NULL : &ngs->bp_table[be->bp]; - seg->word = dict_wordstr(ps_search_dict(ngs), be->wid); + seg->text = dict_wordstr(ps_search_dict(ngs), be->wid); + seg->wid = be->wid; seg->ef = be->frame; seg->sf = pbe ? pbe->frame + 1 : 0; seg->prob = 0; /* Bogus value... 
*/ diff --git a/src/pocketsphinx.c b/src/pocketsphinx.c index fc617fb45..8deb131ae 100644 --- a/src/pocketsphinx.c +++ b/src/pocketsphinx.c @@ -52,6 +52,7 @@ #include "util/hash_table.h" #include "pocketsphinx_internal.h" #include "ps_lattice_internal.h" +#include "ps_alignment_internal.h" #include "phone_loop_search.h" #include "kws_search.h" #include "fsg_search_internal.h" @@ -492,9 +493,11 @@ ps_activate_search(ps_decoder_t *ps, const char *name) return -1; } - if (!(search = ps_find_search(ps, name))) { + if (name == NULL) + name = PS_DEFAULT_SEARCH; + + if (!(search = ps_find_search(ps, name))) return -1; - } ps->search = search; /* Set pl window depending on the search */ @@ -562,27 +565,43 @@ ngram_model_t * ps_get_lm(ps_decoder_t *ps, const char *name) { ps_search_t *search = ps_find_search(ps, name); - if (search && strcmp(PS_SEARCH_TYPE_NGRAM, ps_search_type(search))) + if (search == NULL) return NULL; - return search ? ((ngram_search_t *) search)->lmset : NULL; + if (0 != strcmp(PS_SEARCH_TYPE_NGRAM, ps_search_type(search))) + return NULL; + return ((ngram_search_t *) search)->lmset; } fsg_model_t * ps_get_fsg(ps_decoder_t *ps, const char *name) { ps_search_t *search = ps_find_search(ps, name); - if (search && strcmp(PS_SEARCH_TYPE_FSG, ps_search_type(search))) + if (search == NULL) + return NULL; + if (0 != strcmp(PS_SEARCH_TYPE_FSG, ps_search_type(search))) return NULL; - return search ? 
((fsg_search_t *) search)->fsg : NULL; + return ((fsg_search_t *) search)->fsg; } const char* -ps_get_kws(ps_decoder_t *ps, const char* name) +ps_get_kws(ps_decoder_t *ps, const char *name) { ps_search_t *search = ps_find_search(ps, name); - if (search && strcmp(PS_SEARCH_TYPE_KWS, ps_search_type(search))) + if (search == NULL) + return NULL; + if (0 != strcmp(PS_SEARCH_TYPE_KWS, ps_search_type(search))) + return NULL; + return kws_search_get_keyphrases(search); +} + +ps_alignment_t * +ps_get_alignment(ps_decoder_t *ps) +{ + if (ps->search == NULL) + return NULL; + if (0 != strcmp(PS_SEARCH_TYPE_STATE_ALIGN, ps_search_type(ps->search))) return NULL; - return search ? kws_search_get_keyphrases(search) : NULL; + return ((state_align_search_t *) ps->search)->al; } static int @@ -648,35 +667,99 @@ ps_add_allphone_file(ps_decoder_t *ps, const char *name, const char *path) } int -ps_add_align(ps_decoder_t *ps, const char *name, const char *text) +ps_set_align_text(ps_decoder_t *ps, const char *text) { - ps_search_t *search; - ps_alignment_t *alignment; + fsg_model_t *fsg; char *textbuf = ckd_salloc(text); char *ptr, *word, delimfound; - int n; + int n, nwords; textbuf = string_trim(textbuf, STRING_BOTH); - alignment = ps_alignment_init(ps->d2p); - ps_alignment_add_word(alignment, dict_wordid(ps->dict, ""), 0); - for (ptr = textbuf; - (n = nextword(ptr, " \t\n\r", &word, &delimfound)) >= 0; - ptr = word + n, *ptr = delimfound) { + /* First pass: count and verify words */ + nwords = 0; + ptr = textbuf; + while ((n = nextword(ptr, " \t\n\r", &word, &delimfound)) >= 0) { int wid; if ((wid = dict_wordid(ps->dict, word)) == BAD_S3WID) { E_ERROR("Unknown word %s\n", word); ckd_free(textbuf); - ps_alignment_free(alignment); return -1; } - ps_alignment_add_word(alignment, wid, 0); + ptr = word + n; + *ptr = delimfound; + ++nwords; + } + /* Second pass: make fsg */ + fsg = fsg_model_init("_align", ps->lmath, + ps_config_float(ps->config, "lw"), + nwords + 1); + nwords = 0; + ptr = 
textbuf; + while ((n = nextword(ptr, " \t\n\r", &word, &delimfound)) >= 0) { + int wid; + if ((wid = dict_wordid(ps->dict, word)) == BAD_S3WID) { + E_ERROR("Unknown word %s\n", word); + ckd_free(textbuf); + return -1; + } + wid = fsg_model_word_add(fsg, word); + fsg_model_trans_add(fsg, nwords, nwords + 1, 0, wid); + ptr = word + n; + *ptr = delimfound; + ++nwords; } - ps_alignment_add_word(alignment, dict_wordid(ps->dict, ""), 0); - ps_alignment_populate(alignment); - search = state_align_search_init(name, ps->config, ps->acmod, alignment); - ps_alignment_free(alignment); ckd_free(textbuf); - return set_search_internal(ps, search); + fsg->start_state = 0; + fsg->final_state = nwords; + if (ps_add_fsg(ps, PS_DEFAULT_SEARCH, fsg) < 0) { + fsg_model_free(fsg); + return -1; + } + /* FIXME: Should rethink ownership semantics, this is annoying. */ + fsg_model_free(fsg); + return ps_activate_search(ps, PS_DEFAULT_SEARCH); +} + +int +ps_set_alignment(ps_decoder_t *ps, ps_alignment_t *al) +{ + ps_search_t *search; + int new_alignment = FALSE; + + if (al == NULL) { + ps_seg_t *seg; + seg = ps_seg_iter(ps); + if (seg == NULL) + return -1; + al = ps_alignment_init(ps->d2p); + new_alignment = TRUE; + while (seg) { + if (seg->wid == BAD_S3WID) { + E_ERROR("No word ID for segment %s, cannot align\n", + seg->text); + goto error_out; + } + ps_alignment_add_word(al, seg->wid, seg->sf, seg->ef - seg->sf + 1); + seg = ps_seg_next(seg); + } + /* FIXME: Add cionly parameter as in SoundSwallower */ + if (ps_alignment_populate(al) < 0) + goto error_out; + } + else + al = ps_alignment_retain(al); + search = state_align_search_init("_state_align", ps->config, ps->acmod, al); + if (search == NULL) + goto error_out; + if (new_alignment) + ps_alignment_free(al); + if (set_search_internal(ps, search) < 0) + goto error_out; + return ps_activate_search(ps, "_state_align"); +error_out: + if (new_alignment) + ps_alignment_free(al); + return -1; } int @@ -1319,7 +1402,7 @@ ps_seg_next(ps_seg_t 
*seg) char const * ps_seg_word(ps_seg_t *seg) { - return seg->word; + return seg->text; } void diff --git a/src/pocketsphinx_internal.h b/src/pocketsphinx_internal.h index 38c848f51..8f583fcfa 100644 --- a/src/pocketsphinx_internal.h +++ b/src/pocketsphinx_internal.h @@ -104,7 +104,10 @@ struct ps_search_s { char *type; char *name; - ps_search_t *pls; /**< Phoneme loop for lookahead. */ + /** + * Phoneme loop for lookahead. Reference (not retained) to + * phone_loop in the parent ps_decoder_t. */ + ps_search_t *pls; cmd_ln_t *config; /**< Configuration. */ acmod_t *acmod; /**< Acoustic model. */ dict_t *dict; /**< Pronunciation dictionary. */ @@ -181,9 +184,10 @@ typedef struct ps_segfuncs_s { struct ps_seg_s { ps_segfuncs_t *vt; /**< V-table of seg methods */ ps_search_t *search; /**< Search object from whence this came */ - char const *word; /**< Word string (pointer into dictionary hash) */ + const char *text; /**< Textual representation of segment */ frame_idx_t sf; /**< Start frame. */ frame_idx_t ef; /**< End frame. */ + s3wid_t wid; /**< Word ID (*not* base word ID). */ int32 ascr; /**< Acoustic score. */ int32 lscr; /**< Language model score. */ int32 prob; /**< Log posterior probability. 
*/ diff --git a/src/ps_alignment.c b/src/ps_alignment.c index 8d2eb2055..2c4c40e6c 100644 --- a/src/ps_alignment.c +++ b/src/ps_alignment.c @@ -40,7 +40,7 @@ */ #include "util/ckd_alloc.h" -#include "ps_alignment.h" +#include "ps_alignment_internal.h" ps_alignment_t * ps_alignment_init(dict2pid_t *d2p) @@ -111,17 +111,14 @@ ps_alignment_vector_empty(ps_alignment_vector_t *vec) int ps_alignment_add_word(ps_alignment_t *al, - int32 wid, int duration) + int32 wid, int start, int duration) { ps_alignment_entry_t *ent; if ((ent = ps_alignment_vector_grow_one(&al->word)) == NULL) return 0; ent->id.wid = wid; - if (al->word.n_ent > 1) - ent->start = ent[-1].start + ent[-1].duration; - else - ent->start = 0; + ent->start = start; ent->duration = duration; ent->score = 0; ent->parent = PS_ALIGNMENT_NONE; @@ -352,24 +349,6 @@ ps_alignment_propagate(ps_alignment_t *al) return 0; } -int -ps_alignment_n_words(ps_alignment_t *al) -{ - return (int)al->word.n_ent; -} - -int -ps_alignment_n_phones(ps_alignment_t *al) -{ - return (int)al->sseq.n_ent; -} - -int -ps_alignment_n_states(ps_alignment_t *al) -{ - return (int)al->state.n_ent; -} - ps_alignment_iter_t * ps_alignment_words(ps_alignment_t *al) { @@ -381,6 +360,7 @@ ps_alignment_words(ps_alignment_t *al) itor->al = al; itor->vec = &al->word; itor->pos = 0; + itor->parent = PS_ALIGNMENT_NONE; return itor; } @@ -395,6 +375,8 @@ ps_alignment_phones(ps_alignment_t *al) itor->al = al; itor->vec = &al->sseq; itor->pos = 0; + /* Iterate over *all* phones */ + itor->parent = PS_ALIGNMENT_NONE; return itor; } @@ -409,6 +391,8 @@ ps_alignment_states(ps_alignment_t *al) itor->al = al; itor->vec = &al->state; itor->pos = 0; + /* Iterate over *all* states */ + itor->parent = PS_ALIGNMENT_NONE; return itor; } @@ -418,9 +402,59 @@ ps_alignment_iter_get(ps_alignment_iter_t *itor) return itor->vec->seq + itor->pos; } +const char * +ps_alignment_iter_name(ps_alignment_iter_t *itor) +{ + ps_alignment_entry_t *ent; + if (itor == NULL) + return 
NULL; + ent = ps_alignment_iter_get(itor); + if (itor->vec == &itor->al->word) { + return dict_wordstr(itor->al->d2p->dict, + ent->id.wid); + } + else if (itor->vec == &itor->al->sseq) { + return bin_mdef_ciphone_str(itor->al->d2p->mdef, + ent->id.pid.cipid); + } + else if (itor->vec == &itor->al->state) { + int len = snprintf(NULL, 0, "%u", ent->id.senid); + if (len == 0) { + E_ERROR_SYSTEM("snprintf() failed"); + return NULL; + } + if (itor->name) + ckd_free(itor->name); + itor->name = ckd_malloc(len + 1); + if (snprintf(itor->name, len + 1, "%u", ent->id.senid) != len) { + E_ERROR_SYSTEM("snprintf() failed"); + return NULL; + } + return itor->name; + } + else + return NULL; +} + +int +ps_alignment_iter_seg(ps_alignment_iter_t *itor, int *start, int *duration) +{ + ps_alignment_entry_t *ent; + if (itor == NULL) + return 0; + ent = ps_alignment_iter_get(itor); + if (start) + *start = ent->start; + if (duration) + *duration = ent->duration; + return ent->score; +} + int ps_alignment_iter_free(ps_alignment_iter_t *itor) { + if (itor->name) + ckd_free(itor->name); ckd_free(itor); return 0; } @@ -435,6 +469,9 @@ ps_alignment_iter_goto(ps_alignment_iter_t *itor, int pos) return NULL; } itor->pos = pos; + /* Switch to this word/phone as parent */ + if (itor->parent != PS_ALIGNMENT_NONE) + itor->parent = itor->vec->seq[itor->pos].parent; return itor; } @@ -447,15 +484,8 @@ ps_alignment_iter_next(ps_alignment_iter_t *itor) ps_alignment_iter_free(itor); return NULL; } - return itor; -} - -ps_alignment_iter_t * -ps_alignment_iter_prev(ps_alignment_iter_t *itor) -{ - if (itor == NULL) - return NULL; - if (--itor->pos < 0) { + if (itor->parent != PS_ALIGNMENT_NONE + && itor->vec->seq[itor->pos].parent != itor->parent) { ps_alignment_iter_free(itor); return NULL; } @@ -463,27 +493,7 @@ ps_alignment_iter_prev(ps_alignment_iter_t *itor) } ps_alignment_iter_t * -ps_alignment_iter_up(ps_alignment_iter_t *itor) -{ - ps_alignment_iter_t *itor2; - if (itor == NULL) - return NULL; - 
if (itor->vec == &itor->al->word) - return NULL; - if (itor->vec->seq[itor->pos].parent == PS_ALIGNMENT_NONE) - return NULL; - itor2 = ckd_calloc(1, sizeof(*itor2)); - itor2->al = itor->al; - itor2->pos = itor->vec->seq[itor->pos].parent; - if (itor->vec == &itor->al->sseq) - itor2->vec = &itor->al->word; - else - itor2->vec = &itor->al->sseq; - return itor2; -} - -ps_alignment_iter_t * -ps_alignment_iter_down(ps_alignment_iter_t *itor) +ps_alignment_iter_children(ps_alignment_iter_t *itor) { ps_alignment_iter_t *itor2; if (itor == NULL) @@ -495,6 +505,8 @@ ps_alignment_iter_down(ps_alignment_iter_t *itor) itor2 = ckd_calloc(1, sizeof(*itor2)); itor2->al = itor->al; itor2->pos = itor->vec->seq[itor->pos].child; + /* Iterate over only parent's phones/states */ + itor2->parent = itor->pos; if (itor->vec == &itor->al->word) itor2->vec = &itor->al->sseq; else diff --git a/src/ps_alignment.h b/src/ps_alignment_internal.h similarity index 58% rename from src/ps_alignment.h rename to src/ps_alignment_internal.h index d540cd748..72e2d6a59 100644 --- a/src/ps_alignment.h +++ b/src/ps_alignment_internal.h @@ -35,12 +35,8 @@ * */ -/** - * @file ps_alignment.h Multi-level alignment structure - */ - -#ifndef __PS_ALIGNMENT_H__ -#define __PS_ALIGNMENT_H__ +#ifndef __PS_ALIGNMENT_INTERNAL_H__ +#define __PS_ALIGNMENT_INTERNAL_H__ #include @@ -54,25 +50,40 @@ extern "C" { } #endif -#define PS_ALIGNMENT_NONE ((uint16)0xffff) - -struct ps_alignment_entry_s { +/** + * @struct ps_alignment_entry_t + * @brief Entry (phone, word, or state) in an alignment + */ +typedef struct ps_alignment_entry_s { + int32 start; /**< Start frame index. */ + int32 duration; /**< Duration in frames. */ + int32 score; /**< Alignment score (fairly meaningless). */ + /** + * Index of parent node. + * + * You can use this to determine if you have crossed a parent + * boundary. 
For example if you wish to iterate only over phones + * inside a word, you can store this for the first phone and stop + * iterating once it changes. */ + int parent; + int child; /**< Index of child node. */ + /** + * ID or IDs for this entry. + * + * This is complex, though perhaps not needlessly so. We need all + * this information to do state alignment. + */ union { - int32 wid; + int32 wid; /**< Word ID (for words) */ struct { - uint16 ssid; - uint16 cipid; - uint16 tmatid; + int16 cipid; /**< Phone ID, which you care about. */ + uint16 ssid; /**< Senone sequence ID, which you don't. */ + int32 tmatid; /**< Transition matrix ID, almost certainly + the same as cipid. */ } pid; uint16 senid; } id; - int16 start; - int16 duration; - int32 score; - uint16 parent; - uint16 child; -}; -typedef struct ps_alignment_entry_s ps_alignment_entry_t; +} ps_alignment_entry_t; struct ps_alignment_vector_s { ps_alignment_entry_t *seq; @@ -87,35 +98,29 @@ struct ps_alignment_s { ps_alignment_vector_t sseq; ps_alignment_vector_t state; }; -typedef struct ps_alignment_s ps_alignment_t; struct ps_alignment_iter_s { ps_alignment_t *al; ps_alignment_vector_t *vec; int pos; + int parent; + char *name; }; -typedef struct ps_alignment_iter_s ps_alignment_iter_t; + +#define ps_alignment_n_words(al) (int)(al)->word.n_ent +#define ps_alignment_n_phones(al) (int)(al)->sseq.n_ent +#define ps_alignment_n_states(al) (int)(al)->state.n_ent /** * Create a new, empty alignment. */ ps_alignment_t *ps_alignment_init(dict2pid_t *d2p); -/** - * Retain an alighment - */ -ps_alignment_t *ps_alignment_retain(ps_alignment_t *al); - -/** - * Release an alignment - */ -int ps_alignment_free(ps_alignment_t *al); - /** * Append a word. */ int ps_alignment_add_word(ps_alignment_t *al, - int32 wid, int duration); + int32 wid, int start, int duration); /** * Populate lower layers using available word information. 
@@ -132,36 +137,6 @@ int ps_alignment_populate_ci(ps_alignment_t *al); */ int ps_alignment_propagate(ps_alignment_t *al); -/** - * Number of words. - */ -int ps_alignment_n_words(ps_alignment_t *al); - -/** - * Number of phones. - */ -int ps_alignment_n_phones(ps_alignment_t *al); - -/** - * Number of states. - */ -int ps_alignment_n_states(ps_alignment_t *al); - -/** - * Iterate over the alignment starting at the first word. - */ -ps_alignment_iter_t *ps_alignment_words(ps_alignment_t *al); - -/** - * Iterate over the alignment starting at the first phone. - */ -ps_alignment_iter_t *ps_alignment_phones(ps_alignment_t *al); - -/** - * Iterate over the alignment starting at the first state. - */ -ps_alignment_iter_t *ps_alignment_states(ps_alignment_t *al); - /** * Get the alignment entry pointed to by an iterator. * @@ -174,42 +149,8 @@ ps_alignment_entry_t *ps_alignment_iter_get(ps_alignment_iter_t *itor); */ ps_alignment_iter_t *ps_alignment_iter_goto(ps_alignment_iter_t *itor, int pos); -/** - * Move an alignment iterator forward. - * - * If the end of the alignment is reached, this will free the iterator - * and return NULL. - */ -ps_alignment_iter_t *ps_alignment_iter_next(ps_alignment_iter_t *itor); - -/** - * Move an alignment iterator back. - * - * If the start of the alignment is reached, this will free the iterator - * and return NULL. - */ -ps_alignment_iter_t *ps_alignment_iter_prev(ps_alignment_iter_t *itor); - -/** - * Get a new iterator starting at the parent of the current node. - * - * If there is no parent node, NULL is returned. - */ -ps_alignment_iter_t *ps_alignment_iter_up(ps_alignment_iter_t *itor); -/** - * Get a new iterator starting at the first child of the current node. - * - * If there is no child node, NULL is returned. - */ -ps_alignment_iter_t *ps_alignment_iter_down(ps_alignment_iter_t *itor); - -/** - * Release an iterator before completing all iterations. 
- */ -int ps_alignment_iter_free(ps_alignment_iter_t *itor); - #ifdef __cplusplus } /* extern "C" */ #endif -#endif /* __PS_ALIGNMENT_H__ */ +#endif /* __PS_ALIGNMENT_INTERNAL_H__ */ diff --git a/src/ps_lattice.c b/src/ps_lattice.c index bca03e218..e069ae501 100644 --- a/src/ps_lattice.c +++ b/src/ps_lattice.c @@ -956,7 +956,8 @@ ps_lattice_link2itor(ps_seg_t *seg, ps_latlink_t *link, int to) } } } - seg->word = dict_wordstr(ps_search_dict(seg->search), node->wid); + seg->text = dict_wordstr(ps_search_dict(seg->search), node->wid); + seg->wid = node->wid; seg->sf = node->sf; seg->ascr = link->ascr << SENSCR_SHIFT; /* Compute language model score from best predecessors. */ @@ -1855,7 +1856,8 @@ ps_astar_node2itor(astar_seg_t *itor) seg->ef = node->lef; else seg->ef = itor->nodes[itor->cur + 1]->sf - 1; - seg->word = dict_wordstr(ps_search_dict(seg->search), node->wid); + seg->text = dict_wordstr(ps_search_dict(seg->search), node->wid); + seg->wid = node->wid; seg->sf = node->sf; seg->prob = 0; /* FIXME: implement forward-backward */ } diff --git a/src/state_align_search.c b/src/state_align_search.c index 33c851dd8..665446449 100644 --- a/src/state_align_search.c +++ b/src/state_align_search.c @@ -8,27 +8,27 @@ * are met: * * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. + * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. 
* - * This work was supported in part by funding from the Defense Advanced - * Research Projects Agency and the National Science Foundation of the + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the * United States of America, and the CMU Sphinx Speech Consortium. * - * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND - * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* * ==================================================================== @@ -94,6 +94,11 @@ prune_hmms(state_align_search_t *sas, int frame_idx) hmm_t *hmm = sas->hmms + i; if (hmm_frame(hmm) < frame_idx) continue; + /* Enforce alignment constraint: due to non-emitting states, + * previous phone's HMM remains active in first frame of its + * successor. */ + if (nf > sas->ef[i]) + continue; hmm_frame(hmm) = nf; } } @@ -111,6 +116,9 @@ phone_transition(state_align_search_t *sas, int frame_idx) hmm = sas->hmms + i; if (hmm_frame(hmm) != nf) continue; + /* Enforce alignment constraint for initial state of each phone. */ + if (nf < sas->sf[i + 1]) + continue; newphone_score = hmm_out_score(hmm); /* Transition into next phone using the usual Viterbi rule. */ @@ -174,7 +182,8 @@ state_align_search_step(ps_search_t *search, int frame_idx) /* Calculate senone scores. */ for (i = 0; i < sas->n_phones; ++i) - acmod_activate_hmm(acmod, sas->hmms + i); + if (hmm_frame(&sas->hmms[i]) == frame_idx) + acmod_activate_hmm(acmod, &sas->hmms[i]); senscr = acmod_score(acmod, &frame_idx); /* Renormalize here if needed. */ @@ -184,7 +193,7 @@ state_align_search_step(ps_search_t *search, int frame_idx) frame_idx, sas->best_score); renormalize_hmms(sas, frame_idx, sas->best_score); } - + /* Viterbi step. */ sas->best_score = evaluate_hmms(sas, senscr, frame_idx); prune_hmms(sas, frame_idx); @@ -268,6 +277,8 @@ state_align_search_free(ps_search_t *search) ps_search_base_free(search); ckd_free(sas->hmms); ckd_free(sas->tokens); + ckd_free(sas->sf); + ckd_free(sas->ef); hmm_context_free(sas->hmmctx); ps_alignment_free(sas->al); ckd_free(sas); @@ -283,7 +294,10 @@ static void state_align_search_seg_free(ps_seg_t * seg) { state_align_seg_t *itor = (state_align_seg_t *)seg; - ps_alignment_iter_free(itor->itor); + if (itor->itor != NULL) { + /* If we hit the end of the alignment, it was already freed! 
*/ + ps_alignment_iter_free(itor->itor); + } ckd_free(itor); } @@ -297,7 +311,8 @@ state_align_search_fill_iter(ps_seg_t *seg) seg->ef = entry->start + entry->duration - 1; seg->ascr = entry->score; seg->lscr = 0; - seg->word = dict_wordstr(ps_search_dict(seg->search), entry->id.wid); + seg->text = dict_wordstr(ps_search_dict(seg->search), entry->id.wid); + seg->wid = entry->id.wid; } static ps_seg_t * @@ -333,7 +348,7 @@ state_align_search_seg_iter(ps_search_t * search) purposes of the decoder API we will just iterate over words, which is the most likely/useful use case. We will also expose the rest of the alignment API separately. */ - + itor = ps_alignment_words(sas->al); if (itor == NULL) return NULL; @@ -342,7 +357,7 @@ state_align_search_seg_iter(ps_search_t * search) seg->base.search = search; seg->itor = itor; state_align_search_fill_iter((ps_seg_t *)seg); - + return (ps_seg_t *)seg; } @@ -362,23 +377,32 @@ state_align_search_hyp(ps_search_t *search, int32 *out_score) if (itor == NULL) return NULL; for (hyp_len = 0; itor; itor = ps_alignment_iter_next(itor)) { - const char *word = dict_wordstr(ps_search_dict(search), - ps_alignment_iter_get(itor)->id.wid); - if (word == NULL) { - E_ERROR("Unknown word id %d in alignment", - ps_alignment_iter_get(itor)->id.wid); - return NULL; + const char *word; + int32 wid = ps_alignment_iter_get(itor)->id.wid; + + if (dict_real_word(ps_search_dict(search), wid)) { + word = dict_basestr(ps_search_dict(search), + ps_alignment_iter_get(itor)->id.wid); + if (word == NULL) { + E_ERROR("Unknown word id %d in alignment", + ps_alignment_iter_get(itor)->id.wid); + return NULL; + } + hyp_len += strlen(word) + 1; } - hyp_len += strlen(word) + 1; } search->hyp_str = ckd_calloc(hyp_len + 1, sizeof(*search->hyp_str)); for (itor = ps_alignment_words(sas->al); itor; itor = ps_alignment_iter_next(itor)) { ps_alignment_entry_t *ent = ps_alignment_iter_get(itor); - const char *word = dict_wordstr(ps_search_dict(search), - ent->id.wid); - 
strcat(search->hyp_str, word); - strcat(search->hyp_str, " "); + int32 wid = ent->id.wid; + const char *word; + if (dict_real_word(ps_search_dict(search), wid)) { + word = dict_basestr(ps_search_dict(search), + ent->id.wid); + strcat(search->hyp_str, word); + strcat(search->hyp_str, " "); + } *out_score = ent->score; } search->hyp_str[strlen(search->hyp_str) - 1] = '\0'; @@ -405,7 +429,7 @@ state_align_search_init(const char *name, { state_align_search_t *sas; ps_alignment_iter_t *itor; - hmm_t *hmm; + int i; sas = ckd_calloc(1, sizeof(*sas)); ps_search_init(ps_search_base(sas), &state_align_search_funcs, @@ -423,11 +447,22 @@ state_align_search_init(const char *name, sas->n_phones = ps_alignment_n_phones(al); sas->n_emit_state = ps_alignment_n_states(al); sas->hmms = ckd_calloc(sas->n_phones, sizeof(*sas->hmms)); - for (hmm = sas->hmms, itor = ps_alignment_phones(al); itor; - ++hmm, itor = ps_alignment_iter_next(itor)) { + sas->sf = ckd_calloc(sas->n_phones, sizeof(*sas->sf)); + sas->ef = ckd_calloc(sas->n_phones, sizeof(*sas->ef)); + for (i = 0, itor = ps_alignment_phones(al); + i < sas->n_phones && itor; + ++i, itor = ps_alignment_iter_next(itor)) { ps_alignment_entry_t *ent = ps_alignment_iter_get(itor); - hmm_init(sas->hmmctx, hmm, FALSE, + hmm_init(sas->hmmctx, &sas->hmms[i], FALSE, ent->id.pid.ssid, ent->id.pid.tmatid); + if (ent-> start > 0) + sas->sf[i] = ent->start; + else + sas->sf[i] = 0; /* Always active */ + if (ent->duration > 0) + sas->ef[i] = ent->start + ent->duration; + else + sas->ef[i] = INT_MAX; /* Always active */ } return ps_search_base(sas); } diff --git a/src/state_align_search.h b/src/state_align_search.h index 500084c0f..23c340d61 100644 --- a/src/state_align_search.h +++ b/src/state_align_search.h @@ -44,7 +44,7 @@ #include #include "pocketsphinx_internal.h" -#include "ps_alignment.h" +#include "ps_alignment_internal.h" #include "hmm.h" #ifdef __cplusplus @@ -71,6 +71,9 @@ struct state_align_search_s { hmm_context_t *hmmctx; /**< HMM 
context structure. */ ps_alignment_t *al; /**< Alignment structure being operated on. */ hmm_t *hmms; /**< Vector of HMMs corresponding to phone level. */ + int *sf; /**< Vector of minimum start frames for HMMs. */ + int *ef; /**< Vector of maximum exit frames for HMMs. + (note that exit frame = end frame + 1) */ int n_phones; /**< Number of HMMs (phones). */ int frame; /**< Current frame being processed. */ diff --git a/test/data/librivox/sense_and_sensibility_01_austen_64kb-0870.json b/test/data/librivox/sense_and_sensibility_01_austen_64kb-0870.json new file mode 100644 index 000000000..10b121fd3 --- /dev/null +++ b/test/data/librivox/sense_and_sensibility_01_austen_64kb-0870.json @@ -0,0 +1 @@ +{"b":0.000,"d":7.100,"p":1.000,"t":"and mister john dashwood had then leisure to consider how much there might be prudently in his power to do for them","w":[{"b":0.000,"d":0.200,"p":0.946,"t":""},{"b":0.200,"d":0.170,"p":0.952,"t":"and(2)"},{"b":0.370,"d":0.260,"p":0.969,"t":"mister"},{"b":0.630,"d":0.350,"p":0.978,"t":"john"},{"b":0.980,"d":0.600,"p":0.958,"t":"dashwood"},{"b":1.580,"d":0.260,"p":0.977,"t":"had"},{"b":1.840,"d":0.370,"p":0.966,"t":"then"},{"b":2.210,"d":0.040,"p":0.947,"t":""},{"b":2.250,"d":0.460,"p":0.969,"t":"leisure(2)"},{"b":2.710,"d":0.180,"p":0.972,"t":"to(3)"},{"b":2.890,"d":0.550,"p":0.964,"t":"consider"},{"b":3.440,"d":0.510,"p":0.968,"t":"how"},{"b":3.950,"d":0.050,"p":0.954,"t":""},{"b":4.000,"d":0.330,"p":0.972,"t":"much"},{"b":4.330,"d":0.190,"p":0.979,"t":"there"},{"b":4.520,"d":0.270,"p":0.971,"t":"might"},{"b":4.790,"d":0.150,"p":0.987,"t":"be"},{"b":4.940,"d":0.520,"p":0.902,"t":"prudently"},{"b":5.460,"d":0.100,"p":0.991,"t":"in"},{"b":5.560,"d":0.190,"p":0.975,"t":"his"},{"b":5.750,"d":0.290,"p":0.976,"t":"power"},{"b":6.040,"d":0.100,"p":0.983,"t":"to(2)"},{"b":6.140,"d":0.210,"p":0.968,"t":"do"},{"b":6.350,"d":0.260,"p":0.970,"t":"for"},{"b":6.610,"d":0.180,"p":0.956,"t":"them(2)"},{"b":6.790,"d":0.300,"p":0.953,"t":""}]} diff 
--git a/test/data/librivox/sense_and_sensibility_01_austen_64kb-0870.phone.json b/test/data/librivox/sense_and_sensibility_01_austen_64kb-0870.phone.json new file mode 100644 index 000000000..63c7b978a --- /dev/null +++ b/test/data/librivox/sense_and_sensibility_01_austen_64kb-0870.phone.json @@ -0,0 +1 @@ +{"b":0.000,"d":7.100,"p":1.000,"t":"and mister john dashwood had then leisure to consider how much there might be prudently in his power to do for them","w":[{"b":0.000,"d":0.200,"p":0.995,"t":"","w":[{"b":0.000,"d":0.200,"p":0.995,"t":"SIL"}]},{"b":0.200,"d":0.170,"p":0.975,"t":"and(2)","w":[{"b":0.200,"d":0.110,"p":0.994,"t":"AE"},{"b":0.310,"d":0.030,"p":0.992,"t":"N"},{"b":0.340,"d":0.030,"p":0.989,"t":"D"}]},{"b":0.370,"d":0.260,"p":0.974,"t":"mister","w":[{"b":0.370,"d":0.040,"p":0.991,"t":"M"},{"b":0.410,"d":0.030,"p":0.998,"t":"IH"},{"b":0.440,"d":0.070,"p":0.994,"t":"S"},{"b":0.510,"d":0.070,"p":0.996,"t":"T"},{"b":0.580,"d":0.050,"p":0.994,"t":"ER"}]},{"b":0.630,"d":0.350,"p":0.982,"t":"john","w":[{"b":0.630,"d":0.140,"p":0.993,"t":"JH"},{"b":0.770,"d":0.140,"p":0.994,"t":"AA"},{"b":0.910,"d":0.070,"p":0.995,"t":"N"}]},{"b":0.980,"d":0.600,"p":0.943,"t":"dashwood","w":[{"b":0.980,"d":0.090,"p":0.994,"t":"D"},{"b":1.070,"d":0.150,"p":0.986,"t":"AE"},{"b":1.220,"d":0.110,"p":0.989,"t":"SH"},{"b":1.330,"d":0.130,"p":0.994,"t":"W"},{"b":1.460,"d":0.060,"p":0.996,"t":"UH"},{"b":1.520,"d":0.060,"p":0.982,"t":"D"}]},{"b":1.580,"d":0.260,"p":0.971,"t":"had","w":[{"b":1.580,"d":0.100,"p":0.989,"t":"HH"},{"b":1.680,"d":0.080,"p":0.992,"t":"AE"},{"b":1.760,"d":0.080,"p":0.990,"t":"D"}]},{"b":1.840,"d":0.370,"p":0.942,"t":"then","w":[{"b":1.840,"d":0.060,"p":0.986,"t":"DH"},{"b":1.900,"d":0.110,"p":0.989,"t":"EH"},{"b":2.010,"d":0.200,"p":0.967,"t":"N"}]},{"b":2.210,"d":0.040,"p":0.983,"t":"","w":[{"b":2.210,"d":0.040,"p":0.983,"t":"SIL"}]},{"b":2.250,"d":0.460,"p":0.974,"t":"leisure(2)","w":[{"b":2.250,"d":0.130,"p":0.993,"t":"L"},{"b":2.380,"d":0.090,"p":0.995,"t
":"IY"},{"b":2.470,"d":0.130,"p":0.993,"t":"ZH"},{"b":2.600,"d":0.110,"p":0.993,"t":"ER"}]},{"b":2.710,"d":0.180,"p":0.977,"t":"to(3)","w":[{"b":2.710,"d":0.120,"p":0.981,"t":"T"},{"b":2.830,"d":0.060,"p":0.996,"t":"AH"}]},{"b":2.890,"d":0.550,"p":0.938,"t":"consider","w":[{"b":2.890,"d":0.090,"p":0.989,"t":"K"},{"b":2.980,"d":0.040,"p":0.995,"t":"AH"},{"b":3.020,"d":0.050,"p":0.987,"t":"N"},{"b":3.070,"d":0.140,"p":0.990,"t":"S"},{"b":3.210,"d":0.060,"p":0.995,"t":"IH"},{"b":3.270,"d":0.040,"p":0.995,"t":"D"},{"b":3.310,"d":0.130,"p":0.985,"t":"ER"}]},{"b":3.440,"d":0.510,"p":0.960,"t":"how","w":[{"b":3.440,"d":0.120,"p":0.984,"t":"HH"},{"b":3.560,"d":0.390,"p":0.975,"t":"AW"}]},{"b":3.950,"d":0.050,"p":0.988,"t":"","w":[{"b":3.950,"d":0.050,"p":0.988,"t":"SIL"}]},{"b":4.000,"d":0.330,"p":0.965,"t":"much","w":[{"b":4.000,"d":0.100,"p":0.984,"t":"M"},{"b":4.100,"d":0.060,"p":0.995,"t":"AH"},{"b":4.160,"d":0.170,"p":0.987,"t":"CH"}]},{"b":4.330,"d":0.190,"p":0.970,"t":"there","w":[{"b":4.330,"d":0.100,"p":0.979,"t":"DH"},{"b":4.430,"d":0.040,"p":0.994,"t":"EH"},{"b":4.470,"d":0.050,"p":0.997,"t":"R"}]},{"b":4.520,"d":0.270,"p":0.968,"t":"might","w":[{"b":4.520,"d":0.100,"p":0.992,"t":"M"},{"b":4.620,"d":0.090,"p":0.982,"t":"AY"},{"b":4.710,"d":0.080,"p":0.994,"t":"T"}]},{"b":4.790,"d":0.150,"p":0.982,"t":"be","w":[{"b":4.790,"d":0.050,"p":0.995,"t":"B"},{"b":4.840,"d":0.100,"p":0.987,"t":"IY"}]},{"b":4.940,"d":0.520,"p":0.869,"t":"prudently","w":[{"b":4.940,"d":0.110,"p":0.986,"t":"P"},{"b":5.050,"d":0.070,"p":0.993,"t":"R"},{"b":5.120,"d":0.050,"p":0.990,"t":"UW"},{"b":5.170,"d":0.030,"p":0.985,"t":"D"},{"b":5.200,"d":0.030,"p":0.974,"t":"AH"},{"b":5.230,"d":0.030,"p":0.971,"t":"N"},{"b":5.260,"d":0.030,"p":0.982,"t":"T"},{"b":5.290,"d":0.080,"p":0.987,"t":"L"},{"b":5.370,"d":0.090,"p":0.992,"t":"IY"}]},{"b":5.460,"d":0.100,"p":0.986,"t":"in","w":[{"b":5.460,"d":0.060,"p":0.997,"t":"IH"},{"b":5.520,"d":0.040,"p":0.989,"t":"N"}]},{"b":5.560,"d":0.190,"p":0.962,"t":"h
is","w":[{"b":5.560,"d":0.030,"p":0.980,"t":"HH"},{"b":5.590,"d":0.060,"p":0.987,"t":"IH"},{"b":5.650,"d":0.100,"p":0.994,"t":"Z"}]},{"b":5.750,"d":0.290,"p":0.975,"t":"power","w":[{"b":5.750,"d":0.050,"p":0.994,"t":"P"},{"b":5.800,"d":0.130,"p":0.988,"t":"AW"},{"b":5.930,"d":0.110,"p":0.993,"t":"ER"}]},{"b":6.040,"d":0.100,"p":0.971,"t":"to(2)","w":[{"b":6.040,"d":0.040,"p":0.978,"t":"T"},{"b":6.080,"d":0.060,"p":0.993,"t":"IH"}]},{"b":6.140,"d":0.210,"p":0.972,"t":"do","w":[{"b":6.140,"d":0.080,"p":0.989,"t":"D"},{"b":6.220,"d":0.130,"p":0.983,"t":"UW"}]},{"b":6.350,"d":0.260,"p":0.957,"t":"for","w":[{"b":6.350,"d":0.140,"p":0.994,"t":"F"},{"b":6.490,"d":0.080,"p":0.986,"t":"AO"},{"b":6.570,"d":0.040,"p":0.976,"t":"R"}]},{"b":6.610,"d":0.180,"p":0.930,"t":"them(2)","w":[{"b":6.610,"d":0.050,"p":0.977,"t":"DH"},{"b":6.660,"d":0.080,"p":0.965,"t":"AH"},{"b":6.740,"d":0.050,"p":0.987,"t":"M"}]},{"b":6.790,"d":0.300,"p":0.986,"t":"","w":[{"b":6.790,"d":0.300,"p":0.986,"t":"SIL"}]}]} diff --git a/test/data/librivox/sense_and_sensibility_01_austen_64kb-0870.state.json b/test/data/librivox/sense_and_sensibility_01_austen_64kb-0870.state.json new file mode 100644 index 000000000..faa5b5799 --- /dev/null +++ b/test/data/librivox/sense_and_sensibility_01_austen_64kb-0870.state.json @@ -0,0 +1 @@ +{"b":0.000,"d":7.100,"p":1.000,"t":"and mister john dashwood had then leisure to consider how much there might be prudently in his power to do for 
them","w":[{"b":0.000,"d":0.200,"p":0.995,"t":"","w":[{"b":0.000,"d":0.200,"p":0.995,"t":"SIL","w":[{"b":0.000,"d":0.010,"p":1.000,"t":"96"},{"b":0.010,"d":0.140,"p":0.997,"t":"97"},{"b":0.150,"d":0.050,"p":0.998,"t":"98"}]}]},{"b":0.200,"d":0.170,"p":0.975,"t":"and(2)","w":[{"b":0.200,"d":0.110,"p":0.994,"t":"AE","w":[{"b":0.200,"d":0.060,"p":0.997,"t":"268"},{"b":0.260,"d":0.020,"p":0.998,"t":"312"},{"b":0.280,"d":0.030,"p":0.998,"t":"327"}]},{"b":0.310,"d":0.030,"p":0.992,"t":"N","w":[{"b":0.310,"d":0.010,"p":0.999,"t":"3324"},{"b":0.320,"d":0.010,"p":0.999,"t":"3373"},{"b":0.330,"d":0.010,"p":0.994,"t":"3446"}]},{"b":0.340,"d":0.030,"p":0.989,"t":"D","w":[{"b":0.340,"d":0.010,"p":0.994,"t":"1229"},{"b":0.350,"d":0.010,"p":0.998,"t":"1267"},{"b":0.360,"d":0.010,"p":0.997,"t":"1382"}]}]},{"b":0.370,"d":0.260,"p":0.974,"t":"mister","w":[{"b":0.370,"d":0.040,"p":0.991,"t":"M","w":[{"b":0.370,"d":0.010,"p":0.994,"t":"3176"},{"b":0.380,"d":0.010,"p":0.998,"t":"3213"},{"b":0.390,"d":0.020,"p":0.999,"t":"3264"}]},{"b":0.410,"d":0.030,"p":0.998,"t":"IH","w":[{"b":0.410,"d":0.010,"p":1.000,"t":"2298"},{"b":0.420,"d":0.010,"p":0.999,"t":"2385"},{"b":0.430,"d":0.010,"p":1.000,"t":"2503"}]},{"b":0.440,"d":0.070,"p":0.994,"t":"S","w":[{"b":0.440,"d":0.030,"p":0.998,"t":"4053"},{"b":0.470,"d":0.010,"p":0.998,"t":"4110"},{"b":0.480,"d":0.030,"p":0.998,"t":"4159"}]},{"b":0.510,"d":0.070,"p":0.996,"t":"T","w":[{"b":0.510,"d":0.030,"p":0.998,"t":"4334"},{"b":0.540,"d":0.020,"p":0.999,"t":"4432"},{"b":0.560,"d":0.020,"p":0.999,"t":"4477"}]},{"b":0.580,"d":0.050,"p":0.994,"t":"ER","w":[{"b":0.580,"d":0.030,"p":0.998,"t":"1654"},{"b":0.610,"d":0.010,"p":0.997,"t":"1725"},{"b":0.620,"d":0.010,"p":0.999,"t":"1797"}]}]},{"b":0.630,"d":0.350,"p":0.982,"t":"john","w":[{"b":0.630,"d":0.140,"p":0.993,"t":"JH","w":[{"b":0.630,"d":0.050,"p":0.998,"t":"2732"},{"b":0.680,"d":0.050,"p":0.998,"t":"2736"},{"b":0.730,"d":0.040,"p":0.998,"t":"2743"}]},{"b":0.770,"d":0.140,"p":0.994,"t":"AA","w":[{"b
":0.770,"d":0.050,"p":0.998,"t":"131"},{"b":0.820,"d":0.040,"p":0.998,"t":"189"},{"b":0.860,"d":0.050,"p":0.998,"t":"214"}]},{"b":0.910,"d":0.070,"p":0.995,"t":"N","w":[{"b":0.910,"d":0.040,"p":0.997,"t":"3312"},{"b":0.950,"d":0.020,"p":0.999,"t":"3371"},{"b":0.970,"d":0.010,"p":0.999,"t":"3450"}]}]},{"b":0.980,"d":0.600,"p":0.943,"t":"dashwood","w":[{"b":0.980,"d":0.090,"p":0.994,"t":"D","w":[{"b":0.980,"d":0.030,"p":0.997,"t":"1223"},{"b":1.010,"d":0.020,"p":0.999,"t":"1302"},{"b":1.030,"d":0.040,"p":0.998,"t":"1318"}]},{"b":1.070,"d":0.150,"p":0.986,"t":"AE","w":[{"b":1.070,"d":0.060,"p":0.997,"t":"226"},{"b":1.130,"d":0.050,"p":0.992,"t":"288"},{"b":1.180,"d":0.040,"p":0.998,"t":"334"}]},{"b":1.220,"d":0.110,"p":0.989,"t":"SH","w":[{"b":1.220,"d":0.040,"p":0.998,"t":"4198"},{"b":1.260,"d":0.040,"p":0.993,"t":"4208"},{"b":1.300,"d":0.030,"p":0.998,"t":"4233"}]},{"b":1.330,"d":0.130,"p":0.994,"t":"W","w":[{"b":1.330,"d":0.070,"p":0.997,"t":"4831"},{"b":1.400,"d":0.030,"p":0.998,"t":"4863"},{"b":1.430,"d":0.030,"p":0.998,"t":"4929"}]},{"b":1.460,"d":0.060,"p":0.996,"t":"UH","w":[{"b":1.460,"d":0.030,"p":0.998,"t":"4597"},{"b":1.490,"d":0.020,"p":0.999,"t":"4605"},{"b":1.510,"d":0.010,"p":0.999,"t":"4613"}]},{"b":1.520,"d":0.060,"p":0.982,"t":"D","w":[{"b":1.520,"d":0.020,"p":0.999,"t":"1202"},{"b":1.540,"d":0.010,"p":1.000,"t":"1247"},{"b":1.550,"d":0.030,"p":0.984,"t":"1353"}]}]},{"b":1.580,"d":0.260,"p":0.971,"t":"had","w":[{"b":1.580,"d":0.100,"p":0.989,"t":"HH","w":[{"b":1.580,"d":0.030,"p":0.993,"t":"2098"},{"b":1.610,"d":0.040,"p":0.998,"t":"2165"},{"b":1.650,"d":0.030,"p":0.998,"t":"2210"}]},{"b":1.680,"d":0.080,"p":0.992,"t":"AE","w":[{"b":1.680,"d":0.020,"p":0.999,"t":"243"},{"b":1.700,"d":0.020,"p":0.998,"t":"296"},{"b":1.720,"d":0.040,"p":0.995,"t":"342"}]},{"b":1.760,"d":0.080,"p":0.990,"t":"D","w":[{"b":1.760,"d":0.030,"p":0.998,"t":"1188"},{"b":1.790,"d":0.020,"p":0.999,"t":"1258"},{"b":1.810,"d":0.030,"p":0.993,"t":"1369"}]}]},{"b":1.840,"d":0.370,"p
":0.942,"t":"then","w":[{"b":1.840,"d":0.060,"p":0.986,"t":"DH","w":[{"b":1.840,"d":0.010,"p":0.995,"t":"1399"},{"b":1.850,"d":0.030,"p":0.997,"t":"1436"},{"b":1.880,"d":0.020,"p":0.994,"t":"1481"}]},{"b":1.900,"d":0.110,"p":0.989,"t":"EH","w":[{"b":1.900,"d":0.030,"p":0.998,"t":"1496"},{"b":1.930,"d":0.030,"p":0.998,"t":"1575"},{"b":1.960,"d":0.050,"p":0.992,"t":"1610"}]},{"b":2.010,"d":0.200,"p":0.967,"t":"N","w":[{"b":2.010,"d":0.030,"p":0.994,"t":"3327"},{"b":2.040,"d":0.040,"p":0.989,"t":"3396"},{"b":2.080,"d":0.130,"p":0.984,"t":"3469"}]}]},{"b":2.210,"d":0.040,"p":0.983,"t":"","w":[{"b":2.210,"d":0.040,"p":0.983,"t":"SIL","w":[{"b":2.210,"d":0.010,"p":0.991,"t":"96"},{"b":2.220,"d":0.010,"p":0.996,"t":"97"},{"b":2.230,"d":0.020,"p":0.995,"t":"98"}]}]},{"b":2.250,"d":0.460,"p":0.974,"t":"leisure(2)","w":[{"b":2.250,"d":0.130,"p":0.993,"t":"L","w":[{"b":2.250,"d":0.040,"p":0.998,"t":"2991"},{"b":2.290,"d":0.050,"p":0.997,"t":"2995"},{"b":2.340,"d":0.040,"p":0.998,"t":"3075"}]},{"b":2.380,"d":0.090,"p":0.995,"t":"IY","w":[{"b":2.380,"d":0.020,"p":0.999,"t":"2561"},{"b":2.400,"d":0.050,"p":0.998,"t":"2618"},{"b":2.450,"d":0.020,"p":0.999,"t":"2696"}]},{"b":2.470,"d":0.130,"p":0.993,"t":"ZH","w":[{"b":2.470,"d":0.030,"p":0.998,"t":"5120"},{"b":2.500,"d":0.080,"p":0.996,"t":"5121"},{"b":2.580,"d":0.020,"p":0.999,"t":"5125"}]},{"b":2.600,"d":0.110,"p":0.993,"t":"ER","w":[{"b":2.600,"d":0.020,"p":0.997,"t":"1656"},{"b":2.620,"d":0.050,"p":0.998,"t":"1720"},{"b":2.670,"d":0.040,"p":0.998,"t":"1799"}]}]},{"b":2.710,"d":0.180,"p":0.977,"t":"to(3)","w":[{"b":2.710,"d":0.120,"p":0.981,"t":"T","w":[{"b":2.710,"d":0.050,"p":0.990,"t":"4281"},{"b":2.760,"d":0.030,"p":0.994,"t":"4400"},{"b":2.790,"d":0.040,"p":0.998,"t":"4443"}]},{"b":2.830,"d":0.060,"p":0.996,"t":"AH","w":[{"b":2.830,"d":0.020,"p":0.999,"t":"393"},{"b":2.850,"d":0.020,"p":0.999,"t":"563"},{"b":2.870,"d":0.020,"p":0.999,"t":"765"}]}]},{"b":2.890,"d":0.550,"p":0.938,"t":"consider","w":[{"b":2.890,"d":0.090,"p"
:0.989,"t":"K","w":[{"b":2.890,"d":0.040,"p":0.998,"t":"2790"},{"b":2.930,"d":0.030,"p":0.992,"t":"2826"},{"b":2.960,"d":0.020,"p":0.999,"t":"2893"}]},{"b":2.980,"d":0.040,"p":0.995,"t":"AH","w":[{"b":2.980,"d":0.020,"p":0.999,"t":"408"},{"b":3.000,"d":0.010,"p":0.997,"t":"594"},{"b":3.010,"d":0.010,"p":1.000,"t":"718"}]},{"b":3.020,"d":0.050,"p":0.987,"t":"N","w":[{"b":3.020,"d":0.010,"p":0.999,"t":"3294"},{"b":3.030,"d":0.020,"p":0.991,"t":"3356"},{"b":3.050,"d":0.020,"p":0.997,"t":"3465"}]},{"b":3.070,"d":0.140,"p":0.990,"t":"S","w":[{"b":3.070,"d":0.040,"p":0.995,"t":"4035"},{"b":3.110,"d":0.070,"p":0.997,"t":"4073"},{"b":3.180,"d":0.030,"p":0.998,"t":"4164"}]},{"b":3.210,"d":0.060,"p":0.995,"t":"IH","w":[{"b":3.210,"d":0.030,"p":0.998,"t":"2239"},{"b":3.240,"d":0.020,"p":0.998,"t":"2395"},{"b":3.260,"d":0.010,"p":0.999,"t":"2479"}]},{"b":3.270,"d":0.040,"p":0.995,"t":"D","w":[{"b":3.270,"d":0.010,"p":0.999,"t":"1203"},{"b":3.280,"d":0.010,"p":0.999,"t":"1278"},{"b":3.290,"d":0.020,"p":0.996,"t":"1315"}]},{"b":3.310,"d":0.130,"p":0.985,"t":"ER","w":[{"b":3.310,"d":0.030,"p":0.989,"t":"1656"},{"b":3.340,"d":0.080,"p":0.997,"t":"1726"},{"b":3.420,"d":0.020,"p":0.999,"t":"1823"}]}]},{"b":3.440,"d":0.510,"p":0.960,"t":"how","w":[{"b":3.440,"d":0.120,"p":0.984,"t":"HH","w":[{"b":3.440,"d":0.050,"p":0.997,"t":"2129"},{"b":3.490,"d":0.060,"p":0.990,"t":"2173"},{"b":3.550,"d":0.010,"p":0.996,"t":"2216"}]},{"b":3.560,"d":0.390,"p":0.975,"t":"AW","w":[{"b":3.560,"d":0.060,"p":0.991,"t":"903"},{"b":3.620,"d":0.090,"p":0.996,"t":"924"},{"b":3.710,"d":0.240,"p":0.988,"t":"938"}]}]},{"b":3.950,"d":0.050,"p":0.988,"t":"","w":[{"b":3.950,"d":0.050,"p":0.988,"t":"SIL","w":[{"b":3.950,"d":0.010,"p":0.998,"t":"96"},{"b":3.960,"d":0.010,"p":0.996,"t":"97"},{"b":3.970,"d":0.030,"p":0.994,"t":"98"}]}]},{"b":4.000,"d":0.330,"p":0.965,"t":"much","w":[{"b":4.000,"d":0.100,"p":0.984,"t":"M","w":[{"b":4.000,"d":0.030,"p":0.990,"t":"3170"},{"b":4.030,"d":0.020,"p":0.999,"t":"3199"},{"b":4.
050,"d":0.050,"p":0.995,"t":"3242"}]},{"b":4.100,"d":0.060,"p":0.995,"t":"AH","w":[{"b":4.100,"d":0.020,"p":0.999,"t":"434"},{"b":4.120,"d":0.020,"p":0.999,"t":"541"},{"b":4.140,"d":0.020,"p":0.997,"t":"751"}]},{"b":4.160,"d":0.170,"p":0.987,"t":"CH","w":[{"b":4.160,"d":0.050,"p":0.993,"t":"1145"},{"b":4.210,"d":0.070,"p":0.996,"t":"1165"},{"b":4.280,"d":0.050,"p":0.997,"t":"1173"}]}]},{"b":4.330,"d":0.190,"p":0.970,"t":"there","w":[{"b":4.330,"d":0.100,"p":0.979,"t":"DH","w":[{"b":4.330,"d":0.040,"p":0.992,"t":"1400"},{"b":4.370,"d":0.040,"p":0.994,"t":"1438"},{"b":4.410,"d":0.020,"p":0.992,"t":"1477"}]},{"b":4.430,"d":0.040,"p":0.994,"t":"EH","w":[{"b":4.430,"d":0.010,"p":0.997,"t":"1500"},{"b":4.440,"d":0.020,"p":0.998,"t":"1597"},{"b":4.460,"d":0.010,"p":0.999,"t":"1638"}]},{"b":4.470,"d":0.050,"p":0.997,"t":"R","w":[{"b":4.470,"d":0.030,"p":0.998,"t":"3806"},{"b":4.500,"d":0.010,"p":0.999,"t":"3877"},{"b":4.510,"d":0.010,"p":0.999,"t":"4020"}]}]},{"b":4.520,"d":0.270,"p":0.968,"t":"might","w":[{"b":4.520,"d":0.100,"p":0.992,"t":"M","w":[{"b":4.520,"d":0.030,"p":0.998,"t":"3153"},{"b":4.550,"d":0.030,"p":0.996,"t":"3192"},{"b":4.580,"d":0.040,"p":0.998,"t":"3243"}]},{"b":4.620,"d":0.090,"p":0.982,"t":"AY","w":[{"b":4.620,"d":0.030,"p":0.996,"t":"952"},{"b":4.650,"d":0.040,"p":0.990,"t":"1016"},{"b":4.690,"d":0.020,"p":0.996,"t":"1050"}]},{"b":4.710,"d":0.080,"p":0.994,"t":"T","w":[{"b":4.710,"d":0.030,"p":0.998,"t":"4296"},{"b":4.740,"d":0.040,"p":0.996,"t":"4345"},{"b":4.780,"d":0.010,"p":0.999,"t":"4507"}]}]},{"b":4.790,"d":0.150,"p":0.982,"t":"be","w":[{"b":4.790,"d":0.050,"p":0.995,"t":"B","w":[{"b":4.790,"d":0.010,"p":0.997,"t":"1081"},{"b":4.800,"d":0.010,"p":1.000,"t":"1115"},{"b":4.810,"d":0.030,"p":0.998,"t":"1143"}]},{"b":4.840,"d":0.100,"p":0.987,"t":"IY","w":[{"b":4.840,"d":0.020,"p":0.998,"t":"2547"},{"b":4.860,"d":0.050,"p":0.996,"t":"2583"},{"b":4.910,"d":0.030,"p":0.993,"t":"2705"}]}]},{"b":4.940,"d":0.520,"p":0.869,"t":"prudently","w":[{"b":4.94
0,"d":0.110,"p":0.986,"t":"P","w":[{"b":4.940,"d":0.020,"p":0.993,"t":"3695"},{"b":4.960,"d":0.040,"p":0.998,"t":"3732"},{"b":5.000,"d":0.050,"p":0.995,"t":"3765"}]},{"b":5.050,"d":0.070,"p":0.993,"t":"R","w":[{"b":5.050,"d":0.040,"p":0.997,"t":"3825"},{"b":5.090,"d":0.020,"p":0.999,"t":"3945"},{"b":5.110,"d":0.010,"p":0.998,"t":"3960"}]},{"b":5.120,"d":0.050,"p":0.990,"t":"UW","w":[{"b":5.120,"d":0.010,"p":0.997,"t":"4621"},{"b":5.130,"d":0.010,"p":0.999,"t":"4662"},{"b":5.140,"d":0.030,"p":0.995,"t":"4695"}]},{"b":5.170,"d":0.030,"p":0.985,"t":"D","w":[{"b":5.170,"d":0.010,"p":0.999,"t":"1202"},{"b":5.180,"d":0.010,"p":0.995,"t":"1276"},{"b":5.190,"d":0.010,"p":0.990,"t":"1313"}]},{"b":5.200,"d":0.030,"p":0.974,"t":"AH","w":[{"b":5.200,"d":0.010,"p":0.993,"t":"490"},{"b":5.210,"d":0.010,"p":0.991,"t":"589"},{"b":5.220,"d":0.010,"p":0.990,"t":"721"}]},{"b":5.230,"d":0.030,"p":0.971,"t":"N","w":[{"b":5.230,"d":0.010,"p":0.990,"t":"3345"},{"b":5.240,"d":0.010,"p":0.989,"t":"3359"},{"b":5.250,"d":0.010,"p":0.991,"t":"3459"}]},{"b":5.260,"d":0.030,"p":0.982,"t":"T","w":[{"b":5.260,"d":0.010,"p":0.997,"t":"4304"},{"b":5.270,"d":0.010,"p":0.993,"t":"4426"},{"b":5.280,"d":0.010,"p":0.992,"t":"4494"}]},{"b":5.290,"d":0.080,"p":0.987,"t":"L","w":[{"b":5.290,"d":0.040,"p":0.990,"t":"2990"},{"b":5.330,"d":0.020,"p":0.999,"t":"2997"},{"b":5.350,"d":0.020,"p":0.999,"t":"3078"}]},{"b":5.370,"d":0.090,"p":0.992,"t":"IY","w":[{"b":5.370,"d":0.030,"p":0.998,"t":"2562"},{"b":5.400,"d":0.040,"p":0.997,"t":"2647"},{"b":5.440,"d":0.020,"p":0.998,"t":"2656"}]}]},{"b":5.460,"d":0.100,"p":0.986,"t":"in","w":[{"b":5.460,"d":0.060,"p":0.997,"t":"IH","w":[{"b":5.460,"d":0.030,"p":0.998,"t":"2288"},{"b":5.490,"d":0.020,"p":0.999,"t":"2338"},{"b":5.510,"d":0.010,"p":1.000,"t":"2451"}]},{"b":5.520,"d":0.040,"p":0.989,"t":"N","w":[{"b":5.520,"d":0.020,"p":0.996,"t":"3333"},{"b":5.540,"d":0.010,"p":0.995,"t":"3366"},{"b":5.550,"d":0.010,"p":0.998,"t":"3442"}]}]},{"b":5.560,"d":0.190,"p":0.962,"t"
:"his","w":[{"b":5.560,"d":0.030,"p":0.980,"t":"HH","w":[{"b":5.560,"d":0.010,"p":0.992,"t":"2125"},{"b":5.570,"d":0.010,"p":0.996,"t":"2189"},{"b":5.580,"d":0.010,"p":0.993,"t":"2197"}]},{"b":5.590,"d":0.060,"p":0.987,"t":"IH","w":[{"b":5.590,"d":0.010,"p":0.997,"t":"2252"},{"b":5.600,"d":0.040,"p":0.991,"t":"2389"},{"b":5.640,"d":0.010,"p":0.999,"t":"2514"}]},{"b":5.650,"d":0.100,"p":0.994,"t":"Z","w":[{"b":5.650,"d":0.020,"p":0.999,"t":"4992"},{"b":5.670,"d":0.050,"p":0.997,"t":"5038"},{"b":5.720,"d":0.030,"p":0.998,"t":"5099"}]}]},{"b":5.750,"d":0.290,"p":0.975,"t":"power","w":[{"b":5.750,"d":0.050,"p":0.994,"t":"P","w":[{"b":5.750,"d":0.010,"p":0.999,"t":"3705"},{"b":5.760,"d":0.030,"p":0.997,"t":"3734"},{"b":5.790,"d":0.010,"p":0.997,"t":"3774"}]},{"b":5.800,"d":0.130,"p":0.988,"t":"AW","w":[{"b":5.800,"d":0.040,"p":0.997,"t":"902"},{"b":5.840,"d":0.030,"p":0.998,"t":"926"},{"b":5.870,"d":0.060,"p":0.993,"t":"942"}]},{"b":5.930,"d":0.110,"p":0.993,"t":"ER","w":[{"b":5.930,"d":0.060,"p":0.997,"t":"1683"},{"b":5.990,"d":0.030,"p":0.998,"t":"1722"},{"b":6.020,"d":0.020,"p":0.998,"t":"1799"}]}]},{"b":6.040,"d":0.100,"p":0.971,"t":"to(2)","w":[{"b":6.040,"d":0.040,"p":0.978,"t":"T","w":[{"b":6.040,"d":0.020,"p":0.994,"t":"4281"},{"b":6.060,"d":0.010,"p":0.992,"t":"4400"},{"b":6.070,"d":0.010,"p":0.992,"t":"4447"}]},{"b":6.080,"d":0.060,"p":0.993,"t":"IH","w":[{"b":6.080,"d":0.020,"p":0.996,"t":"2232"},{"b":6.100,"d":0.020,"p":0.999,"t":"2401"},{"b":6.120,"d":0.020,"p":0.999,"t":"2481"}]}]},{"b":6.140,"d":0.210,"p":0.972,"t":"do","w":[{"b":6.140,"d":0.080,"p":0.989,"t":"D","w":[{"b":6.140,"d":0.040,"p":0.995,"t":"1217"},{"b":6.180,"d":0.010,"p":1.000,"t":"1292"},{"b":6.190,"d":0.030,"p":0.995,"t":"1331"}]},{"b":6.220,"d":0.130,"p":0.983,"t":"UW","w":[{"b":6.220,"d":0.040,"p":0.994,"t":"4650"},{"b":6.260,"d":0.040,"p":0.991,"t":"4674"},{"b":6.300,"d":0.050,"p":0.998,"t":"4722"}]}]},{"b":6.350,"d":0.260,"p":0.957,"t":"for","w":[{"b":6.350,"d":0.140,"p":0.994,"t":"F","
w":[{"b":6.350,"d":0.020,"p":0.999,"t":"1976"},{"b":6.370,"d":0.080,"p":0.997,"t":"1993"},{"b":6.450,"d":0.040,"p":0.998,"t":"2010"}]},{"b":6.490,"d":0.080,"p":0.986,"t":"AO","w":[{"b":6.490,"d":0.020,"p":0.999,"t":"844"},{"b":6.510,"d":0.030,"p":0.997,"t":"875"},{"b":6.540,"d":0.030,"p":0.990,"t":"899"}]},{"b":6.570,"d":0.040,"p":0.976,"t":"R","w":[{"b":6.570,"d":0.010,"p":0.992,"t":"3791"},{"b":6.580,"d":0.020,"p":0.991,"t":"3862"},{"b":6.600,"d":0.010,"p":0.994,"t":"4006"}]}]},{"b":6.610,"d":0.180,"p":0.930,"t":"them(2)","w":[{"b":6.610,"d":0.050,"p":0.977,"t":"DH","w":[{"b":6.610,"d":0.020,"p":0.991,"t":"1418"},{"b":6.630,"d":0.020,"p":0.991,"t":"1447"},{"b":6.650,"d":0.010,"p":0.994,"t":"1467"}]},{"b":6.660,"d":0.080,"p":0.965,"t":"AH","w":[{"b":6.660,"d":0.030,"p":0.992,"t":"417"},{"b":6.690,"d":0.020,"p":0.985,"t":"616"},{"b":6.710,"d":0.030,"p":0.988,"t":"685"}]},{"b":6.740,"d":0.050,"p":0.987,"t":"M","w":[{"b":6.740,"d":0.030,"p":0.996,"t":"3143"},{"b":6.770,"d":0.010,"p":0.995,"t":"3236"},{"b":6.780,"d":0.010,"p":0.996,"t":"3270"}]}]},{"b":6.790,"d":0.300,"p":0.986,"t":"","w":[{"b":6.790,"d":0.300,"p":0.986,"t":"SIL","w":[{"b":6.790,"d":0.280,"p":0.994,"t":"96"},{"b":7.070,"d":0.010,"p":0.995,"t":"97"},{"b":7.080,"d":0.010,"p":0.997,"t":"98"}]}]}]} diff --git a/test/data/librivox/sense_and_sensibility_01_austen_64kb-0880.json b/test/data/librivox/sense_and_sensibility_01_austen_64kb-0880.json new file mode 100644 index 000000000..447ecfd81 --- /dev/null +++ b/test/data/librivox/sense_and_sensibility_01_austen_64kb-0880.json @@ -0,0 +1 @@ +{"b":0.000,"d":2.990,"p":1.000,"t":"he was not an ill disposed young 
man","w":[{"b":0.000,"d":0.210,"p":0.939,"t":""},{"b":0.210,"d":0.120,"p":0.987,"t":"he"},{"b":0.330,"d":0.230,"p":0.982,"t":"was(2)"},{"b":0.560,"d":0.500,"p":0.954,"t":"not"},{"b":1.060,"d":0.070,"p":0.947,"t":""},{"b":1.130,"d":0.170,"p":0.971,"t":"an(2)"},{"b":1.300,"d":0.180,"p":0.985,"t":"ill"},{"b":1.480,"d":0.630,"p":0.939,"t":"disposed"},{"b":2.110,"d":0.220,"p":0.978,"t":"young"},{"b":2.330,"d":0.410,"p":0.975,"t":"man"},{"b":2.740,"d":0.240,"p":0.948,"t":""}]} diff --git a/test/data/librivox/sense_and_sensibility_01_austen_64kb-0880.phone.json b/test/data/librivox/sense_and_sensibility_01_austen_64kb-0880.phone.json new file mode 100644 index 000000000..545859fdd --- /dev/null +++ b/test/data/librivox/sense_and_sensibility_01_austen_64kb-0880.phone.json @@ -0,0 +1 @@ +{"b":0.000,"d":2.990,"p":1.000,"t":"he was not an ill disposed young man","w":[{"b":0.000,"d":0.210,"p":0.982,"t":"","w":[{"b":0.000,"d":0.210,"p":0.982,"t":"SIL"}]},{"b":0.210,"d":0.120,"p":0.991,"t":"he","w":[{"b":0.210,"d":0.060,"p":0.995,"t":"HH"},{"b":0.270,"d":0.060,"p":0.996,"t":"IY"}]},{"b":0.330,"d":0.230,"p":0.986,"t":"was(2)","w":[{"b":0.330,"d":0.080,"p":0.995,"t":"W"},{"b":0.410,"d":0.040,"p":0.998,"t":"AH"},{"b":0.450,"d":0.110,"p":0.993,"t":"Z"}]},{"b":0.560,"d":0.500,"p":0.963,"t":"not","w":[{"b":0.560,"d":0.050,"p":0.997,"t":"N"},{"b":0.610,"d":0.250,"p":0.985,"t":"AA"},{"b":0.860,"d":0.200,"p":0.980,"t":"T"}]},{"b":1.060,"d":0.070,"p":0.979,"t":"","w":[{"b":1.060,"d":0.070,"p":0.979,"t":"SIL"}]},{"b":1.130,"d":0.170,"p":0.981,"t":"an(2)","w":[{"b":1.130,"d":0.100,"p":0.990,"t":"AH"},{"b":1.230,"d":0.070,"p":0.991,"t":"N"}]},{"b":1.300,"d":0.180,"p":0.986,"t":"ill","w":[{"b":1.300,"d":0.050,"p":0.997,"t":"IH"},{"b":1.350,"d":0.130,"p":0.989,"t":"L"}]},{"b":1.480,"d":0.630,"p":0.938,"t":"disposed","w":[{"b":1.480,"d":0.030,"p":0.997,"t":"D"},{"b":1.510,"d":0.030,"p":0.986,"t":"IH"},{"b":1.540,"d":0.130,"p":0.991,"t":"S"},{"b":1.670,"d":0.080,"p":0.994,"t":"P"},{"b":1.750,"d":
0.220,"p":0.989,"t":"OW"},{"b":1.970,"d":0.080,"p":0.990,"t":"Z"},{"b":2.050,"d":0.060,"p":0.990,"t":"D"}]},{"b":2.110,"d":0.220,"p":0.970,"t":"young","w":[{"b":2.110,"d":0.070,"p":0.985,"t":"Y"},{"b":2.180,"d":0.060,"p":0.997,"t":"AH"},{"b":2.240,"d":0.090,"p":0.988,"t":"NG"}]},{"b":2.330,"d":0.410,"p":0.971,"t":"man","w":[{"b":2.330,"d":0.100,"p":0.994,"t":"M"},{"b":2.430,"d":0.200,"p":0.983,"t":"AE"},{"b":2.630,"d":0.110,"p":0.993,"t":"N"}]},{"b":2.740,"d":0.240,"p":0.978,"t":"","w":[{"b":2.740,"d":0.240,"p":0.978,"t":"SIL"}]}]} diff --git a/test/data/librivox/sense_and_sensibility_01_austen_64kb-0880.state.json b/test/data/librivox/sense_and_sensibility_01_austen_64kb-0880.state.json new file mode 100644 index 000000000..6f05c445b --- /dev/null +++ b/test/data/librivox/sense_and_sensibility_01_austen_64kb-0880.state.json @@ -0,0 +1 @@ +{"b":0.000,"d":2.990,"p":1.000,"t":"he was not an ill disposed young man","w":[{"b":0.000,"d":0.210,"p":0.982,"t":"","w":[{"b":0.000,"d":0.210,"p":0.982,"t":"SIL","w":[{"b":0.000,"d":0.010,"p":1.000,"t":"96"},{"b":0.010,"d":0.060,"p":0.998,"t":"97"},{"b":0.070,"d":0.140,"p":0.984,"t":"98"}]}]},{"b":0.210,"d":0.120,"p":0.991,"t":"he","w":[{"b":0.210,"d":0.060,"p":0.995,"t":"HH","w":[{"b":0.210,"d":0.030,"p":0.998,"t":"2110"},{"b":0.240,"d":0.020,"p":0.999,"t":"2182"},{"b":0.260,"d":0.010,"p":0.998,"t":"2204"}]},{"b":0.270,"d":0.060,"p":0.996,"t":"IY","w":[{"b":0.270,"d":0.020,"p":0.999,"t":"2538"},{"b":0.290,"d":0.020,"p":0.998,"t":"2653"},{"b":0.310,"d":0.020,"p":0.999,"t":"2680"}]}]},{"b":0.330,"d":0.230,"p":0.986,"t":"was(2)","w":[{"b":0.330,"d":0.080,"p":0.995,"t":"W","w":[{"b":0.330,"d":0.040,"p":0.997,"t":"4857"},{"b":0.370,"d":0.020,"p":0.999,"t":"4883"},{"b":0.390,"d":0.020,"p":0.999,"t":"4904"}]},{"b":0.410,"d":0.040,"p":0.998,"t":"AH","w":[{"b":0.410,"d":0.010,"p":1.000,"t":"441"},{"b":0.420,"d":0.020,"p":0.999,"t":"529"},{"b":0.440,"d":0.010,"p":1.000,"t":"811"}]},{"b":0.450,"d":0.110,"p":0.993,"t":"Z","w":[{"b":0.450,"d
":0.040,"p":0.997,"t":"4988"},{"b":0.490,"d":0.040,"p":0.998,"t":"5043"},{"b":0.530,"d":0.030,"p":0.998,"t":"5088"}]}]},{"b":0.560,"d":0.500,"p":0.963,"t":"not","w":[{"b":0.560,"d":0.050,"p":0.997,"t":"N","w":[{"b":0.560,"d":0.020,"p":0.999,"t":"3287"},{"b":0.580,"d":0.010,"p":0.999,"t":"3430"},{"b":0.590,"d":0.020,"p":0.999,"t":"3494"}]},{"b":0.610,"d":0.250,"p":0.985,"t":"AA","w":[{"b":0.610,"d":0.030,"p":0.998,"t":"126"},{"b":0.640,"d":0.180,"p":0.989,"t":"192"},{"b":0.820,"d":0.040,"p":0.998,"t":"211"}]},{"b":0.860,"d":0.200,"p":0.980,"t":"T","w":[{"b":0.860,"d":0.030,"p":0.996,"t":"4265"},{"b":0.890,"d":0.040,"p":0.998,"t":"4425"},{"b":0.930,"d":0.130,"p":0.986,"t":"4518"}]}]},{"b":1.060,"d":0.070,"p":0.979,"t":"","w":[{"b":1.060,"d":0.070,"p":0.979,"t":"SIL","w":[{"b":1.060,"d":0.050,"p":0.990,"t":"96"},{"b":1.110,"d":0.010,"p":0.994,"t":"97"},{"b":1.120,"d":0.010,"p":0.995,"t":"98"}]}]},{"b":1.130,"d":0.170,"p":0.981,"t":"an(2)","w":[{"b":1.130,"d":0.100,"p":0.990,"t":"AH","w":[{"b":1.130,"d":0.030,"p":0.998,"t":"506"},{"b":1.160,"d":0.060,"p":0.992,"t":"573"},{"b":1.220,"d":0.010,"p":1.000,"t":"708"}]},{"b":1.230,"d":0.070,"p":0.991,"t":"N","w":[{"b":1.230,"d":0.030,"p":0.996,"t":"3303"},{"b":1.260,"d":0.020,"p":0.999,"t":"3407"},{"b":1.280,"d":0.020,"p":0.997,"t":"3480"}]}]},{"b":1.300,"d":0.180,"p":0.986,"t":"ill","w":[{"b":1.300,"d":0.050,"p":0.997,"t":"IH","w":[{"b":1.300,"d":0.010,"p":0.999,"t":"2301"},{"b":1.310,"d":0.030,"p":0.998,"t":"2355"},{"b":1.340,"d":0.010,"p":1.000,"t":"2472"}]},{"b":1.350,"d":0.130,"p":0.989,"t":"L","w":[{"b":1.350,"d":0.040,"p":0.998,"t":"2957"},{"b":1.390,"d":0.050,"p":0.997,"t":"3067"},{"b":1.440,"d":0.040,"p":0.995,"t":"3124"}]}]},{"b":1.480,"d":0.630,"p":0.938,"t":"disposed","w":[{"b":1.480,"d":0.030,"p":0.997,"t":"D","w":[{"b":1.480,"d":0.010,"p":0.999,"t":"1245"},{"b":1.490,"d":0.010,"p":1.000,"t":"1306"},{"b":1.500,"d":0.010,"p":0.998,"t":"1327"}]},{"b":1.510,"d":0.030,"p":0.986,"t":"IH","w":[{"b":1.510,"d":0.010,"p":
0.998,"t":"2260"},{"b":1.520,"d":0.010,"p":0.998,"t":"2390"},{"b":1.530,"d":0.010,"p":0.990,"t":"2502"}]},{"b":1.540,"d":0.130,"p":0.991,"t":"S","w":[{"b":1.540,"d":0.020,"p":0.996,"t":"4050"},{"b":1.560,"d":0.070,"p":0.997,"t":"4129"},{"b":1.630,"d":0.040,"p":0.998,"t":"4152"}]},{"b":1.670,"d":0.080,"p":0.994,"t":"P","w":[{"b":1.670,"d":0.030,"p":0.998,"t":"3706"},{"b":1.700,"d":0.020,"p":0.998,"t":"3715"},{"b":1.720,"d":0.030,"p":0.998,"t":"3751"}]},{"b":1.750,"d":0.220,"p":0.989,"t":"OW","w":[{"b":1.750,"d":0.030,"p":0.998,"t":"3548"},{"b":1.780,"d":0.140,"p":0.994,"t":"3598"},{"b":1.920,"d":0.050,"p":0.997,"t":"3642"}]},{"b":1.970,"d":0.080,"p":0.990,"t":"Z","w":[{"b":1.970,"d":0.050,"p":0.993,"t":"4996"},{"b":2.020,"d":0.020,"p":0.998,"t":"5050"},{"b":2.040,"d":0.010,"p":0.998,"t":"5090"}]},{"b":2.050,"d":0.060,"p":0.990,"t":"D","w":[{"b":2.050,"d":0.010,"p":0.998,"t":"1236"},{"b":2.060,"d":0.040,"p":0.993,"t":"1309"},{"b":2.100,"d":0.010,"p":0.998,"t":"1380"}]}]},{"b":2.110,"d":0.220,"p":0.970,"t":"young","w":[{"b":2.110,"d":0.070,"p":0.985,"t":"Y","w":[{"b":2.110,"d":0.030,"p":0.993,"t":"4941"},{"b":2.140,"d":0.010,"p":0.994,"t":"4956"},{"b":2.150,"d":0.030,"p":0.998,"t":"4975"}]},{"b":2.180,"d":0.060,"p":0.997,"t":"AH","w":[{"b":2.180,"d":0.020,"p":0.999,"t":"505"},{"b":2.200,"d":0.030,"p":0.998,"t":"580"},{"b":2.230,"d":0.010,"p":1.000,"t":"689"}]},{"b":2.240,"d":0.090,"p":0.988,"t":"NG","w":[{"b":2.240,"d":0.030,"p":0.993,"t":"3506"},{"b":2.270,"d":0.010,"p":0.998,"t":"3508"},{"b":2.280,"d":0.050,"p":0.997,"t":"3524"}]}]},{"b":2.330,"d":0.410,"p":0.971,"t":"man","w":[{"b":2.330,"d":0.100,"p":0.994,"t":"M","w":[{"b":2.330,"d":0.030,"p":0.998,"t":"3180"},{"b":2.360,"d":0.040,"p":0.998,"t":"3212"},{"b":2.400,"d":0.030,"p":0.998,"t":"3259"}]},{"b":2.430,"d":0.200,"p":0.983,"t":"AE","w":[{"b":2.430,"d":0.020,"p":0.999,"t":"237"},{"b":2.450,"d":0.100,"p":0.996,"t":"308"},{"b":2.550,"d":0.080,"p":0.989,"t":"321"}]},{"b":2.630,"d":0.110,"p":0.993,"t":"N","w":[{"b"
:2.630,"d":0.030,"p":0.998,"t":"3327"},{"b":2.660,"d":0.070,"p":0.996,"t":"3398"},{"b":2.730,"d":0.010,"p":0.999,"t":"3469"}]}]},{"b":2.740,"d":0.240,"p":0.978,"t":"","w":[{"b":2.740,"d":0.240,"p":0.978,"t":"SIL","w":[{"b":2.740,"d":0.180,"p":0.987,"t":"96"},{"b":2.920,"d":0.050,"p":0.995,"t":"97"},{"b":2.970,"d":0.010,"p":0.996,"t":"98"}]}]}]} diff --git a/test/data/librivox/sense_and_sensibility_01_austen_64kb-0890.json b/test/data/librivox/sense_and_sensibility_01_austen_64kb-0890.json new file mode 100644 index 000000000..89b4b95c0 --- /dev/null +++ b/test/data/librivox/sense_and_sensibility_01_austen_64kb-0890.json @@ -0,0 +1 @@ +{"b":0.000,"d":5.300,"p":1.000,"t":"unless to be rather cold hearted and rather selfish is to be ill disposed","w":[{"b":0.000,"d":0.270,"p":0.945,"t":""},{"b":0.270,"d":0.320,"p":0.961,"t":"unless"},{"b":0.590,"d":0.110,"p":0.989,"t":"to(3)"},{"b":0.700,"d":0.160,"p":0.988,"t":"be"},{"b":0.860,"d":0.360,"p":0.960,"t":"rather(2)"},{"b":1.220,"d":0.520,"p":0.936,"t":"cold"},{"b":1.740,"d":0.480,"p":0.924,"t":"hearted(2)"},{"b":2.220,"d":0.170,"p":0.970,"t":"and"},{"b":2.390,"d":0.390,"p":0.964,"t":"rather"},{"b":2.780,"d":0.810,"p":0.926,"t":"selfish"},{"b":3.590,"d":0.040,"p":0.954,"t":""},{"b":3.630,"d":0.250,"p":0.966,"t":"is"},{"b":3.880,"d":0.100,"p":0.991,"t":"to(3)"},{"b":3.980,"d":0.180,"p":0.983,"t":"be"},{"b":4.160,"d":0.210,"p":0.986,"t":"ill"},{"b":4.370,"d":0.720,"p":0.929,"t":"disposed"},{"b":5.090,"d":0.200,"p":0.954,"t":""}]} diff --git a/test/data/librivox/sense_and_sensibility_01_austen_64kb-0890.phone.json b/test/data/librivox/sense_and_sensibility_01_austen_64kb-0890.phone.json new file mode 100644 index 000000000..fae05ea8d --- /dev/null +++ b/test/data/librivox/sense_and_sensibility_01_austen_64kb-0890.phone.json @@ -0,0 +1 @@ +{"b":0.000,"d":5.300,"p":1.000,"t":"unless to be rather cold hearted and rather selfish is to be ill 
disposed","w":[{"b":0.000,"d":0.270,"p":0.987,"t":"","w":[{"b":0.000,"d":0.270,"p":0.987,"t":"SIL"}]},{"b":0.270,"d":0.320,"p":0.955,"t":"unless","w":[{"b":0.270,"d":0.060,"p":0.986,"t":"AH"},{"b":0.330,"d":0.030,"p":0.993,"t":"N"},{"b":0.360,"d":0.080,"p":0.995,"t":"L"},{"b":0.440,"d":0.060,"p":0.993,"t":"EH"},{"b":0.500,"d":0.090,"p":0.987,"t":"S"}]},{"b":0.590,"d":0.110,"p":0.994,"t":"to(3)","w":[{"b":0.590,"d":0.070,"p":0.996,"t":"T"},{"b":0.660,"d":0.040,"p":0.998,"t":"AH"}]},{"b":0.700,"d":0.160,"p":0.990,"t":"be","w":[{"b":0.700,"d":0.060,"p":0.996,"t":"B"},{"b":0.760,"d":0.100,"p":0.995,"t":"IY"}]},{"b":0.860,"d":0.360,"p":0.980,"t":"rather(2)","w":[{"b":0.860,"d":0.120,"p":0.993,"t":"R"},{"b":0.980,"d":0.060,"p":0.997,"t":"AH"},{"b":1.040,"d":0.070,"p":0.996,"t":"DH"},{"b":1.110,"d":0.110,"p":0.994,"t":"ER"}]},{"b":1.220,"d":0.520,"p":0.935,"t":"cold","w":[{"b":1.220,"d":0.240,"p":0.960,"t":"K"},{"b":1.460,"d":0.190,"p":0.987,"t":"OW"},{"b":1.650,"d":0.060,"p":0.995,"t":"L"},{"b":1.710,"d":0.030,"p":0.991,"t":"D"}]},{"b":1.740,"d":0.480,"p":0.936,"t":"hearted(2)","w":[{"b":1.740,"d":0.080,"p":0.984,"t":"HH"},{"b":1.820,"d":0.100,"p":0.992,"t":"AA"},{"b":1.920,"d":0.060,"p":0.996,"t":"R"},{"b":1.980,"d":0.050,"p":0.997,"t":"T"},{"b":2.030,"d":0.080,"p":0.990,"t":"IH"},{"b":2.110,"d":0.110,"p":0.976,"t":"D"}]},{"b":2.220,"d":0.170,"p":0.969,"t":"and","w":[{"b":2.220,"d":0.080,"p":0.988,"t":"AH"},{"b":2.300,"d":0.040,"p":0.992,"t":"N"},{"b":2.340,"d":0.050,"p":0.988,"t":"D"}]},{"b":2.390,"d":0.390,"p":0.969,"t":"rather","w":[{"b":2.390,"d":0.100,"p":0.993,"t":"R"},{"b":2.490,"d":0.090,"p":0.987,"t":"AE"},{"b":2.580,"d":0.070,"p":0.996,"t":"DH"},{"b":2.650,"d":0.130,"p":0.992,"t":"ER"}]},{"b":2.780,"d":0.810,"p":0.918,"t":"selfish","w":[{"b":2.780,"d":0.190,"p":0.985,"t":"S"},{"b":2.970,"d":0.060,"p":0.996,"t":"EH"},{"b":3.030,"d":0.090,"p":0.995,"t":"L"},{"b":3.120,"d":0.090,"p":0.992,"t":"F"},{"b":3.210,"d":0.140,"p":0.954,"t":"IH"},{"b":3.350,"d":0.240,"p":0
.994,"t":"SH"}]},{"b":3.590,"d":0.040,"p":0.988,"t":"","w":[{"b":3.590,"d":0.040,"p":0.988,"t":"SIL"}]},{"b":3.630,"d":0.250,"p":0.972,"t":"is","w":[{"b":3.630,"d":0.160,"p":0.983,"t":"IH"},{"b":3.790,"d":0.090,"p":0.989,"t":"Z"}]},{"b":3.880,"d":0.100,"p":0.992,"t":"to(3)","w":[{"b":3.880,"d":0.070,"p":0.993,"t":"T"},{"b":3.950,"d":0.030,"p":0.999,"t":"AH"}]},{"b":3.980,"d":0.180,"p":0.990,"t":"be","w":[{"b":3.980,"d":0.060,"p":0.996,"t":"B"},{"b":4.040,"d":0.120,"p":0.994,"t":"IY"}]},{"b":4.160,"d":0.210,"p":0.969,"t":"ill","w":[{"b":4.160,"d":0.060,"p":0.995,"t":"IH"},{"b":4.220,"d":0.150,"p":0.974,"t":"L"}]},{"b":4.370,"d":0.720,"p":0.874,"t":"disposed","w":[{"b":4.370,"d":0.030,"p":0.987,"t":"D"},{"b":4.400,"d":0.030,"p":0.985,"t":"IH"},{"b":4.430,"d":0.130,"p":0.976,"t":"S"},{"b":4.560,"d":0.070,"p":0.986,"t":"P"},{"b":4.630,"d":0.230,"p":0.971,"t":"OW"},{"b":4.860,"d":0.140,"p":0.981,"t":"Z"},{"b":5.000,"d":0.090,"p":0.980,"t":"D"}]},{"b":5.090,"d":0.200,"p":0.986,"t":"","w":[{"b":5.090,"d":0.200,"p":0.986,"t":"SIL"}]}]} diff --git a/test/data/librivox/sense_and_sensibility_01_austen_64kb-0890.state.json b/test/data/librivox/sense_and_sensibility_01_austen_64kb-0890.state.json new file mode 100644 index 000000000..c3fc17a2a --- /dev/null +++ b/test/data/librivox/sense_and_sensibility_01_austen_64kb-0890.state.json @@ -0,0 +1 @@ +{"b":0.000,"d":5.300,"p":1.000,"t":"unless to be rather cold hearted and rather selfish is to be ill 
disposed","w":[{"b":0.000,"d":0.270,"p":0.987,"t":"","w":[{"b":0.000,"d":0.270,"p":0.987,"t":"SIL","w":[{"b":0.000,"d":0.150,"p":1.000,"t":"96"},{"b":0.150,"d":0.010,"p":0.995,"t":"97"},{"b":0.160,"d":0.110,"p":0.992,"t":"98"}]}]},{"b":0.270,"d":0.320,"p":0.955,"t":"unless","w":[{"b":0.270,"d":0.060,"p":0.986,"t":"AH","w":[{"b":0.270,"d":0.020,"p":0.998,"t":"506"},{"b":0.290,"d":0.030,"p":0.988,"t":"573"},{"b":0.320,"d":0.010,"p":1.000,"t":"708"}]},{"b":0.330,"d":0.030,"p":0.993,"t":"N","w":[{"b":0.330,"d":0.010,"p":0.999,"t":"3299"},{"b":0.340,"d":0.010,"p":0.999,"t":"3385"},{"b":0.350,"d":0.010,"p":0.994,"t":"3439"}]},{"b":0.360,"d":0.080,"p":0.995,"t":"L","w":[{"b":0.360,"d":0.020,"p":0.999,"t":"2992"},{"b":0.380,"d":0.030,"p":0.998,"t":"3010"},{"b":0.410,"d":0.030,"p":0.998,"t":"3085"}]},{"b":0.440,"d":0.060,"p":0.993,"t":"EH","w":[{"b":0.440,"d":0.020,"p":0.999,"t":"1537"},{"b":0.460,"d":0.010,"p":0.999,"t":"1586"},{"b":0.470,"d":0.030,"p":0.995,"t":"1627"}]},{"b":0.500,"d":0.090,"p":0.987,"t":"S","w":[{"b":0.500,"d":0.030,"p":0.994,"t":"4061"},{"b":0.530,"d":0.020,"p":0.996,"t":"4106"},{"b":0.550,"d":0.040,"p":0.998,"t":"4156"}]}]},{"b":0.590,"d":0.110,"p":0.994,"t":"to(3)","w":[{"b":0.590,"d":0.070,"p":0.996,"t":"T","w":[{"b":0.590,"d":0.030,"p":0.998,"t":"4326"},{"b":0.620,"d":0.020,"p":0.999,"t":"4438"},{"b":0.640,"d":0.020,"p":0.999,"t":"4442"}]},{"b":0.660,"d":0.040,"p":0.998,"t":"AH","w":[{"b":0.660,"d":0.020,"p":0.999,"t":"393"},{"b":0.680,"d":0.010,"p":0.999,"t":"649"},{"b":0.690,"d":0.010,"p":1.000,"t":"774"}]}]},{"b":0.700,"d":0.160,"p":0.990,"t":"be","w":[{"b":0.700,"d":0.060,"p":0.996,"t":"B","w":[{"b":0.700,"d":0.010,"p":0.999,"t":"1072"},{"b":0.710,"d":0.020,"p":0.999,"t":"1113"},{"b":0.730,"d":0.030,"p":0.998,"t":"1141"}]},{"b":0.760,"d":0.100,"p":0.995,"t":"IY","w":[{"b":0.760,"d":0.020,"p":0.999,"t":"2547"},{"b":0.780,"d":0.030,"p":0.998,"t":"2651"},{"b":0.810,"d":0.050,"p":0.998,"t":"2683"}]}]},{"b":0.860,"d":0.360,"p":0.980,"t":"rather(2)","
w":[{"b":0.860,"d":0.120,"p":0.993,"t":"R","w":[{"b":0.860,"d":0.050,"p":0.997,"t":"3847"},{"b":0.910,"d":0.030,"p":0.998,"t":"3925"},{"b":0.940,"d":0.040,"p":0.998,"t":"3993"}]},{"b":0.980,"d":0.060,"p":0.997,"t":"AH","w":[{"b":0.980,"d":0.020,"p":0.999,"t":"451"},{"b":1.000,"d":0.030,"p":0.998,"t":"619"},{"b":1.030,"d":0.010,"p":1.000,"t":"778"}]},{"b":1.040,"d":0.070,"p":0.996,"t":"DH","w":[{"b":1.040,"d":0.030,"p":0.998,"t":"1411"},{"b":1.070,"d":0.020,"p":0.999,"t":"1454"},{"b":1.090,"d":0.020,"p":0.999,"t":"1489"}]},{"b":1.110,"d":0.110,"p":0.994,"t":"ER","w":[{"b":1.110,"d":0.030,"p":0.998,"t":"1651"},{"b":1.140,"d":0.050,"p":0.998,"t":"1729"},{"b":1.190,"d":0.030,"p":0.998,"t":"1800"}]}]},{"b":1.220,"d":0.520,"p":0.935,"t":"cold","w":[{"b":1.220,"d":0.240,"p":0.960,"t":"K","w":[{"b":1.220,"d":0.160,"p":0.964,"t":"2765"},{"b":1.380,"d":0.040,"p":0.998,"t":"2854"},{"b":1.420,"d":0.040,"p":0.998,"t":"2881"}]},{"b":1.460,"d":0.190,"p":0.987,"t":"OW","w":[{"b":1.460,"d":0.030,"p":0.998,"t":"3552"},{"b":1.490,"d":0.010,"p":0.999,"t":"3609"},{"b":1.500,"d":0.150,"p":0.990,"t":"3659"}]},{"b":1.650,"d":0.060,"p":0.995,"t":"L","w":[{"b":1.650,"d":0.010,"p":0.999,"t":"2932"},{"b":1.660,"d":0.030,"p":0.997,"t":"3049"},{"b":1.690,"d":0.020,"p":0.999,"t":"3126"}]},{"b":1.710,"d":0.030,"p":0.991,"t":"D","w":[{"b":1.710,"d":0.010,"p":0.999,"t":"1243"},{"b":1.720,"d":0.010,"p":1.000,"t":"1307"},{"b":1.730,"d":0.010,"p":0.993,"t":"1352"}]}]},{"b":1.740,"d":0.480,"p":0.936,"t":"hearted(2)","w":[{"b":1.740,"d":0.080,"p":0.984,"t":"HH","w":[{"b":1.740,"d":0.060,"p":0.987,"t":"2100"},{"b":1.800,"d":0.010,"p":0.997,"t":"2149"},{"b":1.810,"d":0.010,"p":1.000,"t":"2218"}]},{"b":1.820,"d":0.100,"p":0.992,"t":"AA","w":[{"b":1.820,"d":0.030,"p":0.998,"t":"144"},{"b":1.850,"d":0.010,"p":0.999,"t":"200"},{"b":1.860,"d":0.060,"p":0.995,"t":"218"}]},{"b":1.920,"d":0.060,"p":0.996,"t":"R","w":[{"b":1.920,"d":0.020,"p":0.999,"t":"3793"},{"b":1.940,"d":0.030,"p":0.998,"t":"3869"},{"b":1.970,"
d":0.010,"p":0.999,"t":"3996"}]},{"b":1.980,"d":0.050,"p":0.997,"t":"T","w":[{"b":1.980,"d":0.020,"p":0.999,"t":"4282"},{"b":2.000,"d":0.010,"p":0.999,"t":"4374"},{"b":2.010,"d":0.020,"p":0.999,"t":"4476"}]},{"b":2.030,"d":0.080,"p":0.990,"t":"IH","w":[{"b":2.030,"d":0.010,"p":0.998,"t":"2230"},{"b":2.040,"d":0.060,"p":0.993,"t":"2403"},{"b":2.100,"d":0.010,"p":1.000,"t":"2480"}]},{"b":2.110,"d":0.110,"p":0.976,"t":"D","w":[{"b":2.110,"d":0.040,"p":0.997,"t":"1203"},{"b":2.150,"d":0.010,"p":0.999,"t":"1277"},{"b":2.160,"d":0.060,"p":0.979,"t":"1340"}]}]},{"b":2.220,"d":0.170,"p":0.969,"t":"and","w":[{"b":2.220,"d":0.080,"p":0.988,"t":"AH","w":[{"b":2.220,"d":0.040,"p":0.991,"t":"489"},{"b":2.260,"d":0.030,"p":0.998,"t":"588"},{"b":2.290,"d":0.010,"p":1.000,"t":"719"}]},{"b":2.300,"d":0.040,"p":0.992,"t":"N","w":[{"b":2.300,"d":0.020,"p":0.999,"t":"3346"},{"b":2.320,"d":0.010,"p":0.996,"t":"3378"},{"b":2.330,"d":0.010,"p":0.997,"t":"3448"}]},{"b":2.340,"d":0.050,"p":0.988,"t":"D","w":[{"b":2.340,"d":0.020,"p":0.993,"t":"1232"},{"b":2.360,"d":0.010,"p":0.999,"t":"1266"},{"b":2.370,"d":0.020,"p":0.996,"t":"1383"}]}]},{"b":2.390,"d":0.390,"p":0.969,"t":"rather","w":[{"b":2.390,"d":0.100,"p":0.993,"t":"R","w":[{"b":2.390,"d":0.040,"p":0.997,"t":"3842"},{"b":2.430,"d":0.030,"p":0.998,"t":"3929"},{"b":2.460,"d":0.030,"p":0.998,"t":"3970"}]},{"b":2.490,"d":0.090,"p":0.987,"t":"AE","w":[{"b":2.490,"d":0.020,"p":0.999,"t":"238"},{"b":2.510,"d":0.040,"p":0.995,"t":"290"},{"b":2.550,"d":0.030,"p":0.994,"t":"336"}]},{"b":2.580,"d":0.070,"p":0.996,"t":"DH","w":[{"b":2.580,"d":0.030,"p":0.998,"t":"1411"},{"b":2.610,"d":0.020,"p":0.999,"t":"1455"},{"b":2.630,"d":0.020,"p":0.999,"t":"1489"}]},{"b":2.650,"d":0.130,"p":0.992,"t":"ER","w":[{"b":2.650,"d":0.030,"p":0.998,"t":"1650"},{"b":2.680,"d":0.060,"p":0.996,"t":"1734"},{"b":2.740,"d":0.040,"p":0.998,"t":"1806"}]}]},{"b":2.780,"d":0.810,"p":0.918,"t":"selfish","w":[{"b":2.780,"d":0.190,"p":0.985,"t":"S","w":[{"b":2.780,"d":0.030,"p
":0.998,"t":"4046"},{"b":2.810,"d":0.130,"p":0.988,"t":"4099"},{"b":2.940,"d":0.030,"p":0.998,"t":"4172"}]},{"b":2.970,"d":0.060,"p":0.996,"t":"EH","w":[{"b":2.970,"d":0.020,"p":0.999,"t":"1522"},{"b":2.990,"d":0.010,"p":0.999,"t":"1596"},{"b":3.000,"d":0.030,"p":0.998,"t":"1642"}]},{"b":3.030,"d":0.090,"p":0.995,"t":"L","w":[{"b":3.030,"d":0.050,"p":0.998,"t":"2948"},{"b":3.080,"d":0.020,"p":0.999,"t":"3069"},{"b":3.100,"d":0.020,"p":0.999,"t":"3118"}]},{"b":3.120,"d":0.090,"p":0.992,"t":"F","w":[{"b":3.120,"d":0.020,"p":0.999,"t":"1973"},{"b":3.140,"d":0.040,"p":0.998,"t":"1984"},{"b":3.180,"d":0.030,"p":0.996,"t":"2027"}]},{"b":3.210,"d":0.140,"p":0.954,"t":"IH","w":[{"b":3.210,"d":0.030,"p":0.990,"t":"2227"},{"b":3.240,"d":0.090,"p":0.965,"t":"2405"},{"b":3.330,"d":0.020,"p":0.999,"t":"2508"}]},{"b":3.350,"d":0.240,"p":0.994,"t":"SH","w":[{"b":3.350,"d":0.010,"p":0.999,"t":"4195"},{"b":3.360,"d":0.190,"p":0.997,"t":"4208"},{"b":3.550,"d":0.040,"p":0.998,"t":"4232"}]}]},{"b":3.590,"d":0.040,"p":0.988,"t":"","w":[{"b":3.590,"d":0.040,"p":0.988,"t":"SIL","w":[{"b":3.590,"d":0.010,"p":0.993,"t":"96"},{"b":3.600,"d":0.010,"p":0.997,"t":"97"},{"b":3.610,"d":0.020,"p":0.998,"t":"98"}]}]},{"b":3.630,"d":0.250,"p":0.972,"t":"is","w":[{"b":3.630,"d":0.160,"p":0.983,"t":"IH","w":[{"b":3.630,"d":0.080,"p":0.991,"t":"2282"},{"b":3.710,"d":0.050,"p":0.995,"t":"2377"},{"b":3.760,"d":0.030,"p":0.997,"t":"2512"}]},{"b":3.790,"d":0.090,"p":0.989,"t":"Z","w":[{"b":3.790,"d":0.030,"p":0.998,"t":"4992"},{"b":3.820,"d":0.030,"p":0.996,"t":"5049"},{"b":3.850,"d":0.030,"p":0.995,"t":"5090"}]}]},{"b":3.880,"d":0.100,"p":0.992,"t":"to(3)","w":[{"b":3.880,"d":0.070,"p":0.993,"t":"T","w":[{"b":3.880,"d":0.020,"p":0.999,"t":"4326"},{"b":3.900,"d":0.030,"p":0.995,"t":"4407"},{"b":3.930,"d":0.020,"p":0.999,"t":"4442"}]},{"b":3.950,"d":0.030,"p":0.999,"t":"AH","w":[{"b":3.950,"d":0.010,"p":1.000,"t":"393"},{"b":3.960,"d":0.010,"p":0.999,"t":"649"},{"b":3.970,"d":0.010,"p":1.000,"t":"774"}]}]},
{"b":3.980,"d":0.180,"p":0.990,"t":"be","w":[{"b":3.980,"d":0.060,"p":0.996,"t":"B","w":[{"b":3.980,"d":0.020,"p":0.999,"t":"1072"},{"b":4.000,"d":0.020,"p":0.999,"t":"1113"},{"b":4.020,"d":0.020,"p":0.999,"t":"1141"}]},{"b":4.040,"d":0.120,"p":0.994,"t":"IY","w":[{"b":4.040,"d":0.030,"p":0.998,"t":"2547"},{"b":4.070,"d":0.040,"p":0.998,"t":"2598"},{"b":4.110,"d":0.050,"p":0.998,"t":"2657"}]}]},{"b":4.160,"d":0.210,"p":0.969,"t":"ill","w":[{"b":4.160,"d":0.060,"p":0.995,"t":"IH","w":[{"b":4.160,"d":0.030,"p":0.996,"t":"2286"},{"b":4.190,"d":0.020,"p":0.999,"t":"2352"},{"b":4.210,"d":0.010,"p":1.000,"t":"2472"}]},{"b":4.220,"d":0.150,"p":0.974,"t":"L","w":[{"b":4.220,"d":0.040,"p":0.998,"t":"2957"},{"b":4.260,"d":0.090,"p":0.981,"t":"3067"},{"b":4.350,"d":0.020,"p":0.994,"t":"3124"}]}]},{"b":4.370,"d":0.720,"p":0.874,"t":"disposed","w":[{"b":4.370,"d":0.030,"p":0.987,"t":"D","w":[{"b":4.370,"d":0.010,"p":0.997,"t":"1245"},{"b":4.380,"d":0.010,"p":0.996,"t":"1306"},{"b":4.390,"d":0.010,"p":0.994,"t":"1327"}]},{"b":4.400,"d":0.030,"p":0.985,"t":"IH","w":[{"b":4.400,"d":0.010,"p":0.995,"t":"2260"},{"b":4.410,"d":0.010,"p":0.998,"t":"2390"},{"b":4.420,"d":0.010,"p":0.992,"t":"2502"}]},{"b":4.430,"d":0.130,"p":0.976,"t":"S","w":[{"b":4.430,"d":0.030,"p":0.984,"t":"4050"},{"b":4.460,"d":0.060,"p":0.994,"t":"4129"},{"b":4.520,"d":0.040,"p":0.998,"t":"4152"}]},{"b":4.560,"d":0.070,"p":0.986,"t":"P","w":[{"b":4.560,"d":0.020,"p":0.999,"t":"3706"},{"b":4.580,"d":0.030,"p":0.997,"t":"3715"},{"b":4.610,"d":0.020,"p":0.991,"t":"3751"}]},{"b":4.630,"d":0.230,"p":0.971,"t":"OW","w":[{"b":4.630,"d":0.060,"p":0.980,"t":"3548"},{"b":4.690,"d":0.100,"p":0.993,"t":"3598"},{"b":4.790,"d":0.070,"p":0.998,"t":"3642"}]},{"b":4.860,"d":0.140,"p":0.981,"t":"Z","w":[{"b":4.860,"d":0.040,"p":0.998,"t":"4996"},{"b":4.900,"d":0.060,"p":0.990,"t":"5050"},{"b":4.960,"d":0.040,"p":0.994,"t":"5090"}]},{"b":5.000,"d":0.090,"p":0.980,"t":"D","w":[{"b":5.000,"d":0.010,"p":0.998,"t":"1236"},{"b":5.010,"d
":0.010,"p":0.996,"t":"1309"},{"b":5.020,"d":0.070,"p":0.986,"t":"1359"}]}]},{"b":5.090,"d":0.200,"p":0.986,"t":"","w":[{"b":5.090,"d":0.200,"p":0.986,"t":"SIL","w":[{"b":5.090,"d":0.100,"p":0.994,"t":"96"},{"b":5.190,"d":0.090,"p":0.997,"t":"97"},{"b":5.280,"d":0.010,"p":0.995,"t":"98"}]}]}]} diff --git a/test/data/librivox/sense_and_sensibility_01_austen_64kb-0920.json b/test/data/librivox/sense_and_sensibility_01_austen_64kb-0920.json new file mode 100644 index 000000000..6fe0c7387 --- /dev/null +++ b/test/data/librivox/sense_and_sensibility_01_austen_64kb-0920.json @@ -0,0 +1 @@ +{"b":0.000,"d":6.050,"p":1.000,"t":"had he married a more a amiable woman he might have been made still more respectable than he was","w":[{"b":0.000,"d":0.220,"p":0.955,"t":""},{"b":0.220,"d":0.220,"p":0.984,"t":"had"},{"b":0.440,"d":0.100,"p":0.980,"t":"he"},{"b":0.540,"d":0.440,"p":0.963,"t":"married"},{"b":0.980,"d":0.050,"p":0.992,"t":"a"},{"b":1.030,"d":0.380,"p":0.966,"t":"more"},{"b":1.410,"d":0.050,"p":0.990,"t":"a(2)"},{"b":1.460,"d":0.550,"p":0.946,"t":"amiable"},{"b":2.010,"d":0.480,"p":0.941,"t":"woman"},{"b":2.490,"d":0.220,"p":0.984,"t":"he"},{"b":2.710,"d":0.290,"p":0.976,"t":"might"},{"b":3.000,"d":0.190,"p":0.982,"t":"have"},{"b":3.190,"d":0.170,"p":0.962,"t":"been"},{"b":3.360,"d":0.330,"p":0.979,"t":"made"},{"b":3.690,"d":0.380,"p":0.963,"t":"still"},{"b":4.070,"d":0.180,"p":0.982,"t":"more"},{"b":4.250,"d":0.750,"p":0.934,"t":"respectable"},{"b":5.000,"d":0.130,"p":0.975,"t":"than(2)"},{"b":5.130,"d":0.080,"p":0.978,"t":"he"},{"b":5.210,"d":0.620,"p":0.933,"t":"was"},{"b":5.830,"d":0.210,"p":0.951,"t":""}]} diff --git a/test/data/librivox/sense_and_sensibility_01_austen_64kb-0920.phone.json b/test/data/librivox/sense_and_sensibility_01_austen_64kb-0920.phone.json new file mode 100644 index 000000000..6f1a74403 --- /dev/null +++ b/test/data/librivox/sense_and_sensibility_01_austen_64kb-0920.phone.json @@ -0,0 +1 @@ +{"b":0.000,"d":6.050,"p":1.000,"t":"had he married 
a more a amiable woman he might have been made still more respectable than he was","w":[{"b":0.000,"d":0.220,"p":0.994,"t":"","w":[{"b":0.000,"d":0.220,"p":0.994,"t":"SIL"}]},{"b":0.220,"d":0.220,"p":0.987,"t":"had","w":[{"b":0.220,"d":0.110,"p":0.994,"t":"HH"},{"b":0.330,"d":0.080,"p":0.995,"t":"AE"},{"b":0.410,"d":0.030,"p":0.998,"t":"D"}]},{"b":0.440,"d":0.100,"p":0.984,"t":"he","w":[{"b":0.440,"d":0.040,"p":0.990,"t":"HH"},{"b":0.480,"d":0.060,"p":0.994,"t":"IY"}]},{"b":0.540,"d":0.440,"p":0.958,"t":"married","w":[{"b":0.540,"d":0.090,"p":0.995,"t":"M"},{"b":0.630,"d":0.040,"p":0.994,"t":"EH"},{"b":0.670,"d":0.130,"p":0.993,"t":"R"},{"b":0.800,"d":0.120,"p":0.983,"t":"IY"},{"b":0.920,"d":0.060,"p":0.993,"t":"D"}]},{"b":0.980,"d":0.050,"p":0.995,"t":"a","w":[{"b":0.980,"d":0.050,"p":0.995,"t":"AH"}]},{"b":1.030,"d":0.380,"p":0.975,"t":"more","w":[{"b":1.030,"d":0.090,"p":0.995,"t":"M"},{"b":1.120,"d":0.190,"p":0.990,"t":"AO"},{"b":1.310,"d":0.100,"p":0.990,"t":"R"}]},{"b":1.410,"d":0.050,"p":0.981,"t":"a(2)","w":[{"b":1.410,"d":0.050,"p":0.981,"t":"EY"}]},{"b":1.460,"d":0.550,"p":0.943,"t":"amiable","w":[{"b":1.460,"d":0.150,"p":0.978,"t":"EY"},{"b":1.610,"d":0.090,"p":0.988,"t":"M"},{"b":1.700,"d":0.070,"p":0.992,"t":"IY"},{"b":1.770,"d":0.050,"p":0.997,"t":"AH"},{"b":1.820,"d":0.080,"p":0.995,"t":"B"},{"b":1.900,"d":0.040,"p":0.998,"t":"AH"},{"b":1.940,"d":0.070,"p":0.995,"t":"L"}]},{"b":2.010,"d":0.480,"p":0.946,"t":"woman","w":[{"b":2.010,"d":0.150,"p":0.988,"t":"W"},{"b":2.160,"d":0.070,"p":0.992,"t":"UH"},{"b":2.230,"d":0.070,"p":0.991,"t":"M"},{"b":2.300,"d":0.070,"p":0.991,"t":"AH"},{"b":2.370,"d":0.120,"p":0.983,"t":"N"}]},{"b":2.490,"d":0.220,"p":0.975,"t":"he","w":[{"b":2.490,"d":0.130,"p":0.983,"t":"HH"},{"b":2.620,"d":0.090,"p":0.993,"t":"IY"}]},{"b":2.710,"d":0.290,"p":0.959,"t":"might","w":[{"b":2.710,"d":0.110,"p":0.989,"t":"M"},{"b":2.820,"d":0.130,"p":0.981,"t":"AY"},{"b":2.950,"d":0.050,"p":0.988,"t":"T"}]},{"b":3.000,"d":0.190,"p":0.977,"t":"h
ave","w":[{"b":3.000,"d":0.060,"p":0.995,"t":"HH"},{"b":3.060,"d":0.060,"p":0.989,"t":"AE"},{"b":3.120,"d":0.070,"p":0.993,"t":"V"}]},{"b":3.190,"d":0.170,"p":0.958,"t":"been","w":[{"b":3.190,"d":0.060,"p":0.989,"t":"B"},{"b":3.250,"d":0.060,"p":0.987,"t":"IH"},{"b":3.310,"d":0.050,"p":0.981,"t":"N"}]},{"b":3.360,"d":0.330,"p":0.969,"t":"made","w":[{"b":3.360,"d":0.100,"p":0.989,"t":"M"},{"b":3.460,"d":0.160,"p":0.987,"t":"EY"},{"b":3.620,"d":0.070,"p":0.994,"t":"D"}]},{"b":3.690,"d":0.380,"p":0.958,"t":"still","w":[{"b":3.690,"d":0.130,"p":0.993,"t":"S"},{"b":3.820,"d":0.080,"p":0.994,"t":"T"},{"b":3.900,"d":0.050,"p":0.997,"t":"IH"},{"b":3.950,"d":0.120,"p":0.974,"t":"L"}]},{"b":4.070,"d":0.180,"p":0.967,"t":"more","w":[{"b":4.070,"d":0.050,"p":0.981,"t":"M"},{"b":4.120,"d":0.090,"p":0.989,"t":"AO"},{"b":4.210,"d":0.040,"p":0.996,"t":"R"}]},{"b":4.250,"d":0.750,"p":0.913,"t":"respectable","w":[{"b":4.250,"d":0.080,"p":0.992,"t":"R"},{"b":4.330,"d":0.040,"p":0.996,"t":"IH"},{"b":4.370,"d":0.110,"p":0.986,"t":"S"},{"b":4.480,"d":0.080,"p":0.994,"t":"P"},{"b":4.560,"d":0.070,"p":0.993,"t":"EH"},{"b":4.630,"d":0.060,"p":0.981,"t":"K"},{"b":4.690,"d":0.050,"p":0.992,"t":"T"},{"b":4.740,"d":0.040,"p":0.996,"t":"AH"},{"b":4.780,"d":0.060,"p":0.995,"t":"B"},{"b":4.840,"d":0.030,"p":0.994,"t":"AH"},{"b":4.870,"d":0.130,"p":0.990,"t":"L"}]},{"b":5.000,"d":0.130,"p":0.979,"t":"than(2)","w":[{"b":5.000,"d":0.050,"p":0.992,"t":"DH"},{"b":5.050,"d":0.050,"p":0.997,"t":"AH"},{"b":5.100,"d":0.030,"p":0.990,"t":"N"}]},{"b":5.130,"d":0.080,"p":0.967,"t":"he","w":[{"b":5.130,"d":0.030,"p":0.975,"t":"HH"},{"b":5.160,"d":0.050,"p":0.992,"t":"IY"}]},{"b":5.210,"d":0.620,"p":0.913,"t":"was","w":[{"b":5.210,"d":0.100,"p":0.995,"t":"W"},{"b":5.310,"d":0.240,"p":0.954,"t":"AA"},{"b":5.550,"d":0.280,"p":0.962,"t":"Z"}]},{"b":5.830,"d":0.210,"p":0.985,"t":"","w":[{"b":5.830,"d":0.210,"p":0.985,"t":"SIL"}]}]} diff --git 
a/test/data/librivox/sense_and_sensibility_01_austen_64kb-0920.state.json b/test/data/librivox/sense_and_sensibility_01_austen_64kb-0920.state.json new file mode 100644 index 000000000..c8acbe791 --- /dev/null +++ b/test/data/librivox/sense_and_sensibility_01_austen_64kb-0920.state.json @@ -0,0 +1 @@ +{"b":0.000,"d":6.050,"p":1.000,"t":"had he married a more a amiable woman he might have been made still more respectable than he was","w":[{"b":0.000,"d":0.220,"p":0.994,"t":"","w":[{"b":0.000,"d":0.220,"p":0.994,"t":"SIL","w":[{"b":0.000,"d":0.190,"p":1.000,"t":"96"},{"b":0.190,"d":0.010,"p":0.996,"t":"97"},{"b":0.200,"d":0.020,"p":0.998,"t":"98"}]}]},{"b":0.220,"d":0.220,"p":0.987,"t":"had","w":[{"b":0.220,"d":0.110,"p":0.994,"t":"HH","w":[{"b":0.220,"d":0.050,"p":0.997,"t":"2113"},{"b":0.270,"d":0.050,"p":0.997,"t":"2170"},{"b":0.320,"d":0.010,"p":1.000,"t":"2209"}]},{"b":0.330,"d":0.080,"p":0.995,"t":"AE","w":[{"b":0.330,"d":0.030,"p":0.998,"t":"243"},{"b":0.360,"d":0.030,"p":0.998,"t":"296"},{"b":0.390,"d":0.020,"p":0.999,"t":"342"}]},{"b":0.410,"d":0.030,"p":0.998,"t":"D","w":[{"b":0.410,"d":0.010,"p":0.999,"t":"1185"},{"b":0.420,"d":0.010,"p":1.000,"t":"1247"},{"b":0.430,"d":0.010,"p":0.999,"t":"1353"}]}]},{"b":0.440,"d":0.100,"p":0.984,"t":"he","w":[{"b":0.440,"d":0.040,"p":0.990,"t":"HH","w":[{"b":0.440,"d":0.020,"p":0.998,"t":"2103"},{"b":0.460,"d":0.010,"p":0.993,"t":"2180"},{"b":0.470,"d":0.010,"p":1.000,"t":"2201"}]},{"b":0.480,"d":0.060,"p":0.994,"t":"IY","w":[{"b":0.480,"d":0.010,"p":0.997,"t":"2536"},{"b":0.490,"d":0.030,"p":0.998,"t":"2606"},{"b":0.520,"d":0.020,"p":0.999,"t":"2673"}]}]},{"b":0.540,"d":0.440,"p":0.958,"t":"married","w":[{"b":0.540,"d":0.090,"p":0.995,"t":"M","w":[{"b":0.540,"d":0.020,"p":0.999,"t":"3158"},{"b":0.560,"d":0.050,"p":0.998,"t":"3209"},{"b":0.610,"d":0.020,"p":0.999,"t":"3257"}]},{"b":0.630,"d":0.040,"p":0.994,"t":"EH","w":[{"b":0.630,"d":0.020,"p":0.999,"t":"1531"},{"b":0.650,"d":0.010,"p":0.995,"t":"1600"},{"b":0.660,"d":
0.010,"p":0.999,"t":"1635"}]},{"b":0.670,"d":0.130,"p":0.993,"t":"R","w":[{"b":0.670,"d":0.050,"p":0.997,"t":"3805"},{"b":0.720,"d":0.050,"p":0.997,"t":"3898"},{"b":0.770,"d":0.030,"p":0.998,"t":"3951"}]},{"b":0.800,"d":0.120,"p":0.983,"t":"IY","w":[{"b":0.800,"d":0.050,"p":0.995,"t":"2551"},{"b":0.850,"d":0.040,"p":0.989,"t":"2632"},{"b":0.890,"d":0.030,"p":0.998,"t":"2712"}]},{"b":0.920,"d":0.060,"p":0.993,"t":"D","w":[{"b":0.920,"d":0.020,"p":0.999,"t":"1212"},{"b":0.940,"d":0.020,"p":0.998,"t":"1272"},{"b":0.960,"d":0.020,"p":0.996,"t":"1339"}]}]},{"b":0.980,"d":0.050,"p":0.995,"t":"a","w":[{"b":0.980,"d":0.050,"p":0.995,"t":"AH","w":[{"b":0.980,"d":0.020,"p":0.996,"t":"482"},{"b":1.000,"d":0.020,"p":0.999,"t":"612"},{"b":1.020,"d":0.010,"p":1.000,"t":"682"}]}]},{"b":1.030,"d":0.380,"p":0.975,"t":"more","w":[{"b":1.030,"d":0.090,"p":0.995,"t":"M","w":[{"b":1.030,"d":0.030,"p":0.998,"t":"3145"},{"b":1.060,"d":0.050,"p":0.998,"t":"3190"},{"b":1.110,"d":0.010,"p":0.999,"t":"3253"}]},{"b":1.120,"d":0.190,"p":0.990,"t":"AO","w":[{"b":1.120,"d":0.020,"p":0.999,"t":"838"},{"b":1.140,"d":0.100,"p":0.995,"t":"873"},{"b":1.240,"d":0.070,"p":0.997,"t":"900"}]},{"b":1.310,"d":0.100,"p":0.990,"t":"R","w":[{"b":1.310,"d":0.050,"p":0.997,"t":"3787"},{"b":1.360,"d":0.030,"p":0.998,"t":"3900"},{"b":1.390,"d":0.020,"p":0.994,"t":"3953"}]}]},{"b":1.410,"d":0.050,"p":0.981,"t":"a(2)","w":[{"b":1.410,"d":0.050,"p":0.981,"t":"EY","w":[{"b":1.410,"d":0.020,"p":0.997,"t":"1883"},{"b":1.430,"d":0.010,"p":0.991,"t":"1911"},{"b":1.440,"d":0.020,"p":0.993,"t":"1939"}]}]},{"b":1.460,"d":0.550,"p":0.943,"t":"amiable","w":[{"b":1.460,"d":0.150,"p":0.978,"t":"EY","w":[{"b":1.460,"d":0.100,"p":0.981,"t":"1865"},{"b":1.560,"d":0.030,"p":0.998,"t":"1916"},{"b":1.590,"d":0.020,"p":0.999,"t":"1947"}]},{"b":1.610,"d":0.090,"p":0.988,"t":"M","w":[{"b":1.610,"d":0.030,"p":0.996,"t":"3156"},{"b":1.640,"d":0.030,"p":0.994,"t":"3210"},{"b":1.670,"d":0.030,"p":0.998,"t":"3255"}]},{"b":1.700,"d":0.070,"p":
0.992,"t":"IY","w":[{"b":1.700,"d":0.020,"p":0.998,"t":"2555"},{"b":1.720,"d":0.020,"p":0.995,"t":"2612"},{"b":1.740,"d":0.030,"p":0.998,"t":"2666"}]},{"b":1.770,"d":0.050,"p":0.997,"t":"AH","w":[{"b":1.770,"d":0.020,"p":0.999,"t":"500"},{"b":1.790,"d":0.020,"p":0.999,"t":"602"},{"b":1.810,"d":0.010,"p":1.000,"t":"767"}]},{"b":1.820,"d":0.080,"p":0.995,"t":"B","w":[{"b":1.820,"d":0.040,"p":0.997,"t":"1061"},{"b":1.860,"d":0.020,"p":0.999,"t":"1098"},{"b":1.880,"d":0.020,"p":0.999,"t":"1120"}]},{"b":1.900,"d":0.040,"p":0.998,"t":"AH","w":[{"b":1.900,"d":0.010,"p":1.000,"t":"462"},{"b":1.910,"d":0.010,"p":0.999,"t":"671"},{"b":1.920,"d":0.020,"p":0.999,"t":"731"}]},{"b":1.940,"d":0.070,"p":0.995,"t":"L","w":[{"b":1.940,"d":0.040,"p":0.997,"t":"2938"},{"b":1.980,"d":0.010,"p":0.999,"t":"3057"},{"b":1.990,"d":0.020,"p":0.999,"t":"3133"}]}]},{"b":2.010,"d":0.480,"p":0.946,"t":"woman","w":[{"b":2.010,"d":0.150,"p":0.988,"t":"W","w":[{"b":2.010,"d":0.100,"p":0.997,"t":"4846"},{"b":2.110,"d":0.040,"p":0.998,"t":"4880"},{"b":2.150,"d":0.010,"p":0.994,"t":"4929"}]},{"b":2.160,"d":0.070,"p":0.992,"t":"UH","w":[{"b":2.160,"d":0.010,"p":0.999,"t":"4595"},{"b":2.170,"d":0.030,"p":0.995,"t":"4601"},{"b":2.200,"d":0.030,"p":0.998,"t":"4610"}]},{"b":2.230,"d":0.070,"p":0.991,"t":"M","w":[{"b":2.230,"d":0.020,"p":0.998,"t":"3147"},{"b":2.250,"d":0.030,"p":0.998,"t":"3182"},{"b":2.280,"d":0.020,"p":0.995,"t":"3247"}]},{"b":2.300,"d":0.070,"p":0.991,"t":"AH","w":[{"b":2.300,"d":0.030,"p":0.994,"t":"435"},{"b":2.330,"d":0.020,"p":0.999,"t":"567"},{"b":2.350,"d":0.020,"p":0.999,"t":"712"}]},{"b":2.370,"d":0.120,"p":0.983,"t":"N","w":[{"b":2.370,"d":0.010,"p":0.999,"t":"3297"},{"b":2.380,"d":0.090,"p":0.987,"t":"3365"},{"b":2.470,"d":0.020,"p":0.997,"t":"3442"}]}]},{"b":2.490,"d":0.220,"p":0.975,"t":"he","w":[{"b":2.490,"d":0.130,"p":0.983,"t":"HH","w":[{"b":2.490,"d":0.050,"p":0.990,"t":"2123"},{"b":2.540,"d":0.060,"p":0.994,"t":"2178"},{"b":2.600,"d":0.020,"p":0.998,"t":"2202"}]},{"b":2
.620,"d":0.090,"p":0.993,"t":"IY","w":[{"b":2.620,"d":0.040,"p":0.996,"t":"2536"},{"b":2.660,"d":0.020,"p":0.998,"t":"2606"},{"b":2.680,"d":0.030,"p":0.998,"t":"2673"}]}]},{"b":2.710,"d":0.290,"p":0.959,"t":"might","w":[{"b":2.710,"d":0.110,"p":0.989,"t":"M","w":[{"b":2.710,"d":0.030,"p":0.997,"t":"3158"},{"b":2.740,"d":0.050,"p":0.995,"t":"3193"},{"b":2.790,"d":0.030,"p":0.998,"t":"3243"}]},{"b":2.820,"d":0.130,"p":0.981,"t":"AY","w":[{"b":2.820,"d":0.040,"p":0.998,"t":"952"},{"b":2.860,"d":0.040,"p":0.998,"t":"1016"},{"b":2.900,"d":0.050,"p":0.986,"t":"1050"}]},{"b":2.950,"d":0.050,"p":0.988,"t":"T","w":[{"b":2.950,"d":0.010,"p":0.996,"t":"4291"},{"b":2.960,"d":0.010,"p":0.999,"t":"4335"},{"b":2.970,"d":0.030,"p":0.993,"t":"4534"}]}]},{"b":3.000,"d":0.190,"p":0.977,"t":"have","w":[{"b":3.000,"d":0.060,"p":0.995,"t":"HH","w":[{"b":3.000,"d":0.010,"p":0.999,"t":"2098"},{"b":3.010,"d":0.020,"p":0.997,"t":"2165"},{"b":3.030,"d":0.030,"p":0.998,"t":"2210"}]},{"b":3.060,"d":0.060,"p":0.989,"t":"AE","w":[{"b":3.060,"d":0.040,"p":0.997,"t":"244"},{"b":3.100,"d":0.010,"p":0.996,"t":"300"},{"b":3.110,"d":0.010,"p":0.997,"t":"328"}]},{"b":3.120,"d":0.070,"p":0.993,"t":"V","w":[{"b":3.120,"d":0.040,"p":0.997,"t":"4740"},{"b":3.160,"d":0.020,"p":0.998,"t":"4765"},{"b":3.180,"d":0.010,"p":0.997,"t":"4794"}]}]},{"b":3.190,"d":0.170,"p":0.958,"t":"been","w":[{"b":3.190,"d":0.060,"p":0.989,"t":"B","w":[{"b":3.190,"d":0.010,"p":0.998,"t":"1075"},{"b":3.200,"d":0.030,"p":0.995,"t":"1109"},{"b":3.230,"d":0.020,"p":0.996,"t":"1138"}]},{"b":3.250,"d":0.060,"p":0.987,"t":"IH","w":[{"b":3.250,"d":0.020,"p":0.999,"t":"2267"},{"b":3.270,"d":0.010,"p":0.999,"t":"2333"},{"b":3.280,"d":0.030,"p":0.989,"t":"2456"}]},{"b":3.310,"d":0.050,"p":0.981,"t":"N","w":[{"b":3.310,"d":0.020,"p":0.991,"t":"3333"},{"b":3.330,"d":0.020,"p":0.994,"t":"3381"},{"b":3.350,"d":0.010,"p":0.996,"t":"3434"}]}]},{"b":3.360,"d":0.330,"p":0.969,"t":"made","w":[{"b":3.360,"d":0.100,"p":0.989,"t":"M","w":[{"b":3.360,"d"
:0.010,"p":0.998,"t":"3181"},{"b":3.370,"d":0.050,"p":0.993,"t":"3212"},{"b":3.420,"d":0.040,"p":0.998,"t":"3265"}]},{"b":3.460,"d":0.160,"p":0.987,"t":"EY","w":[{"b":3.460,"d":0.050,"p":0.996,"t":"1878"},{"b":3.510,"d":0.080,"p":0.996,"t":"1898"},{"b":3.590,"d":0.030,"p":0.994,"t":"1932"}]},{"b":3.620,"d":0.070,"p":0.994,"t":"D","w":[{"b":3.620,"d":0.020,"p":0.999,"t":"1213"},{"b":3.640,"d":0.030,"p":0.996,"t":"1259"},{"b":3.670,"d":0.020,"p":0.999,"t":"1388"}]}]},{"b":3.690,"d":0.380,"p":0.958,"t":"still","w":[{"b":3.690,"d":0.130,"p":0.993,"t":"S","w":[{"b":3.690,"d":0.040,"p":0.998,"t":"4032"},{"b":3.730,"d":0.050,"p":0.998,"t":"4115"},{"b":3.780,"d":0.040,"p":0.998,"t":"4158"}]},{"b":3.820,"d":0.080,"p":0.994,"t":"T","w":[{"b":3.820,"d":0.050,"p":0.997,"t":"4324"},{"b":3.870,"d":0.010,"p":0.998,"t":"4437"},{"b":3.880,"d":0.020,"p":0.999,"t":"4441"}]},{"b":3.900,"d":0.050,"p":0.997,"t":"IH","w":[{"b":3.900,"d":0.020,"p":0.999,"t":"2235"},{"b":3.920,"d":0.010,"p":0.999,"t":"2351"},{"b":3.930,"d":0.020,"p":0.999,"t":"2470"}]},{"b":3.950,"d":0.120,"p":0.974,"t":"L","w":[{"b":3.950,"d":0.050,"p":0.998,"t":"2957"},{"b":4.000,"d":0.030,"p":0.993,"t":"3068"},{"b":4.030,"d":0.040,"p":0.983,"t":"3130"}]}]},{"b":4.070,"d":0.180,"p":0.967,"t":"more","w":[{"b":4.070,"d":0.050,"p":0.981,"t":"M","w":[{"b":4.070,"d":0.010,"p":0.995,"t":"3149"},{"b":4.080,"d":0.020,"p":0.991,"t":"3188"},{"b":4.100,"d":0.020,"p":0.996,"t":"3253"}]},{"b":4.120,"d":0.090,"p":0.989,"t":"AO","w":[{"b":4.120,"d":0.010,"p":0.997,"t":"838"},{"b":4.130,"d":0.060,"p":0.993,"t":"873"},{"b":4.190,"d":0.020,"p":0.999,"t":"900"}]},{"b":4.210,"d":0.040,"p":0.996,"t":"R","w":[{"b":4.210,"d":0.010,"p":0.998,"t":"3783"},{"b":4.220,"d":0.020,"p":0.999,"t":"3889"},{"b":4.240,"d":0.010,"p":0.999,"t":"4018"}]}]},{"b":4.250,"d":0.750,"p":0.913,"t":"respectable","w":[{"b":4.250,"d":0.080,"p":0.992,"t":"R","w":[{"b":4.250,"d":0.050,"p":0.994,"t":"3852"},{"b":4.300,"d":0.020,"p":0.999,"t":"3934"},{"b":4.320,"d":0.010,"p
":0.999,"t":"3958"}]},{"b":4.330,"d":0.040,"p":0.996,"t":"IH","w":[{"b":4.330,"d":0.010,"p":0.998,"t":"2310"},{"b":4.340,"d":0.010,"p":0.999,"t":"2387"},{"b":4.350,"d":0.020,"p":0.999,"t":"2506"}]},{"b":4.370,"d":0.110,"p":0.986,"t":"S","w":[{"b":4.370,"d":0.030,"p":0.995,"t":"4050"},{"b":4.400,"d":0.040,"p":0.993,"t":"4129"},{"b":4.440,"d":0.040,"p":0.998,"t":"4152"}]},{"b":4.480,"d":0.080,"p":0.994,"t":"P","w":[{"b":4.480,"d":0.030,"p":0.998,"t":"3706"},{"b":4.510,"d":0.030,"p":0.998,"t":"3717"},{"b":4.540,"d":0.020,"p":0.999,"t":"3772"}]},{"b":4.560,"d":0.070,"p":0.993,"t":"EH","w":[{"b":4.560,"d":0.020,"p":0.999,"t":"1514"},{"b":4.580,"d":0.040,"p":0.995,"t":"1547"},{"b":4.620,"d":0.010,"p":0.999,"t":"1622"}]},{"b":4.630,"d":0.060,"p":0.981,"t":"K","w":[{"b":4.630,"d":0.040,"p":0.992,"t":"2781"},{"b":4.670,"d":0.010,"p":0.993,"t":"2815"},{"b":4.680,"d":0.010,"p":0.995,"t":"2918"}]},{"b":4.690,"d":0.050,"p":0.992,"t":"T","w":[{"b":4.690,"d":0.010,"p":0.998,"t":"4316"},{"b":4.700,"d":0.010,"p":0.999,"t":"4398"},{"b":4.710,"d":0.030,"p":0.995,"t":"4439"}]},{"b":4.740,"d":0.040,"p":0.996,"t":"AH","w":[{"b":4.740,"d":0.020,"p":0.999,"t":"388"},{"b":4.760,"d":0.010,"p":0.998,"t":"601"},{"b":4.770,"d":0.010,"p":1.000,"t":"767"}]},{"b":4.780,"d":0.060,"p":0.995,"t":"B","w":[{"b":4.780,"d":0.030,"p":0.998,"t":"1061"},{"b":4.810,"d":0.020,"p":0.998,"t":"1098"},{"b":4.830,"d":0.010,"p":0.999,"t":"1120"}]},{"b":4.840,"d":0.030,"p":0.994,"t":"AH","w":[{"b":4.840,"d":0.010,"p":1.000,"t":"462"},{"b":4.850,"d":0.010,"p":0.999,"t":"671"},{"b":4.860,"d":0.010,"p":0.996,"t":"731"}]},{"b":4.870,"d":0.130,"p":0.990,"t":"L","w":[{"b":4.870,"d":0.080,"p":0.996,"t":"2941"},{"b":4.950,"d":0.030,"p":0.996,"t":"3053"},{"b":4.980,"d":0.020,"p":0.998,"t":"3123"}]}]},{"b":5.000,"d":0.130,"p":0.979,"t":"than(2)","w":[{"b":5.000,"d":0.050,"p":0.992,"t":"DH","w":[{"b":5.000,"d":0.010,"p":0.995,"t":"1419"},{"b":5.010,"d":0.010,"p":1.000,"t":"1452"},{"b":5.020,"d":0.030,"p":0.998,"t":"1465"}]},{"
b":5.050,"d":0.050,"p":0.997,"t":"AH","w":[{"b":5.050,"d":0.020,"p":0.999,"t":"415"},{"b":5.070,"d":0.020,"p":0.999,"t":"572"},{"b":5.090,"d":0.010,"p":0.999,"t":"709"}]},{"b":5.100,"d":0.030,"p":0.990,"t":"N","w":[{"b":5.100,"d":0.010,"p":0.997,"t":"3297"},{"b":5.110,"d":0.010,"p":0.997,"t":"3365"},{"b":5.120,"d":0.010,"p":0.995,"t":"3442"}]}]},{"b":5.130,"d":0.080,"p":0.967,"t":"he","w":[{"b":5.130,"d":0.030,"p":0.975,"t":"HH","w":[{"b":5.130,"d":0.010,"p":0.994,"t":"2123"},{"b":5.140,"d":0.010,"p":0.989,"t":"2178"},{"b":5.150,"d":0.010,"p":0.991,"t":"2202"}]},{"b":5.160,"d":0.050,"p":0.992,"t":"IY","w":[{"b":5.160,"d":0.010,"p":0.995,"t":"2538"},{"b":5.170,"d":0.010,"p":0.999,"t":"2653"},{"b":5.180,"d":0.030,"p":0.998,"t":"2680"}]}]},{"b":5.210,"d":0.620,"p":0.913,"t":"was","w":[{"b":5.210,"d":0.100,"p":0.995,"t":"W","w":[{"b":5.210,"d":0.050,"p":0.998,"t":"4858"},{"b":5.260,"d":0.030,"p":0.998,"t":"4893"},{"b":5.290,"d":0.020,"p":0.999,"t":"4913"}]},{"b":5.310,"d":0.240,"p":0.954,"t":"AA","w":[{"b":5.310,"d":0.040,"p":0.998,"t":"161"},{"b":5.350,"d":0.100,"p":0.997,"t":"178"},{"b":5.450,"d":0.100,"p":0.959,"t":"210"}]},{"b":5.550,"d":0.280,"p":0.962,"t":"Z","w":[{"b":5.550,"d":0.030,"p":0.989,"t":"4999"},{"b":5.580,"d":0.190,"p":0.979,"t":"5069"},{"b":5.770,"d":0.060,"p":0.994,"t":"5093"}]}]},{"b":5.830,"d":0.210,"p":0.985,"t":"","w":[{"b":5.830,"d":0.210,"p":0.985,"t":"SIL","w":[{"b":5.830,"d":0.060,"p":0.996,"t":"96"},{"b":5.890,"d":0.140,"p":0.994,"t":"97"},{"b":6.030,"d":0.010,"p":0.994,"t":"98"}]}]}]} diff --git a/test/data/librivox/sense_and_sensibility_01_austen_64kb-0930.json b/test/data/librivox/sense_and_sensibility_01_austen_64kb-0930.json new file mode 100644 index 000000000..22de40650 --- /dev/null +++ b/test/data/librivox/sense_and_sensibility_01_austen_64kb-0930.json @@ -0,0 +1 @@ +{"b":0.000,"d":3.290,"p":1.000,"t":"he might even have been made amiable 
himself","w":[{"b":0.000,"d":0.210,"p":0.946,"t":""},{"b":0.210,"d":0.170,"p":0.984,"t":"he"},{"b":0.380,"d":0.260,"p":0.967,"t":"might"},{"b":0.640,"d":0.280,"p":0.972,"t":"even"},{"b":0.920,"d":0.150,"p":0.962,"t":"have"},{"b":1.070,"d":0.260,"p":0.966,"t":"been"},{"b":1.330,"d":0.370,"p":0.968,"t":"made"},{"b":1.700,"d":0.570,"p":0.957,"t":"amiable"},{"b":2.270,"d":0.750,"p":0.920,"t":"himself"},{"b":3.020,"d":0.260,"p":0.952,"t":""}]} diff --git a/test/data/librivox/sense_and_sensibility_01_austen_64kb-0930.phone.json b/test/data/librivox/sense_and_sensibility_01_austen_64kb-0930.phone.json new file mode 100644 index 000000000..e50bbd46b --- /dev/null +++ b/test/data/librivox/sense_and_sensibility_01_austen_64kb-0930.phone.json @@ -0,0 +1 @@ +{"b":0.000,"d":3.290,"p":1.000,"t":"he might even have been made amiable himself","w":[{"b":0.000,"d":0.210,"p":0.988,"t":"","w":[{"b":0.000,"d":0.210,"p":0.988,"t":"SIL"}]},{"b":0.210,"d":0.170,"p":0.990,"t":"he","w":[{"b":0.210,"d":0.090,"p":0.995,"t":"HH"},{"b":0.300,"d":0.080,"p":0.996,"t":"IY"}]},{"b":0.380,"d":0.260,"p":0.970,"t":"might","w":[{"b":0.380,"d":0.100,"p":0.981,"t":"M"},{"b":0.480,"d":0.120,"p":0.994,"t":"AY"},{"b":0.600,"d":0.040,"p":0.995,"t":"T"}]},{"b":0.640,"d":0.280,"p":0.976,"t":"even","w":[{"b":0.640,"d":0.110,"p":0.994,"t":"IY"},{"b":0.750,"d":0.070,"p":0.996,"t":"V"},{"b":0.820,"d":0.040,"p":0.996,"t":"IH"},{"b":0.860,"d":0.060,"p":0.990,"t":"N"}]},{"b":0.920,"d":0.150,"p":0.964,"t":"have","w":[{"b":0.920,"d":0.030,"p":0.981,"t":"HH"},{"b":0.950,"d":0.060,"p":0.989,"t":"AE"},{"b":1.010,"d":0.060,"p":0.995,"t":"V"}]},{"b":1.070,"d":0.260,"p":0.978,"t":"been","w":[{"b":1.070,"d":0.070,"p":0.993,"t":"B"},{"b":1.140,"d":0.100,"p":0.991,"t":"IH"},{"b":1.240,"d":0.090,"p":0.994,"t":"N"}]},{"b":1.330,"d":0.370,"p":0.954,"t":"made","w":[{"b":1.330,"d":0.070,"p":0.996,"t":"M"},{"b":1.400,"d":0.230,"p":0.990,"t":"EY"},{"b":1.630,"d":0.070,"p":0.968,"t":"D"}]},{"b":1.700,"d":0.570,"p":0.937,"t":"amiable","w
":[{"b":1.700,"d":0.110,"p":0.989,"t":"EY"},{"b":1.810,"d":0.080,"p":0.984,"t":"M"},{"b":1.890,"d":0.080,"p":0.986,"t":"IY"},{"b":1.970,"d":0.060,"p":0.996,"t":"AH"},{"b":2.030,"d":0.080,"p":0.994,"t":"B"},{"b":2.110,"d":0.030,"p":0.999,"t":"AH"},{"b":2.140,"d":0.130,"p":0.988,"t":"L"}]},{"b":2.270,"d":0.750,"p":0.900,"t":"himself","w":[{"b":2.270,"d":0.030,"p":0.977,"t":"HH"},{"b":2.300,"d":0.030,"p":0.991,"t":"IH"},{"b":2.330,"d":0.090,"p":0.978,"t":"M"},{"b":2.420,"d":0.180,"p":0.992,"t":"S"},{"b":2.600,"d":0.080,"p":0.995,"t":"EH"},{"b":2.680,"d":0.130,"p":0.991,"t":"L"},{"b":2.810,"d":0.210,"p":0.970,"t":"F"}]},{"b":3.020,"d":0.260,"p":0.984,"t":"","w":[{"b":3.020,"d":0.260,"p":0.984,"t":"SIL"}]}]} diff --git a/test/data/librivox/sense_and_sensibility_01_austen_64kb-0930.state.json b/test/data/librivox/sense_and_sensibility_01_austen_64kb-0930.state.json new file mode 100644 index 000000000..750acb194 --- /dev/null +++ b/test/data/librivox/sense_and_sensibility_01_austen_64kb-0930.state.json @@ -0,0 +1 @@ +{"b":0.000,"d":3.290,"p":1.000,"t":"he might even have been made amiable 
himself","w":[{"b":0.000,"d":0.210,"p":0.988,"t":"","w":[{"b":0.000,"d":0.210,"p":0.988,"t":"SIL","w":[{"b":0.000,"d":0.010,"p":1.000,"t":"96"},{"b":0.010,"d":0.080,"p":0.997,"t":"97"},{"b":0.090,"d":0.120,"p":0.990,"t":"98"}]}]},{"b":0.210,"d":0.170,"p":0.990,"t":"he","w":[{"b":0.210,"d":0.090,"p":0.995,"t":"HH","w":[{"b":0.210,"d":0.050,"p":0.997,"t":"2110"},{"b":0.260,"d":0.020,"p":0.999,"t":"2182"},{"b":0.280,"d":0.020,"p":0.999,"t":"2204"}]},{"b":0.300,"d":0.080,"p":0.996,"t":"IY","w":[{"b":0.300,"d":0.010,"p":0.999,"t":"2536"},{"b":0.310,"d":0.040,"p":0.998,"t":"2606"},{"b":0.350,"d":0.030,"p":0.998,"t":"2673"}]}]},{"b":0.380,"d":0.260,"p":0.970,"t":"might","w":[{"b":0.380,"d":0.100,"p":0.981,"t":"M","w":[{"b":0.380,"d":0.020,"p":0.993,"t":"3158"},{"b":0.400,"d":0.040,"p":0.996,"t":"3193"},{"b":0.440,"d":0.040,"p":0.993,"t":"3243"}]},{"b":0.480,"d":0.120,"p":0.994,"t":"AY","w":[{"b":0.480,"d":0.050,"p":0.998,"t":"952"},{"b":0.530,"d":0.030,"p":0.998,"t":"1016"},{"b":0.560,"d":0.040,"p":0.998,"t":"1050"}]},{"b":0.600,"d":0.040,"p":0.995,"t":"T","w":[{"b":0.600,"d":0.020,"p":0.998,"t":"4291"},{"b":0.620,"d":0.010,"p":0.998,"t":"4396"},{"b":0.630,"d":0.010,"p":0.999,"t":"4468"}]}]},{"b":0.640,"d":0.280,"p":0.976,"t":"even","w":[{"b":0.640,"d":0.110,"p":0.994,"t":"IY","w":[{"b":0.640,"d":0.010,"p":0.999,"t":"2540"},{"b":0.650,"d":0.070,"p":0.996,"t":"2604"},{"b":0.720,"d":0.030,"p":0.998,"t":"2709"}]},{"b":0.750,"d":0.070,"p":0.996,"t":"V","w":[{"b":0.750,"d":0.020,"p":0.999,"t":"4746"},{"b":0.770,"d":0.020,"p":0.999,"t":"4757"},{"b":0.790,"d":0.030,"p":0.998,"t":"4808"}]},{"b":0.820,"d":0.040,"p":0.996,"t":"IH","w":[{"b":0.820,"d":0.010,"p":1.000,"t":"2275"},{"b":0.830,"d":0.010,"p":0.998,"t":"2342"},{"b":0.840,"d":0.020,"p":0.999,"t":"2460"}]},{"b":0.860,"d":0.060,"p":0.990,"t":"N","w":[{"b":0.860,"d":0.020,"p":0.995,"t":"3333"},{"b":0.880,"d":0.020,"p":0.996,"t":"3366"},{"b":0.900,"d":0.020,"p":0.999,"t":"3442"}]}]},{"b":0.920,"d":0.150,"p":0.964,"t":"have","w"
:[{"b":0.920,"d":0.030,"p":0.981,"t":"HH","w":[{"b":0.920,"d":0.010,"p":0.998,"t":"2126"},{"b":0.930,"d":0.010,"p":0.987,"t":"2167"},{"b":0.940,"d":0.010,"p":0.996,"t":"2213"}]},{"b":0.950,"d":0.060,"p":0.989,"t":"AE","w":[{"b":0.950,"d":0.010,"p":0.997,"t":"244"},{"b":0.960,"d":0.020,"p":0.994,"t":"300"},{"b":0.980,"d":0.030,"p":0.997,"t":"328"}]},{"b":1.010,"d":0.060,"p":0.995,"t":"V","w":[{"b":1.010,"d":0.030,"p":0.998,"t":"4740"},{"b":1.040,"d":0.020,"p":0.999,"t":"4765"},{"b":1.060,"d":0.010,"p":0.998,"t":"4794"}]}]},{"b":1.070,"d":0.260,"p":0.978,"t":"been","w":[{"b":1.070,"d":0.070,"p":0.993,"t":"B","w":[{"b":1.070,"d":0.020,"p":0.996,"t":"1075"},{"b":1.090,"d":0.030,"p":0.997,"t":"1109"},{"b":1.120,"d":0.020,"p":0.999,"t":"1138"}]},{"b":1.140,"d":0.100,"p":0.991,"t":"IH","w":[{"b":1.140,"d":0.030,"p":0.998,"t":"2267"},{"b":1.170,"d":0.060,"p":0.995,"t":"2333"},{"b":1.230,"d":0.010,"p":0.999,"t":"2456"}]},{"b":1.240,"d":0.090,"p":0.994,"t":"N","w":[{"b":1.240,"d":0.040,"p":0.997,"t":"3333"},{"b":1.280,"d":0.030,"p":0.998,"t":"3381"},{"b":1.310,"d":0.020,"p":0.999,"t":"3434"}]}]},{"b":1.330,"d":0.370,"p":0.954,"t":"made","w":[{"b":1.330,"d":0.070,"p":0.996,"t":"M","w":[{"b":1.330,"d":0.010,"p":0.999,"t":"3181"},{"b":1.340,"d":0.030,"p":0.998,"t":"3212"},{"b":1.370,"d":0.030,"p":0.998,"t":"3265"}]},{"b":1.400,"d":0.230,"p":0.990,"t":"EY","w":[{"b":1.400,"d":0.040,"p":0.998,"t":"1878"},{"b":1.440,"d":0.160,"p":0.996,"t":"1898"},{"b":1.600,"d":0.030,"p":0.997,"t":"1932"}]},{"b":1.630,"d":0.070,"p":0.968,"t":"D","w":[{"b":1.630,"d":0.010,"p":0.999,"t":"1212"},{"b":1.640,"d":0.010,"p":0.993,"t":"1272"},{"b":1.650,"d":0.050,"p":0.976,"t":"1347"}]}]},{"b":1.700,"d":0.570,"p":0.937,"t":"amiable","w":[{"b":1.700,"d":0.110,"p":0.989,"t":"EY","w":[{"b":1.700,"d":0.050,"p":0.996,"t":"1865"},{"b":1.750,"d":0.040,"p":0.998,"t":"1917"},{"b":1.790,"d":0.020,"p":0.995,"t":"1947"}]},{"b":1.810,"d":0.080,"p":0.984,"t":"M","w":[{"b":1.810,"d":0.040,"p":0.989,"t":"3156"},{"b":1.85
0,"d":0.020,"p":0.996,"t":"3210"},{"b":1.870,"d":0.020,"p":0.999,"t":"3255"}]},{"b":1.890,"d":0.080,"p":0.986,"t":"IY","w":[{"b":1.890,"d":0.030,"p":0.996,"t":"2555"},{"b":1.920,"d":0.020,"p":0.994,"t":"2612"},{"b":1.940,"d":0.030,"p":0.996,"t":"2666"}]},{"b":1.970,"d":0.060,"p":0.996,"t":"AH","w":[{"b":1.970,"d":0.010,"p":1.000,"t":"500"},{"b":1.980,"d":0.030,"p":0.998,"t":"602"},{"b":2.010,"d":0.020,"p":0.999,"t":"767"}]},{"b":2.030,"d":0.080,"p":0.994,"t":"B","w":[{"b":2.030,"d":0.040,"p":0.997,"t":"1061"},{"b":2.070,"d":0.020,"p":0.999,"t":"1098"},{"b":2.090,"d":0.020,"p":0.999,"t":"1120"}]},{"b":2.110,"d":0.030,"p":0.999,"t":"AH","w":[{"b":2.110,"d":0.010,"p":1.000,"t":"462"},{"b":2.120,"d":0.010,"p":0.999,"t":"671"},{"b":2.130,"d":0.010,"p":1.000,"t":"731"}]},{"b":2.140,"d":0.130,"p":0.988,"t":"L","w":[{"b":2.140,"d":0.050,"p":0.998,"t":"2934"},{"b":2.190,"d":0.060,"p":0.996,"t":"3060"},{"b":2.250,"d":0.020,"p":0.995,"t":"3128"}]}]},{"b":2.270,"d":0.750,"p":0.900,"t":"himself","w":[{"b":2.270,"d":0.030,"p":0.977,"t":"HH","w":[{"b":2.270,"d":0.010,"p":0.990,"t":"2118"},{"b":2.280,"d":0.010,"p":0.994,"t":"2187"},{"b":2.290,"d":0.010,"p":0.993,"t":"2196"}]},{"b":2.300,"d":0.030,"p":0.991,"t":"IH","w":[{"b":2.300,"d":0.010,"p":0.998,"t":"2246"},{"b":2.310,"d":0.010,"p":0.996,"t":"2350"},{"b":2.320,"d":0.010,"p":0.997,"t":"2466"}]},{"b":2.330,"d":0.090,"p":0.978,"t":"M","w":[{"b":2.330,"d":0.030,"p":0.991,"t":"3166"},{"b":2.360,"d":0.030,"p":0.990,"t":"3223"},{"b":2.390,"d":0.030,"p":0.996,"t":"3280"}]},{"b":2.420,"d":0.180,"p":0.992,"t":"S","w":[{"b":2.420,"d":0.040,"p":0.998,"t":"4038"},{"b":2.460,"d":0.100,"p":0.997,"t":"4070"},{"b":2.560,"d":0.040,"p":0.998,"t":"4173"}]},{"b":2.600,"d":0.080,"p":0.995,"t":"EH","w":[{"b":2.600,"d":0.020,"p":0.999,"t":"1522"},{"b":2.620,"d":0.030,"p":0.998,"t":"1596"},{"b":2.650,"d":0.030,"p":0.998,"t":"1642"}]},{"b":2.680,"d":0.130,"p":0.991,"t":"L","w":[{"b":2.680,"d":0.080,"p":0.995,"t":"2948"},{"b":2.760,"d":0.030,"p":0.998,"
t":"3069"},{"b":2.790,"d":0.020,"p":0.999,"t":"3118"}]},{"b":2.810,"d":0.210,"p":0.970,"t":"F","w":[{"b":2.810,"d":0.020,"p":0.999,"t":"1974"},{"b":2.830,"d":0.070,"p":0.995,"t":"2004"},{"b":2.900,"d":0.120,"p":0.976,"t":"2019"}]}]},{"b":3.020,"d":0.260,"p":0.984,"t":"","w":[{"b":3.020,"d":0.260,"p":0.984,"t":"SIL","w":[{"b":3.020,"d":0.160,"p":0.991,"t":"96"},{"b":3.180,"d":0.090,"p":0.997,"t":"97"},{"b":3.270,"d":0.010,"p":0.995,"t":"98"}]}]}]} diff --git a/test/data/librivox/test-align.matchseg b/test/data/librivox/test-align.matchseg index 2edc7fe57..ace259e1e 100644 --- a/test/data/librivox/test-align.matchseg +++ b/test/data/librivox/test-align.matchseg @@ -1,5 +1,5 @@ -sense_and_sensibility_01_austen_64kb-0870 S 0 T -13828 A -13828 L 0 0 -206 0 22 -538 0 and 37 -390 0 mister 63 -296 0 john 98 -948 0 dashwood 158 -319 0 had 184 -1125 0 then 222 -1895 0 leisure 271 -448 0 to 289 -717 0 consider 344 -714 0 how 390 -724 0 much 433 -402 0 there 452 -381 0 might 479 -146 0 be 494 -1441 0 prudently 546 -148 0 in 556 -389 0 his 574 -319 0 power 604 -406 0 to 615 -280 0 do 633 -385 0 for 661 -1074 0 them 678 -137 0 708 -sense_and_sensibility_01_austen_64kb-0880 S 0 T -5909 A -5909 L 0 0 -99 0 10 -186 0 he 23 -1077 0 was 44 -1585 0 not 108 -452 0 an 119 -203 0 ill 137 -1068 0 disposed 200 -454 0 young 222 -475 0 man 263 -310 0 286 -sense_and_sensibility_01_austen_64kb-0890 S 0 T -10085 A -10085 L 0 0 -300 0 16 -853 0 unless 48 -285 0 to 59 -339 0 be 75 -534 0 rather 111 -895 0 cold 163 -1023 0 hearted 211 -421 0 and 228 -469 0 rather 267 -1811 0 selfish 350 -533 0 is 376 -229 0 to 387 -180 0 be 405 -453 0 ill 426 -1545 0 disposed 497 -215 0 517 -sense_and_sensibility_01_austen_64kb-0920 S 0 T -11790 A -11790 L 0 0 -98 0 10 -308 0 had 32 -303 0 he 42 -777 0 married 87 -107 0 a 91 -528 0 more 130 -227 0 a 133 -1308 0 amiable 189 -1159 0 woman 237 -369 0 he 259 -518 0 might 286 -364 0 have 307 -641 0 been 324 -401 0 made 357 -604 0 still 395 -409 0 more 413 -1476 0 
respectable 488 -384 0 than 500 -328 0 he 509 -1267 0 was 571 -214 0 591 -sense_and_sensibility_01_austen_64kb-0930 S 0 T -6393 A -6393 L 0 0 -156 0 7 -230 0 he 24 -697 0 might 50 -446 0 even 78 -561 0 have 93 -651 0 been 119 -595 0 made 156 -1141 0 amiable 213 -1639 0 himself 288 -277 0 313 +sense_and_sensibility_01_austen_64kb-0870 S 0 T -8437760 A -8437760 L 0 0 -573440 0 20 -502784 0 and(2) 37 -319488 0 mister 63 -224256 0 john 98 -442368 0 dashwood 158 -233472 0 had 184 -463872 0 then 225 -322560 0 leisure(2) 271 -291840 0 to(3) 289 -377856 0 consider 344 -436224 0 how 400 -289792 0 much 433 -217088 0 there 452 -301056 0 might 479 -130048 0 be 494 -1054720 0 prudently 546 -92160 0 in 556 -259072 0 his 575 -246784 0 power 604 -179200 0 to(2) 614 -332800 0 do 635 -233472 0 for 661 -456704 0 them(2) 679 -456704 0 708 +sense_and_sensibility_01_austen_64kb-0880 S 0 T -3303424 A -3303424 L 0 0 -558080 0 20 -134144 0 he 34 -180224 0 was(2) 56 -607232 0 not 113 -293888 0 an(2) 130 -152576 0 ill 148 -638976 0 disposed 211 -224256 0 young 233 -257024 0 man 274 -257024 0 297 +sense_and_sensibility_01_austen_64kb-0890 S 0 T -6656000 A -6656000 L 0 0 -534528 0 27 -407552 0 unless 59 -109568 0 to(3) 70 -125952 0 be 86 -402432 0 rather(2) 122 -673792 0 cold 174 -710656 0 hearted(2) 222 -311296 0 and 239 -375808 0 rather 278 -973824 0 selfish 363 -344064 0 is 388 -93184 0 to(3) 398 -178176 0 be 416 -139264 0 ill 437 -637952 0 disposed 529 -637952 0 529 +sense_and_sensibility_01_austen_64kb-0920 S 0 T -7218176 A -7218176 L 0 0 -464896 0 22 -159744 0 had 44 -208896 0 he 54 -381952 0 married 98 -78848 0 a 103 -354304 0 more 141 -107520 0 a(2) 146 -562176 0 amiable 201 -551936 0 woman 249 -164864 0 he 271 -243712 0 might 300 -198656 0 have 319 -397312 0 been 336 -219136 0 made 369 -327680 0 still 407 -186368 0 more 425 -702464 0 respectable 500 -263168 0 than(2) 513 -229376 0 he 521 -707584 0 was 583 -707584 0 603 +sense_and_sensibility_01_austen_64kb-0930 S 0 T -4573184 A 
-4573184 L 0 0 -560128 0 21 -160768 0 he 38 -338944 0 might 64 -280576 0 even 92 -393216 0 have 107 -349184 0 been 133 -338944 0 made 170 -451584 0 amiable 227 -849920 0 himself 302 -849920 0 327 diff --git a/test/data/librivox/test-main.json b/test/data/librivox/test-main.json index ec05cfd89..b1f7fa1c3 100644 --- a/test/data/librivox/test-main.json +++ b/test/data/librivox/test-main.json @@ -1,5 +1,5 @@ -{"b":0.240,"d":6.660,"p":0.000,"t":"mr john s. would and then a leisure to consider how watch there might be pretty late in his power to do for fun","w":[{"b":0.240,"d":0.070,"p":1.000,"t":""},{"b":0.310,"d":0.320,"p":0.996,"t":"mr"},{"b":0.630,"d":0.400,"p":0.906,"t":"john"},{"b":1.030,"d":0.310,"p":0.206,"t":"s."},{"b":1.340,"d":0.250,"p":0.902,"t":"would"},{"b":1.590,"d":0.250,"p":0.435,"t":"and(2)"},{"b":1.840,"d":0.360,"p":0.561,"t":"then"},{"b":2.200,"d":0.070,"p":0.190,"t":"a(2)"},{"b":2.270,"d":0.450,"p":0.217,"t":"leisure(2)"},{"b":2.720,"d":0.180,"p":0.676,"t":"to"},{"b":2.900,"d":0.540,"p":1.000,"t":"consider"},{"b":3.440,"d":0.510,"p":0.813,"t":"how"},{"b":3.950,"d":0.030,"p":0.240,"t":""},{"b":3.980,"d":0.350,"p":0.213,"t":"watch"},{"b":4.330,"d":0.190,"p":0.180,"t":"there"},{"b":4.520,"d":0.250,"p":0.592,"t":"might"},{"b":4.770,"d":0.170,"p":1.000,"t":"be"},{"b":4.940,"d":0.380,"p":0.502,"t":"pretty"},{"b":5.320,"d":0.160,"p":0.113,"t":"late"},{"b":5.480,"d":0.080,"p":0.550,"t":"in"},{"b":5.560,"d":0.180,"p":0.494,"t":"his"},{"b":5.740,"d":0.300,"p":0.993,"t":"power"},{"b":6.040,"d":0.100,"p":0.741,"t":"to(2)"},{"b":6.140,"d":0.200,"p":0.962,"t":"do"},{"b":6.340,"d":0.280,"p":0.969,"t":"for"},{"b":6.620,"d":0.150,"p":0.006,"t":"fun"},{"b":6.770,"d":0.120,"p":1.000,"t":""}]} -{"b":7.380,"d":2.640,"p":0.001,"t":"it was not until this blows young 
man","w":[{"b":7.380,"d":0.030,"p":1.000,"t":""},{"b":7.410,"d":0.060,"p":0.836,"t":"it"},{"b":7.470,"d":0.180,"p":0.991,"t":"was(2)"},{"b":7.650,"d":0.430,"p":0.997,"t":"not"},{"b":8.080,"d":0.150,"p":0.521,"t":""},{"b":8.230,"d":0.350,"p":0.615,"t":"until"},{"b":8.580,"d":0.200,"p":0.139,"t":"this"},{"b":8.780,"d":0.380,"p":0.030,"t":"blows"},{"b":9.160,"d":0.270,"p":0.082,"t":"young"},{"b":9.430,"d":0.410,"p":1.000,"t":"man"},{"b":9.840,"d":0.170,"p":1.000,"t":""}]} -{"b":10.380,"d":4.770,"p":0.000,"t":"less to be rather cold hearted and rather selfish is to the oldest those","w":[{"b":10.380,"d":0.030,"p":1.000,"t":""},{"b":10.410,"d":0.270,"p":0.103,"t":"less"},{"b":10.680,"d":0.110,"p":0.138,"t":"to(3)"},{"b":10.790,"d":0.160,"p":0.958,"t":"be"},{"b":10.950,"d":0.360,"p":0.581,"t":"rather(2)"},{"b":11.310,"d":0.520,"p":0.987,"t":"cold"},{"b":11.830,"d":0.480,"p":0.882,"t":"hearted(2)"},{"b":12.310,"d":0.170,"p":0.347,"t":"and"},{"b":12.480,"d":0.390,"p":0.847,"t":"rather"},{"b":12.870,"d":0.810,"p":1.000,"t":"selfish"},{"b":13.680,"d":0.040,"p":0.787,"t":""},{"b":13.720,"d":0.240,"p":0.913,"t":"is"},{"b":13.960,"d":0.110,"p":0.796,"t":"to(3)"},{"b":14.070,"d":0.230,"p":0.938,"t":"the(2)"},{"b":14.300,"d":0.380,"p":0.862,"t":"oldest"},{"b":14.680,"d":0.420,"p":0.735,"t":"those"},{"b":15.100,"d":0.040,"p":1.000,"t":""}]} -{"b":15.690,"d":5.310,"p":0.000,"t":"i remarried a more amiable woman he might have been made still more respectable that he 
was","w":[{"b":15.690,"d":0.030,"p":0.999,"t":""},{"b":15.720,"d":0.080,"p":0.458,"t":"i"},{"b":15.800,"d":0.570,"p":0.076,"t":"remarried"},{"b":16.370,"d":0.050,"p":0.176,"t":"a"},{"b":16.420,"d":0.380,"p":0.988,"t":"more"},{"b":16.800,"d":0.600,"p":1.000,"t":"amiable"},{"b":17.400,"d":0.480,"p":0.828,"t":"woman"},{"b":17.880,"d":0.220,"p":0.999,"t":"he"},{"b":18.100,"d":0.270,"p":1.000,"t":"might"},{"b":18.370,"d":0.210,"p":1.000,"t":"have"},{"b":18.580,"d":0.170,"p":0.567,"t":"been(2)"},{"b":18.750,"d":0.330,"p":0.988,"t":"made"},{"b":19.080,"d":0.390,"p":1.000,"t":"still"},{"b":19.470,"d":0.170,"p":1.000,"t":"more"},{"b":19.640,"d":0.750,"p":0.772,"t":"respectable"},{"b":20.390,"d":0.130,"p":0.735,"t":"that"},{"b":20.520,"d":0.080,"p":0.693,"t":"he"},{"b":20.600,"d":0.310,"p":0.767,"t":"was"},{"b":20.910,"d":0.080,"p":1.000,"t":""}]} -{"b":21.720,"d":2.760,"p":0.001,"t":"you might even have been made the amiable himself","w":[{"b":21.720,"d":0.030,"p":1.000,"t":""},{"b":21.750,"d":0.090,"p":0.212,"t":"you"},{"b":21.840,"d":0.230,"p":0.966,"t":"might"},{"b":22.070,"d":0.290,"p":1.000,"t":"even"},{"b":22.360,"d":0.150,"p":0.324,"t":"have"},{"b":22.510,"d":0.260,"p":0.737,"t":"been"},{"b":22.770,"d":0.320,"p":0.970,"t":"made"},{"b":23.090,"d":0.080,"p":0.194,"t":"the"},{"b":23.170,"d":0.540,"p":0.156,"t":"amiable"},{"b":23.710,"d":0.670,"p":0.711,"t":"himself"},{"b":24.380,"d":0.090,"p":1.000,"t":""}]} +{"b":0.240,"d":6.660,"p":0.000,"t":"mr john s. 
would and then a leisure to consider how watch there might be pretty late in his power to do for fun","w":[{"b":0.240,"d":0.070,"p":0.999,"t":""},{"b":0.310,"d":0.320,"p":0.997,"t":"mr"},{"b":0.630,"d":0.400,"p":0.896,"t":"john"},{"b":1.030,"d":0.310,"p":0.193,"t":"s."},{"b":1.340,"d":0.250,"p":0.911,"t":"would"},{"b":1.590,"d":0.250,"p":0.435,"t":"and(2)"},{"b":1.840,"d":0.360,"p":0.561,"t":"then"},{"b":2.200,"d":0.070,"p":0.197,"t":"a(2)"},{"b":2.270,"d":0.450,"p":0.196,"t":"leisure(2)"},{"b":2.720,"d":0.180,"p":1.000,"t":"to"},{"b":2.900,"d":0.540,"p":1.000,"t":"consider"},{"b":3.440,"d":0.460,"p":0.666,"t":"how"},{"b":3.900,"d":0.430,"p":0.305,"t":"watch"},{"b":4.330,"d":0.190,"p":0.110,"t":"there"},{"b":4.520,"d":0.250,"p":0.767,"t":"might"},{"b":4.770,"d":0.170,"p":1.000,"t":"be"},{"b":4.940,"d":0.380,"p":0.460,"t":"pretty"},{"b":5.320,"d":0.140,"p":0.144,"t":"late"},{"b":5.460,"d":0.100,"p":0.587,"t":"in"},{"b":5.560,"d":0.180,"p":0.514,"t":"his"},{"b":5.740,"d":0.300,"p":0.997,"t":"power"},{"b":6.040,"d":0.100,"p":0.742,"t":"to(2)"},{"b":6.140,"d":0.200,"p":0.964,"t":"do"},{"b":6.340,"d":0.280,"p":0.971,"t":"for"},{"b":6.620,"d":0.150,"p":0.007,"t":"fun"},{"b":6.770,"d":0.120,"p":1.000,"t":""}]} +{"b":7.380,"d":2.640,"p":0.001,"t":"it was not until this blows young man","w":[{"b":7.380,"d":0.030,"p":1.000,"t":""},{"b":7.410,"d":0.060,"p":0.846,"t":"it"},{"b":7.470,"d":0.180,"p":0.999,"t":"was(2)"},{"b":7.650,"d":0.430,"p":0.997,"t":"not"},{"b":8.080,"d":0.150,"p":0.519,"t":""},{"b":8.230,"d":0.350,"p":0.600,"t":"until"},{"b":8.580,"d":0.200,"p":0.151,"t":"this"},{"b":8.780,"d":0.380,"p":0.033,"t":"blows"},{"b":9.160,"d":0.270,"p":0.072,"t":"young"},{"b":9.430,"d":0.410,"p":1.000,"t":"man"},{"b":9.840,"d":0.170,"p":1.000,"t":""}]} +{"b":10.350,"d":4.830,"p":0.000,"t":"less to be rather cold hearted and rather selfish is to the oldest 
those","w":[{"b":10.350,"d":0.030,"p":1.000,"t":""},{"b":10.380,"d":0.300,"p":0.269,"t":"less"},{"b":10.680,"d":0.110,"p":0.428,"t":"to(3)"},{"b":10.790,"d":0.160,"p":0.665,"t":"be"},{"b":10.950,"d":0.360,"p":0.418,"t":"rather(2)"},{"b":11.310,"d":0.520,"p":0.989,"t":"cold"},{"b":11.830,"d":0.480,"p":0.883,"t":"hearted(2)"},{"b":12.310,"d":0.170,"p":0.349,"t":"and"},{"b":12.480,"d":0.390,"p":0.827,"t":"rather"},{"b":12.870,"d":0.810,"p":1.000,"t":"selfish"},{"b":13.680,"d":0.040,"p":0.774,"t":""},{"b":13.720,"d":0.240,"p":0.905,"t":"is"},{"b":13.960,"d":0.110,"p":0.795,"t":"to(3)"},{"b":14.070,"d":0.230,"p":0.926,"t":"the(2)"},{"b":14.300,"d":0.380,"p":0.818,"t":"oldest"},{"b":14.680,"d":0.460,"p":0.693,"t":"those"},{"b":15.140,"d":0.030,"p":1.000,"t":""}]} +{"b":15.690,"d":5.310,"p":0.002,"t":"i married a more amiable woman he might have been made still more respectable that he was","w":[{"b":15.690,"d":0.030,"p":1.000,"t":""},{"b":15.720,"d":0.210,"p":0.530,"t":"i"},{"b":15.930,"d":0.440,"p":0.762,"t":"married"},{"b":16.370,"d":0.050,"p":0.178,"t":"a"},{"b":16.420,"d":0.380,"p":0.990,"t":"more"},{"b":16.800,"d":0.600,"p":1.001,"t":"amiable"},{"b":17.400,"d":0.480,"p":0.823,"t":"woman"},{"b":17.880,"d":0.220,"p":0.999,"t":"he"},{"b":18.100,"d":0.270,"p":1.001,"t":"might"},{"b":18.370,"d":0.210,"p":1.001,"t":"have"},{"b":18.580,"d":0.170,"p":0.561,"t":"been(2)"},{"b":18.750,"d":0.330,"p":0.989,"t":"made"},{"b":19.080,"d":0.380,"p":1.000,"t":"still"},{"b":19.460,"d":0.180,"p":1.000,"t":"more"},{"b":19.640,"d":0.750,"p":0.768,"t":"respectable"},{"b":20.390,"d":0.130,"p":0.348,"t":"that(2)"},{"b":20.520,"d":0.070,"p":0.718,"t":"he"},{"b":20.590,"d":0.320,"p":0.763,"t":"was"},{"b":20.910,"d":0.080,"p":1.000,"t":""}]} +{"b":21.720,"d":2.760,"p":0.001,"t":"you might even have been made the amiable 
himself","w":[{"b":21.720,"d":0.030,"p":1.000,"t":""},{"b":21.750,"d":0.090,"p":0.213,"t":"you"},{"b":21.840,"d":0.230,"p":0.964,"t":"might"},{"b":22.070,"d":0.290,"p":1.000,"t":"even"},{"b":22.360,"d":0.150,"p":0.322,"t":"have"},{"b":22.510,"d":0.260,"p":0.749,"t":"been"},{"b":22.770,"d":0.320,"p":0.973,"t":"made"},{"b":23.090,"d":0.080,"p":0.169,"t":"the"},{"b":23.170,"d":0.540,"p":0.127,"t":"amiable"},{"b":23.710,"d":0.670,"p":0.693,"t":"himself"},{"b":24.380,"d":0.090,"p":1.000,"t":""}]} diff --git a/test/regression/test-align.sh b/test/regression/test-align.sh index 24dc40ab2..a7a182b94 100755 --- a/test/regression/test-align.sh +++ b/test/regression/test-align.sh @@ -31,3 +31,4 @@ fi # Check the decoding results grep AVERAGE $bn.log +compare_table "matchseg" $data/librivox/test-align.matchseg $bn.matchseg 1000000 diff --git a/test/regression/test-main-align.sh b/test/regression/test-main-align.sh new file mode 100755 index 000000000..afff33daa --- /dev/null +++ b/test/regression/test-main-align.sh @@ -0,0 +1,44 @@ +#!/bin/bash + +set -e + +: ${CMAKE_BINARY_DIR:=$(pwd)} +. ${CMAKE_BINARY_DIR}/test/testfuncs.sh + +bn=`basename $0 .sh` + +echo "Test: $bn" +for wav in $data/librivox/*.wav; do \ + utt=$(basename $wav .wav) + run_program pocketsphinx \ + -loglevel INFO \ + -hmm $model/en-us/en-us \ + -dict $model/en-us/cmudict-en-us.dict \ + align $wav $(cat $data/librivox/$utt.txt) \ + > $utt.json 2>>$bn.log + run_program pocketsphinx \ + -loglevel INFO \ + -phone_align yes \ + -hmm $model/en-us/en-us \ + -dict $model/en-us/cmudict-en-us.dict \ + align $wav $(cat $data/librivox/$utt.txt) \ + > $utt.phone.json 2>>$bn.phone.log + run_program pocketsphinx \ + -loglevel INFO \ + -state_align yes \ + -hmm $model/en-us/en-us \ + -dict $model/en-us/cmudict-en-us.dict \ + align $wav $(cat $data/librivox/$utt.txt) \ + > $utt.state.json 2>>$bn.state.log + + # Test whether it actually completed + if [ $? 
= 0 ]; then + pass "run" + else + fail "run" + fi + # Check the decoding results + compare_table "match" $data/librivox/$utt.json $utt.json 1000000 + compare_table "match" $data/librivox/$utt.phone.json $utt.phone.json 1000000 + compare_table "match" $data/librivox/$utt.state.json $utt.state.json 1000000 +done diff --git a/test/regression/test-main.sh b/test/regression/test-main.sh index dc3eb22ee..8e6678d29 100755 --- a/test/regression/test-main.sh +++ b/test/regression/test-main.sh @@ -11,7 +11,7 @@ sox $data/librivox/*.wav $(run_program pocketsphinx soxflags) | \ -loglevel INFO \ -hmm $model/en-us/en-us \ -lm $model/en-us/en-us.lm.bin \ - -dict $model/en-us/cmudict-en-us.dict \ + -dict $model/en-us/cmudict-en-us.dict - \ > $bn.json 2>$bn.log # Test whether it actually completed diff --git a/test/unit/CMakeLists.txt b/test/unit/CMakeLists.txt index 3c3e2a0ee..af108b7da 100644 --- a/test/unit/CMakeLists.txt +++ b/test/unit/CMakeLists.txt @@ -31,6 +31,7 @@ set(TESTS test_simple test_state_align test_vad + test_word_align test_endpointer ) foreach(TEST_EXECUTABLE ${TESTS}) diff --git a/test/unit/test_alignment.c b/test/unit/test_alignment.c index df4ead050..03475cf4c 100644 --- a/test/unit/test_alignment.c +++ b/test/unit/test_alignment.c @@ -1,6 +1,6 @@ #include -#include "ps_alignment.h" +#include "ps_alignment_internal.h" #include "pocketsphinx_internal.h" #include "test_macros.h" @@ -12,8 +12,9 @@ main(int argc, char *argv[]) dict_t *dict; dict2pid_t *d2p; ps_alignment_t *al; - ps_alignment_iter_t *itor; + ps_alignment_iter_t *itor, *itor2; cmd_ln_t *config; + int score, start, duration; (void)argc; (void)argv; @@ -25,16 +26,21 @@ main(int argc, char *argv[]) d2p = dict2pid_build(mdef, dict); al = ps_alignment_init(d2p); - TEST_EQUAL(1, ps_alignment_add_word(al, dict_wordid(dict, ""), 0)); - TEST_EQUAL(2, ps_alignment_add_word(al, dict_wordid(dict, "hello"), 0)); - TEST_EQUAL(3, ps_alignment_add_word(al, dict_wordid(dict, "world"), 0)); - TEST_EQUAL(4, 
ps_alignment_add_word(al, dict_wordid(dict, ""), 0)); + TEST_EQUAL(1, ps_alignment_add_word(al, dict_wordid(dict, ""), 0, 0)); + TEST_EQUAL(2, ps_alignment_add_word(al, dict_wordid(dict, "hello"), 0, 0)); + TEST_EQUAL(3, ps_alignment_add_word(al, dict_wordid(dict, "world"), 0, 0)); + TEST_EQUAL(4, ps_alignment_add_word(al, dict_wordid(dict, ""), 0, 0)); TEST_EQUAL(0, ps_alignment_populate(al)); itor = ps_alignment_words(al); TEST_EQUAL(ps_alignment_iter_get(itor)->id.wid, dict_wordid(dict, "")); itor = ps_alignment_iter_next(itor); TEST_EQUAL(ps_alignment_iter_get(itor)->id.wid, dict_wordid(dict, "hello")); + score = ps_alignment_iter_seg(itor, &start, &duration); + TEST_EQUAL(0, strcmp(ps_alignment_iter_name(itor), "hello")); + TEST_EQUAL(score, 0); + TEST_EQUAL(start, 0); + TEST_EQUAL(duration, 0); itor = ps_alignment_iter_next(itor); TEST_EQUAL(ps_alignment_iter_get(itor)->id.wid, dict_wordid(dict, "world")); itor = ps_alignment_iter_next(itor); @@ -47,6 +53,21 @@ main(int argc, char *argv[]) ps_alignment_n_phones(al), ps_alignment_n_states(al)); + itor = ps_alignment_words(al); + itor = ps_alignment_iter_next(itor); + TEST_EQUAL(ps_alignment_iter_get(itor)->id.wid, dict_wordid(dict, "hello")); + itor2 = ps_alignment_iter_children(itor); + ps_alignment_iter_free(itor); + TEST_EQUAL(0, strcmp(ps_alignment_iter_name(itor2), "HH")); + itor2 = ps_alignment_iter_next(itor2); + TEST_EQUAL(0, strcmp(ps_alignment_iter_name(itor2), "AH")); + itor2 = ps_alignment_iter_next(itor2); + TEST_EQUAL(0, strcmp(ps_alignment_iter_name(itor2), "L")); + itor2 = ps_alignment_iter_next(itor2); + TEST_EQUAL(0, strcmp(ps_alignment_iter_name(itor2), "OW")); + itor2 = ps_alignment_iter_next(itor2); + TEST_EQUAL(NULL, itor2); + ps_alignment_free(al); dict_free(dict); dict2pid_free(d2p); diff --git a/test/unit/test_state_align.c b/test/unit/test_state_align.c index db5668372..c68f2720e 100644 --- a/test/unit/test_state_align.c +++ b/test/unit/test_state_align.c @@ -2,7 +2,7 @@ /* -*- 
c-basic-offset: 4 -*- */ #include -#include "ps_alignment.h" +#include "ps_alignment_internal.h" #include "state_align_search.h" #include "pocketsphinx_internal.h" @@ -35,25 +35,6 @@ do_search(ps_search_t *search, acmod_t *acmod) return ps_search_finish(search); } -static int -do_decode(ps_decoder_t *ps) -{ - FILE *rawfh; - const char *hyp; - long nsamp; - int score; - - TEST_ASSERT(rawfh = fopen(DATADIR "/goforward.raw", "rb")); - nsamp = ps_decode_raw(ps, rawfh, -1); - hyp = ps_get_hyp(ps, &score); - printf("%s (%ld samples, %d score)\n", hyp, nsamp, score); - TEST_ASSERT(nsamp > 0); - TEST_EQUAL(0, strcmp(hyp, " go forward ten meters ")); - fclose(rawfh); - - return 0; -} - int main(int argc, char *argv[]) { @@ -64,9 +45,8 @@ main(int argc, char *argv[]) ps_alignment_t *al; ps_alignment_iter_t *itor; ps_search_t *search; - ps_seg_t *seg; cmd_ln_t *config; - int i, sf, ef, last_ef; + int i; (void)argc; (void)argv; @@ -82,12 +62,12 @@ main(int argc, char *argv[]) acmod = ps->acmod; al = ps_alignment_init(d2p); - TEST_EQUAL(1, ps_alignment_add_word(al, dict_wordid(dict, ""), 0)); - TEST_EQUAL(2, ps_alignment_add_word(al, dict_wordid(dict, "go"), 0)); - TEST_EQUAL(3, ps_alignment_add_word(al, dict_wordid(dict, "forward"), 0)); - TEST_EQUAL(4, ps_alignment_add_word(al, dict_wordid(dict, "ten"), 0)); - TEST_EQUAL(5, ps_alignment_add_word(al, dict_wordid(dict, "meters"), 0)); - TEST_EQUAL(6, ps_alignment_add_word(al, dict_wordid(dict, ""), 0)); + TEST_EQUAL(1, ps_alignment_add_word(al, dict_wordid(dict, ""), 0, 0)); + TEST_EQUAL(2, ps_alignment_add_word(al, dict_wordid(dict, "go"), 0, 0)); + TEST_EQUAL(3, ps_alignment_add_word(al, dict_wordid(dict, "forward"), 0, 0)); + TEST_EQUAL(4, ps_alignment_add_word(al, dict_wordid(dict, "ten"), 0, 0)); + TEST_EQUAL(5, ps_alignment_add_word(al, dict_wordid(dict, "meters"), 0, 0)); + TEST_EQUAL(6, ps_alignment_add_word(al, dict_wordid(dict, ""), 0, 0)); TEST_EQUAL(0, ps_alignment_populate(al)); TEST_ASSERT(search = 
state_align_search_init("state_align", config, acmod, al)); @@ -126,56 +106,6 @@ main(int argc, char *argv[]) ps_search_free(search); ps_alignment_free(al); - - /* Test alignment through the decoder/search API */ - TEST_EQUAL(0, ps_add_align(ps, "align", "go forward ten meters")); - ps_activate_search(ps, "align"); - do_decode(ps); - TEST_EQUAL(0, strcmp(ps_get_hyp(ps, &i), " go forward ten meters ")); - seg = ps_seg_iter(ps); - ps_seg_frames(seg, &sf, &ef); - printf("%s %d %d\n", ps_seg_word(seg), sf, ef); - TEST_EQUAL(0, strcmp("", ps_seg_word(seg))); - TEST_ASSERT(ef > sf); - last_ef = ef; - seg = ps_seg_next(seg); - ps_seg_frames(seg, &sf, &ef); - printf("%s %d %d\n", ps_seg_word(seg), sf, ef); - TEST_EQUAL(0, strcmp("go", ps_seg_word(seg))); - TEST_ASSERT(sf > last_ef); - TEST_ASSERT(ef > sf); - last_ef = ef; - seg = ps_seg_next(seg); - ps_seg_frames(seg, &sf, &ef); - printf("%s %d %d\n", ps_seg_word(seg), sf, ef); - TEST_EQUAL(0, strcmp("forward", ps_seg_word(seg))); - TEST_ASSERT(sf > last_ef); - TEST_ASSERT(ef > sf); - last_ef = ef; - seg = ps_seg_next(seg); - ps_seg_frames(seg, &sf, &ef); - printf("%s %d %d\n", ps_seg_word(seg), sf, ef); - TEST_EQUAL(0, strcmp("ten", ps_seg_word(seg))); - TEST_ASSERT(sf > last_ef); - TEST_ASSERT(ef > sf); - last_ef = ef; - seg = ps_seg_next(seg); - ps_seg_frames(seg, &sf, &ef); - printf("%s %d %d\n", ps_seg_word(seg), sf, ef); - TEST_EQUAL(0, strcmp("meters", ps_seg_word(seg))); - TEST_ASSERT(sf > last_ef); - TEST_ASSERT(ef > sf); - last_ef = ef; - seg = ps_seg_next(seg); - ps_seg_frames(seg, &sf, &ef); - printf("%s %d %d\n", ps_seg_word(seg), sf, ef); - TEST_EQUAL(0, strcmp("", ps_seg_word(seg))); - TEST_ASSERT(sf > last_ef); - TEST_ASSERT(ef > sf); - last_ef = ef; - seg = ps_seg_next(seg); - TEST_EQUAL(NULL, seg); - ps_free(ps); ps_config_free(config); return 0; diff --git a/test/unit/test_word_align.c b/test/unit/test_word_align.c new file mode 100644 index 000000000..e6d5e0d8e --- /dev/null +++ 
b/test/unit/test_word_align.c
@@ -0,0 +1,231 @@
+/* -*- c-basic-offset: 4 -*- */
+/* NOTE(review): this directive had no header name; <pocketsphinx.h> is
+ * presumed from the ps_* calls below -- confirm. */
+#include <pocketsphinx.h>
+
+#include "test_macros.h"
+#include "pocketsphinx_internal.h"
+
+//#define AUSTEN_TEXT "and mister john dashwood had then leisure to consider how much there might be prudently in his power to do for them"
+#define AUSTEN_TEXT "he was not an ill disposed young man"
+
+/**
+ * Decode the Austen test recording with the decoder's current setup.
+ *
+ * Opens the WAV fixture, seeks past its first 44 bytes, passes the rest
+ * to ps_decode_raw(), prints the hypothesis and asserts that it is
+ * AUSTEN_TEXT, either bare or padded with one space on each side.
+ *
+ * @param ps Decoder to run the utterance through.
+ * @return 0 whenever control returns to the caller.
+ */
+static int
+do_decode(ps_decoder_t *ps)
+{
+    FILE *rawfh;
+    const char *hyp;
+    long nsamp;
+    int score;
+
+    TEST_ASSERT(rawfh = fopen(DATADIR "/librivox/sense_and_sensibility_01_austen_64kb-0880.wav", "rb"));
+    /* 44 bytes is presumably the WAV header of this fixture (confirm);
+     * make sure the seek succeeded before decoding from that offset. */
+    TEST_EQUAL(0, fseek(rawfh, 44, SEEK_SET));
+    nsamp = ps_decode_raw(ps, rawfh, -1);
+    TEST_ASSERT(nsamp > 0);
+    hyp = ps_get_hyp(ps, &score);
+    /* Reject a missing hypothesis before it reaches printf/strcmp. */
+    TEST_ASSERT(hyp != NULL);
+    printf("%s (%ld samples, %d score)\n", hyp, nsamp, score);
+    /* NOTE(review): the padded form presumably stands for the text
+     * wrapped in sentence-boundary markers -- confirm the literals. */
+    TEST_ASSERT((0 == strcmp(hyp, AUSTEN_TEXT)
+                 || 0 == strcmp(hyp, " " AUSTEN_TEXT " ")));
+    fclose(rawfh);
+
+    return 0;
+}
+
+/**
+ * Check word alignment, then subword alignment, of the same utterance.
+ *
+ * The first pass aligns AUSTEN_TEXT and records each segment's frames.
+ * The second pass calls ps_set_alignment(ps, NULL), decodes again, and
+ * checks that words keep their frames while phones and states nest
+ * inside their parents and every level is contiguous.
+ */
+int
+main(int argc, char *argv[])
+{
+    ps_decoder_t *ps;
+    ps_alignment_t *al;
+    ps_alignment_iter_t *itor;
+    ps_seg_t *seg;
+    ps_config_t *config;
+    int i, n_seg, sf, ef, last_ef;
+    int *sfs, *efs;
+
+    (void)argc;
+    (void)argv;
+    err_set_loglevel(ERR_INFO);
+    TEST_ASSERT(config =
+                ps_config_parse_json(
+                    NULL,
+                    "loglevel: INFO, bestpath: false,"
+                    "hmm: \"" MODELDIR "/en-us/en-us\","
+                    "dict: \"" MODELDIR "/en-us/cmudict-en-us.dict\","
+                    "samprate: 16000"));
+    TEST_ASSERT(ps = ps_init(config));
+    /* Test alignment through the decoder/search API */
+    TEST_EQUAL(0, ps_set_align_text(ps, AUSTEN_TEXT));
+    do_decode(ps);
+    TEST_EQUAL(0, strcmp(ps_get_hyp(ps, &i), AUSTEN_TEXT));
+    printf("Word alignment:\n");
+    i = 0; last_ef = -1;
+    for (seg = ps_seg_iter(ps); seg; seg = ps_seg_next(seg)) {
+        ps_seg_frames(seg, &sf, &ef);
+        printf("%s %d %d\n", ps_seg_word(seg), sf, ef);
+        TEST_ASSERT(sf == last_ef + 1);
+        TEST_ASSERT(ef > sf);
+        last_ef = ef;
+        i++;
+    }
+    TEST_EQUAL(NULL, seg);
+    n_seg = i;
+
+    /* Save start and end points for comparison */
+    sfs = ckd_calloc(n_seg, sizeof(*sfs));
+    efs = ckd_calloc(n_seg, sizeof(*efs));
+    i = 0;
+    for (seg = ps_seg_iter(ps); seg; seg = ps_seg_next(seg)) {
+        ps_seg_frames(seg, &sfs[i], &efs[i]);
+        i++;
+    }
+
+    /* Test second pass alignment.  Ensure that alignment and seg give
+     * the same results and that phones have constraints propagated to
+     * them. */
+    printf("Converted to subword alignment constraints:\n");
+    TEST_EQUAL(0, ps_set_alignment(ps, NULL));
+    TEST_ASSERT(al = ps_get_alignment(ps));
+    for (i = 0, seg = ps_seg_iter(ps), itor = ps_alignment_words(al); itor;
+         i++, seg = ps_seg_next(seg), itor = ps_alignment_iter_next(itor)) {
+        int score, start, duration;
+        ps_alignment_iter_t *pitor;
+
+        /* Validate the segment and index before either is used. */
+        TEST_ASSERT(seg);
+        TEST_ASSERT(i < n_seg);
+        ps_seg_frames(seg, &sf, &ef);
+        score = ps_alignment_iter_seg(itor, &start, &duration);
+        printf("%s %d %d %s %d %d\n", ps_seg_word(seg), sf, ef,
+               ps_alignment_iter_name(itor), start, duration);
+        TEST_EQUAL(0, strcmp(ps_seg_word(seg), ps_alignment_iter_name(itor)));
+        TEST_EQUAL(sf, sfs[i]);
+        TEST_EQUAL(ef, efs[i]);
+        TEST_EQUAL(0, score);
+        TEST_EQUAL(start, sf);
+        TEST_EQUAL(duration, ef - sf + 1);
+        /* Durations are propagated down from words, each phone will
+         * have the same duration as its parent, and these are used as
+         * constraints to alignment. */
+        for (pitor = ps_alignment_iter_children(itor); pitor;
+             pitor = ps_alignment_iter_next(pitor)) {
+            score = ps_alignment_iter_seg(pitor, &start, &duration);
+            TEST_EQUAL(0, score);
+            TEST_EQUAL(start, sf);
+            TEST_EQUAL(duration, ef - sf + 1);
+        }
+    }
+
+    do_decode(ps);
+    TEST_ASSERT(al = ps_get_alignment(ps));
+    printf("Subword alignment:\n");
+    /* It should have durations assigned (and properly constrained). */
+    for (i = 0, seg = ps_seg_iter(ps), itor = ps_alignment_words(al); itor;
+         i++, seg = ps_seg_next(seg), itor = ps_alignment_iter_next(itor)) {
+        int score, start, duration;
+        ps_alignment_iter_t *pitor;
+
+        /* Validate the segment and index before either is used. */
+        TEST_ASSERT(seg);
+        TEST_ASSERT(i < n_seg);
+        ps_seg_frames(seg, &sf, &ef);
+        score = ps_alignment_iter_seg(itor, &start, &duration);
+        printf("%s %d %d %d %d %s %d %d %d\n", ps_seg_word(seg),
+               sfs[i], efs[i], sf, ef,
+               ps_alignment_iter_name(itor), start, duration, score);
+        TEST_EQUAL(sf, sfs[i]);
+        TEST_EQUAL(ef, efs[i]);
+        TEST_ASSERT(score != 0);
+        TEST_EQUAL(start, sf);
+        TEST_EQUAL(duration, ef - sf + 1);
+
+        /* Phone segmentations should be constrained by words */
+        pitor = ps_alignment_iter_children(itor);
+        /* Require at least one phone so the checks below are meaningful. */
+        TEST_ASSERT(pitor);
+        score = ps_alignment_iter_seg(pitor, &start, &duration);
+        /* First phone should be aligned with word */
+        TEST_EQUAL(start, sf);
+        while (pitor) {
+            ps_alignment_iter_t *sitor;
+            int state_start, state_duration;
+            score = ps_alignment_iter_seg(pitor, &start, &duration);
+            printf("%s %d %d %s %d %d %d\n", ps_seg_word(seg), sf, ef,
+                   ps_alignment_iter_name(pitor), start, duration, score);
+            /* State segmentations should be constrained by phones */
+            sitor = ps_alignment_iter_children(pitor);
+            /* Require at least one state so the timing checks are meaningful. */
+            TEST_ASSERT(sitor);
+            score = ps_alignment_iter_seg(sitor, &state_start, &state_duration);
+            /* First state should be aligned with phone */
+            TEST_EQUAL(state_start, start);
+            while (sitor) {
+                score = ps_alignment_iter_seg(sitor, &state_start, &state_duration);
+                printf("%s %d %d %s %d %d %d\n", ps_seg_word(seg), sf, ef,
+                       ps_alignment_iter_name(sitor), state_start, state_duration,
+                       score);
+                sitor = ps_alignment_iter_next(sitor);
+            }
+            /* Last state should fill phone duration */
+            TEST_EQUAL(state_start + state_duration, start + duration);
+            pitor = ps_alignment_iter_next(pitor);
+        }
+        /* Last phone should fill word duration */
+        TEST_EQUAL(start + duration - 1, ef);
+    }
+
+    /* Segmentations should all be contiguous */
+    last_ef = 0;
+    for (itor = ps_alignment_words(al); itor;
+         itor = ps_alignment_iter_next(itor)) {
+        int start, duration;
+        (void)ps_alignment_iter_seg(itor, &start, &duration);
+        TEST_EQUAL(start, last_ef);
+        last_ef = start + duration;
+    }
+    last_ef = 0;
+    for (itor = ps_alignment_phones(al); itor;
+         itor = ps_alignment_iter_next(itor)) {
+        int start, duration;
+        (void)ps_alignment_iter_seg(itor, &start, &duration);
+        TEST_EQUAL(start, last_ef);
+        last_ef = start + duration;
+    }
+    last_ef = 0;
+    for (itor = ps_alignment_states(al); itor;
+         itor = ps_alignment_iter_next(itor)) {
+        int start, duration;
+        (void)ps_alignment_iter_seg(itor, &start, &duration);
+        TEST_EQUAL(start, last_ef);
+        last_ef = start + duration;
+    }
+
+    ckd_free(sfs);
+    ckd_free(efs);
+    ps_free(ps);
+    ps_config_free(config);
+
+    return 0;
+}