Skip to content

Commit

Permalink
Merge pull request #253 from cmusphinx/bigendian_bigiron_fixes
Browse files (browse the repository at this point in the history)
Fix endianness issues in binary trie LM code
  • Loading branch information
dhdaines authored Jun 9, 2022
2 parents d2aa676 + 2d30f61 commit 2c0066b
Show file tree
Hide file tree
Showing 10 changed files with 240 additions and 158 deletions.
14 changes: 12 additions & 2 deletions include/sphinxbase/bitarr.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,9 +46,19 @@

/**
* @file bitarr.h
* @brief An implementation bit array - memory efficient storage for digit int and float data.
* @brief An implementation bit array - memory
* efficient storage for digit int and float data. (FIXME: NO)
*
* Implementation of basic operations of read/write digits consuming as little space as possible.
* Implementation of basic operations of read/write digits consuming
* as little space as possible.
*
* I HAVE QUESTIONS. Why 25 and 57 bits? What are the other 7 bits
* *doing*?!? Why didn't you stop to think about architectures with
* big-endian byte ordering or strictly aligned memory access when you
* wrote this? Does it really store floats BECAUSE NO IT DOESN'T
*
* Note that because of the problems noted above data is canonically
* stored in little-endian order in memory.
*/

#ifdef __cplusplus
Expand Down
49 changes: 22 additions & 27 deletions src/lm/lm_trie.c
Original file line number Diff line number Diff line change
Expand Up @@ -384,16 +384,14 @@ lm_trie_read_ug(lm_trie_t * trie, uint32 * counts, FILE * fp)
{
size_t rv = fread(trie->unigrams, sizeof(*trie->unigrams),
(counts[0] + 1), fp);
#if defined(DEBUG_ENDIAN) || defined(WORDS_BIGENDIAN)
{
if (SWAP_LM_TRIE) {
int i;
for (i = 0; i < counts[0] + 1; ++i) {
SWAP_FLOAT32(&trie->unigrams[i].prob);
SWAP_FLOAT32(&trie->unigrams[i].bo);
SWAP_INT32(&trie->unigrams[i].next);
}
}
#endif
return rv;
}

Expand All @@ -402,38 +400,35 @@ lm_trie_read_bin(uint32 * counts, int order, FILE * fp)
{
lm_trie_t *trie = lm_trie_init(counts[0]);
trie->quant = (order > 1) ? lm_trie_quant_read_bin(fp, order) : NULL;
E_DEBUG("pos after quant: %ld\n", ftell(fp));
E_INFO("pos after quant: %ld\n", ftell(fp));
lm_trie_read_ug(trie, counts, fp);
E_DEBUG("pos after ug: %ld\n", ftell(fp));
/* It looks like quant and ngram_mem are just blobs of bits so no
swapping needed. */
E_INFO("pos after ug: %ld\n", ftell(fp));
if (order > 1) {
lm_trie_alloc_ngram(trie, counts, order);
fread(trie->ngram_mem, 1, trie->ngram_mem_size, fp);
E_DEBUG("#ngram_mem: %ld\n", trie->ngram_mem_size);
E_INFO("#ngram_mem: %ld\n", trie->ngram_mem_size);
}
return trie;
}

/*
 * Write the unigram table, unigram_count + 1 entries of trie->unigrams,
 * to fp.
 *
 * NOTE(review): this span comes from a diff rendering without +/- markers,
 * so it holds two alternative bodies back to back: one selected at compile
 * time by DEBUG_ENDIAN / WORDS_BIGENDIAN and one selected at run time by
 * SWAP_LM_TRIE. Presumably the preprocessor variant is the removed side of
 * the change and the SWAP_LM_TRIE variant the added side -- confirm against
 * the commit before reading this as a single function body.
 *
 * Swapping path: each entry is copied, the copy's prob, bo and next fields
 * are run through SWAP_FLOAT32 / SWAP_INT32 (defined elsewhere), and the
 * copy is written on its own.
 * Non-swapping path: the whole array is written with a single fwrite.
 *
 * Returns the loop count (unigram_count + 1) from the swapping path when
 * every write succeeds, or the fwrite item count from the non-swapping
 * path. A failed write in the swapping path returns -1 converted to size_t.
 */
static size_t
lm_trie_write_ug(lm_trie_t * trie, uint32 unigram_count, FILE * fp)
{
#if defined(DEBUG_ENDIAN) || defined(WORDS_BIGENDIAN)
int i;
for (i = 0; i < unigram_count + 1; ++i) {
/* Swap a local copy so the in-memory table itself is not modified. */
unigram_t ug = trie->unigrams[i];
SWAP_FLOAT32(&ug.prob);
SWAP_FLOAT32(&ug.bo);
SWAP_INT32(&ug.next);
if (fwrite(&ug, sizeof(ug), 1, fp) != 1)
/* Return type is size_t, so this -1 reaches the caller as SIZE_MAX. */
return -1;
}
return (size_t)i;
#else
return fwrite(trie->unigrams, sizeof(*trie->unigrams),
(unigram_count + 1), fp);

#endif
/* Runtime-selected variant of the same logic as the #if branch above. */
if (SWAP_LM_TRIE) {
int i;
for (i = 0; i < unigram_count + 1; ++i) {
/* Swap a local copy so the in-memory table itself is not modified. */
unigram_t ug = trie->unigrams[i];
SWAP_FLOAT32(&ug.prob);
SWAP_FLOAT32(&ug.bo);
SWAP_INT32(&ug.next);
if (fwrite(&ug, sizeof(ug), 1, fp) != 1)
/* Return type is size_t, so this -1 reaches the caller as SIZE_MAX. */
return -1;
}
return (size_t)i;
}
else
/* No swapping requested: write the table in one call. */
return fwrite(trie->unigrams, sizeof(*trie->unigrams),
(unigram_count + 1), fp);
}

void
Expand All @@ -442,12 +437,12 @@ lm_trie_write_bin(lm_trie_t * trie, uint32 unigram_count, FILE * fp)

if (trie->quant)
lm_trie_quant_write_bin(trie->quant, fp);
E_DEBUG("pos after quant: %ld\n", ftell(fp));
E_INFO("pos after quant: %ld\n", ftell(fp));
lm_trie_write_ug(trie, unigram_count, fp);
E_DEBUG("pos after ug: %ld\n", ftell(fp));
E_INFO("pos after ug: %ld\n", ftell(fp));
if (trie->ngram_mem) {
fwrite(trie->ngram_mem, 1, trie->ngram_mem_size, fp);
E_DEBUG("#ngram_mem: %ld\n", trie->ngram_mem_size);
E_INFO("#ngram_mem: %ld\n", trie->ngram_mem_size);
}
}

Expand Down
2 changes: 1 addition & 1 deletion src/lm/lm_trie.h
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ typedef struct longest_s {
} longest_t;

typedef struct lm_trie_s {
uint8 *ngram_mem;
uint8 *ngram_mem; /*<< This appears to be a bitarr.h bit array */
size_t ngram_mem_size;
unigram_t *unigrams;
middle_t *middle_begin;
Expand Down
Loading

0 comments on commit 2c0066b

Please sign in to comment.