Skip to content

Commit

Permalink
improvements to error handling (#17)
Browse files Browse the repository at this point in the history
* improvements to error handling

* improvements to error handling

---------

Co-authored-by: PhotoNomad0 <[email protected]>
  • Loading branch information
PhotoNomad0 and PhotoNomad0 authored May 30, 2024
1 parent 691fed2 commit 468d72e
Show file tree
Hide file tree
Showing 3 changed files with 63 additions and 56 deletions.
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "word-aligner-rcl",
"version": "1.1.1",
"version": "1.1.2",
"main": "dist/index.cjs.js",
"module": "dist/index.es.js",
"repository": "https://github.com/unfoldingWord/word-aligner-rcl.git",
Expand Down
109 changes: 57 additions & 52 deletions src/utils/alignmentHelpers.js
Original file line number Diff line number Diff line change
Expand Up @@ -141,63 +141,68 @@ export function getWordListFromVerseObjects(verseObjects) {
* @return {array} list of alignments in target text
*/
export function extractAlignmentsFromTargetVerse(alignedTargetVerse, sourceVerse) {
  // Parses the aligned target verse USFM, unmerges it against the source verse
  // objects, and attaches an `alignments` array (sourceNgram/targetNgram form)
  // to the unmerge result. Returns that result object, or null when any step
  // throws - callers must therefore null-check the return value.
  try {
    const targetVerse = usfmVerseToJson(alignedTargetVerse);
    const alignments = wordaligner.unmerge(targetVerse, sourceVerse);
    // only available when a source verse was supplied
    const originalLangWordList = sourceVerse && getOriginalLanguageListForVerseData(sourceVerse);
    const alignmentsWordList = getAlignedWordListFromAlignments(alignments.alignment);
    const targetTokens = getWordListFromVerseObjects(targetVerse);

    // clean up metadata in alignments
    if (originalLangWordList) {
      updateAlignedWordsFromOriginalWordList(originalLangWordList, alignmentsWordList);
    }

    if (alignments.alignment) { // for compatibility change alignment to alignments
      // convert occurrence(s) from string to number
      const alignments_ = alignments.alignment.map(alignment => {
        const topWords = convertOccurrences(alignment.topWords);
        const bottomWords = convertOccurrences(alignment.bottomWords);
        return {
          sourceNgram: topWords.map(topWord => { // word aligner uses sourceNgram instead of topWord
            if (originalLangWordList) {
              // locate the word in the original language list to record its index
              const pos = originalLangWordList.findIndex(item => (
                topWord.word === (item.word || item.text) &&
                // eslint-disable-next-line eqeqeq
                topWord.occurrence == item.occurrence //Tricky: we want to allow automatic conversion between string and integer because occurrence could be either
              ));
              const newSource = {
                ...topWord,
                index: pos,
                text: topWord.text || topWord.word,
              };
              delete newSource.word;
              return newSource;
            }

            // no original language list: no index can be computed, and the
            // position field is dropped along with `word`
            const newSource = {
              ...topWord,
              text: topWord.text || topWord.word,
            };
            delete newSource.word;
            delete newSource.position;
            return newSource;
          }),
          targetNgram: bottomWords.map(bottomWord => { // word aligner uses targetNgram instead of bottomWords
            const word = bottomWord.text || bottomWord.word;
            // noinspection EqualityComparisonWithCoercionJS
            const pos = targetTokens.findIndex(item => (
              word === item.text &&
              // eslint-disable-next-line eqeqeq
              bottomWord.occurrence == item.occurrence
            ));

            const newTarget = {
              ...bottomWord,
              index: pos,
              text: word,
            };
            delete newTarget.word;
            return newTarget;
          }),
        };
      });
      alignments.alignments = alignments_;
    }
    return alignments;
  } catch (e) {
    // best effort: report the failure and signal it to the caller with null
    console.warn(`extractAlignmentsFromTargetVerse()`,e)
    return null
  }
}

/**
Expand All @@ -215,7 +220,7 @@ export function addAlignmentsToTargetVerseUsingMerge(targetVerseText, verseAlign
verseAlignments.alignments, verseAlignments.wordBank, verseString, true,
);
} catch (e) {
console.log(`addAlignmentsToTargetVerseUsingMerge() - invalid alignment`, e);
console.error(`addAlignmentsToTargetVerseUsingMerge() - invalid alignment`, e);
}

if (verseObjects) {
Expand Down Expand Up @@ -315,7 +320,7 @@ export function parseUsfmToWordAlignerData(targetVerseUSFM, sourceVerseUSFM) {
const sourceVerseObjects = sourceVerseUSFM && usfmVerseToJson(sourceVerseUSFM);
let targetWords = [];
const targetVerseAlignments = extractAlignmentsFromTargetVerse(targetVerseUSFM, sourceVerseObjects);
const verseAlignments = targetVerseAlignments.alignments;
const verseAlignments = targetVerseAlignments?.alignments;
targetWords = markTargetWordsAsDisabledIfAlreadyUsedForAlignments(targetTokens, verseAlignments);
return {targetWords, verseAlignments};
}
Expand Down
8 changes: 5 additions & 3 deletions src/utils/usfmHelpers.js
Original file line number Diff line number Diff line change
Expand Up @@ -149,10 +149,12 @@ export const removeUsfmMarkers = (targetVerseText) => {
};

/**
 * Converts the USFM text of a single verse into its list of verse objects.
 * The text is prefixed with a `\v 1 ` marker and parsed with the `chunk: true` option.
 * @param {string} verseUSFM - USFM content of the verse
 * @return {array|null} verse objects parsed for verse 1, or null when the input
 *  is empty/missing or the parser produced no verse objects
 */
export function usfmVerseToJson(verseUSFM) {
  // skip parsing entirely for empty/missing input
  if (verseUSFM) {
    const verseObjects = usfmjs.toJSON('\\v 1 ' + verseUSFM, {chunk: true});

    if (verseObjects?.verses?.[1]?.verseObjects) {
      return verseObjects.verses[1].verseObjects;
    }
  }
  return null;
}

0 comments on commit 468d72e

Please sign in to comment.