Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

improvements to error handling #17

Merged
merged 2 commits into from
May 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "word-aligner-rcl",
"version": "1.1.1",
"version": "1.1.2",
"main": "dist/index.cjs.js",
"module": "dist/index.es.js",
"repository": "https://github.com/unfoldingWord/word-aligner-rcl.git",
Expand Down
109 changes: 57 additions & 52 deletions src/utils/alignmentHelpers.js
Original file line number Diff line number Diff line change
Expand Up @@ -141,63 +141,68 @@ export function getWordListFromVerseObjects(verseObjects) {
* @return {array} list of alignments in target text
*/
export function extractAlignmentsFromTargetVerse(alignedTargetVerse, sourceVerse) {
const targetVerse = usfmVerseToJson(alignedTargetVerse);
const alignments = wordaligner.unmerge(targetVerse, sourceVerse);
const originalLangWordList = sourceVerse && getOriginalLanguageListForVerseData(sourceVerse);
const alignmentsWordList = getAlignedWordListFromAlignments(alignments.alignment);
const targetTokens = getWordListFromVerseObjects(targetVerse);
// clean up metadata in alignments
originalLangWordList && updateAlignedWordsFromOriginalWordList(originalLangWordList, alignmentsWordList);
if (alignments.alignment) { // for compatibility change alignment to alignments
// convert occurrence(s) from string to number
const alignments_ = alignments.alignment.map(alignment => {
const topWords = convertOccurrences(alignment.topWords);
const bottomWords = convertOccurrences(alignment.bottomWords);
return {
sourceNgram: topWords.map(topWord => { // word aligner uses sourceNgram instead of topWord
if (originalLangWordList) {
const pos = originalLangWordList.findIndex(item => (
topWord.word === (item.word || item.text) &&
topWord.occurrence == item.occurrence //Tricky: we want to allow automatic conversion between string and integer because occurrence could be either
));
try {
const targetVerse = usfmVerseToJson(alignedTargetVerse);
const alignments = wordaligner.unmerge(targetVerse, sourceVerse);
const originalLangWordList = sourceVerse && getOriginalLanguageListForVerseData(sourceVerse);
const alignmentsWordList = getAlignedWordListFromAlignments(alignments.alignment);
const targetTokens = getWordListFromVerseObjects(targetVerse);
// clean up metadata in alignments
originalLangWordList && updateAlignedWordsFromOriginalWordList(originalLangWordList, alignmentsWordList);
if (alignments.alignment) { // for compatibility change alignment to alignments
// convert occurrence(s) from string to number
const alignments_ = alignments.alignment.map(alignment => {
const topWords = convertOccurrences(alignment.topWords);
const bottomWords = convertOccurrences(alignment.bottomWords);
return {
sourceNgram: topWords.map(topWord => { // word aligner uses sourceNgram instead of topWord
if (originalLangWordList) {
const pos = originalLangWordList.findIndex(item => (
topWord.word === (item.word || item.text) &&
topWord.occurrence == item.occurrence //Tricky: we want to allow automatic conversion between string and integer because occurrence could be either
));
const newSource = {
...topWord,
index: pos,
text: topWord.text || topWord.word,
};
delete newSource.word
return newSource
}
const newSource = {
...topWord,
index: pos,
text: topWord.text || topWord.word,
};
delete newSource.word
delete newSource.position
return newSource
}
const newSource = {
...topWord,
text: topWord.text || topWord.word,
};
delete newSource.word
delete newSource.position
return newSource
}),
targetNgram: bottomWords.map(bottomWord => { // word aligner uses targetNgram instead of bottomWords
const word = bottomWord.text || bottomWord.word
// noinspection EqualityComparisonWithCoercionJS
const pos = targetTokens.findIndex(item => (
word === item.text &&
// eslint-disable-next-line eqeqeq
bottomWord.occurrence == item.occurrence
));

const newTarget = {
...bottomWord,
index: pos,
text: word,
};
delete newTarget.word
return newTarget;
}),
}
})
alignments.alignments = alignments_;
}),
targetNgram: bottomWords.map(bottomWord => { // word aligner uses targetNgram instead of bottomWords
const word = bottomWord.text || bottomWord.word
// noinspection EqualityComparisonWithCoercionJS
const pos = targetTokens.findIndex(item => (
word === item.text &&
// eslint-disable-next-line eqeqeq
bottomWord.occurrence == item.occurrence
));

const newTarget = {
...bottomWord,
index: pos,
text: word,
};
delete newTarget.word
return newTarget;
}),
}
})
alignments.alignments = alignments_;
}
return alignments;
} catch (e) {
console.warn(`extractAlignmentsFromTargetVerse()`,e)
return null
}
return alignments;
}

/**
Expand All @@ -215,7 +220,7 @@ export function addAlignmentsToTargetVerseUsingMerge(targetVerseText, verseAlign
verseAlignments.alignments, verseAlignments.wordBank, verseString, true,
);
} catch (e) {
console.log(`addAlignmentsToTargetVerseUsingMerge() - invalid alignment`, e);
console.error(`addAlignmentsToTargetVerseUsingMerge() - invalid alignment`, e);
}

if (verseObjects) {
Expand Down Expand Up @@ -315,7 +320,7 @@ export function parseUsfmToWordAlignerData(targetVerseUSFM, sourceVerseUSFM) {
const sourceVerseObjects = sourceVerseUSFM && usfmVerseToJson(sourceVerseUSFM);
let targetWords = [];
const targetVerseAlignments = extractAlignmentsFromTargetVerse(targetVerseUSFM, sourceVerseObjects);
const verseAlignments = targetVerseAlignments.alignments;
const verseAlignments = targetVerseAlignments?.alignments;
targetWords = markTargetWordsAsDisabledIfAlreadyUsedForAlignments(targetTokens, verseAlignments);
return {targetWords, verseAlignments};
}
Expand Down
8 changes: 5 additions & 3 deletions src/utils/usfmHelpers.js
Original file line number Diff line number Diff line change
Expand Up @@ -149,10 +149,12 @@ export const removeUsfmMarkers = (targetVerseText) => {
};

export function usfmVerseToJson(verseUSFM) {
const verseObjects = usfmjs.toJSON('\\v 1 ' + verseUSFM, { chunk: true });
if (verseUSFM) {
const verseObjects = usfmjs.toJSON('\\v 1 ' + verseUSFM, {chunk: true});

if (verseObjects?.verses?.[1]?.verseObjects) {
return verseObjects.verses[1].verseObjects;
if (verseObjects?.verses?.[1]?.verseObjects) {
return verseObjects.verses[1].verseObjects;
}
}
return null;
}
Loading