From 2911864ca897fa0438588c2c038268a6a5f1c4a5 Mon Sep 17 00:00:00 2001 From: Sanjaya Kumar Saxena Date: Mon, 6 May 2024 16:36:05 +0530 Subject: [PATCH] fix(*): add missing typescript declarations for word embeddings itemAt() now throws an error for an invalid or out of bounds index instead of returning undefined. references #136 references winkjs/wink-embeddings-sg-100d#1 Co-authored-by: Rachna --- src/api/col-get-item.js | 4 +- src/api/sel-get-item.js | 4 +- test/apiA-specs.js | 24 ++++----- types/index.d.ts | 105 ++++++++++++++++++++++++---------------- 4 files changed, 81 insertions(+), 56 deletions(-) diff --git a/src/api/col-get-item.js b/src/api/col-get-item.js index 252f19e..7191cdf 100644 --- a/src/api/col-get-item.js +++ b/src/api/col-get-item.js @@ -44,7 +44,9 @@ var colGetItemAt = function ( k, start, end, itemFn ) { // To handle relative indexing, compute actual `k` by adding `start`. var ak = k + start; - return ( ( ak < start || ak > end ) ? undefined : itemFn( ak ) ); + if ( ak < start || ak > end ) { + throw Error( `wink-nlp: ${k} is an invalid or out of bounds index.`); + } else return itemFn( ak ); }; // colGetItemAt() module.exports = colGetItemAt; diff --git a/src/api/sel-get-item.js b/src/api/sel-get-item.js index a66be5a..7236649 100644 --- a/src/api/sel-get-item.js +++ b/src/api/sel-get-item.js @@ -41,7 +41,9 @@ * @private */ var selGetItemAt = function ( k, selection, itemFn ) { - return ( ( k < 0 || k >= selection.length ) ? undefined : itemFn( selection[ k ] ) ); + if ( k < 0 || k >= selection.length ) { + throw Error( `wink-nlp: ${k} is an invalid or out of bounds index.`); + } else return itemFn( selection[ k ] ); }; // selGetItemAt() module.exports = selGetItemAt; diff --git a/test/apiA-specs.js b/test/apiA-specs.js index 3db5e07..2c76074 100644 --- a/test/apiA-specs.js +++ b/test/apiA-specs.js @@ -124,20 +124,20 @@ describe( 'APIs — A', function () { // ItemAt boundary tests. 
// Will need a revamp once SBD is in place (TODO): describe( 'doc API out of range access test', function () { - it( '.sentences() should return undefined for out of range index', function () { - expect( doc1.sentences().itemAt( -1 ) ).to.equal( undefined ); - expect( doc1.sentences().itemAt( doc1.sentences().length() ) ).to.equal( undefined ); + it( '.sentences() should throw out of range index error', function () { + expect( () => doc1.sentences().itemAt(-1) ).to.throw( /^wink-nlp: -1 is an invalid or out of bounds index./ ); + expect( () => doc1.sentences().itemAt( doc1.sentences().length() ) ).to.throw( /^wink-nlp: 3 is an invalid or out of bounds index./ ); } ); - it( '.tokens().itemAt() should return undefined for out of range index', function () { - expect( doc1.tokens().itemAt( -1 ) ).to.equal( undefined ); - expect( doc1.tokens().itemAt( doc1.tokens().length() ) ).to.equal( undefined ); + it( '.tokens().itemAt() should throw out of range index error', function () { + expect( () => doc1.tokens().itemAt( -1 ) ).to.throw( /^wink-nlp: -1 is an invalid or out of bounds index./ ); + expect( () => doc1.tokens().itemAt( doc1.tokens().length() ) ).to.throw( /^wink-nlp: 37 is an invalid or out of bounds index./ ); } ); - it( '.tokens().filter().itemAt() should return undefined for out of range index', function () { + it( '.tokens().filter().itemAt() should throw out of range index error', function () { const ftk1 = doc1.tokens().filter( ( t ) => ( t.out( its.type ) === 'word' ) ); - expect( ftk1.itemAt( -1 ) ).to.equal( undefined ); - expect( ftk1.itemAt( ftk1.length() ) ).to.equal( undefined ); + expect( () => ftk1.itemAt( -1 ) ).to.throw( /^wink-nlp: -1 is an invalid or out of bounds index./ ); + expect( () => ftk1.itemAt( ftk1.length() ) ).to.throw( /^wink-nlp: 14 is an invalid or out of bounds index./ ); } ); } ); @@ -232,7 +232,7 @@ describe( 'APIs — A', function () { const i22 = doc2.entities().itemAt( 5 ); expect( i22.out( its.detail ) 
).to.deep.equal( ae2[ 5 ] ); - expect( doc2.entities().itemAt( 12 ) ).to.deep.equal( undefined ); + expect( () => doc2.entities().itemAt( 12 ) ).to.throw( /^wink-nlp: 12 is an invalid or out of bounds index./ ); } ); it( '.filter() should return correctly filter entities', function () { @@ -247,7 +247,7 @@ describe( 'APIs — A', function () { // Also check the parent document! expect( fe1.itemAt( 1 ).parentDocument() ).to.deep.equal( doc1 ); // Out of range item test - expect( fe1.itemAt( 2 ) ).to.deep.equal( undefined ); + expect( () => fe1.itemAt( 2 ) ).to.throw( /^wink-nlp: 2 is an invalid or out of bounds index./ ); // itemAt() api. fe1.each( ( e, k ) => { expect( e.out() ).to.deep.equal( fe1.itemAt( k ).out() ); @@ -260,7 +260,7 @@ describe( 'APIs — A', function () { expect( fe2.out( its.detail ) ).to.deep.equal( fae2 ); expect( fe2.itemAt( 1 ).out( its.detail ) ).to.deep.equal( fae2[ 1 ] ); expect( fe2.itemAt( 1 ).parentDocument() ).to.deep.equal( doc2 ); - expect( fe2.itemAt( 3 ) ).to.deep.equal( undefined ); + expect( () => fe2.itemAt( 3 ) ).to.throw( /^wink-nlp: 3 is an invalid or out of bounds index./ ); fe2.each( ( e, k ) => { expect( e.out() ).to.deep.equal( fe2.itemAt( k ).out() ); } ); diff --git a/types/index.d.ts b/types/index.d.ts index 2c1cc94..31fa2cf 100644 --- a/types/index.d.ts +++ b/types/index.d.ts @@ -4,6 +4,8 @@ declare module 'wink-nlp' { // turn off exporting by default since we don't want to expose internal details export { }; + // *** BEGIN Language Model Specific Declarations *** + // These should be always in sync with the language model's type declarations. 
// these types are internal details of the implementing model type StemAddon = unknown; type LemmatizeAddon = unknown; @@ -40,6 +42,7 @@ declare module 'wink-nlp' { featureFn: FeatureFn; addons: ModelAddons; } + // *** END Language Model Specific Declarations *** // its helpers @@ -106,25 +109,25 @@ declare module 'wink-nlp' { // Its export interface ItsHelpers { - case(index: number, token: Token, cache: Cache): Case; - uniqueId(index: number, token: Token): number; - negationFlag(index: number, token: Token): boolean; - normal(index: number, token: Token, cache: Cache): string; - contractionFlag(index: number, token: Token): boolean; - pos(index: number, token: Token, cache: Cache): PartOfSpeech; - precedingSpaces(index: number, token: Token): string; - prefix(index: number, token: Token, cache: Cache): string; - shape(index: number, token: Token, cache: Cache): string; - stopWordFlag(index: number, token: Token, cache: Cache): boolean; - abbrevFlag(index: number, token: Token, cache: Cache): boolean; - suffix(index: number, token: Token, cache: Cache): string; - type(index: number, token: Token, cache: Cache): string; - value(index: number, token: Token, cache: Cache): string; - stem(index: number, token: Token, cache: Cache, addons: ModelAddons): string; - lemma(index: number, token: Token, cache: Cache, addons: ModelAddons): string; + case(index: number, rdd: RawDocumentData): Case; + uniqueId(index: number, rdd: RawDocumentData): number; + negationFlag(index: number, rdd: RawDocumentData): boolean; + normal(index: number, rdd: RawDocumentData): string; + contractionFlag(index: number, rdd: RawDocumentData): boolean; + pos(index: number, rdd: RawDocumentData): PartOfSpeech; + precedingSpaces(index: number, rdd: RawDocumentData): string; + prefix(index: number, rdd: RawDocumentData): string; + shape(index: number, rdd: RawDocumentData): string; + stopWordFlag(index: number, rdd: RawDocumentData): boolean; + abbrevFlag(index: number, rdd: RawDocumentData): 
boolean; + suffix(index: number, rdd: RawDocumentData): string; + type(index: number, rdd: RawDocumentData): string; + value(index: number, rdd: RawDocumentData): string; + stem(index: number, rdd: RawDocumentData, addons: ModelAddons): string; + lemma(index: number, rdd: RawDocumentData, addons: ModelAddons): string; vector(): number[]; detail(): Detail; - markedUpText(index: number, token: Token, cache: Cache): string; + markedUpText(index: number, rdd: RawDocumentData): string; span(spanItem: number[]): number[]; sentenceWiseImportance(rdd: RawDocumentData): SentenceImportance[]; sentiment(spanItem: number[]): number; @@ -134,7 +137,7 @@ declare module 'wink-nlp' { docBOWArray(tf: ModelTermFrequencies): Bow; bow(tf: ModelTermFrequencies): Bow; idf(tf: ModelTermFrequencies, idf: ModelInverseDocumentFrequencies): Array<[term: string, frequency: number]>; - tf(tf: ModelTermFrequencies, idf: ModelInverseDocumentFrequencies): Array<[term: string, frequency: number]>; + tf(tf: ModelTermFrequencies): Array<[term: string, frequency: number]>; modelJSON(tf: ModelTermFrequencies, idf: ModelInverseDocumentFrequencies): string; } @@ -146,6 +149,7 @@ declare module 'wink-nlp' { freqTable(tokens: T[]): Array<[token: T, freq: number]>; bigrams(tokens: T[]): Array<[T, T]>; unique(tokens: T[]): T[]; + vector(token: string[]): number[]; } // functions for use with document @@ -168,9 +172,9 @@ declare module 'wink-nlp' { } export interface SelectedTokens { - each(f: (token: ItemToken) => void): void; - filter(f: (token: ItemToken) => boolean): SelectedTokens; - itemAt(k: number): ItemToken | undefined; + each(cb: (item: ItemToken, index: number) => void): void; + filter(cb: (item: ItemToken) => boolean): SelectedTokens; + itemAt(k: number): ItemToken; length(): number; out(): string[]; out(itsf: ItsFunction): T[] | string[]; @@ -178,9 +182,9 @@ declare module 'wink-nlp' { } export interface Tokens { - each(f: (token: ItemToken) => void): void; - 
filter(f: (token: ItemToken) => boolean): SelectedTokens; - itemAt(k: number): ItemToken | undefined; + each(cb: (item: ItemToken, index: number) => void): void; + filter(cb: (item: ItemToken) => boolean): SelectedTokens; + itemAt(k: number): ItemToken; length(): number; out(): string[]; out(itsf: ItsFunction): T[] | string[]; @@ -189,7 +193,7 @@ declare module 'wink-nlp' { export interface ItemEntity { parentDocument(): Document; - markup(beginMarker: string, endMarker: string): void; + markup(beginMarker?: string, endMarker?: string): void; out(): string; out(itsf: ItsFunction): T | string; parentSentence(): ItemSentence; @@ -198,9 +202,9 @@ declare module 'wink-nlp' { } export interface SelectedEntities { - each(f: (entity: ItemEntity) => void): void; - filter(f: (entity: ItemEntity) => boolean): SelectedEntities; - itemAt(k: number): ItemEntity | undefined; + each(cb: (item: ItemEntity, index: number) => void): void; + filter(cb: (item: ItemEntity) => boolean): SelectedEntities; + itemAt(k: number): ItemEntity; length(): number; out(): string[]; out(itsf: ItsFunction): T[] | string[]; @@ -208,9 +212,9 @@ declare module 'wink-nlp' { } export interface Entities { - each(f: (entity: ItemEntity) => void): void; - filter(f: (entity: ItemEntity) => boolean): SelectedEntities; - itemAt(k: number): ItemEntity | undefined; + each(cb: (item: ItemEntity, index: number) => void): void; + filter(cb: (item: ItemEntity) => boolean): SelectedEntities; + itemAt(k: number): ItemEntity; length(): number; out(): string[]; out(itsf: ItsFunction): T[] | string[]; @@ -219,7 +223,7 @@ declare module 'wink-nlp' { export interface ItemCustomEntity { parentDocument(): Document; - markup(beginMarker: string, endMarker: string): void; + markup(beginMarker?: string, endMarker?: string): void; out(): string; out(itsf: ItsFunction): T | string; parentSentence(): ItemSentence; @@ -228,9 +232,9 @@ 
declare module 'wink-nlp' { } export interface SelectedCustomEntities { - each(f: (entity: ItemCustomEntity) => void): void; - filter(f: (entity: ItemCustomEntity) => boolean): SelectedCustomEntities; - itemAt(k: number): ItemCustomEntity | undefined; + each(cb: (item: ItemCustomEntity, index: number) => void): void; + filter(cb: (item: ItemCustomEntity) => boolean): SelectedCustomEntities; + itemAt(k: number): ItemCustomEntity; length(): number; out(): string[]; out(itsf: ItsFunction): T[] | string[]; @@ -238,9 +242,9 @@ declare module 'wink-nlp' { } export interface CustomEntities { - each(f: (entity: ItemCustomEntity) => void): void; - filter(f: (entity: ItemCustomEntity) => boolean): SelectedCustomEntities; - itemAt(k: number): ItemCustomEntity | undefined; + each(cb: (item: ItemCustomEntity, index: number) => void): void; + filter(cb: (item: ItemCustomEntity) => boolean): SelectedCustomEntities; + itemAt(k: number): ItemCustomEntity; length(): number; out(): string[]; out(itsf: ItsFunction): T[] | string[]; @@ -249,7 +253,7 @@ declare module 'wink-nlp' { export interface ItemSentence { parentDocument(): Document; - markup(beginMarker: string, endMarker: string): void; + markup(beginMarker?: string, endMarker?: string): void; out(): string; out(itsf: ItsFunction): T | string; entities(): Entities; @@ -259,8 +263,8 @@ declare module 'wink-nlp' { } export interface Sentences { - each(f: (entity: ItemSentence) => void): void; - itemAt(k: number): ItemSentence | undefined; + each(cb: (item: ItemSentence, index: number) => void): void; + itemAt(k: number): ItemSentence; length(): number; out(): string[]; out(itsf: ItsFunction): T[] | string[]; @@ -277,6 +281,8 @@ declare module 'wink-nlp' { sentences(): Sentences; tokens(): Tokens; printTokens(): void; + pipeConfig(): string[]; + contextualVectors(lemma: boolean, specificWordVectors: string[], 
similarWordVectors: boolean, wordVectorsLimit: number): string; } export interface CerExample { @@ -295,6 +301,18 @@ declare module 'wink-nlp' { patterns: string[]; } + // Wink word embeddings structure, should stay in sync with embedding repo. + interface WordEmbedding { + precision: number; + l2NormIndex: number; + wordIndex: number; + dimensions: number; + unkVector: number[]; + size: number; + words: string[]; + vectors: Record<string, number[]>; + } + export interface WinkMethods { readDoc(text: string): Document; // returns number of learned entities @@ -303,7 +321,7 @@ declare module 'wink-nlp' { as: AsHelpers; } - export default function WinkFn(theModel: Model, pipe?: string[]): WinkMethods; + export default function WinkFn(theModel: Model, pipe?: string[], wordEmbeddings?: WordEmbedding): WinkMethods; } declare module 'wink-nlp/utilities/bm25-vectorizer' { @@ -348,6 +366,9 @@ declare module 'wink-nlp/utilities/similarity' { tversky(setA: Set, setB: Set, alpha?: number, beta?: number): number; oo(setA: Set, setB: Set): number; }; + vector: { + cosine(vectorA: number[], vectorB: number[]): number; + }; } const similarity: SimilarityHelper;