diff --git a/packages/cspell-tools/src/compiler/legacyLineToWords.test.ts b/packages/cspell-tools/src/compiler/legacyLineToWords.test.ts index 1092bde81d6..910f6edad3d 100644 --- a/packages/cspell-tools/src/compiler/legacyLineToWords.test.ts +++ b/packages/cspell-tools/src/compiler/legacyLineToWords.test.ts @@ -14,23 +14,23 @@ describe('Validate legacyLineToWords', () => { }); test.each` - line | expectedResult - ${'hello'} | ${['hello']} - ${'AppendIterator::getArrayIterator'} | ${['append', 'iterator', 'get', 'array']} - ${'Austin Martin'} | ${['austin', 'martin']} - ${'JPEGsBLOBs'} | ${['jpegs', 'blobs']} - ${'CURLs CURLing' /* Sadly we cannot do this one correctly */} | ${['curls', 'curling']} - ${'DNSTable Lookup'} | ${['dns', 'table', 'lookup']} - ${'OUTRing'} | ${['outring']} - ${'OUTRings'} | ${['outrings']} - ${'DIRs'} | ${['dirs']} - ${'AVGAspect'} | ${['avg', 'aspect']} - ${'New York'} | ${['new', 'york']} - ${'Namespace DNSLookup'} | ${['namespace', 'dns', 'lookup']} - ${'well-educated'} | ${['well', 'educated']} - ${'CURLcode'} | ${['cur', 'lcode']} - ${'kDNSServiceErr_BadSig'} | ${['k', 'dns', 'service', 'err', 'bad', 'sig']} - ${'apd_get_active_symbols'} | ${['apd', 'get', 'active', 'symbols']} + line | expectedResult + ${'hello'} | ${['hello']} + ${'AppendIterator::getArrayIterator'} | ${['append', 'iterator', 'get', 'array']} + ${'Austin Martin'} | ${['austin', 'martin']} + ${'JPEGSBlobs'} | ${['jpegs', 'blobs']} + ${'CURLS Curling'} | ${['curls', 'curling']} + ${'DNSTable Lookup'} | ${['dns', 'table', 'lookup']} + ${'OUTRing'} | ${['out', 'ring']} + ${'OUTRings'} | ${['out', 'rings']} + ${'DIRs'} | ${['di', 'rs']} + ${'AVGAspect'} | ${['avg', 'aspect']} + ${'New York'} | ${['new', 'york']} + ${'Namespace DNSLookup'} | ${['namespace', 'dns', 'lookup']} + ${'well-educated'} | ${['well', 'educated']} + ${'CURLcode'} | ${['cur', 'lcode']} + ${'kDNSServiceErr_BadSig'} | ${['k', 'dns', 'service', 'err', 'bad', 'sig']} + ${'apd_get_active_symbols'} | ${['apd', 'get', 'active', 'symbols']} `('legacy splitting lines $line', ({ line, expectedResult }: { line: string; expectedResult: string[] }) => { expect([...pipe(legacyLineToWords(line, false, allowed), opFilter(distinct()))]).toEqual(expectedResult); }); diff --git a/packages/cspell-tools/src/compiler/splitCamelCaseIfAllowed.test.ts b/packages/cspell-tools/src/compiler/splitCamelCaseIfAllowed.test.ts index 95a1edc490e..ecae053bbd3 100644 --- a/packages/cspell-tools/src/compiler/splitCamelCaseIfAllowed.test.ts +++ b/packages/cspell-tools/src/compiler/splitCamelCaseIfAllowed.test.ts @@ -3,17 +3,19 @@ import { splitCamelCaseIfAllowed } from './splitCamelCaseIfAllowed'; describe('splitCamelCaseIfAllowed', () => { test.each` - text | keepCase | allowed | expected - ${''} | ${false} | ${undefined} | ${[]} - ${'hello'} | ${false} | ${undefined} | ${['hello']} - ${'helloThere'} | ${false} | ${['hello', 'there']} | ${['hello', 'there']} - ${'helloThere'} | ${false} | ${['hello', 'There']} | ${['hello', 'There']} - ${'helloThere'} | ${true} | ${['hello', 'There']} | ${['hello', 'There']} - ${'ERRORCode'} | ${false} | ${['error', 'code']} | ${['error', 'code']} - ${'ERRORCode'} | ${true} | ${['error', 'code']} | ${['ERROR', 'code']} - ${'ERRORCode'} | ${true} | ${['code']} | ${['ERRORCode']} - ${'ERRORCode'} | ${false} | ${['code']} | ${['ERRORCode']} - ${'ErrorCode'} | ${true} | ${['error', 'code']} | ${['error', 'code']} + text | keepCase | allowed | expected + ${''} | ${false} | ${undefined} | ${[]} + ${'hello'} | ${false} | ${undefined} | ${['hello']} + ${'helloThere'} | ${false} | ${['hello', 'there']} | ${['hello', 'there']} + ${'helloThere'} | ${false} | ${['hello', 'There']} | ${['hello', 'There']} + ${'helloThere'} | ${true} | ${['hello', 'There']} | ${['hello', 'There']} + ${'ERRORCode'} | ${false} | ${['error', 'code']} | ${['error', 'code']} + ${'ERRORCode'} | ${true} | ${['error', 'code']} | ${['ERROR', 'code']} + ${'ERRORCode'} | ${true} | ${['code']} | ${['ERRORCode']} + ${'ERRORCode'} | ${false} | ${['code']} | ${['ERRORCode']} + ${'ErrorCode'} | ${true} | ${['error', 'code']} | ${['error', 'code']} + ${'xmlUCSIsCatZ'} | ${true} | ${['xml', 'UCS', 'is', 'cat', 'z']} | ${['xml', 'UCS', 'is', 'cat', 'z']} + ${'ADP_ConnectionStateMsg_Closed'} | ${true} | ${undefined} | ${['ADP', 'connection', 'state', 'msg', 'closed']} `('splitCamelCaseIfAllowed $text $keepCase $allowed', ({ text, keepCase, allowed, expected }) => { allowed = createAllowedSplitWords(allowed); expect(splitCamelCaseIfAllowed(text, allowed, keepCase)).toEqual(expected); diff --git a/packages/cspell-tools/src/compiler/splitCamelCaseIfAllowed.ts b/packages/cspell-tools/src/compiler/splitCamelCaseIfAllowed.ts index 7e5231cd011..d66596084cd 100644 --- a/packages/cspell-tools/src/compiler/splitCamelCaseIfAllowed.ts +++ b/packages/cspell-tools/src/compiler/splitCamelCaseIfAllowed.ts @@ -34,7 +34,7 @@ function isUnknown(word: string, allowedWords: AllowedSplitWordsCollection): boo } function splitCamelCase(word: string): Iterable { - const splitWords = Text.splitCamelCaseWord(word).filter((word) => !regExpIsNumber.test(word)); + const splitWords = Text.splitCamelCaseWord(word, false).filter((word) => !regExpIsNumber.test(word)); // We only want to preserve this: "New York" and not "Namespace DNSLookup" if (splitWords.length > 1 && regExpSpaceOrDash.test(word)) { return splitWords.flatMap((w) => w.split(regExpSpaceOrDash)); diff --git a/packages/cspell-tools/src/compiler/text.ts b/packages/cspell-tools/src/compiler/text.ts index 69d05eb8263..6d8d9f74580 100644 --- a/packages/cspell-tools/src/compiler/text.ts +++ b/packages/cspell-tools/src/compiler/text.ts @@ -7,8 +7,8 @@ const regExSplitWords2 = /(\p{Lu})(\p{Lu}\p{Ll})/gu; /** * Split camelCase words into an array of strings. */ -export function splitCamelCaseWord(word: string): string[] { - const wPrime = word.replace(regExUpperSOrIng, (s) => s[0] + s.slice(1).toLowerCase()); +export function splitCamelCaseWord(word: string, autoStem = true): string[] { + const wPrime = autoStem ? word.replace(regExUpperSOrIng, (s) => s[0] + s.slice(1).toLowerCase()) : word; const pass1 = wPrime.replace(regExSplitWords, '$1|$2'); const pass2 = pass1.replace(regExSplitWords2, '$1|$2'); const pass3 = pass2.replace(/[\d_]+/g, '|'); diff --git a/packages/cspell-tools/src/compiler/wordListCompiler.ts b/packages/cspell-tools/src/compiler/wordListCompiler.ts index 0bc1bc10883..ea2a4cc90a5 100644 --- a/packages/cspell-tools/src/compiler/wordListCompiler.ts +++ b/packages/cspell-tools/src/compiler/wordListCompiler.ts @@ -23,13 +23,23 @@ export async function compileWordList( destFilename: string, options: CompileOptions ): Promise { - const filter = normalizeTargetWords(options); + const finalLines = normalize(lines, options); - const finalSeq = pipe(wordListHeaderLines, opAppend(pipe(lines, filter))); + const finalSeq = pipe(wordListHeaderLines, opAppend(finalLines)); return createWordListTarget(destFilename)(finalSeq); } +function normalize(lines: Iterable, options: CompileOptions): Iterable { + const filter = normalizeTargetWords(options); + + const iter = pipe(lines, filter); + if (!options.sort) return iter; + + const result = new Set(iter); + return [...result].sort(); +} + function createWordListTarget(destFilename: string): (seq: Iterable) => Promise { const target = createTarget(destFilename); return (seq: Iterable) =>