From ea674523308dd12aa7cce0f51fca4fdce7ef2eae Mon Sep 17 00:00:00 2001 From: Pavlo Karatsiuba Date: Fri, 31 Mar 2023 19:44:49 +0200 Subject: [PATCH] Added all metadata keys into rules to validate. --- src/mwoffliner.lib.ts | 105 +++++++++++++++------------------- src/util/metaData.ts | 37 +++++++++--- test/unit/mock/1x1.png | Bin 0 -> 83 bytes test/unit/util.test.ts | 124 +++++++++++++++++++++++++++++++++++------ 4 files changed, 184 insertions(+), 82 deletions(-) create mode 100644 test/unit/mock/1x1.png diff --git a/src/mwoffliner.lib.ts b/src/mwoffliner.lib.ts index 0940b8e14..e61b96913 100644 --- a/src/mwoffliner.lib.ts +++ b/src/mwoffliner.lib.ts @@ -36,7 +36,6 @@ import { mkdirPromise, sanitizeString, saveStaticFiles, - writeFilePromise, importPolyfillModules, extractArticleList, getTmpDirectory, @@ -93,7 +92,7 @@ async function execute(argv: any) { publisher: _publisher, outputDirectory: _outputDirectory, addNamespaces: _addNamespaces, - customZimFavicon: _customZimFavicon, + customZimFavicon, optimisationCacheUrl, customFlavour, } = argv @@ -107,7 +106,6 @@ async function execute(argv: any) { if (articleList) articleList = String(articleList) if (articleListToIgnore) articleListToIgnore = String(articleListToIgnore) const publisher = _publisher || config.defaults.publisher - let customZimFavicon = _customZimFavicon /* HTTP user-agent string */ // const adminEmail = argv.adminEmail; @@ -190,6 +188,7 @@ async function execute(argv: any) { Language: mwMetaData.langIso3, Publisher: publisher, Title: customZimTitle || mwMetaData.title, + 'Illustration_48x48@1': await getIllustrationMetadata(), } validateMetadata(metaDataRequiredKeys) @@ -248,35 +247,6 @@ async function execute(argv: any) { .map((a: string) => Number(a)) : [] - /* ZIM custom Favicon */ - if (customZimFavicon) { - const faviconPath = path.join(tmpDirectory, 'favicon.png') // Later the PNG conversion (if necessary) - const faviconIsRemote = customZimFavicon.includes('http') - logger.log(`${faviconIsRemote ? 'Downloading' : 'Moving'} custom favicon to [${faviconPath}]`) - let content - if (faviconIsRemote) { - logger.log(`Downloading remote zim favicon from [${customZimFavicon}]`) - content = await axios - .get(customZimFavicon, downloader.arrayBufferRequestOptions) - .then((a) => a.data) - .catch(() => { - throw new Error(`Failed to download custom zim favicon from [${customZimFavicon}]`) - }) - } else { - try { - content = fs.readFileSync(customZimFavicon) - } catch (err) { - throw new Error(`Failed to read custom zim favicon from [${customZimFavicon}]`) - } - } - fs.writeFileSync(faviconPath, content) - customZimFavicon = faviconPath - - if (!fs.existsSync(customZimFavicon)) { - throw new Error(`Path ${customZimFavicon} is not a valid PNG file.`) - } - } - /* ********************************* */ /* GET CONTENT ********************* */ /* ********************************* */ @@ -391,6 +361,15 @@ async function execute(argv: any) { logger.log(`Writing zim to [${outZim}]`) dump.outFile = outZim + const metadata = { + ...metaDataRequiredKeys, + Tags: dump.computeZimTags(), + Name: dump.computeFilenameRadical(false, true, true), + Flavour: dump.computeFlavour(), + ...(dump.opts.customZimLongDescription ? { LongDescription: `${dump.opts.customZimLongDescription}` } : {}), + } + validateMetadata(metadata) + const zimCreator = new ZimCreator( { fileName: outZim, @@ -398,13 +377,7 @@ async function execute(argv: any) { welcome: dump.opts.mainPage ? dump.opts.mainPage : 'index', compression: 'zstd', }, - { - ...metaDataRequiredKeys, - Tags: dump.computeZimTags(), - Name: dump.computeFilenameRadical(false, true, true), - Flavour: dump.computeFlavour(), - ...(dump.opts.customZimLongDescription ? { LongDescription: `${dump.opts.customZimLongDescription}` } : {}), - } as any, + metadata as any, ) const scraperArticle = new ZimArticle({ ns: 'M', @@ -426,7 +399,7 @@ async function execute(argv: any) { const article = new ZimArticle({ url: `${config.output.dirs.mediawiki}/style.css`, data: finalCss, ns: '-' }) zimCreator.addArticle(article) - await saveFavicon(dump, zimCreator) + await saveFavicon(zimCreator, metaDataRequiredKeys['Illustration_48x48@1']) await getThumbnailsData() @@ -502,25 +475,31 @@ async function execute(argv: any) { }) } - async function saveFavicon(dump: Dump, zimCreator: ZimCreator): Promise { - logger.log('Saving favicon.png...') - - async function saveFavicon(zimCreator: ZimCreator, faviconPath: string): Promise { + async function getIllustrationMetadata(): Promise { + if (customZimFavicon) { + const faviconIsRemote = customZimFavicon.includes('http') + let content + if (faviconIsRemote) { + logger.log(`Downloading remote zim favicon from [${customZimFavicon}]`) + content = await axios + .get(customZimFavicon, downloader.arrayBufferRequestOptions) + .then((a) => a.data) + .catch(() => { + throw new Error(`Failed to download custom zim favicon from [${customZimFavicon}]`) + }) + } else { + try { + content = fs.readFileSync(customZimFavicon) + } catch (err) { + throw new Error(`Failed to read custom zim favicon from [${customZimFavicon}]`) + } + } try { - const source = await fs.promises.readFile(faviconPath) - const data = await sharp(source).resize(48, 48, { fit: sharp.fit.inside, withoutEnlargement: true }).png().toBuffer() - const illustrationMetadata = new ZimArticle({ url: 'Illustration_48x48@1', mimeType: 'image/png', data, ns: 'M' }) - zimCreator.addArticle(illustrationMetadata) - const article = new ZimArticle({ url: 'favicon', mimeType: 'image/png', data, ns: '-' }) - return zimCreator.addArticle(article) + return sharp(content).resize(48, 48, { fit: sharp.fit.inside, withoutEnlargement: true }).png().toBuffer() } catch (e) { - throw new Error('Failed to save favicon and IllustrationMetadata using sharp') + throw new Error('Failed to read or process IllustrationMetadata using sharp') } } - - if (customZimFavicon) { - return saveFavicon(zimCreator, customZimFavicon) - } const body = await downloader.getJSON(mw.siteInfoUrl()) const entries = body.query.general if (!entries.logo) { @@ -530,11 +509,19 @@ async function execute(argv: any) { } const parsedUrl = urlParser.parse(entries.logo) - const faviconPath = path.join(tmpDirectory, 'favicon.png') const logoUrl = parsedUrl.protocol ? entries.logo : mw.baseUrl.protocol + entries.logo - const logoContent = await downloader.downloadContent(logoUrl) - await writeFilePromise(faviconPath, logoContent.content, null) - return saveFavicon(zimCreator, faviconPath) + const { content } = await downloader.downloadContent(logoUrl) + return await sharp(content).resize(48, 48, { fit: sharp.fit.inside, withoutEnlargement: true }).png().toBuffer() + } + + async function saveFavicon(zimCreator: ZimCreator, data: Buffer): Promise { + logger.log('Saving favicon.png...') + try { + const article = new ZimArticle({ url: 'favicon', mimeType: 'image/png', data, ns: '-' }) + return zimCreator.addArticle(article) + } catch (e) { + throw new Error('Failed to save favicon') + } } function getMainPage(dump: Dump, zimCreator: ZimCreator, downloader: Downloader) { diff --git a/src/util/metaData.ts b/src/util/metaData.ts index c4c30958e..5e2e36f86 100644 --- a/src/util/metaData.ts +++ b/src/util/metaData.ts @@ -3,22 +3,48 @@ import AjvModule from 'ajv' const Ajv = AjvModule.default const ajv = new Ajv({ allErrors: true }) +ajv.addKeyword({ + keyword: 'checkRegexFromBuffer', + validate: (regexStr: string, buffer) => { + if (Buffer.isBuffer(buffer)) { + const regex = new RegExp(regexStr) + const binary = buffer.toString('binary') + return regex.test(binary) + } + return false + }, + error: { + message: 'must match regex pattern', + }, +}) + const schema = { type: 'object', properties: { + Name: { type: 'string', minLength: 1 }, Creator: { type: 'string', minLength: 1 }, Description: { type: 'string', maxLength: 80, minLength: 1 }, - Language: { type: 'string', minLength: 1 }, + Language: { type: 'string', minLength: 1, pattern: '^\\w{3}(,\\w{3})*$' }, Publisher: { type: 'string', minLength: 1 }, Title: { type: 'string', maxLength: 30, minLength: 1 }, + Date: { type: 'string', maxLength: 10, minLength: 10 }, + 'Illustration_48x48@1': { checkRegexFromBuffer: '^\x89\x50\x4e\x47\x0d\x0a\x1a\x0a.+' }, + LongDescription: { type: 'string', maxLength: 4000 }, + License: { type: 'string' }, + Tags: { type: 'string' }, + Relation: { type: 'string' }, + Flavour: { type: 'string' }, + Source: { type: 'string' }, + Counter: { type: 'string' }, + Scraper: { type: 'string' }, }, - required: ['Creator', 'Description', 'Language', 'Publisher', 'Title'], + required: ['Creator', 'Description', 'Language', 'Publisher', 'Title', 'Illustration_48x48@1'], additionalProperties: true, } const validate = ajv.compile(schema) -export const validateMetadata = (metaData) => { +export const validateMetadata = (metaData): void => { const valid = validate(metaData) if (!valid) { @@ -31,9 +57,6 @@ export const validateMetadata = (metaData) => { if (error.keyword === 'minLength') { throw new Error(`Metadata "${keyword}" is required`) } - if (error.keyword === 'maxLength') { - throw new Error(`MetaData ${keyword}: ${error.message}`) - } - throw new Error(validate.errors[0].message) + throw new Error(`MetaData ${keyword}: ${error.message}`) } } diff --git a/test/unit/mock/1x1.png b/test/unit/mock/1x1.png new file mode 100644 index 0000000000000000000000000000000000000000..33f19edad6519edcb0dc7e238fd101a193437165 GIT binary patch literal 83 zcmeAS@N?(olHy`uVBq!ia0vp^j3CUx1|;Q0k92}K#X;^)4C~IxyaaN1JzX3_B*K$_ d?4R<`o`LN>(-Jk#D<(h*22WQ%mvv4FO#qYI6K? { }) describe('metaData', () => { + const pngImage = fs.readFileSync(`${__dirname}/mock/1x1.png`) + + const minimumValidMetadata = { + Creator: 'the creator', + Description: 'test Description', + Language: 'eng,ita', + Publisher: 'test Publisher', + Title: 'test Title', + 'Illustration_48x48@1': pngImage, + } + + test('validate valid metadata', () => { + expect(() => validateMetadata(minimumValidMetadata)).not.toThrowError() + }) + + test('validate with unicode chars', () => { + const metaData = { + ...minimumValidMetadata, + Description: '😎 Emoji, ❤ Hearts, 💲 Currencies, → Arrows, ☆ Stars', + } + expect(() => validateMetadata(metaData)).not.toThrowError() + }) + test('validate empty string', () => { const metaData = { + ...minimumValidMetadata, Creator: '', - Description: 'test Description', - Language: 'test Language', - Publisher: 'test Publisher', - Title: 'test Title', } expect(() => validateMetadata(metaData)).toThrow('Metadata "Creator" is required') }) test('validate missed metaData key', () => { const metaData = { - Creator: 'test Creator', - Language: 'test Language', - Publisher: 'test Publisher', - Title: 'test Title', + ...minimumValidMetadata, } + delete metaData.Description expect(() => validateMetadata(metaData)).toThrow('Metadata "Description" is required') }) test('validate long Description', () => { const metaData = { - Creator: 'test Creator', + ...minimumValidMetadata, Description: 'test Description test Description test Description test Description test Description test Description ', - Language: 'test Language', - Publisher: 'test Publisher', - Title: 'test Title', } expect(() => validateMetadata(metaData)).toThrow('MetaData Description: must NOT have more than 80 characters') }) test('validate long Title', () => { const metaData = { - Creator: 'test Creator', - Description: 'test Description', - Language: 'test Language', - Publisher: 'test Publisher', + ...minimumValidMetadata, Title: 'test Title test Title test Title', } expect(() => validateMetadata(metaData)).toThrow('MetaData Title: must NOT have more than 30 characters') }) + + test('validate string with line brake', () => { + const metaData = { + ...minimumValidMetadata, + Description: `test + Description + test`, + } + expect(() => validateMetadata(metaData)).not.toThrowError() + }) + + test('validate null value', () => { + const metaData = { + ...minimumValidMetadata, + Creator: null, + } + expect(() => validateMetadata(metaData)).toThrow('MetaData Creator: must be string') + }) + + test('validate undefined value', () => { + const metaData = { + ...minimumValidMetadata, + Description: undefined, + } + expect(() => validateMetadata(metaData)).toThrow('Metadata "Description" is required') + }) + + test('validate Object value', () => { + const metaData = { + ...minimumValidMetadata, + Description: { key: 'value' }, + } + expect(() => validateMetadata(metaData)).toThrow('MetaData Description: must be string') + }) + + test('validate Array value', () => { + const metaData = { + ...minimumValidMetadata, + Description: [1, 2, 3], + } + expect(() => validateMetadata(metaData)).toThrow('MetaData Description: must be string') + }) + + test('validate Boolean value', () => { + const metaData = { + ...minimumValidMetadata, + Description: true, + } + expect(() => validateMetadata(metaData)).toThrow('MetaData Description: must be string') + }) + + test('validate NaN value', () => { + const metaData = { + ...minimumValidMetadata, + Description: NaN, + } + expect(() => validateMetadata(metaData)).toThrow('MetaData Description: must be string') + }) + + test('validate wrong language format', () => { + const metaDataLangTest = { + ...minimumValidMetadata, + Language: 'en', + } + expect(() => validateMetadata(metaDataLangTest)).toThrow('MetaData Language: must match pattern \"^\\w{3}(,\\w{3})*$\"') // prettier-ignore + + const metaData = { + ...minimumValidMetadata, + Language: 'en,it', + } + expect(() => validateMetadata(metaData)).toThrow('MetaData Language: must match pattern \"^\\w{3}(,\\w{3})*$\"') // prettier-ignore + }) + + test('validate wrong illustration', () => { + const metaData = { + ...minimumValidMetadata, + 'Illustration_48x48@1': 'text is not png', + } + expect(() => validateMetadata(metaData)).toThrow('MetaData Illustration_48x48@1: must match regex pattern') + }) }) })