Skip to content

Commit

Permalink
Added all metadata keys into rules to validate.
Browse files Browse the repository at this point in the history
  • Loading branch information
pavel-karatsiuba committed Mar 31, 2023
1 parent 2a0c288 commit 72977cb
Show file tree
Hide file tree
Showing 4 changed files with 184 additions and 82 deletions.
105 changes: 46 additions & 59 deletions src/mwoffliner.lib.ts
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,6 @@ import {
mkdirPromise,
sanitizeString,
saveStaticFiles,
writeFilePromise,
importPolyfillModules,
extractArticleList,
getTmpDirectory,
Expand Down Expand Up @@ -93,7 +92,7 @@ async function execute(argv: any) {
publisher: _publisher,
outputDirectory: _outputDirectory,
addNamespaces: _addNamespaces,
customZimFavicon: _customZimFavicon,
customZimFavicon,
optimisationCacheUrl,
customFlavour,
} = argv
Expand All @@ -107,7 +106,6 @@ async function execute(argv: any) {
if (articleList) articleList = String(articleList)
if (articleListToIgnore) articleListToIgnore = String(articleListToIgnore)
const publisher = _publisher || config.defaults.publisher
let customZimFavicon = _customZimFavicon

/* HTTP user-agent string */
// const adminEmail = argv.adminEmail;
Expand Down Expand Up @@ -190,6 +188,7 @@ async function execute(argv: any) {
Language: mwMetaData.langIso3,
Publisher: publisher,
Title: customZimTitle || mwMetaData.title,
'Illustration_48x48@1': await getIllustrationMetadata(),
}
validateMetadata(metaDataRequiredKeys)

Expand Down Expand Up @@ -248,35 +247,6 @@ async function execute(argv: any) {
.map((a: string) => Number(a))
: []

/* ZIM custom Favicon */
if (customZimFavicon) {
const faviconPath = path.join(tmpDirectory, 'favicon.png') // Later the PNG conversion (if necessary)
const faviconIsRemote = customZimFavicon.includes('http')
logger.log(`${faviconIsRemote ? 'Downloading' : 'Moving'} custom favicon to [${faviconPath}]`)
let content
if (faviconIsRemote) {
logger.log(`Downloading remote zim favicon from [${customZimFavicon}]`)
content = await axios
.get(customZimFavicon, downloader.arrayBufferRequestOptions)
.then((a) => a.data)
.catch(() => {
throw new Error(`Failed to download custom zim favicon from [${customZimFavicon}]`)
})
} else {
try {
content = fs.readFileSync(customZimFavicon)
} catch (err) {
throw new Error(`Failed to read custom zim favicon from [${customZimFavicon}]`)
}
}
fs.writeFileSync(faviconPath, content)
customZimFavicon = faviconPath

if (!fs.existsSync(customZimFavicon)) {
throw new Error(`Path ${customZimFavicon} is not a valid PNG file.`)
}
}

/* ********************************* */
/* GET CONTENT ********************* */
/* ********************************* */
Expand Down Expand Up @@ -391,20 +361,23 @@ async function execute(argv: any) {
logger.log(`Writing zim to [${outZim}]`)
dump.outFile = outZim

const metadata = {
...metaDataRequiredKeys,
Tags: dump.computeZimTags(),
Name: dump.computeFilenameRadical(false, true, true),
Flavour: dump.computeFlavour(),
...(dump.opts.customZimLongDescription ? { LongDescription: `${dump.opts.customZimLongDescription}` } : {}),
}
validateMetadata(metadata)

const zimCreator = new ZimCreator(
{
fileName: outZim,
fullTextIndexLanguage: dump.opts.withoutZimFullTextIndex ? '' : dump.mwMetaData.langIso3,
welcome: dump.opts.mainPage ? dump.opts.mainPage : 'index',
compression: 'zstd',
},
{
...metaDataRequiredKeys,
Tags: dump.computeZimTags(),
Name: dump.computeFilenameRadical(false, true, true),
Flavour: dump.computeFlavour(),
...(dump.opts.customZimLongDescription ? { LongDescription: `${dump.opts.customZimLongDescription}` } : {}),
} as any,
metadata as any,
)
const scraperArticle = new ZimArticle({
ns: 'M',
Expand All @@ -426,7 +399,7 @@ async function execute(argv: any) {

const article = new ZimArticle({ url: `${config.output.dirs.mediawiki}/style.css`, data: finalCss, ns: '-' })
zimCreator.addArticle(article)
await saveFavicon(dump, zimCreator)
await saveFavicon(zimCreator, metaDataRequiredKeys['Illustration_48x48@1'])

await getThumbnailsData()

Expand Down Expand Up @@ -502,25 +475,31 @@ async function execute(argv: any) {
})
}

async function saveFavicon(dump: Dump, zimCreator: ZimCreator): Promise<any> {
logger.log('Saving favicon.png...')

async function saveFavicon(zimCreator: ZimCreator, faviconPath: string): Promise<any> {
async function getIllustrationMetadata(): Promise<Buffer> {
if (customZimFavicon) {
const faviconIsRemote = customZimFavicon.includes('http')
let content
if (faviconIsRemote) {
logger.log(`Downloading remote zim favicon from [${customZimFavicon}]`)
content = await axios
.get(customZimFavicon, downloader.arrayBufferRequestOptions)
.then((a) => a.data)
.catch(() => {
throw new Error(`Failed to download custom zim favicon from [${customZimFavicon}]`)
})
} else {
try {
content = fs.readFileSync(customZimFavicon)
} catch (err) {
throw new Error(`Failed to read custom zim favicon from [${customZimFavicon}]`)
}
}
try {
const source = await fs.promises.readFile(faviconPath)
const data = await sharp(source).resize(48, 48, { fit: sharp.fit.inside, withoutEnlargement: true }).png().toBuffer()
const illustrationMetadata = new ZimArticle({ url: 'Illustration_48x48@1', mimeType: 'image/png', data, ns: 'M' })
zimCreator.addArticle(illustrationMetadata)
const article = new ZimArticle({ url: 'favicon', mimeType: 'image/png', data, ns: '-' })
return zimCreator.addArticle(article)
return sharp(content).resize(48, 48, { fit: sharp.fit.inside, withoutEnlargement: true }).png().toBuffer()
} catch (e) {
throw new Error('Failed to save favicon and IllustrationMetadata using sharp')
throw new Error('Failed to read or process IllustrationMetadata using sharp')
}
}

if (customZimFavicon) {
return saveFavicon(zimCreator, customZimFavicon)
}
const body = await downloader.getJSON<any>(mw.siteInfoUrl())
const entries = body.query.general
if (!entries.logo) {
Expand All @@ -530,11 +509,19 @@ async function execute(argv: any) {
}

const parsedUrl = urlParser.parse(entries.logo)
const faviconPath = path.join(tmpDirectory, 'favicon.png')
const logoUrl = parsedUrl.protocol ? entries.logo : mw.baseUrl.protocol + entries.logo
const logoContent = await downloader.downloadContent(logoUrl)
await writeFilePromise(faviconPath, logoContent.content, null)
return saveFavicon(zimCreator, faviconPath)
const { content } = await downloader.downloadContent(logoUrl)
return sharp(content).resize(48, 48, { fit: sharp.fit.inside, withoutEnlargement: true }).png().toBuffer()
}

/**
 * Adds the given image bytes to the ZIM file as the `favicon` article
 * (namespace `-`, MIME type `image/png`).
 *
 * @param zimCreator - creator the favicon article is added to.
 * @param data - image bytes used as the article payload.
 * @returns whatever `zimCreator.addArticle` resolves to.
 * @throws Error with message 'Failed to save favicon' when creating or adding the article fails.
 */
async function saveFavicon(zimCreator: ZimCreator, data: Buffer): Promise<any> {
  logger.log('Saving favicon.png...')
  try {
    const article = new ZimArticle({ url: 'favicon', mimeType: 'image/png', data, ns: '-' })
    // `await` keeps a rejection from addArticle() inside this try block. A bare
    // `return <promise>` would leave the function before the promise settles,
    // so the catch below would never see an asynchronous failure.
    // (Harmless if addArticle turns out to return a plain value.)
    return await zimCreator.addArticle(article)
  } catch (e) {
    throw new Error('Failed to save favicon')
  }
}

function getMainPage(dump: Dump, zimCreator: ZimCreator, downloader: Downloader) {
Expand Down
37 changes: 30 additions & 7 deletions src/util/metaData.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,22 +3,48 @@ import AjvModule from 'ajv'
const Ajv = AjvModule.default
const ajv = new Ajv({ allErrors: true })

// Custom ajv keyword `checkRegexFromBuffer`: the schema value is a regex source
// string that is tested against the 'binary' string form of the validated data.
// Data that is not a Buffer never passes.
ajv.addKeyword({
  keyword: 'checkRegexFromBuffer',
  validate: (pattern: string, candidate) => {
    if (!Buffer.isBuffer(candidate)) {
      return false
    }
    return new RegExp(pattern).test(candidate.toString('binary'))
  },
  error: {
    message: 'must match regex pattern',
  },
})

const schema = {
type: 'object',
properties: {
Name: { type: 'string', minLength: 1 },
Creator: { type: 'string', minLength: 1 },
Description: { type: 'string', maxLength: 80, minLength: 1 },
Language: { type: 'string', minLength: 1 },
Language: { type: 'string', minLength: 1, pattern: '^\\w{3}(,\\w{3})*$' },
Publisher: { type: 'string', minLength: 1 },
Title: { type: 'string', maxLength: 30, minLength: 1 },
Date: { type: 'string', maxLength: 10, minLength: 10 },
'Illustration_48x48@1': { checkRegexFromBuffer: '^\x89\x50\x4e\x47\x0d\x0a\x1a\x0a.+' },
LongDescription: { type: 'string', maxLength: 4000 },
License: { type: 'string' },
Tags: { type: 'string' },
Relation: { type: 'string' },
Flavour: { type: 'string' },
Source: { type: 'string' },
Counter: { type: 'string' },
Scraper: { type: 'string' },
},
required: ['Creator', 'Description', 'Language', 'Publisher', 'Title'],
required: ['Creator', 'Description', 'Language', 'Publisher', 'Title', 'Illustration_48x48@1'],
additionalProperties: true,
}

const validate = ajv.compile(schema)

export const validateMetadata = (metaData) => {
export const validateMetadata = (metaData): void => {
const valid = validate(metaData)

if (!valid) {
Expand All @@ -31,9 +57,6 @@ export const validateMetadata = (metaData) => {
if (error.keyword === 'minLength') {
throw new Error(`Metadata "${keyword}" is required`)
}
if (error.keyword === 'maxLength') {
throw new Error(`MetaData ${keyword}: ${error.message}`)
}
throw new Error(validate.errors[0].message)
throw new Error(`MetaData ${keyword}: ${error.message}`)
}
}
Binary file added test/unit/mock/1x1.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
124 changes: 108 additions & 16 deletions test/unit/util.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -415,47 +415,139 @@ describe('Utils', () => {
})

describe('metaData', () => {
const pngImage = fs.readFileSync(`${__dirname}/mock/1x1.png`)

const minimumValidMetadata = {
Creator: 'the creator',
Description: 'test Description',
Language: 'eng,ita',
Publisher: 'test Publisher',
Title: 'test Title',
'Illustration_48x48@1': pngImage,
}

test('validate valid metadata', () => {
expect(() => validateMetadata(minimumValidMetadata)).not.toThrowError()
})

test('validate with unicode chars', () => {
const metaData = {
...minimumValidMetadata,
Description: '😎 Emoji, ❤ Hearts, 💲 Currencies, → Arrows, ☆ Stars',
}
expect(() => validateMetadata(metaData)).not.toThrowError()
})

test('validate empty string', () => {
const metaData = {
...minimumValidMetadata,
Creator: '',
Description: 'test Description',
Language: 'test Language',
Publisher: 'test Publisher',
Title: 'test Title',
}
expect(() => validateMetadata(metaData)).toThrow('Metadata "Creator" is required')
})

test('validate missed metaData key', () => {
const metaData = {
Creator: 'test Creator',
Language: 'test Language',
Publisher: 'test Publisher',
Title: 'test Title',
...minimumValidMetadata,
}
delete metaData.Description
expect(() => validateMetadata(metaData)).toThrow('Metadata "Description" is required')
})

test('validate long Description', () => {
const metaData = {
Creator: 'test Creator',
...minimumValidMetadata,
Description: 'test Description test Description test Description test Description test Description test Description ',
Language: 'test Language',
Publisher: 'test Publisher',
Title: 'test Title',
}
expect(() => validateMetadata(metaData)).toThrow('MetaData Description: must NOT have more than 80 characters')
})

test('validate long Title', () => {
const metaData = {
Creator: 'test Creator',
Description: 'test Description',
Language: 'test Language',
Publisher: 'test Publisher',
...minimumValidMetadata,
Title: 'test Title test Title test Title',
}
expect(() => validateMetadata(metaData)).toThrow('MetaData Title: must NOT have more than 30 characters')
})

test('validate string with line brake', () => {
const metaData = {
...minimumValidMetadata,
Description: `test
Description
test`,
}
expect(() => validateMetadata(metaData)).not.toThrowError()
})

test('validate null value', () => {
const metaData = {
...minimumValidMetadata,
Creator: null,
}
expect(() => validateMetadata(metaData)).toThrow('MetaData Creator: must be string')
})

test('validate undefined value', () => {
const metaData = {
...minimumValidMetadata,
Description: undefined,
}
expect(() => validateMetadata(metaData)).toThrow('Metadata "Description" is required')
})

test('validate Object value', () => {
const metaData = {
...minimumValidMetadata,
Description: { key: 'value' },
}
expect(() => validateMetadata(metaData)).toThrow('MetaData Description: must be string')
})

test('validate Array value', () => {
const metaData = {
...minimumValidMetadata,
Description: [1, 2, 3],
}
expect(() => validateMetadata(metaData)).toThrow('MetaData Description: must be string')
})

test('validate Boolean value', () => {
const metaData = {
...minimumValidMetadata,
Description: true,
}
expect(() => validateMetadata(metaData)).toThrow('MetaData Description: must be string')
})

test('validate NaN value', () => {
const metaData = {
...minimumValidMetadata,
Description: NaN,
}
expect(() => validateMetadata(metaData)).toThrow('MetaData Description: must be string')
})

test('validate wrong language format', () => {
  // A single two-letter code and a comma-separated list of two-letter codes
  // must both be rejected with the same pattern error.
  const expectedMessage = 'MetaData Language: must match pattern \"^\\w{3}(,\\w{3})*$\"' // prettier-ignore

  for (const badLanguage of ['en', 'en,it']) {
    const metaData = {
      ...minimumValidMetadata,
      Language: badLanguage,
    }
    expect(() => validateMetadata(metaData)).toThrow(expectedMessage)
  }
})

test('validate wrong illustration', () => {
const metaData = {
...minimumValidMetadata,
'Illustration_48x48@1': 'text is not png',
}
expect(() => validateMetadata(metaData)).toThrow('MetaData Illustration_48x48@1: must match regex pattern')
})
})
})

0 comments on commit 72977cb

Please sign in to comment.