Skip to content

Commit

Permalink
sanitize metadata mandatory fields
Browse files: browse the repository at this point in the history
  • Loading branch information
pavel-karatsiuba committed Mar 22, 2023
1 parent 7119f8c commit 0f3c6c5
Show file tree
Hide file tree
Showing 6 changed files with 254 additions and 40 deletions.
168 changes: 150 additions & 18 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@
"@types/rimraf": "^3.0.2",
"@types/semver": "^7.3.13",
"@types/sharp": "^0.31.1",
"ajv": "^8.12.0",
"async": "^3.2.4",
"aws-sdk": "^2.1295.0",
"axios": "^1.3.2",
Expand Down
39 changes: 17 additions & 22 deletions src/mwoffliner.lib.ts
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ import {
importPolyfillModules,
extractArticleList,
getTmpDirectory,
validateMetadata,
} from './util/index.js'
import S3 from './S3.js'
import RedisStore from './RedisStore.js'
Expand Down Expand Up @@ -108,8 +109,6 @@ async function execute(argv: any) {
const publisher = _publisher || config.defaults.publisher
let customZimFavicon = _customZimFavicon

const zimMetadataMandatoryKeys = ['Creator', 'Description', 'Language', 'Name', 'Publisher', 'Title']

/* HTTP user-agent string */
// const adminEmail = argv.adminEmail;
if (!isValidEmail(adminEmail)) {
Expand Down Expand Up @@ -185,6 +184,15 @@ async function execute(argv: any) {
logger.error('FATAL - Failed to get MediaWiki Metadata')
throw err
}
const metaDataRequiredKeys = {
Creator: mwMetaData.creator,
Description: customZimDescription || mwMetaData.subTitle,
Language: mwMetaData.langIso3,
Publisher: publisher,
Title: customZimTitle || mwMetaData.title,
}
validateMetadata(metaDataRequiredKeys)

// Sanitizing main page
let mainPage = articleList ? '' : mwMetaData.mainPage
if (customMainPage) {
Expand Down Expand Up @@ -383,33 +391,20 @@ async function execute(argv: any) {
logger.log(`Writing zim to [${outZim}]`)
dump.outFile = outZim

const zimMetadata = {
Tags: dump.computeZimTags(),
Language: dump.mwMetaData.langIso3,
Title: dump.opts.customZimTitle || dump.mwMetaData.title,
Name: dump.computeFilenameRadical(false, true, true),
Flavour: dump.computeFlavour(),
Description: dump.opts.customZimDescription || dump.mwMetaData.subTitle,
...(dump.opts.customZimLongDescription ? { LongDescription: `${dump.opts.customZimLongDescription}` } : {}),
Creator: dump.mwMetaData.creator,
Publisher: dump.opts.publisher,
}

for (const key of zimMetadataMandatoryKeys) {
if (!zimMetadata[key]) {
logger.error(`Metadata "${key}" is required`)
return
}
}

const zimCreator = new ZimCreator(
{
fileName: outZim,
fullTextIndexLanguage: dump.opts.withoutZimFullTextIndex ? '' : dump.mwMetaData.langIso3,
welcome: dump.opts.mainPage ? dump.opts.mainPage : 'index',
compression: 'zstd',
},
zimMetadata,
{
...metaDataRequiredKeys,
Tags: dump.computeZimTags(),
Name: dump.computeFilenameRadical(false, true, true),
Flavour: dump.computeFlavour(),
...(dump.opts.customZimLongDescription ? { LongDescription: `${dump.opts.customZimLongDescription}` } : {}),
} as any,
)
const scraperArticle = new ZimArticle({
ns: 'M',
Expand Down
1 change: 1 addition & 0 deletions src/util/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@ export * from './dump.js'
export * from './articleRenderers.js'
export * from './const.js'
export * from './mw-api.js'
export * from './metaData.js'
Loading

0 comments on commit 0f3c6c5

Please sign in to comment.