diff --git a/src/config.ts b/src/config.ts index 9112e5b7a..eb1ff83bb 100644 --- a/src/config.ts +++ b/src/config.ts @@ -71,7 +71,7 @@ const config = { ], js: [ 'startup', - 'jquery', + 'jquery,mediawiki', 'mediawiki.base', 'mediawiki.util', 'site', diff --git a/src/util/const.ts b/src/util/const.ts index 7dfc9d14a..bb144f929 100644 --- a/src/util/const.ts +++ b/src/util/const.ts @@ -16,5 +16,6 @@ export const WEBP_CANDIDATE_IMAGE_MIME_TYPE = /image+[/]+(jpeg|png)/ export const DEFAULT_WIKI_PATH = 'wiki/' export const ALL_READY_FUNCTION = /function allReady\( modules \) {/ export const DO_PROPAGATION = /mw\.requestIdleCallback\( doPropagation, \{ timeout: 1 \} \);/ +export const LOAD_PHP = /script.src = ".*load\.php.*";/ export const WEBP_HANDLER_URL = 'https://gist.githubusercontent.com/rgaudin/60bb9cc6f187add506584258028b8ee1/raw/9d575b8e25d67eed2a9c9a91d3e053a0062d2fc7/web-handler.js' export const MAX_FILE_DOWNLOAD_RETRIES = 5 diff --git a/src/util/dump.ts b/src/util/dump.ts index bd809381f..43233c98f 100644 --- a/src/util/dump.ts +++ b/src/util/dump.ts @@ -10,7 +10,7 @@ import MediaWiki from '../MediaWiki.js' import { ZimCreator, ZimArticle } from '@openzim/libzim' import { Dump } from '../Dump.js' import fs from 'fs' -import { DO_PROPAGATION, ALL_READY_FUNCTION, WEBP_HANDLER_URL } from './const.js' +import { DO_PROPAGATION, ALL_READY_FUNCTION, WEBP_HANDLER_URL, LOAD_PHP } from './const.js' import * as path from 'path' import { fileURLToPath } from 'url' @@ -101,11 +101,11 @@ export async function downloadAndSaveModule(zimCreator: ZimCreator, mw: MediaWik // it also removes requestIdleCallback as in our case window is idle after all script tags are called but those script tags // will require the functions which would have been loaded by doPropagation. 
/**
 * Rewrites the source of the MediaWiki `startup` module so it can run offline.
 *
 * Three independent, first-match-only substitutions are applied:
 *  - the text matched by `DO_PROPAGATION` becomes a direct `doPropagation();` call,
 *  - the text matched by `ALL_READY_FUNCTION` gets `return true;` injected right
 *    after the opening brace of `allReady`,
 *  - the text matched by `LOAD_PHP` becomes an empty `script.src` assignment.
 *
 * The input is accepted when BOTH loader hooks (`ALL_READY_FUNCTION` and
 * `DO_PROPAGATION`) are found, or when the `LOAD_PHP` assignment is found.
 * NOTE(review): the `LOAD_PHP` path presumably targets wikis whose startup
 * module pulls its base modules through a load.php script tag — confirm.
 *
 * @param jsCode - Source text of the startup module.
 * @returns The patched source; patterns that did not match leave the text untouched.
 * @throws Error when neither acceptance condition holds. The message keeps the
 *   historical `unable to hack startup module` prefix and lists the constants
 *   that did not match.
 */
function hackStartUpModule(jsCode: string): string {
  // Probe every marker once so the guard and the error message agree.
  const found = {
    ALL_READY_FUNCTION: ALL_READY_FUNCTION.test(jsCode),
    DO_PROPAGATION: DO_PROPAGATION.test(jsCode),
    LOAD_PHP: LOAD_PHP.test(jsCode),
  }

  const hasLoaderHooks = found.ALL_READY_FUNCTION && found.DO_PROPAGATION
  if (!hasLoaderHooks && !found.LOAD_PHP) {
    const unmatched = Object.entries(found)
      .filter(([, matched]) => !matched)
      .map(([name]) => name)
    throw new Error(`unable to hack startup module (unmatched: ${unmatched.join(', ')})`)
  }

  // A pattern that is absent simply leaves the text unchanged, so all three
  // substitutions can be chained unconditionally.
  return jsCode
    .replace(DO_PROPAGATION, 'doPropagation();')
    .replace(ALL_READY_FUNCTION, 'function allReady( modules ) { return true;')
    .replace(LOAD_PHP, 'script.src ="";')
}