From 0431b736fd37667d97bc8d22f8c26686c2f35256 Mon Sep 17 00:00:00 2001 From: Pavlo Karatsiuba Date: Thu, 16 Feb 2023 13:52:13 +0100 Subject: [PATCH] Replacing script.src with empty string if it contains load.php. Added test for getting articles from vikidia.org --- src/util/const.ts | 2 +- src/util/dump.ts | 2 +- test/e2e/vikidia.e2e.test.ts | 43 ++++++++++++++++++++++++++++++++++++ 3 files changed, 45 insertions(+), 2 deletions(-) create mode 100644 test/e2e/vikidia.e2e.test.ts diff --git a/src/util/const.ts b/src/util/const.ts index bfd1238a7..bb144f929 100644 --- a/src/util/const.ts +++ b/src/util/const.ts @@ -16,6 +16,6 @@ export const WEBP_CANDIDATE_IMAGE_MIME_TYPE = /image+[/]+(jpeg|png)/ export const DEFAULT_WIKI_PATH = 'wiki/' export const ALL_READY_FUNCTION = /function allReady\( modules \) {/ export const DO_PROPAGATION = /mw\.requestIdleCallback\( doPropagation, \{ timeout: 1 \} \);/ -export const LOAD_PHP = /script.src = "\/\/en.vikidia.org\/w\/load.php\?debug=true&lang=en&modules=jquery%2Cmediawiki&only=scripts&skin=vector&version=09k3x4y";/ +export const LOAD_PHP = /script\.src = "[^"]*load\.php[^"]*";/ export const WEBP_HANDLER_URL = 'https://gist.githubusercontent.com/rgaudin/60bb9cc6f187add506584258028b8ee1/raw/9d575b8e25d67eed2a9c9a91d3e053a0062d2fc7/web-handler.js' export const MAX_FILE_DOWNLOAD_RETRIES = 5 diff --git a/src/util/dump.ts b/src/util/dump.ts index 92afe85fc..43233c98f 100644 --- a/src/util/dump.ts +++ b/src/util/dump.ts @@ -101,7 +101,7 @@ export async function downloadAndSaveModule(zimCreator: ZimCreator, mw: MediaWik // it also removes requestIdleCallback as in our case window is idle after all script tags are called but those script tags // will require the functions which would have been loaded by doPropagation. 
function hackStartUpModule(jsCode: string) { - if ((!ALL_READY_FUNCTION.test(jsCode) || !DO_PROPAGATION.test(jsCode)) && !LOAD_PHP.test(jsCode)) { + if ((!ALL_READY_FUNCTION.test(jsCode) || !DO_PROPAGATION.test(jsCode)) && !LOAD_PHP.test(jsCode)) { throw new Error('unable to hack startup module') } diff --git a/test/e2e/vikidia.e2e.test.ts b/test/e2e/vikidia.e2e.test.ts new file mode 100644 index 000000000..21c4fc48a --- /dev/null +++ b/test/e2e/vikidia.e2e.test.ts @@ -0,0 +1,43 @@ +import * as mwoffliner from '../../src/mwoffliner.lib.js' +import { execa } from 'execa' +import rimraf from 'rimraf' +import { zimcheckAvailable, zimcheck } from '../util.js' +import 'dotenv/config.js' +import { jest } from '@jest/globals' + +jest.setTimeout(200000) + +describe('vikidia', () => { + const now = new Date() + const testId = `mwo-test-${+now}` + + const parameters = { + mwUrl: 'https://en.vikidia.org', + adminEmail: 'test@kiwix.org', + outputDirectory: testId, + redis: process.env.REDIS, + articleList: 'Alaska', + } + + test('right scrapping from vikidia.org', async () => { + await execa('redis-cli flushall', { shell: true }) + + const outFiles = await mwoffliner.execute(parameters) + + // Exactly one output entry is expected from this run + expect(outFiles).toHaveLength(1) + + if (await zimcheckAvailable()) { + await expect(zimcheck(outFiles[0].outFile)).resolves.not.toThrowError() + } else { + console.log('Zimcheck not installed, skipping test') + } + + // TODO: clear test dir even on failure - this removal is skipped when an assertion above throws + rimraf.sync(`./${testId}`) + + const redisScan = await execa('redis-cli --scan', { shell: true }) + // The key scan must print nothing, i.e. Redis holds no keys after the run + expect(redisScan.stdout).toEqual('') + }) +})