From ebd2a4d8d1e8258d4903704be376ca5fb0b1c03c Mon Sep 17 00:00:00 2001
From: Travis Briggs
Date: Mon, 15 Jul 2024 09:33:50 -0700
Subject: [PATCH 1/2] Set 'Referer' header

Send a 'Referer' header with content downloads. The header is merged into
the headers already present in arrayBufferRequestOptions rather than
replacing them, so that previously configured headers (including the
'If-None-Match' value set just before the image request) are still sent.

---
 src/Downloader.ts | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/src/Downloader.ts b/src/Downloader.ts
index febf00d4..d9c147a9 100644
--- a/src/Downloader.ts
+++ b/src/Downloader.ts
@@ -520,7 +520,9 @@ class Downloader {
       if (this.optimisationCacheUrl && isImageUrl(url)) {
         this.downloadImage(url, handler)
       } else {
-        const resp = await axios(url, this.arrayBufferRequestOptions)
+        // Add a 'Referer' header (keeping the existing headers) to get around WMF domain origin restrictions.
+        // See: https://github.com/openzim/mwoffliner/issues/2061
+        const resp = await axios(url, { ...this.arrayBufferRequestOptions, headers: { ...this.arrayBufferRequestOptions.headers, Referer: 'https://localhost/' } })
         await this.getCompressedBody(resp)
         handler(null, {
           responseHeaders: resp.headers,
@@ -544,7 +546,9 @@ class Downloader {
           if (s3Resp?.Metadata?.etag) {
             this.arrayBufferRequestOptions.headers['If-None-Match'] = this.removeEtagWeakPrefix(s3Resp.Metadata.etag)
           }
-          const mwResp = await axios(url, this.arrayBufferRequestOptions)
+          // Add a 'Referer' header (keeping 'If-None-Match' and the other headers) to get around WMF domain origin restrictions.
+          // See: https://github.com/openzim/mwoffliner/issues/2061
+          const mwResp = await axios(url, { ...this.arrayBufferRequestOptions, headers: { ...this.arrayBufferRequestOptions.headers, Referer: 'https://localhost/' } })
 
           /* TODO: Code to remove in a few months (February 2023).
           For some reason, it seems a few pictures have 'image/webp'
From 157c2b939974611f9345d2975cc3462bf6cce481 Mon Sep 17 00:00:00 2001
From: Travis Briggs
Date: Fri, 19 Jul 2024 10:06:23 -0700
Subject: [PATCH 2/2] Add test for downloading map images

---
 test/unit/downloader.test.ts | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/test/unit/downloader.test.ts b/test/unit/downloader.test.ts
index aecfa8fd..4dfc34ce 100644
--- a/test/unit/downloader.test.ts
+++ b/test/unit/downloader.test.ts
@@ -126,6 +126,19 @@ describe('Downloader class', () => {
     expect(LondonImage.responseHeaders['content-type']).toMatch(/image\//i)
   })
 
+  test('downloadContent successfully downloads a map image', async () => {
+    const { data: LondonHtml } = await Axios.get('https://en.wikipedia.org/api/rest_v1/page/html/London')
+    const doc = domino.createDocument(LondonHtml)
+    const imgToGet = Array.from(doc.querySelectorAll('.mw-kartographer-map img'))[0]
+    // Fail with a readable assertion (not a TypeError) if the page has no map image.
+    expect(imgToGet).toBeDefined()
+    const imgToGetSrc = imgToGet.getAttribute('src') ?? ''
+    expect(imgToGetSrc).not.toBe('')
+    // Download the map image through the Downloader under test.
+    const LondonImage = await downloader.downloadContent(imgToGetSrc)
+    expect(LondonImage.responseHeaders['content-type']).toMatch(/image\//i)
+  })
+
   describe('getArticle method', () => {
     let dump: Dump
     const wikimediaMobileRenderer = new WikimediaMobileRenderer()