Skip to content

Commit

Permalink
Fix incorrect processing of titles with question marks and anchor par…
Browse files Browse the repository at this point in the history
…ameters #806 (#807)
  • Loading branch information
Jaifroid authored Jan 31, 2022
1 parent 0a78496 commit b143e83
Show file tree
Hide file tree
Showing 4 changed files with 91 additions and 101 deletions.
74 changes: 33 additions & 41 deletions service-worker.js
Original file line number Diff line number Diff line change
Expand Up @@ -187,14 +187,19 @@ let fetchCaptureEnabled = false;
/**
* Intercept selected Fetch requests from the browser window
*/
self.addEventListener('fetch', function (event) {
self.addEventListener('fetch', function (event) {
// Only cache GET requests
if (event.request.method !== "GET") return;
// Remove any querystring before requesting from the cache
var rqUrl = event.request.url.replace(/\?[^?]+$/i, '');
var rqUrl = event.request.url;
var urlObject = new URL(rqUrl);
// Test the URL with parameters removed
var strippedUrl = urlObject.pathname;
// Select cache depending on request format
var cache = /\.zim\//i.test(rqUrl) ? ASSETS_CACHE : APP_CACHE;
var cache = /\.zim\//i.test(strippedUrl) ? ASSETS_CACHE : APP_CACHE;
if (cache === ASSETS_CACHE && !fetchCaptureEnabled) return;
// For APP_CACHE assets, we should ignore any querystring (whereas it should be conserved for ZIM assets,
// especially .js assets, where it may be significant). Anchor targets are irreleveant in this context.
if (cache === APP_CACHE) rqUrl = strippedUrl;
event.respondWith(
// First see if the content is in the cache
fromCache(cache, rqUrl).then(function (response) {
Expand All @@ -203,24 +208,24 @@ let fetchCaptureEnabled = false;
}, function () {
// The response was not found in the cache so we look for it in the ZIM
// and add it to the cache if it is an asset type (css or js)
if (cache === ASSETS_CACHE && regexpZIMUrlWithNamespace.test(rqUrl)) {
return fetchRequestFromZIM(event).then(function (response) {
if (cache === ASSETS_CACHE && regexpZIMUrlWithNamespace.test(strippedUrl)) {
return fetchUrlFromZIM(urlObject).then(function (response) {
// Add css or js assets to ASSETS_CACHE (or update their cache entries) unless the URL schema is not supported
if (regexpCachedContentTypes.test(response.headers.get('Content-Type')) &&
!regexpExcludedURLSchema.test(event.request.url)) {
event.waitUntil(updateCache(ASSETS_CACHE, event.request, response.clone()));
event.waitUntil(updateCache(ASSETS_CACHE, rqUrl, response.clone()));
}
return response;
}).catch(function (msgPortData, title) {
console.error('Invalid message received from app.js for ' + title, msgPortData);
}).catch(function (msgPortData) {
console.error('Invalid message received from app.js for ' + strippedUrl, msgPortData);
return msgPortData;
});
} else {
// It's not an asset, or it doesn't match a ZIM URL pattern, so we should fetch it with Fetch API
return fetch(event.request).then(function (response) {
// If request was successful, add or update it in the cache, but be careful not to cache the ZIM archive itself!
if (!regexpExcludedURLSchema.test(rqUrl) && !/\.zim\w{0,2}$/i.test(rqUrl)) {
event.waitUntil(updateCache(APP_CACHE, event.request, response.clone()));
if (!regexpExcludedURLSchema.test(event.request.url) && !/\.zim\w{0,2}$/i.test(strippedUrl)) {
event.waitUntil(updateCache(APP_CACHE, rqUrl, response.clone()));
}
return response;
}).catch(function (error) {
Expand Down Expand Up @@ -271,25 +276,22 @@ let fetchCaptureEnabled = false;
});

/**
* Handles fetch events that need to be extracted from the ZIM
* Handles URLs that need to be extracted from the ZIM archive
*
* @param {Event} fetchEvent The fetch event to be processed
* @param {URL} urlObject The URL object to be processed for extraction from the ZIM
* @returns {Promise<Response>} A Promise for the Response, or rejects with the invalid message port data
*/
function fetchRequestFromZIM(fetchEvent) {
function fetchUrlFromZIM(urlObject) {
return new Promise(function (resolve, reject) {
var nameSpace;
var title;
var titleWithNameSpace;
var regexpResult = regexpZIMUrlWithNamespace.exec(fetchEvent.request.url);
var prefix = regexpResult[1];
nameSpace = regexpResult[2];
title = regexpResult[3];
// Note that titles may contain bare question marks or hashes, so we must use only the pathname without any URL parameters.
// Be sure that you haven't encoded any querystring along with the URL.
var barePathname = decodeURIComponent(urlObject.pathname);
var partsOfZIMUrl = regexpZIMUrlWithNamespace.exec(barePathname);
var prefix = partsOfZIMUrl[1];
var nameSpace = partsOfZIMUrl[2];
var title = partsOfZIMUrl[3];

// We need to remove the potential parameters in the URL
title = removeUrlParameters(decodeURIComponent(title));

titleWithNameSpace = nameSpace + '/' + title;
var titleWithNameSpace = nameSpace + '/' + title;

// Let's instantiate a new messageChannel, to allow app.js to give us the content
var messageChannel = new MessageChannel();
Expand Down Expand Up @@ -333,15 +335,6 @@ function fetchRequestFromZIM(fetchEvent) {
});
}

/**
* Removes parameters and anchors from a URL
* @param {type} url The URL to be processed
* @returns {String} The same URL without its parameters and anchors
*/
function removeUrlParameters(url) {
return url.replace(/([^?#]+)[?#].*$/, '$1');
}

/**
* Looks up a Request in a cache and returns a Promise for the matched Response
* @param {String} cache The name of the cache to look in
Expand All @@ -350,12 +343,10 @@ function removeUrlParameters(url) {
*/
function fromCache(cache, requestUrl) {
// Prevents use of Cache API if user has disabled it
if (!useAppCache && cache === APP_CACHE || !useAssetsCache && cache === ASSETS_CACHE) return Promise.reject('disabled');
if (!(useAppCache && cache === APP_CACHE || useAssetsCache && cache === ASSETS_CACHE)) return Promise.reject('disabled');
return caches.open(cache).then(function (cacheObj) {
return cacheObj.match(requestUrl).then(function (matching) {
if (!matching || matching.status === 404) {
return Promise.reject('no-match');
}
if (!matching || matching.status === 404) return Promise.reject('no-match');
console.debug('[SW] Supplying ' + requestUrl + ' from ' + cache + '...');
return matching;
});
Expand All @@ -365,15 +356,16 @@ function fromCache(cache, requestUrl) {
/**
* Stores or updates in a cache the given Request/Response pair
* @param {String} cache The name of the cache to open
* @param {Request} request The original Request object
* @param {Request|String} request The original Request object or the URL string requested
* @param {Response} response The Response received from the server/ZIM
* @returns {Promise} A Promise for the update action
*/
function updateCache(cache, request, response) {
// Prevents use of Cache API if user has disabled it
if (!useAppCache && cache === APP_CACHE || !useAssetsCache && cache === ASSETS_CACHE) return Promise.resolve();
if (!response.ok || !(useAppCache && cache === APP_CACHE || useAssetsCache && cache === ASSETS_CACHE))
return Promise.resolve();
return caches.open(cache).then(function (cacheObj) {
console.debug('[SW] Adding ' + request.url + ' to ' + cache + '...');
console.debug('[SW] Adding ' + (request.url || request) + ' to ' + cache + '...');
return cacheObj.put(request, response);
});
}
Expand Down
19 changes: 11 additions & 8 deletions tests/tests.js
Original file line number Diff line number Diff line change
Expand Up @@ -118,14 +118,17 @@ define(['jquery', 'zimArchive', 'zimDirEntry', 'util', 'uiUtil', 'utf8'],
assert.equal(util.allCaseFirstLetters(testString6, "full").indexOf("ΚΑΛΆ ΝΕΡΆ ΜΑΓΝΗΣΊΑ ŽIŽEK") >= 0, true, "All Unicode letters should be uppercase");
});
QUnit.test("check removal of parameters in URL", function(assert) {
var testUrl1 = "A/question.html";
var testUrl2 = "A/question.html?param1=toto&param2=titi";
var testUrl3 = "A/question.html?param1=toto&param2=titi#anchor";
var testUrl4 = "A/question.html#anchor";
assert.equal(uiUtil.removeUrlParameters(testUrl1), testUrl1);
assert.equal(uiUtil.removeUrlParameters(testUrl2), testUrl1);
assert.equal(uiUtil.removeUrlParameters(testUrl3), testUrl1);
assert.equal(uiUtil.removeUrlParameters(testUrl4), testUrl1);
var baseUrl = "A/Che cosa è l'amore?.html";
var testUrls = [
"A/Che%20cosa%20%C3%A8%20l'amore%3F.html?param1=toto&param2=titi",
"A/Che%20cosa%20%C3%A8%20l'amore%3F.html?param1=toto&param2=titi#anchor",
"A/Che%20cosa%20%C3%A8%20l'amore%3F.html#anchor"
];
testUrls.forEach(function (testUrl) {
assert.equal(decodeURIComponent(
uiUtil.removeUrlParameters(testUrl)
), baseUrl);
});
});

QUnit.module("ZIM initialisation");
Expand Down
Loading

0 comments on commit b143e83

Please sign in to comment.