From 9cfa2e90648e841307d308fd03b578fb5537df87 Mon Sep 17 00:00:00 2001 From: Judah Gabriel Himango Date: Thu, 1 Jul 2021 10:15:08 -0700 Subject: [PATCH] Added handling for sites with redirects in head, fixing https://github.com/pwa-builder/CloudAPK/issues/78#issuecomment-872132508 --- ManifestService.cs | 46 +++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 41 insertions(+), 5 deletions(-) diff --git a/ManifestService.cs b/ManifestService.cs index 0ed6fe3..d49a348 100644 --- a/ManifestService.cs +++ b/ManifestService.cs @@ -33,8 +33,8 @@ public ManifestService(Uri url, ILogger logger) /// public async Task Run() { - var document = await LoadPage(); - var manifestNode = LoadManifestNode(document); + var document = await LoadPage(this.url); + var manifestNode = await LoadManifestNode(document); var manifestContext = await LoadManifestInfo(manifestNode); var (manifestObject, dynamicManifest) = DeserializeManifest(manifestContext.Json); var manifestScore = GetManifestScore(manifestObject); @@ -46,10 +46,18 @@ public async Task Run() }; } - private HtmlNode LoadManifestNode(HtmlDocument document) + private async Task LoadManifestNode(HtmlDocument document) { var manifestNode = document.DocumentNode?.SelectSingleNode("//head/link[@rel='manifest']") ?? document.DocumentNode?.SelectSingleNode("//link[@rel='manifest']"); // We've witnesses some sites in the wild with no <head>, and they put the manifest link right in the HTML. 
+ + // If we can't find a manifest node, see if we're being redirected via a <meta> tag + // See https://github.com/pwa-builder/CloudAPK/issues/78#issuecomment-872132508 + if (manifestNode == null) + { + manifestNode = await TryLoadManifestNodeFromRedirectTag(document); + } + if (manifestNode == null) { var error = new ManifestNotFoundException("Unable to find manifest node in document"); @@ -64,6 +72,34 @@ private HtmlNode LoadManifestNode(HtmlDocument document) return manifestNode; } + private async Task TryLoadManifestNodeFromRedirectTag(HtmlDocument document) + { + // Redirect tags look like <meta http-equiv="refresh" content="0; url=https://example.com/"> + + // Do we have a redirect? If so, follow that and then see if we can load the manifest node. + var redirectTag = document.DocumentNode?.SelectSingleNode("//head/meta[@http-equiv='refresh']"); + if (redirectTag != null) + { + var redirectSettings = redirectTag.Attributes["content"]?.Value ?? string.Empty; + var redirectRegex = "url\\s*=\\s*['|\"]*([^'\"]+)"; + var regexMatch = System.Text.RegularExpressions.Regex.Match(redirectSettings, redirectRegex, System.Text.RegularExpressions.RegexOptions.IgnoreCase); + if (regexMatch.Success && regexMatch.Groups.Count == 2) + { + var redirectUrl = regexMatch.Groups[1].Value; + + // Make sure it's a legit URI, and make sure it's not the page we're already on. + if (Uri.TryCreate(this.url, redirectUrl, out var redirectUri) && redirectUri != this.url) + { + logger.LogInformation("Page contained redirect tag in <head>. Redirecting to {url}", redirectUrl); + var redirectDoc = await LoadPage(redirectUri); + return await LoadManifestNode(redirectDoc); + } + } + } + + return null; + } + private async Task TryFetchHttpWithHttp2Fallback(Uri url, string? acceptHeader) { try @@ -228,7 +264,7 @@ private async Task LoadManifestInfo(string manifestHref, HtmlNo throw new ManifestNotFoundException($"Unable to detect manifest. 
Attempted manifest download at {manifestAbsoluteUrl} and {localPathManifestUrl}, but both failed."); } - private async Task LoadPage() + private async Task LoadPage(Uri url) { var web = new HtmlWeb { @@ -236,7 +272,7 @@ private async Task LoadPage() }; try { - return await web.LoadFromWebAsync(this.url, null, null); + return await web.LoadFromWebAsync(url, null, null); } catch (Exception error) {