Skip to content

Commit

Permalink
Added handling for sites with redirects in head, fixing pwa-builder/C…
Browse files Browse the repository at this point in the history
  • Loading branch information
JudahGabriel committed Jul 1, 2021
1 parent 48527bf commit 9cfa2e9
Showing 1 changed file with 41 additions and 5 deletions.
46 changes: 41 additions & 5 deletions ManifestService.cs
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,8 @@ public ManifestService(Uri url, ILogger logger)
/// <returns></returns>
public async Task<ManifestResult> Run()
{
var document = await LoadPage();
var manifestNode = LoadManifestNode(document);
var document = await LoadPage(this.url);
var manifestNode = await LoadManifestNode(document);
var manifestContext = await LoadManifestInfo(manifestNode);
var (manifestObject, dynamicManifest) = DeserializeManifest(manifestContext.Json);
var manifestScore = GetManifestScore(manifestObject);
Expand All @@ -46,10 +46,18 @@ public async Task<ManifestResult> Run()
};
}

private HtmlNode LoadManifestNode(HtmlDocument document)
private async Task<HtmlNode> LoadManifestNode(HtmlDocument document)
{
var manifestNode = document.DocumentNode?.SelectSingleNode("//head/link[@rel='manifest']") ??
document.DocumentNode?.SelectSingleNode("//link[@rel='manifest']"); // We've witnessed some sites in the wild with no <head>, and they put the manifest link right in the HTML.

// If we can't find a manifest node, see if we're being redirected via a <meta http-equiv="refresh" content="0; url='https://someotherurl'" /> tag
// See https://github.com/pwa-builder/CloudAPK/issues/78#issuecomment-872132508
if (manifestNode == null)
{
manifestNode = await TryLoadManifestNodeFromRedirectTag(document);
}

if (manifestNode == null)
{
var error = new ManifestNotFoundException("Unable to find manifest node in document");
Expand All @@ -64,6 +72,34 @@ private HtmlNode LoadManifestNode(HtmlDocument document)
return manifestNode;
}

// Upper bound on how many chained <meta http-equiv="refresh"> redirects are followed before giving up.
private const int MaxMetaRedirects = 5;

// Redirect targets currently being followed, oldest first. Used to resolve relative redirect URLs against
// the page that actually contained them and to detect redirect loops. Entries are removed once the
// corresponding redirect has been processed, so the list is empty between top-level calls.
// NOTE(review): this is per-instance state; assumes a single ManifestService is not used concurrently - confirm.
private readonly System.Collections.Generic.List<Uri> pendingMetaRedirects = new System.Collections.Generic.List<Uri>();

/// <summary>
/// Looks for a client-side redirect tag in the document's head, e.g.
/// <c>&lt;meta http-equiv="refresh" content="0; url='https://someotherurl'" /&gt;</c>,
/// and if one is found, loads the redirect target and searches it for the manifest node.
/// </summary>
/// <param name="document">The page in which no manifest link was found.</param>
/// <returns>
/// The manifest node found by following the redirect, or null when the page has no usable redirect tag,
/// the redirect would revisit a page already in the current redirect chain, or the chain is longer than
/// <see cref="MaxMetaRedirects"/>.
/// </returns>
/// <remarks>
/// The redirected page is processed with <c>LoadManifestNode</c>, which may call back into this method
/// for multi-hop redirects; any exception it raises propagates to the caller.
/// </remarks>
private async Task<HtmlNode?> TryLoadManifestNodeFromRedirectTag(HtmlDocument document)
{
    // http-equiv values are case-insensitive ("Refresh" is common in the wild), but XPath string
    // comparison is not, so lower-case the attribute value before comparing.
    var redirectTag = document.DocumentNode?.SelectSingleNode(
        "//head/meta[translate(@http-equiv, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz')='refresh']");
    if (redirectTag == null)
    {
        return null;
    }

    // The content attribute looks like: 0; url='https://someotherurl'
    // Capture everything after "url=" up to the next quote, skipping any opening quote characters.
    var redirectSettings = redirectTag.Attributes["content"]?.Value ?? string.Empty;
    var regexMatch = System.Text.RegularExpressions.Regex.Match(
        redirectSettings,
        "url\\s*=\\s*['\"]*([^'\"]+)",
        System.Text.RegularExpressions.RegexOptions.IgnoreCase);
    if (!regexMatch.Success)
    {
        return null;
    }

    var redirectUrl = regexMatch.Groups[1].Value.Trim();

    // Relative redirect URLs are relative to the page containing the tag, which is the most recent
    // redirect target if we're already following a redirect chain, otherwise the original URL.
    var baseUri = pendingMetaRedirects.Count > 0
        ? pendingMetaRedirects[pendingMetaRedirects.Count - 1]
        : this.url;
    if (!Uri.TryCreate(baseUri, redirectUrl, out var redirectUri))
    {
        return null;
    }

    // Don't follow a redirect back to the original page or to any page already visited in this chain;
    // doing so would loop forever (e.g. a page that periodically refreshes itself).
    if (redirectUri == this.url || pendingMetaRedirects.Contains(redirectUri))
    {
        return null;
    }

    if (pendingMetaRedirects.Count >= MaxMetaRedirects)
    {
        logger.LogWarning("Too many redirect tags followed. Ignoring redirect to {url}", redirectUri);
        return null;
    }

    logger.LogInformation("Page contained redirect tag in <head>. Redirecting to {url}", redirectUri);
    pendingMetaRedirects.Add(redirectUri);
    try
    {
        var redirectDoc = await LoadPage(redirectUri);
        return await LoadManifestNode(redirectDoc);
    }
    finally
    {
        // Always unwind the chain, even when the redirected page fails to load or has no manifest.
        pendingMetaRedirects.RemoveAt(pendingMetaRedirects.Count - 1);
    }
}

private async Task<string?> TryFetchHttpWithHttp2Fallback(Uri url, string? acceptHeader)
{
try
Expand Down Expand Up @@ -228,15 +264,15 @@ private async Task<ManifestContext> LoadManifestInfo(string manifestHref, HtmlNo
throw new ManifestNotFoundException($"Unable to detect manifest. Attempted manifest download at {manifestAbsoluteUrl} and {localPathManifestUrl}, but both failed.");
}

private async Task<HtmlDocument> LoadPage()
private async Task<HtmlDocument> LoadPage(Uri url)
{
var web = new HtmlWeb
{
UserAgent = userAgent
};
try
{
return await web.LoadFromWebAsync(this.url, null, null);
return await web.LoadFromWebAsync(url, null, null);
}
catch (Exception error)
{
Expand Down

0 comments on commit 9cfa2e9

Please sign in to comment.