Skip to content

Commit

Permalink
Fix relative links
Browse files Browse the repository at this point in the history
Closes #10
  • Loading branch information
wasi-master committed Aug 14, 2024
1 parent 1f43c8a commit ffefe11
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 1 deletion.
25 changes: 24 additions & 1 deletion app/index.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,35 @@
import flask
import requests
from flask import request
from bs4 import BeautifulSoup
from urllib.parse import urlparse, urljoin

# Flask application object for this module.
app = flask.Flask(__name__)

# Headers sent with outgoing page fetches. The User-Agent is the one
# published for the Googlebot smartphone crawler; "W.X.Y.Z" is the literal
# placeholder text and is sent as-is.
googlebot_headers = {
    "User-Agent": (
        "Mozilla/5.0 (Linux; Android 6.0.1; Nexus 5X Build/MMB29P) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/W.X.Y.Z Mobile Safari/537.36 "
        "(compatible; Googlebot/2.1; +http://www.google.com/bot.html)"
    ),
}

def add_base_tag(html_content, original_url):
    """Return ``html_content`` with a ``<base>`` tag pointing at its origin.

    Relative links in a page that is served from a different host only keep
    working if the document declares the URL they should be resolved against.
    This function makes sure such a declaration is present and absolute.

    Args:
        html_content: The HTML markup of the fetched page.
        original_url: The absolute URL the markup was fetched from.

    Returns:
        The serialized HTML, with a usable ``<base href>`` in place.
    """
    soup = BeautifulSoup(html_content, 'html.parser')

    # Use the document's own URL (minus any fragment) as the base. Relative
    # references then resolve exactly as they would on the original site,
    # including directory URLs ending in "/" and query-only links such as
    # "?page=2", which a hand-built "parent directory" base gets wrong.
    document_url = urlparse(original_url)._replace(fragment='').geturl()

    # Only a <base> that carries an href affects link resolution; a <base>
    # with just a target attribute must not stop us from adding one.
    existing_base = soup.find('base', href=True)
    if existing_base is not None:
        # The page's own base may itself be relative (e.g. "/static/"), which
        # would otherwise be resolved against the serving host. Make it
        # absolute; an already-absolute href is returned unchanged by urljoin.
        existing_base['href'] = urljoin(document_url, existing_base['href'])
        return str(soup)

    new_base_tag = soup.new_tag('base', href=document_url)
    if soup.head is not None:
        # First child of <head>, so it precedes every URL-bearing element.
        soup.head.insert(0, new_base_tag)
    else:
        head_tag = soup.new_tag('head')
        head_tag.insert(0, new_base_tag)
        # Keep the synthesized <head> inside <html> when there is one, rather
        # than placing it ahead of the doctype at the very top of the document.
        parent = soup.html if soup.html is not None else soup
        parent.insert(0, head_tag)

    return str(soup)

def bypass_paywall(url):
"""
Expand All @@ -15,7 +38,7 @@ def bypass_paywall(url):
if url.startswith("http"):
response = requests.get(url, headers=googlebot_headers)
response.encoding = response.apparent_encoding
return response.text
return add_base_tag(response.text, response.url)

try:
return bypass_paywall("https://" + url)
Expand Down
1 change: 1 addition & 0 deletions app/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
flask
requests
bs4

0 comments on commit ffefe11

Please sign in to comment.