Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Link preview fix #1416

Merged
merged 13 commits into from
Mar 9, 2023
101 changes: 101 additions & 0 deletions openstax/middleware.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,15 @@
from django.core.handlers.base import BaseHandler
from django.middleware.common import CommonMiddleware
from django.conf import settings
from ua_parser import user_agent_parser
from wagtail.models import Page
from django.shortcuts import get_object_or_404
from django.template.response import TemplateResponse
from django.http import HttpResponse
from books.models import Book
from openstax.functions import build_image_url
from news.models import NewsArticle
from pages.models import HomePage, Supporters


class HttpSmartRedirectResponse(HttpResponsePermanentRedirect):
Expand Down Expand Up @@ -39,3 +48,95 @@ def process_response(self, request, response):
response = self.handler.get_response(request)

return response


class CommonMiddlewareOpenGraphRedirect(CommonMiddleware):
    """Answer link-preview crawlers with a static page of Open Graph tags.

    When the request's user-agent family (as parsed by ``ua_parser``) is one
    of ``OG_USER_AGENTS`` and the path is the site root, a blog article, a
    book details page or the foundation page, the matching page object is
    looked up and a minimal HTML document carrying its Open Graph / Twitter
    Card metadata is returned. Every other request is handed to
    ``get_response`` unchanged.

    NOTE(review): ``__init__`` and ``__call__`` are both overridden without
    calling ``super()``, so the inherited CommonMiddleware hooks are
    presumably never run from this class -- confirm that is intended.
    """

    # Lower-cased ``family`` values, as reported by ua_parser, of the
    # crawlers that should receive the metadata-only page.
    OG_USER_AGENTS = [
        'twitterbot',
        'facebookbot',
        'pinterestbot',
        'slackbot-linkexpanding',
    ]

    def __init__(self, get_response):
        """Store the next handler in the middleware chain."""
        self.get_response = get_response

    def __call__(self, request, *args, **kwargs):
        """Return the Open Graph page for preview bots, else delegate.

        Falls through to ``get_response`` when the user agent is missing or
        not a known preview bot, when the path is not preview-eligible, or
        when no matching page object exists.
        """
        raw_user_agent = request.META.get('HTTP_USER_AGENT')
        if not raw_user_agent:
            return self.get_response(request)

        user_agent = user_agent_parser.ParseUserAgent(raw_user_agent)
        if (user_agent['family'] or '').lower() not in self.OG_USER_AGENTS:
            return self.get_response(request)

        # Use the path without the query string and strip only real trailing
        # slashes, so links such as /blog/post/?utm_source=x or /blog/post
        # still resolve to the slug "post".
        url_path = request.path.rstrip('/')
        if not self.redirect_path_found(url_path):
            return self.get_response(request)

        page = self._find_page(url_path)
        if page is None:
            # Unknown slug: let the regular stack answer instead of crashing.
            return self.get_response(request)

        template = self.build_template(page, request.build_absolute_uri())
        return HttpResponse(template)

    def _find_page(self, url_path):
        """Return the page object addressed by ``url_path``, or None."""
        # The slug is the last path segment; the empty path is the site root.
        if url_path:
            page_slug = url_path.rsplit('/', 1)[-1]
        else:
            page_slug = 'openstax-homepage'

        # Look up the correct object based on the path.
        if '/details/books/' in url_path:
            matches = Book.objects.filter(slug=page_slug)
        elif '/blog/' in url_path:
            matches = NewsArticle.objects.filter(slug=page_slug)
        else:
            # The supporters page has the wrong slug: it is requested as
            # "foundation" but stored as "supporters".
            # TODO: review note -- it would probably be worth at least making
            # a card to align these two slugs on BE/FE.
            if page_slug == 'foundation':
                page_slug = 'supporters'
            matches = self.page_by_slug(page_slug)

        if matches is None:
            return None
        return matches.first()

    def build_template(self, page, page_url):
        """Render the metadata-only HTML document for ``page``.

        ``page`` must expose ``seo_title``, ``search_description`` and
        ``promote_image``; ``page_url`` is used for the canonical link and
        ``og:url``. All interpolated values are HTML-escaped, and a ``None``
        value is rendered as an empty string.
        """
        # Function-scope import keeps this block self-contained.
        from html import escape

        def clean(value):
            return escape('' if value is None else str(value), quote=True)

        title = clean(page.seo_title)
        description = clean(page.search_description)
        url = clean(page_url)
        image = clean(self.image_url(page.promote_image))

        parts = [
            '<!DOCTYPE html> <html> <head> <meta charset="utf-8">\n',
            ' <title>{title}</title>\n',
            ' <meta name="description" content="{description}" >\n',
            ' <link rel="canonical" href="{url}" />\n',
            ' <meta property="og:url" content="{url}" />\n',
            ' <meta property="og:type" content="article" />\n',
            ' <meta property="og:title" content="{title}" />\n',
            ' <meta property="og:description" content="{description}" />\n',
            ' <meta property="og:image" content="{image}" />\n',
            ' <meta property="og:image:alt" content="{title}" />\n',
            ' <meta name="twitter:card" content="summary_large_image">\n',
            ' <meta name="twitter:site" content="@OpenStax">\n',
            ' <meta name="twitter:title" content="{title}">\n',
            ' <meta name="twitter:description" content="{description}">\n',
            ' <meta name="twitter:image" content="{image}">\n',
            ' <meta name="twitter:image:alt" content="OpenStax">\n',
            '</head><body></body></html>',
        ]
        return ''.join(parts).format(
            title=title,
            description=description,
            url=url,
            image=image,
        )

    def redirect_path_found(self, url_path):
        """Return True when ``url_path`` is one that gets a preview page.

        ``url_path`` is expected without a trailing slash; the empty string
        stands for the site root.
        """
        return (
            url_path == ''
            or '/blog/' in url_path
            or '/details/books/' in url_path
            or '/foundation' in url_path
        )

    def image_url(self, image):
        """Return the URL built for ``image``, or '' when there is none."""
        return build_image_url(image) or ''

    def page_by_slug(self, page_slug):
        """Return a queryset for the special-cased slugs, else None."""
        if page_slug == 'supporters':
            return Supporters.objects.all()
        if page_slug == 'openstax-homepage':
            # NOTE(review): locale id 1 is hard-coded; presumably the
            # default locale -- confirm.
            return HomePage.objects.filter(locale=1)
        return None




1 change: 1 addition & 0 deletions openstax/settings/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,7 @@
'whitenoise.middleware.WhiteNoiseMiddleware',
'healthcheck.middleware.HealthCheckMiddleware', # has to be before CommonMiddleware
'openstax.middleware.CommonMiddlewareAppendSlashWithoutRedirect',
'openstax.middleware.CommonMiddlewareOpenGraphRedirect',
'django.middleware.common.CommonMiddleware',
'django.contrib.sessions.middleware.SessionMiddleware',
'django.middleware.csrf.CsrfViewMiddleware',
Expand Down
109 changes: 109 additions & 0 deletions openstax/tests.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,16 @@
import datetime
import json
from .functions import remove_locked_links_detail, remove_locked_links_listing, build_document_url, build_image_url

from django.test import TestCase, Client
from django.core.files.uploadedfile import SimpleUploadedFile
from wagtail.models import Page
from pages.models import HomePage, WebinarPage
from books.models import BookIndex, Book
from news.models import NewsIndex, NewsArticle
from snippets.models import Subject, BlogContentType, BlogCollection
from wagtail.documents.models import Document


class TestClass:
    """Empty class; instances accept arbitrary attributes."""
Expand Down Expand Up @@ -79,3 +89,102 @@ def test_build_document_url_none(self):

def test_build_image_url_none(self):
    """build_image_url(None) is expected to compare equal to None."""
    result = build_image_url(None)
    self.assertEqual(result, None)


class TestOpenGraphMiddleware(TestCase):
    """Requests from link-preview bots should get a page with og: tags."""

    def setUp(self):
        """Create a home page under the root and a client posing as a bot."""
        self.client = Client(HTTP_USER_AGENT='twitterbot')
        self.root_page = Page.objects.get(title="Root")
        self.homepage = HomePage(title="Hello World",
                                 slug="openstax-homepage",
                                 seo_title='OpenStax Home',
                                 search_description='Home page for OpenStax'
                                 )
        self.root_page.add_child(instance=self.homepage)

    def test_home_page_link_preview(self):
        """The site root returns Open Graph metadata."""
        response = self.client.get('/')
        self.assertContains(response, 'og:image')

    def test_book_link_preview(self):
        """A book details URL returns Open Graph metadata."""
        # Read the fixture inside a context manager so the file handle is
        # closed instead of being leaked.
        with open("oxauth/static/images/openstax.png", 'rb') as image_file:
            test_image = SimpleUploadedFile(name='openstax.png',
                                            content=image_file.read())
        self.test_doc = Document.objects.create(title='Test Doc', file=test_image)
        book_index = BookIndex(title="Book Index",
                               page_description="Test",
                               dev_standard_1_description="Test",
                               dev_standard_2_description="Test",
                               dev_standard_3_description="Test",
                               dev_standard_4_description="Test",
                               )
        # add book index to homepage
        self.homepage.add_child(instance=book_index)
        book = Book(title="Biology 2e",
                    slug="biology-2e",
                    cnx_id='031da8d3-b525-429c-80cf-6c8ed997733a',
                    description="Test Book",
                    cover=self.test_doc,
                    title_image=self.test_doc,
                    publish_date=datetime.date.today(),
                    locale=self.root_page.locale,
                    license_name='Creative Commons Attribution License',
                    seo_title='Biology 2e',
                    search_description='2nd edition of Biology'
                    )
        book_index.add_child(instance=book)
        self.client = Client(HTTP_USER_AGENT='Slackbot-LinkExpanding 1.0 (+https://api.slack.com/robots)')
        response = self.client.get('/details/books/biology-2e')
        self.assertContains(response, 'og:image')

    def test_blog_link_preview(self):
        """A blog article URL returns Open Graph metadata."""
        self.news_index = NewsIndex(title="News Index")
        self.homepage.add_child(instance=self.news_index)
        self.math = Subject(name="Math", page_content="Math page content.", seo_title="Math SEO Title",
                            search_description="Math page description.")
        self.math.save()
        math_id = self.math.id
        self.case_study = BlogContentType(content_type='Case Study')
        self.case_study.save()
        case_study_id = self.case_study.id
        self.learning = BlogCollection(name='Teaching and Learning', description='this is a collection')
        self.learning.save()
        learning_id = self.learning.id
        self.article = NewsArticle(title="Article 1",
                                   slug="article-1",
                                   date=datetime.date.today(),
                                   heading="Sample Article",
                                   subheading="Sample Subheading",
                                   author="OpenStax",
                                   seo_title='Test Article 1',
                                   search_description='Test Article 1 description',
                                   body=json.dumps(
                                       [
                                           {"type": "paragraph",
                                            "value": "<p>This is the body of the post</p><p>This is the second paragraph</p>"}
                                       ]
                                   ),
                                   article_subjects=json.dumps(
                                       [
                                           {'type': 'subject', 'value': [
                                               {'type': 'item', 'value': {'subject': math_id, 'featured': False}}]}
                                       ]
                                   ),
                                   content_types=json.dumps(
                                       [
                                           {'type': 'content_type', 'value': [
                                               {'type': 'item', 'value': {'content_type': case_study_id}}]}
                                       ]
                                   ),
                                   collections=json.dumps(
                                       [
                                           {'type': 'collection', 'value': [
                                               {'type': 'item', 'value': {'collection': learning_id, 'featured': False,
                                                                          'popular': False}}]}
                                       ]
                                   ))
        self.news_index.add_child(instance=self.article)
        self.client = Client(HTTP_USER_AGENT='facebookexternalhit/1.1')
        response = self.client.get('/blog/article-1')
        self.assertContains(response, 'og:image')
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nice job testing!


1 change: 1 addition & 0 deletions requirements/base.txt
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ sentry-sdk
setuptools==65.5.1
simple_salesforce==1.11.6
social-auth-app-django==5.0.0
ua_parser==0.16.1
unicodecsv==0.14.1
Unidecode==1.3.4
vcrpy==4.1.1 # for recording test interactions with third-party APIs
Expand Down