diff --git a/openstax/middleware.py b/openstax/middleware.py
index 2d5eb2bbe..d3c4046db 100644
--- a/openstax/middleware.py
+++ b/openstax/middleware.py
@@ -2,6 +2,15 @@ from django.core.handlers.base import BaseHandler
 from django.middleware.common import CommonMiddleware
 from django.conf import settings
 
+from ua_parser import user_agent_parser
+from wagtail.models import Page
+from django.shortcuts import get_object_or_404
+from django.template.response import TemplateResponse
+from django.http import HttpResponse
+from books.models import Book
+from openstax.functions import build_image_url
+from news.models import NewsArticle
+from pages.models import HomePage, Supporters
 
 
 class HttpSmartRedirectResponse(HttpResponsePermanentRedirect):
@@ -39,3 +48,128 @@ def process_response(self, request, response):
             response = self.handler.get_response(request)
 
         return response
+
+
+class CommonMiddlewareOpenGraphRedirect(CommonMiddleware):
+    """Answer link-preview bots with a metadata-only HTML page.
+
+    When the request's user-agent family is listed in ``OG_USER_AGENTS`` and
+    the path is the home page, the supporters ("foundation") page, a book
+    details page or a blog article, an ``HttpResponse`` carrying Open Graph
+    and Twitter Card tags is returned directly. Every other request is handed
+    to ``get_response`` unchanged.
+    """
+
+    # Lower-cased user-agent families, as reported by ua_parser, to intercept.
+    OG_USER_AGENTS = [
+        'twitterbot',
+        'facebookbot',
+        'pinterestbot',
+        'slackbot-linkexpanding',
+    ]
+
+    def __init__(self, get_response):
+        self.get_response = get_response
+
+    def __call__(self, request, *args, **kwargs):
+        """Return the preview page for a matching bot request, else delegate."""
+        user_agent_header = request.META.get('HTTP_USER_AGENT')
+        if not user_agent_header:
+            return self.get_response(request)
+
+        user_agent = user_agent_parser.ParseUserAgent(user_agent_header)
+        if user_agent['family'].lower() not in self.OG_USER_AGENTS:
+            return self.get_response(request)
+
+        # Path only, without trailing slashes. get_full_path() would drag the
+        # query string along and slicing off the last character would then cut
+        # into it (or into the slug when there is no trailing slash).
+        url_path = request.path.rstrip('/')
+        if not self.redirect_path_found(url_path):
+            return self.get_response(request)
+
+        if url_path == '':
+            page_slug = 'openstax-homepage'
+        else:
+            # the slug is whatever follows the last /
+            page_slug = url_path.rsplit('/', 1)[-1]
+
+        # supporters page has the wrong slug
+        if page_slug == 'foundation':
+            page_slug = 'supporters'
+
+        # look up correct object based on path
+        if '/details/books/' in url_path:
+            pages = Book.objects.filter(slug=page_slug)
+        elif '/blog/' in url_path:
+            pages = NewsArticle.objects.filter(slug=page_slug)
+        else:
+            pages = self.page_by_slug(page_slug)
+
+        page = pages.first() if pages is not None else None
+        if page is None:
+            # Nothing to preview: let the normal handling answer (e.g. 404)
+            # instead of failing on an empty result.
+            return self.get_response(request)
+
+        return HttpResponse(self.build_template(page, request.build_absolute_uri()))
+
+    def build_template(self, page, page_url):
+        """Build the metadata-only HTML document for ``page``.
+
+        Reads ``seo_title``, ``search_description`` and ``promote_image`` from
+        ``page``; ``page_url`` is used as the canonical and ``og:url`` value.
+        All values are HTML-escaped before being placed in the markup.
+        """
+        from html import escape
+
+        title = escape(str(page.seo_title))
+        description = escape(str(page.search_description))
+        url = escape(str(page_url))
+        image_url = escape(str(self.image_url(page.promote_image)))
+
+        tags = [
+            '<!DOCTYPE html>',
+            '<html>',
+            '<head>',
+            '<meta charset="utf-8">',
+            '<title>{}</title>'.format(title),
+            '<meta name="description" content="{}">'.format(description),
+            '<link rel="canonical" href="{}">'.format(url),
+            '<meta property="og:url" content="{}">'.format(url),
+            '<meta property="og:type" content="website">',
+            '<meta property="og:title" content="{}">'.format(title),
+            '<meta property="og:description" content="{}">'.format(description),
+            '<meta property="og:image" content="{}">'.format(image_url),
+            '<meta property="og:image:alt" content="{}">'.format(title),
+            '<meta name="twitter:card" content="summary_large_image">',
+            '<meta name="twitter:site" content="@OpenStax">',
+            '<meta name="twitter:title" content="{}">'.format(title),
+            '<meta name="twitter:description" content="{}">'.format(description),
+            '<meta name="twitter:image" content="{}">'.format(image_url),
+            '<meta name="twitter:image:alt" content="{}">'.format(title),
+            '</head>',
+            '</html>',
+        ]
+        return '\n'.join(tags)
+
+    def redirect_path_found(self, url_path):
+        """Return True when ``url_path`` is one of the previewable locations."""
+        return (
+            url_path == ''
+            or '/blog/' in url_path
+            or '/details/books/' in url_path
+            or '/foundation' in url_path
+        )
+
+    def image_url(self, image):
+        """Return the URL built for ``image``, or '' when there is none."""
+        return build_image_url(image) or ''
+
+    def page_by_slug(self, page_slug):
+        """Return the queryset for the given special-case slug, else None."""
+        if page_slug == 'supporters':
+            return Supporters.objects.all()
+        if page_slug == 'openstax-homepage':
+            return HomePage.objects.filter(locale=1)
+        return None
diff --git a/openstax/settings/base.py b/openstax/settings/base.py
index 5c45d490c..aacbf7ab6 100644
--- a/openstax/settings/base.py
+++ b/openstax/settings/base.py
@@ -140,6 +140,7 @@
     'whitenoise.middleware.WhiteNoiseMiddleware',
     'healthcheck.middleware.HealthCheckMiddleware', # has to be before CommonMiddleware
     'openstax.middleware.CommonMiddlewareAppendSlashWithoutRedirect',
+    'openstax.middleware.CommonMiddlewareOpenGraphRedirect',
     'django.middleware.common.CommonMiddleware',
     'django.contrib.sessions.middleware.SessionMiddleware',
     'django.middleware.csrf.CsrfViewMiddleware',
diff --git a/openstax/tests.py b/openstax/tests.py
index c454080a5..3050dbafd 100644
--- a/openstax/tests.py
+++ b/openstax/tests.py
@@ -1,6 +1,16 @@
+import datetime
+import json
 from .functions import remove_locked_links_detail, remove_locked_links_listing, build_document_url, build_image_url
 from django.test import TestCase, Client
+from django.core.files.uploadedfile import SimpleUploadedFile
+from wagtail.models import Page
+from pages.models import HomePage, WebinarPage
+from books.models import BookIndex, Book
+from news.models import NewsIndex, NewsArticle
+from snippets.models import Subject, BlogContentType, BlogCollection
+from wagtail.documents.models import Document
+
 
 
 class TestClass(object):
     pass
@@ -79,3 +89,104 @@ def test_build_document_url_none(self):
 
     def test_build_image_url_none(self):
         self.assertEqual(build_image_url(None), None)
+
+
+class TestOpenGraphMiddleware(TestCase):
+    """Link-preview bots must be served a page carrying Open Graph tags."""
+
+    def setUp(self):
+        self.client = Client(HTTP_USER_AGENT='twitterbot')
+        self.root_page = Page.objects.get(title="Root")
+        self.homepage = HomePage(title="Hello World",
+                                 slug="openstax-homepage",
+                                 seo_title='OpenStax Home',
+                                 search_description='Home page for OpenStax'
+                                 )
+        self.root_page.add_child(instance=self.homepage)
+
+    def test_home_page_link_preview(self):
+        response = self.client.get('/')
+        self.assertContains(response, 'og:image')
+
+    def test_book_link_preview(self):
+        # Read the fixture through a context manager so the handle is closed.
+        with open("oxauth/static/images/openstax.png", 'rb') as image_file:
+            test_image = SimpleUploadedFile(name='openstax.png',
+                                            content=image_file.read())
+        self.test_doc = Document.objects.create(title='Test Doc', file=test_image)
+        book_index = BookIndex(title="Book Index",
+                               page_description="Test",
+                               dev_standard_1_description="Test",
+                               dev_standard_2_description="Test",
+                               dev_standard_3_description="Test",
+                               dev_standard_4_description="Test",
+                               )
+        # add book index to homepage
+        self.homepage.add_child(instance=book_index)
+        book = Book(title="Biology 2e",
+                    slug="biology-2e",
+                    cnx_id='031da8d3-b525-429c-80cf-6c8ed997733a',
+                    description="Test Book",
+                    cover=self.test_doc,
+                    title_image=self.test_doc,
+                    publish_date=datetime.date.today(),
+                    locale=self.root_page.locale,
+                    license_name='Creative Commons Attribution License',
+                    seo_title='Biology 2e',
+                    search_description='2nd edition of Biology'
+                    )
+        book_index.add_child(instance=book)
+        self.client = Client(HTTP_USER_AGENT='Slackbot-LinkExpanding 1.0 (+https://api.slack.com/robots)')
+        response = self.client.get('/details/books/biology-2e')
+        self.assertContains(response, 'og:image')
+
+    def test_blog_link_preview(self):
+        self.news_index = NewsIndex(title="News Index")
+        self.homepage.add_child(instance=self.news_index)
+        self.math = Subject(name="Math", page_content="Math page content.", seo_title="Math SEO Title",
+                            search_description="Math page description.")
+        self.math.save()
+        math_id = self.math.id
+        self.case_study = BlogContentType(content_type='Case Study')
+        self.case_study.save()
+        case_study_id = self.case_study.id
+        self.learning = BlogCollection(name='Teaching and Learning', description='this is a collection')
+        self.learning.save()
+        learning_id = self.learning.id
+        self.article = NewsArticle(title="Article 1",
+                                   slug="article-1",
+                                   date=datetime.date.today(),
+                                   heading="Sample Article",
+                                   subheading="Sample Subheading",
+                                   author="OpenStax",
+                                   seo_title='Test Article 1',
+                                   search_description='Test Article 1 description',
+                                   body=json.dumps(
+                                       [
+                                           {"type": "paragraph",
+                                            "value": "<p>This is the body of the post</p><p>This is the second paragraph</p>"}
+                                       ]
+                                   ),
+                                   article_subjects=json.dumps(
+                                       [
+                                           {'type': 'subject', 'value': [
+                                               {'type': 'item', 'value': {'subject': math_id, 'featured': False}}]}
+                                       ]
+                                   ),
+                                   content_types=json.dumps(
+                                       [
+                                           {'type': 'content_type', 'value': [
+                                               {'type': 'item', 'value': {'content_type': case_study_id}}]}
+                                       ]
+                                   ),
+                                   collections=json.dumps(
+                                       [
+                                           {'type': 'collection', 'value': [
+                                               {'type': 'item', 'value': {'collection': learning_id, 'featured': False,
+                                                                          'popular': False}}]}
+                                       ]
+                                   ))
+        self.news_index.add_child(instance=self.article)
+        self.client = Client(HTTP_USER_AGENT='facebookexternalhit/1.1')
+        response = self.client.get('/blog/article-1')
+        self.assertContains(response, 'og:image')
diff --git a/requirements/base.txt b/requirements/base.txt
index d886a5418..3211d4cac 100644
--- a/requirements/base.txt
+++ b/requirements/base.txt
@@ -23,6 +23,7 @@ sentry-sdk
 setuptools==65.5.1
 simple_salesforce==1.11.6
 social-auth-app-django==5.0.0
+ua_parser==0.16.1
 unicodecsv==0.14.1
 Unidecode==1.3.4
 vcrpy==4.1.1 # for recording test interactions with third-party APIs