From eff8e143dbbe253bc14f1162acec9f307ec66489 Mon Sep 17 00:00:00 2001 From: Leonard Richardson Date: Fri, 3 Sep 2021 16:10:18 -0400 Subject: [PATCH 1/3] Bring in code from https://github.com/ThePalaceProject/circulation/pull/25/files --- api/odl.py | 158 ++++++++++----- api/odl2.py | 287 +++++++++++++++++++++++++++ bin/odl2_reaper | 10 + bin/odl_reaper | 10 + tests/files/odl/single_license.opds | 79 ++++++++ tests/files/odl2/single_license.json | 110 ++++++++++ tests/test_odl.py | 225 ++++++++++++++++++--- 7 files changed, 796 insertions(+), 83 deletions(-) create mode 100644 api/odl2.py create mode 100755 bin/odl2_reaper create mode 100755 bin/odl_reaper create mode 100644 tests/files/odl/single_license.opds create mode 100644 tests/files/odl2/single_license.json diff --git a/api/odl.py b/api/odl.py index dae53c0ae..67c5e014b 100644 --- a/api/odl.py +++ b/api/odl.py @@ -1,68 +1,56 @@ import datetime -import dateutil import json import uuid -from flask_babel import lazy_gettext as _ -import urllib.parse -from collections import defaultdict -import flask -from flask import Response -import feedparser -from lxml import etree -from .problem_details import NO_LICENSES from io import StringIO -import re -from uritemplate import URITemplate +import dateutil +import feedparser +import flask +from flask import url_for +from flask_babel import lazy_gettext as _ +from lxml import etree from sqlalchemy.sql.expression import or_ +from uritemplate import URITemplate -from core.opds_import import ( - OPDSXMLParser, - OPDSImporter, - OPDSImportMonitor, -) -from core.monitor import ( - CollectionMonitor, - TimelineMonitor, +from core import util +from core.analytics import Analytics +from core.metadata_layer import ( + CirculationData, + FormatData, + LicenseData, + TimestampData, ) from core.model import ( Collection, ConfigurationSetting, - Credential, DataSource, DeliveryMechanism, Edition, ExternalIntegration, Hold, Hyperlink, - Identifier, - IntegrationClient, LicensePool, 
Loan, MediaTypes, RightsStatus, Session, - create, get_one, get_one_or_create, + Representation) +from core.monitor import ( + CollectionMonitor, + IdentifierSweepMonitor) +from core.opds_import import ( + OPDSXMLParser, + OPDSImporter, + OPDSImportMonitor, ) -from core.metadata_layer import ( - CirculationData, - FormatData, - IdentifierData, - LicenseData, - TimestampData, -) -from .circulation import ( - BaseCirculationAPI, - LoanInfo, - FulfillmentInfo, - HoldInfo, +from core.testing import ( + DatabaseTest, + MockRequestsResponse, ) -from core.analytics import Analytics from core.util.datetime_helpers import ( utc_now, - strptime_utc, ) from core.util.http import ( HTTP, @@ -70,14 +58,16 @@ RemoteIntegrationException, ) from core.util.string_helpers import base64 -from flask import url_for -from core.testing import ( - DatabaseTest, - MockRequestsResponse, +from .circulation import ( + BaseCirculationAPI, + LoanInfo, + FulfillmentInfo, + HoldInfo, ) from .circulation_exceptions import * from .shared_collection import BaseSharedCollectionAPI + class ODLAPI(BaseCirculationAPI, BaseSharedCollectionAPI): """ODL (Open Distribution to Libraries) is a specification that allows libraries to manage their own loans and holds. It offers a deeper level @@ -596,7 +586,7 @@ def update_hold_queue(self, licensepool): Loan.end>utc_now() ) ).count() - remaining_licenses = licensepool.licenses_owned - loans_count + remaining_licenses = max(licensepool.licenses_owned - loans_count, 0) holds = _db.query(Hold).filter( Hold.license_pool_id==licensepool.id @@ -782,6 +772,7 @@ class ODLXMLParser(OPDSXMLParser): NAMESPACES = dict(OPDSXMLParser.NAMESPACES, odl="http://opds-spec.org/odl") + class ODLImporter(OPDSImporter): """Import information and formats from an ODL feed. 
@@ -887,8 +878,13 @@ def _detail_for_elementtree_entry(cls, parser, entry_tag, feed_url=None, do_get= if terms: concurrent_checkouts = subtag(terms[0], "odl:concurrent_checkouts") expires = subtag(terms[0], "odl:expires") + if expires: - expires = dateutil.parser.parse(expires) + expires = util.datetime_helpers.to_utc(dateutil.parser.parse(expires)) + now = util.datetime_helpers.utc_now() + + if expires <= now: + continue licenses_owned += int(concurrent_checkouts or 0) licenses_available += int(available_checkouts or 0) @@ -914,6 +910,7 @@ def _detail_for_elementtree_entry(cls, parser, entry_tag, feed_url=None, do_get= data['circulation']['licenses_available'] = licenses_available return data + class ODLImportMonitor(OPDSImportMonitor): """Import information from an ODL feed.""" PROTOCOL = ODLImporter.NAME @@ -959,11 +956,12 @@ def run_once(self, progress): progress = TimestampData(achievements=message) return progress + class MockODLAPI(ODLAPI): """Mock API for tests that overrides _get and _url_for and tracks requests.""" @classmethod - def mock_collection(self, _db): + def mock_collection(cls, _db, protocol=ODLAPI.NAME): """Create a mock ODL collection to use in tests.""" library = DatabaseTest.make_default_library(_db) collection, ignore = get_one_or_create( @@ -973,7 +971,7 @@ def mock_collection(self, _db): ) ) integration = collection.create_external_integration( - protocol=ODLAPI.NAME + protocol=protocol ) integration.username = 'a' integration.password = 'b' @@ -1040,6 +1038,25 @@ def __init__(self, _db, collection): self.base_url = collection.external_account_id + @staticmethod + def _parse_feed_from_response(response): + """Parse ODL (Atom) feed from the HTTP response. 
+ + :param response: HTTP response + :type response: requests.Response + + :return: Parsed ODL (Atom) feed + :rtype: dict + """ + response_content = response.content + + if not isinstance(response_content, (str, bytes)): + raise ValueError("Response content must be a string or byte-encoded value") + + feed = feedparser.parse(response_content) + + return feed + def internal_format(self, delivery_mechanism): """Each consolidated copy is only available in one format, so we don't need a mapping to internal formats. @@ -1091,7 +1108,8 @@ def checkout(self, patron, pin, licensepool, internal_format): hold_info_response = self._get(hold.external_identifier) except RemoteIntegrationException as e: raise CannotLoan() - feed = feedparser.parse(str(hold_info_response.content)) + + feed = self._parse_feed_from_response(hold_info_response) entries = feed.get("entries") if len(entries) < 1: raise CannotLoan() @@ -1117,7 +1135,8 @@ def checkout(self, patron, pin, licensepool, internal_format): elif response.status_code == 404: if hasattr(response, 'json') and response.json().get('type', '') == NO_LICENSES.uri: raise NoLicenses() - feed = feedparser.parse(str(response.content)) + + feed = self._parse_feed_from_response(response) entries = feed.get("entries") if len(entries) < 1: raise CannotLoan() @@ -1181,7 +1200,8 @@ def checkin(self, patron, pin, licensepool): raise CannotReturn() if response.status_code == 404: raise NotCheckedOut() - feed = feedparser.parse(str(response.content)) + + feed = self._parse_feed_from_response(response) entries = feed.get("entries") if len(entries) < 1: raise CannotReturn() @@ -1286,7 +1306,8 @@ def release_hold(self, patron, pin, licensepool): raise CannotReleaseHold() if response.status_code == 404: raise NotOnHold() - feed = feedparser.parse(str(response.content)) + + feed = self._parse_feed_from_response(response) entries = feed.get("entries") if len(entries) < 1: raise CannotReleaseHold() @@ -1325,7 +1346,7 @@ def patron_activity(self, patron, 
pin): if response.status_code == 404: # 404 is returned when the loan has been deleted. Leave this loan out of the result. continue - feed = feedparser.parse(str(response.content)) + feed = self._parse_feed_from_response(response) entries = feed.get("entries") if len(entries) < 1: raise CirculationException() @@ -1354,7 +1375,7 @@ def patron_activity(self, patron, pin): if response.status_code == 404: # 404 is returned when the hold has been deleted. Leave this hold out of the result. continue - feed = feedparser.parse(str(response.content)) + feed = self._parse_feed_from_response(response) entries = feed.get("entries") if len(entries) < 1: raise CirculationException() @@ -1518,3 +1539,38 @@ def _get(self, url, patron=None, headers=None, allowed_response_codes=None): self.request_args.append((patron, headers, allowed_response_codes)) response = self.responses.pop() return HTTP._process_response(url, response, allowed_response_codes=allowed_response_codes) + + +class ODLExpiredItemsReaper(IdentifierSweepMonitor): + """Responsible for removing expired ODL licenses.""" + + SERVICE_NAME = "ODL Expired Items Reaper" + PROTOCOL = ODLAPI.NAME + + def __init__(self, _db, collection): + super(ODLExpiredItemsReaper, self).__init__(_db, collection) + + def process_item(self, identifier): + for licensepool in identifier.licensed_through: + licenses_owned = licensepool.licenses_owned + licenses_available = licensepool.licenses_available + + for license in licensepool.licenses: + if license.is_expired: + licenses_owned -= 1 + licenses_available -= 1 + + if licenses_owned != licensepool.licenses_owned or licenses_available != licensepool.licenses_available: + licenses_owned = max(licenses_owned, 0) + licenses_available = max(licenses_available, 0) + + circulation_data = CirculationData( + data_source=licensepool.data_source, + primary_identifier=identifier, + licenses_owned=licenses_owned, + licenses_available=licenses_available, + licenses_reserved=licensepool.licenses_reserved, 
+ patrons_in_hold_queue=licensepool.patrons_in_hold_queue, + ) + + circulation_data.apply(self._db, self.collection) diff --git a/api/odl2.py b/api/odl2.py new file mode 100644 index 000000000..f22e5e3ab --- /dev/null +++ b/api/odl2.py @@ -0,0 +1,287 @@ +import json +import logging + +from contextlib2 import contextmanager +from flask_babel import lazy_gettext as _ +from webpub_manifest_parser.odl import ODLFeedParserFactory +from webpub_manifest_parser.opds2.registry import OPDS2LinkRelationsRegistry + +from api.odl import ODLAPI, ODLExpiredItemsReaper +from core import util +from core.metadata_layer import FormatData, LicenseData +from core.model import DeliveryMechanism, Edition, MediaTypes, RightsStatus +from core.model.configuration import ( + ConfigurationAttributeType, + ConfigurationFactory, + ConfigurationGrouping, + ConfigurationMetadata, + ConfigurationStorage, + HasExternalIntegration, +) +from core.opds2_import import OPDS2Importer, OPDS2ImportMonitor, RWPMManifestParser +from core.util import first_or_default + + +class ODL2APIConfiguration(ConfigurationGrouping): + skipped_license_formats = ConfigurationMetadata( + key="odl2_skipped_license_formats", + label=_("License formats"), + description=_("Name of the data source associated with this collection."), + type=ConfigurationAttributeType.LIST, + required=False, + default=["text/html"], + ) + + +class ODL2API(ODLAPI): + NAME = "ODL 2.0" + SETTINGS = ODLAPI.SETTINGS + ODL2APIConfiguration.to_settings() + + +class ODL2Importer(OPDS2Importer, HasExternalIntegration): + """Import information and formats from an ODL feed. + + The only change from OPDS2Importer is that this importer extracts + FormatData and LicenseData from ODL 2.x's "licenses" arrays. 
+ """ + + NAME = ODL2API.NAME + + FEEDBOOKS_AUDIO = "{0}; protection={1}".format( + MediaTypes.AUDIOBOOK_MANIFEST_MEDIA_TYPE, + DeliveryMechanism.FEEDBOOKS_AUDIOBOOK_DRM, + ) + + CONTENT_TYPE = "content-type" + DRM_SCHEME = "drm-scheme" + + LICENSE_FORMATS = { + FEEDBOOKS_AUDIO: { + CONTENT_TYPE: MediaTypes.AUDIOBOOK_MANIFEST_MEDIA_TYPE, + DRM_SCHEME: DeliveryMechanism.FEEDBOOKS_AUDIOBOOK_DRM + } + } + + def __init__( + self, + db, + collection, + parser=None, + data_source_name=None, + identifier_mapping=None, + http_get=None, + metadata_client=None, + content_modifier=None, + map_from_collection=None, + mirrors=None, + ): + """Initialize a new instance of ODL2Importer class. + + :param db: Database session + :type db: sqlalchemy.orm.session.Session + + :param collection: Circulation Manager's collection. + LicensePools created by this OPDS2Import class will be associated with the given Collection. + If this is None, no LicensePools will be created -- only Editions. + :type collection: Collection + + :param parser: Feed parser + :type parser: RWPMManifestParser + + :param data_source_name: Name of the source of this OPDS feed. + All Editions created by this import will be associated with this DataSource. + If there is no DataSource with this name, one will be created. + NOTE: If `collection` is provided, its .data_source will take precedence over any value provided here. + This is only for use when you are importing OPDS metadata without any particular Collection in mind. + :type data_source_name: str + + :param identifier_mapping: Dictionary used for mapping external identifiers into a set of internal ones + :type identifier_mapping: Dict + + :param metadata_client: A SimplifiedOPDSLookup object that is used to fill in missing metadata + :type metadata_client: SimplifiedOPDSLookup + + :param content_modifier: A function that may modify-in-place representations (such as images and EPUB documents) + as they come in from the network. 
+ :type content_modifier: Callable + + :param map_from_collection: Identifier mapping + :type map_from_collection: Dict + + :param mirrors: A dictionary of different MirrorUploader objects for different purposes + :type mirrors: Dict[MirrorUploader] + """ + super(ODL2Importer, self).__init__( + db, + collection, + parser if parser else RWPMManifestParser(ODLFeedParserFactory()), + data_source_name, + identifier_mapping, + http_get, + metadata_client, + content_modifier, + map_from_collection, + mirrors, + ) + + self._logger = logging.getLogger(__name__) + + self._configuration_storage = ConfigurationStorage(self) + self._configuration_factory = ConfigurationFactory() + + @contextmanager + def _get_configuration(self, db): + """Return the configuration object. + + :param db: Database session + :type db: sqlalchemy.orm.session.Session + + :return: Configuration object + :rtype: ODL2APIConfiguration + """ + with self._configuration_factory.create( + self._configuration_storage, db, ODL2APIConfiguration + ) as configuration: + yield configuration + + def _extract_publication_metadata(self, feed, publication, data_source_name): + """Extract a Metadata object from webpub-manifest-parser's publication. 
+ + :param publication: Feed object + :type publication: opds2_ast.OPDS2Feed + + :param publication: Publication object + :type publication: opds2_ast.OPDS2Publication + + :param data_source_name: Data source's name + :type data_source_name: str + + :return: Publication's metadata + :rtype: Metadata + """ + metadata = super(ODL2Importer, self)._extract_publication_metadata( + feed, publication, data_source_name + ) + formats = [] + licenses = [] + licenses_owned = 0 + licenses_available = 0 + medium = None + + with self._get_configuration(self._db) as configuration: + skipped_license_formats = configuration.skipped_license_formats + + if skipped_license_formats: + skipped_license_formats = set(json.loads(skipped_license_formats)) + + if publication.licenses: + for license in publication.licenses: + identifier = license.metadata.identifier + + for license_format in license.metadata.formats: + if ( + skipped_license_formats + and license_format in skipped_license_formats + ): + continue + + if not medium: + medium = Edition.medium_from_media_type(license_format) + + drm_schemes = ( + license.metadata.protection.formats + if license.metadata.protection + else [] + ) + + if license_format in self.LICENSE_FORMATS: + drm_scheme = self.LICENSE_FORMATS[license_format][self.DRM_SCHEME] + license_format = self.LICENSE_FORMATS[license_format][self.CONTENT_TYPE] + + drm_schemes.append(drm_scheme) + + for drm_scheme in drm_schemes or [None]: + formats.append( + FormatData( + content_type=license_format, + drm_scheme=drm_scheme, + rights_uri=RightsStatus.IN_COPYRIGHT, + ) + ) + + expires = None + remaining_checkouts = None + available_checkouts = None + concurrent_checkouts = None + + checkout_link = first_or_default( + license.links.get_by_rel(OPDS2LinkRelationsRegistry.BORROW.key) + ) + if checkout_link: + checkout_link = checkout_link.href + + odl_status_link = first_or_default( + license.links.get_by_rel(OPDS2LinkRelationsRegistry.SELF.key) + ) + if odl_status_link: + 
odl_status_link = odl_status_link.href + + if odl_status_link: + status_code, _, response = self.http_get( + odl_status_link, headers={} + ) + + if status_code < 400: + status = json.loads(response) + checkouts = status.get("checkouts", {}) + remaining_checkouts = checkouts.get("left") + available_checkouts = checkouts.get("available") + + if license.metadata.terms: + expires = license.metadata.terms.expires + concurrent_checkouts = license.metadata.terms.concurrency + + if expires: + expires = util.datetime_helpers.to_utc(expires) + now = util.datetime_helpers.utc_now() + + if expires <= now: + continue + + licenses_owned += int(concurrent_checkouts or 0) + licenses_available += int(available_checkouts or 0) + + licenses.append( + LicenseData( + identifier=identifier, + checkout_url=checkout_link, + status_url=odl_status_link, + expires=expires, + remaining_checkouts=remaining_checkouts, + concurrent_checkouts=concurrent_checkouts, + ) + ) + + metadata.circulation.licenses_owned = licenses_owned + metadata.circulation.licenses_available = licenses_available + metadata.circulation.licenses = licenses + metadata.circulation.formats.extend(formats) + metadata.medium = medium + + return metadata + + def external_integration(self, db): + return self.collection.external_integration + + +class ODL2ImportMonitor(OPDS2ImportMonitor): + """Import information from an ODL feed.""" + + PROTOCOL = ODL2Importer.NAME + SERVICE_NAME = "ODL 2.x Import Monitor" + + +class ODL2ExpiredItemsReaper(ODLExpiredItemsReaper): + """Responsible for removing expired ODL licenses.""" + SERVICE_NAME = "ODL 2 Expired Items Reaper" + PROTOCOL = ODL2Importer.NAME diff --git a/bin/odl2_reaper b/bin/odl2_reaper new file mode 100755 index 000000000..a2103064a --- /dev/null +++ b/bin/odl2_reaper @@ -0,0 +1,10 @@ +#!/usr/bin/env python +"""Remove all expired licenses from ODL 2.x collections.""" +import os +import sys +bin_dir = os.path.split(__file__)[0] +package_dir = os.path.join(bin_dir, "..") 
+sys.path.append(os.path.abspath(package_dir)) +from core.scripts import RunCollectionMonitorScript +from api.odl2 import ODL2ExpiredItemsReaper +RunCollectionMonitorScript(ODL2ExpiredItemsReaper).run() diff --git a/bin/odl_reaper b/bin/odl_reaper new file mode 100755 index 000000000..ee09a1d3b --- /dev/null +++ b/bin/odl_reaper @@ -0,0 +1,10 @@ +#!/usr/bin/env python +"""Remove all expired licenses from ODL 1.x collections.""" +import os +import sys +bin_dir = os.path.split(__file__)[0] +package_dir = os.path.join(bin_dir, "..") +sys.path.append(os.path.abspath(package_dir)) +from core.scripts import RunCollectionMonitorScript +from api.odl import ODLExpiredItemsReaper +RunCollectionMonitorScript(ODLExpiredItemsReaper).run() diff --git a/tests/files/odl/single_license.opds b/tests/files/odl/single_license.opds new file mode 100644 index 000000000..e89a17d61 --- /dev/null +++ b/tests/files/odl/single_license.opds @@ -0,0 +1,79 @@ + + + https://market.feedbooks.com/api/libraries/harvest.atom + Feedbooks + 2021-08-16T09:07:14Z + /favicon.ico + + Feedbooks + https://market.feedbooks.com + support@feedbooks.zendesk.com + + + 481 + 100 + + + The Golden State + https://www.feedbooks.com/item/2895246 + urn:ISBN:9780374718060 + urn:ISBN:9780374164836 + + Lydia Kiesling + https://market.feedbooks.com/store/browse/recent.atom?author_id=954566&lang=en + + 2018-08-12T00:16:43Z + 2020-05-21T11:13:26Z + en + Mcd + 2018-09-03 + NATIONAL BOOK FOUNDATION 5 UNDER 35 PICK. LONGLISTED FOR THE CENTER FOR FICTION'S FIRST NOVEL PRIZE. Named one of the Best Books of 2018 by NPR, Bookforum and Bustle. One of Entertainment Weekly's 10 Best Debut Novels of 2018. 
An Amazon Best Book of the Month and named a fall read by Buzzfeed, Nylon, Entertainment Weekly, Elle, Vanity Fair, Vulture, Refinery29 and Mind Body GreenA gorgeous, raw debut novel about a young woman braving the ups and downs of motherhood in a fractured AmericaIn Lydia Kiesling's razor-sharp debut novel, The Golden State, we accompany Daphne, a young mother on the edge of a breakdown, as she flees her sensible but strained life in San Francisco for the high desert of Altavista with her toddler, Honey. Bucking under the weight of being a single parent--her Turkish husband is unable to return to the United States because of a "processing error"--Daphne takes refuge in a mobile home left to her by her grandparents in hopes that the quiet will bring clarity. But clarity proves elusive. Over the next ten days Daphne is anxious, she behaves a little erratically, she drinks too much. She wanders the town looking for anyone and anything to punctuate the long hours alone with the baby. Among others, she meets Cindy, a neighbor who is active in a secessionist movement, and befriends the elderly Alice, who has traveled to Altavista as she approaches the end of her life. When her relationships with these women culminate in a dangerous standoff, Daphne must reconcile her inner narrative with the reality of a deeply divided world. Keenly observed, bristling with humor, and set against the beauty of a little-known part of California, The Golden State is about class and cultural breakdowns, and desperate attempts to bridge old and new worlds. But more than anything, it is about motherhood: its voracious worry, frequent tedium, and enthralling, wondrous love. 
+ 4 MB + + + + + + + + + 40.00 + + + + + + + + + urn:uuid:c981d61e-26f4-4070-aaa8-83df952cf61b + application/epub+zip + text/html + http://www.cantook.net/ + 40.00 + cant-2461538-24501117858552614-libraries + 2020-03-02T20:20:17+01:00 + + {{expires}} + 1 + 5097600 + + + application/vnd.adobe.adept+xml + 6 + true + false + false + + + application/vnd.readium.lcp.license.v1.0+json + 6 + true + false + false + + + + + + \ No newline at end of file diff --git a/tests/files/odl2/single_license.json b/tests/files/odl2/single_license.json new file mode 100644 index 000000000..695f12f51 --- /dev/null +++ b/tests/files/odl2/single_license.json @@ -0,0 +1,110 @@ +{ + "metadata": { + "title": "Test", + "itemsPerPage": 10, + "currentPage": 1, + "numberOfItems": 100 + }, + "links": [ + { + "type": "application/opds+json", + "rel": "self", + "href": "https://market.feedbooks.com/api/libraries/harvest.json" + } + ], + "publications": [ + { + "metadata": { + "@type": "http://schema.org/Book", + "title": "Moby-Dick", + "author": "Herman Melville", + "identifier": "urn:isbn:978-3-16-148410-0", + "language": "en", + "publisher": { + "name": "Test Publisher" + }, + "published": "2015-09-29T00:00:00Z", + "modified": "2015-09-29T17:00:00Z", + "subject": [ + { + "scheme": "http://schema.org/audience", + "code": "juvenile-fiction", + "name": "Juvenile Fiction", + "links": [] + } + ] + }, + "links": [ + { + "rel": "self", + "href": "http://example.org/publication.json", + "type": "application/opds-publication+json" + } + ], + "images": [ + { + "href": "http://example.org/cover.jpg", + "type": "image/jpeg", + "height": 1400, + "width": 800 + }, + { + "href": "http://example.org/cover-small.jpg", + "type": "image/jpeg", + "height": 700, + "width": 400 + }, + { + "href": "http://example.org/cover.svg", + "type": "image/svg+xml" + } + ], + "licenses": [ + { + "metadata": { + "identifier": "urn:uuid:f7847120-fc6f-11e3-8158-56847afe9799", + "format": [ + "application/epub+zip", + "text/html", + 
"application/audiobook+json; protection=http://www.feedbooks.com/audiobooks/access-restriction" + ], + "price": { + "currency": "USD", + "value": 7.99 + }, + "created": "2014-04-25T12:25:21+02:00", + "terms": { + "checkouts": 1, + "expires": "{{expires}}", + "concurrency": 1, + "length": 5097600 + }, + "protection": { + "format": [ + "application/vnd.adobe.adept+xml", + "application/vnd.readium.lcp.license.v1.0+json" + ], + "devices": 6, + "copy": false, + "print": false, + "tts": false + } + }, + "links": [ + { + "rel": "http://opds-spec.org/acquisition/borrow", + "href": "http://www.example.com/get{?id,checkout_id,expires,patron_id,passphrase,hint,hint_url,notification_url}", + "type": "application/vnd.readium.license.status.v1.0+json", + "templated": true + }, + { + "rel": "self", + "href": "http://www.example.com/status/294024", + "type": "application/vnd.odl.info+json" + } + ] + } + ] + } + ] +} diff --git a/tests/test_odl.py b/tests/test_odl.py index a3b19deaf..a7028f90d 100644 --- a/tests/test_odl.py +++ b/tests/test_odl.py @@ -1,50 +1,47 @@ -import pytest -import os -import json import datetime -import dateutil -import re +import json +import os import urllib.parse -from pdb import set_trace -from core.testing import DatabaseTest -from core.metadata_layer import TimestampData + +import dateutil +import pytest +from dateutil.tz import tzoffset +from freezegun import freeze_time +from mock import MagicMock + +from api.circulation_exceptions import * +from api.odl import ( + ODLAPI, + MockODLAPI, + MockSharedODLAPI, + ODLExpiredItemsReaper, + ODLHoldReaper, + ODLImporter, + SharedODLAPI, + SharedODLImporter, +) from core.model import ( Collection, ConfigurationSetting, - Credential, DataSource, DeliveryMechanism, Edition, ExternalIntegration, Hold, Hyperlink, - Identifier, Loan, MediaTypes, Representation, RightsStatus, - get_one, -) -from api.odl import ( - ODLImporter, - ODLHoldReaper, - MockODLAPI, - SharedODLAPI, - MockSharedODLAPI, - SharedODLImporter, -) 
-from api.circulation_exceptions import * -from core.util.datetime_helpers import ( - datetime_utc, - strptime_utc, - utc_now, -) -from core.util.http import ( - BadResponseException, - RemoteIntegrationException, ) +from core.scripts import RunCollectionMonitorScript +from core.testing import DatabaseTest +from core.util import datetime_helpers +from core.util.datetime_helpers import datetime_utc, utc_now +from core.util.http import BadResponseException, RemoteIntegrationException from core.util.string_helpers import base64 + class BaseODLTest(object): base_path = os.path.split(__file__)[0] resource_path = os.path.join(base_path, "files", "odl") @@ -52,7 +49,8 @@ class BaseODLTest(object): @classmethod def get_data(cls, filename): path = os.path.join(cls.resource_path, filename) - return open(path, "rb").read() + return open(path, "r").read() + class TestODLAPI(DatabaseTest, BaseODLTest): @@ -1290,8 +1288,12 @@ def test_release_hold_from_external_library(self): class TestODLImporter(DatabaseTest, BaseODLTest): - + @freeze_time("2019-01-01T00:00:00+00:00") def test_import(self): + """Ensure that ODLImporter correctly processes and imports the ODL feed encoded using OPDS 1.x. + + NOTE: `freeze_time` decorator is required to treat the licenses in the ODL feed as non-expired. 
+ """ feed = self.get_data("feedbooks_bibliographic.atom") data_source = DataSource.lookup(self._db, "Feedbooks", autocreate=True) collection = MockODLAPI.mock_collection(self._db) @@ -1517,7 +1519,6 @@ def test_run_once(self): assert None == progress.finish - class TestSharedODLAPI(DatabaseTest, BaseODLTest): def setup_method(self): @@ -1847,6 +1848,7 @@ def test_patron_activity_remote_integration_exception(self): pytest.raises(RemoteIntegrationException, self.api.patron_activity, self.patron, "pin") assert [hold.external_identifier] == self.api.requests[1:] + class TestSharedODLImporter(DatabaseTest, BaseODLTest): def test_get_fulfill_url(self): @@ -1929,3 +1931,162 @@ def canonicalize_author_name(self, identifier, working_display_name): assert 'http://localhost:6500/AL/works/URI/http://www.feedbooks.com/item/1946289/borrow' == borrow_link.resource.url +class TestODLExpiredItemsReaper(DatabaseTest, BaseODLTest): + ODL_PROTOCOL = ODLAPI.NAME + ODL_FEED_FILENAME_WITH_SINGLE_ODL_LICENSE = "single_license.opds" + ODL_LICENSE_EXPIRATION_TIME_PLACEHOLDER = "{{expires}}" + ODL_REAPER_CLASS = ODLExpiredItemsReaper + SECONDS_PER_HOUR = 3600 + + def _create_importer(self, collection, http_get): + """Create a new ODL importer with the specified parameters. + + :param collection: Collection object + :type collection: core.model.collection.Collection + + :param http_get: Use this method to make an HTTP GET request. + This can be replaced with a stub method for testing purposes. + :type http_get: Callable + + :return: ODLImporter object + :rtype: ODLImporter + """ + importer = ODLImporter( + self._db, + collection=collection, + http_get=http_get, + ) + + return importer + + def _get_test_feed_with_single_odl_license(self, expires): + """Get the feed with a single ODL license with the specific expiration date. 
+ + :param expires: Expiration date of the ODL license + :type expires: datetime.datetime + + :return: Test ODL feed with a single ODL license with the specific expiration date + :rtype: str + """ + feed = self.get_data(self.ODL_FEED_FILENAME_WITH_SINGLE_ODL_LICENSE) + feed = feed.replace(self.ODL_LICENSE_EXPIRATION_TIME_PLACEHOLDER, expires.isoformat()) + + return feed + + def _import_test_feed_with_single_odl_license(self, expires): + """Import the test ODL feed with a single ODL license with the specific expiration date. + + :param expires: Expiration date of the ODL license + :type expires: datetime.datetime + + :return: 3-tuple containing imported editions, license pools and works + :rtype: Tuple[List[Edition], List[LicensePool], List[Work]] + """ + feed = self._get_test_feed_with_single_odl_license(expires) + data_source = DataSource.lookup(self._db, "Feedbooks", autocreate=True) + collection = MockODLAPI.mock_collection(self._db, protocol=self.ODL_PROTOCOL) + collection.external_integration.set_setting( + Collection.DATA_SOURCE_NAME_SETTING, + data_source.name + ) + license_status = { + "checkouts": { + "available": 1 + } + } + license_status_response = MagicMock(return_value=(200, {}, json.dumps(license_status))) + importer = self._create_importer(collection, license_status_response) + + imported_editions, imported_pools, imported_works, _ = ( + importer.import_from_feed(feed) + ) + + return imported_editions, imported_pools, imported_works + + @freeze_time("2021-01-01T00:00:00+00:00") + def test_odl_importer_skips_expired_licenses(self): + """Ensure ODLImporter skips expired licenses + and does not count them in the total number of available licenses.""" + # 1.1. Import the test feed with an expired ODL license. + # The license expires 2021-01-01T00:01:00+01:00 that equals to 2010-01-01T00:00:00+00:00, the current time. + # It means the license had already expired at the time of the import. 
+ license_expiration_date = datetime.datetime(2021, 1, 1, 1, 0, 0, tzinfo=tzoffset(None, self.SECONDS_PER_HOUR)) + imported_editions, imported_pools, imported_works = self._import_test_feed_with_single_odl_license( + license_expiration_date + ) + + # Commit to expire the SQLAlchemy cache. + self._db.commit() + + # 1.2. Ensure that the license pool was successfully created but it does not have any available licenses. + assert len(imported_pools) == 1 + + [imported_pool] = imported_pools + assert imported_pool.licenses_owned == 0 + assert imported_pool.licenses_available == 0 + assert len(imported_pool.licenses) == 0 + + @freeze_time("2021-01-01T00:00:00+00:00") + def test_odl_reaper_removes_expired_licenses(self): + """Ensure ODLExpiredItemsReaper removes expired licenses.""" + patron = self._patron() + + # 1.1. Import the test feed with an ODL license that is still valid. + # The license will be valid for one more day since this very moment. + license_expiration_date = datetime_helpers.utc_now() + datetime.timedelta(days=1) + imported_editions, imported_pools, imported_works = self._import_test_feed_with_single_odl_license( + license_expiration_date + ) + + # Commit to expire the SQLAlchemy cache. + self._db.commit() + + # 1.2. Ensure that there is a license pool with available license. + assert len(imported_pools) == 1 + + [imported_pool] = imported_pools + assert imported_pool.licenses_owned == 1 + assert imported_pool.licenses_available == 1 + + assert len(imported_pool.licenses) == 1 + [license] = imported_pool.licenses + assert license.expires == license_expiration_date + + # 2. Create a loan to ensure that the licence with active loan can also be removed (hidden). + loan, _ = license.loan_to(patron) + + # 3.1. Run ODLExpiredItemsReaper. This time nothing should happen since the license is still valid. 
+ script = RunCollectionMonitorScript(self.ODL_REAPER_CLASS, _db=self._db, cmd_args=["Test ODL Collection"]) + script.run() + + # Commit to expire the SQLAlchemy cache. + self._db.commit() + + # 3.2. Ensure that availability of the license pool didn't change. + assert imported_pool.licenses_owned == 1 + assert imported_pool.licenses_available == 1 + + # 4. Expire the license. + # Set the expiration date to 2021-01-01T01:00:00+01:00, + # which equals 2021-01-01T00:00:00+00:00, the current time. + license.expires = datetime.datetime(2021, 1, 1, 1, 0, 0, tzinfo=tzoffset(None, self.SECONDS_PER_HOUR)) + + # 5.1. Run ODLExpiredItemsReaper again. This time it should remove the expired license. + script.run() + + # Commit to expire the SQLAlchemy cache. + self._db.commit() + + # 5.2. Ensure that availability of the license pool was updated and now it doesn't have any available licenses. + assert imported_pool.licenses_owned == 0 + assert imported_pool.licenses_available == 0 + + # 6.1. Run ODLExpiredItemsReaper again to ensure that number of licenses won't become negative. + script.run() + + # Commit to expire the SQLAlchemy cache. + self._db.commit() + + # 6.2. Ensure that number of licenses is still 0. + assert imported_pool.licenses_owned == 0 + assert imported_pool.licenses_available == 0 From 071391b1484453cccb958e616c68beb934b36ef9 Mon Sep 17 00:00:00 2001 From: Leonard Richardson Date: Fri, 3 Sep 2021 16:43:40 -0400 Subject: [PATCH 2/3] Removed ODL2 importer code, which doesn't need to be here for the bug fix we're looking for. 
--- api/odl2.py | 287 --------------------------- tests/files/odl2/single_license.json | 110 ---------- 2 files changed, 397 deletions(-) delete mode 100644 api/odl2.py delete mode 100644 tests/files/odl2/single_license.json diff --git a/api/odl2.py b/api/odl2.py deleted file mode 100644 index f22e5e3ab..000000000 --- a/api/odl2.py +++ /dev/null @@ -1,287 +0,0 @@ -import json -import logging - -from contextlib2 import contextmanager -from flask_babel import lazy_gettext as _ -from webpub_manifest_parser.odl import ODLFeedParserFactory -from webpub_manifest_parser.opds2.registry import OPDS2LinkRelationsRegistry - -from api.odl import ODLAPI, ODLExpiredItemsReaper -from core import util -from core.metadata_layer import FormatData, LicenseData -from core.model import DeliveryMechanism, Edition, MediaTypes, RightsStatus -from core.model.configuration import ( - ConfigurationAttributeType, - ConfigurationFactory, - ConfigurationGrouping, - ConfigurationMetadata, - ConfigurationStorage, - HasExternalIntegration, -) -from core.opds2_import import OPDS2Importer, OPDS2ImportMonitor, RWPMManifestParser -from core.util import first_or_default - - -class ODL2APIConfiguration(ConfigurationGrouping): - skipped_license_formats = ConfigurationMetadata( - key="odl2_skipped_license_formats", - label=_("License formats"), - description=_("Name of the data source associated with this collection."), - type=ConfigurationAttributeType.LIST, - required=False, - default=["text/html"], - ) - - -class ODL2API(ODLAPI): - NAME = "ODL 2.0" - SETTINGS = ODLAPI.SETTINGS + ODL2APIConfiguration.to_settings() - - -class ODL2Importer(OPDS2Importer, HasExternalIntegration): - """Import information and formats from an ODL feed. - - The only change from OPDS2Importer is that this importer extracts - FormatData and LicenseData from ODL 2.x's "licenses" arrays. 
- """ - - NAME = ODL2API.NAME - - FEEDBOOKS_AUDIO = "{0}; protection={1}".format( - MediaTypes.AUDIOBOOK_MANIFEST_MEDIA_TYPE, - DeliveryMechanism.FEEDBOOKS_AUDIOBOOK_DRM, - ) - - CONTENT_TYPE = "content-type" - DRM_SCHEME = "drm-scheme" - - LICENSE_FORMATS = { - FEEDBOOKS_AUDIO: { - CONTENT_TYPE: MediaTypes.AUDIOBOOK_MANIFEST_MEDIA_TYPE, - DRM_SCHEME: DeliveryMechanism.FEEDBOOKS_AUDIOBOOK_DRM - } - } - - def __init__( - self, - db, - collection, - parser=None, - data_source_name=None, - identifier_mapping=None, - http_get=None, - metadata_client=None, - content_modifier=None, - map_from_collection=None, - mirrors=None, - ): - """Initialize a new instance of ODL2Importer class. - - :param db: Database session - :type db: sqlalchemy.orm.session.Session - - :param collection: Circulation Manager's collection. - LicensePools created by this OPDS2Import class will be associated with the given Collection. - If this is None, no LicensePools will be created -- only Editions. - :type collection: Collection - - :param parser: Feed parser - :type parser: RWPMManifestParser - - :param data_source_name: Name of the source of this OPDS feed. - All Editions created by this import will be associated with this DataSource. - If there is no DataSource with this name, one will be created. - NOTE: If `collection` is provided, its .data_source will take precedence over any value provided here. - This is only for use when you are importing OPDS metadata without any particular Collection in mind. - :type data_source_name: str - - :param identifier_mapping: Dictionary used for mapping external identifiers into a set of internal ones - :type identifier_mapping: Dict - - :param metadata_client: A SimplifiedOPDSLookup object that is used to fill in missing metadata - :type metadata_client: SimplifiedOPDSLookup - - :param content_modifier: A function that may modify-in-place representations (such as images and EPUB documents) - as they come in from the network. 
- :type content_modifier: Callable - - :param map_from_collection: Identifier mapping - :type map_from_collection: Dict - - :param mirrors: A dictionary of different MirrorUploader objects for different purposes - :type mirrors: Dict[MirrorUploader] - """ - super(ODL2Importer, self).__init__( - db, - collection, - parser if parser else RWPMManifestParser(ODLFeedParserFactory()), - data_source_name, - identifier_mapping, - http_get, - metadata_client, - content_modifier, - map_from_collection, - mirrors, - ) - - self._logger = logging.getLogger(__name__) - - self._configuration_storage = ConfigurationStorage(self) - self._configuration_factory = ConfigurationFactory() - - @contextmanager - def _get_configuration(self, db): - """Return the configuration object. - - :param db: Database session - :type db: sqlalchemy.orm.session.Session - - :return: Configuration object - :rtype: ODL2APIConfiguration - """ - with self._configuration_factory.create( - self._configuration_storage, db, ODL2APIConfiguration - ) as configuration: - yield configuration - - def _extract_publication_metadata(self, feed, publication, data_source_name): - """Extract a Metadata object from webpub-manifest-parser's publication. 
- - :param publication: Feed object - :type publication: opds2_ast.OPDS2Feed - - :param publication: Publication object - :type publication: opds2_ast.OPDS2Publication - - :param data_source_name: Data source's name - :type data_source_name: str - - :return: Publication's metadata - :rtype: Metadata - """ - metadata = super(ODL2Importer, self)._extract_publication_metadata( - feed, publication, data_source_name - ) - formats = [] - licenses = [] - licenses_owned = 0 - licenses_available = 0 - medium = None - - with self._get_configuration(self._db) as configuration: - skipped_license_formats = configuration.skipped_license_formats - - if skipped_license_formats: - skipped_license_formats = set(json.loads(skipped_license_formats)) - - if publication.licenses: - for license in publication.licenses: - identifier = license.metadata.identifier - - for license_format in license.metadata.formats: - if ( - skipped_license_formats - and license_format in skipped_license_formats - ): - continue - - if not medium: - medium = Edition.medium_from_media_type(license_format) - - drm_schemes = ( - license.metadata.protection.formats - if license.metadata.protection - else [] - ) - - if license_format in self.LICENSE_FORMATS: - drm_scheme = self.LICENSE_FORMATS[license_format][self.DRM_SCHEME] - license_format = self.LICENSE_FORMATS[license_format][self.CONTENT_TYPE] - - drm_schemes.append(drm_scheme) - - for drm_scheme in drm_schemes or [None]: - formats.append( - FormatData( - content_type=license_format, - drm_scheme=drm_scheme, - rights_uri=RightsStatus.IN_COPYRIGHT, - ) - ) - - expires = None - remaining_checkouts = None - available_checkouts = None - concurrent_checkouts = None - - checkout_link = first_or_default( - license.links.get_by_rel(OPDS2LinkRelationsRegistry.BORROW.key) - ) - if checkout_link: - checkout_link = checkout_link.href - - odl_status_link = first_or_default( - license.links.get_by_rel(OPDS2LinkRelationsRegistry.SELF.key) - ) - if odl_status_link: - 
odl_status_link = odl_status_link.href - - if odl_status_link: - status_code, _, response = self.http_get( - odl_status_link, headers={} - ) - - if status_code < 400: - status = json.loads(response) - checkouts = status.get("checkouts", {}) - remaining_checkouts = checkouts.get("left") - available_checkouts = checkouts.get("available") - - if license.metadata.terms: - expires = license.metadata.terms.expires - concurrent_checkouts = license.metadata.terms.concurrency - - if expires: - expires = util.datetime_helpers.to_utc(expires) - now = util.datetime_helpers.utc_now() - - if expires <= now: - continue - - licenses_owned += int(concurrent_checkouts or 0) - licenses_available += int(available_checkouts or 0) - - licenses.append( - LicenseData( - identifier=identifier, - checkout_url=checkout_link, - status_url=odl_status_link, - expires=expires, - remaining_checkouts=remaining_checkouts, - concurrent_checkouts=concurrent_checkouts, - ) - ) - - metadata.circulation.licenses_owned = licenses_owned - metadata.circulation.licenses_available = licenses_available - metadata.circulation.licenses = licenses - metadata.circulation.formats.extend(formats) - metadata.medium = medium - - return metadata - - def external_integration(self, db): - return self.collection.external_integration - - -class ODL2ImportMonitor(OPDS2ImportMonitor): - """Import information from an ODL feed.""" - - PROTOCOL = ODL2Importer.NAME - SERVICE_NAME = "ODL 2.x Import Monitor" - - -class ODL2ExpiredItemsReaper(ODLExpiredItemsReaper): - """Responsible for removing expired ODL licenses.""" - SERVICE_NAME = "ODL 2 Expired Items Reaper" - PROTOCOL = ODL2Importer.NAME diff --git a/tests/files/odl2/single_license.json b/tests/files/odl2/single_license.json deleted file mode 100644 index 695f12f51..000000000 --- a/tests/files/odl2/single_license.json +++ /dev/null @@ -1,110 +0,0 @@ -{ - "metadata": { - "title": "Test", - "itemsPerPage": 10, - "currentPage": 1, - "numberOfItems": 100 - }, - "links": [ - { 
- "type": "application/opds+json", - "rel": "self", - "href": "https://market.feedbooks.com/api/libraries/harvest.json" - } - ], - "publications": [ - { - "metadata": { - "@type": "http://schema.org/Book", - "title": "Moby-Dick", - "author": "Herman Melville", - "identifier": "urn:isbn:978-3-16-148410-0", - "language": "en", - "publisher": { - "name": "Test Publisher" - }, - "published": "2015-09-29T00:00:00Z", - "modified": "2015-09-29T17:00:00Z", - "subject": [ - { - "scheme": "http://schema.org/audience", - "code": "juvenile-fiction", - "name": "Juvenile Fiction", - "links": [] - } - ] - }, - "links": [ - { - "rel": "self", - "href": "http://example.org/publication.json", - "type": "application/opds-publication+json" - } - ], - "images": [ - { - "href": "http://example.org/cover.jpg", - "type": "image/jpeg", - "height": 1400, - "width": 800 - }, - { - "href": "http://example.org/cover-small.jpg", - "type": "image/jpeg", - "height": 700, - "width": 400 - }, - { - "href": "http://example.org/cover.svg", - "type": "image/svg+xml" - } - ], - "licenses": [ - { - "metadata": { - "identifier": "urn:uuid:f7847120-fc6f-11e3-8158-56847afe9799", - "format": [ - "application/epub+zip", - "text/html", - "application/audiobook+json; protection=http://www.feedbooks.com/audiobooks/access-restriction" - ], - "price": { - "currency": "USD", - "value": 7.99 - }, - "created": "2014-04-25T12:25:21+02:00", - "terms": { - "checkouts": 1, - "expires": "{{expires}}", - "concurrency": 1, - "length": 5097600 - }, - "protection": { - "format": [ - "application/vnd.adobe.adept+xml", - "application/vnd.readium.lcp.license.v1.0+json" - ], - "devices": 6, - "copy": false, - "print": false, - "tts": false - } - }, - "links": [ - { - "rel": "http://opds-spec.org/acquisition/borrow", - "href": "http://www.example.com/get{?id,checkout_id,expires,patron_id,passphrase,hint,hint_url,notification_url}", - "type": "application/vnd.readium.license.status.v1.0+json", - "templated": true - }, - { - "rel": 
"self", - "href": "http://www.example.com/status/294024", - "type": "application/vnd.odl.info+json" - } - ] - } - ] - } - ] -} From e23d1e3e0cefbc3366befb0b80f95858f367d958 Mon Sep 17 00:00:00 2001 From: Leonard Richardson Date: Tue, 7 Sep 2021 12:53:12 -0400 Subject: [PATCH 3/3] Added crontab entry for odl_reaper. --- bin/odl_reaper | 2 +- docker/services/simplified_crontab | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/bin/odl_reaper b/bin/odl_reaper index ee09a1d3b..66dd03f7d 100755 --- a/bin/odl_reaper +++ b/bin/odl_reaper @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 """Remove all expired licenses from ODL 1.x collections.""" import os import sys diff --git a/docker/services/simplified_crontab b/docker/services/simplified_crontab index fba32d6ab..71106d550 100644 --- a/docker/services/simplified_crontab +++ b/docker/services/simplified_crontab @@ -113,6 +113,7 @@ HOME=/var/www/circulation # 0 6 * * * root core/bin/run odl_import_monitor >> /var/log/cron.log 2>&1 0 */8 * * * root core/bin/run odl_hold_reaper >> /var/log/cron.log 2>&1 +*/15 * * * * root core/bin/run odl_reaper >> /var/log/cron.log 2>&1 5 */6 * * * root core/bin/run shared_odl_import_monitor >> /var/log/cron.log 2>&1 # Odilo