Skip to content

Commit

Permalink
Implemented automatic caching for the discovery documents.
Browse files Browse the repository at this point in the history
  • Loading branch information
Takashi Matsuo committed Aug 23, 2015
1 parent a98add2 commit 3c79cd3
Show file tree
Hide file tree
Showing 7 changed files with 411 additions and 13 deletions.
57 changes: 46 additions & 11 deletions googleapiclient/discovery.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,9 @@ def build(serviceName,
developerKey=None,
model=None,
requestBuilder=HttpRequest,
credentials=None):
credentials=None,
cache_discovery=True,
cache=None):
"""Construct a Resource for interacting with an API.
Construct a Resource object for interacting with an API. The serviceName and
Expand All @@ -171,6 +173,9 @@ def build(serviceName,
request.
credentials: oauth2client.Credentials, credentials to be used for
authentication.
cache_discovery: Boolean, whether or not to cache the discovery doc.
cache: googleapiclient.discovery_cache.base.Cache, an optional
cache object for the discovery documents.
Returns:
A Resource object with methods for interacting with the service.
Expand All @@ -185,22 +190,53 @@ def build(serviceName,

requested_url = uritemplate.expand(discoveryServiceUrl, params)

content = _retrieve_discovery_doc(requested_url, http, cache_discovery, cache)

return build_from_document(content, base=discoveryServiceUrl, http=http,
developerKey=developerKey, model=model, requestBuilder=requestBuilder,
credentials=credentials)


def _retrieve_discovery_doc(url, http, cache_discovery, cache=None):
"""Retrieves the discovery_doc from cache or the internet.
Args:
url: string, the URL of the discovery document.
http: httplib2.Http, An instance of httplib2.Http or something that acts
like it through which HTTP requests will be made.
cache_discovery: Boolean, whether or not to cache the discovery doc.
cache: googleapiclient.discovery_cache.base.Cache, an optional cache
object for the discovery documents.
Returns:
A unicode string representation of the discovery document.
"""
if cache_discovery:
from . import discovery_cache
from .discovery_cache import base
if cache is None:
cache = discovery_cache.autodetect()
if cache:
content = cache.get(url)
if content:
return content

actual_url = url
# REMOTE_ADDR is defined by the CGI spec [RFC3875] as the environment
# variable that contains the network address of the client sending the
# request. If it exists then add that to the request for the discovery
# document to avoid exceeding the quota on discovery requests.
if 'REMOTE_ADDR' in os.environ:
requested_url = _add_query_parameter(requested_url, 'userIp',
os.environ['REMOTE_ADDR'])
logger.info('URL being requested: GET %s' % requested_url)
actual_url = _add_query_parameter(url, 'userIp', os.environ['REMOTE_ADDR'])
logger.info('URL being requested: GET %s', actual_url)

resp, content = http.request(requested_url)
resp, content = http.request(actual_url)

if resp.status == 404:
raise UnknownApiNameOrVersion("name: %s version: %s" % (serviceName,
version))
version))
if resp.status >= 400:
raise HttpError(resp, content, uri=requested_url)
raise HttpError(resp, content, uri=actual_url)

try:
content = content.decode('utf-8')
Expand All @@ -212,10 +248,9 @@ def build(serviceName,
except ValueError as e:
logger.error('Failed to parse as JSON: ' + content)
raise InvalidJsonError()

return build_from_document(content, base=discoveryServiceUrl, http=http,
developerKey=developerKey, model=model, requestBuilder=requestBuilder,
credentials=credentials)
if cache_discovery and cache:
cache.set(url, content)
return content


@positional(1)
Expand Down
42 changes: 42 additions & 0 deletions googleapiclient/discovery_cache/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# Copyright 2014 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Caching utility for the discovery document."""

from __future__ import absolute_import

import logging
import datetime

# Lifetime of a cached discovery document, in seconds. The bundled cache
# implementations receive this value as their `max_age`.
DISCOVERY_DOC_MAX_AGE = 60 * 60 * 24 # 1 day


def autodetect():
  """Picks the best available cache backend for discovery documents.

  The App Engine memcache backend is tried first; if it cannot be imported
  the file based backend is tried instead.

  Returns:
    googleapiclient.discovery_cache.base.Cache, the detected cache object,
    or None when neither backend can be loaded.
  """
  try:
    # Imported only to probe whether the App Engine memcache API exists.
    from google.appengine.api import memcache
    from . import appengine_memcache
    detected = appengine_memcache.cache
  except Exception:
    try:
      from . import file_cache
      detected = file_cache.cache
    except Exception as err:
      logging.warning(err, exc_info=True)
      detected = None
  return detected
52 changes: 52 additions & 0 deletions googleapiclient/discovery_cache/appengine_memcache.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
# Copyright 2014 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""App Engine memcache based cache for the discovery document."""

import logging

# This is only an optional dependency because we only import this
# module when google.appengine.api.memcache is available.
from google.appengine.api import memcache

from . import base
from ..discovery_cache import DISCOVERY_DOC_MAX_AGE

# Memcache namespace used for every discovery document read and write.
NAMESPACE = 'google-api-client'


class Cache(base.Cache):
  """Discovery document cache backed by the App Engine memcache API.

  Both operations are best effort: any error raised by memcache is logged
  as a warning and otherwise ignored.
  """

  def __init__(self, max_age):
    """Constructor.

    Args:
      max_age: Cache expiration in seconds.
    """
    self._max_age = max_age

  def get(self, url):
    """Looks up the document stored for `url`.

    Args:
      url: string, the key for the cache.

    Returns:
      Whatever memcache holds for the key, or None when the lookup raised.
    """
    cached = None
    try:
      cached = memcache.get(url, namespace=NAMESPACE)
    except Exception as err:
      logging.warning(err, exc_info=True)
    return cached

  def set(self, url, content):
    """Stores `content` under `url` with the configured expiration.

    Args:
      url: string, the key for the cache.
      content: string, the discovery document.
    """
    try:
      expiration = int(self._max_age)
      memcache.set(url, content, time=expiration, namespace=NAMESPACE)
    except Exception as err:
      logging.warning(err, exc_info=True)

# Module-level cache instance; this is the object that
# discovery_cache.autodetect() returns when App Engine memcache is available.
cache = Cache(max_age=DISCOVERY_DOC_MAX_AGE)
45 changes: 45 additions & 0 deletions googleapiclient/discovery_cache/base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# Copyright 2014 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""An abstract class for caching the discovery document."""

import abc


# A `__metaclass__` class attribute is honoured only by Python 2, and the
# `metaclass=` keyword is a syntax error there. Creating the base by calling
# the metaclass directly makes the abstract methods enforced on both.
_AbstractBase = abc.ABCMeta('_AbstractBase', (object,), {})


class Cache(_AbstractBase):
  """A base abstract cache class.

  Concrete caches must implement both `get` and `set`; a subclass that
  leaves either one out cannot be instantiated.
  """

  @abc.abstractmethod
  def get(self, url):
    """Gets the content from the cache with a given key.

    Args:
      url: string, the key for the cache.

    Returns:
      object, the value in the cache for the given key, or None if the key is
      not in the cache.
    """
    raise NotImplementedError()

  @abc.abstractmethod
  def set(self, url, content):
    """Sets the given key and content in the cache.

    Args:
      url: string, the key for the cache.
      content: string, the discovery document.
    """
    raise NotImplementedError()
123 changes: 123 additions & 0 deletions googleapiclient/discovery_cache/file_cache.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
# Copyright 2014 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""File based cache for the discovery document.
The cache is stored in a single file so that multiple processes can
share the same cache. It locks the file whenever accessing the
file. When the cache content is corrupted, it will be initialized with
an empty cache.
"""

import datetime
import json
import logging
import os
import tempfile
import threading

from oauth2client.locked_file import LockedFile

from . import base
from ..discovery_cache import DISCOVERY_DOC_MAX_AGE

logger = logging.getLogger(__name__)

FILENAME = 'google-api-python-client-discovery-doc.cache'
EPOCH = datetime.datetime.utcfromtimestamp(0)


def _to_timestamp(d):
return (d - EPOCH).total_seconds()


def _read_or_initialize_cache(f):
f.file_handle().seek(0)
try:
cache = json.load(f.file_handle())
except Exception:
# This means it opens the file for the first time, or the cache is
# corrupted, so initializing the file with an empty dict.
cache = {}
f.file_handle().truncate(0)
f.file_handle().seek(0)
json.dump(cache, f.file_handle())
return cache


class Cache(base.Cache):
  """A file based cache for the discovery documents.

  All entries live in one JSON file inside the system temp directory. Each
  entry maps a URL to a `(content, timestamp)` pair, where the timestamp is
  seconds since EPOCH. Every operation is best effort: failures are logged
  and never raised to the caller.
  """

  def __init__(self, max_age):
    """Constructor.

    Args:
      max_age: Cache expiration in seconds.
    """
    self._max_age = max_age
    # NOTE(review): the file has a fixed name in the shared temp directory;
    # confirm that is acceptable on hosts shared between users.
    self._file = os.path.join(tempfile.gettempdir(), FILENAME)
    f = LockedFile(self._file, 'a+', 'r')
    try:
      f.open_and_lock()
      if f.is_locked():
        _read_or_initialize_cache(f)
      # If we can not obtain the lock, other process or thread must
      # have initialized the file.
    except Exception as e:
      logger.warning(e, exc_info=True)
    finally:
      f.unlock_and_close()

  def _is_fresh(self, entry, now):
    """Tells whether a stored entry is well formed and not yet expired.

    Args:
      entry: the value stored for a URL, expected to be a
        `(content, timestamp)` pair.
      now: float, the current time in seconds since EPOCH.

    Returns:
      True if the entry is younger than `max_age`; False if it is expired
      or is not a `(content, timestamp)` pair.
    """
    try:
      _, stored_at = entry
      return now < stored_at + self._max_age
    except (TypeError, ValueError):
      return False

  def get(self, url):
    """Gets the cached document for `url`.

    Args:
      url: string, the key for the cache.

    Returns:
      The cached content, or None when the key is missing, the entry is
      expired, the lock could not be obtained, or an error occurred.
    """
    f = LockedFile(self._file, 'r+', 'r')
    try:
      f.open_and_lock()
      if not f.is_locked():
        logger.debug('Could not obtain a lock for the cache file.')
        return None
      entry = _read_or_initialize_cache(f).get(url)
      if entry is None:
        return None
      content, _ = entry
      # EPOCH is a naive UTC datetime, so "now" is taken in UTC as well;
      # local time would shift ages when the UTC offset changes.
      now = _to_timestamp(datetime.datetime.utcnow())
      if self._is_fresh(entry, now):
        return content
      return None
    except Exception as e:
      logger.warning(e, exc_info=True)
      return None
    finally:
      f.unlock_and_close()

  def set(self, url, content):
    """Stores `content` for `url` and drops expired entries.

    Args:
      url: string, the key for the cache.
      content: string, the discovery document.
    """
    f = LockedFile(self._file, 'r+', 'r')
    try:
      f.open_and_lock()
      if not f.is_locked():
        logger.debug('Could not obtain a lock for the cache file.')
        return
      cache = _read_or_initialize_cache(f)
      now = _to_timestamp(datetime.datetime.utcnow())
      cache[url] = (content, now)
      # Remove stale cache. A new dict is built rather than deleting from
      # the one being iterated, which is an error on Python 3.
      fresh = {k: v for k, v in cache.items() if self._is_fresh(v, now)}
      handle = f.file_handle()
      handle.truncate(0)
      handle.seek(0)
      json.dump(fresh, handle)
    except Exception as e:
      logger.warning(e, exc_info=True)
    finally:
      f.unlock_and_close()


# Module-level cache instance; this is the object that
# discovery_cache.autodetect() returns when it falls back to the file cache.
cache = Cache(max_age=DISCOVERY_DOC_MAX_AGE)
Loading

0 comments on commit 3c79cd3

Please sign in to comment.