Implemented automatic caching for the discovery documents.

googleapis · Aug 23, 2015 · 3c79cd3 · 3c79cd3
1 parent a98add2
commit 3c79cd3
Show file tree

Hide file tree

Showing 7 changed files with 411 additions and 13 deletions.
diff --git a/googleapiclient/discovery.py b/googleapiclient/discovery.py
@@ -149,7 +149,9 @@ def build(serviceName,
           developerKey=None,
           model=None,
           requestBuilder=HttpRequest,
-          credentials=None):
+          credentials=None,
+          cache_discovery=True,
+          cache=None):
   """Construct a Resource for interacting with an API.
 
   Construct a Resource object for interacting with an API. The serviceName and
@@ -171,6 +173,9 @@ def build(serviceName,
       request.
     credentials: oauth2client.Credentials, credentials to be used for
       authentication.
+    cache_discovery: Boolean, whether or not to cache the discovery doc.
+    cache: googleapiclient.discovery_cache.base.Cache, an optional
+      cache object for the discovery documents.
 
   Returns:
     A Resource object with methods for interacting with the service.
@@ -185,22 +190,53 @@ def build(serviceName,
 
   requested_url = uritemplate.expand(discoveryServiceUrl, params)
 
+  content = _retrieve_discovery_doc(requested_url, http, cache_discovery, cache)
+
+  return build_from_document(content, base=discoveryServiceUrl, http=http,
+      developerKey=developerKey, model=model, requestBuilder=requestBuilder,
+      credentials=credentials)
+
+
+def _retrieve_discovery_doc(url, http, cache_discovery, cache=None):
+  """Retrieves the discovery_doc from cache or the internet.
+
+  Args:
+    url: string, the URL of the discovery document.
+    http: httplib2.Http, An instance of httplib2.Http or something that acts
+      like it through which HTTP requests will be made.
+    cache_discovery: Boolean, whether or not to cache the discovery doc.
+    cache: googleapiclient.discovery_cache.base.Cache, an optional cache
+      object for the discovery documents.
+
+  Returns:
+    A unicode string representation of the discovery document.
+  """
+  if cache_discovery:
+    from . import discovery_cache
+    from .discovery_cache import base
+    if cache is None:
+      cache = discovery_cache.autodetect()
+    if cache:
+      content = cache.get(url)
+      if content:
+        return content
+
+  actual_url = url
   # REMOTE_ADDR is defined by the CGI spec [RFC3875] as the environment
   # variable that contains the network address of the client sending the
   # request. If it exists then add that to the request for the discovery
   # document to avoid exceeding the quota on discovery requests.
   if 'REMOTE_ADDR' in os.environ:
-    requested_url = _add_query_parameter(requested_url, 'userIp',
-                                         os.environ['REMOTE_ADDR'])
-  logger.info('URL being requested: GET %s' % requested_url)
+    actual_url = _add_query_parameter(url, 'userIp', os.environ['REMOTE_ADDR'])
+  logger.info('URL being requested: GET %s', actual_url)
 
-  resp, content = http.request(requested_url)
+  resp, content = http.request(actual_url)
 
   if resp.status == 404:
     raise UnknownApiNameOrVersion("name: %s  version: %s" % (serviceName,
-                                                            version))
+                                                             version))
   if resp.status >= 400:
-    raise HttpError(resp, content, uri=requested_url)
+    raise HttpError(resp, content, uri=actual_url)
 
   try:
     content = content.decode('utf-8')
@@ -212,10 +248,9 @@ def build(serviceName,
   except ValueError as e:
     logger.error('Failed to parse as JSON: ' + content)
     raise InvalidJsonError()
-
-  return build_from_document(content, base=discoveryServiceUrl, http=http,
-      developerKey=developerKey, model=model, requestBuilder=requestBuilder,
-      credentials=credentials)
+  if cache_discovery and cache:
+    cache.set(url, content)
+  return content
 
 
 @positional(1)

diff --git a/googleapiclient/discovery_cache/__init__.py b/googleapiclient/discovery_cache/__init__.py
@@ -0,0 +1,42 @@
+# Copyright 2014 Google Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Caching utility for the discovery document."""
+
+from __future__ import absolute_import
+
+import logging
+import datetime
+
+DISCOVERY_DOC_MAX_AGE = 60 * 60 * 24  # 1 day
+
+
+def autodetect():
+  """Detects an appropriate cache module and returns it.
+
+  The App Engine memcache backend is tried first; when it cannot be
+  imported, the file based backend is tried instead.
+
+  Returns:
+    googleapiclient.discovery_cache.base.Cache, a cache object which
+    is auto detected, or None if no cache object is available.
+  """
+  try:
+    # Imported only as a probe: if the App Engine API is missing, the
+    # failure sends us to the file based fallback below.
+    from google.appengine.api import memcache
+    from . import appengine_memcache
+    return appengine_memcache.cache
+  except Exception:
+    try:
+      from . import file_cache
+      return file_cache.cache
+    except Exception as e:
+      # Log through a named logger. The module-level logging.warning()
+      # helper calls basicConfig() when the root logger has no handlers,
+      # which would configure logging on behalf of the application.
+      logging.getLogger(__name__).warning(e, exc_info=True)
+      return None
diff --git a/googleapiclient/discovery_cache/appengine_memcache.py b/googleapiclient/discovery_cache/appengine_memcache.py
@@ -0,0 +1,52 @@
+# Copyright 2014 Google Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""App Engine memcache based cache for the discovery document."""
+
+import logging
+
+# This is only an optional dependency because we only import this
+# module when google.appengine.api.memcache is available.
+from google.appengine.api import memcache
+
+from . import base
+from ..discovery_cache import DISCOVERY_DOC_MAX_AGE
+
+# memcache namespace passed to every memcache.get()/memcache.set() call below.
+NAMESPACE = 'google-api-client'
+
+
+class Cache(base.Cache):
+  """A cache with app engine memcache API.
+
+  Entries are stored in the NAMESPACE memcache namespace. Exceptions raised
+  by the memcache calls are logged and otherwise ignored, so a cache failure
+  never propagates to the caller.
+  """
+
+  def __init__(self, max_age):
+    """Constructor.
+
+    Args:
+      max_age: Cache expiration in seconds.
+    """
+    self._max_age = max_age
+
+  def get(self, url):
+    """Gets the cached content for the given key.
+
+    Args:
+      url: string, the key for the cache.
+
+    Returns:
+      The value returned by memcache.get() for the key, or None if the
+      call raised.
+    """
+    try:
+      return memcache.get(url, namespace=NAMESPACE)
+    except Exception as e:
+      # Use a named logger rather than the root-logger helper, which would
+      # implicitly call basicConfig() for the application.
+      logging.getLogger(__name__).warning(e, exc_info=True)
+      return None
+
+  def set(self, url, content):
+    """Stores the content under the given key for max_age seconds.
+
+    Args:
+      url: string, the key for the cache.
+      content: string, the discovery document.
+    """
+    try:
+      memcache.set(url, content, time=int(self._max_age), namespace=NAMESPACE)
+    except Exception as e:
+      logging.getLogger(__name__).warning(e, exc_info=True)
+
+# Module-level instance handed out by discovery_cache.autodetect().
+cache = Cache(max_age=DISCOVERY_DOC_MAX_AGE)
diff --git a/googleapiclient/discovery_cache/base.py b/googleapiclient/discovery_cache/base.py
@@ -0,0 +1,45 @@
+# Copyright 2014 Google Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""An abstract class for caching the discovery document."""
+
+import abc
+
+
+class Cache(object):
+  """A base abstract cache class.
+
+  Subclasses provide get() and set(), both keyed by the URL of the
+  discovery document.
+  """
+  # NOTE(review): the __metaclass__ attribute is the Python 2 way to select a
+  # metaclass; Python 3 does not act on it -- confirm whether the abstract
+  # methods are meant to be enforced there as well.
+  __metaclass__ = abc.ABCMeta
+
+  @abc.abstractmethod
+  def get(self, url):
+    """Gets the content from the cache with a given key.
+
+    Args:
+      url: string, the key for the cache.
+
+    Returns:
+      object, the value in the cache for the given key, or None if the key is
+      not in the cache.
+    """
+    raise NotImplementedError()
+
+  @abc.abstractmethod
+  def set(self, url, content):
+    """Sets the given key and content in the cache.
+
+    Args:
+      url: string, the key for the cache.
+      content: string, the discovery document.
+    """
+    raise NotImplementedError()
diff --git a/googleapiclient/discovery_cache/file_cache.py b/googleapiclient/discovery_cache/file_cache.py
@@ -0,0 +1,123 @@
+# Copyright 2014 Google Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""File based cache for the discovery document.
+
+The cache is stored in a single file so that multiple processes can
+share the same cache. It locks the file whenever accessing the
+file. When the cache content is corrupted, it will be initialized with
+an empty cache.
+"""
+
+import datetime
+import json
+import logging
+import os
+import tempfile
+import threading
+
+from oauth2client.locked_file import LockedFile
+
+from . import base
+from ..discovery_cache import DISCOVERY_DOC_MAX_AGE
+
+logger = logging.getLogger(__name__)
+
+# Name of the cache file; Cache joins it onto tempfile.gettempdir().
+FILENAME = 'google-api-python-client-discovery-doc.cache'
+# Reference datetime that _to_timestamp() subtracts to obtain seconds.
+EPOCH = datetime.datetime.utcfromtimestamp(0)
+
+
+def _to_timestamp(d):
+  """Returns the number of seconds between EPOCH and the datetime `d`."""
+  elapsed = d - EPOCH
+  return elapsed.total_seconds()
+
+
+def _read_or_initialize_cache(f):
+  """Loads the cache dict from the file, resetting the file when unusable.
+
+  Args:
+    f: an object whose file_handle() returns the opened cache file.
+
+  Returns:
+    dict, the decoded cache content, or an empty dict if the file was
+    empty, could not be parsed as JSON, or did not hold a JSON object. In
+    those cases the file is rewritten with an empty JSON object.
+  """
+  handle = f.file_handle()
+  handle.seek(0)
+  try:
+    cache = json.load(handle)
+    if not isinstance(cache, dict):
+      # Valid JSON of the wrong shape (e.g. a list) would break every
+      # later lookup and update, so treat it like a corrupted file.
+      raise ValueError('Cache file does not contain a JSON object.')
+  except Exception:
+    # This means it opens the file for the first time, or the cache is
+    # corrupted, so initializing the file with an empty dict.
+    cache = {}
+    handle.truncate(0)
+    handle.seek(0)
+    json.dump(cache, handle)
+  return cache
+
+
+class Cache(base.Cache):
+  """A file based cache for the discovery documents.
+
+  All entries live in one JSON file (FILENAME in the temp directory) that
+  maps each URL to a (content, timestamp) pair. Every access takes a lock
+  on the file; when the lock cannot be obtained, or any error occurs, the
+  operation is skipped and logged instead of raising.
+  """
+
+  def __init__(self, max_age):
+    """Constructor.
+
+    Args:
+      max_age: Cache expiration in seconds.
+    """
+    self._max_age = max_age
+    self._file = os.path.join(tempfile.gettempdir(), FILENAME)
+    f = LockedFile(self._file, 'a+', 'r')
+    try:
+      f.open_and_lock()
+      if f.is_locked():
+        _read_or_initialize_cache(f)
+      # If we can not obtain the lock, other process or thread must
+      # have initialized the file.
+    except Exception as e:
+      logger.warning(e, exc_info=True)
+    finally:
+      f.unlock_and_close()
+
+  def get(self, url):
+    """Gets the cached content for the given key if it has not expired.
+
+    Args:
+      url: string, the key for the cache.
+
+    Returns:
+      The cached content, or None if the key is missing, the entry is older
+      than max_age, the lock could not be obtained, or an error occurred.
+    """
+    f = LockedFile(self._file, 'r+', 'r')
+    try:
+      f.open_and_lock()
+      if not f.is_locked():
+        logger.debug('Could not obtain a lock for the cache file.')
+        return None
+      cache = _read_or_initialize_cache(f)
+      if url in cache:
+        content, t = cache[url]
+        if _to_timestamp(datetime.datetime.now()) < t + self._max_age:
+          return content
+      return None
+    except Exception as e:
+      logger.warning(e, exc_info=True)
+      return None
+    finally:
+      f.unlock_and_close()
+
+  def set(self, url, content):
+    """Stores the content under the given key and drops expired entries.
+
+    Args:
+      url: string, the key for the cache.
+      content: string, the discovery document.
+    """
+    f = LockedFile(self._file, 'r+', 'r')
+    try:
+      f.open_and_lock()
+      if not f.is_locked():
+        logger.debug('Could not obtain a lock for the cache file.')
+        return
+      cache = _read_or_initialize_cache(f)
+      now = _to_timestamp(datetime.datetime.now())
+      cache[url] = (content, now)
+      # Remove stale cache. The keys are collected first because deleting
+      # from a dict while iterating over it is an error.
+      stale_keys = [k for k, (_, t) in cache.items()
+                    if now >= t + self._max_age]
+      for k in stale_keys:
+        del cache[k]
+      f.file_handle().truncate(0)
+      f.file_handle().seek(0)
+      json.dump(cache, f.file_handle())
+    except Exception as e:
+      logger.warning(e, exc_info=True)
+    finally:
+      f.unlock_and_close()
+
+
+# Module-level instance handed out by discovery_cache.autodetect().
+cache = Cache(max_age=DISCOVERY_DOC_MAX_AGE)