Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add configurable crc32c checksumming for downloads #135

Merged
merged 15 commits into from
Jul 7, 2020
15 changes: 15 additions & 0 deletions google/resumable_media/_download.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,8 +124,23 @@ class Download(DownloadBase):
``start`` to the end of the media.
headers (Optional[Mapping[str, str]]): Extra headers that should
be sent with the request, e.g. headers for encrypted data.
checksum (Optional[str]): The type of checksum to compute to verify
the integrity of the object. The response headers must contain
a checksum of the requested type. If the headers lack an
appropriate checksum (for instance in the case of transcoded or
ranged downloads where the remote service does not know the
correct checksum) an INFO-level log will be emitted. Supported
values are "md5", "crc32c" and None.
andrewsg marked this conversation as resolved.
Show resolved Hide resolved
"""

def __init__(
    self,
    media_url,
    stream=None,
    start=None,
    end=None,
    headers=None,
    checksum="md5"
):
    # Everything except ``checksum`` is handed to the base initializer.
    super(Download, self).__init__(
        media_url, stream=stream, start=start, end=end, headers=headers
    )
    # Keep the requested checksum type on the instance.
    self.checksum = checksum

def _prepare_request(self):
"""Prepare the contents of an HTTP request.

Expand Down
40 changes: 40 additions & 0 deletions google/resumable_media/requests/_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@


import functools
import warnings

from google.resumable_media import _helpers
from google.resumable_media import common
Expand All @@ -33,6 +34,12 @@
# The number of seconds to wait between bytes sent from the server.
_DEFAULT_READ_TIMEOUT = 60

# Text of the ``RuntimeWarning`` issued by ``_is_fast_crcmod`` when crcmod's
# ``_usingExtension`` flag is missing or falsy.
_SLOW_CRC32C_WARNING = (
    "Currently using crcmod in pure python form. This is a slow "
    "implementation. Python 3 has a faster implementation, `google-crc32c`, "
    "which will be used if it is installed."
)
andrewsg marked this conversation as resolved.
Show resolved Hide resolved


class RequestsMixin(object):
"""Mix-in class implementing ``requests``-specific behavior.
Expand Down Expand Up @@ -134,3 +141,36 @@ def http_request(
transport.request, method, url, data=data, headers=headers, **transport_kwargs
)
return _helpers.wait_and_retry(func, RequestsMixin._get_status_code, retry_strategy)


def _get_crc32c_object():
    """Build a CRC32C checksum object from whichever backend can be imported.

    The ``crc32c`` module is tried first. If importing it fails, ``crcmod``
    is used instead, and ``_is_fast_crcmod`` is called so that a warning is
    issued when crcmod is not running with its extension.

    Returns:
        object: Either ``crc32c.Checksum()`` or
        ``crcmod.predefined.Crc("crc-32c")``.

    Raises:
        ImportError: If neither ``crc32c`` nor ``crcmod`` can be imported.
    """
    try:
        import crc32c

        return crc32c.Checksum()
    except ImportError:
        # Preferred backend is unavailable; fall back to crcmod.
        try:
            import crcmod

            checksum = crcmod.predefined.Crc("crc-32c")
            # Called for its side effect only: it warns on the slow variant.
            _is_fast_crcmod()
            return checksum
        except ImportError:
            raise ImportError("Failed to import either `google-crc32c` or `crcmod`")


def _is_fast_crcmod():
    """Report whether crcmod is using its extension, warning when it is not.

    Returns:
        The value of ``crcmod.crcmod._usingExtension``, or ``False`` when that
        attribute does not exist. A ``RuntimeWarning`` carrying
        ``_SLOW_CRC32C_WARNING`` is issued whenever the value is falsy.
    """
    # A non-empty fromlist makes ``__import__`` return the ``crcmod.crcmod``
    # submodule itself instead of the top-level ``crcmod`` package.
    crcmod_impl = __import__(
        "crcmod.crcmod", globals(), locals(), ["_usingExtension"], 0
    )
    using_extension = getattr(crcmod_impl, "_usingExtension", False)
    if using_extension:
        return using_extension

    warnings.warn(_SLOW_CRC32C_WARNING, RuntimeWarning, stacklevel=2)
    return using_extension
Loading