Skip to content

Commit

Permalink
added range downloads (#5081)
Browse files Browse the repository at this point in the history
* added range downloads

Added start and end support for the download_to_*/download_as_* methods. The ChunkedDownload class works differently from Download: it does not accept start being None, which is why the start check is being done.

* fixed tests

* fixed comment
  • Loading branch information
javisantana authored and chemelnucfin committed Mar 21, 2018
1 parent d915544 commit 44cebf8
Show file tree
Hide file tree
Showing 2 changed files with 104 additions and 9 deletions.
42 changes: 33 additions & 9 deletions storage/google/cloud/storage/blob.py
Original file line number Diff line number Diff line change
Expand Up @@ -424,7 +424,7 @@ def _get_download_url(self):

return _add_query_parameters(base_url, name_value_pairs)

def _do_download(self, transport, file_obj, download_url, headers):
def _do_download(self, transport, file_obj, download_url, headers, start=None, end=None):
"""Perform a download without any error handling.
This is intended to be called by :meth:`download_to_file` so it can
Expand All @@ -443,18 +443,24 @@ def _do_download(self, transport, file_obj, download_url, headers):
:type headers: dict
:param headers: Optional headers to be sent with the request(s).
:type start: int
:param start: Optional, the first byte in a range to be downloaded.
:type end: int
:param end: Optional, The last byte in a range to be downloaded.
"""
if self.chunk_size is None:
download = Download(download_url, stream=file_obj, headers=headers)
download = Download(download_url, stream=file_obj, headers=headers, start=start, end=end)
download.consume(transport)
else:
download = ChunkedDownload(
download_url, self.chunk_size, file_obj, headers=headers)
download_url, self.chunk_size, file_obj, headers=headers, start=start if start else 0, end=end)

while not download.finished:
download.consume_next_chunk(transport)

def download_to_file(self, file_obj, client=None):
def download_to_file(self, file_obj, client=None, start=None, end=None):
"""Download the contents of this blob into a file-like object.
.. note::
Expand Down Expand Up @@ -488,6 +494,12 @@ def download_to_file(self, file_obj, client=None):
:param client: Optional. The client to use. If not passed, falls back
to the ``client`` stored on the blob's bucket.
:type start: int
:param start: Optional, the first byte in a range to be downloaded.
:type end: int
:param end: Optional, The last byte in a range to be downloaded.
:raises: :class:`google.cloud.exceptions.NotFound`
"""
download_url = self._get_download_url()
Expand All @@ -496,11 +508,11 @@ def download_to_file(self, file_obj, client=None):

transport = self._get_transport(client)
try:
self._do_download(transport, file_obj, download_url, headers)
self._do_download(transport, file_obj, download_url, headers, start, end)
except resumable_media.InvalidResponse as exc:
_raise_from_invalid_response(exc)

def download_to_filename(self, filename, client=None):
def download_to_filename(self, filename, client=None, start=None, end=None):
"""Download the contents of this blob into a named file.
If :attr:`user_project` is set on the bucket, bills the API request
Expand All @@ -514,11 +526,17 @@ def download_to_filename(self, filename, client=None):
:param client: Optional. The client to use. If not passed, falls back
to the ``client`` stored on the blob's bucket.
:type start: int
:param start: Optional, the first byte in a range to be downloaded.
:type end: int
:param end: Optional, The last byte in a range to be downloaded.
:raises: :class:`google.cloud.exceptions.NotFound`
"""
try:
with open(filename, 'wb') as file_obj:
self.download_to_file(file_obj, client=client)
self.download_to_file(file_obj, client=client, start=start, end=end)
except resumable_media.DataCorruption as exc:
# Delete the corrupt downloaded file.
os.remove(filename)
Expand All @@ -529,7 +547,7 @@ def download_to_filename(self, filename, client=None):
mtime = time.mktime(updated.timetuple())
os.utime(file_obj.name, (mtime, mtime))

def download_as_string(self, client=None):
def download_as_string(self, client=None, start=None, end=None):
"""Download the contents of this blob as a string.
If :attr:`user_project` is set on the bucket, bills the API request
Expand All @@ -540,12 +558,18 @@ def download_as_string(self, client=None):
:param client: Optional. The client to use. If not passed, falls back
to the ``client`` stored on the blob's bucket.
:type start: int
:param start: Optional, the first byte in a range to be downloaded.
:type end: int
:param end: Optional, The last byte in a range to be downloaded.
:rtype: bytes
:returns: The data stored in this blob.
:raises: :class:`google.cloud.exceptions.NotFound`
"""
string_buffer = BytesIO()
self.download_to_file(string_buffer, client=client)
self.download_to_file(string_buffer, client=client, start=start, end=end)
return string_buffer.getvalue()

def _get_content_type(self, content_type, filename=None):
Expand Down
71 changes: 71 additions & 0 deletions storage/tests/unit/test_blob.py
Original file line number Diff line number Diff line change
Expand Up @@ -493,6 +493,20 @@ def _mock_download_transport(self):
fake_transport.request.side_effect = [chunk1_response, chunk2_response]
return fake_transport

def _mock_download_transport_range(self):
"""Build a mock transport that serves two consecutive ranged chunks.

Successive calls to ``request`` return two ``PARTIAL_CONTENT`` responses:
bytes 1-2 (``b'bc'``) and then bytes 3-4 (``b'de'``), each reporting a
total size of 6 in its ``content-range`` header.
"""
fake_transport = mock.Mock(spec=['request'])
# Give the transport two fake responses.
chunk1_response = self._mock_requests_response(
http_client.PARTIAL_CONTENT,
{'content-length': '2', 'content-range': 'bytes 1-2/6'},
content=b'bc')
chunk2_response = self._mock_requests_response(
http_client.PARTIAL_CONTENT,
{'content-length': '2', 'content-range': 'bytes 3-4/6'},
content=b'de')
# A list ``side_effect`` hands out one response per call, in order.
fake_transport.request.side_effect = [chunk1_response, chunk2_response]
return fake_transport

def _check_session_mocks(self, client, transport,
expected_url, headers=None):
# Check that the transport was called exactly twice.
Expand Down Expand Up @@ -536,6 +550,35 @@ def test__do_download_simple(self):
transport.request.assert_called_once_with(
'GET', download_url, data=None, headers=headers, stream=True)

def test__do_download_simple_with_range(self):
"""Check a non-chunked ``_do_download`` call with ``start`` and ``end``.

Expects the three-byte body to be written to the file object, a
``range`` entry of ``bytes=1-3`` to appear in the passed-in headers, and
exactly one streaming ``GET`` request to be made.
"""
blob_name = 'blob-name'
# Create a fake client/bucket and use them in the Blob() constructor.
client = mock.Mock(
_credentials=_make_credentials(), spec=['_credentials'])
bucket = _Bucket(client)
blob = self._make_one(blob_name, bucket=bucket)

# Make sure this will not be chunked.
self.assertIsNone(blob.chunk_size)

# A single fake response carrying the requested bytes 1-3.
transport = mock.Mock(spec=['request'])
transport.request.return_value = self._mock_requests_response(
http_client.OK,
{'content-length': '3', 'content-range': 'bytes 1-3'},
content=b'bcd',
stream=True,
)
file_obj = io.BytesIO()
download_url = 'http://test.invalid'
# Starts empty; the assertions below expect a ``range`` key to be added.
headers = {}
blob._do_download(transport, file_obj, download_url, headers, start=1, end=3)
# Make sure the download was as expected.
self.assertEqual(file_obj.getvalue(), b'bcd')
self.assertEqual(headers['range'], 'bytes=1-3')

transport.request.assert_called_once_with(
'GET', download_url, data=None, headers=headers, stream=True)

def test__do_download_chunked(self):
blob_name = 'blob-name'
# Create a fake client/bucket and use them in the Blob() constructor.
Expand Down Expand Up @@ -564,6 +607,34 @@ def test__do_download_chunked(self):
'GET', download_url, data=None, headers=headers)
self.assertEqual(transport.request.mock_calls, [call, call])

def test__do_download_chunked_with_range(self):
"""Check a chunked ``_do_download`` call with ``start`` and ``end``.

With a chunk size of 2 and the range 1-4, expects two requests whose
bodies (``b'bc'`` then ``b'de'``) are concatenated into the file object.
"""
blob_name = 'blob-name'
# Create a fake client/bucket and use them in the Blob() constructor.
client = mock.Mock(
_credentials=_make_credentials(), spec=['_credentials'])
bucket = _Bucket(client)
blob = self._make_one(blob_name, bucket=bucket)

# Modify the blob so that there will be 2 chunks of size 2.
blob._CHUNK_SIZE_MULTIPLE = 1
blob.chunk_size = 2

transport = self._mock_download_transport_range()
file_obj = io.BytesIO()
download_url = 'http://test.invalid'
headers = {}
blob._do_download(transport, file_obj, download_url, headers, start=1, end=4)
# Make sure the download was as expected.
self.assertEqual(file_obj.getvalue(), b'bcde')

# Check that the transport was called exactly twice.
self.assertEqual(transport.request.call_count, 2)
# ``headers`` was modified (in place) once for each API call, so only
# the range of the final chunk request is expected to remain.
self.assertEqual(headers, {'range': 'bytes=3-4'})
# The same ``headers`` dict object is used for both expected calls.
call = mock.call(
'GET', download_url, data=None, headers=headers)
self.assertEqual(transport.request.mock_calls, [call, call])

def test_download_to_file_with_failure(self):
from google.cloud import exceptions

Expand Down

0 comments on commit 44cebf8

Please sign in to comment.