diff --git a/storage/google/cloud/storage/blob.py b/storage/google/cloud/storage/blob.py index 96c46245300a..b8590164bba4 100644 --- a/storage/google/cloud/storage/blob.py +++ b/storage/google/cloud/storage/blob.py @@ -58,8 +58,11 @@ class Blob(_PropertyMixin): """A wrapper around Cloud Storage's concept of an ``Object``. :type name: str - :param name: The name of the blob. This corresponds to the - unique path of the object in the bucket. + :param name: The name of the blob. This corresponds to the unique path of + the object in the bucket. If bytes, will be converted to a + unicode object. Blob / object names can contain any sequence + of valid unicode characters, of length 1-1024 bytes when + UTF-8 encoded. :type bucket: :class:`google.cloud.storage.bucket.Bucket` :param bucket: The bucket to which this blob belongs. @@ -104,6 +107,7 @@ class Blob(_PropertyMixin): """ def __init__(self, name, bucket, chunk_size=None, encryption_key=None): + name = _bytes_to_unicode(name) super(Blob, self).__init__(name=name) self.chunk_size = chunk_size # Check that setter accepts value. @@ -148,7 +152,7 @@ def path_helper(bucket_path, blob_name): :rtype: str :returns: The relative URL path for ``blob_name``. """ - return bucket_path + '/o/' + quote(blob_name, safe='') + return bucket_path + '/o/' + _quote(blob_name) @property def acl(self): @@ -190,7 +194,7 @@ def public_url(self): return '{storage_base_url}/{bucket_name}/{quoted_name}'.format( storage_base_url='https://storage.googleapis.com', bucket_name=self.bucket.name, - quoted_name=quote(self.name, safe='')) + quoted_name=_quote(self.name)) def generate_signed_url(self, expiration, method='GET', content_type=None, @@ -261,7 +265,7 @@ def generate_signed_url(self, expiration, method='GET', """ resource = '/{bucket_name}/{quoted_name}'.format( bucket_name=self.bucket.name, - quoted_name=quote(self.name, safe='')) + quoted_name=_quote(self.name)) if credentials is None: client = self._require_client(client) @@ -1362,3 +1366,21 @@ def _get_encryption_headers(key, source=False): prefix + 'Key': _bytes_to_unicode(key), prefix + 'Key-Sha256': _bytes_to_unicode(key_hash), } + + +def _quote(value): + """URL-quote a string. + + If the value is unicode, this method first UTF-8 encodes it as bytes and + then quotes the bytes. (In Python 3, ``urllib.parse.quote`` does this + encoding automatically, but in Python 2, non-ASCII characters cannot be + quoted.) + + :type value: str or bytes + :param value: The value to be URL-quoted. + + :rtype: str + :returns: The encoded value (bytes in Python 2, unicode in Python 3). + """ + value = _to_bytes(value, encoding='utf-8') + return quote(value, safe='') diff --git a/storage/tests/system.py b/storage/tests/system.py index 1d4b6e467c88..afab659882bf 100644 --- a/storage/tests/system.py +++ b/storage/tests/system.py @@ -246,6 +246,7 @@ def test_copy_existing_file(self): class TestUnicode(unittest.TestCase): + def test_fetch_object_and_check_content(self): client = storage.Client() bucket = client.bucket('storage-library-test-bucket') @@ -256,8 +257,8 @@ def test_fetch_object_and_check_content(self): # Normalization Form D: an ASCII e followed by U+0301 combining # character; URL should end with Caf%C3%A9 test_data = { - u'Caf\u00e9'.encode('utf-8'): b'Normalization Form C', - u'Cafe\u0301'.encode('utf-8'): b'Normalization Form D', + u'Caf\u00e9': b'Normalization Form C', + u'Cafe\u0301': b'Normalization Form D', } for blob_name, file_contents in test_data.items(): blob = bucket.blob(blob_name) diff --git a/storage/tests/unit/test_blob.py b/storage/tests/unit/test_blob.py index 746552d77c2d..cade3a458160 100644 --- a/storage/tests/unit/test_blob.py +++ b/storage/tests/unit/test_blob.py @@ -25,7 +25,8 @@ def _make_credentials(): class Test_Blob(unittest.TestCase): - def _make_one(self, *args, **kw): + @staticmethod + def _make_one(*args, **kw): from google.cloud.storage.blob import Blob properties = kw.pop('properties', None) @@ -45,6 +46,13 @@ def test_ctor_wo_encryption_key(self): self.assertIs(blob._acl.blob, blob) self.assertEqual(blob._encryption_key, None) + def test_ctor_with_encoded_unicode(self): + blob_name = b'wet \xe2\x9b\xb5' + blob = self._make_one(blob_name, bucket=None) + unicode_name = u'wet \N{sailboat}' + self.assertNotEqual(blob.name, blob_name) + self.assertEqual(blob.name, unicode_name) + def test_ctor_w_encryption_key(self): KEY = b'01234567890123456789012345678901' # 32 bytes BLOB_NAME = 'blob-name' @@ -91,21 +99,21 @@ def test_chunk_size_setter_bad_value(self): def test_acl_property(self): from google.cloud.storage.acl import ObjectACL - FAKE_BUCKET = _Bucket() - blob = self._make_one(None, bucket=FAKE_BUCKET) + fake_bucket = _Bucket() + blob = self._make_one(u'name', bucket=fake_bucket) acl = blob.acl self.assertIsInstance(acl, ObjectACL) self.assertIs(acl, blob._acl) - def test_path_no_bucket(self): - FAKE_BUCKET = object() - NAME = 'blob-name' - blob = self._make_one(NAME, bucket=FAKE_BUCKET) + def test_path_bad_bucket(self): + fake_bucket = object() + name = u'blob-name' + blob = self._make_one(name, bucket=fake_bucket) self.assertRaises(AttributeError, getattr, blob, 'path') def test_path_no_name(self): bucket = _Bucket() - blob = self._make_one(None, bucket=bucket) + blob = self._make_one(u'', bucket=bucket) self.assertRaises(ValueError, getattr, blob, 'path') def test_path_normal(self): @@ -120,6 +128,12 @@ def test_path_w_slash_in_name(self): blob = self._make_one(BLOB_NAME, bucket=bucket) self.assertEqual(blob.path, '/b/name/o/parent%2Fchild') + def test_path_with_non_ascii(self): + blob_name = u'Caf\xe9' + bucket = _Bucket() + blob = self._make_one(blob_name, bucket=bucket) + self.assertEqual(blob.path, '/b/name/o/Caf%C3%A9') + def test_public_url(self): BLOB_NAME = 'blob-name' bucket = _Bucket() @@ -136,6 +150,13 @@ def test_public_url_w_slash_in_name(self): blob.public_url, 'https://storage.googleapis.com/name/parent%2Fchild') + def test_public_url_with_non_ascii(self): + blob_name = u'winter \N{snowman}' + bucket = _Bucket() + blob = self._make_one(blob_name, bucket=bucket) + expected_url = 'https://storage.googleapis.com/name/winter%20%E2%98%83' + self.assertEqual(blob.public_url, expected_url) + def _basic_generate_signed_url_helper(self, credentials=None): BLOB_NAME = 'blob-name' EXPIRATION = '2014-10-16T20:34:37.000Z' @@ -2227,6 +2248,28 @@ def test_updated_unset(self): self.assertIsNone(blob.updated) +class Test__quote(unittest.TestCase): + + @staticmethod + def _call_fut(value): + from google.cloud.storage.blob import _quote + + return _quote(value) + + def test_bytes(self): + quoted = self._call_fut(b'\xDE\xAD\xBE\xEF') + self.assertEqual(quoted, '%DE%AD%BE%EF') + + def test_unicode(self): + helicopter = u'\U0001f681' + quoted = self._call_fut(helicopter) + self.assertEqual(quoted, '%F0%9F%9A%81') + + def test_bad_type(self): + with self.assertRaises(TypeError): + self._call_fut(None) + + class _Responder(object): def __init__(self, *responses):