diff --git a/ibm_cloud_sdk_core/base_service.py b/ibm_cloud_sdk_core/base_service.py index 84179b5..06aef87 100644 --- a/ibm_cloud_sdk_core/base_service.py +++ b/ibm_cloud_sdk_core/base_service.py @@ -14,7 +14,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -import gzip import json as json_import import logging import platform @@ -39,6 +38,7 @@ read_external_sources, strip_extra_slashes, SSLHTTPAdapter, + GzipStream, ) from .version import __version__ @@ -420,10 +420,8 @@ def prepare_request( # Compress the request body if applicable if self.get_enable_gzip_compression() and 'content-encoding' not in headers and request['data'] is not None: headers['content-encoding'] = 'gzip' - uncompressed_data = request['data'] - request_body = gzip.compress(uncompressed_data) - request['data'] = request_body request['headers'] = headers + request['data'] = GzipStream(request['data']) # Next, we need to process the 'files' argument to try to fill in # any missing filenames where possible. diff --git a/ibm_cloud_sdk_core/utils.py b/ibm_cloud_sdk_core/utils.py index 61fb89e..965582f 100644 --- a/ibm_cloud_sdk_core/utils.py +++ b/ibm_cloud_sdk_core/utils.py @@ -15,6 +15,8 @@ # limitations under the License. # from ibm_cloud_sdk_core.authenticators import Authenticator import datetime +import gzip +import io import json as json_import import re import ssl @@ -43,6 +45,91 @@ def init_poolmanager(self, connections, maxsize, block): super().init_poolmanager(connections, maxsize, block, ssl_context=ssl_context) +class GzipStream(io.RawIOBase): + """Compress files on the fly. + + GzipStream is a helper class around the gzip library. It helps to + compress already opened files (file-like objects) on the fly, so + there is no need to read everything into memory and call the + `compress` function on it. + The GzipFile is opened on the instance itself so it needs to act + as a file-like object.
+ + Args: + source: the source of the data to be compressed. + It can be a file-like object, bytes or string. + """ + + def __init__(self, source: Union[io.IOBase, bytes, str]) -> None: + self.buffer = b'' + + if isinstance(source, io.IOBase): + # The input is already a file-like object, use it as-is. + self.uncompressed = source + elif isinstance(source, str): + # Strings must be handled with StringIO. + self.uncompressed = io.StringIO(source) + else: + # Handle the rest as raw bytes. + self.uncompressed = io.BytesIO(source) + + self.compressor = gzip.GzipFile(fileobj=self, mode='wb') + + def read(self, size: int = -1) -> bytes: + """Compresses and returns the requested size of data. + + Args: + size: how many bytes to return. -1 to read and compress the whole file. + """ + compressed = b'' + + if (size < 0) or (len(self.buffer) < size): + for raw in self.uncompressed: + # We need to encode text like streams (e.g. TextIOWrapper) to bytes. + if isinstance(raw, str): + raw = raw.encode() + + self.compressor.write(raw) + + # Stop compressing if we reached the max allowed size. + if 0 < size < len(self.buffer): + self.compressor.flush() + break + else: + self.compressor.close() + + if size < 0: + # Return all data from the buffer. + compressed = self.buffer + self.buffer = b'' + else: + # If we already have enough data in our buffer + # return the desired chunk of bytes + compressed = self.buffer[:size] + # then remove them from the buffer. + self.buffer = self.buffer[size:] + + return compressed + + def flush(self) -> None: + """Not implemented.""" + # Since this "pipe" sits between 2 other streams (source/read -> target/write) + # it wouldn't be worthwhile to implement flushing. + pass + + def write(self, compressed: bytes) -> None: + """Append the compressed data to the buffer + + This happens when the target stream calls the `read` method and + that triggers the gzip "compressor".
+ """ + self.buffer += compressed + + def close(self) -> None: + """Closes the underlying file-like object.""" + self.uncompressed.close() + + def has_bad_first_or_last_char(val: str) -> bool: """Returns true if a string starts with any of: {," ; or ends with any of: },". diff --git a/test/test_base_service.py b/test/test_base_service.py index e4e0b0c..b90fef4 100644 --- a/test/test_base_service.py +++ b/test/test_base_service.py @@ -607,13 +607,13 @@ def test_gzip_compression(): service.set_enable_gzip_compression(True) assert service.get_enable_gzip_compression() prepped = service.prepare_request('GET', url='', data=json.dumps({"foo": "bar"})) - assert prepped['data'] == gzip.compress(b'{"foo": "bar"}') + assert prepped['data'].read() == gzip.compress(b'{"foo": "bar"}') assert prepped['headers'].get('content-encoding') == 'gzip' # Should return compressed data when gzip is on for non-json data assert service.get_enable_gzip_compression() prepped = service.prepare_request('GET', url='', data=b'rawdata') - assert prepped['data'] == gzip.compress(b'rawdata') + assert prepped['data'].read() == gzip.compress(b'rawdata') assert prepped['headers'].get('content-encoding') == 'gzip' # Should return compressed data when gzip is on for gzip file data @@ -624,7 +624,7 @@ def test_gzip_compression(): with gzip.GzipFile(mode='rb', fileobj=t_f) as gz_f: gzip_data = gz_f.read() prepped = service.prepare_request('GET', url='', data=gzip_data) - assert prepped['data'] == gzip.compress(t_f.read()) + assert prepped['data'].read() == gzip.compress(t_f.read()) assert prepped['headers'].get('content-encoding') == 'gzip' # Should return compressed json data when gzip is on for gzip file json data @@ -635,7 +635,7 @@ def test_gzip_compression(): with gzip.GzipFile(mode='rb', fileobj=t_f) as gz_f: gzip_data = gz_f.read() prepped = service.prepare_request('GET', url='', data=gzip_data) - assert prepped['data'] == gzip.compress(t_f.read()) + assert prepped['data'].read() == 
gzip.compress(t_f.read()) assert prepped['headers'].get('content-encoding') == 'gzip' # Should return uncompressed data when content-encoding is set @@ -647,6 +647,66 @@ def test_gzip_compression(): assert prepped['headers'].get('content-encoding') == 'gzip' +def test_gzip_compression_file_input(): + service = AnyServiceV1('2018-11-20', authenticator=NoAuthAuthenticator()) + service.set_enable_gzip_compression(True) + + # Should return file-like object with the compressed data when compression is on + # and the input is a file, opened for reading in binary mode. + raw_data = b'rawdata' + with tempfile.TemporaryFile(mode='w+b') as tmp_file: + tmp_file.write(raw_data) + tmp_file.seek(0) + + prepped = service.prepare_request('GET', url='', data=tmp_file) + assert prepped['data'].read() == gzip.compress(raw_data) + assert prepped['headers'].get('content-encoding') == 'gzip' + assert prepped['data'].read() == b'' + + # Simulate the requests (urllib3) package reading method for binary files. + with tempfile.TemporaryFile(mode='w+b') as tmp_file: + tmp_file.write(raw_data) + tmp_file.seek(0) + + prepped = service.prepare_request('GET', url='', data=tmp_file) + compressed = b'' + for chunk in prepped['data']: + compressed += chunk + + assert compressed == gzip.compress(raw_data) + + # Make sure the decompression works fine. + assert gzip.decompress(compressed) == raw_data + + # Should return file-like object with the compressed data when compression is on + # and the input is a file, opened for reading in text mode. 
+ assert service.get_enable_gzip_compression() + text_data = 'textdata' + with tempfile.TemporaryFile(mode='w+') as tmp_file: + tmp_file.write(text_data) + tmp_file.seek(0) + + prepped = service.prepare_request('GET', url='', data=tmp_file) + assert prepped['data'].read() == gzip.compress(text_data.encode()) + assert prepped['headers'].get('content-encoding') == 'gzip' + assert prepped['data'].read() == b'' + + # Simulate the requests (urllib3) package reading method for text files. + with tempfile.TemporaryFile(mode='w+') as tmp_file: + tmp_file.write(text_data) + tmp_file.seek(0) + + prepped = service.prepare_request('GET', url='', data=tmp_file) + compressed = b'' + for chunk in prepped['data']: + compressed += chunk + + assert compressed == gzip.compress(text_data.encode()) + + # Make sure the decompression works fine. + assert gzip.decompress(compressed).decode() == text_data + + def test_gzip_compression_external(): # Should set gzip compression from external config file_path = os.path.join(os.path.dirname(__file__), '../resources/ibm-credentials-gzip.env') @@ -655,7 +715,7 @@ def test_gzip_compression_external(): assert service.service_url == 'https://mockurl' assert service.get_enable_gzip_compression() is True prepped = service.prepare_request('GET', url='', data=json.dumps({"foo": "bar"})) - assert prepped['data'] == gzip.compress(b'{"foo": "bar"}') + assert prepped['data'].read() == gzip.compress(b'{"foo": "bar"}') assert prepped['headers'].get('content-encoding') == 'gzip'