fix: add correct support for compressing file-like objects #174

Merged: 9 commits (Oct 4, 2023)
10 changes: 7 additions & 3 deletions ibm_cloud_sdk_core/base_service.py
@@ -15,6 +15,7 @@
# limitations under the License.

import gzip
+import io
import json as json_import
import logging
import platform
@@ -39,6 +40,7 @@
    read_external_sources,
    strip_extra_slashes,
    SSLHTTPAdapter,
+    GzipStream,
)
from .version import __version__

@@ -420,10 +422,12 @@ def prepare_request(
        # Compress the request body if applicable
        if self.get_enable_gzip_compression() and 'content-encoding' not in headers and request['data'] is not None:
            headers['content-encoding'] = 'gzip'
-           uncompressed_data = request['data']
-           request_body = gzip.compress(uncompressed_data)
-           request['data'] = request_body
            request['headers'] = headers
+           raw = request['data']
+           # Handle the compression for file-like objects.
+           # We need to use a custom stream/pipe method to prevent
+           # reading the whole file into memory.
+           request['data'] = GzipStream(raw) if isinstance(raw, io.IOBase) else gzip.compress(raw)
Comment (Member Author):
I used this if-else format to avoid the too-many-branches linter error, then renamed the variable to raw so the line doesn't exceed the max line length... :)

Comment (Member):

I think it would add some value if you were to add some additional function to the GzipStream() ctor so that you could also pass in a non-IOBase-type value for raw and it would do the right thing (in that case, perhaps just wrap raw with an appropriate IOBase-type class that could stream the bytes of raw... in other words, beef up GzipStream just a bit so it can handle both file-like objects and static strings/buffers).
This way, the details of how a particular requestBody is gzip-encoded are hidden inside GzipStream and not exposed up in this BaseService method, AND we also get stream-based gzip compression, which might help with large JSON requestBodies.
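
For what it's worth, a rough sketch of that suggestion (purely illustrative; the class name and the exact normalization are assumptions, not what the PR ended up merging):

import io

class FlexibleGzipStream(io.IOBase):  # hypothetical name, not in the SDK
    """Sketch: accept file-like objects as well as static str/bytes payloads."""

    def __init__(self, source):
        # Normalize static buffers into an in-memory stream so the rest of
        # the class can treat every input uniformly as a file-like object.
        if isinstance(source, str):
            source = source.encode()
        if isinstance(source, bytes):
            source = io.BytesIO(source)
        self.uncompressed = source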

Comment (Member, @ricellis, Sep 26, 2023):

FYI, when I was originally investigating this in our SDK I noticed this in the gzip.compress() docs:

> Changed in version 3.11: Speed is improved by compressing all data at once instead of in a streamed fashion.

So there might not be much value in trying to be fully stream-based.

It also says:

> Calls with mtime set to 0 are delegated to zlib.compress() for better speed.

It isn't immediately obvious to me whether that delegation makes any difference to the streaming behaviour, but mtime defaults to the current time, so the delegation won't happen at present anyway (nb. the mtime parameter is only available from 3.8).

Comment (Member Author):

> So there might not be much value in trying to be fully stream-based.

In my opinion it would make the code cleaner (to have all compression-related steps in the helper class), and although the performance is improved in 3.11, the memory usage could still be a problem when the file is large, right? I think that's Phil's main point.

> mtime only available from 3.8

We still support 3.7, so that's not an option, at least for the gzip.compress function. From my understanding, the mtime is only used during decompression, so (I think) streaming shouldn't affect the result.
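
As a side note, the mtime behaviour is easy to check in a REPL (a small sketch, requiring Python 3.8+ for the mtime keyword): the timestamp only changes the gzip header bytes, never the payload that decompression returns.

import gzip

# A fixed mtime makes the output deterministic across calls...
assert gzip.compress(b'payload', mtime=0) == gzip.compress(b'payload', mtime=0)
# ...while the decompressed result is unaffected by the timestamp.
assert gzip.decompress(gzip.compress(b'payload', mtime=0)) == b'payload'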

Comment (Member):

> the memory usage could still be a problem when the file is large, right? I think that's Phil's main point

My understanding is that in 3.11 the entire contents will be in memory inside gzip anyway. So my point was that it might not be worth making changes only for that reason, but as you say there are other benefits.


        # Next, we need to process the 'files' argument to try to fill in
        # any missing filenames where possible.
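To make the dispatch above concrete, here is a minimal standalone sketch (mirroring the merged branch, not additional SDK code) showing that both paths produce a body that decompresses back to the original payload:

import gzip
import io

from ibm_cloud_sdk_core.utils import GzipStream

def compress_body(raw):
    # Same dispatch as in prepare_request: stream file-like objects,
    # compress static buffers in one shot.
    return GzipStream(raw) if isinstance(raw, io.IOBase) else gzip.compress(raw)

# Static bytes are compressed immediately...
assert gzip.decompress(compress_body(b'payload')) == b'payload'
# ...while file-like objects are compressed lazily, on read().
assert gzip.decompress(compress_body(io.BytesIO(b'payload')).read()) == b'payload'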
75 changes: 75 additions & 0 deletions ibm_cloud_sdk_core/utils.py
@@ -15,6 +15,8 @@
# limitations under the License.
# from ibm_cloud_sdk_core.authenticators import Authenticator
import datetime
+import gzip
+import io
import json as json_import
import re
import ssl
@@ -43,6 +45,79 @@ def init_poolmanager(self, connections, maxsize, block):
        super().init_poolmanager(connections, maxsize, block, ssl_context=ssl_context)


class GzipStream(io.IOBase):
    """Compress files on the fly.

    GzipStream is a helper class around the gzip library. It helps to
    compress already opened files (file-like objects) on the fly, so
    there is no need to read everything into memory and call the
    `compress` function on it.
    The GzipFile is opened on the instance itself (fileobj=self), so the
    instance needs to act as a file-like object.

    Args:
        source: a file-like object to be compressed
    """

    def __init__(self, source: io.IOBase):
        self.uncompressed = source
        self.buffer = b''

        self.compressor = gzip.GzipFile(fileobj=self, mode='wb')

    def read(self, size: int = -1):
        """Compresses and returns the requested size of data.

        Args:
            size: how many bytes to return. -1 to read and compress the whole file.
        """
        if (size < 0) or (len(self.buffer) < size):
            for raw in self.uncompressed:
                # We need to encode text-like streams (e.g. TextIOWrapper) to bytes.
                if isinstance(raw, str):
                    raw = raw.encode()

                self.compressor.write(raw)

                # Stop compressing if we reached the max allowed size.
                if 0 < size < len(self.buffer):
                    self.compressor.flush()
                    break
            else:
                self.compressor.close()

        if size < 0:
            # Return all data from the buffer.
            compressed = self.buffer
            self.buffer = b''
        else:
            # If we already have enough data in our buffer,
            # return the desired chunk of bytes
            compressed = self.buffer[:size]
            # then remove them from the buffer.
            self.buffer = self.buffer[size:]

        return compressed

    def flush(self):
        """Not implemented."""
        # Since this "pipe" sits between two other streams (source/read -> target/write),
        # implementing flushing wouldn't be worthwhile.
        pass

    def write(self, compressed: bytes):
        """Append the compressed data to the buffer.

        This happens when the target stream calls the `read` method and
        that triggers the gzip "compressor".
        """
        self.buffer += compressed

    def close(self):
        """Closes the underlying file-like object."""
        self.uncompressed.close()
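
For illustration, a quick standalone check of the class (a sketch, not part of the PR): GzipFile writes its compressed output back through self.write(), so read() drains freshly compressed bytes from the internal buffer.

import gzip
import io

from ibm_cloud_sdk_core.utils import GzipStream

# Wrap an in-memory "file"; nothing is compressed until read() is called.
stream = GzipStream(io.BytesIO(b'example payload'))

# read(-1) compresses the whole source and returns a complete gzip document.
assert gzip.decompress(stream.read()) == b'example payload'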


def has_bad_first_or_last_char(val: str) -> bool:
    """Returns true if a string starts with any of: {," ; or ends with any of: },".
28 changes: 28 additions & 0 deletions test/test_base_service.py
@@ -647,6 +647,34 @@ def test_gzip_compression():
    assert prepped['headers'].get('content-encoding') == 'gzip'


def test_gzip_compression_file_input():
Comment (Member Author):
Had to put the new test cases into a separate function to avoid the too-many-branches linter error.

    service = AnyServiceV1('2018-11-20', authenticator=NoAuthAuthenticator())
    service.set_enable_gzip_compression(True)

    # Should return a file-like object with the compressed data when compression is on
    # and the input is a file, opened for reading in binary mode.
    raw_data = b'rawdata'
    with tempfile.TemporaryFile(mode='w+b') as tmp_file:
        tmp_file.write(raw_data)
        tmp_file.seek(0)

        prepped = service.prepare_request('GET', url='', data=tmp_file)
        assert prepped['data'].read() == gzip.compress(raw_data)
        assert prepped['headers'].get('content-encoding') == 'gzip'

    # Should return a file-like object with the compressed data when compression is on
    # and the input is a file, opened for reading in text mode.
    assert service.get_enable_gzip_compression()
    text_data = 'textdata'
    with tempfile.TemporaryFile(mode='w+') as tmp_file:
        tmp_file.write(text_data)
        tmp_file.seek(0)

        prepped = service.prepare_request('GET', url='', data=tmp_file)
        assert prepped['data'].read() == gzip.compress(text_data.encode())
        assert prepped['headers'].get('content-encoding') == 'gzip'
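
As a complement to the tests above, a hedged sketch (not part of the PR's test suite) of chunked consumption, which is how requests actually drains a file-like body during upload:

import gzip
import io

from ibm_cloud_sdk_core.utils import GzipStream

# Read the compressed stream in small fixed-size chunks.
stream = GzipStream(io.BytesIO(b'rawdata'))
chunks = []
while True:
    chunk = stream.read(5)  # tiny chunk size, just for demonstration
    if not chunk:
        break
    chunks.append(chunk)
assert gzip.decompress(b''.join(chunks)) == b'rawdata'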


def test_gzip_compression_external():
    # Should set gzip compression from external config
    file_path = os.path.join(os.path.dirname(__file__), '../resources/ibm-credentials-gzip.env')