Commit

Reduce number of concurrent S3 uploads to reduce throttling
2015aroras committed Jan 22, 2024
1 parent 9b5155d commit 5c7d9c6
Showing 1 changed file with 3 additions and 1 deletion.
scripts/storage_cleaner.py (3 additions & 1 deletion)
@@ -17,6 +17,7 @@
 import google.cloud.storage as gcs
 import torch
 import wandb
+from boto3.s3.transfer import TransferConfig
 from cached_path import add_scheme_client, cached_path, set_cache_dir
 from cached_path.schemes import S3Client
 from google.api_core.exceptions import NotFound
@@ -579,7 +580,8 @@ def download_folder(self, directory_path: str, local_dest_folder: PathOrStr):
         raise ValueError(f"Path {directory_path} is not a valid directory")
 
     def _upload_file(self, local_filepath: str, bucket_name: str, key: str):
-        self._s3_client.upload_file(local_filepath, bucket_name, key)
+        transfer_config = TransferConfig(max_concurrency=4)
+        self._s3_client.upload_file(local_filepath, bucket_name, key, Config=transfer_config)
 
     def upload(self, local_src: PathOrStr, dest_path: str):
         if self.local_fs_adapter.is_file(str(local_src)):
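For context, a minimal sketch of the change in isolation, assuming a hypothetical bucket, key, and local file. boto3's TransferConfig caps the number of threads a single upload_file call may use for multipart uploads, so fewer PUT requests hit S3 at once and "SlowDown" (503) throttling responses become less likely:

import boto3
from boto3.s3.transfer import TransferConfig

# Limit each multipart upload to 4 concurrent part-upload threads
# (boto3's default max_concurrency is 10).
transfer_config = TransferConfig(max_concurrency=4)

s3_client = boto3.client("s3")
s3_client.upload_file(
    "checkpoint.pt",       # hypothetical local file
    "my-bucket",           # hypothetical bucket name
    "runs/checkpoint.pt",  # hypothetical object key
    Config=transfer_config,
)

Lowering max_concurrency trades some single-file upload throughput for fewer simultaneous requests against the bucket, which matters when many uploads run in parallel across processes.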
