diff --git a/skyplane/cli/cli.py b/skyplane/cli/cli.py index 19b05f896..75e3ea1a6 100644 --- a/skyplane/cli/cli.py +++ b/skyplane/cli/cli.py @@ -25,7 +25,13 @@ import skyplane.cli.experiments from skyplane import config_path, exceptions, skyplane_root, cloud_config, tmp_log_dir from skyplane.cli.common import print_header, console -from skyplane.cli.cli_impl.cp_replicate import generate_full_transferobjlist, generate_topology, confirm_transfer, launch_replication_job +from skyplane.cli.cli_impl.cp_replicate import ( + enrich_dest_objs, + generate_full_transferobjlist, + generate_topology, + confirm_transfer, + launch_replication_job, +) from skyplane.replicate.replication_plan import ReplicationJob from skyplane.cli.cli_impl.init import load_aws_config, load_azure_config, load_gcp_config from skyplane.cli.common import parse_path, query_instances @@ -335,6 +341,8 @@ def sync( raise typer.Exit(1) + enrich_dest_objs(dst_region, path_dst, bucket_dst, [i[1] for i in full_transfer_pairs]) + # filter out any transfer pairs that are already in the destination transfer_pairs = [] for src_obj, dst_obj in full_transfer_pairs: diff --git a/skyplane/cli/cli_impl/cp_replicate.py b/skyplane/cli/cli_impl/cp_replicate.py index 8744346f5..38201447b 100644 --- a/skyplane/cli/cli_impl/cp_replicate.py +++ b/skyplane/cli/cli_impl/cp_replicate.py @@ -193,6 +193,15 @@ def generate_full_transferobjlist( # dest_obj = ObjectStoreObject(dest_region.split(":")[0], dest_bucket, dest_key) dest_objs.append(dest_obj) + return list(zip(source_objs, dest_objs)) + + +def enrich_dest_objs(dest_region: str, dest_prefix: str, dest_bucket: str, dest_objs: list): + """ + For skyplane sync, we enrich dest obj metadata with our existing dest obj metadata from the dest bucket following a query. + """ + dest_iface = ObjectStoreInterface.create(dest_region, dest_bucket) + # query destination at dest_key logger.fs.debug(f"Querying objects in {dest_bucket}") dest_objs_keys = {obj.key for obj in dest_objs} @@ -210,8 +219,6 @@ def generate_full_transferobjlist( dest_obj.size = found_dest_objs[dest_obj.key].size dest_obj.last_modified = found_dest_objs[dest_obj.key].last_modified - return list(zip(source_objs, dest_objs)) - def confirm_transfer(topo: ReplicationTopology, job: ReplicationJob, ask_to_confirm_transfer=True): console.print(