From cfe900ded6a201638daf08ce5809834f1954f1d5 Mon Sep 17 00:00:00 2001 From: lfbrehm <97600985+lfbrehm@users.noreply.github.com> Date: Tue, 14 Nov 2023 14:11:05 +0100 Subject: [PATCH 01/14] feat: Replication draft --- .../services/v2/dataproxy_service.proto | 27 ++++++++++--------- aruna/api/storage/models/v2/models.proto | 24 ++++++++++++++++- .../v2/data_replication_service.proto | 19 +++++-------- 3 files changed, 44 insertions(+), 26 deletions(-) diff --git a/aruna/api/dataproxy/services/v2/dataproxy_service.proto b/aruna/api/dataproxy/services/v2/dataproxy_service.proto index c613e519..84ddd302 100644 --- a/aruna/api/dataproxy/services/v2/dataproxy_service.proto +++ b/aruna/api/dataproxy/services/v2/dataproxy_service.proto @@ -18,14 +18,14 @@ service DataproxyService { // Status: BETA // // Creates a replication request - rpc RequestReplication(RequestReplicationRequest) returns (RequestReplicationResponse) {} + rpc PullReplication(PullReplicationRequest) returns (PullReplicationResponse) {} // InitReplication // // Status: BETA // // Provides the necessary url to init replication - rpc InitReplication(InitReplicationRequest) returns (InitReplicationResponse) {} + rpc PushReplication(PushReplicationRequest) returns (PushReplicationResponse) {} } @@ -100,9 +100,10 @@ message DataProxyInfo { int64 available_space = 2; } -message RequestReplicationRequest { +message PullReplicationRequest { DataProxyInfo info = 1; bool user_initialized = 2; + repeated string object_ids = 3; } message DataInfo { @@ -116,18 +117,18 @@ message DataInfos { repeated DataInfo data_info = 1; } -message RequestReplicationResponse { - oneof response { - DataInfos data_infos = 1; - bool ack = 2; - } +message PullReplicationResponse { + //oneof response { + dataInfos data_infos = 1; + // bool ack = 2; + //} } -message InitReplicationRequest { +message PushReplicationRequest { DataInfos data_infos = 1; } -message InitReplicationResponse { +message PushReplicationResponse { bool ack = 1; } @@ -148,7 +149,7 @@ message PushReplicaRequest { string resource_id = 1; S3Path s3_path = 2; } - string target_location = 3; + string target_endpoint_id = 3; } message PushReplicaResponse { @@ -163,11 +164,11 @@ message PullReplicaRequest { } message PullReplicaResponse { - string replication_id = 1; + string replication_id = 1; // why ? } message ReplicationStatusRequest { - string replication_id = 1; + string replication_id = 1; // why ? } enum ReplicationStatus { diff --git a/aruna/api/storage/models/v2/models.proto b/aruna/api/storage/models/v2/models.proto index 9fab1d0a..e1c69baa 100644 --- a/aruna/api/storage/models/v2/models.proto +++ b/aruna/api/storage/models/v2/models.proto @@ -120,6 +120,13 @@ enum ResourceVariant { RESOURCE_VARIANT_OBJECT = 4; } +enum ReplicationStatus { + REPLICATION_STATUS_UNSPECIFIED = 0; + REPLICATION_STATUS_WAITING = 1; + REPLICATION_STATUS_RUNNING = 2; + REPLICATION_STATUS_FINISHED = 3; + REPLICATION_STATUS_ERROR = 4; +} // ------------- USERS & PERMISSIONS ----------------------- message User { // Internal Aruna UserID @@ -245,7 +252,22 @@ message DataEndpoint { string id = 1; // Hint if the objects' project // is fully synced to the endpoint - bool full_synced = 2; + oneof variant { + FullSync full_sync = 2; + PartialSync partial_sync = 3; + } + optional ReplicationStatus status = 4; +} + +message FullSync { + string project_id = 1; +} +message PartialSync { + oneof origin { + string collection_id = 1; + string dataset_id = 2; + string object_id = 3; + } } message Copy { diff --git a/aruna/api/storage/services/v2/data_replication_service.proto b/aruna/api/storage/services/v2/data_replication_service.proto index 02cb6b08..a85af0f2 100644 --- a/aruna/api/storage/services/v2/data_replication_service.proto +++ b/aruna/api/storage/services/v2/data_replication_service.proto @@ -7,6 +7,7 @@ option java_package = "com.github.ArunaStorage.java_api.aruna.api.storage.servic option java_outer_classname = "DataReplicationService"; import "google/api/annotations.proto"; +import "aruna/api/storage/models/v2/models.proto"; // DataReplicationService // @@ -77,13 +78,7 @@ service DataReplicationService { } } -enum ReplicationStatus { - REPLICATION_STATUS_UNSPECIFIED = 0; - REPLICATION_STATUS_WAITING = 1; - REPLICATION_STATUS_RUNNING = 2; - REPLICATION_STATUS_FINISHED = 3; - REPLICATION_STATUS_ERROR = 4; -} + message ReplicateProjectDataRequest { string project_id = 1; @@ -91,7 +86,7 @@ message ReplicateProjectDataRequest { } message ReplicateProjectDataResponse { - ReplicationStatus status = 1; + storage.models.v2.ReplicationStatus status = 1; } message PartialReplicateDataRequest { @@ -104,13 +99,13 @@ message PartialReplicateDataRequest { } message PartialReplicateDataResponse { - ReplicationStatus status = 1; + storage.models.v2.ReplicationStatus status = 1; } message UpdateReplicationStatusRequest { - string resource_id = 1; + string object_id = 1; string endpoint_id = 2; - ReplicationStatus status = 3; + storage.models.v2.ReplicationStatus status = 3; } message UpdateReplicationStatusResponse {} @@ -120,7 +115,7 @@ message GetReplicationStatusRequest { } message GetReplicationStatusResponse { - ReplicationStatus status = 1; + storage.models.v2.ReplicationStatus status = 1; } message DeleteReplicationRequest { From 719adf2e59115a2b2a1f6719000d0dac8315d131 Mon Sep 17 00:00:00 2001 From: lfbrehm <97600985+lfbrehm@users.noreply.github.com> Date: Wed, 15 Nov 2023 13:28:44 +0100 Subject: [PATCH 02/14] refactor: Explicit data_variants for partial replication --- .../storage/services/v2/data_replication_service.proto | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/aruna/api/storage/services/v2/data_replication_service.proto b/aruna/api/storage/services/v2/data_replication_service.proto index 822fb113..d3487c44 100644 --- a/aruna/api/storage/services/v2/data_replication_service.proto +++ b/aruna/api/storage/services/v2/data_replication_service.proto @@ -90,10 +90,15 @@ message ReplicateProjectDataResponse { } message PartialReplicateDataRequest { - string resource_id = 1; - string endpoint_id = 2; + oneof data_variant { + string collection_id = 1; + string dataset_id = 2; + string object_id = 3; + } + string endpoint_id = 4; } + message PartialReplicateDataResponse { storage.models.v2.ReplicationStatus status = 1; } From 673c92552b07649469249e05c2db9dc0a47190da Mon Sep 17 00:00:00 2001 From: lfbrehm <97600985+lfbrehm@users.noreply.github.com> Date: Wed, 15 Nov 2023 13:49:29 +0100 Subject: [PATCH 03/14] fix: Typo --- aruna/api/dataproxy/services/v2/dataproxy_service.proto | 2 +- aruna/api/storage/services/v2/data_replication_service.proto | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/aruna/api/dataproxy/services/v2/dataproxy_service.proto b/aruna/api/dataproxy/services/v2/dataproxy_service.proto index ce559a51..552e1df9 100644 --- a/aruna/api/dataproxy/services/v2/dataproxy_service.proto +++ b/aruna/api/dataproxy/services/v2/dataproxy_service.proto @@ -119,7 +119,7 @@ message DataInfos { message PullReplicationResponse { //oneof response { - dataInfos data_infos = 1; + DataInfos data_infos = 1; // bool ack = 2; //} } diff --git a/aruna/api/storage/services/v2/data_replication_service.proto b/aruna/api/storage/services/v2/data_replication_service.proto index d3487c44..74da55a0 100644 --- a/aruna/api/storage/services/v2/data_replication_service.proto +++ b/aruna/api/storage/services/v2/data_replication_service.proto @@ -10,7 +10,6 @@ import "google/api/annotations.proto"; import "aruna/api/storage/models/v2/models.proto"; // DataReplicationService -// // Endpoint specific methods for syncing data service DataReplicationService { @@ -95,7 +94,7 @@ message PartialReplicateDataRequest { string dataset_id = 2; string object_id = 3; } - string endpoint_id = 4; + string endpoint_id = 4; } From 51d05ff7ebfa0ace57077ce9b409362620fa3781 Mon Sep 17 00:00:00 2001 From: Sebastian Beyvers Date: Mon, 4 Dec 2023 15:46:18 +0100 Subject: [PATCH 04/14] feat: Proposal for new PullReplication --- .../services/v2/dataproxy_service.proto | 79 +++++++++++++++---- 1 file changed, 63 insertions(+), 16 deletions(-) diff --git a/aruna/api/dataproxy/services/v2/dataproxy_service.proto b/aruna/api/dataproxy/services/v2/dataproxy_service.proto index 26eac8df..3f72cdb5 100644 --- a/aruna/api/dataproxy/services/v2/dataproxy_service.proto +++ b/aruna/api/dataproxy/services/v2/dataproxy_service.proto @@ -14,15 +14,15 @@ import "google/api/visibility.proto"; // Status: ALPHA // // Service for data replication between data-proxies -service DataproxyService { +service DataproxyReplicationService { option (google.api.api_visibility).restriction = "PROXY"; // RequestReplication // // Status: ALPHA // - // Creates a replication request - rpc PullReplication(PullReplicationRequest) returns (PullReplicationResponse) {} + // Creates a replication stream + rpc PullReplication(stream PullReplicationRequest) returns (stream PullReplicationResponse) {} // InitReplication // @@ -96,20 +96,74 @@ service DataproxyUserService { } +// ----- PullReplication ----- +// PROXY A (data) <--> PROXY B (wants data) +// PROXY B pulls data from PROXY A +// Messages (requests) from PROXY B +message InitMessage { + string dataproxy_id = 1; + repeated string object_ids = 2; +} +message InfoAckMessage { + string object_id = 1; +} -message DataProxyInfo { - string dataproxy_id = 1; - int64 available_space = 2; +message ChunkAckMessage { + string object_id = 1; + int64 chunk_idx = 2; +} + +message RetryChunkMessage { + string object_id = 1; + int64 chunk_idx = 2; +} + +message Empty {} + +message ErrorMessage { + oneof error { + RetryChunkMessage retry_chunk = 1; + Empty abort = 2; + string retry_object_id = 3; + } } message PullReplicationRequest { - DataProxyInfo info = 1; - bool user_initialized = 2; - repeated string object_ids = 3; + oneof message { + InitMessage init_message = 1; + InfoAckMessage info_ack_message = 2; + ChunkAckMessage chunk_ack_message = 3; + ErrorMessage error_message = 4; + Empty finish_message = 5; + } } +// Messages (responses) from PROXY A +message ObjectInfo { + string object_id = 1; + int64 chunks = 2; + optional string extra = 3; // JSON encoded proxy specific extra fields +} + +message Chunk { + string object_id = 1; + int64 chunk_idx = 2; + bytes data = 3; + string checksum = 4; +} + +message PullReplicationResponse { + oneof message { + ObjectInfo object_info = 1; + Chunk chunk = 2; // If no ack is received, the chunk will be resent + Empty finish_message = 3; + } +} + +// --------------- + message DataInfo { string object_id = 1; string download_url = 2; @@ -121,13 +175,6 @@ message DataInfos { repeated DataInfo data_info = 1; } -message PullReplicationResponse { - //oneof response { - DataInfos data_infos = 1; - // bool ack = 2; - //} -} - message PushReplicationRequest { DataInfos data_infos = 1; } From d986a2e999f571458f1d9b000608f7296b6f05d9 Mon Sep 17 00:00:00 2001 From: lfbrehm <97600985+lfbrehm@users.noreply.github.com> Date: Tue, 5 Dec 2023 08:44:47 +0100 Subject: [PATCH 05/14] chore: Updated google apis --- aruna/api/google | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aruna/api/google b/aruna/api/google index 03fdadd0..38894df9 160000 --- a/aruna/api/google +++ b/aruna/api/google @@ -1 +1 @@ -Subproject commit 03fdadd0a6db5cc1f76de4afb8b367837a25f5d2 +Subproject commit 38894df9fd8faebe373d15f25da07119582c82f3 From 5dab5b533bf3035cf964fbe887f3f2733f7bd688 Mon Sep 17 00:00:00 2001 From: lfbrehm <97600985+lfbrehm@users.noreply.github.com> Date: Tue, 5 Dec 2023 08:56:35 +0100 Subject: [PATCH 06/14] chore: Fixed updated deps --- aruna/api/google | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aruna/api/google b/aruna/api/google index 38894df9..89b562b7 160000 --- a/aruna/api/google +++ b/aruna/api/google @@ -1 +1 @@ -Subproject commit 38894df9fd8faebe373d15f25da07119582c82f3 +Subproject commit 89b562b76f5b215990a20d3ea08bc6e1c0377906 From a32816cffeb816c323cf2e99c9994aba2f0e6aec Mon Sep 17 00:00:00 2001 From: lfbrehm <97600985+lfbrehm@users.noreply.github.com> Date: Wed, 13 Dec 2023 16:24:06 +0100 Subject: [PATCH 07/14] feat: Added raw_size and block_list to replication protocol --- aruna/api/dataproxy/services/v2/dataproxy_service.proto | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/aruna/api/dataproxy/services/v2/dataproxy_service.proto b/aruna/api/dataproxy/services/v2/dataproxy_service.proto index 3f72cdb5..d0d3b08d 100644 --- a/aruna/api/dataproxy/services/v2/dataproxy_service.proto +++ b/aruna/api/dataproxy/services/v2/dataproxy_service.proto @@ -144,7 +144,9 @@ message PullReplicationRequest { message ObjectInfo { string object_id = 1; int64 chunks = 2; - optional string extra = 3; // JSON encoded proxy specific extra fields + int64 raw_size = 3; + repeated uint32 block_list = 4; + optional string extra = 5; // JSON encoded proxy specific extra fields } message Chunk { From e7a6e157199d6b3b5c4a014d75e4e0c71fd3b43f Mon Sep 17 00:00:00 2001 From: lfbrehm <97600985+lfbrehm@users.noreply.github.com> Date: Wed, 10 Jan 2024 10:57:52 +0100 Subject: [PATCH 08/14] feat: PartialSync projects --- aruna/api/storage/models/v2/models.proto | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/aruna/api/storage/models/v2/models.proto b/aruna/api/storage/models/v2/models.proto index 3c372636..5adff969 100644 --- a/aruna/api/storage/models/v2/models.proto +++ b/aruna/api/storage/models/v2/models.proto @@ -270,9 +270,10 @@ message FullSync { } message PartialSync { oneof origin { - string collection_id = 1; - string dataset_id = 2; - string object_id = 3; + string project_id = 1; + string collection_id = 2; + string dataset_id = 3; + string object_id = 4; } } From 67de4c9090a62aec20ec3a05baf89e193ac49958 Mon Sep 17 00:00:00 2001 From: lfbrehm <97600985+lfbrehm@users.noreply.github.com> Date: Wed, 17 Jan 2024 11:41:15 +0100 Subject: [PATCH 09/14] fix: Fixed path annotations --- aruna/api/storage/services/v2/data_replication_service.proto | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/aruna/api/storage/services/v2/data_replication_service.proto b/aruna/api/storage/services/v2/data_replication_service.proto index 74da55a0..5adae612 100644 --- a/aruna/api/storage/services/v2/data_replication_service.proto +++ b/aruna/api/storage/services/v2/data_replication_service.proto @@ -34,7 +34,7 @@ service DataReplicationService { rpc PartialReplicateData(PartialReplicateDataRequest) returns (PartialReplicateDataResponse) { option (google.api.http) = { - post : "/v2/endpoints/{endpoint_id}/replication/{resource_id}/partial" + post : "/v2/endpoints/{endpoint_id}/replication/partial" body: "*" }; } @@ -47,7 +47,7 @@ service DataReplicationService { rpc UpdateReplicationStatus(UpdateReplicationStatusRequest) returns (UpdateReplicationStatusResponse) { option (google.api.http) = { - patch : "/v2/endpoints/{endpoint_id}/replication/{resource_id}/status" + patch : "/v2/endpoints/{endpoint_id}/replication/{object_id}/status" body: "*" }; } From 187cf828f77188a2de655473610e9be26efe782e Mon Sep 17 00:00:00 2001 From: lfbrehm <97600985+lfbrehm@users.noreply.github.com> Date: Wed, 17 Jan 2024 12:46:14 +0100 Subject: [PATCH 10/14] feat: GetReplicationStatus overhaul --- .../api/storage/services/v2/data_replication_service.proto | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/aruna/api/storage/services/v2/data_replication_service.proto b/aruna/api/storage/services/v2/data_replication_service.proto index 5adae612..62d14bb7 100644 --- a/aruna/api/storage/services/v2/data_replication_service.proto +++ b/aruna/api/storage/services/v2/data_replication_service.proto @@ -116,7 +116,12 @@ message GetReplicationStatusRequest { } message GetReplicationStatusResponse { - storage.models.v2.ReplicationStatus status = 1; + repeated ReplicationInfo infos = 1; +} + +message ReplicationInfo { + string resource_id = 1; + optional storage.models.v2.ReplicationStatus status = 2; } message DeleteReplicationRequest { From 16f9c1b42a2c10005d491bb3027674aeb499f04b Mon Sep 17 00:00:00 2001 From: lfbrehm <97600985+lfbrehm@users.noreply.github.com> Date: Wed, 17 Jan 2024 12:52:02 +0100 Subject: [PATCH 11/14] fix: Changed resource_id to object_id in ReplicationStatus --- aruna/api/storage/services/v2/data_replication_service.proto | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aruna/api/storage/services/v2/data_replication_service.proto b/aruna/api/storage/services/v2/data_replication_service.proto index 62d14bb7..6ea5c61f 100644 --- a/aruna/api/storage/services/v2/data_replication_service.proto +++ b/aruna/api/storage/services/v2/data_replication_service.proto @@ -120,7 +120,7 @@ message GetReplicationStatusResponse { } message ReplicationInfo { - string resource_id = 1; + string object_id = 1; optional storage.models.v2.ReplicationStatus status = 2; } From 56f5026c41f6fbbd9ac0a94eaef30ec3717254f2 Mon Sep 17 00:00:00 2001 From: lfbrehm <97600985+lfbrehm@users.noreply.github.com> Date: Wed, 17 Jan 2024 13:09:05 +0100 Subject: [PATCH 12/14] refactor: Recursive GetReplicationStatus with full info --- .../storage/services/v2/data_replication_service.proto | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/aruna/api/storage/services/v2/data_replication_service.proto b/aruna/api/storage/services/v2/data_replication_service.proto index 6ea5c61f..436429f0 100644 --- a/aruna/api/storage/services/v2/data_replication_service.proto +++ b/aruna/api/storage/services/v2/data_replication_service.proto @@ -120,8 +120,13 @@ message GetReplicationStatusResponse { } message ReplicationInfo { - string object_id = 1; - optional storage.models.v2.ReplicationStatus status = 2; + oneof resource { + string project_id = 1; + string collection_id = 2; + string dataset_id = 3; + string object_id = 4; + } + repeated storage.models.v2.DataEndpoint endpoint_info = 5; } message DeleteReplicationRequest { From 02707d4265850b8286a440270092f844ea414b21 Mon Sep 17 00:00:00 2001 From: lfbrehm <97600985+lfbrehm@users.noreply.github.com> Date: Wed, 17 Jan 2024 13:20:56 +0100 Subject: [PATCH 13/14] fix: Fixed GetReplicationStatus info field --- aruna/api/storage/services/v2/data_replication_service.proto | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aruna/api/storage/services/v2/data_replication_service.proto b/aruna/api/storage/services/v2/data_replication_service.proto index 436429f0..728a8174 100644 --- a/aruna/api/storage/services/v2/data_replication_service.proto +++ b/aruna/api/storage/services/v2/data_replication_service.proto @@ -126,7 +126,7 @@ message ReplicationInfo { string dataset_id = 3; string object_id = 4; } - repeated storage.models.v2.DataEndpoint endpoint_info = 5; + storage.models.v2.DataEndpoint endpoint_info = 5; } message DeleteReplicationRequest { From 993bf168d3723b395253123aa1cdf7f7dc6d88bb Mon Sep 17 00:00:00 2001 From: lfbrehm <97600985+lfbrehm@users.noreply.github.com> Date: Wed, 17 Jan 2024 15:05:44 +0100 Subject: [PATCH 14/14] fix: Added body to HookCallback --- aruna/api/hooks/services/v2/hooks_service.proto | 1 + 1 file changed, 1 insertion(+) diff --git a/aruna/api/hooks/services/v2/hooks_service.proto b/aruna/api/hooks/services/v2/hooks_service.proto index 39654659..6d7e6739 100644 --- a/aruna/api/hooks/services/v2/hooks_service.proto +++ b/aruna/api/hooks/services/v2/hooks_service.proto @@ -85,6 +85,7 @@ service HooksService { rpc HookCallback(HookCallbackRequest) returns (HookCallbackResponse) { option (google.api.http) = { patch : "/v2/hooks/callback" + body: "*" }; } }