From fedf37da2d9edc92340cc9d92e6affa346ab01f6 Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Sun, 16 Jan 2022 15:58:55 +0000 Subject: [PATCH] Non-zero instance_start_time Set instance_start_time to the creation time of the database to restore the ability for the replicator to detect a db recreation event during a replication. Without this, a replication can fail to notice the db was deleted and recreated, write the checkpoint document for the in-progress replication and keep going, whereas what should happen is the replication should reset from sequence 0. --- src/chttpd/src/chttpd_db.erl | 3 ++- .../src/couch_replicator_scheduler_job.erl | 9 +++------ src/fabric/src/fabric_db_info.erl | 5 +++-- src/mem3/src/mem3.erl | 15 +++++++++++++++ 4 files changed, 23 insertions(+), 9 deletions(-) diff --git a/src/chttpd/src/chttpd_db.erl b/src/chttpd/src/chttpd_db.erl index 875df6e0005..cfae8ad5f45 100644 --- a/src/chttpd/src/chttpd_db.erl +++ b/src/chttpd/src/chttpd_db.erl @@ -554,12 +554,13 @@ db_req( %% for missing databases that'd return error 404 from chttpd %% get_security used to prefer shards on the same node over other nodes fabric:get_security(DbName, [{user_ctx, Ctx}]), + CreationTime = mem3:shard_creation_time(DbName), send_json( Req, 201, {[ {ok, true}, - {instance_start_time, <<"0">>} + {instance_start_time, CreationTime} ]} ); db_req(#httpd{path_parts = [_, <<"_ensure_full_commit">>]} = Req, _Db) -> diff --git a/src/couch_replicator/src/couch_replicator_scheduler_job.erl b/src/couch_replicator/src/couch_replicator_scheduler_job.erl index f300dd44e93..6d9711790d9 100644 --- a/src/couch_replicator/src/couch_replicator_scheduler_job.erl +++ b/src/couch_replicator/src/couch_replicator_scheduler_job.erl @@ -846,18 +846,15 @@ do_checkpoint(State) -> end; {SrcInstanceStartTime, _NewTgtInstanceStartTime} -> {checkpoint_commit_failure, << - "Target database out of sync. " - "Try to increase max_dbs_open at the target's server." 
+ "instance_start_time on target database has changed since last checkpoint." >>}; {_NewSrcInstanceStartTime, TgtInstanceStartTime} -> {checkpoint_commit_failure, << - "Source database out of sync. " - "Try to increase max_dbs_open at the source's server." + "instance_start_time on source database has changed since last checkpoint." >>}; {_NewSrcInstanceStartTime, _NewTgtInstanceStartTime} -> {checkpoint_commit_failure, << - "Source and target databases out of " - "sync. Try to increase max_dbs_open at both servers." + "instance_start_time on source and target database has changed since last checkpoint." >>} end. diff --git a/src/fabric/src/fabric_db_info.erl b/src/fabric/src/fabric_db_info.erl index 2366420c894..5461404c508 100644 --- a/src/fabric/src/fabric_db_info.erl +++ b/src/fabric/src/fabric_db_info.erl @@ -19,6 +19,7 @@ go(DbName) -> Shards = mem3:shards(DbName), + CreationTime = mem3:shard_creation_time(DbName), Workers = fabric_util:submit_jobs(Shards, get_db_info, []), RexiMon = fabric_util:create_monitors(Shards), Fun = fun handle_message/3, @@ -28,7 +29,7 @@ go(DbName) -> try case fabric_util:recv(Workers, #shard.ref, Fun, Acc0) of {ok, Acc} -> - {ok, Acc}; + {ok, [{instance_start_time, CreationTime} | Acc]}; {timeout, {WorkersDict, _, _}} -> DefunctWorkers = fabric_util:remove_done_workers( WorkersDict, @@ -117,7 +118,7 @@ merge_results(Info) -> (_K, _V, Acc) -> Acc end, - [{instance_start_time, <<"0">>}], + [], Dict ). diff --git a/src/mem3/src/mem3.erl b/src/mem3/src/mem3.erl index 7151a3ec119..5a985b7f827 100644 --- a/src/mem3/src/mem3.erl +++ b/src/mem3/src/mem3.erl @@ -33,6 +33,7 @@ -export([get_placement/1]). -export([ping/1, ping/2]). -export([db_is_current/1]). +-export([shard_creation_time/1]). %% For mem3 use only. -export([name/1, node/1, range/1, engine/1]). @@ -198,6 +199,20 @@ shard_suffix(DbName0) when is_binary(DbName0) -> shard_suffix(Db) -> shard_suffix(couch_db:name(Db)). 
+%% Text after the last "." in the name of the first shard of DbName0, or
+%% <<"0">> when the name has no extension or is not under "shards/".
+shard_creation_time(DbName0) ->
+    Shard = hd(shards(DbName0)),
+    creation_time_from_name(Shard#shard.name).
+
+creation_time_from_name(<<"shards/", _:8/binary, "-", _:8/binary, "/", DbName/binary>>) ->
+    case filename:extension(DbName) of
+        <<".", Suffix/binary>> -> Suffix;
+        _ -> <<"0">>
+    end;
+creation_time_from_name(_) ->
+    <<"0">>.
+
 fold_shards(Fun, Acc) ->
     mem3_shards:fold(Fun, Acc).