Skip to content

Commit

Permalink
Merge pull request #275 from rabbitmq/auto-cleanup-store-ids-list
Browse files Browse the repository at this point in the history
Automatically clean the list of running stores if a Ra server has stopped behind the scenes
  • Loading branch information
dumbbell committed Jul 22, 2024
2 parents c6647cf + aabaa5a commit 467b4b6
Show file tree
Hide file tree
Showing 2 changed files with 45 additions and 3 deletions.
22 changes: 19 additions & 3 deletions src/khepri_cluster.erl
Original file line number Diff line number Diff line change
Expand Up @@ -1489,7 +1489,9 @@ forget_store(StoreId) ->
%% @doc Returns the list of running stores.

get_store_ids() ->
    %% The persistent term holds the cached map of stores Khepri believes are
    %% running; its keys are the store IDs.
    CachedStoreIds = maps:keys(persistent_term:get(?PT_STORE_IDS, #{})),
    %% The cache alone is not authoritative: keep only the stores for which
    %% is_store_running/1 returns true.
    lists:filter(fun is_store_running/1, CachedStoreIds).

-spec is_store_running(StoreId) -> IsRunning when
StoreId :: khepri:store_id(),
Expand All @@ -1504,7 +1506,21 @@ is_store_running(StoreId) ->
{error, _} -> false;
timeout -> false
end,

%% We know the real state of the Ra server. In the case the Ra server
%% stopped behind the back of Khepri, we update the cached list of running
%% stores as a side effect here.
StoreIds = persistent_term:get(?PT_STORE_IDS, #{}),
Known = maps:is_key(StoreId, StoreIds),
?assertEqual(Known, Runs),
case maps:is_key(StoreId, StoreIds) of
true when Runs ->
ok;
false when not Runs ->
ok;
true when not Runs ->
?LOG_DEBUG(
"Ra server for store ~s stopped behind the back of Khepri",
[StoreId]),
forget_store(StoreId)
end,

Runs.
26 changes: 26 additions & 0 deletions test/db_info.erl
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,32 @@ get_store_ids_with_running_store_test_() ->
[?FUNCTION_NAME],
khepri:get_store_ids())]}.

%% Checks what khepri:get_store_ids/0 and khepri_cluster:is_store_running/1
%% report before and after the Ra server of the store is stopped with
%% terminate_ra_server/1. The tests are order-dependent: the first group runs
%% against the store started by the fixture, then the server is terminated,
%% then the second group runs.
get_store_ids_after_killing_ra_server_test_() ->
    Setup = fun() -> test_ra_server_helpers:setup(?FUNCTION_NAME) end,
    Cleanup = fun test_ra_server_helpers:cleanup/1,

    %% Expectations while the store started by the fixture is still up.
    WhileRunning =
        [?_assertEqual(
            [?FUNCTION_NAME],
            khepri:get_store_ids()),
         ?_assertEqual(
            true,
            khepri_cluster:is_store_running(?FUNCTION_NAME))],

    %% Stop the Ra server directly with ra:stop_server/2 instead of going
    %% through Khepri.
    KillServer = ?_test(terminate_ra_server(?FUNCTION_NAME)),

    %% Expectations once the Ra server is gone.
    AfterKill =
        [?_assertEqual(
            [],
            khepri:get_store_ids()),
         ?_assertEqual(
            false,
            khepri_cluster:is_store_running(?FUNCTION_NAME))],

    {setup, Setup, Cleanup, WhileRunning ++ [KillServer] ++ AfterKill}.

%% Stops the local Ra server of `ClusterName' and waits for it to exit.
%% The cluster name is also used as the Ra system name.
terminate_ra_server(ClusterName) ->
    ServerId = {ClusterName, node()},
    %% The result of the stop request is deliberately ignored; the wait below
    %% is what the function returns.
    _ = ra:stop_server(ClusterName, ServerId),
    khepri_cluster:wait_for_ra_server_exit(ServerId).

is_store_running_with_no_running_stores_test_() ->
[?_assertEqual(
false,
Expand Down

0 comments on commit 467b4b6

Please sign in to comment.