diff --git a/deps/rabbit/src/rabbit_feature_flags.erl b/deps/rabbit/src/rabbit_feature_flags.erl index 12fc1b7b939..c6f2e559976 100644 --- a/deps/rabbit/src/rabbit_feature_flags.erl +++ b/deps/rabbit/src/rabbit_feature_flags.erl @@ -1363,7 +1363,7 @@ run_feature_flags_mod_on_remote_node(Node, Function, Args, Timeout) -> sync_feature_flags_with_cluster([] = _Nodes, true = _NodeIsVirgin) -> rabbit_ff_controller:enable_default(); sync_feature_flags_with_cluster([] = _Nodes, false = _NodeIsVirgin) -> - ok; + rabbit_ff_controller:enable_required(); sync_feature_flags_with_cluster(Nodes, _NodeIsVirgin) -> %% We don't use `rabbit_nodes:filter_running()' here because the given %% `Nodes' list may contain nodes which are not members yet (the cluster diff --git a/deps/rabbit/src/rabbit_ff_controller.erl b/deps/rabbit/src/rabbit_ff_controller.erl index d6f11a73c9a..623717ecc6c 100644 --- a/deps/rabbit/src/rabbit_ff_controller.erl +++ b/deps/rabbit/src/rabbit_ff_controller.erl @@ -38,6 +38,7 @@ -export([is_supported/1, is_supported/2, enable/1, enable_default/0, + enable_required/0, check_node_compatibility/2, sync_cluster/1, refresh_after_app_load/0, @@ -136,6 +137,24 @@ enable_default() -> Ret end. +enable_required() -> + ?LOG_DEBUG( + "Feature flags: enable required feature flags", + #{domain => ?RMQLOG_DOMAIN_FEAT_FLAGS}), + case erlang:whereis(?LOCAL_NAME) of + Pid when is_pid(Pid) -> + %% The function is called while `rabbit' is running. + gen_statem:call(?LOCAL_NAME, enable_required); + undefined -> + %% The function is called while `rabbit' is stopped. We need to + %% start a one-off controller, again to make sure concurrent + %% changes are blocked. + {ok, Pid} = start_link(), + Ret = gen_statem:call(Pid, enable_required), + gen_statem:stop(Pid), + Ret + end. + check_node_compatibility(RemoteNode, LocalNodeAsVirgin) -> ThisNode = node(), case LocalNodeAsVirgin of @@ -304,6 +323,8 @@ proceed_with_task({enable, FeatureNames}) -> enable_task(FeatureNames); proceed_with_task(enable_default) -> enable_default_task(); +proceed_with_task(enable_required) -> + enable_required_task(); proceed_with_task({sync_cluster, Nodes}) -> sync_cluster_task(Nodes); proceed_with_task(refresh_after_app_load) -> @@ -841,6 +862,24 @@ get_forced_feature_flag_names_from_config() -> _ when is_list(Value) -> {ok, Value} end. +-spec enable_required_task() -> Ret when + Ret :: ok | {error, Reason}, + Reason :: term(). + +enable_required_task() -> + {ok, Inventory} = collect_inventory_on_nodes([node()]), + RequiredFeatureNames = list_required_feature_flags(Inventory), + case RequiredFeatureNames of + [] -> + ok; + _ -> + ?LOG_DEBUG( + "Feature flags: enabling required feature flags: ~0p", + [RequiredFeatureNames], + #{domain => ?RMQLOG_DOMAIN_FEAT_FLAGS}) + end, + enable_many(Inventory, RequiredFeatureNames). + -spec sync_cluster_task() -> Ret when Ret :: ok | {error, Reason}, Reason :: term(). @@ -855,23 +894,6 @@ sync_cluster_task() -> Reason :: term(). sync_cluster_task(Nodes) -> - %% We assume that a feature flag can only be enabled, not disabled. - %% Therefore this synchronization searches for feature flags enabled on - %% some nodes but not all, and make sure they are enabled everywhere. - %% - %% This happens when a node joins a cluster and that node has a different - %% set of enabled feature flags. - %% - %% FIXME: `enable_task()' requires that all nodes in the cluster run to - %% enable anything. Should we require the same here? On one hand, this - %% would make sure a feature flag isn't enabled while there is a network - %% partition. On the other hand, this would require that all nodes are - %% running before we can expand the cluster... - ?LOG_DEBUG( - "Feature flags: synchronizing feature flags on nodes: ~tp", - [Nodes], - #{domain => ?RMQLOG_DOMAIN_FEAT_FLAGS}), - case collect_inventory_on_nodes(Nodes) of {ok, Inventory} -> CantEnable = list_deprecated_features_that_cant_be_denied( @@ -880,7 +902,27 @@ sync_cluster_task(Nodes) -> [] -> FeatureNames = list_feature_flags_enabled_somewhere( Inventory, false), - enable_many(Inventory, FeatureNames); + + %% In addition to feature flags enabled somewhere, we also + %% ensure required feature flags are enabled accross the + %% board. + RequiredFeatureNames = list_required_feature_flags( + Inventory), + case RequiredFeatureNames of + [] -> + ok; + _ -> + ?LOG_DEBUG( + "Feature flags: enabling required feature " + "flags as part of cluster sync: ~0p", + [RequiredFeatureNames], + #{domain => ?RMQLOG_DOMAIN_FEAT_FLAGS}) + end, + + FeatureNamesToEnable = lists:usort( + FeatureNames ++ + RequiredFeatureNames), + enable_many(Inventory, FeatureNamesToEnable); _ -> ?LOG_ERROR( "Feature flags: the following deprecated features " @@ -998,7 +1040,7 @@ enable_with_registry_locked( [FeatureName], #{domain => ?RMQLOG_DOMAIN_FEAT_FLAGS}), - case check_required_and_enable(Inventory, FeatureName) of + case update_feature_state_and_enable(Inventory, FeatureName) of {ok, _Inventory} = Ok -> ?LOG_NOTICE( "Feature flags: `~ts` enabled", @@ -1014,91 +1056,6 @@ enable_with_registry_locked( end end. --spec check_required_and_enable(Inventory, FeatureName) -> Ret when - Inventory :: rabbit_feature_flags:cluster_inventory(), - FeatureName :: rabbit_feature_flags:feature_name(), - Ret :: {ok, Inventory} | {error, Reason}, - Reason :: term(). - -check_required_and_enable( - #{feature_flags := FeatureFlags, - states_per_node := _} = Inventory, - FeatureName) -> - %% Required feature flags vs. virgin nodes. - FeatureProps = maps:get(FeatureName, FeatureFlags), - Stability = rabbit_feature_flags:get_stability(FeatureProps), - ProvidedBy = maps:get(provided_by, FeatureProps), - NodesWhereDisabled = list_nodes_where_feature_flag_is_disabled( - Inventory, FeatureName), - - MarkDirectly = case Stability of - required when ProvidedBy =:= rabbit -> - ?LOG_DEBUG( - "Feature flags: `~s`: the feature flag is " - "required on some nodes; list virgin nodes " - "to determine if the feature flag can simply " - "be marked as enabled", - [FeatureName], - #{domain => ?RMQLOG_DOMAIN_FEAT_FLAGS}), - VirginNodesWhereDisabled = - lists:filter( - fun(Node) -> - case rabbit_db:is_virgin_node(Node) of - IsVirgin when is_boolean(IsVirgin) -> - IsVirgin; - undefined -> - false - end - end, NodesWhereDisabled), - VirginNodesWhereDisabled =:= NodesWhereDisabled; - required when ProvidedBy =/= rabbit -> - %% A plugin can be enabled/disabled at runtime and - %% between restarts. Thus we have no way to - %% distinguish a newly enabled plugin from a plugin - %% which was enabled in the past. - %% - %% Therefore, we always mark required feature flags - %% from plugins directly as enabled. However, the - %% plugin is responsible for checking that its - %% possibly existing data is as it expects it or - %% perform any cleanup/conversion! - ?LOG_DEBUG( - "Feature flags: `~s`: the feature flag is " - "required on some nodes; it comes from a " - "plugin which can be enabled at runtime, " - "so it can be marked as enabled", - [FeatureName], - #{domain => ?RMQLOG_DOMAIN_FEAT_FLAGS}), - true; - _ -> - false - end, - - case MarkDirectly of - false -> - case Stability of - required -> - ?LOG_DEBUG( - "Feature flags: `~s`: some nodes where the feature " - "flag is disabled are not virgin, we need to perform " - "a regular sync", - [FeatureName], - #{domain => ?RMQLOG_DOMAIN_FEAT_FLAGS}); - _ -> - ok - end, - update_feature_state_and_enable(Inventory, FeatureName); - true -> - ?LOG_DEBUG( - "Feature flags: `~s`: all nodes where the feature flag is " - "disabled are virgin, we can directly mark it as enabled " - "there", - [FeatureName], - #{domain => ?RMQLOG_DOMAIN_FEAT_FLAGS}), - mark_as_enabled_on_nodes( - NodesWhereDisabled, Inventory, FeatureName, true) - end. - -spec update_feature_state_and_enable(Inventory, FeatureName) -> Ret when Inventory :: rabbit_feature_flags:cluster_inventory(), FeatureName :: rabbit_feature_flags:feature_name(), @@ -1445,6 +1402,26 @@ list_feature_flags_enabled_somewhere( end, #{}, StatesPerNode), lists:sort(maps:keys(MergedStates)). +list_required_feature_flags( + #{feature_flags := FeatureFlags, states_per_node := StatesPerNode}) -> + FeatureStates = maps:get(node(), StatesPerNode), + RequiredFeatureNames = maps:fold( + fun(FeatureName, FeatureProps, Acc) -> + Stability = ( + rabbit_feature_flags:get_stability( + FeatureProps)), + IsEnabled = maps:get( + FeatureName, FeatureStates, + false), + case Stability of + required when IsEnabled =:= false -> + [FeatureName | Acc]; + _ -> + Acc + end + end, [], FeatureFlags), + lists:sort(RequiredFeatureNames). + -spec list_deprecated_features_that_cant_be_denied(Inventory) -> Ret when Inventory :: rabbit_feature_flags:cluster_inventory(), diff --git a/deps/rabbit/src/rabbit_ff_registry_factory.erl b/deps/rabbit/src/rabbit_ff_registry_factory.erl index a0197171efa..442d7235547 100644 --- a/deps/rabbit/src/rabbit_ff_registry_factory.erl +++ b/deps/rabbit/src/rabbit_ff_registry_factory.erl @@ -247,8 +247,6 @@ maybe_initialize_registry(NewSupportedFeatureFlags, %% return an error (and RabbitMQ start will abort). RabbitMQ won't be %% able to work, especially if the feature flag needed some %% migration, because the corresponding code was removed. - NewNode = - not rabbit_feature_flags:does_enabled_feature_flags_list_file_exist(), FeatureStates0 = case RegistryInitialized of true -> maps:merge( @@ -261,42 +259,11 @@ maybe_initialize_registry(NewSupportedFeatureFlags, maps:map( fun (FeatureName, FeatureProps) when ?IS_FEATURE_FLAG(FeatureProps) -> - Stability = rabbit_feature_flags:get_stability(FeatureProps), - ProvidedBy = maps:get(provided_by, FeatureProps), State = case FeatureStates0 of #{FeatureName := FeatureState} -> FeatureState; _ -> false end, - case Stability of - required when State =:= true -> - %% The required feature flag is already enabled, we keep - %% it this way. - State; - required when NewNode -> - %% This is the very first time the node starts, we - %% already mark the required feature flag as enabled. - ?assertNotEqual(state_changing, State), - true; - required when ProvidedBy =/= rabbit -> - ?assertNotEqual(state_changing, State), - true; - required -> - %% This is not a new node and the required feature flag - %% is disabled. This is an error and RabbitMQ must be - %% downgraded to enable the feature flag. - ?assertNotEqual(state_changing, State), - ?LOG_ERROR( - "Feature flags: `~ts`: required feature flag not " - "enabled! It must be enabled before upgrading " - "RabbitMQ.", - [FeatureName], - #{domain => ?RMQLOG_DOMAIN_FEAT_FLAGS}), - throw({error, - {disabled_required_feature_flag, - FeatureName}}); - _ -> - State - end; + State; (FeatureName, FeatureProps) when ?IS_DEPRECATION(FeatureProps) -> case FeatureStates0 of #{FeatureName := FeatureState} -> diff --git a/deps/rabbit/test/feature_flags_v2_SUITE.erl b/deps/rabbit/test/feature_flags_v2_SUITE.erl index 534c5cbdd65..f4594b65a2a 100644 --- a/deps/rabbit/test/feature_flags_v2_SUITE.erl +++ b/deps/rabbit/test/feature_flags_v2_SUITE.erl @@ -1480,15 +1480,8 @@ have_required_feature_flag_in_cluster_and_add_member_without_it( ok = run_on_node( NewNode, fun() -> - ?assertMatch( - {error, - {exception, - {assertNotEqual, - [{module, rabbit_ff_registry_factory}, - {line, _}, - {expression, "State"}, - {value, state_changing}]}, - _}}, + ?assertEqual( + ok, rabbit_feature_flags:sync_feature_flags_with_cluster( Nodes, false)), ok @@ -1500,7 +1493,7 @@ have_required_feature_flag_in_cluster_and_add_member_without_it( Node, fun() -> ?assertEqual( - Node =/= NewNode, + true, rabbit_feature_flags:is_enabled(FeatureName)), ok end,