-
Notifications
You must be signed in to change notification settings - Fork 426
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Restart db service once domain core is down #3224
Changes from all commits
f66faa1
ece9b7f
d8fe7a6
7c0b35a
db26949
0036269
91ab7b8
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -56,12 +56,12 @@ start(Pairs, AllowedHostTypes) -> | |
{?MODULE, | ||
{?MODULE, start_link, [Pairs, AllowedHostTypes]}, | ||
permanent, infinity, worker, [?MODULE]}, | ||
just_ok(supervisor:start_child(ejabberd_sup, ChildSpec)). | ||
just_ok(supervisor:start_child(mongoose_domain_sup, ChildSpec)). | ||
|
||
%% required for integration tests | ||
stop() -> | ||
supervisor:terminate_child(ejabberd_sup, ?MODULE), | ||
supervisor:delete_child(ejabberd_sup, ?MODULE), | ||
supervisor:terminate_child(mongoose_domain_sup, ?MODULE), | ||
supervisor:delete_child(mongoose_domain_sup, ?MODULE), | ||
ok. | ||
|
||
-endif. | ||
|
@@ -130,6 +130,7 @@ get_start_args() -> | |
%% gen_server callbacks | ||
%%-------------------------------------------------------------------- | ||
init([Pairs, AllowedHostTypes]) -> | ||
service_domain_db:reset_last_event_id(), | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. fix for @NelsonVides's bug |
||
ets:new(?TABLE, [set, named_table, protected, {read_concurrency, true}]), | ||
ets:new(?HOST_TYPE_TABLE, [set, named_table, protected, {read_concurrency, true}]), | ||
insert_host_types(?HOST_TYPE_TABLE, AllowedHostTypes), | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -148,6 +148,7 @@ select_from(FromId, Limit) -> | |
Pool = get_db_pool(), | ||
Args = rdbms_queries:add_limit_arg(Limit, [FromId]), | ||
{selected, Rows} = execute_successfully(Pool, domain_select_from, Args), | ||
?LOG_ERROR(#{what => select_from, rows => Rows, from_id => FromId, limit => Limit}), | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Error seems a bit hardcore here 🤔 |
||
Rows. | ||
|
||
-spec select_updates_from(event_id(), limit()) -> [row()]. | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
-module(mongoose_domain_sup). | ||
-export([start_link/0]). | ||
-export([init/1]). | ||
|
||
-ignore_xref([start_link/0, init/1]). | ||
|
||
-spec(start_link() -> | ||
{ok, Pid :: pid()} | ignore | {error, Reason :: term()}). | ||
start_link() -> | ||
supervisor:start_link({local, ?MODULE}, ?MODULE, []). | ||
|
||
init([]) -> | ||
SupFlags = #{strategy => rest_for_one, | ||
intensity => 100, period => 5, | ||
shutdown => 1000}, | ||
ChildSpecs = [], | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. If we have this supervisor here, why don't we give the supervisor its child specs and get the children started here, instead of hardcoding them in the mongooseim:start() entry point? Also, a strategy of rest_for_one would restart all children started after the one that died (which kinda seems right here), and right now the order in which children are started is not clear in any single place. |
||
{ok, { SupFlags, ChildSpecs }}. |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -10,16 +10,14 @@ | |
-define(LAST_EVENT_ID_KEY, {?MODULE, last_event_id}). | ||
|
||
-export([start/1, stop/0, restart/0, config_spec/0]). | ||
-export([start_link/0]). | ||
-export([start_link/1]). | ||
-export([enabled/0]). | ||
-export([force_check_for_updates/0]). | ||
-export([sync/0, sync_local/0]). | ||
|
||
%% exported for integration tests only! | ||
-export([reset_last_event_id/0]). | ||
|
||
-ignore_xref([code_change/3, handle_call/3, handle_cast/2, handle_info/2, | ||
init/1, start_link/0, sync/0, sync_local/0, terminate/2, reset_last_event_id/0]). | ||
init/1, start_link/1, sync/0, sync_local/0, terminate/2, reset_last_event_id/0]). | ||
|
||
%% gen_server callbacks | ||
-export([init/1, handle_call/3, handle_cast/2, handle_info/2, | ||
|
@@ -29,19 +27,18 @@ | |
%% Client code | ||
|
||
start(Opts) -> | ||
mongoose_domain_sql:start(Opts), | ||
ChildSpec = | ||
{?MODULE, | ||
{?MODULE, start_link, []}, | ||
{?MODULE, start_link, [Opts]}, | ||
permanent, infinity, worker, [?MODULE]}, | ||
supervisor:start_child(ejabberd_sup, ChildSpec), | ||
supervisor:start_child(mongoose_domain_sup, ChildSpec), | ||
mongoose_domain_db_cleaner:start(Opts), | ||
ok. | ||
|
||
stop() -> | ||
mongoose_domain_db_cleaner:stop(), | ||
supervisor:terminate_child(ejabberd_sup, ?MODULE), | ||
supervisor:delete_child(ejabberd_sup, ?MODULE), | ||
supervisor:terminate_child(mongoose_domain_sup, ?MODULE), | ||
supervisor:delete_child(mongoose_domain_sup, ?MODULE), | ||
ok. | ||
|
||
restart() -> | ||
|
@@ -62,8 +59,8 @@ config_spec() -> | |
validate = pool_name} | ||
}}. | ||
|
||
start_link() -> | ||
gen_server:start_link({local, ?MODULE}, ?MODULE, [], []). | ||
start_link(Opts) -> | ||
gen_server:start_link({local, ?MODULE}, ?MODULE, [Opts], []). | ||
|
||
enabled() -> | ||
mongoose_service:is_loaded(?MODULE). | ||
|
@@ -94,11 +91,11 @@ sync_local() -> | |
%% --------------------------------------------------------------------------- | ||
%% Server callbacks | ||
|
||
init([]) -> | ||
init([Opts]) -> | ||
mongoose_domain_sql:start(Opts), | ||
mongoose_domain_gaps:init(), | ||
?PG_JOIN(?GROUP, self()), | ||
gen_server:cast(self(), initial_loading), | ||
%% initial state will be set on initial_loading processing | ||
{ok, #{}}. | ||
|
||
handle_call(ping, _From, State) -> | ||
|
@@ -112,7 +109,7 @@ handle_cast(initial_loading, State) -> | |
?LOG_INFO(#{what => domains_loaded, last_event_id => LastEventId}), | ||
NewState = State#{last_event_id => LastEventId, | ||
check_for_updates_interval => 30000}, | ||
{noreply, handle_check_for_updates(NewState)}; | ||
{noreply, handle_check_for_updates(NewState, true)}; | ||
handle_cast(reset_and_shutdown, State) -> | ||
%% to ensure that domains table is re-read from | ||
%% scratch, we must reset the last event id. | ||
|
@@ -123,7 +120,7 @@ handle_cast(Msg, State) -> | |
{noreply, State}. | ||
|
||
handle_info(check_for_updates, State) -> | ||
{noreply, handle_check_for_updates(State)}; | ||
{noreply, handle_check_for_updates(State, false)}; | ||
handle_info(Info, State) -> | ||
?UNEXPECTED_INFO(Info), | ||
{noreply, State}. | ||
|
@@ -140,27 +137,33 @@ code_change(_OldVsn, State, _Extra) -> | |
initial_load(undefined) -> | ||
LastEventId = mongoose_domain_sql:get_max_event_id_or_set_dummy(), | ||
PageSize = 10000, | ||
mongoose_domain_loader:load_data_from_base(0, PageSize), | ||
mongoose_domain_loader:remove_outdated_domains_from_core(), | ||
{ok, #{count := Count}} = mongoose_domain_loader:load_data_from_base(0, PageSize), | ||
{ok, #{count := Outdated}} = mongoose_domain_loader:remove_outdated_domains_from_core(), | ||
set_last_event_id(LastEventId), | ||
?LOG_WARNING(#{what => initial_load, last_event_id => LastEventId, | ||
domains_count => Count, outdated_count => Outdated}), | ||
LastEventId; | ||
initial_load(LastEventId) when is_integer(LastEventId) -> | ||
?LOG_WARNING(#{what => skip_initial_load, last_event_id => LastEventId}), | ||
LastEventId. %% Skip initial init | ||
|
||
handle_check_for_updates(State = #{last_event_id := LastEventId, | ||
check_for_updates_interval := Interval}) -> | ||
maybe_cancel_timer(State), | ||
check_for_updates_interval := Interval}, | ||
IsInitial) -> | ||
maybe_cancel_timer(IsInitial, State), | ||
receive_all_check_for_updates(), | ||
PageSize = 1000, | ||
LastEventId2 = mongoose_domain_loader:check_for_updates(LastEventId, PageSize), | ||
maybe_set_last_event_id(LastEventId, LastEventId2), | ||
TRef = erlang:send_after(Interval, self(), check_for_updates), | ||
State#{last_event_id => LastEventId2, check_for_updates => TRef}. | ||
State#{last_event_id => LastEventId2, check_for_updates_tref => TRef}. | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Fix for @chrzaszcz's bug with too many queries. |
||
|
||
maybe_cancel_timer(#{check_for_updates_tref := TRef}) -> | ||
erlang:cancel_timer(TRef); | ||
maybe_cancel_timer(_) -> | ||
ok. | ||
maybe_cancel_timer(IsInitial, State) -> | ||
TRef = maps:get(check_for_updates_tref, State, undefined), | ||
case {IsInitial, TRef} of | ||
{true, undefined} -> ok; %% TRef is not set the first time | ||
{false, _} -> erlang:cancel_timer(TRef) | ||
end. | ||
|
||
receive_all_check_for_updates() -> | ||
receive check_for_updates -> receive_all_check_for_updates() after 0 -> ok end. | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is not exactly the scenario that I had in mind, but a much simpler one: put a bunch of domains on one node, then add an empty second node to cluster with the first; then we expect this second node to serve the same domains the first does.
How does this scenario you propose proves my scenario is also valid?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It is never empty, if it is running.
It gets domains from DB.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
That is explicitly not true. From my tests, it only loads the static toml config, nothing else. Try this (using macros from
dynamic_domains_SUITE
And you'll see they report different values.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This code:
rpc(mim(), mongoose_service, is_loaded, [service_domain_db])
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Manually running this branch against that test still returns different ets tables. Am I wrong to expect both ets tables to be equal?