Skip to content

Commit

Permalink
Merge pull request #2260 from val214/2187
Browse files Browse the repository at this point in the history
[FR] Group Replication Timeout count
  • Loading branch information
renecannao authored Sep 18, 2019
2 parents b545cb4 + 757684c commit b723dd5
Show file tree
Hide file tree
Showing 11 changed files with 261 additions and 48 deletions.
15 changes: 13 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,9 @@ testaurora: build_deps_debug build_lib_testaurora build_src_testaurora
.PHONY: testgalera
testgalera: build_deps_debug build_lib_testgalera build_src_testgalera

.PHONY: testgrouprep
testgrouprep: build_deps_debug build_lib_testgrouprep build_src_testgrouprep

.PHONY: testall
testall: build_deps_debug build_lib_testall build_src_testall

Expand Down Expand Up @@ -94,13 +97,21 @@ build_src_testgalera: build_deps build_lib_testgalera
build_lib_testgalera: build_deps_debug
cd lib && OPTZ="${O0} -ggdb -DDEBUG -DTEST_GALERA" CC=${CC} CXX=${CXX} ${MAKE}

.PHONY: build_src_testgrouprep
build_src_testgrouprep: build_deps build_lib_testgrouprep
cd src && OPTZ="${O0} -ggdb -DDEBUG -DTEST_GROUPREP" CC=${CC} CXX=${CXX} ${MAKE}

.PHONY: build_lib_testgrouprep
build_lib_testgrouprep: build_deps_debug
cd lib && OPTZ="${O0} -ggdb -DDEBUG -DTEST_GROUPREP" CC=${CC} CXX=${CXX} ${MAKE}

.PHONY: build_src_testall
build_src_testall: build_deps build_lib_testall
cd src && OPTZ="${O0} -ggdb -DDEBUG -DTEST_AURORA -DTEST_GALERA" CC=${CC} CXX=${CXX} ${MAKE}
cd src && OPTZ="${O0} -ggdb -DDEBUG -DTEST_AURORA -DTEST_GALERA -DTEST_GROUPREP" CC=${CC} CXX=${CXX} ${MAKE}

.PHONY: build_lib_testall
build_lib_testall: build_deps_debug
cd lib && OPTZ="${O0} -ggdb -DDEBUG -DTEST_AURORA -DTEST_GALERA" CC=${CC} CXX=${CXX} ${MAKE}
cd lib && OPTZ="${O0} -ggdb -DDEBUG -DTEST_AURORA -DTEST_GALERA -DTEST_GROUPREP" CC=${CC} CXX=${CXX} ${MAKE}

.PHONY: build_src_debug
build_src_debug: build_deps build_lib_debug
Expand Down
1 change: 1 addition & 0 deletions include/MySQL_Thread.h
Original file line number Diff line number Diff line change
Expand Up @@ -366,6 +366,7 @@ class MySQL_Threads_Handler
int monitor_replication_lag_timeout;
int monitor_groupreplication_healthcheck_interval;
int monitor_groupreplication_healthcheck_timeout;
int monitor_groupreplication_healthcheck_max_timeout_count;
int monitor_galera_healthcheck_interval;
int monitor_galera_healthcheck_timeout;
int monitor_galera_healthcheck_max_timeout_count;
Expand Down
12 changes: 11 additions & 1 deletion include/SQLite3_Server.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,10 @@ class SQLite3_Server {
#ifdef TEST_GALERA
std::vector<table_def_t *> *tables_defs_galera;
#endif // TEST_GALERA
#if defined(TEST_AURORA) || defined(TEST_GALERA)
#ifdef TEST_GROUPREP
std::vector<table_def_t *> *tables_defs_grouprep;
#endif // TEST_GROUPREP
#if defined(TEST_AURORA) || defined(TEST_GALERA) || defined(TEST_GROUPREP)
void insert_into_tables_defs(std::vector<table_def_t *> *, const char *table_name, const char *table_def);
void drop_tables_defs(std::vector<table_def_t *> *tables_defs);
void check_and_build_standard_tables(SQLite3DB *db, std::vector<table_def_t *> *tables_defs);
Expand All @@ -55,14 +58,21 @@ class SQLite3_Server {
unsigned int max_num_aurora_servers;
pthread_mutex_t aurora_mutex;
void populate_aws_aurora_table(MySQL_Session *sess);
void init_aurora_ifaces_string(std::string& s);
#endif // TEST_AURORA
#ifdef TEST_GALERA
//unsigned int cur_aurora_writer[3];
unsigned int num_galera_servers[3];
unsigned int max_num_galera_servers;
pthread_mutex_t galera_mutex;
void populate_galera_table(MySQL_Session *sess);
void init_galera_ifaces_string(std::string& s);
#endif // TEST_GALERA
#ifdef TEST_GROUPREP
pthread_mutex_t grouprep_mutex;
void populate_grouprep_table(MySQL_Session *sess);
void init_grouprep_ifaces_string(std::string& s);
#endif // TEST_GROUPREP
SQLite3_Server();
~SQLite3_Server();
char **get_variables_list();
Expand Down
4 changes: 4 additions & 0 deletions include/proxysql_admin.h
Original file line number Diff line number Diff line change
Expand Up @@ -319,5 +319,9 @@ class ProxySQL_Admin {
void enable_galera_testing();
#endif // TEST_GALERA

#ifdef TEST_GROUPREP
void enable_grouprep_testing();
#endif // TEST_GROUPREP

};
#endif /* __CLASS_PROXYSQL_ADMIN_H */
2 changes: 2 additions & 0 deletions include/proxysql_structs.h
Original file line number Diff line number Diff line change
Expand Up @@ -736,6 +736,7 @@ __thread int mysql_thread___monitor_replication_lag_interval;
__thread int mysql_thread___monitor_replication_lag_timeout;
__thread int mysql_thread___monitor_groupreplication_healthcheck_interval;
__thread int mysql_thread___monitor_groupreplication_healthcheck_timeout;
__thread int mysql_thread___monitor_groupreplication_healthcheck_max_timeout_count;
__thread int mysql_thread___monitor_galera_healthcheck_interval;
__thread int mysql_thread___monitor_galera_healthcheck_timeout;
__thread int mysql_thread___monitor_galera_healthcheck_max_timeout_count;
Expand Down Expand Up @@ -882,6 +883,7 @@ extern __thread int mysql_thread___monitor_replication_lag_interval;
extern __thread int mysql_thread___monitor_replication_lag_timeout;
extern __thread int mysql_thread___monitor_groupreplication_healthcheck_interval;
extern __thread int mysql_thread___monitor_groupreplication_healthcheck_timeout;
extern __thread int mysql_thread___monitor_groupreplication_healthcheck_max_timeout_count;
extern __thread int mysql_thread___monitor_galera_healthcheck_interval;
extern __thread int mysql_thread___monitor_galera_healthcheck_timeout;
extern __thread int mysql_thread___monitor_galera_healthcheck_max_timeout_count;
Expand Down
63 changes: 60 additions & 3 deletions lib/MySQL_Monitor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -852,9 +852,9 @@ bool MySQL_Monitor_State_Data::set_wait_timeout() {
if (mysql_thread___monitor_wait_timeout==false) {
return true;
}
#if defined(TEST_AURORA) || defined(TEST_GALERA)
#if defined(TEST_AURORA) || defined(TEST_GALERA) || defined(TEST_GROUPREP)
return true;
#endif // TEST_AURORA || TEST_GALERA
#endif // TEST_AURORA || TEST_GALERA || TEST_GROUPREP
bool ret=false;
char *query=NULL;
char *qt=(char *)"SET wait_timeout=%d";
Expand Down Expand Up @@ -1200,7 +1200,11 @@ void * monitor_group_replication_thread(void *arg) {
//async_exit_status=mysql_change_user_start(&ret_bool, mysql,"msandbox2","msandbox2","information_schema");
//mmsd->async_exit_status=mysql_ping_start(&mmsd->interr,mmsd->mysql);
mmsd->interr=0; // reset the value
#ifdef TEST_GROUPREP
mmsd->async_exit_status=mysql_query_start(&mmsd->interr,mmsd->mysql,"SELECT viable_candidate,read_only,transactions_behind FROM GR_MEMBER_ROUTING_CANDIDATE_STATUS");
#else
mmsd->async_exit_status=mysql_query_start(&mmsd->interr,mmsd->mysql,"SELECT viable_candidate,read_only,transactions_behind FROM sys.gr_member_routing_candidate_status");
#endif
while (mmsd->async_exit_status) {
mmsd->async_exit_status=wait_for_mysql(mmsd->mysql, mmsd->async_exit_status);
unsigned long long now=monotonic_time();
Expand Down Expand Up @@ -1274,6 +1278,7 @@ void * monitor_group_replication_thread(void *arg) {
sprintf(s,"%s:%d",mmsd->hostname,mmsd->port);
bool viable_candidate=false;
bool read_only=true;
int num_timeouts = 0;
long long transactions_behind=-1;
if (mmsd->interr == 0 && mmsd->result) {
int num_fields=0;
Expand Down Expand Up @@ -1335,11 +1340,63 @@ void * monitor_group_replication_thread(void *arg) {
node->add_entry(time_now, (mmsd->mysql_error_msg ? 0 : mmsd->t2-mmsd->t1) , transactions_behind,viable_candidate,read_only,mmsd->mysql_error_msg);
GloMyMon->Group_Replication_Hosts_Map.insert(std::make_pair(s,node));
}
// If the check that just completed failed with an error message starting with
// "timeout" (case-insensitive), count how many of the most recent recorded
// checks for this node were timeouts. The count is accumulated in num_timeouts;
// for any other error (or no error) num_timeouts is left untouched.
if (mmsd->mysql_error_msg) {
if (strncasecmp(mmsd->mysql_error_msg, (char *)"timeout", 7) == 0) {
// Window size: the configured max timeout count, capped at 10.
int max_num_timeout = 10;
if (mysql_thread___monitor_groupreplication_healthcheck_max_timeout_count < max_num_timeout) {
max_num_timeout = mysql_thread___monitor_groupreplication_healthcheck_max_timeout_count;
}
// NOTE(review): these are variable-length arrays sized by a runtime value.
// If the configured count were ever < 1 the arrays would be empty while
// index 0 is still accessed below -- confirm the variable is always >= 1.
unsigned long long start_times[max_num_timeout];
bool timeouts[max_num_timeout];
for (int i=0; i<max_num_timeout; i++) {
start_times[i]=0;
timeouts[i]=false;
}
// Scan the node's history and keep, in start_times[]/timeouts[], the
// max_num_timeout entries with the largest start_time.
for (int i=0; i<MyGR_Nentries; i++) {
if (node->last_entries[i].start_time) { // a zero start_time is skipped
// Find the slot currently holding the smallest (oldest) start_time.
int smallidx = 0;
for (int j=0; j<max_num_timeout; j++) {
if (j!=smallidx) {
if (start_times[j] < start_times[smallidx]) {
smallidx = j;
}
}
}
// Replace that slot if this history entry is newer, and record whether
// the entry's error text starts with "timeout".
if (start_times[smallidx] < node->last_entries[i].start_time) {
start_times[smallidx] = node->last_entries[i].start_time;
timeouts[smallidx] = false;
if (node->last_entries[i].error) {
if (strncasecmp(node->last_entries[i].error, (char *)"timeout", 7) == 0) {
timeouts[smallidx] = true;
}
}
}
}
}
// Count how many of the retained entries were timeouts.
for (int i=0; i<max_num_timeout; i++) {
if (timeouts[i]) {
num_timeouts++;
}
}
}
}
pthread_mutex_unlock(&GloMyMon->group_replication_mutex);

// NOTE: we update MyHGM outside the mutex group_replication_mutex
if (mmsd->mysql_error_msg) { // there was an error checking the status of the server, surely we need to reconfigure GR
MyHGM->update_group_replication_set_offline(mmsd->hostname, mmsd->port, mmsd->writer_hostgroup, mmsd->mysql_error_msg);
if (num_timeouts == 0) {
// it wasn't a timeout, reconfigure immediately
MyHGM->update_group_replication_set_offline(mmsd->hostname, mmsd->port, mmsd->writer_hostgroup, mmsd->mysql_error_msg);
} else {
// it was a timeout. Check if we are having consecutive timeout
if (num_timeouts == mysql_thread___monitor_groupreplication_healthcheck_max_timeout_count) {
// Report that the node reached the configured number of timed-out checks.
// Both %d fields print num_timeouts; only the "retires" typo is corrected here.
proxy_error("Server %s:%d missed %d group replication checks. Number retries %d, Assuming offline\n",
mmsd->hostname, mmsd->port, num_timeouts, num_timeouts);
MyHGM->update_group_replication_set_offline(mmsd->hostname, mmsd->port, mmsd->writer_hostgroup, mmsd->mysql_error_msg);
} else {
// not enough timeout
}
}
} else {
if (viable_candidate==false) {
MyHGM->update_group_replication_set_offline(mmsd->hostname, mmsd->port, mmsd->writer_hostgroup, (char *)"viable_candidate=NO");
Expand Down
2 changes: 1 addition & 1 deletion lib/MySQL_Protocol.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1746,7 +1746,7 @@ bool MySQL_Protocol::process_pkt_handshake_response(unsigned char *pkt, unsigned
((*myds)->sess->session_type == PROXYSQL_SESSION_ADMIN)
||
((*myds)->sess->session_type == PROXYSQL_SESSION_STATS)
#if defined(TEST_AURORA) || defined(TEST_GALERA)
#if defined(TEST_AURORA) || defined(TEST_GALERA) || defined(TEST_GROUPREP)
||
((*myds)->sess->session_type == PROXYSQL_SESSION_SQLITE)
#endif // TEST_AURORA || TEST_GALERA || TEST_GROUPREP
Expand Down
8 changes: 4 additions & 4 deletions lib/MySQL_Session.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4711,11 +4711,11 @@ void MySQL_Session::handler___status_CONNECTING_CLIENT___STATE_SERVER_HANDSHAKE(
(handshake_response_return == true)
&&
(
#if defined(TEST_AURORA) || defined(TEST_GALERA)
#if defined(TEST_AURORA) || defined(TEST_GALERA) || defined(TEST_GROUPREP)
(default_hostgroup<0 && ( session_type == PROXYSQL_SESSION_ADMIN || session_type == PROXYSQL_SESSION_STATS || session_type == PROXYSQL_SESSION_SQLITE) )
#else
(default_hostgroup<0 && ( session_type == PROXYSQL_SESSION_ADMIN || session_type == PROXYSQL_SESSION_STATS) )
#endif // TEST_AURORA || TEST_GALERA
#endif // TEST_AURORA || TEST_GALERA || TEST_GROUPREP
||
(default_hostgroup == 0 && session_type == PROXYSQL_SESSION_CLICKHOUSE)
||
Expand Down Expand Up @@ -4760,10 +4760,10 @@ void MySQL_Session::handler___status_CONNECTING_CLIENT___STATE_SERVER_HANDSHAKE(
client_authenticated=true;
switch (session_type) {
case PROXYSQL_SESSION_SQLITE:
#if defined(TEST_AURORA) || defined(TEST_GALERA)
#if defined(TEST_AURORA) || defined(TEST_GALERA) || defined(TEST_GROUPREP)
free_users=1;
break;
#endif // TEST_AURORA || TEST_GALERA
#endif // TEST_AURORA || TEST_GALERA || TEST_GROUPREP
case PROXYSQL_SESSION_MYSQL:
proxy_debug(PROXY_DEBUG_MYSQL_CONNECTION,8,"Session=%p , DS=%p , session_type=PROXYSQL_SESSION_MYSQL\n", this, client_myds);
if (ldap_ctx==NULL) {
Expand Down
17 changes: 17 additions & 0 deletions lib/MySQL_Thread.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -247,6 +247,7 @@ static char * mysql_thread_variables_names[]= {
(char *)"monitor_replication_lag_timeout",
(char *)"monitor_groupreplication_healthcheck_interval",
(char *)"monitor_groupreplication_healthcheck_timeout",
(char *)"monitor_groupreplication_healthcheck_max_timeout_count",
(char *)"monitor_galera_healthcheck_interval",
(char *)"monitor_galera_healthcheck_timeout",
(char *)"monitor_galera_healthcheck_max_timeout_count",
Expand Down Expand Up @@ -394,6 +395,7 @@ MySQL_Threads_Handler::MySQL_Threads_Handler() {
variables.monitor_replication_lag_timeout=1000;
variables.monitor_groupreplication_healthcheck_interval=5000;
variables.monitor_groupreplication_healthcheck_timeout=800;
// Default number of timed-out group replication health checks required before
// the monitor sets the server offline (same default as the Galera counterpart).
variables.monitor_groupreplication_healthcheck_max_timeout_count=3;
variables.monitor_galera_healthcheck_interval=5000;
variables.monitor_galera_healthcheck_timeout=800;
variables.monitor_galera_healthcheck_max_timeout_count=3;
Expand Down Expand Up @@ -753,6 +755,7 @@ int MySQL_Threads_Handler::get_variable_int(const char *name) {
if (a == 'g' || a == 'G') {
if (!strcmp(name,"monitor_groupreplication_healthcheck_interval")) return (int)variables.monitor_groupreplication_healthcheck_interval;
if (!strcmp(name,"monitor_groupreplication_healthcheck_timeout")) return (int)variables.monitor_groupreplication_healthcheck_timeout;
if (!strcmp(name,"monitor_groupreplication_healthcheck_max_timeout_count")) return (int)variables.monitor_groupreplication_healthcheck_max_timeout_count;
if (!strcmp(name,"monitor_galera_healthcheck_interval")) return (int)variables.monitor_galera_healthcheck_interval;
if (!strcmp(name,"monitor_galera_healthcheck_timeout")) return (int)variables.monitor_galera_healthcheck_timeout;
if (!strcmp(name,"monitor_galera_healthcheck_max_timeout_count")) return (int)variables.monitor_galera_healthcheck_max_timeout_count;
Expand Down Expand Up @@ -1115,6 +1118,10 @@ char * MySQL_Threads_Handler::get_variable(char *name) { // this is the public f
sprintf(intbuf,"%d",variables.monitor_groupreplication_healthcheck_timeout);
return strdup(intbuf);
}
if (!strcasecmp(name,"monitor_groupreplication_healthcheck_max_timeout_count")) {
sprintf(intbuf,"%d",variables.monitor_groupreplication_healthcheck_max_timeout_count);
return strdup(intbuf);
}
if (!strcasecmp(name,"monitor_galera_healthcheck_interval")) {
sprintf(intbuf,"%d",variables.monitor_galera_healthcheck_interval);
return strdup(intbuf);
Expand Down Expand Up @@ -1649,6 +1656,15 @@ bool MySQL_Threads_Handler::set_variable(char *name, char *value) { // this is t
return false;
}
}
if (!strcasecmp(name,"monitor_groupreplication_healthcheck_max_timeout_count")) {
int intv=atoi(value);
if (intv >= 1 && intv <= 10) {
variables.monitor_groupreplication_healthcheck_max_timeout_count=intv;
return true;
} else {
return false;
}
}
if (!strcasecmp(name,"monitor_galera_healthcheck_interval")) {
int intv=atoi(value);
if (intv >= 50 && intv <= 7*24*3600*1000) {
Expand Down Expand Up @@ -4337,6 +4353,7 @@ void MySQL_Thread::refresh_variables() {
mysql_thread___monitor_replication_lag_timeout=GloMTH->get_variable_int((char *)"monitor_replication_lag_timeout");
mysql_thread___monitor_groupreplication_healthcheck_interval=GloMTH->get_variable_int((char *)"monitor_groupreplication_healthcheck_interval");
mysql_thread___monitor_groupreplication_healthcheck_timeout=GloMTH->get_variable_int((char *)"monitor_groupreplication_healthcheck_timeout");
mysql_thread___monitor_groupreplication_healthcheck_max_timeout_count=GloMTH->get_variable_int((char *)"monitor_groupreplication_healthcheck_max_timeout_count");
mysql_thread___monitor_galera_healthcheck_interval=GloMTH->get_variable_int((char *)"monitor_galera_healthcheck_interval");
mysql_thread___monitor_galera_healthcheck_timeout=GloMTH->get_variable_int((char *)"monitor_galera_healthcheck_timeout");
mysql_thread___monitor_galera_healthcheck_max_timeout_count=GloMTH->get_variable_int((char *)"monitor_galera_healthcheck_max_timeout_count");
Expand Down
27 changes: 27 additions & 0 deletions lib/ProxySQL_Admin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10346,3 +10346,30 @@ void ProxySQL_Admin::enable_aurora_testing() {
load_mysql_query_rules_to_runtime();
}
#endif // TEST_AURORA

#ifdef TEST_GROUPREP
/**
 * Prepare the admin database and runtime for Group Replication testing.
 *
 * Steps, in order:
 *  1. Under the mysql_servers write lock: clear hostgroups 3271-3274, add the
 *     three test servers (127.2.1.1 - 127.2.1.3), register the group
 *     replication hostgroup definition, and load servers to runtime.
 *  2. Set the group replication monitor interval, timeout and max timeout
 *     count in global_variables and load MySQL variables to runtime.
 *  3. Add the test user 'grouprep1' and re-initialise users.
 *  4. Load MySQL query rules to runtime.
 */
void ProxySQL_Admin::enable_grouprep_testing() {
	proxy_info("Admin is enabling Group Replication Testing using SQLite3 Server and HGs from 3271 to 3274\n");

	// Topology statements: executed in this exact order while the servers lock is held.
	const char * const topology_queries[] = {
		"DELETE FROM mysql_servers WHERE hostgroup_id BETWEEN 3271 AND 3274",
		"INSERT INTO mysql_servers (hostgroup_id, hostname, use_ssl, comment) VALUES (3272, '127.2.1.1', 0, '')",
		"INSERT INTO mysql_servers (hostgroup_id, hostname, use_ssl, comment) VALUES (3273, '127.2.1.2', 0, '')",
		"INSERT INTO mysql_servers (hostgroup_id, hostname, use_ssl, comment) VALUES (3273, '127.2.1.3', 0, '')",
		"INSERT INTO mysql_group_replication_hostgroups "
		"(writer_hostgroup,backup_writer_hostgroup,reader_hostgroup,offline_hostgroup,active,max_writers,"
		"writer_is_also_reader,max_transactions_behind) VALUES (3272,3274,3273,3271,1,1,1,0);"
	};

	// Monitor settings applied after the servers have been loaded to runtime.
	const char * const monitor_queries[] = {
		"UPDATE global_variables SET variable_value=5000 WHERE variable_name='mysql-monitor_groupreplication_healthcheck_interval'",
		"UPDATE global_variables SET variable_value=800 WHERE variable_name='mysql-monitor_groupreplication_healthcheck_timeout'",
		"UPDATE global_variables SET variable_value=3 WHERE variable_name='mysql-monitor_groupreplication_healthcheck_max_timeout_count'"
	};

	mysql_servers_wrlock();
	for (const char *query : topology_queries) {
		admindb->execute(query);
	}
	load_mysql_servers_to_runtime();
	mysql_servers_wrunlock();

	for (const char *query : monitor_queries) {
		admindb->execute(query);
	}
	load_mysql_variables_to_runtime();

	// Test user whose default hostgroup is the writer hostgroup (3272).
	admindb->execute("INSERT INTO mysql_users (username,password,default_hostgroup) VALUES ('grouprep1','pass1',3272)");
	init_users();

	load_mysql_query_rules_to_runtime();
}
#endif // TEST_GROUPREP
Loading

0 comments on commit b723dd5

Please sign in to comment.