Skip to content

Commit

Permalink
[#376] Fix tls/errors subtest failure due to certificate verification…
Browse files Browse the repository at this point in the history
… failures showing up after tls handshake in TLS 1.3

In the tls/errors subtest, the "TEST CASE 5" section tests that expired certificates issue appropriate
errors. This test fails with an assert failure in the source server.

%YDB-F-ASSERT, Assert failed in sr_port/repl_comm.c line 352 for expression (FALSE)

Below is the C-stack of the source server at the time of the failure.

 #4  rts_error_va () at sr_unix/rts_error.c:194
 #5  rts_error_csa () at sr_unix/rts_error.c:103
 #6  repl_recv () at sr_port/repl_comm.c:352
 #7  gtmsource_recv_ctl () at sr_unix/gtmsource_process.c:438
 #8  gtmsource_process () at sr_unix/gtmsource_process.c:1488
 #9  gtmsource () at sr_unix/gtmsource.c:528

The source server fails a SSL_read() call after the initial handshake due to an expired certificate.

In TLS 1.2 and before, the initial handshake used to detect the expired certificate. That used to
happen in the function repl_do_tls_handshake() and if a failure is detected there, the function
gtmsource_exchange_tls_info() used to fall back to plaintext mode of replication thereby keeping the
source server alive.

But with TLS 1.3, if the failing assert in repl_comm.c line 352 is removed, a TLSIOERROR error would
be issued and the source server would still terminate with an error instead of falling back to
plaintext mode. This is because the TLS error was detected after the initial handshake and the
replication code was not written to handle plaintext fallback after the initial handshake.

With TLS 1.3, the initial handshake has been cut down a lot and so I guess the certificate expiry case
cannot be made to issue an error as part of the initial handshake by using any controls in the SSL*
functions. Therefore, handling plaintext fallback in case of TLS errors after the initial handshake
effectively becomes a necessity with TLS 1.3.

Towards this, repl_recv() and repl_send() have been enhanced to return with a status of ERR_TLSIOERROR
when repl_errno is set to EREPL_RECV and EREPL_SEND respectively. And all places that check for
EREPL_RECV and/or EREPL_SEND have been enhanced to handle the new ERR_TLSIOERROR status code by
invoking new macros GTMSOURCE_HANDLE_TLSIOERROR or GTMRECV_HANDLE_TLSIOERROR depending on whether the
caller is the source or receiver server respectively. These macros take care of checking if plaintext
fallback was specified at the source/receiver server startup and if so do the plaintext fallback
without terminating the server (but issuing a warning-type YDB-W-TLSIOERROR message). If no fallback
was specified, a error-type YDB-E-TLSIOERROR message is issued and the server terminates.
  • Loading branch information
nars1 committed Nov 5, 2018
1 parent 1d0a894 commit 55a93a2
Show file tree
Hide file tree
Showing 8 changed files with 283 additions and 107 deletions.
77 changes: 62 additions & 15 deletions sr_port/gtm_repl.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@
* Copyright (c) 2013-2016 Fidelity National Information *
* Services, Inc. and/or its subsidiaries. All rights reserved. *
* *
* Copyright (c) 2018 YottaDB LLC. and/or its subsidiaries. *
* All rights reserved. *
* *
* This source code contains the intellectual property *
* of its copyright holder(s), and is made available *
* under a license. If you do not know the terms of *
Expand Down Expand Up @@ -48,22 +51,66 @@ error_def(ERR_TLSCLOSE);
#define DEFAULT_RENEGOTIATE_TIMEOUT (2 * 60) /* About 2 hours between renegotiation. */
#define MIN_RENEGOTIATE_TIMEOUT 1

#define REPLTLS_SET_NEXT_RENEGOTIATE_HRTBT(NEXT_RENEG_HRTBT) \
{ \
if (0 < gtmsource_options.renegotiate_interval) \
{ \
repl_tls.renegotiate_state = REPLTLS_WAITING_FOR_RENEG_TIMEOUT; \
TIMEOUT_DONE_NOCH(NEXT_RENEG_HRTBT); \
TIMEOUT_INIT_NOCH(NEXT_RENEG_HRTBT, gtmsource_options.renegotiate_interval * MILLISECS_IN_SEC); \
} \
}
#define REPLTLS_SET_NEXT_RENEGOTIATE_HRTBT(NEXT_RENEG_HRTBT) \
MBSTART { \
if (0 < gtmsource_options.renegotiate_interval) \
{ \
repl_tls.renegotiate_state = REPLTLS_WAITING_FOR_RENEG_TIMEOUT; \
TIMEOUT_DONE_NOCH(NEXT_RENEG_HRTBT); \
TIMEOUT_INIT_NOCH(NEXT_RENEG_HRTBT, gtmsource_options.renegotiate_interval * MILLISECS_IN_SEC); \
} \
} MBEND

#define ISSUE_REPLNOTLS(ERRID, STR1, STR2) \
MBSTART { \
if (!PLAINTEXT_FALLBACK) \
rts_error_csa(CSA_ARG(NULL) VARLSTCNT(6) ERRID, 4, LEN_AND_LIT(STR1), LEN_AND_LIT(STR2)); \
gtm_putmsg_csa(CSA_ARG(NULL) VARLSTCNT(6) MAKE_MSG_WARNING(ERRID), 4, LEN_AND_LIT(STR1), LEN_AND_LIT(STR2)); \
} MBEND

/* The GTMSOURCE_HANDLE_TLSIOERROR and GTMRECV_HANDLE_TLSIOERROR macros handle an error from the TLS/SSL layer.
* They check if plaintext fallback is specified by the user and if so, close the current connection and fall
* back to non-tls (i.e. non-encrypted) communication or replication. If plaintext fallback is not specified,
* then issue a TLSIOERROR error and terminate the caller replication server (source or receiver server).
*/
#define GTMSOURCE_HANDLE_TLSIOERROR(SEND_OR_RECV) \
MBSTART { \
assert(repl_tls.enabled); \
if (!PLAINTEXT_FALLBACK) \
rts_error_csa(CSA_ARG(NULL) VARLSTCNT(8) ERR_TLSIOERROR, 2, LEN_AND_LIT(SEND_OR_RECV), \
ERR_TEXT, 2, LEN_AND_STR(gtm_tls_get_error())); \
else \
{ /* Fall back from TLS to Plaintext */ \
gtm_putmsg_csa(CSA_ARG(NULL) VARLSTCNT(8) \
MAKE_MSG_WARNING(ERR_TLSIOERROR), 2, LEN_AND_LIT(SEND_OR_RECV), \
ERR_TEXT, 2, LEN_AND_STR(gtm_tls_get_error())); \
repl_log(gtmsource_log_fp, TRUE, TRUE, \
"Plaintext fallback enabled. Closing and reconnecting without TLS/SSL.\n"); \
repl_close(&gtmsource_sock_fd); \
SHORT_SLEEP(GTMSOURCE_WAIT_FOR_RECEIVER_CLOSE_CONN); \
gtmsource_state = gtmsource_local->gtmsource_state = GTMSOURCE_WAITING_FOR_CONNECTION; \
CLEAR_REPL_TLS_REQUESTED; /* As if -tlsid qualifier was never specified. */ \
} \
} MBEND

#define ISSUE_REPLNOTLS(ERRID, STR1, STR2) \
{ \
if (!PLAINTEXT_FALLBACK) \
rts_error_csa(CSA_ARG(NULL) VARLSTCNT(6) ERRID, 4, LEN_AND_LIT(STR1), LEN_AND_LIT(STR2)); \
gtm_putmsg_csa(CSA_ARG(NULL) VARLSTCNT(6) MAKE_MSG_WARNING(ERRID), 4, LEN_AND_LIT(STR1), LEN_AND_LIT(STR2)); \
}
#define GTMRECV_HANDLE_TLSIOERROR(SEND_OR_RECV) \
MBSTART { \
assert(repl_tls.enabled); \
if (!PLAINTEXT_FALLBACK) \
rts_error_csa(CSA_ARG(NULL) VARLSTCNT(8) ERR_TLSIOERROR, 2, LEN_AND_LIT(SEND_OR_RECV), \
ERR_TEXT, 2, LEN_AND_STR(gtm_tls_get_error())); \
else \
{ /* Fall back from TLS to Plaintext */ \
gtm_putmsg_csa(CSA_ARG(NULL) VARLSTCNT(8) \
MAKE_MSG_WARNING(ERR_TLSIOERROR), 2, LEN_AND_LIT(SEND_OR_RECV), \
ERR_TEXT, 2, LEN_AND_STR(gtm_tls_get_error())); \
repl_log(gtmrecv_log_fp, TRUE, TRUE, \
"Plaintext fallback enabled. Closing and reconnecting without TLS/SSL.\n"); \
repl_close(&gtmrecv_sock_fd); \
repl_connection_reset = TRUE; \
CLEAR_REPL_TLS_REQUESTED; /* As if -tlsid qualifier was never specified. */ \
} \
} MBEND

void repl_log_tls_info(FILE *logfp, gtm_tls_socket_t *socket);
int repl_do_tls_handshake(FILE *logfp, int sock_fd, boolean_t do_accept, int *poll_direction);
Expand Down
17 changes: 9 additions & 8 deletions sr_port/repl_comm.c
Original file line number Diff line number Diff line change
Expand Up @@ -238,11 +238,11 @@ int repl_send(int sock_fd, unsigned char *buff, int *send_len, int timeout GTMTL
/* handle error */
save_errno = repl_tls.enabled ? gtm_tls_errno() : ERRNO;
if (-1 == save_errno)
{ /* This indicates an error from TLS/SSL layer and not from a system call. */
{ /* This indicates an error from TLS/SSL layer and not from a system call.
* Set error status to ERR_TLSIOERROR and let caller handle it appropriately.
*/
assert(repl_tls.enabled);
assert(FALSE);
rts_error_csa(CSA_ARG(NULL) VARLSTCNT(8) ERR_TLSIOERROR, 2, LEN_AND_LIT("send"), ERR_TEXT, 2,
LEN_AND_STR(gtm_tls_get_error()));
save_errno = ERR_TLSIOERROR;
}
# else
save_errno = ERRNO;
Expand Down Expand Up @@ -347,11 +347,12 @@ int repl_recv(int sock_fd, unsigned char *buff, int *recv_len, int timeout GTMTL
/* handle error */
save_errno = repl_tls.enabled ? gtm_tls_errno() : ERRNO;
if (-1 == save_errno)
{
{ /* This indicates an error from TLS/SSL layer and not from a system call.
* Set error status to ERR_TLSIOERROR and let caller handle it appropriately.
*/
assert(repl_tls.enabled);
assert(FALSE);
rts_error_csa(CSA_ARG(NULL) VARLSTCNT(8) ERR_TLSIOERROR, 2, LEN_AND_LIT("recv"), ERR_TEXT, 2,
LEN_AND_STR(gtm_tls_get_error()));
save_errno = ERR_TLSIOERROR;
bytes_recvd = -1; /* to ensure "save_errno" does not get overwritten a few lines later */
}
# else
save_errno = ERRNO;
Expand Down
16 changes: 14 additions & 2 deletions sr_unix/gtmrecv_fetchresync.c
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@
#include "repl_instance.h"
#include "gtmio.h"
#include "replgbl.h"
#include "gtm_repl.h"

#define MAX_ATTEMPTS_FOR_FETCH_RESYNC 60 /* max-wait in seconds for source server response after connection is established */
#define MAX_WAIT_FOR_FETCHRESYNC_CONN 60 /* max-wait in seconds to establish connection with the source server */
Expand All @@ -72,14 +73,24 @@
if (SS_NORMAL != STATUS) \
{ \
if (EREPL_RECV == repl_errno) \
{ /* Note that no check for TLSIOERROR is needed here since the fetchresync rollback \
* currently never connects with a source server using TLS. \
*/ \
assert(ERR_TLSIOERROR != STATUS); \
rts_error_csa(CSA_ARG(NULL) VARLSTCNT(7) ERR_REPLCOMM, 0, ERR_TEXT, 2, \
LEN_AND_LIT("Error in recv() for " MESSAGE), STATUS); /* BYPASSOK(recv) */ \
else if (EREPL_SEND == repl_errno) \
} else if (EREPL_SEND == repl_errno) \
{ /* Note that no check for TLSIOERROR is needed here since the fetchresync rollback \
* currently never connects with a source server using TLS. \
*/ \
assert(ERR_TLSIOERROR != STATUS); \
rts_error_csa(CSA_ARG(NULL) VARLSTCNT(7) ERR_REPLCOMM, 0, ERR_TEXT, 2, \
LEN_AND_LIT("Error in send() for " MESSAGE), STATUS); /* BYPASSOK(send) */ \
else if (EREPL_SELECT == repl_errno) \
} else if (EREPL_SELECT == repl_errno) \
rts_error_csa(CSA_ARG(NULL) VARLSTCNT(7) ERR_REPLCOMM, 0, ERR_TEXT, 2, \
LEN_AND_LIT("Error in select() for " MESSAGE), STATUS); \
else \
assert(FALSE); \
} \
if (0 >= WAIT_COUNT) \
rts_error_csa(CSA_ARG(NULL) VARLSTCNT(6) ERR_REPLCOMM, 0, ERR_TEXT, 2, \
Expand Down Expand Up @@ -236,6 +247,7 @@ int gtmrecv_fetchresync(int port, seq_num *resync_seqno, seq_num max_reg_seqno)
rts_error_csa(CSA_ARG(NULL) VARLSTCNT(1) ERR_REPLCOMM);
return ERR_REPLCOMM;
}
assert(!REPL_TLS_ENABLED); /* Currently, a fetchresync rollback never uses TLS to connect to the source server */
/* Wait for REPL_RESYNC_SEQNO (if dual-site primary) or REPL_OLD_NEED_INSTANCE_INFO (if multi-site primary)
* or REPL_NEED_INSTINFO (if multi-site primary with supplementary instance support) message
*/
Expand Down
86 changes: 61 additions & 25 deletions sr_unix/gtmrecv_poll_actions.c
Original file line number Diff line number Diff line change
Expand Up @@ -267,18 +267,30 @@ int gtmrecv_poll_actions1(int *pending_data_len, int *buff_unprocessed, unsigned
; /* Empty Body */
if (SS_NORMAL != status)
{
if (REPL_CONN_RESET(status) && EREPL_SEND == repl_errno)
if (EREPL_SEND == repl_errno)
{
repl_log(gtmrecv_log_fp, TRUE, TRUE, "Connection reset while sending XOFF_ACK_ME. "
"Status = %d ; %s\n", status, STRERROR(status));
REPL_CLOSE_CONNECTION(gtmrecv_sock_fd, repl_connection_reset, curr_conn_state, \
gtmrecv_send_cmp2uncmp);
# ifdef GTM_TLS
if (ERR_TLSIOERROR == status)
{ /* Note that both GTMRECV_HANDLE_TLSIOERROR and REPL_CLOSE_CONNECTION do a
* "repl_close" but that is okay since "repl_close" is a no-op in the second call.
*/
GTMRECV_HANDLE_TLSIOERROR("send");
REPL_CLOSE_CONNECTION(gtmrecv_sock_fd, repl_connection_reset, curr_conn_state, \
gtmrecv_send_cmp2uncmp);
} else
# endif
if (REPL_CONN_RESET(status))
{
repl_log(gtmrecv_log_fp, TRUE, TRUE, "Connection reset while sending XOFF_ACK_ME. "
"Status = %d ; %s\n", status, STRERROR(status));
REPL_CLOSE_CONNECTION(gtmrecv_sock_fd, repl_connection_reset, curr_conn_state, \
gtmrecv_send_cmp2uncmp);

} else if (EREPL_SEND == repl_errno)
rts_error_csa(CSA_ARG(NULL) VARLSTCNT(7) ERR_REPLCOMM, 0, ERR_TEXT, 2,
LEN_AND_LIT("Error sending XOFF msg due to BAD_TRANS or UPD crash/shutdown. "
"Error in send"), status);
else
} else
rts_error_csa(CSA_ARG(NULL) VARLSTCNT(7) ERR_REPLCOMM, 0,
ERR_TEXT, 2, LEN_AND_LIT("Error sending XOFF msg due to BAD_TRANS or UPD"
" crash/shutdown. Error in send"), status);
} else
{
assert(EREPL_SELECT == repl_errno);
rts_error_csa(CSA_ARG(NULL) VARLSTCNT(7) ERR_REPLCOMM, 0, ERR_TEXT, 2,
Expand Down Expand Up @@ -415,6 +427,17 @@ int gtmrecv_poll_actions1(int *pending_data_len, int *buff_unprocessed, unsigned
{
if (EREPL_RECV == repl_errno)
{
# ifdef GTM_TLS
if (ERR_TLSIOERROR == status)
{ /* Note that both GTMRECV_HANDLE_TLSIOERROR and REPL_CLOSE_CONNECTION do a "repl_close"
* but that is okay since "repl_close" returns as a no-op in the second call.
*/
GTMRECV_HANDLE_TLSIOERROR("recv");
REPL_CLOSE_CONNECTION(gtmrecv_sock_fd, repl_connection_reset, curr_conn_state, \
gtmrecv_send_cmp2uncmp);
return_status = STOP_POLL;
} else
# endif
if (REPL_CONN_RESET(status))
{
repl_log(gtmrecv_log_fp, TRUE, TRUE, "Connection reset while receiving XOFF_ACK. "
Expand Down Expand Up @@ -464,24 +487,37 @@ int gtmrecv_poll_actions1(int *pending_data_len, int *buff_unprocessed, unsigned
curr_conn_state.send_seqno);
} else
{
if (REPL_CONN_RESET(status) && EREPL_SEND == repl_errno)
if (EREPL_SEND == repl_errno)
{
if (curr_conn_state.send_cmp2uncmp)
{
repl_log(gtmrecv_log_fp, TRUE, TRUE, "Connection reset while sending REPL_CMP2UNCMP. "
"Status = %d ; %s\n", status, STRERROR(status));
# ifdef GTM_TLS
if (ERR_TLSIOERROR == status)
{ /* Note that both GTMRECV_HANDLE_TLSIOERROR and REPL_CLOSE_CONNECTION do a
* "repl_close" but that is okay since "repl_close" is a no-op in the second call.
*/
GTMRECV_HANDLE_TLSIOERROR("send");
REPL_CLOSE_CONNECTION(gtmrecv_sock_fd, repl_connection_reset, curr_conn_state, \
gtmrecv_send_cmp2uncmp);
return_status = STOP_POLL;
} else
# endif
if (REPL_CONN_RESET(status))
{
repl_log(gtmrecv_log_fp, TRUE, TRUE, "Connection reset while sending REPL_BADTRANS. "
"Status = %d ; %s\n", status, STRERROR(status));
}
REPL_CLOSE_CONNECTION(gtmrecv_sock_fd, repl_connection_reset, curr_conn_state, \
gtmrecv_send_cmp2uncmp);
return_status = STOP_POLL;
} else if (EREPL_SEND == repl_errno)
rts_error_csa(CSA_ARG(NULL) VARLSTCNT(7) ERR_REPLCOMM, 0, ERR_TEXT, 2,
LEN_AND_LIT("Error sending REPL_BADTRANS/REPL_CMP2UNCMP. Error in send"), status);
else
if (curr_conn_state.send_cmp2uncmp)
{
repl_log(gtmrecv_log_fp, TRUE, TRUE, "Connection reset while sending REPL_CMP2UNCMP. "
"Status = %d ; %s\n", status, STRERROR(status));
} else
{
repl_log(gtmrecv_log_fp, TRUE, TRUE, "Connection reset while sending REPL_BADTRANS. "
"Status = %d ; %s\n", status, STRERROR(status));
}
REPL_CLOSE_CONNECTION(gtmrecv_sock_fd, repl_connection_reset, curr_conn_state, \
gtmrecv_send_cmp2uncmp);
return_status = STOP_POLL;
} else
rts_error_csa(CSA_ARG(NULL) VARLSTCNT(7) ERR_REPLCOMM, 0, ERR_TEXT, 2,
LEN_AND_LIT("Error sending REPL_BADTRANS/REPL_CMP2UNCMP. Error in send"), status);
} else
{
assert(EREPL_SELECT == repl_errno);
rts_error_csa(CSA_ARG(NULL) VARLSTCNT(7) ERR_REPLCOMM, 0, ERR_TEXT, 2,
Expand Down
47 changes: 32 additions & 15 deletions sr_unix/gtmrecv_process.c
Original file line number Diff line number Diff line change
Expand Up @@ -392,21 +392,31 @@ STATICFNDEF void gtmrecv_repl_send_loop_error(int status, char *msgtypestr)
char print_msg[1024];

assert((EREPL_SEND == repl_errno) || (EREPL_SELECT == repl_errno));
if (REPL_CONN_RESET(status) && EREPL_SEND == repl_errno)
if (EREPL_SEND == repl_errno)
{
repl_log(gtmrecv_log_fp, TRUE, TRUE, "Connection got reset while sending %s message. Status = %d ; %s\n",
msgtypestr, status, STRERROR(status));
repl_connection_reset = TRUE;
repl_close(&gtmrecv_sock_fd);
SNPRINTF(print_msg, SIZEOF(print_msg), "Closing connection on receiver side\n");
repl_log(gtmrecv_log_fp, TRUE, TRUE, print_msg);
gtm_event_log(GTM_EVENT_LOG_ARGC, "MUPIP", "ERR_REPLWARN", print_msg);
return;
} else if (EREPL_SEND == repl_errno)
{
SNPRINTF(print_msg, SIZEOF(print_msg), "Error sending %s message. Error in send : %s",
msgtypestr, STRERROR(status));
rts_error_csa(CSA_ARG(NULL) VARLSTCNT(6) ERR_REPLCOMM, 0, ERR_TEXT, 2, LEN_AND_STR(print_msg));
# ifdef GTM_TLS
if (ERR_TLSIOERROR == status)
{
GTMRECV_HANDLE_TLSIOERROR("send");
return;
} else
# endif
if (REPL_CONN_RESET(status))
{
repl_log(gtmrecv_log_fp, TRUE, TRUE, "Connection got reset while sending %s message. Status = %d ; %s\n",
msgtypestr, status, STRERROR(status));
repl_connection_reset = TRUE;
repl_close(&gtmrecv_sock_fd);
SNPRINTF(print_msg, SIZEOF(print_msg), "Closing connection on receiver side\n");
repl_log(gtmrecv_log_fp, TRUE, TRUE, print_msg);
gtm_event_log(GTM_EVENT_LOG_ARGC, "MUPIP", "ERR_REPLWARN", print_msg);
return;
} else
{
SNPRINTF(print_msg, SIZEOF(print_msg), "Error sending %s message. Error in send : %s",
msgtypestr, STRERROR(status));
rts_error_csa(CSA_ARG(NULL) VARLSTCNT(6) ERR_REPLCOMM, 0, ERR_TEXT, 2, LEN_AND_STR(print_msg));
}
} else if (EREPL_SELECT == repl_errno)
{
SNPRINTF(print_msg, SIZEOF(print_msg), "Error sending %s message. Error in select : %s",
Expand Down Expand Up @@ -2881,6 +2891,13 @@ STATICFNDEF void do_main_loop(boolean_t crash_restart)
{
if (EREPL_RECV == repl_errno)
{
# ifdef GTM_TLS
if (ERR_TLSIOERROR == status)
{
GTMRECV_HANDLE_TLSIOERROR("recv");
return;
} else
# endif
if (REPL_CONN_RESET(status))
{
repl_log(gtmrecv_log_fp, TRUE, TRUE, "Connection reset. Status = %d ; %s\n",
Expand All @@ -2894,7 +2911,7 @@ STATICFNDEF void do_main_loop(boolean_t crash_restart)
}
} else if (EREPL_SELECT == repl_errno)
{
ISSUE_REPLCOMM_ERROR("Error in receiving from source. Error in select", status);
ISSUE_REPLCOMM_ERROR("Error in receiving from source. Error in select", status);
}
}
if (repl_connection_reset)
Expand Down
Loading

0 comments on commit 55a93a2

Please sign in to comment.