sysown · renecannao · May 24, 2024 · May 22, 2024 · May 22, 2024 · May 22, 2024
diff --git a/lib/c_tokenizer.cpp b/lib/c_tokenizer.cpp
@@ -1267,10 +1267,10 @@ void stage_1_parsing(shared_st* shared_st, stage_1_st* stage_1_st, options* opts
 				// Q: `SELECT\s\s  1`
 				//              ^ address used to be replaced by next char
 				// ```
-				if (shared_st->prev_char == ' ' && is_space_char(*shared_st->q)) {
+				if (is_space_char(shared_st->prev_char) && is_space_char(*shared_st->q)) {
 					// if current position in result buffer is the first space found, we move to the next
 					// position, in order to respect the first space char.
-					if (*(shared_st->res_cur_pos-1) != ' ') {
+					if (!is_space_char(*(shared_st->res_cur_pos-1))) {
 						shared_st->res_cur_pos++;
 					}
 
@@ -2393,17 +2393,17 @@ char* mysql_query_digest_and_first_comment_one_it(char* q, int q_len, char** fst
 				// suppress all the double spaces.
 				// ==============================
 				//
-				// The supression is performed using the address of the second space found as the
+				// The suppression is performed using the address of the second space found as the
 				// pivoting point for further space suppression in the result buffer:
 				//
 				// ```
 				// Q: `SELECT\s\s  1`
 				//              ^ address used to be replaced by next char
 				// ```
-				if (shared_st.prev_char == ' ' && is_space_char(*shared_st.q)) {
+				if (is_space_char(shared_st.prev_char) && is_space_char(*shared_st.q)) {
 					// if current position in result buffer is the first space found, we move to the next
 					// position, in order to respect the first space char.
-					if (*(shared_st.res_cur_pos-1) != ' ') {
+					if (!is_space_char(*(shared_st.res_cur_pos-1))) {
 						shared_st.res_cur_pos++;
 					}
 

diff --git a/test/tap/tests/test_mysql_query_digests_stages-t.cpp b/test/tap/tests/test_mysql_query_digests_stages-t.cpp
@@ -32,11 +32,11 @@
 #include <random>
 #include <vector>
 #include <string>
-#include <regex>
 
 #include "json.hpp"
 #include "proxysql.h"
 #include "proxysql_utils.h"
+#include "re2/re2.h"
 #include "command_line.h"
 #include "tap.h"
 
@@ -151,8 +151,10 @@ nlohmann::json get_tests_defs(const string& filepath) {
 	std::ifstream file_stream(filepath);
 	std::string test_file_contents((std::istreambuf_iterator<char>(file_stream)), (std::istreambuf_iterator<char>()));
 
-	std::regex comment_pattern { ".*\\/\\/.*[\\r\\n]" };
-	string test_file_no_comments { std::regex_replace(test_file_contents, comment_pattern, "") };
+	std::string comment_pattern { ".*\\/\\/.*[\\r\\n]" };
+	string test_file_no_comments { test_file_contents };
+
+	re2::RE2::GlobalReplace(&test_file_no_comments, comment_pattern, "");
 	nlohmann::json j_test_defs = nlohmann::json::parse(test_file_no_comments, nullptr, true);
 
 	return j_test_defs;

diff --git a/test/tap/tests/tokenizer_payloads/regular_tokenizer_digests.hjson b/test/tap/tests/tokenizer_payloads/regular_tokenizer_digests.hjson
@@ -31,6 +31,84 @@
 		"s3": "select ?",
 		"s4": "select ?"
 	},
+	// Space compression; control characters, digits and literals
+	{
+		"q": [
+			"SELECT\r\n\t1",
+			"SELECT\r\n\t 1",
+			"SELECT \r\n\t1",
+			"SELECT \r\n\t 1",
+			"SELECT \r\n\t 1\r\n\t",
+			"SELECT \r\n\t 1 \r\n\t"
+		],
+		"s1": "SELECT ?",
+		"s2": "SELECT ?",
+		"s3": "SELECT ?",
+		"s4": "SELECT ?"
+	},
+	{
+		"q": [
+			"SELECT\n'foo',\n\t4\n\t",
+			"SELECT\n\t 'foo',\n\t4\n\t",
+			"SELECT\n\t 'foo' ,\n\t4\n\t",
+			"SELECT \n\t 'foo' ,\n\t4\n\t",
+			"SELECT \n\t 'foo' \n\t ,\n\t4\n\t",
+			"SELECT\n\t \n\t'foo',\n\t4\n\t",
+			"SELECT\n\t \n\t'foo',\n\t \n\t4\n\t",
+			"SELECT\n\t \n\t'foo',\n\t \n\t4\n\t"
+		],
+		"s2": "SELECT ?,?",
+		"s3": "SELECT ?,?",
+		"s4": "SELECT ?,?"
+	},
+	// Space compression; control characters, digits, literals and operators
+	{
+		"q": [
+			"SELECT\n\t-\t'1'",
+			"SELECT\n\t-\t'1'\n\t",
+			"SELECT\n\t -\t'1'\n\t",
+			"SELECT\n\t - \t'1'",
+			"SELECT\n\t -\n\t '1'",
+			"SELECT\n\t - \n\t '1' \n\t"
+		],
+		"s1": "SELECT - ?",
+		"s2": "SELECT ?",
+		"s3": "SELECT ?",
+		"s4": "SELECT ?"
+	},
+	{
+		"q": [ "SELECT (\n  \t + \t\t 2   / 2 \t\t)" ],
+		"s1": "SELECT ( + ? / ? )"
+	},
+	{
+		"q": [ "SELECT (\t\t+2/2\t\t)" ],
+		"s1": "SELECT ( +?/? )"
+	},
+	{
+		"q": [
+			"SELECT (\t\t+2/2\t\t)",
+			"SELECT (\n  \t + \t\t 2   / 2 \t\t)"
+		],
+		"s2": "SELECT (?/?)",
+		"s3": "SELECT (?/?)",
+		"s4": "SELECT (?/?)"
+	},
+	{
+		"q": [
+			"SELECT 1,\n-4",
+			"SELECT 1, \n-4",
+			"SELECT 1,\n -4",
+			"SELECT 1, \n -4",
+			"SELECT 'foo',\n\t-4",
+			"SELECT 'foo', \n\t-4",
+			"SELECT 'foo',\n\t -4",
+			"SELECT 'foo', \n\t -4"
+		],
+		"s1": "SELECT ?, -?",
+		"s2": "SELECT ?,?",
+		"s3": "SELECT ?,?",
+		"s4": "SELECT ?,?"
+	},
 	// Final spaces and ending query delimiters (';')
 	{
 		"q": [