From a9d35670a0679450fb6c6b9ad830f1108545f2ce Mon Sep 17 00:00:00 2001 From: Jason Tedor Date: Fri, 14 Apr 2017 11:59:54 -0400 Subject: [PATCH] Improve performance of extracting warning value When building headers for a REST response, we de-duplicate the warning headers based on the actual warning value. The current implementation of this uses a capturing regular expression that is prone to excessive backtracking. In cases a request involves a large number of warnings, this extraction can be a severe performance penalty. An example where this can arise is a bulk indexing request that utilizes a deprecated feature (e.g., using deprecated forms of boolean values). This commit is an attempt to address this performance regression. We already know the format of the warning header, so we do not need to use a regular expression to parse it but rather can parse it by hand to extract the warning value. This gains back the vast majority of the performance lost due to the usage of a deprecated feature. There is still a performance loss due to logging the deprecation message but we do not address that concern in this commit. --- .../common/logging/DeprecationLogger.java | 33 ++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) diff --git a/core/src/main/java/org/elasticsearch/common/logging/DeprecationLogger.java b/core/src/main/java/org/elasticsearch/common/logging/DeprecationLogger.java index 8d412a0587b46..7594f96e2dfb8 100644 --- a/core/src/main/java/org/elasticsearch/common/logging/DeprecationLogger.java +++ b/core/src/main/java/org/elasticsearch/common/logging/DeprecationLogger.java @@ -226,10 +226,41 @@ public void deprecated(String msg, Object... params) { * @return the extracted warning value */ public static String extractWarningValueFromWarningHeader(final String s) { + /* + * We know the exact format of the warning header, so to extract the warning value we can skip forward from the front to the first + * quote, and skip backwards from the end to the penultimate quote: + * + * 299 Elasticsearch-6.0.0 "warning value" "Sat, 25, Feb 2017 10:27:43 GMT" + * ^ ^ ^ + * firstQuote penultimateQuote lastQuote + * + * We do it this way rather than seeking forward after the first quote because there could be escaped quotes in the warning value + * but since there are none in the warning date, we can skip backwards to find the quote that closes the quoted warning value. + * + * We parse this manually rather than using the capturing regular expression because the regular expression involves a lot of + * backtracking and carries a performance penalty. However, when assertions are enabled, we still use the regular expression to + * verify that we are maintaining the warning header format. + */ + final int firstQuote = s.indexOf('\"'); + final int lastQuote = s.lastIndexOf('\"'); + final int penultimateQuote = s.lastIndexOf('\"', lastQuote - 1); + final String warningValue = s.substring(firstQuote + 1, penultimateQuote - 2); + assert assertWarningValue(s, warningValue); + return warningValue; + } + + /** + * Assert that the specified string has the warning value equal to the provided warning value. + * + * @param s the string representing a full warning header + * @param warningValue the expected warning header + * @return {@code true} if the specified string has the expected warning value + */ + private static boolean assertWarningValue(final String s, final String warningValue) { final Matcher matcher = WARNING_HEADER_PATTERN.matcher(s); final boolean matches = matcher.matches(); assert matches; - return matcher.group(1); + return matcher.group(1).equals(warningValue); } /**