Merge remote-tracking branch 'origin/main' into retention-leases-not-null
arteam · Oct 8, 2024 · 4d62b25 · 4d62b25
2 parents d9941f8 + b80272a
commit 4d62b25
Show file tree

Hide file tree

Showing 36 changed files with 517 additions and 702 deletions.
diff --git a/docs/changelog/114002.yaml b/docs/changelog/114002.yaml
@@ -0,0 +1,5 @@
+pr: 114002
+summary: Add a `mustache.max_output_size_bytes` setting to limit the length of results from mustache scripts
+area: Infra/Scripting
+type: enhancement
+issues: []
diff --git a/docs/reference/intro.asciidoc b/docs/reference/intro.asciidoc
@@ -204,7 +204,7 @@ For general content, you have the following options for adding data to {es} indi
 If you're building a website or app, then you can call Elasticsearch APIs using an https://www.elastic.co/guide/en/elasticsearch/client/index.html[{es} client] in the programming language of your choice. If you use the Python client, then check out the `elasticsearch-labs` repo for various https://github.com/elastic/elasticsearch-labs/tree/main/notebooks/search/python-examples[example notebooks]. 
 * {kibana-ref}/connect-to-elasticsearch.html#upload-data-kibana[File upload]: Use the {kib} file uploader to index single files for one-off testing and exploration. The GUI guides you through setting up your index and field mappings.
 * https://github.com/elastic/crawler[Web crawler]: Extract and index web page content into {es} documents.
-* {enterprise-search-ref}/connectors.html[Connectors]: Sync data from various third-party data sources to create searchable, read-only replicas in {es}.
+* <<es-connectors,Connectors>>: Sync data from various third-party data sources to create searchable, read-only replicas in {es}.
 
 [discrete]
 [[es-ingestion-overview-timestamped]]
@@ -492,4 +492,4 @@ and restrictions. You can review the following guides to learn how to tune your
 * <<use-elasticsearch-for-time-series-data,Tune for time series data>>
 
 Many {es} options come with different performance considerations and trade-offs. The best way to determine the
-optimal configuration for your use case is through https://www.elastic.co/elasticon/conf/2016/sf/quantitative-cluster-sizing[testing with your own data and queries].
+optimal configuration for your use case is through https://www.elastic.co/elasticon/conf/2016/sf/quantitative-cluster-sizing[testing with your own data and queries].
diff --git a/docs/reference/search/search-your-data/semantic-search-semantic-text.asciidoc b/docs/reference/search/search-your-data/semantic-search-semantic-text.asciidoc
@@ -89,6 +89,16 @@ PUT semantic-embeddings
 It will be used to generate the embeddings based on the input text.
 Every time you ingest data into the related `semantic_text` field, this endpoint will be used for creating the vector representation of the text.
 
+[NOTE]
+====
+If you're using web crawlers or connectors to generate indices, you have to
+<<indices-put-mapping,update the index mappings>> for these indices to
+include the `semantic_text` field. Once the mapping is updated, you'll need to run
+a full web crawl or a full connector sync. This ensures that all existing
+documents are reprocessed and updated with the new semantic embeddings,
+enabling semantic search on the updated data.
+====
+
 
 [discrete]
 [[semantic-text-load-data]]
@@ -118,6 +128,13 @@ Create the embeddings from the text by reindexing the data from the `test-data`
 The data in the `content` field will be reindexed into the `content` semantic text field of the destination index.
 The reindexed data will be processed by the {infer} endpoint associated with the `content` semantic text field.
 
+[NOTE]
+====
+This step uses the reindex API to simulate data ingestion. If you are working with data that has already been indexed,
+rather than using the test-data set, reindexing is required to ensure that the data is processed by the {infer} endpoint
+and the necessary embeddings are generated.
+====
+
 [source,console]
 ------------------------------------------------------------
 POST _reindex?wait_for_completion=false

diff --git a/...ash-bridge/src/main/java/org/elasticsearch/logstashbridge/script/ScriptServiceBridge.java b/...ash-bridge/src/main/java/org/elasticsearch/logstashbridge/script/ScriptServiceBridge.java
@@ -53,7 +53,7 @@ private static ScriptService getScriptService(final Settings settings, final Lon
             PainlessScriptEngine.NAME,
             new PainlessScriptEngine(settings, scriptContexts),
             MustacheScriptEngine.NAME,
-            new MustacheScriptEngine()
+            new MustacheScriptEngine(settings)
         );
         return new ScriptService(settings, scriptEngines, ScriptModule.CORE_CONTEXTS, timeProvider);
     }

diff --git a/libs/simdvec/build.gradle b/libs/simdvec/build.gradle
@@ -7,6 +7,7 @@
  * License v3.0 only", or the "Server Side Public License, v 1".
  */
 
+import org.elasticsearch.gradle.internal.info.BuildParams
 import org.elasticsearch.gradle.internal.precommit.CheckForbiddenApisTask
 
 apply plugin: 'elasticsearch.publish'
@@ -32,7 +33,7 @@ tasks.matching { it.name == "compileMain21Java" }.configureEach {
 }
 
 tasks.named('test').configure {
-  if (JavaVersion.current().majorVersion.toInteger() >= 21) {
+  if (BuildParams.getRuntimeJavaVersion().majorVersion.toInteger() >= 21) {
     jvmArgs '--add-modules=jdk.incubator.vector'
   }
 }

diff --git a/modules/lang-mustache/src/main/java/org/elasticsearch/script/mustache/MustachePlugin.java b/modules/lang-mustache/src/main/java/org/elasticsearch/script/mustache/MustachePlugin.java
@@ -44,7 +44,7 @@ public class MustachePlugin extends Plugin implements ScriptPlugin, ActionPlugin
 
     @Override
     public ScriptEngine getScriptEngine(Settings settings, Collection<ScriptContext<?>> contexts) {
-        return new MustacheScriptEngine();
+        return new MustacheScriptEngine(settings);
     }
 
     @Override

diff --git a/...s/lang-mustache/src/main/java/org/elasticsearch/script/mustache/MustacheScriptEngine.java b/...s/lang-mustache/src/main/java/org/elasticsearch/script/mustache/MustacheScriptEngine.java
@@ -14,6 +14,13 @@
 
 import org.apache.logging.log4j.LogManager;
 import org.apache.logging.log4j.Logger;
+import org.elasticsearch.ElasticsearchParseException;
+import org.elasticsearch.ExceptionsHelper;
+import org.elasticsearch.common.settings.Setting;
+import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.common.text.SizeLimitingStringWriter;
+import org.elasticsearch.common.unit.ByteSizeValue;
+import org.elasticsearch.common.unit.MemorySizeValue;
 import org.elasticsearch.script.GeneralScriptException;
 import org.elasticsearch.script.Script;
 import org.elasticsearch.script.ScriptContext;
@@ -47,6 +54,19 @@ public final class MustacheScriptEngine implements ScriptEngine {
 
     public static final String NAME = "mustache";
 
+    public static final Setting<ByteSizeValue> MUSTACHE_RESULT_SIZE_LIMIT = new Setting<>(
+        "mustache.max_output_size_bytes",
+        s -> "1mb",
+        s -> MemorySizeValue.parseBytesSizeValueOrHeapRatio(s, "mustache.max_output_size_bytes"),
+        Setting.Property.NodeScope
+    );
+
+    private final int sizeLimit;
+
+    public MustacheScriptEngine(Settings settings) {
+        sizeLimit = (int) MUSTACHE_RESULT_SIZE_LIMIT.get(settings).getBytes();
+    }
+
     /**
      * Compile a template string to (in this case) a Mustache object that can
      * later be re-used for execution to fill in missing parameter values.
@@ -118,10 +138,15 @@ private class MustacheExecutableScript extends TemplateScript {
 
         @Override
         public String execute() {
-            final StringWriter writer = new StringWriter();
+            StringWriter writer = new SizeLimitingStringWriter(sizeLimit);
             try {
                 template.execute(writer, params);
             } catch (Exception e) {
+                // size limit exception can appear at several places in the causal list depending on script & context
+                if (ExceptionsHelper.unwrap(e, SizeLimitingStringWriter.SizeLimitExceededException.class) != null) {
+                    // don't log, client problem
+                    throw new ElasticsearchParseException("Mustache script result size limit exceeded", e);
+                }
                 if (shouldLogException(e)) {
                     logger.error(() -> format("Error running %s", template), e);
                 }

diff --git a/...-mustache/src/test/java/org/elasticsearch/script/mustache/CustomMustacheFactoryTests.java b/...-mustache/src/test/java/org/elasticsearch/script/mustache/CustomMustacheFactoryTests.java
@@ -9,6 +9,7 @@
 
 package org.elasticsearch.script.mustache;
 
+import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.script.Script;
 import org.elasticsearch.script.ScriptEngine;
 import org.elasticsearch.script.TemplateScript;
@@ -65,7 +66,7 @@ public void testCreateEncoder() {
     }
 
     public void testJsonEscapeEncoder() {
-        final ScriptEngine engine = new MustacheScriptEngine();
+        final ScriptEngine engine = new MustacheScriptEngine(Settings.EMPTY);
         final Map<String, String> params = randomBoolean() ? Map.of(Script.CONTENT_TYPE_OPTION, JSON_MEDIA_TYPE) : Map.of();
 
         TemplateScript.Factory compiled = engine.compile(null, "{\"field\": \"{{value}}\"}", TemplateScript.CONTEXT, params);
@@ -75,7 +76,7 @@ public void testJsonEscapeEncoder() {
     }
 
     public void testDefaultEncoder() {
-        final ScriptEngine engine = new MustacheScriptEngine();
+        final ScriptEngine engine = new MustacheScriptEngine(Settings.EMPTY);
         final Map<String, String> params = Map.of(Script.CONTENT_TYPE_OPTION, PLAIN_TEXT_MEDIA_TYPE);
 
         TemplateScript.Factory compiled = engine.compile(null, "{\"field\": \"{{value}}\"}", TemplateScript.CONTEXT, params);
@@ -85,7 +86,7 @@ public void testDefaultEncoder() {
     }
 
     public void testUrlEncoder() {
-        final ScriptEngine engine = new MustacheScriptEngine();
+        final ScriptEngine engine = new MustacheScriptEngine(Settings.EMPTY);
         final Map<String, String> params = Map.of(Script.CONTENT_TYPE_OPTION, X_WWW_FORM_URLENCODED_MEDIA_TYPE);
 
         TemplateScript.Factory compiled = engine.compile(null, "{\"field\": \"{{value}}\"}", TemplateScript.CONTEXT, params);

diff --git a/...g-mustache/src/test/java/org/elasticsearch/script/mustache/MustacheScriptEngineTests.java b/...g-mustache/src/test/java/org/elasticsearch/script/mustache/MustacheScriptEngineTests.java
@@ -8,8 +8,13 @@
  */
 package org.elasticsearch.script.mustache;
 
+import com.github.mustachejava.MustacheException;
 import com.github.mustachejava.MustacheFactory;
 
+import org.elasticsearch.ElasticsearchParseException;
+import org.elasticsearch.common.Strings;
+import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.common.text.SizeLimitingStringWriter;
 import org.elasticsearch.script.GeneralScriptException;
 import org.elasticsearch.script.Script;
 import org.elasticsearch.script.TemplateScript;
@@ -24,6 +29,9 @@
 import java.util.List;
 import java.util.Map;
 
+import static org.elasticsearch.test.LambdaMatchers.transformedMatch;
+import static org.hamcrest.Matchers.allOf;
+import static org.hamcrest.Matchers.endsWith;
 import static org.hamcrest.Matchers.equalTo;
 import static org.hamcrest.Matchers.instanceOf;
 import static org.hamcrest.Matchers.startsWith;
@@ -37,7 +45,7 @@ public class MustacheScriptEngineTests extends ESTestCase {
 
     @Before
     public void setup() {
-        qe = new MustacheScriptEngine();
+        qe = new MustacheScriptEngine(Settings.builder().put(MustacheScriptEngine.MUSTACHE_RESULT_SIZE_LIMIT.getKey(), "1kb").build());
         factory = CustomMustacheFactory.builder().build();
     }
 
@@ -402,6 +410,24 @@ public void testEscapeJson() throws IOException {
         }
     }
 
+    public void testResultSizeLimit() throws IOException {
+        String vals = "\"" + "{{val}}".repeat(200) + "\"";
+        String params = "\"val\":\"aaaaaaaaaa\"";
+        XContentParser parser = createParser(JsonXContent.jsonXContent, Strings.format("{\"source\":%s,\"params\":{%s}}", vals, params));
+        Script script = Script.parse(parser);
+        var compiled = qe.compile(null, script.getIdOrCode(), TemplateScript.CONTEXT, Map.of());
+        TemplateScript templateScript = compiled.newInstance(script.getParams());
+        var ex = expectThrows(ElasticsearchParseException.class, templateScript::execute);
+        assertThat(ex.getCause(), instanceOf(MustacheException.class));
+        assertThat(
+            ex.getCause().getCause(),
+            allOf(
+                instanceOf(SizeLimitingStringWriter.SizeLimitExceededException.class),
+                transformedMatch(Throwable::getMessage, endsWith("has exceeded the size limit [1024]"))
+            )
+        );
+    }
+
     private String getChars() {
         String string = randomRealisticUnicodeOfCodepointLengthBetween(0, 10);
         for (int i = 0; i < string.length(); i++) {

diff --git a/modules/lang-mustache/src/test/java/org/elasticsearch/script/mustache/MustacheTests.java b/modules/lang-mustache/src/test/java/org/elasticsearch/script/mustache/MustacheTests.java
@@ -9,6 +9,7 @@
 package org.elasticsearch.script.mustache;
 
 import org.elasticsearch.common.bytes.BytesReference;
+import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.common.xcontent.XContentHelper;
 import org.elasticsearch.core.Strings;
 import org.elasticsearch.script.ScriptEngine;
@@ -39,7 +40,7 @@
 
 public class MustacheTests extends ESTestCase {
 
-    private ScriptEngine engine = new MustacheScriptEngine();
+    private ScriptEngine engine = new MustacheScriptEngine(Settings.EMPTY);
 
     public void testBasics() {
         String template = """

diff --git a/modules/rest-root/src/main/java/org/elasticsearch/rest/root/TransportMainAction.java b/modules/rest-root/src/main/java/org/elasticsearch/rest/root/TransportMainAction.java
@@ -14,6 +14,7 @@
 import org.elasticsearch.action.support.ActionFilters;
 import org.elasticsearch.action.support.TransportAction;
 import org.elasticsearch.cluster.ClusterState;
+import org.elasticsearch.cluster.metadata.Metadata;
 import org.elasticsearch.cluster.service.ClusterService;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.common.util.concurrent.EsExecutors;
@@ -48,7 +49,7 @@ protected void doExecute(Task task, MainRequest request, ActionListener<MainResp
                 nodeName,
                 IndexVersion.current().luceneVersion().toString(),
                 clusterState.getClusterName(),
-                clusterState.metadata().clusterUUID(),
+                clusterState.metadata().clusterUUIDCommitted() ? clusterState.metadata().clusterUUID() : Metadata.UNKNOWN_CLUSTER_UUID,
                 Build.current()
             )
         );

diff --git a/modules/rest-root/src/test/java/org/elasticsearch/rest/root/MainActionTests.java b/modules/rest-root/src/test/java/org/elasticsearch/rest/root/MainActionTests.java
@@ -9,13 +9,14 @@
 
 package org.elasticsearch.rest.root;
 
-import org.elasticsearch.action.ActionListener;
 import org.elasticsearch.action.support.ActionFilters;
+import org.elasticsearch.action.support.ActionTestUtils;
 import org.elasticsearch.cluster.ClusterName;
 import org.elasticsearch.cluster.ClusterState;
 import org.elasticsearch.cluster.block.ClusterBlock;
 import org.elasticsearch.cluster.block.ClusterBlockLevel;
 import org.elasticsearch.cluster.block.ClusterBlocks;
+import org.elasticsearch.cluster.metadata.Metadata;
 import org.elasticsearch.cluster.service.ClusterService;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.rest.RestStatus;
@@ -26,7 +27,7 @@
 import org.elasticsearch.transport.TransportService;
 
 import java.util.Collections;
-import java.util.concurrent.atomic.AtomicReference;
+import java.util.concurrent.atomic.AtomicBoolean;
 
 import static org.mockito.Mockito.mock;
 import static org.mockito.Mockito.times;
@@ -39,7 +40,7 @@ public void testMainActionClusterAvailable() {
         final ClusterService clusterService = mock(ClusterService.class);
         final ClusterName clusterName = new ClusterName("elasticsearch");
         final Settings settings = Settings.builder().put("node.name", "my-node").build();
-        ClusterBlocks blocks;
+        final ClusterBlocks blocks;
         if (randomBoolean()) {
             if (randomBoolean()) {
                 blocks = ClusterBlocks.EMPTY_CLUSTER_BLOCK;
@@ -73,7 +74,12 @@ public void testMainActionClusterAvailable() {
                 )
                 .build();
         }
-        ClusterState state = ClusterState.builder(clusterName).blocks(blocks).build();
+        final Metadata.Builder metadata = new Metadata.Builder();
+        if (randomBoolean()) {
+            metadata.clusterUUID(randomUUID());
+            metadata.clusterUUIDCommitted(randomBoolean());
+        }
+        final ClusterState state = ClusterState.builder(clusterName).metadata(metadata).blocks(blocks).build();
         when(clusterService.state()).thenReturn(state);
 
         TransportService transportService = new TransportService(
@@ -85,21 +91,21 @@ public void testMainActionClusterAvailable() {
             null,
             Collections.emptySet()
         );
-        TransportMainAction action = new TransportMainAction(settings, transportService, mock(ActionFilters.class), clusterService);
-        AtomicReference<MainResponse> responseRef = new AtomicReference<>();
-        action.doExecute(mock(Task.class), new MainRequest(), new ActionListener<>() {
-            @Override
-            public void onResponse(MainResponse mainResponse) {
-                responseRef.set(mainResponse);
-            }
-
-            @Override
-            public void onFailure(Exception e) {
-                logger.error("unexpected error", e);
-            }
-        });
+        final AtomicBoolean listenerCalled = new AtomicBoolean();
+        new TransportMainAction(settings, transportService, mock(ActionFilters.class), clusterService).doExecute(
+            mock(Task.class),
+            new MainRequest(),
+            ActionTestUtils.assertNoFailureListener(mainResponse -> {
+                assertNotNull(mainResponse);
+                assertEquals(
+                    state.metadata().clusterUUIDCommitted() ? state.metadata().clusterUUID() : Metadata.UNKNOWN_CLUSTER_UUID,
+                    mainResponse.getClusterUuid()
+                );
+                assertFalse(listenerCalled.getAndSet(true));
+            })
+        );
 
-        assertNotNull(responseRef.get());
+        assertTrue(listenerCalled.get());
         verify(clusterService, times(1)).state();
     }
 }
diff --git a/muted-tests.yml b/muted-tests.yml
@@ -370,6 +370,12 @@ tests:
   issue: https://github.com/elastic/elasticsearch/issues/114188
 - class: org.elasticsearch.ingest.geoip.IpinfoIpDataLookupsTests
   issue: https://github.com/elastic/elasticsearch/issues/114266
+- class: org.elasticsearch.index.SearchSlowLogTests
+  method: testLevelPrecedence
+  issue: https://github.com/elastic/elasticsearch/issues/114300
+- class: org.elasticsearch.index.SearchSlowLogTests
+  method: testTwoLoggersDifferentLevel
+  issue: https://github.com/elastic/elasticsearch/issues/114301
 
 # Examples:
 #

diff --git a/...l-dependencies/src/yamlRestTest/java/org/elasticsearch/ingest/AbstractScriptTestCase.java b/...l-dependencies/src/yamlRestTest/java/org/elasticsearch/ingest/AbstractScriptTestCase.java
@@ -31,7 +31,7 @@ public abstract class AbstractScriptTestCase extends ESTestCase {
 
     @Before
     public void init() throws Exception {
-        MustacheScriptEngine engine = new MustacheScriptEngine();
+        MustacheScriptEngine engine = new MustacheScriptEngine(Settings.EMPTY);
         Map<String, ScriptEngine> engines = Collections.singletonMap(engine.getType(), engine);
         scriptService = new ScriptService(Settings.EMPTY, engines, ScriptModule.CORE_CONTEXTS, () -> 1L);
     }

diff --git a/renovate.json b/renovate.json
@@ -0,0 +1,21 @@
+{
+  "$schema": "https://docs.renovatebot.com/renovate-schema.json",
+  "extends": [
+    "github>elastic/renovate-config:only-chainguard"
+  ],
+  "customManagers": [
+    {
+      "description": "Extract Wolfi images from elasticsearch DockerBase configuration",
+      "customType": "regex",
+      "fileMatch": [
+        "build\\-tools\\-internal\\/src\\/main\\/java\\/org\\/elasticsearch\\/gradle\\/internal\\/DockerBase\\.java$"
+      ],
+      "matchStrings": [
+        "\\s*\"?(?<depName>[^\\s:@\"]+)(?::(?<currentValue>[-a-zA-Z0-9.]+))?(?:@(?<currentDigest>sha256:[a-zA-Z0-9]+))?\"?"
+      ],
+      "currentValueTemplate": "{{#if currentValue}}{{{currentValue}}}{{else}}latest{{/if}}",
+      "autoReplaceStringTemplate": "\"{{{depName}}}{{#if newValue}}:{{{newValue}}}{{/if}}{{#if newDigest}}@{{{newDigest}}}{{/if}}\"",
+      "datasourceTemplate": "docker"
+    }
+  ]
+}