From df5b51da252ab92275c1b6fdceee53715f2548e0 Mon Sep 17 00:00:00 2001 From: Rohan Date: Wed, 26 Feb 2020 16:41:18 -0800 Subject: [PATCH 1/2] fix: idempotent terminate that can handle hung streams (#4643) Fixes a couple issues with terminate: - don't throw if the query doesn't get into NOT_RUNNING state. This can happen when streams threads are stuck pending shutdown. - make terminate idempotent --- .../src/main/java/io/confluent/ksql/engine/KsqlEngine.java | 6 ++++-- .../ksql/rest/server/computation/StatementExecutor.java | 6 ++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/ksql-engine/src/main/java/io/confluent/ksql/engine/KsqlEngine.java b/ksql-engine/src/main/java/io/confluent/ksql/engine/KsqlEngine.java index d88725b6ba3d..b8a04cc9bc02 100644 --- a/ksql-engine/src/main/java/io/confluent/ksql/engine/KsqlEngine.java +++ b/ksql-engine/src/main/java/io/confluent/ksql/engine/KsqlEngine.java @@ -201,8 +201,10 @@ private void unregisterQuery(final ServiceContext serviceContext, final QueryMet final String applicationId = query.getQueryApplicationId(); if (!query.getState().equalsIgnoreCase("NOT_RUNNING")) { - throw new IllegalStateException("query not stopped." - + " id " + applicationId + ", state: " + query.getState()); + log.warn( + "Unregistering query that has not terminated. " + + "This may happen when streams threads are hung. State: " + query.getState() + ); } if (!allLiveQueries.remove(query)) { diff --git a/ksql-rest-app/src/main/java/io/confluent/ksql/rest/server/computation/StatementExecutor.java b/ksql-rest-app/src/main/java/io/confluent/ksql/rest/server/computation/StatementExecutor.java index 5c7e116f8764..b375548b167f 100644 --- a/ksql-rest-app/src/main/java/io/confluent/ksql/rest/server/computation/StatementExecutor.java +++ b/ksql-rest-app/src/main/java/io/confluent/ksql/rest/server/computation/StatementExecutor.java @@ -366,10 +366,8 @@ private KsqlConfig buildMergedConfig(final Command command) { private void terminateQuery(final PreparedStatement terminateQuery) { final QueryId queryId = terminateQuery.getStatement().getQueryId(); - ksqlEngine.getPersistentQuery(queryId) - .orElseThrow(() -> - new KsqlException(String.format("No running query with id %s was found", queryId))) - .close(); + final Optional query = ksqlEngine.getPersistentQuery(queryId); + query.ifPresent(PersistentQueryMetadata::close); } private void maybeTerminateQueryForLegacyDropCommand( From b04906dcb2ffef6edc4847d0960566866c82973f Mon Sep 17 00:00:00 2001 From: Almog Gavra Date: Thu, 27 Feb 2020 12:06:50 -0800 Subject: [PATCH 2/2] fix: don't cleanup topics on engine close (#4658) Co-authored-by: Rohan Co-authored-by: Andy Coates --- .../io/confluent/ksql/engine/KsqlEngine.java | 2 +- .../ksql/util/PersistentQueryMetadata.java | 5 ++ .../io/confluent/ksql/util/QueryMetadata.java | 33 +++++++++-- .../ksql/util/TransientQueryMetadata.java | 9 ++- .../confluent/ksql/engine/KsqlEngineTest.java | 58 +++++++++++++++++++ .../ksql/util/QueryMetadataTest.java | 52 ++++++++++++++++- .../ksql/util/TransientQueryMetadataTest.java | 14 +++++ 7 files changed, 163 insertions(+), 10 deletions(-) diff --git a/ksql-engine/src/main/java/io/confluent/ksql/engine/KsqlEngine.java b/ksql-engine/src/main/java/io/confluent/ksql/engine/KsqlEngine.java index b8a04cc9bc02..8a7ad48ea82d 100644 --- a/ksql-engine/src/main/java/io/confluent/ksql/engine/KsqlEngine.java +++ b/ksql-engine/src/main/java/io/confluent/ksql/engine/KsqlEngine.java @@ -175,7 +175,7 @@ public ExecuteResult execute( @Override public void close() { - allLiveQueries.forEach(QueryMetadata::close); + allLiveQueries.forEach(QueryMetadata::stop); engineMetrics.close(); aggregateMetricsCollector.shutdown(); } diff --git a/ksql-engine/src/main/java/io/confluent/ksql/util/PersistentQueryMetadata.java b/ksql-engine/src/main/java/io/confluent/ksql/util/PersistentQueryMetadata.java index 3947e378bdf6..348fccc49e8e 100644 --- a/ksql-engine/src/main/java/io/confluent/ksql/util/PersistentQueryMetadata.java +++ b/ksql-engine/src/main/java/io/confluent/ksql/util/PersistentQueryMetadata.java @@ -136,4 +136,9 @@ public Optional getMaterialization( ) { return materializationProvider.map(builder -> builder.build(queryId, contextStacker)); } + + @Override + public void stop() { + doClose(false); + } } diff --git a/ksql-engine/src/main/java/io/confluent/ksql/util/QueryMetadata.java b/ksql-engine/src/main/java/io/confluent/ksql/util/QueryMetadata.java index d51706afa860..038d3964559e 100644 --- a/ksql-engine/src/main/java/io/confluent/ksql/util/QueryMetadata.java +++ b/ksql-engine/src/main/java/io/confluent/ksql/util/QueryMetadata.java @@ -30,7 +30,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -public class QueryMetadata { +public abstract class QueryMetadata { private static final Logger LOG = LoggerFactory.getLogger(QueryMetadata.class); @@ -140,14 +140,39 @@ public boolean hasEverBeenStarted() { return everStarted; } + + /** + * Stops the query without cleaning up the external resources + * so that it can be resumed when we call {@link #start()}. + * + *

NOTE: {@link TransientQueryMetadata} overrides this method + * since any time a transient query is stopped the external resources + * should be cleaned up.

+ * + * @see #close() + */ + public abstract void stop(); + + /** + * Closes the {@code QueryMetadata} and cleans up any of + * the resources associated with it (e.g. internal topics, + * schemas, etc...). + * + * @see QueryMetadata#stop() + */ public void close() { + doClose(true); + closeCallback.accept(this); + } + + protected void doClose(final boolean cleanUp) { kafkaStreams.close(); - kafkaStreams.cleanUp(); + if (cleanUp) { + kafkaStreams.cleanUp(); + } queryStateListener.ifPresent(QueryStateListener::close); - - closeCallback.accept(this); } public void start() { diff --git a/ksql-engine/src/main/java/io/confluent/ksql/util/TransientQueryMetadata.java b/ksql-engine/src/main/java/io/confluent/ksql/util/TransientQueryMetadata.java index 68e9c4ce8bd4..54996df8d076 100644 --- a/ksql-engine/src/main/java/io/confluent/ksql/util/TransientQueryMetadata.java +++ b/ksql-engine/src/main/java/io/confluent/ksql/util/TransientQueryMetadata.java @@ -93,13 +93,18 @@ public void setLimitHandler(final LimitHandler limitHandler) { } @Override - public void close() { + public void stop() { + close(); + } + + @Override + protected void doClose(final boolean cleanUp) { // To avoid deadlock, close the queue first to ensure producer side isn't blocked trying to // write to the blocking queue, otherwise super.close call can deadlock: rowQueue.close(); // Now safe to close: - super.close(); + super.doClose(cleanUp); isRunning.set(false); } } diff --git a/ksql-engine/src/test/java/io/confluent/ksql/engine/KsqlEngineTest.java b/ksql-engine/src/test/java/io/confluent/ksql/engine/KsqlEngineTest.java index ee50a83b303f..4f9b1b125497 100644 --- a/ksql-engine/src/test/java/io/confluent/ksql/engine/KsqlEngineTest.java +++ b/ksql-engine/src/test/java/io/confluent/ksql/engine/KsqlEngineTest.java @@ -33,6 +33,7 @@ import static org.junit.Assert.fail; import static org.mockito.ArgumentMatchers.any; import static org.mockito.Mockito.never; +import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; import static org.mockito.Mockito.verifyNoMoreInteractions; @@ -675,6 +676,63 @@ public void shouldCleanUpInternalTopicsOnClose() { verify(topicClient).deleteInternalTopics(query.getQueryApplicationId()); } + @Test + public void shouldCleanUpInternalTopicsOnEngineCloseForTransientQueries() { + // Given: + final List query = KsqlEngineTestUtil.execute( + serviceContext, + ksqlEngine, + "select * from test1 EMIT CHANGES;", + KSQL_CONFIG, Collections.emptyMap() + ); + + query.get(0).start(); + + // When: + ksqlEngine.close(); + + // Then: + verify(topicClient).deleteInternalTopics(query.get(0).getQueryApplicationId()); + } + + @Test + public void shouldNotCleanUpInternalTopicsOnEngineCloseForPersistentQueries() { + // Given: + final List query = KsqlEngineTestUtil.execute( + serviceContext, + ksqlEngine, + "create stream persistent as select * from test1 EMIT CHANGES;", + KSQL_CONFIG, Collections.emptyMap() + ); + + query.get(0).start(); + + // When: + ksqlEngine.close(); + + // Then (there are no transient queries, so no internal topics should be deleted): + verify(topicClient, never()).deleteInternalTopics(any()); + } + + @Test + public void shouldCleanUpInternalTopicsOnQueryCloseForPersistentQueries() { + // Given: + final List query = KsqlEngineTestUtil.execute( + serviceContext, + ksqlEngine, + "create stream persistent as select * from test1 EMIT CHANGES;", + KSQL_CONFIG, Collections.emptyMap() + ); + + query.get(0).start(); + + // When: + query.get(0).close(); + + // Then (there are no transient queries, so no internal topics should be deleted): + verify(topicClient).deleteInternalTopics(query.get(0).getQueryApplicationId()); + } + @Test public void shouldNotCleanUpInternalTopicsOnCloseIfQueryNeverStarted() { // Given: diff --git a/ksql-engine/src/test/java/io/confluent/ksql/util/QueryMetadataTest.java b/ksql-engine/src/test/java/io/confluent/ksql/util/QueryMetadataTest.java index 6d6576838a5a..e850cc781e66 100644 --- a/ksql-engine/src/test/java/io/confluent/ksql/util/QueryMetadataTest.java +++ b/ksql-engine/src/test/java/io/confluent/ksql/util/QueryMetadataTest.java @@ -15,11 +15,12 @@ package io.confluent.ksql.util; -import static io.confluent.ksql.metastore.model.DataSource.DataSourceType; import static org.hamcrest.MatcherAssert.assertThat; import static org.hamcrest.Matchers.is; import static org.mockito.Mockito.inOrder; +import static org.mockito.Mockito.never; import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.verifyNoMoreInteractions; import static org.mockito.Mockito.when; import com.google.common.collect.ImmutableSet; @@ -59,9 +60,11 @@ public class QueryMetadataTest { @Mock private Consumer closeCallback; private QueryMetadata query; + private boolean cleanUp; @Before public void setup() { + cleanUp = false; query = new QueryMetadata( "foo", kafkaStreams, @@ -72,8 +75,12 @@ public void setup() { topoplogy, Collections.emptyMap(), Collections.emptyMap(), - closeCallback - ); + closeCallback) { + @Override + public void stop() { + doClose(cleanUp); + } + }; } @Test @@ -135,6 +142,24 @@ public void shouldCloseKStreamsAppOnCloseThenCloseCallback() { inOrder.verify(closeCallback).accept(query); } + @Test + public void shouldNotCallCloseCallbackOnStop() { + // When: + query.stop(); + + // Then: + verifyNoMoreInteractions(closeCallback); + } + + @Test + public void shouldCallKafkaStreamsCloseOnStop() { + // When: + query.stop(); + + // Then: + verify(kafkaStreams).close(); + } + @Test public void shouldCleanUpKStreamsAppAfterCloseOnClose() { // When: @@ -146,6 +171,27 @@ public void shouldCleanUpKStreamsAppAfterCloseOnClose() { inOrder.verify(kafkaStreams).cleanUp(); } + @Test + public void shouldNotCleanUpKStreamsAppOnStop() { + // When: + query.stop(); + + // Then: + verify(kafkaStreams, never()).cleanUp(); + } + + @Test + public void shouldCallCleanupOnStopIfCleanup() { + // Given: + cleanUp = true; + + // When: + query.stop(); + + // Then: + verify(kafkaStreams).cleanUp(); + } + @Test public void shouldReturnSources() { assertThat(query.getSourceNames(), is(SOME_SOURCES)); diff --git a/ksql-engine/src/test/java/io/confluent/ksql/util/TransientQueryMetadataTest.java b/ksql-engine/src/test/java/io/confluent/ksql/util/TransientQueryMetadataTest.java index aab26c6fb146..588ddf6dbd16 100644 --- a/ksql-engine/src/test/java/io/confluent/ksql/util/TransientQueryMetadataTest.java +++ b/ksql-engine/src/test/java/io/confluent/ksql/util/TransientQueryMetadataTest.java @@ -16,6 +16,7 @@ package io.confluent.ksql.util; import static org.mockito.Mockito.inOrder; +import static org.mockito.Mockito.verify; import io.confluent.ksql.name.SourceName; import io.confluent.ksql.query.BlockingRowQueue; @@ -84,4 +85,17 @@ public void shouldCloseQueueBeforeTopologyToAvoidDeadLock() { inOrder.verify(rowQueue).close(); inOrder.verify(kafkaStreams).close(); } + + @Test + public void shouldCallCloseOnStop() { + // When: + query.stop(); + + // Then: + final InOrder inOrder = inOrder(rowQueue, kafkaStreams, closeCallback); + inOrder.verify(rowQueue).close(); + inOrder.verify(kafkaStreams).close(); + inOrder.verify(kafkaStreams).cleanUp(); + inOrder.verify(closeCallback).accept(query); + } } \ No newline at end of file