Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Support pausing/resuming persistent queries #9203

Merged
merged 19 commits into from
Jul 15, 2022
Merged
Show file tree
Hide file tree
Changes from 11 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,10 @@
import io.confluent.ksql.cli.console.table.builder.FunctionNameListTableBuilder;
import io.confluent.ksql.cli.console.table.builder.KafkaTopicsListTableBuilder;
import io.confluent.ksql.cli.console.table.builder.ListVariablesTableBuilder;
import io.confluent.ksql.cli.console.table.builder.PauseQueryTableBuilder;
import io.confluent.ksql.cli.console.table.builder.PropertiesListTableBuilder;
import io.confluent.ksql.cli.console.table.builder.QueriesTableBuilder;
import io.confluent.ksql.cli.console.table.builder.ResumeQueryTableBuilder;
import io.confluent.ksql.cli.console.table.builder.StreamsListTableBuilder;
import io.confluent.ksql.cli.console.table.builder.TableBuilder;
import io.confluent.ksql.cli.console.table.builder.TablesListTableBuilder;
Expand Down Expand Up @@ -71,6 +73,7 @@
import io.confluent.ksql.rest.entity.KsqlErrorMessage;
import io.confluent.ksql.rest.entity.KsqlStatementErrorMessage;
import io.confluent.ksql.rest.entity.KsqlWarning;
import io.confluent.ksql.rest.entity.PauseQueryEntity;
import io.confluent.ksql.rest.entity.PropertiesList;
import io.confluent.ksql.rest.entity.Queries;
import io.confluent.ksql.rest.entity.QueryDescription;
Expand All @@ -79,6 +82,7 @@
import io.confluent.ksql.rest.entity.QueryHostStat;
import io.confluent.ksql.rest.entity.QueryOffsetSummary;
import io.confluent.ksql.rest.entity.QueryTopicOffsetSummary;
import io.confluent.ksql.rest.entity.ResumeQueryEntity;
import io.confluent.ksql.rest.entity.RunningQuery;
import io.confluent.ksql.rest.entity.SourceDescription;
import io.confluent.ksql.rest.entity.SourceDescriptionEntity;
Expand Down Expand Up @@ -191,6 +195,10 @@ public class Console implements Closeable {
tablePrinter(WarningEntity.class, WarningEntityTableBuilder::new))
.put(VariablesList.class,
tablePrinter(VariablesList.class, ListVariablesTableBuilder::new))
.put(PauseQueryEntity.class,
tablePrinter(PauseQueryEntity.class, PauseQueryTableBuilder::new))
.put(ResumeQueryEntity.class,
tablePrinter(ResumeQueryEntity.class, ResumeQueryTableBuilder::new))
.put(TerminateQueryEntity.class,
tablePrinter(TerminateQueryEntity.class, TerminateQueryTableBuilder::new))
.put(AssertTopicEntity.class, Console::printAssertTopic)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
/*
* Copyright 2022 Confluent Inc.
*
* Licensed under the Confluent Community License (the "License"); you may not use
* this file except in compliance with the License. You may obtain a copy of the
* License at
*
* http://www.confluent.io/confluent-community-license
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OF ANY KIND, either express or implied. See the License for the
* specific language governing permissions and limitations under the License.
*/

package io.confluent.ksql.cli.console.table.builder;

import io.confluent.ksql.cli.console.table.Table;
import io.confluent.ksql.rest.entity.PauseQueryEntity;

/**
 * Renders a {@link PauseQueryEntity} for the CLI as a single-column table whose only row
 * confirms which query was paused. The builder itself does not pause anything; it only
 * formats the response.
 */
public class PauseQueryTableBuilder implements TableBuilder<PauseQueryEntity> {

  /**
   * Builds the confirmation table for a pause response.
   *
   * @param entity the pause response; its query id is embedded, double-quoted, in the message
   * @return a table with a {@code Message} header and one confirmation row
   */
  @Override
  public Table buildTable(final PauseQueryEntity entity) {
    final String confirmation = "Paused query \"" + entity.getQueryId() + '"';

    return new Table.Builder()
        .withColumnHeaders("Message")
        .withRow(confirmation)
        .build();
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
/*
* Copyright 2022 Confluent Inc.
*
* Licensed under the Confluent Community License (the "License"); you may not use
* this file except in compliance with the License. You may obtain a copy of the
* License at
*
* http://www.confluent.io/confluent-community-license
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OF ANY KIND, either express or implied. See the License for the
* specific language governing permissions and limitations under the License.
*/

package io.confluent.ksql.cli.console.table.builder;

import io.confluent.ksql.cli.console.table.Table;
import io.confluent.ksql.rest.entity.ResumeQueryEntity;

/**
 * Renders a {@link ResumeQueryEntity} for the CLI as a single-column table whose only row
 * confirms which query was resumed. The builder itself does not resume anything; it only
 * formats the response.
 */
public class ResumeQueryTableBuilder implements TableBuilder<ResumeQueryEntity> {

  /**
   * Builds the confirmation table for a resume response.
   *
   * @param entity the resume response; its query id is embedded, double-quoted, in the message
   * @return a table with a {@code Message} header and one confirmation row
   */
  @Override
  public Table buildTable(final ResumeQueryEntity entity) {
    final String confirmation = "Resumed query \"" + entity.getQueryId() + '"';

    return new Table.Builder()
        .withColumnHeaders("Message")
        .withRow(confirmation)
        .build();
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ public enum KsqlQueryStatus {
RUNNING,
ERROR,
UNRESPONSIVE,
PAUSED,
}

public static KsqlQueryStatus fromStreamsState(final KafkaStreams.State state) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.function.BiFunction;
import java.util.stream.Collectors;
Expand Down Expand Up @@ -183,6 +184,10 @@ public List<PersistentQueryMetadata> getPersistentQueries() {
return ksqlEngine.getPersistentQueries();
}

/**
 * Looks up a single persistent query by delegating to the underlying engine.
 *
 * @param queryId id of the persistent query to look up
 * @return the engine's lookup result; presumably empty when no persistent query has
 *     this id (confirm against the engine implementation)
 */
public Optional<PersistentQueryMetadata> getPersistentQuery(final QueryId queryId) {
return ksqlEngine.getPersistentQuery(queryId);
}

public void close() {
ksqlEngine.close();
serviceContext.close();
Expand All @@ -196,6 +201,16 @@ public void terminateQuery(final QueryId queryId) {
});
}

/**
 * Pauses the persistent query with the given id, if the engine has one.
 *
 * <p>When the engine's lookup comes back empty this method does nothing and raises no error.
 *
 * @param queryId id of the persistent query to pause
 */
@VisibleForTesting
public void pauseQuery(final QueryId queryId) {
  final Optional<PersistentQueryMetadata> target = ksqlEngine.getPersistentQuery(queryId);
  target.ifPresent(query -> query.pause());
}

/**
 * Resumes the persistent query with the given id, if the engine has one.
 *
 * <p>When the engine's lookup comes back empty this method does nothing and raises no error.
 *
 * @param queryId id of the persistent query to resume
 */
@VisibleForTesting
public void resumeQuery(final QueryId queryId) {
  final Optional<PersistentQueryMetadata> target = ksqlEngine.getPersistentQuery(queryId);
  target.ifPresent(query -> query.resume());
}

private static ExecuteResult execute(
final KsqlExecutionContext executionContext,
final ParsedStatement stmt,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
import io.confluent.ksql.schema.ksql.PhysicalSchema;
import io.confluent.ksql.schema.query.QuerySchemas;
import io.confluent.ksql.serde.KeyFormat;
import io.confluent.ksql.util.KsqlConstants.KsqlQueryStatus;
import io.confluent.ksql.util.QueryMetadataImpl.TimeBoundedQueue;
import java.util.Collection;
import java.util.List;
Expand Down Expand Up @@ -86,6 +87,7 @@ public class BinPackedPersistentQueryMetadataImpl implements PersistentQueryMeta
private final Optional<ScalablePushRegistry> scalablePushRegistry;
private final ProcessingLoggerFactory loggerFactory;
public boolean everStarted = false;
// Set by pause()/resume() and read by getQueryStatus(). Those calls may come from
// different threads, so the flag is volatile to guarantee readers see the latest write
// (QueryMetadataImpl keeps the equivalent flag in an AtomicBoolean for the same reason).
private volatile boolean isPaused = false;
private boolean corruptionCommandTopic = false;


Expand Down Expand Up @@ -416,6 +418,27 @@ public void start() {
everStarted = true;
}

/**
 * Reports the ksqlDB-level status of this query.
 *
 * @return {@code PAUSED} while the paused flag is set; otherwise the status mapped from
 *     the current Kafka Streams state returned by {@code getState()}
 */
@Override
public KsqlQueryStatus getQueryStatus() {
  return isPaused
      ? KsqlQueryStatus.PAUSED
      : KsqlConstants.fromStreamsState(getState());
}

/**
 * Pauses this query by pausing its named topology on the shared Kafka Streams runtime,
 * then records the paused flag so that getQueryStatus() reports PAUSED.
 *
 * <p>The flag is only set after the runtime call returns, so if that call throws the
 * query is not marked as paused.
 *
 * <p>NOTE(review): the Kafka Streams state behind getState() may keep transitioning
 * (for example REBALANCING to RUNNING) while the topology is paused - do not assume any
 * particular streams state for a paused query; confirm before relying on it.
 */
@Override
public void pause() {
sharedKafkaStreamsRuntime.getKafkaStreams().pauseNamedTopology(topology.name());
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd like to understand more about how pausing queries works with named topologies. I saw the note about it in the KIP but am still unclear about the details. I also have some questions about how pausing a KS app affects query status and the metrics emitted from the app. Let me try to catch you offline and then summarize the interesting points here.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Cool. Did our conversations at the KSE on-site answer these questions well enough?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes! I meant to post an update here shortly afterwards but now it'll be a good memory exercise 😆 :

  • For named topologies, pause/resume works as usual as long as you pass in the specific topology name. You can pause a topology in this way before the topology has been started, so no concerns there.
  • Query status is complicated/we're not entirely clear. We think queries may continue to transition from REBALANCING to RUNNING (and potentially among other states as well) as normal even after a query is paused. It's best not to assume any guarantees on streams state for paused queries. We'll adjust our monitoring accordingly.
  • Metrics continue to be emitted since that part of the stream thread execution loop is not skipped, but metric values will of course be affected. Again, we'll adjust our monitoring accordingly.

isPaused = true;
}

/**
 * Resumes this query by resuming its named topology on the shared Kafka Streams runtime,
 * then clears the paused flag so that getQueryStatus() goes back to reporting the status
 * derived from the streams state.
 *
 * <p>The flag is only cleared after the runtime call returns, so if that call throws the
 * query stays marked as paused.
 */
@Override
public void resume() {
sharedKafkaStreamsRuntime.getKafkaStreams().resumeNamedTopology(topology.name());
isPaused = false;
}

@Override
public void register() {
sharedKafkaStreamsRuntime.register(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import io.confluent.ksql.query.QueryId;
import io.confluent.ksql.rest.entity.StreamsTaskMetadata;
import io.confluent.ksql.schema.ksql.LogicalSchema;
import io.confluent.ksql.util.KsqlConstants.KsqlQueryStatus;
import java.util.Collection;
import java.util.List;
import java.util.Map;
Expand All @@ -43,6 +44,8 @@ public interface QueryMetadata {

KafkaStreams.State getState();

/**
 * Returns the ksqlDB-level status of this query, as opposed to the raw Kafka Streams
 * state exposed by {@link #getState()}.
 *
 * @return the current query status
 */
KsqlQueryStatus getQueryStatus();

String getExecutionPlan();

String getQueryApplicationId();
Expand Down Expand Up @@ -73,6 +76,10 @@ public interface QueryMetadata {

KafkaStreams getKafkaStreams();

/**
 * Pauses this query. Implementations may make this a no-op.
 */
void pause();

/**
 * Resumes this query after a previous {@link #pause()}. Implementations may make this a
 * no-op.
 */
void resume();

void close();

void start();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
import io.confluent.ksql.query.QueryId;
import io.confluent.ksql.rest.entity.StreamsTaskMetadata;
import io.confluent.ksql.schema.ksql.LogicalSchema;
import io.confluent.ksql.util.KsqlConstants.KsqlQueryStatus;
import io.confluent.ksql.util.KsqlConstants.KsqlQueryType;
import java.time.Duration;
import java.util.Collection;
Expand All @@ -44,6 +45,7 @@
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentLinkedQueue;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.stream.Collectors;
import org.apache.kafka.streams.KafkaStreams;
import org.apache.kafka.streams.KafkaStreams.State;
Expand All @@ -57,7 +59,7 @@

public class QueryMetadataImpl implements QueryMetadata {
private static final Logger LOG = LoggerFactory.getLogger(QueryMetadataImpl.class);

private final AtomicBoolean isPaused = new AtomicBoolean(false);
private final String statementString;
private final String executionPlan;
private final String queryApplicationId;
Expand Down Expand Up @@ -421,6 +423,27 @@ public void start() {
kafkaStreams.start();
}

/**
 * Reports the ksqlDB-level status of this query.
 *
 * @return {@code PAUSED} while the paused flag is set; otherwise the status mapped from
 *     the current Kafka Streams state returned by {@code getState()}
 */
@Override
public KsqlQueryStatus getQueryStatus() {
  final boolean paused = isPaused.get();
  if (!paused) {
    return KsqlConstants.fromStreamsState(getState());
  }
  return KsqlQueryStatus.PAUSED;
}

/**
 * Pauses the underlying Kafka Streams instance and then sets the paused flag so that
 * getQueryStatus() reports PAUSED.
 *
 * <p>The flag is only set after the streams call returns, so if that call throws the
 * query is not marked as paused.
 */
@Override
public void pause() {
kafkaStreams.pause();
isPaused.set(true);
}

/**
 * Resumes the underlying Kafka Streams instance and then clears the paused flag so that
 * getQueryStatus() goes back to reporting the status derived from the streams state.
 *
 * <p>The flag is only cleared after the streams call returns, so if that call throws the
 * query stays marked as paused.
 */
@Override
public void resume() {
kafkaStreams.resume();
isPaused.set(false);
}

public static class RetryEvent implements QueryMetadata.RetryEvent {
private final Ticker ticker;
private final QueryId queryId;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,16 @@ private SandboxedBinPackedPersistentQueryMetadataImpl(
super(queryMetadata, listener);
}

/**
 * Deliberately does nothing: pausing through this sandboxed query wrapper is a no-op.
 */
@Override
public void pause() {
// no-op
}

/**
 * Deliberately does nothing: resuming through this sandboxed query wrapper is a no-op.
 */
@Override
public void resume() {
// no-op
}

@Override
public void stop() {
//no-op
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,16 @@ private SandboxedPersistentQueryMetadataImpl(
super(queryMetadata, listener);
}

/**
 * Deliberately does nothing: pausing through this sandboxed query wrapper is a no-op.
 */
@Override
public void pause() {
// no-op
}

/**
 * Deliberately does nothing: resuming through this sandboxed query wrapper is a no-op.
 */
@Override
public void resume() {
// no-op
}

@Override
public void close() {
getListener().onClose(this);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,7 @@ public void setCompletionHandler(final CompletionHandler completionHandler) {
rowQueue.setCompletionHandler(completionHandler);
}


@Override
public void close() {
// Push queries can be closed by both terminate commands and the client ending the request, so
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2266,6 +2266,21 @@ public void shouldBeAbleToPrepareTerminateAndDrop() {
// Then: did not throw.
}

/**
 * Checks that PAUSE and RESUME statements naming an already-created persistent query can
 * be parsed and then prepared without the engine throwing.
 */
@Test
public void shouldBeAbleToPreparePauseAndResume() {
  // Given:
  givenSqlAlreadyExecuted("CREATE STREAM FOO AS SELECT * FROM TEST1;");

  final String pauseThenResume =
      "PAUSE CSAS_FOO_0;"
          + "RESUME CSAS_FOO_0;";
  final List<ParsedStatement> statements = ksqlEngine.parse(pauseThenResume);

  // When:
  for (final ParsedStatement statement : statements) {
    ksqlEngine.prepare(statement);
  }

  // Then: did not throw.
}

@Test
public void shouldIgnoreLegacyDeleteTopicPartOfDropCommand() {
// Given:
Expand Down
Loading