-
Notifications
You must be signed in to change notification settings - Fork 13.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
KAFKA-13873 Add ability to Pause / Resume KafkaStreams Topologies #12161
Changes from 32 commits
207c4ef
b73708f
070b867
d019d7a
66fc07e
e0a80bb
85e506d
a620621
0a37879
34ec8ac
fb016a8
6d57d04
d007d15
d7be172
b2385ef
759d3a8
cfb18f5
a3bd8ae
cb6d6f1
722132f
c8f8c74
337c995
0cedb80
d30e3b3
148da23
e467852
1bcc5c9
08e332d
c495fb6
771c296
e076a07
f56dd99
6771854
12342d5
61a302b
06c98ea
1cdbc15
ce707de
769a53a
ea77133
f7799a5
e1c698b
c5d7abc
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -56,7 +56,6 @@ | |
import org.apache.kafka.streams.processor.internals.ClientUtils; | ||
import org.apache.kafka.streams.processor.internals.DefaultKafkaClientSupplier; | ||
import org.apache.kafka.streams.processor.internals.GlobalStreamThread; | ||
import org.apache.kafka.streams.processor.internals.GlobalStreamThread.State; | ||
import org.apache.kafka.streams.processor.internals.StateDirectory; | ||
import org.apache.kafka.streams.processor.internals.StreamThread; | ||
import org.apache.kafka.streams.processor.internals.StreamsMetadataState; | ||
|
@@ -65,6 +64,7 @@ | |
import org.apache.kafka.streams.processor.internals.TopologyMetadata; | ||
import org.apache.kafka.streams.processor.internals.assignment.AssignorError; | ||
import org.apache.kafka.streams.processor.internals.metrics.StreamsMetricsImpl; | ||
import org.apache.kafka.streams.processor.internals.namedtopology.NamedTopology; | ||
import org.apache.kafka.streams.query.FailureReason; | ||
import org.apache.kafka.streams.query.PositionBound; | ||
import org.apache.kafka.streams.query.QueryConfig; | ||
|
@@ -111,6 +111,7 @@ | |
import static org.apache.kafka.streams.internals.ApiUtils.validateMillisecondDuration; | ||
import static org.apache.kafka.streams.internals.StreamsConfigUtils.getTotalCacheSize; | ||
import static org.apache.kafka.streams.processor.internals.ClientUtils.fetchEndOffsets; | ||
import static org.apache.kafka.streams.processor.internals.TopologyMetadata.UNNAMED_TOPOLOGY; | ||
|
||
/** | ||
* A Kafka client that allows for performing continuous computation on input coming from one or more input topics and | ||
|
@@ -1735,6 +1736,51 @@ public <T> T store(final StoreQueryParameters<T> storeQueryParameters) { | |
return queryableStoreProvider.getStore(storeQueryParameters); | ||
} | ||
|
||
/** | ||
* This method pauses processing for the KafkaStreams instance. | ||
* | ||
* Paused topologies will only skip over a) processing, b) punctuation, and c) standby tasks. | ||
* Notably, paused topologies will still poll Kafka consumers, and commit offsets. | ||
* This method sets transient state that is not maintained or managed among instances. | ||
* Note that pause() can be called before start() in order to start a KafkaStreams instance | ||
* in a manner where the processing is paused as described, but the consumers are started up. | ||
*/ | ||
public void pause() { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could you please add unit tests for the new methods? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes! I added some in this commit e0a80bb. |
||
if (topologyMetadata.hasNamedTopologies()) { | ||
for (final NamedTopology namedTopology : topologyMetadata.getAllNamedTopologies()) { | ||
topologyMetadata.pauseTopology(namedTopology.name()); | ||
} | ||
} else { | ||
topologyMetadata.pauseTopology(UNNAMED_TOPOLOGY); | ||
} | ||
} | ||
|
||
/** | ||
* @return true when the KafkaStreams instance has its processing paused. | ||
*/ | ||
public boolean isPaused() { | ||
if (topologyMetadata.hasNamedTopologies()) { | ||
return topologyMetadata.getAllNamedTopologies().stream() | ||
.map(NamedTopology::name) | ||
.allMatch(topologyMetadata::isPaused); | ||
} else { | ||
return topologyMetadata.isPaused(UNNAMED_TOPOLOGY); | ||
} | ||
} | ||
|
||
/** | ||
* This method resumes processing for the KafkaStreams instance. | ||
*/ | ||
public void resume() { | ||
if (topologyMetadata.hasNamedTopologies()) { | ||
for (final NamedTopology namedTopology : topologyMetadata.getAllNamedTopologies()) { | ||
topologyMetadata.resumeTopology(namedTopology.name()); | ||
} | ||
} else { | ||
topologyMetadata.resumeTopology(UNNAMED_TOPOLOGY); | ||
} | ||
} | ||
|
||
/** | ||
* handle each stream thread in a snapshot of threads. | ||
* Note: iteration over a SynchronizedList is not thread safe, so it must be manually synchronized. However, we may
|
Original file line number | Diff line number | Diff line change | ||||||||||||||||||||||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
@@ -36,6 +36,7 @@ | |||||||||||||||||||||||||||||||||
import org.apache.kafka.streams.processor.StateRestoreListener; | ||||||||||||||||||||||||||||||||||
import org.apache.kafka.streams.processor.TaskId; | ||||||||||||||||||||||||||||||||||
import org.apache.kafka.streams.processor.internals.ProcessorStateManager.StateStoreMetadata; | ||||||||||||||||||||||||||||||||||
import org.apache.kafka.streams.processor.internals.Task.TaskType; | ||||||||||||||||||||||||||||||||||
import org.slf4j.Logger; | ||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||
import java.time.Duration; | ||||||||||||||||||||||||||||||||||
|
@@ -429,6 +430,8 @@ public void restore(final Map<TaskId, Task> tasks) { | |||||||||||||||||||||||||||||||||
final ConsumerRecords<byte[], byte[]> polledRecords; | ||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||
try { | ||||||||||||||||||||||||||||||||||
updateStandbyPartitions(tasks, restoringChangelogs); | ||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||
// for restoring active and updating standby we may prefer different poll time | ||||||||||||||||||||||||||||||||||
// in order to make sure we call the main consumer#poll in time. | ||||||||||||||||||||||||||||||||||
// TODO: once we move ChangelogReader to a separate thread this may no longer be a concern | ||||||||||||||||||||||||||||||||||
|
@@ -463,7 +466,10 @@ public void restore(final Map<TaskId, Task> tasks) { | |||||||||||||||||||||||||||||||||
final TaskId taskId = changelogs.get(partition).stateManager.taskId(); | ||||||||||||||||||||||||||||||||||
try { | ||||||||||||||||||||||||||||||||||
if (restoreChangelog(changelogs.get(partition))) { | ||||||||||||||||||||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Are you sure this method call avoids restoring state stores of paused tasks? Wouldn't There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'll look to add a unit test to verify things. This part may get subtle... I tried to update things and the ITs showing that one can start topologies in paused state broke. |
||||||||||||||||||||||||||||||||||
tasks.get(taskId).clearTaskTimeout(); | ||||||||||||||||||||||||||||||||||
final Task task = tasks.get(taskId); | ||||||||||||||||||||||||||||||||||
if (task != null) { | ||||||||||||||||||||||||||||||||||
jnh5y marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||||||||||||||||||||||||||||||
task.clearTaskTimeout(); | ||||||||||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||||||||||
} catch (final TimeoutException timeoutException) { | ||||||||||||||||||||||||||||||||||
tasks.get(taskId).maybeInitTaskTimeoutOrThrow( | ||||||||||||||||||||||||||||||||||
|
@@ -479,6 +485,47 @@ public void restore(final Map<TaskId, Task> tasks) { | |||||||||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||
private void updateStandbyPartitions(final Map<TaskId, Task> tasks, | ||||||||||||||||||||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think you forgot to also pause the active tasks in restoration. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm wondering if we can make this a more general rule, like:
I know for now it does not matter since we always pause all tasks with the current APIs, but this is extensible for finer-grained controls in the future. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. After your changes that include the restoration of active tasks the name of the method does not reflect what the method does anymore. Please rename the method to something more appropriate. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do not forget to rename this method to something more meaningful. |
||||||||||||||||||||||||||||||||||
final Set<TopicPartition> restoringChangelogs) { | ||||||||||||||||||||||||||||||||||
if (state == ChangelogReaderState.ACTIVE_RESTORING) { | ||||||||||||||||||||||||||||||||||
updatePartitionsByType(tasks, restoringChangelogs, TaskType.ACTIVE); | ||||||||||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||||||||||
if (state == ChangelogReaderState.STANDBY_UPDATING) { | ||||||||||||||||||||||||||||||||||
updatePartitionsByType(tasks, restoringChangelogs, TaskType.STANDBY); | ||||||||||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||
private void updatePartitionsByType(final Map<TaskId, Task> tasks, | ||||||||||||||||||||||||||||||||||
final Set<TopicPartition> restoringChangelogs, | ||||||||||||||||||||||||||||||||||
final TaskType taskType) { | ||||||||||||||||||||||||||||||||||
final Collection<TopicPartition> toResume = | ||||||||||||||||||||||||||||||||||
restoringChangelogs.stream().filter(t -> shouldResume(tasks, t, taskType)).collect(Collectors.toList()); | ||||||||||||||||||||||||||||||||||
final Collection<TopicPartition> toPause = | ||||||||||||||||||||||||||||||||||
restoringChangelogs.stream().filter(t -> shouldPause(tasks, t, taskType)).collect(Collectors.toList()); | ||||||||||||||||||||||||||||||||||
restoreConsumer.resume(toResume); | ||||||||||||||||||||||||||||||||||
restoreConsumer.pause(toPause); | ||||||||||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||
private boolean shouldResume(final Map<TaskId, Task> tasks, final TopicPartition partition, final TaskType taskType) { | ||||||||||||||||||||||||||||||||||
final ProcessorStateManager manager = changelogs.get(partition).stateManager; | ||||||||||||||||||||||||||||||||||
final TaskId taskId = manager.taskId(); | ||||||||||||||||||||||||||||||||||
final Task task = tasks.get(taskId); | ||||||||||||||||||||||||||||||||||
if (manager.taskType() == taskType) { | ||||||||||||||||||||||||||||||||||
return task != null; | ||||||||||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||||||||||
return false; | ||||||||||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||
private boolean shouldPause(final Map<TaskId, Task> tasks, final TopicPartition partition, final TaskType taskType) { | ||||||||||||||||||||||||||||||||||
final ProcessorStateManager manager = changelogs.get(partition).stateManager; | ||||||||||||||||||||||||||||||||||
final TaskId taskId = manager.taskId(); | ||||||||||||||||||||||||||||||||||
final Task task = tasks.get(taskId); | ||||||||||||||||||||||||||||||||||
if (manager.taskType() == taskType) { | ||||||||||||||||||||||||||||||||||
return task == null; | ||||||||||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||||||||||
return false; | ||||||||||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||||||||||
Comment on lines
+520
to
+527
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why not check the task type earlier? If the task type does not match, you do not need to do anything else.
Suggested change
|
||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||
private void maybeLogRestorationProgress() { | ||||||||||||||||||||||||||||||||||
if (state == ChangelogReaderState.ACTIVE_RESTORING) { | ||||||||||||||||||||||||||||||||||
if (time.milliseconds() - lastRestoreLogTime > RESTORE_LOG_INTERVAL_MS) { | ||||||||||||||||||||||||||||||||||
|
@@ -633,7 +680,11 @@ private Set<Task> getTasksFromPartitions(final Map<TaskId, Task> tasks, | |||||||||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||
private void clearTaskTimeout(final Set<Task> tasks) { | ||||||||||||||||||||||||||||||||||
tasks.forEach(Task::clearTaskTimeout); | ||||||||||||||||||||||||||||||||||
tasks.forEach(t -> { | ||||||||||||||||||||||||||||||||||
if (t != null) { | ||||||||||||||||||||||||||||||||||
t.clearTaskTimeout(); | ||||||||||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||||||||||
}); | ||||||||||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||
private void maybeInitTaskTimeoutOrThrow(final Set<Task> tasks, | ||||||||||||||||||||||||||||||||||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -897,7 +897,8 @@ private void initializeAndRestorePhase() { | |
} | ||
// we can always let changelog reader try restoring in order to initialize the changelogs; | ||
// if there's no active restoring or standby updating it would not try to fetch any data | ||
changelogReader.restore(taskManager.tasks()); | ||
// After KAFKA-13873, we only restore the not paused tasks. | ||
changelogReader.restore(taskManager.notPausedTasks()); | ||
jnh5y marked this conversation as resolved.
Show resolved
Hide resolved
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This should also be verified in a unit test with a mock changelog reader. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think I've covered this with a new test in StreamThreadTest. |
||
log.debug("Idempotent restore call done. Thread state has not changed."); | ||
} | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -35,21 +35,27 @@ public class TaskExecutionMetadata { | |
private static final long CONSTANT_BACKOFF_MS = 5_000L; | ||
|
||
private final boolean hasNamedTopologies; | ||
private final Set<String> pausedTopologies; | ||
// map of topologies experiencing errors/currently under backoff | ||
private final ConcurrentHashMap<String, NamedTopologyMetadata> topologyNameToErrorMetadata = new ConcurrentHashMap<>(); | ||
|
||
/**
 * @param allTopologyNames all topology names known to this instance; a singleton set containing
 *                         only UNNAMED_TOPOLOGY means named topologies are not in use
 * @param pausedTopologies set of currently-paused topology names — stored by reference
 *                         (NOTE(review): appears intentional so external pause/resume updates
 *                         are observed here; confirm before adding a defensive copy)
 */
public TaskExecutionMetadata(final Set<String> allTopologyNames, final Set<String> pausedTopologies) {
    // De Morgan's form of: !(size == 1 && contains(UNNAMED_TOPOLOGY))
    this.hasNamedTopologies = allTopologyNames.size() != 1 || !allTopologyNames.contains(UNNAMED_TOPOLOGY);
    this.pausedTopologies = pausedTopologies;
}
|
||
public boolean canProcessTask(final Task task, final long now) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I am aware that there are now unit tests for this class, but there are enough different code paths that would justify to add unit tests for this method. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I've added some unit tests for |
||
final String topologyName = task.id().topologyName(); | ||
if (!hasNamedTopologies) { | ||
// TODO implement error handling/backoff for non-named topologies (needs KIP) | ||
return true; | ||
return !pausedTopologies.contains(UNNAMED_TOPOLOGY); | ||
} else { | ||
final NamedTopologyMetadata metadata = topologyNameToErrorMetadata.get(topologyName); | ||
return metadata == null || (metadata.canProcess() && metadata.canProcessTask(task, now)); | ||
if (pausedTopologies.contains(topologyName)) { | ||
return false; | ||
} else { | ||
final NamedTopologyMetadata metadata = topologyNameToErrorMetadata.get(topologyName); | ||
return metadata == null || (metadata.canProcess() && metadata.canProcessTask(task, now)); | ||
} | ||
} | ||
} | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -16,31 +16,28 @@ | |
*/ | ||
package org.apache.kafka.streams.processor.internals; | ||
|
||
import org.apache.kafka.clients.consumer.Consumer; | ||
import org.apache.kafka.common.Metric; | ||
import org.apache.kafka.common.MetricName; | ||
import org.apache.kafka.common.TopicPartition; | ||
import org.apache.kafka.common.utils.LogContext; | ||
import org.apache.kafka.streams.processor.TaskId; | ||
import org.apache.kafka.streams.processor.internals.metrics.StreamsMetricsImpl; | ||
|
||
import java.util.HashSet; | ||
import org.slf4j.Logger; | ||
|
||
import java.util.ArrayList; | ||
import java.util.Collection; | ||
import java.util.Collections; | ||
import java.util.HashMap; | ||
import java.util.HashSet; | ||
import java.util.Map; | ||
import java.util.Set; | ||
import java.util.TreeMap; | ||
import java.util.stream.Collectors; | ||
import org.apache.kafka.clients.consumer.Consumer; | ||
import org.apache.kafka.common.Metric; | ||
import org.apache.kafka.common.MetricName; | ||
import org.apache.kafka.common.TopicPartition; | ||
import org.apache.kafka.common.utils.LogContext; | ||
import org.apache.kafka.streams.processor.TaskId; | ||
import org.slf4j.Logger; | ||
|
||
class Tasks { | ||
private final Logger log; | ||
private final TopologyMetadata topologyMetadata; | ||
private final StreamsMetricsImpl streamsMetrics; | ||
|
||
private final Map<TaskId, Task> allTasksPerId = new TreeMap<>(); | ||
private final Map<TaskId, Task> allTasksPerId = Collections.synchronizedSortedMap(new TreeMap<>()); | ||
private final Map<TaskId, Task> readOnlyTasksPerId = Collections.unmodifiableMap(allTasksPerId); | ||
private final Collection<Task> readOnlyTasks = Collections.unmodifiableCollection(allTasksPerId.values()); | ||
|
||
|
@@ -68,14 +65,12 @@ class Tasks { | |
|
||
/**
 * Creates the task registry.
 *
 * @param logContext         context used to create this class's logger
 * @param topologyMetadata   topology metadata, consulted e.g. for per-topology pause state
 * @param streamsMetrics     metrics registry for task-level sensors
 * @param activeTaskCreator  factory for active tasks
 * @param standbyTaskCreator factory for standby tasks
 */
Tasks(final LogContext logContext,
      final TopologyMetadata topologyMetadata,
      final StreamsMetricsImpl streamsMetrics,
      final ActiveTaskCreator activeTaskCreator,
      final StandbyTaskCreator standbyTaskCreator) {
    this.log = logContext.logger(getClass());
    this.topologyMetadata = topologyMetadata;
    this.streamsMetrics = streamsMetrics;
    this.activeTaskCreator = activeTaskCreator;
    this.standbyTaskCreator = standbyTaskCreator;
}
|
@@ -273,6 +268,20 @@ Collection<Task> allTasks() { | |
return readOnlyTasks; | ||
} | ||
|
||
Collection<Task> notPausedActiveTasks() { | ||
return new ArrayList<>(readOnlyActiveTasks) | ||
.stream() | ||
.filter(t -> !topologyMetadata.isPaused(t.id().topologyName())) | ||
.collect(Collectors.toList()); | ||
} | ||
|
||
Collection<Task> notPausedTasks() { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Unit tests would be great! There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Unit tests would still be great! There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @cadonna Same deal here, I started to try and test I think I'm either missing an easier approach or testing these functions directly may require a decent amount of effort. |
||
return new ArrayList<>(readOnlyTasks) | ||
.stream() | ||
.filter(t -> !topologyMetadata.isPaused(t.id().topologyName())) | ||
.collect(Collectors.toList()); | ||
} | ||
|
||
Set<TaskId> activeTaskIds() { | ||
return readOnlyActiveTaskIds; | ||
} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
In general, I think we should try to avoid adding suppressions. But I also see that
StreamThreadTest
would need quite some love at the moment which is not the intent of this PR.