Skip to content

Commit

Permalink
[fix][broker] Ensure that PulsarService is ready for serving incoming…
Browse files Browse the repository at this point in the history
… requests (apache#22977)

(cherry picked from commit 53df683)
(cherry picked from commit ec51420)
(cherry picked from commit 1a7eb54)
  • Loading branch information
lhotari authored and nikhil-ctds committed Jun 27, 2024
1 parent 36a0967 commit 867d2ff
Show file tree
Hide file tree
Showing 6 changed files with 157 additions and 58 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -273,6 +273,7 @@ public class PulsarService implements AutoCloseable, ShutdownService {
private TransactionPendingAckStoreProvider transactionPendingAckStoreProvider;
private final ExecutorProvider transactionExecutorProvider;
private String brokerId;
private final CompletableFuture<Void> readyForIncomingRequestsFuture = new CompletableFuture<>();

public enum State {
Init, Started, Closing, Closed
Expand Down Expand Up @@ -901,6 +902,9 @@ public void start() throws PulsarServerException {

this.metricsGenerator = new MetricsGenerator(this);

// the broker is ready to accept incoming requests by Pulsar binary protocol and http/https
readyForIncomingRequestsFuture.complete(null);

// Initialize the message protocol handlers.
// start the protocol handlers only after the broker is ready,
// so that the protocol handlers can access broker service properly.
Expand Down Expand Up @@ -949,12 +953,22 @@ public void start() throws PulsarServerException {
state = State.Started;
} catch (Exception e) {
LOG.error("Failed to start Pulsar service: {}", e.getMessage(), e);
throw new PulsarServerException(e);
PulsarServerException startException = new PulsarServerException(e);
readyForIncomingRequestsFuture.completeExceptionally(startException);
throw startException;
} finally {
mutex.unlock();
}
}

public void runWhenReadyForIncomingRequests(Runnable runnable) {
readyForIncomingRequestsFuture.thenRun(runnable);
}

public void waitUntilReadyForIncomingRequests() throws ExecutionException, InterruptedException {
readyForIncomingRequestsFuture.get();
}

protected BrokerInterceptor newBrokerInterceptor() throws IOException {
return BrokerInterceptors.load(config);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,6 @@
import java.util.Set;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ScheduledFuture;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicReference;
Expand Down Expand Up @@ -164,10 +163,10 @@ public class ExtensibleLoadManagerImpl implements ExtensibleLoadManager {

private TopBundleLoadDataReporter topBundleLoadDataReporter;

private ScheduledFuture brokerLoadDataReportTask;
private ScheduledFuture topBundlesLoadDataReportTask;
private volatile ScheduledFuture brokerLoadDataReportTask;
private volatile ScheduledFuture topBundlesLoadDataReportTask;

private ScheduledFuture monitorTask;
private volatile ScheduledFuture monitorTask;
private SplitScheduler splitScheduler;

private UnloadManager unloadManager;
Expand All @@ -190,7 +189,7 @@ public class ExtensibleLoadManagerImpl implements ExtensibleLoadManager {

private final ConcurrentHashMap<String, CompletableFuture<Optional<BrokerLookupData>>>
lookupRequests = new ConcurrentHashMap<>();
private final CountDownLatch initWaiter = new CountDownLatch(1);
private final CompletableFuture<Void> initWaiter = new CompletableFuture<>();

/**
* Get all the bundles that are owned by this broker.
Expand Down Expand Up @@ -321,12 +320,14 @@ public void start() throws PulsarServerException {
pulsar.getCoordinationService(), pulsar.getBrokerId(),
pulsar.getSafeWebServiceAddress(), ELECTION_ROOT,
state -> {
pulsar.getLoadManagerExecutor().execute(() -> {
if (state == LeaderElectionState.Leading) {
playLeader();
} else {
playFollower();
}
pulsar.runWhenReadyForIncomingRequests(() -> {
pulsar.getLoadManagerExecutor().execute(() -> {
if (state == LeaderElectionState.Leading) {
playLeader();
} else {
playFollower();
}
});
});
});
this.serviceUnitStateChannel = ServiceUnitStateChannelImpl.newInstance(pulsar);
Expand All @@ -336,7 +337,13 @@ public void start() throws PulsarServerException {
this.serviceUnitStateChannel.listen(unloadManager);
this.serviceUnitStateChannel.listen(splitManager);
this.leaderElectionService.start();
this.serviceUnitStateChannel.start();
pulsar.runWhenReadyForIncomingRequests(() -> {
try {
this.serviceUnitStateChannel.start();
} catch (Exception e) {
failStarting(e);
}
});
this.antiAffinityGroupPolicyHelper =
new AntiAffinityGroupPolicyHelper(pulsar, serviceUnitStateChannel);
antiAffinityGroupPolicyHelper.listenFailureDomainUpdate();
Expand Down Expand Up @@ -368,54 +375,72 @@ public void start() throws PulsarServerException {
new TopBundleLoadDataReporter(pulsar, brokerRegistry.getBrokerId(), topBundlesLoadDataStore);
this.serviceUnitStateChannel.listen(brokerLoadDataReporter);
this.serviceUnitStateChannel.listen(topBundleLoadDataReporter);
var interval = conf.getLoadBalancerReportUpdateMinIntervalMillis();
this.brokerLoadDataReportTask = this.pulsar.getLoadManagerExecutor()
.scheduleAtFixedRate(() -> {
try {
brokerLoadDataReporter.reportAsync(false);
// TODO: update broker load metrics using getLocalData
} catch (Throwable e) {
log.error("Failed to run the broker load manager executor job.", e);
}
},
interval,
interval, TimeUnit.MILLISECONDS);

this.topBundlesLoadDataReportTask = this.pulsar.getLoadManagerExecutor()
.scheduleAtFixedRate(() -> {
try {
// TODO: consider excluding the bundles that are in the process of split.
topBundleLoadDataReporter.reportAsync(false);
} catch (Throwable e) {
log.error("Failed to run the top bundles load manager executor job.", e);
}
},
interval,
interval, TimeUnit.MILLISECONDS);

this.monitorTask = this.pulsar.getLoadManagerExecutor()
.scheduleAtFixedRate(() -> {
monitor();
},
MONITOR_INTERVAL_IN_MILLIS,
MONITOR_INTERVAL_IN_MILLIS, TimeUnit.MILLISECONDS);

this.unloadScheduler = new UnloadScheduler(
pulsar, pulsar.getLoadManagerExecutor(), unloadManager, context,
serviceUnitStateChannel, unloadCounter, unloadMetrics);
this.splitScheduler = new SplitScheduler(
pulsar, serviceUnitStateChannel, splitManager, splitCounter, splitMetrics, context);
this.splitScheduler.start();
this.initWaiter.countDown();
this.started = true;
log.info("Started load manager.");

pulsar.runWhenReadyForIncomingRequests(() -> {
try {
var interval = conf.getLoadBalancerReportUpdateMinIntervalMillis();

this.brokerLoadDataReportTask = this.pulsar.getLoadManagerExecutor()
.scheduleAtFixedRate(() -> {
try {
brokerLoadDataReporter.reportAsync(false);
// TODO: update broker load metrics using getLocalData
} catch (Throwable e) {
log.error("Failed to run the broker load manager executor job.", e);
}
},
interval,
interval, TimeUnit.MILLISECONDS);

this.topBundlesLoadDataReportTask = this.pulsar.getLoadManagerExecutor()
.scheduleAtFixedRate(() -> {
try {
// TODO: consider excluding the bundles that are in the process of split.
topBundleLoadDataReporter.reportAsync(false);
} catch (Throwable e) {
log.error("Failed to run the top bundles load manager executor job.", e);
}
},
interval,
interval, TimeUnit.MILLISECONDS);

this.monitorTask = this.pulsar.getLoadManagerExecutor()
.scheduleAtFixedRate(() -> {
monitor();
},
MONITOR_INTERVAL_IN_MILLIS,
MONITOR_INTERVAL_IN_MILLIS, TimeUnit.MILLISECONDS);

this.splitScheduler.start();
this.initWaiter.complete(null);
this.started = true;
log.info("Started load manager.");
} catch (Exception ex) {
failStarting(ex);
}
});
} catch (Exception ex) {
log.error("Failed to start the extensible load balance and close broker registry {}.",
this.brokerRegistry, ex);
if (this.brokerRegistry != null) {
failStarting(ex);
}
}

private void failStarting(Exception ex) {
log.error("Failed to start the extensible load balance and close broker registry {}.",
this.brokerRegistry, ex);
if (this.brokerRegistry != null) {
try {
brokerRegistry.close();
} catch (PulsarServerException e) {
// ignore
}
}
initWaiter.completeExceptionally(ex);
}

@Override
Expand Down Expand Up @@ -772,11 +797,11 @@ synchronized void playLeader() {
boolean becameFollower = false;
while (!Thread.currentThread().isInterrupted()) {
try {
initWaiter.get();
if (!serviceUnitStateChannel.isChannelOwner()) {
becameFollower = true;
break;
}
initWaiter.await();
// Confirm the system topics have been created or create them if they do not exist.
// If the leader has changed, the new leader need to reset
// the local brokerService.topics (by this topic creations).
Expand Down Expand Up @@ -822,11 +847,11 @@ synchronized void playFollower() {
boolean becameLeader = false;
while (!Thread.currentThread().isInterrupted()) {
try {
initWaiter.get();
if (serviceUnitStateChannel.isChannelOwner()) {
becameLeader = true;
break;
}
initWaiter.await();
unloadScheduler.close();
serviceUnitStateChannel.cancelOwnershipMonitor();
brokerLoadDataStore.init();
Expand Down Expand Up @@ -885,7 +910,7 @@ public List<Metrics> getMetrics() {
@VisibleForTesting
protected void monitor() {
try {
initWaiter.await();
initWaiter.get();

// Monitor role
// Periodically check the role in case ZK watcher fails.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1262,7 +1262,9 @@ public void addNamespaceBundleOwnershipListener(NamespaceBundleOwnershipListener
bundleOwnershipListeners.add(listener);
}
}
getOwnedServiceUnits().forEach(bundle -> notifyNamespaceBundleOwnershipListener(bundle, listeners));
pulsar.runWhenReadyForIncomingRequests(() -> {
getOwnedServiceUnits().forEach(bundle -> notifyNamespaceBundleOwnershipListener(bundle, listeners));
});
}

private void notifyNamespaceBundleOwnershipListener(NamespaceBundle bundle,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
package org.apache.pulsar.broker.service;

import com.google.common.annotations.VisibleForTesting;
import io.netty.channel.ChannelHandler;
import io.netty.channel.ChannelInitializer;
import io.netty.channel.socket.SocketChannel;
import io.netty.handler.codec.LengthFieldBasedFrameDecoder;
Expand Down Expand Up @@ -104,6 +105,9 @@ public PulsarChannelInitializer(PulsarService pulsar, PulsarChannelOptions opts)

@Override
protected void initChannel(SocketChannel ch) throws Exception {
// disable auto read explicitly so that requests aren't served until auto read is enabled
// ServerCnx must enable auto read in channelActive after PulsarService is ready to accept incoming requests
ch.config().setAutoRead(false);
ch.pipeline().addLast("consolidation", new FlushConsolidationHandler(1024, true));
if (this.enableTls) {
if (this.tlsEnabledWithKeyStore) {
Expand All @@ -128,7 +132,8 @@ protected void initChannel(SocketChannel ch) throws Exception {
// ServerCnx ends up reading higher number of messages and broker can not throttle the messages by disabling
// auto-read.
ch.pipeline().addLast("flowController", new FlowControlHandler());
ServerCnx cnx = newServerCnx(pulsar, listenerName);
// using "ChannelHandler" type to workaround an IntelliJ bug that shows a false positive error
ChannelHandler cnx = newServerCnx(pulsar, listenerName);
ch.pipeline().addLast("handler", cnx);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -337,6 +337,10 @@ public void channelActive(ChannelHandlerContext ctx) throws Exception {
this.commandSender = new PulsarCommandSenderImpl(brokerInterceptor, this);
this.service.getPulsarStats().recordConnectionCreate();
cnxsPerThread.get().add(this);
service.getPulsar().runWhenReadyForIncomingRequests(() -> {
// enable auto read after PulsarService is ready to accept incoming requests
ctx.channel().config().setAutoRead(true);
});
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,21 @@

import io.prometheus.client.CollectorRegistry;
import io.prometheus.client.jetty.JettyStatisticsCollector;
import java.io.IOException;
import java.util.ArrayList;
import java.util.EnumSet;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.concurrent.ExecutionException;
import javax.servlet.DispatcherType;
import javax.servlet.Filter;
import javax.servlet.FilterChain;
import javax.servlet.FilterConfig;
import javax.servlet.ServletException;
import javax.servlet.ServletRequest;
import javax.servlet.ServletResponse;
import javax.servlet.http.HttpServletResponse;
import lombok.Getter;
import org.apache.pulsar.broker.PulsarServerException;
import org.apache.pulsar.broker.PulsarService;
Expand Down Expand Up @@ -228,6 +237,7 @@ private static class FilterInitializer {
private final FilterHolder authenticationFilterHolder;
FilterInitializer(PulsarService pulsarService) {
ServiceConfiguration config = pulsarService.getConfiguration();

if (config.getMaxConcurrentHttpRequests() > 0) {
FilterHolder filterHolder = new FilterHolder(QoSFilter.class);
filterHolder.setInitParameter("maxRequests", String.valueOf(config.getMaxConcurrentHttpRequests()));
Expand All @@ -239,8 +249,11 @@ private static class FilterInitializer {
new RateLimitingFilter(config.getHttpRequestsMaxPerSecond())));
}

boolean brokerInterceptorEnabled =
pulsarService.getBrokerInterceptor() != null && !config.isDisableBrokerInterceptors();
// wait until the PulsarService is ready to serve incoming requests
filterHolders.add(
new FilterHolder(new WaitUntilPulsarServiceIsReadyForIncomingRequestsFilter(pulsarService)));

boolean brokerInterceptorEnabled = pulsarService.getBrokerInterceptor() != null;
if (brokerInterceptorEnabled) {
ExceptionHandler handler = new ExceptionHandler();
// Enable PreInterceptFilter only when interceptors are enabled
Expand Down Expand Up @@ -281,6 +294,42 @@ public void addFilters(ServletContextHandler context, boolean requiresAuthentica
}
}

// Filter that waits until the PulsarService is ready to serve incoming requests
private static class WaitUntilPulsarServiceIsReadyForIncomingRequestsFilter implements Filter {
private final PulsarService pulsarService;

public WaitUntilPulsarServiceIsReadyForIncomingRequestsFilter(PulsarService pulsarService) {
this.pulsarService = pulsarService;
}

@Override
public void init(FilterConfig filterConfig) throws ServletException {

}

@Override
public void doFilter(ServletRequest request, ServletResponse response, FilterChain chain)
throws IOException, ServletException {
try {
// Wait until the PulsarService is ready to serve incoming requests
pulsarService.waitUntilReadyForIncomingRequests();
} catch (ExecutionException e) {
((HttpServletResponse) response).sendError(HttpServletResponse.SC_SERVICE_UNAVAILABLE,
"PulsarService failed to start.");
return;
} catch (InterruptedException e) {
((HttpServletResponse) response).sendError(HttpServletResponse.SC_SERVICE_UNAVAILABLE,
"PulsarService is not ready.");
return;
}
chain.doFilter(request, response);
}

@Override
public void destroy() {

}
}
}

public void addServlet(String path, ServletHolder servletHolder, boolean requiresAuthentication,
Expand Down

0 comments on commit 867d2ff

Please sign in to comment.