Skip to content

Commit

Permalink
[fix][broker] Ensure that PulsarService is ready for serving incoming…
Browse files Browse the repository at this point in the history
… requests (#22977)
  • Loading branch information
lhotari authored Jun 26, 2024
1 parent fe726db commit 53df683
Show file tree
Hide file tree
Showing 6 changed files with 156 additions and 56 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -291,6 +291,7 @@ public class PulsarService implements AutoCloseable, ShutdownService {
private final ExecutorProvider transactionExecutorProvider;
private final DefaultMonotonicSnapshotClock monotonicSnapshotClock;
private String brokerId;
private final CompletableFuture<Void> readyForIncomingRequestsFuture = new CompletableFuture<>();

public enum State {
Init, Started, Closing, Closed
Expand Down Expand Up @@ -999,6 +1000,9 @@ public void start() throws PulsarServerException {

this.metricsGenerator = new MetricsGenerator(this);

// the broker is ready to accept incoming requests by Pulsar binary protocol and http/https
readyForIncomingRequestsFuture.complete(null);

// Initialize the message protocol handlers.
// start the protocol handlers only after the broker is ready,
// so that the protocol handlers can access broker service properly.
Expand Down Expand Up @@ -1047,12 +1051,22 @@ public void start() throws PulsarServerException {
state = State.Started;
} catch (Exception e) {
LOG.error("Failed to start Pulsar service: {}", e.getMessage(), e);
throw new PulsarServerException(e);
PulsarServerException startException = new PulsarServerException(e);
readyForIncomingRequestsFuture.completeExceptionally(startException);
throw startException;
} finally {
mutex.unlock();
}
}

public void runWhenReadyForIncomingRequests(Runnable runnable) {
readyForIncomingRequestsFuture.thenRun(runnable);
}

public void waitUntilReadyForIncomingRequests() throws ExecutionException, InterruptedException {
readyForIncomingRequestsFuture.get();
}

protected BrokerInterceptor newBrokerInterceptor() throws IOException {
return BrokerInterceptors.load(config);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,6 @@
import java.util.Set;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ScheduledFuture;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicLong;
Expand Down Expand Up @@ -167,10 +166,10 @@ public class ExtensibleLoadManagerImpl implements ExtensibleLoadManager, BrokerS

private TopBundleLoadDataReporter topBundleLoadDataReporter;

private ScheduledFuture brokerLoadDataReportTask;
private ScheduledFuture topBundlesLoadDataReportTask;
private volatile ScheduledFuture brokerLoadDataReportTask;
private volatile ScheduledFuture topBundlesLoadDataReportTask;

private ScheduledFuture monitorTask;
private volatile ScheduledFuture monitorTask;
private SplitScheduler splitScheduler;

private UnloadManager unloadManager;
Expand Down Expand Up @@ -199,7 +198,7 @@ public class ExtensibleLoadManagerImpl implements ExtensibleLoadManager, BrokerS

private final ConcurrentHashMap<String, CompletableFuture<Optional<BrokerLookupData>>>
lookupRequests = new ConcurrentHashMap<>();
private final CountDownLatch initWaiter = new CountDownLatch(1);
private final CompletableFuture<Void> initWaiter = new CompletableFuture<>();

/**
* Get all the bundles that are owned by this broker.
Expand Down Expand Up @@ -376,12 +375,14 @@ public void start() throws PulsarServerException {
pulsar.getCoordinationService(), pulsar.getBrokerId(),
pulsar.getSafeWebServiceAddress(), ELECTION_ROOT,
state -> {
pulsar.getLoadManagerExecutor().execute(() -> {
if (state == LeaderElectionState.Leading) {
playLeader();
} else {
playFollower();
}
pulsar.runWhenReadyForIncomingRequests(() -> {
pulsar.getLoadManagerExecutor().execute(() -> {
if (state == LeaderElectionState.Leading) {
playLeader();
} else {
playFollower();
}
});
});
});
this.serviceUnitStateChannel = new ServiceUnitStateChannelImpl(pulsar);
Expand All @@ -391,7 +392,13 @@ public void start() throws PulsarServerException {
this.serviceUnitStateChannel.listen(unloadManager);
this.serviceUnitStateChannel.listen(splitManager);
this.leaderElectionService.start();
this.serviceUnitStateChannel.start();
pulsar.runWhenReadyForIncomingRequests(() -> {
try {
this.serviceUnitStateChannel.start();
} catch (Exception e) {
failStarting(e);
}
});
this.antiAffinityGroupPolicyHelper =
new AntiAffinityGroupPolicyHelper(pulsar, serviceUnitStateChannel);
antiAffinityGroupPolicyHelper.listenFailureDomainUpdate();
Expand Down Expand Up @@ -423,54 +430,72 @@ public void start() throws PulsarServerException {
new TopBundleLoadDataReporter(pulsar, brokerRegistry.getBrokerId(), topBundlesLoadDataStore);
this.serviceUnitStateChannel.listen(brokerLoadDataReporter);
this.serviceUnitStateChannel.listen(topBundleLoadDataReporter);
var interval = conf.getLoadBalancerReportUpdateMinIntervalMillis();
this.brokerLoadDataReportTask = this.pulsar.getLoadManagerExecutor()
.scheduleAtFixedRate(() -> {
try {
brokerLoadDataReporter.reportAsync(false);
// TODO: update broker load metrics using getLocalData
} catch (Throwable e) {
log.error("Failed to run the broker load manager executor job.", e);
}
},
interval,
interval, TimeUnit.MILLISECONDS);

this.topBundlesLoadDataReportTask = this.pulsar.getLoadManagerExecutor()
.scheduleAtFixedRate(() -> {
try {
// TODO: consider excluding the bundles that are in the process of split.
topBundleLoadDataReporter.reportAsync(false);
} catch (Throwable e) {
log.error("Failed to run the top bundles load manager executor job.", e);
}
},
interval,
interval, TimeUnit.MILLISECONDS);

this.monitorTask = this.pulsar.getLoadManagerExecutor()
.scheduleAtFixedRate(() -> {
monitor();
},
MONITOR_INTERVAL_IN_MILLIS,
MONITOR_INTERVAL_IN_MILLIS, TimeUnit.MILLISECONDS);

this.unloadScheduler = new UnloadScheduler(
pulsar, pulsar.getLoadManagerExecutor(), unloadManager, context,
serviceUnitStateChannel, unloadCounter, unloadMetrics);
this.splitScheduler = new SplitScheduler(
pulsar, serviceUnitStateChannel, splitManager, splitCounter, splitMetrics, context);
this.splitScheduler.start();
this.initWaiter.countDown();
this.started = true;
log.info("Started load manager.");

pulsar.runWhenReadyForIncomingRequests(() -> {
try {
var interval = conf.getLoadBalancerReportUpdateMinIntervalMillis();

this.brokerLoadDataReportTask = this.pulsar.getLoadManagerExecutor()
.scheduleAtFixedRate(() -> {
try {
brokerLoadDataReporter.reportAsync(false);
// TODO: update broker load metrics using getLocalData
} catch (Throwable e) {
log.error("Failed to run the broker load manager executor job.", e);
}
},
interval,
interval, TimeUnit.MILLISECONDS);

this.topBundlesLoadDataReportTask = this.pulsar.getLoadManagerExecutor()
.scheduleAtFixedRate(() -> {
try {
// TODO: consider excluding the bundles that are in the process of split.
topBundleLoadDataReporter.reportAsync(false);
} catch (Throwable e) {
log.error("Failed to run the top bundles load manager executor job.", e);
}
},
interval,
interval, TimeUnit.MILLISECONDS);

this.monitorTask = this.pulsar.getLoadManagerExecutor()
.scheduleAtFixedRate(() -> {
monitor();
},
MONITOR_INTERVAL_IN_MILLIS,
MONITOR_INTERVAL_IN_MILLIS, TimeUnit.MILLISECONDS);

this.splitScheduler.start();
this.initWaiter.complete(null);
this.started = true;
log.info("Started load manager.");
} catch (Exception ex) {
failStarting(ex);
}
});
} catch (Exception ex) {
log.error("Failed to start the extensible load balance and close broker registry {}.",
this.brokerRegistry, ex);
if (this.brokerRegistry != null) {
failStarting(ex);
}
}

private void failStarting(Exception ex) {
log.error("Failed to start the extensible load balance and close broker registry {}.",
this.brokerRegistry, ex);
if (this.brokerRegistry != null) {
try {
brokerRegistry.close();
} catch (PulsarServerException e) {
// ignore
}
}
initWaiter.completeExceptionally(ex);
}

@Override
Expand Down Expand Up @@ -816,7 +841,7 @@ synchronized void playLeader() {
boolean becameFollower = false;
while (!Thread.currentThread().isInterrupted()) {
try {
initWaiter.await();
initWaiter.get();
if (!serviceUnitStateChannel.isChannelOwner()) {
becameFollower = true;
break;
Expand Down Expand Up @@ -866,7 +891,7 @@ synchronized void playFollower() {
boolean becameLeader = false;
while (!Thread.currentThread().isInterrupted()) {
try {
initWaiter.await();
initWaiter.get();
if (serviceUnitStateChannel.isChannelOwner()) {
becameLeader = true;
break;
Expand Down Expand Up @@ -936,7 +961,7 @@ private List<Metrics> getIgnoredCommandMetrics(String advertisedBrokerAddress) {
@VisibleForTesting
protected void monitor() {
try {
initWaiter.await();
initWaiter.get();

// Monitor role
// Periodically check the role in case ZK watcher fails.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1335,7 +1335,9 @@ public void addNamespaceBundleOwnershipListener(NamespaceBundleOwnershipListener
bundleOwnershipListeners.add(listener);
}
}
getOwnedServiceUnits().forEach(bundle -> notifyNamespaceBundleOwnershipListener(bundle, listeners));
pulsar.runWhenReadyForIncomingRequests(() -> {
getOwnedServiceUnits().forEach(bundle -> notifyNamespaceBundleOwnershipListener(bundle, listeners));
});
}

public void addNamespaceBundleSplitListener(NamespaceBundleSplitListener... listeners) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
package org.apache.pulsar.broker.service;

import com.google.common.annotations.VisibleForTesting;
import io.netty.channel.ChannelHandler;
import io.netty.channel.ChannelInitializer;
import io.netty.channel.socket.SocketChannel;
import io.netty.handler.codec.LengthFieldBasedFrameDecoder;
Expand Down Expand Up @@ -104,6 +105,9 @@ public PulsarChannelInitializer(PulsarService pulsar, PulsarChannelOptions opts)

@Override
protected void initChannel(SocketChannel ch) throws Exception {
// disable auto read explicitly so that requests aren't served until auto read is enabled
// ServerCnx must enable auto read in channelActive after PulsarService is ready to accept incoming requests
ch.config().setAutoRead(false);
ch.pipeline().addLast("consolidation", new FlushConsolidationHandler(1024, true));
if (this.enableTls) {
if (this.tlsEnabledWithKeyStore) {
Expand All @@ -128,7 +132,8 @@ protected void initChannel(SocketChannel ch) throws Exception {
// ServerCnx ends up reading higher number of messages and broker can not throttle the messages by disabling
// auto-read.
ch.pipeline().addLast("flowController", new FlowControlHandler());
ServerCnx cnx = newServerCnx(pulsar, listenerName);
// using "ChannelHandler" type to workaround an IntelliJ bug that shows a false positive error
ChannelHandler cnx = newServerCnx(pulsar, listenerName);
ch.pipeline().addLast("handler", cnx);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -369,6 +369,10 @@ public void channelActive(ChannelHandlerContext ctx) throws Exception {
this.commandSender = new PulsarCommandSenderImpl(brokerInterceptor, this);
this.service.getPulsarStats().recordConnectionCreate();
cnxsPerThread.get().add(this);
service.getPulsar().runWhenReadyForIncomingRequests(() -> {
// enable auto read after PulsarService is ready to accept incoming requests
ctx.channel().config().setAutoRead(true);
});
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,21 @@

import io.prometheus.client.CollectorRegistry;
import io.prometheus.client.jetty.JettyStatisticsCollector;
import java.io.IOException;
import java.util.ArrayList;
import java.util.EnumSet;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.concurrent.ExecutionException;
import javax.servlet.DispatcherType;
import javax.servlet.Filter;
import javax.servlet.FilterChain;
import javax.servlet.FilterConfig;
import javax.servlet.ServletException;
import javax.servlet.ServletRequest;
import javax.servlet.ServletResponse;
import javax.servlet.http.HttpServletResponse;
import lombok.Getter;
import org.apache.pulsar.broker.PulsarServerException;
import org.apache.pulsar.broker.PulsarService;
Expand Down Expand Up @@ -232,6 +241,7 @@ private static class FilterInitializer {
private final FilterHolder authenticationFilterHolder;
FilterInitializer(PulsarService pulsarService) {
ServiceConfiguration config = pulsarService.getConfiguration();

if (config.getMaxConcurrentHttpRequests() > 0) {
FilterHolder filterHolder = new FilterHolder(QoSFilter.class);
filterHolder.setInitParameter("maxRequests", String.valueOf(config.getMaxConcurrentHttpRequests()));
Expand All @@ -243,6 +253,10 @@ private static class FilterInitializer {
new RateLimitingFilter(config.getHttpRequestsMaxPerSecond())));
}

// wait until the PulsarService is ready to serve incoming requests
filterHolders.add(
new FilterHolder(new WaitUntilPulsarServiceIsReadyForIncomingRequestsFilter(pulsarService)));

boolean brokerInterceptorEnabled = pulsarService.getBrokerInterceptor() != null;
if (brokerInterceptorEnabled) {
ExceptionHandler handler = new ExceptionHandler();
Expand Down Expand Up @@ -284,6 +298,42 @@ public void addFilters(ServletContextHandler context, boolean requiresAuthentica
}
}

// Filter that waits until the PulsarService is ready to serve incoming requests
private static class WaitUntilPulsarServiceIsReadyForIncomingRequestsFilter implements Filter {
private final PulsarService pulsarService;

public WaitUntilPulsarServiceIsReadyForIncomingRequestsFilter(PulsarService pulsarService) {
this.pulsarService = pulsarService;
}

@Override
public void init(FilterConfig filterConfig) throws ServletException {

}

@Override
public void doFilter(ServletRequest request, ServletResponse response, FilterChain chain)
throws IOException, ServletException {
try {
// Wait until the PulsarService is ready to serve incoming requests
pulsarService.waitUntilReadyForIncomingRequests();
} catch (ExecutionException e) {
((HttpServletResponse) response).sendError(HttpServletResponse.SC_SERVICE_UNAVAILABLE,
"PulsarService failed to start.");
return;
} catch (InterruptedException e) {
((HttpServletResponse) response).sendError(HttpServletResponse.SC_SERVICE_UNAVAILABLE,
"PulsarService is not ready.");
return;
}
chain.doFilter(request, response);
}

@Override
public void destroy() {

}
}
}

public void addServlet(String path, ServletHolder servletHolder, boolean requiresAuthentication,
Expand Down

0 comments on commit 53df683

Please sign in to comment.