From 064866c3397f8814d6c5f033895f5b84ab238ed6 Mon Sep 17 00:00:00 2001 From: Bryce Anderson Date: Tue, 14 Nov 2023 12:16:09 -0700 Subject: [PATCH 1/7] See if we can consistantly repro the flaky test --- .../servicetalk/http/netty/IoUringTest.java | 42 ++++++++++++++++++- 1 file changed, 41 insertions(+), 1 deletion(-) diff --git a/servicetalk-http-netty/src/test/java/io/servicetalk/http/netty/IoUringTest.java b/servicetalk-http-netty/src/test/java/io/servicetalk/http/netty/IoUringTest.java index 1da2af713d..c78ce79aa5 100644 --- a/servicetalk-http-netty/src/test/java/io/servicetalk/http/netty/IoUringTest.java +++ b/servicetalk-http-netty/src/test/java/io/servicetalk/http/netty/IoUringTest.java @@ -25,6 +25,7 @@ import io.netty.incubator.channel.uring.IOUring; import io.netty.incubator.channel.uring.IOUringEventLoopGroup; +import org.junit.jupiter.api.RepeatedTest; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.condition.EnabledOnOs; import org.junit.jupiter.params.ParameterizedTest; @@ -40,6 +41,7 @@ import static org.hamcrest.MatcherAssert.assertThat; import static org.hamcrest.Matchers.instanceOf; import static org.hamcrest.Matchers.is; +import static org.hamcrest.Matchers.not; import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertTrue; import static org.junit.jupiter.api.Assumptions.assumeTrue; @@ -92,9 +94,47 @@ void ioUringIsAvailableOnLinux(boolean noOffloading) throws Exception { } finally { IoUringUtils.tryIoUring(false); if (ioUringExecutor != null) { - ioUringExecutor.closeAsync().toFuture().get(); + ioUringExecutor.closeAsync().toFuture().get(); // the offending line. assertTrue(ioUringExecutor.eventLoopGroup().isShutdown()); } } } + + @RepeatedTest(1000) + @EnabledOnOs(LINUX) + void repro() throws Exception { + ioUringIsAvailableOnLinux(false); + } + + @RepeatedTest(1000) + void maybeGenerallyFlaky() throws Exception { + EventLoopAwareNettyIoExecutor ioExecutor = null; + try { + assumeTrue(IoUringUtils.isAvailable(), "io_uring is unavailable on " + + System.getProperty("os.name") + ' ' + System.getProperty("os.version")); + IOUring.ensureAvailability(); + + ioExecutor = NettyIoExecutors.createIoExecutor(2, "io-uring"); + assertThat(ioExecutor.eventLoopGroup(), is(not(instanceOf(IOUringEventLoopGroup.class)))); + + try (ServerContext serverContext = HttpServers.forAddress(localAddress(0)) + .ioExecutor(ioExecutor) + .executionStrategy(defaultStrategy()) + .listenStreamingAndAwait(new TestServiceStreaming()); + BlockingHttpClient client = HttpClients.forSingleAddress(serverHostAndPort(serverContext)) + .ioExecutor(ioExecutor) + .executionStrategy(defaultStrategy()) + .buildBlocking()) { + HttpRequest request = client.post(SVC_ECHO).payloadBody("bonjour!", textSerializerUtf8()); + HttpResponse response = client.request(request); + assertThat(response.status(), is(OK)); + assertThat(response.payloadBody(textSerializerUtf8()), is("bonjour!")); + } + } finally { + if (ioExecutor != null) { + ioExecutor.closeAsync().toFuture().get(); // the offending line. + assertTrue(ioExecutor.eventLoopGroup().isShutdown()); + } + } + } } From 59eba4760d10ee42b7396244a463367eae6c0fa1 Mon Sep 17 00:00:00 2001 From: Bryce Anderson Date: Tue, 14 Nov 2023 12:37:19 -0700 Subject: [PATCH 2/7] Bump iterations to 5k --- .../src/test/java/io/servicetalk/http/netty/IoUringTest.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/servicetalk-http-netty/src/test/java/io/servicetalk/http/netty/IoUringTest.java b/servicetalk-http-netty/src/test/java/io/servicetalk/http/netty/IoUringTest.java index c78ce79aa5..26e9e136dc 100644 --- a/servicetalk-http-netty/src/test/java/io/servicetalk/http/netty/IoUringTest.java +++ b/servicetalk-http-netty/src/test/java/io/servicetalk/http/netty/IoUringTest.java @@ -100,13 +100,13 @@ void ioUringIsAvailableOnLinux(boolean noOffloading) throws Exception { } } - @RepeatedTest(1000) + @RepeatedTest(5000) @EnabledOnOs(LINUX) void repro() throws Exception { ioUringIsAvailableOnLinux(false); } - @RepeatedTest(1000) + @RepeatedTest(5000) void maybeGenerallyFlaky() throws Exception { EventLoopAwareNettyIoExecutor ioExecutor = null; try { From ab46659ef12679156826278d0f44133ca30541ae Mon Sep 17 00:00:00 2001 From: Bryce Anderson Date: Tue, 14 Nov 2023 13:09:58 -0700 Subject: [PATCH 3/7] See if the event loop group thinks its shutdown --- .../servicetalk/http/netty/IoUringTest.java | 59 ++++++++----------- 1 file changed, 25 insertions(+), 34 deletions(-) diff --git a/servicetalk-http-netty/src/test/java/io/servicetalk/http/netty/IoUringTest.java b/servicetalk-http-netty/src/test/java/io/servicetalk/http/netty/IoUringTest.java index 26e9e136dc..a84b327811 100644 --- a/servicetalk-http-netty/src/test/java/io/servicetalk/http/netty/IoUringTest.java +++ b/servicetalk-http-netty/src/test/java/io/servicetalk/http/netty/IoUringTest.java @@ -15,6 +15,7 @@ */ package io.servicetalk.http.netty; +import io.netty.util.concurrent.EventExecutor; import io.servicetalk.http.api.BlockingHttpClient; import io.servicetalk.http.api.HttpRequest; import io.servicetalk.http.api.HttpResponse; @@ -31,6 +32,11 @@ import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.ValueSource; +import java.util.Iterator; +import java.util.Map; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; + import static io.servicetalk.http.api.HttpExecutionStrategies.defaultStrategy; import static io.servicetalk.http.api.HttpExecutionStrategies.offloadNone; import static io.servicetalk.http.api.HttpResponseStatus.OK; @@ -38,10 +44,10 @@ import static io.servicetalk.http.netty.TestServiceStreaming.SVC_ECHO; import static io.servicetalk.transport.netty.internal.AddressUtils.localAddress; import static io.servicetalk.transport.netty.internal.AddressUtils.serverHostAndPort; +import static java.lang.Thread.getAllStackTraces; import static org.hamcrest.MatcherAssert.assertThat; import static org.hamcrest.Matchers.instanceOf; import static org.hamcrest.Matchers.is; -import static org.hamcrest.Matchers.not; import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertTrue; import static org.junit.jupiter.api.Assumptions.assumeTrue; @@ -94,7 +100,24 @@ void ioUringIsAvailableOnLinux(boolean noOffloading) throws Exception { } finally { IoUringUtils.tryIoUring(false); if (ioUringExecutor != null) { - ioUringExecutor.closeAsync().toFuture().get(); // the offending line. + try { + ioUringExecutor.closeAsync().toFuture().get(5, TimeUnit.SECONDS); // the offending line. + } catch (TimeoutException ex) { + final boolean isShutdown = ioUringExecutor.eventLoopGroup().isShutdown(); + StringBuilder sb = new StringBuilder(); + sb.append("Timed out shutting down executor. isShutdown(): ") + .append(isShutdown) + .append('\n'); + for (Map.Entry entry : Thread.getAllStackTraces().entrySet()) { + sb.append("Thread: ").append(entry.getKey().getName()).append('\n'); + for (StackTraceElement e : entry.getValue()) { + sb.append('\t').append(e).append('\n'); + } + } + Exception next = new TimeoutException(sb.toString()); + next.initCause(ex); + throw next; + } assertTrue(ioUringExecutor.eventLoopGroup().isShutdown()); } } @@ -105,36 +128,4 @@ void ioUringIsAvailableOnLinux(boolean noOffloading) throws Exception { void repro() throws Exception { ioUringIsAvailableOnLinux(false); } - - @RepeatedTest(5000) - void maybeGenerallyFlaky() throws Exception { - EventLoopAwareNettyIoExecutor ioExecutor = null; - try { - assumeTrue(IoUringUtils.isAvailable(), "io_uring is unavailable on " + - System.getProperty("os.name") + ' ' + System.getProperty("os.version")); - IOUring.ensureAvailability(); - - ioExecutor = NettyIoExecutors.createIoExecutor(2, "io-uring"); - assertThat(ioExecutor.eventLoopGroup(), is(not(instanceOf(IOUringEventLoopGroup.class)))); - - try (ServerContext serverContext = HttpServers.forAddress(localAddress(0)) - .ioExecutor(ioExecutor) - .executionStrategy(defaultStrategy()) - .listenStreamingAndAwait(new TestServiceStreaming()); - BlockingHttpClient client = HttpClients.forSingleAddress(serverHostAndPort(serverContext)) - .ioExecutor(ioExecutor) - .executionStrategy(defaultStrategy()) - .buildBlocking()) { - HttpRequest request = client.post(SVC_ECHO).payloadBody("bonjour!", textSerializerUtf8()); - HttpResponse response = client.request(request); - assertThat(response.status(), is(OK)); - assertThat(response.payloadBody(textSerializerUtf8()), is("bonjour!")); - } - } finally { - if (ioExecutor != null) { - ioExecutor.closeAsync().toFuture().get(); // the offending line. - assertTrue(ioExecutor.eventLoopGroup().isShutdown()); - } - } - } } From 16809f7514c0102690c387584b45ec859d440a81 Mon Sep 17 00:00:00 2001 From: Bryce Anderson Date: Tue, 14 Nov 2023 13:48:44 -0700 Subject: [PATCH 4/7] Only emit entries for io-uring threads --- .../test/java/io/servicetalk/http/netty/IoUringTest.java | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/servicetalk-http-netty/src/test/java/io/servicetalk/http/netty/IoUringTest.java b/servicetalk-http-netty/src/test/java/io/servicetalk/http/netty/IoUringTest.java index a84b327811..2cacea2828 100644 --- a/servicetalk-http-netty/src/test/java/io/servicetalk/http/netty/IoUringTest.java +++ b/servicetalk-http-netty/src/test/java/io/servicetalk/http/netty/IoUringTest.java @@ -15,7 +15,6 @@ */ package io.servicetalk.http.netty; -import io.netty.util.concurrent.EventExecutor; import io.servicetalk.http.api.BlockingHttpClient; import io.servicetalk.http.api.HttpRequest; import io.servicetalk.http.api.HttpResponse; @@ -32,7 +31,6 @@ import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.ValueSource; -import java.util.Iterator; import java.util.Map; import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; @@ -44,7 +42,6 @@ import static io.servicetalk.http.netty.TestServiceStreaming.SVC_ECHO; import static io.servicetalk.transport.netty.internal.AddressUtils.localAddress; import static io.servicetalk.transport.netty.internal.AddressUtils.serverHostAndPort; -import static java.lang.Thread.getAllStackTraces; import static org.hamcrest.MatcherAssert.assertThat; import static org.hamcrest.Matchers.instanceOf; import static org.hamcrest.Matchers.is; @@ -109,6 +106,10 @@ void ioUringIsAvailableOnLinux(boolean noOffloading) throws Exception { .append(isShutdown) .append('\n'); for (Map.Entry entry : Thread.getAllStackTraces().entrySet()) { + if (!entry.getKey().getName().startsWith("io-uring")) { + // Not one of our threads. + continue; + } sb.append("Thread: ").append(entry.getKey().getName()).append('\n'); for (StackTraceElement e : entry.getValue()) { sb.append('\t').append(e).append('\n'); From 78af193edeed6cc344b93c7ece02b2974967782d Mon Sep 17 00:00:00 2001 From: Bryce Anderson Date: Tue, 14 Nov 2023 15:59:35 -0700 Subject: [PATCH 5/7] Does it happen with only a single thread? --- .../src/test/java/io/servicetalk/http/netty/IoUringTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/servicetalk-http-netty/src/test/java/io/servicetalk/http/netty/IoUringTest.java b/servicetalk-http-netty/src/test/java/io/servicetalk/http/netty/IoUringTest.java index 2cacea2828..37324bed4e 100644 --- a/servicetalk-http-netty/src/test/java/io/servicetalk/http/netty/IoUringTest.java +++ b/servicetalk-http-netty/src/test/java/io/servicetalk/http/netty/IoUringTest.java @@ -78,7 +78,7 @@ void ioUringIsAvailableOnLinux(boolean noOffloading) throws Exception { System.getProperty("os.name") + ' ' + System.getProperty("os.version")); IOUring.ensureAvailability(); - ioUringExecutor = NettyIoExecutors.createIoExecutor(2, "io-uring"); + ioUringExecutor = NettyIoExecutors.createIoExecutor(1, "io-uring"); assertThat(ioUringExecutor.eventLoopGroup(), is(instanceOf(IOUringEventLoopGroup.class))); try (ServerContext serverContext = HttpServers.forAddress(localAddress(0)) From 506554e01bf19158d1887a8ddd05497599045697 Mon Sep 17 00:00:00 2001 From: Bryce Anderson Date: Tue, 14 Nov 2023 16:13:37 -0700 Subject: [PATCH 6/7] Try gracefully and increase timeout --- .../src/test/java/io/servicetalk/http/netty/IoUringTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/servicetalk-http-netty/src/test/java/io/servicetalk/http/netty/IoUringTest.java b/servicetalk-http-netty/src/test/java/io/servicetalk/http/netty/IoUringTest.java index 37324bed4e..e762daee37 100644 --- a/servicetalk-http-netty/src/test/java/io/servicetalk/http/netty/IoUringTest.java +++ b/servicetalk-http-netty/src/test/java/io/servicetalk/http/netty/IoUringTest.java @@ -98,7 +98,7 @@ void ioUringIsAvailableOnLinux(boolean noOffloading) throws Exception { IoUringUtils.tryIoUring(false); if (ioUringExecutor != null) { try { - ioUringExecutor.closeAsync().toFuture().get(5, TimeUnit.SECONDS); // the offending line. + ioUringExecutor.closeAsyncGracefully().toFuture().get(10, TimeUnit.SECONDS); // the offending line. } catch (TimeoutException ex) { final boolean isShutdown = ioUringExecutor.eventLoopGroup().isShutdown(); StringBuilder sb = new StringBuilder(); From 8f535098578357cd7bb20dc314b721e4210c4f0b Mon Sep 17 00:00:00 2001 From: Bryce Anderson Date: Tue, 14 Nov 2023 17:06:00 -0700 Subject: [PATCH 7/7] Back to closeAsync because closeAsyncGracefully never terminates --- .../src/test/java/io/servicetalk/http/netty/IoUringTest.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/servicetalk-http-netty/src/test/java/io/servicetalk/http/netty/IoUringTest.java b/servicetalk-http-netty/src/test/java/io/servicetalk/http/netty/IoUringTest.java index e762daee37..e6261a9c16 100644 --- a/servicetalk-http-netty/src/test/java/io/servicetalk/http/netty/IoUringTest.java +++ b/servicetalk-http-netty/src/test/java/io/servicetalk/http/netty/IoUringTest.java @@ -98,7 +98,8 @@ void ioUringIsAvailableOnLinux(boolean noOffloading) throws Exception { IoUringUtils.tryIoUring(false); if (ioUringExecutor != null) { try { - ioUringExecutor.closeAsyncGracefully().toFuture().get(10, TimeUnit.SECONDS); // the offending line. + // It seems that if we attempt to close gracefully we never actually close down. + ioUringExecutor.closeAsync().toFuture().get(10, TimeUnit.SECONDS); // the offending line. } catch (TimeoutException ex) { final boolean isShutdown = ioUringExecutor.eventLoopGroup().isShutdown(); StringBuilder sb = new StringBuilder();