Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Impeller] Encode render passes concurrently on iOS. #42028

Merged
merged 23 commits into from
May 23, 2023
Merged
Show file tree
Hide file tree
Changes from 19 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 1 addition & 5 deletions impeller/entity/contents/content_context.cc
Original file line number Diff line number Diff line change
Expand Up @@ -375,11 +375,7 @@ std::shared_ptr<Texture> ContentContext::MakeSubpass(
return nullptr;
}

if (!sub_renderpass->EncodeCommands()) {
return nullptr;
}

if (!sub_command_buffer->SubmitCommands()) {
if (!sub_command_buffer->SubmitCommandsAsync(std::move(sub_renderpass))) {
return nullptr;
}

Expand Down
12 changes: 3 additions & 9 deletions impeller/entity/inline_pass_context.cc
Original file line number Diff line number Diff line change
Expand Up @@ -54,15 +54,9 @@ bool InlinePassContext::EndPass() {
return true;
}

if (!pass_->EncodeCommands()) {
VALIDATION_LOG
<< "Failed to encode commands while ending the current render pass.";
return false;
}

if (!command_buffer_->SubmitCommands()) {
VALIDATION_LOG
<< "Failed to submit command buffer while ending render pass.";
if (!command_buffer_->SubmitCommandsAsync(std::move(pass_))) {
VALIDATION_LOG << "Failed to encode and submit command buffer while ending "
"render pass.";
return false;
}

Expand Down
4 changes: 4 additions & 0 deletions impeller/playground/backend/metal/playground_impl_mtl.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,9 @@

#include <memory>

#include "flutter/fml/concurrent_message_loop.h"
#include "flutter/fml/macros.h"
#include "flutter/fml/synchronization/sync_switch.h"
#include "impeller/playground/playground_impl.h"

namespace impeller {
Expand All @@ -27,6 +29,8 @@ class PlaygroundImplMTL final : public PlaygroundImpl {
// To ensure that ObjC stuff doesn't leak into C++ TUs.
std::unique_ptr<Data> data_;
std::shared_ptr<Context> context_;
std::shared_ptr<fml::ConcurrentMessageLoop> concurrent_loop_;
std::shared_ptr<const fml::SyncSwitch> is_gpu_disabled_sync_switch_;

// |PlaygroundImpl|
std::shared_ptr<Context> GetContext() const override;
Expand Down
10 changes: 7 additions & 3 deletions impeller/playground/backend/metal/playground_impl_mtl.mm
Original file line number Diff line number Diff line change
Expand Up @@ -63,16 +63,20 @@
PlaygroundImplMTL::PlaygroundImplMTL(PlaygroundSwitches switches)
: PlaygroundImpl(switches),
handle_(nullptr, &DestroyWindowHandle),
data_(std::make_unique<Data>()) {
data_(std::make_unique<Data>()),
concurrent_loop_(fml::ConcurrentMessageLoop::Create()),
is_gpu_disabled_sync_switch_(new fml::SyncSwitch(false)) {
::glfwDefaultWindowHints();
::glfwWindowHint(GLFW_CLIENT_API, GLFW_NO_API);
::glfwWindowHint(GLFW_VISIBLE, GLFW_FALSE);
auto window = ::glfwCreateWindow(1, 1, "Test", nullptr, nullptr);
if (!window) {
return;
}
auto context = ContextMTL::Create(ShaderLibraryMappingsForPlayground(),
"Playground Library");
auto worker_task_runner = concurrent_loop_->GetTaskRunner();
auto context = ContextMTL::Create(
ShaderLibraryMappingsForPlayground(), worker_task_runner,
is_gpu_disabled_sync_switch_, "Playground Library");
if (!context) {
return;
}
Expand Down
3 changes: 3 additions & 0 deletions impeller/renderer/backend/metal/command_buffer_mtl.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,9 @@ class CommandBufferMTL final : public CommandBuffer {
// |CommandBuffer|
void OnWaitUntilScheduled() override;

// |CommandBuffer|
bool SubmitCommandsAsync(std::shared_ptr<RenderPass> render_pass) override;

// |CommandBuffer|
std::shared_ptr<RenderPass> OnCreateRenderPass(RenderTarget target) override;

Expand Down
61 changes: 61 additions & 0 deletions impeller/renderer/backend/metal/command_buffer_mtl.mm
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,13 @@

#include "impeller/renderer/backend/metal/command_buffer_mtl.h"

#include "flutter/fml/make_copyable.h"
#include "flutter/fml/synchronization/semaphore.h"
#include "flutter/fml/trace_event.h"

#include "impeller/renderer/backend/metal/blit_pass_mtl.h"
#include "impeller/renderer/backend/metal/compute_pass_mtl.h"
#include "impeller/renderer/backend/metal/context_mtl.h"
#include "impeller/renderer/backend/metal/render_pass_mtl.h"

namespace impeller {
Expand Down Expand Up @@ -171,6 +176,62 @@ static bool LogMTLCommandBufferErrorIfPresent(id<MTLCommandBuffer> buffer) {
return true;
}

bool CommandBufferMTL::SubmitCommandsAsync(
std::shared_ptr<RenderPass> render_pass) {
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Currently this only works for the render pass but could optionally take the blit and compute passes too. I moved this functionality to the command buffer to make ownership of the render pass data more obvious - the closure keeps the shared_ptr for the render pass alive while the command buffer itself only requires the buffer.

TRACE_EVENT0("impeller", "CommandBufferMTL::SubmitCommandsAsync");
if (!IsValid() || !render_pass->IsValid()) {
return false;
}
auto context = context_.lock();
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this is more or less what ended up being the fix for problems around ownership in Vulkan backend.

if (!context) {
return false;
}
[buffer_ enqueue];
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Enqueuing on the main thread ensures the ordering.

auto buffer = buffer_;
buffer_ = nil;

auto worker_task_runner = ContextMTL::Cast(*context).GetWorkerTaskRunner();
auto mtl_render_pass = static_cast<RenderPassMTL*>(render_pass.get());

// Render command encoder creation has been observed to exceed the stack size
// limit for worker threads, and therefore is intentionally constructed on the
// raster thread.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we not bump the stack size limit for worker threads?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How can I try that?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's platform specific code, we'd need a pthreads and a win32 implementation at the least.

auto render_command_encoder =
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This was a frustrating discovery because it prevents us from doing the obvious thing and just calling RenderPass::Encode in the closure.

[buffer renderCommandEncoderWithDescriptor:mtl_render_pass->desc_];
if (!render_command_encoder) {
return false;
}

auto task = fml::MakeCopyable([render_pass, buffer, render_command_encoder,
weak_context = context_]() {
auto context = weak_context.lock();
if (!context) {
return;
}
auto is_gpu_disabled_sync_switch =
ContextMTL::Cast(*context).GetIsGpuDisabledSyncSwitch();
is_gpu_disabled_sync_switch->Execute(fml::SyncSwitch::Handlers().SetIfFalse(
[&render_pass, &render_command_encoder, &buffer, &context] {
auto mtl_render_pass = static_cast<RenderPassMTL*>(render_pass.get());
if (!mtl_render_pass->label_.empty()) {
[render_command_encoder
setLabel:@(mtl_render_pass->label_.c_str())];
}

auto result = mtl_render_pass->EncodeCommands(
context->GetResourceAllocator(), render_command_encoder);
[render_command_encoder endEncoding];
if (result) {
[buffer commit];
} else {
VALIDATION_LOG << "Failed to encode command buffer";
}
}));
});
worker_task_runner->PostTask(task);
return true;
}

void CommandBufferMTL::OnWaitUntilScheduled() {}

std::shared_ptr<RenderPass> CommandBufferMTL::OnCreateRenderPass(
Expand Down
20 changes: 18 additions & 2 deletions impeller/renderer/backend/metal/context_mtl.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,9 @@
#include <string>
#include <vector>

#include "flutter/fml/concurrent_message_loop.h"
#include "flutter/fml/macros.h"
#include "flutter/fml/synchronization/sync_switch.h"
#include "impeller/base/backend_cast.h"
#include "impeller/core/sampler.h"
#include "impeller/renderer/backend/metal/allocator_mtl.h"
Expand All @@ -26,10 +28,14 @@ class ContextMTL final : public Context,
public std::enable_shared_from_this<ContextMTL> {
public:
static std::shared_ptr<ContextMTL> Create(
const std::vector<std::string>& shader_library_paths);
const std::vector<std::string>& shader_library_paths,
std::shared_ptr<fml::ConcurrentTaskRunner> worker_task_runner,
std::shared_ptr<const fml::SyncSwitch> is_gpu_disabled_sync_switch);

static std::shared_ptr<ContextMTL> Create(
const std::vector<std::shared_ptr<fml::Mapping>>& shader_libraries_data,
std::shared_ptr<fml::ConcurrentTaskRunner> worker_task_runner,
std::shared_ptr<const fml::SyncSwitch> is_gpu_disabled_sync_switch,
const std::string& label);

// |Context|
Expand Down Expand Up @@ -66,6 +72,10 @@ class ContextMTL final : public Context,

id<MTLCommandBuffer> CreateMTLCommandBuffer() const;

const std::shared_ptr<fml::ConcurrentTaskRunner>& GetWorkerTaskRunner() const;

std::shared_ptr<const fml::SyncSwitch> GetIsGpuDisabledSyncSwitch() const;

private:
id<MTLDevice> device_ = nullptr;
id<MTLCommandQueue> command_queue_ = nullptr;
Expand All @@ -74,9 +84,15 @@ class ContextMTL final : public Context,
std::shared_ptr<SamplerLibrary> sampler_library_;
std::shared_ptr<AllocatorMTL> resource_allocator_;
std::shared_ptr<const Capabilities> device_capabilities_;
std::shared_ptr<fml::ConcurrentTaskRunner> worker_task_runner_;
std::shared_ptr<const fml::SyncSwitch> is_gpu_disabled_sync_switch_;
bool is_valid_ = false;

ContextMTL(id<MTLDevice> device, NSArray<id<MTLLibrary>>* shader_libraries);
ContextMTL(
id<MTLDevice> device,
NSArray<id<MTLLibrary>>* shader_libraries,
std::shared_ptr<fml::ConcurrentTaskRunner> worker_task_runner,
std::shared_ptr<const fml::SyncSwitch> is_gpu_disabled_sync_switch);

std::shared_ptr<CommandBuffer> CreateCommandBufferInQueue(
id<MTLCommandQueue> queue) const;
Expand Down
33 changes: 27 additions & 6 deletions impeller/renderer/backend/metal/context_mtl.mm
Original file line number Diff line number Diff line change
Expand Up @@ -65,9 +65,14 @@ static bool DeviceSupportsComputeSubgroups(id<MTLDevice> device) {
.Build();
}

ContextMTL::ContextMTL(id<MTLDevice> device,
NSArray<id<MTLLibrary>>* shader_libraries)
: device_(device) {
ContextMTL::ContextMTL(
id<MTLDevice> device,
NSArray<id<MTLLibrary>>* shader_libraries,
std::shared_ptr<fml::ConcurrentTaskRunner> worker_task_runner,
std::shared_ptr<const fml::SyncSwitch> is_gpu_disabled_sync_switch)
: device_(device),
worker_task_runner_(std::move(worker_task_runner)),
is_gpu_disabled_sync_switch_(std::move(is_gpu_disabled_sync_switch)) {
// Validate device.
if (!device_) {
VALIDATION_LOG << "Could not setup valid Metal device.";
Expand Down Expand Up @@ -200,10 +205,13 @@ static bool DeviceSupportsComputeSubgroups(id<MTLDevice> device) {
}

std::shared_ptr<ContextMTL> ContextMTL::Create(
const std::vector<std::string>& shader_library_paths) {
const std::vector<std::string>& shader_library_paths,
std::shared_ptr<fml::ConcurrentTaskRunner> worker_task_runner,
std::shared_ptr<const fml::SyncSwitch> is_gpu_disabled_sync_switch) {
auto device = CreateMetalDevice();
auto context = std::shared_ptr<ContextMTL>(new ContextMTL(
device, MTLShaderLibraryFromFilePaths(device, shader_library_paths)));
device, MTLShaderLibraryFromFilePaths(device, shader_library_paths),
std::move(worker_task_runner), std::move(is_gpu_disabled_sync_switch)));
if (!context->IsValid()) {
FML_LOG(ERROR) << "Could not create Metal context.";
return nullptr;
Expand All @@ -213,11 +221,14 @@ static bool DeviceSupportsComputeSubgroups(id<MTLDevice> device) {

std::shared_ptr<ContextMTL> ContextMTL::Create(
const std::vector<std::shared_ptr<fml::Mapping>>& shader_libraries_data,
std::shared_ptr<fml::ConcurrentTaskRunner> worker_task_runner,
std::shared_ptr<const fml::SyncSwitch> is_gpu_disabled_sync_switch,
const std::string& label) {
auto device = CreateMetalDevice();
auto context = std::shared_ptr<ContextMTL>(new ContextMTL(
device,
MTLShaderLibraryFromFileData(device, shader_libraries_data, label)));
MTLShaderLibraryFromFileData(device, shader_libraries_data, label),
worker_task_runner, std::move(is_gpu_disabled_sync_switch)));
if (!context->IsValid()) {
FML_LOG(ERROR) << "Could not create Metal context.";
return nullptr;
Expand Down Expand Up @@ -257,6 +268,16 @@ static bool DeviceSupportsComputeSubgroups(id<MTLDevice> device) {
return CreateCommandBufferInQueue(command_queue_);
}

// Returns the concurrent task runner onto which command-encoding work can be
// offloaded (used by CommandBufferMTL::SubmitCommandsAsync).
const std::shared_ptr<fml::ConcurrentTaskRunner>&
ContextMTL::GetWorkerTaskRunner() const {
  return worker_task_runner_;
}

// Returns the sync switch that reports whether GPU access is currently
// disabled; async encoding consults it before committing command buffers.
std::shared_ptr<const fml::SyncSwitch> ContextMTL::GetIsGpuDisabledSyncSwitch()
    const {
  return is_gpu_disabled_sync_switch_;
}

std::shared_ptr<CommandBuffer> ContextMTL::CreateCommandBufferInQueue(
id<MTLCommandQueue> queue) const {
if (!IsValid()) {
Expand Down
2 changes: 2 additions & 0 deletions impeller/renderer/backend/metal/render_pass_mtl.mm
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,13 @@

#include "flutter/fml/closure.h"
#include "flutter/fml/logging.h"
#include "flutter/fml/make_copyable.h"
#include "flutter/fml/trace_event.h"
#include "impeller/base/backend_cast.h"
#include "impeller/core/formats.h"
#include "impeller/core/host_buffer.h"
#include "impeller/core/shader_types.h"
#include "impeller/renderer/backend/metal/context_mtl.h"
#include "impeller/renderer/backend/metal/device_buffer_mtl.h"
#include "impeller/renderer/backend/metal/formats_mtl.h"
#include "impeller/renderer/backend/metal/pipeline_mtl.h"
Expand Down
15 changes: 15 additions & 0 deletions impeller/renderer/command_buffer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,21 @@ void CommandBuffer::WaitUntilScheduled() {
return OnWaitUntilScheduled();
}

// Base (synchronous) implementation of SubmitCommandsAsync: encodes the
// render pass on the calling thread and then submits the command buffer.
// Backends that can encode off-thread (e.g. Metal) override this.
//
// Returns false if the command buffer or render pass is invalid, or if
// encoding/submission fails.
bool CommandBuffer::SubmitCommandsAsync(
    std::shared_ptr<RenderPass>
        render_pass  // NOLINT(performance-unnecessary-value-param)
) {
  TRACE_EVENT0("impeller", "CommandBuffer::SubmitCommandsAsync");
  // Guard against a null pass before dereferencing it; callers hand over
  // moved-from shared_ptrs that may be empty.
  if (!render_pass || !render_pass->IsValid() || !IsValid()) {
    return false;
  }
  if (!render_pass->EncodeCommands()) {
    return false;
  }

  return SubmitCommands(nullptr);
}

std::shared_ptr<RenderPass> CommandBuffer::CreateRenderPass(
const RenderTarget& render_target) {
auto pass = OnCreateRenderPass(render_target);
Expand Down
14 changes: 13 additions & 1 deletion impeller/renderer/command_buffer.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,8 @@ class CommandBuffer {

//----------------------------------------------------------------------------
/// @brief Schedule the command encoded by render passes within this
/// command buffer on the GPU.
/// command buffer on the GPU. The encoding of these commands is
/// performed immediately on the calling thread.
///
/// A command buffer may only be committed once.
///
Expand All @@ -65,6 +66,17 @@ class CommandBuffer {

[[nodiscard]] bool SubmitCommands();

//----------------------------------------------------------------------------
/// @brief Schedule the command encoded by render passes within this
/// command buffer on the GPU. The enqueuing of this buffer is
/// performed immediately but encoding is pushed to a worker
/// thread if possible.
///
/// A command buffer may only be committed once.
///
[[nodiscard]] virtual bool SubmitCommandsAsync(
std::shared_ptr<RenderPass> render_pass);

//----------------------------------------------------------------------------
/// @brief Force execution of pending GPU commands.
///
Expand Down
1 change: 1 addition & 0 deletions impeller/renderer/render_pass.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include <string>

#include "impeller/renderer/command.h"
#include "impeller/renderer/command_buffer.h"
#include "impeller/renderer/render_target.h"

namespace impeller {
Expand Down
4 changes: 3 additions & 1 deletion shell/common/animator_unittests.cc
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,9 @@ TEST_F(ShellTest, VSyncTargetTime) {
[vsync_clock, &create_vsync_waiter](Shell& shell) {
return ShellTestPlatformView::Create(
shell, shell.GetTaskRunners(), vsync_clock, create_vsync_waiter,
ShellTestPlatformView::BackendType::kDefaultBackend, nullptr);
ShellTestPlatformView::BackendType::kDefaultBackend, nullptr,
shell.GetConcurrentWorkerTaskRunner(),
shell.GetIsGpuDisabledSyncSwitch());
},
[](Shell& shell) { return std::make_unique<Rasterizer>(shell); });
ASSERT_TRUE(DartVMRef::IsInstanceRunning());
Expand Down
15 changes: 9 additions & 6 deletions shell/common/shell_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -352,12 +352,15 @@ std::unique_ptr<Shell> ShellTest::CreateShell(
shell_test_external_view_embedder, //
rendering_backend //
](Shell& shell) {
return ShellTestPlatformView::Create(shell, //
shell.GetTaskRunners(), //
vsync_clock, //
create_vsync_waiter, //
rendering_backend, //
shell_test_external_view_embedder //
return ShellTestPlatformView::Create(
shell, //
shell.GetTaskRunners(), //
vsync_clock, //
create_vsync_waiter, //
rendering_backend, //
shell_test_external_view_embedder, //
shell.GetConcurrentWorkerTaskRunner(), //
shell.GetIsGpuDisabledSyncSwitch() //
);
};
}
Expand Down
Loading