[Impeller] batch submit vulkan command buffers on newer hardware. (flutter/engine#55956)

Coming back to this from the previous attempt: https://github.com/flutter/engine/pull/50139 

* Even if it's sometimes slower, it's also way faster on more complex scenes.
* The adreno bugs seem limited to older devices.

Conservatively, only enable batching for Mali and newer Adreno GPUs.

On non-Vulkan backends we submit immediately.
This commit is contained in:
Jonah Williams
2024-10-19 08:53:38 -07:00
committed by GitHub
parent 15129c0bbc
commit d56c8a7bb0
18 changed files with 285 additions and 129 deletions

View File

@@ -13,6 +13,7 @@
#include "display_list/effects/dl_image_filter.h"
#include "flutter/fml/logging.h"
#include "flutter/fml/trace_event.h"
#include "impeller/base/validation.h"
#include "impeller/display_list/color_filter.h"
#include "impeller/display_list/image_filter.h"
#include "impeller/display_list/skia_conversions.h"
@@ -1672,10 +1673,8 @@ bool Canvas::BlitToOnscreen() {
VALIDATION_LOG << "Failed to encode root pass blit command.";
return false;
}
if (!renderer_.GetContext()
->GetCommandQueue()
->Submit({command_buffer})
.ok()) {
if (!renderer_.GetContext()->EnqueueCommandBuffer(
std::move(command_buffer))) {
return false;
}
} else {
@@ -1703,10 +1702,8 @@ bool Canvas::BlitToOnscreen() {
VALIDATION_LOG << "Failed to encode root pass command buffer.";
return false;
}
if (!renderer_.GetContext()
->GetCommandQueue()
->Submit({command_buffer})
.ok()) {
if (!renderer_.GetContext()->EnqueueCommandBuffer(
std::move(command_buffer))) {
return false;
}
}
@@ -1726,6 +1723,10 @@ void Canvas::EndReplay() {
BlitToOnscreen();
}
if (!renderer_.GetContext()->FlushCommandBuffers()) {
// Not much we can do.
VALIDATION_LOG << "Failed to submit command buffers";
}
render_passes_.clear();
renderer_.GetRenderTargetCache()->End();
clip_geometry_.clear();

View File

@@ -110,10 +110,7 @@ std::optional<Snapshot> Contents::RenderToSnapshot(
if (!render_target.ok()) {
return std::nullopt;
}
if (!renderer.GetContext()
->GetCommandQueue()
->Submit(/*buffers=*/{std::move(command_buffer)})
.ok()) {
if (!renderer.GetContext()->EnqueueCommandBuffer(std::move(command_buffer))) {
return std::nullopt;
}

View File

@@ -243,10 +243,7 @@ static std::optional<Entity> AdvancedBlend(
if (!render_target.ok()) {
return std::nullopt;
}
if (!renderer.GetContext()
->GetCommandQueue()
->Submit(/*buffers=*/{std::move(command_buffer)})
.ok()) {
if (!renderer.GetContext()->EnqueueCommandBuffer(std::move(command_buffer))) {
return std::nullopt;
}
@@ -655,11 +652,7 @@ static std::optional<Entity> PipelineBlend(
if (!render_target.ok()) {
return std::nullopt;
}
if (!renderer.GetContext()
->GetCommandQueue()
->Submit(/*buffers=*/{std::move(command_buffer)})
.ok()) {
if (!renderer.GetContext()->EnqueueCommandBuffer(std::move(command_buffer))) {
return std::nullopt;
}
@@ -895,11 +888,7 @@ std::optional<Entity> BlendFilterContents::CreateFramebufferAdvancedBlend(
if (!render_target.ok()) {
return std::nullopt;
}
if (!renderer.GetContext()
->GetCommandQueue()
->Submit(/*buffers=*/{std::move(cmd_buffer)})
.ok()) {
if (!renderer.GetContext()->EnqueueCommandBuffer(std::move(cmd_buffer))) {
return std::nullopt;
}

View File

@@ -817,11 +817,12 @@ std::optional<Entity> GaussianBlurFilterContents::RenderFilter(
return std::nullopt;
}
if (!renderer.GetContext()
->GetCommandQueue()
->Submit(/*buffers=*/{command_buffer_1, command_buffer_2,
command_buffer_3})
.ok()) {
if (!(renderer.GetContext()->EnqueueCommandBuffer(
std::move(command_buffer_1)) &&
renderer.GetContext()->EnqueueCommandBuffer(
std::move(command_buffer_2)) &&
renderer.GetContext()->EnqueueCommandBuffer(
std::move(command_buffer_3)))) {
return std::nullopt;
}

View File

@@ -144,10 +144,8 @@ std::optional<Entity> DirectionalMorphologyFilterContents::RenderFilter(
if (!render_target.ok()) {
return std::nullopt;
}
if (!renderer.GetContext()
->GetCommandQueue()
->Submit(/*buffers=*/{std::move(command_buffer)})
.ok()) {
if (!renderer.GetContext()->EnqueueCommandBuffer(std::move(command_buffer))) {
return std::nullopt;
}

View File

@@ -61,17 +61,10 @@ bool InlinePassContext::EndPass() {
return false;
}
}
if (!renderer_.GetContext()
->GetCommandQueue()
->Submit({std::move(command_buffer_)})
.ok()) {
return false;
}
pass_ = nullptr;
command_buffer_ = nullptr;
return true;
return renderer_.GetContext()->EnqueueCommandBuffer(
std::move(command_buffer_));
}
EntityPassTarget& InlinePassContext::GetPassTarget() const {

View File

@@ -445,6 +445,7 @@ void ContextVK::Setup(Settings settings) {
device_name_ = std::string(physical_device_properties.deviceName);
command_queue_vk_ = std::make_shared<CommandQueueVK>(weak_from_this());
should_disable_surface_control_ = settings.disable_surface_control;
should_batch_cmd_buffers_ = driver_info_->CanBatchSubmitCommandBuffers();
is_valid_ = true;
// Create the GPU Tracer later because it depends on state from
@@ -590,6 +591,26 @@ std::shared_ptr<CommandQueue> ContextVK::GetCommandQueue() const {
return command_queue_vk_;
}
/// Hands `command_buffer` to this context for submission.
///
/// When batching is enabled the buffer is appended to the pending list and is
/// only submitted by a later FlushCommandBuffers() call; otherwise it is
/// submitted to the command queue right away.
///
/// @param command_buffer  The buffer to submit. Ownership is taken.
/// @return False if `command_buffer` is null or if an immediate submission
///         fails. A batched buffer always reports true here; submission
///         errors for it can only be observed from FlushCommandBuffers().
bool ContextVK::EnqueueCommandBuffer(
    std::shared_ptr<CommandBuffer> command_buffer) {
  // Reject null up front. In batching mode a null entry would otherwise be
  // queued silently and only surface when the whole batch is flushed.
  if (!command_buffer) {
    return false;
  }
  if (should_batch_cmd_buffers_) {
    pending_command_buffers_.push_back(std::move(command_buffer));
    return true;
  }
  // Move rather than copy: this function owns the only reference it needs.
  return GetCommandQueue()->Submit({std::move(command_buffer)}).ok();
}
/// Submits every command buffer queued by EnqueueCommandBuffer() in a single
/// call to the command queue and empties the pending list.
///
/// @return True if there was nothing to submit (batching disabled or no
///         pending buffers) or if the batched submission succeeded. The
///         pending list is cleared whether or not the submission succeeded.
bool ContextVK::FlushCommandBuffers() {
  // Nothing to flush is a success, not an error.
  // NOTE(review): the queue presumably rejects an empty submission, which
  // would otherwise make an idle flush look like a failure -- confirm against
  // CommandQueueVK::Submit.
  if (!should_batch_cmd_buffers_ || pending_command_buffers_.empty()) {
    return true;
  }
  const bool submitted =
      GetCommandQueue()->Submit(pending_command_buffers_).ok();
  pending_command_buffers_.clear();
  return submitted;
}
// Creating a render pass is observed to take an additional 6ms on a Pixel 7
// device as the driver will lazily bootstrap and compile shaders to do so.
// The render pass does not need to be begun or executed.

View File

@@ -22,6 +22,7 @@
#include "impeller/renderer/backend/vulkan/sampler_library_vk.h"
#include "impeller/renderer/backend/vulkan/shader_library_vk.h"
#include "impeller/renderer/capabilities.h"
#include "impeller/renderer/command_buffer.h"
#include "impeller/renderer/command_queue.h"
#include "impeller/renderer/context.h"
@@ -190,6 +191,13 @@ class ContextVK final : public Context,
/// disabled, even if the device is capable of supporting it.
bool GetShouldDisableSurfaceControlSwapchain() const;
// | Context |
bool EnqueueCommandBuffer(
std::shared_ptr<CommandBuffer> command_buffer) override;
// | Context |
bool FlushCommandBuffers() override;
private:
struct DeviceHolderImpl : public DeviceHolderVK {
// |DeviceHolder|
@@ -223,6 +231,8 @@ class ContextVK final : public Context,
std::shared_ptr<DescriptorPoolRecyclerVK> descriptor_pool_recycler_;
std::shared_ptr<CommandQueue> command_queue_vk_;
bool should_disable_surface_control_ = false;
bool should_batch_cmd_buffers_ = false;
std::vector<std::shared_ptr<CommandBuffer>> pending_command_buffers_;
const uint64_t hash_;

View File

@@ -260,5 +260,56 @@ TEST(ContextVKTest, HasDefaultColorFormat) {
ASSERT_NE(capabilites_vk->GetDefaultColorFormat(), PixelFormat::kUnknown);
}
// Verifies that a context built for an ARM GPU defers submission: enqueuing
// command buffers allocates them but creates no fence until
// FlushCommandBuffers() is called.
TEST(ContextVKTest, BatchSubmitCommandBuffersOnArm) {
  std::shared_ptr<ContextVK> context =
      MockVulkanContextBuilder()
          .SetPhysicalPropertiesCallback(
              [](VkPhysicalDevice device, VkPhysicalDeviceProperties* prop) {
                prop->vendorID = 0x13B5;  // ARM
                prop->deviceType = VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU;
              })
          .Build();

  EXPECT_TRUE(context->EnqueueCommandBuffer(context->CreateCommandBuffer()));
  EXPECT_TRUE(context->EnqueueCommandBuffer(context->CreateCommandBuffer()));

  // If command buffers are batch submitted, we should have created them but
  // not created the fence to track them after enqueuing.
  auto functions = GetMockVulkanFunctions(context->GetDevice());
  EXPECT_TRUE(std::find(functions->begin(), functions->end(),
                        "vkAllocateCommandBuffers") != functions->end());
  EXPECT_TRUE(std::find(functions->begin(), functions->end(),
                        "vkCreateFence") == functions->end());

  // The flush itself must succeed; checking the result keeps a failed
  // submission from going unnoticed.
  EXPECT_TRUE(context->FlushCommandBuffers());

  // After flushing, the fence should be created.
  functions = GetMockVulkanFunctions(context->GetDevice());
  EXPECT_TRUE(std::find(functions->begin(), functions->end(),
                        "vkCreateFence") != functions->end());
}
// Verifies that a context built for an unrecognised vendor submits each
// command buffer as soon as it is enqueued, creating its fence immediately.
TEST(ContextVKTest, BatchSubmitCommandBuffersOnNonArm) {
  auto builder = MockVulkanContextBuilder();
  builder.SetPhysicalPropertiesCallback(
      [](VkPhysicalDevice device, VkPhysicalDeviceProperties* prop) {
        prop->vendorID = 0x8686;  // Made up ID
        prop->deviceType = VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU;
      });
  std::shared_ptr<ContextVK> context = builder.Build();

  EXPECT_TRUE(context->EnqueueCommandBuffer(context->CreateCommandBuffer()));
  EXPECT_TRUE(context->EnqueueCommandBuffer(context->CreateCommandBuffer()));

  // If command buffers are not batch submitted, we should have created them
  // and a corresponding fence immediately.
  auto called = GetMockVulkanFunctions(context->GetDevice());
  const auto allocate_it =
      std::find(called->begin(), called->end(), "vkAllocateCommandBuffers");
  EXPECT_TRUE(allocate_it != called->end());
  const auto fence_it =
      std::find(called->begin(), called->end(), "vkCreateFence");
  EXPECT_TRUE(fence_it != called->end());
}
} // namespace testing
} // namespace impeller

View File

@@ -317,6 +317,12 @@ void DriverInfoVK::DumpToLog() const {
FML_LOG(IMPORTANT) << stream.str();
}
/// Reports whether command buffers may be batched into a single submission on
/// this driver: always for ARM, and for Adreno only when the detected model
/// compares at or above kAdreno702.
///
/// NOTE(review): the Adreno comparison relies on the declaration order of the
/// AdrenoGPU enumerators -- confirm the enum stays ordered when adding models.
bool DriverInfoVK::CanBatchSubmitCommandBuffers() const {
  if (vendor_ == VendorVK::kARM) {
    return true;
  }
  if (!adreno_gpu_.has_value()) {
    return false;
  }
  return *adreno_gpu_ >= AdrenoGPU::kAdreno702;
}
bool DriverInfoVK::IsEmulator() const {
#if FML_OS_ANDROID
// Google SwiftShader on Android.

View File

@@ -12,100 +12,100 @@ namespace impeller {
// https://en.wikipedia.org/wiki/Adreno
enum class AdrenoGPU {
// Unknown GPU, likely newer model.
kUnknown,
// X
kAdrenoX185,
kAdrenoX145,
// 700s
kAdreno750,
kAdreno740,
kAdreno735,
kAdreno732,
kAdreno730,
kAdreno725,
kAdreno720,
kAdreno710,
kAdreno702,
// I don't think the 400 series will ever run Vulkan, but if some show up we
// can add them here.
// 500s
kAdreno504,
kAdreno505,
kAdreno506,
kAdreno508,
kAdreno509,
kAdreno510,
kAdreno512,
kAdreno530,
kAdreno540,
// 600s
kAdreno695,
kAdreno690,
kAdreno685,
kAdreno680,
kAdreno675,
kAdreno663,
kAdreno660,
kAdreno650,
kAdreno644,
kAdreno643L,
kAdreno642,
kAdreno642L,
kAdreno605,
kAdreno608,
kAdreno610,
kAdreno612,
kAdreno613,
kAdreno615,
kAdreno616,
kAdreno618,
kAdreno619L,
kAdreno619,
kAdreno620,
kAdreno630,
// The 640 is the first GPU inside an Android device with upgradable drivers.
// Anything before this point exhibiting broken behavior is broken forever.
kAdreno640,
kAdreno630,
kAdreno620,
kAdreno619,
kAdreno619L,
kAdreno618,
kAdreno616,
kAdreno615,
kAdreno613,
kAdreno612,
kAdreno610,
kAdreno608,
kAdreno605,
// 500s
kAdreno540,
kAdreno530,
kAdreno512,
kAdreno510,
kAdreno509,
kAdreno508,
kAdreno506,
kAdreno505,
kAdreno504,
// I don't think the 400 series will ever run Vulkan, but if some show up we
// can add them here.
kAdreno642L,
kAdreno642,
kAdreno643L,
kAdreno644,
kAdreno650,
kAdreno660,
kAdreno663,
kAdreno675,
kAdreno680,
kAdreno685,
kAdreno690,
kAdreno695,
// 700s
kAdreno702,
kAdreno710,
kAdreno720,
kAdreno725,
kAdreno730,
kAdreno732,
kAdreno735,
kAdreno740,
kAdreno750,
// X
kAdrenoX145,
kAdrenoX185,
// Unknown GPU, likely newer model.
kUnknown,
};
// https://en.wikipedia.org/wiki/Mali_(processor)
enum class MaliGPU {
kUnknown,
// 5th Gen
kG925,
kG725,
kG625,
kG720,
kG620,
// These might be Vulkan 1.0 Only.
kT760,
kT820,
kT830,
kT860,
kT880,
// Bifrost
kG31,
kG51,
kG71,
kG52,
kG72,
kG76,
// Valhall
// Note: there is an Immortalis-G715 a Mali-G715
kG715,
kG615,
kG710,
kG610,
kG510,
kG310,
kG78,
kG68,
kG77,
kG57,
kG77,
kG68,
kG78,
kG310,
kG510,
kG610,
kG710,
kG615,
kG715,
// Bifrost
kG76,
kG72,
kG52,
kG71,
kG51,
kG31,
// These might be Vulkan 1.0 Only.
kT880,
kT860,
kT830,
kT820,
kT760,
// 5th Gen
kG620,
kG720,
kG625,
kG725,
kG925,
kUnknown,
};
enum class VendorVK {
@@ -234,10 +234,23 @@ class DriverInfoVK {
/// If true, context setup should fail such that the device falls
/// back to OpenGLES.
///
/// @return True if non-functional device, False otherwiise.
/// @return True if non-functional device, False otherwise.
///
bool IsKnownBadDriver() const;
//----------------------------------------------------------------------------
/// @brief Determines if the driver can batch submit command buffers
/// without triggering erroneous deadlock errors.
///
/// Early 600 series Adreno drivers would deadlock if a command
/// buffer submission had too much work attached to it, this
/// requires the renderer to split up command buffers that could
/// be logically combined.
///
/// @return True if device can batch submit command buffers.
///
bool CanBatchSubmitCommandBuffers() const;
private:
bool is_valid_ = false;
Version api_version_;

View File

@@ -54,13 +54,39 @@ bool IsBadVersionTest(std::string_view driver_name, bool qc = true) {
return context->GetDriverInfo()->IsKnownBadDriver();
}
/// Builds a mock Vulkan context whose physical device reports `driver_name`
/// and returns what its driver info says about batch submission.
///
/// @param driver_name  Device name to report; truncated if it does not fit in
///                     VkPhysicalDeviceProperties::deviceName.
/// @param qc           If true the device reports vendor ID 0x168C
///                     (Qualcomm), otherwise 0x13B5 (ARM).
/// @return The result of DriverInfoVK::CanBatchSubmitCommandBuffers().
bool CanBatchSubmitTest(std::string_view driver_name, bool qc = true) {
  auto const context =
      MockVulkanContextBuilder()
          .SetPhysicalPropertiesCallback(
              [&driver_name, qc](VkPhysicalDevice device,
                                 VkPhysicalDeviceProperties* prop) {
                if (qc) {
                  prop->vendorID = 0x168C;  // Qualcomm
                } else {
                  prop->vendorID = 0x13B5;  // ARM
                }
                // string_view::copy neither bounds-checks the destination nor
                // writes a terminator, so cap the length at the array size
                // and terminate explicitly instead of relying on the mock
                // having zeroed the field.
                const auto copied = driver_name.copy(
                    prop->deviceName, sizeof(prop->deviceName) - 1);
                prop->deviceName[copied] = '\0';
                prop->deviceType = VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU;
              })
          .Build();
  return context->GetDriverInfo()->CanBatchSubmitCommandBuffers();
}
// Checks the batch-submit decision for one device of each category the
// driver info distinguishes.
TEST(DriverInfoVKTest, CanBatchSubmitCommandBuffers) {
  // Old Adreno no batch submit!
  const bool old_adreno = CanBatchSubmitTest("Adreno (TM) 540", /*qc=*/true);
  EXPECT_FALSE(old_adreno);

  // Mali devices batch.
  const bool mali = CanBatchSubmitTest("Mali-G51", /*qc=*/false);
  EXPECT_TRUE(mali);

  // So do 700-series Adreno devices.
  const bool new_adreno = CanBatchSubmitTest("Adreno (TM) 750", /*qc=*/true);
  EXPECT_TRUE(new_adreno);
}
// Checks Mali model detection from the reported device name.
TEST(DriverInfoVKTest, DriverParsingMali) {
  // A known model is recognised with or without trailing text.
  const auto with_suffix = GetMaliVersion("Mali-G51-MORE STUFF");
  EXPECT_EQ(with_suffix, MaliGPU::kG51);
  const auto exact = GetMaliVersion("Mali-G51");
  EXPECT_EQ(exact, MaliGPU::kG51);

  // A name that matches no known model maps to kUnknown.
  const auto unrecognised = GetMaliVersion("Mali-111111");
  EXPECT_EQ(unrecognised, MaliGPU::kUnknown);
}
TEST(DriverInfoVKTest, DriverParsingArm) {
TEST(DriverInfoVKTest, DriverParsingAdreno) {
EXPECT_EQ(GetAdrenoVersion("Adreno (TM) 540"), AdrenoGPU::kAdreno540);
EXPECT_EQ(GetAdrenoVersion("Foo Bar"), AdrenoGPU::kUnknown);
}

View File

@@ -111,4 +111,13 @@ const std::shared_ptr<ContextVK>& SurfaceContextVK::GetParent() const {
return parent_;
}
/// Forwards `command_buffer` to the parent ContextVK, which decides whether to
/// batch it or submit it immediately.
///
/// @return Whatever the parent's EnqueueCommandBuffer() returns.
bool SurfaceContextVK::EnqueueCommandBuffer(
    std::shared_ptr<CommandBuffer> command_buffer) {
  const bool enqueued =
      parent_->EnqueueCommandBuffer(std::move(command_buffer));
  return enqueued;
}
/// Forwards the flush request to the parent ContextVK.
///
/// @return Whatever the parent's FlushCommandBuffers() returns.
bool SurfaceContextVK::FlushCommandBuffers() {
  const bool flushed = parent_->FlushCommandBuffers();
  return flushed;
}
} // namespace impeller

View File

@@ -90,6 +90,11 @@ class SurfaceContextVK : public Context,
const std::shared_ptr<ContextVK>& GetParent() const;
bool EnqueueCommandBuffer(
std::shared_ptr<CommandBuffer> command_buffer) override;
bool FlushCommandBuffers() override;
private:
std::shared_ptr<ContextVK> parent_;
std::shared_ptr<SwapchainVK> swapchain_;

View File

@@ -494,6 +494,7 @@ VkResult vkCreateFence(VkDevice device,
const VkAllocationCallbacks* pAllocator,
VkFence* pFence) {
MockDevice* mock_device = reinterpret_cast<MockDevice*>(device);
mock_device->AddCalledFunction("vkCreateFence");
*pFence = reinterpret_cast<VkFence>(new MockFence());
return VK_SUCCESS;
}

View File

@@ -4,6 +4,8 @@
#include "impeller/renderer/context.h"
#include <utility>
namespace impeller {
Context::~Context() = default;
@@ -14,4 +16,13 @@ bool Context::UpdateOffscreenLayerPixelFormat(PixelFormat format) {
return false;
}
/// Default implementation: no batching. The buffer is submitted to this
/// context's command queue immediately.
///
/// @return True if the command queue reported a successful submission.
bool Context::EnqueueCommandBuffer(
    std::shared_ptr<CommandBuffer> command_buffer) {
  const auto queue = GetCommandQueue();
  const auto status = queue->Submit({std::move(command_buffer)});
  return status.ok();
}
/// Default implementation: the base EnqueueCommandBuffer() submits each
/// buffer immediately, so there is never anything pending to flush.
///
/// @return Always true.
bool Context::FlushCommandBuffers() {
return true;
}
} // namespace impeller

View File

@@ -203,6 +203,25 @@ class Context {
/// operation completes in order to clear the cache.
virtual void DisposeThreadLocalCachedResources() {}
/// @brief Enqueue command_buffer for submission by the end of the frame.
///
/// Certain backends may immediately flush the command buffer if batch
/// submission is not supported. This functionality is not thread safe
/// and should only be used via the ContentContext for rendering a
/// 2D workload.
///
/// Returns true if submission has succeeded. If the buffer is enqueued
/// then no error may be returned until FlushCommandBuffers is called.
[[nodiscard]] virtual bool EnqueueCommandBuffer(
std::shared_ptr<CommandBuffer> command_buffer);
/// @brief Flush all pending command buffers.
///
/// Returns whether or not submission was successful. This functionality
/// is not threadsafe and should only be used via the ContentContext for
/// rendering a 2D workload.
[[nodiscard]] virtual bool FlushCommandBuffers();
protected:
Context();

View File

@@ -14,6 +14,7 @@
#include "flutter/fml/trace_event.h"
#include "fml/closure.h"
#include "impeller/base/validation.h"
#include "impeller/core/allocator.h"
#include "impeller/core/buffer_view.h"
#include "impeller/core/formats.h"
@@ -502,7 +503,9 @@ std::shared_ptr<GlyphAtlas> TypographerContextSkia::CreateGlyphAtlas(
fml::ScopedCleanupClosure closure([&]() {
blit_pass->EncodeCommands(context.GetResourceAllocator());
context.GetCommandQueue()->Submit({std::move(cmd_buffer)});
if (!context.EnqueueCommandBuffer(std::move(cmd_buffer))) {
VALIDATION_LOG << "Failed to submit glyph atlas command buffer";
}
});
// ---------------------------------------------------------------------------
@@ -590,7 +593,9 @@ std::shared_ptr<GlyphAtlas> TypographerContextSkia::CreateGlyphAtlas(
fml::ScopedCleanupClosure closure([&]() {
blit_pass->EncodeCommands(context.GetResourceAllocator());
context.GetCommandQueue()->Submit({std::move(cmd_buffer)});
if (!context.EnqueueCommandBuffer(std::move(cmd_buffer))) {
VALIDATION_LOG << "Failed to submit glyph atlas command buffer";
}
});
// Now append all remaining glyphs. This should never have any missing data...