[Impeller] Create reusable prefix sum. (flutter/engine#42167)

Creates a reusable function macro for performing a prefix sum. Eventually, we'll need this for polyline decomposition.

![image](https://github.com/flutter/engine/assets/8975114/3d9af2b2-f1ea-413a-ac1f-d4a69211388e)

Reasonably fast with the maximum (1024) input elements.
This commit is contained in:
Jonah Williams
2023-05-22 09:08:06 -07:00
committed by GitHub
parent 80d986be9f
commit 6ee594c86f
6 changed files with 260 additions and 0 deletions

View File

@@ -1091,6 +1091,7 @@ ORIGIN: ../../../flutter/impeller/compiler/shader_lib/impeller/constants.glsl +
ORIGIN: ../../../flutter/impeller/compiler/shader_lib/impeller/gaussian.glsl + ../../../flutter/LICENSE
ORIGIN: ../../../flutter/impeller/compiler/shader_lib/impeller/gradient.glsl + ../../../flutter/LICENSE
ORIGIN: ../../../flutter/impeller/compiler/shader_lib/impeller/path.glsl + ../../../flutter/LICENSE
ORIGIN: ../../../flutter/impeller/compiler/shader_lib/impeller/prefix_sum.glsl + ../../../flutter/LICENSE
ORIGIN: ../../../flutter/impeller/compiler/shader_lib/impeller/texture.glsl + ../../../flutter/LICENSE
ORIGIN: ../../../flutter/impeller/compiler/shader_lib/impeller/transform.glsl + ../../../flutter/LICENSE
ORIGIN: ../../../flutter/impeller/compiler/shader_lib/impeller/types.glsl + ../../../flutter/LICENSE
@@ -1560,6 +1561,7 @@ ORIGIN: ../../../flutter/impeller/renderer/pipeline_descriptor.cc + ../../../flu
ORIGIN: ../../../flutter/impeller/renderer/pipeline_descriptor.h + ../../../flutter/LICENSE
ORIGIN: ../../../flutter/impeller/renderer/pipeline_library.cc + ../../../flutter/LICENSE
ORIGIN: ../../../flutter/impeller/renderer/pipeline_library.h + ../../../flutter/LICENSE
ORIGIN: ../../../flutter/impeller/renderer/prefix_sum_test.comp + ../../../flutter/LICENSE
ORIGIN: ../../../flutter/impeller/renderer/render_pass.cc + ../../../flutter/LICENSE
ORIGIN: ../../../flutter/impeller/renderer/render_pass.h + ../../../flutter/LICENSE
ORIGIN: ../../../flutter/impeller/renderer/render_target.cc + ../../../flutter/LICENSE
@@ -3716,6 +3718,7 @@ FILE: ../../../flutter/impeller/compiler/shader_lib/impeller/constants.glsl
FILE: ../../../flutter/impeller/compiler/shader_lib/impeller/gaussian.glsl
FILE: ../../../flutter/impeller/compiler/shader_lib/impeller/gradient.glsl
FILE: ../../../flutter/impeller/compiler/shader_lib/impeller/path.glsl
FILE: ../../../flutter/impeller/compiler/shader_lib/impeller/prefix_sum.glsl
FILE: ../../../flutter/impeller/compiler/shader_lib/impeller/texture.glsl
FILE: ../../../flutter/impeller/compiler/shader_lib/impeller/transform.glsl
FILE: ../../../flutter/impeller/compiler/shader_lib/impeller/types.glsl
@@ -4185,6 +4188,7 @@ FILE: ../../../flutter/impeller/renderer/pipeline_descriptor.cc
FILE: ../../../flutter/impeller/renderer/pipeline_descriptor.h
FILE: ../../../flutter/impeller/renderer/pipeline_library.cc
FILE: ../../../flutter/impeller/renderer/pipeline_library.h
FILE: ../../../flutter/impeller/renderer/prefix_sum_test.comp
FILE: ../../../flutter/impeller/renderer/render_pass.cc
FILE: ../../../flutter/impeller/renderer/render_pass.h
FILE: ../../../flutter/impeller/renderer/render_target.cc

View File

@@ -0,0 +1,38 @@
// Copyright 2013 The Flutter Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Parallel exclusive prefix sum, computed in place in `storage` using an
// up-sweep (pairwise reduction) followed by a down-sweep.
//
// Arguments:
//   ident      - uint index of the calling invocation. Only invocations with
//                ident < BLOCK_SIZE / 2 perform additions; all others just
//                take part in the barriers.
//   storage    - indexable uint array with at least BLOCK_SIZE elements that
//                holds the input on entry and the exclusive sums on exit.
//                Element BLOCK_SIZE - 1 is overwritten with 0 between the two
//                sweeps, so the overall total is not retained.
//   BLOCK_SIZE - number of elements to scan. The halving/doubling loops
//                assume this is a power of two.
//
// Usage notes:
//   * The macro calls barrier(), so every invocation that shares `storage`
//     must execute it, from uniform control flow.
//   * Each element of `storage` must be written (and made visible with a
//     barrier) before the macro is invoked.
//   * NOTE(review): barrier() only synchronizes invocations of one workgroup,
//     so passing the global x identifier as `ident` is presumably only valid
//     when a single workgroup is dispatched - confirm against callers.
//   * Arguments are parenthesized on use but are evaluated more than once;
//     pass side-effect free expressions. Do not place `//` comments inside the
//     macro body: the line continuations would swallow the following lines.
#define ExclusivePrefixSum(ident, storage, BLOCK_SIZE)  \
  do {                                                  \
    uint offset = 1;                                    \
    for (uint n = (BLOCK_SIZE) / 2; n > 0; n /= 2) {    \
      if ((ident) < n) {                                \
        uint ai = offset * (2 * (ident) + 1) - 1;       \
        uint bi = offset * (2 * (ident) + 2) - 1;       \
        (storage)[bi] += (storage)[ai];                 \
      }                                                 \
      offset *= 2;                                      \
      barrier();                                        \
    }                                                   \
                                                        \
    if ((ident) == 0) {                                 \
      (storage)[(BLOCK_SIZE) - 1] = 0;                  \
    }                                                   \
    barrier();                                          \
                                                        \
    for (uint n = 1; n < (BLOCK_SIZE); n *= 2) {        \
      offset /= 2;                                      \
      barrier();                                        \
      if ((ident) < n) {                                \
        uint ai = offset * (2 * (ident) + 1) - 1;       \
        uint bi = offset * (2 * (ident) + 2) - 1;       \
        uint temp = (storage)[ai];                      \
        (storage)[ai] = (storage)[bi];                  \
        (storage)[bi] += temp;                          \
      }                                                 \
    }                                                   \
    barrier();                                          \
  } while (false)

View File

@@ -22,6 +22,7 @@ if (impeller_enable_compute) {
shaders = [
"stroke.comp",
"path_polyline.comp",
"prefix_sum_test.comp",
]
}

View File

@@ -18,6 +18,7 @@
#include "impeller/renderer/compute_command.h"
#include "impeller/renderer/compute_pipeline_builder.h"
#include "impeller/renderer/pipeline_library.h"
#include "impeller/renderer/prefix_sum_test.comp.h"
namespace impeller {
namespace testing {
@@ -103,6 +104,117 @@ TEST_P(ComputeTest, CanCreateComputePass) {
latch.Wait();
}
// Dispatches the prefix sum test shader over the five inputs 1..5 and checks
// that the output buffer holds their inclusive prefix sums {1, 3, 6, 10, 15}.
TEST_P(ComputeTest, CanComputePrefixSum) {
  using CS = PrefixSumTestComputeShader;
  auto context = GetContext();
  ASSERT_TRUE(context);
  ASSERT_TRUE(context->GetCapabilities()->SupportsCompute());

  using SamplePipelineBuilder = ComputePipelineBuilder<CS>;
  auto pipeline_desc =
      SamplePipelineBuilder::MakeDefaultPipelineDescriptor(*context);
  ASSERT_TRUE(pipeline_desc.has_value());
  auto compute_pipeline =
      context->GetPipelineLibrary()->GetPipeline(pipeline_desc).Get();
  ASSERT_TRUE(compute_pipeline);

  auto cmd_buffer = context->CreateCommandBuffer();
  // Fail the test instead of dereferencing a null command buffer below.
  ASSERT_TRUE(cmd_buffer);
  auto pass = cmd_buffer->CreateComputePass();
  ASSERT_TRUE(pass && pass->IsValid());

  // One invocation per input element, all in a single thread group.
  static constexpr size_t kCount = 5;

  pass->SetGridSize(ISize(kCount, 1));
  pass->SetThreadGroupSize(ISize(kCount, 1));

  ComputeCommand cmd;
  cmd.label = "Compute";
  cmd.pipeline = compute_pipeline;

  // Inputs are 1, 2, ..., kCount.
  CS::InputData<kCount> input_data;
  input_data.count = kCount;
  for (size_t i = 0; i < kCount; i++) {
    input_data.data[i] = 1 + i;
  }

  auto output_buffer = CreateHostVisibleDeviceBuffer<CS::OutputData<kCount>>(
      context, "Output Buffer");

  CS::BindInputData(
      cmd, pass->GetTransientsBuffer().EmplaceStorageBuffer(input_data));
  CS::BindOutputData(cmd, output_buffer->AsBufferView());

  ASSERT_TRUE(pass->AddCommand(std::move(cmd)));
  ASSERT_TRUE(pass->EncodeCommands());

  fml::AutoResetWaitableEvent latch;
  ASSERT_TRUE(cmd_buffer->SubmitCommands(
      [&latch, output_buffer](CommandBuffer::Status status) {
        EXPECT_EQ(status, CommandBuffer::Status::kCompleted);

        auto view = output_buffer->AsBufferView();
        EXPECT_EQ(view.range.length, sizeof(CS::OutputData<kCount>));

        CS::OutputData<kCount>* output =
            reinterpret_cast<CS::OutputData<kCount>*>(view.contents);
        EXPECT_TRUE(output);

        // EXPECT_TRUE is non-fatal, so guard the reads explicitly: a null
        // mapping should be reported as a failure, not crash the test binary.
        if (output != nullptr) {
          constexpr uint32_t expected[kCount] = {1, 3, 6, 10, 15};
          for (size_t i = 0; i < kCount; i++) {
            auto computed_sum = output->data[i];
            EXPECT_EQ(computed_sum, expected[i]);
          }
        }

        // Always signal, even after failed expectations, so the Wait() below
        // cannot hang.
        latch.Signal();
      }));

  latch.Wait();
}
// Builds and submits a 1023-element prefix sum compute pass from the callback
// handed to OpenPlaygroundHere. The computed sums are not read back; the
// callback only reports whether the pass could be built and submitted.
TEST_P(ComputeTest, CanComputePrefixSumLargeInteractive) {
  using CS = PrefixSumTestComputeShader;

  auto context = GetContext();
  ASSERT_TRUE(context);
  ASSERT_TRUE(context->GetCapabilities()->SupportsCompute());

  auto callback = [&](RenderPass& render_pass) -> bool {
    using SamplePipelineBuilder = ComputePipelineBuilder<CS>;
    auto pipeline_desc =
        SamplePipelineBuilder::MakeDefaultPipelineDescriptor(*context);
    if (!pipeline_desc.has_value()) {
      return false;
    }
    auto compute_pipeline =
        context->GetPipelineLibrary()->GetPipeline(pipeline_desc).Get();
    if (!compute_pipeline) {
      return false;
    }

    auto cmd_buffer = context->CreateCommandBuffer();
    if (!cmd_buffer) {
      return false;
    }
    auto pass = cmd_buffer->CreateComputePass();
    if (!pass || !pass->IsValid()) {
      return false;
    }

    static constexpr size_t kCount = 1023;

    pass->SetGridSize(ISize(kCount, 1));

    ComputeCommand cmd;
    cmd.label = "Compute";
    cmd.pipeline = compute_pipeline;

    // Inputs are 1, 2, ..., kCount.
    CS::InputData<kCount> input_data;
    input_data.count = kCount;
    for (size_t i = 0; i < kCount; i++) {
      input_data.data[i] = 1 + i;
    }

    auto output_buffer = CreateHostVisibleDeviceBuffer<CS::OutputData<kCount>>(
        context, "Output Buffer");

    CS::BindInputData(
        cmd, pass->GetTransientsBuffer().EmplaceStorageBuffer(input_data));
    CS::BindOutputData(cmd, output_buffer->AsBufferView());

    // Surface recording/encoding failures through the callback result rather
    // than silently submitting an incomplete pass.
    if (!pass->AddCommand(std::move(cmd))) {
      return false;
    }
    if (!pass->EncodeCommands()) {
      return false;
    }
    return cmd_buffer->SubmitCommands();
  };
  ASSERT_TRUE(OpenPlaygroundHere(callback));
}
TEST_P(ComputeTest, MultiStageInputAndOutput) {
using CS1 = Stage1ComputeShader;
using Stage1PipelineBuilder = ComputePipelineBuilder<CS1>;

View File

@@ -0,0 +1,43 @@
// Copyright 2013 The Flutter Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
layout(local_size_x = 512, local_size_y = 1) in;
layout(std430) buffer;

#include <impeller/prefix_sum.glsl>

// Number of elements scanned by one workgroup: two per invocation of the
// declared local size (2 * local_size_x).
#define BLOCK_SIZE 1024

// Values to sum. Only the first `count` entries of `data` are read.
layout(binding = 0) readonly buffer InputData {
  uint count;
  uint data[];
}
input_data;

// Receives the inclusive prefix sum of each input element.
layout(binding = 1) writeonly buffer OutputData {
  uint data[];
}
output_data;

// Scan scratch space. It is shared per workgroup, so it must only ever be
// indexed with values derived from the workgroup-local invocation id.
shared uint memory[BLOCK_SIZE];

void main() {
  // `memory` is per-workgroup, so use the local id rather than the global
  // one. If more than one workgroup is dispatched, each of them scans the same
  // first BLOCK_SIZE inputs and writes identical results.
  uint ident = gl_LocalInvocationID.x;

  // Each invocation owns two adjacent elements so that BLOCK_SIZE / 2
  // invocations populate the whole scan storage.
  uint first = 2 * ident;
  uint second = first + 1;
  // False only when the backend launches more than BLOCK_SIZE / 2 invocations
  // per workgroup; such invocations must not touch `memory` but still have to
  // reach every barrier.
  bool in_block = second < BLOCK_SIZE;

  // Elements past `count` contribute zero to the sum.
  uint first_value = 0;
  uint second_value = 0;
  if (in_block) {
    if (first < input_data.count) {
      first_value = input_data.data[first];
    }
    if (second < input_data.count) {
      second_value = input_data.data[second];
    }
    memory[first] = first_value;
    memory[second] = second_value;
  }
  barrier();

  ExclusivePrefixSum(ident, memory, BLOCK_SIZE);

  if (in_block) {
    // Convert exclusive to inclusive sum.
    if (first < input_data.count) {
      output_data.data[first] = memory[first] + first_value;
    }
    if (second < input_data.count) {
      output_data.data[second] = memory[second] + second_value;
    }
  }
}

View File

@@ -14027,6 +14027,68 @@
}
}
},
"flutter/impeller/renderer/prefix_sum_test.comp.vkspv": {
"Mali-G78": {
"core": "Mali-G78",
"filename": "flutter/impeller/renderer/prefix_sum_test.comp.vkspv",
"has_uniform_computation": true,
"type": "Compute",
"variants": {
"Main": {
"fp16_arithmetic": null,
"has_stack_spilling": false,
"performance": {
"longest_path_bound_pipelines": [
"load_store"
],
"longest_path_cycles": [
2.549999952316284,
0.0,
2.549999952316284,
1.0,
72.0,
0.0
],
"pipelines": [
"arith_total",
"arith_fma",
"arith_cvt",
"arith_sfu",
"load_store",
"texture"
],
"shortest_path_bound_pipelines": [
"load_store"
],
"shortest_path_cycles": [
0.949999988079071,
0.0,
0.949999988079071,
0.0,
1.0,
0.0
],
"total_bound_pipelines": [
"load_store"
],
"total_cycles": [
2.549999952316284,
0.0,
2.549999952316284,
1.0,
72.0,
0.0
]
},
"shared_storage_used": 4096,
"stack_spill_bytes": 0,
"thread_occupancy": 100,
"uniform_registers_used": 8,
"work_registers_used": 21
}
}
}
},
"flutter/impeller/renderer/stroke.comp.vkspv": {
"Mali-G78": {
"core": "Mali-G78",