[Impeller] Create reusable prefix sum. (flutter/engine#42167)
Creates a reusable function-like macro for performing a prefix sum. Eventually, we'll need this for polyline decomposition. It is reasonably fast at the maximum supported size (1024 input elements).
This commit is contained in:
@@ -1091,6 +1091,7 @@ ORIGIN: ../../../flutter/impeller/compiler/shader_lib/impeller/constants.glsl +
|
||||
ORIGIN: ../../../flutter/impeller/compiler/shader_lib/impeller/gaussian.glsl + ../../../flutter/LICENSE
|
||||
ORIGIN: ../../../flutter/impeller/compiler/shader_lib/impeller/gradient.glsl + ../../../flutter/LICENSE
|
||||
ORIGIN: ../../../flutter/impeller/compiler/shader_lib/impeller/path.glsl + ../../../flutter/LICENSE
|
||||
ORIGIN: ../../../flutter/impeller/compiler/shader_lib/impeller/prefix_sum.glsl + ../../../flutter/LICENSE
|
||||
ORIGIN: ../../../flutter/impeller/compiler/shader_lib/impeller/texture.glsl + ../../../flutter/LICENSE
|
||||
ORIGIN: ../../../flutter/impeller/compiler/shader_lib/impeller/transform.glsl + ../../../flutter/LICENSE
|
||||
ORIGIN: ../../../flutter/impeller/compiler/shader_lib/impeller/types.glsl + ../../../flutter/LICENSE
|
||||
@@ -1560,6 +1561,7 @@ ORIGIN: ../../../flutter/impeller/renderer/pipeline_descriptor.cc + ../../../flu
|
||||
ORIGIN: ../../../flutter/impeller/renderer/pipeline_descriptor.h + ../../../flutter/LICENSE
|
||||
ORIGIN: ../../../flutter/impeller/renderer/pipeline_library.cc + ../../../flutter/LICENSE
|
||||
ORIGIN: ../../../flutter/impeller/renderer/pipeline_library.h + ../../../flutter/LICENSE
|
||||
ORIGIN: ../../../flutter/impeller/renderer/prefix_sum_test.comp + ../../../flutter/LICENSE
|
||||
ORIGIN: ../../../flutter/impeller/renderer/render_pass.cc + ../../../flutter/LICENSE
|
||||
ORIGIN: ../../../flutter/impeller/renderer/render_pass.h + ../../../flutter/LICENSE
|
||||
ORIGIN: ../../../flutter/impeller/renderer/render_target.cc + ../../../flutter/LICENSE
|
||||
@@ -3716,6 +3718,7 @@ FILE: ../../../flutter/impeller/compiler/shader_lib/impeller/constants.glsl
|
||||
FILE: ../../../flutter/impeller/compiler/shader_lib/impeller/gaussian.glsl
|
||||
FILE: ../../../flutter/impeller/compiler/shader_lib/impeller/gradient.glsl
|
||||
FILE: ../../../flutter/impeller/compiler/shader_lib/impeller/path.glsl
|
||||
FILE: ../../../flutter/impeller/compiler/shader_lib/impeller/prefix_sum.glsl
|
||||
FILE: ../../../flutter/impeller/compiler/shader_lib/impeller/texture.glsl
|
||||
FILE: ../../../flutter/impeller/compiler/shader_lib/impeller/transform.glsl
|
||||
FILE: ../../../flutter/impeller/compiler/shader_lib/impeller/types.glsl
|
||||
@@ -4185,6 +4188,7 @@ FILE: ../../../flutter/impeller/renderer/pipeline_descriptor.cc
|
||||
FILE: ../../../flutter/impeller/renderer/pipeline_descriptor.h
|
||||
FILE: ../../../flutter/impeller/renderer/pipeline_library.cc
|
||||
FILE: ../../../flutter/impeller/renderer/pipeline_library.h
|
||||
FILE: ../../../flutter/impeller/renderer/prefix_sum_test.comp
|
||||
FILE: ../../../flutter/impeller/renderer/render_pass.cc
|
||||
FILE: ../../../flutter/impeller/renderer/render_pass.h
|
||||
FILE: ../../../flutter/impeller/renderer/render_target.cc
|
||||
|
||||
@@ -0,0 +1,38 @@
|
||||
// Copyright 2013 The Flutter Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
// Parallel exclusive prefix sum, computed in place in `storage` with a
// reduction (up-sweep) pass followed by a down-sweep pass.
//
// Arguments:
//   ident      - index of the calling invocation. The original guidance is
//                that this must be the global x identifier.
//   storage    - indexable array of uint values; on exit each element holds
//                the sum of all elements that preceded it.
//   BLOCK_SIZE - the overall storage size, in elements.
//
// Every argument is parenthesized in the expansion so that expression
// arguments (for example `a - b` as the identifier or `N + M` as the size)
// keep their intended precedence. Arguments are expanded more than once, so
// pass expressions without side effects.
//
// Only invocations with `ident < BLOCK_SIZE / 2` perform additions, but the
// macro calls barrier() on every path, so all invocations sharing `storage`
// are expected to execute it.
//
// NOTE(review): the halving/doubling loops look like they assume BLOCK_SIZE
// is a power of two; confirm before using other sizes.
#define ExclusivePrefixSum(ident, storage, BLOCK_SIZE)   \
  do {                                                   \
    uint offset = 1;                                     \
    for (uint n = (BLOCK_SIZE) / 2; n > 0; n /= 2) {     \
      if ((ident) < n) {                                 \
        uint ai = offset * (2 * (ident) + 1) - 1;        \
        uint bi = offset * (2 * (ident) + 2) - 1;        \
        (storage)[bi] += (storage)[ai];                  \
      }                                                  \
      offset *= 2;                                       \
      barrier();                                         \
    }                                                    \
                                                         \
    if ((ident) == 0) {                                  \
      (storage)[(BLOCK_SIZE) - 1] = 0;                   \
    }                                                    \
    barrier();                                           \
                                                         \
    for (uint n = 1; n < (BLOCK_SIZE); n *= 2) {         \
      offset /= 2;                                       \
      barrier();                                         \
      if ((ident) < n) {                                 \
        uint ai = offset * (2 * (ident) + 1) - 1;        \
        uint bi = offset * (2 * (ident) + 2) - 1;        \
        uint temp = (storage)[ai];                       \
        (storage)[ai] = (storage)[bi];                   \
        (storage)[bi] += temp;                           \
      }                                                  \
    }                                                    \
    barrier();                                           \
  } while (false)
|
||||
@@ -22,6 +22,7 @@ if (impeller_enable_compute) {
|
||||
shaders = [
|
||||
"stroke.comp",
|
||||
"path_polyline.comp",
|
||||
"prefix_sum_test.comp",
|
||||
]
|
||||
}
|
||||
|
||||
|
||||
@@ -18,6 +18,7 @@
|
||||
#include "impeller/renderer/compute_command.h"
|
||||
#include "impeller/renderer/compute_pipeline_builder.h"
|
||||
#include "impeller/renderer/pipeline_library.h"
|
||||
#include "impeller/renderer/prefix_sum_test.comp.h"
|
||||
|
||||
namespace impeller {
|
||||
namespace testing {
|
||||
@@ -103,6 +104,117 @@ TEST_P(ComputeTest, CanCreateComputePass) {
|
||||
latch.Wait();
|
||||
}
|
||||
|
||||
// Dispatches the prefix-sum test shader over the inputs 1..5 and checks that
// the output buffer holds the inclusive running totals 1, 3, 6, 10, 15.
TEST_P(ComputeTest, CanComputePrefixSum) {
  using CS = PrefixSumTestComputeShader;
  auto context = GetContext();
  ASSERT_TRUE(context);
  ASSERT_TRUE(context->GetCapabilities()->SupportsCompute());

  using SamplePipelineBuilder = ComputePipelineBuilder<CS>;
  auto pipeline_desc =
      SamplePipelineBuilder::MakeDefaultPipelineDescriptor(*context);
  ASSERT_TRUE(pipeline_desc.has_value());
  auto compute_pipeline =
      context->GetPipelineLibrary()->GetPipeline(pipeline_desc).Get();
  ASSERT_TRUE(compute_pipeline);

  auto cmd_buffer = context->CreateCommandBuffer();
  auto pass = cmd_buffer->CreateComputePass();
  ASSERT_TRUE(pass && pass->IsValid());

  static constexpr size_t kCount = 5;

  pass->SetGridSize(ISize(kCount, 1));
  pass->SetThreadGroupSize(ISize(kCount, 1));

  ComputeCommand cmd;
  cmd.label = "Compute";
  cmd.pipeline = compute_pipeline;

  // Input values are 1, 2, ..., kCount.
  CS::InputData<kCount> input_data;
  input_data.count = kCount;
  for (size_t i = 0; i < kCount; i++) {
    input_data.data[i] = 1 + i;
  }

  auto output_buffer = CreateHostVisibleDeviceBuffer<CS::OutputData<kCount>>(
      context, "Output Buffer");

  CS::BindInputData(
      cmd, pass->GetTransientsBuffer().EmplaceStorageBuffer(input_data));
  CS::BindOutputData(cmd, output_buffer->AsBufferView());

  ASSERT_TRUE(pass->AddCommand(std::move(cmd)));
  ASSERT_TRUE(pass->EncodeCommands());

  fml::AutoResetWaitableEvent latch;
  ASSERT_TRUE(cmd_buffer->SubmitCommands(
      [&latch, output_buffer](CommandBuffer::Status status) {
        EXPECT_EQ(status, CommandBuffer::Status::kCompleted);

        auto view = output_buffer->AsBufferView();
        EXPECT_EQ(view.range.length, sizeof(CS::OutputData<kCount>));

        CS::OutputData<kCount>* output =
            reinterpret_cast<CS::OutputData<kCount>*>(view.contents);
        EXPECT_TRUE(output);

        // EXPECT_TRUE is non-fatal, so only read through the pointer when it
        // is valid. The latch is signalled on every path so that the waiting
        // test body cannot hang after a failure.
        if (output) {
          constexpr uint32_t expected[kCount] = {1, 3, 6, 10, 15};
          for (size_t i = 0; i < kCount; i++) {
            auto computed_sum = output->data[i];
            EXPECT_EQ(computed_sum, expected[i]);
          }
        }
        latch.Signal();
      }));

  latch.Wait();
}
|
||||
|
||||
// Interactive variant: the playground callback builds and submits a compute
// pass that runs the prefix-sum test shader over 1023 inputs (1..1023). The
// output buffer is not read back or verified here.
TEST_P(ComputeTest, CanComputePrefixSumLargeInteractive) {
  using CS = PrefixSumTestComputeShader;

  auto context = GetContext();
  ASSERT_TRUE(context);
  ASSERT_TRUE(context->GetCapabilities()->SupportsCompute());

  auto callback = [&](RenderPass& render_pass) -> bool {
    using SamplePipelineBuilder = ComputePipelineBuilder<CS>;
    auto pipeline_desc =
        SamplePipelineBuilder::MakeDefaultPipelineDescriptor(*context);
    // Report setup failures through the callback result instead of
    // dereferencing invalid objects below.
    if (!pipeline_desc.has_value()) {
      return false;
    }
    auto compute_pipeline =
        context->GetPipelineLibrary()->GetPipeline(pipeline_desc).Get();
    if (!compute_pipeline) {
      return false;
    }

    auto cmd_buffer = context->CreateCommandBuffer();
    if (!cmd_buffer) {
      return false;
    }
    auto pass = cmd_buffer->CreateComputePass();
    if (!pass || !pass->IsValid()) {
      return false;
    }

    static constexpr size_t kCount = 1023;

    pass->SetGridSize(ISize(kCount, 1));

    ComputeCommand cmd;
    cmd.label = "Compute";
    cmd.pipeline = compute_pipeline;

    // Input values are 1, 2, ..., kCount.
    CS::InputData<kCount> input_data;
    input_data.count = kCount;
    for (size_t i = 0; i < kCount; i++) {
      input_data.data[i] = 1 + i;
    }

    auto output_buffer = CreateHostVisibleDeviceBuffer<CS::OutputData<kCount>>(
        context, "Output Buffer");

    CS::BindInputData(
        cmd, pass->GetTransientsBuffer().EmplaceStorageBuffer(input_data));
    CS::BindOutputData(cmd, output_buffer->AsBufferView());

    // Both calls report success as a bool (the non-interactive test asserts
    // on them); propagate a failure rather than submitting a broken pass.
    if (!pass->AddCommand(std::move(cmd))) {
      return false;
    }
    if (!pass->EncodeCommands()) {
      return false;
    }
    return cmd_buffer->SubmitCommands();
  };
  ASSERT_TRUE(OpenPlaygroundHere(callback));
}
|
||||
|
||||
TEST_P(ComputeTest, MultiStageInputAndOutput) {
|
||||
using CS1 = Stage1ComputeShader;
|
||||
using Stage1PipelineBuilder = ComputePipelineBuilder<CS1>;
|
||||
|
||||
43
engine/src/flutter/impeller/renderer/prefix_sum_test.comp
Normal file
43
engine/src/flutter/impeller/renderer/prefix_sum_test.comp
Normal file
@@ -0,0 +1,43 @@
|
||||
// Copyright 2013 The Flutter Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
layout(local_size_x = 512, local_size_y = 1) in;
|
||||
layout(std430) buffer;
|
||||
|
||||
#include <impeller/prefix_sum.glsl>
|
||||
|
||||
#define BLOCK_SIZE 1024
|
||||
|
||||
layout(binding = 0) readonly buffer InputData {
|
||||
uint count;
|
||||
uint data[];
|
||||
}
|
||||
input_data;
|
||||
|
||||
layout(binding = 1) writeonly buffer OutputData {
|
||||
uint data[];
|
||||
}
|
||||
output_data;
|
||||
|
||||
// Scratch array that ExclusivePrefixSum scans in place; it holds BLOCK_SIZE
// elements.
// NOTE(review): the original note here read "Needs to be number of threads
// per threadgroup", but this file declares local_size_x = 512 while
// BLOCK_SIZE is 1024 -- confirm which relationship is intended.
|
||||
shared uint memory[BLOCK_SIZE];
|
||||
|
||||
// Entry point: each invocation copies one input element into `memory`, runs
// the exclusive scan, and writes the inclusive sum for its element.
void main() {
|
||||
// NOTE(review): `memory` is declared `shared` but is indexed with the global
// invocation id -- confirm behavior for dispatches spanning several
// workgroups.
uint ident = gl_GlobalInvocationID.x;
|
||||
|
||||
// Invocations at or beyond input_data.count contribute zero to the scan.
uint value = 0;
|
||||
if (ident < input_data.count) {
|
||||
value = input_data.data[ident];
|
||||
}
|
||||
|
||||
memory[ident] = value;
|
||||
// Synchronize after the stores to `memory` and before the scan reads them.
barrier();
|
||||
|
||||
ExclusivePrefixSum(ident, memory, BLOCK_SIZE);
|
||||
|
||||
if (ident < input_data.count) {
|
||||
// Convert exclusive to inclusive sum by adding back this invocation's own
// input value.
|
||||
output_data.data[ident] = memory[ident] + value;
|
||||
}
|
||||
}
|
||||
@@ -14027,6 +14027,68 @@
|
||||
}
|
||||
}
|
||||
},
|
||||
"flutter/impeller/renderer/prefix_sum_test.comp.vkspv": {
|
||||
"Mali-G78": {
|
||||
"core": "Mali-G78",
|
||||
"filename": "flutter/impeller/renderer/prefix_sum_test.comp.vkspv",
|
||||
"has_uniform_computation": true,
|
||||
"type": "Compute",
|
||||
"variants": {
|
||||
"Main": {
|
||||
"fp16_arithmetic": null,
|
||||
"has_stack_spilling": false,
|
||||
"performance": {
|
||||
"longest_path_bound_pipelines": [
|
||||
"load_store"
|
||||
],
|
||||
"longest_path_cycles": [
|
||||
2.549999952316284,
|
||||
0.0,
|
||||
2.549999952316284,
|
||||
1.0,
|
||||
72.0,
|
||||
0.0
|
||||
],
|
||||
"pipelines": [
|
||||
"arith_total",
|
||||
"arith_fma",
|
||||
"arith_cvt",
|
||||
"arith_sfu",
|
||||
"load_store",
|
||||
"texture"
|
||||
],
|
||||
"shortest_path_bound_pipelines": [
|
||||
"load_store"
|
||||
],
|
||||
"shortest_path_cycles": [
|
||||
0.949999988079071,
|
||||
0.0,
|
||||
0.949999988079071,
|
||||
0.0,
|
||||
1.0,
|
||||
0.0
|
||||
],
|
||||
"total_bound_pipelines": [
|
||||
"load_store"
|
||||
],
|
||||
"total_cycles": [
|
||||
2.549999952316284,
|
||||
0.0,
|
||||
2.549999952316284,
|
||||
1.0,
|
||||
72.0,
|
||||
0.0
|
||||
]
|
||||
},
|
||||
"shared_storage_used": 4096,
|
||||
"stack_spill_bytes": 0,
|
||||
"thread_occupancy": 100,
|
||||
"uniform_registers_used": 8,
|
||||
"work_registers_used": 21
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"flutter/impeller/renderer/stroke.comp.vkspv": {
|
||||
"Mali-G78": {
|
||||
"core": "Mali-G78",
|
||||
|
||||
Reference in New Issue
Block a user