Refactor Cubic/Quad tests to make sure all threads reach barrier() (flutter/engine#40506)

[Impeller] Refactor Cubic/Quad tests to make sure all threads reach barrier()
This commit is contained in:
Dan Field
2023-03-21 16:52:22 -07:00
committed by GitHub
parent 2602d6dd07
commit 03ee2c89c4
3 changed files with 126 additions and 89 deletions

View File

@@ -28,9 +28,18 @@ struct CubicData {
vec2 p2;
};
struct Position {
uint index;
uint count;
struct QuadDecomposition {
float a0;
float a2;
float u0;
float u_scale;
uint line_count;
float steps;
};
struct PathComponent {
uint index; // Location in buffer
uint count; // Number of points. 4 = cubic, 3 = quad, 2 = line.
};
/// Solve for point on a quadratic Bezier curve defined by starting point `p1`,
@@ -65,4 +74,92 @@ float Cross(vec2 p1, vec2 p2) {
return p1.x * p2.y - p1.y * p2.x;
}
QuadData GenerateQuadraticFromCubic(CubicData cubic,
uint index,
float quad_count) {
float t0 = index / quad_count;
float t1 = (index + 1) / quad_count;
// calculate the subsegment
vec2 sub_p1 = CubicSolve(cubic, t0);
vec2 sub_p2 = CubicSolve(cubic, t1);
QuadData quad = QuadData(3.0 * (cubic.cp1 - cubic.p1), //
3.0 * (cubic.cp2 - cubic.cp1), //
3.0 * (cubic.p2 - cubic.cp2));
float sub_scale = (t1 - t0) * (1.0 / 3.0);
vec2 sub_cp1 = sub_p1 + sub_scale * QuadraticSolve(quad, t0);
vec2 sub_cp2 = sub_p2 - sub_scale * QuadraticSolve(quad, t1);
vec2 quad_p1x2 = 3.0 * sub_cp1 - sub_p1;
vec2 quad_p2x2 = 3.0 * sub_cp2 - sub_p2;
return QuadData(sub_p1, //
((quad_p1x2 + quad_p2x2) / 4.0), //
sub_p2);
}
uint EstimateQuadraticCount(CubicData cubic, float accuracy) {
// The maximum error, as a vector from the cubic to the best approximating
// quadratic, is proportional to the third derivative, which is constant
// across the segment. Thus, the error scales down as the third power of
// the number of subdivisions. Our strategy then is to subdivide `t` evenly.
//
// This is an overestimate of the error because only the component
// perpendicular to the first derivative is important. But the simplicity is
// appealing.
// This magic number is the square of 36 / sqrt(3).
// See: http://caffeineowl.com/graphics/2d/vectorial/cubic2quad01.html
float max_hypot2 = 432.0 * accuracy * accuracy;
vec2 err_v = (3.0 * cubic.cp2 - cubic.p2) - (3.0 * cubic.cp1 - cubic.p1);
float err = dot(err_v, err_v);
return uint(max(1., ceil(pow(err * (1.0 / max_hypot2), 1. / 6.0))));
}
QuadDecomposition DecomposeQuad(QuadData quad, float tolerance) {
float sqrt_tolerance = sqrt(tolerance);
vec2 d01 = quad.cp - quad.p1;
vec2 d12 = quad.p2 - quad.cp;
vec2 dd = d01 - d12;
float c = Cross(quad.p2 - quad.p1, dd);
float x0 = dot(d01, dd) * 1. / c;
float x2 = dot(d12, dd) * 1. / c;
float scale = abs(c / (sqrt(dd.x * dd.x + dd.y * dd.y) * (x2 - x0)));
float a0 = ApproximateParabolaIntegral(x0);
float a2 = ApproximateParabolaIntegral(x2);
float val = 0.f;
if (isfinite(scale)) {
float da = abs(a2 - a0);
float sqrt_scale = sqrt(scale);
if ((x0 < 0 && x2 < 0) || (x0 >= 0 && x2 >= 0)) {
val = da * sqrt_scale;
} else {
// cusp case
float xmin = sqrt_tolerance / sqrt_scale;
val = sqrt_tolerance * da / ApproximateParabolaIntegral(xmin);
}
}
float u0 = ApproximateParabolaIntegral(a0);
float u2 = ApproximateParabolaIntegral(a2);
float u_scale = 1. / (u2 - u0);
uint line_count = uint(max(1., ceil(0.5 * val / sqrt_tolerance)) + 1.);
float steps = 1. / line_count;
return QuadDecomposition(a0, a2, u0, u_scale, line_count, steps);
}
vec2 GenerateLineFromQuad(QuadData quad,
uint index,
QuadDecomposition decomposition) {
float u = index * decomposition.steps;
float a = decomposition.a0 + (decomposition.a2 - decomposition.a0) * u;
float t = (ApproximateParabolaIntegral(a) - decomposition.u0) *
decomposition.u_scale;
return QuadraticSolve(quad, t);
}
#endif

View File

@@ -30,54 +30,24 @@ shared uint count_sums[512];
void main() {
uint ident = gl_GlobalInvocationID.x;
if (ident >= cubics.count) {
CubicData cubic;
uint quad_count = 0;
if (ident < cubics.count) {
cubic = cubics.data[ident];
quad_count = EstimateQuadraticCount(cubic, config.accuracy);
quad_counts[ident] = quad_count;
}
barrier();
if (quad_count == 0) {
return;
}
// The maximum error, as a vector from the cubic to the best approximating
// quadratic, is proportional to the third derivative, which is constant
// across the segment. Thus, the error scales down as the third power of
// the number of subdivisions. Our strategy then is to subdivide `t` evenly.
//
// This is an overestimate of the error because only the component
// perpendicular to the first derivative is important. But the simplicity is
// appealing.
// This magic number is the square of 36 / sqrt(3).
// See: http://caffeineowl.com/graphics/2d/vectorial/cubic2quad01.html
float max_hypot2 = 432.0 * config.accuracy * config.accuracy;
CubicData cubic = cubics.data[ident];
vec2 err_v = (3.0 * cubic.cp2 - cubic.p2) - (3.0 * cubic.cp1 - cubic.p1);
float err = dot(err_v, err_v);
float quad_count = max(1., ceil(pow(err * (1.0 / max_hypot2), 1. / 6.0)));
quad_counts[ident] = uint(quad_count);
barrier();
count_sums[ident] = subgroupInclusiveAdd(quad_counts[ident]);
quads.count = count_sums[cubics.count - 1];
for (uint i = 0; i < quad_count; i++) {
float t0 = i / quad_count;
float t1 = (i + 1) / quad_count;
// calculate the subsegment
vec2 sub_p1 = CubicSolve(cubic, t0);
vec2 sub_p2 = CubicSolve(cubic, t1);
QuadData quad = QuadData(3.0 * (cubic.cp1 - cubic.p1), //
3.0 * (cubic.cp2 - cubic.cp1), //
3.0 * (cubic.p2 - cubic.cp2));
float sub_scale = (t1 - t0) * (1.0 / 3.0);
vec2 sub_cp1 = sub_p1 + sub_scale * QuadraticSolve(quad, t0);
vec2 sub_cp2 = sub_p2 - sub_scale * QuadraticSolve(quad, t1);
vec2 quad_p1x2 = 3.0 * sub_cp1 - sub_p1;
vec2 quad_p2x2 = 3.0 * sub_cp2 - sub_p2;
uint offset = count_sums[ident] - uint(quad_count);
quads.data[offset + i] = QuadData(sub_p1, //
((quad_p1x2 + quad_p2x2) / 4.0), //
sub_p2);
uint offset = count_sums[ident] - quad_count;
quads.data[offset + i] = GenerateQuadraticFromCubic(cubic, i, quad_count);
}
}

View File

@@ -30,47 +30,20 @@ shared uint count_sums[512];
void main() {
uint ident = gl_GlobalInvocationID.x;
if (ident >= quads.count) {
QuadData quad;
QuadDecomposition decomposition;
if (ident < quads.count) {
quad = quads.data[ident];
decomposition = DecomposeQuad(quad, config.tolerance);
point_counts[ident] = uint(decomposition.line_count);
}
barrier();
if (decomposition.line_count == 0) {
return;
}
QuadData quad = quads.data[ident];
float sqrt_tolerance = sqrt(config.tolerance);
vec2 d01 = quad.cp - quad.p1;
vec2 d12 = quad.p2 - quad.cp;
vec2 dd = d01 - d12;
float c = Cross(quad.p2 - quad.p1, dd);
float x0 = dot(d01, dd) * 1. / c;
float x2 = dot(d12, dd) * 1. / c;
float scale = abs(c / (sqrt(dd.x * dd.x + dd.y * dd.y) * (x2 - x0)));
float a0 = ApproximateParabolaIntegral(x0);
float a2 = ApproximateParabolaIntegral(x2);
float val = 0.f;
if (isfinite(scale)) {
float da = abs(a2 - a0);
float sqrt_scale = sqrt(scale);
if ((x0 < 0 && x2 < 0) || (x0 >= 0 && x2 >= 0)) {
val = da * sqrt_scale;
} else {
// cusp case
float xmin = sqrt_tolerance / sqrt_scale;
val = sqrt_tolerance * da / ApproximateParabolaIntegral(xmin);
}
}
float u0 = ApproximateParabolaIntegral(a0);
float u2 = ApproximateParabolaIntegral(a2);
float u_scale = 1. / (u2 - u0);
float line_count = max(1., ceil(0.5 * val / sqrt_tolerance)) + 1.;
float steps = 1. / line_count;
point_counts[ident] = uint(line_count);
barrier();
count_sums[ident] = subgroupInclusiveAdd(point_counts[ident]);
barrier();
polyline.count = count_sums[quads.count - 1] + 1;
polyline.data[0] = quads.data[0].p1;
@@ -78,12 +51,9 @@ void main() {
// In theory this could be unrolled into a separate shader, but in practice
// line_count usually pretty low and currently lack benchmark data to show
// how much it would even help.
for (uint i = 1; i < line_count; i += 1) {
float u = i * steps;
float a = a0 + (a2 - a0) * u;
float t = (ApproximateParabolaIntegral(a) - u0) * u_scale;
uint offset = count_sums[ident] - uint(line_count);
polyline.data[offset + i] = QuadraticSolve(quad, t);
for (uint i = 1; i < decomposition.line_count; i += 1) {
uint offset = count_sums[ident] - uint(decomposition.line_count);
polyline.data[offset + i] = GenerateLineFromQuad(quad, i, decomposition);
}
polyline.data[count_sums[ident]] = quad.p2;
}