panfrost/midgard: Lower mixed csel (NIR)

Basically, when the conditions of a csel diverge, we scalarize to avoid
going into weird code paths during emit. We could be doing better, but
this case can't occur organically from GLSL as far as I can, though it
does fix lowered atan2.

Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
This commit is contained in:
Alyssa Rosenzweig 2019-05-03 03:16:14 +00:00
parent 58a1e1f86c
commit cdd9189aad
2 changed files with 83 additions and 12 deletions

View File

@ -1773,18 +1773,6 @@ dEQP-GLES2.functional.shaders.matrix.unary_addition.dynamic_mediump_mat3_fragmen
dEQP-GLES2.functional.shaders.matrix.unary_addition.dynamic_mediump_mat3_vertex
dEQP-GLES2.functional.shaders.matrix.unary_addition.dynamic_mediump_mat4_fragment
dEQP-GLES2.functional.shaders.matrix.unary_addition.dynamic_mediump_mat4_vertex
dEQP-GLES2.functional.shaders.operator.angle_and_trigonometry.atan2.highp_vec2_fragment
dEQP-GLES2.functional.shaders.operator.angle_and_trigonometry.atan2.highp_vec2_vertex
dEQP-GLES2.functional.shaders.operator.angle_and_trigonometry.atan2.highp_vec3_fragment
dEQP-GLES2.functional.shaders.operator.angle_and_trigonometry.atan2.highp_vec3_vertex
dEQP-GLES2.functional.shaders.operator.angle_and_trigonometry.atan2.highp_vec4_fragment
dEQP-GLES2.functional.shaders.operator.angle_and_trigonometry.atan2.highp_vec4_vertex
dEQP-GLES2.functional.shaders.operator.angle_and_trigonometry.atan2.mediump_vec2_fragment
dEQP-GLES2.functional.shaders.operator.angle_and_trigonometry.atan2.mediump_vec2_vertex
dEQP-GLES2.functional.shaders.operator.angle_and_trigonometry.atan2.mediump_vec3_fragment
dEQP-GLES2.functional.shaders.operator.angle_and_trigonometry.atan2.mediump_vec3_vertex
dEQP-GLES2.functional.shaders.operator.angle_and_trigonometry.atan2.mediump_vec4_fragment
dEQP-GLES2.functional.shaders.operator.angle_and_trigonometry.atan2.mediump_vec4_vertex
dEQP-GLES2.functional.shaders.operator.binary_operator.div.highp_int_fragment
dEQP-GLES2.functional.shaders.operator.binary_operator.div.highp_int_ivec2_fragment
dEQP-GLES2.functional.shaders.operator.binary_operator.div.highp_int_ivec2_vertex

View File

@ -715,6 +715,58 @@ midgard_nir_lower_fdot2_body(nir_builder *b, nir_alu_instr *alu)
nir_ssa_def_rewrite_uses(&alu->dest.dest.ssa, nir_src_for_ssa(sum));
}
/* Lower csel with mixed condition channels to mulitple csel instructions. For
* context, the csel ops on Midgard are vector in *outputs*, but not in
* *conditions*. So, if the condition is e.g. yyyy, a single op can select a
* vec4. But if the condition is e.g. xyzw, four ops are needed as the ISA
* can't cope with the divergent channels.*/
static void
midgard_nir_lower_mixed_csel_body(nir_builder *b, nir_alu_instr *alu)
{
if (alu->op != nir_op_bcsel)
return;
b->cursor = nir_before_instr(&alu->instr);
/* Must be run before registering */
assert(alu->dest.dest.is_ssa);
/* Check for mixed condition */
unsigned comp = alu->src[0].swizzle[0];
unsigned nr_components = alu->dest.dest.ssa.num_components;
bool mixed = false;
for (unsigned c = 1; c < nr_components; ++c)
mixed |= (alu->src[0].swizzle[c] != comp);
if (!mixed)
return;
/* We're mixed, so lower */
assert(nr_components <= 4);
nir_ssa_def *results[4];
nir_ssa_def *cond = nir_ssa_for_alu_src(b, alu, 0);
nir_ssa_def *choice0 = nir_ssa_for_alu_src(b, alu, 1);
nir_ssa_def *choice1 = nir_ssa_for_alu_src(b, alu, 2);
for (unsigned c = 0; c < nr_components; ++c) {
results[c] = nir_bcsel(b,
nir_channel(b, cond, c),
nir_channel(b, choice0, c),
nir_channel(b, choice1, c));
}
/* Replace with our scalarized version */
nir_ssa_def *result = nir_vec(b, results, nr_components);
nir_ssa_def_rewrite_uses(&alu->dest.dest.ssa, nir_src_for_ssa(result));
}
static int
midgard_nir_sysval_for_intrinsic(nir_intrinsic_instr *instr)
{
@ -799,6 +851,36 @@ midgard_nir_lower_fdot2(nir_shader *shader)
return progress;
}
static bool
midgard_nir_lower_mixed_csel(nir_shader *shader)
{
bool progress = false;
nir_foreach_function(function, shader) {
if (!function->impl) continue;
nir_builder _b;
nir_builder *b = &_b;
nir_builder_init(b, function->impl);
nir_foreach_block(block, function->impl) {
nir_foreach_instr_safe(instr, block) {
if (instr->type != nir_instr_type_alu) continue;
nir_alu_instr *alu = nir_instr_as_alu(instr);
midgard_nir_lower_mixed_csel_body(b, alu);
progress |= true;
}
}
nir_metadata_preserve(function->impl, nir_metadata_block_index | nir_metadata_dominance);
}
return progress;
}
static void
optimise_nir(nir_shader *nir)
{
@ -806,6 +888,7 @@ optimise_nir(nir_shader *nir)
NIR_PASS(progress, nir, nir_lower_regs_to_ssa);
NIR_PASS(progress, nir, midgard_nir_lower_fdot2);
NIR_PASS(progress, nir, midgard_nir_lower_mixed_csel);
nir_lower_tex_options lower_tex_options = {
.lower_rect = true