pan/bi: Lower stores with component != 0

If the shader packs multiple varyings into the same location with
different location_frac, we'll need to lower to a single varying store
that collects all of the channels together. This is not trivial during
codegen, but it is trivial to do in NIR right before codegen by relying
on nir_lower_io_to_temporaries. Since we're guaranteed all varyings will
be written exactly once, in the exit block, we can scan the shader
linearly and collect stores together in a single pass.

Signed-off-by: Alyssa Rosenzweig <alyssa@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11123>
This commit is contained in:
Alyssa Rosenzweig 2021-06-02 10:46:57 -04:00 committed by Marge Bot
parent de42707101
commit 95458c4033
2 changed files with 64 additions and 12 deletions

View File

@ -14,19 +14,7 @@ dEQP-GLES31.functional.draw_indirect.draw_elements_indirect.line_strip.instanced
dEQP-GLES31.functional.draw_indirect.random.31,Fail
dEQP-GLES31.functional.layout_binding.image.image2d.vertex_binding_max_array,Fail
dEQP-GLES31.functional.layout_binding.image.image3d.vertex_binding_max_array,Fail
dEQP-GLES31.functional.separate_shader.random.22,Fail
dEQP-GLES31.functional.separate_shader.random.23,Fail
dEQP-GLES31.functional.separate_shader.random.35,Fail
dEQP-GLES31.functional.separate_shader.random.68,Fail
dEQP-GLES31.functional.separate_shader.random.79,Fail
dEQP-GLES31.functional.separate_shader.random.80,Fail
dEQP-GLES31.functional.separate_shader.random.89,Fail
dEQP-GLES31.functional.draw_base_vertex.draw_elements_base_vertex.builtin_variable.vertex_id,Fail
dEQP-GLES31.functional.draw_base_vertex.draw_elements_instanced_base_vertex.builtin_variable.vertex_id,Fail
dEQP-GLES31.functional.draw_base_vertex.draw_range_elements_base_vertex.builtin_variable.vertex_id,Fail
dEQP-GLES31.functional.separate_shader.interface.same_location_vertex_flat_fragment_flat,Fail
dEQP-GLES31.functional.separate_shader.interface.same_location_vertex_smooth_fragment_centroid,Fail
dEQP-GLES31.functional.separate_shader.interface.same_name_vertex_flat_fragment_flat,Fail
dEQP-GLES31.functional.separate_shader.pipeline.different_constant_separate_programs_add_fragment,Fail
dEQP-GLES31.functional.separate_shader.pipeline.same_constant_separate_programs_add_both,Fail
dEQP-GLES31.functional.separate_shader.program_uniform.separate_programs_add_fragment,Fail

View File

@ -3199,6 +3199,64 @@ bi_opt_post_ra(bi_context *ctx)
}
}
/*
 * Per-instruction callback (used with nir_shader_instructions_pass) that
 * merges store_output intrinsics writing the same slot into a single store
 * with component 0.
 *
 * `data` is a struct hash_table_u64 keyed by slot, where the slot is the
 * store's offset source plus its base. It maps each slot to the most recent
 * store_output seen for it. For every store_output visited:
 *
 *   - the channels written by the previously recorded store to the same slot
 *     (if any) are copied at their existing positions,
 *   - the channels written by the current store are copied at their position
 *     shifted by the store's component,
 *   - the current store is rewritten to write the combined vector with
 *     component 0 and the merged write mask,
 *   - the previous store is removed and the current one is recorded instead.
 *
 * Channels covered by neither store are filled with an undef of the stored
 * value's bit size.
 *
 * Returns true if the instruction was a store_output (it is always rewritten
 * in that case), false for every other instruction.
 */
static bool
bifrost_nir_lower_store_component(struct nir_builder *b,
                nir_instr *instr, void *data)
{
        if (instr->type != nir_instr_type_intrinsic)
                return false;

        nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);

        if (intr->intrinsic != nir_intrinsic_store_output)
                return false;

        struct hash_table_u64 *slots = data;
        unsigned component = nir_intrinsic_component(intr);

        /* NOTE(review): nir_src_as_uint presumably requires the offset source
         * to be a constant, i.e. no indirect stores reach here -- confirm. */
        nir_src *slot_src = nir_get_io_offset_src(intr);
        uint64_t slot = nir_src_as_uint(*slot_src) + nir_intrinsic_base(intr);

        /* Earlier store to the same slot, if one has been seen already */
        nir_intrinsic_instr *prev = _mesa_hash_table_u64_search(slots, slot);
        unsigned mask = (prev ? nir_intrinsic_write_mask(prev) : 0);

        nir_ssa_def *value = intr->src[0].ssa;

        /* All new instructions are emitted just before the current store */
        b->cursor = nir_before_instr(&intr->instr);

        nir_ssa_def *undef = nir_ssa_undef(b, 1, value->bit_size);
        nir_ssa_def *channels[4] = { undef, undef, undef, undef };

        /* Copy old: a recorded store has already been rewritten to component
         * 0 by this function, so its mask bits index its source directly. */
        u_foreach_bit(i, mask) {
                assert(prev != NULL);
                nir_ssa_def *prev_ssa = prev->src[0].ssa;
                channels[i] = nir_channel(b, prev_ssa, i);
        }

        /* Copy new: the current store's channels land at component + i */
        unsigned new_mask = nir_intrinsic_write_mask(intr);
        mask |= (new_mask << component);

        u_foreach_bit(i, new_mask) {
                assert(component + i < 4);
                channels[component + i] = nir_channel(b, value, i);
        }

        /* The merged vector must reach up to the highest channel written */
        intr->num_components = util_last_bit(mask);
        nir_instr_rewrite_src_ssa(instr, &intr->src[0],
                        nir_vec(b, channels, intr->num_components));

        nir_intrinsic_set_component(intr, 0);
        nir_intrinsic_set_write_mask(intr, mask);

        /* The current store now covers everything the previous one wrote */
        if (prev) {
                _mesa_hash_table_u64_remove(slots, slot);
                nir_instr_remove(&prev->instr);
        }

        _mesa_hash_table_u64_insert(slots, slot, intr);

        /* The store was rewritten (and an older store possibly removed), so
         * report progress. Returning false here would claim the shader is
         * unchanged and bypass the preserved-metadata set the caller passes
         * to the pass driver. */
        return true;
}
/* Dead code elimination for branches at the end of a block - only one branch
* per block is legal semantically, but unreachable jumps can be generated.
* Likewise we can generate jumps to the terminal block which need to be
@ -3273,6 +3331,12 @@ bifrost_compile_shader_nir(nir_shader *nir,
if (ctx->stage == MESA_SHADER_FRAGMENT) {
NIR_PASS_V(nir, nir_lower_mediump_io, nir_var_shader_out,
~0, false);
} else {
struct hash_table_u64 *stores = _mesa_hash_table_u64_create(ctx);
NIR_PASS_V(nir, nir_shader_instructions_pass,
bifrost_nir_lower_store_component,
nir_metadata_block_index |
nir_metadata_dominance, stores);
}
NIR_PASS_V(nir, nir_lower_ssbo);