r600/sfn: Add lowering pass for shared IO

Lower shared loads and stores to use the r600-specific intrinsics.

Signed-off-by: Gert Wollny <gert.wollny@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5575>
Gert Wollny 2020-05-18 20:36:16 +02:00 committed by Marge Bot
parent 14591a45b7
commit 033968a94e
1 changed file with 90 additions and 0 deletions
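For context, the pass replaces the single byte address of a load_shared/store_shared with per-channel addresses, since the r600 LDS intrinsics operate on one 32-bit value per address. Below is a minimal standalone sketch of that address math; it is illustrative only and not part of the commit, and the helper name lds_channel_address is hypothetical:

/* Illustrative sketch only -- not part of the commit. Channel c of an
 * N-component 32-bit shared load ends up reading LDS at base + 4 * c. */
#include <cstdio>

/* Hypothetical helper: byte address of one 32-bit channel of a shared load. */
static unsigned lds_channel_address(unsigned base, unsigned channel)
{
   return base + 4 * channel;
}

int main()
{
   const unsigned base = 32;
   for (unsigned c = 0; c < 4; ++c)
      printf("channel %u -> byte address %u\n", c, lds_channel_address(base, c));
   return 0;
}

A vec4 load at base 32 thus reads dwords at byte addresses 32, 36, 40 and 44, matching the nir_imm_ivec4(&b, 0, 4, 8, 12) offsets added in the pass below.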


@@ -512,6 +512,94 @@ r600_get_natural_size_align_bytes(const struct glsl_type *type,
   }
}

static bool
r600_lower_shared_io_impl(nir_function *func)
{
   nir_builder b;
   nir_builder_init(&b, func->impl);

   bool progress = false;
   nir_foreach_block(block, func->impl) {
      nir_foreach_instr_safe(instr, block) {

         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *op = nir_instr_as_intrinsic(instr);
         if (op->intrinsic != nir_intrinsic_load_shared &&
             op->intrinsic != nir_intrinsic_store_shared)
            continue;

         b.cursor = nir_before_instr(instr);

         if (op->intrinsic == nir_intrinsic_load_shared) {
            nir_ssa_def *addr = op->src[0].ssa;

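            /* For multi-component loads build one byte address per
             * destination channel (4 bytes apart); the r600 LDS read
             * fetches one dword per address it is given. */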
            switch (nir_dest_num_components(op->dest)) {
            case 2: {
               auto addr2 = nir_iadd_imm(&b, addr, 4);
               addr = nir_vec2(&b, addr, addr2);
               break;
            }
            case 3: {
               auto addr2 = nir_iadd(&b, addr, nir_imm_ivec2(&b, 4, 8));
               addr = nir_vec3(&b, addr,
                               nir_channel(&b, addr2, 0),
                               nir_channel(&b, addr2, 1));
               break;
            }
            case 4: {
               addr = nir_iadd(&b, addr, nir_imm_ivec4(&b, 0, 4, 8, 12));
               break;
            }
            }

            auto load = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_local_shared_r600);
            load->num_components = nir_dest_num_components(op->dest);
            load->src[0] = nir_src_for_ssa(addr);
            nir_ssa_dest_init(&load->instr, &load->dest,
                              load->num_components, 32, NULL);
            nir_ssa_def_rewrite_uses(&op->dest.ssa, nir_src_for_ssa(&load->dest.ssa));
            nir_builder_instr_insert(&b, &load->instr);
         } else {
            nir_ssa_def *addr = op->src[1].ssa;
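            /* The r600 LDS store handles at most two channels per
             * operation, so split the write mask into the (x,y) and
             * (z,w) pairs and emit one store per pair that is written. */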
            for (int i = 0; i < 2; ++i) {
               unsigned test_mask = (0x3 << 2 * i);
               if (!(nir_intrinsic_write_mask(op) & test_mask))
                  continue;

               auto store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_store_local_shared_r600);
               unsigned writemask = nir_intrinsic_write_mask(op) & test_mask;
               nir_intrinsic_set_write_mask(store, writemask);
               store->src[0] = nir_src_for_ssa(op->src[0].ssa);
               store->num_components = store->src[0].ssa->num_components;

               bool start_even = (writemask & (1u << (2 * i)));
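               /* Address of the first channel written in this pair:
                * base + 8 * i, plus 4 when only the odd channel of the
                * pair is written. */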
               auto addr2 = nir_iadd(&b, addr, nir_imm_int(&b, 8 * i + (start_even ? 0 : 4)));
               store->src[1] = nir_src_for_ssa(addr2);
               nir_builder_instr_insert(&b, &store->instr);
            }
         }
         nir_instr_remove(instr);
         progress = true;
      }
   }
   return progress;
}

static bool
r600_lower_shared_io(nir_shader *nir)
{
   bool progress = false;
   nir_foreach_function(function, nir) {
      if (function->impl &&
          r600_lower_shared_io_impl(function))
         progress = true;
   }
   return progress;
}

static bool
optimize_once(nir_shader *shader)
{
@@ -575,6 +663,8 @@ int r600_shader_from_nir(struct r600_context *rctx,
   NIR_PASS_V(sel->nir, nir_lower_regs_to_ssa);
   NIR_PASS_V(sel->nir, nir_lower_phis_to_scalar);
   NIR_PASS_V(sel->nir, r600_lower_shared_io);

   static const struct nir_lower_tex_options lower_tex_options = {
      .lower_txp = ~0u,
   };