From 8bd2e138f5db8ab6f82ec7597edb946d84edd3bf Mon Sep 17 00:00:00 2001 From: Rhys Perry Date: Tue, 9 Apr 2019 17:28:13 +0100 Subject: [PATCH] nir/lower_explicit_io: add nir_var_mem_shared support v2: require nir_address_format_32bit_offset instead v3: don't call nir_intrinsic_set_access() for shared atomics Signed-off-by: Rhys Perry Reviewed-by: Caio Marcelo de Oliveira Filho Reviewed-by: Jason Ekstrand --- src/compiler/nir/nir_lower_io.c | 77 ++++++++++++++++++++++----------- 1 file changed, 52 insertions(+), 25 deletions(-) diff --git a/src/compiler/nir/nir_lower_io.c b/src/compiler/nir/nir_lower_io.c index 36877fc0346..f8bcf4420e0 100644 --- a/src/compiler/nir/nir_lower_io.c +++ b/src/compiler/nir/nir_lower_io.c @@ -93,6 +93,31 @@ global_atomic_for_deref(nir_intrinsic_op deref_op) } } +static nir_intrinsic_op +shared_atomic_for_deref(nir_intrinsic_op deref_op) +{ + switch (deref_op) { +#define OP(O) case nir_intrinsic_deref_##O: return nir_intrinsic_shared_##O; + OP(atomic_exchange) + OP(atomic_comp_swap) + OP(atomic_add) + OP(atomic_imin) + OP(atomic_umin) + OP(atomic_imax) + OP(atomic_umax) + OP(atomic_and) + OP(atomic_or) + OP(atomic_xor) + OP(atomic_fadd) + OP(atomic_fmin) + OP(atomic_fmax) + OP(atomic_fcomp_swap) +#undef OP + default: + unreachable("Invalid shared atomic"); + } +} + void nir_assign_var_locations(struct exec_list *var_list, unsigned *size, int (*type_size)(const struct glsl_type *, bool)) @@ -427,27 +452,7 @@ lower_atomic(nir_intrinsic_instr *intrin, struct lower_io_state *state, nir_builder *b = &state->builder; assert(var->data.mode == nir_var_mem_shared); - nir_intrinsic_op op; - switch (intrin->intrinsic) { -#define OP(O) case nir_intrinsic_deref_##O: op = nir_intrinsic_shared_##O; break; - OP(atomic_exchange) - OP(atomic_comp_swap) - OP(atomic_add) - OP(atomic_imin) - OP(atomic_umin) - OP(atomic_imax) - OP(atomic_umax) - OP(atomic_and) - OP(atomic_or) - OP(atomic_xor) - OP(atomic_fadd) - OP(atomic_fmin) - OP(atomic_fmax) - OP(atomic_fcomp_swap) -#undef OP - default: - unreachable("Invalid atomic"); - } + nir_intrinsic_op op = shared_atomic_for_deref(intrin->intrinsic); nir_intrinsic_instr *atomic = nir_intrinsic_instr_create(state->builder.shader, op); @@ -849,6 +854,10 @@ build_explicit_io_load(nir_builder *b, nir_intrinsic_instr *intrin, assert(addr_format_is_global(addr_format)); op = nir_intrinsic_load_kernel_input; break; + case nir_var_mem_shared: + assert(addr_format == nir_address_format_32bit_offset); + op = nir_intrinsic_load_shared; + break; default: unreachable("Unsupported explicit IO variable mode"); } @@ -857,12 +866,15 @@ build_explicit_io_load(nir_builder *b, nir_intrinsic_instr *intrin, if (addr_format_is_global(addr_format)) { load->src[0] = nir_src_for_ssa(addr_to_global(b, addr, addr_format)); + } else if (addr_format == nir_address_format_32bit_offset) { + assert(addr->num_components == 1); + load->src[0] = nir_src_for_ssa(addr); } else { load->src[0] = nir_src_for_ssa(addr_to_index(b, addr, addr_format)); load->src[1] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format)); } - if (mode != nir_var_mem_ubo && mode != nir_var_shader_in) + if (mode != nir_var_mem_ubo && mode != nir_var_shader_in && mode != nir_var_mem_shared) nir_intrinsic_set_access(load, nir_intrinsic_access(intrin)); /* TODO: We should try and provide a better alignment. For OpenCL, we need @@ -919,6 +931,10 @@ build_explicit_io_store(nir_builder *b, nir_intrinsic_instr *intrin, assert(addr_format_is_global(addr_format)); op = nir_intrinsic_store_global; break; + case nir_var_mem_shared: + assert(addr_format == nir_address_format_32bit_offset); + op = nir_intrinsic_store_shared; + break; default: unreachable("Unsupported explicit IO variable mode"); } @@ -928,6 +944,9 @@ build_explicit_io_store(nir_builder *b, nir_intrinsic_instr *intrin, store->src[0] = nir_src_for_ssa(value); if (addr_format_is_global(addr_format)) { store->src[1] = nir_src_for_ssa(addr_to_global(b, addr, addr_format)); + } else if (addr_format == nir_address_format_32bit_offset) { + assert(addr->num_components == 1); + store->src[1] = nir_src_for_ssa(addr); } else { store->src[1] = nir_src_for_ssa(addr_to_index(b, addr, addr_format)); store->src[2] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format)); @@ -935,7 +954,8 @@ build_explicit_io_store(nir_builder *b, nir_intrinsic_instr *intrin, nir_intrinsic_set_write_mask(store, write_mask); - nir_intrinsic_set_access(store, nir_intrinsic_access(intrin)); + if (mode != nir_var_mem_shared) + nir_intrinsic_set_access(store, nir_intrinsic_access(intrin)); /* TODO: We should try and provide a better alignment. For OpenCL, we need * to plumb the alignment through from SPIR-V when we have one. @@ -980,6 +1000,10 @@ build_explicit_io_atomic(nir_builder *b, nir_intrinsic_instr *intrin, assert(addr_format_is_global(addr_format)); op = global_atomic_for_deref(intrin->intrinsic); break; + case nir_var_mem_shared: + assert(addr_format == nir_address_format_32bit_offset); + op = shared_atomic_for_deref(intrin->intrinsic); + break; default: unreachable("Unsupported explicit IO variable mode"); } @@ -989,6 +1013,9 @@ build_explicit_io_atomic(nir_builder *b, nir_intrinsic_instr *intrin, unsigned src = 0; if (addr_format_is_global(addr_format)) { atomic->src[src++] = nir_src_for_ssa(addr_to_global(b, addr, addr_format)); + } else if (addr_format == nir_address_format_32bit_offset) { + assert(addr->num_components == 1); + atomic->src[src++] = nir_src_for_ssa(addr); } else { atomic->src[src++] = nir_src_for_ssa(addr_to_index(b, addr, addr_format)); atomic->src[src++] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format)); @@ -1000,7 +1027,7 @@ build_explicit_io_atomic(nir_builder *b, nir_intrinsic_instr *intrin, /* Global atomics don't have access flags because they assume that the * address may be non-uniform. */ - if (!addr_format_is_global(addr_format)) + if (!addr_format_is_global(addr_format) && mode != nir_var_mem_shared) nir_intrinsic_set_access(atomic, nir_intrinsic_access(intrin)); assert(intrin->dest.ssa.num_components == 1); @@ -1032,7 +1059,7 @@ nir_explicit_io_address_from_deref(nir_builder *b, nir_deref_instr *deref, assert(deref->dest.is_ssa); switch (deref->deref_type) { case nir_deref_type_var: - assert(deref->mode == nir_var_shader_in); + assert(deref->mode & (nir_var_shader_in | nir_var_mem_shared)); return nir_imm_intN_t(b, deref->var->data.driver_location, deref->dest.ssa.bit_size);