diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index b0cff50eaf2..1dd605010f6 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -2178,6 +2178,7 @@ typedef struct nir_shader_compiler_options { bool lower_helper_invocation; bool lower_cs_local_index_from_id; + bool lower_cs_local_id_from_index; bool lower_device_index_to_zero; diff --git a/src/compiler/nir/nir_lower_system_values.c b/src/compiler/nir/nir_lower_system_values.c index fbc40573579..08a9e8be44a 100644 --- a/src/compiler/nir/nir_lower_system_values.c +++ b/src/compiler/nir/nir_lower_system_values.c @@ -51,6 +51,45 @@ build_local_group_size(nir_builder *b) return local_size; } +static nir_ssa_def * +build_local_invocation_id(nir_builder *b) +{ + if (b->shader->options->lower_cs_local_id_from_index) { + /* We lower gl_LocalInvocationID from gl_LocalInvocationIndex based + * on this formula: + * + * gl_LocalInvocationID.x = + * gl_LocalInvocationIndex % gl_WorkGroupSize.x; + * gl_LocalInvocationID.y = + * (gl_LocalInvocationIndex / gl_WorkGroupSize.x) % + * gl_WorkGroupSize.y; + * gl_LocalInvocationID.z = + * (gl_LocalInvocationIndex / + * (gl_WorkGroupSize.x * gl_WorkGroupSize.y)) % + * gl_WorkGroupSize.z; + * + * However, the final % gl_WorkGroupSize.z does nothing unless we + * accidentally end up with a gl_LocalInvocationIndex that is too + * large so it can safely be omitted. + */ + nir_ssa_def *local_index = nir_load_local_invocation_index(b); + nir_ssa_def *local_size = build_local_group_size(b); + + nir_ssa_def *id_x, *id_y, *id_z; + id_x = nir_umod(b, local_index, + nir_channel(b, local_size, 0)); + id_y = nir_umod(b, nir_udiv(b, local_index, + nir_channel(b, local_size, 0)), + nir_channel(b, local_size, 1)); + id_z = nir_udiv(b, local_index, + nir_imul(b, nir_channel(b, local_size, 0), + nir_channel(b, local_size, 1))); + return nir_vec3(b, id_x, id_y, id_z); + } else { + return nir_load_local_invocation_id(b); + } +} + static bool convert_block(nir_block *block, nir_builder *b) { @@ -91,7 +130,7 @@ convert_block(nir_block *block, nir_builder *b) */ nir_ssa_def *group_size = build_local_group_size(b); nir_ssa_def *group_id = nir_load_work_group_id(b); - nir_ssa_def *local_id = nir_load_local_invocation_id(b); + nir_ssa_def *local_id = build_local_invocation_id(b); sysval = nir_iadd(b, nir_imul(b, group_id, group_size), local_id); break; @@ -126,6 +165,14 @@ convert_block(nir_block *block, nir_builder *b) break; } + case SYSTEM_VALUE_LOCAL_INVOCATION_ID: + /* If lower_cs_local_id_from_index is true, then we derive the local + * index from the local id. + */ + if (b->shader->options->lower_cs_local_id_from_index) + sysval = build_local_invocation_id(b); + break; + case SYSTEM_VALUE_LOCAL_GROUP_SIZE: { sysval = build_local_group_size(b); break; diff --git a/src/intel/compiler/brw_compiler.c b/src/intel/compiler/brw_compiler.c index e863b08b991..fe632c5badc 100644 --- a/src/intel/compiler/brw_compiler.c +++ b/src/intel/compiler/brw_compiler.c @@ -42,6 +42,7 @@ .lower_fdiv = true, \ .lower_flrp64 = true, \ .lower_ldexp = true, \ + .lower_cs_local_id_from_index = true, \ .lower_device_index_to_zero = true, \ .native_integers = true, \ .use_interpolated_input_intrinsics = true, \ diff --git a/src/intel/compiler/brw_nir_lower_cs_intrinsics.c b/src/intel/compiler/brw_nir_lower_cs_intrinsics.c index bfbdea0e8fa..fab5edc893f 100644 --- a/src/intel/compiler/brw_nir_lower_cs_intrinsics.c +++ b/src/intel/compiler/brw_nir_lower_cs_intrinsics.c @@ -70,39 +70,6 @@ lower_cs_intrinsics_convert_block(struct lower_intrinsics_state *state, break; } - case nir_intrinsic_load_local_invocation_id: { - /* We lower gl_LocalInvocationID from gl_LocalInvocationIndex based - * on this formula: - * - * gl_LocalInvocationID.x = - * gl_LocalInvocationIndex % gl_WorkGroupSize.x; - * gl_LocalInvocationID.y = - * (gl_LocalInvocationIndex / gl_WorkGroupSize.x) % - * gl_WorkGroupSize.y; - * gl_LocalInvocationID.z = - * (gl_LocalInvocationIndex / - * (gl_WorkGroupSize.x * gl_WorkGroupSize.y)) % - * gl_WorkGroupSize.z; - */ - unsigned *size = nir->info.cs.local_size; - - nir_ssa_def *local_index = nir_load_local_invocation_index(b); - - nir_const_value uvec3; - memset(&uvec3, 0, sizeof(uvec3)); - uvec3.u32[0] = 1; - uvec3.u32[1] = size[0]; - uvec3.u32[2] = size[0] * size[1]; - nir_ssa_def *div_val = nir_build_imm(b, 3, 32, uvec3); - uvec3.u32[0] = size[0]; - uvec3.u32[1] = size[1]; - uvec3.u32[2] = size[2]; - nir_ssa_def *mod_val = nir_build_imm(b, 3, 32, uvec3); - - sysval = nir_umod(b, nir_udiv(b, local_index, div_val), mod_val); - break; - } - case nir_intrinsic_load_subgroup_id: if (state->local_workgroup_size > 8) continue;