intel,nir: Move gl_LocalInvocationID lowering to nir_lower_system_values
It's not at all intel-specific; the formula is dictated by OpenGL and Vulkan. The only intel-specific thing is that we need the lowering. As a nice side-effect, the new version is variable-group-size ready. Reviewed-by: Plamena Manolova <plamena.manolova@intel.com>
This commit is contained in:
parent
486091bc00
commit
060817b2fa
|
@ -2178,6 +2178,7 @@ typedef struct nir_shader_compiler_options {
|
||||||
bool lower_helper_invocation;
|
bool lower_helper_invocation;
|
||||||
|
|
||||||
bool lower_cs_local_index_from_id;
|
bool lower_cs_local_index_from_id;
|
||||||
|
bool lower_cs_local_id_from_index;
|
||||||
|
|
||||||
bool lower_device_index_to_zero;
|
bool lower_device_index_to_zero;
|
||||||
|
|
||||||
|
|
|
@ -51,6 +51,45 @@ build_local_group_size(nir_builder *b)
|
||||||
return local_size;
|
return local_size;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static nir_ssa_def *
|
||||||
|
build_local_invocation_id(nir_builder *b)
|
||||||
|
{
|
||||||
|
if (b->shader->options->lower_cs_local_id_from_index) {
|
||||||
|
/* We lower gl_LocalInvocationID from gl_LocalInvocationIndex based
|
||||||
|
* on this formula:
|
||||||
|
*
|
||||||
|
* gl_LocalInvocationID.x =
|
||||||
|
* gl_LocalInvocationIndex % gl_WorkGroupSize.x;
|
||||||
|
* gl_LocalInvocationID.y =
|
||||||
|
* (gl_LocalInvocationIndex / gl_WorkGroupSize.x) %
|
||||||
|
* gl_WorkGroupSize.y;
|
||||||
|
* gl_LocalInvocationID.z =
|
||||||
|
* (gl_LocalInvocationIndex /
|
||||||
|
* (gl_WorkGroupSize.x * gl_WorkGroupSize.y)) %
|
||||||
|
* gl_WorkGroupSize.z;
|
||||||
|
*
|
||||||
|
* However, the final % gl_WorkGroupSize.z does nothing unless we
|
||||||
|
* accidentally end up with a gl_LocalInvocationIndex that is too
|
||||||
|
* large so it can safely be omitted.
|
||||||
|
*/
|
||||||
|
nir_ssa_def *local_index = nir_load_local_invocation_index(b);
|
||||||
|
nir_ssa_def *local_size = build_local_group_size(b);
|
||||||
|
|
||||||
|
nir_ssa_def *id_x, *id_y, *id_z;
|
||||||
|
id_x = nir_umod(b, local_index,
|
||||||
|
nir_channel(b, local_size, 0));
|
||||||
|
id_y = nir_umod(b, nir_udiv(b, local_index,
|
||||||
|
nir_channel(b, local_size, 0)),
|
||||||
|
nir_channel(b, local_size, 1));
|
||||||
|
id_z = nir_udiv(b, local_index,
|
||||||
|
nir_imul(b, nir_channel(b, local_size, 0),
|
||||||
|
nir_channel(b, local_size, 1)));
|
||||||
|
return nir_vec3(b, id_x, id_y, id_z);
|
||||||
|
} else {
|
||||||
|
return nir_load_local_invocation_id(b);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static bool
|
static bool
|
||||||
convert_block(nir_block *block, nir_builder *b)
|
convert_block(nir_block *block, nir_builder *b)
|
||||||
{
|
{
|
||||||
|
@ -91,7 +130,7 @@ convert_block(nir_block *block, nir_builder *b)
|
||||||
*/
|
*/
|
||||||
nir_ssa_def *group_size = build_local_group_size(b);
|
nir_ssa_def *group_size = build_local_group_size(b);
|
||||||
nir_ssa_def *group_id = nir_load_work_group_id(b);
|
nir_ssa_def *group_id = nir_load_work_group_id(b);
|
||||||
nir_ssa_def *local_id = nir_load_local_invocation_id(b);
|
nir_ssa_def *local_id = build_local_invocation_id(b);
|
||||||
|
|
||||||
sysval = nir_iadd(b, nir_imul(b, group_id, group_size), local_id);
|
sysval = nir_iadd(b, nir_imul(b, group_id, group_size), local_id);
|
||||||
break;
|
break;
|
||||||
|
@ -126,6 +165,14 @@ convert_block(nir_block *block, nir_builder *b)
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
case SYSTEM_VALUE_LOCAL_INVOCATION_ID:
|
||||||
|
/* If lower_cs_local_id_from_index is true, then we derive the local
|
||||||
|
* index from the local id.
|
||||||
|
*/
|
||||||
|
if (b->shader->options->lower_cs_local_id_from_index)
|
||||||
|
sysval = build_local_invocation_id(b);
|
||||||
|
break;
|
||||||
|
|
||||||
case SYSTEM_VALUE_LOCAL_GROUP_SIZE: {
|
case SYSTEM_VALUE_LOCAL_GROUP_SIZE: {
|
||||||
sysval = build_local_group_size(b);
|
sysval = build_local_group_size(b);
|
||||||
break;
|
break;
|
||||||
|
|
|
@ -42,6 +42,7 @@
|
||||||
.lower_fdiv = true, \
|
.lower_fdiv = true, \
|
||||||
.lower_flrp64 = true, \
|
.lower_flrp64 = true, \
|
||||||
.lower_ldexp = true, \
|
.lower_ldexp = true, \
|
||||||
|
.lower_cs_local_id_from_index = true, \
|
||||||
.lower_device_index_to_zero = true, \
|
.lower_device_index_to_zero = true, \
|
||||||
.native_integers = true, \
|
.native_integers = true, \
|
||||||
.use_interpolated_input_intrinsics = true, \
|
.use_interpolated_input_intrinsics = true, \
|
||||||
|
|
|
@ -70,39 +70,6 @@ lower_cs_intrinsics_convert_block(struct lower_intrinsics_state *state,
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
case nir_intrinsic_load_local_invocation_id: {
|
|
||||||
/* We lower gl_LocalInvocationID from gl_LocalInvocationIndex based
|
|
||||||
* on this formula:
|
|
||||||
*
|
|
||||||
* gl_LocalInvocationID.x =
|
|
||||||
* gl_LocalInvocationIndex % gl_WorkGroupSize.x;
|
|
||||||
* gl_LocalInvocationID.y =
|
|
||||||
* (gl_LocalInvocationIndex / gl_WorkGroupSize.x) %
|
|
||||||
* gl_WorkGroupSize.y;
|
|
||||||
* gl_LocalInvocationID.z =
|
|
||||||
* (gl_LocalInvocationIndex /
|
|
||||||
* (gl_WorkGroupSize.x * gl_WorkGroupSize.y)) %
|
|
||||||
* gl_WorkGroupSize.z;
|
|
||||||
*/
|
|
||||||
unsigned *size = nir->info.cs.local_size;
|
|
||||||
|
|
||||||
nir_ssa_def *local_index = nir_load_local_invocation_index(b);
|
|
||||||
|
|
||||||
nir_const_value uvec3;
|
|
||||||
memset(&uvec3, 0, sizeof(uvec3));
|
|
||||||
uvec3.u32[0] = 1;
|
|
||||||
uvec3.u32[1] = size[0];
|
|
||||||
uvec3.u32[2] = size[0] * size[1];
|
|
||||||
nir_ssa_def *div_val = nir_build_imm(b, 3, 32, uvec3);
|
|
||||||
uvec3.u32[0] = size[0];
|
|
||||||
uvec3.u32[1] = size[1];
|
|
||||||
uvec3.u32[2] = size[2];
|
|
||||||
nir_ssa_def *mod_val = nir_build_imm(b, 3, 32, uvec3);
|
|
||||||
|
|
||||||
sysval = nir_umod(b, nir_udiv(b, local_index, div_val), mod_val);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
case nir_intrinsic_load_subgroup_id:
|
case nir_intrinsic_load_subgroup_id:
|
||||||
if (state->local_workgroup_size > 8)
|
if (state->local_workgroup_size > 8)
|
||||||
continue;
|
continue;
|
||||||
|
|
Loading…
Reference in New Issue