nir: Introduce workgroup_index and ability to lower workgroup_id to it.
The workgroup_index is intended for situations when a 3 dimensional workgroup_id is not available on the HW, but a 1 dimensional index is. In this case, we can use lower the 3D ID to use this. Signed-off-by: Timur Kristóf <timur.kristof@gmail.com> Reviewed-by: Daniel Schürmann <daniel@schuermann.dev> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/15103>
This commit is contained in:
parent
6a4c01f3ef
commit
4b99b528f5
|
@ -2419,6 +2419,8 @@ nir_intrinsic_from_system_value(gl_system_value val)
|
|||
return nir_intrinsic_load_local_invocation_index;
|
||||
case SYSTEM_VALUE_WORKGROUP_ID:
|
||||
return nir_intrinsic_load_workgroup_id;
|
||||
case SYSTEM_VALUE_WORKGROUP_INDEX:
|
||||
return nir_intrinsic_load_workgroup_index;
|
||||
case SYSTEM_VALUE_NUM_WORKGROUPS:
|
||||
return nir_intrinsic_load_num_workgroups;
|
||||
case SYSTEM_VALUE_PRIMITIVE_ID:
|
||||
|
@ -2556,6 +2558,8 @@ nir_system_value_from_intrinsic(nir_intrinsic_op intrin)
|
|||
return SYSTEM_VALUE_NUM_WORKGROUPS;
|
||||
case nir_intrinsic_load_workgroup_id:
|
||||
return SYSTEM_VALUE_WORKGROUP_ID;
|
||||
case nir_intrinsic_load_workgroup_index:
|
||||
return SYSTEM_VALUE_WORKGROUP_INDEX;
|
||||
case nir_intrinsic_load_primitive_id:
|
||||
return SYSTEM_VALUE_PRIMITIVE_ID;
|
||||
case nir_intrinsic_load_tess_coord:
|
||||
|
|
|
@ -4764,6 +4764,7 @@ typedef struct nir_lower_compute_system_values_options {
|
|||
bool shuffle_local_ids_for_quad_derivatives:1;
|
||||
bool lower_local_invocation_index:1;
|
||||
bool lower_cs_local_id_to_index:1;
|
||||
bool lower_workgroup_id_to_index:1;
|
||||
} nir_lower_compute_system_values_options;
|
||||
|
||||
bool nir_lower_compute_system_values(nir_shader *shader,
|
||||
|
|
|
@ -264,6 +264,7 @@ visit_intrinsic(nir_shader *shader, nir_intrinsic_instr *instr)
|
|||
assert(stage == MESA_SHADER_TESS_CTRL);
|
||||
break;
|
||||
|
||||
case nir_intrinsic_load_workgroup_index:
|
||||
case nir_intrinsic_load_workgroup_id:
|
||||
assert(gl_shader_stage_uses_workgroup(stage));
|
||||
if (stage == MESA_SHADER_COMPUTE)
|
||||
|
|
|
@ -640,6 +640,7 @@ gather_intrinsic_info(nir_intrinsic_instr *instr, nir_shader *shader,
|
|||
case nir_intrinsic_load_base_global_invocation_id:
|
||||
case nir_intrinsic_load_global_invocation_index:
|
||||
case nir_intrinsic_load_workgroup_id:
|
||||
case nir_intrinsic_load_workgroup_index:
|
||||
case nir_intrinsic_load_num_workgroups:
|
||||
case nir_intrinsic_load_workgroup_size:
|
||||
case nir_intrinsic_load_work_dim:
|
||||
|
|
|
@ -772,6 +772,9 @@ system_value("local_invocation_index", 1)
|
|||
# non-zero_base indicates the base is included
|
||||
system_value("workgroup_id", 3, bit_sizes=[32, 64])
|
||||
system_value("workgroup_id_zero_base", 3)
|
||||
# The workgroup_index is intended for situations when a 3 dimensional
|
||||
# workgroup_id is not available on the HW, but a 1 dimensional index is.
|
||||
system_value("workgroup_index", 1)
|
||||
system_value("base_workgroup_id", 3, bit_sizes=[32, 64])
|
||||
system_value("user_clip_plane", 4, indices=[UCP_ID])
|
||||
system_value("num_workgroups", 3, bit_sizes=[32, 64])
|
||||
|
|
|
@ -265,6 +265,35 @@ nir_lower_system_values(nir_shader *shader)
|
|||
return progress;
|
||||
}
|
||||
|
||||
static nir_ssa_def *
|
||||
lower_id_to_index_no_umod(nir_builder *b, nir_ssa_def *index,
|
||||
nir_ssa_def *size, unsigned bit_size)
|
||||
{
|
||||
/* We lower ID to Index with the following formula:
|
||||
*
|
||||
* id.z = index / (size.x * size.y)
|
||||
* id.y = (index - (id.z * (size.x * size.y))) / size.x
|
||||
* id.x = index - ((id.z * (size.x * size.y)) + (id.y * size.x))
|
||||
*
|
||||
* This is more efficient on HW that doesn't have a
|
||||
* modulo division instruction and when the size is either
|
||||
* not compile time known or not a power of two.
|
||||
*/
|
||||
|
||||
nir_ssa_def *size_x = nir_channel(b, size, 0);
|
||||
nir_ssa_def *size_y = nir_channel(b, size, 1);
|
||||
nir_ssa_def *size_x_y = nir_imul(b, size_x, size_y);
|
||||
|
||||
nir_ssa_def *id_z = nir_udiv(b, index, size_x_y);
|
||||
nir_ssa_def *z_portion = nir_imul(b, id_z, size_x_y);
|
||||
nir_ssa_def *id_y = nir_udiv(b, nir_isub(b, index, z_portion), size_x);
|
||||
nir_ssa_def *y_portion = nir_imul(b, id_y, size_x);
|
||||
nir_ssa_def *id_x = nir_isub(b, index, nir_iadd(b, z_portion, y_portion));
|
||||
|
||||
return nir_u2u(b, nir_vec3(b, id_x, id_y, id_z), bit_size);
|
||||
}
|
||||
|
||||
|
||||
static nir_ssa_def *
|
||||
lower_id_to_index(nir_builder *b, nir_ssa_def *index, nir_ssa_def *size,
|
||||
unsigned bit_size)
|
||||
|
@ -520,8 +549,13 @@ lower_compute_system_value_instr(nir_builder *b,
|
|||
if (options && options->has_base_workgroup_id)
|
||||
return nir_iadd(b, nir_u2u(b, nir_load_workgroup_id_zero_base(b), bit_size),
|
||||
nir_load_base_workgroup_id(b, bit_size));
|
||||
else
|
||||
return NULL;
|
||||
else if (options && options->lower_workgroup_id_to_index)
|
||||
return lower_id_to_index_no_umod(b, nir_load_workgroup_index(b),
|
||||
nir_load_num_workgroups(b, bit_size),
|
||||
bit_size);
|
||||
|
||||
return NULL;
|
||||
|
||||
}
|
||||
|
||||
default:
|
||||
|
|
|
@ -766,6 +766,7 @@ typedef enum
|
|||
SYSTEM_VALUE_BASE_GLOBAL_INVOCATION_ID,
|
||||
SYSTEM_VALUE_GLOBAL_INVOCATION_INDEX,
|
||||
SYSTEM_VALUE_WORKGROUP_ID,
|
||||
SYSTEM_VALUE_WORKGROUP_INDEX,
|
||||
SYSTEM_VALUE_NUM_WORKGROUPS,
|
||||
SYSTEM_VALUE_WORKGROUP_SIZE,
|
||||
SYSTEM_VALUE_GLOBAL_GROUP_SIZE,
|
||||
|
|
Loading…
Reference in New Issue