nir: Add new system values and intrinsics for dealing with CL work offsets
New intrinsics are added for global invocation IDs and work group IDs to deal with offsets in both. The only one of these that needs a system value is global invocation offset, for CL's get_global_offset().

Note that CL work group IDs, work group counts and global invocation IDs can be 64 bits wide for 64bit SPIR-V, so these intrinsics are modified to be able to use 64bit values; callers of the builder helpers now pass the bit size explicitly.

Reviewed-by: Karol Herbst <kherbst@redhat.com>
Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5891>
This commit is contained in:
parent
6b1515cb84
commit
41e4eb9948
|
@ -16,7 +16,7 @@ build_buffer_fill_shader(struct radv_device *dev)
|
|||
b.shader->info.cs.local_size[2] = 1;
|
||||
|
||||
nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
|
||||
nir_ssa_def *wg_id = nir_load_work_group_id(&b);
|
||||
nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
|
||||
nir_ssa_def *block_size = nir_imm_ivec4(&b,
|
||||
b.shader->info.cs.local_size[0],
|
||||
b.shader->info.cs.local_size[1],
|
||||
|
@ -71,7 +71,7 @@ build_buffer_copy_shader(struct radv_device *dev)
|
|||
b.shader->info.cs.local_size[2] = 1;
|
||||
|
||||
nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
|
||||
nir_ssa_def *wg_id = nir_load_work_group_id(&b);
|
||||
nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
|
||||
nir_ssa_def *block_size = nir_imm_ivec4(&b,
|
||||
b.shader->info.cs.local_size[0],
|
||||
b.shader->info.cs.local_size[1],
|
||||
|
|
|
@ -60,7 +60,7 @@ build_nir_itob_compute_shader(struct radv_device *dev, bool is_3d)
|
|||
output_img->data.binding = 1;
|
||||
|
||||
nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
|
||||
nir_ssa_def *wg_id = nir_load_work_group_id(&b);
|
||||
nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
|
||||
nir_ssa_def *block_size = nir_imm_ivec4(&b,
|
||||
b.shader->info.cs.local_size[0],
|
||||
b.shader->info.cs.local_size[1],
|
||||
|
@ -289,7 +289,7 @@ build_nir_btoi_compute_shader(struct radv_device *dev, bool is_3d)
|
|||
output_img->data.binding = 1;
|
||||
|
||||
nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
|
||||
nir_ssa_def *wg_id = nir_load_work_group_id(&b);
|
||||
nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
|
||||
nir_ssa_def *block_size = nir_imm_ivec4(&b,
|
||||
b.shader->info.cs.local_size[0],
|
||||
b.shader->info.cs.local_size[1],
|
||||
|
@ -511,7 +511,7 @@ build_nir_btoi_r32g32b32_compute_shader(struct radv_device *dev)
|
|||
output_img->data.binding = 1;
|
||||
|
||||
nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
|
||||
nir_ssa_def *wg_id = nir_load_work_group_id(&b);
|
||||
nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
|
||||
nir_ssa_def *block_size = nir_imm_ivec4(&b,
|
||||
b.shader->info.cs.local_size[0],
|
||||
b.shader->info.cs.local_size[1],
|
||||
|
@ -719,7 +719,7 @@ build_nir_itoi_compute_shader(struct radv_device *dev, bool is_3d)
|
|||
output_img->data.binding = 1;
|
||||
|
||||
nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
|
||||
nir_ssa_def *wg_id = nir_load_work_group_id(&b);
|
||||
nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
|
||||
nir_ssa_def *block_size = nir_imm_ivec4(&b,
|
||||
b.shader->info.cs.local_size[0],
|
||||
b.shader->info.cs.local_size[1],
|
||||
|
@ -936,7 +936,7 @@ build_nir_itoi_r32g32b32_compute_shader(struct radv_device *dev)
|
|||
output_img->data.binding = 1;
|
||||
|
||||
nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
|
||||
nir_ssa_def *wg_id = nir_load_work_group_id(&b);
|
||||
nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
|
||||
nir_ssa_def *block_size = nir_imm_ivec4(&b,
|
||||
b.shader->info.cs.local_size[0],
|
||||
b.shader->info.cs.local_size[1],
|
||||
|
@ -1143,7 +1143,7 @@ build_nir_cleari_compute_shader(struct radv_device *dev, bool is_3d)
|
|||
output_img->data.binding = 0;
|
||||
|
||||
nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
|
||||
nir_ssa_def *wg_id = nir_load_work_group_id(&b);
|
||||
nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
|
||||
nir_ssa_def *block_size = nir_imm_ivec4(&b,
|
||||
b.shader->info.cs.local_size[0],
|
||||
b.shader->info.cs.local_size[1],
|
||||
|
@ -1335,7 +1335,7 @@ build_nir_cleari_r32g32b32_compute_shader(struct radv_device *dev)
|
|||
output_img->data.binding = 0;
|
||||
|
||||
nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
|
||||
nir_ssa_def *wg_id = nir_load_work_group_id(&b);
|
||||
nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
|
||||
nir_ssa_def *block_size = nir_imm_ivec4(&b,
|
||||
b.shader->info.cs.local_size[0],
|
||||
b.shader->info.cs.local_size[1],
|
||||
|
|
|
@ -1175,7 +1175,7 @@ build_clear_htile_mask_shader()
|
|||
b.shader->info.cs.local_size[2] = 1;
|
||||
|
||||
nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
|
||||
nir_ssa_def *wg_id = nir_load_work_group_id(&b);
|
||||
nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
|
||||
nir_ssa_def *block_size = nir_imm_ivec4(&b,
|
||||
b.shader->info.cs.local_size[0],
|
||||
b.shader->info.cs.local_size[1],
|
||||
|
|
|
@ -58,7 +58,7 @@ build_dcc_decompress_compute_shader(struct radv_device *dev)
|
|||
output_img->data.binding = 1;
|
||||
|
||||
nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
|
||||
nir_ssa_def *wg_id = nir_load_work_group_id(&b);
|
||||
nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
|
||||
nir_ssa_def *block_size = nir_imm_ivec4(&b,
|
||||
b.shader->info.cs.local_size[0],
|
||||
b.shader->info.cs.local_size[1],
|
||||
|
|
|
@ -58,7 +58,7 @@ build_fmask_expand_compute_shader(struct radv_device *device, int samples)
|
|||
output_img->data.access = ACCESS_NON_READABLE;
|
||||
|
||||
nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
|
||||
nir_ssa_def *wg_id = nir_load_work_group_id(&b);
|
||||
nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
|
||||
nir_ssa_def *block_size = nir_imm_ivec4(&b,
|
||||
b.shader->info.cs.local_size[0],
|
||||
b.shader->info.cs.local_size[1],
|
||||
|
|
|
@ -93,7 +93,7 @@ build_resolve_compute_shader(struct radv_device *dev, bool is_integer, bool is_s
|
|||
output_img->data.descriptor_set = 0;
|
||||
output_img->data.binding = 1;
|
||||
nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
|
||||
nir_ssa_def *wg_id = nir_load_work_group_id(&b);
|
||||
nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
|
||||
nir_ssa_def *block_size = nir_imm_ivec4(&b,
|
||||
b.shader->info.cs.local_size[0],
|
||||
b.shader->info.cs.local_size[1],
|
||||
|
@ -195,7 +195,7 @@ build_depth_stencil_resolve_compute_shader(struct radv_device *dev, int samples,
|
|||
output_img->data.descriptor_set = 0;
|
||||
output_img->data.binding = 1;
|
||||
nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
|
||||
nir_ssa_def *wg_id = nir_load_work_group_id(&b);
|
||||
nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
|
||||
nir_ssa_def *block_size = nir_imm_ivec4(&b,
|
||||
b.shader->info.cs.local_size[0],
|
||||
b.shader->info.cs.local_size[1],
|
||||
|
|
|
@ -203,7 +203,7 @@ build_occlusion_query_shader(struct radv_device *device) {
|
|||
nir_builder_instr_insert(&b, &src_buf->instr);
|
||||
|
||||
nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
|
||||
nir_ssa_def *wg_id = nir_load_work_group_id(&b);
|
||||
nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
|
||||
nir_ssa_def *block_size = nir_imm_ivec4(&b,
|
||||
b.shader->info.cs.local_size[0],
|
||||
b.shader->info.cs.local_size[1],
|
||||
|
@ -395,7 +395,7 @@ build_pipeline_statistics_query_shader(struct radv_device *device) {
|
|||
nir_builder_instr_insert(&b, &src_buf->instr);
|
||||
|
||||
nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
|
||||
nir_ssa_def *wg_id = nir_load_work_group_id(&b);
|
||||
nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
|
||||
nir_ssa_def *block_size = nir_imm_ivec4(&b,
|
||||
b.shader->info.cs.local_size[0],
|
||||
b.shader->info.cs.local_size[1],
|
||||
|
@ -635,7 +635,7 @@ build_tfb_query_shader(struct radv_device *device)
|
|||
|
||||
/* Compute global ID. */
|
||||
nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
|
||||
nir_ssa_def *wg_id = nir_load_work_group_id(&b);
|
||||
nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
|
||||
nir_ssa_def *block_size = nir_imm_ivec4(&b,
|
||||
b.shader->info.cs.local_size[0],
|
||||
b.shader->info.cs.local_size[1],
|
||||
|
@ -837,7 +837,7 @@ build_timestamp_query_shader(struct radv_device *device)
|
|||
|
||||
/* Compute global ID. */
|
||||
nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
|
||||
nir_ssa_def *wg_id = nir_load_work_group_id(&b);
|
||||
nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
|
||||
nir_ssa_def *block_size = nir_imm_ivec4(&b,
|
||||
b.shader->info.cs.local_size[0],
|
||||
b.shader->info.cs.local_size[1],
|
||||
|
|
|
@ -2121,6 +2121,8 @@ nir_intrinsic_from_system_value(gl_system_value val)
|
|||
return nir_intrinsic_load_local_group_size;
|
||||
case SYSTEM_VALUE_GLOBAL_INVOCATION_ID:
|
||||
return nir_intrinsic_load_global_invocation_id;
|
||||
case SYSTEM_VALUE_BASE_GLOBAL_INVOCATION_ID:
|
||||
return nir_intrinsic_load_base_global_invocation_id;
|
||||
case SYSTEM_VALUE_GLOBAL_INVOCATION_INDEX:
|
||||
return nir_intrinsic_load_global_invocation_index;
|
||||
case SYSTEM_VALUE_WORK_DIM:
|
||||
|
@ -2220,6 +2222,8 @@ nir_system_value_from_intrinsic(nir_intrinsic_op intrin)
|
|||
return SYSTEM_VALUE_LOCAL_GROUP_SIZE;
|
||||
case nir_intrinsic_load_global_invocation_id:
|
||||
return SYSTEM_VALUE_GLOBAL_INVOCATION_ID;
|
||||
case nir_intrinsic_load_base_global_invocation_id:
|
||||
return SYSTEM_VALUE_BASE_GLOBAL_INVOCATION_ID;
|
||||
case nir_intrinsic_load_global_invocation_index:
|
||||
return SYSTEM_VALUE_GLOBAL_INVOCATION_INDEX;
|
||||
case nir_intrinsic_load_work_dim:
|
||||
|
|
|
@ -586,9 +586,13 @@ system_value("tess_level_inner_default", 2)
|
|||
system_value("patch_vertices_in", 1)
|
||||
system_value("local_invocation_id", 3)
|
||||
system_value("local_invocation_index", 1)
|
||||
system_value("work_group_id", 3)
|
||||
# zero_base indicates it starts from 0 for the current dispatch
|
||||
# non-zero_base indicates the base is included
|
||||
system_value("work_group_id", 3, bit_sizes=[32, 64])
|
||||
system_value("work_group_id_zero_base", 3)
|
||||
system_value("base_work_group_id", 3, bit_sizes=[32, 64])
|
||||
system_value("user_clip_plane", 4, indices=[UCP_ID])
|
||||
system_value("num_work_groups", 3)
|
||||
system_value("num_work_groups", 3, bit_sizes=[32, 64])
|
||||
system_value("helper_invocation", 1, bit_sizes=[1, 32])
|
||||
system_value("alpha_ref_float", 1)
|
||||
system_value("layer_id", 1)
|
||||
|
@ -603,7 +607,13 @@ system_value("subgroup_lt_mask", 0, bit_sizes=[32, 64])
|
|||
system_value("num_subgroups", 1)
|
||||
system_value("subgroup_id", 1)
|
||||
system_value("local_group_size", 3)
|
||||
# note: the definition of global_invocation_id_zero_base is based on
|
||||
# (work_group_id * local_group_size) + local_invocation_id.
|
||||
# it is *not* based on work_group_id_zero_base, meaning the work group
|
||||
# base is already accounted for, and the global base is additive on top of that
|
||||
system_value("global_invocation_id", 3, bit_sizes=[32, 64])
|
||||
system_value("global_invocation_id_zero_base", 3, bit_sizes=[32, 64])
|
||||
system_value("base_global_invocation_id", 3, bit_sizes=[32, 64])
|
||||
system_value("global_invocation_index", 1, bit_sizes=[32, 64])
|
||||
system_value("work_dim", 1)
|
||||
system_value("line_width", 1)
|
||||
|
|
|
@ -44,9 +44,9 @@ static nir_ssa_def*
|
|||
build_global_group_size(nir_builder *b, unsigned bit_size)
|
||||
{
|
||||
nir_ssa_def *group_size = nir_load_local_group_size(b);
|
||||
nir_ssa_def *num_work_groups = nir_load_num_work_groups(b);
|
||||
nir_ssa_def *num_work_groups = nir_load_num_work_groups(b, bit_size);
|
||||
return nir_imul(b, nir_u2u(b, group_size, bit_size),
|
||||
nir_u2u(b, num_work_groups, bit_size));
|
||||
num_work_groups);
|
||||
}
|
||||
|
||||
static bool
|
||||
|
@ -189,10 +189,10 @@ lower_system_value_instr(nir_builder *b, nir_instr *instr, void *_state)
|
|||
|
||||
case nir_intrinsic_load_global_invocation_id: {
|
||||
nir_ssa_def *group_size = nir_load_local_group_size(b);
|
||||
nir_ssa_def *group_id = nir_load_work_group_id(b);
|
||||
nir_ssa_def *group_id = nir_load_work_group_id(b, bit_size);
|
||||
nir_ssa_def *local_id = nir_load_local_invocation_id(b);
|
||||
|
||||
return nir_iadd(b, nir_imul(b, nir_u2u(b, group_id, bit_size),
|
||||
return nir_iadd(b, nir_imul(b, group_id,
|
||||
nir_u2u(b, group_size, bit_size)),
|
||||
nir_u2u(b, local_id, bit_size));
|
||||
}
|
||||
|
@ -222,10 +222,6 @@ lower_system_value_instr(nir_builder *b, nir_instr *instr, void *_state)
|
|||
return NULL;
|
||||
}
|
||||
|
||||
case nir_intrinsic_load_num_work_groups:
|
||||
case nir_intrinsic_load_work_group_id:
|
||||
return sanitize_32bit_sysval(b, intrin);
|
||||
|
||||
case nir_intrinsic_load_deref: {
|
||||
nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
|
||||
if (deref->mode != nir_var_system_value)
|
||||
|
|
|
@ -246,6 +246,7 @@ gl_system_value_name(gl_system_value sysval)
|
|||
ENUM(SYSTEM_VALUE_LOCAL_INVOCATION_ID),
|
||||
ENUM(SYSTEM_VALUE_LOCAL_INVOCATION_INDEX),
|
||||
ENUM(SYSTEM_VALUE_GLOBAL_INVOCATION_ID),
|
||||
ENUM(SYSTEM_VALUE_BASE_GLOBAL_INVOCATION_ID),
|
||||
ENUM(SYSTEM_VALUE_GLOBAL_INVOCATION_INDEX),
|
||||
ENUM(SYSTEM_VALUE_WORK_GROUP_ID),
|
||||
ENUM(SYSTEM_VALUE_NUM_WORK_GROUPS),
|
||||
|
|
|
@ -615,6 +615,7 @@ typedef enum
|
|||
SYSTEM_VALUE_LOCAL_INVOCATION_ID,
|
||||
SYSTEM_VALUE_LOCAL_INVOCATION_INDEX,
|
||||
SYSTEM_VALUE_GLOBAL_INVOCATION_ID,
|
||||
SYSTEM_VALUE_BASE_GLOBAL_INVOCATION_ID,
|
||||
SYSTEM_VALUE_GLOBAL_INVOCATION_INDEX,
|
||||
SYSTEM_VALUE_WORK_GROUP_ID,
|
||||
SYSTEM_VALUE_NUM_WORK_GROUPS,
|
||||
|
|
|
@ -644,7 +644,7 @@ ttn_src_for_file_and_index(struct ttn_compile *c, unsigned file, unsigned index,
|
|||
break;
|
||||
case TGSI_SEMANTIC_BLOCK_ID:
|
||||
op = nir_intrinsic_load_work_group_id;
|
||||
load = nir_load_work_group_id(b);
|
||||
load = nir_load_work_group_id(b, 32);
|
||||
break;
|
||||
case TGSI_SEMANTIC_BLOCK_SIZE:
|
||||
op = nir_intrinsic_load_local_group_size;
|
||||
|
|
Loading…
Reference in New Issue