nir: Add new system values and intrinsics for dealing with CL work offsets

New intrinsics are added for global invocation IDs and work group IDs to
deal with offsets in both. The only one of these that needs a system value
is global invocation offset, for CL's get_global_offset().

Note that CL requires very large work group sizes, so these intrinsics
are modified to be able to use 64-bit values, for 64-bit SPIR-V.

Reviewed-by: Karol Herbst <kherbst@redhat.com>
Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5891>
This commit is contained in:
Jesse Natalie 2020-07-27 16:56:21 -07:00 committed by Marge Bot
parent 6b1515cb84
commit 41e4eb9948
13 changed files with 41 additions and 29 deletions

View File

@ -16,7 +16,7 @@ build_buffer_fill_shader(struct radv_device *dev)
b.shader->info.cs.local_size[2] = 1;
nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
nir_ssa_def *wg_id = nir_load_work_group_id(&b);
nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
nir_ssa_def *block_size = nir_imm_ivec4(&b,
b.shader->info.cs.local_size[0],
b.shader->info.cs.local_size[1],
@ -71,7 +71,7 @@ build_buffer_copy_shader(struct radv_device *dev)
b.shader->info.cs.local_size[2] = 1;
nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
nir_ssa_def *wg_id = nir_load_work_group_id(&b);
nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
nir_ssa_def *block_size = nir_imm_ivec4(&b,
b.shader->info.cs.local_size[0],
b.shader->info.cs.local_size[1],

View File

@ -60,7 +60,7 @@ build_nir_itob_compute_shader(struct radv_device *dev, bool is_3d)
output_img->data.binding = 1;
nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
nir_ssa_def *wg_id = nir_load_work_group_id(&b);
nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
nir_ssa_def *block_size = nir_imm_ivec4(&b,
b.shader->info.cs.local_size[0],
b.shader->info.cs.local_size[1],
@ -289,7 +289,7 @@ build_nir_btoi_compute_shader(struct radv_device *dev, bool is_3d)
output_img->data.binding = 1;
nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
nir_ssa_def *wg_id = nir_load_work_group_id(&b);
nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
nir_ssa_def *block_size = nir_imm_ivec4(&b,
b.shader->info.cs.local_size[0],
b.shader->info.cs.local_size[1],
@ -511,7 +511,7 @@ build_nir_btoi_r32g32b32_compute_shader(struct radv_device *dev)
output_img->data.binding = 1;
nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
nir_ssa_def *wg_id = nir_load_work_group_id(&b);
nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
nir_ssa_def *block_size = nir_imm_ivec4(&b,
b.shader->info.cs.local_size[0],
b.shader->info.cs.local_size[1],
@ -719,7 +719,7 @@ build_nir_itoi_compute_shader(struct radv_device *dev, bool is_3d)
output_img->data.binding = 1;
nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
nir_ssa_def *wg_id = nir_load_work_group_id(&b);
nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
nir_ssa_def *block_size = nir_imm_ivec4(&b,
b.shader->info.cs.local_size[0],
b.shader->info.cs.local_size[1],
@ -936,7 +936,7 @@ build_nir_itoi_r32g32b32_compute_shader(struct radv_device *dev)
output_img->data.binding = 1;
nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
nir_ssa_def *wg_id = nir_load_work_group_id(&b);
nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
nir_ssa_def *block_size = nir_imm_ivec4(&b,
b.shader->info.cs.local_size[0],
b.shader->info.cs.local_size[1],
@ -1143,7 +1143,7 @@ build_nir_cleari_compute_shader(struct radv_device *dev, bool is_3d)
output_img->data.binding = 0;
nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
nir_ssa_def *wg_id = nir_load_work_group_id(&b);
nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
nir_ssa_def *block_size = nir_imm_ivec4(&b,
b.shader->info.cs.local_size[0],
b.shader->info.cs.local_size[1],
@ -1335,7 +1335,7 @@ build_nir_cleari_r32g32b32_compute_shader(struct radv_device *dev)
output_img->data.binding = 0;
nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
nir_ssa_def *wg_id = nir_load_work_group_id(&b);
nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
nir_ssa_def *block_size = nir_imm_ivec4(&b,
b.shader->info.cs.local_size[0],
b.shader->info.cs.local_size[1],

View File

@ -1175,7 +1175,7 @@ build_clear_htile_mask_shader()
b.shader->info.cs.local_size[2] = 1;
nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
nir_ssa_def *wg_id = nir_load_work_group_id(&b);
nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
nir_ssa_def *block_size = nir_imm_ivec4(&b,
b.shader->info.cs.local_size[0],
b.shader->info.cs.local_size[1],

View File

@ -58,7 +58,7 @@ build_dcc_decompress_compute_shader(struct radv_device *dev)
output_img->data.binding = 1;
nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
nir_ssa_def *wg_id = nir_load_work_group_id(&b);
nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
nir_ssa_def *block_size = nir_imm_ivec4(&b,
b.shader->info.cs.local_size[0],
b.shader->info.cs.local_size[1],

View File

@ -58,7 +58,7 @@ build_fmask_expand_compute_shader(struct radv_device *device, int samples)
output_img->data.access = ACCESS_NON_READABLE;
nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
nir_ssa_def *wg_id = nir_load_work_group_id(&b);
nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
nir_ssa_def *block_size = nir_imm_ivec4(&b,
b.shader->info.cs.local_size[0],
b.shader->info.cs.local_size[1],

View File

@ -93,7 +93,7 @@ build_resolve_compute_shader(struct radv_device *dev, bool is_integer, bool is_s
output_img->data.descriptor_set = 0;
output_img->data.binding = 1;
nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
nir_ssa_def *wg_id = nir_load_work_group_id(&b);
nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
nir_ssa_def *block_size = nir_imm_ivec4(&b,
b.shader->info.cs.local_size[0],
b.shader->info.cs.local_size[1],
@ -195,7 +195,7 @@ build_depth_stencil_resolve_compute_shader(struct radv_device *dev, int samples,
output_img->data.descriptor_set = 0;
output_img->data.binding = 1;
nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
nir_ssa_def *wg_id = nir_load_work_group_id(&b);
nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
nir_ssa_def *block_size = nir_imm_ivec4(&b,
b.shader->info.cs.local_size[0],
b.shader->info.cs.local_size[1],

View File

@ -203,7 +203,7 @@ build_occlusion_query_shader(struct radv_device *device) {
nir_builder_instr_insert(&b, &src_buf->instr);
nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
nir_ssa_def *wg_id = nir_load_work_group_id(&b);
nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
nir_ssa_def *block_size = nir_imm_ivec4(&b,
b.shader->info.cs.local_size[0],
b.shader->info.cs.local_size[1],
@ -395,7 +395,7 @@ build_pipeline_statistics_query_shader(struct radv_device *device) {
nir_builder_instr_insert(&b, &src_buf->instr);
nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
nir_ssa_def *wg_id = nir_load_work_group_id(&b);
nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
nir_ssa_def *block_size = nir_imm_ivec4(&b,
b.shader->info.cs.local_size[0],
b.shader->info.cs.local_size[1],
@ -635,7 +635,7 @@ build_tfb_query_shader(struct radv_device *device)
/* Compute global ID. */
nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
nir_ssa_def *wg_id = nir_load_work_group_id(&b);
nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
nir_ssa_def *block_size = nir_imm_ivec4(&b,
b.shader->info.cs.local_size[0],
b.shader->info.cs.local_size[1],
@ -837,7 +837,7 @@ build_timestamp_query_shader(struct radv_device *device)
/* Compute global ID. */
nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
nir_ssa_def *wg_id = nir_load_work_group_id(&b);
nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
nir_ssa_def *block_size = nir_imm_ivec4(&b,
b.shader->info.cs.local_size[0],
b.shader->info.cs.local_size[1],

View File

@ -2121,6 +2121,8 @@ nir_intrinsic_from_system_value(gl_system_value val)
return nir_intrinsic_load_local_group_size;
case SYSTEM_VALUE_GLOBAL_INVOCATION_ID:
return nir_intrinsic_load_global_invocation_id;
case SYSTEM_VALUE_BASE_GLOBAL_INVOCATION_ID:
return nir_intrinsic_load_base_global_invocation_id;
case SYSTEM_VALUE_GLOBAL_INVOCATION_INDEX:
return nir_intrinsic_load_global_invocation_index;
case SYSTEM_VALUE_WORK_DIM:
@ -2220,6 +2222,8 @@ nir_system_value_from_intrinsic(nir_intrinsic_op intrin)
return SYSTEM_VALUE_LOCAL_GROUP_SIZE;
case nir_intrinsic_load_global_invocation_id:
return SYSTEM_VALUE_GLOBAL_INVOCATION_ID;
case nir_intrinsic_load_base_global_invocation_id:
return SYSTEM_VALUE_BASE_GLOBAL_INVOCATION_ID;
case nir_intrinsic_load_global_invocation_index:
return SYSTEM_VALUE_GLOBAL_INVOCATION_INDEX;
case nir_intrinsic_load_work_dim:

View File

@ -586,9 +586,13 @@ system_value("tess_level_inner_default", 2)
system_value("patch_vertices_in", 1)
system_value("local_invocation_id", 3)
system_value("local_invocation_index", 1)
system_value("work_group_id", 3)
# The *_zero_base variant starts from 0 for the current dispatch;
# the variant without the _zero_base suffix has the base included.
system_value("work_group_id", 3, bit_sizes=[32, 64])
system_value("work_group_id_zero_base", 3)
system_value("base_work_group_id", 3, bit_sizes=[32, 64])
system_value("user_clip_plane", 4, indices=[UCP_ID])
system_value("num_work_groups", 3)
system_value("num_work_groups", 3, bit_sizes=[32, 64])
system_value("helper_invocation", 1, bit_sizes=[1, 32])
system_value("alpha_ref_float", 1)
system_value("layer_id", 1)
@ -603,7 +607,13 @@ system_value("subgroup_lt_mask", 0, bit_sizes=[32, 64])
system_value("num_subgroups", 1)
system_value("subgroup_id", 1)
system_value("local_group_size", 3)
# Note: the definition of global_invocation_id_zero_base is based on
# (work_group_id * local_group_size) + local_invocation_id.
# It is *not* based on work_group_id_zero_base, meaning the work group
# base is already accounted for, and the global base is additive on top of that.
system_value("global_invocation_id", 3, bit_sizes=[32, 64])
system_value("global_invocation_id_zero_base", 3, bit_sizes=[32, 64])
system_value("base_global_invocation_id", 3, bit_sizes=[32, 64])
system_value("global_invocation_index", 1, bit_sizes=[32, 64])
system_value("work_dim", 1)
system_value("line_width", 1)

View File

@ -44,9 +44,9 @@ static nir_ssa_def*
build_global_group_size(nir_builder *b, unsigned bit_size)
{
nir_ssa_def *group_size = nir_load_local_group_size(b);
nir_ssa_def *num_work_groups = nir_load_num_work_groups(b);
nir_ssa_def *num_work_groups = nir_load_num_work_groups(b, bit_size);
return nir_imul(b, nir_u2u(b, group_size, bit_size),
nir_u2u(b, num_work_groups, bit_size));
num_work_groups);
}
static bool
@ -189,10 +189,10 @@ lower_system_value_instr(nir_builder *b, nir_instr *instr, void *_state)
case nir_intrinsic_load_global_invocation_id: {
nir_ssa_def *group_size = nir_load_local_group_size(b);
nir_ssa_def *group_id = nir_load_work_group_id(b);
nir_ssa_def *group_id = nir_load_work_group_id(b, bit_size);
nir_ssa_def *local_id = nir_load_local_invocation_id(b);
return nir_iadd(b, nir_imul(b, nir_u2u(b, group_id, bit_size),
return nir_iadd(b, nir_imul(b, group_id,
nir_u2u(b, group_size, bit_size)),
nir_u2u(b, local_id, bit_size));
}
@ -222,10 +222,6 @@ lower_system_value_instr(nir_builder *b, nir_instr *instr, void *_state)
return NULL;
}
case nir_intrinsic_load_num_work_groups:
case nir_intrinsic_load_work_group_id:
return sanitize_32bit_sysval(b, intrin);
case nir_intrinsic_load_deref: {
nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
if (deref->mode != nir_var_system_value)

View File

@ -246,6 +246,7 @@ gl_system_value_name(gl_system_value sysval)
ENUM(SYSTEM_VALUE_LOCAL_INVOCATION_ID),
ENUM(SYSTEM_VALUE_LOCAL_INVOCATION_INDEX),
ENUM(SYSTEM_VALUE_GLOBAL_INVOCATION_ID),
ENUM(SYSTEM_VALUE_BASE_GLOBAL_INVOCATION_ID),
ENUM(SYSTEM_VALUE_GLOBAL_INVOCATION_INDEX),
ENUM(SYSTEM_VALUE_WORK_GROUP_ID),
ENUM(SYSTEM_VALUE_NUM_WORK_GROUPS),

View File

@ -615,6 +615,7 @@ typedef enum
SYSTEM_VALUE_LOCAL_INVOCATION_ID,
SYSTEM_VALUE_LOCAL_INVOCATION_INDEX,
SYSTEM_VALUE_GLOBAL_INVOCATION_ID,
SYSTEM_VALUE_BASE_GLOBAL_INVOCATION_ID,
SYSTEM_VALUE_GLOBAL_INVOCATION_INDEX,
SYSTEM_VALUE_WORK_GROUP_ID,
SYSTEM_VALUE_NUM_WORK_GROUPS,

View File

@ -644,7 +644,7 @@ ttn_src_for_file_and_index(struct ttn_compile *c, unsigned file, unsigned index,
break;
case TGSI_SEMANTIC_BLOCK_ID:
op = nir_intrinsic_load_work_group_id;
load = nir_load_work_group_id(b);
load = nir_load_work_group_id(b, 32);
break;
case TGSI_SEMANTIC_BLOCK_SIZE:
op = nir_intrinsic_load_local_group_size;