anv: Implement VK_KHR_shader_atomic_int64
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Caio Marcelo de Oliveira Filho <caio.oliveira@intel.com>
This commit is contained in:
parent
79fb0d27f3
commit
bd56ce8ce5
|
@ -423,6 +423,7 @@ enum opcode {
|
||||||
SHADER_OPCODE_A64_BYTE_SCATTERED_READ_LOGICAL,
|
SHADER_OPCODE_A64_BYTE_SCATTERED_READ_LOGICAL,
|
||||||
SHADER_OPCODE_A64_BYTE_SCATTERED_WRITE_LOGICAL,
|
SHADER_OPCODE_A64_BYTE_SCATTERED_WRITE_LOGICAL,
|
||||||
SHADER_OPCODE_A64_UNTYPED_ATOMIC_LOGICAL,
|
SHADER_OPCODE_A64_UNTYPED_ATOMIC_LOGICAL,
|
||||||
|
SHADER_OPCODE_A64_UNTYPED_ATOMIC_INT64_LOGICAL,
|
||||||
SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT_LOGICAL,
|
SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT_LOGICAL,
|
||||||
|
|
||||||
SHADER_OPCODE_TYPED_ATOMIC_LOGICAL,
|
SHADER_OPCODE_TYPED_ATOMIC_LOGICAL,
|
||||||
|
|
|
@ -855,6 +855,7 @@ fs_inst::components_read(unsigned i) const
|
||||||
return i == 1 ? src[2].ud : 1;
|
return i == 1 ? src[2].ud : 1;
|
||||||
|
|
||||||
case SHADER_OPCODE_A64_UNTYPED_ATOMIC_LOGICAL:
|
case SHADER_OPCODE_A64_UNTYPED_ATOMIC_LOGICAL:
|
||||||
|
case SHADER_OPCODE_A64_UNTYPED_ATOMIC_INT64_LOGICAL:
|
||||||
assert(src[2].file == IMM);
|
assert(src[2].file == IMM);
|
||||||
if (i == 1) {
|
if (i == 1) {
|
||||||
/* Data source */
|
/* Data source */
|
||||||
|
@ -5298,7 +5299,7 @@ lower_a64_logical_send(const fs_builder &bld, fs_inst *inst)
|
||||||
if (devinfo->gen >= 9) {
|
if (devinfo->gen >= 9) {
|
||||||
/* On Skylake and above, we have SENDS */
|
/* On Skylake and above, we have SENDS */
|
||||||
mlen = 2 * (inst->exec_size / 8);
|
mlen = 2 * (inst->exec_size / 8);
|
||||||
ex_mlen = src_comps * (inst->exec_size / 8);
|
ex_mlen = src_comps * type_sz(src.type) * inst->exec_size / REG_SIZE;
|
||||||
payload = retype(bld.move_to_vgrf(addr, 1), BRW_REGISTER_TYPE_UD);
|
payload = retype(bld.move_to_vgrf(addr, 1), BRW_REGISTER_TYPE_UD);
|
||||||
payload2 = retype(bld.move_to_vgrf(src, src_comps),
|
payload2 = retype(bld.move_to_vgrf(src, src_comps),
|
||||||
BRW_REGISTER_TYPE_UD);
|
BRW_REGISTER_TYPE_UD);
|
||||||
|
@ -5350,6 +5351,13 @@ lower_a64_logical_send(const fs_builder &bld, fs_inst *inst)
|
||||||
!inst->dst.is_null());
|
!inst->dst.is_null());
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case SHADER_OPCODE_A64_UNTYPED_ATOMIC_INT64_LOGICAL:
|
||||||
|
desc = brw_dp_a64_untyped_atomic_desc(devinfo, inst->exec_size, 64,
|
||||||
|
arg, /* atomic_op */
|
||||||
|
!inst->dst.is_null());
|
||||||
|
break;
|
||||||
|
|
||||||
|
|
||||||
case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT_LOGICAL:
|
case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT_LOGICAL:
|
||||||
desc = brw_dp_a64_untyped_atomic_float_desc(devinfo, inst->exec_size,
|
desc = brw_dp_a64_untyped_atomic_float_desc(devinfo, inst->exec_size,
|
||||||
arg, /* atomic_op */
|
arg, /* atomic_op */
|
||||||
|
@ -5558,6 +5566,7 @@ fs_visitor::lower_logical_sends()
|
||||||
case SHADER_OPCODE_A64_BYTE_SCATTERED_WRITE_LOGICAL:
|
case SHADER_OPCODE_A64_BYTE_SCATTERED_WRITE_LOGICAL:
|
||||||
case SHADER_OPCODE_A64_BYTE_SCATTERED_READ_LOGICAL:
|
case SHADER_OPCODE_A64_BYTE_SCATTERED_READ_LOGICAL:
|
||||||
case SHADER_OPCODE_A64_UNTYPED_ATOMIC_LOGICAL:
|
case SHADER_OPCODE_A64_UNTYPED_ATOMIC_LOGICAL:
|
||||||
|
case SHADER_OPCODE_A64_UNTYPED_ATOMIC_INT64_LOGICAL:
|
||||||
case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT_LOGICAL:
|
case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT_LOGICAL:
|
||||||
lower_a64_logical_send(ibld, inst);
|
lower_a64_logical_send(ibld, inst);
|
||||||
break;
|
break;
|
||||||
|
@ -6147,6 +6156,7 @@ get_lowered_simd_width(const struct gen_device_info *devinfo,
|
||||||
return devinfo->gen <= 8 ? 8 : MIN2(16, inst->exec_size);
|
return devinfo->gen <= 8 ? 8 : MIN2(16, inst->exec_size);
|
||||||
|
|
||||||
case SHADER_OPCODE_A64_UNTYPED_ATOMIC_LOGICAL:
|
case SHADER_OPCODE_A64_UNTYPED_ATOMIC_LOGICAL:
|
||||||
|
case SHADER_OPCODE_A64_UNTYPED_ATOMIC_INT64_LOGICAL:
|
||||||
case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT_LOGICAL:
|
case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT_LOGICAL:
|
||||||
return 8;
|
return 8;
|
||||||
|
|
||||||
|
|
|
@ -4928,6 +4928,13 @@ fs_visitor::nir_emit_ssbo_atomic(const fs_builder &bld,
|
||||||
if (stage == MESA_SHADER_FRAGMENT)
|
if (stage == MESA_SHADER_FRAGMENT)
|
||||||
brw_wm_prog_data(prog_data)->has_side_effects = true;
|
brw_wm_prog_data(prog_data)->has_side_effects = true;
|
||||||
|
|
||||||
|
/* The BTI untyped atomic messages only support 32-bit atomics. If you
|
||||||
|
* just look at the big table of messages in Vol 7 of the SKL PRM, they
|
||||||
|
* appear to exist. However, if you look at Vol 2a, there are no message
|
||||||
|
* descriptors provided for Qword atomic ops except for A64 messages.
|
||||||
|
*/
|
||||||
|
assert(nir_dest_bit_size(instr->dest) == 32);
|
||||||
|
|
||||||
fs_reg dest;
|
fs_reg dest;
|
||||||
if (nir_intrinsic_infos[instr->intrinsic].has_dest)
|
if (nir_intrinsic_infos[instr->intrinsic].has_dest)
|
||||||
dest = get_nir_dest(instr->dest);
|
dest = get_nir_dest(instr->dest);
|
||||||
|
@ -5092,8 +5099,14 @@ fs_visitor::nir_emit_global_atomic(const fs_builder &bld,
|
||||||
data = tmp;
|
data = tmp;
|
||||||
}
|
}
|
||||||
|
|
||||||
bld.emit(SHADER_OPCODE_A64_UNTYPED_ATOMIC_LOGICAL,
|
if (nir_dest_bit_size(instr->dest) == 64) {
|
||||||
dest, addr, data, brw_imm_ud(op));
|
bld.emit(SHADER_OPCODE_A64_UNTYPED_ATOMIC_INT64_LOGICAL,
|
||||||
|
dest, addr, data, brw_imm_ud(op));
|
||||||
|
} else {
|
||||||
|
assert(nir_dest_bit_size(instr->dest) == 32);
|
||||||
|
bld.emit(SHADER_OPCODE_A64_UNTYPED_ATOMIC_LOGICAL,
|
||||||
|
dest, addr, data, brw_imm_ud(op));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
|
|
|
@ -308,6 +308,8 @@ brw_instruction_name(const struct gen_device_info *devinfo, enum opcode op)
|
||||||
return "a64_byte_scattered_write_logical";
|
return "a64_byte_scattered_write_logical";
|
||||||
case SHADER_OPCODE_A64_UNTYPED_ATOMIC_LOGICAL:
|
case SHADER_OPCODE_A64_UNTYPED_ATOMIC_LOGICAL:
|
||||||
return "a64_untyped_atomic_logical";
|
return "a64_untyped_atomic_logical";
|
||||||
|
case SHADER_OPCODE_A64_UNTYPED_ATOMIC_INT64_LOGICAL:
|
||||||
|
return "a64_untyped_atomic_int64_logical";
|
||||||
case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT_LOGICAL:
|
case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT_LOGICAL:
|
||||||
return "a64_untyped_atomic_float_logical";
|
return "a64_untyped_atomic_float_logical";
|
||||||
case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL:
|
case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL:
|
||||||
|
@ -1044,6 +1046,7 @@ backend_instruction::has_side_effects() const
|
||||||
case SHADER_OPCODE_A64_UNTYPED_WRITE_LOGICAL:
|
case SHADER_OPCODE_A64_UNTYPED_WRITE_LOGICAL:
|
||||||
case SHADER_OPCODE_A64_BYTE_SCATTERED_WRITE_LOGICAL:
|
case SHADER_OPCODE_A64_BYTE_SCATTERED_WRITE_LOGICAL:
|
||||||
case SHADER_OPCODE_A64_UNTYPED_ATOMIC_LOGICAL:
|
case SHADER_OPCODE_A64_UNTYPED_ATOMIC_LOGICAL:
|
||||||
|
case SHADER_OPCODE_A64_UNTYPED_ATOMIC_INT64_LOGICAL:
|
||||||
case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT_LOGICAL:
|
case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT_LOGICAL:
|
||||||
case SHADER_OPCODE_BYTE_SCATTERED_WRITE_LOGICAL:
|
case SHADER_OPCODE_BYTE_SCATTERED_WRITE_LOGICAL:
|
||||||
case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL:
|
case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL:
|
||||||
|
|
|
@ -1051,6 +1051,14 @@ void anv_GetPhysicalDeviceFeatures2(
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_INT64_FEATURES_KHR: {
|
||||||
|
VkPhysicalDeviceShaderAtomicInt64FeaturesKHR *features = (void *)ext;
|
||||||
|
features->shaderBufferInt64Atomics =
|
||||||
|
pdevice->info.gen >= 9 && pdevice->use_softpin;
|
||||||
|
features->shaderSharedInt64Atomics = VK_FALSE;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DRAW_PARAMETERS_FEATURES: {
|
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DRAW_PARAMETERS_FEATURES: {
|
||||||
VkPhysicalDeviceShaderDrawParametersFeatures *features = (void *)ext;
|
VkPhysicalDeviceShaderDrawParametersFeatures *features = (void *)ext;
|
||||||
features->shaderDrawParameters = true;
|
features->shaderDrawParameters = true;
|
||||||
|
|
|
@ -104,6 +104,8 @@ EXTENSIONS = [
|
||||||
Extension('VK_KHR_relaxed_block_layout', 1, True),
|
Extension('VK_KHR_relaxed_block_layout', 1, True),
|
||||||
Extension('VK_KHR_sampler_mirror_clamp_to_edge', 1, True),
|
Extension('VK_KHR_sampler_mirror_clamp_to_edge', 1, True),
|
||||||
Extension('VK_KHR_sampler_ycbcr_conversion', 1, True),
|
Extension('VK_KHR_sampler_ycbcr_conversion', 1, True),
|
||||||
|
Extension('VK_KHR_shader_atomic_int64', 1,
|
||||||
|
'device->info.gen >= 9 && device->use_softpin'),
|
||||||
Extension('VK_KHR_shader_draw_parameters', 1, True),
|
Extension('VK_KHR_shader_draw_parameters', 1, True),
|
||||||
Extension('VK_KHR_shader_float16_int8', 1, 'device->info.gen >= 8'),
|
Extension('VK_KHR_shader_float16_int8', 1, 'device->info.gen >= 8'),
|
||||||
Extension('VK_KHR_storage_buffer_storage_class', 1, True),
|
Extension('VK_KHR_storage_buffer_storage_class', 1, True),
|
||||||
|
|
|
@ -253,7 +253,7 @@ build_index_offset_for_deref(nir_deref_instr *deref,
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool
|
static bool
|
||||||
try_lower_direct_buffer_intrinsic(nir_intrinsic_instr *intrin,
|
try_lower_direct_buffer_intrinsic(nir_intrinsic_instr *intrin, bool is_atomic,
|
||||||
struct apply_pipeline_layout_state *state)
|
struct apply_pipeline_layout_state *state)
|
||||||
{
|
{
|
||||||
nir_builder *b = &state->builder;
|
nir_builder *b = &state->builder;
|
||||||
|
@ -262,6 +262,12 @@ try_lower_direct_buffer_intrinsic(nir_intrinsic_instr *intrin,
|
||||||
if (deref->mode != nir_var_mem_ssbo)
|
if (deref->mode != nir_var_mem_ssbo)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
|
/* 64-bit atomics are only supported by A64 messages, so we can't lower them to the
|
||||||
|
* index+offset model.
|
||||||
|
*/
|
||||||
|
if (is_atomic && nir_dest_bit_size(intrin->dest) == 64)
|
||||||
|
return false;
|
||||||
|
|
||||||
if (!nir_deref_find_descriptor(deref, state))
|
if (!nir_deref_find_descriptor(deref, state))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
|
@ -286,6 +292,8 @@ lower_direct_buffer_access(nir_function_impl *impl,
|
||||||
switch (intrin->intrinsic) {
|
switch (intrin->intrinsic) {
|
||||||
case nir_intrinsic_load_deref:
|
case nir_intrinsic_load_deref:
|
||||||
case nir_intrinsic_store_deref:
|
case nir_intrinsic_store_deref:
|
||||||
|
try_lower_direct_buffer_intrinsic(intrin, false, state);
|
||||||
|
break;
|
||||||
case nir_intrinsic_deref_atomic_add:
|
case nir_intrinsic_deref_atomic_add:
|
||||||
case nir_intrinsic_deref_atomic_imin:
|
case nir_intrinsic_deref_atomic_imin:
|
||||||
case nir_intrinsic_deref_atomic_umin:
|
case nir_intrinsic_deref_atomic_umin:
|
||||||
|
@ -299,7 +307,7 @@ lower_direct_buffer_access(nir_function_impl *impl,
|
||||||
case nir_intrinsic_deref_atomic_fmin:
|
case nir_intrinsic_deref_atomic_fmin:
|
||||||
case nir_intrinsic_deref_atomic_fmax:
|
case nir_intrinsic_deref_atomic_fmax:
|
||||||
case nir_intrinsic_deref_atomic_fcomp_swap:
|
case nir_intrinsic_deref_atomic_fcomp_swap:
|
||||||
try_lower_direct_buffer_intrinsic(intrin, state);
|
try_lower_direct_buffer_intrinsic(intrin, true, state);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case nir_intrinsic_get_buffer_size: {
|
case nir_intrinsic_get_buffer_size: {
|
||||||
|
|
|
@ -147,6 +147,7 @@ anv_shader_compile_to_nir(struct anv_device *device,
|
||||||
.int8 = pdevice->info.gen >= 8,
|
.int8 = pdevice->info.gen >= 8,
|
||||||
.int16 = pdevice->info.gen >= 8,
|
.int16 = pdevice->info.gen >= 8,
|
||||||
.int64 = pdevice->info.gen >= 8,
|
.int64 = pdevice->info.gen >= 8,
|
||||||
|
.int64_atomics = pdevice->info.gen >= 9 && pdevice->use_softpin,
|
||||||
.min_lod = true,
|
.min_lod = true,
|
||||||
.multiview = true,
|
.multiview = true,
|
||||||
.physical_storage_buffer_address = pdevice->has_a64_buffer_access,
|
.physical_storage_buffer_address = pdevice->has_a64_buffer_access,
|
||||||
|
|
Loading…
Reference in New Issue