radv: Modify load_sbt_amd intrinsic to get the descriptor.

This way we can get the address of the SBT entry itself, which some
NIR builtins need because extra data stored in the entry can be used
as shader input.

Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/12592>
This commit is contained in:
Bas Nieuwenhuizen 2021-07-20 01:21:11 +02:00 committed by Marge Bot
parent 2abf44cf18
commit b6be96a2bd
4 changed files with 8 additions and 25 deletions

View File

@ -5475,23 +5475,12 @@ void
visit_load_sbt_amd(isel_context* ctx, nir_intrinsic_instr* instr)
{
/* Instruction selection for the load_sbt_amd intrinsic: reads the 16-byte entry selected by
 * the intrinsic's BINDING from the table addressed by the sbt_descriptors shader argument.
 *
 * NOTE(review): dst is defined twice below (by the buffer_load_dword and by the final
 * s_load_dwordx4). This listing looks like a diff hunk whose +/- markers were stripped, so
 * lines of the old and the new implementation are probably interleaved here -- confirm
 * against the actual tree before relying on any single statement. */
Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
/* Per-invocation index taken from the intrinsic's first source. */
Temp index = get_ssa_temp(ctx, instr->src[0].ssa);
unsigned binding = nir_intrinsic_binding(instr);
unsigned base = nir_intrinsic_base(instr);
/* The buffer load below consumes the index as a VGPR. */
index = as_vgpr(ctx, index);
Builder bld(ctx->program, ctx->block);
/* sbt_descriptors is widened to a 64-bit pointer before being used as the SMEM base. */
Temp desc_base = convert_pointer_to_64_bit(ctx, get_arg(ctx, ctx->args->ac.sbt_descriptors));
/* Entries are 16 bytes (four dwords) apiece, so the byte offset is binding * 16. */
Operand desc_off = bld.copy(bld.def(s1), Operand::c32(binding * 16u));
/* Fetch the four-dword entry into a fresh s4 temporary to use as a buffer resource. */
Temp rsrc = bld.smem(aco_opcode::s_load_dwordx4, bld.def(s4), desc_base, desc_off);
/* Only a single 32-bit component is handled by the buffer load; anything wider would need
 * additional implementation work. */
assert(instr->dest.ssa.bit_size == 32);
assert(instr->num_components == 1);
/* Loads one dword through rsrc; presumably index is the buffer index and base the immediate
 * offset -- TODO confirm the trailing flag order against Builder::mubuf. */
bld.mubuf(aco_opcode::buffer_load_dword, Definition(dst), rsrc, index, Operand::zero(), base,
false, false, true);
/* Loads the four-dword entry for this binding straight into dst. */
bld.smem(aco_opcode::s_load_dwordx4, Definition(dst), desc_base, desc_off);
}
void

View File

@ -778,7 +778,6 @@ init_context(isel_context* ctx, nir_shader* shader)
case nir_intrinsic_load_initial_edgeflags_amd:
case nir_intrinsic_load_packed_passthrough_primitive_amd:
case nir_intrinsic_gds_atomic_add_amd:
case nir_intrinsic_load_sbt_amd:
case nir_intrinsic_bvh64_intersect_ray_amd:
case nir_intrinsic_load_cull_small_prim_precision_amd: type = RegType::vgpr; break;
case nir_intrinsic_load_shared:
@ -801,6 +800,7 @@ init_context(isel_context* ctx, nir_shader* shader)
case nir_intrinsic_inclusive_scan:
case nir_intrinsic_exclusive_scan:
case nir_intrinsic_reduce:
case nir_intrinsic_load_sbt_amd:
case nir_intrinsic_load_ubo:
case nir_intrinsic_load_ssbo:
case nir_intrinsic_load_global:

View File

@ -6528,17 +6528,10 @@ radv_rt_bind_tables(struct radv_cmd_buffer *cmd_buffer,
desc_ptr = ptr;
for (unsigned i = 0; i < 4; ++i, desc_ptr += 4) {
uint32_t rsrc_word3 =
S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_UINT) |
S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_STRUCTURED) | S_008F0C_RESOURCE_LEVEL(1);
desc_ptr[0] = tables[i].deviceAddress;
desc_ptr[1] = S_008F04_BASE_ADDRESS_HI(tables[i].deviceAddress >> 32) |
S_008F04_STRIDE(tables[i].stride);
desc_ptr[2] = 0xffffffffu;
desc_ptr[3] = rsrc_word3;
desc_ptr[1] = tables[i].deviceAddress >> 32;
desc_ptr[2] = tables[i].stride;
desc_ptr[3] = 0;
}
uint64_t va = radv_buffer_get_va(cmd_buffer->upload.upload_bo) + offset;

View File

@ -1214,8 +1214,9 @@ intrinsic("overwrite_vs_arguments_amd", src_comp=[1, 1], indices=[])
# Overwrites TES input registers, for use with vertex compaction after culling. src = {tes_u, tes_v, rel_patch_id, patch_id}.
intrinsic("overwrite_tes_arguments_amd", src_comp=[1, 1, 1, 1], indices=[])
# src = [index] BINDING = which table BASE = offset within handle
intrinsic("load_sbt_amd", src_comp=[-1], dest_comp=0, indices=[BINDING, BASE],
# loads a descriptor for an sbt.
# BINDING = which table (the declaration below passes no src_comp, so there is no index source)
intrinsic("load_sbt_amd", dest_comp=4, bit_sizes=[32], indices=[BINDING],
flags=[CAN_ELIMINATE, CAN_REORDER])
# 1. HW descriptor