radv: Modify load_sbt_amd intrinsic to get the descriptor.
That way we can get the address of the SBT entry, which some NIR builtins need because extra data stored in the entry can be used as shader input.

Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/12592>
This commit is contained in:
parent
2abf44cf18
commit
b6be96a2bd
|
@ -5475,23 +5475,12 @@ void
|
|||
visit_load_sbt_amd(isel_context* ctx, nir_intrinsic_instr* instr)
|
||||
{
|
||||
Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
|
||||
Temp index = get_ssa_temp(ctx, instr->src[0].ssa);
|
||||
unsigned binding = nir_intrinsic_binding(instr);
|
||||
unsigned base = nir_intrinsic_base(instr);
|
||||
|
||||
index = as_vgpr(ctx, index);
|
||||
|
||||
Builder bld(ctx->program, ctx->block);
|
||||
Temp desc_base = convert_pointer_to_64_bit(ctx, get_arg(ctx, ctx->args->ac.sbt_descriptors));
|
||||
Operand desc_off = bld.copy(bld.def(s1), Operand::c32(binding * 16u));
|
||||
Temp rsrc = bld.smem(aco_opcode::s_load_dwordx4, bld.def(s4), desc_base, desc_off);
|
||||
|
||||
/* If we want more we need to implement */
|
||||
assert(instr->dest.ssa.bit_size == 32);
|
||||
assert(instr->num_components == 1);
|
||||
|
||||
bld.mubuf(aco_opcode::buffer_load_dword, Definition(dst), rsrc, index, Operand::zero(), base,
|
||||
false, false, true);
|
||||
bld.smem(aco_opcode::s_load_dwordx4, Definition(dst), desc_base, desc_off);
|
||||
}
|
||||
|
||||
void
|
||||
|
|
|
@ -778,7 +778,6 @@ init_context(isel_context* ctx, nir_shader* shader)
|
|||
case nir_intrinsic_load_initial_edgeflags_amd:
|
||||
case nir_intrinsic_load_packed_passthrough_primitive_amd:
|
||||
case nir_intrinsic_gds_atomic_add_amd:
|
||||
case nir_intrinsic_load_sbt_amd:
|
||||
case nir_intrinsic_bvh64_intersect_ray_amd:
|
||||
case nir_intrinsic_load_cull_small_prim_precision_amd: type = RegType::vgpr; break;
|
||||
case nir_intrinsic_load_shared:
|
||||
|
@ -801,6 +800,7 @@ init_context(isel_context* ctx, nir_shader* shader)
|
|||
case nir_intrinsic_inclusive_scan:
|
||||
case nir_intrinsic_exclusive_scan:
|
||||
case nir_intrinsic_reduce:
|
||||
case nir_intrinsic_load_sbt_amd:
|
||||
case nir_intrinsic_load_ubo:
|
||||
case nir_intrinsic_load_ssbo:
|
||||
case nir_intrinsic_load_global:
|
||||
|
|
|
@ -6528,17 +6528,10 @@ radv_rt_bind_tables(struct radv_cmd_buffer *cmd_buffer,
|
|||
|
||||
desc_ptr = ptr;
|
||||
for (unsigned i = 0; i < 4; ++i, desc_ptr += 4) {
|
||||
uint32_t rsrc_word3 =
|
||||
S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
|
||||
S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
|
||||
S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_UINT) |
|
||||
S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_STRUCTURED) | S_008F0C_RESOURCE_LEVEL(1);
|
||||
|
||||
desc_ptr[0] = tables[i].deviceAddress;
|
||||
desc_ptr[1] = S_008F04_BASE_ADDRESS_HI(tables[i].deviceAddress >> 32) |
|
||||
S_008F04_STRIDE(tables[i].stride);
|
||||
desc_ptr[2] = 0xffffffffu;
|
||||
desc_ptr[3] = rsrc_word3;
|
||||
desc_ptr[1] = tables[i].deviceAddress >> 32;
|
||||
desc_ptr[2] = tables[i].stride;
|
||||
desc_ptr[3] = 0;
|
||||
}
|
||||
|
||||
uint64_t va = radv_buffer_get_va(cmd_buffer->upload.upload_bo) + offset;
|
||||
|
|
|
@ -1214,8 +1214,9 @@ intrinsic("overwrite_vs_arguments_amd", src_comp=[1, 1], indices=[])
|
|||
# Overwrites TES input registers, for use with vertex compaction after culling. src = {tes_u, tes_v, rel_patch_id, patch_id}.
|
||||
intrinsic("overwrite_tes_arguments_amd", src_comp=[1, 1, 1, 1], indices=[])
|
||||
|
||||
# src = [index] BINDING = which table BASE = offset within handle
|
||||
intrinsic("load_sbt_amd", src_comp=[-1], dest_comp=0, indices=[BINDING, BASE],
|
||||
# loads a descriptor for an sbt.
|
||||
# src = [index] BINDING = which table
|
||||
intrinsic("load_sbt_amd", dest_comp=4, bit_sizes=[32], indices=[BINDING],
|
||||
flags=[CAN_ELIMINATE, CAN_REORDER])
|
||||
|
||||
# 1. HW descriptor
|
||||
|
|
Loading…
Reference in New Issue