intel/fs: ray query fix for global address

With stages dispatching with a mask, we can run into situations where
we don't have the global address in all lanes. The existing code
always assumed we had the addres in at least lane0.

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Fixes: bb40e999d1 ("intel/nir: use a single intel intrinsic to deal with ray traversal")
Reviewed-by: Caio Oliveira <caio.oliveira@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17330>
This commit is contained in:
Lionel Landwerlin 2022-06-23 14:15:51 +03:00 committed by Marge Bot
parent 9bd7570e96
commit 9680e0e4a2
2 changed files with 12 additions and 3 deletions

View File

@ -6859,7 +6859,14 @@ static void
lower_trace_ray_logical_send(const fs_builder &bld, fs_inst *inst)
{
const intel_device_info *devinfo = bld.shader->devinfo;
const fs_reg &globals_addr = inst->src[RT_LOGICAL_SRC_GLOBALS];
/* The emit_uniformize() in brw_fs_nir.cpp will generate an horizontal
* stride of 0. Below we're doing a MOV() in SIMD2. Since we can't use UQ/Q
* types in on Gfx12.5, we need to tweak the stride with a value of 1 dword
* so that the MOV operates on 2 components rather than twice the same
* component.
*/
fs_reg globals_addr = retype(inst->src[RT_LOGICAL_SRC_GLOBALS], BRW_REGISTER_TYPE_UD);
globals_addr.stride = 1;
const fs_reg &bvh_level =
inst->src[RT_LOGICAL_SRC_BVH_LEVEL].file == BRW_IMMEDIATE_VALUE ?
inst->src[RT_LOGICAL_SRC_BVH_LEVEL] :
@ -6878,7 +6885,7 @@ lower_trace_ray_logical_send(const fs_builder &bld, fs_inst *inst)
const fs_builder ubld = bld.exec_all().group(8, 0);
fs_reg header = ubld.vgrf(BRW_REGISTER_TYPE_UD);
ubld.MOV(header, brw_imm_ud(0));
ubld.group(2, 0).MOV(header, retype(globals_addr, BRW_REGISTER_TYPE_UD));
ubld.group(2, 0).MOV(header, globals_addr);
if (synchronous)
ubld.group(1, 0).MOV(byte_offset(header, 16), brw_imm_ud(synchronous));

View File

@ -5948,7 +5948,9 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
emit_rt_lsc_fence(bld, LSC_FENCE_LOCAL, LSC_FLUSH_TYPE_NONE);
fs_reg srcs[RT_LOGICAL_NUM_SRCS];
srcs[RT_LOGICAL_SRC_GLOBALS] = get_nir_src(instr->src[0]);
fs_reg globals = get_nir_src(instr->src[0]);
srcs[RT_LOGICAL_SRC_GLOBALS] = bld.emit_uniformize(globals);
srcs[RT_LOGICAL_SRC_BVH_LEVEL] = get_nir_src(instr->src[1]);
srcs[RT_LOGICAL_SRC_TRACE_RAY_CONTROL] = get_nir_src(instr->src[2]);
srcs[RT_LOGICAL_SRC_SYNCHRONOUS] = brw_imm_ud(synchronous);