intel/fs: ray query fix for global address
With stages dispatching with a mask, we can run into situations where
we don't have the global address in all lanes. The existing code
always assumed we had the address in at least lane0.
Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Fixes: bb40e999d1
("intel/nir: use a single intel intrinsic to deal with ray traversal")
Reviewed-by: Caio Oliveira <caio.oliveira@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17330>
This commit is contained in:
parent
9bd7570e96
commit
9680e0e4a2
|
@ -6859,7 +6859,14 @@ static void
|
|||
lower_trace_ray_logical_send(const fs_builder &bld, fs_inst *inst)
|
||||
{
|
||||
const intel_device_info *devinfo = bld.shader->devinfo;
|
||||
const fs_reg &globals_addr = inst->src[RT_LOGICAL_SRC_GLOBALS];
|
||||
/* The emit_uniformize() in brw_fs_nir.cpp will generate a horizontal
|
||||
* stride of 0. Below we're doing a MOV() in SIMD2. Since we can't use UQ/Q
|
||||
* types on Gfx12.5, we need to tweak the stride with a value of 1 dword
|
||||
* so that the MOV operates on 2 components rather than twice the same
|
||||
* component.
|
||||
*/
|
||||
fs_reg globals_addr = retype(inst->src[RT_LOGICAL_SRC_GLOBALS], BRW_REGISTER_TYPE_UD);
|
||||
globals_addr.stride = 1;
|
||||
const fs_reg &bvh_level =
|
||||
inst->src[RT_LOGICAL_SRC_BVH_LEVEL].file == BRW_IMMEDIATE_VALUE ?
|
||||
inst->src[RT_LOGICAL_SRC_BVH_LEVEL] :
|
||||
|
@ -6878,7 +6885,7 @@ lower_trace_ray_logical_send(const fs_builder &bld, fs_inst *inst)
|
|||
const fs_builder ubld = bld.exec_all().group(8, 0);
|
||||
fs_reg header = ubld.vgrf(BRW_REGISTER_TYPE_UD);
|
||||
ubld.MOV(header, brw_imm_ud(0));
|
||||
ubld.group(2, 0).MOV(header, retype(globals_addr, BRW_REGISTER_TYPE_UD));
|
||||
ubld.group(2, 0).MOV(header, globals_addr);
|
||||
if (synchronous)
|
||||
ubld.group(1, 0).MOV(byte_offset(header, 16), brw_imm_ud(synchronous));
|
||||
|
||||
|
|
|
@ -5948,7 +5948,9 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
|
|||
emit_rt_lsc_fence(bld, LSC_FENCE_LOCAL, LSC_FLUSH_TYPE_NONE);
|
||||
|
||||
fs_reg srcs[RT_LOGICAL_NUM_SRCS];
|
||||
srcs[RT_LOGICAL_SRC_GLOBALS] = get_nir_src(instr->src[0]);
|
||||
|
||||
fs_reg globals = get_nir_src(instr->src[0]);
|
||||
srcs[RT_LOGICAL_SRC_GLOBALS] = bld.emit_uniformize(globals);
|
||||
srcs[RT_LOGICAL_SRC_BVH_LEVEL] = get_nir_src(instr->src[1]);
|
||||
srcs[RT_LOGICAL_SRC_TRACE_RAY_CONTROL] = get_nir_src(instr->src[2]);
|
||||
srcs[RT_LOGICAL_SRC_SYNCHRONOUS] = brw_imm_ud(synchronous);
|
||||
|
|
Loading…
Reference in New Issue