352 lines
13 KiB
C
352 lines
13 KiB
C
/*
|
|
* Copyright © 2020 Intel Corporation
|
|
*
|
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
|
* copy of this software and associated documentation files (the "Software"),
|
|
* to deal in the Software without restriction, including without limitation
|
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
* and/or sell copies of the Software, and to permit persons to whom the
|
|
* Software is furnished to do so, subject to the following conditions:
|
|
*
|
|
* The above copyright notice and this permission notice (including the next
|
|
* paragraph) shall be included in all copies or substantial portions of the
|
|
* Software.
|
|
*
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
|
* IN THE SOFTWARE.
|
|
*/
|
|
|
|
#include "brw_nir_rt.h"
|
|
#include "brw_nir_rt_builder.h"
|
|
#include "nir_phi_builder.h"
|
|
|
|
/** Insert the appropriate return instruction at the end of the shader
 *
 * Walks the predecessors of the entrypoint's end block and, depending on
 * the shader stage, appends the stage-specific "return" action: retire the
 * bindless stack ID (raygen), accept the intersection (any-hit), or a BTD
 * return (callable/miss/closest-hit).  Intersection shaders are left
 * untouched here; see brw_nir_lower_intersection_shader.
 */
void
brw_nir_lower_shader_returns(nir_shader *shader)
{
   nir_function_impl *impl = nir_shader_get_entrypoint(shader);

   /* Reserve scratch space at the start of the shader's per-thread scratch
    * space for the return BINDLESS_SHADER_RECORD address and data payload.
    * When a shader is called, the calling shader will write the return BSR
    * address in this region of the callee's scratch space.
    *
    * We could also put it at the end of the caller's scratch space. However,
    * doing this way means that a shader never accesses its caller's scratch
    * space unless given an explicit pointer (such as for ray payloads). It
    * also makes computing the address easier given that we want to apply an
    * alignment to the scratch offset to ensure we can make alignment
    * assumptions in the called shader.
    *
    * This isn't needed for ray-gen shaders because they end the thread and
    * never return to the calling trampoline shader.
    */
   assert(shader->scratch_size == 0);
   if (shader->info.stage != MESA_SHADER_RAYGEN)
      shader->scratch_size = BRW_BTD_STACK_CALLEE_DATA_SIZE;

   nir_builder b;
   nir_builder_init(&b, impl);

   set_foreach(impl->end_block->predecessors, block_entry) {
      struct nir_block *block = (void *)block_entry->key;
      /* Insert before any trailing jump so the new intrinsic is the last
       * "real" instruction in the block.
       */
      b.cursor = nir_after_block_before_jump(block);

      switch (shader->info.stage) {
      case MESA_SHADER_RAYGEN:
         /* A raygen shader is always the root of the shader call tree. When
          * it ends, we retire the bindless stack ID and no further shaders
          * will be executed.
          */
         brw_nir_btd_retire(&b);
         break;

      case MESA_SHADER_ANY_HIT:
         /* The default action of an any-hit shader is to accept the ray
          * intersection.
          */
         nir_accept_ray_intersection(&b);
         break;

      case MESA_SHADER_CALLABLE:
      case MESA_SHADER_MISS:
      case MESA_SHADER_CLOSEST_HIT:
         /* Callable, miss, and closest-hit shaders don't take any special
          * action at the end. They simply return back to the previous shader
          * in the call stack.
          */
         brw_nir_btd_return(&b);
         break;

      case MESA_SHADER_INTERSECTION:
         /* This will be handled by brw_nir_lower_intersection_shader */
         break;

      default:
         unreachable("Invalid callable shader stage");
      }

      /* We only handle (and expect) a single predecessor of the end block;
       * stop after the first iteration and assert that assumption holds.
       */
      assert(impl->end_block->predecessors->entries == 1);
      break;
   }

   nir_metadata_preserve(impl, nir_metadata_block_index |
                               nir_metadata_dominance);
}
|
|
|
|
static void
|
|
store_resume_addr(nir_builder *b, nir_intrinsic_instr *call)
|
|
{
|
|
uint32_t call_idx = nir_intrinsic_call_idx(call);
|
|
uint32_t offset = nir_intrinsic_stack_size(call);
|
|
|
|
/* First thing on the called shader's stack is the resume address
|
|
* followed by a pointer to the payload.
|
|
*/
|
|
nir_ssa_def *resume_record_addr =
|
|
nir_iadd_imm(b, nir_load_btd_resume_sbt_addr_intel(b),
|
|
call_idx * BRW_BTD_RESUME_SBT_STRIDE);
|
|
/* By the time we get here, any remaining shader/function memory
|
|
* pointers have been lowered to SSA values.
|
|
*/
|
|
assert(nir_get_shader_call_payload_src(call)->is_ssa);
|
|
nir_ssa_def *payload_addr =
|
|
nir_get_shader_call_payload_src(call)->ssa;
|
|
brw_nir_rt_store_scratch(b, offset, BRW_BTD_STACK_ALIGN,
|
|
nir_vec2(b, resume_record_addr, payload_addr),
|
|
0xf /* write_mask */);
|
|
|
|
nir_btd_stack_push_intel(b, offset);
|
|
}
|
|
|
|
/* Lower nir_intrinsic_rt_trace_ray to the HW MemRay packet + trace message.
 *
 * Writes the resume address/payload to the callee stack, fills out a
 * brw_nir_rt_mem_ray_defs structure from the intrinsic's sources, stores it
 * to the ray memory, and emits an asynchronous trace_ray_intel message.
 * Returns true if the instruction was lowered.
 */
static bool
lower_shader_trace_ray_instr(struct nir_builder *b, nir_instr *instr, void *data)
{
   if (instr->type != nir_instr_type_intrinsic)
      return false;

   /* Leave nir_intrinsic_rt_resume to be lowered by
    * brw_nir_lower_rt_intrinsics()
    */
   nir_intrinsic_instr *call = nir_instr_as_intrinsic(instr);
   if (call->intrinsic != nir_intrinsic_rt_trace_ray)
      return false;

   b->cursor = nir_instr_remove(instr);

   store_resume_addr(b, call);

   /* Sources follow the OpTraceRayKHR operand order. */
   nir_ssa_def *as_addr = call->src[0].ssa;
   nir_ssa_def *ray_flags = call->src[1].ssa;
   /* From the SPIR-V spec:
    *
    *    "Only the 8 least-significant bits of Cull Mask are used by this
    *    instruction - other bits are ignored.
    *
    *    Only the 4 least-significant bits of SBT Offset and SBT Stride are
    *    used by this instruction - other bits are ignored.
    *
    *    Only the 16 least-significant bits of Miss Index are used by this
    *    instruction - other bits are ignored."
    */
   nir_ssa_def *cull_mask = nir_iand_imm(b, call->src[2].ssa, 0xff);
   nir_ssa_def *sbt_offset = nir_iand_imm(b, call->src[3].ssa, 0xf);
   nir_ssa_def *sbt_stride = nir_iand_imm(b, call->src[4].ssa, 0xf);
   nir_ssa_def *miss_index = nir_iand_imm(b, call->src[5].ssa, 0xffff);
   nir_ssa_def *ray_orig = call->src[6].ssa;
   nir_ssa_def *ray_t_min = call->src[7].ssa;
   nir_ssa_def *ray_dir = call->src[8].ssa;
   nir_ssa_def *ray_t_max = call->src[9].ssa;

   nir_ssa_def *root_node_ptr =
      brw_nir_rt_acceleration_structure_to_root_node(b, as_addr);

   /* The hardware packet requires an address to the first element of the
    * hit SBT.
    *
    * In order to calculate this, we must multiply the "SBT Offset"
    * provided to OpTraceRay by the SBT stride provided for the hit SBT in
    * the call to vkCmdTraceRay() and add that to the base address of the
    * hit SBT. This stride is not to be confused with the "SBT Stride"
    * provided to OpTraceRay which is in units of this stride. It's a
    * rather terrible overload of the word "stride". The hardware docs
    * calls the SPIR-V stride value the "shader index multiplier" which is
    * a much more sane name.
    */
   nir_ssa_def *hit_sbt_stride_B =
      nir_load_ray_hit_sbt_stride_intel(b);
   nir_ssa_def *hit_sbt_offset_B =
      nir_umul_32x16(b, sbt_offset, nir_u2u32(b, hit_sbt_stride_B));
   nir_ssa_def *hit_sbt_addr =
      nir_iadd(b, nir_load_ray_hit_sbt_addr_intel(b),
                  nir_u2u64(b, hit_sbt_offset_B));

   /* The hardware packet takes an address to the miss BSR. */
   nir_ssa_def *miss_sbt_stride_B =
      nir_load_ray_miss_sbt_stride_intel(b);
   nir_ssa_def *miss_sbt_offset_B =
      nir_umul_32x16(b, miss_index, nir_u2u32(b, miss_sbt_stride_B));
   nir_ssa_def *miss_sbt_addr =
      nir_iadd(b, nir_load_ray_miss_sbt_addr_intel(b),
                  nir_u2u64(b, miss_sbt_offset_B));

   struct brw_nir_rt_mem_ray_defs ray_defs = {
      .root_node_ptr = root_node_ptr,
      .ray_flags = nir_u2u16(b, ray_flags),
      .ray_mask = cull_mask,
      .hit_group_sr_base_ptr = hit_sbt_addr,
      .hit_group_sr_stride = nir_u2u16(b, hit_sbt_stride_B),
      .miss_sr_ptr = miss_sbt_addr,
      .orig = ray_orig,
      .t_near = ray_t_min,
      .dir = ray_dir,
      .t_far = ray_t_max,
      .shader_index_multiplier = sbt_stride,
   };
   brw_nir_rt_store_mem_ray(b, &ray_defs, BRW_RT_BVH_LEVEL_WORLD);

   /* Kick off traversal at the world BVH level.  Note: INITAL is the
    * (misspelled) enum name from the hardware description.
    */
   nir_trace_ray_intel(b,
                       nir_load_btd_global_arg_addr_intel(b),
                       nir_imm_int(b, BRW_RT_BVH_LEVEL_WORLD),
                       nir_imm_int(b, GEN_RT_TRACE_RAY_INITAL),
                       .synchronous = false);
   return true;
}
|
|
|
|
static bool
|
|
lower_shader_call_instr(struct nir_builder *b, nir_instr *instr, void *data)
|
|
{
|
|
if (instr->type != nir_instr_type_intrinsic)
|
|
return false;
|
|
|
|
/* Leave nir_intrinsic_rt_resume to be lowered by
|
|
* brw_nir_lower_rt_intrinsics()
|
|
*/
|
|
nir_intrinsic_instr *call = nir_instr_as_intrinsic(instr);
|
|
if (call->intrinsic != nir_intrinsic_rt_execute_callable)
|
|
return false;
|
|
|
|
b->cursor = nir_instr_remove(instr);
|
|
|
|
store_resume_addr(b, call);
|
|
|
|
nir_ssa_def *sbt_offset32 =
|
|
nir_imul(b, call->src[0].ssa,
|
|
nir_u2u32(b, nir_load_callable_sbt_stride_intel(b)));
|
|
nir_ssa_def *sbt_addr =
|
|
nir_iadd(b, nir_load_callable_sbt_addr_intel(b),
|
|
nir_u2u64(b, sbt_offset32));
|
|
brw_nir_btd_spawn(b, sbt_addr);
|
|
return true;
|
|
}
|
|
|
|
bool
|
|
brw_nir_lower_shader_calls(nir_shader *shader)
|
|
{
|
|
return
|
|
nir_shader_instructions_pass(shader,
|
|
lower_shader_trace_ray_instr,
|
|
nir_metadata_none,
|
|
NULL) |
|
|
nir_shader_instructions_pass(shader,
|
|
lower_shader_call_instr,
|
|
nir_metadata_block_index |
|
|
nir_metadata_dominance,
|
|
NULL);
|
|
}
|
|
|
|
/** Creates a trivial return shader
 *
 * In most cases this shader doesn't actually do anything. It just needs to
 * return to the caller.
 *
 * By default, our HW has the ability to handle the fact that a shader is not
 * available and will execute the next following shader in the tracing call.
 * For instance, a RAYGEN shader traces a ray, the tracing generates a hit,
 * but there is no ANYHIT shader available. The HW should follow up by
 * executing the CLOSESTHIT shader.
 *
 * This default behavior can be changed through the RT_CTRL register
 * (privileged access) and when NULL shader checks are disabled, the HW will
 * instead call the call stack handler (this shader). This is what i915 is
 * doing as part of Wa_14013202645.
 *
 * In order to ensure the call to the CLOSESTHIT shader, this shader needs to
 * commit the ray and will not proceed with the BTD return. Similarly when the
 * same thing happens with the INTERSECTION shader, we should just carry on the
 * ray traversal with the continue operation.
 *
 */
nir_shader *
brw_nir_create_trivial_return_shader(const struct brw_compiler *compiler,
                                     void *mem_ctx)
{
   const nir_shader_compiler_options *nir_options =
      compiler->nir_options[MESA_SHADER_CALLABLE];

   nir_builder _b = nir_builder_init_simple_shader(MESA_SHADER_CALLABLE,
                                                   nir_options,
                                                   "RT Trivial Return");
   nir_builder *b = &_b;

   /* Re-parent the shader so its lifetime follows the caller's context. */
   ralloc_steal(mem_ctx, b->shader);
   nir_shader *nir = b->shader;

   /* Workaround not needed on DG2-G10-C0+ & DG2-G11-B0+ */
   if ((compiler->devinfo->platform == INTEL_PLATFORM_DG2_G10 &&
        compiler->devinfo->revision < 8) ||
       (compiler->devinfo->platform == INTEL_PLATFORM_DG2_G11 &&
        compiler->devinfo->revision < 4)) {
      /* Reserve scratch space at the start of the shader's per-thread scratch
       * space for the return BINDLESS_SHADER_RECORD address and data payload.
       * When a shader is called, the calling shader will write the return BSR
       * address in this region of the callee's scratch space.
       */
      nir->scratch_size = BRW_BTD_STACK_CALLEE_DATA_SIZE;

      nir_function_impl *impl = nir_shader_get_entrypoint(nir);

      b->cursor = nir_before_block(nir_start_block(impl));

      /* The stage we were invoked in place of is available at runtime. */
      nir_ssa_def *shader_type = nir_load_btd_shader_type_intel(b);

      nir_ssa_def *is_intersection_shader =
         nir_ieq_imm(b, shader_type, GEN_RT_BTD_SHADER_TYPE_INTERSECTION);
      nir_ssa_def *is_anyhit_shader =
         nir_ieq_imm(b, shader_type, GEN_RT_BTD_SHADER_TYPE_ANY_HIT);

      /* Intersection must continue traversal; any-hit must commit the hit.
       * Everything else simply returns.
       */
      nir_ssa_def *needs_commit_or_continue =
         nir_ior(b, is_intersection_shader, is_anyhit_shader);

      nir_push_if(b, needs_commit_or_continue);
      {
         struct brw_nir_rt_mem_hit_defs hit_in = {};
         brw_nir_rt_load_mem_hit(b, &hit_in, false /* committed */);

         nir_ssa_def *ray_op =
            nir_bcsel(b, is_intersection_shader,
                         nir_imm_int(b, GEN_RT_TRACE_RAY_CONTINUE),
                         nir_imm_int(b, GEN_RT_TRACE_RAY_COMMIT));
         nir_ssa_def *ray_level = hit_in.bvh_level;

         nir_trace_ray_intel(b,
                             nir_load_btd_global_arg_addr_intel(b),
                             ray_level, ray_op);
      }
      nir_push_else(b, NULL);
      {
         brw_nir_btd_return(b);
      }
      nir_pop_if(b, NULL);
   } else {
      /* No workaround needed: just emit the normal stage-specific return. */
      NIR_PASS_V(nir, brw_nir_lower_shader_returns);
   }

   return nir;
}
|