intel/fs: lower ray query intrinsics
v2: Add helper for acceleration->root_node computation (Caio) v3: Update comment on "done" bit (Caio) Remove progress bool value for impl function (Caio) Don't use nir_shader_instructions_pass to search the shader (Caio) v4: Rename variable for if/else block (Caio) Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Reviewed-by: Caio Oliveira <caio.oliveira@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13719>
This commit is contained in:
parent
712d8fb043
commit
c78be5da30
|
@ -1392,3 +1392,4 @@ system_value("leaf_procedural_intel", 1, bit_sizes=[1])
|
|||
# 2: Miss
|
||||
# 3: Intersection
|
||||
system_value("btd_shader_type_intel", 1)
|
||||
system_value("ray_query_global_intel", 1, bit_sizes=[64])
|
||||
|
|
|
@ -177,6 +177,7 @@ can_remat_instr(nir_instr *instr, struct brw_bitset *remat)
|
|||
case nir_intrinsic_load_callable_sbt_addr_intel:
|
||||
case nir_intrinsic_load_callable_sbt_stride_intel:
|
||||
case nir_intrinsic_load_reloc_const_intel:
|
||||
case nir_intrinsic_load_ray_query_global_intel:
|
||||
/* Notably missing from the above list is btd_local_arg_addr_intel.
|
||||
* This is because the resume shader will have a different local
|
||||
* argument pointer because it has a different BSR. Any access of
|
||||
|
|
|
@ -22,6 +22,7 @@
|
|||
*/
|
||||
|
||||
#include "brw_nir.h"
|
||||
#include "brw_nir_rt.h"
|
||||
#include "brw_shader.h"
|
||||
#include "dev/intel_debug.h"
|
||||
#include "compiler/glsl_types.h"
|
||||
|
@ -547,6 +548,8 @@ brw_nir_optimize(nir_shader *nir, const struct brw_compiler *compiler,
|
|||
OPT(nir_opt_dead_write_vars);
|
||||
OPT(nir_opt_combine_stores, nir_var_all);
|
||||
|
||||
OPT(nir_opt_ray_queries);
|
||||
|
||||
if (is_scalar) {
|
||||
OPT(nir_lower_alu_to_scalar, NULL, NULL);
|
||||
} else {
|
||||
|
|
|
@ -0,0 +1,590 @@
|
|||
/*
|
||||
* Copyright (c) 2021 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "brw_nir_rt.h"
|
||||
#include "brw_nir_rt_builder.h"
|
||||
|
||||
#include "nir_deref.h"
|
||||
|
||||
#include "util/macros.h"
|
||||
|
||||
/* Per-shader state shared by all the ray-query lowering helpers below. */
struct lowering_state {
   const struct intel_device_info *devinfo;

   /* Maps nir_variable (opaque ray query var) -> struct brw_ray_query. */
   struct hash_table *queries;
   /* Total number of ray-query slots assigned so far; arrays of queries
    * consume one slot per element (see maybe_create_brw_var).
    */
   uint32_t n_queries;

   /* RT globals loaded once per impl (see lower_ray_query_impl). */
   struct brw_nir_rt_globals_defs globals;
   nir_ssa_def *rq_globals;
};
|
||||
|
||||
/* One entry per ray-query variable found in the shader. */
struct brw_ray_query {
   nir_variable *opaque_var; /* the original opaque RayQuery variable */
   uint32_t id;              /* first slot index assigned to this variable */
};
|
||||
|
||||
static bool
|
||||
need_spill_fill(struct lowering_state *state)
|
||||
{
|
||||
return state->n_queries > 1;
|
||||
}
|
||||
|
||||
/**
|
||||
* This pass converts opaque RayQuery structures from SPIR-V into a vec3 where
|
||||
* the first 2 elements store a global address for the query and the third
|
||||
* element is an incremented counter on the number of executed
|
||||
* nir_intrinsic_rq_proceed.
|
||||
*/
|
||||
|
||||
/* If @instr is a deref of a ray-query variable we have not seen before,
 * register the variable in state->queries and assign it a contiguous range
 * of query slot IDs (one per array element). Returns true when a new entry
 * was created.
 */
static bool
maybe_create_brw_var(nir_instr *instr, struct lowering_state *state)
{
   if (instr->type != nir_instr_type_deref)
      return false;

   nir_deref_instr *deref = nir_instr_as_deref(instr);
   if (deref->deref_type != nir_deref_type_var &&
       deref->deref_type != nir_deref_type_array)
      return false;

   nir_variable *opaque_var = nir_deref_instr_get_variable(deref);
   if (!opaque_var || !opaque_var->data.ray_query)
      return false;

   /* Already registered by an earlier deref of the same variable? */
   struct hash_entry *entry = _mesa_hash_table_search(state->queries, opaque_var);
   if (entry)
      return false;

   struct brw_ray_query *rq = rzalloc(state->queries, struct brw_ray_query);
   rq->opaque_var = opaque_var;
   rq->id = state->n_queries;

   _mesa_hash_table_insert(state->queries, opaque_var, rq);

   /* Arrays of queries consume one slot per element, scalars one slot. */
   unsigned aoa_size = glsl_get_aoa_size(opaque_var->type);
   state->n_queries += MAX2(1, aoa_size);

   return true;
}
|
||||
|
||||
/* Compute the per-lane shadow-memory address backing the ray-query @deref.
 *
 * Always writes *out_state_addr, the address of the 4-byte ctrl/level dword
 * for this query (stored after all the shadow stacks when spill/fill is in
 * use). Returns NULL when no spill/fill is needed -- a single-query shader
 * works directly in the HW stack -- otherwise the shadow-stack address for
 * this query and SIMD lane.
 */
static nir_ssa_def *
get_ray_query_shadow_addr(nir_builder *b,
                          nir_deref_instr *deref,
                          struct lowering_state *state,
                          nir_ssa_def **out_state_addr)
{
   nir_deref_path path;
   nir_deref_path_init(&path, deref, NULL);
   assert(path.path[0]->deref_type == nir_deref_type_var);

   nir_variable *opaque_var = nir_deref_instr_get_variable(path.path[0]);
   struct hash_entry *entry = _mesa_hash_table_search(state->queries, opaque_var);
   assert(entry);

   struct brw_ray_query *rq = entry->data;

   /* Base address in the shadow memory of the variable associated with this
    * ray query variable.
    */
   nir_ssa_def *base_addr =
      nir_iadd_imm(b, state->globals.resume_sbt_addr,
                   brw_rt_ray_queries_shadow_stack_size(state->devinfo) * rq->id);

   bool spill_fill = need_spill_fill(state);
   /* With spill/fill, the ctrl/level dwords live after all the shadow
    * stacks (4 bytes per query); without it, the state sits at the start
    * of the region.
    */
   *out_state_addr =
      spill_fill ?
      nir_iadd_imm(b,
                   state->globals.resume_sbt_addr,
                   brw_rt_ray_queries_shadow_stack_size(state->devinfo) *
                   b->shader->info.ray_queries +
                   4 * rq->id) :
      state->globals.resume_sbt_addr;

   if (!spill_fill)
      return NULL;

   /* Just emit code and let constant-folding go to town */
   nir_deref_instr **p = &path.path[1];
   for (; *p; p++) {
      if ((*p)->deref_type == nir_deref_type_array) {
         nir_ssa_def *index = nir_ssa_for_src(b, (*p)->arr.index, 1);

         /* Advance the ctrl/level state address: 4 bytes per element of
          * the sub-array being indexed.
          */
         uint32_t local_state_offset = 4 * MAX2(1, glsl_get_aoa_size((*p)->type));
         *out_state_addr =
            nir_iadd(b, *out_state_addr,
                     nir_i2i64(b,
                               nir_imul_imm(b, index, local_state_offset)));

         /* Advance the shadow-stack address: one full shadow stack per
          * element of the sub-array being indexed.
          */
         uint64_t size = MAX2(1, glsl_get_aoa_size((*p)->type)) *
            brw_rt_ray_queries_shadow_stack_size(state->devinfo);

         nir_ssa_def *mul = nir_amul_imm(b, nir_i2i64(b, index), size);

         base_addr = nir_iadd(b, base_addr, mul);
      } else {
         unreachable("Unsupported deref type");
      }
   }

   nir_deref_path_finish(&path);

   /* Add the lane offset to the shadow memory address */
   nir_ssa_def *lane_offset =
      nir_imul_imm(
         b,
         nir_iadd(
            b,
            nir_imul(
               b,
               brw_load_btd_dss_id(b),
               brw_nir_rt_load_num_simd_lanes_per_dss(b, state->devinfo)),
            brw_nir_rt_sync_stack_id(b)),
         BRW_RT_SIZEOF_SHADOW_RAY_QUERY);

   return nir_iadd(b, base_addr, nir_i2i64(b, lane_offset));
}
|
||||
|
||||
static void
|
||||
update_trace_ctrl_level(nir_builder *b,
|
||||
nir_ssa_def *state_addr,
|
||||
nir_ssa_def **out_old_ctrl,
|
||||
nir_ssa_def **out_old_level,
|
||||
nir_ssa_def *new_ctrl,
|
||||
nir_ssa_def *new_level)
|
||||
{
|
||||
nir_ssa_def *old_value = brw_nir_rt_load(b, state_addr, 4, 1, 32);
|
||||
nir_ssa_def *old_ctrl = nir_ishr_imm(b, old_value, 2);
|
||||
nir_ssa_def *old_level = nir_iand_imm(b, old_value, 0x3);
|
||||
|
||||
if (out_old_ctrl)
|
||||
*out_old_ctrl = old_ctrl;
|
||||
if (out_old_level)
|
||||
*out_old_level = old_level;
|
||||
|
||||
if (new_ctrl || new_level) {
|
||||
if (!new_ctrl)
|
||||
new_ctrl = old_ctrl;
|
||||
if (!new_level)
|
||||
new_level = old_level;
|
||||
|
||||
nir_ssa_def *new_value = nir_ior(b, nir_ishl_imm(b, new_ctrl, 2), new_level);
|
||||
brw_nir_rt_store(b, state_addr, 4, new_value, 0x1);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
fill_query(nir_builder *b,
|
||||
nir_ssa_def *hw_stack_addr,
|
||||
nir_ssa_def *shadow_stack_addr,
|
||||
nir_ssa_def *ctrl)
|
||||
{
|
||||
brw_nir_memcpy_global(b,
|
||||
brw_nir_rt_mem_hit_addr_from_addr(b, hw_stack_addr, false), 16,
|
||||
brw_nir_rt_mem_hit_addr_from_addr(b, shadow_stack_addr, false), 16,
|
||||
BRW_RT_SIZEOF_HIT_INFO);
|
||||
brw_nir_memcpy_global(b,
|
||||
brw_nir_rt_mem_hit_addr_from_addr(b, hw_stack_addr, true), 16,
|
||||
brw_nir_rt_mem_hit_addr_from_addr(b, shadow_stack_addr, true), 16,
|
||||
BRW_RT_SIZEOF_HIT_INFO);
|
||||
brw_nir_memcpy_global(b,
|
||||
brw_nir_rt_mem_ray_addr(b, hw_stack_addr,
|
||||
BRW_RT_BVH_LEVEL_WORLD), 16,
|
||||
brw_nir_rt_mem_ray_addr(b, shadow_stack_addr,
|
||||
BRW_RT_BVH_LEVEL_WORLD), 16,
|
||||
BRW_RT_SIZEOF_RAY);
|
||||
}
|
||||
|
||||
static void
|
||||
spill_query(nir_builder *b,
|
||||
nir_ssa_def *hw_stack_addr,
|
||||
nir_ssa_def *shadow_stack_addr)
|
||||
{
|
||||
struct brw_nir_rt_mem_hit_defs committed_hit = {};
|
||||
brw_nir_rt_load_mem_hit_from_addr(b, &committed_hit, hw_stack_addr, true);
|
||||
|
||||
/* Always copy the potential hit back */
|
||||
brw_nir_memcpy_global(b,
|
||||
brw_nir_rt_mem_hit_addr_from_addr(b, shadow_stack_addr, false), 16,
|
||||
brw_nir_rt_mem_hit_addr_from_addr(b, hw_stack_addr, false), 16,
|
||||
BRW_RT_SIZEOF_HIT_INFO);
|
||||
|
||||
/* Also copy the committed hit back if it is valid */
|
||||
nir_push_if(b, committed_hit.valid);
|
||||
{
|
||||
brw_nir_memcpy_global(b,
|
||||
brw_nir_rt_mem_hit_addr_from_addr(b, shadow_stack_addr, true), 16,
|
||||
brw_nir_rt_mem_hit_addr_from_addr(b, hw_stack_addr, true), 16,
|
||||
BRW_RT_SIZEOF_HIT_INFO);
|
||||
}
|
||||
nir_pop_if(b, NULL);
|
||||
}
|
||||
|
||||
|
||||
/* Lower a single rq_* intrinsic into HW stack / shadow-stack operations.
 *
 * The intrinsic is removed and the builder cursor placed where it stood;
 * value-producing intrinsics (rq_proceed, rq_load) have their uses rewritten
 * to the computed SSA value.
 */
static void
lower_ray_query_intrinsic(nir_builder *b,
                          nir_intrinsic_instr *intrin,
                          struct lowering_state *state)
{
   nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);

   b->cursor = nir_instr_remove(&intrin->instr);

   /* shadow_stack_addr is NULL when a single query uses the HW stack
    * directly; stack_addr is whichever of the two backs this query.
    */
   nir_ssa_def *ctrl_level_addr;
   nir_ssa_def *shadow_stack_addr =
      get_ray_query_shadow_addr(b, deref, state, &ctrl_level_addr);
   nir_ssa_def *hw_stack_addr =
      brw_nir_rt_sync_stack_addr(b, state->globals.base_mem_addr, state->devinfo);
   nir_ssa_def *stack_addr = shadow_stack_addr ? shadow_stack_addr : hw_stack_addr;

   switch (intrin->intrinsic) {
   case nir_intrinsic_rq_initialize: {
      nir_ssa_def *as_addr = intrin->src[1].ssa;
      nir_ssa_def *ray_flags = intrin->src[2].ssa;
      /* From the SPIR-V spec:
       *
       *    "Only the 8 least-significant bits of Cull Mask are used by
       *    this instruction - other bits are ignored.
       *
       *    Only the 16 least-significant bits of Miss Index are used by
       *    this instruction - other bits are ignored."
       */
      nir_ssa_def *cull_mask = nir_iand_imm(b, intrin->src[3].ssa, 0xff);
      nir_ssa_def *ray_orig = intrin->src[4].ssa;
      nir_ssa_def *ray_t_min = intrin->src[5].ssa;
      nir_ssa_def *ray_dir = intrin->src[6].ssa;
      nir_ssa_def *ray_t_max = intrin->src[7].ssa;

      nir_ssa_def *root_node_ptr =
         brw_nir_rt_acceleration_structure_to_root_node(b, as_addr);

      struct brw_nir_rt_mem_ray_defs ray_defs = {
         .root_node_ptr = root_node_ptr,
         .ray_flags = nir_u2u16(b, ray_flags),
         .ray_mask = cull_mask,
         .orig = ray_orig,
         .t_near = ray_t_min,
         .dir = ray_dir,
         .t_far = ray_t_max,
      };

      nir_ssa_def *ray_addr =
         brw_nir_rt_mem_ray_addr(b, stack_addr, BRW_RT_BVH_LEVEL_WORLD);

      /* Reset the query state and both hit structures, then store the ray. */
      brw_nir_rt_query_mark_init(b, stack_addr);
      brw_nir_rt_init_mem_hit_at_addr(b, stack_addr, false, ray_t_max);
      brw_nir_rt_init_mem_hit_at_addr(b, stack_addr, true, ray_t_max);
      brw_nir_rt_store_mem_ray_query_at_addr(b, ray_addr, &ray_defs);

      update_trace_ctrl_level(b, ctrl_level_addr,
                              NULL, NULL,
                              nir_imm_int(b, GEN_RT_TRACE_RAY_INITAL),
                              nir_imm_int(b, BRW_RT_BVH_LEVEL_WORLD));
      break;
   }

   case nir_intrinsic_rq_proceed: {
      nir_ssa_def *not_done =
         nir_inot(b, brw_nir_rt_query_done(b, stack_addr));
      nir_ssa_def *not_done_then, *not_done_else;

      nir_push_if(b, not_done);
      {
         nir_ssa_def *ctrl, *level;
         update_trace_ctrl_level(b, ctrl_level_addr,
                                 &ctrl, &level,
                                 NULL,
                                 NULL);

         /* Mark the query as done before handing it over to the HW for
          * processing. If the HW makes any progress, it will write back some
          * data and as a side effect, clear the "done" bit. If no progress is
          * made, HW does not write anything back and we can use this bit to
          * detect that.
          */
         brw_nir_rt_query_mark_done(b, stack_addr);

         if (shadow_stack_addr)
            fill_query(b, hw_stack_addr, shadow_stack_addr, ctrl);

         nir_trace_ray_intel(b, state->rq_globals, level, ctrl, .synchronous = true);

         struct brw_nir_rt_mem_hit_defs hit_in = {};
         brw_nir_rt_load_mem_hit_from_addr(b, &hit_in, hw_stack_addr, false);

         if (shadow_stack_addr)
            spill_query(b, hw_stack_addr, shadow_stack_addr);

         /* The next rq_proceed (if any) continues the traversal at the
          * object level.
          */
         update_trace_ctrl_level(b, ctrl_level_addr,
                                 NULL, NULL,
                                 nir_imm_int(b, GEN_RT_TRACE_RAY_CONTINUE),
                                 nir_imm_int(b, BRW_RT_BVH_LEVEL_OBJECT));

         not_done_then = nir_inot(b, hit_in.done);
      }
      nir_push_else(b, NULL);
      {
         /* Query already finished: rq_proceed returns false. */
         not_done_else = nir_imm_false(b);
      }
      nir_pop_if(b, NULL);
      not_done = nir_if_phi(b, not_done_then, not_done_else);
      nir_ssa_def_rewrite_uses(&intrin->dest.ssa, not_done);
      break;
   }

   case nir_intrinsic_rq_confirm_intersection: {
      /* Promote the potential hit to the committed hit. */
      brw_nir_memcpy_global(b,
                            brw_nir_rt_mem_hit_addr_from_addr(b, stack_addr, true), 16,
                            brw_nir_rt_mem_hit_addr_from_addr(b, stack_addr, false), 16,
                            BRW_RT_SIZEOF_HIT_INFO);
      update_trace_ctrl_level(b, ctrl_level_addr,
                              NULL, NULL,
                              nir_imm_int(b, GEN_RT_TRACE_RAY_COMMIT),
                              nir_imm_int(b, BRW_RT_BVH_LEVEL_OBJECT));
      break;
   }

   case nir_intrinsic_rq_generate_intersection: {
      /* Commit a procedural hit at the application-provided T value
       * (src[1]).
       */
      brw_nir_rt_generate_hit_addr(b, stack_addr, intrin->src[1].ssa);
      update_trace_ctrl_level(b, ctrl_level_addr,
                              NULL, NULL,
                              nir_imm_int(b, GEN_RT_TRACE_RAY_COMMIT),
                              nir_imm_int(b, BRW_RT_BVH_LEVEL_OBJECT));
      break;
   }

   case nir_intrinsic_rq_terminate: {
      /* Setting the "done" bit makes the next rq_proceed return false. */
      brw_nir_rt_query_mark_done(b, stack_addr);
      break;
   }

   case nir_intrinsic_rq_load: {
      /* src[1] selects the committed (true) vs candidate (false) hit. */
      const bool committed = nir_src_as_bool(intrin->src[1]);

      struct brw_nir_rt_mem_ray_defs world_ray_in = {};
      struct brw_nir_rt_mem_ray_defs object_ray_in = {};
      struct brw_nir_rt_mem_hit_defs hit_in = {};
      brw_nir_rt_load_mem_ray_from_addr(b, &world_ray_in, stack_addr,
                                        BRW_RT_BVH_LEVEL_WORLD);
      brw_nir_rt_load_mem_ray_from_addr(b, &object_ray_in, stack_addr,
                                        BRW_RT_BVH_LEVEL_OBJECT);
      brw_nir_rt_load_mem_hit_from_addr(b, &hit_in, stack_addr, committed);

      nir_ssa_def *sysval = NULL;
      switch (nir_intrinsic_base(intrin)) {
      case nir_ray_query_value_intersection_type:
         if (committed) {
            /* Values we want to generate :
             *
             * RayQueryCommittedIntersectionNoneEXT = 0U <= hit_in.valid == false
             * RayQueryCommittedIntersectionTriangleEXT = 1U <= hit_in.leaf_type == BRW_RT_BVH_NODE_TYPE_QUAD (4)
             * RayQueryCommittedIntersectionGeneratedEXT = 2U <= hit_in.leaf_type == BRW_RT_BVH_NODE_TYPE_PROCEDURAL (3)
             */
            sysval =
               nir_bcsel(b, nir_ieq(b, hit_in.leaf_type, nir_imm_int(b, 4)),
                         nir_imm_int(b, 1), nir_imm_int(b, 2));
            sysval =
               nir_bcsel(b, hit_in.valid,
                         sysval, nir_imm_int(b, 0));
         } else {
            /* 0 -> triangle, 1 -> AABB */
            sysval =
               nir_b2i32(b,
                         nir_ieq(b, hit_in.leaf_type,
                                 nir_imm_int(b, BRW_RT_BVH_NODE_TYPE_PROCEDURAL)));
         }
         break;

      case nir_ray_query_value_intersection_t:
         sysval = hit_in.t;
         break;

      case nir_ray_query_value_intersection_instance_custom_index: {
         /* NOTE(review): the BVH leaf's instance_id field presumably holds
          * the app-provided custom index (and instance_index below the
          * traversal index) -- confirm against the BVH leaf layout.
          */
         struct brw_nir_rt_bvh_instance_leaf_defs leaf;
         brw_nir_rt_load_bvh_instance_leaf(b, &leaf, hit_in.inst_leaf_ptr);
         sysval = leaf.instance_id;
         break;
      }

      case nir_ray_query_value_intersection_instance_id: {
         struct brw_nir_rt_bvh_instance_leaf_defs leaf;
         brw_nir_rt_load_bvh_instance_leaf(b, &leaf, hit_in.inst_leaf_ptr);
         sysval = leaf.instance_index;
         break;
      }

      case nir_ray_query_value_intersection_instance_sbt_index: {
         struct brw_nir_rt_bvh_instance_leaf_defs leaf;
         brw_nir_rt_load_bvh_instance_leaf(b, &leaf, hit_in.inst_leaf_ptr);
         sysval = leaf.contribution_to_hit_group_index;
         break;
      }

      case nir_ray_query_value_intersection_geometry_index: {
         /* Geometry index lives in the low 29 bits of the second dword of
          * the primitive leaf.
          */
         nir_ssa_def *geometry_index_dw =
            nir_load_global(b, nir_iadd_imm(b, hit_in.prim_leaf_ptr, 4), 4,
                            1, 32);
         sysval = nir_iand_imm(b, geometry_index_dw, BITFIELD_MASK(29));
         break;
      }

      case nir_ray_query_value_intersection_primitive_index:
         sysval = brw_nir_rt_load_primitive_id_from_hit(b, NULL /* is_procedural */, &hit_in);
         break;

      case nir_ray_query_value_intersection_barycentrics:
         sysval = hit_in.tri_bary;
         break;

      case nir_ray_query_value_intersection_front_face:
         sysval = hit_in.front_face;
         break;

      case nir_ray_query_value_intersection_object_ray_direction:
         /* NOTE(review): returns the world-space ray even though
          * object_ray_in was loaded above -- confirm whether an
          * object-space transform is still missing here.
          */
         sysval = world_ray_in.dir;
         break;

      case nir_ray_query_value_intersection_object_ray_origin:
         /* NOTE(review): same world-vs-object question as the direction
          * case above.
          */
         sysval = world_ray_in.orig;
         break;

      case nir_ray_query_value_intersection_object_to_world: {
         struct brw_nir_rt_bvh_instance_leaf_defs leaf;
         brw_nir_rt_load_bvh_instance_leaf(b, &leaf, hit_in.inst_leaf_ptr);
         sysval = leaf.object_to_world[nir_intrinsic_column(intrin)];
         break;
      }

      case nir_ray_query_value_intersection_world_to_object: {
         struct brw_nir_rt_bvh_instance_leaf_defs leaf;
         brw_nir_rt_load_bvh_instance_leaf(b, &leaf, hit_in.inst_leaf_ptr);
         sysval = leaf.world_to_object[nir_intrinsic_column(intrin)];
         break;
      }

      case nir_ray_query_value_intersection_candidate_aabb_opaque:
         /* NOTE(review): reuses the front_face bit -- confirm this is where
          * the opaque flag lands for procedural (AABB) candidates.
          */
         sysval = hit_in.front_face;
         break;

      case nir_ray_query_value_tmin:
         sysval = world_ray_in.t_near;
         break;

      case nir_ray_query_value_flags:
         sysval = nir_u2u32(b, world_ray_in.ray_flags);
         break;

      case nir_ray_query_value_world_ray_direction:
         sysval = world_ray_in.dir;
         break;

      case nir_ray_query_value_world_ray_origin:
         sysval = world_ray_in.orig;
         break;

      default:
         unreachable("Invalid ray query");
      }

      assert(sysval);
      nir_ssa_def_rewrite_uses(&intrin->dest.ssa, sysval);
      break;
   }

   default:
      unreachable("Invalid intrinsic");
   }
}
|
||||
|
||||
static void
|
||||
lower_ray_query_impl(nir_function_impl *impl, struct lowering_state *state)
|
||||
{
|
||||
nir_builder _b, *b = &_b;
|
||||
nir_builder_init(&_b, impl);
|
||||
|
||||
b->cursor = nir_before_block(nir_start_block(b->impl));
|
||||
|
||||
state->rq_globals = nir_load_ray_query_global_intel(b);
|
||||
|
||||
brw_nir_rt_load_globals_addr(b, &state->globals, state->rq_globals);
|
||||
|
||||
nir_foreach_block_safe(block, impl) {
|
||||
nir_foreach_instr_safe(instr, block) {
|
||||
if (instr->type != nir_instr_type_intrinsic)
|
||||
continue;
|
||||
|
||||
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
|
||||
if (intrin->intrinsic != nir_intrinsic_rq_initialize &&
|
||||
intrin->intrinsic != nir_intrinsic_rq_terminate &&
|
||||
intrin->intrinsic != nir_intrinsic_rq_proceed &&
|
||||
intrin->intrinsic != nir_intrinsic_rq_generate_intersection &&
|
||||
intrin->intrinsic != nir_intrinsic_rq_confirm_intersection &&
|
||||
intrin->intrinsic != nir_intrinsic_rq_load)
|
||||
continue;
|
||||
|
||||
lower_ray_query_intrinsic(b, intrin, state);
|
||||
}
|
||||
}
|
||||
|
||||
nir_metadata_preserve(impl, nir_metadata_none);
|
||||
}
|
||||
|
||||
bool
|
||||
brw_nir_lower_ray_queries(nir_shader *shader,
|
||||
const struct intel_device_info *devinfo)
|
||||
{
|
||||
struct lowering_state state = {
|
||||
.devinfo = devinfo,
|
||||
.queries = _mesa_pointer_hash_table_create(NULL),
|
||||
};
|
||||
|
||||
assert(exec_list_length(&shader->functions) == 1);
|
||||
|
||||
/* Find query variables */
|
||||
nir_foreach_function(function, shader) {
|
||||
if (!function->impl)
|
||||
continue;
|
||||
|
||||
nir_foreach_block_safe(block, function->impl) {
|
||||
nir_foreach_instr(instr, block)
|
||||
maybe_create_brw_var(instr, &state);
|
||||
}
|
||||
}
|
||||
|
||||
if (_mesa_hash_table_num_entries(state.queries) > 0) {
|
||||
nir_foreach_function(function, shader) {
|
||||
if (function->impl)
|
||||
lower_ray_query_impl(function->impl, &state);
|
||||
}
|
||||
|
||||
nir_remove_dead_derefs(shader);
|
||||
nir_remove_dead_variables(shader,
|
||||
nir_var_shader_temp | nir_var_function_temp,
|
||||
NULL);
|
||||
}
|
||||
|
||||
ralloc_free(state.queries);
|
||||
|
||||
return true;
|
||||
}
|
|
@ -163,13 +163,8 @@ lower_shader_calls_instr(struct nir_builder *b, nir_instr *instr, void *data)
|
|||
nir_ssa_def *ray_dir = call->src[8].ssa;
|
||||
nir_ssa_def *ray_t_max = call->src[9].ssa;
|
||||
|
||||
/* The hardware packet takes the address to the root node in the
|
||||
* acceleration structure, not the acceleration structure itself. To
|
||||
* find that, we have to read the root node offset from the acceleration
|
||||
* structure which is the first QWord.
|
||||
*/
|
||||
nir_ssa_def *root_node_ptr =
|
||||
nir_iadd(b, as_addr, nir_load_global(b, as_addr, 256, 1, 64));
|
||||
brw_nir_rt_acceleration_structure_to_root_node(b, as_addr);
|
||||
|
||||
/* The hardware packet requires an address to the first element of the
|
||||
* hit SBT.
|
||||
|
|
|
@ -49,6 +49,9 @@ void brw_nir_lower_combined_intersection_any_hit(nir_shader *intersection,
|
|||
/* We require the stack to be 8B aligned at the start of a shader */
|
||||
#define BRW_BTD_STACK_ALIGN 8
|
||||
|
||||
bool brw_nir_lower_ray_queries(nir_shader *shader,
|
||||
const struct intel_device_info *devinfo);
|
||||
|
||||
void brw_nir_lower_shader_returns(nir_shader *shader);
|
||||
|
||||
bool brw_nir_lower_shader_calls(nir_shader *shader);
|
||||
|
|
|
@ -907,4 +907,32 @@ brw_nir_rt_load_primitive_id_from_hit(nir_builder *b,
|
|||
4, /* align */ 1, 32);
|
||||
}
|
||||
|
||||
static inline nir_ssa_def *
brw_nir_rt_acceleration_structure_to_root_node(nir_builder *b,
                                               nir_ssa_def *as_addr)
{
   /* The HW memory structure in which we specify what acceleration structure
    * to traverse, takes the address to the root node in the acceleration
    * structure, not the acceleration structure itself. To find that, we have
    * to read the root node offset from the acceleration structure which is
    * the first QWord.
    *
    * But if the acceleration structure pointer is NULL, then we should return
    * NULL as root node pointer.
    */
   nir_ssa_def *root_node_ptr, *null_node_ptr;
   nir_push_if(b, nir_ieq(b, as_addr, nir_imm_int64(b, 0)));
   {
      null_node_ptr = nir_imm_int64(b, 0);
   }
   nir_push_else(b, NULL);
   {
      /* Root node address = AS address + root-node offset (first QWord). */
      root_node_ptr =
         nir_iadd(b, as_addr, brw_nir_rt_load(b, as_addr, 256, 1, 64));
   }
   nir_pop_if(b, NULL);

   /* nir_if_phi takes the then-value first: the NULL pointer corresponds to
    * the as_addr == 0 branch above.
    */
   return nir_if_phi(b, null_node_ptr, root_node_ptr);
}
|
||||
|
||||
#endif /* BRW_NIR_RT_BUILDER_H */
|
||||
|
|
|
@ -31,6 +31,9 @@ extern "C" {
|
|||
/** Vulkan defines shaderGroupHandleSize = 32 */
|
||||
#define BRW_RT_SBT_HANDLE_SIZE 32
|
||||
|
||||
/** RT_DISPATCH_GLOBALS size (see gen_rt.xml) */
|
||||
#define BRW_RT_DISPATCH_GLOBALS_SIZE 80
|
||||
|
||||
/** Offset after the RT dispatch globals at which "push" constants live */
|
||||
#define BRW_RT_PUSH_CONST_OFFSET 128
|
||||
|
||||
|
@ -177,6 +180,10 @@ struct brw_rt_raygen_trampoline_params {
|
|||
(BRW_RT_SIZEOF_RAY + BRW_RT_SIZEOF_TRAV_STACK) * BRW_RT_MAX_BVH_LEVELS + \
|
||||
(BRW_RT_MAX_BVH_LEVELS % 2 ? 32 : 0))
|
||||
|
||||
#define BRW_RT_SIZEOF_SHADOW_RAY_QUERY \
|
||||
(BRW_RT_SIZEOF_HIT_INFO * 2 + \
|
||||
(BRW_RT_SIZEOF_RAY + BRW_RT_SIZEOF_TRAV_STACK) * BRW_RT_MAX_BVH_LEVELS)
|
||||
|
||||
#define BRW_RT_SIZEOF_HW_STACK \
|
||||
(BRW_RT_SIZEOF_HIT_INFO * 2 + \
|
||||
BRW_RT_SIZEOF_RAY * BRW_RT_MAX_BVH_LEVELS + \
|
||||
|
@ -228,6 +235,39 @@ brw_rt_compute_scratch_layout(struct brw_rt_scratch_layout *layout,
|
|||
layout->total_size = size;
|
||||
}
|
||||
|
||||
static inline uint32_t
|
||||
brw_rt_ray_queries_hw_stacks_size(const struct intel_device_info *devinfo)
|
||||
{
|
||||
/* Maximum slice/subslice/EU ID can be computed from the max_scratch_ids
|
||||
* which includes all the threads.
|
||||
*/
|
||||
uint32_t max_eu_id = devinfo->max_scratch_ids[MESA_SHADER_COMPUTE];
|
||||
uint32_t max_simd_size = 16; /* Cannot run in SIMD32 with ray queries */
|
||||
return max_eu_id * max_simd_size * BRW_RT_SIZEOF_RAY_QUERY;
|
||||
}
|
||||
|
||||
static inline uint32_t
|
||||
brw_rt_ray_queries_shadow_stack_size(const struct intel_device_info *devinfo)
|
||||
{
|
||||
/* Maximum slice/subslice/EU ID can be computed from the max_scratch_ids
|
||||
* which includes all the threads.
|
||||
*/
|
||||
uint32_t max_eu_id = devinfo->max_scratch_ids[MESA_SHADER_COMPUTE];
|
||||
uint32_t max_simd_size = 16; /* Cannot run in SIMD32 with ray queries */
|
||||
return max_eu_id * max_simd_size * BRW_RT_SIZEOF_SHADOW_RAY_QUERY;
|
||||
}
|
||||
|
||||
/* Total shadow-memory size for @ray_queries queries: the shadow stacks
 * themselves plus 4 bytes of ctrl/level data per query.
 */
static inline uint32_t
brw_rt_ray_queries_shadow_stacks_size(const struct intel_device_info *devinfo,
                                      uint32_t ray_queries)
{
   /* Don't bother with a shadow stack if we only have a single query: it can
    * be written directly in the HW buffer.
    */
   uint32_t shadow_size = 0;
   if (ray_queries > 1)
      shadow_size = ray_queries * brw_rt_ray_queries_shadow_stack_size(devinfo);

   /* Ctrl + Level data, always present. */
   return shadow_size + ray_queries * 4;
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
|
|
@ -87,6 +87,7 @@ libintel_compiler_files = files(
|
|||
'brw_nir_lower_alpha_to_coverage.c',
|
||||
'brw_nir_lower_intersection_shader.c',
|
||||
'brw_nir_lower_mem_access_bit_sizes.c',
|
||||
'brw_nir_lower_ray_queries.c',
|
||||
'brw_nir_lower_rt_intrinsics.c',
|
||||
'brw_nir_lower_scoped_barriers.c',
|
||||
'brw_nir_lower_shader_calls.c',
|
||||
|
|
Loading…
Reference in New Issue