radv: Implement device-side BVH building.
Same naive algorithm as the host build. Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11078>
This commit is contained in:
parent
d51a4b4c4b
commit
0dad88b469
|
@ -23,6 +23,9 @@
|
|||
#include "radv_private.h"
|
||||
|
||||
#include "util/half_float.h"
|
||||
#include "nir_builder.h"
|
||||
#include "radv_cs.h"
|
||||
#include "radv_meta.h"
|
||||
|
||||
struct radv_accel_struct_header {
|
||||
uint32_t root_node_offset;
|
||||
|
@ -589,3 +592,830 @@ radv_BuildAccelerationStructuresKHR(
|
|||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/* Emit NIR that fetches the three vertex indices of triangle `id` from the
 * index buffer at 64-bit address `addr`. `type` is the VkIndexType selected
 * at runtime: UINT16 and UINT32 are read from memory, NONE_KHR synthesizes
 * the sequential indices 3*id .. 3*id+2 for non-indexed geometry, and the
 * remaining case loads 8-bit indices. Returns a uvec3 of 32-bit indices. */
static nir_ssa_def *
get_indices(nir_builder *b, nir_ssa_def *addr, nir_ssa_def *type, nir_ssa_def *id)
{
   const struct glsl_type *uvec3_type = glsl_vector_type(GLSL_TYPE_UINT, 3);
   nir_variable *result =
      nir_variable_create(b->shader, nir_var_shader_temp, uvec3_type, "indices");

   /* VK_INDEX_TYPE_UINT16 == 0 and VK_INDEX_TYPE_UINT32 == 1, so one unsigned
    * compare separates the 16/32-bit memory-backed cases from everything else. */
   nir_push_if(b, nir_ult(b, type, nir_imm_int(b, 2)));
   nir_push_if(b, nir_ieq(b, type, nir_imm_int(b, VK_INDEX_TYPE_UINT16)));
   {
      /* 16-bit indices: 3 * 2 = 6 bytes per triangle, loaded one at a time. */
      nir_ssa_def *index_id = nir_umul24(b, id, nir_imm_int(b, 6));
      nir_ssa_def *indices[3];
      for (unsigned i = 0; i < 3; ++i) {
         indices[i] = nir_build_load_global(
            b, 1, 16, nir_iadd(b, addr, nir_u2u64(b, nir_iadd(b, index_id, nir_imm_int(b, 2 * i)))),
            .align_mul = 2, .align_offset = 0);
      }
      nir_store_var(b, result, nir_u2u32(b, nir_vec(b, indices, 3)), 7);
   }
   nir_push_else(b, NULL);
   {
      /* 32-bit indices: 3 * 4 = 12 bytes per triangle, one vec3 load. */
      nir_ssa_def *index_id = nir_umul24(b, id, nir_imm_int(b, 12));
      nir_ssa_def *indices = nir_build_load_global(
         b, 3, 32, nir_iadd(b, addr, nir_u2u64(b, index_id)), .align_mul = 4, .align_offset = 0);
      nir_store_var(b, result, indices, 7);
   }
   nir_pop_if(b, NULL);
   nir_push_else(b, NULL);
   {
      /* Sequential indices 3*id, 3*id+1, 3*id+2; used directly for NONE_KHR
       * and reused as byte offsets for the 8-bit index case below. */
      nir_ssa_def *index_id = nir_umul24(b, id, nir_imm_int(b, 3));
      nir_ssa_def *indices[] = {
         index_id,
         nir_iadd(b, index_id, nir_imm_int(b, 1)),
         nir_iadd(b, index_id, nir_imm_int(b, 2)),
      };

      nir_push_if(b, nir_ieq(b, type, nir_imm_int(b, VK_INDEX_TYPE_NONE_KHR)));
      {
         /* Non-indexed geometry: the triangle's vertices are consecutive. */
         nir_store_var(b, result, nir_vec(b, indices, 3), 7);
      }
      nir_push_else(b, NULL);
      {
         /* 8-bit indices (presumably VK_INDEX_TYPE_UINT8_EXT — the only other
          * index type; confirm against callers). */
         for (unsigned i = 0; i < 3; ++i) {
            indices[i] = nir_build_load_global(b, 1, 8, nir_iadd(b, addr, nir_u2u64(b, indices[i])),
                                               .align_mul = 1, .align_offset = 0);
         }
         nir_store_var(b, result, nir_u2u32(b, nir_vec(b, indices, 3)), 7);
      }
      nir_pop_if(b, NULL);
   }
   nir_pop_if(b, NULL);
   return nir_load_var(b, result);
}
|
||||
|
||||
/* Emit NIR that loads three vertex positions. `addresses` holds one 64-bit
 * address per vertex (one channel each); `format` is the runtime VkFormat of
 * the vertex data. Only the four float formats listed below are handled; the
 * decoded positions are returned through positions[0..2] as 32-bit vec3s. */
static void
get_vertices(nir_builder *b, nir_ssa_def *addresses, nir_ssa_def *format, nir_ssa_def *positions[3])
{
   const struct glsl_type *vec3_type = glsl_vector_type(GLSL_TYPE_FLOAT, 3);
   nir_variable *results[3] = {
      nir_variable_create(b->shader, nir_var_shader_temp, vec3_type, "vertex0"),
      nir_variable_create(b->shader, nir_var_shader_temp, vec3_type, "vertex1"),
      nir_variable_create(b->shader, nir_var_shader_temp, vec3_type, "vertex2")};

   VkFormat formats[] = {
      VK_FORMAT_R32G32B32_SFLOAT,
      VK_FORMAT_R32G32B32A32_SFLOAT,
      VK_FORMAT_R16G16B16_SFLOAT,
      VK_FORMAT_R16G16B16A16_SFLOAT,
   };

   /* Build an if/else ladder over the supported formats; the last entry is
    * the fall-through else branch, so it gets no explicit comparison. */
   for (unsigned f = 0; f < ARRAY_SIZE(formats); ++f) {
      if (f + 1 < ARRAY_SIZE(formats))
         nir_push_if(b, nir_ieq(b, format, nir_imm_int(b, formats[f])));

      for (unsigned i = 0; i < 3; ++i) {
         switch (formats[f]) {
         case VK_FORMAT_R32G32B32_SFLOAT:
         case VK_FORMAT_R32G32B32A32_SFLOAT:
            /* 32-bit float formats: a single vec3 load (the alpha channel of
             * the RGBA variant is simply never read). */
            nir_store_var(b, results[i],
                          nir_build_load_global(b, 3, 32, nir_channel(b, addresses, i),
                                                .align_mul = 4, .align_offset = 0),
                          7);
            break;
         case VK_FORMAT_R16G16B16_SFLOAT:
         case VK_FORMAT_R16G16B16A16_SFLOAT: {
            /* Half-float formats: load the three 16-bit components separately
             * (only 2-byte alignment is guaranteed) and widen to f32. */
            nir_ssa_def *values[3];
            nir_ssa_def *addr = nir_channel(b, addresses, i);
            for (unsigned j = 0; j < 3; ++j)
               values[j] =
                  nir_build_load_global(b, 1, 16, nir_iadd(b, addr, nir_imm_int64(b, j * 2)),
                                        .align_mul = 2, .align_offset = 0);
            nir_store_var(b, results[i], nir_f2f32(b, nir_vec(b, values, 3)), 7);
            break;
         }
         default:
            unreachable("Unhandled format");
         }
      }
      if (f + 1 < ARRAY_SIZE(formats))
         nir_push_else(b, NULL);
   }
   /* Close the ARRAY_SIZE(formats) - 1 if/else levels opened above. */
   for (unsigned f = 1; f < ARRAY_SIZE(formats); ++f) {
      nir_pop_if(b, NULL);
   }

   for (unsigned i = 0; i < 3; ++i)
      positions[i] = nir_load_var(b, results[i]);
}
|
||||
|
||||
/* Push constants for the leaf-node build shader. Which union arm is valid is
 * selected by geometry_type (a VkGeometryTypeKHR). */
struct build_primitive_constants {
   uint64_t node_dst_addr;      /* VA of the destination acceleration structure */
   uint64_t scratch_addr;       /* VA of the build scratch buffer */
   uint32_t dst_offset;         /* byte offset of the first leaf node in dst */
   uint32_t dst_scratch_offset; /* byte offset in scratch for produced node ids */
   uint32_t geometry_type;      /* VkGeometryTypeKHR */
   uint32_t geometry_id;        /* geometry index | (geometry flags << 28) */

   union {
      /* VK_GEOMETRY_TYPE_TRIANGLES_KHR */
      struct {
         uint64_t vertex_addr;
         uint64_t index_addr;
         uint64_t transform_addr;
         uint32_t vertex_stride;
         uint32_t vertex_format; /* VkFormat */
         uint32_t index_format;  /* VkIndexType */
      };
      /* VK_GEOMETRY_TYPE_INSTANCES_KHR */
      struct {
         uint64_t instance_data;
      };
      /* VK_GEOMETRY_TYPE_AABBS_KHR */
      struct {
         uint64_t aabb_addr;
         uint32_t aabb_stride;
      };
   };
};
|
||||
|
||||
/* Push constants for the internal (box) node build shader; the field layout
 * matches the push-constant comment in build_internal_shader(). */
struct build_internal_constants {
   uint64_t node_dst_addr;      /* VA of the destination acceleration structure */
   uint64_t scratch_addr;       /* VA of the build scratch buffer */
   uint32_t dst_offset;         /* byte offset for the new box nodes in dst */
   uint32_t dst_scratch_offset; /* where produced node ids go in scratch */
   uint32_t src_scratch_offset; /* where the previous level's node ids live */
   uint32_t fill_header;        /* src_node_count | (fill_header << 31) */
};
|
||||
|
||||
/* This inverts a 3x3 matrix using cofactors, as in e.g.
|
||||
* https://www.mathsisfun.com/algebra/matrix-inverse-minors-cofactors-adjugate.html */
|
||||
static void
|
||||
nir_invert_3x3(nir_builder *b, nir_ssa_def *in[3][3], nir_ssa_def *out[3][3])
|
||||
{
|
||||
nir_ssa_def *cofactors[3][3];
|
||||
for (unsigned i = 0; i < 3; ++i) {
|
||||
for (unsigned j = 0; j < 3; ++j) {
|
||||
cofactors[i][j] =
|
||||
nir_fsub(b, nir_fmul(b, in[(i + 1) % 3][(j + 1) % 3], in[(i + 2) % 3][(j + 2) % 3]),
|
||||
nir_fmul(b, in[(i + 1) % 3][(j + 2) % 3], in[(i + 2) % 3][(j + 1) % 3]));
|
||||
}
|
||||
}
|
||||
|
||||
nir_ssa_def *det = NULL;
|
||||
for (unsigned i = 0; i < 3; ++i) {
|
||||
nir_ssa_def *det_part = nir_fmul(b, in[0][i], cofactors[0][i]);
|
||||
det = det ? nir_fadd(b, det, det_part) : det_part;
|
||||
}
|
||||
|
||||
nir_ssa_def *det_inv = nir_frcp(b, det);
|
||||
for (unsigned i = 0; i < 3; ++i) {
|
||||
for (unsigned j = 0; j < 3; ++j) {
|
||||
out[i][j] = nir_fmul(b, cofactors[j][i], det_inv);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Build the compute shader that writes one BVH leaf node per invocation for
 * the geometry described by struct build_primitive_constants, and records the
 * resulting node id in the scratch buffer so the internal-node passes can
 * consume it. Handles all three geometry types (triangles / AABBs /
 * instances) via runtime branches on the geometry_type push constant. */
static nir_shader *
build_leaf_shader(struct radv_device *dev)
{
   const struct glsl_type *vec3_type = glsl_vector_type(GLSL_TYPE_FLOAT, 3);
   nir_builder b =
      nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL, "accel_build_leaf_shader");

   b.shader->info.workgroup_size[0] = 64;
   b.shader->info.workgroup_size[1] = 1;
   b.shader->info.workgroup_size[2] = 1;

   /* Push constants, laid out as struct build_primitive_constants. */
   nir_ssa_def *pconst0 =
      nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 0), .base = 0, .range = 16);
   nir_ssa_def *pconst1 =
      nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 0), .base = 16, .range = 16);
   nir_ssa_def *pconst2 =
      nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 0), .base = 32, .range = 16);
   nir_ssa_def *pconst3 =
      nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 0), .base = 48, .range = 16);
   nir_ssa_def *pconst4 =
      nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .base = 64, .range = 4);

   nir_ssa_def *geom_type = nir_channel(&b, pconst1, 2);
   nir_ssa_def *node_dst_addr = nir_pack_64_2x32(&b, nir_channels(&b, pconst0, 3));
   nir_ssa_def *scratch_addr = nir_pack_64_2x32(&b, nir_channels(&b, pconst0, 12));
   nir_ssa_def *node_dst_offset = nir_channel(&b, pconst1, 0);
   nir_ssa_def *scratch_offset = nir_channel(&b, pconst1, 1);
   nir_ssa_def *geometry_id = nir_channel(&b, pconst1, 3);

   /* Flat 1D invocation index: one invocation per primitive. */
   nir_ssa_def *global_id =
      nir_iadd(&b,
               nir_umul24(&b, nir_channels(&b, nir_load_workgroup_id(&b, 32), 1),
                          nir_imm_int(&b, b.shader->info.workgroup_size[0])),
               nir_channels(&b, nir_load_local_invocation_id(&b), 1));
   /* Each invocation writes one 32-bit node id into the scratch buffer. */
   scratch_addr = nir_iadd(
      &b, scratch_addr,
      nir_u2u64(&b, nir_iadd(&b, scratch_offset, nir_umul24(&b, global_id, nir_imm_int(&b, 4)))));

   nir_push_if(&b, nir_ieq(&b, geom_type, nir_imm_int(&b, VK_GEOMETRY_TYPE_TRIANGLES_KHR)));
   { /* Triangles */
      nir_ssa_def *vertex_addr = nir_pack_64_2x32(&b, nir_channels(&b, pconst2, 3));
      nir_ssa_def *index_addr = nir_pack_64_2x32(&b, nir_channels(&b, pconst2, 12));
      nir_ssa_def *transform_addr = nir_pack_64_2x32(&b, nir_channels(&b, pconst3, 3));
      nir_ssa_def *vertex_stride = nir_channel(&b, pconst3, 2);
      nir_ssa_def *vertex_format = nir_channel(&b, pconst3, 3);
      nir_ssa_def *index_format = nir_channel(&b, pconst4, 0);
      /* Broadcast swizzle: replicates a scalar across 3 channels. */
      unsigned repl_swizzle[4] = {0, 0, 0, 0};

      /* Triangle leaf nodes are 64 bytes each. */
      nir_ssa_def *node_offset =
         nir_iadd(&b, node_dst_offset, nir_umul24(&b, global_id, nir_imm_int(&b, 64)));
      nir_ssa_def *triangle_node_dst_addr = nir_iadd(&b, node_dst_addr, nir_u2u64(&b, node_offset));

      nir_ssa_def *indices = get_indices(&b, index_addr, index_format, global_id);
      /* Per-vertex address = vertex_addr + index * stride, vectorized over
       * the 3 triangle corners. */
      nir_ssa_def *vertex_addresses = nir_iadd(
         &b, nir_u2u64(&b, nir_imul(&b, indices, nir_swizzle(&b, vertex_stride, repl_swizzle, 3))),
         nir_swizzle(&b, vertex_addr, repl_swizzle, 3));
      nir_ssa_def *positions[3];
      get_vertices(&b, vertex_addresses, vertex_format, positions);

      nir_ssa_def *node_data[16];
      memset(node_data, 0, sizeof(node_data));

      /* Optional 3x4 geometry transform; defaults to identity when the app
       * supplied no transform buffer (transform_addr == 0). */
      nir_variable *transform[] = {
         nir_variable_create(b.shader, nir_var_shader_temp, glsl_vec4_type(), "transform0"),
         nir_variable_create(b.shader, nir_var_shader_temp, glsl_vec4_type(), "transform1"),
         nir_variable_create(b.shader, nir_var_shader_temp, glsl_vec4_type(), "transform2"),
      };
      nir_store_var(&b, transform[0], nir_imm_vec4(&b, 1.0, 0.0, 0.0, 0.0), 0xf);
      nir_store_var(&b, transform[1], nir_imm_vec4(&b, 0.0, 1.0, 0.0, 0.0), 0xf);
      nir_store_var(&b, transform[2], nir_imm_vec4(&b, 0.0, 0.0, 1.0, 0.0), 0xf);

      nir_push_if(&b, nir_ine(&b, transform_addr, nir_imm_int64(&b, 0)));
      nir_store_var(
         &b, transform[0],
         nir_build_load_global(&b, 4, 32, nir_iadd(&b, transform_addr, nir_imm_int64(&b, 0)),
                               .align_mul = 4, .align_offset = 0),
         0xf);
      nir_store_var(
         &b, transform[1],
         nir_build_load_global(&b, 4, 32, nir_iadd(&b, transform_addr, nir_imm_int64(&b, 16)),
                               .align_mul = 4, .align_offset = 0),
         0xf);
      nir_store_var(
         &b, transform[2],
         nir_build_load_global(&b, 4, 32, nir_iadd(&b, transform_addr, nir_imm_int64(&b, 32)),
                               .align_mul = 4, .align_offset = 0),
         0xf);
      nir_pop_if(&b, NULL);

      /* Transformed positions: fdph = dot(vec3, row.xyz) + row.w. Fills the
       * first 9 dwords of the node with the 3 transformed vertices. */
      for (unsigned i = 0; i < 3; ++i)
         for (unsigned j = 0; j < 3; ++j)
            node_data[i * 3 + j] = nir_fdph(&b, positions[i], nir_load_var(&b, transform[j]));

      node_data[12] = global_id;    /* primitive id */
      node_data[13] = geometry_id;  /* geometry index | flags << 28 */
      /* NOTE(review): constant 9 presumably encodes triangle-node id/flags in
       * the hardware node layout — confirm against the BVH node spec. */
      node_data[15] = nir_imm_int(&b, 9);
      for (unsigned i = 0; i < ARRAY_SIZE(node_data); ++i)
         if (!node_data[i])
            node_data[i] = nir_imm_int(&b, 0);

      /* Write the 64-byte node as four 16-byte stores. */
      for (unsigned i = 0; i < 4; ++i) {
         nir_build_store_global(&b, nir_vec(&b, node_data + i * 4, 4),
                                nir_iadd(&b, triangle_node_dst_addr, nir_imm_int64(&b, i * 16)),
                                .write_mask = 15, .align_mul = 16, .align_offset = 0);
      }

      /* Node id = byte offset / 8; low 3 bits are 0, i.e. triangle type. */
      nir_ssa_def *node_id = nir_ushr(&b, node_offset, nir_imm_int(&b, 3));
      nir_build_store_global(&b, node_id, scratch_addr, .write_mask = 1, .align_mul = 4,
                             .align_offset = 0);
   }
   nir_push_else(&b, NULL);
   nir_push_if(&b, nir_ieq(&b, geom_type, nir_imm_int(&b, VK_GEOMETRY_TYPE_AABBS_KHR)));
   { /* AABBs */
      nir_ssa_def *aabb_addr = nir_pack_64_2x32(&b, nir_channels(&b, pconst2, 3));
      nir_ssa_def *aabb_stride = nir_channel(&b, pconst2, 2);

      /* AABB leaf nodes are 64 bytes; node id gets type tag 7 in the low bits. */
      nir_ssa_def *node_offset =
         nir_iadd(&b, node_dst_offset, nir_umul24(&b, global_id, nir_imm_int(&b, 64)));
      nir_ssa_def *aabb_node_dst_addr = nir_iadd(&b, node_dst_addr, nir_u2u64(&b, node_offset));
      nir_ssa_def *node_id =
         nir_iadd(&b, nir_ushr(&b, node_offset, nir_imm_int(&b, 3)), nir_imm_int(&b, 7));
      nir_build_store_global(&b, node_id, scratch_addr, .write_mask = 1, .align_mul = 4,
                             .align_offset = 0);

      aabb_addr = nir_iadd(&b, aabb_addr, nir_u2u64(&b, nir_imul(&b, aabb_stride, global_id)));

      /* VkAabbPositionsKHR: minX/Y/Z at offset 0, maxX/Y/Z at offset 12. */
      nir_ssa_def *min_bound =
         nir_build_load_global(&b, 3, 32, nir_iadd(&b, aabb_addr, nir_imm_int64(&b, 0)),
                               .align_mul = 4, .align_offset = 0);
      nir_ssa_def *max_bound =
         nir_build_load_global(&b, 3, 32, nir_iadd(&b, aabb_addr, nir_imm_int64(&b, 12)),
                               .align_mul = 4, .align_offset = 0);

      nir_ssa_def *values[] = {nir_channel(&b, min_bound, 0),
                               nir_channel(&b, min_bound, 1),
                               nir_channel(&b, min_bound, 2),
                               nir_channel(&b, max_bound, 0),
                               nir_channel(&b, max_bound, 1),
                               nir_channel(&b, max_bound, 2),
                               global_id,
                               geometry_id};

      nir_build_store_global(&b, nir_vec(&b, values + 0, 4),
                             nir_iadd(&b, aabb_node_dst_addr, nir_imm_int64(&b, 0)),
                             .write_mask = 15, .align_mul = 16, .align_offset = 0);
      nir_build_store_global(&b, nir_vec(&b, values + 4, 4),
                             nir_iadd(&b, aabb_node_dst_addr, nir_imm_int64(&b, 16)),
                             .write_mask = 15, .align_mul = 16, .align_offset = 0);
   }
   nir_push_else(&b, NULL);
   { /* Instances */

      /* VkAccelerationStructureInstanceKHR is 64 bytes per instance. */
      nir_ssa_def *instance_addr =
         nir_iadd(&b, nir_pack_64_2x32(&b, nir_channels(&b, pconst2, 3)),
                  nir_u2u64(&b, nir_imul(&b, global_id, nir_imm_int(&b, 64))));
      /* The instance's 3x4 transform occupies the first 48 bytes. */
      nir_ssa_def *inst_transform[] = {
         nir_build_load_global(&b, 4, 32, nir_iadd(&b, instance_addr, nir_imm_int64(&b, 0)),
                               .align_mul = 4, .align_offset = 0),
         nir_build_load_global(&b, 4, 32, nir_iadd(&b, instance_addr, nir_imm_int64(&b, 16)),
                               .align_mul = 4, .align_offset = 0),
         nir_build_load_global(&b, 4, 32, nir_iadd(&b, instance_addr, nir_imm_int64(&b, 32)),
                               .align_mul = 4, .align_offset = 0)};
      /* Last 16 bytes: custom index/mask, SBT offset/flags, BLAS reference. */
      nir_ssa_def *inst3 =
         nir_build_load_global(&b, 4, 32, nir_iadd(&b, instance_addr, nir_imm_int64(&b, 48)),
                               .align_mul = 4, .align_offset = 0);

      /* Instance nodes are 128 bytes; node id gets type tag 6. */
      nir_ssa_def *node_offset =
         nir_iadd(&b, node_dst_offset, nir_umul24(&b, global_id, nir_imm_int(&b, 128)));
      node_dst_addr = nir_iadd(&b, node_dst_addr, nir_u2u64(&b, node_offset));
      nir_ssa_def *node_id =
         nir_iadd(&b, nir_ushr(&b, node_offset, nir_imm_int(&b, 3)), nir_imm_int(&b, 6));
      nir_build_store_global(&b, node_id, scratch_addr, .write_mask = 1, .align_mul = 4,
                             .align_offset = 0);

      /* NaN bounds mark an inactive instance (null BLAS) — presumably
       * interpreted as "never hit" by traversal; confirm. */
      nir_variable *bounds[2] = {
         nir_variable_create(b.shader, nir_var_shader_temp, vec3_type, "min_bound"),
         nir_variable_create(b.shader, nir_var_shader_temp, vec3_type, "max_bound"),
      };

      nir_store_var(&b, bounds[0], nir_channels(&b, nir_imm_vec4(&b, NAN, NAN, NAN, NAN), 7), 7);
      nir_store_var(&b, bounds[1], nir_channels(&b, nir_imm_vec4(&b, NAN, NAN, NAN, NAN), 7), 7);

      nir_ssa_def *header_addr = nir_pack_64_2x32(&b, nir_channels(&b, inst3, 12));
      nir_push_if(&b, nir_ine(&b, header_addr, nir_imm_int64(&b, 0)));
      /* Read root offset and bounds from the referenced BLAS header (matches
       * struct radv_accel_struct_header: root at 0, min at 8, max at 20). */
      nir_ssa_def *header_root_offset =
         nir_build_load_global(&b, 1, 32, nir_iadd(&b, header_addr, nir_imm_int64(&b, 0)),
                               .align_mul = 4, .align_offset = 0);
      nir_ssa_def *header_min =
         nir_build_load_global(&b, 3, 32, nir_iadd(&b, header_addr, nir_imm_int64(&b, 8)),
                               .align_mul = 4, .align_offset = 0);
      nir_ssa_def *header_max =
         nir_build_load_global(&b, 3, 32, nir_iadd(&b, header_addr, nir_imm_int64(&b, 20)),
                               .align_mul = 4, .align_offset = 0);

      /* Transform the BLAS AABB into TLAS space: per output axis, start from
       * the translation (row.w) and add min/max of each scaled extent —
       * the standard min/max-of-products AABB transform. */
      nir_ssa_def *bound_defs[2][3];
      for (unsigned i = 0; i < 3; ++i) {
         bound_defs[0][i] = bound_defs[1][i] = nir_channel(&b, inst_transform[i], 3);

         nir_ssa_def *mul_a = nir_fmul(&b, nir_channels(&b, inst_transform[i], 7), header_min);
         nir_ssa_def *mul_b = nir_fmul(&b, nir_channels(&b, inst_transform[i], 7), header_max);
         nir_ssa_def *mi = nir_fmin(&b, mul_a, mul_b);
         nir_ssa_def *ma = nir_fmax(&b, mul_a, mul_b);
         for (unsigned j = 0; j < 3; ++j) {
            bound_defs[0][i] = nir_fadd(&b, bound_defs[0][i], nir_channel(&b, mi, j));
            bound_defs[1][i] = nir_fadd(&b, bound_defs[1][i], nir_channel(&b, ma, j));
         }
      }

      nir_store_var(&b, bounds[0], nir_vec(&b, bound_defs[0], 3), 7);
      nir_store_var(&b, bounds[1], nir_vec(&b, bound_defs[1], 3), 7);

      /* The node stores the world-to-object transform: invert the 3x3 part
       * and keep the translation column. */
      nir_ssa_def *m_in[3][3], *m_out[3][3], *m_vec[3][4];
      for (unsigned i = 0; i < 3; ++i)
         for (unsigned j = 0; j < 3; ++j)
            m_in[i][j] = nir_channel(&b, inst_transform[i], j);
      nir_invert_3x3(&b, m_in, m_out);
      for (unsigned i = 0; i < 3; ++i) {
         for (unsigned j = 0; j < 3; ++j)
            m_vec[i][j] = m_out[i][j];
         m_vec[i][3] = nir_channel(&b, inst_transform[i], 3);
      }

      for (unsigned i = 0; i < 3; ++i) {
         nir_build_store_global(&b, nir_vec(&b, m_vec[i], 4),
                                nir_iadd(&b, node_dst_addr, nir_imm_int64(&b, 16 + 16 * i)),
                                .write_mask = 0xf, .align_mul = 4, .align_offset = 0);
      }

      /* First 16 bytes: BLAS root pointer (base | root offset) plus the raw
       * custom-index/mask and SBT-offset/flags dwords from the instance. */
      nir_ssa_def *out0[4] = {
         nir_ior(&b, nir_channel(&b, nir_unpack_64_2x32(&b, header_addr), 0), header_root_offset),
         nir_channel(&b, nir_unpack_64_2x32(&b, header_addr), 1), nir_channel(&b, inst3, 0),
         nir_channel(&b, inst3, 1)};
      nir_build_store_global(&b, nir_vec(&b, out0, 4),
                             nir_iadd(&b, node_dst_addr, nir_imm_int64(&b, 0)), .write_mask = 0xf,
                             .align_mul = 4, .align_offset = 0);
      nir_build_store_global(&b, global_id, nir_iadd(&b, node_dst_addr, nir_imm_int64(&b, 88)),
                             .write_mask = 0x1, .align_mul = 4, .align_offset = 0);
      nir_pop_if(&b, NULL);
      /* Bounds are written unconditionally (NaN when the BLAS was null). */
      nir_build_store_global(&b, nir_load_var(&b, bounds[0]),
                             nir_iadd(&b, node_dst_addr, nir_imm_int64(&b, 64)), .write_mask = 0x7,
                             .align_mul = 4, .align_offset = 0);
      nir_build_store_global(&b, nir_load_var(&b, bounds[1]),
                             nir_iadd(&b, node_dst_addr, nir_imm_int64(&b, 76)), .write_mask = 0x7,
                             .align_mul = 4, .align_offset = 0);
   }
   nir_pop_if(&b, NULL);
   nir_pop_if(&b, NULL);

   return b.shader;
}
|
||||
|
||||
/* Emit NIR that computes the AABB of an arbitrary BVH node. `node_id` encodes
 * (byte offset / 8) with the node type in its low 3 bits; `node_addr` is the
 * base address of the acceleration structure. The min/max bounds are written
 * to bounds_vars[0] / bounds_vars[1]. */
static void
determine_bounds(nir_builder *b, nir_ssa_def *node_addr, nir_ssa_def *node_id,
                 nir_variable *bounds_vars[2])
{
   nir_ssa_def *node_type = nir_iand(b, node_id, nir_imm_int(b, 7));
   /* Strip the type bits and convert the id (units of 8 bytes) back to a
    * byte offset from the structure base. */
   node_addr = nir_iadd(
      b, node_addr,
      nir_u2u64(b, nir_ishl(b, nir_iand(b, node_id, nir_imm_int(b, ~7u)), nir_imm_int(b, 3))));

   nir_push_if(b, nir_ieq(b, node_type, nir_imm_int(b, 0)));
   {
      /* Type 0: triangle node — bounds are the min/max of the 3 (already
       * transformed) vertex positions stored at offsets 0/12/24. */
      nir_ssa_def *positions[3];
      for (unsigned i = 0; i < 3; ++i)
         positions[i] =
            nir_build_load_global(b, 3, 32, nir_iadd(b, node_addr, nir_imm_int64(b, i * 12)),
                                  .align_mul = 4, .align_offset = 0);
      nir_ssa_def *bounds[] = {positions[0], positions[0]};
      for (unsigned i = 1; i < 3; ++i) {
         bounds[0] = nir_fmin(b, bounds[0], positions[i]);
         bounds[1] = nir_fmax(b, bounds[1], positions[i]);
      }
      nir_store_var(b, bounds_vars[0], bounds[0], 7);
      nir_store_var(b, bounds_vars[1], bounds[1], 7);
   }
   nir_push_else(b, NULL);
   nir_push_if(b, nir_ieq(b, node_type, nir_imm_int(b, 5)));
   {
      /* Type 5: internal box node — union of the 4 children's AABBs, stored
       * as min/max pairs starting at offset 16, 24 bytes per child. */
      nir_ssa_def *input_bounds[4][2];
      for (unsigned i = 0; i < 4; ++i)
         for (unsigned j = 0; j < 2; ++j)
            input_bounds[i][j] = nir_build_load_global(
               b, 3, 32, nir_iadd(b, node_addr, nir_imm_int64(b, 16 + i * 24 + j * 12)),
               .align_mul = 4, .align_offset = 0);
      nir_ssa_def *bounds[] = {input_bounds[0][0], input_bounds[0][1]};
      for (unsigned i = 1; i < 4; ++i) {
         bounds[0] = nir_fmin(b, bounds[0], input_bounds[i][0]);
         bounds[1] = nir_fmax(b, bounds[1], input_bounds[i][1]);
      }

      nir_store_var(b, bounds_vars[0], bounds[0], 7);
      nir_store_var(b, bounds_vars[1], bounds[1], 7);
   }
   nir_push_else(b, NULL);
   nir_push_if(b, nir_ieq(b, node_type, nir_imm_int(b, 6)));
   { /* Instances */
      /* Type 6: instance node — precomputed world-space bounds at offset 64
       * (written by build_leaf_shader). */
      nir_ssa_def *bounds[2];
      for (unsigned i = 0; i < 2; ++i)
         bounds[i] =
            nir_build_load_global(b, 3, 32, nir_iadd(b, node_addr, nir_imm_int64(b, 64 + i * 12)),
                                  .align_mul = 4, .align_offset = 0);
      nir_store_var(b, bounds_vars[0], bounds[0], 7);
      nir_store_var(b, bounds_vars[1], bounds[1], 7);
   }
   nir_push_else(b, NULL);
   { /* AABBs */
      /* Remaining type (7): AABB leaf — min/max pair at offset 0. */
      nir_ssa_def *bounds[2];
      for (unsigned i = 0; i < 2; ++i)
         bounds[i] =
            nir_build_load_global(b, 3, 32, nir_iadd(b, node_addr, nir_imm_int64(b, i * 12)),
                                  .align_mul = 4, .align_offset = 0);
      nir_store_var(b, bounds_vars[0], bounds[0], 7);
      nir_store_var(b, bounds_vars[1], bounds[1], 7);
   }
   nir_pop_if(b, NULL);
   nir_pop_if(b, NULL);
   nir_pop_if(b, NULL);
}
|
||||
|
||||
/* Build the compute shader for one level of internal (box) nodes: each
 * invocation gathers up to 4 child node ids from the source scratch region,
 * writes a 128-byte box node with per-child bounds, records the new node's id
 * in the destination scratch region, and — on the final pass — fills the
 * acceleration structure header with the root id and total bounds. */
static nir_shader *
build_internal_shader(struct radv_device *dev)
{
   const struct glsl_type *vec3_type = glsl_vector_type(GLSL_TYPE_FLOAT, 3);
   nir_builder b =
      nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL, "accel_build_internal_shader");

   b.shader->info.workgroup_size[0] = 64;
   b.shader->info.workgroup_size[1] = 1;
   b.shader->info.workgroup_size[2] = 1;

   /*
    * push constants:
    * i32 x 2: node dst address
    * i32 x 2: scratch address
    * i32: dst offset
    * i32: dst scratch offset
    * i32: src scratch offset
    * i32: src_node_count | (fill_header << 31)
    */
   nir_ssa_def *pconst0 =
      nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 0), .base = 0, .range = 16);
   nir_ssa_def *pconst1 =
      nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 0), .base = 16, .range = 16);

   nir_ssa_def *node_addr = nir_pack_64_2x32(&b, nir_channels(&b, pconst0, 3));
   nir_ssa_def *scratch_addr = nir_pack_64_2x32(&b, nir_channels(&b, pconst0, 12));
   nir_ssa_def *node_dst_offset = nir_channel(&b, pconst1, 0);
   nir_ssa_def *dst_scratch_offset = nir_channel(&b, pconst1, 1);
   nir_ssa_def *src_scratch_offset = nir_channel(&b, pconst1, 2);
   nir_ssa_def *src_node_count =
      nir_iand(&b, nir_channel(&b, pconst1, 3), nir_imm_int(&b, 0x7FFFFFFFU));
   nir_ssa_def *fill_header =
      nir_ine(&b, nir_iand(&b, nir_channel(&b, pconst1, 3), nir_imm_int(&b, 0x80000000U)),
              nir_imm_int(&b, 0));

   /* Flat 1D invocation index: one box node per invocation. */
   nir_ssa_def *global_id =
      nir_iadd(&b,
               nir_umul24(&b, nir_channels(&b, nir_load_workgroup_id(&b, 32), 1),
                          nir_imm_int(&b, b.shader->info.workgroup_size[0])),
               nir_channels(&b, nir_load_local_invocation_id(&b), 1));
   /* Each node consumes 4 consecutive source ids; the last node of a level
    * may get fewer. */
   nir_ssa_def *src_idx = nir_imul(&b, global_id, nir_imm_int(&b, 4));
   nir_ssa_def *src_count = nir_umin(&b, nir_imm_int(&b, 4), nir_isub(&b, src_node_count, src_idx));

   /* Box nodes are 128 bytes (<< 7). */
   nir_ssa_def *node_offset =
      nir_iadd(&b, node_dst_offset, nir_ishl(&b, global_id, nir_imm_int(&b, 7)));
   nir_ssa_def *node_dst_addr = nir_iadd(&b, node_addr, nir_u2u64(&b, node_offset));
   /* Load the 4 child ids produced by the previous pass (4 bytes each). */
   nir_ssa_def *src_nodes = nir_build_load_global(
      &b, 4, 32,
      nir_iadd(&b, scratch_addr,
               nir_u2u64(&b, nir_iadd(&b, src_scratch_offset,
                                      nir_ishl(&b, global_id, nir_imm_int(&b, 4))))),
      .align_mul = 4, .align_offset = 0);

   nir_build_store_global(&b, src_nodes, nir_iadd(&b, node_dst_addr, nir_imm_int64(&b, 0)),
                          .write_mask = 0xf, .align_mul = 4, .align_offset = 0);

   nir_ssa_def *total_bounds[2] = {
      nir_channels(&b, nir_imm_vec4(&b, NAN, NAN, NAN, NAN), 7),
      nir_channels(&b, nir_imm_vec4(&b, NAN, NAN, NAN, NAN), 7),
   };

   for (unsigned i = 0; i < 4; ++i) {
      /* Per-child bounds default to NaN for absent children (i >= src_count). */
      nir_variable *bounds[2] = {
         nir_variable_create(b.shader, nir_var_shader_temp, vec3_type, "min_bound"),
         nir_variable_create(b.shader, nir_var_shader_temp, vec3_type, "max_bound"),
      };
      nir_store_var(&b, bounds[0], nir_channels(&b, nir_imm_vec4(&b, NAN, NAN, NAN, NAN), 7), 7);
      nir_store_var(&b, bounds[1], nir_channels(&b, nir_imm_vec4(&b, NAN, NAN, NAN, NAN), 7), 7);

      nir_push_if(&b, nir_ilt(&b, nir_imm_int(&b, i), src_count));
      determine_bounds(&b, node_addr, nir_channel(&b, src_nodes, i), bounds);
      nir_pop_if(&b, NULL);
      /* Child AABBs: min/max pairs at offset 16, 24 bytes per child (the
       * layout determine_bounds reads back for type-5 nodes). */
      nir_build_store_global(&b, nir_load_var(&b, bounds[0]),
                             nir_iadd(&b, node_dst_addr, nir_imm_int64(&b, 16 + 24 * i)),
                             .write_mask = 0x7, .align_mul = 4, .align_offset = 0);
      nir_build_store_global(&b, nir_load_var(&b, bounds[1]),
                             nir_iadd(&b, node_dst_addr, nir_imm_int64(&b, 28 + 24 * i)),
                             .write_mask = 0x7, .align_mul = 4, .align_offset = 0);
      total_bounds[0] = nir_fmin(&b, total_bounds[0], nir_load_var(&b, bounds[0]));
      total_bounds[1] = nir_fmax(&b, total_bounds[1], nir_load_var(&b, bounds[1]));
   }

   /* New node id: byte offset / 8 with type tag 5 (internal box node). */
   nir_ssa_def *node_id =
      nir_iadd(&b, nir_ushr(&b, node_offset, nir_imm_int(&b, 3)), nir_imm_int(&b, 5));
   nir_ssa_def *dst_scratch_addr = nir_iadd(
      &b, scratch_addr,
      nir_u2u64(&b, nir_iadd(&b, dst_scratch_offset, nir_ishl(&b, global_id, nir_imm_int(&b, 2)))));
   nir_build_store_global(&b, node_id, dst_scratch_addr, .write_mask = 1, .align_mul = 4,
                          .align_offset = 0);

   /* On the last pass this node is the root: write root id and total bounds
    * into the accel-struct header (root at 0, min at 8, max at 20). */
   nir_push_if(&b, fill_header);
   nir_build_store_global(&b, node_id, node_addr, .write_mask = 1, .align_mul = 4,
                          .align_offset = 0);
   nir_build_store_global(&b, total_bounds[0], nir_iadd(&b, node_addr, nir_imm_int64(&b, 8)),
                          .write_mask = 7, .align_mul = 4, .align_offset = 0);
   nir_build_store_global(&b, total_bounds[1], nir_iadd(&b, node_addr, nir_imm_int64(&b, 20)),
                          .write_mask = 7, .align_mul = 4, .align_offset = 0);
   nir_pop_if(&b, NULL);
   return b.shader;
}
|
||||
|
||||
void
|
||||
radv_device_finish_accel_struct_build_state(struct radv_device *device)
|
||||
{
|
||||
struct radv_meta_state *state = &device->meta_state;
|
||||
radv_DestroyPipeline(radv_device_to_handle(device), state->accel_struct_build.internal_pipeline,
|
||||
&state->alloc);
|
||||
radv_DestroyPipeline(radv_device_to_handle(device), state->accel_struct_build.leaf_pipeline,
|
||||
&state->alloc);
|
||||
radv_DestroyPipelineLayout(radv_device_to_handle(device),
|
||||
state->accel_struct_build.internal_p_layout, &state->alloc);
|
||||
radv_DestroyPipelineLayout(radv_device_to_handle(device),
|
||||
state->accel_struct_build.leaf_p_layout, &state->alloc);
|
||||
}
|
||||
|
||||
VkResult
|
||||
radv_device_init_accel_struct_build_state(struct radv_device *device)
|
||||
{
|
||||
VkResult result;
|
||||
nir_shader *leaf_cs = build_leaf_shader(device);
|
||||
nir_shader *internal_cs = build_internal_shader(device);
|
||||
|
||||
const VkPipelineLayoutCreateInfo leaf_pl_create_info = {
|
||||
.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
|
||||
.setLayoutCount = 0,
|
||||
.pushConstantRangeCount = 1,
|
||||
.pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0,
|
||||
sizeof(struct build_primitive_constants)},
|
||||
};
|
||||
|
||||
result = radv_CreatePipelineLayout(radv_device_to_handle(device), &leaf_pl_create_info,
|
||||
&device->meta_state.alloc,
|
||||
&device->meta_state.accel_struct_build.leaf_p_layout);
|
||||
if (result != VK_SUCCESS)
|
||||
goto fail;
|
||||
|
||||
VkPipelineShaderStageCreateInfo leaf_shader_stage = {
|
||||
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
|
||||
.stage = VK_SHADER_STAGE_COMPUTE_BIT,
|
||||
.module = vk_shader_module_handle_from_nir(leaf_cs),
|
||||
.pName = "main",
|
||||
.pSpecializationInfo = NULL,
|
||||
};
|
||||
|
||||
VkComputePipelineCreateInfo leaf_pipeline_info = {
|
||||
.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
|
||||
.stage = leaf_shader_stage,
|
||||
.flags = 0,
|
||||
.layout = device->meta_state.accel_struct_build.leaf_p_layout,
|
||||
};
|
||||
|
||||
result = radv_CreateComputePipelines(
|
||||
radv_device_to_handle(device), radv_pipeline_cache_to_handle(&device->meta_state.cache), 1,
|
||||
&leaf_pipeline_info, NULL, &device->meta_state.accel_struct_build.leaf_pipeline);
|
||||
if (result != VK_SUCCESS)
|
||||
goto fail;
|
||||
|
||||
const VkPipelineLayoutCreateInfo internal_pl_create_info = {
|
||||
.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
|
||||
.setLayoutCount = 0,
|
||||
.pushConstantRangeCount = 1,
|
||||
.pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0,
|
||||
sizeof(struct build_internal_constants)},
|
||||
};
|
||||
|
||||
result = radv_CreatePipelineLayout(radv_device_to_handle(device), &internal_pl_create_info,
|
||||
&device->meta_state.alloc,
|
||||
&device->meta_state.accel_struct_build.internal_p_layout);
|
||||
if (result != VK_SUCCESS)
|
||||
goto fail;
|
||||
|
||||
VkPipelineShaderStageCreateInfo internal_shader_stage = {
|
||||
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
|
||||
.stage = VK_SHADER_STAGE_COMPUTE_BIT,
|
||||
.module = vk_shader_module_handle_from_nir(internal_cs),
|
||||
.pName = "main",
|
||||
.pSpecializationInfo = NULL,
|
||||
};
|
||||
|
||||
VkComputePipelineCreateInfo internal_pipeline_info = {
|
||||
.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
|
||||
.stage = internal_shader_stage,
|
||||
.flags = 0,
|
||||
.layout = device->meta_state.accel_struct_build.internal_p_layout,
|
||||
};
|
||||
|
||||
result = radv_CreateComputePipelines(
|
||||
radv_device_to_handle(device), radv_pipeline_cache_to_handle(&device->meta_state.cache), 1,
|
||||
&internal_pipeline_info, NULL, &device->meta_state.accel_struct_build.internal_pipeline);
|
||||
if (result != VK_SUCCESS)
|
||||
goto fail;
|
||||
|
||||
return VK_SUCCESS;
|
||||
|
||||
fail:
|
||||
radv_device_finish_accel_struct_build_state(device);
|
||||
ralloc_free(internal_cs);
|
||||
ralloc_free(leaf_cs);
|
||||
return result;
|
||||
}
|
||||
|
||||
/* Per-build (one entry per infoCount) progress carried across the leaf and
 * internal-node dispatch passes in radv_CmdBuildAccelerationStructuresKHR().
 * Field roles inferred from the visible usage — TODO confirm against the
 * dispatch loops further down the file. */
struct bvh_state {
   uint32_t node_offset;    /* presumably next free byte offset for nodes in dst */
   uint32_t node_count;     /* presumably node count of the level just built */
   uint32_t scratch_offset; /* presumably scratch offset of that level's node ids */
};
|
||||
|
||||
/* Device-side (GPU) acceleration-structure build.
 *
 * Two compute passes per acceleration structure:
 *  1. leaf pass    — one dispatch per geometry; converts triangles / AABBs /
 *                    instances into leaf nodes in the destination buffer and
 *                    writes one node id per primitive into scratch.
 *  2. internal pass — repeatedly collapses groups of 4 node ids from scratch
 *                    into box (internal) nodes, ping-ponging the node-id
 *                    array between two scratch regions, until a single root
 *                    node remains. Same naive algorithm as the host build.
 *
 * All builds in pInfos advance in lock-step through the internal iterations
 * so that a single barrier per iteration covers every structure.
 */
void
radv_CmdBuildAccelerationStructuresKHR(
   VkCommandBuffer commandBuffer, uint32_t infoCount,
   const VkAccelerationStructureBuildGeometryInfoKHR *pInfos,
   const VkAccelerationStructureBuildRangeInfoKHR *const *ppBuildRangeInfos)
{
   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
   struct radv_meta_saved_state saved_state;

   radv_meta_save(
      &saved_state, cmd_buffer,
      RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_DESCRIPTORS | RADV_META_SAVE_CONSTANTS);
   /* NOTE(review): calloc result is not checked; a NULL return would crash
    * below — presumably accepted as an abort-on-OOM policy here. */
   struct bvh_state *bvh_states = calloc(infoCount, sizeof(struct bvh_state));

   /* Pass 1: emit leaf nodes for every geometry of every build info. */
   radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
                        cmd_buffer->device->meta_state.accel_struct_build.leaf_pipeline);

   for (uint32_t i = 0; i < infoCount; ++i) {
      RADV_FROM_HANDLE(radv_acceleration_structure, accel_struct,
                       pInfos[i].dstAccelerationStructure);

      struct build_primitive_constants prim_consts = {
         .node_dst_addr = radv_accel_struct_get_va(accel_struct),
         .scratch_addr = pInfos[i].scratchData.deviceAddress,
         /* Leaves start past the header and a 128-byte slot reserved for the
          * root box node (written by the final internal iteration). */
         .dst_offset = ALIGN(sizeof(struct radv_accel_struct_header), 64) + 128,
         .dst_scratch_offset = 0,
      };

      for (unsigned j = 0; j < pInfos[i].geometryCount; ++j) {
         const VkAccelerationStructureGeometryKHR *geom =
            pInfos[i].pGeometries ? &pInfos[i].pGeometries[j] : pInfos[i].ppGeometries[j];

         prim_consts.geometry_type = geom->geometryType;
         /* Geometry index in the low bits, VkGeometryFlagsKHR in the top 4. */
         prim_consts.geometry_id = j | (geom->flags << 28);
         unsigned prim_size;
         switch (geom->geometryType) {
         case VK_GEOMETRY_TYPE_TRIANGLES_KHR:
            /* NOTE(review): primitiveOffset is added to the vertex address
             * only for indexed geometry, yet it is always added to the index
             * address below — verify against the Vulkan spec, which applies
             * primitiveOffset to the index buffer for indexed geometry and to
             * the vertex buffer for non-indexed geometry. */
            prim_consts.vertex_addr =
               geom->geometry.triangles.vertexData.deviceAddress +
               ppBuildRangeInfos[i][j].firstVertex * geom->geometry.triangles.vertexStride +
               (geom->geometry.triangles.indexType != VK_INDEX_TYPE_NONE_KHR
                   ? ppBuildRangeInfos[i][j].primitiveOffset
                   : 0);
            prim_consts.index_addr = geom->geometry.triangles.indexData.deviceAddress +
                                     ppBuildRangeInfos[i][j].primitiveOffset;
            /* NOTE(review): if transformData.deviceAddress is 0 (no
             * transform), adding transformOffset makes this non-null — the
             * shader presumably tests the address against 0; confirm. */
            prim_consts.transform_addr = geom->geometry.triangles.transformData.deviceAddress +
                                         ppBuildRangeInfos[i][j].transformOffset;
            prim_consts.vertex_stride = geom->geometry.triangles.vertexStride;
            prim_consts.vertex_format = geom->geometry.triangles.vertexFormat;
            prim_consts.index_format = geom->geometry.triangles.indexType;
            prim_size = 64; /* bytes per triangle leaf node */
            break;
         case VK_GEOMETRY_TYPE_AABBS_KHR:
            prim_consts.aabb_addr =
               geom->geometry.aabbs.data.deviceAddress + ppBuildRangeInfos[i][j].primitiveOffset;
            prim_consts.aabb_stride = geom->geometry.aabbs.stride;
            prim_size = 64; /* bytes per AABB leaf node */
            break;
         case VK_GEOMETRY_TYPE_INSTANCES_KHR:
            prim_consts.instance_data = geom->geometry.instances.data.deviceAddress;
            prim_size = 128; /* bytes per instance leaf node */
            break;
         default:
            unreachable("Unknown geometryType");
         }

         radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
                               cmd_buffer->device->meta_state.accel_struct_build.leaf_p_layout,
                               VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(prim_consts), &prim_consts);
         /* One invocation per primitive. */
         radv_unaligned_dispatch(cmd_buffer, ppBuildRangeInfos[i][j].primitiveCount, 1, 1);
         prim_consts.dst_offset += prim_size * ppBuildRangeInfos[i][j].primitiveCount;
         /* 4 bytes of scratch (one node id) per leaf. */
         prim_consts.dst_scratch_offset += 4 * ppBuildRangeInfos[i][j].primitiveCount;
      }
      /* Internal nodes are appended right after the last leaf. */
      bvh_states[i].node_offset = prim_consts.dst_offset;
      bvh_states[i].node_count = prim_consts.dst_scratch_offset / 4;
   }

   /* Pass 2: iteratively build internal (box) nodes, 4 children each. */
   radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
                        cmd_buffer->device->meta_state.accel_struct_build.internal_pipeline);
   bool progress = true;
   for (unsigned iter = 0; progress; ++iter) {
      progress = false;
      for (uint32_t i = 0; i < infoCount; ++i) {
         RADV_FROM_HANDLE(radv_acceleration_structure, accel_struct,
                          pInfos[i].dstAccelerationStructure);

         /* Done once reduced to the root (but every build gets at least one
          * internal iteration, even with a single leaf — hence "iter &&"). */
         if (iter && bvh_states[i].node_count == 1)
            continue;

         /* First still-active build this iteration: flush the writes of the
          * previous pass/iteration before any of this iteration's reads. */
         if (!progress) {
            cmd_buffer->state.flush_bits |=
               RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
               radv_src_access_flush(cmd_buffer, VK_ACCESS_SHADER_WRITE_BIT, NULL) |
               radv_dst_access_flush(cmd_buffer,
                                     VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT, NULL);
         }
         progress = true;
         /* Each dispatch packs up to 4 source nodes under one new box node. */
         uint32_t dst_node_count = MAX2(1, DIV_ROUND_UP(bvh_states[i].node_count, 4));
         bool final_iter = dst_node_count == 1;
         uint32_t src_scratch_offset = bvh_states[i].scratch_offset;
         /* Ping-pong between scratch region 0 and the region just past the
          * leaf node-id array. */
         uint32_t dst_scratch_offset = src_scratch_offset ? 0 : bvh_states[i].node_count * 4;
         uint32_t dst_node_offset = bvh_states[i].node_offset;
         if (final_iter)
            /* The root node goes into the reserved slot right after the
             * header (see the +128 in the leaf pass). */
            dst_node_offset = ALIGN(sizeof(struct radv_accel_struct_header), 64);

         const struct build_internal_constants consts = {
            .node_dst_addr = radv_accel_struct_get_va(accel_struct),
            .scratch_addr = pInfos[i].scratchData.deviceAddress,
            .dst_offset = dst_node_offset,
            .dst_scratch_offset = dst_scratch_offset,
            .src_scratch_offset = src_scratch_offset,
            /* High bit tells the shader to also write the BVH header. */
            .fill_header = bvh_states[i].node_count | (final_iter ? 0x80000000U : 0),
         };

         radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
                               cmd_buffer->device->meta_state.accel_struct_build.internal_p_layout,
                               VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(consts), &consts);
         radv_unaligned_dispatch(cmd_buffer, dst_node_count, 1, 1);
         bvh_states[i].node_offset += dst_node_count * 128; /* 128 bytes per box node */
         bvh_states[i].node_count = dst_node_count;
         bvh_states[i].scratch_offset = dst_scratch_offset;
      }
   }
   free(bvh_states);
   radv_meta_restore(&saved_state, cmd_buffer);
}
|
|
@ -474,8 +474,14 @@ radv_device_init_meta(struct radv_device *device)
|
|||
if (result != VK_SUCCESS)
|
||||
goto fail_fmask_expand;
|
||||
|
||||
result = radv_device_init_accel_struct_build_state(device);
|
||||
if (result != VK_SUCCESS)
|
||||
goto fail_accel_struct_build;
|
||||
|
||||
return VK_SUCCESS;
|
||||
|
||||
fail_accel_struct_build:
|
||||
radv_device_finish_meta_fmask_expand_state(device);
|
||||
fail_fmask_expand:
|
||||
radv_device_finish_meta_resolve_fragment_state(device);
|
||||
fail_resolve_fragment:
|
||||
|
@ -507,6 +513,7 @@ fail_clear:
|
|||
void
|
||||
radv_device_finish_meta(struct radv_device *device)
|
||||
{
|
||||
radv_device_finish_accel_struct_build_state(device);
|
||||
radv_device_finish_meta_clear_state(device);
|
||||
radv_device_finish_meta_resolve_state(device);
|
||||
radv_device_finish_meta_blit_state(device);
|
||||
|
|
|
@ -133,6 +133,9 @@ void radv_device_finish_meta_dcc_retile_state(struct radv_device *device);
|
|||
|
||||
void radv_device_finish_meta_copy_vrs_htile_state(struct radv_device *device);
|
||||
|
||||
VkResult radv_device_init_accel_struct_build_state(struct radv_device *device);
|
||||
void radv_device_finish_accel_struct_build_state(struct radv_device *device);
|
||||
|
||||
void radv_meta_save(struct radv_meta_saved_state *saved_state, struct radv_cmd_buffer *cmd_buffer,
|
||||
uint32_t flags);
|
||||
|
||||
|
|
|
@ -661,6 +661,13 @@ struct radv_meta_state {
|
|||
VkPipelineLayout p_layout;
|
||||
VkPipeline pipeline;
|
||||
} dcc_retile;
|
||||
|
||||
struct {
|
||||
VkPipelineLayout leaf_p_layout;
|
||||
VkPipeline leaf_pipeline;
|
||||
VkPipelineLayout internal_p_layout;
|
||||
VkPipeline internal_pipeline;
|
||||
} accel_struct_build;
|
||||
};
|
||||
|
||||
/* queue types */
|
||||
|
|
Loading…
Reference in New Issue