pvr: Add static consts support in descriptor pds program.
Signed-off-by: Karmjit Mahil <Karmjit.Mahil@imgtec.com> Reviewed-by: Rajnesh Kanwal <rajnesh.kanwal@imgtec.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17681>
This commit is contained in:
parent
a4830eec53
commit
a278f7d21e
|
@ -60,6 +60,22 @@ struct rogue_ubo_data {
|
|||
size_t size[ROGUE_MAX_BUFFERS];
|
||||
};
|
||||
|
||||
/**
|
||||
* \brief Compile time constants that need uploading.
|
||||
*/
|
||||
struct rogue_compile_time_consts_data {
|
||||
/* TODO: Output these from the compiler. */
|
||||
/* TODO: Add the other types. */
|
||||
struct {
|
||||
size_t num;
|
||||
size_t dest;
|
||||
/* TODO: This should probably be bigger. Big enough to account for all
|
||||
* available writable special constant regs.
|
||||
*/
|
||||
uint32_t value[ROGUE_MAX_BUFFERS];
|
||||
} static_consts;
|
||||
};
|
||||
|
||||
/**
|
||||
* \brief Per-stage common build data.
|
||||
*/
|
||||
|
@ -70,6 +86,7 @@ struct rogue_common_build_data {
|
|||
size_t shareds;
|
||||
|
||||
struct rogue_ubo_data ubo_data;
|
||||
struct rogue_compile_time_consts_data compile_time_consts_data;
|
||||
};
|
||||
|
||||
/**
|
||||
|
|
|
@ -2826,6 +2826,29 @@ static VkResult pvr_setup_descriptor_mappings(
|
|||
break;
|
||||
}
|
||||
|
||||
case PVR_PDS_CONST_MAP_ENTRY_TYPE_SPECIAL_BUFFER: {
|
||||
const struct pvr_const_map_entry_special_buffer *special_buff_entry =
|
||||
(struct pvr_const_map_entry_special_buffer *)entries;
|
||||
|
||||
switch (special_buff_entry->buffer_type) {
|
||||
case PVR_BUFFER_TYPES_COMPILE_TIME: {
|
||||
uint64_t addr = descriptor_state->static_consts->vma->dev_addr.addr;
|
||||
|
||||
PVR_WRITE(qword_buffer,
|
||||
addr,
|
||||
special_buff_entry->const_offset,
|
||||
pds_info->data_size_in_dwords);
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
unreachable("Unsupported special buffer type.");
|
||||
}
|
||||
|
||||
entries += sizeof(*special_buff_entry);
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
unreachable("Unsupported map entry type.");
|
||||
}
|
||||
|
|
|
@ -561,11 +561,22 @@ static size_t pvr_pds_get_max_descriptor_upload_const_map_size_in_bytes()
|
|||
typedef struct pvr_pds_buffer (
|
||||
*const pvr_pds_descriptor_program_buffer_array_ptr)[PVR_PDS_MAX_BUFFERS];
|
||||
|
||||
static void pvr_pds_descriptor_program_setup_buffers(
|
||||
/**
|
||||
* \brief Setup buffers for the PDS descriptor program.
|
||||
*
|
||||
* Sets up buffers required by the PDS gen api based on compiler info.
|
||||
*
|
||||
* For compile time static constants that need DMAing it uploads them and
|
||||
* returns the upload in \r static_consts_pvr_bo_out .
|
||||
*/
|
||||
static VkResult pvr_pds_descriptor_program_setup_buffers(
|
||||
struct pvr_device *device,
|
||||
bool robust_buffer_access,
|
||||
const struct rogue_compile_time_consts_data *compile_time_consts_data,
|
||||
const struct rogue_ubo_data *ubo_data,
|
||||
pvr_pds_descriptor_program_buffer_array_ptr buffers_out_ptr,
|
||||
uint32_t *const buffer_count_out)
|
||||
uint32_t *const buffer_count_out,
|
||||
struct pvr_bo **const static_consts_pvr_bo_out)
|
||||
{
|
||||
struct pvr_pds_buffer *const buffers = *buffers_out_ptr;
|
||||
uint32_t buffer_count = 0;
|
||||
|
@ -591,21 +602,56 @@ static void pvr_pds_descriptor_program_setup_buffers(
|
|||
buffer_count++;
|
||||
}
|
||||
|
||||
if (compile_time_consts_data->static_consts.num > 0) {
|
||||
VkResult result;
|
||||
|
||||
assert(compile_time_consts_data->static_consts.num <=
|
||||
ARRAY_SIZE(compile_time_consts_data->static_consts.value));
|
||||
|
||||
/* This is fine since buffers_out_ptr is a pointer to an array. */
|
||||
assert(buffer_count < ARRAY_SIZE(*buffers_out_ptr));
|
||||
|
||||
/* TODO: Is it possible to have multiple static consts buffer where the
|
||||
* destination is not adjoining? If so we need to handle that.
|
||||
* Currently we're only setting up a single buffer.
|
||||
*/
|
||||
buffers[buffer_count++] = (struct pvr_pds_buffer){
|
||||
.type = PVR_BUFFER_TYPES_COMPILE_TIME,
|
||||
.size_in_dwords = compile_time_consts_data->static_consts.num,
|
||||
.destination = compile_time_consts_data->static_consts.dest,
|
||||
};
|
||||
|
||||
result = pvr_gpu_upload(device,
|
||||
device->heaps.general_heap,
|
||||
compile_time_consts_data->static_consts.value,
|
||||
compile_time_consts_data->static_consts.num *
|
||||
ROGUE_REG_SIZE_BYTES,
|
||||
ROGUE_REG_SIZE_BYTES,
|
||||
static_consts_pvr_bo_out);
|
||||
if (result != VK_SUCCESS)
|
||||
return result;
|
||||
} else {
|
||||
*static_consts_pvr_bo_out = NULL;
|
||||
}
|
||||
|
||||
*buffer_count_out = buffer_count;
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
static VkResult pvr_pds_descriptor_program_create_and_upload(
|
||||
struct pvr_device *const device,
|
||||
const VkAllocationCallbacks *const allocator,
|
||||
const struct rogue_compile_time_consts_data *const compile_time_consts_data,
|
||||
const struct rogue_ubo_data *const ubo_data,
|
||||
const struct pvr_explicit_constant_usage *const explicit_const_usage,
|
||||
const struct pvr_pipeline_layout *const layout,
|
||||
enum pvr_stage_allocation stage,
|
||||
struct pvr_pds_upload *const pds_code_upload_out,
|
||||
struct pvr_pds_info *const pds_info_out)
|
||||
struct pvr_stage_allocation_descriptor_state *const descriptor_state)
|
||||
{
|
||||
const size_t const_entries_size_in_bytes =
|
||||
pvr_pds_get_max_descriptor_upload_const_map_size_in_bytes();
|
||||
struct pvr_pds_info *const pds_info = &descriptor_state->pds_info;
|
||||
struct pvr_descriptor_program_input program = { 0 };
|
||||
struct pvr_const_map_entry *entries_buffer;
|
||||
ASSERTED uint32_t code_size_in_dwords;
|
||||
|
@ -615,19 +661,18 @@ static VkResult pvr_pds_descriptor_program_create_and_upload(
|
|||
|
||||
assert(stage != PVR_STAGE_ALLOCATION_COUNT);
|
||||
|
||||
memset(pds_info_out, 0, sizeof(*pds_info_out));
|
||||
*pds_info = (struct pvr_pds_info){ 0 };
|
||||
|
||||
pvr_pds_descriptor_program_setup_buffers(device->features.robustBufferAccess,
|
||||
ubo_data,
|
||||
&program.buffers,
|
||||
&program.buffer_count);
|
||||
|
||||
for (uint32_t dma = 0; dma < program.buffer_count; dma++) {
|
||||
if (program.buffers[dma].type != PVR_BUFFER_TYPES_COMPILE_TIME)
|
||||
continue;
|
||||
|
||||
assert(!"Unimplemented");
|
||||
}
|
||||
result = pvr_pds_descriptor_program_setup_buffers(
|
||||
device,
|
||||
device->features.robustBufferAccess,
|
||||
compile_time_consts_data,
|
||||
ubo_data,
|
||||
&program.buffers,
|
||||
&program.buffer_count,
|
||||
&descriptor_state->static_consts);
|
||||
if (result != VK_SUCCESS)
|
||||
return result;
|
||||
|
||||
if (layout->per_stage_reg_info[stage].primary_dynamic_size_in_dwords)
|
||||
assert(!"Unimplemented");
|
||||
|
@ -667,23 +712,26 @@ static VkResult pvr_pds_descriptor_program_create_and_upload(
|
|||
const_entries_size_in_bytes,
|
||||
8,
|
||||
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
|
||||
if (!entries_buffer)
|
||||
if (!entries_buffer) {
|
||||
pvr_bo_free(device, descriptor_state->static_consts);
|
||||
|
||||
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
|
||||
}
|
||||
|
||||
pds_info_out->entries = entries_buffer;
|
||||
pds_info_out->entries_size_in_bytes = const_entries_size_in_bytes;
|
||||
pds_info->entries = entries_buffer;
|
||||
pds_info->entries_size_in_bytes = const_entries_size_in_bytes;
|
||||
|
||||
pvr_pds_generate_descriptor_upload_program(&program, NULL, pds_info_out);
|
||||
pvr_pds_generate_descriptor_upload_program(&program, NULL, pds_info);
|
||||
|
||||
code_size_in_dwords = pds_info_out->code_size_in_dwords;
|
||||
code_size_in_dwords = pds_info->code_size_in_dwords;
|
||||
staging_buffer_size =
|
||||
pds_info_out->code_size_in_dwords * sizeof(*staging_buffer);
|
||||
pds_info->code_size_in_dwords * sizeof(*staging_buffer);
|
||||
|
||||
if (!staging_buffer_size) {
|
||||
vk_free2(&device->vk.alloc, allocator, entries_buffer);
|
||||
|
||||
memset(pds_info_out, 0, sizeof(*pds_info_out));
|
||||
memset(pds_code_upload_out, 0, sizeof(*pds_code_upload_out));
|
||||
*descriptor_state = (struct pvr_stage_allocation_descriptor_state){ 0 };
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
|
@ -693,30 +741,33 @@ static VkResult pvr_pds_descriptor_program_create_and_upload(
|
|||
8,
|
||||
VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
|
||||
if (!staging_buffer) {
|
||||
pvr_bo_free(device, descriptor_state->static_consts);
|
||||
vk_free2(&device->vk.alloc, allocator, entries_buffer);
|
||||
|
||||
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
|
||||
}
|
||||
|
||||
pvr_pds_generate_descriptor_upload_program(&program,
|
||||
staging_buffer,
|
||||
pds_info_out);
|
||||
pds_info);
|
||||
|
||||
assert(pds_info_out->code_size_in_dwords <= code_size_in_dwords);
|
||||
assert(pds_info->code_size_in_dwords <= code_size_in_dwords);
|
||||
|
||||
/* FIXME: use vk_realloc2() ? */
|
||||
entries_buffer = vk_realloc((!allocator) ? &device->vk.alloc : allocator,
|
||||
entries_buffer,
|
||||
pds_info_out->entries_written_size_in_bytes,
|
||||
pds_info->entries_written_size_in_bytes,
|
||||
8,
|
||||
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
|
||||
if (!entries_buffer) {
|
||||
pvr_bo_free(device, descriptor_state->static_consts);
|
||||
vk_free2(&device->vk.alloc, allocator, staging_buffer);
|
||||
|
||||
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
|
||||
}
|
||||
|
||||
pds_info_out->entries = entries_buffer;
|
||||
pds_info_out->entries_size_in_bytes =
|
||||
pds_info_out->entries_written_size_in_bytes;
|
||||
pds_info->entries = entries_buffer;
|
||||
pds_info->entries_size_in_bytes = pds_info->entries_written_size_in_bytes;
|
||||
|
||||
/* FIXME: Figure out the define for alignment of 16. */
|
||||
result = pvr_gpu_upload_pds(device,
|
||||
|
@ -724,11 +775,12 @@ static VkResult pvr_pds_descriptor_program_create_and_upload(
|
|||
0,
|
||||
0,
|
||||
staging_buffer,
|
||||
pds_info_out->code_size_in_dwords,
|
||||
pds_info->code_size_in_dwords,
|
||||
16,
|
||||
16,
|
||||
pds_code_upload_out);
|
||||
&descriptor_state->pds_code);
|
||||
if (result != VK_SUCCESS) {
|
||||
pvr_bo_free(device, descriptor_state->static_consts);
|
||||
vk_free2(&device->vk.alloc, allocator, entries_buffer);
|
||||
vk_free2(&device->vk.alloc, allocator, staging_buffer);
|
||||
|
||||
|
@ -743,11 +795,11 @@ static VkResult pvr_pds_descriptor_program_create_and_upload(
|
|||
static void pvr_pds_descriptor_program_destroy(
|
||||
struct pvr_device *const device,
|
||||
const struct VkAllocationCallbacks *const allocator,
|
||||
struct pvr_pds_upload *const pds_code,
|
||||
struct pvr_pds_info *const pds_info)
|
||||
struct pvr_stage_allocation_descriptor_state *const descriptor_state)
|
||||
{
|
||||
pvr_bo_free(device, pds_code->pvr_bo);
|
||||
vk_free2(&device->vk.alloc, allocator, pds_info->entries);
|
||||
pvr_bo_free(device, descriptor_state->pds_code.pvr_bo);
|
||||
vk_free2(&device->vk.alloc, allocator, descriptor_state->pds_info.entries);
|
||||
pvr_bo_free(device, descriptor_state->static_consts);
|
||||
}
|
||||
|
||||
static void pvr_pds_compute_program_setup(
|
||||
|
@ -1014,6 +1066,7 @@ static VkResult pvr_compute_pipeline_compile(
|
|||
const VkAllocationCallbacks *const allocator,
|
||||
struct pvr_compute_pipeline *const compute_pipeline)
|
||||
{
|
||||
struct rogue_compile_time_consts_data compile_time_consts_data;
|
||||
uint32_t work_group_input_regs[PVR_WORKGROUP_DIMENSIONS];
|
||||
struct pvr_explicit_constant_usage explicit_const_usage;
|
||||
uint32_t local_input_regs[PVR_WORKGROUP_DIMENSIONS];
|
||||
|
@ -1065,12 +1118,12 @@ static VkResult pvr_compute_pipeline_compile(
|
|||
result = pvr_pds_descriptor_program_create_and_upload(
|
||||
device,
|
||||
allocator,
|
||||
&compile_time_consts_data,
|
||||
&ubo_data,
|
||||
&explicit_const_usage,
|
||||
compute_pipeline->base.layout,
|
||||
PVR_STAGE_ALLOCATION_COMPUTE,
|
||||
&compute_pipeline->state.descriptor.pds_code,
|
||||
&compute_pipeline->state.descriptor.pds_info);
|
||||
&compute_pipeline->state.descriptor);
|
||||
if (result != VK_SUCCESS)
|
||||
goto err_free_shader;
|
||||
|
||||
|
@ -1207,11 +1260,9 @@ static void pvr_compute_pipeline_destroy(
|
|||
allocator,
|
||||
&compute_pipeline->state.primary_program,
|
||||
&compute_pipeline->state.primary_program_info);
|
||||
pvr_pds_descriptor_program_destroy(
|
||||
device,
|
||||
allocator,
|
||||
&compute_pipeline->state.descriptor.pds_code,
|
||||
&compute_pipeline->state.descriptor.pds_info);
|
||||
pvr_pds_descriptor_program_destroy(device,
|
||||
allocator,
|
||||
&compute_pipeline->state.descriptor);
|
||||
pvr_bo_free(device, compute_pipeline->state.shader.bo);
|
||||
|
||||
pvr_pipeline_finish(&compute_pipeline->base);
|
||||
|
@ -1286,14 +1337,12 @@ pvr_graphics_pipeline_destroy(struct pvr_device *const device,
|
|||
pvr_pds_descriptor_program_destroy(
|
||||
device,
|
||||
allocator,
|
||||
&gfx_pipeline->fragment_shader_state.descriptor_state.pds_code,
|
||||
&gfx_pipeline->fragment_shader_state.descriptor_state.pds_info);
|
||||
&gfx_pipeline->fragment_shader_state.descriptor_state);
|
||||
|
||||
pvr_pds_descriptor_program_destroy(
|
||||
device,
|
||||
allocator,
|
||||
&gfx_pipeline->vertex_shader_state.descriptor_state.pds_code,
|
||||
&gfx_pipeline->vertex_shader_state.descriptor_state.pds_info);
|
||||
&gfx_pipeline->vertex_shader_state.descriptor_state);
|
||||
|
||||
for (uint32_t i = 0; i < num_vertex_attrib_programs; i++) {
|
||||
struct pvr_pds_attrib_program *const attrib_program =
|
||||
|
@ -1587,12 +1636,12 @@ pvr_graphics_pipeline_compile(struct pvr_device *const device,
|
|||
result = pvr_pds_descriptor_program_create_and_upload(
|
||||
device,
|
||||
allocator,
|
||||
&ctx->common_data[MESA_SHADER_VERTEX].compile_time_consts_data,
|
||||
&ctx->common_data[MESA_SHADER_VERTEX].ubo_data,
|
||||
&vert_explicit_const_usage,
|
||||
gfx_pipeline->base.layout,
|
||||
PVR_STAGE_ALLOCATION_VERTEX_GEOMETRY,
|
||||
&gfx_pipeline->vertex_shader_state.descriptor_state.pds_code,
|
||||
&gfx_pipeline->vertex_shader_state.descriptor_state.pds_info);
|
||||
&gfx_pipeline->vertex_shader_state.descriptor_state);
|
||||
if (result != VK_SUCCESS)
|
||||
goto err_free_vertex_attrib_program;
|
||||
|
||||
|
@ -1610,12 +1659,12 @@ pvr_graphics_pipeline_compile(struct pvr_device *const device,
|
|||
result = pvr_pds_descriptor_program_create_and_upload(
|
||||
device,
|
||||
allocator,
|
||||
&ctx->common_data[MESA_SHADER_FRAGMENT].compile_time_consts_data,
|
||||
&ctx->common_data[MESA_SHADER_FRAGMENT].ubo_data,
|
||||
&frag_explicit_const_usage,
|
||||
gfx_pipeline->base.layout,
|
||||
PVR_STAGE_ALLOCATION_FRAGMENT,
|
||||
&gfx_pipeline->fragment_shader_state.descriptor_state.pds_code,
|
||||
&gfx_pipeline->fragment_shader_state.descriptor_state.pds_info);
|
||||
&gfx_pipeline->fragment_shader_state.descriptor_state);
|
||||
if (result != VK_SUCCESS)
|
||||
goto err_free_vertex_descriptor_program;
|
||||
|
||||
|
@ -1629,8 +1678,7 @@ err_free_vertex_descriptor_program:
|
|||
pvr_pds_descriptor_program_destroy(
|
||||
device,
|
||||
allocator,
|
||||
&gfx_pipeline->vertex_shader_state.descriptor_state.pds_code,
|
||||
&gfx_pipeline->vertex_shader_state.descriptor_state.pds_info);
|
||||
&gfx_pipeline->vertex_shader_state.descriptor_state);
|
||||
err_free_vertex_attrib_program:
|
||||
for (uint32_t i = 0;
|
||||
i < ARRAY_SIZE(gfx_pipeline->vertex_shader_state.pds_attrib_programs);
|
||||
|
|
|
@ -970,6 +970,9 @@ struct pvr_stage_allocation_descriptor_state {
|
|||
* referring to the code upload.
|
||||
*/
|
||||
struct pvr_pds_info pds_info;
|
||||
|
||||
/* Already setup compile time static consts. */
|
||||
struct pvr_bo *static_consts;
|
||||
};
|
||||
|
||||
struct pvr_pds_attrib_program {
|
||||
|
|
Loading…
Reference in New Issue