pvr: Add compute update kernel in vkCmdDispatch().
We upload a new data section whenever a patched variant is needed. They will be freed at command buffer destruction since the uploads are linked. Co-authored-by: Rajnesh Kanwal <rajnesh.kanwal@imgtec.com> Signed-off-by: Karmjit Mahil <Karmjit.Mahil@imgtec.com> Signed-off-by: Rajnesh Kanwal <rajnesh.kanwal@imgtec.com> Reviewed-by: Frank Binns <frank.binns@imgtec.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16040>
This commit is contained in:
parent
5e9e3fe7f7
commit
58d9afb80b
|
@ -2880,6 +2880,111 @@ static void pvr_compute_update_shared(struct pvr_cmd_buffer *cmd_buffer)
|
||||||
pvr_compute_generate_control_stream(csb, &info);
|
pvr_compute_generate_control_stream(csb, &info);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static uint32_t
|
||||||
|
pvr_compute_flat_pad_workgroup_size(const struct pvr_device_info *dev_info,
|
||||||
|
uint32_t workgroup_size,
|
||||||
|
uint32_t coeff_regs_count)
|
||||||
|
{
|
||||||
|
uint32_t max_avail_coeff_regs =
|
||||||
|
rogue_get_cdm_max_local_mem_size_regs(dev_info);
|
||||||
|
uint32_t coeff_regs_count_aligned =
|
||||||
|
ALIGN_POT(coeff_regs_count,
|
||||||
|
PVRX(CDMCTRL_KERNEL0_USC_COMMON_SIZE_UNIT_SIZE) >> 2U);
|
||||||
|
|
||||||
|
/* If the work group size is > ROGUE_MAX_INSTANCES_PER_TASK. We now *always*
|
||||||
|
* pad the work group size to the next multiple of
|
||||||
|
* ROGUE_MAX_INSTANCES_PER_TASK.
|
||||||
|
*
|
||||||
|
* If we use more than 1/8th of the max coefficient registers then we round
|
||||||
|
* work group size up to the next multiple of ROGUE_MAX_INSTANCES_PER_TASK
|
||||||
|
*/
|
||||||
|
/* TODO: See if this can be optimized. */
|
||||||
|
if (workgroup_size > ROGUE_MAX_INSTANCES_PER_TASK ||
|
||||||
|
coeff_regs_count_aligned > (max_avail_coeff_regs / 8)) {
|
||||||
|
assert(workgroup_size < rogue_get_compute_max_work_group_size(dev_info));
|
||||||
|
|
||||||
|
return ALIGN_POT(workgroup_size, ROGUE_MAX_INSTANCES_PER_TASK);
|
||||||
|
}
|
||||||
|
|
||||||
|
return workgroup_size;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* TODO: Wire up the base_workgroup variant program when implementing
|
||||||
|
* VK_KHR_device_group. The values will also need patching into the program.
|
||||||
|
*/
|
||||||
|
static void pvr_compute_update_kernel(
|
||||||
|
struct pvr_cmd_buffer *cmd_buffer,
|
||||||
|
const uint32_t global_workgroup_size[static const PVR_WORKGROUP_DIMENSIONS])
|
||||||
|
{
|
||||||
|
const struct pvr_device_info *dev_info =
|
||||||
|
&cmd_buffer->device->pdevice->dev_info;
|
||||||
|
struct pvr_cmd_buffer_state *state = &cmd_buffer->state;
|
||||||
|
struct pvr_csb *csb = &state->current_sub_cmd->compute.control_stream;
|
||||||
|
const struct pvr_compute_pipeline *pipeline = state->compute_pipeline;
|
||||||
|
const struct pvr_pds_info *program_info =
|
||||||
|
&pipeline->state.primary_program_info;
|
||||||
|
|
||||||
|
struct pvr_compute_kernel_info info = {
|
||||||
|
.indirect_buffer_addr.addr = 0ULL,
|
||||||
|
.usc_target = PVRX(CDMCTRL_USC_TARGET_ANY),
|
||||||
|
.pds_temp_size =
|
||||||
|
DIV_ROUND_UP(program_info->temps_required << 2U,
|
||||||
|
PVRX(CDMCTRL_KERNEL0_PDS_TEMP_SIZE_UNIT_SIZE)),
|
||||||
|
|
||||||
|
.pds_data_size =
|
||||||
|
DIV_ROUND_UP(program_info->data_size_in_dwords << 2U,
|
||||||
|
PVRX(CDMCTRL_KERNEL0_PDS_DATA_SIZE_UNIT_SIZE)),
|
||||||
|
.pds_data_offset = pipeline->state.primary_program.data_offset,
|
||||||
|
.pds_code_offset = pipeline->state.primary_program.code_offset,
|
||||||
|
|
||||||
|
.sd_type = PVRX(CDMCTRL_SD_TYPE_USC),
|
||||||
|
|
||||||
|
.usc_unified_size =
|
||||||
|
DIV_ROUND_UP(pipeline->state.shader.input_register_count << 2U,
|
||||||
|
PVRX(CDMCTRL_KERNEL0_USC_UNIFIED_SIZE_UNIT_SIZE)),
|
||||||
|
|
||||||
|
/* clang-format off */
|
||||||
|
.global_size = {
|
||||||
|
global_workgroup_size[0],
|
||||||
|
global_workgroup_size[1],
|
||||||
|
global_workgroup_size[2]
|
||||||
|
},
|
||||||
|
/* clang-format on */
|
||||||
|
};
|
||||||
|
|
||||||
|
uint32_t work_size = pipeline->state.shader.work_size;
|
||||||
|
uint32_t coeff_regs;
|
||||||
|
|
||||||
|
if (work_size > ROGUE_MAX_INSTANCES_PER_TASK) {
|
||||||
|
/* Enforce a single workgroup per cluster through allocation starvation.
|
||||||
|
*/
|
||||||
|
coeff_regs = rogue_get_cdm_max_local_mem_size_regs(dev_info);
|
||||||
|
} else {
|
||||||
|
coeff_regs = pipeline->state.shader.coefficient_register_count;
|
||||||
|
}
|
||||||
|
|
||||||
|
info.usc_common_size =
|
||||||
|
DIV_ROUND_UP(coeff_regs << 2U,
|
||||||
|
PVRX(CDMCTRL_KERNEL0_USC_COMMON_SIZE_UNIT_SIZE));
|
||||||
|
|
||||||
|
/* Use a whole slot per workgroup. */
|
||||||
|
work_size = MAX2(work_size, ROGUE_MAX_INSTANCES_PER_TASK);
|
||||||
|
|
||||||
|
coeff_regs += pipeline->state.shader.const_shared_reg_count;
|
||||||
|
|
||||||
|
work_size =
|
||||||
|
pvr_compute_flat_pad_workgroup_size(dev_info, work_size, coeff_regs);
|
||||||
|
|
||||||
|
info.local_size[0] = work_size;
|
||||||
|
info.local_size[1] = 1U;
|
||||||
|
info.local_size[2] = 1U;
|
||||||
|
|
||||||
|
info.max_instances =
|
||||||
|
pvr_compute_flat_slot_size(dev_info, coeff_regs, false, work_size);
|
||||||
|
|
||||||
|
pvr_compute_generate_control_stream(csb, &info);
|
||||||
|
}
|
||||||
|
|
||||||
void pvr_CmdDispatch(VkCommandBuffer commandBuffer,
|
void pvr_CmdDispatch(VkCommandBuffer commandBuffer,
|
||||||
uint32_t groupCountX,
|
uint32_t groupCountX,
|
||||||
uint32_t groupCountY,
|
uint32_t groupCountY,
|
||||||
|
@ -2953,7 +3058,7 @@ void pvr_CmdDispatch(VkCommandBuffer commandBuffer,
|
||||||
|
|
||||||
pvr_compute_update_shared(cmd_buffer);
|
pvr_compute_update_shared(cmd_buffer);
|
||||||
|
|
||||||
/* FIXME: Create update kernel end emit control stream. */
|
pvr_compute_update_kernel(cmd_buffer, workgroup_size);
|
||||||
}
|
}
|
||||||
|
|
||||||
void pvr_CmdDispatchIndirect(VkCommandBuffer commandBuffer,
|
void pvr_CmdDispatchIndirect(VkCommandBuffer commandBuffer,
|
||||||
|
|
|
@ -45,6 +45,7 @@
|
||||||
#include "util/log.h"
|
#include "util/log.h"
|
||||||
#include "util/macros.h"
|
#include "util/macros.h"
|
||||||
#include "util/ralloc.h"
|
#include "util/ralloc.h"
|
||||||
|
#include "util/u_math.h"
|
||||||
#include "vk_alloc.h"
|
#include "vk_alloc.h"
|
||||||
#include "vk_log.h"
|
#include "vk_log.h"
|
||||||
#include "vk_object.h"
|
#include "vk_object.h"
|
||||||
|
@ -777,22 +778,17 @@ static void pvr_pds_uniform_program_destroy(
|
||||||
vk_free2(&device->vk.alloc, allocator, pds_info->entries);
|
vk_free2(&device->vk.alloc, allocator, pds_info->entries);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* FIXME: See if pvr_device_init_compute_pds_program() and this could be merged.
|
static void pvr_pds_compute_program_setup(
|
||||||
*/
|
const struct pvr_device_info *dev_info,
|
||||||
static VkResult pvr_pds_compute_program_create_and_upload(
|
|
||||||
struct pvr_device *const device,
|
|
||||||
const VkAllocationCallbacks *const allocator,
|
|
||||||
const uint32_t local_input_regs[static const PVR_WORKGROUP_DIMENSIONS],
|
const uint32_t local_input_regs[static const PVR_WORKGROUP_DIMENSIONS],
|
||||||
const uint32_t work_group_input_regs[static const PVR_WORKGROUP_DIMENSIONS],
|
const uint32_t work_group_input_regs[static const PVR_WORKGROUP_DIMENSIONS],
|
||||||
uint32_t barrier_coefficient,
|
uint32_t barrier_coefficient,
|
||||||
bool add_base_workgroup,
|
bool add_base_workgroup,
|
||||||
uint32_t usc_temps,
|
uint32_t usc_temps,
|
||||||
pvr_dev_addr_t usc_shader_dev_addr,
|
pvr_dev_addr_t usc_shader_dev_addr,
|
||||||
struct pvr_pds_upload *const pds_upload_out,
|
struct pvr_pds_compute_shader_program *const program)
|
||||||
struct pvr_pds_info *const pds_info_out,
|
|
||||||
uint32_t *const base_workgroup_data_patching_offset_out)
|
|
||||||
{
|
{
|
||||||
struct pvr_pds_compute_shader_program program = {
|
*program = (struct pvr_pds_compute_shader_program){
|
||||||
/* clang-format off */
|
/* clang-format off */
|
||||||
.local_input_regs = {
|
.local_input_regs = {
|
||||||
local_input_regs[0],
|
local_input_regs[0],
|
||||||
|
@ -815,27 +811,50 @@ static VkResult pvr_pds_compute_program_create_and_upload(
|
||||||
.add_base_workgroup = add_base_workgroup,
|
.add_base_workgroup = add_base_workgroup,
|
||||||
.kick_usc = true,
|
.kick_usc = true,
|
||||||
};
|
};
|
||||||
struct pvr_device_info *dev_info = &device->pdevice->dev_info;
|
|
||||||
uint32_t staging_buffer_size;
|
|
||||||
uint32_t *staging_buffer;
|
|
||||||
VkResult result;
|
|
||||||
|
|
||||||
STATIC_ASSERT(ARRAY_SIZE(program.local_input_regs) ==
|
STATIC_ASSERT(ARRAY_SIZE(program->local_input_regs) ==
|
||||||
PVR_WORKGROUP_DIMENSIONS);
|
PVR_WORKGROUP_DIMENSIONS);
|
||||||
STATIC_ASSERT(ARRAY_SIZE(program.work_group_input_regs) ==
|
STATIC_ASSERT(ARRAY_SIZE(program->work_group_input_regs) ==
|
||||||
PVR_WORKGROUP_DIMENSIONS);
|
PVR_WORKGROUP_DIMENSIONS);
|
||||||
STATIC_ASSERT(ARRAY_SIZE(program.global_input_regs) ==
|
STATIC_ASSERT(ARRAY_SIZE(program->global_input_regs) ==
|
||||||
PVR_WORKGROUP_DIMENSIONS);
|
PVR_WORKGROUP_DIMENSIONS);
|
||||||
|
|
||||||
assert(!add_base_workgroup || base_workgroup_data_patching_offset_out);
|
pvr_pds_setup_doutu(&program->usc_task_control,
|
||||||
|
|
||||||
pvr_pds_setup_doutu(&program.usc_task_control,
|
|
||||||
usc_shader_dev_addr.addr,
|
usc_shader_dev_addr.addr,
|
||||||
usc_temps,
|
usc_temps,
|
||||||
PVRX(PDSINST_DOUTU_SAMPLE_RATE_INSTANCE),
|
PVRX(PDSINST_DOUTU_SAMPLE_RATE_INSTANCE),
|
||||||
false);
|
false);
|
||||||
|
|
||||||
pvr_pds_compute_shader(&program, NULL, PDS_GENERATE_SIZES, dev_info);
|
pvr_pds_compute_shader(program, NULL, PDS_GENERATE_SIZES, dev_info);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* FIXME: See if pvr_device_init_compute_pds_program() and this could be merged.
|
||||||
|
*/
|
||||||
|
static VkResult pvr_pds_compute_program_create_and_upload(
|
||||||
|
struct pvr_device *const device,
|
||||||
|
const VkAllocationCallbacks *const allocator,
|
||||||
|
const uint32_t local_input_regs[static const PVR_WORKGROUP_DIMENSIONS],
|
||||||
|
const uint32_t work_group_input_regs[static const PVR_WORKGROUP_DIMENSIONS],
|
||||||
|
uint32_t barrier_coefficient,
|
||||||
|
uint32_t usc_temps,
|
||||||
|
pvr_dev_addr_t usc_shader_dev_addr,
|
||||||
|
struct pvr_pds_upload *const pds_upload_out,
|
||||||
|
struct pvr_pds_info *const pds_info_out)
|
||||||
|
{
|
||||||
|
struct pvr_device_info *dev_info = &device->pdevice->dev_info;
|
||||||
|
struct pvr_pds_compute_shader_program program;
|
||||||
|
uint32_t staging_buffer_size;
|
||||||
|
uint32_t *staging_buffer;
|
||||||
|
VkResult result;
|
||||||
|
|
||||||
|
pvr_pds_compute_program_setup(dev_info,
|
||||||
|
local_input_regs,
|
||||||
|
work_group_input_regs,
|
||||||
|
barrier_coefficient,
|
||||||
|
false,
|
||||||
|
usc_temps,
|
||||||
|
usc_shader_dev_addr,
|
||||||
|
&program);
|
||||||
|
|
||||||
/* FIXME: According to pvr_device_init_compute_pds_program() the code size
|
/* FIXME: According to pvr_device_init_compute_pds_program() the code size
|
||||||
* is in bytes. Investigate this.
|
* is in bytes. Investigate this.
|
||||||
|
@ -864,16 +883,6 @@ static VkResult pvr_pds_compute_program_create_and_upload(
|
||||||
PDS_GENERATE_DATA_SEGMENT,
|
PDS_GENERATE_DATA_SEGMENT,
|
||||||
dev_info);
|
dev_info);
|
||||||
|
|
||||||
/* We'll need to patch the base workgroup in the PDS data section before
|
|
||||||
* dispatch so we give back the offsets at which to patch. We only need to
|
|
||||||
* save the offset for the first workgroup id since the workgroup ids are
|
|
||||||
* stored contiguously in the data segment.
|
|
||||||
*/
|
|
||||||
if (add_base_workgroup) {
|
|
||||||
*base_workgroup_data_patching_offset_out =
|
|
||||||
program.base_workgroup_constant_offset_in_dwords[0];
|
|
||||||
}
|
|
||||||
|
|
||||||
/* FIXME: Figure out the define for alignment of 16. */
|
/* FIXME: Figure out the define for alignment of 16. */
|
||||||
result = pvr_gpu_upload_pds(device,
|
result = pvr_gpu_upload_pds(device,
|
||||||
&staging_buffer[program.code_size],
|
&staging_buffer[program.code_size],
|
||||||
|
@ -910,6 +919,97 @@ static void pvr_pds_compute_program_destroy(
|
||||||
pvr_bo_free(device, pds_program->pvr_bo);
|
pvr_bo_free(device, pds_program->pvr_bo);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* This only uploads the code segment. The data segment will need to be patched
|
||||||
|
* with the base workgroup before uploading.
|
||||||
|
*/
|
||||||
|
static VkResult pvr_pds_compute_base_workgroup_variant_program_init(
|
||||||
|
struct pvr_device *const device,
|
||||||
|
const VkAllocationCallbacks *const allocator,
|
||||||
|
const uint32_t local_input_regs[static const PVR_WORKGROUP_DIMENSIONS],
|
||||||
|
const uint32_t work_group_input_regs[static const PVR_WORKGROUP_DIMENSIONS],
|
||||||
|
uint32_t barrier_coefficient,
|
||||||
|
uint32_t usc_temps,
|
||||||
|
pvr_dev_addr_t usc_shader_dev_addr,
|
||||||
|
struct pvr_pds_base_workgroup_program *program_out)
|
||||||
|
{
|
||||||
|
struct pvr_device_info *dev_info = &device->pdevice->dev_info;
|
||||||
|
struct pvr_pds_compute_shader_program program;
|
||||||
|
uint32_t buffer_size;
|
||||||
|
uint32_t *buffer;
|
||||||
|
VkResult result;
|
||||||
|
|
||||||
|
pvr_pds_compute_program_setup(dev_info,
|
||||||
|
local_input_regs,
|
||||||
|
work_group_input_regs,
|
||||||
|
barrier_coefficient,
|
||||||
|
true,
|
||||||
|
usc_temps,
|
||||||
|
usc_shader_dev_addr,
|
||||||
|
&program);
|
||||||
|
|
||||||
|
/* FIXME: According to pvr_device_init_compute_pds_program() the code size
|
||||||
|
* is in bytes. Investigate this.
|
||||||
|
*/
|
||||||
|
buffer_size = MAX2(program.code_size, program.data_size) * sizeof(*buffer);
|
||||||
|
|
||||||
|
buffer = vk_alloc2(&device->vk.alloc,
|
||||||
|
allocator,
|
||||||
|
buffer_size,
|
||||||
|
8,
|
||||||
|
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
|
||||||
|
if (!buffer)
|
||||||
|
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
|
||||||
|
|
||||||
|
pvr_pds_compute_shader(&program,
|
||||||
|
&buffer[0],
|
||||||
|
PDS_GENERATE_CODE_SEGMENT,
|
||||||
|
dev_info);
|
||||||
|
|
||||||
|
/* FIXME: Figure out the define for alignment of 16. */
|
||||||
|
result = pvr_gpu_upload_pds(device,
|
||||||
|
NULL,
|
||||||
|
0,
|
||||||
|
0,
|
||||||
|
buffer,
|
||||||
|
program.code_size,
|
||||||
|
16,
|
||||||
|
16,
|
||||||
|
&program_out->code_upload);
|
||||||
|
if (result != VK_SUCCESS) {
|
||||||
|
vk_free2(&device->vk.alloc, allocator, buffer);
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
pvr_pds_compute_shader(&program, buffer, PDS_GENERATE_DATA_SEGMENT, dev_info);
|
||||||
|
|
||||||
|
program_out->data_section = buffer;
|
||||||
|
|
||||||
|
/* We'll need to patch the base workgroup in the PDS data section before
|
||||||
|
* dispatch so we save the offsets at which to patch. We only need to save
|
||||||
|
* the offset for the first workgroup id since the workgroup ids are stored
|
||||||
|
* contiguously in the data segment.
|
||||||
|
*/
|
||||||
|
program_out->base_workgroup_data_patching_offset =
|
||||||
|
program.base_workgroup_constant_offset_in_dwords[0];
|
||||||
|
|
||||||
|
program_out->info = (struct pvr_pds_info){
|
||||||
|
.temps_required = program.highest_temp,
|
||||||
|
.code_size_in_dwords = program.code_size,
|
||||||
|
.data_size_in_dwords = program.data_size,
|
||||||
|
};
|
||||||
|
|
||||||
|
return VK_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void pvr_pds_compute_base_workgroup_variant_program_finish(
|
||||||
|
struct pvr_device *device,
|
||||||
|
const VkAllocationCallbacks *const allocator,
|
||||||
|
struct pvr_pds_base_workgroup_program *const state)
|
||||||
|
{
|
||||||
|
pvr_bo_free(device, state->code_upload.pvr_bo);
|
||||||
|
vk_free2(&device->vk.alloc, allocator, state->data_section);
|
||||||
|
}
|
||||||
|
|
||||||
/******************************************************************************
|
/******************************************************************************
|
||||||
Generic pipeline functions
|
Generic pipeline functions
|
||||||
******************************************************************************/
|
******************************************************************************/
|
||||||
|
@ -962,6 +1062,9 @@ static VkResult pvr_compute_pipeline_compile(
|
||||||
compute_pipeline->state.shader.uses_barrier = false;
|
compute_pipeline->state.shader.uses_barrier = false;
|
||||||
compute_pipeline->state.shader.uses_num_workgroups = false;
|
compute_pipeline->state.shader.uses_num_workgroups = false;
|
||||||
compute_pipeline->state.shader.const_shared_reg_count = 4;
|
compute_pipeline->state.shader.const_shared_reg_count = 4;
|
||||||
|
compute_pipeline->state.shader.input_register_count = 8;
|
||||||
|
compute_pipeline->state.shader.work_size = 1 * 1 * 1;
|
||||||
|
compute_pipeline->state.shader.coefficient_register_count = 4;
|
||||||
|
|
||||||
result = pvr_gpu_upload_usc(device,
|
result = pvr_gpu_upload_usc(device,
|
||||||
pvr_usc_compute_shader,
|
pvr_usc_compute_shader,
|
||||||
|
@ -1011,12 +1114,10 @@ static VkResult pvr_compute_pipeline_compile(
|
||||||
local_input_regs,
|
local_input_regs,
|
||||||
work_group_input_regs,
|
work_group_input_regs,
|
||||||
barrier_coefficient,
|
barrier_coefficient,
|
||||||
false,
|
|
||||||
pvr_pds_compute_program_params.usc_temps,
|
pvr_pds_compute_program_params.usc_temps,
|
||||||
compute_pipeline->state.shader.bo->vma->dev_addr,
|
compute_pipeline->state.shader.bo->vma->dev_addr,
|
||||||
&compute_pipeline->state.primary_program,
|
&compute_pipeline->state.primary_program,
|
||||||
&compute_pipeline->state.primary_program_info,
|
&compute_pipeline->state.primary_program_info);
|
||||||
NULL);
|
|
||||||
if (result != VK_SUCCESS)
|
if (result != VK_SUCCESS)
|
||||||
goto err_free_uniform_program;
|
goto err_free_uniform_program;
|
||||||
|
|
||||||
|
@ -1029,27 +1130,27 @@ static VkResult pvr_compute_pipeline_compile(
|
||||||
work_group_input_regs[2] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED;
|
work_group_input_regs[2] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED;
|
||||||
|
|
||||||
if (compute_pipeline->state.flags.base_workgroup) {
|
if (compute_pipeline->state.flags.base_workgroup) {
|
||||||
result = pvr_pds_compute_program_create_and_upload(
|
result = pvr_pds_compute_base_workgroup_variant_program_init(
|
||||||
device,
|
device,
|
||||||
allocator,
|
allocator,
|
||||||
local_input_regs,
|
local_input_regs,
|
||||||
work_group_input_regs,
|
work_group_input_regs,
|
||||||
barrier_coefficient,
|
barrier_coefficient,
|
||||||
true,
|
|
||||||
pvr_pds_compute_program_params.usc_temps,
|
pvr_pds_compute_program_params.usc_temps,
|
||||||
compute_pipeline->state.shader.bo->vma->dev_addr,
|
compute_pipeline->state.shader.bo->vma->dev_addr,
|
||||||
&compute_pipeline->state.primary_program_base_workgroup_variant,
|
&compute_pipeline->state.primary_base_workgroup_variant_program);
|
||||||
&compute_pipeline->state.primary_program_base_workgroup_variant_info,
|
|
||||||
&compute_pipeline->state.base_workgroup_ids_dword_offset);
|
|
||||||
if (result != VK_SUCCESS)
|
if (result != VK_SUCCESS)
|
||||||
goto err_free_compute_program;
|
goto err_destroy_compute_program;
|
||||||
}
|
}
|
||||||
|
|
||||||
return VK_SUCCESS;
|
return VK_SUCCESS;
|
||||||
|
|
||||||
err_free_compute_program:
|
err_destroy_compute_program:
|
||||||
if (compute_pipeline->state.flags.base_workgroup)
|
pvr_pds_compute_program_destroy(
|
||||||
pvr_bo_free(device, compute_pipeline->state.primary_program.pvr_bo);
|
device,
|
||||||
|
allocator,
|
||||||
|
&compute_pipeline->state.primary_program,
|
||||||
|
&compute_pipeline->state.primary_program_info);
|
||||||
|
|
||||||
err_free_uniform_program:
|
err_free_uniform_program:
|
||||||
pvr_bo_free(device, compute_pipeline->state.uniform.pds_code.pvr_bo);
|
pvr_bo_free(device, compute_pipeline->state.uniform.pds_code.pvr_bo);
|
||||||
|
@ -1129,11 +1230,10 @@ static void pvr_compute_pipeline_destroy(
|
||||||
struct pvr_compute_pipeline *const compute_pipeline)
|
struct pvr_compute_pipeline *const compute_pipeline)
|
||||||
{
|
{
|
||||||
if (compute_pipeline->state.flags.base_workgroup) {
|
if (compute_pipeline->state.flags.base_workgroup) {
|
||||||
pvr_pds_compute_program_destroy(
|
pvr_pds_compute_base_workgroup_variant_program_finish(
|
||||||
device,
|
device,
|
||||||
allocator,
|
allocator,
|
||||||
&compute_pipeline->state.primary_program_base_workgroup_variant,
|
&compute_pipeline->state.primary_base_workgroup_variant_program);
|
||||||
&compute_pipeline->state.primary_program_base_workgroup_variant_info);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pvr_pds_compute_program_destroy(
|
pvr_pds_compute_program_destroy(
|
||||||
|
|
|
@ -1036,6 +1036,9 @@ struct pvr_compute_pipeline {
|
||||||
bool uses_num_workgroups;
|
bool uses_num_workgroups;
|
||||||
|
|
||||||
uint32_t const_shared_reg_count;
|
uint32_t const_shared_reg_count;
|
||||||
|
uint32_t input_register_count;
|
||||||
|
uint32_t work_size;
|
||||||
|
uint32_t coefficient_register_count;
|
||||||
} shader;
|
} shader;
|
||||||
|
|
||||||
struct {
|
struct {
|
||||||
|
@ -1047,12 +1050,17 @@ struct pvr_compute_pipeline {
|
||||||
struct pvr_pds_upload primary_program;
|
struct pvr_pds_upload primary_program;
|
||||||
struct pvr_pds_info primary_program_info;
|
struct pvr_pds_info primary_program_info;
|
||||||
|
|
||||||
struct pvr_pds_upload primary_program_base_workgroup_variant;
|
struct pvr_pds_base_workgroup_program {
|
||||||
struct pvr_pds_info primary_program_base_workgroup_variant_info;
|
struct pvr_pds_upload code_upload;
|
||||||
|
|
||||||
|
uint32_t *data_section;
|
||||||
/* Offset within the PDS data section at which the base workgroup id
|
/* Offset within the PDS data section at which the base workgroup id
|
||||||
* resides.
|
* resides.
|
||||||
*/
|
*/
|
||||||
uint32_t base_workgroup_ids_dword_offset;
|
uint32_t base_workgroup_data_patching_offset;
|
||||||
|
|
||||||
|
struct pvr_pds_info info;
|
||||||
|
} primary_base_workgroup_variant_program;
|
||||||
} state;
|
} state;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue