diff --git a/src/imagination/common/pvr_device_info.h b/src/imagination/common/pvr_device_info.h index a9098cb1621..339bb1fb365 100644 --- a/src/imagination/common/pvr_device_info.h +++ b/src/imagination/common/pvr_device_info.h @@ -356,6 +356,16 @@ struct pvr_device_info { struct pvr_device_quirks quirks; }; +struct pvr_device_runtime_info { + uint64_t min_free_list_size; + uint64_t reserved_shared_size; + uint64_t total_reserved_partition_size; + uint64_t num_phantoms; + uint64_t max_coeffs; + uint64_t cdm_max_local_mem_size_regs; + uint32_t core_count; +}; + /** * Packs B, V, N and C values into a 64-bit unsigned integer. * diff --git a/src/imagination/include/hwdef/rogue_hw_utils.h b/src/imagination/include/hwdef/rogue_hw_utils.h index 56ea843b85f..0679fe4f7b3 100644 --- a/src/imagination/include/hwdef/rogue_hw_utils.h +++ b/src/imagination/include/hwdef/rogue_hw_utils.h @@ -172,15 +172,6 @@ rogue_get_macrotile_array_size(const struct pvr_device_info *dev_info) return num_macrotiles_x * num_macrotiles_y * 8U; } -/* To get the number of required Bernado/Phantom(s), divide the number of - * clusters by 4 and round up. - */ -static inline uint32_t -rogue_get_num_phantoms(const struct pvr_device_info *dev_info) -{ - return DIV_ROUND_UP(PVR_GET_FEATURE_VALUE(dev_info, num_clusters, 1U), 4U); -} - /* Region header size in bytes. */ static inline uint32_t rogue_get_region_header_size(const struct pvr_device_info *dev_info) @@ -198,24 +189,6 @@ rogue_get_region_header_size(const struct pvr_device_info *dev_info) return 5; } -/* Return the total reserved size of partition in dwords. */ -static inline uint32_t -rogue_get_total_reserved_partition_size(const struct pvr_device_info *dev_info) -{ - uint32_t tile_size_x = PVR_GET_FEATURE_VALUE(dev_info, tile_size_x, 0); - uint32_t tile_size_y = PVR_GET_FEATURE_VALUE(dev_info, tile_size_y, 0); - uint32_t max_partitions = PVR_GET_FEATURE_VALUE(dev_info, max_partitions, 0); - - if (tile_size_x == 16 && tile_size_y == 16) { - return tile_size_x * tile_size_y * max_partitions * - PVR_GET_FEATURE_VALUE(dev_info, - usc_min_output_registers_per_pix, - 0); - } - - return max_partitions * 1024U; -} - static inline uint32_t rogue_get_render_size_max(const struct pvr_device_info *dev_info) { @@ -251,26 +224,6 @@ static inline uint32_t pvr_get_max_user_vertex_output_components( return 128U; } -static inline uint32_t -rogue_get_reserved_shared_size(const struct pvr_device_info *dev_info) -{ - uint32_t common_store_size_in_dwords = - PVR_GET_FEATURE_VALUE(dev_info, - common_store_size_in_dwords, - 512U * 4U * 4U); - uint32_t reserved_shared_size = - common_store_size_in_dwords - (256U * 4U) - - rogue_get_total_reserved_partition_size(dev_info); - - if (PVR_HAS_QUIRK(dev_info, 44079)) { - uint32_t common_store_split_point = (768U * 4U * 4U); - - return MIN2(common_store_split_point - (256U * 4U), reserved_shared_size); - } - - return reserved_shared_size; -} - static inline uint32_t rogue_max_compute_shared_registers(const struct pvr_device_info *dev_info) { @@ -280,42 +233,6 @@ rogue_max_compute_shared_registers(const struct pvr_device_info *dev_info) return 0U; } -static inline uint32_t -rogue_get_max_coeffs(const struct pvr_device_info *dev_info) -{ - uint32_t max_coeff_additional_portion = ROGUE_MAX_VERTEX_SHARED_REGISTERS; - uint32_t pending_allocation_shared_regs = 2U * 1024U; - uint32_t pending_allocation_coeff_regs = 0U; - uint32_t num_phantoms = rogue_get_num_phantoms(dev_info); - uint32_t tiles_in_flight = - PVR_GET_FEATURE_VALUE(dev_info, isp_max_tiles_in_flight, 0); - uint32_t max_coeff_pixel_portion = - DIV_ROUND_UP(tiles_in_flight, num_phantoms); - - max_coeff_pixel_portion *= ROGUE_MAX_PIXEL_SHARED_REGISTERS; - - /* Compute tasks on cores with BRN48492 and without compute overlap may lock - * up without two additional lines of coeffs. - */ - if (PVR_HAS_QUIRK(dev_info, 48492) && - !PVR_HAS_FEATURE(dev_info, compute_overlap)) { - pending_allocation_coeff_regs = 2U * 1024U; - } - - if (PVR_HAS_ERN(dev_info, 38748)) - pending_allocation_shared_regs = 0U; - - if (PVR_HAS_ERN(dev_info, 38020)) { - max_coeff_additional_portion += - rogue_max_compute_shared_registers(dev_info); - } - - return rogue_get_reserved_shared_size(dev_info) + - pending_allocation_coeff_regs - - (max_coeff_pixel_portion + max_coeff_additional_portion + - pending_allocation_shared_regs); -} - static inline uint32_t rogue_get_cdm_context_resume_buffer_size(const struct pvr_device_info *dev_info) { @@ -341,28 +258,6 @@ static inline uint32_t rogue_get_cdm_context_resume_buffer_alignment( return ROGUE_LLS_CDM_CONTEXT_RESUME_BUFFER_ALIGNMENT; } -static inline uint32_t -rogue_get_cdm_max_local_mem_size_regs(const struct pvr_device_info *dev_info) -{ - uint32_t available_coeffs_in_dwords = rogue_get_max_coeffs(dev_info); - - if (PVR_HAS_QUIRK(dev_info, 48492) && PVR_HAS_FEATURE(dev_info, roguexe) && - !PVR_HAS_FEATURE(dev_info, compute_overlap)) { - /* Driver must not use the 2 reserved lines. */ - available_coeffs_in_dwords -= ROGUE_CSRM_LINE_SIZE_IN_DWORDS * 2; - } - - /* The maximum amount of local memory available to a kernel is the minimum - * of the total number of coefficient registers available and the max common - * store allocation size which can be made by the CDM. - * - * If any coeff lines are reserved for tessellation or pixel then we need to - * subtract those too. - */ - return MIN2(available_coeffs_in_dwords, - ROGUE_MAX_PER_KERNEL_LOCAL_MEM_SIZE_REGS); -} - static inline uint32_t rogue_get_compute_max_work_group_size(const struct pvr_device_info *dev_info) { diff --git a/src/imagination/vulkan/pvr_cmd_buffer.c b/src/imagination/vulkan/pvr_cmd_buffer.c index a6edd6c76fc..9f75c98c7b4 100644 --- a/src/imagination/vulkan/pvr_cmd_buffer.c +++ b/src/imagination/vulkan/pvr_cmd_buffer.c @@ -1066,10 +1066,15 @@ static VkResult pvr_sub_cmd_gfx_job_init(const struct pvr_device_info *dev_info, */ #define PVR_IDF_WDF_IN_REGISTER_CONST_COUNT 12U -static void pvr_sub_cmd_compute_job_init(const struct pvr_device_info *dev_info, - struct pvr_cmd_buffer *cmd_buffer, - struct pvr_sub_cmd_compute *sub_cmd) +static void +pvr_sub_cmd_compute_job_init(const struct pvr_physical_device *pdevice, + struct pvr_cmd_buffer *cmd_buffer, + struct pvr_sub_cmd_compute *sub_cmd) { + const struct pvr_device_runtime_info *dev_runtime_info = + &pdevice->dev_runtime_info; + const struct pvr_device_info *dev_info = &pdevice->dev_info; + if (sub_cmd->uses_barrier) sub_cmd->submit_info.flags |= PVR_WINSYS_COMPUTE_FLAG_PREVENT_ALL_OVERLAP; @@ -1102,7 +1107,7 @@ static void pvr_sub_cmd_compute_job_init(const struct pvr_device_info *dev_info, if (PVR_HAS_FEATURE(dev_info, cluster_grouping) && PVR_HAS_FEATURE(dev_info, slc_mcu_cache_controls) && - rogue_get_num_phantoms(dev_info) > 1 && sub_cmd->uses_atomic_ops) { + dev_runtime_info->num_phantoms > 1 && sub_cmd->uses_atomic_ops) { /* Each phantom has its own MCU, so atomicity can only be guaranteed * when all work items are processed on the same phantom. This means we * need to disable all USCs other than those of the first phantom, which @@ -1131,14 +1136,17 @@ static void pvr_sub_cmd_compute_job_init(const struct pvr_device_info *dev_info, (1024 / PVRX(CDMCTRL_KERNEL0_USC_COMMON_SIZE_UNIT_SIZE)) static uint32_t -pvr_compute_flat_slot_size(const struct pvr_device_info *dev_info, +pvr_compute_flat_slot_size(const struct pvr_physical_device *pdevice, uint32_t coeff_regs_count, bool use_barrier, uint32_t total_workitems) { + const struct pvr_device_runtime_info *dev_runtime_info = + &pdevice->dev_runtime_info; + const struct pvr_device_info *dev_info = &pdevice->dev_info; uint32_t max_workgroups_per_task = ROGUE_CDM_MAX_PACKED_WORKGROUPS_PER_TASK; uint32_t max_avail_coeff_regs = - rogue_get_cdm_max_local_mem_size_regs(dev_info); + dev_runtime_info->cdm_max_local_mem_size_regs; uint32_t localstore_chunks_count = DIV_ROUND_UP(coeff_regs_count << 2, PVRX(CDMCTRL_KERNEL0_USC_COMMON_SIZE_UNIT_SIZE)); @@ -1309,8 +1317,7 @@ pvr_compute_generate_fence(struct pvr_cmd_buffer *cmd_buffer, { const struct pvr_pds_upload *program = &cmd_buffer->device->pds_compute_fence_program; - const struct pvr_device_info *dev_info = - &cmd_buffer->device->pdevice->dev_info; + const struct pvr_physical_device *pdevice = cmd_buffer->device->pdevice; struct pvr_csb *csb = &sub_cmd->control_stream; struct pvr_compute_kernel_info info = { @@ -1336,7 +1343,7 @@ pvr_compute_generate_fence(struct pvr_cmd_buffer *cmd_buffer, /* Here we calculate the slot size. This can depend on the use of barriers, * local memory, BRN's or other factors. */ - info.max_instances = pvr_compute_flat_slot_size(dev_info, 0U, false, 1U); + info.max_instances = pvr_compute_flat_slot_size(pdevice, 0U, false, 1U); pvr_compute_generate_control_stream(csb, &info); } @@ -1413,7 +1420,7 @@ static VkResult pvr_cmd_buffer_end_sub_cmd(struct pvr_cmd_buffer *cmd_buffer) return result; } - pvr_sub_cmd_compute_job_init(&device->pdevice->dev_info, + pvr_sub_cmd_compute_job_init(device->pdevice, cmd_buffer, compute_sub_cmd); break; @@ -2838,8 +2845,7 @@ static VkResult pvr_setup_descriptor_mappings( static void pvr_compute_update_shared(struct pvr_cmd_buffer *cmd_buffer, struct pvr_sub_cmd_compute *const sub_cmd) { - const struct pvr_device_info *dev_info = - &cmd_buffer->device->pdevice->dev_info; + const struct pvr_physical_device *pdevice = cmd_buffer->device->pdevice; struct pvr_cmd_buffer_state *state = &cmd_buffer->state; struct pvr_csb *csb = &sub_cmd->control_stream; const struct pvr_compute_pipeline *pipeline = state->compute_pipeline; @@ -2892,18 +2898,21 @@ static void pvr_compute_update_shared(struct pvr_cmd_buffer *cmd_buffer, /* We don't need to pad the workgroup size. */ info.max_instances = - pvr_compute_flat_slot_size(dev_info, const_shared_reg_count, false, 1U); + pvr_compute_flat_slot_size(pdevice, const_shared_reg_count, false, 1U); pvr_compute_generate_control_stream(csb, &info); } static uint32_t -pvr_compute_flat_pad_workgroup_size(const struct pvr_device_info *dev_info, +pvr_compute_flat_pad_workgroup_size(const struct pvr_physical_device *pdevice, uint32_t workgroup_size, uint32_t coeff_regs_count) { + const struct pvr_device_runtime_info *dev_runtime_info = + &pdevice->dev_runtime_info; + const struct pvr_device_info *dev_info = &pdevice->dev_info; uint32_t max_avail_coeff_regs = - rogue_get_cdm_max_local_mem_size_regs(dev_info); + dev_runtime_info->cdm_max_local_mem_size_regs; uint32_t coeff_regs_count_aligned = ALIGN_POT(coeff_regs_count, PVRX(CDMCTRL_KERNEL0_USC_COMMON_SIZE_UNIT_SIZE) >> 2U); @@ -2934,8 +2943,9 @@ static void pvr_compute_update_kernel( struct pvr_sub_cmd_compute *const sub_cmd, const uint32_t global_workgroup_size[static const PVR_WORKGROUP_DIMENSIONS]) { - const struct pvr_device_info *dev_info = - &cmd_buffer->device->pdevice->dev_info; + const struct pvr_physical_device *pdevice = cmd_buffer->device->pdevice; + const struct pvr_device_runtime_info *dev_runtime_info = + &pdevice->dev_runtime_info; struct pvr_cmd_buffer_state *state = &cmd_buffer->state; struct pvr_csb *csb = &sub_cmd->control_stream; const struct pvr_compute_pipeline *pipeline = state->compute_pipeline; @@ -2976,7 +2986,7 @@ static void pvr_compute_update_kernel( if (work_size > ROGUE_MAX_INSTANCES_PER_TASK) { /* Enforce a single workgroup per cluster through allocation starvation. */ - coeff_regs = rogue_get_cdm_max_local_mem_size_regs(dev_info); + coeff_regs = dev_runtime_info->cdm_max_local_mem_size_regs; } else { coeff_regs = pipeline->state.shader.coefficient_register_count; } @@ -2991,14 +3001,14 @@ static void pvr_compute_update_kernel( coeff_regs += pipeline->state.shader.const_shared_reg_count; work_size = - pvr_compute_flat_pad_workgroup_size(dev_info, work_size, coeff_regs); + pvr_compute_flat_pad_workgroup_size(pdevice, work_size, coeff_regs); info.local_size[0] = work_size; info.local_size[1] = 1U; info.local_size[2] = 1U; info.max_instances = - pvr_compute_flat_slot_size(dev_info, coeff_regs, false, work_size); + pvr_compute_flat_slot_size(pdevice, coeff_regs, false, work_size); pvr_compute_generate_control_stream(csb, &info); } @@ -3632,8 +3642,7 @@ pvr_setup_fragment_state_pointers(struct pvr_cmd_buffer *const cmd_buffer, &state->gfx_pipeline->fragment_shader_state.pds_coeff_program; const struct pvr_pipeline_stage_state *fragment_state = &state->gfx_pipeline->fragment_shader_state.stage_state; - struct pvr_device_info *const dev_info = - &cmd_buffer->device->pdevice->dev_info; + const struct pvr_physical_device *pdevice = cmd_buffer->device->pdevice; struct pvr_emit_state *const emit_state = &state->emit_state; struct pvr_ppp_state *const ppp_state = &state->ppp_state; @@ -3659,7 +3668,7 @@ pvr_setup_fragment_state_pointers(struct pvr_cmd_buffer *const cmd_buffer, const uint32_t max_tiles_in_flight = pvr_calc_fscommon_size_and_tiles_in_flight( - dev_info, + pdevice, usc_shared_size * PVRX(TA_STATE_PDS_SIZEINFO2_USC_SHAREDSIZE_UNIT_SIZE), 1); diff --git a/src/imagination/vulkan/pvr_device.c b/src/imagination/vulkan/pvr_device.c index d03373a40e9..16407fff347 100644 --- a/src/imagination/vulkan/pvr_device.c +++ b/src/imagination/vulkan/pvr_device.c @@ -661,33 +661,35 @@ void pvr_GetPhysicalDeviceFeatures2(VkPhysicalDevice physicalDevice, /* TODO: See if this function can be improved once fully implemented. */ uint32_t pvr_calc_fscommon_size_and_tiles_in_flight( - const struct pvr_device_info *dev_info, + const struct pvr_physical_device *pdevice, uint32_t fs_common_size, uint32_t min_tiles_in_flight) { + const struct pvr_device_runtime_info *dev_runtime_info = + &pdevice->dev_runtime_info; + const struct pvr_device_info *dev_info = &pdevice->dev_info; uint32_t max_tiles_in_flight; uint32_t num_allocs; if (PVR_HAS_FEATURE(dev_info, s8xe)) { num_allocs = PVR_GET_FEATURE_VALUE(dev_info, num_raster_pipes, 0U); } else { - uint32_t num_phantoms = rogue_get_num_phantoms(dev_info); uint32_t min_cluster_per_phantom = 0; - if (num_phantoms > 1) { + if (dev_runtime_info->num_phantoms > 1) { pvr_finishme("Unimplemented path!!"); } else { min_cluster_per_phantom = PVR_GET_FEATURE_VALUE(dev_info, num_clusters, 1U); } - if (num_phantoms > 1) + if (dev_runtime_info->num_phantoms > 1) pvr_finishme("Unimplemented path!!"); - if (num_phantoms > 2) + if (dev_runtime_info->num_phantoms > 2) pvr_finishme("Unimplemented path!!"); - if (num_phantoms > 3) + if (dev_runtime_info->num_phantoms > 3) pvr_finishme("Unimplemented path!!"); if (min_cluster_per_phantom >= 4) @@ -702,6 +704,8 @@ uint32_t pvr_calc_fscommon_size_and_tiles_in_flight( PVR_GET_FEATURE_VALUE(dev_info, isp_max_tiles_in_flight, 1U); if (fs_common_size == UINT_MAX) { + const struct pvr_device_runtime_info *dev_runtime_info = + &pdevice->dev_runtime_info; uint32_t max_common_size; num_allocs *= MIN2(min_tiles_in_flight, max_tiles_in_flight); @@ -711,8 +715,8 @@ uint32_t pvr_calc_fscommon_size_and_tiles_in_flight( num_allocs += 1; } - max_common_size = rogue_get_reserved_shared_size(dev_info) - - rogue_get_max_coeffs(dev_info); + max_common_size = + dev_runtime_info->reserved_shared_size - dev_runtime_info->max_coeffs; /* Double resource requirements to deal with fragmentation. */ max_common_size /= num_allocs * 2; @@ -764,7 +768,7 @@ pvr_get_physical_device_descriptor_limits(struct pvr_physical_device *pdevice) }; const uint32_t common_size = - pvr_calc_fscommon_size_and_tiles_in_flight(&pdevice->dev_info, -1, 1); + pvr_calc_fscommon_size_and_tiles_in_flight(pdevice, -1, 1); enum pvr_descriptor_cs_level cs_level; if (common_size >= 2048) { diff --git a/src/imagination/vulkan/pvr_job_common.c b/src/imagination/vulkan/pvr_job_common.c index 24f18df6042..fb8f31b98d7 100644 --- a/src/imagination/vulkan/pvr_job_common.c +++ b/src/imagination/vulkan/pvr_job_common.c @@ -286,13 +286,15 @@ void pvr_pbe_pack_state( * total_tiles_in_flight so that CR_ISP_CTL can be fully packed in * pvr_render_job_ws_fragment_state_init(). */ -void pvr_setup_tiles_in_flight(const struct pvr_device_info *dev_info, - uint32_t msaa_mode, - uint32_t pixel_width, - bool paired_tiles, - uint32_t max_tiles_in_flight, - uint32_t *const isp_ctl_out, - uint32_t *const pixel_ctl_out) +void pvr_setup_tiles_in_flight( + const struct pvr_device_info *dev_info, + const struct pvr_device_runtime_info *dev_runtime_info, + uint32_t msaa_mode, + uint32_t pixel_width, + bool paired_tiles, + uint32_t max_tiles_in_flight, + uint32_t *const isp_ctl_out, + uint32_t *const pixel_ctl_out) { uint32_t total_tiles_in_flight = 0; uint32_t usable_partition_size; @@ -347,9 +349,8 @@ void pvr_setup_tiles_in_flight(const struct pvr_device_info *dev_info, /* Maximum available partition space for partitions of this size. */ max_partitions = PVR_GET_FEATURE_VALUE(dev_info, max_partitions, 0); - usable_partition_size = - MIN2(rogue_get_total_reserved_partition_size(dev_info), - partition_size * max_partitions); + usable_partition_size = MIN2(dev_runtime_info->total_reserved_partition_size, + partition_size * max_partitions); if (PVR_GET_FEATURE_VALUE(dev_info, common_store_size_in_dwords, 0) < (1024 * 4 * 4)) { @@ -371,7 +372,7 @@ void pvr_setup_tiles_in_flight(const struct pvr_device_info *dev_info, MIN2(max_partitions, usable_partition_size / partition_size); if (PVR_HAS_FEATURE(dev_info, xt_top_infrastructure)) - max_phantoms = rogue_get_num_phantoms(dev_info); + max_phantoms = dev_runtime_info->num_phantoms; else if (PVR_HAS_FEATURE(dev_info, roguexe)) max_phantoms = PVR_GET_FEATURE_VALUE(dev_info, num_raster_pipes, 0); else @@ -399,7 +400,7 @@ void pvr_setup_tiles_in_flight(const struct pvr_device_info *dev_info, if (!PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format) || PVR_GET_FEATURE_VALUE(dev_info, simple_parameter_format_version, 0) != 2) { - isp_tiles_in_flight /= rogue_get_num_phantoms(dev_info); + isp_tiles_in_flight /= dev_runtime_info->num_phantoms; } isp_tiles_in_flight = MIN2(usc_tiles_in_flight, isp_tiles_in_flight); diff --git a/src/imagination/vulkan/pvr_job_common.h b/src/imagination/vulkan/pvr_job_common.h index 56bf81831a9..88a39503e07 100644 --- a/src/imagination/vulkan/pvr_job_common.h +++ b/src/imagination/vulkan/pvr_job_common.h @@ -140,12 +140,14 @@ void pvr_pbe_get_src_format_and_gamma(VkFormat vk_format, uint32_t *const src_format_out, enum pvr_pbe_gamma *const gamma_out); -void pvr_setup_tiles_in_flight(const struct pvr_device_info *dev_info, - uint32_t msaa_mode, - uint32_t pixel_width, - bool paired_tiles, - uint32_t max_tiles_in_flight, - uint32_t *const isp_ctl_out, - uint32_t *const pixel_ctl_out); +void pvr_setup_tiles_in_flight( + const struct pvr_device_info *dev_info, + const struct pvr_device_runtime_info *dev_runtime_info, + uint32_t msaa_mode, + uint32_t pixel_width, + bool paired_tiles, + uint32_t max_tiles_in_flight, + uint32_t *const isp_ctl_out, + uint32_t *const pixel_ctl_out); #endif /* PVR_JOB_COMMON_H */ diff --git a/src/imagination/vulkan/pvr_job_context.c b/src/imagination/vulkan/pvr_job_context.c index ecc5e7ea03b..bc84ea0a761 100644 --- a/src/imagination/vulkan/pvr_job_context.c +++ b/src/imagination/vulkan/pvr_job_context.c @@ -925,6 +925,8 @@ static VkResult pvr_pds_sr_fence_terminate_program_create_and_upload( { const uint32_t pds_data_alignment = PVRX(VDMCTRL_PDS_STATE0_PDS_DATA_SIZE_UNIT_SIZE) / 4U; + const struct pvr_device_runtime_info *dev_runtime_info = + &device->pdevice->dev_runtime_info; ASSERTED const struct pvr_device_info *dev_info = &device->pdevice->dev_info; uint32_t staging_buffer[PVRX(PDS_TASK_PROGRAM_SIZE) >> 2U]; struct pvr_pds_fence_program program = { 0 }; @@ -934,7 +936,7 @@ static VkResult pvr_pds_sr_fence_terminate_program_create_and_upload( /* SW_COMPUTE_PDS_BARRIER is not supported with 2 or more phantoms. */ assert(!(PVR_NEED_SW_COMPUTE_PDS_BARRIER(dev_info) && - rogue_get_num_phantoms(dev_info) >= 2)); + dev_runtime_info->num_phantoms >= 2)); pvr_pds_generate_fence_terminate_program(&program, staging_buffer, diff --git a/src/imagination/vulkan/pvr_job_render.c b/src/imagination/vulkan/pvr_job_render.c index f60b39e5e4f..80949db8e1f 100644 --- a/src/imagination/vulkan/pvr_job_render.c +++ b/src/imagination/vulkan/pvr_job_render.c @@ -1298,6 +1298,8 @@ pvr_render_job_ws_fragment_state_init(struct pvr_render_ctx *ctx, { const enum PVRX(CR_ISP_AA_MODE_TYPE) isp_aa_mode = pvr_cr_isp_aa_mode_type(job->samples); + const struct pvr_device_runtime_info *dev_runtime_info = + &ctx->device->pdevice->dev_runtime_info; const struct pvr_device_info *dev_info = &ctx->device->pdevice->dev_info; uint32_t isp_ctl; @@ -1305,6 +1307,7 @@ pvr_render_job_ws_fragment_state_init(struct pvr_render_ctx *ctx, /* FIXME: pass in the number of samples rather than isp_aa_mode? */ pvr_setup_tiles_in_flight(dev_info, + dev_runtime_info, isp_aa_mode, job->pixel_output_width, false, @@ -1340,7 +1343,7 @@ pvr_render_job_ws_fragment_state_init(struct pvr_render_ctx *ctx, if (PVR_HAS_FEATURE(dev_info, cluster_grouping) && PVR_HAS_FEATURE(dev_info, slc_mcu_cache_controls) && - rogue_get_num_phantoms(dev_info) > 1 && job->frag_uses_atomic_ops) { + dev_runtime_info->num_phantoms > 1 && job->frag_uses_atomic_ops) { /* Each phantom has its own MCU, so atomicity can only be guaranteed * when all work items are processed on the same phantom. This means we * need to disable all USCs other than those of the first phantom, which diff --git a/src/imagination/vulkan/pvr_private.h b/src/imagination/vulkan/pvr_private.h index 3b62f9d1e9a..c33e40908b4 100644 --- a/src/imagination/vulkan/pvr_private.h +++ b/src/imagination/vulkan/pvr_private.h @@ -199,9 +199,7 @@ struct pvr_physical_device { struct pvr_winsys *ws; struct pvr_device_info dev_info; - struct pvr_device_runtime_info { - uint32_t core_count; - } dev_runtime_info; + struct pvr_device_runtime_info dev_runtime_info; VkPhysicalDeviceMemoryProperties memory; @@ -1272,7 +1270,7 @@ struct pvr_load_op { }; uint32_t pvr_calc_fscommon_size_and_tiles_in_flight( - const struct pvr_device_info *dev_info, + const struct pvr_physical_device *pdevice, uint32_t fs_common_size, uint32_t min_tiles_in_flight); diff --git a/src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv.c b/src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv.c index 6c24ec604ad..5da06d51969 100644 --- a/src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv.c +++ b/src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv.c @@ -26,6 +26,7 @@ #include #include +#include "hwdef/rogue_hw_utils.h" #include "pvr_csb.h" #include "pvr_device_info.h" #include "pvr_private.h" @@ -42,6 +43,7 @@ #include "pvr_winsys.h" #include "pvr_winsys_helper.h" #include "util/log.h" +#include "util/macros.h" #include "util/os_misc.h" #include "vk_log.h" @@ -377,6 +379,125 @@ static void pvr_srv_winsys_destroy(struct pvr_winsys *ws) pvr_srv_connection_destroy(fd); } +static uint64_t +pvr_srv_get_min_free_list_size(const struct pvr_device_info *dev_info) +{ + uint64_t min_num_pages; + + if (PVR_HAS_FEATURE(dev_info, roguexe)) { + if (PVR_HAS_QUIRK(dev_info, 66011)) + min_num_pages = 40U; + else + min_num_pages = 25U; + } else { + min_num_pages = 50U; + } + + return min_num_pages << ROGUE_BIF_PM_PHYSICAL_PAGE_SHIFT; +} + +static inline uint64_t +pvr_srv_get_num_phantoms(const struct pvr_device_info *dev_info) +{ + return DIV_ROUND_UP(PVR_GET_FEATURE_VALUE(dev_info, num_clusters, 1U), 4U); +} + +/* Return the total reserved size of partition in dwords. */ +static inline uint64_t pvr_srv_get_total_reserved_partition_size( + const struct pvr_device_info *dev_info) +{ + uint32_t tile_size_x = PVR_GET_FEATURE_VALUE(dev_info, tile_size_x, 0); + uint32_t tile_size_y = PVR_GET_FEATURE_VALUE(dev_info, tile_size_y, 0); + uint32_t max_partitions = PVR_GET_FEATURE_VALUE(dev_info, max_partitions, 0); + + if (tile_size_x == 16 && tile_size_y == 16) { + return tile_size_x * tile_size_y * max_partitions * + PVR_GET_FEATURE_VALUE(dev_info, + usc_min_output_registers_per_pix, + 0); + } + + return max_partitions * 1024U; +} + +static inline uint64_t +pvr_srv_get_reserved_shared_size(const struct pvr_device_info *dev_info) +{ + uint32_t common_store_size_in_dwords = + PVR_GET_FEATURE_VALUE(dev_info, + common_store_size_in_dwords, + 512U * 4U * 4U); + uint32_t reserved_shared_size = + common_store_size_in_dwords - (256U * 4U) - + pvr_srv_get_total_reserved_partition_size(dev_info); + + if (PVR_HAS_QUIRK(dev_info, 44079)) { + uint32_t common_store_split_point = (768U * 4U * 4U); + + return MIN2(common_store_split_point - (256U * 4U), reserved_shared_size); + } + + return reserved_shared_size; +} + +static inline uint64_t +pvr_srv_get_max_coeffs(const struct pvr_device_info *dev_info) +{ + uint32_t max_coeff_additional_portion = ROGUE_MAX_VERTEX_SHARED_REGISTERS; + uint32_t pending_allocation_shared_regs = 2U * 1024U; + uint32_t pending_allocation_coeff_regs = 0U; + uint32_t num_phantoms = pvr_srv_get_num_phantoms(dev_info); + uint32_t tiles_in_flight = + PVR_GET_FEATURE_VALUE(dev_info, isp_max_tiles_in_flight, 0); + uint32_t max_coeff_pixel_portion = + DIV_ROUND_UP(tiles_in_flight, num_phantoms); + + max_coeff_pixel_portion *= ROGUE_MAX_PIXEL_SHARED_REGISTERS; + + /* Compute tasks on cores with BRN48492 and without compute overlap may lock + * up without two additional lines of coeffs. + */ + if (PVR_HAS_QUIRK(dev_info, 48492) && + !PVR_HAS_FEATURE(dev_info, compute_overlap)) { + pending_allocation_coeff_regs = 2U * 1024U; + } + + if (PVR_HAS_ERN(dev_info, 38748)) + pending_allocation_shared_regs = 0U; + + if (PVR_HAS_ERN(dev_info, 38020)) { + max_coeff_additional_portion += + rogue_max_compute_shared_registers(dev_info); + } + + return pvr_srv_get_reserved_shared_size(dev_info) + + pending_allocation_coeff_regs - + (max_coeff_pixel_portion + max_coeff_additional_portion + + pending_allocation_shared_regs); +} + +static inline uint64_t +pvr_srv_get_cdm_max_local_mem_size_regs(const struct pvr_device_info *dev_info) +{ + uint32_t available_coeffs_in_dwords = pvr_srv_get_max_coeffs(dev_info); + + if (PVR_HAS_QUIRK(dev_info, 48492) && PVR_HAS_FEATURE(dev_info, roguexe) && + !PVR_HAS_FEATURE(dev_info, compute_overlap)) { + /* Driver must not use the 2 reserved lines. */ + available_coeffs_in_dwords -= ROGUE_CSRM_LINE_SIZE_IN_DWORDS * 2; + } + + /* The maximum amount of local memory available to a kernel is the minimum + * of the total number of coefficient registers available and the max common + * store allocation size which can be made by the CDM. + * + * If any coeff lines are reserved for tessellation or pixel then we need to + * subtract those too. + */ + return MIN2(available_coeffs_in_dwords, + ROGUE_MAX_PER_KERNEL_LOCAL_MEM_SIZE_REGS); +} + static int pvr_srv_winsys_device_info_init(struct pvr_winsys *ws, struct pvr_device_info *dev_info, @@ -396,6 +517,16 @@ pvr_srv_winsys_device_info_init(struct pvr_winsys *ws, return ret; } + runtime_info->min_free_list_size = pvr_srv_get_min_free_list_size(dev_info); + runtime_info->reserved_shared_size = + pvr_srv_get_reserved_shared_size(dev_info); + runtime_info->total_reserved_partition_size = + pvr_srv_get_total_reserved_partition_size(dev_info); + runtime_info->num_phantoms = pvr_srv_get_num_phantoms(dev_info); + runtime_info->max_coeffs = pvr_srv_get_max_coeffs(dev_info); + runtime_info->cdm_max_local_mem_size_regs = + pvr_srv_get_cdm_max_local_mem_size_regs(dev_info); + if (PVR_HAS_FEATURE(dev_info, gpu_multicore_support)) { result = pvr_srv_get_multicore_info(srv_ws->render_fd, 0,