pvr: Move BRN 44079, 48492 and 66011 code into pvrsrvkm-specific directory

The new kernel mode driver will provide the relevant information directly to
userspace, so this code is only required for pvrsrvkm.

Signed-off-by: Sarah Walker <sarah.walker@imgtec.com>
Reviewed-by: Karmjit Mahil <Karmjit.Mahil@imgtec.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17595>
Authored by Sarah Walker on 2022-07-04 10:05:30 +01:00; committed by Marge Bot
parent fa79020ba9
commit f286cab27b
10 changed files with 217 additions and 162 deletions

View File

@@ -356,6 +356,16 @@ struct pvr_device_info {
    struct pvr_device_quirks quirks;
 };
 
+struct pvr_device_runtime_info {
+   uint64_t min_free_list_size;
+   uint64_t reserved_shared_size;
+   uint64_t total_reserved_partition_size;
+   uint64_t num_phantoms;
+   uint64_t max_coeffs;
+   uint64_t cdm_max_local_mem_size_regs;
+   uint32_t core_count;
+};
+
 /**
  * Packs B, V, N and C values into a 64-bit unsigned integer.
  *
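
For context, the values in pvr_device_runtime_info are computed once by the winsys at device-info init time, so callers read them directly instead of re-deriving them on every call. A minimal sketch of the consuming side, assuming a populated pdevice (the helper name here is hypothetical):

static uint64_t
example_cdm_max_local_mem_size_regs(const struct pvr_physical_device *pdevice)
{
   /* Cached by the winsys; previously recomputed on every call via
    * rogue_get_cdm_max_local_mem_size_regs(). */
   return pdevice->dev_runtime_info.cdm_max_local_mem_size_regs;
}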

View File

@@ -172,15 +172,6 @@ rogue_get_macrotile_array_size(const struct pvr_device_info *dev_info)
    return num_macrotiles_x * num_macrotiles_y * 8U;
 }
 
-/* To get the number of required Bernado/Phantom(s), divide the number of
- * clusters by 4 and round up.
- */
-static inline uint32_t
-rogue_get_num_phantoms(const struct pvr_device_info *dev_info)
-{
-   return DIV_ROUND_UP(PVR_GET_FEATURE_VALUE(dev_info, num_clusters, 1U), 4U);
-}
-
 /* Region header size in bytes. */
 static inline uint32_t
 rogue_get_region_header_size(const struct pvr_device_info *dev_info)
@@ -198,24 +189,6 @@ rogue_get_region_header_size(const struct pvr_device_info *dev_info)
    return 5;
 }
 
-/* Return the total reserved size of partition in dwords. */
-static inline uint32_t
-rogue_get_total_reserved_partition_size(const struct pvr_device_info *dev_info)
-{
-   uint32_t tile_size_x = PVR_GET_FEATURE_VALUE(dev_info, tile_size_x, 0);
-   uint32_t tile_size_y = PVR_GET_FEATURE_VALUE(dev_info, tile_size_y, 0);
-   uint32_t max_partitions = PVR_GET_FEATURE_VALUE(dev_info, max_partitions, 0);
-
-   if (tile_size_x == 16 && tile_size_y == 16) {
-      return tile_size_x * tile_size_y * max_partitions *
-             PVR_GET_FEATURE_VALUE(dev_info,
-                                   usc_min_output_registers_per_pix,
-                                   0);
-   }
-
-   return max_partitions * 1024U;
-}
-
 static inline uint32_t
 rogue_get_render_size_max(const struct pvr_device_info *dev_info)
 {
@@ -251,26 +224,6 @@ static inline uint32_t pvr_get_max_user_vertex_output_components(
    return 128U;
 }
 
-static inline uint32_t
-rogue_get_reserved_shared_size(const struct pvr_device_info *dev_info)
-{
-   uint32_t common_store_size_in_dwords =
-      PVR_GET_FEATURE_VALUE(dev_info,
-                            common_store_size_in_dwords,
-                            512U * 4U * 4U);
-   uint32_t reserved_shared_size =
-      common_store_size_in_dwords - (256U * 4U) -
-      rogue_get_total_reserved_partition_size(dev_info);
-
-   if (PVR_HAS_QUIRK(dev_info, 44079)) {
-      uint32_t common_store_split_point = (768U * 4U * 4U);
-
-      return MIN2(common_store_split_point - (256U * 4U), reserved_shared_size);
-   }
-
-   return reserved_shared_size;
-}
-
 static inline uint32_t
 rogue_max_compute_shared_registers(const struct pvr_device_info *dev_info)
 {
@@ -280,42 +233,6 @@ rogue_max_compute_shared_registers(const struct pvr_device_info *dev_info)
    return 0U;
 }
 
-static inline uint32_t
-rogue_get_max_coeffs(const struct pvr_device_info *dev_info)
-{
-   uint32_t max_coeff_additional_portion = ROGUE_MAX_VERTEX_SHARED_REGISTERS;
-   uint32_t pending_allocation_shared_regs = 2U * 1024U;
-   uint32_t pending_allocation_coeff_regs = 0U;
-   uint32_t num_phantoms = rogue_get_num_phantoms(dev_info);
-   uint32_t tiles_in_flight =
-      PVR_GET_FEATURE_VALUE(dev_info, isp_max_tiles_in_flight, 0);
-   uint32_t max_coeff_pixel_portion =
-      DIV_ROUND_UP(tiles_in_flight, num_phantoms);
-
-   max_coeff_pixel_portion *= ROGUE_MAX_PIXEL_SHARED_REGISTERS;
-
-   /* Compute tasks on cores with BRN48492 and without compute overlap may lock
-    * up without two additional lines of coeffs.
-    */
-   if (PVR_HAS_QUIRK(dev_info, 48492) &&
-       !PVR_HAS_FEATURE(dev_info, compute_overlap)) {
-      pending_allocation_coeff_regs = 2U * 1024U;
-   }
-
-   if (PVR_HAS_ERN(dev_info, 38748))
-      pending_allocation_shared_regs = 0U;
-
-   if (PVR_HAS_ERN(dev_info, 38020)) {
-      max_coeff_additional_portion +=
-         rogue_max_compute_shared_registers(dev_info);
-   }
-
-   return rogue_get_reserved_shared_size(dev_info) +
-          pending_allocation_coeff_regs -
-          (max_coeff_pixel_portion + max_coeff_additional_portion +
-           pending_allocation_shared_regs);
-}
-
 static inline uint32_t
 rogue_get_cdm_context_resume_buffer_size(const struct pvr_device_info *dev_info)
 {
@@ -341,28 +258,6 @@ static inline uint32_t rogue_get_cdm_context_resume_buffer_alignment(
    return ROGUE_LLS_CDM_CONTEXT_RESUME_BUFFER_ALIGNMENT;
 }
 
-static inline uint32_t
-rogue_get_cdm_max_local_mem_size_regs(const struct pvr_device_info *dev_info)
-{
-   uint32_t available_coeffs_in_dwords = rogue_get_max_coeffs(dev_info);
-
-   if (PVR_HAS_QUIRK(dev_info, 48492) && PVR_HAS_FEATURE(dev_info, roguexe) &&
-       !PVR_HAS_FEATURE(dev_info, compute_overlap)) {
-      /* Driver must not use the 2 reserved lines. */
-      available_coeffs_in_dwords -= ROGUE_CSRM_LINE_SIZE_IN_DWORDS * 2;
-   }
-
-   /* The maximum amount of local memory available to a kernel is the minimum
-    * of the total number of coefficient registers available and the max common
-    * store allocation size which can be made by the CDM.
-    *
-    * If any coeff lines are reserved for tessellation or pixel then we need to
-    * subtract those too.
-    */
-   return MIN2(available_coeffs_in_dwords,
-               ROGUE_MAX_PER_KERNEL_LOCAL_MEM_SIZE_REGS);
-}
-
 static inline uint32_t
 rogue_get_compute_max_work_group_size(const struct pvr_device_info *dev_info)
 {
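
The helpers deleted above reappear later in this commit as pvr_srv_* functions in the Services winsys; the arithmetic itself is unchanged. As a worked example of the phantom formula, one Bernado/Phantom serves up to four clusters, so the count rounds up. A self-contained sketch with illustrative cluster counts (not tied to any real core configuration):

#include <stdint.h>
#include <stdio.h>

/* Same rounding helper the driver gets from util/macros.h. */
#define DIV_ROUND_UP(n, d) (((n) + (d)-1) / (d))

int main(void)
{
   const uint32_t num_clusters[] = { 1U, 2U, 4U, 6U, 8U };

   for (unsigned i = 0; i < 5; i++) {
      /* 1, 2 and 4 clusters fit in one phantom; 6 and 8 need two. */
      printf("%u clusters -> %u phantom(s)\n",
             num_clusters[i],
             DIV_ROUND_UP(num_clusters[i], 4U));
   }

   return 0;
}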

View File

@@ -1066,10 +1066,15 @@ static VkResult pvr_sub_cmd_gfx_job_init(const struct pvr_device_info *dev_info,
  */
 #define PVR_IDF_WDF_IN_REGISTER_CONST_COUNT 12U
 
-static void pvr_sub_cmd_compute_job_init(const struct pvr_device_info *dev_info,
-                                         struct pvr_cmd_buffer *cmd_buffer,
-                                         struct pvr_sub_cmd_compute *sub_cmd)
+static void
+pvr_sub_cmd_compute_job_init(const struct pvr_physical_device *pdevice,
+                             struct pvr_cmd_buffer *cmd_buffer,
+                             struct pvr_sub_cmd_compute *sub_cmd)
 {
+   const struct pvr_device_runtime_info *dev_runtime_info =
+      &pdevice->dev_runtime_info;
+   const struct pvr_device_info *dev_info = &pdevice->dev_info;
+
    if (sub_cmd->uses_barrier)
       sub_cmd->submit_info.flags |= PVR_WINSYS_COMPUTE_FLAG_PREVENT_ALL_OVERLAP;
@@ -1102,7 +1107,7 @@ static void pvr_sub_cmd_compute_job_init(const struct pvr_device_info *dev_info,
    if (PVR_HAS_FEATURE(dev_info, cluster_grouping) &&
        PVR_HAS_FEATURE(dev_info, slc_mcu_cache_controls) &&
-       rogue_get_num_phantoms(dev_info) > 1 && sub_cmd->uses_atomic_ops) {
+       dev_runtime_info->num_phantoms > 1 && sub_cmd->uses_atomic_ops) {
       /* Each phantom has its own MCU, so atomicity can only be guaranteed
        * when all work items are processed on the same phantom. This means we
        * need to disable all USCs other than those of the first phantom, which
@@ -1131,14 +1136,17 @@ static void pvr_sub_cmd_compute_job_init(const struct pvr_device_info *dev_info,
    (1024 / PVRX(CDMCTRL_KERNEL0_USC_COMMON_SIZE_UNIT_SIZE))
 
 static uint32_t
-pvr_compute_flat_slot_size(const struct pvr_device_info *dev_info,
+pvr_compute_flat_slot_size(const struct pvr_physical_device *pdevice,
                            uint32_t coeff_regs_count,
                            bool use_barrier,
                            uint32_t total_workitems)
 {
+   const struct pvr_device_runtime_info *dev_runtime_info =
+      &pdevice->dev_runtime_info;
+   const struct pvr_device_info *dev_info = &pdevice->dev_info;
    uint32_t max_workgroups_per_task = ROGUE_CDM_MAX_PACKED_WORKGROUPS_PER_TASK;
    uint32_t max_avail_coeff_regs =
-      rogue_get_cdm_max_local_mem_size_regs(dev_info);
+      dev_runtime_info->cdm_max_local_mem_size_regs;
    uint32_t localstore_chunks_count =
       DIV_ROUND_UP(coeff_regs_count << 2,
                    PVRX(CDMCTRL_KERNEL0_USC_COMMON_SIZE_UNIT_SIZE));
@@ -1309,8 +1317,7 @@ pvr_compute_generate_fence(struct pvr_cmd_buffer *cmd_buffer,
 {
    const struct pvr_pds_upload *program =
       &cmd_buffer->device->pds_compute_fence_program;
-   const struct pvr_device_info *dev_info =
-      &cmd_buffer->device->pdevice->dev_info;
+   const struct pvr_physical_device *pdevice = cmd_buffer->device->pdevice;
    struct pvr_csb *csb = &sub_cmd->control_stream;
 
    struct pvr_compute_kernel_info info = {
@@ -1336,7 +1343,7 @@ pvr_compute_generate_fence(struct pvr_cmd_buffer *cmd_buffer,
    /* Here we calculate the slot size. This can depend on the use of barriers,
    * local memory, BRN's or other factors.
    */
-   info.max_instances = pvr_compute_flat_slot_size(dev_info, 0U, false, 1U);
+   info.max_instances = pvr_compute_flat_slot_size(pdevice, 0U, false, 1U);
 
    pvr_compute_generate_control_stream(csb, &info);
 }
@@ -1413,7 +1420,7 @@ static VkResult pvr_cmd_buffer_end_sub_cmd(struct pvr_cmd_buffer *cmd_buffer)
          return result;
       }
 
-      pvr_sub_cmd_compute_job_init(&device->pdevice->dev_info,
+      pvr_sub_cmd_compute_job_init(device->pdevice,
                                    cmd_buffer,
                                    compute_sub_cmd);
       break;
@@ -2838,8 +2845,7 @@ static VkResult pvr_setup_descriptor_mappings(
 static void pvr_compute_update_shared(struct pvr_cmd_buffer *cmd_buffer,
                                       struct pvr_sub_cmd_compute *const sub_cmd)
 {
-   const struct pvr_device_info *dev_info =
-      &cmd_buffer->device->pdevice->dev_info;
+   const struct pvr_physical_device *pdevice = cmd_buffer->device->pdevice;
    struct pvr_cmd_buffer_state *state = &cmd_buffer->state;
    struct pvr_csb *csb = &sub_cmd->control_stream;
    const struct pvr_compute_pipeline *pipeline = state->compute_pipeline;
@@ -2892,18 +2898,21 @@ static void pvr_compute_update_shared(struct pvr_cmd_buffer *cmd_buffer,
    /* We don't need to pad the workgroup size. */
    info.max_instances =
-      pvr_compute_flat_slot_size(dev_info, const_shared_reg_count, false, 1U);
+      pvr_compute_flat_slot_size(pdevice, const_shared_reg_count, false, 1U);
 
    pvr_compute_generate_control_stream(csb, &info);
 }
 
 static uint32_t
-pvr_compute_flat_pad_workgroup_size(const struct pvr_device_info *dev_info,
+pvr_compute_flat_pad_workgroup_size(const struct pvr_physical_device *pdevice,
                                     uint32_t workgroup_size,
                                     uint32_t coeff_regs_count)
 {
+   const struct pvr_device_runtime_info *dev_runtime_info =
+      &pdevice->dev_runtime_info;
+   const struct pvr_device_info *dev_info = &pdevice->dev_info;
    uint32_t max_avail_coeff_regs =
-      rogue_get_cdm_max_local_mem_size_regs(dev_info);
+      dev_runtime_info->cdm_max_local_mem_size_regs;
    uint32_t coeff_regs_count_aligned =
       ALIGN_POT(coeff_regs_count,
                 PVRX(CDMCTRL_KERNEL0_USC_COMMON_SIZE_UNIT_SIZE) >> 2U);
@@ -2934,8 +2943,9 @@ static void pvr_compute_update_kernel(
    struct pvr_sub_cmd_compute *const sub_cmd,
    const uint32_t global_workgroup_size[static const PVR_WORKGROUP_DIMENSIONS])
 {
-   const struct pvr_device_info *dev_info =
-      &cmd_buffer->device->pdevice->dev_info;
+   const struct pvr_physical_device *pdevice = cmd_buffer->device->pdevice;
+   const struct pvr_device_runtime_info *dev_runtime_info =
+      &pdevice->dev_runtime_info;
    struct pvr_cmd_buffer_state *state = &cmd_buffer->state;
    struct pvr_csb *csb = &sub_cmd->control_stream;
    const struct pvr_compute_pipeline *pipeline = state->compute_pipeline;
@@ -2976,7 +2986,7 @@ static void pvr_compute_update_kernel(
    if (work_size > ROGUE_MAX_INSTANCES_PER_TASK) {
      /* Enforce a single workgroup per cluster through allocation starvation.
      */
-      coeff_regs = rogue_get_cdm_max_local_mem_size_regs(dev_info);
+      coeff_regs = dev_runtime_info->cdm_max_local_mem_size_regs;
   } else {
      coeff_regs = pipeline->state.shader.coefficient_register_count;
   }
@@ -2991,14 +3001,14 @@ static void pvr_compute_update_kernel(
    coeff_regs += pipeline->state.shader.const_shared_reg_count;
 
    work_size =
-      pvr_compute_flat_pad_workgroup_size(dev_info, work_size, coeff_regs);
+      pvr_compute_flat_pad_workgroup_size(pdevice, work_size, coeff_regs);
 
    info.local_size[0] = work_size;
    info.local_size[1] = 1U;
    info.local_size[2] = 1U;
 
    info.max_instances =
-      pvr_compute_flat_slot_size(dev_info, coeff_regs, false, work_size);
+      pvr_compute_flat_slot_size(pdevice, coeff_regs, false, work_size);
 
    pvr_compute_generate_control_stream(csb, &info);
 }
@@ -3632,8 +3642,7 @@ pvr_setup_fragment_state_pointers(struct pvr_cmd_buffer *const cmd_buffer,
       &state->gfx_pipeline->fragment_shader_state.pds_coeff_program;
    const struct pvr_pipeline_stage_state *fragment_state =
       &state->gfx_pipeline->fragment_shader_state.stage_state;
-   struct pvr_device_info *const dev_info =
-      &cmd_buffer->device->pdevice->dev_info;
+   const struct pvr_physical_device *pdevice = cmd_buffer->device->pdevice;
    struct pvr_emit_state *const emit_state = &state->emit_state;
    struct pvr_ppp_state *const ppp_state = &state->ppp_state;
@@ -3659,7 +3668,7 @@ pvr_setup_fragment_state_pointers(struct pvr_cmd_buffer *const cmd_buffer,
    const uint32_t max_tiles_in_flight =
       pvr_calc_fscommon_size_and_tiles_in_flight(
-         dev_info,
+         pdevice,
          usc_shared_size *
            PVRX(TA_STATE_PDS_SIZEINFO2_USC_SHAREDSIZE_UNIT_SIZE),
          1);
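
The changes above all follow one pattern: functions that used to take a pvr_device_info now take the whole pvr_physical_device and derive both views locally, so feature/quirk checks and cached runtime values flow from a single parameter. A minimal sketch of the idiom (the function name is hypothetical; the two struct types are the real ones):

static void
example_job_init(const struct pvr_physical_device *pdevice)
{
   /* dev_info still drives feature and quirk checks; dev_runtime_info
    * supplies the values the winsys precomputed at init time. */
   const struct pvr_device_runtime_info *dev_runtime_info =
      &pdevice->dev_runtime_info;
   const struct pvr_device_info *dev_info = &pdevice->dev_info;

   if (PVR_HAS_FEATURE(dev_info, cluster_grouping) &&
       dev_runtime_info->num_phantoms > 1) {
      /* Phantom-aware setup would go here. */
   }
}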

View File

@@ -661,33 +661,35 @@ void pvr_GetPhysicalDeviceFeatures2(VkPhysicalDevice physicalDevice,
 /* TODO: See if this function can be improved once fully implemented. */
 uint32_t pvr_calc_fscommon_size_and_tiles_in_flight(
-   const struct pvr_device_info *dev_info,
+   const struct pvr_physical_device *pdevice,
    uint32_t fs_common_size,
    uint32_t min_tiles_in_flight)
 {
+   const struct pvr_device_runtime_info *dev_runtime_info =
+      &pdevice->dev_runtime_info;
+   const struct pvr_device_info *dev_info = &pdevice->dev_info;
    uint32_t max_tiles_in_flight;
    uint32_t num_allocs;
 
    if (PVR_HAS_FEATURE(dev_info, s8xe)) {
       num_allocs = PVR_GET_FEATURE_VALUE(dev_info, num_raster_pipes, 0U);
    } else {
-      uint32_t num_phantoms = rogue_get_num_phantoms(dev_info);
       uint32_t min_cluster_per_phantom = 0;
 
-      if (num_phantoms > 1) {
+      if (dev_runtime_info->num_phantoms > 1) {
          pvr_finishme("Unimplemented path!!");
       } else {
          min_cluster_per_phantom =
            PVR_GET_FEATURE_VALUE(dev_info, num_clusters, 1U);
       }
 
-      if (num_phantoms > 1)
+      if (dev_runtime_info->num_phantoms > 1)
         pvr_finishme("Unimplemented path!!");
 
-      if (num_phantoms > 2)
+      if (dev_runtime_info->num_phantoms > 2)
        pvr_finishme("Unimplemented path!!");
 
-      if (num_phantoms > 3)
+      if (dev_runtime_info->num_phantoms > 3)
        pvr_finishme("Unimplemented path!!");
 
       if (min_cluster_per_phantom >= 4)
@@ -702,6 +704,8 @@ uint32_t pvr_calc_fscommon_size_and_tiles_in_flight(
       PVR_GET_FEATURE_VALUE(dev_info, isp_max_tiles_in_flight, 1U);
 
    if (fs_common_size == UINT_MAX) {
+      const struct pvr_device_runtime_info *dev_runtime_info =
+         &pdevice->dev_runtime_info;
       uint32_t max_common_size;
 
       num_allocs *= MIN2(min_tiles_in_flight, max_tiles_in_flight);
@@ -711,8 +715,8 @@ uint32_t pvr_calc_fscommon_size_and_tiles_in_flight(
         num_allocs += 1;
       }
 
-      max_common_size = rogue_get_reserved_shared_size(dev_info) -
-                        rogue_get_max_coeffs(dev_info);
+      max_common_size =
+         dev_runtime_info->reserved_shared_size - dev_runtime_info->max_coeffs;
 
       /* Double resource requirements to deal with fragmentation. */
       max_common_size /= num_allocs * 2;
@@ -764,7 +768,7 @@ pvr_get_physical_device_descriptor_limits(struct pvr_physical_device *pdevice)
    };
 
    const uint32_t common_size =
-      pvr_calc_fscommon_size_and_tiles_in_flight(&pdevice->dev_info, -1, 1);
+      pvr_calc_fscommon_size_and_tiles_in_flight(pdevice, -1, 1);
    enum pvr_descriptor_cs_level cs_level;
 
    if (common_size >= 2048) {
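
In the fs_common_size == UINT_MAX branch above, the common-store budget now comes from two cached values rather than two helper calls. A sketch of that arithmetic with clearly made-up placeholder numbers (the real values come from dev_runtime_info):

#include <stdint.h>

static uint32_t
example_max_common_size(uint32_t num_allocs)
{
   const uint64_t reserved_shared_size = 8192U; /* hypothetical value */
   const uint64_t max_coeffs = 4096U;           /* hypothetical value */

   uint64_t max_common_size = reserved_shared_size - max_coeffs;

   /* Double resource requirements to deal with fragmentation;
    * num_allocs must be non-zero by this point. */
   max_common_size /= num_allocs * 2U;

   return (uint32_t)max_common_size;
}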

View File

@@ -286,13 +286,15 @@ void pvr_pbe_pack_state(
  * total_tiles_in_flight so that CR_ISP_CTL can be fully packed in
  * pvr_render_job_ws_fragment_state_init().
  */
-void pvr_setup_tiles_in_flight(const struct pvr_device_info *dev_info,
-                               uint32_t msaa_mode,
-                               uint32_t pixel_width,
-                               bool paired_tiles,
-                               uint32_t max_tiles_in_flight,
-                               uint32_t *const isp_ctl_out,
-                               uint32_t *const pixel_ctl_out)
+void pvr_setup_tiles_in_flight(
+   const struct pvr_device_info *dev_info,
+   const struct pvr_device_runtime_info *dev_runtime_info,
+   uint32_t msaa_mode,
+   uint32_t pixel_width,
+   bool paired_tiles,
+   uint32_t max_tiles_in_flight,
+   uint32_t *const isp_ctl_out,
+   uint32_t *const pixel_ctl_out)
 {
    uint32_t total_tiles_in_flight = 0;
    uint32_t usable_partition_size;
@@ -347,9 +349,8 @@ void pvr_setup_tiles_in_flight(const struct pvr_device_info *dev_info,
    /* Maximum available partition space for partitions of this size. */
    max_partitions = PVR_GET_FEATURE_VALUE(dev_info, max_partitions, 0);
 
-   usable_partition_size =
-      MIN2(rogue_get_total_reserved_partition_size(dev_info),
-           partition_size * max_partitions);
+   usable_partition_size = MIN2(dev_runtime_info->total_reserved_partition_size,
+                                partition_size * max_partitions);
 
    if (PVR_GET_FEATURE_VALUE(dev_info, common_store_size_in_dwords, 0) <
        (1024 * 4 * 4)) {
@@ -371,7 +372,7 @@ void pvr_setup_tiles_in_flight(const struct pvr_device_info *dev_info,
       MIN2(max_partitions, usable_partition_size / partition_size);
 
    if (PVR_HAS_FEATURE(dev_info, xt_top_infrastructure))
-      max_phantoms = rogue_get_num_phantoms(dev_info);
+      max_phantoms = dev_runtime_info->num_phantoms;
    else if (PVR_HAS_FEATURE(dev_info, roguexe))
       max_phantoms = PVR_GET_FEATURE_VALUE(dev_info, num_raster_pipes, 0);
    else
@@ -399,7 +400,7 @@ void pvr_setup_tiles_in_flight(const struct pvr_device_info *dev_info,
    if (!PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format) ||
        PVR_GET_FEATURE_VALUE(dev_info, simple_parameter_format_version, 0) !=
           2) {
-      isp_tiles_in_flight /= rogue_get_num_phantoms(dev_info);
+      isp_tiles_in_flight /= dev_runtime_info->num_phantoms;
    }
 
    isp_tiles_in_flight = MIN2(usc_tiles_in_flight, isp_tiles_in_flight);
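
The clamp above pairs the cached total_reserved_partition_size with the space the chosen partition size can actually address. A sketch with illustrative feature values for a hypothetical 16x16-tile core (the formula matches pvr_srv_get_total_reserved_partition_size, added later in this commit):

#include <stdint.h>

#define MIN2(a, b) ((a) < (b) ? (a) : (b))

static uint64_t
example_usable_partition_size(uint64_t partition_size)
{
   const uint64_t tile_size_x = 16U, tile_size_y = 16U;
   const uint64_t max_partitions = 8U;              /* hypothetical */
   const uint64_t usc_min_output_regs_per_pix = 2U; /* hypothetical */

   /* 16x16 tiles: tile area x max partitions x min output registers. */
   const uint64_t total_reserved_partition_size =
      tile_size_x * tile_size_y * max_partitions *
      usc_min_output_regs_per_pix;

   return MIN2(total_reserved_partition_size,
               partition_size * max_partitions);
}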

View File

@@ -140,12 +140,14 @@ void pvr_pbe_get_src_format_and_gamma(VkFormat vk_format,
                                       uint32_t *const src_format_out,
                                       enum pvr_pbe_gamma *const gamma_out);
 
-void pvr_setup_tiles_in_flight(const struct pvr_device_info *dev_info,
-                               uint32_t msaa_mode,
-                               uint32_t pixel_width,
-                               bool paired_tiles,
-                               uint32_t max_tiles_in_flight,
-                               uint32_t *const isp_ctl_out,
-                               uint32_t *const pixel_ctl_out);
+void pvr_setup_tiles_in_flight(
+   const struct pvr_device_info *dev_info,
+   const struct pvr_device_runtime_info *dev_runtime_info,
+   uint32_t msaa_mode,
+   uint32_t pixel_width,
+   bool paired_tiles,
+   uint32_t max_tiles_in_flight,
+   uint32_t *const isp_ctl_out,
+   uint32_t *const pixel_ctl_out);
 
 #endif /* PVR_JOB_COMMON_H */

View File

@@ -925,6 +925,8 @@ static VkResult pvr_pds_sr_fence_terminate_program_create_and_upload(
 {
    const uint32_t pds_data_alignment =
       PVRX(VDMCTRL_PDS_STATE0_PDS_DATA_SIZE_UNIT_SIZE) / 4U;
+   const struct pvr_device_runtime_info *dev_runtime_info =
+      &device->pdevice->dev_runtime_info;
    ASSERTED const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
    uint32_t staging_buffer[PVRX(PDS_TASK_PROGRAM_SIZE) >> 2U];
    struct pvr_pds_fence_program program = { 0 };
@@ -934,7 +936,7 @@ static VkResult pvr_pds_sr_fence_terminate_program_create_and_upload(
    /* SW_COMPUTE_PDS_BARRIER is not supported with 2 or more phantoms. */
    assert(!(PVR_NEED_SW_COMPUTE_PDS_BARRIER(dev_info) &&
-            rogue_get_num_phantoms(dev_info) >= 2));
+            dev_runtime_info->num_phantoms >= 2));
 
    pvr_pds_generate_fence_terminate_program(&program,
                                             staging_buffer,

View File

@@ -1298,6 +1298,8 @@ pvr_render_job_ws_fragment_state_init(struct pvr_render_ctx *ctx,
 {
    const enum PVRX(CR_ISP_AA_MODE_TYPE)
       isp_aa_mode = pvr_cr_isp_aa_mode_type(job->samples);
+   const struct pvr_device_runtime_info *dev_runtime_info =
+      &ctx->device->pdevice->dev_runtime_info;
    const struct pvr_device_info *dev_info = &ctx->device->pdevice->dev_info;
    uint32_t isp_ctl;
@@ -1305,6 +1307,7 @@ pvr_render_job_ws_fragment_state_init(struct pvr_render_ctx *ctx,
    /* FIXME: pass in the number of samples rather than isp_aa_mode? */
    pvr_setup_tiles_in_flight(dev_info,
+                             dev_runtime_info,
                              isp_aa_mode,
                              job->pixel_output_width,
                              false,
@@ -1340,7 +1343,7 @@ pvr_render_job_ws_fragment_state_init(struct pvr_render_ctx *ctx,
    if (PVR_HAS_FEATURE(dev_info, cluster_grouping) &&
        PVR_HAS_FEATURE(dev_info, slc_mcu_cache_controls) &&
-       rogue_get_num_phantoms(dev_info) > 1 && job->frag_uses_atomic_ops) {
+       dev_runtime_info->num_phantoms > 1 && job->frag_uses_atomic_ops) {
       /* Each phantom has its own MCU, so atomicity can only be guaranteed
        * when all work items are processed on the same phantom. This means we
        * need to disable all USCs other than those of the first phantom, which

View File

@@ -199,9 +199,7 @@ struct pvr_physical_device {
    struct pvr_winsys *ws;
 
    struct pvr_device_info dev_info;
-   struct pvr_device_runtime_info {
-      uint32_t core_count;
-   } dev_runtime_info;
+   struct pvr_device_runtime_info dev_runtime_info;
 
    VkPhysicalDeviceMemoryProperties memory;
@@ -1272,7 +1270,7 @@ struct pvr_load_op {
 };
 
 uint32_t pvr_calc_fscommon_size_and_tiles_in_flight(
-   const struct pvr_device_info *dev_info,
+   const struct pvr_physical_device *pdevice,
    uint32_t fs_common_size,
    uint32_t min_tiles_in_flight);

View File

@@ -26,6 +26,7 @@
 #include <stdint.h>
 #include <xf86drm.h>
 
+#include "hwdef/rogue_hw_utils.h"
 #include "pvr_csb.h"
 #include "pvr_device_info.h"
 #include "pvr_private.h"
@@ -42,6 +43,7 @@
 #include "pvr_winsys.h"
 #include "pvr_winsys_helper.h"
 #include "util/log.h"
+#include "util/macros.h"
 #include "util/os_misc.h"
 #include "vk_log.h"
@@ -377,6 +379,125 @@ static void pvr_srv_winsys_destroy(struct pvr_winsys *ws)
    pvr_srv_connection_destroy(fd);
 }
 
+static uint64_t
+pvr_srv_get_min_free_list_size(const struct pvr_device_info *dev_info)
+{
+   uint64_t min_num_pages;
+
+   if (PVR_HAS_FEATURE(dev_info, roguexe)) {
+      if (PVR_HAS_QUIRK(dev_info, 66011))
+         min_num_pages = 40U;
+      else
+         min_num_pages = 25U;
+   } else {
+      min_num_pages = 50U;
+   }
+
+   return min_num_pages << ROGUE_BIF_PM_PHYSICAL_PAGE_SHIFT;
+}
+
+static inline uint64_t
+pvr_srv_get_num_phantoms(const struct pvr_device_info *dev_info)
+{
+   return DIV_ROUND_UP(PVR_GET_FEATURE_VALUE(dev_info, num_clusters, 1U), 4U);
+}
+
+/* Return the total reserved size of partition in dwords. */
+static inline uint64_t pvr_srv_get_total_reserved_partition_size(
+   const struct pvr_device_info *dev_info)
+{
+   uint32_t tile_size_x = PVR_GET_FEATURE_VALUE(dev_info, tile_size_x, 0);
+   uint32_t tile_size_y = PVR_GET_FEATURE_VALUE(dev_info, tile_size_y, 0);
+   uint32_t max_partitions = PVR_GET_FEATURE_VALUE(dev_info, max_partitions, 0);
+
+   if (tile_size_x == 16 && tile_size_y == 16) {
+      return tile_size_x * tile_size_y * max_partitions *
+             PVR_GET_FEATURE_VALUE(dev_info,
+                                   usc_min_output_registers_per_pix,
+                                   0);
+   }
+
+   return max_partitions * 1024U;
+}
+
+static inline uint64_t
+pvr_srv_get_reserved_shared_size(const struct pvr_device_info *dev_info)
+{
+   uint32_t common_store_size_in_dwords =
+      PVR_GET_FEATURE_VALUE(dev_info,
+                            common_store_size_in_dwords,
+                            512U * 4U * 4U);
+   uint32_t reserved_shared_size =
+      common_store_size_in_dwords - (256U * 4U) -
+      pvr_srv_get_total_reserved_partition_size(dev_info);
+
+   if (PVR_HAS_QUIRK(dev_info, 44079)) {
+      uint32_t common_store_split_point = (768U * 4U * 4U);
+
+      return MIN2(common_store_split_point - (256U * 4U), reserved_shared_size);
+   }
+
+   return reserved_shared_size;
+}
+
+static inline uint64_t
+pvr_srv_get_max_coeffs(const struct pvr_device_info *dev_info)
+{
+   uint32_t max_coeff_additional_portion = ROGUE_MAX_VERTEX_SHARED_REGISTERS;
+   uint32_t pending_allocation_shared_regs = 2U * 1024U;
+   uint32_t pending_allocation_coeff_regs = 0U;
+   uint32_t num_phantoms = pvr_srv_get_num_phantoms(dev_info);
+   uint32_t tiles_in_flight =
+      PVR_GET_FEATURE_VALUE(dev_info, isp_max_tiles_in_flight, 0);
+   uint32_t max_coeff_pixel_portion =
+      DIV_ROUND_UP(tiles_in_flight, num_phantoms);
+
+   max_coeff_pixel_portion *= ROGUE_MAX_PIXEL_SHARED_REGISTERS;
+
+   /* Compute tasks on cores with BRN48492 and without compute overlap may lock
+    * up without two additional lines of coeffs.
+    */
+   if (PVR_HAS_QUIRK(dev_info, 48492) &&
+       !PVR_HAS_FEATURE(dev_info, compute_overlap)) {
+      pending_allocation_coeff_regs = 2U * 1024U;
+   }
+
+   if (PVR_HAS_ERN(dev_info, 38748))
+      pending_allocation_shared_regs = 0U;
+
+   if (PVR_HAS_ERN(dev_info, 38020)) {
+      max_coeff_additional_portion +=
+         rogue_max_compute_shared_registers(dev_info);
+   }
+
+   return pvr_srv_get_reserved_shared_size(dev_info) +
+          pending_allocation_coeff_regs -
+          (max_coeff_pixel_portion + max_coeff_additional_portion +
+           pending_allocation_shared_regs);
+}
+
+static inline uint64_t
+pvr_srv_get_cdm_max_local_mem_size_regs(const struct pvr_device_info *dev_info)
+{
+   uint32_t available_coeffs_in_dwords = pvr_srv_get_max_coeffs(dev_info);
+
+   if (PVR_HAS_QUIRK(dev_info, 48492) && PVR_HAS_FEATURE(dev_info, roguexe) &&
+       !PVR_HAS_FEATURE(dev_info, compute_overlap)) {
+      /* Driver must not use the 2 reserved lines. */
+      available_coeffs_in_dwords -= ROGUE_CSRM_LINE_SIZE_IN_DWORDS * 2;
+   }
+
+   /* The maximum amount of local memory available to a kernel is the minimum
+    * of the total number of coefficient registers available and the max common
+    * store allocation size which can be made by the CDM.
+    *
+    * If any coeff lines are reserved for tessellation or pixel then we need to
+    * subtract those too.
+    */
+   return MIN2(available_coeffs_in_dwords,
+               ROGUE_MAX_PER_KERNEL_LOCAL_MEM_SIZE_REGS);
+}
+
 static int
 pvr_srv_winsys_device_info_init(struct pvr_winsys *ws,
                                 struct pvr_device_info *dev_info,
@@ -396,6 +517,16 @@ pvr_srv_winsys_device_info_init(struct pvr_winsys *ws,
       return ret;
    }
 
+   runtime_info->min_free_list_size = pvr_srv_get_min_free_list_size(dev_info);
+   runtime_info->reserved_shared_size =
+      pvr_srv_get_reserved_shared_size(dev_info);
+   runtime_info->total_reserved_partition_size =
+      pvr_srv_get_total_reserved_partition_size(dev_info);
+   runtime_info->num_phantoms = pvr_srv_get_num_phantoms(dev_info);
+   runtime_info->max_coeffs = pvr_srv_get_max_coeffs(dev_info);
+   runtime_info->cdm_max_local_mem_size_regs =
+      pvr_srv_get_cdm_max_local_mem_size_regs(dev_info);
+
    if (PVR_HAS_FEATURE(dev_info, gpu_multicore_support)) {
       result = pvr_srv_get_multicore_info(srv_ws->render_fd,
                                           0,
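
For scale, the quirk-dependent page counts in pvr_srv_get_min_free_list_size() translate to byte sizes via the PM physical page shift. Assuming a 4 KiB PM page, i.e. a shift of 12 (the driver uses ROGUE_BIF_PM_PHYSICAL_PAGE_SHIFT, whose actual value may differ), the three cases work out as in this sketch:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
   const unsigned page_shift = 12; /* assumed 4 KiB PM pages */

   /* Page counts taken from pvr_srv_get_min_free_list_size() above. */
   printf("roguexe + BRN66011: %" PRIu64 " bytes\n",
          (uint64_t)40U << page_shift); /* 160 KiB */
   printf("roguexe:            %" PRIu64 " bytes\n",
          (uint64_t)25U << page_shift); /* 100 KiB */
   printf("other cores:        %" PRIu64 " bytes\n",
          (uint64_t)50U << page_shift); /* 200 KiB */

   return 0;
}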