pvr: Fix physical device limits.

This commit makes changes to the physical device limits which were
missed during the 1.17 transition.

Signed-off-by: Karmjit Mahil <Karmjit.Mahil@imgtec.com>
Reviewed-by: Rajnesh Kanwal <rajnesh.kanwal@imgtec.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17206>
This commit is contained in:
Karmjit Mahil 2022-06-09 13:03:30 +01:00 committed by Marge Bot
parent eac5a2fdfa
commit 7858c32550
4 changed files with 61 additions and 13 deletions

View File

@ -46,6 +46,7 @@ const struct pvr_device_features pvr_device_features_4_V_2_51 = {
.has_gs_rta_support = true,
.has_isp_max_tiles_in_flight = true,
.has_isp_samples_per_pixel = true,
.has_max_instances_per_pds_task = true,
.has_max_multisample = true,
.has_max_partitions = true,
.has_max_usc_tasks = true,
@ -63,6 +64,7 @@ const struct pvr_device_features pvr_device_features_4_V_2_51 = {
.has_tpu_image_state_v2 = true,
.has_usc_f16sop_u8 = true,
.has_usc_min_output_registers_per_pix = true,
.has_usc_slots = true,
.has_uvs_banks = true,
.has_uvs_pba_entries = true,
.has_uvs_vtx_entries = true,
@ -73,6 +75,7 @@ const struct pvr_device_features pvr_device_features_4_V_2_51 = {
.common_store_size_in_dwords = 1280U * 4U * 4U,
.isp_max_tiles_in_flight = 4U,
.isp_samples_per_pixel = 2U,
.max_instances_per_pds_task = 32U,
.max_multisample = 8U,
.max_partitions = 8U,
.max_usc_tasks = 56U,
@ -83,6 +86,7 @@ const struct pvr_device_features pvr_device_features_4_V_2_51 = {
.tile_size_x = 32U,
.tile_size_y = 32U,
.usc_min_output_registers_per_pix = 2U,
.usc_slots = 32U,
.uvs_banks = 8U,
.uvs_pba_entries = 320U,
.uvs_vtx_entries = 288U,
@ -126,6 +130,7 @@ const struct pvr_device_features pvr_device_features_33_V_11_3 = {
.has_compute = true,
.has_isp_max_tiles_in_flight = true,
.has_isp_samples_per_pixel = true,
.has_max_instances_per_pds_task = true,
.has_max_multisample = true,
.has_max_partitions = true,
.has_max_usc_tasks = true,
@ -146,6 +151,7 @@ const struct pvr_device_features pvr_device_features_33_V_11_3 = {
.has_usc_f16sop_u8 = true,
.has_usc_min_output_registers_per_pix = true,
.has_usc_pixel_partition_mask = true,
.has_usc_slots = true,
.has_uvs_banks = true,
.has_uvs_pba_entries = true,
.has_uvs_vtx_entries = true,
@ -154,6 +160,7 @@ const struct pvr_device_features pvr_device_features_33_V_11_3 = {
.common_store_size_in_dwords = 512U * 4U * 4U,
.isp_max_tiles_in_flight = 1U,
.isp_samples_per_pixel = 1U,
.max_instances_per_pds_task = 32U,
.max_multisample = 4U,
.max_partitions = 4U,
.max_usc_tasks = 24U,
@ -165,6 +172,7 @@ const struct pvr_device_features pvr_device_features_33_V_11_3 = {
.tile_size_x = 16U,
.tile_size_y = 16U,
.usc_min_output_registers_per_pix = 1U,
.usc_slots = 14U,
.uvs_banks = 2U,
.uvs_pba_entries = 320U,
.uvs_vtx_entries = 288U,
@ -199,6 +207,7 @@ const struct pvr_device_features pvr_device_features_36_V_104_796 = {
.has_gs_rta_support = true,
.has_isp_max_tiles_in_flight = true,
.has_isp_samples_per_pixel = true,
.has_max_instances_per_pds_task = true,
.has_max_multisample = true,
.has_max_partitions = true,
.has_max_usc_tasks = true,
@ -221,6 +230,7 @@ const struct pvr_device_features pvr_device_features_36_V_104_796 = {
.has_usc_f16sop_u8 = true,
.has_usc_min_output_registers_per_pix = true,
.has_usc_pixel_partition_mask = true,
.has_usc_slots = true,
.has_uvs_banks = true,
.has_uvs_pba_entries = true,
.has_uvs_vtx_entries = true,
@ -230,6 +240,7 @@ const struct pvr_device_features pvr_device_features_36_V_104_796 = {
.common_store_size_in_dwords = 1344U * 4U * 4U,
.isp_max_tiles_in_flight = 6U,
.isp_samples_per_pixel = 4U,
.max_instances_per_pds_task = 32U,
.max_multisample = 4U,
.max_partitions = 16U,
.max_usc_tasks = 156U,
@ -241,6 +252,7 @@ const struct pvr_device_features pvr_device_features_36_V_104_796 = {
.tile_size_x = 16U,
.tile_size_y = 16U,
.usc_min_output_registers_per_pix = 2U,
.usc_slots = 64U,
.uvs_banks = 8U,
.uvs_pba_entries = 160U,
.uvs_vtx_entries = 144U,

View File

@ -259,6 +259,7 @@ struct pvr_device_features {
bool has_gs_rta_support : 1;
bool has_isp_max_tiles_in_flight : 1;
bool has_isp_samples_per_pixel : 1;
bool has_max_instances_per_pds_task : 1;
bool has_max_multisample : 1;
bool has_max_partitions : 1;
bool has_max_usc_tasks : 1;
@ -285,6 +286,7 @@ struct pvr_device_features {
bool has_usc_f16sop_u8 : 1;
bool has_usc_min_output_registers_per_pix : 1;
bool has_usc_pixel_partition_mask : 1;
bool has_usc_slots : 1;
bool has_uvs_banks : 1;
bool has_uvs_pba_entries : 1;
bool has_uvs_vtx_entries : 1;
@ -296,6 +298,7 @@ struct pvr_device_features {
uint32_t common_store_size_in_dwords;
uint32_t isp_max_tiles_in_flight;
uint32_t isp_samples_per_pixel;
uint32_t max_instances_per_pds_task;
uint32_t max_multisample;
uint32_t max_partitions;
uint32_t max_usc_tasks;
@ -307,6 +310,7 @@ struct pvr_device_features {
uint32_t tile_size_x;
uint32_t tile_size_y;
uint32_t usc_min_output_registers_per_pix;
uint32_t usc_slots;
uint32_t uvs_banks;
uint32_t uvs_pba_entries;
uint32_t uvs_vtx_entries;

View File

@ -819,6 +819,33 @@ void pvr_GetPhysicalDeviceProperties2(VkPhysicalDevice physicalDevice,
const uint32_t max_user_vertex_components =
((uvs_banks <= 8U) && (uvs_pba_entries == 160U)) ? 64U : 128U;
/* The workgroup invocations are limited by the case where we have a compute
* barrier - each slot has a fixed number of invocations, so the whole
* workgroup may need to span multiple slots. As each slot will WAIT at the
* barrier until the last invocation completes, all slots have to be
* schedulable at the same time.
*
* Typically all Rogue cores have 16 slots. Some of the smallest cores are
* reduced to 14.
*
* The compute barrier slot exhaustion scenario can be tested with:
* dEQP-VK.memory_model.message_passing*u32.coherent.fence_fence
* .atomicwrite*guard*comp
*/
/* Default value based on the minimum value found in all existing cores. */
const uint32_t usc_slots =
PVR_GET_FEATURE_VALUE(&pdevice->dev_info, usc_slots, 14);
/* Default value based on the minimum value found in all existing cores. */
const uint32_t max_instances_per_pds_task =
PVR_GET_FEATURE_VALUE(&pdevice->dev_info,
max_instances_per_pds_task,
32U);
const uint32_t max_compute_work_group_invocations =
(usc_slots * max_instances_per_pds_task >= 512U) ? 512U : 384U;
VkPhysicalDeviceLimits limits = {
.maxImageDimension1D = max_render_size,
.maxImageDimension2D = max_render_size,
@ -879,28 +906,33 @@ void pvr_GetPhysicalDeviceProperties2(VkPhysicalDevice physicalDevice,
.maxTessellationEvaluationOutputComponents = 0,
/* Geometry Shader Limits */
.maxGeometryShaderInvocations = 32U,
.maxGeometryInputComponents = max_user_vertex_components,
.maxGeometryOutputComponents = max_user_vertex_components,
.maxGeometryOutputVertices = 256U,
.maxGeometryTotalOutputComponents = 1024U,
.maxGeometryShaderInvocations = 0,
.maxGeometryInputComponents = 0,
.maxGeometryOutputComponents = 0,
.maxGeometryOutputVertices = 0,
.maxGeometryTotalOutputComponents = 0,
/* Fragment Shader Limits */
.maxFragmentInputComponents = max_user_vertex_components,
.maxFragmentOutputAttachments = PVR_MAX_COLOR_ATTACHMENTS,
.maxFragmentDualSrcAttachments = 0,
.maxFragmentCombinedOutputResources = 8U,
.maxFragmentCombinedOutputResources =
descriptor_limits->max_per_stage_storage_buffers +
descriptor_limits->max_per_stage_storage_images +
PVR_MAX_COLOR_ATTACHMENTS,
/* Compute Shader Limits */
.maxComputeSharedMemorySize = 16U * 1024U,
.maxComputeWorkGroupCount = { 64U * 1024U, 64U * 1024U, 64U * 1024U },
.maxComputeWorkGroupInvocations = 512U,
.maxComputeWorkGroupSize = { 512U, 512U, 64U },
.maxComputeWorkGroupInvocations = max_compute_work_group_invocations,
.maxComputeWorkGroupSize = { max_compute_work_group_invocations,
max_compute_work_group_invocations,
64U },
/* Rasterization Limits */
.subPixelPrecisionBits = sub_pixel_precision,
.subTexelPrecisionBits = 8U,
.mipmapPrecisionBits = 4U,
.mipmapPrecisionBits = 8U,
.maxDrawIndexedIndexValue = UINT32_MAX,
.maxDrawIndirectCount = 2U * 1024U * 1024U * 1024U,
@ -921,8 +953,8 @@ void pvr_GetPhysicalDeviceProperties2(VkPhysicalDevice physicalDevice,
.minTexelOffset = -8,
.maxTexelOffset = 7U,
.minTexelGatherOffset = 0,
.maxTexelGatherOffset = 0,
.minTexelGatherOffset = -8,
.maxTexelGatherOffset = 7,
.minInterpolationOffset = -0.5,
.maxInterpolationOffset = 0.5,
.subPixelInterpolationOffsetBits = 4U,

View File

@ -32,12 +32,12 @@
#include "pvr_device_info.h"
#include "util/u_math.h"
#define PVR_MAX_COLOR_ATTACHMENTS 8U
#define PVR_MAX_COLOR_ATTACHMENTS 8U /* Number of PBE emit registers. */
#define PVR_MAX_QUEUES 2U
#define PVR_MAX_VIEWPORTS 1U
#define PVR_MAX_NEG_OFFSCREEN_OFFSET 4096U
#define PVR_MAX_PUSH_CONSTANTS_SIZE 128U
#define PVR_MAX_PUSH_CONSTANTS_SIZE 256U
#define PVR_MAX_DESCRIPTOR_SETS 4U
#define PVR_MAX_FRAMEBUFFER_LAYERS ROGUE_MAX_RENDER_TARGETS