radv: Use local buffers for the global bo list.

Even if we don't use local buffers in general. Turns out that even
though the performance is not the best the kernel still does it
better than our own list.

We still have to keep the radv bo list for buffers that are shared
externally.

This improves Talos on lowest quality setting (so as CPU bound as
possible) by ~10% if the global bo list is enabled.

Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
This commit is contained in:
Bas Nieuwenhuizen 2019-04-10 01:42:31 +02:00
parent af9534b9f3
commit f6fdd39eab
3 changed files with 8 additions and 2 deletions

View File

@ -3189,8 +3189,12 @@ static VkResult radv_alloc_memory(struct radv_device *device,
if (mem_type_index == RADV_MEM_TYPE_GTT_WRITE_COMBINE)
flags |= RADEON_FLAG_GTT_WC;
if (!dedicate_info && !import_info && (!export_info || !export_info->handleTypes))
if (!dedicate_info && !import_info && (!export_info || !export_info->handleTypes)) {
flags |= RADEON_FLAG_NO_INTERPROCESS_SHARING;
if (device->use_global_bo_list) {
flags |= RADEON_FLAG_PREFER_LOCAL_BO;
}
}
mem->bo = device->ws->buffer_create(device->ws, alloc_size, device->physical_device->rad_info.max_alignment,
domain, flags, priority);

View File

@ -58,6 +58,7 @@ enum radeon_bo_flag { /* bitfield */
RADEON_FLAG_NO_INTERPROCESS_SHARING = (1 << 6),
RADEON_FLAG_READ_ONLY = (1 << 7),
RADEON_FLAG_32BIT = (1 << 8),
RADEON_FLAG_PREFER_LOCAL_BO = (1 << 9),
};
enum radeon_bo_usage { /* bitfield */

View File

@ -368,7 +368,8 @@ radv_amdgpu_winsys_bo_create(struct radeon_winsys *_ws,
if (!(flags & RADEON_FLAG_IMPLICIT_SYNC) && ws->info.drm_minor >= 22)
request.flags |= AMDGPU_GEM_CREATE_EXPLICIT_SYNC;
if (flags & RADEON_FLAG_NO_INTERPROCESS_SHARING &&
ws->info.has_local_buffers && ws->use_local_bos) {
ws->info.has_local_buffers &&
(ws->use_local_bos || (flags & RADEON_FLAG_PREFER_LOCAL_BO))) {
bo->base.is_local = true;
request.flags |= AMDGPU_GEM_CREATE_VM_ALWAYS_VALID;
}