radv: set cb base tile swizzles for MRT speedups (v4)

This patch uses addrlib to work out the tile swizzles according
to the surface index. It seems to produce the same values as
amdgpu-pro for the deferred test.

v2: don't apply the swizzle to CMASK. The eg docs don't mention
it, and we clearly don't align CMASK for that.
v3: disable surf index for dedicated images, as these will
most likely be shared, and I don't think the metadata has
space for this info in it yet.
v4: update for shareable images, rename combined_swizzle
to tile_swizzle

This gets the deferred demo from 730->950fps on my rx480.
(dcc cmask elim predication patches get it further)

Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Signed-off-by: Dave Airlie <airlied@redhat.com>
This commit is contained in:
Dave Airlie 2017-07-07 06:56:57 +01:00
parent b86f86f55c
commit f8d5b377c8
5 changed files with 34 additions and 2 deletions

View File

@ -692,6 +692,20 @@ static int gfx6_compute_surface(ADDR_HANDLE addrlib,
surf->htile_size *= 2;
surf->is_linear = surf->u.legacy.level[0].mode == RADEON_SURF_MODE_LINEAR_ALIGNED;
/* work out base swizzle */
if (!(surf->flags & RADEON_SURF_Z_OR_SBUFFER)) {
ADDR_COMPUTE_BASE_SWIZZLE_INPUT AddrBaseSwizzleIn = {0};
ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT AddrBaseSwizzleOut = {0};
AddrBaseSwizzleIn.surfIndex = config->info.surf_index;
AddrBaseSwizzleIn.tileIndex = AddrSurfInfoIn.tileIndex;
AddrBaseSwizzleIn.macroModeIndex = AddrSurfInfoOut.macroModeIndex;
AddrBaseSwizzleIn.pTileInfo = AddrSurfInfoOut.pTileInfo;
AddrBaseSwizzleIn.tileMode = AddrSurfInfoOut.tileMode;
AddrComputeBaseSwizzle(addrlib, &AddrBaseSwizzleIn, &AddrBaseSwizzleOut);
surf->u.legacy.tile_swizzle = AddrBaseSwizzleOut.tileSwizzle;
}
return 0;
}

View File

@ -97,6 +97,7 @@ struct legacy_surf_layout {
unsigned depth_adjusted:1;
unsigned stencil_adjusted:1;
uint8_t tile_swizzle;
struct legacy_surf_level level[RADEON_SURF_MAX_LEVELS];
struct legacy_surf_level stencil_level[RADEON_SURF_MAX_LEVELS];
uint8_t tiling_index[RADEON_SURF_MAX_LEVELS];
@ -194,6 +195,7 @@ struct ac_surf_info {
uint32_t width;
uint32_t height;
uint32_t depth;
uint32_t surf_index;
uint8_t samples;
uint8_t levels;
uint16_t array_size;

View File

@ -2814,7 +2814,8 @@ radv_initialise_color_surface(struct radv_device *device,
}
cb->cb_color_base = va >> 8;
if (device->physical_device->rad_info.chip_class < GFX9)
cb->cb_color_base |= iview->image->surface.u.legacy.tile_swizzle;
/* CMASK variables */
va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
va += iview->image->cmask.offset;
@ -2823,6 +2824,8 @@ radv_initialise_color_surface(struct radv_device *device,
va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
va += iview->image->dcc_offset;
cb->cb_dcc_base = va >> 8;
if (device->physical_device->rad_info.chip_class < GFX9)
cb->cb_dcc_base |= iview->image->surface.u.legacy.tile_swizzle;
uint32_t max_slice = radv_surface_layer_count(iview);
cb->cb_color_view = S_028C6C_SLICE_START(iview->base_layer) |
@ -2838,6 +2841,8 @@ radv_initialise_color_surface(struct radv_device *device,
if (iview->image->fmask.size) {
va = device->ws->buffer_get_va(iview->bo) + iview->image->offset + iview->image->fmask.offset;
cb->cb_color_fmask = va >> 8;
if (device->physical_device->rad_info.chip_class < GFX9)
cb->cb_color_fmask |= iview->image->surface.u.legacy.tile_swizzle;
} else {
cb->cb_color_fmask = cb->cb_color_base;
}

View File

@ -32,6 +32,7 @@
#include "sid.h"
#include "gfx9d.h"
#include "util/debug.h"
#include "util/u_atomic.h"
static unsigned
radv_choose_tiling(struct radv_device *Device,
const struct radv_image_create_info *create_info)
@ -210,6 +211,8 @@ si_set_mutable_tex_desc_fields(struct radv_device *device,
va += base_level_info->offset;
state[0] = va >> 8;
if (chip_class < GFX9)
state[0] |= image->surface.u.legacy.tile_swizzle;
state[1] &= C_008F14_BASE_ADDRESS_HI;
state[1] |= S_008F14_BASE_ADDRESS_HI(va >> 40);
state[3] |= S_008F1C_TILING_INDEX(si_tile_mode_index(image, base_level,
@ -225,7 +228,8 @@ si_set_mutable_tex_desc_fields(struct radv_device *device,
meta_va += base_level_info->dcc_offset;
state[6] |= S_008F28_COMPRESSION_EN(1);
state[7] = meta_va >> 8;
if (chip_class < GFX9)
state[7] |= image->surface.u.legacy.tile_swizzle;
}
}
@ -473,6 +477,8 @@ si_make_texture_descriptor(struct radv_device *device,
}
fmask_state[0] = va >> 8;
if (device->physical_device->rad_info.chip_class < GFX9)
fmask_state[0] |= image->surface.u.legacy.tile_swizzle;
fmask_state[1] = S_008F14_BASE_ADDRESS_HI(va >> 40) |
S_008F14_DATA_FORMAT_GFX6(fmask_format) |
S_008F14_NUM_FORMAT_GFX6(num_format);
@ -792,6 +798,9 @@ radv_image_create(VkDevice _device,
image->shareable = vk_find_struct_const(pCreateInfo->pNext,
EXTERNAL_MEMORY_IMAGE_CREATE_INFO_KHR) != NULL;
if (!vk_format_is_depth(pCreateInfo->format) && !create_info->scanout && !image->shareable) {
image->info.surf_index = p_atomic_inc_return(&device->image_mrt_offset_counter) - 1;
}
radv_init_surface(device, &image->surface, create_info);

View File

@ -547,6 +547,8 @@ struct radv_device {
/* Backup in-memory cache to be used if the app doesn't provide one */
struct radv_pipeline_cache * mem_cache;
uint32_t image_mrt_offset_counter;
};
struct radv_device_memory {