i965: Always use Y-tiled buffers on SKL+

Starting with Skylake, the display engine is capable of scanning out from
Y-tiled buffers. As such, we can and should use Y-tiling for better efficiency.
This also has the added benefit of allowing the winsys buffer to be fast cleared.

Note that the buffer allocation done for mipmaps already never allocates an
X-tiled buffer on GEN9.

This has an almost universally positive impact on benchmarks, with some
improving by as much as 20%.

Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Reviewed-by: Topi Pohjolainen <topi.pohjolainen@intel.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
This commit is contained in:
Ben Widawsky 2016-04-21 20:14:58 -07:00
parent c3b88cc2c1
commit 6a0d036483
4 changed files with 30 additions and 8 deletions

View File

@ -244,7 +244,7 @@ brw_get_fast_clear_rect(const struct brw_context *brw,
* alignment size returned by intel_get_non_msrt_mcs_alignment(), but * alignment size returned by intel_get_non_msrt_mcs_alignment(), but
* with X alignment multiplied by 16 and Y alignment multiplied by 32. * with X alignment multiplied by 16 and Y alignment multiplied by 32.
*/ */
intel_get_non_msrt_mcs_alignment(mt, &x_align, &y_align); intel_get_non_msrt_mcs_alignment(brw, mt, &x_align, &y_align);
x_align *= 16; x_align *= 16;
/* SKL+ line alignment requirement for Y-tiled are half those of the prior /* SKL+ line alignment requirement for Y-tiled are half those of the prior
@ -838,7 +838,7 @@ brw_get_resolve_rect(const struct brw_context *brw,
* by a factor of 2. * by a factor of 2.
*/ */
intel_get_non_msrt_mcs_alignment(mt, &x_align, &y_align); intel_get_non_msrt_mcs_alignment(brw, mt, &x_align, &y_align);
if (brw->gen >= 9) { if (brw->gen >= 9) {
x_scaledown = x_align * 8; x_scaledown = x_align * 8;
y_scaledown = y_align * 8; y_scaledown = y_align * 8;

View File

@ -144,7 +144,8 @@ compute_msaa_layout(struct brw_context *brw, mesa_format format,
* by half the block width, and Y coordinates by half the block height. * by half the block width, and Y coordinates by half the block height.
*/ */
void void
intel_get_non_msrt_mcs_alignment(const struct intel_mipmap_tree *mt, intel_get_non_msrt_mcs_alignment(const struct brw_context *brw,
const struct intel_mipmap_tree *mt,
unsigned *width_px, unsigned *height) unsigned *width_px, unsigned *height)
{ {
switch (mt->tiling) { switch (mt->tiling) {
@ -156,6 +157,11 @@ intel_get_non_msrt_mcs_alignment(const struct intel_mipmap_tree *mt,
*height = 4; *height = 4;
break; break;
case I915_TILING_X: case I915_TILING_X:
/* The docs are somewhat confusing with the way the tables are displayed.
* However, they do clearly state: "MCS and Lossless compression is
* supported for TiledY/TileYs/TileYf non-MSRTs only."
*/
assert(brw->gen < 9);
*width_px = 64 / mt->cpp; *width_px = 64 / mt->cpp;
*height = 2; *height = 2;
} }
@ -1552,7 +1558,7 @@ intel_miptree_alloc_non_msrt_mcs(struct brw_context *brw,
const mesa_format format = MESA_FORMAT_R_UINT32; const mesa_format format = MESA_FORMAT_R_UINT32;
unsigned block_width_px; unsigned block_width_px;
unsigned block_height; unsigned block_height;
intel_get_non_msrt_mcs_alignment(mt, &block_width_px, &block_height); intel_get_non_msrt_mcs_alignment(brw, mt, &block_width_px, &block_height);
unsigned width_divisor = block_width_px * 4; unsigned width_divisor = block_width_px * 4;
unsigned height_divisor = block_height * 8; unsigned height_divisor = block_height * 8;

View File

@ -663,7 +663,8 @@ struct intel_mipmap_tree
}; };
void void
intel_get_non_msrt_mcs_alignment(const struct intel_mipmap_tree *mt, intel_get_non_msrt_mcs_alignment(const struct brw_context *brw,
const struct intel_mipmap_tree *mt,
unsigned *width_px, unsigned *height); unsigned *width_px, unsigned *height);
bool bool

View File

@ -516,7 +516,11 @@ intel_create_image(__DRIscreen *screen,
int cpp; int cpp;
unsigned long pitch; unsigned long pitch;
tiling = I915_TILING_X; if (intelScreen->devinfo->gen >= 9) {
tiling = I915_TILING_Y;
} else {
tiling = I915_TILING_X;
}
if (use & __DRI_IMAGE_USE_CURSOR) { if (use & __DRI_IMAGE_USE_CURSOR) {
if (width != 64 || height != 64) if (width != 64 || height != 64)
return NULL; return NULL;
@ -1144,8 +1148,14 @@ intel_detect_swizzling(struct intel_screen *screen)
drm_intel_bo *buffer; drm_intel_bo *buffer;
unsigned long flags = 0; unsigned long flags = 0;
unsigned long aligned_pitch; unsigned long aligned_pitch;
uint32_t tiling = I915_TILING_X;
uint32_t swizzle_mode = 0; uint32_t swizzle_mode = 0;
uint32_t tiling;
if (screen->devinfo->gen >= 9) {
tiling = I915_TILING_Y;
} else {
tiling = I915_TILING_X;
}
buffer = drm_intel_bo_alloc_tiled(screen->bufmgr, "swizzle test", buffer = drm_intel_bo_alloc_tiled(screen->bufmgr, "swizzle test",
64, 64, 4, 64, 64, 4,
@ -1571,7 +1581,12 @@ intelAllocateBuffer(__DRIscreen *screen,
return NULL; return NULL;
/* The front and back buffers are color buffers, which are X tiled. */ /* The front and back buffers are color buffers, which are X tiled. */
uint32_t tiling = I915_TILING_X; uint32_t tiling;
if (intelScreen->devinfo->gen >= 9) {
tiling = I915_TILING_Y;
} else {
tiling = I915_TILING_X;
}
unsigned long pitch; unsigned long pitch;
int cpp = format / 8; int cpp = format / 8;
intelBuffer->bo = drm_intel_bo_alloc_tiled(intelScreen->bufmgr, intelBuffer->bo = drm_intel_bo_alloc_tiled(intelScreen->bufmgr,