blorp: Add support for blorp_copy via XY_BLOCK_COPY_BLT

This introduces a new blorp_copy() path using the new XY_BLOCK_COPY_BLT
blitter command introduced on Tigerlake.  Unlike the blitter commands of
old, this one is actually fast and worth using.  Although it doesn't use
shaders like the rest of BLORP, we still can use some surface-munging
code from there, and BLORP also provides a nice place to put this which
is shared among the drivers.

To use the new path, set BLORP_BATCH_USE_BLITTER (much like Jordan's
recent BLORP_BATCH_USE_COMPUTE bit) and target the batch at the copy
engine.

Reviewed-by: Caio Oliveira <caio.oliveira@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14687>
This commit is contained in:
Kenneth Graunke 2021-10-25 19:02:42 -07:00 committed by Marge Bot
parent 4d4f57b15c
commit 31eeb72e45
3 changed files with 320 additions and 2 deletions

View File

@ -89,6 +89,9 @@ enum blorp_batch_flags {
* operation.
*/
BLORP_BATCH_USE_COMPUTE = (1 << 3),
/** Use the hardware blitter to perform any operations in this batch */
BLORP_BATCH_USE_BLITTER = (1 << 4),
};
struct blorp_batch {
@ -203,6 +206,13 @@ blorp_blit_supports_compute(struct blorp_context *blorp,
const struct isl_surf *dst_surf,
enum isl_aux_usage dst_aux_usage);
/**
 * Returns whether a copy between src_surf and dst_surf, using the given
 * auxiliary usages, is eligible for the hardware blitter path (see
 * BLORP_BATCH_USE_BLITTER).
 */
bool
blorp_copy_supports_blitter(struct blorp_context *blorp,
const struct isl_surf *src_surf,
const struct isl_surf *dst_surf,
enum isl_aux_usage src_aux_usage,
enum isl_aux_usage dst_aux_usage);
void
blorp_clear(struct blorp_batch *batch,
const struct blorp_surf *surf,

View File

@ -2455,6 +2455,61 @@ blorp_blit_supports_compute(struct blorp_context *blorp,
}
}
/**
 * Report whether the blitter path accepts a surface with the given
 * auxiliary usage.
 *
 * Surfaces without an auxiliary surface are always accepted.  The two CCS_E
 * usages are accepted only when devinfo->verx10 is at least 125.  Every
 * other usage is rejected.
 */
static bool
blitter_supports_aux(const struct intel_device_info *devinfo,
                     enum isl_aux_usage aux_usage)
{
   if (aux_usage == ISL_AUX_USAGE_NONE)
      return true;

   const bool is_ccs_e = aux_usage == ISL_AUX_USAGE_CCS_E ||
                         aux_usage == ISL_AUX_USAGE_GFX12_CCS_E;

   return is_ccs_e && devinfo->verx10 >= 125;
}
/**
 * Decide whether a copy between the two surfaces can take the blitter path.
 *
 * The copy is rejected when the device is older than ver 12, when either
 * surface is multisampled, or when either aux usage is not accepted by
 * blitter_supports_aux().  For destination formats with 96 bits per block,
 * both surfaces must additionally be linear.
 *
 * \return true if the blitter path may be used, false otherwise.
 */
bool
blorp_copy_supports_blitter(struct blorp_context *blorp,
                            const struct isl_surf *src_surf,
                            const struct isl_surf *dst_surf,
                            enum isl_aux_usage src_aux_usage,
                            enum isl_aux_usage dst_aux_usage)
{
   const struct intel_device_info *devinfo = blorp->isl_dev->info;

   if (devinfo->ver < 12)
      return false;

   const bool any_multisampled =
      dst_surf->samples > 1 || src_surf->samples > 1;
   if (any_multisampled)
      return false;

   const bool aux_ok = blitter_supports_aux(devinfo, dst_aux_usage) &&
                       blitter_supports_aux(devinfo, src_aux_usage);
   if (!aux_ok)
      return false;

   const struct isl_format_layout *dst_layout =
      isl_format_get_layout(dst_surf->format);

   /* Only 96bpp formats carry extra restrictions. */
   if (dst_layout->bpb != 96)
      return true;

   /* XY_BLOCK_COPY_BLT mentions it doesn't support clear colors for 96bpp
    * formats, but none of them support CCS anyway, so it's a moot point.
    */
   assert(src_aux_usage == ISL_AUX_USAGE_NONE);
   assert(dst_aux_usage == ISL_AUX_USAGE_NONE);

   /* We can only support linear mode for 96bpp. */
   return src_surf->tiling == ISL_TILING_LINEAR &&
          dst_surf->tiling == ISL_TILING_LINEAR;
}
void
blorp_blit(struct blorp_batch *batch,
const struct blorp_surf *src_surf,
@ -2806,6 +2861,7 @@ blorp_copy(struct blorp_batch *batch,
uint32_t src_width, uint32_t src_height)
{
const struct isl_device *isl_dev = batch->blorp->isl_dev;
const struct intel_device_info *devinfo = isl_dev->info;
struct blorp_params params;
if (src_width == 0 || src_height == 0)
@ -2931,6 +2987,25 @@ blorp_copy(struct blorp_batch *batch,
uint32_t dst_width = src_width;
uint32_t dst_height = src_height;
if (batch->flags & BLORP_BATCH_USE_BLITTER) {
if (devinfo->verx10 < 125) {
blorp_surf_convert_to_single_slice(isl_dev, &params.dst);
blorp_surf_convert_to_single_slice(isl_dev, &params.src);
}
params.x0 = dst_x;
params.x1 = dst_x + dst_width;
params.y0 = dst_y;
params.y1 = dst_y + dst_height;
params.wm_inputs.coord_transform[0].offset = dst_x - (float)src_x;
params.wm_inputs.coord_transform[1].offset = dst_y - (float)src_y;
params.wm_inputs.coord_transform[0].multiplier = 1.0f;
params.wm_inputs.coord_transform[1].multiplier = 1.0f;
batch->blorp->exec(batch, &params);
return;
}
struct blt_coords coords = {
.x = {
.src0 = src_x,

View File

@ -2295,6 +2295,232 @@ blorp_exec_compute(struct blorp_batch *batch, const struct blorp_params *params)
blorp_measure_end(batch, params);
}
/* -----------------------------------------------------------------------
* -- BLORP on blitter
* -----------------------------------------------------------------------
*/
#include "isl/isl_genX_helpers.h"
#if GFX_VER >= 12
/**
 * Translate the surface's ISL tiling into the XY_TILE_* value used by
 * XY_BLOCK_COPY_BLT.
 *
 * Linear is always accepted.  With GFX_VERx10 >= 125 the X, 4 and 64 tilings
 * are accepted; otherwise only Y0 is.  Any other tiling is treated as
 * unreachable.
 */
static uint32_t
xy_bcb_tiling(const struct isl_surf *surf)
{
   if (surf->tiling == ISL_TILING_LINEAR)
      return XY_TILE_LINEAR;

#if GFX_VERx10 >= 125
   if (surf->tiling == ISL_TILING_X)
      return XY_TILE_X;

   if (surf->tiling == ISL_TILING_4)
      return XY_TILE_4;

   if (surf->tiling == ISL_TILING_64)
      return XY_TILE_64;
#else
   if (surf->tiling == ISL_TILING_Y0)
      return XY_TILE_Y;
#endif

   unreachable("Invalid tiling for XY_BLOCK_COPY_BLT");
}
/**
 * Map a format layout's bits-per-block to the matching XY_BPP_* color depth
 * value.  Block sizes other than 8, 16, 32, 64, 96 and 128 are treated as
 * unreachable.
 */
static uint32_t
xy_color_depth(const struct isl_format_layout *fmtl)
{
   if (fmtl->bpb == 128)
      return XY_BPP_128_BIT;

   if (fmtl->bpb == 96)
      return XY_BPP_96_BIT;

   if (fmtl->bpb == 64)
      return XY_BPP_64_BIT;

   if (fmtl->bpb == 32)
      return XY_BPP_32_BIT;

   if (fmtl->bpb == 16)
      return XY_BPP_16_BIT;

   if (fmtl->bpb == 8)
      return XY_BPP_8_BIT;

   unreachable("Invalid bpp");
}
#endif
#if GFX_VERx10 >= 125
/**
 * Translate the surface's ISL dimensionality (1D/2D/3D) into the
 * XY_SURFTYPE_* value used by XY_BLOCK_COPY_BLT.  Any other dimensionality
 * is treated as unreachable.
 */
static uint32_t
xy_bcb_surf_dim(const struct isl_surf *surf)
{
   if (surf->dim == ISL_SURF_DIM_1D)
      return XY_SURFTYPE_1D;

   if (surf->dim == ISL_SURF_DIM_2D)
      return XY_SURFTYPE_2D;

   if (surf->dim == ISL_SURF_DIM_3D)
      return XY_SURFTYPE_3D;

   unreachable("Invalid dimensionality for XY_BLOCK_COPY_BLT");
}
/**
 * Return the surface's level-0 depth for 3D surfaces, or its level-0 array
 * length for every other dimensionality.
 */
static uint32_t
xy_bcb_surf_depth(const struct isl_surf *surf)
{
   if (surf->dim == ISL_SURF_DIM_3D)
      return surf->logical_level0_px.depth;

   return surf->logical_level0_px.array_len;
}
/**
 * Map the surface's aux usage to the auxiliary surface mode value programmed
 * into XY_BLOCK_COPY_BLT: XY_CCS_E for either CCS_E usage, XY_NONE when no
 * aux surface is in use.  Any other usage is treated as unreachable.
 */
static uint32_t
xy_aux_mode(const struct brw_blorp_surface_info *info)
{
   if (info->aux_usage == ISL_AUX_USAGE_CCS_E ||
       info->aux_usage == ISL_AUX_USAGE_GFX12_CCS_E)
      return XY_CCS_E;

   if (info->aux_usage == ISL_AUX_USAGE_NONE)
      return XY_NONE;

   unreachable("Unsupported aux mode");
}
#endif
/**
 * Emit one XY_BLOCK_COPY_BLT command copying a rectangle from params->src
 * to params->dst.
 *
 * The destination rectangle is (params->x0, params->y0) to
 * (params->x1, params->y1).  The source origin is obtained by subtracting
 * the coord_transform offsets in params->wm_inputs from the destination
 * origin.
 *
 * When built with GFX_VER < 12 the body consists solely of unreachable().
 *
 * NOTE(review): the unreachable() message below misspells "supported"
 * ("suppotred"); it is a runtime string and is left untouched here.
 */
UNUSED static void
blorp_xy_block_copy_blt(struct blorp_batch *batch,
const struct blorp_params *params)
{
#if GFX_VER < 12
unreachable("Blitter is only suppotred on Gfx12+");
#else
/* isl_dev is only read in the GFX_VERx10 >= 125 section (MOCS override),
 * hence the UNUSED marker.
 */
UNUSED const struct isl_device *isl_dev = batch->blorp->isl_dev;
/* Preconditions: blitter batch, no clear-color-update suppression, no
 * predication, no HiZ op, and exactly one layer / one level per surface.
 */
assert(batch->flags & BLORP_BATCH_USE_BLITTER);
assert(!(batch->flags & BLORP_BATCH_NO_UPDATE_CLEAR_COLOR));
assert(!(batch->flags & BLORP_BATCH_PREDICATE_ENABLE));
assert(params->hiz_op == ISL_AUX_OP_NONE);
assert(params->num_layers == 1);
assert(params->dst.view.levels == 1);
assert(params->src.view.levels == 1);
#if GFX_VERx10 < 125
/* DestinationArrayIndex is only programmed for GFX_VERx10 >= 125 below, so
 * here the destination must already start at layer 0 with no Z offset.
 */
assert(params->dst.view.base_array_layer == 0);
assert(params->dst.z_offset == 0);
#endif
/* Source coordinates are the destination coordinates minus the
 * coord_transform offset for the corresponding axis.  The x1/y1 source
 * values are only consumed by the size asserts below.
 */
unsigned dst_x0 = params->x0;
unsigned dst_x1 = params->x1;
unsigned src_x0 =
dst_x0 - params->wm_inputs.coord_transform[0].offset;
ASSERTED unsigned src_x1 =
dst_x1 - params->wm_inputs.coord_transform[0].offset;
unsigned dst_y0 = params->y0;
unsigned dst_y1 = params->y1;
unsigned src_y0 =
dst_y0 - params->wm_inputs.coord_transform[1].offset;
ASSERTED unsigned src_y1 =
dst_y1 - params->wm_inputs.coord_transform[1].offset;
/* Source and destination rectangles must have identical dimensions. */
assert(src_x1 - src_x0 == dst_x1 - dst_x0);
assert(src_y1 - src_y0 == dst_y1 - dst_y0);
const struct isl_surf *src_surf = &params->src.surf;
const struct isl_surf *dst_surf = &params->dst.surf;
/* The color depth is taken from the destination view's format. */
const struct isl_format_layout *fmtl =
isl_format_get_layout(params->dst.view.format);
/* 96bpp copies are only expected with both surfaces linear. */
if (fmtl->bpb == 96) {
assert(src_surf->tiling == ISL_TILING_LINEAR &&
dst_surf->tiling == ISL_TILING_LINEAR);
}
assert(src_surf->samples == 1);
assert(dst_surf->samples == 1);
/* row_pitch_B is divided by 1 for linear surfaces and by 4 otherwise
 * before being programmed (minus one) into the pitch fields.
 */
unsigned dst_pitch_unit = dst_surf->tiling == ISL_TILING_LINEAR ? 1 : 4;
unsigned src_pitch_unit = src_surf->tiling == ISL_TILING_LINEAR ? 1 : 4;
#if GFX_VERx10 >= 125
struct isl_extent3d src_align = isl_get_image_alignment(src_surf);
struct isl_extent3d dst_align = isl_get_image_alignment(dst_surf);
#endif
blorp_emit(batch, GENX(XY_BLOCK_COPY_BLT), blt) {
blt.ColorDepth = xy_color_depth(fmtl);
/* Destination description. */
blt.DestinationPitch = (dst_surf->row_pitch_B / dst_pitch_unit) - 1;
blt.DestinationMOCS = params->dst.addr.mocs;
blt.DestinationTiling = xy_bcb_tiling(dst_surf);
blt.DestinationX1 = dst_x0;
blt.DestinationY1 = dst_y0;
blt.DestinationX2 = dst_x1;
blt.DestinationY2 = dst_y1;
blt.DestinationBaseAddress = params->dst.addr;
blt.DestinationXOffset = params->dst.tile_x_sa;
blt.DestinationYOffset = params->dst.tile_y_sa;
#if GFX_VERx10 >= 125
/* Extended destination surface state, only programmed on verx10 >= 125.
 * Width/height/depth are programmed as (value - 1).
 */
blt.DestinationSurfaceType = xy_bcb_surf_dim(dst_surf);
blt.DestinationSurfaceWidth = dst_surf->logical_level0_px.w - 1;
blt.DestinationSurfaceHeight = dst_surf->logical_level0_px.h - 1;
blt.DestinationSurfaceDepth = xy_bcb_surf_depth(dst_surf) - 1;
blt.DestinationArrayIndex =
params->dst.view.base_array_layer + params->dst.z_offset;
blt.DestinationSurfaceQPitch = isl_get_qpitch(dst_surf) >> 2;
blt.DestinationLOD = params->dst.view.base_level;
/* NOTE(review): 15 presumably means "no mip tail" -- confirm against the
 * command reference.
 */
blt.DestinationMipTailStartLOD = 15;
blt.DestinationHorizontalAlign = isl_encode_halign(dst_align.width);
blt.DestinationVerticalAlign = isl_encode_valign(dst_align.height);
blt.DestinationDepthStencilResource = false;
blt.DestinationTargetMemory =
params->dst.addr.local_hint ? XY_MEM_LOCAL : XY_MEM_SYSTEM;
/* Compression fields are only filled in when the destination uses an aux
 * surface; the clear value is enabled only if a clear color buffer is set.
 */
if (params->dst.aux_usage != ISL_AUX_USAGE_NONE) {
blt.DestinationAuxiliarySurfaceMode = xy_aux_mode(&params->dst);
blt.DestinationCompressionEnable = true;
blt.DestinationCompressionFormat =
isl_get_render_compression_format(dst_surf->format);
blt.DestinationClearValueEnable = !!params->dst.clear_color_addr.buffer;
blt.DestinationClearAddress = params->dst.clear_color_addr;
}
#endif
/* Source description.  Only the source origin is programmed; no source
 * X2/Y2 fields are set here.
 */
blt.SourceX1 = src_x0;
blt.SourceY1 = src_y0;
blt.SourcePitch = (src_surf->row_pitch_B / src_pitch_unit) - 1;
blt.SourceMOCS = params->src.addr.mocs;
blt.SourceTiling = xy_bcb_tiling(src_surf);
blt.SourceBaseAddress = params->src.addr;
blt.SourceXOffset = params->src.tile_x_sa;
blt.SourceYOffset = params->src.tile_y_sa;
#if GFX_VERx10 >= 125
/* Extended source surface state, mirroring the destination fields above. */
blt.SourceSurfaceType = xy_bcb_surf_dim(src_surf);
blt.SourceSurfaceWidth = src_surf->logical_level0_px.w - 1;
blt.SourceSurfaceHeight = src_surf->logical_level0_px.h - 1;
blt.SourceSurfaceDepth = xy_bcb_surf_depth(src_surf) - 1;
blt.SourceArrayIndex =
params->src.view.base_array_layer + params->src.z_offset;
blt.SourceSurfaceQPitch = isl_get_qpitch(src_surf) >> 2;
blt.SourceLOD = params->src.view.base_level;
blt.SourceMipTailStartLOD = 15;
blt.SourceHorizontalAlign = isl_encode_halign(src_align.width);
blt.SourceVerticalAlign = isl_encode_valign(src_align.height);
blt.SourceDepthStencilResource = false;
blt.SourceTargetMemory =
params->src.addr.local_hint ? XY_MEM_LOCAL : XY_MEM_SYSTEM;
if (params->src.aux_usage != ISL_AUX_USAGE_NONE) {
blt.SourceAuxiliarySurfaceMode = xy_aux_mode(&params->src);
blt.SourceCompressionEnable = true;
blt.SourceCompressionFormat =
isl_get_render_compression_format(src_surf->format);
blt.SourceClearValueEnable = !!params->src.clear_color_addr.buffer;
blt.SourceClearAddress = params->src.clear_color_addr;
}
/* XeHP needs special MOCS values for the blitter */
/* These assignments intentionally overwrite the per-address MOCS values
 * set earlier in this packet.
 */
blt.DestinationMOCS = isl_dev->mocs.blitter_dst;
blt.SourceMOCS = isl_dev->mocs.blitter_src;
#endif
}
#endif
}
/**
* \brief Execute a blit or render pass operation.
@ -2308,10 +2534,17 @@ blorp_exec_compute(struct blorp_batch *batch, const struct blorp_params *params)
static void
blorp_exec(struct blorp_batch *batch, const struct blorp_params *params)
{
/* Dispatch on the batch flags: the blitter path is checked first, then
 * compute, and everything else goes to the 3D pipeline.
 *
 * NOTE(review): this listing appears to interleave the pre-change lines
 * (the bare `if (... USE_COMPUTE)` and bare `else`) with the post-change
 * braced chain -- confirm against the actual file before editing.
 */
if (batch->flags & BLORP_BATCH_USE_COMPUTE)
if (batch->flags & BLORP_BATCH_USE_BLITTER) {
/* Someday, if we implement clears on the blit engine, we can
* use params->src.enabled to determine which case we're in.
*/
assert(params->src.enabled);
blorp_xy_block_copy_blt(batch, params);
} else if (batch->flags & BLORP_BATCH_USE_COMPUTE) {
blorp_exec_compute(batch, params);
else
} else {
blorp_exec_3d(batch, params);
}
}
#endif /* BLORP_GENX_EXEC_H */