338 lines
12 KiB
C
338 lines
12 KiB
C
/*
 * Copyright © 2018 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */
|
||
|
||
/* blt command encoding for gen4/5 */
|
||
#include "crocus_context.h"
|
||
|
||
#include "crocus_genx_macros.h"
|
||
#include "crocus_genx_protos.h"
|
||
#include "crocus_resource.h"
|
||
|
||
#define FILE_DEBUG_FLAG DEBUG_BLIT
|
||
|
||
#if GFX_VER <= 5
|
||
|
||
static bool validate_blit_for_blt(struct crocus_batch *batch,
|
||
const struct pipe_blit_info *info)
|
||
{
|
||
/* If the source and destination are the same size with no mirroring,
|
||
* the rectangles are within the size of the texture and there is no
|
||
* scissor, then we can probably use the blit engine.
|
||
*/
|
||
if (info->dst.box.width != info->src.box.width ||
|
||
info->dst.box.height != info->src.box.height)
|
||
return false;
|
||
|
||
if (info->scissor_enable)
|
||
return false;
|
||
|
||
if (info->dst.box.height < 0 || info->src.box.height < 0)
|
||
return false;
|
||
|
||
if (info->dst.box.depth > 1 || info->src.box.depth > 1)
|
||
return false;
|
||
|
||
return true;
|
||
}
|
||
|
||
static inline int crocus_resource_blt_pitch(struct crocus_resource *res)
|
||
{
|
||
int pitch = res->surf.row_pitch_B;
|
||
if (res->surf.tiling != ISL_TILING_LINEAR)
|
||
pitch /= 4;
|
||
return pitch;
|
||
}
|
||
|
||
static uint32_t
|
||
color_depth_for_cpp(int cpp)
|
||
{
|
||
switch (cpp) {
|
||
case 4: return COLOR_DEPTH__32bit;
|
||
case 2: return COLOR_DEPTH__565;
|
||
case 1: return COLOR_DEPTH__8bit;
|
||
default:
|
||
unreachable("not reached");
|
||
}
|
||
}
|
||
|
||
static bool emit_copy_blt(struct crocus_batch *batch,
|
||
struct crocus_resource *src,
|
||
struct crocus_resource *dst,
|
||
unsigned cpp,
|
||
int32_t src_pitch,
|
||
unsigned src_offset,
|
||
int32_t dst_pitch,
|
||
unsigned dst_offset,
|
||
uint16_t src_x, uint16_t src_y,
|
||
uint16_t dst_x, uint16_t dst_y,
|
||
uint16_t w, uint16_t h)
|
||
|
||
{
|
||
uint32_t src_tile_w, src_tile_h;
|
||
uint32_t dst_tile_w, dst_tile_h;
|
||
int dst_y2 = dst_y + h;
|
||
int dst_x2 = dst_x + w;
|
||
|
||
DBG("%s src:buf(%p)/%d+%d %d,%d dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n",
|
||
__func__,
|
||
src, src_pitch, src_offset, src_x, src_y,
|
||
dst, dst_pitch, dst_offset, dst_x, dst_y, w, h);
|
||
|
||
isl_get_tile_dims(src->surf.tiling, cpp, &src_tile_w, &src_tile_h);
|
||
isl_get_tile_dims(dst->surf.tiling, cpp, &dst_tile_w, &dst_tile_h);
|
||
|
||
/* For Tiled surfaces, the pitch has to be a multiple of the Tile width
|
||
* (X direction width of the Tile). This is ensured while allocating the
|
||
* buffer object.
|
||
*/
|
||
assert(src->surf.tiling == ISL_TILING_LINEAR || (src_pitch % src_tile_w) == 0);
|
||
assert(dst->surf.tiling == ISL_TILING_LINEAR || (dst_pitch % dst_tile_w) == 0);
|
||
|
||
/* For big formats (such as floating point), do the copy using 16 or
|
||
* 32bpp and multiply the coordinates.
|
||
*/
|
||
if (cpp > 4) {
|
||
if (cpp % 4 == 2) {
|
||
dst_x *= cpp / 2;
|
||
dst_x2 *= cpp / 2;
|
||
src_x *= cpp / 2;
|
||
cpp = 2;
|
||
} else {
|
||
assert(cpp % 4 == 0);
|
||
dst_x *= cpp / 4;
|
||
dst_x2 *= cpp / 4;
|
||
src_x *= cpp / 4;
|
||
cpp = 4;
|
||
}
|
||
}
|
||
|
||
/* For tiled source and destination, pitch value should be specified
|
||
* as a number of Dwords.
|
||
*/
|
||
if (dst->surf.tiling != ISL_TILING_LINEAR)
|
||
dst_pitch /= 4;
|
||
|
||
if (src->surf.tiling != ISL_TILING_LINEAR)
|
||
src_pitch /= 4;
|
||
|
||
assert(cpp <= 4);
|
||
crocus_emit_cmd(batch, GENX(XY_SRC_COPY_BLT), xyblt) {
|
||
xyblt.RasterOperation = 0xCC;
|
||
xyblt.DestinationTilingEnable = dst->surf.tiling != ISL_TILING_LINEAR;
|
||
xyblt.SourceTilingEnable = src->surf.tiling != ISL_TILING_LINEAR;
|
||
xyblt.SourceBaseAddress = ro_bo(src->bo, src_offset);
|
||
xyblt.DestinationBaseAddress = rw_bo(dst->bo, dst_offset);
|
||
xyblt.ColorDepth = color_depth_for_cpp(cpp);
|
||
xyblt._32bppByteMask = cpp == 4 ? 0x3 : 0x1;
|
||
xyblt.DestinationX1Coordinate = dst_x;
|
||
xyblt.DestinationY1Coordinate = dst_y;
|
||
xyblt.DestinationX2Coordinate = dst_x2;
|
||
xyblt.DestinationY2Coordinate = dst_y2;
|
||
xyblt.DestinationPitch = dst_pitch;
|
||
xyblt.SourceX1Coordinate = src_x;
|
||
xyblt.SourceY1Coordinate = src_y;
|
||
xyblt.SourcePitch = src_pitch;
|
||
};
|
||
|
||
crocus_emit_mi_flush(batch);
|
||
return true;
|
||
}
|
||
|
||
static bool crocus_emit_blt(struct crocus_batch *batch,
|
||
struct crocus_resource *src,
|
||
struct crocus_resource *dst,
|
||
unsigned dst_level,
|
||
unsigned dst_x, unsigned dst_y,
|
||
unsigned dst_z,
|
||
unsigned src_level,
|
||
const struct pipe_box *src_box)
|
||
{
|
||
const struct isl_format_layout *src_fmtl = isl_format_get_layout(src->surf.format);
|
||
unsigned src_cpp = src_fmtl->bpb / 8;
|
||
const struct isl_format_layout *dst_fmtl = isl_format_get_layout(dst->surf.format);
|
||
const unsigned dst_cpp = dst_fmtl->bpb / 8;
|
||
uint16_t src_x, src_y;
|
||
uint32_t src_image_x, src_image_y, dst_image_x, dst_image_y;
|
||
uint32_t src_width = src_box->width, src_height = src_box->height;
|
||
|
||
/* gen4/5 can't handle Y tiled blits. */
|
||
if (src->surf.tiling == ISL_TILING_Y0 || dst->surf.tiling == ISL_TILING_Y0)
|
||
return false;
|
||
|
||
if (src->surf.format != dst->surf.format)
|
||
return false;
|
||
|
||
if (src_cpp != dst_cpp)
|
||
return false;
|
||
|
||
src_x = src_box->x;
|
||
src_y = src_box->y;
|
||
|
||
assert(src_cpp == dst_cpp);
|
||
|
||
crocus_resource_get_image_offset(src, src_level, src_box->z, &src_image_x,
|
||
&src_image_y);
|
||
if (util_format_is_compressed(src->base.format)) {
|
||
int bw = util_format_get_blockwidth(src->base.format);
|
||
int bh = util_format_get_blockheight(src->base.format);
|
||
assert(src_x % bw == 0);
|
||
assert(src_y % bh == 0);
|
||
src_x /= (int)bw;
|
||
src_y /= (int)bh;
|
||
src_width = DIV_ROUND_UP(src_width, (int)bw);
|
||
src_height = DIV_ROUND_UP(src_height, (int)bh);
|
||
}
|
||
|
||
crocus_resource_get_image_offset(dst, dst_level, dst_z, &dst_image_x,
|
||
&dst_image_y);
|
||
if (util_format_is_compressed(dst->base.format)) {
|
||
int bw = util_format_get_blockwidth(dst->base.format);
|
||
int bh = util_format_get_blockheight(dst->base.format);
|
||
assert(dst_x % bw == 0);
|
||
assert(dst_y % bh == 0);
|
||
dst_x /= (int)bw;
|
||
dst_y /= (int)bh;
|
||
}
|
||
src_x += src_image_x;
|
||
src_y += src_image_y;
|
||
dst_x += dst_image_x;
|
||
dst_y += dst_image_y;
|
||
|
||
/* According to the Ivy Bridge PRM, Vol1 Part4, section 1.2.1.2 (Graphics
|
||
* Data Size Limitations):
|
||
*
|
||
* The BLT engine is capable of transferring very large quantities of
|
||
* graphics data. Any graphics data read from and written to the
|
||
* destination is permitted to represent a number of pixels that
|
||
* occupies up to 65,536 scan lines and up to 32,768 bytes per scan line
|
||
* at the destination. The maximum number of pixels that may be
|
||
* represented per scan line’s worth of graphics data depends on the
|
||
* color depth.
|
||
*
|
||
* The blitter's pitch is a signed 16-bit integer, but measured in bytes
|
||
* for linear surfaces and DWords for tiled surfaces. So the maximum
|
||
* pitch is 32k linear and 128k tiled.
|
||
*/
|
||
if (crocus_resource_blt_pitch(src) >= 32768 ||
|
||
crocus_resource_blt_pitch(dst) >= 32768) {
|
||
return false;
|
||
}
|
||
|
||
/* We need to split the blit into chunks that each fit within the blitter's
|
||
* restrictions. We can't use a chunk size of 32768 because we need to
|
||
* ensure that src_tile_x + chunk_size fits. We choose 16384 because it's
|
||
* a nice round power of two, big enough that performance won't suffer, and
|
||
* small enough to guarantee everything fits.
|
||
*/
|
||
const uint32_t max_chunk_size = 16384;
|
||
|
||
for (uint32_t chunk_x = 0; chunk_x < src_width; chunk_x += max_chunk_size) {
|
||
for (uint32_t chunk_y = 0; chunk_y < src_height; chunk_y += max_chunk_size) {
|
||
const uint32_t chunk_w = MIN2(max_chunk_size, src_width - chunk_x);
|
||
const uint32_t chunk_h = MIN2(max_chunk_size, src_height - chunk_y);
|
||
|
||
ASSERTED uint32_t z_offset_el, array_offset;
|
||
uint32_t src_offset, src_tile_x, src_tile_y;
|
||
isl_tiling_get_intratile_offset_el(src->surf.tiling,
|
||
src_cpp * 8, src->surf.row_pitch_B,
|
||
src->surf.array_pitch_el_rows,
|
||
src_x + chunk_x, src_y + chunk_y, 0, 0,
|
||
&src_offset,
|
||
&src_tile_x, &src_tile_y,
|
||
&z_offset_el, &array_offset);
|
||
assert(z_offset_el == 0);
|
||
assert(array_offset == 0);
|
||
|
||
uint32_t dst_offset, dst_tile_x, dst_tile_y;
|
||
isl_tiling_get_intratile_offset_el(dst->surf.tiling,
|
||
dst_cpp * 8, dst->surf.row_pitch_B,
|
||
dst->surf.array_pitch_el_rows,
|
||
dst_x + chunk_x, dst_y + chunk_y, 0, 0,
|
||
&dst_offset,
|
||
&dst_tile_x, &dst_tile_y,
|
||
&z_offset_el, &array_offset);
|
||
assert(z_offset_el == 0);
|
||
assert(array_offset == 0);
|
||
if (!emit_copy_blt(batch, src, dst,
|
||
src_cpp, src->surf.row_pitch_B,
|
||
src_offset,
|
||
dst->surf.row_pitch_B, dst_offset,
|
||
src_tile_x, src_tile_y,
|
||
dst_tile_x, dst_tile_y,
|
||
chunk_w, chunk_h)) {
|
||
return false;
|
||
}
|
||
}
|
||
}
|
||
return true;
|
||
}
|
||
|
||
static bool crocus_blit_blt(struct crocus_batch *batch,
|
||
const struct pipe_blit_info *info)
|
||
{
|
||
if (!validate_blit_for_blt(batch, info))
|
||
return false;
|
||
|
||
return crocus_emit_blt(batch,
|
||
(struct crocus_resource *)info->src.resource,
|
||
(struct crocus_resource *)info->dst.resource,
|
||
info->dst.level,
|
||
info->dst.box.x,
|
||
info->dst.box.y,
|
||
info->dst.box.z,
|
||
info->src.level,
|
||
&info->src.box);
|
||
}
|
||
|
||
|
||
static bool crocus_copy_region_blt(struct crocus_batch *batch,
|
||
struct crocus_resource *dst,
|
||
unsigned dst_level,
|
||
unsigned dstx, unsigned dsty, unsigned dstz,
|
||
struct crocus_resource *src,
|
||
unsigned src_level,
|
||
const struct pipe_box *src_box)
|
||
{
|
||
if (dst->base.target == PIPE_BUFFER || src->base.target == PIPE_BUFFER)
|
||
return false;
|
||
return crocus_emit_blt(batch,
|
||
src,
|
||
dst,
|
||
dst_level,
|
||
dstx, dsty, dstz,
|
||
src_level,
|
||
src_box);
|
||
}
|
||
#endif
|
||
|
||
void
|
||
genX(init_blt)(struct crocus_screen *screen)
|
||
{
|
||
#if GFX_VER <= 5
|
||
screen->vtbl.blit_blt = crocus_blit_blt;
|
||
screen->vtbl.copy_region_blt = crocus_copy_region_blt;
|
||
#else
|
||
screen->vtbl.blit_blt = NULL;
|
||
screen->vtbl.copy_region_blt = NULL;
|
||
#endif
|
||
}
|