ac/surface/gfx6: compute FMASK together with the color surface

instead of invoking FMASK computation separately.

Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
This commit is contained in:
Marek Olšák 2018-04-30 20:54:06 -04:00
parent 276acda835
commit 9bf3570fed
5 changed files with 149 additions and 100 deletions

View File

@ -227,8 +227,16 @@ ADDR_HANDLE amdgpu_addr_create(const struct radeon_info *info,
return addrCreateOutput.hLib;
}
static int surf_config_sanity(const struct ac_surf_config *config)
static int surf_config_sanity(const struct ac_surf_config *config,
unsigned flags)
{
/* FMASK is allocated together with the color surface and can't be
* allocated separately.
*/
assert(!(flags & RADEON_SURF_FMASK));
if (flags & RADEON_SURF_FMASK)
return -EINVAL;
/* All dimensions must be at least 1. */
if (!config->info.width || !config->info.height || !config->info.depth ||
!config->info.array_size || !config->info.levels)
@ -445,7 +453,6 @@ static bool get_display_flag(const struct ac_surf_config *config,
unsigned bpe = surf->bpe;
if (surf->flags & RADEON_SURF_SCANOUT &&
!(surf->flags & RADEON_SURF_FMASK) &&
config->info.samples <= 1 &&
surf->blk_w <= 2 && surf->blk_h == 1) {
/* subsampled */
@ -556,9 +563,8 @@ static int gfx6_compute_surface(ADDR_HANDLE addrlib,
compressed = surf->blk_w == 4 && surf->blk_h == 4;
/* MSAA and FMASK require 2D tiling. */
if (config->info.samples > 1 ||
(surf->flags & RADEON_SURF_FMASK))
/* MSAA requires 2D tiling. */
if (config->info.samples > 1)
mode = RADEON_SURF_MODE_2D;
/* DB doesn't support linear layouts. */
@ -607,7 +613,7 @@ static int gfx6_compute_surface(ADDR_HANDLE addrlib,
/* Set the micro tile type. */
if (surf->flags & RADEON_SURF_SCANOUT)
AddrSurfInfoIn.tileType = ADDR_DISPLAYABLE;
else if (surf->flags & (RADEON_SURF_Z_OR_SBUFFER | RADEON_SURF_FMASK))
else if (surf->flags & RADEON_SURF_Z_OR_SBUFFER)
AddrSurfInfoIn.tileType = ADDR_DEPTH_SAMPLE_ORDER;
else
AddrSurfInfoIn.tileType = ADDR_NON_DISPLAYABLE;
@ -615,7 +621,6 @@ static int gfx6_compute_surface(ADDR_HANDLE addrlib,
AddrSurfInfoIn.flags.color = !(surf->flags & RADEON_SURF_Z_OR_SBUFFER);
AddrSurfInfoIn.flags.depth = (surf->flags & RADEON_SURF_ZBUFFER) != 0;
AddrSurfInfoIn.flags.cube = config->is_cube;
AddrSurfInfoIn.flags.fmask = (surf->flags & RADEON_SURF_FMASK) != 0;
AddrSurfInfoIn.flags.display = get_display_flag(config, surf);
AddrSurfInfoIn.flags.pow2Pad = config->info.levels > 1;
AddrSurfInfoIn.flags.tcCompatible = (surf->flags & RADEON_SURF_TC_COMPATIBLE_HTILE) != 0;
@ -680,8 +685,6 @@ static int gfx6_compute_surface(ADDR_HANDLE addrlib,
if (AddrSurfInfoIn.tileMode >= ADDR_TM_2D_TILED_THIN1 &&
surf->u.legacy.bankw && surf->u.legacy.bankh &&
surf->u.legacy.mtilea && surf->u.legacy.tile_split) {
assert(!(surf->flags & RADEON_SURF_FMASK));
/* If any of these parameters are incorrect, the calculation
* will fail. */
AddrTileInfoIn.banks = surf->u.legacy.num_banks;
@ -828,6 +831,67 @@ static int gfx6_compute_surface(ADDR_HANDLE addrlib,
}
}
/* Compute FMASK. */
if (config->info.samples >= 2 && AddrSurfInfoIn.flags.color) {
ADDR_COMPUTE_FMASK_INFO_INPUT fin = {0};
ADDR_COMPUTE_FMASK_INFO_OUTPUT fout = {0};
ADDR_TILEINFO fmask_tile_info = {};
fin.size = sizeof(fin);
fout.size = sizeof(fout);
fin.tileMode = AddrSurfInfoOut.tileMode;
fin.pitch = AddrSurfInfoOut.pitch;
fin.height = config->info.height;
fin.numSlices = AddrSurfInfoIn.numSlices;
fin.numSamples = AddrSurfInfoIn.numSamples;
fin.numFrags = AddrSurfInfoIn.numFrags;
fin.tileIndex = AddrSurfInfoOut.tileIndex;
fout.pTileInfo = &fmask_tile_info;
r = AddrComputeFmaskInfo(addrlib, &fin, &fout);
if (r)
return r;
surf->u.legacy.fmask.size = fout.fmaskBytes;
surf->u.legacy.fmask.alignment = fout.baseAlign;
surf->u.legacy.fmask.tile_swizzle = 0;
surf->u.legacy.fmask.slice_tile_max =
(fout.pitch * fout.height) / 64;
if (surf->u.legacy.fmask.slice_tile_max)
surf->u.legacy.fmask.slice_tile_max -= 1;
surf->u.legacy.fmask.tiling_index = fout.tileIndex;
surf->u.legacy.fmask.bankh = fout.pTileInfo->bankHeight;
surf->u.legacy.fmask.pitch_in_pixels = fout.pitch;
/* Compute tile swizzle for FMASK. */
if (config->info.fmask_surf_index &&
!(surf->flags & RADEON_SURF_SHAREABLE)) {
ADDR_COMPUTE_BASE_SWIZZLE_INPUT xin = {0};
ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT xout = {0};
xin.size = sizeof(ADDR_COMPUTE_BASE_SWIZZLE_INPUT);
xout.size = sizeof(ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT);
/* This counter starts from 1 instead of 0. */
xin.surfIndex = p_atomic_inc_return(config->info.fmask_surf_index);
xin.tileIndex = fout.tileIndex;
xin.macroModeIndex = fout.macroModeIndex;
xin.pTileInfo = fout.pTileInfo;
xin.tileMode = fin.tileMode;
int r = AddrComputeBaseSwizzle(addrlib, &xin, &xout);
if (r != ADDR_OK)
return r;
assert(xout.tileSwizzle <=
u_bit_consecutive(0, sizeof(surf->tile_swizzle) * 8));
surf->u.legacy.fmask.tile_swizzle = xout.tileSwizzle;
}
}
/* Recalculate the whole DCC miptree size including disabled levels.
* This is what addrlib does, but calling addrlib would be a lot more
* complicated.
@ -1197,8 +1261,6 @@ static int gfx9_compute_surface(ADDR_HANDLE addrlib,
ADDR2_COMPUTE_SURFACE_INFO_INPUT AddrSurfInfoIn = {0};
int r;
assert(!(surf->flags & RADEON_SURF_FMASK));
AddrSurfInfoIn.size = sizeof(ADDR2_COMPUTE_SURFACE_INFO_INPUT);
compressed = surf->blk_w == 4 && surf->blk_h == 4;
@ -1422,7 +1484,7 @@ int ac_compute_surface(ADDR_HANDLE addrlib, const struct radeon_info *info,
{
int r;
r = surf_config_sanity(config);
r = surf_config_sanity(config, surf->flags);
if (r)
return r;

View File

@ -79,6 +79,16 @@ struct legacy_surf_level {
enum radeon_surf_mode mode:2;
};
/* FMASK layout for legacy (pre-GFX9) surfaces. It is filled in while the
 * color surface itself is computed, so drivers read these fields instead of
 * running a separate FMASK surface computation.
 */
struct legacy_surf_fmask {
/* Total FMASK size in bytes. */
uint64_t size;
/* Required base alignment of the FMASK allocation. */
unsigned alignment;
/* Tile swizzle for FMASK; stays 0 when no swizzle was computed. */
unsigned tile_swizzle;
/* (pitch * height) / 64 - 1; kept at 0 when the quotient is 0. */
unsigned slice_tile_max; /* max 4M */
/* Tile mode index reported for the FMASK. */
uint8_t tiling_index; /* max 31 */
/* Bank height reported for the FMASK. */
uint8_t bankh; /* max 8 */
/* FMASK pitch. */
uint16_t pitch_in_pixels;
};
struct legacy_surf_layout {
unsigned bankw:4; /* max 8 */
unsigned bankh:4; /* max 8 */
@ -101,6 +111,7 @@ struct legacy_surf_layout {
struct legacy_surf_level stencil_level[RADEON_SURF_MAX_LEVELS];
uint8_t tiling_index[RADEON_SURF_MAX_LEVELS];
uint8_t stencil_tiling_index[RADEON_SURF_MAX_LEVELS];
struct legacy_surf_fmask fmask;
};
/* Same as addrlib - AddrResourceType. */
@ -222,7 +233,7 @@ struct ac_surf_info {
uint8_t num_channels; /* heuristic for displayability */
uint16_t array_size;
uint32_t *surf_index; /* Set a monotonic counter for tile swizzling. */
uint32_t *fmask_surf_index; /* GFX9+ */
uint32_t *fmask_surf_index;
};
struct ac_surf_config {

View File

@ -733,58 +733,20 @@ radv_image_get_fmask_info(struct radv_device *device,
unsigned nr_samples,
struct radv_fmask_info *out)
{
/* FMASK is allocated like an ordinary texture. */
struct radeon_surf fmask = {};
struct ac_surf_info info = image->info;
memset(out, 0, sizeof(*out));
if (device->physical_device->rad_info.chip_class >= GFX9) {
out->alignment = image->surface.u.gfx9.fmask_alignment;
out->size = image->surface.u.gfx9.fmask_size;
out->tile_swizzle = image->surface.u.gfx9.fmask_tile_swizzle;
return;
}
fmask.blk_w = image->surface.blk_w;
fmask.blk_h = image->surface.blk_h;
info.samples = 1;
fmask.flags = image->surface.flags | RADEON_SURF_FMASK;
if (!image->shareable) {
info.fmask_surf_index = &device->fmask_mrt_offset_counter;
info.surf_index = &device->fmask_mrt_offset_counter;
}
/* Force 2D tiling if it wasn't set. This may occur when creating
* FMASK for MSAA resolve on R6xx. On R6xx, the single-sample
* destination buffer must have an FMASK too. */
fmask.flags = RADEON_SURF_CLR(fmask.flags, MODE);
fmask.flags |= RADEON_SURF_SET(RADEON_SURF_MODE_2D, MODE);
switch (nr_samples) {
case 2:
case 4:
fmask.bpe = 1;
break;
case 8:
fmask.bpe = 4;
break;
default:
return;
}
device->ws->surface_init(device->ws, &info, &fmask);
assert(fmask.u.legacy.level[0].mode == RADEON_SURF_MODE_2D);
out->slice_tile_max = (fmask.u.legacy.level[0].nblk_x * fmask.u.legacy.level[0].nblk_y) / 64;
if (out->slice_tile_max)
out->slice_tile_max -= 1;
out->tile_mode_index = fmask.u.legacy.tiling_index[0];
out->pitch_in_pixels = fmask.u.legacy.level[0].nblk_x;
out->bank_height = fmask.u.legacy.bankh;
out->tile_swizzle = fmask.tile_swizzle;
out->alignment = MAX2(256, fmask.surf_alignment);
out->size = fmask.surf_size;
out->slice_tile_max = image->surface.u.legacy.fmask.slice_tile_max;
out->tile_mode_index = image->surface.u.legacy.fmask.tiling_index;
out->pitch_in_pixels = image->surface.u.legacy.fmask.pitch_in_pixels;
out->bank_height = image->surface.u.legacy.fmask.bankh;
out->tile_swizzle = image->surface.u.legacy.fmask.tile_swizzle;
out->alignment = image->surface.u.legacy.fmask.alignment;
out->size = image->surface.u.legacy.fmask.size;
assert(!out->tile_swizzle || !image->shareable);
}

View File

@ -851,13 +851,6 @@ void si_texture_get_fmask_info(struct si_screen *sscreen,
unsigned nr_samples,
struct r600_fmask_info *out)
{
/* FMASK is allocated like an ordinary texture. */
struct pipe_resource templ = rtex->buffer.b.b;
struct radeon_surf fmask = {};
unsigned flags, bpe;
memset(out, 0, sizeof(*out));
if (sscreen->info.chip_class >= GFX9) {
out->alignment = rtex->surface.u.gfx9.fmask_alignment;
out->size = rtex->surface.u.gfx9.fmask_size;
@ -865,40 +858,13 @@ void si_texture_get_fmask_info(struct si_screen *sscreen,
return;
}
templ.nr_samples = 1;
flags = rtex->surface.flags | RADEON_SURF_FMASK;
switch (nr_samples) {
case 2:
case 4:
bpe = 1;
break;
case 8:
bpe = 4;
break;
default:
PRINT_ERR("Invalid sample count for FMASK allocation.\n");
return;
}
if (sscreen->ws->surface_init(sscreen->ws, &templ, flags, bpe,
RADEON_SURF_MODE_2D, &fmask)) {
PRINT_ERR("Got error in surface_init while allocating FMASK.\n");
return;
}
assert(fmask.u.legacy.level[0].mode == RADEON_SURF_MODE_2D);
out->slice_tile_max = (fmask.u.legacy.level[0].nblk_x * fmask.u.legacy.level[0].nblk_y) / 64;
if (out->slice_tile_max)
out->slice_tile_max -= 1;
out->tile_mode_index = fmask.u.legacy.tiling_index[0];
out->pitch_in_pixels = fmask.u.legacy.level[0].nblk_x;
out->bank_height = fmask.u.legacy.bankh;
out->tile_swizzle = fmask.tile_swizzle;
out->alignment = MAX2(256, fmask.surf_alignment);
out->size = fmask.surf_size;
out->slice_tile_max = rtex->surface.u.legacy.fmask.slice_tile_max;
out->tile_mode_index = rtex->surface.u.legacy.fmask.tiling_index;
out->pitch_in_pixels = rtex->surface.u.legacy.fmask.pitch_in_pixels;
out->bank_height = rtex->surface.u.legacy.fmask.bankh;
out->tile_swizzle = rtex->surface.u.legacy.fmask.tile_swizzle;
out->alignment = rtex->surface.u.legacy.fmask.alignment;
out->size = rtex->surface.u.legacy.fmask.size;
}
static void si_texture_allocate_fmask(struct si_screen *sscreen,

View File

@ -243,6 +243,54 @@ static int radeon_winsys_surface_init(struct radeon_winsys *rws,
return r;
surf_drm_to_winsys(ws, surf_ws, &surf_drm);
/* Compute FMASK. */
if (ws->gen == DRV_SI &&
tex->nr_samples >= 2 &&
!(flags & (RADEON_SURF_Z_OR_SBUFFER | RADEON_SURF_FMASK))) {
/* FMASK is allocated like an ordinary texture. */
struct pipe_resource templ = *tex;
struct radeon_surf fmask = {};
unsigned fmask_flags, bpe;
templ.nr_samples = 1;
fmask_flags = flags | RADEON_SURF_FMASK;
switch (tex->nr_samples) {
case 2:
case 4:
bpe = 1;
break;
case 8:
bpe = 4;
break;
default:
fprintf(stderr, "radeon: Invalid sample count for FMASK allocation.\n");
return -1;
}
if (radeon_winsys_surface_init(rws, &templ, fmask_flags, bpe,
RADEON_SURF_MODE_2D, &fmask)) {
fprintf(stderr, "Got error in surface_init while allocating FMASK.\n");
return -1;
}
assert(fmask.u.legacy.level[0].mode == RADEON_SURF_MODE_2D);
surf_ws->u.legacy.fmask.size = fmask.surf_size;
surf_ws->u.legacy.fmask.alignment = MAX2(256, fmask.surf_alignment);
surf_ws->u.legacy.fmask.tile_swizzle = fmask.tile_swizzle;
surf_ws->u.legacy.fmask.slice_tile_max =
(fmask.u.legacy.level[0].nblk_x * fmask.u.legacy.level[0].nblk_y) / 64;
if (surf_ws->u.legacy.fmask.slice_tile_max)
surf_ws->u.legacy.fmask.slice_tile_max -= 1;
surf_ws->u.legacy.fmask.tiling_index = fmask.u.legacy.tiling_index[0];
surf_ws->u.legacy.fmask.bankh = fmask.u.legacy.bankh;
surf_ws->u.legacy.fmask.pitch_in_pixels = fmask.u.legacy.level[0].nblk_x;
}
return 0;
}