radeonsi: implement TC-compatible HTILE
so that decompress blits aren't needed and depth texturing needs less memory bandwidth. Z16 and Z24 are promoted to Z32_FLOAT by the driver, because TC-compatible HTILE only supports Z32_FLOAT. This doubles the memory footprint for Z16. The format promotion is not visible to state trackers. This is part of TC-compatible renderbuffer compression, which has 3 parts: DCC, HTILE, FMASK. Only TC-compatible FMASK compression is missing now. I don't see a measurable increase in performance, though. (I tested Talos Principle and DiRT: Showdown; the latter is improved by 0.5%, which is almost noise, and it originally used layered Z16, so at least we know that Z16 promoted to Z32F isn't slower now.) Tested-by: Edmondo Tommasina <edmondo.tommasina@gmail.com> Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
This commit is contained in:
parent
a077185ea9
commit
d4d9ec55c5
|
@ -245,6 +245,7 @@ struct r600_htile_info {
|
|||
unsigned height;
|
||||
unsigned xalign;
|
||||
unsigned yalign;
|
||||
unsigned alignment;
|
||||
};
|
||||
|
||||
struct r600_texture {
|
||||
|
@ -252,6 +253,7 @@ struct r600_texture {
|
|||
|
||||
uint64_t size;
|
||||
unsigned num_level0_transfers;
|
||||
enum pipe_format db_render_format;
|
||||
bool is_depth;
|
||||
bool db_compatible;
|
||||
bool can_sample_z;
|
||||
|
@ -273,6 +275,7 @@ struct r600_texture {
|
|||
/* Depth buffer compression and fast clear. */
|
||||
struct r600_htile_info htile;
|
||||
struct r600_resource *htile_buffer;
|
||||
bool tc_compatible_htile;
|
||||
bool depth_cleared; /* if it was cleared at least once */
|
||||
float depth_clear_value;
|
||||
bool stencil_cleared; /* if it was cleared at least once */
|
||||
|
|
|
@ -192,7 +192,8 @@ static int r600_init_surface(struct r600_common_screen *rscreen,
|
|||
struct radeon_surf *surface,
|
||||
const struct pipe_resource *ptex,
|
||||
unsigned array_mode,
|
||||
bool is_flushed_depth)
|
||||
bool is_flushed_depth,
|
||||
bool tc_compatible_htile)
|
||||
{
|
||||
const struct util_format_description *desc =
|
||||
util_format_description(ptex->format);
|
||||
|
@ -256,11 +257,22 @@ static int r600_init_surface(struct r600_common_screen *rscreen,
|
|||
if (!is_flushed_depth && is_depth) {
|
||||
surface->flags |= RADEON_SURF_ZBUFFER;
|
||||
|
||||
if (tc_compatible_htile &&
|
||||
array_mode == RADEON_SURF_MODE_2D) {
|
||||
/* TC-compatible HTILE only supports Z32_FLOAT.
|
||||
* Promote Z16 to Z32. DB->CB copies will convert
|
||||
* the format for transfers.
|
||||
*/
|
||||
surface->bpe = 4;
|
||||
surface->flags |= RADEON_SURF_TC_COMPATIBLE_HTILE;
|
||||
}
|
||||
|
||||
if (is_stencil) {
|
||||
surface->flags |= RADEON_SURF_SBUFFER |
|
||||
RADEON_SURF_HAS_SBUFFER_MIPTREE;
|
||||
}
|
||||
}
|
||||
|
||||
if (rscreen->chip_class >= SI) {
|
||||
surface->flags |= RADEON_SURF_HAS_TILE_MODE_INDEX;
|
||||
}
|
||||
|
@ -904,6 +916,7 @@ static unsigned r600_texture_get_htile_size(struct r600_common_screen *rscreen,
|
|||
rtex->htile.height = height;
|
||||
rtex->htile.xalign = cl_width * 8;
|
||||
rtex->htile.yalign = cl_height * 8;
|
||||
rtex->htile.alignment = base_align;
|
||||
|
||||
return (util_max_layer(&rtex->resource.b.b, 0) + 1) *
|
||||
align(slice_bytes, base_align);
|
||||
|
@ -912,21 +925,34 @@ static unsigned r600_texture_get_htile_size(struct r600_common_screen *rscreen,
|
|||
static void r600_texture_allocate_htile(struct r600_common_screen *rscreen,
|
||||
struct r600_texture *rtex)
|
||||
{
|
||||
unsigned htile_size = r600_texture_get_htile_size(rscreen, rtex);
|
||||
uint64_t htile_size, alignment;
|
||||
uint32_t clear_value;
|
||||
|
||||
if (rtex->tc_compatible_htile) {
|
||||
htile_size = rtex->surface.htile_size;
|
||||
alignment = rtex->surface.htile_alignment;
|
||||
clear_value = 0x0000030F;
|
||||
} else {
|
||||
htile_size = r600_texture_get_htile_size(rscreen, rtex);
|
||||
alignment = rtex->htile.alignment;
|
||||
clear_value = 0;
|
||||
}
|
||||
|
||||
if (!htile_size)
|
||||
return;
|
||||
|
||||
rtex->htile_buffer = (struct r600_resource*)
|
||||
pipe_buffer_create(&rscreen->b, PIPE_BIND_CUSTOM,
|
||||
PIPE_USAGE_DEFAULT, htile_size);
|
||||
r600_aligned_buffer_create(&rscreen->b, PIPE_BIND_CUSTOM,
|
||||
PIPE_USAGE_DEFAULT,
|
||||
htile_size, alignment);
|
||||
if (rtex->htile_buffer == NULL) {
|
||||
/* this is not a fatal error as we can still keep rendering
|
||||
* without htile buffer */
|
||||
R600_ERR("Failed to create buffer object for htile buffer.\n");
|
||||
} else {
|
||||
r600_screen_clear_buffer(rscreen, &rtex->htile_buffer->b.b, 0,
|
||||
htile_size, 0, R600_COHERENCY_NONE);
|
||||
r600_screen_clear_buffer(rscreen, &rtex->htile_buffer->b.b,
|
||||
0, htile_size, clear_value,
|
||||
R600_COHERENCY_NONE);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -967,10 +993,11 @@ void r600_print_texture_info(struct r600_texture *rtex, FILE *f)
|
|||
|
||||
if (rtex->htile_buffer)
|
||||
fprintf(f, " HTile: size=%u, alignment=%u, pitch=%u, height=%u, "
|
||||
"xalign=%u, yalign=%u\n",
|
||||
"xalign=%u, yalign=%u, TC_compatible = %u\n",
|
||||
rtex->htile_buffer->b.b.width0,
|
||||
rtex->htile_buffer->buf->alignment, rtex->htile.pitch,
|
||||
rtex->htile.height, rtex->htile.xalign, rtex->htile.yalign);
|
||||
rtex->htile.height, rtex->htile.xalign, rtex->htile.yalign,
|
||||
rtex->tc_compatible_htile);
|
||||
|
||||
if (rtex->dcc_offset) {
|
||||
fprintf(f, " DCC: offset=%"PRIu64", size=%"PRIu64", alignment=%"PRIu64"\n",
|
||||
|
@ -1054,6 +1081,16 @@ r600_texture_create_object(struct pipe_screen *screen,
|
|||
return NULL;
|
||||
}
|
||||
|
||||
rtex->tc_compatible_htile = rtex->surface.htile_size != 0;
|
||||
assert(!!(rtex->surface.flags & RADEON_SURF_TC_COMPATIBLE_HTILE) ==
|
||||
rtex->tc_compatible_htile);
|
||||
|
||||
/* TC-compatible HTILE only supports Z32_FLOAT. */
|
||||
if (rtex->tc_compatible_htile)
|
||||
rtex->db_render_format = PIPE_FORMAT_Z32_FLOAT;
|
||||
else
|
||||
rtex->db_render_format = base->format;
|
||||
|
||||
/* Tiled depth textures utilize the non-displayable tile order.
|
||||
* This must be done after r600_setup_surface.
|
||||
* Applies to R600-Cayman. */
|
||||
|
@ -1241,11 +1278,20 @@ struct pipe_resource *r600_texture_create(struct pipe_screen *screen,
|
|||
{
|
||||
struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
|
||||
struct radeon_surf surface = {0};
|
||||
bool is_flushed_depth = templ->flags & R600_RESOURCE_FLAG_FLUSHED_DEPTH;
|
||||
bool tc_compatible_htile =
|
||||
rscreen->chip_class >= VI &&
|
||||
(templ->flags & PIPE_RESOURCE_FLAG_TEXTURING_MORE_LIKELY) &&
|
||||
!(rscreen->debug_flags & DBG_NO_HYPERZ) &&
|
||||
!is_flushed_depth &&
|
||||
templ->nr_samples <= 1 && /* TC-compat HTILE is less efficient with MSAA */
|
||||
util_format_is_depth_or_stencil(templ->format);
|
||||
|
||||
int r;
|
||||
|
||||
r = r600_init_surface(rscreen, &surface, templ,
|
||||
r600_choose_tiling(rscreen, templ),
|
||||
templ->flags & R600_RESOURCE_FLAG_FLUSHED_DEPTH);
|
||||
is_flushed_depth, tc_compatible_htile);
|
||||
if (r) {
|
||||
return NULL;
|
||||
}
|
||||
|
@ -1296,7 +1342,8 @@ static struct pipe_resource *r600_texture_from_handle(struct pipe_screen *screen
|
|||
else
|
||||
array_mode = RADEON_SURF_MODE_LINEAR_ALIGNED;
|
||||
|
||||
r = r600_init_surface(rscreen, &surface, templ, array_mode, false);
|
||||
r = r600_init_surface(rscreen, &surface, templ, array_mode,
|
||||
false, false);
|
||||
if (r) {
|
||||
return NULL;
|
||||
}
|
||||
|
|
|
@ -278,6 +278,7 @@ enum radeon_feature_id {
|
|||
#define RADEON_SURF_HAS_TILE_MODE_INDEX (1 << 20)
|
||||
#define RADEON_SURF_FMASK (1 << 21)
|
||||
#define RADEON_SURF_DISABLE_DCC (1 << 22)
|
||||
#define RADEON_SURF_TC_COMPATIBLE_HTILE (1 << 23)
|
||||
|
||||
#define RADEON_SURF_GET(v, field) (((v) >> RADEON_SURF_ ## field ## _SHIFT) & RADEON_SURF_ ## field ## _MASK)
|
||||
#define RADEON_SURF_SET(v, field) (((v) & RADEON_SURF_ ## field ## _MASK) << RADEON_SURF_ ## field ## _SHIFT)
|
||||
|
@ -344,6 +345,9 @@ struct radeon_surf {
|
|||
|
||||
uint64_t dcc_size;
|
||||
uint64_t dcc_alignment;
|
||||
/* TC-compatible HTILE only. */
|
||||
uint64_t htile_size;
|
||||
uint64_t htile_alignment;
|
||||
};
|
||||
|
||||
struct radeon_bo_list_item {
|
||||
|
|
|
@ -332,6 +332,8 @@ si_flush_depth_texture(struct si_context *sctx,
|
|||
}
|
||||
}
|
||||
|
||||
assert(!tex->tc_compatible_htile || levels_z == 0);
|
||||
|
||||
/* We may have to allocate the flushed texture here when called from
|
||||
* si_decompress_subresource.
|
||||
*/
|
||||
|
@ -699,7 +701,10 @@ static void si_clear(struct pipe_context *ctx, unsigned buffers,
|
|||
zsbuf->u.tex.level == 0 &&
|
||||
zsbuf->u.tex.first_layer == 0 &&
|
||||
zsbuf->u.tex.last_layer == util_max_layer(&zstex->resource.b.b, 0)) {
|
||||
if (buffers & PIPE_CLEAR_DEPTH) {
|
||||
/* TC-compatible HTILE only supports depth clears to 0 or 1. */
|
||||
if (buffers & PIPE_CLEAR_DEPTH &&
|
||||
(!zstex->tc_compatible_htile ||
|
||||
depth == 0 || depth == 1)) {
|
||||
/* Need to disable EXPCLEAR temporarily if clearing
|
||||
* to a new value. */
|
||||
if (!zstex->depth_cleared || zstex->depth_clear_value != depth) {
|
||||
|
@ -713,7 +718,9 @@ static void si_clear(struct pipe_context *ctx, unsigned buffers,
|
|||
si_mark_atom_dirty(sctx, &sctx->db_render_state);
|
||||
}
|
||||
|
||||
if (buffers & PIPE_CLEAR_STENCIL) {
|
||||
/* TC-compatible HTILE only supports stencil clears to 0. */
|
||||
if (buffers & PIPE_CLEAR_STENCIL &&
|
||||
(!zstex->tc_compatible_htile || stencil == 0)) {
|
||||
stencil &= 0xff;
|
||||
|
||||
/* Need to disable EXPCLEAR temporarily if clearing
|
||||
|
|
|
@ -399,6 +399,9 @@ void si_set_mutable_tex_desc_fields(struct r600_texture *tex,
|
|||
state[7] = ((!tex->dcc_separate_buffer ? tex->resource.gpu_address : 0) +
|
||||
tex->dcc_offset +
|
||||
base_level_info->dcc_offset) >> 8;
|
||||
} else if (tex->tc_compatible_htile) {
|
||||
state[6] |= S_008F28_COMPRESSION_EN(1);
|
||||
state[7] = tex->htile_buffer->gpu_address >> 8;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -508,8 +511,10 @@ static void si_set_sampler_views(struct pipe_context *ctx,
|
|||
if (views[i]->texture && views[i]->texture->target != PIPE_BUFFER) {
|
||||
struct r600_texture *rtex =
|
||||
(struct r600_texture*)views[i]->texture;
|
||||
struct si_sampler_view *rview = (struct si_sampler_view *)views[i];
|
||||
|
||||
if (rtex->db_compatible) {
|
||||
if (rtex->db_compatible &&
|
||||
(!rtex->tc_compatible_htile || rview->is_stencil_sampler)) {
|
||||
samplers->depth_texture_mask |= 1u << slot;
|
||||
} else {
|
||||
samplers->depth_texture_mask &= ~(1u << slot);
|
||||
|
|
|
@ -4607,12 +4607,26 @@ static void tex_fetch_args(
|
|||
|
||||
/* Pack depth comparison value */
|
||||
if (tgsi_is_shadow_target(target) && opcode != TGSI_OPCODE_LODQ) {
|
||||
LLVMValueRef z;
|
||||
|
||||
if (target == TGSI_TEXTURE_SHADOWCUBE_ARRAY) {
|
||||
address[count++] = lp_build_emit_fetch(bld_base, inst, 1, TGSI_CHAN_X);
|
||||
z = lp_build_emit_fetch(bld_base, inst, 1, TGSI_CHAN_X);
|
||||
} else {
|
||||
assert(ref_pos >= 0);
|
||||
address[count++] = coords[ref_pos];
|
||||
z = coords[ref_pos];
|
||||
}
|
||||
|
||||
/* TC-compatible HTILE promotes Z16 and Z24 to Z32_FLOAT,
|
||||
* so the depth comparison value isn't clamped for Z16 and
|
||||
* Z24 anymore. Do it manually here.
|
||||
*
|
||||
* It's unnecessary if the original texture format was
|
||||
* Z32_FLOAT, but we don't know that here.
|
||||
*/
|
||||
if (ctx->screen->b.chip_class == VI)
|
||||
z = radeon_llvm_saturate(bld_base, z);
|
||||
|
||||
address[count++] = z;
|
||||
}
|
||||
|
||||
/* Pack user derivatives */
|
||||
|
|
|
@ -686,6 +686,9 @@ static void si_update_poly_offset_state(struct si_context *sctx)
|
|||
if (!rs || !rs->uses_poly_offset || !sctx->framebuffer.state.zsbuf)
|
||||
return;
|
||||
|
||||
/* Use the user format, not db_render_format, so that the polygon
|
||||
* offset behaves as expected by applications.
|
||||
*/
|
||||
switch (sctx->framebuffer.state.zsbuf->texture->format) {
|
||||
case PIPE_FORMAT_Z16_UNORM:
|
||||
si_pm4_bind_state(sctx, poly_offset, &rs->pm4_poly_offset[0]);
|
||||
|
@ -2140,7 +2143,7 @@ static void si_init_depth_surface(struct si_context *sctx,
|
|||
uint64_t z_offs, s_offs;
|
||||
uint32_t db_htile_data_base, db_htile_surface;
|
||||
|
||||
format = si_translate_dbformat(rtex->resource.b.b.format);
|
||||
format = si_translate_dbformat(rtex->db_render_format);
|
||||
|
||||
if (format == V_028040_Z_INVALID) {
|
||||
R600_ERR("Invalid DB format: %d, disabling DB.\n", rtex->resource.b.b.format);
|
||||
|
@ -2151,7 +2154,7 @@ static void si_init_depth_surface(struct si_context *sctx,
|
|||
z_offs += rtex->surface.level[level].offset;
|
||||
s_offs += rtex->surface.stencil_level[level].offset;
|
||||
|
||||
db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(1);
|
||||
db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(!rtex->tc_compatible_htile);
|
||||
|
||||
z_info = S_028040_FORMAT(format);
|
||||
if (rtex->resource.b.b.nr_samples > 1) {
|
||||
|
@ -2208,13 +2211,37 @@ static void si_init_depth_surface(struct si_context *sctx,
|
|||
*/
|
||||
if (rtex->resource.b.b.nr_samples <= 1)
|
||||
s_info |= S_028044_ALLOW_EXPCLEAR(1);
|
||||
} else
|
||||
/* Use all of the htile_buffer for depth if there's no stencil. */
|
||||
} else if (!rtex->tc_compatible_htile) {
|
||||
/* Use all of the htile_buffer for depth if there's no stencil.
|
||||
* This must not be set when TC-compatible HTILE is enabled
|
||||
* due to a hw bug.
|
||||
*/
|
||||
s_info |= S_028044_TILE_STENCIL_DISABLE(1);
|
||||
}
|
||||
|
||||
uint64_t va = rtex->htile_buffer->gpu_address;
|
||||
db_htile_data_base = va >> 8;
|
||||
db_htile_surface = S_028ABC_FULL_CACHE(1);
|
||||
|
||||
if (rtex->tc_compatible_htile) {
|
||||
db_htile_surface |= S_028ABC_TC_COMPATIBLE(1);
|
||||
|
||||
switch (rtex->resource.b.b.nr_samples) {
|
||||
case 0:
|
||||
case 1:
|
||||
z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(5);
|
||||
break;
|
||||
case 2:
|
||||
case 4:
|
||||
z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(3);
|
||||
break;
|
||||
case 8:
|
||||
z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(2);
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
db_htile_data_base = 0;
|
||||
db_htile_surface = 0;
|
||||
|
@ -2356,6 +2383,7 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
|
|||
|
||||
if (state->zsbuf) {
|
||||
surf = (struct r600_surface*)state->zsbuf;
|
||||
rtex = (struct r600_texture*)surf->base.texture;
|
||||
|
||||
if (!surf->depth_initialized) {
|
||||
si_init_depth_surface(sctx, surf);
|
||||
|
@ -3021,6 +3049,9 @@ si_create_sampler_view_custom(struct pipe_context *ctx,
|
|||
surflevel = tmp->surface.level;
|
||||
|
||||
if (tmp->db_compatible) {
|
||||
if (!view->is_stencil_sampler)
|
||||
pipe_format = tmp->db_render_format;
|
||||
|
||||
switch (pipe_format) {
|
||||
case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
|
||||
pipe_format = PIPE_FORMAT_Z32_FLOAT;
|
||||
|
|
|
@ -1118,7 +1118,8 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
|
|||
struct pipe_surface *surf = sctx->framebuffer.state.zsbuf;
|
||||
struct r600_texture *rtex = (struct r600_texture *)surf->texture;
|
||||
|
||||
rtex->dirty_level_mask |= 1 << surf->u.tex.level;
|
||||
if (!rtex->tc_compatible_htile)
|
||||
rtex->dirty_level_mask |= 1 << surf->u.tex.level;
|
||||
|
||||
if (rtex->surface.flags & RADEON_SURF_SBUFFER)
|
||||
rtex->stencil_dirty_level_mask |= 1 << surf->u.tex.level;
|
||||
|
|
|
@ -137,6 +137,7 @@ ADDR_HANDLE amdgpu_addr_create(struct amdgpu_winsys *ws)
|
|||
createFlags.value = 0;
|
||||
createFlags.useTileIndex = 1;
|
||||
createFlags.degradeBaseLevel = 1;
|
||||
createFlags.useHtileSliceAlign = 1;
|
||||
|
||||
addrCreateInput.chipEngine = CIASICIDGFXENGINE_SOUTHERNISLAND;
|
||||
addrCreateInput.chipFamily = ws->family;
|
||||
|
@ -160,7 +161,9 @@ static int compute_level(struct amdgpu_winsys *ws,
|
|||
ADDR_COMPUTE_SURFACE_INFO_INPUT *AddrSurfInfoIn,
|
||||
ADDR_COMPUTE_SURFACE_INFO_OUTPUT *AddrSurfInfoOut,
|
||||
ADDR_COMPUTE_DCCINFO_INPUT *AddrDccIn,
|
||||
ADDR_COMPUTE_DCCINFO_OUTPUT *AddrDccOut)
|
||||
ADDR_COMPUTE_DCCINFO_OUTPUT *AddrDccOut,
|
||||
ADDR_COMPUTE_HTILE_INFO_INPUT *AddrHtileIn,
|
||||
ADDR_COMPUTE_HTILE_INFO_OUTPUT *AddrHtileOut)
|
||||
{
|
||||
struct radeon_surf_level *surf_level;
|
||||
ADDR_E_RETURNCODE ret;
|
||||
|
@ -257,6 +260,32 @@ static int compute_level(struct amdgpu_winsys *ws,
|
|||
}
|
||||
}
|
||||
|
||||
/* TC-compatible HTILE. */
|
||||
if (!is_stencil &&
|
||||
AddrSurfInfoIn->flags.depth &&
|
||||
AddrSurfInfoIn->flags.tcCompatible &&
|
||||
surf_level->mode == RADEON_SURF_MODE_2D &&
|
||||
level == 0) {
|
||||
AddrHtileIn->flags.tcCompatible = 1;
|
||||
AddrHtileIn->pitch = AddrSurfInfoOut->pitch;
|
||||
AddrHtileIn->height = AddrSurfInfoOut->height;
|
||||
AddrHtileIn->numSlices = AddrSurfInfoOut->depth;
|
||||
AddrHtileIn->blockWidth = ADDR_HTILE_BLOCKSIZE_8;
|
||||
AddrHtileIn->blockHeight = ADDR_HTILE_BLOCKSIZE_8;
|
||||
AddrHtileIn->pTileInfo = AddrSurfInfoOut->pTileInfo;
|
||||
AddrHtileIn->tileIndex = AddrSurfInfoOut->tileIndex;
|
||||
AddrHtileIn->macroModeIndex = AddrSurfInfoOut->macroModeIndex;
|
||||
|
||||
ret = AddrComputeHtileInfo(ws->addrlib,
|
||||
AddrHtileIn,
|
||||
AddrHtileOut);
|
||||
|
||||
if (ret == ADDR_OK) {
|
||||
surf->htile_size = AddrHtileOut->htileBytes;
|
||||
surf->htile_alignment = AddrHtileOut->baseAlign;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -284,6 +313,8 @@ static int amdgpu_surface_init(struct radeon_winsys *rws,
|
|||
ADDR_COMPUTE_SURFACE_INFO_OUTPUT AddrSurfInfoOut = {0};
|
||||
ADDR_COMPUTE_DCCINFO_INPUT AddrDccIn = {0};
|
||||
ADDR_COMPUTE_DCCINFO_OUTPUT AddrDccOut = {0};
|
||||
ADDR_COMPUTE_HTILE_INFO_INPUT AddrHtileIn = {0};
|
||||
ADDR_COMPUTE_HTILE_INFO_OUTPUT AddrHtileOut = {0};
|
||||
ADDR_TILEINFO AddrTileInfoIn = {0};
|
||||
ADDR_TILEINFO AddrTileInfoOut = {0};
|
||||
int r;
|
||||
|
@ -296,6 +327,8 @@ static int amdgpu_surface_init(struct radeon_winsys *rws,
|
|||
AddrSurfInfoOut.size = sizeof(ADDR_COMPUTE_SURFACE_INFO_OUTPUT);
|
||||
AddrDccIn.size = sizeof(ADDR_COMPUTE_DCCINFO_INPUT);
|
||||
AddrDccOut.size = sizeof(ADDR_COMPUTE_DCCINFO_OUTPUT);
|
||||
AddrHtileIn.size = sizeof(ADDR_COMPUTE_HTILE_INFO_INPUT);
|
||||
AddrHtileOut.size = sizeof(ADDR_COMPUTE_HTILE_INFO_OUTPUT);
|
||||
AddrSurfInfoOut.pTileInfo = &AddrTileInfoOut;
|
||||
|
||||
type = RADEON_SURF_GET(surf->flags, TYPE);
|
||||
|
@ -361,7 +394,12 @@ static int amdgpu_surface_init(struct radeon_winsys *rws,
|
|||
AddrSurfInfoIn.flags.cube = type == RADEON_SURF_TYPE_CUBEMAP;
|
||||
AddrSurfInfoIn.flags.display = (surf->flags & RADEON_SURF_SCANOUT) != 0;
|
||||
AddrSurfInfoIn.flags.pow2Pad = surf->last_level > 0;
|
||||
AddrSurfInfoIn.flags.degrade4Space = 1;
|
||||
AddrSurfInfoIn.flags.tcCompatible = (surf->flags & RADEON_SURF_TC_COMPATIBLE_HTILE) != 0;
|
||||
|
||||
/* Only degrade the tile mode for space if TC-compatible HTILE hasn't been
|
||||
* requested, because TC-compatible HTILE requires 2D tiling.
|
||||
*/
|
||||
AddrSurfInfoIn.flags.degrade4Space = !AddrSurfInfoIn.flags.tcCompatible;
|
||||
|
||||
/* DCC notes:
|
||||
* - If we add MSAA support, keep in mind that CB can't decompress 8bpp
|
||||
|
@ -443,11 +481,14 @@ static int amdgpu_surface_init(struct radeon_winsys *rws,
|
|||
surf->bo_size = 0;
|
||||
surf->dcc_size = 0;
|
||||
surf->dcc_alignment = 1;
|
||||
surf->htile_size = 0;
|
||||
surf->htile_alignment = 1;
|
||||
|
||||
/* Calculate texture layout information. */
|
||||
for (level = 0; level <= surf->last_level; level++) {
|
||||
r = compute_level(ws, surf, false, level, type, compressed,
|
||||
&AddrSurfInfoIn, &AddrSurfInfoOut, &AddrDccIn, &AddrDccOut);
|
||||
&AddrSurfInfoIn, &AddrSurfInfoOut,
|
||||
&AddrDccIn, &AddrDccOut, &AddrHtileIn, &AddrHtileOut);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
|
@ -475,12 +516,14 @@ static int amdgpu_surface_init(struct radeon_winsys *rws,
|
|||
AddrSurfInfoIn.bpp = 8;
|
||||
AddrSurfInfoIn.flags.depth = 0;
|
||||
AddrSurfInfoIn.flags.stencil = 1;
|
||||
AddrSurfInfoIn.flags.tcCompatible = 0;
|
||||
/* This will be ignored if AddrSurfInfoIn.pTileInfo is NULL. */
|
||||
AddrTileInfoIn.tileSplitBytes = surf->stencil_tile_split;
|
||||
|
||||
for (level = 0; level <= surf->last_level; level++) {
|
||||
r = compute_level(ws, surf, true, level, type, compressed,
|
||||
&AddrSurfInfoIn, &AddrSurfInfoOut, &AddrDccIn, &AddrDccOut);
|
||||
&AddrSurfInfoIn, &AddrSurfInfoOut, &AddrDccIn, &AddrDccOut,
|
||||
NULL, NULL);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
|
@ -508,6 +551,12 @@ static int amdgpu_surface_init(struct radeon_winsys *rws,
|
|||
ws->info.num_tile_pipes);
|
||||
}
|
||||
|
||||
/* Make sure HTILE covers the whole miptree, because the shader reads
|
||||
* TC-compatible HTILE even for levels where it's disabled by DB.
|
||||
*/
|
||||
if (surf->htile_size && surf->last_level)
|
||||
surf->htile_size *= 2;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue