radeonsi: implement TC-compatible HTILE
so that decompress blits aren't needed and depth texturing needs less memory bandwidth.

Z16 and Z24 are promoted to Z32_FLOAT by the driver, because TC-compatible HTILE only supports Z32_FLOAT. This doubles memory footprint for Z16. The format promotion is not visible to state trackers.

This is part of TC-compatible renderbuffer compression, which has 3 parts: DCC, HTILE, FMASK. Only TC-compatible FMASK compression is missing now.

I don't see a measurable increase in performance though. (I tested Talos Principle and DiRT: Showdown, the latter is improved by 0.5%, which is almost noise, and it originally used layered Z16, so at least we know that Z16 promoted to Z32F isn't slower now)

Tested-by: Edmondo Tommasina <edmondo.tommasina@gmail.com>
Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
This commit is contained in:
parent
a077185ea9
commit
d4d9ec55c5
|
@ -245,6 +245,7 @@ struct r600_htile_info {
|
||||||
unsigned height;
|
unsigned height;
|
||||||
unsigned xalign;
|
unsigned xalign;
|
||||||
unsigned yalign;
|
unsigned yalign;
|
||||||
|
unsigned alignment;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct r600_texture {
|
struct r600_texture {
|
||||||
|
@ -252,6 +253,7 @@ struct r600_texture {
|
||||||
|
|
||||||
uint64_t size;
|
uint64_t size;
|
||||||
unsigned num_level0_transfers;
|
unsigned num_level0_transfers;
|
||||||
|
enum pipe_format db_render_format;
|
||||||
bool is_depth;
|
bool is_depth;
|
||||||
bool db_compatible;
|
bool db_compatible;
|
||||||
bool can_sample_z;
|
bool can_sample_z;
|
||||||
|
@ -273,6 +275,7 @@ struct r600_texture {
|
||||||
/* Depth buffer compression and fast clear. */
|
/* Depth buffer compression and fast clear. */
|
||||||
struct r600_htile_info htile;
|
struct r600_htile_info htile;
|
||||||
struct r600_resource *htile_buffer;
|
struct r600_resource *htile_buffer;
|
||||||
|
bool tc_compatible_htile;
|
||||||
bool depth_cleared; /* if it was cleared at least once */
|
bool depth_cleared; /* if it was cleared at least once */
|
||||||
float depth_clear_value;
|
float depth_clear_value;
|
||||||
bool stencil_cleared; /* if it was cleared at least once */
|
bool stencil_cleared; /* if it was cleared at least once */
|
||||||
|
|
|
@ -192,7 +192,8 @@ static int r600_init_surface(struct r600_common_screen *rscreen,
|
||||||
struct radeon_surf *surface,
|
struct radeon_surf *surface,
|
||||||
const struct pipe_resource *ptex,
|
const struct pipe_resource *ptex,
|
||||||
unsigned array_mode,
|
unsigned array_mode,
|
||||||
bool is_flushed_depth)
|
bool is_flushed_depth,
|
||||||
|
bool tc_compatible_htile)
|
||||||
{
|
{
|
||||||
const struct util_format_description *desc =
|
const struct util_format_description *desc =
|
||||||
util_format_description(ptex->format);
|
util_format_description(ptex->format);
|
||||||
|
@ -256,11 +257,22 @@ static int r600_init_surface(struct r600_common_screen *rscreen,
|
||||||
if (!is_flushed_depth && is_depth) {
|
if (!is_flushed_depth && is_depth) {
|
||||||
surface->flags |= RADEON_SURF_ZBUFFER;
|
surface->flags |= RADEON_SURF_ZBUFFER;
|
||||||
|
|
||||||
|
if (tc_compatible_htile &&
|
||||||
|
array_mode == RADEON_SURF_MODE_2D) {
|
||||||
|
/* TC-compatible HTILE only supports Z32_FLOAT.
|
||||||
|
* Promote Z16 to Z32. DB->CB copies will convert
|
||||||
|
* the format for transfers.
|
||||||
|
*/
|
||||||
|
surface->bpe = 4;
|
||||||
|
surface->flags |= RADEON_SURF_TC_COMPATIBLE_HTILE;
|
||||||
|
}
|
||||||
|
|
||||||
if (is_stencil) {
|
if (is_stencil) {
|
||||||
surface->flags |= RADEON_SURF_SBUFFER |
|
surface->flags |= RADEON_SURF_SBUFFER |
|
||||||
RADEON_SURF_HAS_SBUFFER_MIPTREE;
|
RADEON_SURF_HAS_SBUFFER_MIPTREE;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (rscreen->chip_class >= SI) {
|
if (rscreen->chip_class >= SI) {
|
||||||
surface->flags |= RADEON_SURF_HAS_TILE_MODE_INDEX;
|
surface->flags |= RADEON_SURF_HAS_TILE_MODE_INDEX;
|
||||||
}
|
}
|
||||||
|
@ -904,6 +916,7 @@ static unsigned r600_texture_get_htile_size(struct r600_common_screen *rscreen,
|
||||||
rtex->htile.height = height;
|
rtex->htile.height = height;
|
||||||
rtex->htile.xalign = cl_width * 8;
|
rtex->htile.xalign = cl_width * 8;
|
||||||
rtex->htile.yalign = cl_height * 8;
|
rtex->htile.yalign = cl_height * 8;
|
||||||
|
rtex->htile.alignment = base_align;
|
||||||
|
|
||||||
return (util_max_layer(&rtex->resource.b.b, 0) + 1) *
|
return (util_max_layer(&rtex->resource.b.b, 0) + 1) *
|
||||||
align(slice_bytes, base_align);
|
align(slice_bytes, base_align);
|
||||||
|
@ -912,21 +925,34 @@ static unsigned r600_texture_get_htile_size(struct r600_common_screen *rscreen,
|
||||||
static void r600_texture_allocate_htile(struct r600_common_screen *rscreen,
|
static void r600_texture_allocate_htile(struct r600_common_screen *rscreen,
|
||||||
struct r600_texture *rtex)
|
struct r600_texture *rtex)
|
||||||
{
|
{
|
||||||
unsigned htile_size = r600_texture_get_htile_size(rscreen, rtex);
|
uint64_t htile_size, alignment;
|
||||||
|
uint32_t clear_value;
|
||||||
|
|
||||||
|
if (rtex->tc_compatible_htile) {
|
||||||
|
htile_size = rtex->surface.htile_size;
|
||||||
|
alignment = rtex->surface.htile_alignment;
|
||||||
|
clear_value = 0x0000030F;
|
||||||
|
} else {
|
||||||
|
htile_size = r600_texture_get_htile_size(rscreen, rtex);
|
||||||
|
alignment = rtex->htile.alignment;
|
||||||
|
clear_value = 0;
|
||||||
|
}
|
||||||
|
|
||||||
if (!htile_size)
|
if (!htile_size)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
rtex->htile_buffer = (struct r600_resource*)
|
rtex->htile_buffer = (struct r600_resource*)
|
||||||
pipe_buffer_create(&rscreen->b, PIPE_BIND_CUSTOM,
|
r600_aligned_buffer_create(&rscreen->b, PIPE_BIND_CUSTOM,
|
||||||
PIPE_USAGE_DEFAULT, htile_size);
|
PIPE_USAGE_DEFAULT,
|
||||||
|
htile_size, alignment);
|
||||||
if (rtex->htile_buffer == NULL) {
|
if (rtex->htile_buffer == NULL) {
|
||||||
/* this is not a fatal error as we can still keep rendering
|
/* this is not a fatal error as we can still keep rendering
|
||||||
* without htile buffer */
|
* without htile buffer */
|
||||||
R600_ERR("Failed to create buffer object for htile buffer.\n");
|
R600_ERR("Failed to create buffer object for htile buffer.\n");
|
||||||
} else {
|
} else {
|
||||||
r600_screen_clear_buffer(rscreen, &rtex->htile_buffer->b.b, 0,
|
r600_screen_clear_buffer(rscreen, &rtex->htile_buffer->b.b,
|
||||||
htile_size, 0, R600_COHERENCY_NONE);
|
0, htile_size, clear_value,
|
||||||
|
R600_COHERENCY_NONE);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -967,10 +993,11 @@ void r600_print_texture_info(struct r600_texture *rtex, FILE *f)
|
||||||
|
|
||||||
if (rtex->htile_buffer)
|
if (rtex->htile_buffer)
|
||||||
fprintf(f, " HTile: size=%u, alignment=%u, pitch=%u, height=%u, "
|
fprintf(f, " HTile: size=%u, alignment=%u, pitch=%u, height=%u, "
|
||||||
"xalign=%u, yalign=%u\n",
|
"xalign=%u, yalign=%u, TC_compatible = %u\n",
|
||||||
rtex->htile_buffer->b.b.width0,
|
rtex->htile_buffer->b.b.width0,
|
||||||
rtex->htile_buffer->buf->alignment, rtex->htile.pitch,
|
rtex->htile_buffer->buf->alignment, rtex->htile.pitch,
|
||||||
rtex->htile.height, rtex->htile.xalign, rtex->htile.yalign);
|
rtex->htile.height, rtex->htile.xalign, rtex->htile.yalign,
|
||||||
|
rtex->tc_compatible_htile);
|
||||||
|
|
||||||
if (rtex->dcc_offset) {
|
if (rtex->dcc_offset) {
|
||||||
fprintf(f, " DCC: offset=%"PRIu64", size=%"PRIu64", alignment=%"PRIu64"\n",
|
fprintf(f, " DCC: offset=%"PRIu64", size=%"PRIu64", alignment=%"PRIu64"\n",
|
||||||
|
@ -1054,6 +1081,16 @@ r600_texture_create_object(struct pipe_screen *screen,
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
rtex->tc_compatible_htile = rtex->surface.htile_size != 0;
|
||||||
|
assert(!!(rtex->surface.flags & RADEON_SURF_TC_COMPATIBLE_HTILE) ==
|
||||||
|
rtex->tc_compatible_htile);
|
||||||
|
|
||||||
|
/* TC-compatible HTILE only supports Z32_FLOAT. */
|
||||||
|
if (rtex->tc_compatible_htile)
|
||||||
|
rtex->db_render_format = PIPE_FORMAT_Z32_FLOAT;
|
||||||
|
else
|
||||||
|
rtex->db_render_format = base->format;
|
||||||
|
|
||||||
/* Tiled depth textures utilize the non-displayable tile order.
|
/* Tiled depth textures utilize the non-displayable tile order.
|
||||||
* This must be done after r600_setup_surface.
|
* This must be done after r600_setup_surface.
|
||||||
* Applies to R600-Cayman. */
|
* Applies to R600-Cayman. */
|
||||||
|
@ -1241,11 +1278,20 @@ struct pipe_resource *r600_texture_create(struct pipe_screen *screen,
|
||||||
{
|
{
|
||||||
struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
|
struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
|
||||||
struct radeon_surf surface = {0};
|
struct radeon_surf surface = {0};
|
||||||
|
bool is_flushed_depth = templ->flags & R600_RESOURCE_FLAG_FLUSHED_DEPTH;
|
||||||
|
bool tc_compatible_htile =
|
||||||
|
rscreen->chip_class >= VI &&
|
||||||
|
(templ->flags & PIPE_RESOURCE_FLAG_TEXTURING_MORE_LIKELY) &&
|
||||||
|
!(rscreen->debug_flags & DBG_NO_HYPERZ) &&
|
||||||
|
!is_flushed_depth &&
|
||||||
|
templ->nr_samples <= 1 && /* TC-compat HTILE is less efficient with MSAA */
|
||||||
|
util_format_is_depth_or_stencil(templ->format);
|
||||||
|
|
||||||
int r;
|
int r;
|
||||||
|
|
||||||
r = r600_init_surface(rscreen, &surface, templ,
|
r = r600_init_surface(rscreen, &surface, templ,
|
||||||
r600_choose_tiling(rscreen, templ),
|
r600_choose_tiling(rscreen, templ),
|
||||||
templ->flags & R600_RESOURCE_FLAG_FLUSHED_DEPTH);
|
is_flushed_depth, tc_compatible_htile);
|
||||||
if (r) {
|
if (r) {
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
@ -1296,7 +1342,8 @@ static struct pipe_resource *r600_texture_from_handle(struct pipe_screen *screen
|
||||||
else
|
else
|
||||||
array_mode = RADEON_SURF_MODE_LINEAR_ALIGNED;
|
array_mode = RADEON_SURF_MODE_LINEAR_ALIGNED;
|
||||||
|
|
||||||
r = r600_init_surface(rscreen, &surface, templ, array_mode, false);
|
r = r600_init_surface(rscreen, &surface, templ, array_mode,
|
||||||
|
false, false);
|
||||||
if (r) {
|
if (r) {
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
|
@ -278,6 +278,7 @@ enum radeon_feature_id {
|
||||||
#define RADEON_SURF_HAS_TILE_MODE_INDEX (1 << 20)
|
#define RADEON_SURF_HAS_TILE_MODE_INDEX (1 << 20)
|
||||||
#define RADEON_SURF_FMASK (1 << 21)
|
#define RADEON_SURF_FMASK (1 << 21)
|
||||||
#define RADEON_SURF_DISABLE_DCC (1 << 22)
|
#define RADEON_SURF_DISABLE_DCC (1 << 22)
|
||||||
|
#define RADEON_SURF_TC_COMPATIBLE_HTILE (1 << 23)
|
||||||
|
|
||||||
#define RADEON_SURF_GET(v, field) (((v) >> RADEON_SURF_ ## field ## _SHIFT) & RADEON_SURF_ ## field ## _MASK)
|
#define RADEON_SURF_GET(v, field) (((v) >> RADEON_SURF_ ## field ## _SHIFT) & RADEON_SURF_ ## field ## _MASK)
|
||||||
#define RADEON_SURF_SET(v, field) (((v) & RADEON_SURF_ ## field ## _MASK) << RADEON_SURF_ ## field ## _SHIFT)
|
#define RADEON_SURF_SET(v, field) (((v) & RADEON_SURF_ ## field ## _MASK) << RADEON_SURF_ ## field ## _SHIFT)
|
||||||
|
@ -344,6 +345,9 @@ struct radeon_surf {
|
||||||
|
|
||||||
uint64_t dcc_size;
|
uint64_t dcc_size;
|
||||||
uint64_t dcc_alignment;
|
uint64_t dcc_alignment;
|
||||||
|
/* TC-compatible HTILE only. */
|
||||||
|
uint64_t htile_size;
|
||||||
|
uint64_t htile_alignment;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct radeon_bo_list_item {
|
struct radeon_bo_list_item {
|
||||||
|
|
|
@ -332,6 +332,8 @@ si_flush_depth_texture(struct si_context *sctx,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
assert(!tex->tc_compatible_htile || levels_z == 0);
|
||||||
|
|
||||||
/* We may have to allocate the flushed texture here when called from
|
/* We may have to allocate the flushed texture here when called from
|
||||||
* si_decompress_subresource.
|
* si_decompress_subresource.
|
||||||
*/
|
*/
|
||||||
|
@ -699,7 +701,10 @@ static void si_clear(struct pipe_context *ctx, unsigned buffers,
|
||||||
zsbuf->u.tex.level == 0 &&
|
zsbuf->u.tex.level == 0 &&
|
||||||
zsbuf->u.tex.first_layer == 0 &&
|
zsbuf->u.tex.first_layer == 0 &&
|
||||||
zsbuf->u.tex.last_layer == util_max_layer(&zstex->resource.b.b, 0)) {
|
zsbuf->u.tex.last_layer == util_max_layer(&zstex->resource.b.b, 0)) {
|
||||||
if (buffers & PIPE_CLEAR_DEPTH) {
|
/* TC-compatible HTILE only supports depth clears to 0 or 1. */
|
||||||
|
if (buffers & PIPE_CLEAR_DEPTH &&
|
||||||
|
(!zstex->tc_compatible_htile ||
|
||||||
|
depth == 0 || depth == 1)) {
|
||||||
/* Need to disable EXPCLEAR temporarily if clearing
|
/* Need to disable EXPCLEAR temporarily if clearing
|
||||||
* to a new value. */
|
* to a new value. */
|
||||||
if (!zstex->depth_cleared || zstex->depth_clear_value != depth) {
|
if (!zstex->depth_cleared || zstex->depth_clear_value != depth) {
|
||||||
|
@ -713,7 +718,9 @@ static void si_clear(struct pipe_context *ctx, unsigned buffers,
|
||||||
si_mark_atom_dirty(sctx, &sctx->db_render_state);
|
si_mark_atom_dirty(sctx, &sctx->db_render_state);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (buffers & PIPE_CLEAR_STENCIL) {
|
/* TC-compatible HTILE only supports stencil clears to 0. */
|
||||||
|
if (buffers & PIPE_CLEAR_STENCIL &&
|
||||||
|
(!zstex->tc_compatible_htile || stencil == 0)) {
|
||||||
stencil &= 0xff;
|
stencil &= 0xff;
|
||||||
|
|
||||||
/* Need to disable EXPCLEAR temporarily if clearing
|
/* Need to disable EXPCLEAR temporarily if clearing
|
||||||
|
|
|
@ -399,6 +399,9 @@ void si_set_mutable_tex_desc_fields(struct r600_texture *tex,
|
||||||
state[7] = ((!tex->dcc_separate_buffer ? tex->resource.gpu_address : 0) +
|
state[7] = ((!tex->dcc_separate_buffer ? tex->resource.gpu_address : 0) +
|
||||||
tex->dcc_offset +
|
tex->dcc_offset +
|
||||||
base_level_info->dcc_offset) >> 8;
|
base_level_info->dcc_offset) >> 8;
|
||||||
|
} else if (tex->tc_compatible_htile) {
|
||||||
|
state[6] |= S_008F28_COMPRESSION_EN(1);
|
||||||
|
state[7] = tex->htile_buffer->gpu_address >> 8;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -508,8 +511,10 @@ static void si_set_sampler_views(struct pipe_context *ctx,
|
||||||
if (views[i]->texture && views[i]->texture->target != PIPE_BUFFER) {
|
if (views[i]->texture && views[i]->texture->target != PIPE_BUFFER) {
|
||||||
struct r600_texture *rtex =
|
struct r600_texture *rtex =
|
||||||
(struct r600_texture*)views[i]->texture;
|
(struct r600_texture*)views[i]->texture;
|
||||||
|
struct si_sampler_view *rview = (struct si_sampler_view *)views[i];
|
||||||
|
|
||||||
if (rtex->db_compatible) {
|
if (rtex->db_compatible &&
|
||||||
|
(!rtex->tc_compatible_htile || rview->is_stencil_sampler)) {
|
||||||
samplers->depth_texture_mask |= 1u << slot;
|
samplers->depth_texture_mask |= 1u << slot;
|
||||||
} else {
|
} else {
|
||||||
samplers->depth_texture_mask &= ~(1u << slot);
|
samplers->depth_texture_mask &= ~(1u << slot);
|
||||||
|
|
|
@ -4607,12 +4607,26 @@ static void tex_fetch_args(
|
||||||
|
|
||||||
/* Pack depth comparison value */
|
/* Pack depth comparison value */
|
||||||
if (tgsi_is_shadow_target(target) && opcode != TGSI_OPCODE_LODQ) {
|
if (tgsi_is_shadow_target(target) && opcode != TGSI_OPCODE_LODQ) {
|
||||||
|
LLVMValueRef z;
|
||||||
|
|
||||||
if (target == TGSI_TEXTURE_SHADOWCUBE_ARRAY) {
|
if (target == TGSI_TEXTURE_SHADOWCUBE_ARRAY) {
|
||||||
address[count++] = lp_build_emit_fetch(bld_base, inst, 1, TGSI_CHAN_X);
|
z = lp_build_emit_fetch(bld_base, inst, 1, TGSI_CHAN_X);
|
||||||
} else {
|
} else {
|
||||||
assert(ref_pos >= 0);
|
assert(ref_pos >= 0);
|
||||||
address[count++] = coords[ref_pos];
|
z = coords[ref_pos];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* TC-compatible HTILE promotes Z16 and Z24 to Z32_FLOAT,
|
||||||
|
* so the depth comparison value isn't clamped for Z16 and
|
||||||
|
* Z24 anymore. Do it manually here.
|
||||||
|
*
|
||||||
|
* It's unnecessary if the original texture format was
|
||||||
|
* Z32_FLOAT, but we don't know that here.
|
||||||
|
*/
|
||||||
|
if (ctx->screen->b.chip_class == VI)
|
||||||
|
z = radeon_llvm_saturate(bld_base, z);
|
||||||
|
|
||||||
|
address[count++] = z;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Pack user derivatives */
|
/* Pack user derivatives */
|
||||||
|
|
|
@ -686,6 +686,9 @@ static void si_update_poly_offset_state(struct si_context *sctx)
|
||||||
if (!rs || !rs->uses_poly_offset || !sctx->framebuffer.state.zsbuf)
|
if (!rs || !rs->uses_poly_offset || !sctx->framebuffer.state.zsbuf)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
|
/* Use the user format, not db_render_format, so that the polygon
|
||||||
|
* offset behaves as expected by applications.
|
||||||
|
*/
|
||||||
switch (sctx->framebuffer.state.zsbuf->texture->format) {
|
switch (sctx->framebuffer.state.zsbuf->texture->format) {
|
||||||
case PIPE_FORMAT_Z16_UNORM:
|
case PIPE_FORMAT_Z16_UNORM:
|
||||||
si_pm4_bind_state(sctx, poly_offset, &rs->pm4_poly_offset[0]);
|
si_pm4_bind_state(sctx, poly_offset, &rs->pm4_poly_offset[0]);
|
||||||
|
@ -2140,7 +2143,7 @@ static void si_init_depth_surface(struct si_context *sctx,
|
||||||
uint64_t z_offs, s_offs;
|
uint64_t z_offs, s_offs;
|
||||||
uint32_t db_htile_data_base, db_htile_surface;
|
uint32_t db_htile_data_base, db_htile_surface;
|
||||||
|
|
||||||
format = si_translate_dbformat(rtex->resource.b.b.format);
|
format = si_translate_dbformat(rtex->db_render_format);
|
||||||
|
|
||||||
if (format == V_028040_Z_INVALID) {
|
if (format == V_028040_Z_INVALID) {
|
||||||
R600_ERR("Invalid DB format: %d, disabling DB.\n", rtex->resource.b.b.format);
|
R600_ERR("Invalid DB format: %d, disabling DB.\n", rtex->resource.b.b.format);
|
||||||
|
@ -2151,7 +2154,7 @@ static void si_init_depth_surface(struct si_context *sctx,
|
||||||
z_offs += rtex->surface.level[level].offset;
|
z_offs += rtex->surface.level[level].offset;
|
||||||
s_offs += rtex->surface.stencil_level[level].offset;
|
s_offs += rtex->surface.stencil_level[level].offset;
|
||||||
|
|
||||||
db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(1);
|
db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(!rtex->tc_compatible_htile);
|
||||||
|
|
||||||
z_info = S_028040_FORMAT(format);
|
z_info = S_028040_FORMAT(format);
|
||||||
if (rtex->resource.b.b.nr_samples > 1) {
|
if (rtex->resource.b.b.nr_samples > 1) {
|
||||||
|
@ -2208,13 +2211,37 @@ static void si_init_depth_surface(struct si_context *sctx,
|
||||||
*/
|
*/
|
||||||
if (rtex->resource.b.b.nr_samples <= 1)
|
if (rtex->resource.b.b.nr_samples <= 1)
|
||||||
s_info |= S_028044_ALLOW_EXPCLEAR(1);
|
s_info |= S_028044_ALLOW_EXPCLEAR(1);
|
||||||
} else
|
} else if (!rtex->tc_compatible_htile) {
|
||||||
/* Use all of the htile_buffer for depth if there's no stencil. */
|
/* Use all of the htile_buffer for depth if there's no stencil.
|
||||||
|
* This must not be set when TC-compatible HTILE is enabled
|
||||||
|
* due to a hw bug.
|
||||||
|
*/
|
||||||
s_info |= S_028044_TILE_STENCIL_DISABLE(1);
|
s_info |= S_028044_TILE_STENCIL_DISABLE(1);
|
||||||
|
}
|
||||||
|
|
||||||
uint64_t va = rtex->htile_buffer->gpu_address;
|
uint64_t va = rtex->htile_buffer->gpu_address;
|
||||||
db_htile_data_base = va >> 8;
|
db_htile_data_base = va >> 8;
|
||||||
db_htile_surface = S_028ABC_FULL_CACHE(1);
|
db_htile_surface = S_028ABC_FULL_CACHE(1);
|
||||||
|
|
||||||
|
if (rtex->tc_compatible_htile) {
|
||||||
|
db_htile_surface |= S_028ABC_TC_COMPATIBLE(1);
|
||||||
|
|
||||||
|
switch (rtex->resource.b.b.nr_samples) {
|
||||||
|
case 0:
|
||||||
|
case 1:
|
||||||
|
z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(5);
|
||||||
|
break;
|
||||||
|
case 2:
|
||||||
|
case 4:
|
||||||
|
z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(3);
|
||||||
|
break;
|
||||||
|
case 8:
|
||||||
|
z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(2);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
assert(0);
|
||||||
|
}
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
db_htile_data_base = 0;
|
db_htile_data_base = 0;
|
||||||
db_htile_surface = 0;
|
db_htile_surface = 0;
|
||||||
|
@ -2356,6 +2383,7 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
|
||||||
|
|
||||||
if (state->zsbuf) {
|
if (state->zsbuf) {
|
||||||
surf = (struct r600_surface*)state->zsbuf;
|
surf = (struct r600_surface*)state->zsbuf;
|
||||||
|
rtex = (struct r600_texture*)surf->base.texture;
|
||||||
|
|
||||||
if (!surf->depth_initialized) {
|
if (!surf->depth_initialized) {
|
||||||
si_init_depth_surface(sctx, surf);
|
si_init_depth_surface(sctx, surf);
|
||||||
|
@ -3021,6 +3049,9 @@ si_create_sampler_view_custom(struct pipe_context *ctx,
|
||||||
surflevel = tmp->surface.level;
|
surflevel = tmp->surface.level;
|
||||||
|
|
||||||
if (tmp->db_compatible) {
|
if (tmp->db_compatible) {
|
||||||
|
if (!view->is_stencil_sampler)
|
||||||
|
pipe_format = tmp->db_render_format;
|
||||||
|
|
||||||
switch (pipe_format) {
|
switch (pipe_format) {
|
||||||
case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
|
case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
|
||||||
pipe_format = PIPE_FORMAT_Z32_FLOAT;
|
pipe_format = PIPE_FORMAT_Z32_FLOAT;
|
||||||
|
|
|
@ -1118,7 +1118,8 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
|
||||||
struct pipe_surface *surf = sctx->framebuffer.state.zsbuf;
|
struct pipe_surface *surf = sctx->framebuffer.state.zsbuf;
|
||||||
struct r600_texture *rtex = (struct r600_texture *)surf->texture;
|
struct r600_texture *rtex = (struct r600_texture *)surf->texture;
|
||||||
|
|
||||||
rtex->dirty_level_mask |= 1 << surf->u.tex.level;
|
if (!rtex->tc_compatible_htile)
|
||||||
|
rtex->dirty_level_mask |= 1 << surf->u.tex.level;
|
||||||
|
|
||||||
if (rtex->surface.flags & RADEON_SURF_SBUFFER)
|
if (rtex->surface.flags & RADEON_SURF_SBUFFER)
|
||||||
rtex->stencil_dirty_level_mask |= 1 << surf->u.tex.level;
|
rtex->stencil_dirty_level_mask |= 1 << surf->u.tex.level;
|
||||||
|
|
|
@ -137,6 +137,7 @@ ADDR_HANDLE amdgpu_addr_create(struct amdgpu_winsys *ws)
|
||||||
createFlags.value = 0;
|
createFlags.value = 0;
|
||||||
createFlags.useTileIndex = 1;
|
createFlags.useTileIndex = 1;
|
||||||
createFlags.degradeBaseLevel = 1;
|
createFlags.degradeBaseLevel = 1;
|
||||||
|
createFlags.useHtileSliceAlign = 1;
|
||||||
|
|
||||||
addrCreateInput.chipEngine = CIASICIDGFXENGINE_SOUTHERNISLAND;
|
addrCreateInput.chipEngine = CIASICIDGFXENGINE_SOUTHERNISLAND;
|
||||||
addrCreateInput.chipFamily = ws->family;
|
addrCreateInput.chipFamily = ws->family;
|
||||||
|
@ -160,7 +161,9 @@ static int compute_level(struct amdgpu_winsys *ws,
|
||||||
ADDR_COMPUTE_SURFACE_INFO_INPUT *AddrSurfInfoIn,
|
ADDR_COMPUTE_SURFACE_INFO_INPUT *AddrSurfInfoIn,
|
||||||
ADDR_COMPUTE_SURFACE_INFO_OUTPUT *AddrSurfInfoOut,
|
ADDR_COMPUTE_SURFACE_INFO_OUTPUT *AddrSurfInfoOut,
|
||||||
ADDR_COMPUTE_DCCINFO_INPUT *AddrDccIn,
|
ADDR_COMPUTE_DCCINFO_INPUT *AddrDccIn,
|
||||||
ADDR_COMPUTE_DCCINFO_OUTPUT *AddrDccOut)
|
ADDR_COMPUTE_DCCINFO_OUTPUT *AddrDccOut,
|
||||||
|
ADDR_COMPUTE_HTILE_INFO_INPUT *AddrHtileIn,
|
||||||
|
ADDR_COMPUTE_HTILE_INFO_OUTPUT *AddrHtileOut)
|
||||||
{
|
{
|
||||||
struct radeon_surf_level *surf_level;
|
struct radeon_surf_level *surf_level;
|
||||||
ADDR_E_RETURNCODE ret;
|
ADDR_E_RETURNCODE ret;
|
||||||
|
@ -257,6 +260,32 @@ static int compute_level(struct amdgpu_winsys *ws,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* TC-compatible HTILE. */
|
||||||
|
if (!is_stencil &&
|
||||||
|
AddrSurfInfoIn->flags.depth &&
|
||||||
|
AddrSurfInfoIn->flags.tcCompatible &&
|
||||||
|
surf_level->mode == RADEON_SURF_MODE_2D &&
|
||||||
|
level == 0) {
|
||||||
|
AddrHtileIn->flags.tcCompatible = 1;
|
||||||
|
AddrHtileIn->pitch = AddrSurfInfoOut->pitch;
|
||||||
|
AddrHtileIn->height = AddrSurfInfoOut->height;
|
||||||
|
AddrHtileIn->numSlices = AddrSurfInfoOut->depth;
|
||||||
|
AddrHtileIn->blockWidth = ADDR_HTILE_BLOCKSIZE_8;
|
||||||
|
AddrHtileIn->blockHeight = ADDR_HTILE_BLOCKSIZE_8;
|
||||||
|
AddrHtileIn->pTileInfo = AddrSurfInfoOut->pTileInfo;
|
||||||
|
AddrHtileIn->tileIndex = AddrSurfInfoOut->tileIndex;
|
||||||
|
AddrHtileIn->macroModeIndex = AddrSurfInfoOut->macroModeIndex;
|
||||||
|
|
||||||
|
ret = AddrComputeHtileInfo(ws->addrlib,
|
||||||
|
AddrHtileIn,
|
||||||
|
AddrHtileOut);
|
||||||
|
|
||||||
|
if (ret == ADDR_OK) {
|
||||||
|
surf->htile_size = AddrHtileOut->htileBytes;
|
||||||
|
surf->htile_alignment = AddrHtileOut->baseAlign;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -284,6 +313,8 @@ static int amdgpu_surface_init(struct radeon_winsys *rws,
|
||||||
ADDR_COMPUTE_SURFACE_INFO_OUTPUT AddrSurfInfoOut = {0};
|
ADDR_COMPUTE_SURFACE_INFO_OUTPUT AddrSurfInfoOut = {0};
|
||||||
ADDR_COMPUTE_DCCINFO_INPUT AddrDccIn = {0};
|
ADDR_COMPUTE_DCCINFO_INPUT AddrDccIn = {0};
|
||||||
ADDR_COMPUTE_DCCINFO_OUTPUT AddrDccOut = {0};
|
ADDR_COMPUTE_DCCINFO_OUTPUT AddrDccOut = {0};
|
||||||
|
ADDR_COMPUTE_HTILE_INFO_INPUT AddrHtileIn = {0};
|
||||||
|
ADDR_COMPUTE_HTILE_INFO_OUTPUT AddrHtileOut = {0};
|
||||||
ADDR_TILEINFO AddrTileInfoIn = {0};
|
ADDR_TILEINFO AddrTileInfoIn = {0};
|
||||||
ADDR_TILEINFO AddrTileInfoOut = {0};
|
ADDR_TILEINFO AddrTileInfoOut = {0};
|
||||||
int r;
|
int r;
|
||||||
|
@ -296,6 +327,8 @@ static int amdgpu_surface_init(struct radeon_winsys *rws,
|
||||||
AddrSurfInfoOut.size = sizeof(ADDR_COMPUTE_SURFACE_INFO_OUTPUT);
|
AddrSurfInfoOut.size = sizeof(ADDR_COMPUTE_SURFACE_INFO_OUTPUT);
|
||||||
AddrDccIn.size = sizeof(ADDR_COMPUTE_DCCINFO_INPUT);
|
AddrDccIn.size = sizeof(ADDR_COMPUTE_DCCINFO_INPUT);
|
||||||
AddrDccOut.size = sizeof(ADDR_COMPUTE_DCCINFO_OUTPUT);
|
AddrDccOut.size = sizeof(ADDR_COMPUTE_DCCINFO_OUTPUT);
|
||||||
|
AddrHtileIn.size = sizeof(ADDR_COMPUTE_HTILE_INFO_INPUT);
|
||||||
|
AddrHtileOut.size = sizeof(ADDR_COMPUTE_HTILE_INFO_OUTPUT);
|
||||||
AddrSurfInfoOut.pTileInfo = &AddrTileInfoOut;
|
AddrSurfInfoOut.pTileInfo = &AddrTileInfoOut;
|
||||||
|
|
||||||
type = RADEON_SURF_GET(surf->flags, TYPE);
|
type = RADEON_SURF_GET(surf->flags, TYPE);
|
||||||
|
@ -361,7 +394,12 @@ static int amdgpu_surface_init(struct radeon_winsys *rws,
|
||||||
AddrSurfInfoIn.flags.cube = type == RADEON_SURF_TYPE_CUBEMAP;
|
AddrSurfInfoIn.flags.cube = type == RADEON_SURF_TYPE_CUBEMAP;
|
||||||
AddrSurfInfoIn.flags.display = (surf->flags & RADEON_SURF_SCANOUT) != 0;
|
AddrSurfInfoIn.flags.display = (surf->flags & RADEON_SURF_SCANOUT) != 0;
|
||||||
AddrSurfInfoIn.flags.pow2Pad = surf->last_level > 0;
|
AddrSurfInfoIn.flags.pow2Pad = surf->last_level > 0;
|
||||||
AddrSurfInfoIn.flags.degrade4Space = 1;
|
AddrSurfInfoIn.flags.tcCompatible = (surf->flags & RADEON_SURF_TC_COMPATIBLE_HTILE) != 0;
|
||||||
|
|
||||||
|
/* Only degrade the tile mode for space if TC-compatible HTILE hasn't been
|
||||||
|
* requested, because TC-compatible HTILE requires 2D tiling.
|
||||||
|
*/
|
||||||
|
AddrSurfInfoIn.flags.degrade4Space = !AddrSurfInfoIn.flags.tcCompatible;
|
||||||
|
|
||||||
/* DCC notes:
|
/* DCC notes:
|
||||||
* - If we add MSAA support, keep in mind that CB can't decompress 8bpp
|
* - If we add MSAA support, keep in mind that CB can't decompress 8bpp
|
||||||
|
@ -443,11 +481,14 @@ static int amdgpu_surface_init(struct radeon_winsys *rws,
|
||||||
surf->bo_size = 0;
|
surf->bo_size = 0;
|
||||||
surf->dcc_size = 0;
|
surf->dcc_size = 0;
|
||||||
surf->dcc_alignment = 1;
|
surf->dcc_alignment = 1;
|
||||||
|
surf->htile_size = 0;
|
||||||
|
surf->htile_alignment = 1;
|
||||||
|
|
||||||
/* Calculate texture layout information. */
|
/* Calculate texture layout information. */
|
||||||
for (level = 0; level <= surf->last_level; level++) {
|
for (level = 0; level <= surf->last_level; level++) {
|
||||||
r = compute_level(ws, surf, false, level, type, compressed,
|
r = compute_level(ws, surf, false, level, type, compressed,
|
||||||
&AddrSurfInfoIn, &AddrSurfInfoOut, &AddrDccIn, &AddrDccOut);
|
&AddrSurfInfoIn, &AddrSurfInfoOut,
|
||||||
|
&AddrDccIn, &AddrDccOut, &AddrHtileIn, &AddrHtileOut);
|
||||||
if (r)
|
if (r)
|
||||||
return r;
|
return r;
|
||||||
|
|
||||||
|
@ -475,12 +516,14 @@ static int amdgpu_surface_init(struct radeon_winsys *rws,
|
||||||
AddrSurfInfoIn.bpp = 8;
|
AddrSurfInfoIn.bpp = 8;
|
||||||
AddrSurfInfoIn.flags.depth = 0;
|
AddrSurfInfoIn.flags.depth = 0;
|
||||||
AddrSurfInfoIn.flags.stencil = 1;
|
AddrSurfInfoIn.flags.stencil = 1;
|
||||||
|
AddrSurfInfoIn.flags.tcCompatible = 0;
|
||||||
/* This will be ignored if AddrSurfInfoIn.pTileInfo is NULL. */
|
/* This will be ignored if AddrSurfInfoIn.pTileInfo is NULL. */
|
||||||
AddrTileInfoIn.tileSplitBytes = surf->stencil_tile_split;
|
AddrTileInfoIn.tileSplitBytes = surf->stencil_tile_split;
|
||||||
|
|
||||||
for (level = 0; level <= surf->last_level; level++) {
|
for (level = 0; level <= surf->last_level; level++) {
|
||||||
r = compute_level(ws, surf, true, level, type, compressed,
|
r = compute_level(ws, surf, true, level, type, compressed,
|
||||||
&AddrSurfInfoIn, &AddrSurfInfoOut, &AddrDccIn, &AddrDccOut);
|
&AddrSurfInfoIn, &AddrSurfInfoOut, &AddrDccIn, &AddrDccOut,
|
||||||
|
NULL, NULL);
|
||||||
if (r)
|
if (r)
|
||||||
return r;
|
return r;
|
||||||
|
|
||||||
|
@ -508,6 +551,12 @@ static int amdgpu_surface_init(struct radeon_winsys *rws,
|
||||||
ws->info.num_tile_pipes);
|
ws->info.num_tile_pipes);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Make sure HTILE covers the whole miptree, because the shader reads
|
||||||
|
* TC-compatible HTILE even for levels where it's disabled by DB.
|
||||||
|
*/
|
||||||
|
if (surf->htile_size && surf->last_level)
|
||||||
|
surf->htile_size *= 2;
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue