radeonsi: implement MSAA colorbuffer compression for rendering
Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
This commit is contained in:
parent
2f1c449415
commit
2a4b2e2305
|
@ -184,10 +184,13 @@ static void r600_flush_framebuffer(struct r600_context *ctx)
|
|||
S_0085F0_CB7_DEST_BASE_ENA(1) |
|
||||
S_0085F0_DB_ACTION_ENA(1) |
|
||||
S_0085F0_DB_DEST_BASE_ENA(1));
|
||||
si_cmd_flush_and_inv_cb_meta(pm4);
|
||||
|
||||
si_pm4_emit(ctx, pm4);
|
||||
si_pm4_free_state(ctx, pm4, ~0);
|
||||
|
||||
ctx->flags &= ~R600_CONTEXT_DST_CACHES_DIRTY;
|
||||
ctx->flush_and_inv_cb_meta = false;
|
||||
}
|
||||
|
||||
void si_context_flush(struct r600_context *ctx, unsigned flags)
|
||||
|
|
|
@ -40,6 +40,22 @@ struct r600_transfer {
|
|||
struct pipe_resource *staging;
|
||||
};
|
||||
|
||||
/* Layout of the FMASK metadata that is appended to an MSAA color texture. */
struct r600_fmask_info {
	unsigned offset;          /* where FMASK starts inside the texture allocation */
	unsigned size;            /* size of the FMASK area; 0 means "no FMASK" */
	unsigned alignment;       /* alignment required for 'offset' */
	unsigned bank_height;     /* bank height reported for the FMASK surface */
	unsigned slice_tile_max;  /* value programmed into the FMASK_SLICE TILE_MAX field */
	unsigned tile_mode_index; /* tiling index reported for the FMASK surface */
};
|
||||
|
||||
/* Layout of the CMASK metadata that is appended to a color texture. */
struct r600_cmask_info {
	unsigned offset;         /* where CMASK starts inside the texture allocation */
	unsigned size;           /* size of the CMASK area; 0 means "no CMASK" */
	unsigned alignment;      /* alignment required for 'offset' */
	unsigned slice_tile_max; /* value programmed into the CMASK_SLICE TILE_MAX field */
};
|
||||
|
||||
struct r600_texture {
|
||||
struct si_resource resource;
|
||||
|
||||
|
@ -48,12 +64,17 @@ struct r600_texture {
|
|||
* for the stencil buffer below. */
|
||||
enum pipe_format real_format;
|
||||
|
||||
unsigned size;
|
||||
unsigned pitch_override;
|
||||
unsigned is_depth;
|
||||
unsigned dirty_level_mask; /* each bit says if that miplevel is dirty */
|
||||
struct r600_texture *flushed_depth_texture;
|
||||
boolean is_flushing_texture;
|
||||
struct radeon_surface surface;
|
||||
|
||||
/* Colorbuffer compression and fast clear. */
|
||||
struct r600_fmask_info fmask;
|
||||
struct r600_cmask_info cmask;
|
||||
};
|
||||
|
||||
struct r600_surface {
|
||||
|
|
|
@ -173,6 +173,9 @@ static int r600_setup_surface(struct pipe_screen *screen,
|
|||
if (r) {
|
||||
return r;
|
||||
}
|
||||
|
||||
rtex->size = rtex->surface.bo_size;
|
||||
|
||||
if (pitch_in_bytes_override && pitch_in_bytes_override != rtex->surface.level[0].pitch_bytes) {
|
||||
/* old ddx on evergreen over estimate alignment for 1d, only 1 level
|
||||
* for those
|
||||
|
@ -419,6 +422,116 @@ static const struct u_resource_vtbl r600_texture_vtbl =
|
|||
|
||||
DEBUG_GET_ONCE_BOOL_OPTION(print_texdepth, "RADEON_PRINT_TEXDEPTH", FALSE);
|
||||
|
||||
/* The number of samples can be specified independently of the texture. */
|
||||
static void r600_texture_get_fmask_info(struct r600_screen *rscreen,
|
||||
struct r600_texture *rtex,
|
||||
unsigned nr_samples,
|
||||
struct r600_fmask_info *out)
|
||||
{
|
||||
/* FMASK is allocated like an ordinary texture. */
|
||||
struct radeon_surface fmask = rtex->surface;
|
||||
|
||||
memset(out, 0, sizeof(*out));
|
||||
|
||||
fmask.bo_alignment = 0;
|
||||
fmask.bo_size = 0;
|
||||
fmask.nsamples = 1;
|
||||
fmask.flags |= RADEON_SURF_FMASK | RADEON_SURF_HAS_TILE_MODE_INDEX;
|
||||
|
||||
switch (nr_samples) {
|
||||
case 2:
|
||||
case 4:
|
||||
fmask.bpe = 1;
|
||||
break;
|
||||
case 8:
|
||||
fmask.bpe = 4;
|
||||
break;
|
||||
default:
|
||||
R600_ERR("Invalid sample count for FMASK allocation.\n");
|
||||
return;
|
||||
}
|
||||
|
||||
if (rscreen->ws->surface_init(rscreen->ws, &fmask)) {
|
||||
R600_ERR("Got error in surface_init while allocating FMASK.\n");
|
||||
return;
|
||||
}
|
||||
|
||||
assert(fmask.level[0].mode == RADEON_SURF_MODE_2D);
|
||||
|
||||
out->slice_tile_max = (fmask.level[0].nblk_x * fmask.level[0].nblk_y) / 64;
|
||||
if (out->slice_tile_max)
|
||||
out->slice_tile_max -= 1;
|
||||
|
||||
out->tile_mode_index = fmask.tiling_index[0];
|
||||
out->bank_height = fmask.bankh;
|
||||
out->alignment = MAX2(256, fmask.bo_alignment);
|
||||
out->size = fmask.bo_size;
|
||||
}
|
||||
|
||||
static void r600_texture_allocate_fmask(struct r600_screen *rscreen,
|
||||
struct r600_texture *rtex)
|
||||
{
|
||||
r600_texture_get_fmask_info(rscreen, rtex,
|
||||
rtex->resource.b.b.nr_samples, &rtex->fmask);
|
||||
|
||||
rtex->fmask.offset = align(rtex->size, rtex->fmask.alignment);
|
||||
rtex->size = rtex->fmask.offset + rtex->fmask.size;
|
||||
}
|
||||
|
||||
static void si_texture_get_cmask_info(struct r600_screen *rscreen,
|
||||
struct r600_texture *rtex,
|
||||
struct r600_cmask_info *out)
|
||||
{
|
||||
unsigned pipe_interleave_bytes = rscreen->tiling_info.group_bytes;
|
||||
unsigned num_pipes = rscreen->tiling_info.num_channels;
|
||||
unsigned cl_width, cl_height;
|
||||
|
||||
switch (num_pipes) {
|
||||
case 2:
|
||||
cl_width = 32;
|
||||
cl_height = 16;
|
||||
break;
|
||||
case 4:
|
||||
cl_width = 32;
|
||||
cl_height = 32;
|
||||
break;
|
||||
case 8:
|
||||
cl_width = 64;
|
||||
cl_height = 32;
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
return;
|
||||
}
|
||||
|
||||
unsigned base_align = num_pipes * pipe_interleave_bytes;
|
||||
|
||||
unsigned width = align(rtex->surface.npix_x, cl_width*8);
|
||||
unsigned height = align(rtex->surface.npix_y, cl_height*8);
|
||||
unsigned slice_elements = (width * height) / (8*8);
|
||||
|
||||
/* Each element of CMASK is a nibble. */
|
||||
unsigned slice_bytes = slice_elements / 2;
|
||||
|
||||
out->slice_tile_max = (width * height) / (128*128);
|
||||
if (out->slice_tile_max)
|
||||
out->slice_tile_max -= 1;
|
||||
|
||||
out->alignment = MAX2(256, base_align);
|
||||
out->size = rtex->surface.array_size * align(slice_bytes, base_align);
|
||||
}
|
||||
|
||||
static void r600_texture_allocate_cmask(struct r600_screen *rscreen,
|
||||
struct r600_texture *rtex)
|
||||
{
|
||||
si_texture_get_cmask_info(rscreen, rtex, &rtex->cmask);
|
||||
|
||||
if (rtex->cmask.size) {
|
||||
rtex->cmask.offset = align(rtex->size, rtex->cmask.alignment);
|
||||
rtex->size = rtex->cmask.offset + rtex->cmask.size;
|
||||
}
|
||||
}
|
||||
|
||||
static struct r600_texture *
|
||||
r600_texture_create_object(struct pipe_screen *screen,
|
||||
const struct pipe_resource *base,
|
||||
|
@ -456,13 +569,23 @@ r600_texture_create_object(struct pipe_screen *screen,
|
|||
return NULL;
|
||||
}
|
||||
|
||||
if (base->nr_samples > 1 && !rtex->is_depth && !buf) {
|
||||
r600_texture_allocate_fmask(rscreen, rtex);
|
||||
r600_texture_allocate_cmask(rscreen, rtex);
|
||||
}
|
||||
|
||||
if (!rtex->is_depth && base->nr_samples > 1 &&
|
||||
(!rtex->fmask.size || !rtex->cmask.size)) {
|
||||
FREE(rtex);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Now create the backing buffer. */
|
||||
if (!buf && alloc_bo) {
|
||||
unsigned base_align = rtex->surface.bo_alignment;
|
||||
unsigned size = rtex->surface.bo_size;
|
||||
|
||||
base_align = rtex->surface.bo_alignment;
|
||||
if (!si_init_resource(rscreen, resource, size, base_align, FALSE, base->usage)) {
|
||||
if (!si_init_resource(rscreen, resource, rtex->size, base_align, FALSE, base->usage)) {
|
||||
FREE(rtex);
|
||||
return NULL;
|
||||
}
|
||||
|
@ -472,6 +595,12 @@ r600_texture_create_object(struct pipe_screen *screen,
|
|||
resource->domains = RADEON_DOMAIN_GTT | RADEON_DOMAIN_VRAM;
|
||||
}
|
||||
|
||||
if (rtex->cmask.size) {
|
||||
/* Initialize the cmask to 0xCC (= compressed state). */
|
||||
char *map = rscreen->ws->buffer_map(resource->cs_buf, NULL, PIPE_TRANSFER_WRITE);
|
||||
memset(map + rtex->cmask.offset, 0xCC, rtex->cmask.size);
|
||||
}
|
||||
|
||||
if (debug_get_option_print_texdepth() && rtex->is_depth) {
|
||||
printf("Texture: npix_x=%u, npix_y=%u, npix_z=%u, blk_w=%u, "
|
||||
"blk_h=%u, blk_d=%u, array_size=%u, last_level=%u, "
|
||||
|
|
|
@ -217,6 +217,8 @@ struct r600_context {
|
|||
/* SI state handling */
|
||||
union si_state queued;
|
||||
union si_state emitted;
|
||||
|
||||
bool flush_and_inv_cb_meta;
|
||||
};
|
||||
|
||||
/* r600_blit.c */
|
||||
|
|
|
@ -78,3 +78,12 @@ void si_cmd_surface_sync(struct si_pm4_state *pm4, uint32_t cp_coher_cntl)
|
|||
si_pm4_cmd_end(pm4, false);
|
||||
}
|
||||
}
|
||||
|
||||
void si_cmd_flush_and_inv_cb_meta(struct si_pm4_state *pm4)
|
||||
{
|
||||
si_pm4_cmd_begin(pm4, PKT3_EVENT_WRITE);
|
||||
si_pm4_cmd_add(pm4,
|
||||
EVENT_TYPE(V_028A90_FLUSH_AND_INV_CB_META) |
|
||||
EVENT_INDEX(0));
|
||||
si_pm4_cmd_end(pm4, false);
|
||||
}
|
||||
|
|
|
@ -1852,8 +1852,22 @@ static void si_cb(struct r600_context *rctx, struct si_pm4_state *pm4,
|
|||
|
||||
if (rtex->resource.b.b.nr_samples > 1) {
|
||||
unsigned log_samples = util_logbase2(rtex->resource.b.b.nr_samples);
|
||||
|
||||
color_attrib |= S_028C74_NUM_SAMPLES(log_samples) |
|
||||
S_028C74_NUM_FRAGMENTS(log_samples);
|
||||
|
||||
if (rtex->fmask.size) {
|
||||
color_info |= S_028C70_COMPRESSION(1);
|
||||
unsigned fmask_bankh = util_logbase2(rtex->fmask.bank_height);
|
||||
|
||||
/* due to a bug in the hw, FMASK_BANK_HEIGHT must be set on SI too */
|
||||
color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(rtex->fmask.tile_mode_index) |
|
||||
S_028C74_FMASK_BANK_HEIGHT(fmask_bankh);
|
||||
}
|
||||
}
|
||||
|
||||
if (rtex->cmask.size) {
|
||||
color_info |= S_028C70_FAST_CLEAR(1);
|
||||
}
|
||||
|
||||
offset += r600_resource_va(rctx->context.screen, state->cbufs[cb]->texture);
|
||||
|
@ -1875,6 +1889,19 @@ static void si_cb(struct r600_context *rctx, struct si_pm4_state *pm4,
|
|||
si_pm4_set_reg(pm4, R_028C70_CB_COLOR0_INFO + cb * 0x3C, color_info);
|
||||
si_pm4_set_reg(pm4, R_028C74_CB_COLOR0_ATTRIB + cb * 0x3C, color_attrib);
|
||||
|
||||
if (rtex->cmask.size) {
|
||||
si_pm4_set_reg(pm4, R_028C7C_CB_COLOR0_CMASK + cb * 0x3C,
|
||||
offset + (rtex->cmask.offset >> 8));
|
||||
si_pm4_set_reg(pm4, R_028C80_CB_COLOR0_CMASK_SLICE + cb * 0x3C,
|
||||
S_028C80_TILE_MAX(rtex->cmask.slice_tile_max));
|
||||
}
|
||||
if (rtex->fmask.size) {
|
||||
si_pm4_set_reg(pm4, R_028C84_CB_COLOR0_FMASK + cb * 0x3C,
|
||||
offset + (rtex->fmask.offset >> 8));
|
||||
si_pm4_set_reg(pm4, R_028C88_CB_COLOR0_FMASK_SLICE + cb * 0x3C,
|
||||
S_028C88_TILE_MAX(rtex->fmask.slice_tile_max));
|
||||
}
|
||||
|
||||
/* set CB_COLOR1_INFO for possible dual-src blending */
|
||||
if (state->nr_cbufs == 1) {
|
||||
assert(cb == 0);
|
||||
|
@ -2210,6 +2237,7 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
|
|||
return;
|
||||
|
||||
si_pm4_inval_fb_cache(pm4, state->nr_cbufs);
|
||||
rctx->flush_and_inv_cb_meta = true;
|
||||
|
||||
if (state->zsbuf)
|
||||
si_pm4_inval_zsbuf_cache(pm4);
|
||||
|
|
|
@ -83,6 +83,7 @@ struct si_vertex_element
|
|||
union si_state {
|
||||
struct {
|
||||
struct si_pm4_state *sync;
|
||||
struct si_pm4_state *flush_and_inv_cb_meta;
|
||||
struct si_pm4_state *init;
|
||||
struct si_state_blend *blend;
|
||||
struct si_pm4_state *blend_color;
|
||||
|
@ -229,5 +230,6 @@ void si_cmd_draw_index_2(struct si_pm4_state *pm4, uint32_t max_size,
|
|||
void si_cmd_draw_index_auto(struct si_pm4_state *pm4, uint32_t count,
|
||||
uint32_t initiator, bool predicate);
|
||||
void si_cmd_surface_sync(struct si_pm4_state *pm4, uint32_t cp_coher_cntl);
|
||||
void si_cmd_flush_and_inv_cb_meta(struct si_pm4_state *pm4);
|
||||
|
||||
#endif
|
||||
|
|
|
@ -706,6 +706,17 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
|
|||
si_pm4_set_state(rctx, sync, pm4);
|
||||
}
|
||||
|
||||
if (rctx->flush_and_inv_cb_meta) {
|
||||
struct si_pm4_state *pm4 = si_pm4_alloc_state(rctx);
|
||||
|
||||
if (pm4 == NULL)
|
||||
return;
|
||||
|
||||
si_cmd_flush_and_inv_cb_meta(pm4);
|
||||
si_pm4_set_state(rctx, flush_and_inv_cb_meta, pm4);
|
||||
rctx->flush_and_inv_cb_meta = false;
|
||||
}
|
||||
|
||||
/* Emit states. */
|
||||
rctx->pm4_dirty_cdwords += si_pm4_dirty_dw(rctx);
|
||||
|
||||
|
|
|
@ -8521,6 +8521,7 @@
|
|||
#define S_028C74_FMASK_TILE_MODE_INDEX(x) (((x) & 0x1F) << 5)
|
||||
#define G_028C74_FMASK_TILE_MODE_INDEX(x) (((x) >> 5) & 0x1F)
|
||||
#define C_028C74_FMASK_TILE_MODE_INDEX 0xFFFFFC1F
|
||||
#define S_028C74_FMASK_BANK_HEIGHT(x) (((x) & 0x3) << 10) /* SI errata */
|
||||
#define S_028C74_NUM_SAMPLES(x) (((x) & 0x07) << 12)
|
||||
#define G_028C74_NUM_SAMPLES(x) (((x) >> 12) & 0x07)
|
||||
#define C_028C74_NUM_SAMPLES 0xFFFF8FFF
|
||||
|
|
Loading…
Reference in New Issue