radeonsi: only flush the right set of caches for CP DMA operations

The right set is either the framebuffer caches or the caches for shader
resources (TC L1, TC L2 and KCACHE), selected by a new is_framebuffer
parameter. The motivation is that the framebuffer caches very rarely need
to be flushed here.

Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
This commit is contained in:
Marek Olšák 2014-12-29 14:45:49 +01:00
parent 73c2b0d18c
commit edf18da85d
9 changed files with 48 additions and 34 deletions

View File

@ -565,7 +565,8 @@ static void r600_copy_global_buffer(struct pipe_context *ctx,
}
static void r600_clear_buffer(struct pipe_context *ctx, struct pipe_resource *dst,
unsigned offset, unsigned size, unsigned value)
unsigned offset, unsigned size, unsigned value,
bool is_framebuffer)
{
struct r600_context *rctx = (struct r600_context*)ctx;

View File

@ -912,12 +912,13 @@ bool r600_can_dump_shader(struct r600_common_screen *rscreen,
}
void r600_screen_clear_buffer(struct r600_common_screen *rscreen, struct pipe_resource *dst,
unsigned offset, unsigned size, unsigned value)
unsigned offset, unsigned size, unsigned value,
bool is_framebuffer)
{
struct r600_common_context *rctx = (struct r600_common_context*)rscreen->aux_context;
pipe_mutex_lock(rscreen->aux_context_lock);
rctx->clear_buffer(&rctx->b, dst, offset, size, value);
rctx->clear_buffer(&rctx->b, dst, offset, size, value, is_framebuffer);
rscreen->aux_context->flush(rscreen->aux_context, NULL, 0);
pipe_mutex_unlock(rscreen->aux_context_lock);
}

View File

@ -388,7 +388,8 @@ struct r600_common_context {
const struct pipe_box *src_box);
void (*clear_buffer)(struct pipe_context *ctx, struct pipe_resource *dst,
unsigned offset, unsigned size, unsigned value);
unsigned offset, unsigned size, unsigned value,
bool is_framebuffer);
void (*blit_decompress_depth)(struct pipe_context *ctx,
struct r600_texture *texture,
@ -441,7 +442,8 @@ void r600_context_add_resource_size(struct pipe_context *ctx, struct pipe_resour
bool r600_can_dump_shader(struct r600_common_screen *rscreen,
const struct tgsi_token *tokens);
void r600_screen_clear_buffer(struct r600_common_screen *rscreen, struct pipe_resource *dst,
unsigned offset, unsigned size, unsigned value);
unsigned offset, unsigned size, unsigned value,
bool is_framebuffer);
struct pipe_resource *r600_resource_create_common(struct pipe_screen *screen,
const struct pipe_resource *templ);
const char *r600_get_llvm_processor_name(enum radeon_family family);

View File

@ -559,7 +559,8 @@ static void r600_texture_allocate_htile(struct r600_common_screen *rscreen,
* without htile buffer */
R600_ERR("Failed to create buffer object for htile buffer.\n");
} else {
r600_screen_clear_buffer(rscreen, &rtex->htile_buffer->b.b, 0, htile_size, 0);
r600_screen_clear_buffer(rscreen, &rtex->htile_buffer->b.b, 0,
htile_size, 0, true);
}
}
@ -638,7 +639,8 @@ r600_texture_create_object(struct pipe_screen *screen,
if (rtex->cmask.size) {
/* Initialize the cmask to 0xCC (= compressed state). */
r600_screen_clear_buffer(rscreen, &rtex->cmask_buffer->b.b,
rtex->cmask.offset, rtex->cmask.size, 0xCCCCCCCC);
rtex->cmask.offset, rtex->cmask.size,
0xCCCCCCCC, true);
}
/* Initialize the CMASK base register value. */
@ -1273,7 +1275,7 @@ void evergreen_do_fast_color_clear(struct r600_common_context *rctx,
/* Do the fast clear. */
evergreen_set_clear_color(tex, fb->cbufs[i]->format, color);
rctx->clear_buffer(&rctx->b, &tex->cmask_buffer->b.b,
tex->cmask.offset, tex->cmask.size, 0);
tex->cmask.offset, tex->cmask.size, 0, true);
tex->dirty_level_mask |= 1 << fb->cbufs[i]->u.tex.level;
fb_state->dirty = true;

View File

@ -122,7 +122,8 @@ void rvid_clear_buffer(struct pipe_context *context, struct rvid_buffer* buffer)
{
struct r600_common_context *rctx = (struct r600_common_context*)context;
rctx->clear_buffer(context, &buffer->res->b.b, 0, buffer->res->buf->size, 0);
rctx->clear_buffer(context, &buffer->res->b.b, 0, buffer->res->buf->size,
0, false);
context->flush(context, NULL, 0);
}

View File

@ -556,7 +556,7 @@ void si_resource_copy_region(struct pipe_context *ctx,
/* Fallback for buffers. */
if (dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) {
si_copy_buffer(sctx, dst, src, dstx, src_box->x, src_box->width);
si_copy_buffer(sctx, dst, src, dstx, src_box->x, src_box->width, false);
return;
}

View File

@ -1054,9 +1054,11 @@ static void si_invalidate_buffer(struct pipe_context *ctx, struct pipe_resource
#define CP_DMA_MAX_BYTE_COUNT ((1 << 21) - 8)
static void si_clear_buffer(struct pipe_context *ctx, struct pipe_resource *dst,
unsigned offset, unsigned size, unsigned value)
unsigned offset, unsigned size, unsigned value,
bool is_framebuffer)
{
struct si_context *sctx = (struct si_context*)ctx;
unsigned flush_flags;
if (!size)
return;
@ -1081,12 +1083,15 @@ static void si_clear_buffer(struct pipe_context *ctx, struct pipe_resource *dst,
uint64_t va = r600_resource(dst)->gpu_address + offset;
/* Flush the caches where the resource is bound. */
/* XXX only flush the caches where the buffer is bound. */
sctx->b.flags |= SI_CONTEXT_INV_TC_L1 |
SI_CONTEXT_INV_TC_L2 |
SI_CONTEXT_INV_KCACHE |
SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER;
sctx->b.flags |= SI_CONTEXT_PS_PARTIAL_FLUSH;
if (is_framebuffer)
flush_flags = SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER;
else
flush_flags = SI_CONTEXT_INV_TC_L1 |
SI_CONTEXT_INV_TC_L2 |
SI_CONTEXT_INV_KCACHE;
sctx->b.flags |= SI_CONTEXT_PS_PARTIAL_FLUSH |
flush_flags;
while (size) {
unsigned byte_count = MIN2(size, CP_DMA_MAX_BYTE_COUNT);
@ -1120,17 +1125,16 @@ static void si_clear_buffer(struct pipe_context *ctx, struct pipe_resource *dst,
/* Flush the caches again in case the 3D engine has been prefetching
* the resource. */
/* XXX only flush the caches where the buffer is bound. */
sctx->b.flags |= SI_CONTEXT_INV_TC_L1 |
SI_CONTEXT_INV_TC_L2 |
SI_CONTEXT_INV_KCACHE |
SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER;
sctx->b.flags |= flush_flags;
}
void si_copy_buffer(struct si_context *sctx,
struct pipe_resource *dst, struct pipe_resource *src,
uint64_t dst_offset, uint64_t src_offset, unsigned size)
uint64_t dst_offset, uint64_t src_offset, unsigned size,
bool is_framebuffer)
{
unsigned flush_flags;
if (!size)
return;
@ -1144,11 +1148,15 @@ void si_copy_buffer(struct si_context *sctx,
src_offset += r600_resource(src)->gpu_address;
/* Flush the caches where the resource is bound. */
sctx->b.flags |= SI_CONTEXT_INV_TC_L1 |
SI_CONTEXT_INV_TC_L2 |
SI_CONTEXT_INV_KCACHE |
SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER |
SI_CONTEXT_PS_PARTIAL_FLUSH;
if (is_framebuffer)
flush_flags = SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER;
else
flush_flags = SI_CONTEXT_INV_TC_L1 |
SI_CONTEXT_INV_TC_L2 |
SI_CONTEXT_INV_KCACHE;
sctx->b.flags |= SI_CONTEXT_PS_PARTIAL_FLUSH |
flush_flags;
while (size) {
unsigned sync_flags = 0;
@ -1180,10 +1188,9 @@ void si_copy_buffer(struct si_context *sctx,
dst_offset += byte_count;
}
sctx->b.flags |= SI_CONTEXT_INV_TC_L1 |
SI_CONTEXT_INV_TC_L2 |
SI_CONTEXT_INV_KCACHE |
SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER;
/* Flush the caches again in case the 3D engine has been prefetching
* the resource. */
sctx->b.flags |= flush_flags;
}
/* INIT/DEINIT */

View File

@ -155,7 +155,7 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, void *
/* Clear the NULL constant buffer, because loads should return zeros. */
sctx->b.clear_buffer(&sctx->b.b, sctx->null_const_buf.buffer, 0,
sctx->null_const_buf.buffer->width0, 0);
sctx->null_const_buf.buffer->width0, 0, false);
}
return &sctx->b.b;

View File

@ -238,7 +238,7 @@ void si_release_all_descriptors(struct si_context *sctx);
void si_all_descriptors_begin_new_cs(struct si_context *sctx);
void si_copy_buffer(struct si_context *sctx,
struct pipe_resource *dst, struct pipe_resource *src,
uint64_t dst_offset, uint64_t src_offset, unsigned size);
uint64_t dst_offset, uint64_t src_offset, unsigned size, bool is_framebuffer);
void si_upload_const_buffer(struct si_context *sctx, struct r600_resource **rbuffer,
const uint8_t *ptr, unsigned size, uint32_t *const_offset);