radeonsi/gfx9: keep reusing the same buffer/address for the gfx9 flush fence

Previously, every GFX9 CB/DB flush allocated a new 4-byte fence from a monotonic suballocator.

v2: initialize the memory at context creation

Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
This commit is contained in:
Marek Olšák 2017-06-07 00:16:46 +02:00
parent c66fc618cc
commit 79bd1d4f8b
3 changed files with 28 additions and 8 deletions

View File

@ -64,6 +64,7 @@ static void si_destroy_context(struct pipe_context *context)
free(sctx->border_color_table);
r600_resource_reference(&sctx->scratch_buffer, NULL);
r600_resource_reference(&sctx->compute_scratch_buffer, NULL);
r600_resource_reference(&sctx->wait_mem_scratch, NULL);
si_pm4_free_state(sctx, sctx->init_config, ~0);
if (sctx->init_config_gs_rings)
@ -269,6 +270,23 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen,
/* these must be last */
si_begin_new_cs(sctx);
if (sctx->b.chip_class >= GFX9) {
sctx->wait_mem_scratch = (struct r600_resource*)
pipe_buffer_create(screen, 0, PIPE_USAGE_DEFAULT, 4);
if (!sctx->wait_mem_scratch)
goto fail;
/* Initialize the memory. */
struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0));
radeon_emit(cs, S_370_DST_SEL(V_370_MEMORY_SYNC) |
S_370_WR_CONFIRM(1) |
S_370_ENGINE_SEL(V_370_ME));
radeon_emit(cs, sctx->wait_mem_scratch->gpu_address);
radeon_emit(cs, sctx->wait_mem_scratch->gpu_address >> 32);
radeon_emit(cs, sctx->wait_mem_number);
}
/* CIK cannot unbind a constant buffer (S_BUFFER_LOAD doesn't skip loads
* if NUM_RECORDS == 0). We need to use a dummy buffer instead. */
if (sctx->b.chip_class == CIK) {

View File

@ -263,6 +263,8 @@ struct si_context {
struct si_screen *screen;
LLVMTargetMachineRef tm; /* only non-threaded compilation */
struct si_shader_ctx_state fixed_func_tcs_shader;
struct r600_resource *wait_mem_scratch;
unsigned wait_mem_number;
struct radeon_winsys_cs *ce_ib;
struct radeon_winsys_cs *ce_preamble_ib;

View File

@ -954,9 +954,8 @@ void si_emit_cache_flush(struct si_context *sctx)
* wait for idle on GFX9. We have to use a TS event.
*/
if (sctx->b.chip_class >= GFX9 && flush_cb_db) {
struct r600_resource *rbuf = NULL;
uint64_t va;
unsigned offset = 0, tc_flags, cb_db_event;
unsigned tc_flags, cb_db_event;
/* Set the CB/DB flush event. */
switch (flush_cb_db) {
@ -997,14 +996,15 @@ void si_emit_cache_flush(struct si_context *sctx)
sctx->b.num_L2_invalidates++;
}
/* Allocate memory for the fence. */
u_suballocator_alloc(rctx->allocator_zeroed_memory, 4, 4,
&offset, (struct pipe_resource**)&rbuf);
va = rbuf->gpu_address + offset;
/* Do the flush (enqueue the event and wait for it). */
va = sctx->wait_mem_scratch->gpu_address;
sctx->wait_mem_number++;
r600_gfx_write_event_eop(rctx, cb_db_event, tc_flags, 1,
rbuf, va, 0, 1);
r600_gfx_wait_fence(rctx, va, 1, 0xffffffff);
sctx->wait_mem_scratch, va,
sctx->wait_mem_number - 1,
sctx->wait_mem_number);
r600_gfx_wait_fence(rctx, va, sctx->wait_mem_number, 0xffffffff);
}
/* Make sure ME is idle (it executes most packets) before continuing.