freedreno: Move the batch cache to the context.

Our draw call rate was significantly limited by the atomics we had to do
to manage access to the batch cache.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11511>
Emma Anholt 2021-06-15 13:44:27 -07:00 committed by Marge Bot
parent c3d6493122
commit b2349a4671
16 changed files with 322 additions and 555 deletions
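
Overview note (not part of the upstream commit message): the structural change underneath all of the per-file diffs below is that struct fd_batch_cache stops being a screen-global table of 32 batch slots protected by screen->lock and per-resource bitmasks, and becomes a per-context pair of hash tables that only the owning context's driver thread touches. A condensed sketch under that reading, with hypothetical _old/_new suffixes used only here (the real fields appear in the freedreno_batch_cache.h diff below):

#include <stdint.h>

struct hash_table;   /* util/hash_table.h */
struct fd_batch;

/* Before: shared by every context on the screen, so all batch/resource
 * tracking needed screen->lock, and resources tracked batches via bitmasks. */
struct fd_batch_cache_old {
   struct hash_table *ht;        /* framebuffer key -> batch */
   unsigned cnt;
   struct fd_batch *batches[32];
   uint32_t batch_mask;
};

/* After: owned by one fd_context, so no screen lock, no fixed slot count,
 * and the writer of each resource is found with a hash-table lookup. */
struct fd_batch_cache_new {
   struct hash_table *ht;                 /* framebuffer key -> batch */
   unsigned cnt;
   struct hash_table *written_resources;  /* fd_resource -> writing batch */
};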

@@ -446,13 +446,9 @@ fd5_blitter_blit(struct fd_context *ctx,
    batch = fd_bc_alloc_batch(ctx, true);
-   fd_screen_lock(ctx->screen);
    fd_batch_resource_read(batch, src);
    fd_batch_resource_write(batch, dst);
-   fd_screen_unlock(ctx->screen);
    DBG_BLIT(info, batch);
    fd_batch_update_queries(batch);

@@ -916,13 +916,9 @@ handle_rgba_blit(struct fd_context *ctx,
    batch = fd_bc_alloc_batch(ctx, true);
-   fd_screen_lock(ctx->screen);
    fd_batch_resource_read(batch, src);
    fd_batch_resource_write(batch, dst);
-   fd_screen_unlock(ctx->screen);
    ASSERTED bool ret = fd_batch_lock_submit(batch);
    assert(ret);

@@ -30,6 +30,7 @@
 #include "util/u_string.h"
 #include "freedreno_batch.h"
+#include "freedreno_batch_cache.h"
 #include "freedreno_context.h"
 #include "freedreno_fence.h"
 #include "freedreno_query_hw.h"
@@ -138,6 +139,8 @@ fd_batch_create(struct fd_context *ctx, bool nondraw)
    batch->resources =
       _mesa_set_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal);
+   batch->dependents =
+      _mesa_set_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal);
    batch_init(batch);
@@ -186,6 +189,8 @@ cleanup_submit(struct fd_batch *batch)
    fd_submit_del(batch->submit);
    batch->submit = NULL;
+   util_copy_framebuffer_state(&batch->framebuffer, NULL);
 }
 
 static void
@@ -227,46 +232,58 @@ batch_fini(struct fd_batch *batch)
    u_trace_fini(&batch->trace);
 }
 
+/* Flushes any batches that this batch depends on, recursively. */
 static void
 batch_flush_dependencies(struct fd_batch *batch) assert_dt
 {
-   struct fd_batch_cache *cache = &batch->ctx->screen->batch_cache;
-   struct fd_batch *dep;
-   foreach_batch (dep, cache, batch->dependents_mask) {
+   set_foreach (batch->dependents, entry) {
+      struct fd_batch *dep = (void *)entry->key;
       fd_batch_flush(dep);
       fd_batch_reference(&dep, NULL);
    }
-   batch->dependents_mask = 0;
+   _mesa_set_clear(batch->dependents, NULL);
 }
 
 static void
 batch_reset_dependencies(struct fd_batch *batch)
 {
-   struct fd_batch_cache *cache = &batch->ctx->screen->batch_cache;
-   struct fd_batch *dep;
-   foreach_batch (dep, cache, batch->dependents_mask) {
+   set_foreach (batch->dependents, entry) {
+      struct fd_batch *dep = (void *)entry->key;
       fd_batch_reference(&dep, NULL);
    }
-   batch->dependents_mask = 0;
+   _mesa_set_clear(batch->dependents, NULL);
 }
 
 static void
 batch_reset_resources(struct fd_batch *batch)
 {
-   fd_screen_assert_locked(batch->ctx->screen);
+   struct fd_batch_cache *cache = &batch->ctx->batch_cache;
    set_foreach (batch->resources, entry) {
       struct fd_resource *rsc = (struct fd_resource *)entry->key;
-      _mesa_set_remove(batch->resources, entry);
-      debug_assert(rsc->track->batch_mask & (1 << batch->idx));
-      rsc->track->batch_mask &= ~(1 << batch->idx);
-      if (rsc->track->write_batch == batch)
-         fd_batch_reference_locked(&rsc->track->write_batch, NULL);
+      struct hash_entry *entry =
+         _mesa_hash_table_search_pre_hashed(cache->written_resources, rsc->hash, rsc);
+      if (entry) {
+         struct fd_batch *write_batch = entry->data;
+         assert(write_batch == batch);
+         struct pipe_resource *table_ref = &rsc->b.b;
+         pipe_resource_reference(&table_ref, NULL);
+         fd_batch_reference(&write_batch, NULL);
+         _mesa_hash_table_remove(cache->written_resources, entry);
+      }
+      ASSERTED int32_t count = p_atomic_dec_return(&rsc->batch_references);
+      assert(count >= 0);
+      struct pipe_resource *table_ref = &rsc->b.b;
+      pipe_resource_reference(&table_ref, NULL);
    }
+   /* Clear at the end so if the batch is reused we get a fully empty set
+    * rather than having any deleted keys.
+    */
+   _mesa_set_clear(batch->resources, NULL);
 }
 
 static void
@@ -275,11 +292,7 @@ batch_reset(struct fd_batch *batch) assert_dt
    DBG("%p", batch);
    batch_reset_dependencies(batch);
-   fd_screen_lock(batch->ctx->screen);
    batch_reset_resources(batch);
-   fd_screen_unlock(batch->ctx->screen);
    batch_fini(batch);
    batch_init(batch);
 }
@@ -294,30 +307,23 @@ fd_batch_reset(struct fd_batch *batch)
 void
 __fd_batch_destroy(struct fd_batch *batch)
 {
-   struct fd_context *ctx = batch->ctx;
    DBG("%p", batch);
-   fd_screen_assert_locked(batch->ctx->screen);
-   fd_bc_invalidate_batch(batch, true);
+   fd_bc_free_key(batch);
    batch_reset_resources(batch);
-   debug_assert(batch->resources->entries == 0);
+   assert(batch->resources->entries == 0);
    _mesa_set_destroy(batch->resources, NULL);
-   fd_screen_unlock(ctx->screen);
    batch_reset_dependencies(batch);
-   debug_assert(batch->dependents_mask == 0);
-   util_copy_framebuffer_state(&batch->framebuffer, NULL);
+   assert(batch->dependents->entries == 0);
+   _mesa_set_destroy(batch->dependents, NULL);
    batch_fini(batch);
    simple_mtx_destroy(&batch->submit_lock);
-   free(batch->key);
    free(batch);
-   fd_screen_lock(ctx->screen);
 }
 
 void
@@ -353,20 +359,12 @@ batch_flush(struct fd_batch *batch) assert_dt
    batch_flush_dependencies(batch);
-   fd_screen_lock(batch->ctx->screen);
    batch_reset_resources(batch);
-   /* NOTE: remove=false removes the batch from the hashtable, so future
-    * lookups won't cache-hit a flushed batch, but leaves the weak reference
-    * to the batch to avoid having multiple batches with same batch->idx, as
-    * that causes all sorts of hilarity.
-    */
-   fd_bc_invalidate_batch(batch, false);
+   fd_bc_free_key(batch);
    batch->flushed = true;
    if (batch == batch->ctx->batch)
-      fd_batch_reference_locked(&batch->ctx->batch, NULL);
-   fd_screen_unlock(batch->ctx->screen);
+      fd_batch_reference(&batch->ctx->batch, NULL);
    if (batch->fence)
       fd_fence_ref(&batch->ctx->last_fence, batch->fence);
@@ -386,78 +384,61 @@ fd_batch_flush(struct fd_batch *batch)
 {
    struct fd_batch *tmp = NULL;
-   /* NOTE: we need to hold an extra ref across the body of flush,
-    * since the last ref to this batch could be dropped when cleaning
-    * up used_resources
+   /* NOTE: Many callers pass in a ctx->batch or fd_bc_writer() batches without
+    * refcounting, which batch_flush will reset, so we need to hold a ref across
+    * the body of the flush.
     */
    fd_batch_reference(&tmp, batch);
    batch_flush(tmp);
    fd_batch_reference(&tmp, NULL);
 }
 
-/* find a batches dependents mask, including recursive dependencies: */
-static uint32_t
-recursive_dependents_mask(struct fd_batch *batch)
+static uint32_t ASSERTED
+dependents_contains(const struct fd_batch *haystack, const struct fd_batch *needle)
 {
-   struct fd_batch_cache *cache = &batch->ctx->screen->batch_cache;
-   struct fd_batch *dep;
-   uint32_t dependents_mask = batch->dependents_mask;
-   foreach_batch (dep, cache, batch->dependents_mask)
-      dependents_mask |= recursive_dependents_mask(dep);
-   return dependents_mask;
+   if (haystack == needle)
+      return true;
+   set_foreach (haystack->dependents, entry) {
+      if (dependents_contains(entry->key, needle))
+         return true;
+   }
+   return false;
 }
 
 void
 fd_batch_add_dep(struct fd_batch *batch, struct fd_batch *dep)
 {
-   fd_screen_assert_locked(batch->ctx->screen);
-   if (batch->dependents_mask & (1 << dep->idx))
+   if (_mesa_set_search(batch->dependents, dep))
       return;
    /* a loop should not be possible */
-   debug_assert(!((1 << batch->idx) & recursive_dependents_mask(dep)));
-   struct fd_batch *other = NULL;
-   fd_batch_reference_locked(&other, dep);
-   batch->dependents_mask |= (1 << dep->idx);
+   assert(!dependents_contains(dep, batch));
+   struct fd_batch *table_ref = NULL;
+   fd_batch_reference(&table_ref, dep);
+   _mesa_set_add(batch->dependents, dep);
    DBG("%p: added dependency on %p", batch, dep);
 }
 
-static void
-flush_write_batch(struct fd_resource *rsc) assert_dt
-{
-   struct fd_batch *b = NULL;
-   fd_batch_reference_locked(&b, rsc->track->write_batch);
-   fd_screen_unlock(b->ctx->screen);
-   fd_batch_flush(b);
-   fd_screen_lock(b->ctx->screen);
-   fd_batch_reference_locked(&b, NULL);
-}
-
 static void
 fd_batch_add_resource(struct fd_batch *batch, struct fd_resource *rsc)
 {
-   if (likely(fd_batch_references_resource(batch, rsc))) {
-      debug_assert(_mesa_set_search_pre_hashed(batch->resources, rsc->hash, rsc));
-      return;
+   bool found = false;
+   _mesa_set_search_or_add_pre_hashed(batch->resources, rsc->hash, rsc, &found);
+   if (!found) {
+      struct pipe_resource *table_ref = NULL;
+      pipe_resource_reference(&table_ref, &rsc->b.b);
+      p_atomic_inc(&rsc->batch_references);
    }
-   debug_assert(!_mesa_set_search(batch->resources, rsc));
-   _mesa_set_add_pre_hashed(batch->resources, rsc->hash, rsc);
-   rsc->track->batch_mask |= (1 << batch->idx);
 }
 
 void
 fd_batch_resource_write(struct fd_batch *batch, struct fd_resource *rsc)
 {
-   fd_screen_assert_locked(batch->ctx->screen);
+   struct fd_context *ctx = batch->ctx;
+   struct fd_batch_cache *cache = &ctx->batch_cache;
    DBG("%p: write %p", batch, rsc);
@@ -466,7 +447,8 @@ fd_batch_resource_write(struct fd_batch *batch, struct fd_resource *rsc)
     */
    rsc->valid = true;
-   if (rsc->track->write_batch == batch)
+   struct fd_batch *prev_writer = fd_bc_writer(ctx, rsc);
+   if (prev_writer == batch)
       return;
    fd_batch_write_prep(batch, rsc);
@@ -474,32 +456,21 @@ fd_batch_resource_write(struct fd_batch *batch, struct fd_resource *rsc)
    if (rsc->stencil)
       fd_batch_resource_write(batch, rsc->stencil);
-   /* note, invalidate write batch, to avoid further writes to rsc
-    * resulting in a write-after-read hazard.
+   /* Flush any other batches accessing our resource. Similar to
+    * fd_bc_flush_readers().
     */
-   /* if we are pending read or write by any other batch: */
-   if (unlikely(rsc->track->batch_mask & ~(1 << batch->idx))) {
-      struct fd_batch_cache *cache = &batch->ctx->screen->batch_cache;
-      struct fd_batch *dep;
-      if (rsc->track->write_batch)
-         flush_write_batch(rsc);
-      foreach_batch (dep, cache, rsc->track->batch_mask) {
-         struct fd_batch *b = NULL;
-         if (dep == batch)
-            continue;
-         /* note that batch_add_dep could flush and unref dep, so
-          * we need to hold a reference to keep it live for the
-          * fd_bc_invalidate_batch()
-          */
-         fd_batch_reference(&b, dep);
-         fd_batch_add_dep(batch, b);
-         fd_bc_invalidate_batch(b, false);
-         fd_batch_reference_locked(&b, NULL);
-      }
+   foreach_batch (reader, cache) {
+      if (reader == batch || !fd_batch_references(reader, rsc))
+         continue;
+      fd_batch_flush(reader);
    }
-   fd_batch_reference_locked(&rsc->track->write_batch, batch);
+   struct fd_batch *table_ref = NULL;
+   fd_batch_reference(&table_ref, batch);
+   struct pipe_resource *table_rsc_ref = NULL;
+   pipe_resource_reference(&table_rsc_ref, &rsc->b.b);
+   _mesa_hash_table_insert_pre_hashed(cache->written_resources, rsc->hash, rsc,
+                                      batch);
    fd_batch_add_resource(batch, rsc);
 }
@@ -507,19 +478,16 @@ fd_batch_resource_write(struct fd_batch *batch, struct fd_resource *rsc)
 void
 fd_batch_resource_read_slowpath(struct fd_batch *batch, struct fd_resource *rsc)
 {
-   fd_screen_assert_locked(batch->ctx->screen);
    if (rsc->stencil)
       fd_batch_resource_read(batch, rsc->stencil);
    DBG("%p: read %p", batch, rsc);
-   /* If reading a resource pending a write, go ahead and flush the
-    * writer. This avoids situations where we end up having to
-    * flush the current batch in _resource_used()
-    */
-   if (unlikely(rsc->track->write_batch && rsc->track->write_batch != batch))
-      flush_write_batch(rsc);
+   struct fd_context *ctx = batch->ctx;
+   struct fd_batch *writer = fd_bc_writer(ctx, rsc);
+   if (writer && writer != batch)
+      fd_batch_flush(writer);
    fd_batch_add_resource(batch, rsc);
 }
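
Note on the dependency tracking above: batch->dependents is now a util/set of batch pointers, and fd_batch_add_dep() asserts acyclicity with the recursive dependents_contains() walk. A minimal standalone sketch of that check, using a plain array in place of the util/set and a node struct in place of fd_batch (hypothetical names, illustration only):

#include <assert.h>
#include <stdbool.h>

struct node {
   struct node *deps[8];  /* batches this one depends on */
   unsigned num_deps;
};

static bool
dependents_contains(const struct node *haystack, const struct node *needle)
{
   if (haystack == needle)
      return true;
   for (unsigned i = 0; i < haystack->num_deps; i++)
      if (dependents_contains(haystack->deps[i], needle))
         return true;
   return false;
}

static void
add_dep(struct node *batch, struct node *dep)
{
   for (unsigned i = 0; i < batch->num_deps; i++)
      if (batch->deps[i] == dep)
         return;                               /* already a dependency */
   assert(!dependents_contains(dep, batch));   /* a loop should not be possible */
   assert(batch->num_deps < 8);
   batch->deps[batch->num_deps++] = dep;
}

int
main(void)
{
   struct node a = {0}, b = {0};
   add_dep(&a, &b);      /* a depends on b: ok */
   /* add_dep(&b, &a);   would trip the assert: b -> a -> b is a cycle */
   return 0;
}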

@@ -28,6 +28,7 @@
 #define FREEDRENO_BATCH_H_
 #include "util/list.h"
+#include "util/set.h"
 #include "util/simple_mtx.h"
 #include "util/u_inlines.h"
 #include "util/u_queue.h"
@@ -244,8 +245,9 @@ struct fd_batch {
    uint32_t query_tile_stride;
    /*@}*/
-   /* Set of resources used by currently-unsubmitted batch (read or
-    * write).. does not hold a reference to the resource.
+   /* Set of references to resources used by currently-unsubmitted batch (read
+    * or write). Note that this set may not include all BOs referenced by the
+    * batch due to fd_bc_resource_invalidate().
     */
    struct set *resources;
@@ -254,7 +256,7 @@ struct fd_batch {
    uint32_t hash;
    /** set of dependent batches.. holds refs to dependent batches: */
-   uint32_t dependents_mask;
+   struct set *dependents;
    /* Buffer for tessellation engine input
    */
@@ -289,30 +291,11 @@ struct fd_batch_key *fd_batch_key_clone(void *mem_ctx,
 void __fd_batch_describe(char *buf, const struct fd_batch *batch) assert_dt;
 void __fd_batch_destroy(struct fd_batch *batch);
 
-/*
- * NOTE the rule is, you need to hold the screen->lock when destroying
- * a batch.. so either use fd_batch_reference() (which grabs the lock
- * for you) if you don't hold the lock, or fd_batch_reference_locked()
- * if you do hold the lock.
- *
- * WARNING the _locked() version can briefly drop the lock. Without
- * recursive mutexes, I'm not sure there is much else we can do (since
- * __fd_batch_destroy() needs to unref resources)
- *
- * WARNING you must acquire the screen->lock and use the _locked()
- * version in case that the batch being ref'd can disappear under
- * you.
- */
-
 static inline void
-fd_batch_reference_locked(struct fd_batch **ptr, struct fd_batch *batch)
+fd_batch_reference(struct fd_batch **ptr, struct fd_batch *batch)
 {
    struct fd_batch *old_batch = *ptr;
-   /* only need lock if a reference is dropped: */
-   if (old_batch)
-      fd_screen_assert_locked(old_batch->ctx->screen);
    if (pipe_reference_described(
          &(*ptr)->reference, &batch->reference,
          (debug_reference_descriptor)__fd_batch_describe))
@@ -321,21 +304,6 @@ fd_batch_reference_locked(struct fd_batch **ptr, struct fd_batch *batch)
    *ptr = batch;
 }
 
-static inline void
-fd_batch_reference(struct fd_batch **ptr, struct fd_batch *batch)
-{
-   struct fd_batch *old_batch = *ptr;
-   struct fd_context *ctx = old_batch ? old_batch->ctx : NULL;
-   if (ctx)
-      fd_screen_lock(ctx->screen);
-   fd_batch_reference_locked(ptr, batch);
-   if (ctx)
-      fd_screen_unlock(ctx->screen);
-}
-
 static inline void
 fd_batch_unlock_submit(struct fd_batch *batch)
 {
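
Note on the refcounting change above: with the _locked() variant gone there is a single fd_batch_reference(), and callers such as fd_batch_flush() pin the batch with a local reference before doing work that can drop the owner's reference (see the NOTE comment in the freedreno_batch.c diff). A generic sketch of that pattern with a stand-in refcounted object (not the Mesa code):

#include <stdlib.h>

struct obj {
   int refcount;
};

static void
obj_reference(struct obj **dst, struct obj *src)
{
   if (src)
      src->refcount++;
   if (*dst && --(*dst)->refcount == 0)
      free(*dst);
   *dst = src;
}

/* Stand-in for batch_flush(): may drop the owner's reference. */
static void
flush(struct obj **owner_slot)
{
   obj_reference(owner_slot, NULL);
}

int
main(void)
{
   struct obj *batch = calloc(1, sizeof(*batch));
   batch->refcount = 1;               /* owned by, say, ctx->batch */
   struct obj **ctx_batch = &batch;

   /* The fd_batch_flush() pattern: take our own reference before doing
    * work that can drop the owner's reference. */
   struct obj *tmp = NULL;
   obj_reference(&tmp, *ctx_batch);   /* refcount: 2 */
   flush(ctx_batch);                  /* owner drops its ref; object survives */
   obj_reference(&tmp, NULL);         /* last ref dropped, object freed */
   return 0;
}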

@@ -129,39 +129,30 @@ fd_batch_key_clone(void *mem_ctx, const struct fd_batch_key *key)
 }
 
 void
-fd_bc_init(struct fd_batch_cache *cache)
+fd_bc_init(struct fd_context *ctx)
 {
+   struct fd_batch_cache *cache = &ctx->batch_cache;
    cache->ht =
       _mesa_hash_table_create(NULL, fd_batch_key_hash, fd_batch_key_equals);
+   cache->written_resources = _mesa_pointer_hash_table_create(NULL);
 }
 
 void
-fd_bc_fini(struct fd_batch_cache *cache)
+fd_bc_fini(struct fd_context *ctx) assert_dt
 {
+   struct fd_batch_cache *cache = &ctx->batch_cache;
+   fd_bc_flush(ctx, false);
    _mesa_hash_table_destroy(cache->ht, NULL);
+   _mesa_hash_table_destroy(cache->written_resources, NULL);
 }
 
 /* Flushes all batches in the batch cache. Used at glFlush() and similar times. */
 void
 fd_bc_flush(struct fd_context *ctx, bool deferred) assert_dt
 {
-   struct fd_batch_cache *cache = &ctx->screen->batch_cache;
-   /* fd_batch_flush() (and fd_batch_add_dep() which calls it indirectly)
-    * can cause batches to be unref'd and freed under our feet, so grab
-    * a reference to all the batches we need up-front.
-    */
-   struct fd_batch *batches[ARRAY_SIZE(cache->batches)] = {0};
-   struct fd_batch *batch;
-   unsigned n = 0;
-   fd_screen_lock(ctx->screen);
-   foreach_batch (batch, cache, cache->batch_mask) {
-      if (batch->ctx == ctx) {
-         fd_batch_reference_locked(&batches[n++], batch);
-      }
-   }
+   struct fd_batch_cache *cache = &ctx->batch_cache;
    /* deferred flush doesn't actually flush, but it marks every other
    * batch associated with the context as dependent on the current
@@ -171,245 +162,170 @@ fd_bc_flush(struct fd_context *ctx, bool deferred) assert_dt
    if (deferred) {
       struct fd_batch *current_batch = fd_context_batch(ctx);
-      for (unsigned i = 0; i < n; i++) {
-         if (batches[i] && (batches[i]->ctx == ctx) &&
-             (batches[i] != current_batch)) {
-            fd_batch_add_dep(current_batch, batches[i]);
-         }
+      foreach_batch (batch, cache) {
+         if (batch != current_batch)
+            fd_batch_add_dep(current_batch, batch);
       }
-      fd_batch_reference_locked(&current_batch, NULL);
-      fd_screen_unlock(ctx->screen);
+      fd_batch_reference(&current_batch, NULL);
    } else {
-      fd_screen_unlock(ctx->screen);
-      for (unsigned i = 0; i < n; i++) {
-         fd_batch_flush(batches[i]);
+      foreach_batch (batch, cache) {
+         fd_batch_flush(batch);
       }
    }
-   for (unsigned i = 0; i < n; i++) {
-      fd_batch_reference(&batches[i], NULL);
-   }
 }
 
 /**
- * Flushes the batch (if any) writing this resource. Must not hold the screen
- * lock.
+ * Flushes the batch (if any) writing this resource.
  */
 void
 fd_bc_flush_writer(struct fd_context *ctx, struct fd_resource *rsc) assert_dt
 {
-   fd_screen_lock(ctx->screen);
-   struct fd_batch *write_batch = NULL;
-   fd_batch_reference_locked(&write_batch, rsc->track->write_batch);
-   fd_screen_unlock(ctx->screen);
-   if (write_batch) {
-      fd_batch_flush(write_batch);
-      fd_batch_reference(&write_batch, NULL);
+   struct fd_batch_cache *cache = &ctx->batch_cache;
+   struct hash_entry *entry =
+      _mesa_hash_table_search_pre_hashed(cache->written_resources, rsc->hash, rsc);
+   if (entry) {
+      fd_batch_flush(entry->data);
    }
 }
 
 /**
- * Flushes any batches reading this resource. Must not hold the screen lock.
+ * Flushes any batches reading this resource.
  */
 void
 fd_bc_flush_readers(struct fd_context *ctx, struct fd_resource *rsc) assert_dt
 {
-   struct fd_batch *batch, *batches[32] = {};
-   uint32_t batch_count = 0;
-   /* This is a bit awkward, probably a fd_batch_flush_locked()
-    * would make things simpler.. but we need to hold the lock
-    * to iterate the batches which reference this resource. So
-    * we must first grab references under a lock, then flush.
-    */
-   fd_screen_lock(ctx->screen);
-   foreach_batch (batch, &ctx->screen->batch_cache, rsc->track->batch_mask)
-      fd_batch_reference_locked(&batches[batch_count++], batch);
-   fd_screen_unlock(ctx->screen);
-   for (int i = 0; i < batch_count; i++) {
-      fd_batch_flush(batches[i]);
-      fd_batch_reference(&batches[i], NULL);
+   foreach_batch (batch, &ctx->batch_cache) {
+      if (fd_batch_references(batch, rsc))
+         fd_batch_flush(batch);
    }
 }
 
+/**
+ * Flushes any batches accessing this resource as part of the gmem key.
+ *
+ * Used in resource shadowing.
+ */
+void
+fd_bc_flush_gmem_users(struct fd_context *ctx, struct fd_resource *rsc)
+{
+   foreach_batch (batch, &ctx->batch_cache) {
+      if (!batch->key)
+         continue;
+      for (int i = 0; i < batch->key->num_surfs; i++) {
+         if (batch->key->surf[i].texture == &rsc->b.b) {
+            struct fd_batch *tmp = NULL;
+            fd_batch_reference(&tmp, batch);
+            fd_batch_flush(batch);
+            fd_batch_reference(&tmp, NULL);
+            break;
+         }
+      }
+   }
+}
+
 void
 fd_bc_dump(struct fd_context *ctx, const char *fmt, ...)
 {
-   struct fd_batch_cache *cache = &ctx->screen->batch_cache;
+   struct fd_batch_cache *cache = &ctx->batch_cache;
    if (!FD_DBG(MSGS))
       return;
-   fd_screen_lock(ctx->screen);
    va_list ap;
    va_start(ap, fmt);
    vprintf(fmt, ap);
    va_end(ap);
-   for (int i = 0; i < ARRAY_SIZE(cache->batches); i++) {
-      struct fd_batch *batch = cache->batches[i];
-      if (batch) {
-         printf(" %p<%u>%s\n", batch, batch->seqno,
-                batch->needs_flush ? ", NEEDS FLUSH" : "");
-      }
+   foreach_batch (batch, cache) {
+      printf(" %p<%u>%s\n", batch, batch->seqno,
+             batch->needs_flush ? ", NEEDS FLUSH" : "");
    }
    printf("----\n");
-   fd_screen_unlock(ctx->screen);
 }
 
-/**
- * Note that when batch is flushed, it needs to remain in the cache so
- * that fd_bc_invalidate_resource() can work.. otherwise we can have
- * the case where a rsc is destroyed while a batch still has a dangling
- * reference to it.
- *
- * Note that the cmdstream (or, after the SUBMIT ioctl, the kernel)
- * would have a reference to the underlying bo, so it is ok for the
- * rsc to be destroyed before the batch.
- */
+/* Removes a batch from the batch cache (typically after a flush) */
 void
-fd_bc_invalidate_batch(struct fd_batch *batch, bool remove)
+fd_bc_free_key(struct fd_batch *batch)
 {
    if (!batch)
       return;
-   struct fd_batch_cache *cache = &batch->ctx->screen->batch_cache;
-   struct fd_batch_key *key = batch->key;
-   fd_screen_assert_locked(batch->ctx->screen);
-   if (remove) {
-      cache->batches[batch->idx] = NULL;
-      cache->batch_mask &= ~(1 << batch->idx);
-   }
-   if (!key)
-      return;
-   DBG("%p: key=%p", batch, batch->key);
-   for (unsigned idx = 0; idx < key->num_surfs; idx++) {
-      struct fd_resource *rsc = fd_resource(key->surf[idx].texture);
-      rsc->track->bc_batch_mask &= ~(1 << batch->idx);
-   }
-   struct hash_entry *entry =
-      _mesa_hash_table_search_pre_hashed(cache->ht, batch->hash, key);
-   _mesa_hash_table_remove(cache->ht, entry);
+   struct fd_context *ctx = batch->ctx;
+   struct fd_batch_cache *cache = &ctx->batch_cache;
+   if (batch->key) {
+      _mesa_hash_table_remove_key(cache->ht, batch->key);
+      free((void *)batch->key);
+      batch->key = NULL;
+   }
 }
 
-void
-fd_bc_invalidate_resource(struct fd_resource *rsc, bool destroy)
-{
-   struct fd_screen *screen = fd_screen(rsc->b.b.screen);
-   struct fd_batch *batch;
-   fd_screen_lock(screen);
-   if (destroy) {
-      foreach_batch (batch, &screen->batch_cache, rsc->track->batch_mask) {
-         struct set_entry *entry = _mesa_set_search_pre_hashed(batch->resources, rsc->hash, rsc);
-         _mesa_set_remove(batch->resources, entry);
-      }
-      rsc->track->batch_mask = 0;
-      fd_batch_reference_locked(&rsc->track->write_batch, NULL);
-   }
-   foreach_batch (batch, &screen->batch_cache, rsc->track->bc_batch_mask)
-      fd_bc_invalidate_batch(batch, false);
-   rsc->track->bc_batch_mask = 0;
-   fd_screen_unlock(screen);
+/* Called when the resource is has had its underlying storage replaced, so
+ * previous batch references to it are no longer relevant for flushing access to
+ * that storage.
+ */
+void
+fd_bc_invalidate_resource(struct fd_context *ctx, struct fd_resource *rsc)
+{
+   foreach_batch (batch, &ctx->batch_cache) {
+      struct set_entry *entry = _mesa_set_search_pre_hashed(batch->resources, rsc->hash, rsc);
+      if (entry) {
+         struct pipe_resource *table_ref = &rsc->b.b;
+         pipe_resource_reference(&table_ref, NULL);
+         ASSERTED int32_t count = p_atomic_dec_return(&rsc->batch_references);
+         assert(count >= 0);
+         _mesa_set_remove(batch->resources, entry);
+      }
+   }
+   struct hash_entry *entry =
+      _mesa_hash_table_search(ctx->batch_cache.written_resources, rsc);
+   if (entry) {
+      struct fd_batch *batch = entry->data;
+      struct pipe_resource *table_ref = &rsc->b.b;
+      pipe_resource_reference(&table_ref, NULL);
+      _mesa_hash_table_remove(ctx->batch_cache.written_resources, entry);
+      fd_batch_reference(&batch, NULL);
+   }
 }
 
 static struct fd_batch *
-alloc_batch_locked(struct fd_batch_cache *cache, struct fd_context *ctx,
-                   bool nondraw) assert_dt
+alloc_batch(struct fd_batch_cache *cache, struct fd_context *ctx, bool nondraw) assert_dt
 {
    struct fd_batch *batch;
-   uint32_t idx;
-   fd_screen_assert_locked(ctx->screen);
-   while ((idx = ffs(~cache->batch_mask)) == 0) {
-#if 0
-      for (unsigned i = 0; i < ARRAY_SIZE(cache->batches); i++) {
-         batch = cache->batches[i];
-         debug_printf("%d: needs_flush=%d, depends:", batch->idx, batch->needs_flush);
-         set_foreach (batch->dependencies, entry) {
-            struct fd_batch *dep = (struct fd_batch *)entry->key;
-            debug_printf(" %d", dep->idx);
-         }
-         debug_printf("\n");
-      }
-#endif
+   if (cache->ht->entries >= 32) {
       /* TODO: is LRU the better policy? Or perhaps the batch that
       * depends on the fewest other batches?
       */
       struct fd_batch *flush_batch = NULL;
-      for (unsigned i = 0; i < ARRAY_SIZE(cache->batches); i++) {
-         if (!flush_batch || (cache->batches[i]->seqno < flush_batch->seqno))
-            fd_batch_reference_locked(&flush_batch, cache->batches[i]);
+      foreach_batch (batch, cache) {
+         if (!flush_batch || (batch->seqno < flush_batch->seqno))
+            fd_batch_reference(&flush_batch, batch);
       }
-      /* we can drop lock temporarily here, since we hold a ref,
-       * flush_batch won't disappear under us.
-       */
-      fd_screen_unlock(ctx->screen);
       DBG("%p: too many batches! flush forced!", flush_batch);
       fd_batch_flush(flush_batch);
-      fd_screen_lock(ctx->screen);
-      /* While the resources get cleaned up automatically, the flush_batch
-       * doesn't get removed from the dependencies of other batches, so
-       * it won't be unref'd and will remain in the table.
-       *
-       * TODO maybe keep a bitmask of batches that depend on me, to make
-       * this easier:
-       */
-      for (unsigned i = 0; i < ARRAY_SIZE(cache->batches); i++) {
-         struct fd_batch *other = cache->batches[i];
-         if (!other)
-            continue;
-         if (other->dependents_mask & (1 << flush_batch->idx)) {
-            other->dependents_mask &= ~(1 << flush_batch->idx);
-            struct fd_batch *ref = flush_batch;
-            fd_batch_reference_locked(&ref, NULL);
-         }
-      }
-      fd_batch_reference_locked(&flush_batch, NULL);
+      fd_batch_reference(&flush_batch, NULL);
    }
-   idx--; /* bit zero returns 1 for ffs() */
    batch = fd_batch_create(ctx, nondraw);
    if (!batch)
       return NULL;
    batch->seqno = cache->cnt++;
-   batch->idx = idx;
-   cache->batch_mask |= (1 << idx);
-   debug_assert(cache->batches[idx] == NULL);
-   cache->batches[idx] = batch;
    return batch;
 }
 
 struct fd_batch *
-fd_bc_alloc_batch(struct fd_context *ctx, bool nondraw)
+fd_bc_alloc_batch(struct fd_context *ctx, bool nondraw) assert_dt
 {
-   struct fd_batch_cache *cache = &ctx->screen->batch_cache;
+   struct fd_batch_cache *cache = &ctx->batch_cache;
    struct fd_batch *batch;
    /* For normal draw batches, pctx->set_framebuffer_state() handles
@@ -419,9 +335,7 @@ fd_bc_alloc_batch(struct fd_context *ctx, bool nondraw)
    if (nondraw)
       fd_context_switch_from(ctx);
-   fd_screen_lock(ctx->screen);
-   batch = alloc_batch_locked(cache, ctx, nondraw);
-   fd_screen_unlock(ctx->screen);
+   batch = alloc_batch(cache, ctx, nondraw);
    if (batch && nondraw)
       fd_context_switch_to(ctx, batch);
@@ -432,7 +346,7 @@ fd_bc_alloc_batch(struct fd_context *ctx, bool nondraw)
 static struct fd_batch *
 batch_from_key(struct fd_context *ctx, struct fd_batch_key *key) assert_dt
 {
-   struct fd_batch_cache *cache = &ctx->screen->batch_cache;
+   struct fd_batch_cache *cache = &ctx->batch_cache;
    struct fd_batch *batch = NULL;
    uint32_t hash = fd_batch_key_hash(key);
    struct hash_entry *entry =
@@ -440,12 +354,12 @@ batch_from_key(struct fd_context *ctx, struct fd_batch_key *key) assert_dt
    if (entry) {
       free(key);
-      fd_batch_reference_locked(&batch, (struct fd_batch *)entry->data);
+      fd_batch_reference(&batch, (struct fd_batch *)entry->data);
       assert(!batch->flushed);
       return batch;
    }
-   batch = alloc_batch_locked(cache, ctx, false);
+   batch = alloc_batch(cache, ctx, false);
 #ifdef DEBUG
    DBG("%p: hash=0x%08x, %ux%u, %u layers, %u samples", batch, hash, key->width,
        key->height, key->layers, key->samples);
@@ -473,11 +387,6 @@ batch_from_key(struct fd_context *ctx, struct fd_batch_key *key) assert_dt
    batch->key = key;
    batch->hash = hash;
-   for (unsigned idx = 0; idx < key->num_surfs; idx++) {
-      struct fd_resource *rsc = fd_resource(key->surf[idx].texture);
-      rsc->track->bc_batch_mask = (1 << batch->idx);
-   }
    return batch;
 }
@@ -514,9 +423,5 @@ fd_batch_from_fb(struct fd_context *ctx,
    key->num_surfs = idx;
-   fd_screen_lock(ctx->screen);
-   struct fd_batch *batch = batch_from_key(ctx, key);
-   fd_screen_unlock(ctx->screen);
-   return batch;
+   return batch_from_key(ctx, key);
 }
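
Note on alloc_batch() above: when the per-context hash table reaches 32 entries, the batch with the lowest seqno (the oldest) is flushed to make room. A standalone sketch of that victim selection, with an array standing in for the hash table (illustration only):

#include <stdint.h>
#include <stdio.h>

struct batch {
   uint32_t seqno;
};

static struct batch *
pick_flush_victim(struct batch **batches, unsigned count)
{
   struct batch *victim = NULL;
   for (unsigned i = 0; i < count; i++) {
      if (!victim || batches[i]->seqno < victim->seqno)
         victim = batches[i];
   }
   return victim;
}

int
main(void)
{
   struct batch a = {.seqno = 7}, b = {.seqno = 3}, c = {.seqno = 9};
   struct batch *all[] = {&a, &b, &c};
   printf("flush batch with seqno %u\n", pick_flush_victim(all, 3)->seqno);
   return 0;
}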

@@ -28,6 +28,7 @@
 #define FREEDRENO_BATCH_CACHE_H_
 #include "pipe/p_state.h"
+#include "util/hash_table.h"
 #include "freedreno_util.h"
@@ -39,41 +40,32 @@ struct fd_screen;
 struct hash_table;
 
 struct fd_batch_cache {
+   /* Mapping from struct key (basically framebuffer state) to a batch of
+    * rendering for rendering to that target.
+    */
    struct hash_table *ht;
    unsigned cnt;
-   /* set of active batches.. there is an upper limit on the number of
-    * in-flight batches, for two reasons:
-    * 1) to avoid big spikes in number of batches in edge cases, such as
-    *    game startup (ie, lots of texture uploads, but no usages yet of
-    *    the textures), etc.
-    * 2) so we can use a simple bitmask in fd_resource to track which
-    *    batches have reference to the resource
-    */
-   struct fd_batch *batches[32];
-   uint32_t batch_mask;
+   /* Mapping from struct pipe_resource to the batch writing it. Keeps a reference. */
+   struct hash_table *written_resources;
 };
 
-/* note: if batches get unref'd in the body of the loop, they are removed
- * from the various masks.. but since we copy the mask at the beginning of
- * the loop into _m, we need the &= at the end of the loop to make sure
- * we don't have stale bits in _m
- */
-#define foreach_batch(batch, cache, mask)                                     \
-   for (uint32_t _m = (mask);                                                 \
-        _m && ((batch) = (cache)->batches[u_bit_scan(&_m)]); _m &= (mask))
+#define foreach_batch(batch, cache)                                           \
+   hash_table_foreach ((cache)->ht, _batch_entry)                             \
+      for (struct fd_batch *batch = _batch_entry->data; batch != NULL; batch = NULL)
 
-void fd_bc_init(struct fd_batch_cache *cache);
-void fd_bc_fini(struct fd_batch_cache *cache);
+void fd_bc_init(struct fd_context *ctx);
+void fd_bc_fini(struct fd_context *ctx);
 void fd_bc_flush(struct fd_context *ctx, bool deferred) assert_dt;
 void fd_bc_flush_writer(struct fd_context *ctx, struct fd_resource *rsc) assert_dt;
 void fd_bc_flush_readers(struct fd_context *ctx, struct fd_resource *rsc) assert_dt;
+void fd_bc_flush_gmem_users(struct fd_context *ctx, struct fd_resource *rsc) assert_dt;
 void fd_bc_dump(struct fd_context *ctx, const char *fmt, ...)
    _util_printf_format(2, 3);
 
-void fd_bc_invalidate_batch(struct fd_batch *batch, bool destroy);
-void fd_bc_invalidate_resource(struct fd_resource *rsc, bool destroy);
+void fd_bc_free_key(struct fd_batch *batch);
+void fd_bc_invalidate_resource(struct fd_context *ctx, struct fd_resource *rsc);
 struct fd_batch *fd_bc_alloc_batch(struct fd_context *ctx,
                                    bool nondraw) assert_dt;
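
Note on the new foreach_batch() macro above: the inner for statement runs exactly once per hash-table entry and exists only to give the loop body a typed, scoped batch variable (it terminates by setting the pointer to NULL after one pass). A standalone illustration of the same idiom over a plain array (hypothetical macro, not the Mesa one):

#include <stdio.h>

#define foreach_value(var, arr, count)                                  \
   for (unsigned _i = 0; _i < (count); _i++)                            \
      for (int *var = &(arr)[_i]; var != NULL; var = NULL)

int
main(void)
{
   int values[] = {1, 2, 3};
   foreach_value(v, values, 3)
      printf("%d\n", *v);
   return 0;
}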

@@ -353,9 +353,7 @@ fd_context_destroy(struct pipe_context *pctx)
    util_copy_framebuffer_state(&ctx->framebuffer, NULL);
    fd_batch_reference(&ctx->batch, NULL); /* unref current batch */
-   /* Make sure nothing in the batch cache references our context any more. */
-   fd_bc_flush(ctx, false);
+   fd_bc_fini(ctx);
    fd_prog_fini(pctx);
@@ -649,6 +647,8 @@ fd_context_init(struct fd_context *ctx, struct pipe_screen *pscreen,
    slab_create_child(&ctx->transfer_pool, &screen->transfer_pool);
    slab_create_child(&ctx->transfer_pool_unsync, &screen->transfer_pool);
+   fd_bc_init(ctx);
    fd_draw_init(pctx);
    fd_resource_context_init(pctx);
    fd_query_context_init(pctx);

@@ -37,6 +37,7 @@
 #include "util/perf/u_trace.h"
 #include "freedreno_autotune.h"
+#include "freedreno_batch_cache.h"
 #include "freedreno_gmem.h"
 #include "freedreno_perfetto.h"
 #include "freedreno_screen.h"
@@ -212,6 +213,8 @@ struct fd_context {
     */
    simple_mtx_t gmem_lock;
+   struct fd_batch_cache batch_cache;
    struct fd_device *dev;
    struct fd_screen *screen;
    struct fd_pipe *pipe;

@@ -197,8 +197,6 @@ batch_draw_tracking(struct fd_batch *batch, const struct pipe_draw_info *info,
     * Figure out the buffers/features we need:
     */
-   fd_screen_lock(ctx->screen);
    if (ctx->dirty & FD_DIRTY_RESOURCE)
       batch_draw_tracking_for_dirty_bits(batch);
@@ -220,8 +218,6 @@ batch_draw_tracking(struct fd_batch *batch, const struct pipe_draw_info *info,
    list_for_each_entry (struct fd_acc_query, aq, &ctx->acc_active_queries, node)
       resource_written(batch, aq->prsc);
-   fd_screen_unlock(ctx->screen);
 }
 
 static void
@@ -399,8 +395,6 @@ batch_clear_tracking(struct fd_batch *batch, unsigned buffers) assert_dt
    batch->resolve |= buffers;
-   fd_screen_lock(ctx->screen);
    if (buffers & PIPE_CLEAR_COLOR)
       for (unsigned i = 0; i < pfb->nr_cbufs; i++)
         if (buffers & (PIPE_CLEAR_COLOR0 << i))
@@ -415,8 +409,6 @@ batch_clear_tracking(struct fd_batch *batch, unsigned buffers) assert_dt
    list_for_each_entry (struct fd_acc_query, aq, &ctx->acc_active_queries, node)
       resource_written(batch, aq->prsc);
-   fd_screen_unlock(ctx->screen);
 }
 
 static void
@@ -519,8 +511,6 @@ fd_launch_grid(struct pipe_context *pctx,
    fd_batch_reference(&ctx->batch, batch);
    fd_context_all_dirty(ctx);
-   fd_screen_lock(ctx->screen);
    /* Mark SSBOs */
    u_foreach_bit (i, so->enabled_mask & so->writable_mask)
       resource_written(batch, so->sb[i].buffer);
@@ -553,8 +543,6 @@ fd_launch_grid(struct pipe_context *pctx,
    if (info->indirect)
       resource_read(batch, info->indirect);
-   fd_screen_unlock(ctx->screen);
    DBG("%p: work_dim=%u, block=%ux%ux%u, grid=%ux%ux%u",
       batch, info->work_dim,
      info->block[0], info->block[1], info->block[2],

@@ -89,9 +89,7 @@ fd_acc_query_resume(struct fd_acc_query *aq, struct fd_batch *batch) assert_dt
    fd_batch_needs_flush(aq->batch);
    p->resume(aq, aq->batch);
-   fd_screen_lock(batch->ctx->screen);
    fd_batch_resource_write(batch, fd_resource(aq->prsc));
-   fd_screen_unlock(batch->ctx->screen);
 }
 
 static void

@@ -340,7 +340,7 @@ fd_hw_query_prepare(struct fd_batch *batch, uint32_t num_tiles)
    uint32_t tile_stride = batch->next_sample_offset;
    if (tile_stride > 0)
-      fd_resource_resize(batch->query_buf, tile_stride * num_tiles);
+      fd_resource_resize(batch->ctx, batch->query_buf, tile_stride * num_tiles);
    batch->query_tile_stride = tile_stride;

@@ -191,7 +191,7 @@ __fd_resource_wait(struct fd_context *ctx, struct fd_resource *rsc, unsigned op,
 }
 
 static void
-realloc_bo(struct fd_resource *rsc, uint32_t size)
+realloc_bo(struct fd_context *ctx, struct fd_resource *rsc, uint32_t size)
 {
    struct pipe_resource *prsc = &rsc->b.b;
    struct fd_screen *screen = fd_screen(rsc->b.b.screen);
@@ -223,7 +223,8 @@ realloc_bo(struct fd_resource *rsc, uint32_t size)
    }
    util_range_set_empty(&rsc->valid_buffer_range);
-   fd_bc_invalidate_resource(rsc, true);
+   if (ctx)
+      fd_bc_invalidate_resource(ctx, rsc);
 }
 
 static void
@@ -267,20 +268,20 @@ fd_replace_buffer_storage(struct pipe_context *pctx, struct pipe_resource *pdst,
     */
    assert(pdst->target == PIPE_BUFFER);
    assert(psrc->target == PIPE_BUFFER);
-   assert(dst->track->bc_batch_mask == 0);
-   assert(src->track->bc_batch_mask == 0);
-   assert(src->track->batch_mask == 0);
-   assert(src->track->write_batch == NULL);
+#ifndef NDEBUG
+   foreach_batch (batch, &ctx->batch_cache) {
+      assert(!fd_batch_references(batch, src));
+   }
+#endif
    assert(memcmp(&dst->layout, &src->layout, sizeof(dst->layout)) == 0);
-   /* get rid of any references that batch-cache might have to us (which
-    * should empty/destroy rsc->batches hashset)
+   /* get rid of any references that batch-cache might have to us.
     *
     * Note that we aren't actually destroying dst, but we are replacing
     * it's storage so we want to go thru the same motions of decoupling
     * it's batch connections.
     */
-   fd_bc_invalidate_resource(dst, true);
+   fd_bc_invalidate_resource(ctx, dst);
    rebind_resource(dst);
    util_idalloc_mt_free(&ctx->screen->buffer_ids, delete_buffer_id);
@@ -290,7 +291,6 @@ fd_replace_buffer_storage(struct pipe_context *pctx, struct pipe_resource *pdst,
    fd_bo_del(dst->bo);
    dst->bo = fd_bo_ref(src->bo);
-   fd_resource_tracking_reference(&dst->track, src->track);
    src->is_replacement = true;
    dst->seqno = p_atomic_inc_return(&ctx->screen->rsc_seqno);
@@ -318,7 +318,11 @@ fd_resource_busy(struct pipe_screen *pscreen, struct pipe_resource *prsc,
 {
    struct fd_resource *rsc = fd_resource(prsc);
-   if (pending(rsc, !!(usage & PIPE_MAP_WRITE)))
+   /* Note that while TC *can* ask us about busy for MAP_READ to promote to an
+    * unsync map, it's not a case we have ever seen a need to optimize for, and
+    * being conservative in resource_busy() is safe.
+    */
+   if (p_atomic_read(&rsc->batch_references) != 0)
       return true;
    if (resource_busy(rsc, translate_usage(usage)))
@@ -357,8 +361,6 @@ fd_try_shadow_resource(struct fd_context *ctx, struct fd_resource *rsc,
 {
    struct pipe_context *pctx = &ctx->base;
    struct pipe_resource *prsc = &rsc->b.b;
-   struct fd_screen *screen = fd_screen(pctx->screen);
-   struct fd_batch *batch;
    bool fallback = false;
    if (prsc->next)
@@ -379,9 +381,7 @@ fd_try_shadow_resource(struct fd_context *ctx, struct fd_resource *rsc,
     * Note that being in the gmem key doesn't necessarily mean the
     * batch was considered a writer!
     */
-   foreach_batch (batch, &screen->batch_cache, rsc->track->bc_batch_mask) {
-      fd_batch_flush(batch);
-   }
+   fd_bc_flush_gmem_users(ctx, rsc);
    /* TODO: somehow munge dimensions and format to copy unsupported
     * render target format to something that is supported?
@@ -414,14 +414,11 @@ fd_try_shadow_resource(struct fd_context *ctx, struct fd_resource *rsc,
    assert(!ctx->in_shadow);
    ctx->in_shadow = true;
-   /* get rid of any references that batch-cache might have to us (which
-    * should empty/destroy rsc->batches hashset)
+   /* Signal that any HW state pointing at this resource needs to be
+    * re-emitted.
     */
-   fd_bc_invalidate_resource(rsc, false);
    rebind_resource(rsc);
-   fd_screen_lock(ctx->screen);
    /* Swap the backing bo's, so shadow becomes the old buffer,
    * blit from shadow to new buffer. From here on out, we
    * cannot fail.
@@ -432,8 +429,8 @@ fd_try_shadow_resource(struct fd_context *ctx, struct fd_resource *rsc,
    */
    struct fd_resource *shadow = fd_resource(pshadow);
-   DBG("shadow: %p (%d, %p) -> %p (%d, %p)", rsc, rsc->b.b.reference.count,
-       rsc->track, shadow, shadow->b.b.reference.count, shadow->track);
+   DBG("shadow: %p (%d) -> %p (%d)", rsc, rsc->b.b.reference.count,
+       shadow, shadow->b.b.reference.count);
    swap(rsc->bo, shadow->bo);
    swap(rsc->valid, shadow->valid);
@@ -446,19 +443,27 @@ fd_try_shadow_resource(struct fd_context *ctx, struct fd_resource *rsc,
    swap(rsc->layout, shadow->layout);
    rsc->seqno = p_atomic_inc_return(&ctx->screen->rsc_seqno);
-   /* at this point, the newly created shadow buffer is not referenced
-    * by any batches, but the existing rsc (probably) is. We need to
-    * transfer those references over:
+   /* At this point, the newly created shadow buffer is not referenced by any
+    * batches, but the existing rsc may be as a reader (we flushed writers
+    * above). We want to remove the old reader references to rsc so that we
+    * don't cause unnecessary synchronization on the write we're about to emit,
+    * but we don't need to bother with transferring the references over to
+    * shadow because right after our blit (which is also doing a read so it only
+    * cares about the lack of a writer) shadow will be freed.
     */
-   debug_assert(shadow->track->batch_mask == 0);
-   foreach_batch (batch, &ctx->screen->batch_cache, rsc->track->batch_mask) {
+   assert(!pending(ctx, shadow, true));
+   foreach_batch (batch, &ctx->batch_cache) {
       struct set_entry *entry = _mesa_set_search_pre_hashed(batch->resources, rsc->hash, rsc);
-      _mesa_set_remove(batch->resources, entry);
-      _mesa_set_add_pre_hashed(batch->resources, shadow->hash, shadow);
+      if (entry) {
+         struct pipe_resource *table_ref = &rsc->b.b;
+         pipe_resource_reference(&table_ref, NULL);
+         ASSERTED int32_t count = p_atomic_dec_return(&rsc->batch_references);
+         assert(count >= 0);
+         _mesa_set_remove(batch->resources, entry);
+      }
    }
-   swap(rsc->track, shadow->track);
-   fd_screen_unlock(ctx->screen);
    struct pipe_blit_info blit = {};
    blit.dst.resource = prsc;
@@ -725,14 +730,14 @@ fd_resource_transfer_unmap(struct pipe_context *pctx,
 }
 
 static void
-invalidate_resource(struct fd_resource *rsc, unsigned usage) assert_dt
+invalidate_resource(struct fd_context *ctx, struct fd_resource *rsc, unsigned usage) assert_dt
 {
-   bool needs_flush = pending(rsc, !!(usage & PIPE_MAP_WRITE));
+   bool needs_flush = pending(ctx, rsc, !!(usage & PIPE_MAP_WRITE));
    unsigned op = translate_usage(usage);
    if (needs_flush || resource_busy(rsc, op)) {
      rebind_resource(rsc);
-      realloc_bo(rsc, fd_bo_size(rsc->bo));
+      realloc_bo(ctx, rsc, fd_bo_size(rsc->bo));
    } else {
      util_range_set_empty(&rsc->valid_buffer_range);
    }
@@ -821,10 +826,10 @@ resource_transfer_map(struct pipe_context *pctx, struct pipe_resource *prsc,
    }
    if (usage & PIPE_MAP_DISCARD_WHOLE_RESOURCE) {
-      invalidate_resource(rsc, usage);
+      invalidate_resource(ctx, rsc, usage);
    } else {
      unsigned op = translate_usage(usage);
-      bool needs_flush = pending(rsc, !!(usage & PIPE_MAP_WRITE));
+      bool needs_flush = pending(ctx, rsc, !!(usage & PIPE_MAP_WRITE));
      /* If the GPU is writing to the resource, or if it is reading from the
       * resource and we're trying to write to it, flush the renders.
@@ -995,8 +1000,13 @@ fd_resource_destroy(struct pipe_screen *pscreen, struct pipe_resource *prsc)
    struct fd_screen *screen = fd_screen(prsc->screen);
    struct fd_resource *rsc = fd_resource(prsc);
-   if (!rsc->is_replacement)
-      fd_bc_invalidate_resource(rsc, true);
+   /* Note that a destroyed resource may be present in an outstanding
+    * batch->resources (not as a batch_cache->written_resource, since those
+    * have references to resources). However, all that means is that we might
+    * miss an opportunity to reorder a batch by spuriously detecting a newly
+    * created resource as still in use and flush the old reader.
+    */
    if (rsc->bo)
       fd_bo_del(rsc->bo);
    if (rsc->lrz)
@@ -1011,7 +1021,6 @@ fd_resource_destroy(struct pipe_screen *pscreen, struct pipe_resource *prsc)
    util_range_destroy(&rsc->valid_buffer_range);
    simple_mtx_destroy(&rsc->lock);
-   fd_resource_tracking_reference(&rsc->track, NULL);
    FREE(rsc);
 }
@@ -1048,7 +1057,7 @@ fd_resource_get_handle(struct pipe_screen *pscreen, struct pipe_context *pctx,
 /* special case to resize query buf after allocated.. */
 void
-fd_resource_resize(struct pipe_resource *prsc, uint32_t sz)
+fd_resource_resize(struct fd_context *ctx, struct pipe_resource *prsc, uint32_t sz)
 {
    struct fd_resource *rsc = fd_resource(prsc);
@@ -1057,7 +1066,7 @@ fd_resource_resize(struct pipe_resource *prsc, uint32_t sz)
    debug_assert(prsc->bind == PIPE_BIND_QUERY_BUFFER);
    prsc->width0 = sz;
-   realloc_bo(rsc, fd_screen(prsc->screen)->setup_slices(rsc));
+   realloc_bo(ctx, rsc, fd_screen(prsc->screen)->setup_slices(rsc));
 }
 
 static void
@@ -1097,14 +1106,6 @@ alloc_resource_struct(struct pipe_screen *pscreen,
    util_range_init(&rsc->valid_buffer_range);
    simple_mtx_init(&rsc->lock, mtx_plain);
-   rsc->track = CALLOC_STRUCT(fd_resource_tracking);
-   if (!rsc->track) {
-      free(rsc);
-      return NULL;
-   }
-   pipe_reference_init(&rsc->track->reference, 1);
    threaded_resource_init(prsc);
    if (tmpl->target == PIPE_BUFFER)
@@ -1284,7 +1285,7 @@ fd_resource_create_with_modifiers(struct pipe_screen *pscreen,
       return NULL;
    rsc = fd_resource(prsc);
-   realloc_bo(rsc, size);
+   realloc_bo(NULL, rsc, size);
    if (!rsc->bo)
      goto fail;
@@ -1408,24 +1409,25 @@ fd_invalidate_resource(struct pipe_context *pctx,
    if (prsc->target == PIPE_BUFFER) {
       /* Handle the glInvalidateBufferData() case:
       */
-      invalidate_resource(rsc, PIPE_MAP_READ | PIPE_MAP_WRITE);
-   } else if (rsc->track->write_batch) {
-      /* Handle the glInvalidateFramebuffer() case, telling us that
-       * we can skip resolve.
-       */
-      struct fd_batch *batch = rsc->track->write_batch;
-      struct pipe_framebuffer_state *pfb = &batch->framebuffer;
-      if (pfb->zsbuf && pfb->zsbuf->texture == prsc) {
-         batch->resolve &= ~(FD_BUFFER_DEPTH | FD_BUFFER_STENCIL);
-         fd_context_dirty(ctx, FD_DIRTY_ZSA);
-      }
-      for (unsigned i = 0; i < pfb->nr_cbufs; i++) {
-         if (pfb->cbufs[i] && pfb->cbufs[i]->texture == prsc) {
-            batch->resolve &= ~(PIPE_CLEAR_COLOR0 << i);
-            fd_context_dirty(ctx, FD_DIRTY_FRAMEBUFFER);
-         }
-      }
-   }
+      invalidate_resource(ctx, rsc, PIPE_MAP_READ | PIPE_MAP_WRITE);
+   } else {
+      struct fd_batch *batch = fd_bc_writer(ctx, rsc);
+      if (batch) {
+         /* Handle the glInvalidateFramebuffer() case, telling us that
+          * we can skip resolve.
+          */
+         struct pipe_framebuffer_state *pfb = &batch->framebuffer;
+         if (pfb->zsbuf && pfb->zsbuf->texture == prsc) {
+            batch->resolve &= ~(FD_BUFFER_DEPTH | FD_BUFFER_STENCIL);
+            fd_context_dirty(ctx, FD_DIRTY_ZSA);
+         }
+         for (unsigned i = 0; i < pfb->nr_cbufs; i++) {
+            if (pfb->cbufs[i] && pfb->cbufs[i]->texture == prsc) {
+               batch->resolve &= ~(PIPE_CLEAR_COLOR0 << i);
+               fd_context_dirty(ctx, FD_DIRTY_FRAMEBUFFER);
+            }
+         }
+      }
+   }
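
Note on rsc->batch_references used above: batches atomically increment it the first time they pick up a resource and decrement it when they reset or when the cache entry is invalidated, so fd_resource_busy() on the frontend thread can answer with a single atomic read instead of taking the screen lock. A standalone sketch of that pairing using C11 atomics in place of Mesa's p_atomic helpers (illustration only):

#include <assert.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct resource {
   atomic_int batch_references;
};

/* First time a batch picks up the resource: count it. */
static void
batch_adds_resource(struct resource *rsc)
{
   atomic_fetch_add(&rsc->batch_references, 1);
}

/* Batch reset/flush or cache invalidation: drop the count. */
static void
batch_drops_resource(struct resource *rsc)
{
   int prev = atomic_fetch_sub(&rsc->batch_references, 1);
   assert(prev >= 1);
   (void)prev;
}

/* Frontend-thread busy check: no lock, one conservative atomic read. */
static bool
resource_busy(struct resource *rsc)
{
   return atomic_load(&rsc->batch_references) != 0;
}

int
main(void)
{
   struct resource rsc = {0};
   batch_adds_resource(&rsc);
   printf("busy: %d\n", resource_busy(&rsc)); /* 1 */
   batch_drops_resource(&rsc);
   printf("busy: %d\n", resource_busy(&rsc)); /* 0 */
   return 0;
}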

@@ -28,6 +28,7 @@
 #define FREEDRENO_RESOURCE_H_
 #include "util/list.h"
+#include "util/set.h"
 #include "util/simple_mtx.h"
 #include "util/u_dump.h"
 #include "util/u_range.h"
@@ -55,62 +56,6 @@ enum fd_lrz_direction {
    FD_LRZ_GREATER,
 };
 
-/**
- * State related to batch/resource tracking.
- *
- * With threaded_context we need to support replace_buffer_storage, in
- * which case we can end up in transfer_map with tres->latest, but other
- * pipe_context APIs using the original prsc pointer. This allows TC to
- * not have to synchronize the front-end thread with the buffer storage
- * replacement called on driver thread. But it complicates the batch/
- * resource tracking.
- *
- * To handle this, we need to split the tracking out into it's own ref-
- * counted structure, so as needed both "versions" of the resource can
- * point to the same tracking.
- *
- * We could *almost* just push this down to fd_bo, except for a3xx/a4xx
- * hw queries, where we don't know up-front the size to allocate for
- * per-tile query results.
- */
-struct fd_resource_tracking {
-   struct pipe_reference reference;
-
-   /* bitmask of in-flight batches which reference this resource. Note
-    * that the batch doesn't hold reference to resources (but instead
-    * the fd_ringbuffer holds refs to the underlying fd_bo), but in case
-    * the resource is destroyed we need to clean up the batch's weak
-    * references to us.
-    */
-   uint32_t batch_mask;
-
-   /* reference to batch that writes this resource: */
-   struct fd_batch *write_batch;
-
-   /* Set of batches whose batch-cache key references this resource.
-    * We need to track this to know which batch-cache entries to
-    * invalidate if, for example, the resource is invalidated or
-    * shadowed.
-    */
-   uint32_t bc_batch_mask;
-};
-
-void __fd_resource_tracking_destroy(struct fd_resource_tracking *track);
-
-static inline void
-fd_resource_tracking_reference(struct fd_resource_tracking **ptr,
-                               struct fd_resource_tracking *track)
-{
-   struct fd_resource_tracking *old_track = *ptr;
-   if (pipe_reference(&(*ptr)->reference, &track->reference)) {
-      assert(!old_track->write_batch);
-      free(old_track);
-   }
-   *ptr = track;
-}
-
 /**
  * A resource (any buffer/texture/image/etc)
  */
@@ -119,6 +64,13 @@ struct fd_resource {
    struct fd_bo *bo; /* use fd_resource_set_bo() to write */
    enum pipe_format internal_format;
    uint32_t hash; /* _mesa_hash_pointer() on this resource's address. */
+   /* Atomic counter of the number of batches referencing this resource in any
+    * batch cache. Used for fd_resource_busy(), which needs to check for busy
+    * in the batch cache from the frontend thread.
+    */
+   uint32_t batch_references;
    struct fdl_layout layout;
    /* buffer range that has been initialized */
@@ -130,8 +82,6 @@ struct fd_resource {
    /* TODO rename to secondary or auxiliary? */
    struct fd_resource *stencil;
-   struct fd_resource_tracking *track;
    simple_mtx_t lock;
    /* bitmask of state this resource could potentially dirty when rebound,
@@ -144,8 +94,6 @@ struct fd_resource {
    /* Is this buffer a replacement created by threaded_context to avoid
     * a stall in PIPE_MAP_DISCARD_WHOLE_RESOURCE|PIPE_MAP_WRITE case?
-    * If so, it no longer "owns" it's rsc->track, and so should not
-    * invalidate when the rsc is destroyed.
     */
    bool is_replacement : 1;
@@ -193,17 +141,35 @@ fd_memory_object(struct pipe_memory_object *pmemobj)
 }
 
 static inline bool
-pending(struct fd_resource *rsc, bool write)
+fd_batch_references(struct fd_batch *batch, struct fd_resource *rsc)
 {
-   /* if we have a pending GPU write, we are busy in any case: */
-   if (rsc->track->write_batch)
-      return true;
-   /* if CPU wants to write, but we are pending a GPU read, we are busy: */
-   if (write && rsc->track->batch_mask)
-      return true;
-   if (rsc->stencil && pending(rsc->stencil, write))
+   return _mesa_set_search_pre_hashed(batch->resources, rsc->hash, rsc) != NULL;
+}
+
+static inline struct fd_batch *
+fd_bc_writer(struct fd_context *ctx, struct fd_resource *rsc)
+{
+   struct hash_entry *entry =
+      _mesa_hash_table_search_pre_hashed(ctx->batch_cache.written_resources, rsc->hash, rsc);
+   if (entry)
+      return entry->data;
+   return NULL;
+}
+
+static inline bool
+pending(struct fd_context *ctx, struct fd_resource *rsc, bool write) assert_dt
+{
+   if (write) {
+      foreach_batch (batch, &ctx->batch_cache) {
+         if (fd_batch_references(batch, rsc))
+            return true;
+      }
+   } else {
+      if (fd_bc_writer(ctx, rsc))
+         return true;
+   }
+   if (rsc->stencil && pending(ctx, rsc->stencil, write))
       return true;
    return false;
@@ -348,7 +314,7 @@ void fd_resource_screen_init(struct pipe_screen *pscreen);
 void fd_resource_context_init(struct pipe_context *pctx);
 uint32_t fd_setup_slices(struct fd_resource *rsc);
-void fd_resource_resize(struct pipe_resource *prsc, uint32_t sz);
+void fd_resource_resize(struct fd_context *ctx, struct pipe_resource *prsc, uint32_t sz);
 void fd_replace_buffer_storage(struct pipe_context *ctx,
                                struct pipe_resource *dst,
                                struct pipe_resource *src,
@@ -365,12 +331,6 @@ void fd_resource_dump(struct fd_resource *rsc, const char *name);
 
 bool fd_render_condition_check(struct pipe_context *pctx) assert_dt;
 
-static inline bool
-fd_batch_references_resource(struct fd_batch *batch, struct fd_resource *rsc)
-{
-   return rsc->track->batch_mask & (1 << batch->idx);
-}
-
 static inline void
 fd_batch_write_prep(struct fd_batch *batch, struct fd_resource *rsc) assert_dt
 {
@@ -388,7 +348,7 @@ fd_batch_resource_read(struct fd_batch *batch,
     * writing to it (since both _write and _read flush other writers), and
     * that we've already recursed for stencil.
     */
-   if (unlikely(!fd_batch_references_resource(batch, rsc)))
+   if (unlikely(!fd_batch_references(batch, rsc)))
       fd_batch_resource_read_slowpath(batch, rsc);
 }
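
Note on the new pending() helper above: a CPU write conflicts with any batch that references the resource, while a CPU read only conflicts with the batch recorded as its writer. A minimal standalone model of that asymmetry, with arrays standing in for the batch cache (hypothetical names, illustration only):

#include <stdbool.h>
#include <stdio.h>

struct rsc { int id; };

struct batch {
   const struct rsc *refs[4];    /* resources read or written by the batch */
   unsigned num_refs;
   const struct rsc *writes[4];  /* resources written by the batch */
   unsigned num_writes;
};

static bool
batch_references(const struct batch *b, const struct rsc *r)
{
   for (unsigned i = 0; i < b->num_refs; i++)
      if (b->refs[i] == r)
         return true;
   return false;
}

static bool
batch_writes(const struct batch *b, const struct rsc *r)
{
   for (unsigned i = 0; i < b->num_writes; i++)
      if (b->writes[i] == r)
         return true;
   return false;
}

static bool
pending(const struct batch *batches, unsigned n, const struct rsc *r, bool cpu_write)
{
   for (unsigned i = 0; i < n; i++) {
      if (cpu_write ? batch_references(&batches[i], r)
                    : batch_writes(&batches[i], r))
         return true;
   }
   return false;
}

int
main(void)
{
   struct rsc tex = {1};
   struct batch b = {.refs = {&tex}, .num_refs = 1};  /* reads tex, no write */
   printf("CPU read needs flush?  %d\n", pending(&b, 1, &tex, false)); /* 0 */
   printf("CPU write needs flush? %d\n", pending(&b, 1, &tex, true));  /* 1 */
   return 0;
}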

@@ -155,7 +155,6 @@ fd_screen_destroy(struct pipe_screen *pscreen)
    if (screen->ro)
       screen->ro->destroy(screen->ro);
-   fd_bc_fini(&screen->batch_cache);
    fd_gmem_screen_fini(pscreen);
    slab_destroy_parent(&screen->transfer_pool);
@@ -1088,8 +1087,6 @@ fd_screen_create(struct fd_device *dev, struct renderonly *ro,
    if (fd_device_version(dev) >= FD_VERSION_UNLIMITED_CMDS)
       screen->reorder = !FD_DBG(INORDER);
-   fd_bc_init(&screen->batch_cache);
    list_inithead(&screen->context_list);
    util_idalloc_mt_init_tc(&screen->buffer_ids);

@@ -41,11 +41,11 @@
 #include "util/u_memory.h"
 #include "util/u_queue.h"
-#include "freedreno_batch_cache.h"
 #include "freedreno_gmem.h"
 #include "freedreno_util.h"
 
 struct fd_bo;
+struct fd_resource;
 
 /* Potential reasons for needing to skip bypass path and use GMEM, the
  * generation backend can override this with screen->gmem_reason_mask
@@ -135,7 +135,6 @@ struct fd_screen {
    int64_t cpu_gpu_time_delta;
-   struct fd_batch_cache batch_cache;
    struct fd_gmem_cache gmem_cache;
    bool reorder;

@@ -276,18 +276,13 @@ fd_set_framebuffer_state(struct pipe_context *pctx,
    cso->samples = util_framebuffer_get_num_samples(cso);
    if (ctx->screen->reorder) {
-      struct fd_batch *old_batch = NULL;
-      fd_batch_reference(&old_batch, ctx->batch);
-      if (likely(old_batch))
-         fd_batch_finish_queries(old_batch);
-      fd_batch_reference(&ctx->batch, NULL);
+      if (ctx->batch) {
+         fd_batch_finish_queries(ctx->batch);
+         fd_batch_reference(&ctx->batch, NULL);
+      }
       fd_context_all_dirty(ctx);
       ctx->update_active_queries = true;
-      fd_batch_reference(&old_batch, NULL);
    } else if (ctx->batch) {
       DBG("%d: cbufs[0]=%p, zsbuf=%p", ctx->batch->needs_flush,
           framebuffer->cbufs[0], framebuffer->zsbuf);