mesa/src/gallium/drivers/freedreno/freedreno_context.c

712 lines
20 KiB
C

/*
* Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/
#include "freedreno_context.h"
#include "ir3/ir3_cache.h"
#include "util/u_upload_mgr.h"
#include "freedreno_blitter.h"
#include "freedreno_draw.h"
#include "freedreno_fence.h"
#include "freedreno_gmem.h"
#include "freedreno_program.h"
#include "freedreno_query.h"
#include "freedreno_query_hw.h"
#include "freedreno_resource.h"
#include "freedreno_state.h"
#include "freedreno_texture.h"
#include "freedreno_util.h"
#include "freedreno_tracepoints.h"
#include "util/u_trace_gallium.h"
/**
 * Flush callback; fd_context_init() installs it as pctx->flush.
 *
 * Flushes the context's current batch and, when the caller asks for one,
 * returns a fence for the flushed work.
 *
 * @param pctx    the context to flush
 * @param fencep  optional; when non-NULL it receives the resulting fence.
 *                With TC_FLUSH_ASYNC it is also an input: *fencep is the
 *                fence that was pre-created earlier and is attached to the
 *                batch here.
 * @param flags   flush flags; this function looks at PIPE_FLUSH_DEFERRED,
 *                PIPE_FLUSH_FENCE_FD, PIPE_FLUSH_END_OF_FRAME and
 *                TC_FLUSH_ASYNC.
 *
 * If there is no current batch and no fence was requested, the function
 * returns early (after an optional fd_bc_flush()) without reaching the
 * common exit path at 'out'.
 */
static void
fd_context_flush(struct pipe_context *pctx, struct pipe_fence_handle **fencep,
unsigned flags) in_dt
{
struct fd_context *ctx = fd_context(pctx);
struct pipe_fence_handle *fence = NULL;
struct fd_batch *batch = NULL;
/* We want to lookup current batch if it exists, but not create a new
 * one if not (unless we need a fence)
 */
fd_batch_reference(&batch, ctx->batch);
DBG("%p: flush: flags=%x, fencep=%p", batch, flags, fencep);
if (fencep && !batch) {
/* A fence was requested, so we need a batch to hang it on: */
batch = fd_context_batch(ctx);
} else if (!batch) {
/* No batch and no fence wanted: nothing of our own to flush. */
if (ctx->screen->reorder)
fd_bc_flush(ctx, flags & PIPE_FLUSH_DEFERRED);
fd_bc_dump(ctx, "%p: NULL batch, remaining:\n", ctx);
return;
}
/* With TC_FLUSH_ASYNC, the fence will have been pre-created from
 * the front-end thread.  But not yet associated with a batch,
 * because we cannot safely access ctx->batch outside of the driver
 * thread.  So instead, replace the existing batch->fence with the
 * one created earlier
 */
if ((flags & TC_FLUSH_ASYNC) && fencep) {
/* We don't currently expect async+flush in the fence-fd
 * case.. for that to work properly we'd need TC to tell
 * us in the create_fence callback that it needs an fd.
 */
assert(!(flags & PIPE_FLUSH_FENCE_FD));
fd_fence_set_batch(*fencep, batch);
fd_fence_ref(&batch->fence, *fencep);
/* If we have nothing to flush, update the pre-created unflushed
 * fence with the current state of the last-fence:
 */
if (ctx->last_fence) {
fd_fence_repopulate(*fencep, ctx->last_fence);
fd_fence_ref(&fence, *fencep);
fd_bc_dump(ctx, "%p: (deferred) reuse last_fence, remaining:\n", ctx);
goto out;
}
/* async flush is not compatible with deferred flush, since
 * nothing triggers the batch flush which fence_flush() would
 * be waiting for
 */
flags &= ~PIPE_FLUSH_DEFERRED;
} else if (!batch->fence) {
/* Synchronous path: make sure the batch has a fence of its own. */
batch->fence = fd_fence_create(batch);
}
/* In some sequence of events, we can end up with a last_fence that is
 * not an "fd" fence, which results in eglDupNativeFenceFDANDROID()
 * errors.
 */
if ((flags & PIPE_FLUSH_FENCE_FD) && ctx->last_fence &&
!fd_fence_is_fd(ctx->last_fence))
fd_fence_ref(&ctx->last_fence, NULL);
/* if no rendering since last flush, ie. app just decided it needed
 * a fence, re-use the last one:
 */
if (ctx->last_fence) {
fd_fence_ref(&fence, ctx->last_fence);
fd_bc_dump(ctx, "%p: reuse last_fence, remaining:\n", ctx);
goto out;
}
/* Take a ref to the batch's fence (batch can be unref'd when flushed: */
fd_fence_ref(&fence, batch->fence);
if (flags & PIPE_FLUSH_FENCE_FD)
fence->submit_fence.use_fence_fd = true;
fd_bc_dump(ctx, "%p: flushing %p<%u>, flags=0x%x, pending:\n", ctx,
batch, batch->seqno, flags);
/* If we get here, we need to flush for a fence, even if there is
 * no rendering yet:
 */
batch->needs_flush = true;
/* Without reordering only this batch is flushed; with reordering the
 * flush goes through fd_bc_flush(), which is told whether the flush
 * may be deferred:
 */
if (!ctx->screen->reorder) {
fd_batch_flush(batch);
} else {
fd_bc_flush(ctx, flags & PIPE_FLUSH_DEFERRED);
}
fd_bc_dump(ctx, "%p: remaining:\n", ctx);
out:
/* Common exit: hand the fence to the caller (if requested), remember
 * it as ctx->last_fence so a later flush can re-use it, then drop the
 * local fence and batch references taken above.
 */
if (fencep)
fd_fence_ref(fencep, fence);
fd_fence_ref(&ctx->last_fence, fence);
fd_fence_ref(&fence, NULL);
fd_batch_reference(&batch, NULL);
/* The second argument is true only for end-of-frame flushes: */
u_trace_context_process(&ctx->trace_context,
!!(flags & PIPE_FLUSH_END_OF_FRAME));
}
/**
 * Texture-barrier callback (installed as pctx->texture_barrier).
 *
 * A framebuffer-only barrier is delegated to the context's
 * framebuffer_barrier hook when one is set; every other case falls back
 * to a full context flush.
 */
static void
fd_texture_barrier(struct pipe_context *pctx, unsigned flags) in_dt
{
   struct fd_context *ctx = fd_context(pctx);
   const bool fb_only = (flags == PIPE_TEXTURE_BARRIER_FRAMEBUFFER);

   if (fb_only && ctx->framebuffer_barrier) {
      ctx->framebuffer_barrier(ctx);
      return;
   }

   /* On devices that could sample from GMEM we could possibly do better.
    * Or if we knew that we were doing GMEM bypass we could just emit a
    * cache flush, perhaps?  But we don't know if future draws would cause
    * us to use GMEM, and a flush in bypass isn't the end of the world.
    */
   fd_context_flush(pctx, NULL, 0);
}
/**
 * Memory-barrier callback (installed as pctx->memory_barrier).
 *
 * Flushes the context whenever any barrier bit other than
 * PIPE_BARRIER_UPDATE is requested; otherwise does nothing.
 */
static void
fd_memory_barrier(struct pipe_context *pctx, unsigned flags)
{
   const unsigned flushing_bits = flags & ~PIPE_BARRIER_UPDATE;

   if (flushing_bits != 0) {
      fd_context_flush(pctx, NULL, 0);

      /* TODO do we need to check for persistently mapped buffers and
       * fd_bo_cpu_prep()??
       */
   }
}
/**
 * Emit the payload of a string packet.
 *
 * Packs 'len' bytes starting at 'string' into dwords and writes each one
 * with OUT_RING().  A trailing partial dword is zero padded, so exactly
 * ceil(len / 4) dwords are emitted and no byte beyond string[len - 1] is
 * read.  Nothing is emitted when len <= 0.
 *
 * Every dword is assembled with memcpy() rather than by reading the
 * string through a uint32_t pointer: the string has no alignment
 * guarantee, and accessing char data through a uint32_t lvalue is both a
 * potential misaligned access and a strict-aliasing violation.  The bytes
 * placed in each dword are the same as a direct native load would yield.
 *
 * @param ring    ringbuffer to emit into
 * @param string  bytes to emit (need not be NUL terminated)
 * @param len     number of bytes to emit
 */
static void
emit_string_tail(struct fd_ringbuffer *ring, const char *string, int len)
{
   while (len > 0) {
      const int chunk = MIN2(len, (int)sizeof(uint32_t));
      uint32_t dword = 0;

      /* copy at most one dword, never reading past the end of the input: */
      memcpy(&dword, string, chunk);
      OUT_RING(ring, dword);

      string += chunk;
      len -= chunk;
   }
}
/**
 * Emit 'string' as the payload of a CP_NOP type-3 packet.
 * For generations prior to a5xx.
 *
 * The string is truncated to the largest payload the packet can hold.
 */
void
fd_emit_string(struct fd_ringbuffer *ring, const char *string, int len)
{
   /* max packet size is 0x3fff+1 dwords: */
   const int max_bytes = 0x4000 * 4;

   if (len > max_bytes)
      len = max_bytes;

   /* header first (payload size rounded up to whole dwords), then data: */
   OUT_PKT3(ring, CP_NOP, align(len, 4) / 4);
   emit_string_tail(ring, string, len);
}
/**
 * Emit 'string' as the payload of a CP_NOP type-7 packet.
 * For a5xx and later.
 *
 * The string is truncated to the largest payload the packet can hold.
 */
void
fd_emit_string5(struct fd_ringbuffer *ring, const char *string, int len)
{
   /* max packet size is 0x3fff dwords: */
   const int max_bytes = 0x3fff * 4;

   if (len > max_bytes)
      len = max_bytes;

   /* header first (payload size rounded up to whole dwords), then data: */
   OUT_PKT7(ring, CP_NOP, align(len, 4) / 4);
   emit_string_tail(ring, string, len);
}
/**
 * String-marker callback (installed as pctx->emit_string_marker).
 *
 * Emits the marker string as payload of a no-op packet in the current
 * batch's draw ring, where it can be decoded by cffdump.  When the
 * context has no current batch the marker is only logged via DBG().
 */
static void
fd_emit_string_marker(struct pipe_context *pctx, const char *string,
                      int len) in_dt
{
   struct fd_context *ctx = fd_context(pctx);

   DBG("%.*s", len, string);

   /* don't create a batch just to hold a marker: */
   if (ctx->batch == NULL)
      return;

   struct fd_batch *locked = fd_context_batch_locked(ctx);

   fd_batch_needs_flush(locked);

   /* packet format depends on the GPU generation: */
   if (ctx->screen->gen < 5)
      fd_emit_string(locked->draw, string, len);
   else
      fd_emit_string5(locked->draw, string, len);

   fd_batch_unlock_submit(locked);
   fd_batch_reference(&locked, NULL);
}
/**
 * If we have a pending fence_server_sync() (GPU side sync), flush now.
 * The alternative to try to track this with batch dependencies gets
 * hairy quickly.
 *
 * Call this before switching to a different batch, to handle this case.
 */
void
fd_context_switch_from(struct fd_context *ctx)
{
   /* nothing to do without a current batch: */
   if (!ctx->batch)
      return;

   /* -1 means the batch has no in-fence fd attached: */
   if (ctx->batch->in_fence_fd == -1)
      return;

   fd_batch_flush(ctx->batch);
}
/**
 * If there is a pending fence-fd that we need to sync on, this will
 * transfer the reference to the next batch we are going to render
 * to.
 */
void
fd_context_switch_to(struct fd_context *ctx, struct fd_batch *batch)
{
   /* -1 means the context holds no pending in-fence fd: */
   if (ctx->in_fence_fd == -1)
      return;

   /* merge into the batch's in-fence, then release the context's fd: */
   sync_accumulate("freedreno", &batch->in_fence_fd, ctx->in_fence_fd);
   close(ctx->in_fence_fd);
   ctx->in_fence_fd = -1;
}
/**
 * Return a reference to the current batch, caller must unref.
 *
 * When the context has no current batch, one is obtained from the
 * context's framebuffer state, installed as ctx->batch, and all context
 * state is marked dirty.
 */
struct fd_batch *
fd_context_batch(struct fd_context *ctx)
{
   struct fd_batch *cur = NULL;

   tc_assert_driver_thread(ctx->tc);

   fd_batch_reference(&cur, ctx->batch);

   if (unlikely(cur == NULL)) {
      cur = fd_batch_from_fb(ctx, &ctx->framebuffer);
      util_copy_framebuffer_state(&cur->framebuffer, &ctx->framebuffer);
      fd_batch_reference(&ctx->batch, cur);
      fd_context_all_dirty(ctx);
   }

   /* hand any pending in-fence fd over to the batch we return: */
   fd_context_switch_to(ctx, cur);

   return cur;
}
/**
 * Return a locked reference to the current batch.  A batch with emit
 * lock held is protected against flushing while the lock is held.
 * The emit-lock should be acquired before screen-lock.  The emit-lock
 * should be held while emitting cmdstream.
 */
struct fd_batch *
fd_context_batch_locked(struct fd_context *ctx)
{
   for (;;) {
      struct fd_batch *candidate = fd_context_batch(ctx);

      if (fd_batch_lock_submit(candidate))
         return candidate;

      /* could not lock this batch: drop our reference and try again */
      fd_batch_reference(&candidate, NULL);
   }
}
/**
 * Tear down the state owned by an fd_context.
 *
 * Unlinks the context from the screen's context list, drops its fence,
 * batch and buffer references, destroys the helper objects it owns
 * (blitter, uploader, slab pools, pipe, trace context, autotune, shader
 * cache) and, when the BSTAT or MSGS debug flags are set, logs the
 * per-context batch statistics.
 *
 * Note that this function does not free the fd_context structure itself.
 */
void
fd_context_destroy(struct pipe_context *pctx)
{
struct fd_context *ctx = fd_context(pctx);
unsigned i;
DBG("");
/* The screen's context list is modified under the screen lock: */
fd_screen_lock(ctx->screen);
list_del(&ctx->node);
fd_screen_unlock(ctx->screen);
fd_fence_ref(&ctx->last_fence, NULL);
/* -1 means no pending in-fence fd is held: */
if (ctx->in_fence_fd != -1)
close(ctx->in_fence_fd);
for (i = 0; i < ARRAY_SIZE(ctx->pvtmem); i++) {
if (ctx->pvtmem[i].bo)
fd_bo_del(ctx->pvtmem[i].bo);
}
util_copy_framebuffer_state(&ctx->framebuffer, NULL);
fd_batch_reference(&ctx->batch, NULL); /* unref current batch */
/* Make sure nothing in the batch cache references our context any more. */
fd_bc_flush(ctx, false);
fd_prog_fini(pctx);
/* blitter and uploader may be NULL, so both are guarded: */
if (ctx->blitter)
util_blitter_destroy(ctx->blitter);
if (pctx->stream_uploader)
u_upload_destroy(pctx->stream_uploader);
for (i = 0; i < ARRAY_SIZE(ctx->clear_rs_state); i++)
if (ctx->clear_rs_state[i])
pctx->delete_rasterizer_state(pctx, ctx->clear_rs_state[i]);
slab_destroy_child(&ctx->transfer_pool);
slab_destroy_child(&ctx->transfer_pool_unsync);
/* The first NULL entry ends the list of vsc pipe buffers to delete: */
for (i = 0; i < ARRAY_SIZE(ctx->vsc_pipe_bo); i++) {
if (!ctx->vsc_pipe_bo[i])
break;
fd_bo_del(ctx->vsc_pipe_bo[i]);
}
fd_device_del(ctx->dev);
fd_pipe_purge(ctx->pipe);
fd_pipe_del(ctx->pipe);
simple_mtx_destroy(&ctx->gmem_lock);
u_trace_context_fini(&ctx->trace_context);
fd_autotune_fini(&ctx->autotune);
ir3_cache_destroy(ctx->shader_cache);
/* Same debug-flag condition that fd_context_init() uses to enable
 * stats collection:
 */
if (FD_DBG(BSTAT) || FD_DBG(MSGS)) {
mesa_logi(
"batch_total=%u, batch_sysmem=%u, batch_gmem=%u, batch_nondraw=%u, "
"batch_restore=%u\n",
(uint32_t)ctx->stats.batch_total, (uint32_t)ctx->stats.batch_sysmem,
(uint32_t)ctx->stats.batch_gmem, (uint32_t)ctx->stats.batch_nondraw,
(uint32_t)ctx->stats.batch_restore);
}
}
/**
 * Debug-callback setter (installed as pctx->set_debug_callback).
 *
 * Waits for the screen's compile queue to finish before touching
 * ctx->debug, then either copies the new callback or clears the stored
 * one when 'cb' is NULL.
 */
static void
fd_set_debug_callback(struct pipe_context *pctx,
                      const struct util_debug_callback *cb)
{
   struct fd_context *ctx = fd_context(pctx);

   util_queue_finish(&ctx->screen->compile_queue);

   if (!cb) {
      memset(&ctx->debug, 0, sizeof(ctx->debug));
      return;
   }

   ctx->debug = *cb;
}
/**
 * Query a fault counter from the context's pipe.
 *
 * @param ctx          context whose pipe is queried
 * @param per_context  true selects FD_CTX_FAULTS, false FD_GLOBAL_FAULTS
 * @return the counter value truncated to 32 bits; 0 if the query failed
 *         without writing a value (only reachable in builds where
 *         assert() is compiled out).
 */
static uint32_t
fd_get_reset_count(struct fd_context *ctx, bool per_context)
{
   const enum fd_param_id param =
      per_context ? FD_CTX_FAULTS : FD_GLOBAL_FAULTS;
   /* Start from a defined value so a failed query never makes us return
    * an indeterminate value when the assert below is compiled out:
    */
   uint64_t val = 0;
   int ret = fd_pipe_get_param(ctx->pipe, param, &val);

   assert(!ret);
   (void)ret; /* only consumed by the assert above */

   return (uint32_t)val;
}
static enum pipe_reset_status
fd_get_device_reset_status(struct pipe_context *pctx)
{
struct fd_context *ctx = fd_context(pctx);
int context_faults = fd_get_reset_count(ctx, true);
int global_faults = fd_get_reset_count(ctx, false);
enum pipe_reset_status status;
if (context_faults != ctx->context_reset_count) {
status = PIPE_GUILTY_CONTEXT_RESET;
} else if (global_faults != ctx->global_reset_count) {
status = PIPE_INNOCENT_CONTEXT_RESET;
} else {
status = PIPE_NO_RESET;
}
ctx->context_reset_count = context_faults;
ctx->global_reset_count = global_faults;
return status;
}
/**
 * u_trace "record timestamp" callback (registered in fd_context_init()).
 *
 * Emits a timestamp write for slot 'idx' of the timestamp buffer into the
 * given ring.  If the ring's write position has not moved since the
 * previous timestamp was emitted, no new command is emitted and the slot
 * is filled with U_TRACE_NO_TIMESTAMP from the CPU instead.
 */
static void
fd_trace_record_ts(struct u_trace *ut, void *cs, void *timestamps,
                   unsigned idx, bool end_of_pipe)
{
   struct fd_batch *batch = container_of(ut, struct fd_batch, trace);
   struct fd_ringbuffer *ring = cs;
   struct pipe_resource *buffer = timestamps;
   struct fd_bo *ts_bo = fd_resource(buffer)->bo;

   if (ring->cur != batch->last_timestamp_cmd) {
      /* each slot is one 64-bit value: */
      unsigned ts_offset = idx * sizeof(uint64_t);

      batch->ctx->record_timestamp(ring, ts_bo, ts_offset);
      batch->last_timestamp_cmd = ring->cur;
      return;
   }

   /* nothing was emitted since the last timestamp: mark the slot */
   uint64_t *ts = fd_bo_map(ts_bo);
   ts[idx] = U_TRACE_NO_TIMESTAMP;
}
/**
 * u_trace "read timestamp" callback (registered in fd_context_init()).
 *
 * Returns the timestamp in slot 'idx' converted with ctx->ts_to_ns(), or
 * U_TRACE_NO_TIMESTAMP when the slot holds that marker or the buffer
 * could not be prepared for reading.  Waiting for the results is only
 * done when reading slot 0.
 */
static uint64_t
fd_trace_read_ts(struct u_trace_context *utctx,
                 void *timestamps, unsigned idx, void *flush_data)
{
   struct fd_context *ctx =
      container_of(utctx, struct fd_context, trace_context);
   struct pipe_resource *buffer = timestamps;
   struct fd_bo *ts_bo = fd_resource(buffer)->bo;

   /* Only need to stall on results for the first entry: */
   if (idx == 0) {
      /* Avoid triggering deferred submits from flushing, since that
       * changes the behavior of what we are trying to measure:
       */
      for (;;) {
         if (!fd_bo_cpu_prep(ts_bo, ctx->pipe, FD_BO_PREP_NOSYNC))
            break;
         usleep(10000);
      }

      if (fd_bo_cpu_prep(ts_bo, ctx->pipe, FD_BO_PREP_READ))
         return U_TRACE_NO_TIMESTAMP;
   }

   const uint64_t *ts = fd_bo_map(ts_bo);
   const uint64_t raw = ts[idx];

   /* Don't translate the no-timestamp marker: */
   if (raw == U_TRACE_NO_TIMESTAMP)
      return U_TRACE_NO_TIMESTAMP;

   return ctx->ts_to_ns(raw);
}
/**
 * u_trace "delete flush data" callback (registered in fd_context_init()).
 *
 * Intentionally a no-op: we don't use flush_data at the moment.
 */
static void
fd_trace_delete_flush_data(struct u_trace_context *utctx, void *flush_data)
{
   (void)utctx;
   (void)flush_data;
}
/* TODO we could combine a few of these small buffers (solid_vbuf,
 * blit_texcoord_vbuf, and vsc_size_mem, into a single buffer and
 * save a tiny bit of memory
 */

/**
 * Create an immutable buffer and fill it with six constant floats.
 * fd_context_setup_common_vbos() stores the result as ctx->solid_vbuf.
 */
static struct pipe_resource *
create_solid_vertexbuf(struct pipe_context *pctx)
{
   static const float solid_data[] = {
      -1.000000f, +1.000000f, +1.000000f,
      +1.000000f, -1.000000f, +1.000000f,
   };
   struct pipe_resource *vbuf;

   vbuf = pipe_buffer_create(pctx->screen, PIPE_BIND_CUSTOM,
                             PIPE_USAGE_IMMUTABLE, sizeof(solid_data));

   /* upload the whole table at offset 0: */
   pipe_buffer_write(pctx, vbuf, 0, sizeof(solid_data), solid_data);

   return vbuf;
}
/**
 * Create the 16-byte dynamic buffer that
 * fd_context_setup_common_vbos() stores as ctx->blit_texcoord_vbuf.
 * The buffer contents are not initialised here.
 */
static struct pipe_resource *
create_blit_texcoord_vertexbuf(struct pipe_context *pctx)
{
   return pipe_buffer_create(pctx->screen, PIPE_BIND_CUSTOM,
                             PIPE_USAGE_DYNAMIC, 16);
}
/**
 * Create the vertex buffers and vertex-element state objects shared by
 * the "solid" and "blit" paths, and store them in the context.
 *
 * Released again by fd_context_cleanup_common_vbos().
 */
void
fd_context_setup_common_vbos(struct fd_context *ctx)
{
   struct pipe_context *pctx = &ctx->base;

   /* solid state: a single vec3 attribute from buffer 0 */
   struct pipe_vertex_element solid_elems[] = {
      {
         .vertex_buffer_index = 0,
         .src_offset = 0,
         .src_format = PIPE_FORMAT_R32G32B32_FLOAT,
      },
   };

   /* blit state: vec2 from buffer 0, vec3 from buffer 1 */
   struct pipe_vertex_element blit_elems[] = {
      {
         .vertex_buffer_index = 0,
         .src_offset = 0,
         .src_format = PIPE_FORMAT_R32G32_FLOAT,
      },
      {
         .vertex_buffer_index = 1,
         .src_offset = 0,
         .src_format = PIPE_FORMAT_R32G32B32_FLOAT,
      },
   };

   ctx->solid_vbuf = create_solid_vertexbuf(pctx);
   ctx->blit_texcoord_vbuf = create_blit_texcoord_vertexbuf(pctx);

   /* setup solid_vbuf_state: */
   ctx->solid_vbuf_state.vtx = pctx->create_vertex_elements_state(
      pctx, ARRAY_SIZE(solid_elems), solid_elems);
   ctx->solid_vbuf_state.vertexbuf.count = 1;
   ctx->solid_vbuf_state.vertexbuf.vb[0].stride = 12;
   ctx->solid_vbuf_state.vertexbuf.vb[0].buffer.resource = ctx->solid_vbuf;

   /* setup blit_vbuf_state: */
   ctx->blit_vbuf_state.vtx = pctx->create_vertex_elements_state(
      pctx, ARRAY_SIZE(blit_elems), blit_elems);
   ctx->blit_vbuf_state.vertexbuf.count = 2;
   ctx->blit_vbuf_state.vertexbuf.vb[0].stride = 8;
   ctx->blit_vbuf_state.vertexbuf.vb[0].buffer.resource =
      ctx->blit_texcoord_vbuf;
   ctx->blit_vbuf_state.vertexbuf.vb[1].stride = 12;
   ctx->blit_vbuf_state.vertexbuf.vb[1].buffer.resource = ctx->solid_vbuf;
}
/**
 * Release what fd_context_setup_common_vbos() created: the two
 * vertex-element state objects, then the two vertex buffers.
 */
void
fd_context_cleanup_common_vbos(struct fd_context *ctx)
{
   struct pipe_context *pipe = &ctx->base;

   /* vertex-element state objects: */
   pipe->delete_vertex_elements_state(pipe, ctx->solid_vbuf_state.vtx);
   pipe->delete_vertex_elements_state(pipe, ctx->blit_vbuf_state.vtx);

   /* drop our references to the buffers themselves: */
   pipe_resource_reference(&ctx->solid_vbuf, NULL);
   pipe_resource_reference(&ctx->blit_texcoord_vbuf, NULL);
}
/**
 * Common initialisation of an fd_context.
 *
 * Picks the submit priority, creates the context's pipe, installs the
 * pipe_context callbacks implemented in this file, creates the uploader,
 * slab pools and blitter, links the context into the screen's context
 * list and sets up tracing and autotune.
 *
 * @param ctx      context to initialise
 * @param pscreen  screen the context belongs to
 * @param priv     stored as pctx->priv
 * @param flags    context creation flags; PIPE_CONTEXT_HIGH_PRIORITY and
 *                 PIPE_CONTEXT_LOW_PRIORITY select the priority, and the
 *                 full value is stored in ctx->flags
 * @return &ctx->base on success, NULL on failure (after calling
 *         pctx->destroy()).
 *
 * NOTE(review): pctx->destroy is not assigned in this function, so the
 * failure path presumably relies on the caller having installed it
 * before calling here -- confirm against the callers.
 */
struct pipe_context *
fd_context_init(struct fd_context *ctx, struct pipe_screen *pscreen,
void *priv, unsigned flags)
disable_thread_safety_analysis
{
struct fd_screen *screen = fd_screen(pscreen);
struct pipe_context *pctx;
unsigned prio = 1;
/* lower numerical value == higher priority: */
if (FD_DBG(HIPRIO))
prio = 0;
else if (flags & PIPE_CONTEXT_HIGH_PRIORITY)
prio = 0;
else if (flags & PIPE_CONTEXT_LOW_PRIORITY)
prio = 2;
/* Some of the stats will get printed out at context destroy, so
 * make sure they are collected:
 */
if (FD_DBG(BSTAT) || FD_DBG(MSGS))
ctx->stats_users++;
ctx->flags = flags;
ctx->screen = screen;
ctx->pipe = fd_pipe_new2(screen->dev, FD_PIPE_3D, prio);
/* -1 == no pending in-fence fd: */
ctx->in_fence_fd = -1;
/* Snapshot the fault counters so fd_get_device_reset_status() has a
 * baseline to compare against:
 */
if (fd_device_version(screen->dev) >= FD_VERSION_ROBUSTNESS) {
ctx->context_reset_count = fd_get_reset_count(ctx, true);
ctx->global_reset_count = fd_get_reset_count(ctx, false);
}
simple_mtx_init(&ctx->gmem_lock, mtx_plain);
/* need some sane default in case gallium frontends don't
 * set some state:
 */
ctx->sample_mask = 0xffff;
ctx->active_queries = true;
pctx = &ctx->base;
pctx->screen = pscreen;
pctx->priv = priv;
/* Callbacks implemented in this file or by the fence code: */
pctx->flush = fd_context_flush;
pctx->emit_string_marker = fd_emit_string_marker;
pctx->set_debug_callback = fd_set_debug_callback;
pctx->get_device_reset_status = fd_get_device_reset_status;
pctx->create_fence_fd = fd_create_fence_fd;
pctx->fence_server_sync = fd_fence_server_sync;
pctx->fence_server_signal = fd_fence_server_signal;
pctx->texture_barrier = fd_texture_barrier;
pctx->memory_barrier = fd_memory_barrier;
pctx->stream_uploader = u_upload_create_default(pctx);
if (!pctx->stream_uploader)
goto fail;
/* The same uploader serves both stream and constant uploads: */
pctx->const_uploader = pctx->stream_uploader;
slab_create_child(&ctx->transfer_pool, &screen->transfer_pool);
slab_create_child(&ctx->transfer_pool_unsync, &screen->transfer_pool);
/* Let the other driver modules do their per-context initialisation: */
fd_draw_init(pctx);
fd_resource_context_init(pctx);
fd_query_context_init(pctx);
fd_texture_init(pctx);
fd_state_init(pctx);
ctx->blitter = util_blitter_create(pctx);
if (!ctx->blitter)
goto fail;
list_inithead(&ctx->hw_active_queries);
list_inithead(&ctx->acc_active_queries);
/* Sequence number and context-list insertion happen under the screen
 * lock:
 */
fd_screen_lock(ctx->screen);
ctx->seqno = ++screen->ctx_seqno;
list_add(&ctx->node, &ctx->screen->context_list);
fd_screen_unlock(ctx->screen);
ctx->current_scissor = &ctx->disabled_scissor;
fd_gpu_tracepoint_config_variable();
u_trace_pipe_context_init(&ctx->trace_context, pctx,
fd_trace_record_ts,
fd_trace_read_ts,
fd_trace_delete_flush_data);
fd_autotune_init(&ctx->autotune, screen->dev);
return pctx;
fail:
/* Shared failure path: tear down through the destroy callback. */
pctx->destroy(pctx);
return NULL;
}
struct pipe_context *
fd_context_init_tc(struct pipe_context *pctx, unsigned flags)
{
struct fd_context *ctx = fd_context(pctx);
if (!(flags & PIPE_CONTEXT_PREFER_THREADED))
return pctx;
/* Clover (compute-only) is unsupported. */
if (flags & PIPE_CONTEXT_COMPUTE_ONLY)
return pctx;
struct pipe_context *tc = threaded_context_create(
pctx, &ctx->screen->transfer_pool,
fd_replace_buffer_storage,
&(struct threaded_context_options){
.create_fence = fd_fence_create_unflushed,
.is_resource_busy = fd_resource_busy,
.unsynchronized_get_device_reset_status = true,
},
&ctx->tc);
if (tc && tc != pctx)
threaded_context_init_bytes_mapped_limit((struct threaded_context *)tc, 16);
return tc;
}