mesa/src/gallium/drivers/v3d/v3d_job.c

617 lines
22 KiB
C

/*
* Copyright © 2014-2017 Broadcom
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
/** @file v3d_job.c
*
* Functions for submitting V3D render jobs to the kernel.
*/
#include <xf86drm.h>
#include "v3d_context.h"
/* The OQ/semaphore packets are the same across V3D versions. */
#define V3D_VERSION 33
#include "broadcom/cle/v3dx_pack.h"
#include "broadcom/common/v3d_macros.h"
#include "util/hash_table.h"
#include "util/ralloc.h"
#include "util/set.h"
#include "broadcom/clif/clif_dump.h"
void
v3d_job_free(struct v3d_context *v3d, struct v3d_job *job)
{
set_foreach(job->bos, entry) {
struct v3d_bo *bo = (struct v3d_bo *)entry->key;
v3d_bo_unreference(&bo);
}
_mesa_hash_table_remove_key(v3d->jobs, &job->key);
if (job->write_prscs) {
set_foreach(job->write_prscs, entry) {
const struct pipe_resource *prsc = entry->key;
_mesa_hash_table_remove_key(v3d->write_jobs, prsc);
}
}
for (int i = 0; i < job->nr_cbufs; i++) {
if (job->cbufs[i]) {
_mesa_hash_table_remove_key(v3d->write_jobs,
job->cbufs[i]->texture);
pipe_surface_reference(&job->cbufs[i], NULL);
}
}
if (job->zsbuf) {
struct v3d_resource *rsc = v3d_resource(job->zsbuf->texture);
if (rsc->separate_stencil)
_mesa_hash_table_remove_key(v3d->write_jobs,
&rsc->separate_stencil->base);
_mesa_hash_table_remove_key(v3d->write_jobs,
job->zsbuf->texture);
pipe_surface_reference(&job->zsbuf, NULL);
}
if (job->bbuf)
pipe_surface_reference(&job->bbuf, NULL);
if (v3d->job == job)
v3d->job = NULL;
v3d_destroy_cl(&job->bcl);
v3d_destroy_cl(&job->rcl);
v3d_destroy_cl(&job->indirect);
v3d_bo_unreference(&job->tile_alloc);
v3d_bo_unreference(&job->tile_state);
ralloc_free(job);
}
/**
 * Allocates a zero-initialized job as a ralloc child of the context, sets up
 * its command lists, its (empty) draw bounds and its BO tracking set.
 */
struct v3d_job *
v3d_job_create(struct v3d_context *v3d)
{
        struct v3d_job *new_job = rzalloc(v3d, struct v3d_job);

        new_job->v3d = v3d;

        v3d_init_cl(new_job, &new_job->bcl);
        v3d_init_cl(new_job, &new_job->rcl);
        v3d_init_cl(new_job, &new_job->indirect);

        /* Start with min at all-ones and max at zero, i.e. no area drawn. */
        new_job->draw_min_x = ~0;
        new_job->draw_max_x = 0;
        new_job->draw_min_y = ~0;
        new_job->draw_max_y = 0;

        /* The set is a ralloc child of the job, so it is freed with it. */
        new_job->bos = _mesa_set_create(new_job,
                                        _mesa_hash_pointer,
                                        _mesa_key_pointer_equal);

        return new_job;
}
/**
 * Makes the job track (and hold a reference on) a BO and appends its handle
 * to the submit handle array.  NULL BOs and BOs that are already tracked are
 * ignored.
 */
void
v3d_job_add_bo(struct v3d_job *job, struct v3d_bo *bo)
{
        if (bo == NULL || _mesa_set_search(job->bos, bo) != NULL)
                return;

        v3d_bo_reference(bo);
        _mesa_set_add(job->bos, bo);
        job->referenced_size += bo->size;

        /* The submit struct stores the handle array as an integer. */
        uint32_t *handles = (void *)(uintptr_t)job->submit.bo_handles;

        /* Grow the array geometrically, starting at 4 entries. */
        if (job->submit.bo_handle_count >= job->bo_handles_size) {
                job->bo_handles_size = MAX2(4, job->bo_handles_size * 2);
                handles = reralloc(job, handles,
                                   uint32_t, job->bo_handles_size);
                job->submit.bo_handles = (uintptr_t)(void *)handles;
        }

        handles[job->submit.bo_handle_count] = bo->handle;
        job->submit.bo_handle_count++;
}
/**
 * Records that the job writes to a resource, both in the job's own set of
 * written resources and in the context's resource -> writer job table.
 */
void
v3d_job_add_write_resource(struct v3d_job *job, struct pipe_resource *prsc)
{
        /* The per-job set is created lazily on first use. */
        if (job->write_prscs == NULL) {
                job->write_prscs = _mesa_set_create(job,
                                                    _mesa_hash_pointer,
                                                    _mesa_key_pointer_equal);
        }
        _mesa_set_add(job->write_prscs, prsc);

        _mesa_hash_table_insert(job->v3d->write_jobs, prsc, job);
}
void
v3d_flush_jobs_using_bo(struct v3d_context *v3d, struct v3d_bo *bo)
{
hash_table_foreach(v3d->jobs, entry) {
struct v3d_job *job = entry->data;
if (_mesa_set_search(job->bos, bo))
v3d_job_submit(v3d, job);
}
}
/**
 * Registers a resource as written by the job and additionally remembers it
 * in the job's set of transform feedback write targets.
 */
void
v3d_job_add_tf_write_resource(struct v3d_job *job, struct pipe_resource *prsc)
{
        v3d_job_add_write_resource(job, prsc);

        /* The TF set is created lazily on first use. */
        if (job->tf_write_prscs == NULL)
                job->tf_write_prscs = _mesa_pointer_set_create(job);

        _mesa_set_add(job->tf_write_prscs, prsc);
}
/**
 * Returns true only if the job has transform feedback enabled and the
 * resource is in its set of transform feedback write targets.
 */
static bool
v3d_job_writes_resource_from_tf(struct v3d_job *job,
                                struct pipe_resource *prsc)
{
        return job->tf_enabled &&
               job->tf_write_prscs != NULL &&
               _mesa_set_search(job->tf_write_prscs, prsc) != NULL;
}
/**
 * Submits the job currently registered as the writer of a resource, if the
 * flush condition requires it.
 *
 * Independently of whether a writer job exists, a non-compute caller touching
 * a resource flagged as compute-written requests a sync on the last compute
 * job and clears that flag.
 */
void
v3d_flush_jobs_writing_resource(struct v3d_context *v3d,
                                struct pipe_resource *prsc,
                                enum v3d_flush_cond flush_cond,
                                bool is_compute_pipeline)
{
        struct hash_entry *writer = _mesa_hash_table_search(v3d->write_jobs,
                                                            prsc);
        struct v3d_resource *rsc = v3d_resource(prsc);

        /* The graphics pipeline must sync when it reads something the
         * compute pipeline wrote.  The opposite (graphics-compute)
         * dependency needs no handling here because nowadays every compute
         * job is serialized with the previously submitted job.
         */
        if (!is_compute_pipeline && rsc->bo != NULL && rsc->compute_written) {
                v3d->sync_on_last_compute_job = true;
                rsc->compute_written = false;
        }

        if (writer == NULL)
                return;

        struct v3d_job *job = writer->data;
        bool needs_flush;

        if (flush_cond == V3D_FLUSH_ALWAYS) {
                needs_flush = true;
        } else if (flush_cond == V3D_FLUSH_NOT_CURRENT_JOB) {
                needs_flush = !v3d->job || v3d->job != job;
        } else {
                /* V3D_FLUSH_DEFAULT and any other value.
                 *
                 * Writes coming from TF in the same job are handled with the
                 * hardware's "Wait for TF" feature, so no flush is wanted for
                 * them.  That does not help a caller that is about to map the
                 * resource, since there is no 'Wait for TF' command in the
                 * command stream in that case; such callers are expected to
                 * pass V3D_FLUSH_ALWAYS instead.
                 */
                needs_flush = !v3d_job_writes_resource_from_tf(job, prsc);
        }

        if (!needs_flush)
                return;

        v3d_job_submit(v3d, job);
}
void
v3d_flush_jobs_reading_resource(struct v3d_context *v3d,
struct pipe_resource *prsc,
enum v3d_flush_cond flush_cond,
bool is_compute_pipeline)
{
struct v3d_resource *rsc = v3d_resource(prsc);
/* We only need to force the flush on TF writes, which is the only
* case where we might skip the flush to use the 'Wait for TF'
* command. Here we are flushing for a read, which means that the
* caller intends to write to the resource, so we don't care if
* there was a previous TF write to it.
*/
v3d_flush_jobs_writing_resource(v3d, prsc, flush_cond,
is_compute_pipeline);
hash_table_foreach(v3d->jobs, entry) {
struct v3d_job *job = entry->data;
if (!_mesa_set_search(job->bos, rsc->bo))
continue;
bool needs_flush;
switch (flush_cond) {
case V3D_FLUSH_NOT_CURRENT_JOB:
needs_flush = !v3d->job || v3d->job != job;
break;
case V3D_FLUSH_ALWAYS:
case V3D_FLUSH_DEFAULT:
default:
needs_flush = true;
}
if (needs_flush)
v3d_job_submit(v3d, job);
/* Reminder: v3d->jobs is safe to keep iterating even
* after deletion of an entry.
*/
continue;
}
}
/**
* Returns a v3d_job struture for tracking V3D rendering to a particular FBO.
*
* If we've already started rendering to this FBO, then return the same job,
* otherwise make a new one. If we're beginning rendering to an FBO, make
* sure that any previous reads of the FBO (or writes to its color/Z surfaces)
* have been flushed.
*/
struct v3d_job *
v3d_get_job(struct v3d_context *v3d,
uint32_t nr_cbufs,
struct pipe_surface **cbufs,
struct pipe_surface *zsbuf,
struct pipe_surface *bbuf)
{
/* Return the existing job for this FBO if we have one */
struct v3d_job_key local_key = {
.cbufs = {
cbufs[0],
cbufs[1],
cbufs[2],
cbufs[3],
},
.zsbuf = zsbuf,
.bbuf = bbuf,
};
struct hash_entry *entry = _mesa_hash_table_search(v3d->jobs,
&local_key);
if (entry)
return entry->data;
/* Creating a new job. Make sure that any previous jobs reading or
* writing these buffers are flushed.
*/
struct v3d_job *job = v3d_job_create(v3d);
job->nr_cbufs = nr_cbufs;
for (int i = 0; i < job->nr_cbufs; i++) {
if (cbufs[i]) {
v3d_flush_jobs_reading_resource(v3d, cbufs[i]->texture,
V3D_FLUSH_DEFAULT,
false);
pipe_surface_reference(&job->cbufs[i], cbufs[i]);
if (cbufs[i]->texture->nr_samples > 1)
job->msaa = true;
}
}
if (zsbuf) {
v3d_flush_jobs_reading_resource(v3d, zsbuf->texture,
V3D_FLUSH_DEFAULT,
false);
pipe_surface_reference(&job->zsbuf, zsbuf);
if (zsbuf->texture->nr_samples > 1)
job->msaa = true;
}
if (bbuf) {
pipe_surface_reference(&job->bbuf, bbuf);
if (bbuf->texture->nr_samples > 1)
job->msaa = true;
}
for (int i = 0; i < job->nr_cbufs; i++) {
if (cbufs[i])
_mesa_hash_table_insert(v3d->write_jobs,
cbufs[i]->texture, job);
}
if (zsbuf) {
_mesa_hash_table_insert(v3d->write_jobs, zsbuf->texture, job);
struct v3d_resource *rsc = v3d_resource(zsbuf->texture);
if (rsc->separate_stencil) {
v3d_flush_jobs_reading_resource(v3d,
&rsc->separate_stencil->base,
V3D_FLUSH_DEFAULT,
false);
_mesa_hash_table_insert(v3d->write_jobs,
&rsc->separate_stencil->base,
job);
}
}
job->double_buffer =
unlikely(V3D_DEBUG & V3D_DEBUG_DOUBLE_BUFFER) && !job->msaa;
memcpy(&job->key, &local_key, sizeof(local_key));
_mesa_hash_table_insert(v3d->jobs, &job->key, job);
return job;
}
/**
 * Returns the job bound to the context, or looks up/creates the job for the
 * context's current framebuffer, configures it and binds it.
 */
struct v3d_job *
v3d_get_job_for_fbo(struct v3d_context *v3d)
{
        if (v3d->job)
                return v3d->job;

        struct pipe_surface **cbufs = v3d->framebuffer.cbufs;
        struct pipe_surface *zsbuf = v3d->framebuffer.zsbuf;
        uint32_t nr_cbufs = v3d->framebuffer.nr_cbufs;

        struct v3d_job *fbo_job =
                v3d_get_job(v3d, nr_cbufs, cbufs, zsbuf, NULL);

        /* A framebuffer sample count of one or more forces MSAA on and
         * double-buffering off for this job.
         */
        if (v3d->framebuffer.samples >= 1) {
                fbo_job->msaa = true;
                fbo_job->double_buffer = false;
        }

        v3d_get_tile_buffer_size(fbo_job->msaa, fbo_job->double_buffer,
                                 fbo_job->nr_cbufs, fbo_job->cbufs,
                                 fbo_job->bbuf,
                                 &fbo_job->tile_width, &fbo_job->tile_height,
                                 &fbo_job->internal_bpp);

        /* Dirty flags only track updates made while v3d->job was bound, so
         * everything is marked dirty when switching between jobs.  All state
         * also has to be reset at the start of rendering.
         */
        v3d->dirty = ~0;

        /* Buffers that have never been written are uninitialized, so there
         * is no need to load their contents before drawing: flag them in
         * job->clear.
         */
        for (int i = 0; i < nr_cbufs; i++) {
                if (!cbufs[i])
                        continue;

                struct v3d_resource *color_rsc =
                        v3d_resource(cbufs[i]->texture);
                if (!color_rsc->writes)
                        fbo_job->clear |= PIPE_CLEAR_COLOR0 << i;
        }

        if (zsbuf) {
                struct v3d_resource *depth_rsc = v3d_resource(zsbuf->texture);
                /* Stencil lives in its own resource when there is one. */
                struct v3d_resource *stencil_rsc =
                        depth_rsc->separate_stencil ?
                        depth_rsc->separate_stencil : depth_rsc;

                if (!depth_rsc->writes)
                        fbo_job->clear |= PIPE_CLEAR_DEPTH;
                if (!stencil_rsc->writes)
                        fbo_job->clear |= PIPE_CLEAR_STENCIL;
        }

        /* Number of tiles needed to cover the framebuffer, rounded up. */
        fbo_job->draw_tiles_x = DIV_ROUND_UP(v3d->framebuffer.width,
                                             fbo_job->tile_width);
        fbo_job->draw_tiles_y = DIV_ROUND_UP(v3d->framebuffer.height,
                                             fbo_job->tile_height);

        v3d->job = fbo_job;

        return fbo_job;
}
static void
v3d_clif_dump(struct v3d_context *v3d, struct v3d_job *job)
{
if (!(unlikely(V3D_DEBUG & (V3D_DEBUG_CL |
V3D_DEBUG_CL_NO_BIN |
V3D_DEBUG_CLIF))))
return;
struct clif_dump *clif = clif_dump_init(&v3d->screen->devinfo,
stderr,
V3D_DEBUG & (V3D_DEBUG_CL |
V3D_DEBUG_CL_NO_BIN),
V3D_DEBUG & V3D_DEBUG_CL_NO_BIN);
set_foreach(job->bos, entry) {
struct v3d_bo *bo = (void *)entry->key;
char *name = ralloc_asprintf(NULL, "%s_0x%x",
bo->name, bo->offset);
v3d_bo_map(bo);
clif_dump_add_bo(clif, name, bo->offset, bo->size, bo->map);
ralloc_free(name);
}
clif_dump(clif, &job->submit);
clif_dump_destroy(clif);
}
/**
 * Waits for the primitive counts BO and adds the counters read back from it
 * to the context's running totals.  Nothing is accumulated if the wait fails.
 */
static void
v3d_read_and_accumulate_primitive_counters(struct v3d_context *v3d)
{
        assert(v3d->prim_counts);

        perf_debug("stalling on TF counts readback\n");

        struct v3d_resource *counts_rsc = v3d_resource(v3d->prim_counts);
        if (!v3d_bo_wait(counts_rsc->bo, PIPE_TIMEOUT_INFINITE,
                         "prim-counts")) {
                return;
        }

        uint32_t *counters =
                v3d_bo_map(counts_rsc->bo) + v3d->prim_counts_offset;

        v3d->tf_prims_generated += counters[V3D_PRIM_COUNTS_TF_WRITTEN];

        /* With only a vertex shader the primitive count is determined on
         * the CPU, so it must not be accumulated here a second time.
         */
        if (v3d->prog.gs)
                v3d->prims_generated += counters[V3D_PRIM_COUNTS_WRITTEN];
}
/**
 * Submits the job to the kernel and then frees it.
 *
 * A job that does not have needs_flush set is not submitted, only freed.
 */
void
v3d_job_submit(struct v3d_context *v3d, struct v3d_job *job)
{
        /* Only the first failed submission is reported. */
        static bool warned = false;
        struct v3d_screen *screen = v3d->screen;
        int ret;

        if (!job->needs_flush)
                goto done;

        /* The GL_PRIMITIVES_GENERATED query comes with OES_geometry_shader,
         * so the counters are only needed with a geometry shader bound.
         */
        job->needs_primitives_generated =
                v3d->n_primitives_generated_queries_in_flight > 0 &&
                v3d->prog.gs;
        if (job->needs_primitives_generated)
                v3d_ensure_prim_counts_allocated(v3d);

        /* Emit the render CL, and close the binner CL if it has content. */
        if (screen->devinfo.ver >= 41)
                v3d41_emit_rcl(job);
        else
                v3d33_emit_rcl(job);

        if (cl_offset(&job->bcl) > 0) {
                if (screen->devinfo.ver >= 41)
                        v3d41_bcl_epilogue(v3d, job);
                else
                        v3d33_bcl_epilogue(v3d, job);
        }

        /* The RCL implicitly depends on the last RCL having finished, but we
         * must also block on any TFU job dispatched earlier.
         */
        job->submit.in_sync_rcl = v3d->out_sync;

        /* Update the sync object for the last rendering by our context. */
        job->submit.out_sync = v3d->out_sync;

        job->submit.bcl_end = job->bcl.bo->offset + cl_offset(&job->bcl);
        job->submit.rcl_end = job->rcl.bo->offset + cl_offset(&job->rcl);

        if (v3d->active_perfmon) {
                assert(screen->has_perfmon);
                job->submit.perfmon_id = v3d->active_perfmon->kperfmon_id;
        }

        /* When the perfmon changes between submissions, the previous job
         * has to finish completely before this one starts; otherwise the
         * counter results would be wrongly mixed.
         */
        if (v3d->active_perfmon != v3d->last_perfmon) {
                v3d->last_perfmon = v3d->active_perfmon;
                job->submit.in_sync_bcl = v3d->out_sync;
        }

        job->submit.flags = 0;
        if (job->tmu_dirty_rcl && screen->has_cache_flush)
                job->submit.flags |= DRM_V3D_SUBMIT_CL_FLUSH_CACHE;

        /* On V3D 4.1, the tile alloc/state setup moved to register writes
         * instead of binner packets.
         */
        if (screen->devinfo.ver >= 41) {
                v3d_job_add_bo(job, job->tile_alloc);
                job->submit.qma = job->tile_alloc->offset;
                job->submit.qms = job->tile_alloc->size;

                v3d_job_add_bo(job, job->tile_state);
                job->submit.qts = job->tile_state->offset;
        }

        v3d_clif_dump(v3d, job);

        /* With the NORAST debug flag nothing reaches the kernel. */
        if (unlikely(V3D_DEBUG & V3D_DEBUG_NORAST))
                goto done;

        ret = v3d_ioctl(v3d->fd, DRM_IOCTL_V3D_SUBMIT_CL, &job->submit);
        if (!ret) {
                if (v3d->active_perfmon)
                        v3d->active_perfmon->job_submitted = true;
        } else if (!warned) {
                fprintf(stderr, "Draw call returned %s. "
                        "Expect corruption.\n", strerror(errno));
                warned = true;
        }

        /* If the job is submitted in the middle of transform feedback, or
         * there is a primitives generated query with a geometry shader, the
         * primitive counts must be read and accumulated now: otherwise they
         * get reset at the start of the next draw, when the Tile Binning
         * Mode Configuration packet is emitted.
         *
         * A job without TF draw calls is known to have a primitive count of
         * zero, so the stall can be skipped for it.  Skipping also avoids a
         * problem: in that scenario the counters do not seem to be reset by
         * the Tile Binning Mode Configuration packet, so we would read an
         * obsolete (possibly non-zero) value from the GPU counters.
         */
        if (job->needs_primitives_generated ||
            (v3d->streamout.num_targets &&
             job->tf_draw_calls_queued > 0)) {
                v3d_read_and_accumulate_primitive_counters(v3d);
        }

done:
        v3d_job_free(v3d, job);
}
/**
 * Key equality callback for v3d->jobs: two keys match when their
 * struct v3d_job_key contents are bytewise identical.
 */
static bool
v3d_job_compare(const void *a, const void *b)
{
        const size_t key_size = sizeof(struct v3d_job_key);

        return !memcmp(a, b, key_size);
}
/**
 * Hash callback for v3d->jobs: hashes the raw bytes of a struct v3d_job_key.
 */
static uint32_t
v3d_job_hash(const void *key)
{
        const size_t key_size = sizeof(struct v3d_job_key);

        return _mesa_hash_data(key, key_size);
}
/**
 * Creates the context's two job tables, both as ralloc children of the
 * context.
 */
void
v3d_job_init(struct v3d_context *v3d)
{
        /* Jobs, looked up by the contents of their v3d_job_key. */
        v3d->jobs =
                _mesa_hash_table_create(v3d, v3d_job_hash, v3d_job_compare);

        /* Writer job of a resource, looked up by resource pointer. */
        v3d->write_jobs =
                _mesa_hash_table_create(v3d,
                                        _mesa_hash_pointer,
                                        _mesa_key_pointer_equal);
}