/*
 * Copyright © 2017 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
#ifndef IRIS_BATCH_DOT_H
#define IRIS_BATCH_DOT_H

#include <stdint.h>
#include <stdbool.h>
#include <string.h>

#include "util/u_dynarray.h"
#include "util/perf/u_trace.h"

#include "drm-uapi/i915_drm.h"
#include "common/intel_decoder.h"
#include "ds/intel_driver_ds.h"
#include "ds/intel_tracepoints.h"

#include "iris_fence.h"
#include "iris_fine_fence.h"

struct iris_context;

/* The kernel assumes batchbuffers are smaller than 256kB. */
#define MAX_BATCH_SIZE (256 * 1024)

/* Terminating the batch takes either 4 bytes for MI_BATCH_BUFFER_END or 12
 * bytes for MI_BATCH_BUFFER_START (when chaining). Plus another 24 bytes for
 * the seqno write (using PIPE_CONTROL), and another 24 bytes for the ISP
 * invalidation pipe control.
 */
#define BATCH_RESERVED 60
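
/* Worst-case arithmetic for the reservation above: 12 bytes for the
 * MI_BATCH_BUFFER_START (chaining case) + 24 bytes for the seqno
 * PIPE_CONTROL + 24 bytes for the ISP invalidation PIPE_CONTROL
 * = 60 bytes.
 */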

/* Our target batch size - flush approximately at this point. */
#define BATCH_SZ (64 * 1024 - BATCH_RESERVED)

enum iris_batch_name {
   IRIS_BATCH_RENDER,
   IRIS_BATCH_COMPUTE,
   IRIS_BATCH_BLITTER,
};

struct iris_batch {
   struct iris_context *ice;
   struct iris_screen *screen;
   struct util_debug_callback *dbg;
   struct pipe_device_reset_callback *reset;

   /** What batch is this? (e.g. IRIS_BATCH_RENDER/COMPUTE) */
   enum iris_batch_name name;

   /** Current batchbuffer being queued up. */
   struct iris_bo *bo;
   void *map;
   void *map_next;

   /** Size of the primary batch being submitted to execbuf (in bytes). */
   unsigned primary_batch_size;

   /** Total size of all chained batches (in bytes). */
   unsigned total_chained_batch_size;

   /** Last binder address set in this hardware context. */
   uint64_t last_binder_address;

   uint32_t ctx_id;
   uint32_t exec_flags;
   bool has_engines_context;

   /** A list of all BOs referenced by this batch */
   struct iris_bo **exec_bos;
   int exec_count;
   int exec_array_size;
   /** Bitset of whether this batch writes to BO `i'. */
   BITSET_WORD *bos_written;
   uint32_t max_gem_handle;

   /** Whether INTEL_BLACKHOLE_RENDER is enabled in the batch (aka first
    * instruction is a MI_BATCH_BUFFER_END).
    */
   bool noop_enabled;

   /** Whether the first utrace point has been recorded. */
   bool begin_trace_recorded;

   /**
    * A list of iris_syncobjs associated with this batch.
    *
    * The first list entry will always be a signalling sync-point, indicating
    * that this batch has completed. The others are likely to be sync-points
    * to wait on before executing the batch.
    */
   struct util_dynarray syncobjs;

   /** A list of drm_i915_exec_fences to have execbuf signal or wait on */
   struct util_dynarray exec_fences;

   /** The amount of aperture space (in bytes) used by all exec_bos */
   int aperture_space;

   struct {
      /** Uploader to use for sequence numbers */
      struct u_upload_mgr *uploader;

      /** GPU buffer and CPU map where our seqno's will be written. */
      struct iris_state_ref ref;
      uint32_t *map;

      /** The sequence number to write the next time we add a fence. */
      uint32_t next;
   } fine_fences;

   /** A seqno (and syncobj) for the last batch that was submitted. */
   struct iris_fine_fence *last_fence;

   /** List of other batches which we might need to flush to use a BO */
   struct iris_batch *other_batches[IRIS_BATCH_COUNT - 1];
   unsigned num_other_batches;

   struct {
      /**
       * Set of struct brw_bo * that have been rendered to within this
       * batchbuffer and would need flushing before being used from another
       * cache domain that isn't coherent with it (i.e. the sampler).
       */
      struct hash_table *render;
   } cache;

   struct intel_batch_decode_ctx decoder;
   struct hash_table_u64 *state_sizes;

   /**
    * Matrix representation of the cache coherency status of the GPU at the
    * current end point of the batch. For every i and j,
    * coherent_seqnos[i][j] denotes the seqno of the most recent flush of
    * cache domain j visible to cache domain i (which obviously implies that
    * coherent_seqnos[i][i] is the most recent flush of cache domain i). This
    * can be used to efficiently determine whether synchronization is
    * necessary before accessing data from cache domain i if it was previously
    * accessed from another cache domain j.
    */
   uint64_t coherent_seqnos[NUM_IRIS_DOMAINS][NUM_IRIS_DOMAINS];

   /**
    * A vector representing the cache coherency status of the L3. For each
    * cache domain i, l3_coherent_seqnos[i] denotes the seqno of the most
    * recent flush of that domain which is visible to L3 clients.
    */
   uint64_t l3_coherent_seqnos[NUM_IRIS_DOMAINS];

   /**
    * Sequence number used to track the completion of any subsequent memory
    * operations in the batch until the next sync boundary.
    */
   uint64_t next_seqno;

   /** Have we emitted any draw calls to this batch? */
   bool contains_draw;

   /** Have we emitted any draw calls with next_seqno? */
   bool contains_draw_with_next_seqno;

   /** Batch contains fence signal operation. */
   bool contains_fence_signal;

   /**
    * Number of times iris_batch_sync_region_start() has been called without a
    * matching iris_batch_sync_region_end() on this batch.
    */
   uint32_t sync_region_depth;

   uint32_t last_aux_map_state;
   struct iris_measure_batch *measure;

   /** Where tracepoints are recorded */
   struct u_trace trace;

   /** Batch wrapper structure for perfetto */
   struct intel_ds_queue *ds;
};

void iris_init_batches(struct iris_context *ice, int priority);
void iris_chain_to_new_batch(struct iris_batch *batch);
void iris_destroy_batches(struct iris_context *ice);
void iris_batch_maybe_flush(struct iris_batch *batch, unsigned estimate);

void _iris_batch_flush(struct iris_batch *batch, const char *file, int line);
#define iris_batch_flush(batch) _iris_batch_flush((batch), __FILE__, __LINE__)

bool iris_batch_references(struct iris_batch *batch, struct iris_bo *bo);

bool iris_batch_prepare_noop(struct iris_batch *batch, bool noop_enable);

#define RELOC_WRITE EXEC_OBJECT_WRITE

void iris_use_pinned_bo(struct iris_batch *batch, struct iris_bo *bo,
                        bool writable, enum iris_domain access);

enum pipe_reset_status iris_batch_check_for_reset(struct iris_batch *batch);

static inline unsigned
iris_batch_bytes_used(struct iris_batch *batch)
{
   return batch->map_next - batch->map;
}

/**
 * Ensure the current command buffer has \param size bytes of space
 * remaining. If not, this creates a secondary batch buffer and emits
 * a jump from the primary batch to the start of the secondary.
 *
 * Most callers want iris_get_command_space() instead.
 */
static inline void
iris_require_command_space(struct iris_batch *batch, unsigned size)
{
   const unsigned required_bytes = iris_batch_bytes_used(batch) + size;

   if (required_bytes >= BATCH_SZ) {
      iris_chain_to_new_batch(batch);
   }
}

/**
 * Allocate space in the current command buffer, and return a pointer
 * to the mapped area so the caller can write commands there.
 *
 * This should be called whenever emitting commands.
 */
static inline void *
iris_get_command_space(struct iris_batch *batch, unsigned bytes)
{
   if (!batch->begin_trace_recorded) {
      batch->begin_trace_recorded = true;
      trace_intel_begin_batch(&batch->trace);
   }
   iris_require_command_space(batch, bytes);
   void *map = batch->map_next;
   batch->map_next += bytes;
   return map;
}

/**
 * Helper to emit GPU commands - allocates space, copies them there.
 */
static inline void
iris_batch_emit(struct iris_batch *batch, const void *data, unsigned size)
{
   void *map = iris_get_command_space(batch, size);
   memcpy(map, data, size);
}
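
/* Illustrative usage sketch (the DWord values are placeholders; 0 is
 * MI_NOOP, not a meaningful command sequence): a caller can pack command
 * DWords locally and copy them into the batch, or write them in place via
 * iris_get_command_space().
 *
 *    uint32_t noops[2] = { 0, 0 };
 *    iris_batch_emit(batch, noops, sizeof(noops));
 *
 *    uint32_t *dw = iris_get_command_space(batch, 2 * sizeof(uint32_t));
 *    dw[0] = 0;   // pack command DWords directly into the mapped batch
 *    dw[1] = 0;
 */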

/**
 * Get a pointer to the batch's signalling syncobj. Does not refcount.
 */
static inline struct iris_syncobj *
iris_batch_get_signal_syncobj(struct iris_batch *batch)
{
   /* The signalling syncobj is the first one in the list. */
   struct iris_syncobj *syncobj =
      ((struct iris_syncobj **) util_dynarray_begin(&batch->syncobjs))[0];
   return syncobj;
}

/**
 * Take a reference to the batch's signalling syncobj.
 *
 * Callers can use this to wait for the current batch under construction
 * to complete (after flushing it).
 */
static inline void
iris_batch_reference_signal_syncobj(struct iris_batch *batch,
                                    struct iris_syncobj **out_syncobj)
{
   struct iris_syncobj *syncobj = iris_batch_get_signal_syncobj(batch);
   iris_syncobj_reference(batch->screen->bufmgr, out_syncobj, syncobj);
}
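
/* Illustrative pattern for waiting on the current batch (sketch only):
 * take a reference to the signalling syncobj, flush, and then wait on the
 * referenced syncobj using the fence machinery from iris_fence.h before
 * dropping the reference.
 *
 *    struct iris_syncobj *syncobj = NULL;
 *    iris_batch_reference_signal_syncobj(batch, &syncobj);
 *    iris_batch_flush(batch);
 *    // ... wait on 'syncobj', then release the reference ...
 */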

/**
 * Record the size of a piece of state for use in INTEL_DEBUG=bat printing.
 */
static inline void
iris_record_state_size(struct hash_table_u64 *ht,
                       uint32_t offset_from_base,
                       uint32_t size)
{
   if (ht) {
      _mesa_hash_table_u64_insert(ht, offset_from_base,
                                  (void *)(uintptr_t) size);
   }
}

/**
 * Mark the start of a region in the batch with stable synchronization
 * sequence number. Any buffer object accessed by the batch buffer only needs
 * to be marked once (e.g. via iris_bo_bump_seqno()) within a region delimited
 * by iris_batch_sync_region_start() and iris_batch_sync_region_end().
 */
static inline void
iris_batch_sync_region_start(struct iris_batch *batch)
{
   batch->sync_region_depth++;
}

/**
 * Mark the end of a region in the batch with stable synchronization sequence
 * number. Should be called once after each call to
 * iris_batch_sync_region_start().
 */
static inline void
iris_batch_sync_region_end(struct iris_batch *batch)
{
   assert(batch->sync_region_depth);
   batch->sync_region_depth--;
}
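
/* Illustrative sketch of the sync region pattern described above: within
 * the start/end markers, each buffer object only needs to be marked once
 * (e.g. via iris_bo_bump_seqno(), as noted on
 * iris_batch_sync_region_start()).
 *
 *    iris_batch_sync_region_start(batch);
 *    // ... emit commands and mark the BOs they access ...
 *    iris_batch_sync_region_end(batch);
 */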

/**
 * Start a new synchronization section at the current point of the batch,
 * unless disallowed by a previous iris_batch_sync_region_start().
 */
static inline void
iris_batch_sync_boundary(struct iris_batch *batch)
{
   if (!batch->sync_region_depth) {
      batch->contains_draw_with_next_seqno = false;
      batch->next_seqno = p_atomic_inc_return(&batch->screen->last_seqno);
      assert(batch->next_seqno > 0);
   }
}

/**
 * Update the cache coherency status of the batch to reflect a flush of the
 * specified caching domain.
 */
static inline void
iris_batch_mark_flush_sync(struct iris_batch *batch,
                           enum iris_domain access)
{
   const struct intel_device_info *devinfo = &batch->screen->devinfo;

   if (iris_domain_is_l3_coherent(devinfo, access))
      batch->l3_coherent_seqnos[access] = batch->next_seqno - 1;
   else
      batch->coherent_seqnos[access][access] = batch->next_seqno - 1;
}

/**
 * Update the cache coherency status of the batch to reflect an invalidation
 * of the specified caching domain. All prior flushes of other caches will be
 * considered visible to the specified caching domain.
 */
static inline void
iris_batch_mark_invalidate_sync(struct iris_batch *batch,
                                enum iris_domain access)
{
   const struct intel_device_info *devinfo = &batch->screen->devinfo;

   for (unsigned i = 0; i < NUM_IRIS_DOMAINS; i++) {
      if (i == access)
         continue;

      if (iris_domain_is_l3_coherent(devinfo, access)) {
         if (iris_domain_is_read_only(access)) {
            /* Invalidating an L3-coherent read-only domain "access" also
             * triggers an invalidation of any matching L3 cachelines.
             *
             * If domain 'i' is L3-coherent, it sees the latest data in L3,
             * otherwise it sees the latest globally-observable data.
             */
            batch->coherent_seqnos[access][i] =
               iris_domain_is_l3_coherent(devinfo, i) ?
               batch->l3_coherent_seqnos[i] : batch->coherent_seqnos[i][i];
         } else {
            /* Invalidating an L3-coherent write domain does not trigger
             * an invalidation of any matching L3 cachelines, however.
             *
             * It sees the latest data from domain i visible to L3 clients.
             */
            batch->coherent_seqnos[access][i] = batch->l3_coherent_seqnos[i];
         }
      } else {
         /* "access" isn't L3-coherent, so invalidating it means it sees the
          * most recent globally-observable data from domain i.
          */
         batch->coherent_seqnos[access][i] = batch->coherent_seqnos[i][i];
      }
   }
}

/**
 * Update the cache coherency status of the batch to reflect a reset. All
 * previously accessed data can be considered visible to every caching domain
 * thanks to the kernel's heavyweight flushing at batch buffer boundaries.
 */
static inline void
iris_batch_mark_reset_sync(struct iris_batch *batch)
{
   for (unsigned i = 0; i < NUM_IRIS_DOMAINS; i++) {
      batch->l3_coherent_seqnos[i] = batch->next_seqno - 1;
      for (unsigned j = 0; j < NUM_IRIS_DOMAINS; j++)
         batch->coherent_seqnos[i][j] = batch->next_seqno - 1;
   }
}

const char *
iris_batch_name_to_string(enum iris_batch_name name);

#define iris_foreach_batch(ice, batch) \
   for (struct iris_batch *batch = &ice->batches[0]; \
        batch <= &ice->batches[((struct iris_screen *)ice->ctx.screen)->devinfo.ver >= 12 ? IRIS_BATCH_BLITTER : IRIS_BATCH_COMPUTE]; \
        ++batch)
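
/* Example use of the iterator above (sketch): visit every batch owned by
 * the context, e.g. to flush them all.
 *
 *    iris_foreach_batch(ice, batch)
 *       iris_batch_flush(batch);
 */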

#endif