iris: rewrite to use memzones and not relocs

This commit is contained in:
Kenneth Graunke 2018-04-06 00:05:24 -07:00
parent 68229caa38
commit 651be7cf3d
3 changed files with 72 additions and 282 deletions

View File

@ -82,15 +82,6 @@ uint_key_hash(const void *key)
return (uintptr_t) key;
}
/**
 * Set up an empty relocation list with room for count entries.
 *
 * The backing array comes from malloc(); the result is not checked here.
 */
static void
init_reloc_list(struct iris_reloc_list *rlist, int count)
{
   rlist->relocs =
      malloc(count * sizeof(struct drm_i915_gem_relocation_entry));
   rlist->reloc_array_size = count;
   rlist->reloc_count = 0;
}
static void
create_batch_buffer(struct iris_bufmgr *bufmgr,
struct iris_batch_buffer *buf,
@ -116,9 +107,6 @@ iris_init_batch(struct iris_batch *batch,
assert(util_bitcount(ring) == 1);
batch->ring = ring;
init_reloc_list(&batch->cmdbuf.relocs, 256);
init_reloc_list(&batch->statebuf.relocs, 256);
batch->exec_count = 0;
batch->exec_array_size = 100;
batch->exec_bos =
@ -189,12 +177,6 @@ iris_batch_reset(struct iris_batch *batch)
batch->last_cmd_bo = batch->cmdbuf.bo;
create_batch_buffer(bufmgr, &batch->cmdbuf, "command buffer", BATCH_SZ);
create_batch_buffer(bufmgr, &batch->statebuf, "state buffer", STATE_SZ);
/* Avoid making 0 a valid state offset - otherwise the decoder will try
* and decode data when we use offset 0 as a null pointer.
*/
batch->statebuf.map_next += 1;
add_exec_bo(batch, batch->cmdbuf.bo);
assert(batch->cmdbuf.bo->index == 0);
@ -220,10 +202,6 @@ free_batch_buffer(struct iris_batch_buffer *buf)
buf->bo = NULL;
buf->map = NULL;
buf->map_next = NULL;
free(buf->relocs.relocs);
buf->relocs.relocs = NULL;
buf->relocs.reloc_array_size = 0;
}
void
@ -235,7 +213,6 @@ iris_batch_free(struct iris_batch *batch)
free(batch->exec_bos);
free(batch->validation_list);
free_batch_buffer(&batch->cmdbuf);
free_batch_buffer(&batch->statebuf);
iris_bo_unreference(batch->last_cmd_bo);
@ -406,19 +383,6 @@ iris_require_command_space(struct iris_batch *batch, unsigned size)
require_buffer_space(batch, &batch->cmdbuf, size, BATCH_SZ, MAX_BATCH_SIZE);
}
/**
 * Ensure the statebuffer has room for another size bytes, or flush.
 *
 * Callers use this as an estimate of when the batch is nearly full so
 * that a flush can happen early.  The work is delegated to
 * require_buffer_space() with the statebuffer limits STATE_SZ and
 * MAX_STATE_SIZE.
 */
void
iris_require_state_space(struct iris_batch *batch, unsigned size)
{
   struct iris_batch_buffer *state = &batch->statebuf;

   require_buffer_space(batch, state, size, STATE_SZ, MAX_STATE_SIZE);
}
void
iris_batch_emit(struct iris_batch *batch, const void *data, unsigned size)
{
@ -456,7 +420,6 @@ static int
submit_batch(struct iris_batch *batch, int in_fence_fd, int *out_fence_fd)
{
iris_bo_unmap(batch->cmdbuf.bo);
iris_bo_unmap(batch->statebuf.bo);
/* The requirement for using I915_EXEC_NO_RELOC are:
*
@ -470,23 +433,6 @@ submit_batch(struct iris_batch *batch, int in_fence_fd, int *out_fence_fd)
* To avoid stalling, execobject.offset should match the current
* address of that object within the active context.
*/
/* Set statebuffer relocations */
const unsigned state_index = batch->statebuf.bo->index;
if (state_index < batch->exec_count &&
batch->exec_bos[state_index] == batch->statebuf.bo) {
struct drm_i915_gem_exec_object2 *entry =
&batch->validation_list[state_index];
assert(entry->handle == batch->statebuf.bo->gem_handle);
entry->relocation_count = batch->statebuf.relocs.reloc_count;
entry->relocs_ptr = (uintptr_t) batch->statebuf.relocs.relocs;
}
/* Set batchbuffer relocations */
struct drm_i915_gem_exec_object2 *entry = &batch->validation_list[0];
assert(entry->handle == batch->cmdbuf.bo->gem_handle);
entry->relocation_count = batch->cmdbuf.relocs.reloc_count;
entry->relocs_ptr = (uintptr_t) batch->cmdbuf.relocs.relocs;
struct drm_i915_gem_execbuffer2 execbuf = {
.buffers_ptr = (uintptr_t) batch->validation_list,
.buffer_count = batch->exec_count,
@ -568,16 +514,12 @@ _iris_batch_flush_fence(struct iris_batch *batch,
if (unlikely(INTEL_DEBUG & (DEBUG_BATCH | DEBUG_SUBMIT))) {
int bytes_for_commands = buffer_bytes_used(&batch->cmdbuf);
int bytes_for_state = buffer_bytes_used(&batch->statebuf);
fprintf(stderr, "%19s:%-3d: Batchbuffer flush with %5db (%0.1f%%) (pkt),"
" %5db (%0.1f%%) (state), %4d BOs (%0.1fMb aperture),"
" %4d batch relocs, %4d state relocs\n", file, line,
fprintf(stderr, "%19s:%-3d: Batchbuffer flush with %5db (%0.1f%%), "
"%4d BOs (%0.1fMb aperture)\n",
file, line,
bytes_for_commands, 100.0f * bytes_for_commands / BATCH_SZ,
bytes_for_state, 100.0f * bytes_for_state / STATE_SZ,
batch->exec_count,
(float) batch->aperture_space / (1024 * 1024),
batch->cmdbuf.relocs.reloc_count,
batch->statebuf.relocs.reloc_count);
(float) batch->aperture_space / (1024 * 1024));
}
int ret = submit_batch(batch, in_fence_fd, out_fence_fd);
@ -603,13 +545,9 @@ _iris_batch_flush_fence(struct iris_batch *batch,
iris_bo_unreference(batch->exec_bos[i]);
batch->exec_bos[i] = NULL;
}
batch->cmdbuf.relocs.reloc_count = 0;
batch->statebuf.relocs.reloc_count = 0;
batch->exec_count = 0;
batch->aperture_space = 0;
iris_bo_unreference(batch->statebuf.bo);
/* Start a new batch buffer. */
iris_batch_reset_and_clear_render_cache(batch);
@ -630,46 +568,8 @@ iris_batch_references(struct iris_batch *batch, struct iris_bo *bo)
return false;
}
/* This is the only way buffers get added to the validate list.
/* This is the only way buffers get added to the validate list.
*/
/**
 * Reference target from the buffer that owns rlist and, if needed, record
 * a relocation entry for it.
 *
 * The target BO is first added to the batch via add_exec_bo(), which yields
 * its index in the validation list.
 *
 *  - If the target carries EXEC_OBJECT_PINNED, no relocation entry is
 *    recorded; the address is taken directly from the validation entry.
 *  - Otherwise a drm_i915_gem_relocation_entry is appended to rlist,
 *    doubling the array when it is full.
 *
 * \param offset         byte offset of the location being relocated
 * \param target_offset  delta added to the target's address
 * \param reloc_flags    accepted for interface compatibility; not used by
 *                       this function
 *
 * \return the presumed address (validation-list offset + target_offset),
 *         which the caller writes at the relocated location.
 */
static uint64_t
emit_reloc(struct iris_batch *batch,
           struct iris_reloc_list *rlist, uint32_t offset,
           struct iris_bo *target, uint32_t target_offset,
           unsigned int reloc_flags)
{
   (void) reloc_flags;

   assert(target != NULL);

   unsigned int index = add_exec_bo(batch, target);
   struct drm_i915_gem_exec_object2 *entry = &batch->validation_list[index];

   if (target->kflags & EXEC_OBJECT_PINNED) {
      assert(entry->offset == target->gtt_offset);
      return entry->offset + target_offset;
   }

   if (rlist->reloc_count == rlist->reloc_array_size) {
      /* Grow through a temporary: realloc() leaves the old block intact on
       * failure, so assigning its result straight to rlist->relocs would
       * both leak the array and make the store below write through NULL.
       */
      const int new_size = rlist->reloc_array_size * 2;
      struct drm_i915_gem_relocation_entry *grown =
         realloc(rlist->relocs, new_size * sizeof(*grown));

      if (grown == NULL) {
         /* There is no error channel in the return value, so fail loudly
          * instead of dereferencing a null pointer.
          */
         fprintf(stderr, "iris: out of memory growing relocation list\n");
         abort();
      }

      rlist->relocs = grown;
      rlist->reloc_array_size = new_size;
   }

   rlist->relocs[rlist->reloc_count++] =
      (struct drm_i915_gem_relocation_entry) {
         .offset = offset,
         .delta = target_offset,
         .target_handle = index,
         .presumed_offset = entry->offset,
      };

   /* Using the old buffer offset, write in what the right data would be, in
    * case the buffer doesn't move and we can short-circuit the relocation
    * processing in the kernel
    */
   return entry->offset + target_offset;
}
void
iris_use_pinned_bo(struct iris_batch *batch, struct iris_bo *bo)
{
@ -677,79 +577,6 @@ iris_use_pinned_bo(struct iris_batch *batch, struct iris_bo *bo)
add_exec_bo(batch, bo);
}
uint64_t
iris_batch_reloc(struct iris_batch *batch, uint32_t batch_offset,
struct iris_bo *target, uint32_t target_offset,
unsigned int reloc_flags)
{
assert(batch_offset <= batch->cmdbuf.bo->size - sizeof(uint32_t));
return emit_reloc(batch, &batch->cmdbuf.relocs, batch_offset,
target, target_offset, reloc_flags);
}
uint64_t
iris_state_reloc(struct iris_batch *batch, uint32_t state_offset,
struct iris_bo *target, uint32_t target_offset,
unsigned int reloc_flags)
{
assert(state_offset <= batch->statebuf.bo->size - sizeof(uint32_t));
return emit_reloc(batch, &batch->statebuf.relocs, state_offset,
target, target_offset, reloc_flags);
}
/**
 * Look up the size recorded for the state entry at offset.
 *
 * The batch's state_sizes hash table is keyed by offset; returns 0 when no
 * entry is found.
 */
static uint32_t
iris_state_entry_size(struct iris_batch *batch, uint32_t offset)
{
   void *key = (void *)(uintptr_t) offset;
   struct hash_entry *found =
      _mesa_hash_table_search(batch->state_sizes, key);

   if (found == NULL)
      return 0;

   return (uintptr_t) found->data;
}
/**
* Allocates a block of space in the batchbuffer for indirect state.
*
* Reserves size bytes in the statebuffer, starting at an offset aligned to
* alignment.  The aligned offset (relative to the start of the statebuffer
* map) is stored in *out_offset, and a pointer to the reserved space is
* returned.
*
* When batch->state_sizes is non-NULL, the size of the allocation is also
* recorded in that hash table, keyed by the offset.
*/
void *
iris_alloc_state(struct iris_batch *batch,
int size, int alignment,
uint32_t *out_offset)
{
assert(size < batch->statebuf.bo->size);
const unsigned existing_bytes = buffer_bytes_used(&batch->statebuf);
/* Padding needed to reach the alignment, plus the payload itself. */
unsigned aligned_size =
ALIGN(existing_bytes, alignment) - existing_bytes + size;
require_buffer_space(batch, &batch->statebuf, aligned_size,
STATE_SZ, MAX_STATE_SIZE);
/* The offset is recomputed from the current usage after the space check.
* NOTE(review): aligned_size was derived from the usage measured before
* require_buffer_space(); if that call can change the number of bytes
* used, the padding may no longer match this offset -- confirm.
*/
unsigned offset = ALIGN(buffer_bytes_used(&batch->statebuf), alignment);
if (unlikely(batch->state_sizes)) {
_mesa_hash_table_insert(batch->state_sizes,
(void *) (uintptr_t) offset,
(void *) (uintptr_t) size);
}
/* Advance the write pointer past the padding and the new allocation. */
batch->statebuf.map_next += aligned_size;
*out_offset = offset;
return batch->statebuf.map + offset;
}
/**
 * Copy size bytes of data into freshly allocated statebuffer space.
 *
 * The space comes from iris_alloc_state() with the given alignment.
 * Returns the offset that iris_alloc_state() reported for the allocation.
 */
uint32_t
iris_emit_state(struct iris_batch *batch,
                const void *data,
                int size, int alignment)
{
   uint32_t state_offset;

   memcpy(iris_alloc_state(batch, size, alignment, &state_offset),
          data, size);

   return state_offset;
}
static void
decode_batch(struct iris_batch *batch)
{

View File

@ -39,13 +39,7 @@
struct iris_address {
struct iris_bo *bo;
unsigned reloc_flags;
uint32_t offset;
};
struct iris_reloc_list {
struct drm_i915_gem_relocation_entry *relocs;
int reloc_count;
int reloc_array_size;
uint64_t offset;
};
struct iris_batch_buffer {
@ -55,8 +49,6 @@ struct iris_batch_buffer {
struct iris_bo *partial_bo;
unsigned partial_bytes;
struct iris_reloc_list relocs;
};
struct iris_batch {
@ -65,8 +57,6 @@ struct iris_batch {
/** Current batchbuffer being queued up. */
struct iris_batch_buffer cmdbuf;
/** Current statebuffer being queued up. */
struct iris_batch_buffer statebuf;
/** Last BO submitted to the hardware. Used for glFinish(). */
struct iris_bo *last_cmd_bo;
@ -99,12 +89,7 @@ void iris_init_batch(struct iris_batch *batch,
uint8_t ring);
void iris_batch_free(struct iris_batch *batch);
void iris_require_command_space(struct iris_batch *batch, unsigned size);
void iris_require_state_space(struct iris_batch *batch, unsigned size);
void iris_batch_emit(struct iris_batch *batch, const void *data, unsigned size);
uint32_t iris_emit_state(struct iris_batch *batch, const void *data, int size,
int alignment);
void *iris_alloc_state(struct iris_batch *batch, int size, int alignment,
uint32_t *out_offset);
int _iris_batch_flush_fence(struct iris_batch *batch,
int in_fence_fd, int *out_fence_fd,
@ -123,15 +108,4 @@ bool iris_batch_references(struct iris_batch *batch, struct iris_bo *bo);
void iris_use_pinned_bo(struct iris_batch *batch, struct iris_bo *bo);
uint64_t iris_batch_reloc(struct iris_batch *batch,
uint32_t batch_offset,
struct iris_bo *target,
uint32_t target_offset,
unsigned flags);
uint64_t iris_state_reloc(struct iris_batch *batch,
uint32_t batch_offset,
struct iris_bo *target,
uint32_t target_offset,
unsigned flags);
#endif

View File

@ -57,11 +57,11 @@ static uint64_t
__gen_combine_address(struct iris_batch *batch, void *location,
struct iris_address addr, uint32_t delta)
{
if (addr.bo == NULL)
return addr.offset + delta;
// XXX: reloc flags?
if (addr.bo)
iris_use_pinned_bo(batch, addr.bo);
return iris_batch_reloc(batch, location - batch->cmdbuf.map, addr.bo,
addr.offset + delta, addr.reloc_flags);
return addr.offset + delta;
}
#define __genxml_cmd_length(cmd) cmd ## _length
@ -105,26 +105,6 @@ get_command_space(struct iris_batch *batch, unsigned bytes)
VG(VALGRIND_CHECK_MEM_IS_DEFINED(dw, num_dwords)); \
} while (0)
/* Emit num_dw pre-packed dwords from the array `dwords` into the command
 * buffer, patching in an address.
 *
 * Space for 4 * num_dw bytes is obtained from get_command_space().  The
 * dwords before index GENX(addr_field) / 32 are copied verbatim; at that
 * index a 64-bit value returned by iris_batch_reloc() for `addr` is stored,
 * using addr.offset + dwords[addr_idx + 1] as the target offset; the
 * remaining dwords are then copied.
 *
 * NOTE(review): the second copy loop starts at addr_idx + 1, so it stores
 * dwords[addr_idx + 1] over the upper half of the 64-bit value that was
 * just written -- confirm this is intended.
 */
#define iris_emit_with_addr(batch, dwords, num_dw, addr_field, addr) \
do { \
STATIC_ASSERT((GENX(addr_field) % 64) == 0); \
assert(num_dw <= ARRAY_SIZE(dwords)); \
int addr_idx = GENX(addr_field) / 32; \
uint32_t *dw = get_command_space(batch, 4 * num_dw); \
for (uint32_t i = 0; i < addr_idx; i++) { \
dw[i] = (dwords)[i]; \
} \
uint64_t *qw = (uint64_t *) &dw[addr_idx]; \
*qw = iris_batch_reloc(batch, (void *)qw - batch->cmdbuf.map, \
addr.bo, \
addr.offset + (dwords)[addr_idx + 1], \
addr.reloc_flags); \
for (uint32_t i = addr_idx + 1; i < num_dw; i++) { \
dw[i] = (dwords)[i]; \
} \
VG(VALGRIND_CHECK_MEM_IS_DEFINED(dw, num_dw * 4)); \
} while (0)
#include "genxml/genX_pack.h"
#include "genxml/gen_macros.h"
#include "genxml/genX_bits.h"
@ -290,11 +270,44 @@ translate_fill_mode(unsigned pipe_polymode)
}
static struct iris_address
ro_bo(struct iris_bo *bo, uint32_t offset)
ro_bo(struct iris_bo *bo, uint64_t offset)
{
return (struct iris_address) { .bo = bo, .offset = offset };
}
/**
 * Allocate size bytes of streamed state from uploader and reference the
 * backing BO from batch.
 *
 * \param batch       batch that will reference the buffer holding the state
 * \param uploader    upload manager to sub-allocate from
 * \param size        number of bytes to allocate
 * \param alignment   required alignment of the allocation
 * \param out_offset  receives the offset reported by u_upload_alloc()
 *
 * \return a CPU pointer to the allocated space, or NULL if the upload
 *         manager could not provide one.  On failure the batch is left
 *         untouched.
 */
static uint32_t *
stream_state(struct iris_batch *batch,
             struct u_upload_mgr *uploader,
             unsigned size,
             unsigned alignment,
             unsigned *out_offset)
{
   struct pipe_resource *res = NULL;
   void *ptr = NULL;

   u_upload_alloc(uploader, 0, size, alignment, out_offset, &res, &ptr);

   /* Callers (e.g. emit_state) test the returned pointer for NULL, so a
    * failed allocation must not be dereferenced here first.
    */
   if (res == NULL || ptr == NULL) {
      pipe_resource_reference(&res, NULL);
      return NULL;
   }

   iris_use_pinned_bo(batch, ((struct iris_resource *) res)->bo);

   /* Drop the local reference obtained from u_upload_alloc(). */
   pipe_resource_reference(&res, NULL);

   return ptr;
}
/**
 * Stream a copy of data (size bytes) through uploader.
 *
 * Space is obtained from stream_state(); when that returns NULL nothing is
 * copied.  Returns the offset reported for the allocation, which stays 0
 * if stream_state() never wrote it.
 */
static uint32_t
emit_state(struct iris_batch *batch,
           struct u_upload_mgr *uploader,
           const void *data,
           unsigned size,
           unsigned alignment)
{
   unsigned state_offset = 0;
   void *dest = stream_state(batch, uploader, size, alignment, &state_offset);

   if (dest != NULL)
      memcpy(dest, data, size);

   return state_offset;
}
static void
iris_emit_state_base_address(struct iris_batch *batch)
{
@ -323,13 +336,13 @@ iris_emit_state_base_address(struct iris_batch *batch)
sba.IndirectObjectBufferSizeModifyEnable = true;
sba.InstructionBuffersizeModifyEnable = true;
sba.SurfaceStateBaseAddress = ro_bo(batch->statebuf.bo, 0);
sba.DynamicStateBaseAddress = ro_bo(batch->statebuf.bo, 0);
sba.SurfaceStateBaseAddress = ro_bo(NULL, 1ull << 32);
sba.DynamicStateBaseAddress = ro_bo(NULL, 2 * (1ull << 32));
sba.GeneralStateBufferSize = 0xfffff;
sba.IndirectObjectBufferSize = 0xfffff;
sba.InstructionBufferSize = 0xfffff;
sba.DynamicStateBufferSize = ALIGN(MAX_STATE_SIZE, 4096);
sba.DynamicStateBufferSize = 0xfffff;
}
}
@ -1806,32 +1819,6 @@ static const uint32_t push_constant_opcodes[] = {
[MESA_SHADER_COMPUTE] = 0,
};
/**
 * Copy a pre-packed RENDER_SURFACE_STATE into the statebuffer, patching in
 * the address of res->bo via a state relocation.
 *
 * Space for the surface state is obtained from iris_alloc_state() with
 * 64-byte alignment.  Returns the statebuffer offset of the copy.
 */
static uint32_t
emit_patched_surface_state(struct iris_batch *batch,
uint32_t *surface_state,
const struct iris_resource *res,
unsigned reloc_flags)
{
const int num_dwords = GENX(RENDER_SURFACE_STATE_length);
uint32_t offset;
uint32_t *dw = iris_alloc_state(batch, 4 * num_dwords, 64, &offset);
STATIC_ASSERT(GENX(RENDER_SURFACE_STATE_SurfaceBaseAddress_start) % 32 == 0);
/* Dword index at which the SurfaceBaseAddress field starts. */
int addr_idx = GENX(RENDER_SURFACE_STATE_SurfaceBaseAddress_start) / 32;
/* Dwords ahead of the address field are copied unchanged. */
for (uint32_t i = 0; i < addr_idx; i++)
dw[i] = surface_state[i];
/* The address is stored as a 64-bit value covering dw[addr_idx] and
* dw[addr_idx + 1].
*/
uint64_t *qw = (uint64_t *) &dw[addr_idx];
// XXX: mt->offset, if needed
*qw = iris_state_reloc(batch, (void *)qw - batch->statebuf.map, res->bo,
surface_state[addr_idx + 1], reloc_flags);
/* NOTE(review): this loop starts at addr_idx + 1, so it stores
* surface_state[addr_idx + 1] over the upper half of the 64-bit value
* written just above -- confirm this is intended.
*/
for (uint32_t i = addr_idx + 1; i < num_dwords; i++)
dw[i] = surface_state[i];
return offset;
}
static void
iris_upload_render_state(struct iris_context *ice,
struct iris_batch *batch,
@ -1846,7 +1833,8 @@ iris_upload_render_state(struct iris_context *ice,
struct iris_depth_stencil_alpha_state *cso = ice->state.cso_zsa;
iris_emit_cmd(batch, GENX(3DSTATE_VIEWPORT_STATE_POINTERS_CC), ptr) {
ptr.CCViewportPointer =
iris_emit_state(batch, cso->cc_vp, sizeof(cso->cc_vp), 32);
emit_state(batch, ice->state.dynamic_uploader,
cso->cc_vp, sizeof(cso->cc_vp), 32);
}
}
@ -1854,9 +1842,9 @@ iris_upload_render_state(struct iris_context *ice,
struct iris_viewport_state *cso = ice->state.cso_vp;
iris_emit_cmd(batch, GENX(3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP), ptr) {
ptr.SFClipViewportPointer =
iris_emit_state(batch, cso->sf_cl_vp,
4 * GENX(SF_CLIP_VIEWPORT_length) *
ice->state.num_viewports, 64);
emit_state(batch, ice->state.dynamic_uploader, cso->sf_cl_vp,
4 * GENX(SF_CLIP_VIEWPORT_length) *
ice->state.num_viewports, 64);
}
}
@ -1874,7 +1862,8 @@ iris_upload_render_state(struct iris_context *ice,
cso_fb->nr_cbufs * GENX(BLEND_STATE_ENTRY_length));
uint32_t blend_offset;
uint32_t *blend_map =
iris_alloc_state(batch, num_dwords, 64, &blend_offset);
stream_state(batch, ice->state.dynamic_uploader, 4 * num_dwords, 64,
&blend_offset);
uint32_t blend_state_header;
iris_pack_state(GENX(BLEND_STATE), &blend_state_header, bs) {
@ -1896,9 +1885,9 @@ iris_upload_render_state(struct iris_context *ice,
struct iris_depth_stencil_alpha_state *cso = ice->state.cso_zsa;
uint32_t cc_offset;
void *cc_map =
iris_alloc_state(batch,
sizeof(uint32_t) * GENX(COLOR_CALC_STATE_length),
64, &cc_offset);
stream_state(batch, ice->state.dynamic_uploader,
sizeof(uint32_t) * GENX(COLOR_CALC_STATE_length),
64, &cc_offset);
iris_pack_state(GENX(COLOR_CALC_STATE), cc_map, cc) {
cc.AlphaTestFormat = ALPHATEST_FLOAT32;
cc.AlphaReferenceValueAsFLOAT32 = cso->alpha.ref_value;
@ -1966,8 +1955,9 @@ iris_upload_render_state(struct iris_context *ice,
uint32_t *bt_map = NULL;
if (prog_data->binding_table.size_bytes != 0) {
bt_map = iris_alloc_state(batch, prog_data->binding_table.size_bytes,
64, &bt_offset);
bt_map = stream_state(batch, ice->state.surface_uploader,
prog_data->binding_table.size_bytes,
64, &bt_offset);
}
iris_emit_cmd(batch, GENX(3DSTATE_BINDING_TABLE_POINTERS_VS), ptr) {
@ -1983,9 +1973,10 @@ iris_upload_render_state(struct iris_context *ice,
for (unsigned i = 0; i < cso_fb->nr_cbufs; i++) {
struct iris_surface *surf = (void *) cso_fb->cbufs[i];
struct iris_resource *res = (void *) surf->pipe.texture;
*bt_map++ = emit_patched_surface_state(batch, surf->surface_state,
res, RELOC_WRITE);
*bt_map++ =
emit_state(batch, ice->state.surface_uploader,
surf->surface_state,
4 * GENX(RENDER_SURFACE_STATE_length), 64);
}
}
@ -1996,7 +1987,6 @@ iris_upload_render_state(struct iris_context *ice,
// XXX: these are per-context??????????? pipe_sampler_view::context
*bt_map++ =
emit_patched_surface_state(batch, view->surface_state, res, 0);
}
// XXX: not implemented yet
@ -2019,9 +2009,9 @@ iris_upload_render_state(struct iris_context *ice,
const int count = IRIS_MAX_TEXTURE_SAMPLERS;
uint32_t offset;
uint32_t *map = iris_alloc_state(batch,
count * 4 * GENX(SAMPLER_STATE_length),
32, &offset);
uint32_t *map = stream_state(batch, ice->state.dynamic_uploader,
count * 4 * GENX(SAMPLER_STATE_length),
32, &offset);
for (int i = 0; i < count; i++) {
// XXX: when we have a correct count, these better be bound
@ -2169,9 +2159,9 @@ iris_upload_render_state(struct iris_context *ice,
if (dirty & IRIS_DIRTY_SCISSOR) {
uint32_t scissor_offset =
iris_emit_state(batch, ice->state.scissors,
sizeof(struct pipe_scissor_state) *
ice->state.num_scissors, 32);
emit_state(batch, ice->state.dynamic_uploader, ice->state.scissors,
sizeof(struct pipe_scissor_state) *
ice->state.num_scissors, 32);
iris_emit_cmd(batch, GENX(3DSTATE_SCISSOR_STATE_POINTERS), ptr) {
ptr.ScissorRectPointer = scissor_offset;
@ -2231,9 +2221,8 @@ iris_upload_render_state(struct iris_context *ice,
sizeof(uint32_t) * (1 + 4 * cso->num_buffers));
for (unsigned i = 0; i < cso->num_buffers; i++) {
*addr = iris_batch_reloc(batch, (void *) addr - batch->cmdbuf.map,
cso->bos[i].bo, cso->bos[i].offset +
*delta, cso->bos[i].reloc_flags);
iris_use_pinned_bo(batch, cso->bos[i].bo);
*addr = cso->bos[i].offset + *delta;
addr = (void *) addr + 16;
delta = (void *) delta + 16;
}