intel: Use 3DSTATE_BINDING_TABLE_POOL_ALLOC exclusively on Gfx11+

On Icelake and later, we can use a new 3DSTATE_BINDING_TABLE_POOL_ALLOC
command to update the location of the binder (buffer containing binding
table entries), rather than having to move Surface State Base Address
via a STATE_BASE_ADDRESS command.  This causes less stalling, and it also
lets our surface addresses remain relative to a fixed 4GB address range,
so we don't have to re-stream them any time the binder changes.

Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14507>
This commit is contained in:
Kenneth Graunke 2022-01-11 16:17:34 -08:00 committed by Marge Bot
parent e3a0e97300
commit 8b9045e7a4
4 changed files with 54 additions and 37 deletions

View File

@ -234,6 +234,7 @@ iris_init_batch(struct iris_context *ice,
decode_get_bo, decode_get_state_size, batch);
batch->decoder.dynamic_base = IRIS_MEMZONE_DYNAMIC_START;
batch->decoder.instruction_base = IRIS_MEMZONE_SHADER_START;
batch->decoder.surface_base = IRIS_MEMZONE_BINDER_START;
batch->decoder.max_vbo_decoded_lines = 32;
if (batch->name == IRIS_BATCH_BLITTER)
batch->decoder.engine = I915_ENGINE_CLASS_COPY;
@ -513,6 +514,7 @@ iris_batch_reset(struct iris_batch *batch)
{
struct iris_screen *screen = batch->screen;
struct iris_bufmgr *bufmgr = screen->bufmgr;
const struct intel_device_info *devinfo = &screen->devinfo;
u_trace_fini(&batch->trace);
@ -521,7 +523,8 @@ iris_batch_reset(struct iris_batch *batch)
batch->total_chained_batch_size = 0;
batch->contains_draw = false;
batch->contains_fence_signal = false;
batch->decoder.surface_base = batch->last_binder_address;
if (devinfo->ver < 11)
batch->decoder.surface_base = batch->last_binder_address;
create_batch(batch);
assert(batch->bo->index == 0);

View File

@ -63,6 +63,7 @@ static void
binder_realloc(struct iris_context *ice)
{
struct iris_screen *screen = (void *) ice->ctx.screen;
const struct intel_device_info *devinfo = &screen->devinfo;
struct iris_bufmgr *bufmgr = screen->bufmgr;
struct iris_binder *binder = &ice->state.binder;
@ -76,15 +77,21 @@ binder_realloc(struct iris_context *ice)
/* Avoid using offset 0 - tools consider it NULL. */
binder->insert_point = binder->alignment;
/* Allocating a new binder requires changing Surface State Base Address,
* which also invalidates all our previous binding tables - each entry
* in those tables is an offset from the old base.
*
* We do this here so that iris_binder_reserve_3d correctly gets a new
* larger total_size when making the updated reservation.
*/
ice->state.dirty |= IRIS_DIRTY_RENDER_BUFFER;
ice->state.stage_dirty |= IRIS_ALL_STAGE_DIRTY_BINDINGS;
if (devinfo->ver < 11) {
/* Allocating a new binder requires changing Surface State Base Address,
* which also invalidates all our previous binding tables - each entry
* in those tables is an offset from the old base.
*
* We do this here so that iris_binder_reserve_3d correctly gets a new
* larger total_size when making the updated reservation.
*
* On Icelake and later, we just update the binding table pool address
* rather than moving surface state base address, so we no longer need
* to do any of this.
*/
ice->state.dirty |= IRIS_DIRTY_RENDER_BUFFER;
ice->state.stage_dirty |= IRIS_ALL_STAGE_DIRTY_BINDINGS;
}
}
static uint32_t

View File

@ -166,13 +166,15 @@ blorp_alloc_binding_table(struct blorp_batch *blorp_batch,
iris_binder_reserve(ice, num_entries * sizeof(uint32_t));
uint32_t *bt_map = binder->map + bt_offset;
uint32_t surf_base_offset = GFX_VER < 11 ? binder->bo->address : 0;
*out_bt_offset = bt_offset;
for (unsigned i = 0; i < num_entries; i++) {
surface_maps[i] = stream_state(batch, ice->state.surface_uploader,
state_size, state_alignment,
&surface_offsets[i], NULL);
bt_map[i] = surface_offsets[i] - (uint32_t) binder->bo->address;
bt_map[i] = surface_offsets[i] - surf_base_offset;
}
iris_use_pinned_bo(batch, binder->bo, false, IRIS_DOMAIN_NONE);

View File

@ -718,6 +718,7 @@ init_state_base_address(struct iris_batch *batch)
sba.InstructionBaseAddressModifyEnable = true;
sba.GeneralStateBufferSizeModifyEnable = true;
sba.DynamicStateBufferSizeModifyEnable = true;
sba.SurfaceStateBaseAddressModifyEnable = true;
#if GFX_VER >= 9
sba.BindlessSurfaceStateBaseAddress = ro_bo(NULL, IRIS_MEMZONE_BINDLESS_START);
sba.BindlessSurfaceStateSize = (IRIS_BINDLESS_SIZE >> 12) - 1;
@ -732,6 +733,7 @@ init_state_base_address(struct iris_batch *batch)
sba.InstructionBaseAddress = ro_bo(NULL, IRIS_MEMZONE_SHADER_START);
sba.DynamicStateBaseAddress = ro_bo(NULL, IRIS_MEMZONE_DYNAMIC_START);
sba.SurfaceStateBaseAddress = ro_bo(NULL, IRIS_MEMZONE_BINDER_START);
sba.GeneralStateBufferSize = 0xfffff;
sba.IndirectObjectBufferSize = 0xfffff;
@ -5049,9 +5051,9 @@ use_image(struct iris_batch *batch, struct iris_context *ice,
}
#define push_bt_entry(addr) \
assert(addr >= binder_addr); \
assert(addr >= surf_base_offset); \
assert(s < shader->bt.size_bytes / sizeof(uint32_t)); \
if (!pin_only) bt_map[s++] = (addr) - binder_addr;
if (!pin_only) bt_map[s++] = (addr) - surf_base_offset;
#define bt_assert(section) \
if (!pin_only && shader->bt.used_mask[section] != 0) \
@ -5078,7 +5080,7 @@ iris_populate_binding_table(struct iris_context *ice,
struct iris_binding_table *bt = &shader->bt;
UNUSED struct brw_stage_prog_data *prog_data = shader->prog_data;
struct iris_shader_state *shs = &ice->state.shaders[stage];
uint32_t binder_addr = binder->bo->address;
uint32_t surf_base_offset = GFX_VER < 11 ? binder->bo->address : 0;
uint32_t *bt_map = binder->map + binder->bt_offset[stage];
int s = 0;
@ -5458,7 +5460,8 @@ iris_update_binder_address(struct iris_batch *batch,
iris_batch_sync_region_start(batch);
flush_before_state_base_change(batch);
#if GFX_VER >= 11
/* Use 3DSTATE_BINDING_TABLE_POOL_ALLOC on Icelake and later */
#if GFX_VERx10 == 120
/* Wa_1607854226:
@ -5470,6 +5473,30 @@ iris_update_binder_address(struct iris_batch *batch,
emit_pipeline_select(batch, _3D);
#endif
iris_emit_pipe_control_flush(batch, "Stall for binder realloc",
PIPE_CONTROL_CS_STALL);
iris_emit_cmd(batch, GENX(3DSTATE_BINDING_TABLE_POOL_ALLOC), btpa) {
btpa.BindingTablePoolBaseAddress = ro_bo(binder->bo, 0);
btpa.BindingTablePoolBufferSize = binder->size / 4096;
#if GFX_VERx10 < 125
btpa.BindingTablePoolEnable = true;
#endif
btpa.MOCS = mocs;
}
#if GFX_VERx10 == 120
/* Wa_1607854226:
*
* Put the pipeline back into compute mode.
*/
if (batch->name == IRIS_BATCH_COMPUTE)
emit_pipeline_select(batch, GPGPU);
#endif
#else
/* Use STATE_BASE_ADDRESS on older platforms */
flush_before_state_base_change(batch);
iris_emit_cmd(batch, GENX(STATE_BASE_ADDRESS), sba) {
sba.SurfaceStateBaseAddressModifyEnable = true;
sba.SurfaceStateBaseAddress = ro_bo(binder->bo, 0);
@ -5485,32 +5512,10 @@ iris_update_binder_address(struct iris_batch *batch,
sba.SurfaceStateMOCS = mocs;
#if GFX_VER >= 9
sba.BindlessSurfaceStateMOCS = mocs;
#endif
#if GFX_VER >= 11
sba.BindlessSamplerStateMOCS = mocs;
#endif
}
#if GFX_VERx10 == 120
/* Wa_1607854226:
*
* Put the pipeline back into compute mode.
*/
if (batch->name == IRIS_BATCH_COMPUTE)
emit_pipeline_select(batch, GPGPU);
#endif
if (GFX_VERx10 >= 125) {
iris_emit_cmd(batch, GENX(3DSTATE_BINDING_TABLE_POOL_ALLOC), btpa) {
btpa.BindingTablePoolBaseAddress = ro_bo(binder->bo, 0);
btpa.BindingTablePoolBufferSize = IRIS_BINDER_SIZE / 4096;
#if GFX_VERx10 < 125
btpa.BindingTablePoolEnable = true;
#endif
btpa.MOCS = mocs;
}
}
flush_after_state_base_change(batch);
iris_batch_sync_region_end(batch);