intel: Use 3DSTATE_BINDING_TABLE_POOL_ALLOC exclusively on Gfx11+
On Icelake and later, we can use the new 3DSTATE_BINDING_TABLE_POOL_ALLOC command to update the location of the binder (the buffer containing binding table entries), rather than having to move Surface State Base Address via a STATE_BASE_ADDRESS command. This causes less stalling, and it also means our surface addresses can remain relative to a fixed 4GB address range, so we don't have to re-stream them any time the binder changes.

Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14507>
This commit is contained in:
parent
e3a0e97300
commit
8b9045e7a4
|
@ -234,6 +234,7 @@ iris_init_batch(struct iris_context *ice,
|
|||
decode_get_bo, decode_get_state_size, batch);
|
||||
batch->decoder.dynamic_base = IRIS_MEMZONE_DYNAMIC_START;
|
||||
batch->decoder.instruction_base = IRIS_MEMZONE_SHADER_START;
|
||||
batch->decoder.surface_base = IRIS_MEMZONE_BINDER_START;
|
||||
batch->decoder.max_vbo_decoded_lines = 32;
|
||||
if (batch->name == IRIS_BATCH_BLITTER)
|
||||
batch->decoder.engine = I915_ENGINE_CLASS_COPY;
|
||||
|
@ -513,6 +514,7 @@ iris_batch_reset(struct iris_batch *batch)
|
|||
{
|
||||
struct iris_screen *screen = batch->screen;
|
||||
struct iris_bufmgr *bufmgr = screen->bufmgr;
|
||||
const struct intel_device_info *devinfo = &screen->devinfo;
|
||||
|
||||
u_trace_fini(&batch->trace);
|
||||
|
||||
|
@ -521,7 +523,8 @@ iris_batch_reset(struct iris_batch *batch)
|
|||
batch->total_chained_batch_size = 0;
|
||||
batch->contains_draw = false;
|
||||
batch->contains_fence_signal = false;
|
||||
batch->decoder.surface_base = batch->last_binder_address;
|
||||
if (devinfo->ver < 11)
|
||||
batch->decoder.surface_base = batch->last_binder_address;
|
||||
|
||||
create_batch(batch);
|
||||
assert(batch->bo->index == 0);
|
||||
|
|
|
@ -63,6 +63,7 @@ static void
|
|||
binder_realloc(struct iris_context *ice)
|
||||
{
|
||||
struct iris_screen *screen = (void *) ice->ctx.screen;
|
||||
const struct intel_device_info *devinfo = &screen->devinfo;
|
||||
struct iris_bufmgr *bufmgr = screen->bufmgr;
|
||||
struct iris_binder *binder = &ice->state.binder;
|
||||
|
||||
|
@ -76,15 +77,21 @@ binder_realloc(struct iris_context *ice)
|
|||
/* Avoid using offset 0 - tools consider it NULL. */
|
||||
binder->insert_point = binder->alignment;
|
||||
|
||||
/* Allocating a new binder requires changing Surface State Base Address,
|
||||
* which also invalidates all our previous binding tables - each entry
|
||||
* in those tables is an offset from the old base.
|
||||
*
|
||||
* We do this here so that iris_binder_reserve_3d correctly gets a new
|
||||
* larger total_size when making the updated reservation.
|
||||
*/
|
||||
ice->state.dirty |= IRIS_DIRTY_RENDER_BUFFER;
|
||||
ice->state.stage_dirty |= IRIS_ALL_STAGE_DIRTY_BINDINGS;
|
||||
if (devinfo->ver < 11) {
|
||||
/* Allocating a new binder requires changing Surface State Base Address,
|
||||
* which also invalidates all our previous binding tables - each entry
|
||||
* in those tables is an offset from the old base.
|
||||
*
|
||||
* We do this here so that iris_binder_reserve_3d correctly gets a new
|
||||
* larger total_size when making the updated reservation.
|
||||
*
|
||||
* On Icelake and later, we just update the binding table pool address
|
||||
* rather than moving surface state base address, so we no longer need
|
||||
* to do any of this.
|
||||
*/
|
||||
ice->state.dirty |= IRIS_DIRTY_RENDER_BUFFER;
|
||||
ice->state.stage_dirty |= IRIS_ALL_STAGE_DIRTY_BINDINGS;
|
||||
}
|
||||
}
|
||||
|
||||
static uint32_t
|
||||
|
|
|
@ -166,13 +166,15 @@ blorp_alloc_binding_table(struct blorp_batch *blorp_batch,
|
|||
iris_binder_reserve(ice, num_entries * sizeof(uint32_t));
|
||||
uint32_t *bt_map = binder->map + bt_offset;
|
||||
|
||||
uint32_t surf_base_offset = GFX_VER < 11 ? binder->bo->address : 0;
|
||||
|
||||
*out_bt_offset = bt_offset;
|
||||
|
||||
for (unsigned i = 0; i < num_entries; i++) {
|
||||
surface_maps[i] = stream_state(batch, ice->state.surface_uploader,
|
||||
state_size, state_alignment,
|
||||
&surface_offsets[i], NULL);
|
||||
bt_map[i] = surface_offsets[i] - (uint32_t) binder->bo->address;
|
||||
bt_map[i] = surface_offsets[i] - surf_base_offset;
|
||||
}
|
||||
|
||||
iris_use_pinned_bo(batch, binder->bo, false, IRIS_DOMAIN_NONE);
|
||||
|
|
|
@ -718,6 +718,7 @@ init_state_base_address(struct iris_batch *batch)
|
|||
sba.InstructionBaseAddressModifyEnable = true;
|
||||
sba.GeneralStateBufferSizeModifyEnable = true;
|
||||
sba.DynamicStateBufferSizeModifyEnable = true;
|
||||
sba.SurfaceStateBaseAddressModifyEnable = true;
|
||||
#if GFX_VER >= 9
|
||||
sba.BindlessSurfaceStateBaseAddress = ro_bo(NULL, IRIS_MEMZONE_BINDLESS_START);
|
||||
sba.BindlessSurfaceStateSize = (IRIS_BINDLESS_SIZE >> 12) - 1;
|
||||
|
@ -732,6 +733,7 @@ init_state_base_address(struct iris_batch *batch)
|
|||
|
||||
sba.InstructionBaseAddress = ro_bo(NULL, IRIS_MEMZONE_SHADER_START);
|
||||
sba.DynamicStateBaseAddress = ro_bo(NULL, IRIS_MEMZONE_DYNAMIC_START);
|
||||
sba.SurfaceStateBaseAddress = ro_bo(NULL, IRIS_MEMZONE_BINDER_START);
|
||||
|
||||
sba.GeneralStateBufferSize = 0xfffff;
|
||||
sba.IndirectObjectBufferSize = 0xfffff;
|
||||
|
@ -5049,9 +5051,9 @@ use_image(struct iris_batch *batch, struct iris_context *ice,
|
|||
}
|
||||
|
||||
#define push_bt_entry(addr) \
|
||||
assert(addr >= binder_addr); \
|
||||
assert(addr >= surf_base_offset); \
|
||||
assert(s < shader->bt.size_bytes / sizeof(uint32_t)); \
|
||||
if (!pin_only) bt_map[s++] = (addr) - binder_addr;
|
||||
if (!pin_only) bt_map[s++] = (addr) - surf_base_offset;
|
||||
|
||||
#define bt_assert(section) \
|
||||
if (!pin_only && shader->bt.used_mask[section] != 0) \
|
||||
|
@ -5078,7 +5080,7 @@ iris_populate_binding_table(struct iris_context *ice,
|
|||
struct iris_binding_table *bt = &shader->bt;
|
||||
UNUSED struct brw_stage_prog_data *prog_data = shader->prog_data;
|
||||
struct iris_shader_state *shs = &ice->state.shaders[stage];
|
||||
uint32_t binder_addr = binder->bo->address;
|
||||
uint32_t surf_base_offset = GFX_VER < 11 ? binder->bo->address : 0;
|
||||
|
||||
uint32_t *bt_map = binder->map + binder->bt_offset[stage];
|
||||
int s = 0;
|
||||
|
@ -5458,7 +5460,8 @@ iris_update_binder_address(struct iris_batch *batch,
|
|||
|
||||
iris_batch_sync_region_start(batch);
|
||||
|
||||
flush_before_state_base_change(batch);
|
||||
#if GFX_VER >= 11
|
||||
/* Use 3DSTATE_BINDING_TABLE_POOL_ALLOC on Icelake and later */
|
||||
|
||||
#if GFX_VERx10 == 120
|
||||
/* Wa_1607854226:
|
||||
|
@ -5470,6 +5473,30 @@ iris_update_binder_address(struct iris_batch *batch,
|
|||
emit_pipeline_select(batch, _3D);
|
||||
#endif
|
||||
|
||||
iris_emit_pipe_control_flush(batch, "Stall for binder realloc",
|
||||
PIPE_CONTROL_CS_STALL);
|
||||
|
||||
iris_emit_cmd(batch, GENX(3DSTATE_BINDING_TABLE_POOL_ALLOC), btpa) {
|
||||
btpa.BindingTablePoolBaseAddress = ro_bo(binder->bo, 0);
|
||||
btpa.BindingTablePoolBufferSize = binder->size / 4096;
|
||||
#if GFX_VERx10 < 125
|
||||
btpa.BindingTablePoolEnable = true;
|
||||
#endif
|
||||
btpa.MOCS = mocs;
|
||||
}
|
||||
|
||||
#if GFX_VERx10 == 120
|
||||
/* Wa_1607854226:
|
||||
*
|
||||
* Put the pipeline back into compute mode.
|
||||
*/
|
||||
if (batch->name == IRIS_BATCH_COMPUTE)
|
||||
emit_pipeline_select(batch, GPGPU);
|
||||
#endif
|
||||
#else
|
||||
/* Use STATE_BASE_ADDRESS on older platforms */
|
||||
flush_before_state_base_change(batch);
|
||||
|
||||
iris_emit_cmd(batch, GENX(STATE_BASE_ADDRESS), sba) {
|
||||
sba.SurfaceStateBaseAddressModifyEnable = true;
|
||||
sba.SurfaceStateBaseAddress = ro_bo(binder->bo, 0);
|
||||
|
@ -5485,32 +5512,10 @@ iris_update_binder_address(struct iris_batch *batch,
|
|||
sba.SurfaceStateMOCS = mocs;
|
||||
#if GFX_VER >= 9
|
||||
sba.BindlessSurfaceStateMOCS = mocs;
|
||||
#endif
|
||||
#if GFX_VER >= 11
|
||||
sba.BindlessSamplerStateMOCS = mocs;
|
||||
#endif
|
||||
}
|
||||
|
||||
#if GFX_VERx10 == 120
|
||||
/* Wa_1607854226:
|
||||
*
|
||||
* Put the pipeline back into compute mode.
|
||||
*/
|
||||
if (batch->name == IRIS_BATCH_COMPUTE)
|
||||
emit_pipeline_select(batch, GPGPU);
|
||||
#endif
|
||||
|
||||
if (GFX_VERx10 >= 125) {
|
||||
iris_emit_cmd(batch, GENX(3DSTATE_BINDING_TABLE_POOL_ALLOC), btpa) {
|
||||
btpa.BindingTablePoolBaseAddress = ro_bo(binder->bo, 0);
|
||||
btpa.BindingTablePoolBufferSize = IRIS_BINDER_SIZE / 4096;
|
||||
#if GFX_VERx10 < 125
|
||||
btpa.BindingTablePoolEnable = true;
|
||||
#endif
|
||||
btpa.MOCS = mocs;
|
||||
}
|
||||
}
|
||||
|
||||
flush_after_state_base_change(batch);
|
||||
iris_batch_sync_region_end(batch);
|
||||
|
||||
|
|
Loading…
Reference in New Issue