iris: Reconfigure the URB only if it's necessary or possibly useful

Reconfiguring the URB partitioning is likely to cause shader stalls,
as the dividing line between each stage's section of memory is moving.
(Technically, 3DSTATE_URB_* are pipelined commands, but that mostly
means that the command streamer doesn't need to stall.)  So it should
be beneficial to update the URB configuration less often.

If the previous URB configuration already has enough space for our
current shader's needs, we can just continue using it, assuming we
are able to allocate the maximum number of URB entries per stage.
However, if we ran out of URB space and had to limit the number of
URB entries for a stage, and the per-entry size is larger than we
need, we should reconfigure it to try to improve concurrency.

So, we begin tracking the last URB configuration in the context,
and compare against that when updating shader variants.

Cuts 36% of the URB reconfigurations (excluding BLORP) from a
Shadow of Mordor trace, and 46% from a GFXBench Manhattan 3.0 trace.

One nice thing is that this removes the need to look at the old
prog_data when updating shaders, which should make it possible to
unbind shader variants without causing spurious URB updates.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8721>
This commit is contained in:
Kenneth Graunke 2021-01-25 22:25:51 -08:00 committed by Marge Bot
parent a710145b5b
commit 939bc0c588
5 changed files with 34 additions and 22 deletions

View File

@ -360,6 +360,9 @@ iris_blorp_exec(struct blorp_batch *blorp_batch,
ice->state.dirty |= ~skip_bits;
ice->state.stage_dirty |= ~skip_stage_bits;
for (int i = 0; i < ARRAY_SIZE(ice->shaders.urb.size); i++)
ice->shaders.urb.size[i] = 0;
if (params->src.enabled)
iris_bo_bump_seqno(params->src.addr.buffer, batch->next_seqno,
IRIS_DOMAIN_OTHER_READ);

View File

@ -97,6 +97,7 @@ iris_lost_context_state(struct iris_batch *batch)
ice->state.dirty = ~0ull;
ice->state.stage_dirty = ~0ull;
ice->state.current_hash_scale = 0;
memset(&ice->shaders.urb, 0, sizeof(ice->shaders.urb));
memset(ice->state.last_block, 0, sizeof(ice->state.last_block));
memset(ice->state.last_grid, 0, sizeof(ice->state.last_grid));
batch->last_surface_base_address = ~0ull;

View File

@ -608,6 +608,12 @@ struct iris_context {
struct iris_uncompiled_shader *uncompiled[MESA_SHADER_STAGES];
struct iris_compiled_shader *prog[MESA_SHADER_STAGES];
struct brw_vue_map *last_vue_map;
struct {
unsigned size[4];
unsigned entries[4];
unsigned start[4];
bool constrained;
} urb;
/** List of shader variants whose deletion has been deferred for now */
struct list_head deleted_variants[MESA_SHADER_STAGES];

View File

@ -1851,12 +1851,6 @@ iris_update_compiled_shaders(struct iris_context *ice)
const uint64_t dirty = ice->state.dirty;
const uint64_t stage_dirty = ice->state.stage_dirty;
struct brw_vue_prog_data *old_prog_datas[4];
if (!(dirty & IRIS_DIRTY_URB)) {
for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++)
old_prog_datas[i] = get_vue_prog_data(ice, i);
}
if (stage_dirty & (IRIS_STAGE_DIRTY_UNCOMPILED_TCS |
IRIS_STAGE_DIRTY_UNCOMPILED_TES)) {
struct iris_uncompiled_shader *tes =
@ -1931,10 +1925,20 @@ iris_update_compiled_shaders(struct iris_context *ice)
/* Changing shader interfaces may require a URB configuration. */
if (!(dirty & IRIS_DIRTY_URB)) {
for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) {
struct brw_vue_prog_data *old = old_prog_datas[i];
struct brw_vue_prog_data *new = get_vue_prog_data(ice, i);
if (!!old != !!new ||
(new && new->urb_entry_size != old->urb_entry_size)) {
struct brw_vue_prog_data *prog_data = get_vue_prog_data(ice, i);
unsigned needed_size = prog_data ? prog_data->urb_entry_size : 0;
unsigned last_allocated_size = ice->shaders.urb.size[i];
/* If the last URB allocation wasn't large enough for our needs,
* flag it as needing to be reconfigured. Otherwise, we can use
* the existing config. However, if the URB is constrained, and
* we can shrink our size for this stage, we may be able to gain
* extra concurrency by reconfiguring it to be smaller. Do so.
*/
if (last_allocated_size < needed_size ||
(ice->shaders.urb.constrained &&
last_allocated_size > needed_size)) {
ice->state.dirty |= IRIS_DIRTY_URB;
break;
}

View File

@ -5581,35 +5581,33 @@ iris_upload_dirty_render_state(struct iris_context *ice,
}
if (dirty & IRIS_DIRTY_URB) {
unsigned size[4];
for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) {
if (!ice->shaders.prog[i]) {
size[i] = 1;
ice->shaders.urb.size[i] = 1;
} else {
struct brw_vue_prog_data *vue_prog_data =
(void *) ice->shaders.prog[i]->prog_data;
size[i] = vue_prog_data->urb_entry_size;
ice->shaders.urb.size[i] = vue_prog_data->urb_entry_size;
}
assert(size[i] != 0);
assert(ice->shaders.urb.size[i] != 0);
}
bool constrained;
unsigned entries[4], start[4];
gen_get_urb_config(&batch->screen->devinfo,
batch->screen->l3_config_3d,
ice->shaders.prog[MESA_SHADER_TESS_EVAL] != NULL,
ice->shaders.prog[MESA_SHADER_GEOMETRY] != NULL,
size, entries, start,
ice->shaders.urb.size,
ice->shaders.urb.entries,
ice->shaders.urb.start,
&ice->state.urb_deref_block_size,
&constrained);
&ice->shaders.urb.constrained);
for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) {
iris_emit_cmd(batch, GENX(3DSTATE_URB_VS), urb) {
urb._3DCommandSubOpcode += i;
urb.VSURBStartingAddress = start[i];
urb.VSURBEntryAllocationSize = size[i] - 1;
urb.VSNumberofURBEntries = entries[i];
urb.VSURBStartingAddress = ice->shaders.urb.start[i];
urb.VSURBEntryAllocationSize = ice->shaders.urb.size[i] - 1;
urb.VSNumberofURBEntries = ice->shaders.urb.entries[i];
}
}
}