i965: Implement L3 state atom.

The L3 state atom calculates the target L3 partition weights when the
program bound to some shader stage is modified, and in case they are
far enough from the current partitioning it makes sure that the L3
state is re-emitted.

v2: Fix for inconsistent units the context URB size is expressed in.
    Clamp URB size to 1008 KB on SKL due to FF hardware limitation.

Reviewed-by: Samuel Iglesias Gonsálvez <siglesias@igalia.com>
Acked-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Kristian Høgsberg  <krh@bitplanet.net>
This commit is contained in:
Francisco Jerez 2015-12-08 18:53:22 +02:00
parent 95ad0bd33b
commit acc77947ca
3 changed files with 88 additions and 0 deletions

View File

@ -702,6 +702,8 @@ enum brw_predicate_state {
struct shader_times;
struct brw_l3_config;
/**
* brw_context is derived from gl_context.
*/
@ -1267,6 +1269,10 @@ struct brw_context
uint32_t num_instances;
int basevertex;
struct {
const struct brw_l3_config *config;
} l3;
struct {
drm_intel_bo *bo;
const char **names;

View File

@ -130,6 +130,7 @@ extern const struct brw_tracked_state gen7_clip_state;
extern const struct brw_tracked_state gen7_ds_state;
extern const struct brw_tracked_state gen7_gs_state;
extern const struct brw_tracked_state gen7_hs_state;
extern const struct brw_tracked_state gen7_l3_state;
extern const struct brw_tracked_state gen7_ps_state;
extern const struct brw_tracked_state gen7_push_constant_space;
extern const struct brw_tracked_state gen7_sbe_state;

View File

@ -422,3 +422,84 @@ setup_l3_config(struct brw_context *brw, const struct brw_l3_config *cfg)
}
}
}
/**
* Return the unit brw_context::urb::size is expressed in, in KB. \sa
* brw_device_info::urb::size.
*/
static unsigned
get_urb_size_scale(const struct brw_device_info *devinfo)
{
return (devinfo->gen >= 8 ? devinfo->num_slices : 1);
}
/**
* Update the URB size in the context state for the specified L3
* configuration.
*/
static void
update_urb_size(struct brw_context *brw, const struct brw_l3_config *cfg)
{
const struct brw_device_info *devinfo = brw->intelScreen->devinfo;
/* From the SKL "L3 Allocation and Programming" documentation:
*
* "URB is limited to 1008KB due to programming restrictions. This is not
* a restriction of the L3 implementation, but of the FF and other clients.
* Therefore, in a GT4 implementation it is possible for the programmed
* allocation of the L3 data array to provide 3*384KB=1152KB for URB, but
* only 1008KB of this will be used."
*/
const unsigned max = (devinfo->gen == 9 ? 1008 : ~0);
const unsigned sz =
MIN2(max, cfg->n[L3P_URB] * get_l3_way_size(devinfo)) /
get_urb_size_scale(devinfo);
if (brw->urb.size != sz) {
brw->urb.size = sz;
brw->ctx.NewDriverState |= BRW_NEW_URB_SIZE;
}
}
static void
emit_l3_state(struct brw_context *brw)
{
const struct brw_l3_weights w = get_pipeline_state_l3_weights(brw);
const float dw = diff_l3_weights(w, get_config_l3_weights(brw->l3.config));
/* The distance between any two compatible weight vectors cannot exceed two
* due to the triangle inequality.
*/
const float large_dw_threshold = 2.0;
/* Somewhat arbitrary, simply makes sure that there will be no repeated
* transitions to the same L3 configuration, could probably do better here.
*/
const float small_dw_threshold = 0.5;
/* If we're emitting a new batch the caches should already be clean and the
* transition should be relatively cheap, so it shouldn't hurt much to use
* the smaller threshold. Otherwise use the larger threshold so that we
* only reprogram the L3 mid-batch if the most recently programmed
* configuration is incompatible with the current pipeline state.
*/
const float dw_threshold = (brw->ctx.NewDriverState & BRW_NEW_BATCH ?
small_dw_threshold : large_dw_threshold);
if (dw > dw_threshold && brw->can_do_pipelined_register_writes) {
const struct brw_l3_config *const cfg =
get_l3_config(brw->intelScreen->devinfo, w);
setup_l3_config(brw, cfg);
update_urb_size(brw, cfg);
brw->l3.config = cfg;
}
}
const struct brw_tracked_state gen7_l3_state = {
.dirty = {
.mesa = 0,
.brw = BRW_NEW_BATCH |
BRW_NEW_CS_PROG_DATA |
BRW_NEW_FS_PROG_DATA |
BRW_NEW_GS_PROG_DATA |
BRW_NEW_VS_PROG_DATA,
},
.emit = emit_l3_state
};