i965: Combine CACHE_NEW_*_UNIT into BRW_NEW_GEN4_UNIT_STATE.

On Gen4-5, unit state is specified as indirect state, rather than
commands.  If any unit state changes, we upload it via brw_state_batch
and arrange for 3DSTATE_PIPELINED_POINTERS to be re-emitted, which
updates pointers to all unit state at once.

Since there's only one command and state atom (brw_psp_urb_cs) that
needs to know about this, there's no benefit to having six separate
flags.  We can combine CACHE_NEW_*_UNIT into a single flag.

We also haven't cached these in a long time, so it doesn't make sense
to use the "CACHE_NEW_" prefix.  Instead, use the "BRW_NEW_" prefix.

This also saves 12 * sizeof(void *) bytes of memory per context, as
we remove useless aux_compare/aux_free functions for each CACHE bit.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
Reviewed-by: Matt Turner <mattst88@gmail.com>
This commit is contained in:
Kenneth Graunke 2014-09-26 01:50:04 -07:00
parent bea9b8e306
commit f421db70ba
9 changed files with 11 additions and 31 deletions

View File

@ -224,7 +224,7 @@ static void upload_cc_unit(struct brw_context *brw)
cc->cc4.cc_viewport_state_offset = (brw->batch.bo->offset64 +
brw->cc.vp_offset) >> 5; /* reloc */
brw->state.dirty.cache |= CACHE_NEW_CC_UNIT;
brw->state.dirty.brw |= BRW_NEW_GEN4_UNIT_STATE;
/* Emit CC viewport relocation */
drm_intel_bo_emit_reloc(brw->batch.bo,

View File

@ -158,7 +158,7 @@ brw_upload_clip_unit(struct brw_context *brw)
clip->viewport_ymin = -1;
clip->viewport_ymax = 1;
brw->state.dirty.cache |= CACHE_NEW_CLIP_UNIT;
brw->state.dirty.brw |= BRW_NEW_GEN4_UNIT_STATE;
}
const struct brw_tracked_state brw_clip_unit = {

View File

@ -183,6 +183,7 @@ enum brw_state_id {
BRW_STATE_PUSH_CONSTANT_ALLOCATION,
BRW_STATE_NUM_SAMPLES,
BRW_STATE_TEXTURE_BUFFER,
BRW_STATE_GEN4_UNIT_STATE,
BRW_NUM_STATE_BITS
};
@ -224,6 +225,7 @@ enum brw_state_id {
#define BRW_NEW_PUSH_CONSTANT_ALLOCATION (1ull << BRW_STATE_PUSH_CONSTANT_ALLOCATION)
#define BRW_NEW_NUM_SAMPLES (1ull << BRW_STATE_NUM_SAMPLES)
#define BRW_NEW_TEXTURE_BUFFER (1ull << BRW_STATE_TEXTURE_BUFFER)
#define BRW_NEW_GEN4_UNIT_STATE (1ull << BRW_STATE_GEN4_UNIT_STATE)
struct brw_state_flags {
/** State update flags signalled by mesa internals */
@ -684,21 +686,15 @@ struct brw_gs_prog_data
enum brw_cache_id {
BRW_CC_VP,
BRW_CC_UNIT,
BRW_WM_PROG,
BRW_BLORP_BLIT_PROG,
BRW_SAMPLER,
BRW_WM_UNIT,
BRW_SF_PROG,
BRW_SF_VP,
BRW_SF_UNIT, /* scissor state on gen6 */
BRW_VS_UNIT,
BRW_VS_PROG,
BRW_FF_GS_UNIT,
BRW_FF_GS_PROG,
BRW_GS_PROG,
BRW_CLIP_VP,
BRW_CLIP_UNIT,
BRW_CLIP_PROG,
BRW_MAX_CACHE
@ -778,21 +774,15 @@ enum shader_time_shader_type {
/* Flags for brw->state.cache.
*/
#define CACHE_NEW_CC_VP (1<<BRW_CC_VP)
#define CACHE_NEW_CC_UNIT (1<<BRW_CC_UNIT)
#define CACHE_NEW_WM_PROG (1<<BRW_WM_PROG)
#define CACHE_NEW_BLORP_BLIT_PROG (1<<BRW_BLORP_BLIT_PROG)
#define CACHE_NEW_SAMPLER (1<<BRW_SAMPLER)
#define CACHE_NEW_WM_UNIT (1<<BRW_WM_UNIT)
#define CACHE_NEW_SF_PROG (1<<BRW_SF_PROG)
#define CACHE_NEW_SF_VP (1<<BRW_SF_VP)
#define CACHE_NEW_SF_UNIT (1<<BRW_SF_UNIT)
#define CACHE_NEW_VS_UNIT (1<<BRW_VS_UNIT)
#define CACHE_NEW_VS_PROG (1<<BRW_VS_PROG)
#define CACHE_NEW_FF_GS_UNIT (1<<BRW_FF_GS_UNIT)
#define CACHE_NEW_FF_GS_PROG (1<<BRW_FF_GS_PROG)
#define CACHE_NEW_GS_PROG (1<<BRW_GS_PROG)
#define CACHE_NEW_CLIP_VP (1<<BRW_CLIP_VP)
#define CACHE_NEW_CLIP_UNIT (1<<BRW_CLIP_UNIT)
#define CACHE_NEW_CLIP_PROG (1<<BRW_CLIP_PROG)
struct brw_vertex_buffer {

View File

@ -85,7 +85,7 @@ brw_upload_gs_unit(struct brw_context *brw)
gs->gs6.max_vp_index = brw->ctx.Const.MaxViewports - 1;
brw->state.dirty.cache |= CACHE_NEW_FF_GS_UNIT;
brw->state.dirty.brw |= BRW_NEW_GEN4_UNIT_STATE;
}
const struct brw_tracked_state brw_gs_unit = {

View File

@ -117,15 +117,10 @@ const struct brw_tracked_state brw_psp_urb_cbs = {
.dirty = {
.mesa = 0,
.brw = BRW_NEW_BATCH |
BRW_NEW_GEN4_UNIT_STATE |
BRW_NEW_STATE_BASE_ADDRESS |
BRW_NEW_URB_FENCE,
.cache = CACHE_NEW_CC_UNIT |
CACHE_NEW_CLIP_UNIT |
CACHE_NEW_FF_GS_PROG |
CACHE_NEW_FF_GS_UNIT |
CACHE_NEW_SF_UNIT |
CACHE_NEW_VS_UNIT |
CACHE_NEW_WM_UNIT,
.cache = CACHE_NEW_FF_GS_PROG,
},
.emit = upload_psp_urb_cbs,
};

View File

@ -292,7 +292,7 @@ static void upload_sf_unit( struct brw_context *brw )
(sf->sf5.viewport_transform << 1)),
I915_GEM_DOMAIN_INSTRUCTION, 0);
brw->state.dirty.cache |= CACHE_NEW_SF_UNIT;
brw->state.dirty.brw |= BRW_NEW_GEN4_UNIT_STATE;
}
const struct brw_tracked_state brw_sf_unit = {

View File

@ -521,26 +521,21 @@ static struct dirty_bit_map brw_bits[] = {
DEFINE_BIT(BRW_NEW_PUSH_CONSTANT_ALLOCATION),
DEFINE_BIT(BRW_NEW_NUM_SAMPLES),
DEFINE_BIT(BRW_NEW_TEXTURE_BUFFER),
DEFINE_BIT(BRW_NEW_GEN4_UNIT_STATE),
{0, 0, 0}
};
static struct dirty_bit_map cache_bits[] = {
DEFINE_BIT(CACHE_NEW_CC_VP),
DEFINE_BIT(CACHE_NEW_CC_UNIT),
DEFINE_BIT(CACHE_NEW_WM_PROG),
DEFINE_BIT(CACHE_NEW_BLORP_BLIT_PROG),
DEFINE_BIT(CACHE_NEW_SAMPLER),
DEFINE_BIT(CACHE_NEW_WM_UNIT),
DEFINE_BIT(CACHE_NEW_SF_PROG),
DEFINE_BIT(CACHE_NEW_SF_VP),
DEFINE_BIT(CACHE_NEW_SF_UNIT),
DEFINE_BIT(CACHE_NEW_VS_UNIT),
DEFINE_BIT(CACHE_NEW_VS_PROG),
DEFINE_BIT(CACHE_NEW_FF_GS_UNIT),
DEFINE_BIT(CACHE_NEW_FF_GS_PROG),
DEFINE_BIT(CACHE_NEW_GS_PROG),
DEFINE_BIT(CACHE_NEW_CLIP_VP),
DEFINE_BIT(CACHE_NEW_CLIP_UNIT),
DEFINE_BIT(CACHE_NEW_CLIP_PROG),
{0, 0, 0}
};

View File

@ -181,7 +181,7 @@ brw_upload_vs_unit(struct brw_context *brw)
I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
}
brw->state.dirty.cache |= CACHE_NEW_VS_UNIT;
brw->state.dirty.brw |= BRW_NEW_GEN4_UNIT_STATE;
}
const struct brw_tracked_state brw_vs_unit = {

View File

@ -243,7 +243,7 @@ brw_upload_wm_unit(struct brw_context *brw)
I915_GEM_DOMAIN_INSTRUCTION, 0);
}
brw->state.dirty.cache |= CACHE_NEW_WM_UNIT;
brw->state.dirty.brw |= BRW_NEW_GEN4_UNIT_STATE;
}
const struct brw_tracked_state brw_wm_unit = {