i965: Combine CACHE_NEW_*_UNIT into BRW_NEW_GEN4_UNIT_STATE.

On Gen4-5, unit state is specified as indirect state, rather than
commands.  If any unit state changes, we upload it via brw_state_batch
and arrange for 3DSTATE_PIPELINED_POINTERS to be re-emitted, which
updates pointers to all unit state at once.

Since there's only one command and state atom (brw_psp_urb_cbs) that
needs to know about this, there's no benefit to having six separate
flags.  We can combine CACHE_NEW_*_UNIT into a single flag.

We also haven't cached these in a long time, so it doesn't make sense
to use the "CACHE_NEW_" prefix.  Instead, use the "BRW_NEW_" prefix.

This also saves 12 * sizeof(void *) bytes of memory per context, as
we remove the useless aux_compare/aux_free function pointers for each
of the six dropped CACHE bits.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
Reviewed-by: Matt Turner <mattst88@gmail.com>
This commit is contained in:
Kenneth Graunke 2014-09-26 01:50:04 -07:00
parent bea9b8e306
commit f421db70ba
9 changed files with 11 additions and 31 deletions

View File

@ -224,7 +224,7 @@ static void upload_cc_unit(struct brw_context *brw)
cc->cc4.cc_viewport_state_offset = (brw->batch.bo->offset64 + cc->cc4.cc_viewport_state_offset = (brw->batch.bo->offset64 +
brw->cc.vp_offset) >> 5; /* reloc */ brw->cc.vp_offset) >> 5; /* reloc */
brw->state.dirty.cache |= CACHE_NEW_CC_UNIT; brw->state.dirty.brw |= BRW_NEW_GEN4_UNIT_STATE;
/* Emit CC viewport relocation */ /* Emit CC viewport relocation */
drm_intel_bo_emit_reloc(brw->batch.bo, drm_intel_bo_emit_reloc(brw->batch.bo,

View File

@ -158,7 +158,7 @@ brw_upload_clip_unit(struct brw_context *brw)
clip->viewport_ymin = -1; clip->viewport_ymin = -1;
clip->viewport_ymax = 1; clip->viewport_ymax = 1;
brw->state.dirty.cache |= CACHE_NEW_CLIP_UNIT; brw->state.dirty.brw |= BRW_NEW_GEN4_UNIT_STATE;
} }
const struct brw_tracked_state brw_clip_unit = { const struct brw_tracked_state brw_clip_unit = {

View File

@ -183,6 +183,7 @@ enum brw_state_id {
BRW_STATE_PUSH_CONSTANT_ALLOCATION, BRW_STATE_PUSH_CONSTANT_ALLOCATION,
BRW_STATE_NUM_SAMPLES, BRW_STATE_NUM_SAMPLES,
BRW_STATE_TEXTURE_BUFFER, BRW_STATE_TEXTURE_BUFFER,
BRW_STATE_GEN4_UNIT_STATE,
BRW_NUM_STATE_BITS BRW_NUM_STATE_BITS
}; };
@ -224,6 +225,7 @@ enum brw_state_id {
#define BRW_NEW_PUSH_CONSTANT_ALLOCATION (1ull << BRW_STATE_PUSH_CONSTANT_ALLOCATION) #define BRW_NEW_PUSH_CONSTANT_ALLOCATION (1ull << BRW_STATE_PUSH_CONSTANT_ALLOCATION)
#define BRW_NEW_NUM_SAMPLES (1ull << BRW_STATE_NUM_SAMPLES) #define BRW_NEW_NUM_SAMPLES (1ull << BRW_STATE_NUM_SAMPLES)
#define BRW_NEW_TEXTURE_BUFFER (1ull << BRW_STATE_TEXTURE_BUFFER) #define BRW_NEW_TEXTURE_BUFFER (1ull << BRW_STATE_TEXTURE_BUFFER)
#define BRW_NEW_GEN4_UNIT_STATE (1ull << BRW_STATE_GEN4_UNIT_STATE)
struct brw_state_flags { struct brw_state_flags {
/** State update flags signalled by mesa internals */ /** State update flags signalled by mesa internals */
@ -684,21 +686,15 @@ struct brw_gs_prog_data
enum brw_cache_id { enum brw_cache_id {
BRW_CC_VP, BRW_CC_VP,
BRW_CC_UNIT,
BRW_WM_PROG, BRW_WM_PROG,
BRW_BLORP_BLIT_PROG, BRW_BLORP_BLIT_PROG,
BRW_SAMPLER, BRW_SAMPLER,
BRW_WM_UNIT,
BRW_SF_PROG, BRW_SF_PROG,
BRW_SF_VP, BRW_SF_VP,
BRW_SF_UNIT, /* scissor state on gen6 */
BRW_VS_UNIT,
BRW_VS_PROG, BRW_VS_PROG,
BRW_FF_GS_UNIT,
BRW_FF_GS_PROG, BRW_FF_GS_PROG,
BRW_GS_PROG, BRW_GS_PROG,
BRW_CLIP_VP, BRW_CLIP_VP,
BRW_CLIP_UNIT,
BRW_CLIP_PROG, BRW_CLIP_PROG,
BRW_MAX_CACHE BRW_MAX_CACHE
@ -778,21 +774,15 @@ enum shader_time_shader_type {
/* Flags for brw->state.cache. /* Flags for brw->state.cache.
*/ */
#define CACHE_NEW_CC_VP (1<<BRW_CC_VP) #define CACHE_NEW_CC_VP (1<<BRW_CC_VP)
#define CACHE_NEW_CC_UNIT (1<<BRW_CC_UNIT)
#define CACHE_NEW_WM_PROG (1<<BRW_WM_PROG) #define CACHE_NEW_WM_PROG (1<<BRW_WM_PROG)
#define CACHE_NEW_BLORP_BLIT_PROG (1<<BRW_BLORP_BLIT_PROG) #define CACHE_NEW_BLORP_BLIT_PROG (1<<BRW_BLORP_BLIT_PROG)
#define CACHE_NEW_SAMPLER (1<<BRW_SAMPLER) #define CACHE_NEW_SAMPLER (1<<BRW_SAMPLER)
#define CACHE_NEW_WM_UNIT (1<<BRW_WM_UNIT)
#define CACHE_NEW_SF_PROG (1<<BRW_SF_PROG) #define CACHE_NEW_SF_PROG (1<<BRW_SF_PROG)
#define CACHE_NEW_SF_VP (1<<BRW_SF_VP) #define CACHE_NEW_SF_VP (1<<BRW_SF_VP)
#define CACHE_NEW_SF_UNIT (1<<BRW_SF_UNIT)
#define CACHE_NEW_VS_UNIT (1<<BRW_VS_UNIT)
#define CACHE_NEW_VS_PROG (1<<BRW_VS_PROG) #define CACHE_NEW_VS_PROG (1<<BRW_VS_PROG)
#define CACHE_NEW_FF_GS_UNIT (1<<BRW_FF_GS_UNIT)
#define CACHE_NEW_FF_GS_PROG (1<<BRW_FF_GS_PROG) #define CACHE_NEW_FF_GS_PROG (1<<BRW_FF_GS_PROG)
#define CACHE_NEW_GS_PROG (1<<BRW_GS_PROG) #define CACHE_NEW_GS_PROG (1<<BRW_GS_PROG)
#define CACHE_NEW_CLIP_VP (1<<BRW_CLIP_VP) #define CACHE_NEW_CLIP_VP (1<<BRW_CLIP_VP)
#define CACHE_NEW_CLIP_UNIT (1<<BRW_CLIP_UNIT)
#define CACHE_NEW_CLIP_PROG (1<<BRW_CLIP_PROG) #define CACHE_NEW_CLIP_PROG (1<<BRW_CLIP_PROG)
struct brw_vertex_buffer { struct brw_vertex_buffer {

View File

@ -85,7 +85,7 @@ brw_upload_gs_unit(struct brw_context *brw)
gs->gs6.max_vp_index = brw->ctx.Const.MaxViewports - 1; gs->gs6.max_vp_index = brw->ctx.Const.MaxViewports - 1;
brw->state.dirty.cache |= CACHE_NEW_FF_GS_UNIT; brw->state.dirty.brw |= BRW_NEW_GEN4_UNIT_STATE;
} }
const struct brw_tracked_state brw_gs_unit = { const struct brw_tracked_state brw_gs_unit = {

View File

@ -117,15 +117,10 @@ const struct brw_tracked_state brw_psp_urb_cbs = {
.dirty = { .dirty = {
.mesa = 0, .mesa = 0,
.brw = BRW_NEW_BATCH | .brw = BRW_NEW_BATCH |
BRW_NEW_GEN4_UNIT_STATE |
BRW_NEW_STATE_BASE_ADDRESS | BRW_NEW_STATE_BASE_ADDRESS |
BRW_NEW_URB_FENCE, BRW_NEW_URB_FENCE,
.cache = CACHE_NEW_CC_UNIT | .cache = CACHE_NEW_FF_GS_PROG,
CACHE_NEW_CLIP_UNIT |
CACHE_NEW_FF_GS_PROG |
CACHE_NEW_FF_GS_UNIT |
CACHE_NEW_SF_UNIT |
CACHE_NEW_VS_UNIT |
CACHE_NEW_WM_UNIT,
}, },
.emit = upload_psp_urb_cbs, .emit = upload_psp_urb_cbs,
}; };

View File

@ -292,7 +292,7 @@ static void upload_sf_unit( struct brw_context *brw )
(sf->sf5.viewport_transform << 1)), (sf->sf5.viewport_transform << 1)),
I915_GEM_DOMAIN_INSTRUCTION, 0); I915_GEM_DOMAIN_INSTRUCTION, 0);
brw->state.dirty.cache |= CACHE_NEW_SF_UNIT; brw->state.dirty.brw |= BRW_NEW_GEN4_UNIT_STATE;
} }
const struct brw_tracked_state brw_sf_unit = { const struct brw_tracked_state brw_sf_unit = {

View File

@ -521,26 +521,21 @@ static struct dirty_bit_map brw_bits[] = {
DEFINE_BIT(BRW_NEW_PUSH_CONSTANT_ALLOCATION), DEFINE_BIT(BRW_NEW_PUSH_CONSTANT_ALLOCATION),
DEFINE_BIT(BRW_NEW_NUM_SAMPLES), DEFINE_BIT(BRW_NEW_NUM_SAMPLES),
DEFINE_BIT(BRW_NEW_TEXTURE_BUFFER), DEFINE_BIT(BRW_NEW_TEXTURE_BUFFER),
DEFINE_BIT(BRW_NEW_GEN4_UNIT_STATE),
{0, 0, 0} {0, 0, 0}
}; };
static struct dirty_bit_map cache_bits[] = { static struct dirty_bit_map cache_bits[] = {
DEFINE_BIT(CACHE_NEW_CC_VP), DEFINE_BIT(CACHE_NEW_CC_VP),
DEFINE_BIT(CACHE_NEW_CC_UNIT),
DEFINE_BIT(CACHE_NEW_WM_PROG), DEFINE_BIT(CACHE_NEW_WM_PROG),
DEFINE_BIT(CACHE_NEW_BLORP_BLIT_PROG), DEFINE_BIT(CACHE_NEW_BLORP_BLIT_PROG),
DEFINE_BIT(CACHE_NEW_SAMPLER), DEFINE_BIT(CACHE_NEW_SAMPLER),
DEFINE_BIT(CACHE_NEW_WM_UNIT),
DEFINE_BIT(CACHE_NEW_SF_PROG), DEFINE_BIT(CACHE_NEW_SF_PROG),
DEFINE_BIT(CACHE_NEW_SF_VP), DEFINE_BIT(CACHE_NEW_SF_VP),
DEFINE_BIT(CACHE_NEW_SF_UNIT),
DEFINE_BIT(CACHE_NEW_VS_UNIT),
DEFINE_BIT(CACHE_NEW_VS_PROG), DEFINE_BIT(CACHE_NEW_VS_PROG),
DEFINE_BIT(CACHE_NEW_FF_GS_UNIT),
DEFINE_BIT(CACHE_NEW_FF_GS_PROG), DEFINE_BIT(CACHE_NEW_FF_GS_PROG),
DEFINE_BIT(CACHE_NEW_GS_PROG), DEFINE_BIT(CACHE_NEW_GS_PROG),
DEFINE_BIT(CACHE_NEW_CLIP_VP), DEFINE_BIT(CACHE_NEW_CLIP_VP),
DEFINE_BIT(CACHE_NEW_CLIP_UNIT),
DEFINE_BIT(CACHE_NEW_CLIP_PROG), DEFINE_BIT(CACHE_NEW_CLIP_PROG),
{0, 0, 0} {0, 0, 0}
}; };

View File

@ -181,7 +181,7 @@ brw_upload_vs_unit(struct brw_context *brw)
I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER); I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
} }
brw->state.dirty.cache |= CACHE_NEW_VS_UNIT; brw->state.dirty.brw |= BRW_NEW_GEN4_UNIT_STATE;
} }
const struct brw_tracked_state brw_vs_unit = { const struct brw_tracked_state brw_vs_unit = {

View File

@ -243,7 +243,7 @@ brw_upload_wm_unit(struct brw_context *brw)
I915_GEM_DOMAIN_INSTRUCTION, 0); I915_GEM_DOMAIN_INSTRUCTION, 0);
} }
brw->state.dirty.cache |= CACHE_NEW_WM_UNIT; brw->state.dirty.brw |= BRW_NEW_GEN4_UNIT_STATE;
} }
const struct brw_tracked_state brw_wm_unit = { const struct brw_tracked_state brw_wm_unit = {