i965/gen6: Use the dynamic state base address to reduce relocations.

Now that all the dynamic state is streamed through the top of the
batchbuffer, we can cut out many of our relocations to that state by
using the base address.

Improves 3DMMES taiji performance 3.3% +/- 0.4% (n=15).

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
This commit is contained in:
Eric Anholt 2011-04-22 16:00:14 -07:00
parent 855f56ca13
commit a82a43e8d9
9 changed files with 54 additions and 45 deletions

View File

@ -550,12 +550,28 @@ static void upload_state_base_address( struct brw_context *brw )
if (intel->gen >= 6) {
BEGIN_BATCH(10);
OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (10 - 2));
OUT_BATCH(1); /* General state base address */
OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_SAMPLER, 0,
1); /* Surface state base address */
OUT_BATCH(1); /* Dynamic state base address */
OUT_BATCH(1); /* Indirect object base address */
OUT_BATCH(1); /* Instruction base address */
/* General state base address: stateless DP read/write requests */
OUT_BATCH(1);
/* Surface state base address:
* BINDING_TABLE_STATE
* SURFACE_STATE
*/
OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_SAMPLER, 0, 1);
/* Dynamic state base address:
* SAMPLER_STATE
* SAMPLER_BORDER_COLOR_STATE
* CLIP, SF, WM/CC viewport state
* COLOR_CALC_STATE
* DEPTH_STENCIL_STATE
* BLEND_STATE
* Push constants (when INSTPM: CONSTANT_BUFFER Address Offset
* Disable is clear, which we rely on)
*/
OUT_RELOC(intel->batch.bo, (I915_GEM_DOMAIN_RENDER |
I915_GEM_DOMAIN_INSTRUCTION), 0, 1);
OUT_BATCH(1); /* Indirect object base address: MEDIA_OBJECT data */
OUT_BATCH(1); /* Instruction base address: shader kernels (incl. SIP) */
OUT_BATCH(1); /* General state upper bound */
OUT_BATCH(1); /* Dynamic state upper bound */
OUT_BATCH(1); /* Indirect object upper bound */

View File

@ -120,6 +120,9 @@ static const struct brw_tracked_state *gen6_atoms[] =
/* Command packets: */
&brw_invarient_state,
/* must do before binding table pointers, cc state ptrs */
&brw_state_base_address,
&brw_cc_vp,
&gen6_viewport_state, /* must do after *_vp stages */
@ -150,8 +153,6 @@ static const struct brw_tracked_state *gen6_atoms[] =
&gen6_scissor_state,
&brw_state_base_address,
&gen6_binding_table_pointers,
&brw_depthbuffer,

View File

@ -271,16 +271,20 @@ static void brw_update_sampler_state(struct brw_context *brw,
upload_default_color(brw, gl_sampler, unit);
/* reloc */
sampler->ss2.default_color_pointer = (intel->batch.bo->offset +
brw->wm.sdc_offset[unit]) >> 5;
if (intel->gen >= 6) {
sampler->ss2.default_color_pointer = brw->wm.sdc_offset[unit] >> 5;
} else {
/* reloc */
sampler->ss2.default_color_pointer = (intel->batch.bo->offset +
brw->wm.sdc_offset[unit]) >> 5;
drm_intel_bo_emit_reloc(intel->batch.bo,
brw->wm.sampler_offset +
unit * sizeof(struct brw_sampler_state) +
offsetof(struct brw_sampler_state, ss2),
intel->batch.bo, brw->wm.sdc_offset[unit],
I915_GEM_DOMAIN_SAMPLER, 0);
drm_intel_bo_emit_reloc(intel->batch.bo,
brw->wm.sampler_offset +
unit * sizeof(struct brw_sampler_state) +
offsetof(struct brw_sampler_state, ss2),
intel->batch.bo, brw->wm.sdc_offset[unit],
I915_GEM_DOMAIN_SAMPLER, 0);
}
}

View File

@ -156,12 +156,9 @@ static void upload_cc_state_pointers(struct brw_context *brw)
BEGIN_BATCH(4);
OUT_BATCH(_3DSTATE_CC_STATE_POINTERS << 16 | (4 - 2));
OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
brw->cc.blend_state_offset | 1);
OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
brw->cc.depth_stencil_state_offset | 1);
OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
brw->cc.state_offset | 1);
OUT_BATCH(brw->cc.blend_state_offset | 1);
OUT_BATCH(brw->cc.depth_stencil_state_offset | 1);
OUT_BATCH(brw->cc.state_offset | 1);
ADVANCE_BATCH();
}

View File

@ -43,12 +43,7 @@ upload_sampler_state_pointers(struct brw_context *brw)
(4 - 2));
OUT_BATCH(0); /* VS */
OUT_BATCH(0); /* GS */
if (brw->wm.sampler_count)
OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
brw->wm.sampler_offset);
else
OUT_BATCH(0);
OUT_BATCH(brw->wm.sampler_offset);
ADVANCE_BATCH();
}

View File

@ -79,8 +79,7 @@ gen6_prepare_scissor_state(struct brw_context *brw)
BEGIN_BATCH(2);
OUT_BATCH(_3DSTATE_SCISSOR_STATE_POINTERS << 16 | (2 - 2));
OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
scissor_state_offset);
OUT_BATCH(scissor_state_offset);
ADVANCE_BATCH();
}

View File

@ -113,12 +113,9 @@ static void upload_viewport_state_pointers(struct brw_context *brw)
GEN6_CC_VIEWPORT_MODIFY |
GEN6_SF_VIEWPORT_MODIFY |
GEN6_CLIP_VIEWPORT_MODIFY);
OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
brw->clip.vp_offset);
OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
brw->sf.vp_offset);
OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
brw->cc.vp_offset);
OUT_BATCH(brw->clip.vp_offset);
OUT_BATCH(brw->sf.vp_offset);
OUT_BATCH(brw->cc.vp_offset);
ADVANCE_BATCH();
}

View File

@ -134,10 +134,10 @@ upload_vs_state(struct brw_context *brw)
OUT_BATCH(_3DSTATE_CONSTANT_VS << 16 |
GEN6_CONSTANT_BUFFER_0_ENABLE |
(5 - 2));
/* This is also the set of state flags from gen6_prepare_vs_constants */
OUT_RELOC(intel->batch.bo,
I915_GEM_DOMAIN_RENDER, 0, /* XXX: bad domain */
brw->vs.push_const_offset +
/* Pointer to the VS constant buffer. Covered by the set of
* state flags from gen6_prepare_vs_constants
*/
OUT_BATCH(brw->vs.push_const_offset +
brw->vs.push_const_size - 1);
OUT_BATCH(0);
OUT_BATCH(0);

View File

@ -112,10 +112,10 @@ upload_wm_state(struct brw_context *brw)
OUT_BATCH(_3DSTATE_CONSTANT_PS << 16 |
GEN6_CONSTANT_BUFFER_0_ENABLE |
(5 - 2));
/* This is also the set of state flags from gen6_prepare_wm_constants */
OUT_RELOC(intel->batch.bo,
I915_GEM_DOMAIN_RENDER, 0, /* XXX: bad domain */
brw->wm.push_const_offset +
/* Pointer to the WM constant buffer. Covered by the set of
* state flags from gen6_prepare_wm_constants
*/
OUT_BATCH(brw->wm.push_const_offset +
ALIGN(brw->wm.prog_data->nr_params,
brw->wm.prog_data->dispatch_width) / 8 - 1);
OUT_BATCH(0);