ilo: rework pipeline workarounds

Add current_pipe_control_dw1 and deferred_pipe_control_dw1 to track what have
been done since lsat 3DPRIMITIVE and what need to be done before next
3DPRIMITIVE.  Based on them, we can emit WAs more smartly.

Signed-off-by: Chia-I Wu <olvaffe@gmail.com>
This commit is contained in:
Chia-I Wu 2014-09-22 23:59:53 +08:00
parent 34e807817f
commit 6c9d67118a
3 changed files with 353 additions and 242 deletions

View File

@ -87,7 +87,19 @@ struct ilo_3d_pipeline {
* HW states.
*/
struct ilo_3d_pipeline_state {
bool has_gen6_wa_pipe_control;
/*
* When a WA is needed before some command, we always emit the WA right
* before the command. Knowing what have already been done since last
* 3DPRIMITIVE allows us to skip some WAs.
*/
uint32_t current_pipe_control_dw1;
/*
* When a WA is needed after some command, we may have the WA follow the
* command immediately or defer it. If this is non-zero, a PIPE_CONTROL
* will be emitted before 3DPRIMITIVE.
*/
uint32_t deferred_pipe_control_dw1;
bool primitive_restart;
int reduced_prim;
@ -144,7 +156,9 @@ static inline void
ilo_3d_pipeline_invalidate(struct ilo_3d_pipeline *p, uint32_t flags)
{
p->invalidate_flags |= flags;
p->state.has_gen6_wa_pipe_control = false;
/* Kernel flushes everything. Shouldn't we set all bits here? */
p->state.current_pipe_control_dw1 = 0;
}
/**

View File

@ -40,42 +40,37 @@
#include "ilo_3d_pipeline_gen6.h"
/**
* This should be called before any depth stall flush (including those
* produced by non-pipelined state commands) or cache flush on GEN6.
*
* \see intel_emit_post_sync_nonzero_flush()
* A wrapper for gen6_PIPE_CONTROL().
*/
static inline void
gen6_pipe_control(struct ilo_3d_pipeline *p, uint32_t dw1)
{
struct intel_bo *bo = (dw1 & GEN6_PIPE_CONTROL_WRITE__MASK) ?
p->workaround_bo : NULL;
ILO_DEV_ASSERT(p->dev, 6, 6);
gen6_PIPE_CONTROL(p->builder, dw1, bo, 0, false);
p->state.current_pipe_control_dw1 |= dw1;
assert(!p->state.deferred_pipe_control_dw1);
}
/**
* This should be called before PIPE_CONTROL.
*/
static void
gen6_wa_pipe_control_post_sync(struct ilo_3d_pipeline *p,
bool caller_post_sync)
gen6_wa_pre_pipe_control(struct ilo_3d_pipeline *p, uint32_t dw1)
{
assert(ilo_dev_gen(p->dev) == ILO_GEN(6));
/* emit once */
if (p->state.has_gen6_wa_pipe_control)
return;
p->state.has_gen6_wa_pipe_control = true;
/*
* From the Sandy Bridge PRM, volume 2 part 1, page 60:
*
* "Pipe-control with CS-stall bit set must be sent BEFORE the
* pipe-control with a post-sync op and no write-cache flushes."
*
* The workaround below necessitates this workaround.
*/
gen6_PIPE_CONTROL(p->builder,
GEN6_PIPE_CONTROL_CS_STALL |
GEN6_PIPE_CONTROL_PIXEL_SCOREBOARD_STALL,
NULL, 0, false);
/* the caller will emit the post-sync op */
if (caller_post_sync)
return;
/*
* From the Sandy Bridge PRM, volume 2 part 1, page 60:
* This WA may also be triggered indirectly by the other two WAs on the
* same page:
*
* "Before any depth stall flush (including those produced by
* non-pipelined state commands), software needs to first send a
@ -84,18 +79,98 @@ gen6_wa_pipe_control_post_sync(struct ilo_3d_pipeline *p,
* "Before a PIPE_CONTROL with Write Cache Flush Enable =1, a
* PIPE_CONTROL with any non-zero post-sync-op is required."
*/
gen6_PIPE_CONTROL(p->builder,
GEN6_PIPE_CONTROL_WRITE_IMM,
p->workaround_bo, 0, false);
const bool direct_wa_cond = (dw1 & GEN6_PIPE_CONTROL_WRITE__MASK) &&
!(dw1 & GEN6_PIPE_CONTROL_RENDER_CACHE_FLUSH);
const bool indirect_wa_cond = (dw1 & GEN6_PIPE_CONTROL_DEPTH_STALL) |
(dw1 & GEN6_PIPE_CONTROL_RENDER_CACHE_FLUSH);
ILO_DEV_ASSERT(p->dev, 6, 6);
if (!direct_wa_cond && !indirect_wa_cond)
return;
if (!(p->state.current_pipe_control_dw1 & GEN6_PIPE_CONTROL_CS_STALL)) {
/*
* From the Sandy Bridge PRM, volume 2 part 1, page 73:
*
* "1 of the following must also be set (when CS stall is set):
*
* - Depth Cache Flush Enable ([0] of DW1)
* - Stall at Pixel Scoreboard ([1] of DW1)
* - Depth Stall ([13] of DW1)
* - Post-Sync Operation ([13] of DW1)
* - Render Target Cache Flush Enable ([12] of DW1)
* - Notify Enable ([8] of DW1)"
*
* Because of the WAs above, we have to pick Stall at Pixel Scoreboard.
*/
const uint32_t direct_wa = GEN6_PIPE_CONTROL_CS_STALL |
GEN6_PIPE_CONTROL_PIXEL_SCOREBOARD_STALL;
gen6_pipe_control(p, direct_wa);
}
if (indirect_wa_cond &&
!(p->state.current_pipe_control_dw1 & GEN6_PIPE_CONTROL_WRITE__MASK)) {
const uint32_t indirect_wa = GEN6_PIPE_CONTROL_WRITE_IMM;
gen6_pipe_control(p, indirect_wa);
}
}
/**
* This should be called before any non-pipelined state command.
*/
static void
gen6_wa_pre_non_pipelined(struct ilo_3d_pipeline *p)
{
ILO_DEV_ASSERT(p->dev, 6, 6);
/* non-pipelined state commands produce depth stall */
gen6_wa_pre_pipe_control(p, GEN6_PIPE_CONTROL_DEPTH_STALL);
}
static void
gen6_wa_pipe_control_wm_multisample_flush(struct ilo_3d_pipeline *p)
gen6_wa_post_3dstate_constant_vs(struct ilo_3d_pipeline *p)
{
assert(ilo_dev_gen(p->dev) == ILO_GEN(6));
/*
* According to upload_vs_state() of the classic driver, we need to emit a
* PIPE_CONTROL after 3DSTATE_CONSTANT_VS, otherwise the command is kept
* being buffered by VS FF, to the point that the FF dies.
*/
const uint32_t dw1 = GEN6_PIPE_CONTROL_DEPTH_STALL |
GEN6_PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE |
GEN6_PIPE_CONTROL_STATE_CACHE_INVALIDATE;
gen6_wa_pipe_control_post_sync(p, false);
gen6_wa_pre_pipe_control(p, dw1);
if ((p->state.current_pipe_control_dw1 & dw1) != dw1)
gen6_pipe_control(p, dw1);
}
static void
gen6_wa_pre_3dstate_wm_max_threads(struct ilo_3d_pipeline *p)
{
/*
* From the Sandy Bridge PRM, volume 2 part 1, page 274:
*
* "A PIPE_CONTROL command, with only the Stall At Pixel Scoreboard
* field set (DW1 Bit 1), must be issued prior to any change to the
* value in this field (Maximum Number of Threads in 3DSTATE_WM)"
*/
const uint32_t dw1 = GEN6_PIPE_CONTROL_PIXEL_SCOREBOARD_STALL;
ILO_DEV_ASSERT(p->dev, 6, 6);
gen6_wa_pre_pipe_control(p, dw1);
if ((p->state.current_pipe_control_dw1 & dw1) != dw1)
gen6_pipe_control(p, dw1);
}
static void
gen6_wa_pre_3dstate_multisample(struct ilo_3d_pipeline *p)
{
/*
* From the Sandy Bridge PRM, volume 2 part 1, page 305:
*
@ -104,76 +179,43 @@ gen6_wa_pipe_control_wm_multisample_flush(struct ilo_3d_pipeline *p)
* requires driver to send a PIPE_CONTROL with a CS stall along with a
* Depth Flush prior to this command."
*/
gen6_PIPE_CONTROL(p->builder,
GEN6_PIPE_CONTROL_DEPTH_CACHE_FLUSH |
GEN6_PIPE_CONTROL_CS_STALL,
0, 0, false);
const uint32_t dw1 = GEN6_PIPE_CONTROL_DEPTH_CACHE_FLUSH |
GEN6_PIPE_CONTROL_CS_STALL;
ILO_DEV_ASSERT(p->dev, 6, 6);
gen6_wa_pre_pipe_control(p, dw1);
if ((p->state.current_pipe_control_dw1 & dw1) != dw1)
gen6_pipe_control(p, dw1);
}
static void
gen6_wa_pipe_control_wm_depth_flush(struct ilo_3d_pipeline *p)
gen6_wa_pre_depth(struct ilo_3d_pipeline *p)
{
assert(ilo_dev_gen(p->dev) == ILO_GEN(6));
gen6_wa_pipe_control_post_sync(p, false);
ILO_DEV_ASSERT(p->dev, 6, 6);
/*
* According to intel_emit_depth_stall_flushes() of classic i965, we need
* to emit a sequence of PIPE_CONTROLs prior to emitting depth related
* commands.
*/
gen6_PIPE_CONTROL(p->builder,
GEN6_PIPE_CONTROL_DEPTH_STALL,
NULL, 0, false);
gen6_PIPE_CONTROL(p->builder,
GEN6_PIPE_CONTROL_DEPTH_CACHE_FLUSH,
NULL, 0, false);
gen6_PIPE_CONTROL(p->builder,
GEN6_PIPE_CONTROL_DEPTH_STALL,
NULL, 0, false);
}
static void
gen6_wa_pipe_control_wm_max_threads_stall(struct ilo_3d_pipeline *p)
{
assert(ilo_dev_gen(p->dev) == ILO_GEN(6));
/* the post-sync workaround should cover this already */
if (p->state.has_gen6_wa_pipe_control)
return;
/*
* From the Sandy Bridge PRM, volume 2 part 1, page 274:
* From the Ivy Bridge PRM, volume 2 part 1, page 315:
*
* "A PIPE_CONTROL command, with only the Stall At Pixel Scoreboard
* field set (DW1 Bit 1), must be issued prior to any change to the
* value in this field (Maximum Number of Threads in 3DSTATE_WM)"
* "Restriction: Prior to changing Depth/Stencil Buffer state (i.e.,
* any combination of 3DSTATE_DEPTH_BUFFER, 3DSTATE_CLEAR_PARAMS,
* 3DSTATE_STENCIL_BUFFER, 3DSTATE_HIER_DEPTH_BUFFER) SW must first
* issue a pipelined depth stall (PIPE_CONTROL with Depth Stall bit
* set), followed by a pipelined depth cache flush (PIPE_CONTROL with
* Depth Flush Bit set, followed by another pipelined depth stall
* (PIPE_CONTROL with Depth Stall Bit set), unless SW can otherwise
* guarantee that the pipeline from WM onwards is already flushed
* (e.g., via a preceding MI_FLUSH)."
*
* According to the classic driver, it also applies for GEN6.
*/
gen6_PIPE_CONTROL(p->builder,
GEN6_PIPE_CONTROL_PIXEL_SCOREBOARD_STALL,
NULL, 0, false);
gen6_wa_pre_pipe_control(p, GEN6_PIPE_CONTROL_DEPTH_STALL |
GEN6_PIPE_CONTROL_DEPTH_CACHE_FLUSH);
}
static void
gen6_wa_pipe_control_vs_const_flush(struct ilo_3d_pipeline *p)
{
assert(ilo_dev_gen(p->dev) == ILO_GEN(6));
gen6_wa_pipe_control_post_sync(p, false);
/*
* According to upload_vs_state() of classic i965, we need to emit
* PIPE_CONTROL after 3DSTATE_CONSTANT_VS so that the command is kept being
* buffered by VS FF, to the point that the FF dies.
*/
gen6_PIPE_CONTROL(p->builder,
GEN6_PIPE_CONTROL_DEPTH_STALL |
GEN6_PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE |
GEN6_PIPE_CONTROL_STATE_CACHE_INVALIDATE,
NULL, 0, false);
gen6_pipe_control(p, GEN6_PIPE_CONTROL_DEPTH_STALL);
gen6_pipe_control(p, GEN6_PIPE_CONTROL_DEPTH_CACHE_FLUSH);
gen6_pipe_control(p, GEN6_PIPE_CONTROL_DEPTH_STALL);
}
#define DIRTY(state) (session->pipe_dirty & ILO_DIRTY_ ## state)
@ -186,7 +228,7 @@ gen6_pipeline_common_select(struct ilo_3d_pipeline *p,
/* PIPELINE_SELECT */
if (session->hw_ctx_changed) {
if (ilo_dev_gen(p->dev) == ILO_GEN(6))
gen6_wa_pipe_control_post_sync(p, false);
gen6_wa_pre_non_pipelined(p);
gen6_PIPELINE_SELECT(p->builder, 0x0);
}
@ -200,7 +242,7 @@ gen6_pipeline_common_sip(struct ilo_3d_pipeline *p,
/* STATE_SIP */
if (session->hw_ctx_changed) {
if (ilo_dev_gen(p->dev) == ILO_GEN(6))
gen6_wa_pipe_control_post_sync(p, false);
gen6_wa_pre_non_pipelined(p);
gen6_STATE_SIP(p->builder, 0);
}
@ -215,7 +257,7 @@ gen6_pipeline_common_base_address(struct ilo_3d_pipeline *p,
if (session->state_bo_changed || session->kernel_bo_changed ||
session->batch_bo_changed) {
if (ilo_dev_gen(p->dev) == ILO_GEN(6))
gen6_wa_pipe_control_post_sync(p, false);
gen6_wa_pre_non_pipelined(p);
gen6_state_base_address(p->builder, session->hw_ctx_changed);
@ -468,7 +510,9 @@ gen6_pipeline_vf_draw(struct ilo_3d_pipeline *p,
{
/* 3DPRIMITIVE */
gen6_3DPRIMITIVE(p->builder, vec->draw, &vec->ib);
p->state.has_gen6_wa_pipe_control = false;
p->state.current_pipe_control_dw1 = 0;
assert(!p->state.deferred_pipe_control_dw1);
}
void
@ -485,7 +529,7 @@ gen6_pipeline_vs(struct ilo_3d_pipeline *p,
* cannot find
*/
if (emit_3dstate_vs && ilo_dev_gen(p->dev) == ILO_GEN(6))
gen6_wa_pipe_control_post_sync(p, false);
gen6_wa_pre_non_pipelined(p);
/* 3DSTATE_CONSTANT_VS */
if (emit_3dstate_constant_vs) {
@ -503,7 +547,7 @@ gen6_pipeline_vs(struct ilo_3d_pipeline *p,
}
if (emit_3dstate_constant_vs && ilo_dev_gen(p->dev) == ILO_GEN(6))
gen6_wa_pipe_control_vs_const_flush(p);
gen6_wa_post_3dstate_constant_vs(p);
}
static void
@ -578,7 +622,7 @@ gen6_pipeline_gs_svbi(struct ilo_3d_pipeline *p,
/* 3DSTATE_GS_SVB_INDEX */
if (emit) {
if (ilo_dev_gen(p->dev) == ILO_GEN(6))
gen6_wa_pipe_control_post_sync(p, false);
gen6_wa_pre_non_pipelined(p);
gen6_3DSTATE_GS_SVB_INDEX(p->builder,
0, 0, p->state.so_max_vertices,
@ -651,7 +695,7 @@ gen6_pipeline_sf_rect(struct ilo_3d_pipeline *p,
/* 3DSTATE_DRAWING_RECTANGLE */
if (DIRTY(FB)) {
if (ilo_dev_gen(p->dev) == ILO_GEN(6))
gen6_wa_pipe_control_post_sync(p, false);
gen6_wa_pre_non_pipelined(p);
gen6_3DSTATE_DRAWING_RECTANGLE(p->builder, 0, 0,
vec->fb.state.width, vec->fb.state.height);
@ -680,7 +724,7 @@ gen6_pipeline_wm(struct ilo_3d_pipeline *p,
vec->blend->alpha_to_coverage);
if (ilo_dev_gen(p->dev) == ILO_GEN(6) && session->hw_ctx_changed)
gen6_wa_pipe_control_wm_max_threads_stall(p);
gen6_wa_pre_3dstate_wm_max_threads(p);
gen6_3DSTATE_WM(p->builder, vec->fs, num_samplers,
vec->rasterizer, dual_blend, cc_may_kill, 0);
@ -700,8 +744,8 @@ gen6_pipeline_wm_multisample(struct ilo_3d_pipeline *p,
&p->packed_sample_position_4x : &p->packed_sample_position_1x;
if (ilo_dev_gen(p->dev) == ILO_GEN(6)) {
gen6_wa_pipe_control_post_sync(p, false);
gen6_wa_pipe_control_wm_multisample_flush(p);
gen6_wa_pre_non_pipelined(p);
gen6_wa_pre_3dstate_multisample(p);
}
gen6_3DSTATE_MULTISAMPLE(p->builder,
@ -741,8 +785,8 @@ gen6_pipeline_wm_depth(struct ilo_3d_pipeline *p,
}
if (ilo_dev_gen(p->dev) == ILO_GEN(6)) {
gen6_wa_pipe_control_post_sync(p, false);
gen6_wa_pipe_control_wm_depth_flush(p);
gen6_wa_pre_non_pipelined(p);
gen6_wa_pre_depth(p);
}
gen6_3DSTATE_DEPTH_BUFFER(p->builder, zs);
@ -761,7 +805,7 @@ gen6_pipeline_wm_raster(struct ilo_3d_pipeline *p,
if ((DIRTY(RASTERIZER) || DIRTY(POLY_STIPPLE)) &&
vec->rasterizer->state.poly_stipple_enable) {
if (ilo_dev_gen(p->dev) == ILO_GEN(6))
gen6_wa_pipe_control_post_sync(p, false);
gen6_wa_pre_non_pipelined(p);
gen6_3DSTATE_POLY_STIPPLE_PATTERN(p->builder,
&vec->poly_stipple);
@ -772,7 +816,7 @@ gen6_pipeline_wm_raster(struct ilo_3d_pipeline *p,
/* 3DSTATE_LINE_STIPPLE */
if (DIRTY(RASTERIZER) && vec->rasterizer->state.line_stipple_enable) {
if (ilo_dev_gen(p->dev) == ILO_GEN(6))
gen6_wa_pipe_control_post_sync(p, false);
gen6_wa_pre_non_pipelined(p);
gen6_3DSTATE_LINE_STIPPLE(p->builder,
vec->rasterizer->state.line_stipple_pattern,
@ -782,7 +826,7 @@ gen6_pipeline_wm_raster(struct ilo_3d_pipeline *p,
/* 3DSTATE_AA_LINE_PARAMETERS */
if (DIRTY(RASTERIZER) && vec->rasterizer->state.line_smooth) {
if (ilo_dev_gen(p->dev) == ILO_GEN(6))
gen6_wa_pipe_control_post_sync(p, false);
gen6_wa_pre_non_pipelined(p);
gen6_3DSTATE_AA_LINE_PARAMETERS(p->builder);
}
@ -1437,18 +1481,22 @@ ilo_3d_pipeline_emit_draw_gen6(struct ilo_3d_pipeline *p,
void
ilo_3d_pipeline_emit_flush_gen6(struct ilo_3d_pipeline *p)
{
if (ilo_dev_gen(p->dev) == ILO_GEN(6))
gen6_wa_pipe_control_post_sync(p, false);
const uint32_t dw1 = GEN6_PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE |
GEN6_PIPE_CONTROL_RENDER_CACHE_FLUSH |
GEN6_PIPE_CONTROL_DEPTH_CACHE_FLUSH |
GEN6_PIPE_CONTROL_VF_CACHE_INVALIDATE |
GEN6_PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
GEN6_PIPE_CONTROL_CS_STALL;
gen6_PIPE_CONTROL(p->builder,
GEN6_PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE |
GEN6_PIPE_CONTROL_RENDER_CACHE_FLUSH |
GEN6_PIPE_CONTROL_DEPTH_CACHE_FLUSH |
GEN6_PIPE_CONTROL_VF_CACHE_INVALIDATE |
GEN6_PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
GEN6_PIPE_CONTROL_WRITE_NONE |
GEN6_PIPE_CONTROL_CS_STALL,
0, 0, false);
ILO_DEV_ASSERT(p->dev, 6, 7.5);
if (ilo_dev_gen(p->dev) == ILO_GEN(6))
gen6_wa_pre_pipe_control(p, dw1);
gen6_PIPE_CONTROL(p->builder, dw1, NULL, 0, false);
p->state.current_pipe_control_dw1 |= dw1;
p->state.deferred_pipe_control_dw1 &= ~dw1;
}
void
@ -1478,27 +1526,18 @@ ilo_3d_pipeline_emit_query_gen6(struct ilo_3d_pipeline *p,
GEN6_REG_SO_NUM_PRIMS_WRITTEN;
const uint32_t *regs;
int reg_count = 0, i;
uint32_t pipe_control_dw1 = 0;
ILO_DEV_ASSERT(p->dev, 6, 7.5);
switch (q->type) {
case PIPE_QUERY_OCCLUSION_COUNTER:
if (ilo_dev_gen(p->dev) == ILO_GEN(6))
gen6_wa_pipe_control_post_sync(p, false);
gen6_PIPE_CONTROL(p->builder,
GEN6_PIPE_CONTROL_DEPTH_STALL |
GEN6_PIPE_CONTROL_WRITE_PS_DEPTH_COUNT,
q->bo, offset, true);
pipe_control_dw1 = GEN6_PIPE_CONTROL_DEPTH_STALL |
GEN6_PIPE_CONTROL_WRITE_PS_DEPTH_COUNT;
break;
case PIPE_QUERY_TIMESTAMP:
case PIPE_QUERY_TIME_ELAPSED:
if (ilo_dev_gen(p->dev) == ILO_GEN(6))
gen6_wa_pipe_control_post_sync(p, true);
gen6_PIPE_CONTROL(p->builder,
GEN6_PIPE_CONTROL_WRITE_TIMESTAMP,
q->bo, offset, true);
pipe_control_dw1 = GEN6_PIPE_CONTROL_WRITE_TIMESTAMP;
break;
case PIPE_QUERY_PRIMITIVES_GENERATED:
regs = &primitives_generated_reg;
@ -1516,6 +1555,16 @@ ilo_3d_pipeline_emit_query_gen6(struct ilo_3d_pipeline *p,
break;
}
if (pipe_control_dw1) {
if (ilo_dev_gen(p->dev) == ILO_GEN(6))
gen6_wa_pre_pipe_control(p, pipe_control_dw1);
gen6_PIPE_CONTROL(p->builder, pipe_control_dw1, q->bo, offset, true);
p->state.current_pipe_control_dw1 |= pipe_control_dw1;
p->state.deferred_pipe_control_dw1 &= ~pipe_control_dw1;
}
if (!reg_count)
return;
@ -1544,7 +1593,7 @@ gen6_rectlist_vs_to_sf(struct ilo_3d_pipeline *p,
gen6_3DSTATE_CONSTANT_VS(p->builder, NULL, NULL, 0);
gen6_3DSTATE_VS(p->builder, NULL, 0);
gen6_wa_pipe_control_vs_const_flush(p);
gen6_wa_post_3dstate_constant_vs(p);
gen6_3DSTATE_CONSTANT_GS(p->builder, NULL, NULL, 0);
gen6_3DSTATE_GS(p->builder, NULL, NULL, 0);
@ -1577,7 +1626,7 @@ gen6_rectlist_wm(struct ilo_3d_pipeline *p,
gen6_3DSTATE_CONSTANT_PS(p->builder, NULL, NULL, 0);
gen6_wa_pipe_control_wm_max_threads_stall(p);
gen6_wa_pre_3dstate_wm_max_threads(p);
gen6_3DSTATE_WM(p->builder, NULL, 0, NULL, false, false, hiz_op);
}
@ -1586,7 +1635,7 @@ gen6_rectlist_wm_depth(struct ilo_3d_pipeline *p,
const struct ilo_blitter *blitter,
struct gen6_rectlist_session *session)
{
gen6_wa_pipe_control_wm_depth_flush(p);
gen6_wa_pre_depth(p);
if (blitter->uses & (ILO_BLITTER_USE_FB_DEPTH |
ILO_BLITTER_USE_FB_STENCIL)) {
@ -1616,7 +1665,7 @@ gen6_rectlist_wm_multisample(struct ilo_3d_pipeline *p,
const uint32_t *packed_sample_pos = (blitter->fb.num_samples > 1) ?
&p->packed_sample_position_4x : &p->packed_sample_position_1x;
gen6_wa_pipe_control_wm_multisample_flush(p);
gen6_wa_pre_3dstate_multisample(p);
gen6_3DSTATE_MULTISAMPLE(p->builder, blitter->fb.num_samples,
packed_sample_pos, true);
@ -1630,7 +1679,7 @@ gen6_rectlist_commands(struct ilo_3d_pipeline *p,
const struct ilo_blitter *blitter,
struct gen6_rectlist_session *session)
{
gen6_wa_pipe_control_post_sync(p, false);
gen6_wa_pre_non_pipelined(p);
gen6_rectlist_wm_multisample(p, blitter, session);

View File

@ -37,66 +37,55 @@
#include "ilo_3d_pipeline_gen6.h"
#include "ilo_3d_pipeline_gen7.h"
static void
gen7_wa_pipe_control_cs_stall(struct ilo_3d_pipeline *p,
bool change_multisample_state,
bool change_depth_state)
/**
* A wrapper for gen6_PIPE_CONTROL().
*/
static inline void
gen7_pipe_control(struct ilo_3d_pipeline *p, uint32_t dw1)
{
struct intel_bo *bo = NULL;
uint32_t dw1 = GEN6_PIPE_CONTROL_CS_STALL;
struct intel_bo *bo = (dw1 & GEN6_PIPE_CONTROL_WRITE__MASK) ?
p->workaround_bo : NULL;
assert(ilo_dev_gen(p->dev) == ILO_GEN(7) ||
ilo_dev_gen(p->dev) == ILO_GEN(7.5));
ILO_DEV_ASSERT(p->dev, 7, 7.5);
/* emit once */
if (p->state.has_gen6_wa_pipe_control)
return;
p->state.has_gen6_wa_pipe_control = true;
if (dw1 & GEN6_PIPE_CONTROL_CS_STALL) {
/* CS stall cannot be set alone */
const uint32_t mask = GEN6_PIPE_CONTROL_RENDER_CACHE_FLUSH |
GEN6_PIPE_CONTROL_DEPTH_CACHE_FLUSH |
GEN6_PIPE_CONTROL_PIXEL_SCOREBOARD_STALL |
GEN6_PIPE_CONTROL_DEPTH_STALL |
GEN6_PIPE_CONTROL_WRITE__MASK;
if (!(dw1 & mask))
dw1 |= GEN6_PIPE_CONTROL_PIXEL_SCOREBOARD_STALL;
}
gen6_PIPE_CONTROL(p->builder, dw1, bo, 0, false);
p->state.current_pipe_control_dw1 |= dw1;
p->state.deferred_pipe_control_dw1 &= ~dw1;
}
static void
gen7_wa_post_3dstate_push_constant_alloc_ps(struct ilo_3d_pipeline *p)
{
/*
* From the Ivy Bridge PRM, volume 2 part 1, page 258:
*
* "Due to an HW issue driver needs to send a pipe control with stall
* when ever there is state change in depth bias related state"
*
* From the Ivy Bridge PRM, volume 2 part 1, page 292:
*
* "A PIPE_CONTOL command with the CS Stall bit set must be programmed
* in the ring after this instruction
* (3DSTATE_PUSH_CONSTANT_ALLOC_PS)."
*
* From the Ivy Bridge PRM, volume 2 part 1, page 304:
*
* "Driver must ierarchi that all the caches in the depth pipe are
* flushed before this command (3DSTATE_MULTISAMPLE) is parsed. This
* requires driver to send a PIPE_CONTROL with a CS stall along with a
* Depth Flush prior to this command.
*
* From the Ivy Bridge PRM, volume 2 part 1, page 315:
*
* "Driver must send a least one PIPE_CONTROL command with CS Stall and
* a post sync operation prior to the group of depth
* commands(3DSTATE_DEPTH_BUFFER, 3DSTATE_CLEAR_PARAMS,
* 3DSTATE_STENCIL_BUFFER, and 3DSTATE_HIER_DEPTH_BUFFER)."
*/
const uint32_t dw1 = GEN6_PIPE_CONTROL_CS_STALL;
if (change_multisample_state)
dw1 |= GEN6_PIPE_CONTROL_DEPTH_CACHE_FLUSH;
ILO_DEV_ASSERT(p->dev, 7, 7.5);
if (change_depth_state) {
dw1 |= GEN6_PIPE_CONTROL_WRITE_IMM;
bo = p->workaround_bo;
}
gen6_PIPE_CONTROL(p->builder, dw1, bo, 0, false);
p->state.deferred_pipe_control_dw1 |= dw1;
}
static void
gen7_wa_pipe_control_vs_depth_stall(struct ilo_3d_pipeline *p)
gen7_wa_pre_vs(struct ilo_3d_pipeline *p)
{
assert(ilo_dev_gen(p->dev) == ILO_GEN(7) ||
ilo_dev_gen(p->dev) == ILO_GEN(7.5));
/*
* From the Ivy Bridge PRM, volume 2 part 1, page 106:
*
@ -106,34 +95,73 @@ gen7_wa_pipe_control_vs_depth_stall(struct ilo_3d_pipeline *p)
* 3DSTATE_SAMPLER_STATE_POINTER_VS command. Only one PIPE_CONTROL
* needs to be sent before any combination of VS associated 3DSTATE."
*/
gen6_PIPE_CONTROL(p->builder,
GEN6_PIPE_CONTROL_DEPTH_STALL |
GEN6_PIPE_CONTROL_WRITE_IMM,
p->workaround_bo, 0, false);
const uint32_t dw1 = GEN6_PIPE_CONTROL_DEPTH_STALL |
GEN6_PIPE_CONTROL_WRITE_IMM;
ILO_DEV_ASSERT(p->dev, 7, 7.5);
if ((p->state.current_pipe_control_dw1 & dw1) != dw1)
gen7_pipe_control(p, dw1);
}
static void
gen7_wa_pipe_control_wm_depth_stall(struct ilo_3d_pipeline *p,
bool change_depth_buffer)
gen7_wa_pre_3dstate_sf_depth_bias(struct ilo_3d_pipeline *p)
{
assert(ilo_dev_gen(p->dev) == ILO_GEN(7) ||
ilo_dev_gen(p->dev) == ILO_GEN(7.5));
/*
* From the Ivy Bridge PRM, volume 2 part 1, page 258:
*
* "Due to an HW issue driver needs to send a pipe control with stall
* when ever there is state change in depth bias related state (in
* 3DSTATE_SF)"
*/
const uint32_t dw1 = GEN6_PIPE_CONTROL_CS_STALL;
ILO_DEV_ASSERT(p->dev, 7, 7.5);
if ((p->state.current_pipe_control_dw1 & dw1) != dw1)
gen7_pipe_control(p, dw1);
}
static void
gen7_wa_pre_3dstate_multisample(struct ilo_3d_pipeline *p)
{
/*
* From the Ivy Bridge PRM, volume 2 part 1, page 304:
*
* "Driver must ierarchi that all the caches in the depth pipe are
* flushed before this command (3DSTATE_MULTISAMPLE) is parsed. This
* requires driver to send a PIPE_CONTROL with a CS stall along with a
* Depth Flush prior to this command.
*/
const uint32_t dw1 = GEN6_PIPE_CONTROL_DEPTH_CACHE_FLUSH |
GEN6_PIPE_CONTROL_CS_STALL;
ILO_DEV_ASSERT(p->dev, 7, 7.5);
if ((p->state.current_pipe_control_dw1 & dw1) != dw1)
gen7_pipe_control(p, dw1);
}
static void
gen7_wa_pre_depth(struct ilo_3d_pipeline *p)
{
/*
* From the Ivy Bridge PRM, volume 2 part 1, page 315:
*
* "Driver must send a least one PIPE_CONTROL command with CS Stall and
* a post sync operation prior to the group of depth
* commands(3DSTATE_DEPTH_BUFFER, 3DSTATE_CLEAR_PARAMS,
* 3DSTATE_STENCIL_BUFFER, and 3DSTATE_HIER_DEPTH_BUFFER)."
*/
const uint32_t dw1 = GEN6_PIPE_CONTROL_CS_STALL |
GEN6_PIPE_CONTROL_WRITE_IMM;
ILO_DEV_ASSERT(p->dev, 7, 7.5);
if ((p->state.current_pipe_control_dw1 & dw1) != dw1)
gen7_pipe_control(p, dw1);
/*
* From the Ivy Bridge PRM, volume 2 part 1, page 276:
*
* "The driver must make sure a PIPE_CONTROL with the Depth Stall
* Enable bit set after all the following states are programmed:
*
* * 3DSTATE_PS
* * 3DSTATE_VIEWPORT_STATE_POINTERS_CC
* * 3DSTATE_CONSTANT_PS
* * 3DSTATE_BINDING_TABLE_POINTERS_PS
* * 3DSTATE_SAMPLER_STATE_POINTERS_PS
* * 3DSTATE_CC_STATE_POINTERS
* * 3DSTATE_BLEND_STATE_POINTERS
* * 3DSTATE_DEPTH_STENCIL_STATE_POINTERS"
*
* From the Ivy Bridge PRM, volume 2 part 1, page 315:
*
* "Restriction: Prior to changing Depth/Stencil Buffer state (i.e.,
@ -146,28 +174,14 @@ gen7_wa_pipe_control_wm_depth_stall(struct ilo_3d_pipeline *p,
* guarantee that the pipeline from WM onwards is already flushed
* (e.g., via a preceding MI_FLUSH)."
*/
gen6_PIPE_CONTROL(p->builder,
GEN6_PIPE_CONTROL_DEPTH_STALL,
NULL, 0, false);
if (!change_depth_buffer)
return;
gen6_PIPE_CONTROL(p->builder,
GEN6_PIPE_CONTROL_DEPTH_CACHE_FLUSH,
NULL, 0, false);
gen6_PIPE_CONTROL(p->builder,
GEN6_PIPE_CONTROL_DEPTH_STALL,
NULL, 0, false);
gen7_pipe_control(p, GEN6_PIPE_CONTROL_DEPTH_STALL);
gen7_pipe_control(p, GEN6_PIPE_CONTROL_DEPTH_CACHE_FLUSH);
gen7_pipe_control(p, GEN6_PIPE_CONTROL_DEPTH_STALL);
}
static void
gen7_wa_pipe_control_ps_max_threads_stall(struct ilo_3d_pipeline *p)
gen7_wa_pre_3dstate_ps_max_threads(struct ilo_3d_pipeline *p)
{
assert(ilo_dev_gen(p->dev) == ILO_GEN(7) ||
ilo_dev_gen(p->dev) == ILO_GEN(7.5));
/*
* From the Ivy Bridge PRM, volume 2 part 1, page 286:
*
@ -175,10 +189,37 @@ gen7_wa_pipe_control_ps_max_threads_stall(struct ilo_3d_pipeline *p)
* between 3DPRIMITIVE commands, a PIPE_CONTROL command with Stall at
* Pixel Scoreboard set is required to be issued."
*/
gen6_PIPE_CONTROL(p->builder,
GEN6_PIPE_CONTROL_PIXEL_SCOREBOARD_STALL,
NULL, 0, false);
const uint32_t dw1 = GEN6_PIPE_CONTROL_PIXEL_SCOREBOARD_STALL;
ILO_DEV_ASSERT(p->dev, 7, 7.5);
if ((p->state.current_pipe_control_dw1 & dw1) != dw1)
gen7_pipe_control(p, dw1);
}
static void
gen7_wa_post_ps_and_later(struct ilo_3d_pipeline *p)
{
/*
* From the Ivy Bridge PRM, volume 2 part 1, page 276:
*
* "The driver must make sure a PIPE_CONTROL with the Depth Stall
* Enable bit set after all the following states are programmed:
*
* - 3DSTATE_PS
* - 3DSTATE_VIEWPORT_STATE_POINTERS_CC
* - 3DSTATE_CONSTANT_PS
* - 3DSTATE_BINDING_TABLE_POINTERS_PS
* - 3DSTATE_SAMPLER_STATE_POINTERS_PS
* - 3DSTATE_CC_STATE_POINTERS
* - 3DSTATE_BLEND_STATE_POINTERS
* - 3DSTATE_DEPTH_STENCIL_STATE_POINTERS"
*/
const uint32_t dw1 = GEN6_PIPE_CONTROL_DEPTH_STALL;
ILO_DEV_ASSERT(p->dev, 7, 7.5);
p->state.deferred_pipe_control_dw1 |= dw1;
}
#define DIRTY(state) (session->pipe_dirty & ILO_DIRTY_ ## state)
@ -212,7 +253,7 @@ gen7_pipeline_common_urb(struct ilo_3d_pipeline *p,
vs_entry_size *= sizeof(float) * 4;
vs_total_size = p->dev->urb_size - offset;
gen7_wa_pipe_control_vs_depth_stall(p);
gen7_wa_pre_vs(p);
gen7_3DSTATE_URB_VS(p->builder,
offset, vs_total_size, vs_entry_size);
@ -245,7 +286,7 @@ gen7_pipeline_common_pcb_alloc(struct ilo_3d_pipeline *p,
gen7_3DSTATE_PUSH_CONSTANT_ALLOC_PS(p->builder, offset, size);
if (ilo_dev_gen(p->dev) == ILO_GEN(7))
gen7_wa_pipe_control_cs_stall(p, true, true);
gen7_wa_post_3dstate_push_constant_alloc_ps(p);
}
}
@ -303,7 +344,7 @@ gen7_pipeline_vs(struct ilo_3d_pipeline *p,
/* emit depth stall before any of the VS commands */
if (emit_3dstate_binding_table || emit_3dstate_sampler_state ||
emit_3dstate_constant_vs || emit_3dstate_vs)
gen7_wa_pipe_control_vs_depth_stall(p);
gen7_wa_pre_vs(p);
/* 3DSTATE_BINDING_TABLE_POINTERS_VS */
if (emit_3dstate_binding_table) {
@ -459,7 +500,7 @@ gen7_pipeline_sf(struct ilo_3d_pipeline *p,
if (DIRTY(RASTERIZER) || DIRTY(FB)) {
struct pipe_surface *zs = vec->fb.state.zsbuf;
gen7_wa_pipe_control_cs_stall(p, true, true);
gen7_wa_pre_3dstate_sf_depth_bias(p);
gen7_3DSTATE_SF(p->builder, vec->rasterizer,
(zs) ? zs->format : PIPE_FORMAT_NONE);
}
@ -508,7 +549,7 @@ gen7_pipeline_wm(struct ilo_3d_pipeline *p,
if ((ilo_dev_gen(p->dev) == ILO_GEN(7) ||
ilo_dev_gen(p->dev) == ILO_GEN(7.5)) &&
session->hw_ctx_changed)
gen7_wa_pipe_control_ps_max_threads_stall(p);
gen7_wa_pre_3dstate_ps_max_threads(p);
gen7_3DSTATE_PS(p->builder, vec->fs, num_samplers, dual_blend);
}
@ -527,7 +568,6 @@ gen7_pipeline_wm(struct ilo_3d_pipeline *p,
(DIRTY(FB) || DIRTY(DSA) || session->state_bo_changed);
if (emit_3dstate_ps ||
emit_3dstate_depth_buffer ||
session->pcb_state_fs_changed ||
session->viewport_state_changed ||
session->binding_table_fs_changed ||
@ -535,7 +575,10 @@ gen7_pipeline_wm(struct ilo_3d_pipeline *p,
session->cc_state_cc_changed ||
session->cc_state_blend_changed ||
session->cc_state_dsa_changed)
gen7_wa_pipe_control_wm_depth_stall(p, emit_3dstate_depth_buffer);
gen7_wa_post_ps_and_later(p);
if (emit_3dstate_depth_buffer)
gen7_wa_pre_depth(p);
}
/* 3DSTATE_DEPTH_BUFFER and 3DSTATE_CLEAR_PARAMS */
@ -575,7 +618,7 @@ gen7_pipeline_wm_multisample(struct ilo_3d_pipeline *p,
if (DIRTY(SAMPLE_MASK) || DIRTY(FB)) {
const uint32_t *packed_sample_pos;
gen7_wa_pipe_control_cs_stall(p, true, true);
gen7_wa_pre_3dstate_multisample(p);
packed_sample_pos =
(vec->fb.num_samples > 4) ? p->packed_sample_position_8x :
@ -597,9 +640,14 @@ gen7_pipeline_vf_draw(struct ilo_3d_pipeline *p,
const struct ilo_state_vector *vec,
struct gen6_pipeline_session *session)
{
if (p->state.deferred_pipe_control_dw1)
gen7_pipe_control(p, p->state.deferred_pipe_control_dw1);
/* 3DPRIMITIVE */
gen7_3DPRIMITIVE(p->builder, vec->draw, &vec->ib);
p->state.has_gen6_wa_pipe_control = false;
p->state.current_pipe_control_dw1 = 0;
p->state.deferred_pipe_control_dw1 = 0;
}
static void
@ -670,7 +718,7 @@ gen7_rectlist_pcb_alloc(struct ilo_3d_pipeline *p,
gen7_3DSTATE_PUSH_CONSTANT_ALLOC_PS(p->builder, offset, size);
gen7_wa_pipe_control_cs_stall(p, true, true);
gen7_wa_post_3dstate_push_constant_alloc_ps(p);
}
static void
@ -713,7 +761,7 @@ gen7_rectlist_vs_to_sf(struct ilo_3d_pipeline *p,
gen6_3DSTATE_CLIP(p->builder, NULL, NULL, false, 0);
gen7_wa_pipe_control_cs_stall(p, true, true);
gen7_wa_pre_3dstate_sf_depth_bias(p);
gen7_3DSTATE_SF(p->builder, NULL, blitter->fb.dst.base.format);
gen7_3DSTATE_SBE(p->builder, NULL, NULL);
@ -745,7 +793,7 @@ gen7_rectlist_wm(struct ilo_3d_pipeline *p,
gen7_3DSTATE_CONSTANT_PS(p->builder, NULL, NULL, 0);
gen7_wa_pipe_control_ps_max_threads_stall(p);
gen7_wa_pre_3dstate_ps_max_threads(p);
gen7_3DSTATE_PS(p->builder, NULL, 0, false);
}
@ -754,7 +802,7 @@ gen7_rectlist_wm_depth(struct ilo_3d_pipeline *p,
const struct ilo_blitter *blitter,
struct gen6_rectlist_session *session)
{
gen7_wa_pipe_control_wm_depth_stall(p, true);
gen7_wa_pre_depth(p);
if (blitter->uses & (ILO_BLITTER_USE_FB_DEPTH |
ILO_BLITTER_USE_FB_STENCIL)) {
@ -786,7 +834,7 @@ gen7_rectlist_wm_multisample(struct ilo_3d_pipeline *p,
(blitter->fb.num_samples > 1) ? &p->packed_sample_position_4x :
&p->packed_sample_position_1x;
gen7_wa_pipe_control_cs_stall(p, true, true);
gen7_wa_pre_3dstate_multisample(p);
gen6_3DSTATE_MULTISAMPLE(p->builder, blitter->fb.num_samples,
packed_sample_pos, true);
@ -813,7 +861,7 @@ gen7_rectlist_commands(struct ilo_3d_pipeline *p,
gen7_rectlist_pcb_alloc(p, blitter, session);
/* needed for any VS-related commands */
gen7_wa_pipe_control_vs_depth_stall(p);
gen7_wa_pre_vs(p);
gen7_rectlist_urb(p, blitter, session);