i965: Create prog_data temporary variables in PS state upload code.
prog_data->foo is a bit more readable than brw->wm.prog_data->foo.

The local variable definition is also a great location to put the obligatory /* CACHE_NEW_WM_PROG */ comment.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
This commit is contained in:
parent
6a1c1fd503
commit
2315ae6653
|
@ -75,25 +75,28 @@ static void
|
||||||
brw_upload_wm_unit(struct brw_context *brw)
|
brw_upload_wm_unit(struct brw_context *brw)
|
||||||
{
|
{
|
||||||
struct gl_context *ctx = &brw->ctx;
|
struct gl_context *ctx = &brw->ctx;
|
||||||
|
/* BRW_NEW_FRAGMENT_PROGRAM */
|
||||||
const struct gl_fragment_program *fp = brw->fragment_program;
|
const struct gl_fragment_program *fp = brw->fragment_program;
|
||||||
|
/* CACHE_NEW_WM_PROG */
|
||||||
|
const struct brw_wm_prog_data *prog_data = brw->wm.prog_data;
|
||||||
struct brw_wm_unit_state *wm;
|
struct brw_wm_unit_state *wm;
|
||||||
|
|
||||||
wm = brw_state_batch(brw, AUB_TRACE_WM_STATE,
|
wm = brw_state_batch(brw, AUB_TRACE_WM_STATE,
|
||||||
sizeof(*wm), 32, &brw->wm.base.state_offset);
|
sizeof(*wm), 32, &brw->wm.base.state_offset);
|
||||||
memset(wm, 0, sizeof(*wm));
|
memset(wm, 0, sizeof(*wm));
|
||||||
|
|
||||||
if (brw->wm.prog_data->prog_offset_16) {
|
if (prog_data->prog_offset_16) {
|
||||||
/* These two fields should be the same pre-gen6, which is why we
|
/* These two fields should be the same pre-gen6, which is why we
|
||||||
* only have one hardware field to program for both dispatch
|
* only have one hardware field to program for both dispatch
|
||||||
* widths.
|
* widths.
|
||||||
*/
|
*/
|
||||||
assert(brw->wm.prog_data->base.dispatch_grf_start_reg ==
|
assert(prog_data->base.dispatch_grf_start_reg ==
|
||||||
brw->wm.prog_data->dispatch_grf_start_reg_16);
|
prog_data->dispatch_grf_start_reg_16);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* BRW_NEW_PROGRAM_CACHE | CACHE_NEW_WM_PROG */
|
/* BRW_NEW_PROGRAM_CACHE | CACHE_NEW_WM_PROG */
|
||||||
wm->thread0.grf_reg_count = brw->wm.prog_data->reg_blocks;
|
wm->thread0.grf_reg_count = prog_data->reg_blocks;
|
||||||
wm->wm9.grf_reg_count_2 = brw->wm.prog_data->reg_blocks_16;
|
wm->wm9.grf_reg_count_2 = prog_data->reg_blocks_16;
|
||||||
|
|
||||||
wm->thread0.kernel_start_pointer =
|
wm->thread0.kernel_start_pointer =
|
||||||
brw_program_reloc(brw,
|
brw_program_reloc(brw,
|
||||||
|
@ -107,7 +110,7 @@ brw_upload_wm_unit(struct brw_context *brw)
|
||||||
brw->wm.base.state_offset +
|
brw->wm.base.state_offset +
|
||||||
offsetof(struct brw_wm_unit_state, wm9),
|
offsetof(struct brw_wm_unit_state, wm9),
|
||||||
brw->wm.base.prog_offset +
|
brw->wm.base.prog_offset +
|
||||||
brw->wm.prog_data->prog_offset_16 +
|
prog_data->prog_offset_16 +
|
||||||
(wm->wm9.grf_reg_count_2 << 1)) >> 6;
|
(wm->wm9.grf_reg_count_2 << 1)) >> 6;
|
||||||
|
|
||||||
wm->thread1.depth_coef_urb_read_offset = 1;
|
wm->thread1.depth_coef_urb_read_offset = 1;
|
||||||
|
@ -122,25 +125,25 @@ brw_upload_wm_unit(struct brw_context *brw)
|
||||||
wm->thread1.floating_point_mode = BRW_FLOATING_POINT_IEEE_754;
|
wm->thread1.floating_point_mode = BRW_FLOATING_POINT_IEEE_754;
|
||||||
|
|
||||||
wm->thread1.binding_table_entry_count =
|
wm->thread1.binding_table_entry_count =
|
||||||
brw->wm.prog_data->base.binding_table.size_bytes / 4;
|
prog_data->base.binding_table.size_bytes / 4;
|
||||||
|
|
||||||
if (brw->wm.prog_data->base.total_scratch != 0) {
|
if (prog_data->base.total_scratch != 0) {
|
||||||
wm->thread2.scratch_space_base_pointer =
|
wm->thread2.scratch_space_base_pointer =
|
||||||
brw->wm.base.scratch_bo->offset64 >> 10; /* reloc */
|
brw->wm.base.scratch_bo->offset64 >> 10; /* reloc */
|
||||||
wm->thread2.per_thread_scratch_space =
|
wm->thread2.per_thread_scratch_space =
|
||||||
ffs(brw->wm.prog_data->base.total_scratch) - 11;
|
ffs(prog_data->base.total_scratch) - 11;
|
||||||
} else {
|
} else {
|
||||||
wm->thread2.scratch_space_base_pointer = 0;
|
wm->thread2.scratch_space_base_pointer = 0;
|
||||||
wm->thread2.per_thread_scratch_space = 0;
|
wm->thread2.per_thread_scratch_space = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
wm->thread3.dispatch_grf_start_reg =
|
wm->thread3.dispatch_grf_start_reg =
|
||||||
brw->wm.prog_data->base.dispatch_grf_start_reg;
|
prog_data->base.dispatch_grf_start_reg;
|
||||||
wm->thread3.urb_entry_read_length =
|
wm->thread3.urb_entry_read_length =
|
||||||
brw->wm.prog_data->num_varying_inputs * 2;
|
prog_data->num_varying_inputs * 2;
|
||||||
wm->thread3.urb_entry_read_offset = 0;
|
wm->thread3.urb_entry_read_offset = 0;
|
||||||
wm->thread3.const_urb_entry_read_length =
|
wm->thread3.const_urb_entry_read_length =
|
||||||
brw->wm.prog_data->base.curb_read_length;
|
prog_data->base.curb_read_length;
|
||||||
/* BRW_NEW_CURBE_OFFSETS */
|
/* BRW_NEW_CURBE_OFFSETS */
|
||||||
wm->thread3.const_urb_entry_read_offset = brw->curbe.wm_start * 2;
|
wm->thread3.const_urb_entry_read_offset = brw->curbe.wm_start * 2;
|
||||||
|
|
||||||
|
@ -175,7 +178,7 @@ brw_upload_wm_unit(struct brw_context *brw)
|
||||||
wm->wm5.program_uses_killpixel = fp->UsesKill || ctx->Color.AlphaEnabled;
|
wm->wm5.program_uses_killpixel = fp->UsesKill || ctx->Color.AlphaEnabled;
|
||||||
|
|
||||||
wm->wm5.enable_8_pix = 1;
|
wm->wm5.enable_8_pix = 1;
|
||||||
if (brw->wm.prog_data->prog_offset_16)
|
if (prog_data->prog_offset_16)
|
||||||
wm->wm5.enable_16_pix = 1;
|
wm->wm5.enable_16_pix = 1;
|
||||||
|
|
||||||
wm->wm5.max_threads = brw->max_wm_threads - 1;
|
wm->wm5.max_threads = brw->max_wm_threads - 1;
|
||||||
|
@ -219,7 +222,7 @@ brw_upload_wm_unit(struct brw_context *brw)
|
||||||
wm->wm4.stats_enable = 1;
|
wm->wm4.stats_enable = 1;
|
||||||
|
|
||||||
/* Emit scratch space relocation */
|
/* Emit scratch space relocation */
|
||||||
if (brw->wm.prog_data->base.total_scratch != 0) {
|
if (prog_data->base.total_scratch != 0) {
|
||||||
drm_intel_bo_emit_reloc(brw->batch.bo,
|
drm_intel_bo_emit_reloc(brw->batch.bo,
|
||||||
brw->wm.base.state_offset +
|
brw->wm.base.state_offset +
|
||||||
offsetof(struct brw_wm_unit_state, thread2),
|
offsetof(struct brw_wm_unit_state, thread2),
|
||||||
|
|
|
@ -69,22 +69,23 @@ static void
|
||||||
upload_wm_state(struct brw_context *brw)
|
upload_wm_state(struct brw_context *brw)
|
||||||
{
|
{
|
||||||
struct gl_context *ctx = &brw->ctx;
|
struct gl_context *ctx = &brw->ctx;
|
||||||
|
/* BRW_NEW_FRAGMENT_PROGRAM */
|
||||||
const struct brw_fragment_program *fp =
|
const struct brw_fragment_program *fp =
|
||||||
brw_fragment_program_const(brw->fragment_program);
|
brw_fragment_program_const(brw->fragment_program);
|
||||||
|
/* CACHE_NEW_WM_PROG */
|
||||||
|
const struct brw_wm_prog_data *prog_data = brw->wm.prog_data;
|
||||||
uint32_t dw2, dw4, dw5, dw6, ksp0, ksp2;
|
uint32_t dw2, dw4, dw5, dw6, ksp0, ksp2;
|
||||||
|
|
||||||
/* _NEW_BUFFERS */
|
/* _NEW_BUFFERS */
|
||||||
bool multisampled_fbo = ctx->DrawBuffer->Visual.samples > 1;
|
bool multisampled_fbo = ctx->DrawBuffer->Visual.samples > 1;
|
||||||
|
|
||||||
/* CACHE_NEW_WM_PROG
|
/* We can't fold this into gen6_upload_wm_push_constants(), because
|
||||||
*
|
|
||||||
* We can't fold this into gen6_upload_wm_push_constants(), because
|
|
||||||
* according to the SNB PRM, vol 2 part 1 section 7.2.2
|
* according to the SNB PRM, vol 2 part 1 section 7.2.2
|
||||||
* (3DSTATE_CONSTANT_PS [DevSNB]):
|
* (3DSTATE_CONSTANT_PS [DevSNB]):
|
||||||
*
|
*
|
||||||
* "[DevSNB]: This packet must be followed by WM_STATE."
|
* "[DevSNB]: This packet must be followed by WM_STATE."
|
||||||
*/
|
*/
|
||||||
if (brw->wm.prog_data->base.nr_params == 0) {
|
if (prog_data->base.nr_params == 0) {
|
||||||
/* Disable the push constant buffers. */
|
/* Disable the push constant buffers. */
|
||||||
BEGIN_BATCH(5);
|
BEGIN_BATCH(5);
|
||||||
OUT_BATCH(_3DSTATE_CONSTANT_PS << 16 | (5 - 2));
|
OUT_BATCH(_3DSTATE_CONSTANT_PS << 16 | (5 - 2));
|
||||||
|
@ -125,14 +126,11 @@ upload_wm_state(struct brw_context *brw)
|
||||||
dw2 |= (ALIGN(brw->wm.base.sampler_count, 4) / 4) <<
|
dw2 |= (ALIGN(brw->wm.base.sampler_count, 4) / 4) <<
|
||||||
GEN6_WM_SAMPLER_COUNT_SHIFT;
|
GEN6_WM_SAMPLER_COUNT_SHIFT;
|
||||||
|
|
||||||
/* CACHE_NEW_WM_PROG */
|
dw2 |= ((prog_data->base.binding_table.size_bytes / 4) <<
|
||||||
dw2 |= ((brw->wm.prog_data->base.binding_table.size_bytes / 4) <<
|
|
||||||
GEN6_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT);
|
GEN6_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT);
|
||||||
|
|
||||||
dw5 |= (brw->max_wm_threads - 1) << GEN6_WM_MAX_THREADS_SHIFT;
|
dw5 |= (brw->max_wm_threads - 1) << GEN6_WM_MAX_THREADS_SHIFT;
|
||||||
|
|
||||||
/* CACHE_NEW_WM_PROG */
|
|
||||||
|
|
||||||
/* In case of non 1x per sample shading, only one of SIMD8 and SIMD16
|
/* In case of non 1x per sample shading, only one of SIMD8 and SIMD16
|
||||||
* should be enabled. We do 'SIMD16 only' dispatch if a SIMD16 shader
|
* should be enabled. We do 'SIMD16 only' dispatch if a SIMD16 shader
|
||||||
* is successfully compiled. In majority of the cases that bring us
|
* is successfully compiled. In majority of the cases that bring us
|
||||||
|
@ -142,32 +140,32 @@ upload_wm_state(struct brw_context *brw)
|
||||||
_mesa_get_min_invocations_per_fragment(ctx, brw->fragment_program, false);
|
_mesa_get_min_invocations_per_fragment(ctx, brw->fragment_program, false);
|
||||||
assert(min_inv_per_frag >= 1);
|
assert(min_inv_per_frag >= 1);
|
||||||
|
|
||||||
if (brw->wm.prog_data->prog_offset_16) {
|
if (prog_data->prog_offset_16) {
|
||||||
dw5 |= GEN6_WM_16_DISPATCH_ENABLE;
|
dw5 |= GEN6_WM_16_DISPATCH_ENABLE;
|
||||||
|
|
||||||
if (min_inv_per_frag == 1) {
|
if (min_inv_per_frag == 1) {
|
||||||
dw5 |= GEN6_WM_8_DISPATCH_ENABLE;
|
dw5 |= GEN6_WM_8_DISPATCH_ENABLE;
|
||||||
dw4 |= (brw->wm.prog_data->base.dispatch_grf_start_reg <<
|
dw4 |= (prog_data->base.dispatch_grf_start_reg <<
|
||||||
GEN6_WM_DISPATCH_START_GRF_SHIFT_0);
|
GEN6_WM_DISPATCH_START_GRF_SHIFT_0);
|
||||||
dw4 |= (brw->wm.prog_data->dispatch_grf_start_reg_16 <<
|
dw4 |= (prog_data->dispatch_grf_start_reg_16 <<
|
||||||
GEN6_WM_DISPATCH_START_GRF_SHIFT_2);
|
GEN6_WM_DISPATCH_START_GRF_SHIFT_2);
|
||||||
ksp0 = brw->wm.base.prog_offset;
|
ksp0 = brw->wm.base.prog_offset;
|
||||||
ksp2 = brw->wm.base.prog_offset + brw->wm.prog_data->prog_offset_16;
|
ksp2 = brw->wm.base.prog_offset + prog_data->prog_offset_16;
|
||||||
} else {
|
} else {
|
||||||
dw4 |= (brw->wm.prog_data->dispatch_grf_start_reg_16 <<
|
dw4 |= (prog_data->dispatch_grf_start_reg_16 <<
|
||||||
GEN6_WM_DISPATCH_START_GRF_SHIFT_0);
|
GEN6_WM_DISPATCH_START_GRF_SHIFT_0);
|
||||||
ksp0 = brw->wm.base.prog_offset + brw->wm.prog_data->prog_offset_16;
|
ksp0 = brw->wm.base.prog_offset + prog_data->prog_offset_16;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
dw5 |= GEN6_WM_8_DISPATCH_ENABLE;
|
dw5 |= GEN6_WM_8_DISPATCH_ENABLE;
|
||||||
dw4 |= (brw->wm.prog_data->base.dispatch_grf_start_reg <<
|
dw4 |= (prog_data->base.dispatch_grf_start_reg <<
|
||||||
GEN6_WM_DISPATCH_START_GRF_SHIFT_0);
|
GEN6_WM_DISPATCH_START_GRF_SHIFT_0);
|
||||||
ksp0 = brw->wm.base.prog_offset;
|
ksp0 = brw->wm.base.prog_offset;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* CACHE_NEW_WM_PROG | _NEW_COLOR */
|
/* CACHE_NEW_WM_PROG | _NEW_COLOR */
|
||||||
if (brw->wm.prog_data->dual_src_blend &&
|
if (prog_data->dual_src_blend &&
|
||||||
(ctx->Color.BlendEnabled & 1) &&
|
(ctx->Color.BlendEnabled & 1) &&
|
||||||
ctx->Color.Blend[0]._UsesDualSrc) {
|
ctx->Color.Blend[0]._UsesDualSrc) {
|
||||||
dw5 |= GEN6_WM_DUAL_SOURCE_BLEND_ENABLE;
|
dw5 |= GEN6_WM_DUAL_SOURCE_BLEND_ENABLE;
|
||||||
|
@ -186,14 +184,13 @@ upload_wm_state(struct brw_context *brw)
|
||||||
dw5 |= GEN6_WM_USES_SOURCE_DEPTH | GEN6_WM_USES_SOURCE_W;
|
dw5 |= GEN6_WM_USES_SOURCE_DEPTH | GEN6_WM_USES_SOURCE_W;
|
||||||
if (fp->program.Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH))
|
if (fp->program.Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH))
|
||||||
dw5 |= GEN6_WM_COMPUTED_DEPTH;
|
dw5 |= GEN6_WM_COMPUTED_DEPTH;
|
||||||
/* CACHE_NEW_WM_PROG */
|
dw6 |= prog_data->barycentric_interp_modes <<
|
||||||
dw6 |= brw->wm.prog_data->barycentric_interp_modes <<
|
|
||||||
GEN6_WM_BARYCENTRIC_INTERPOLATION_MODE_SHIFT;
|
GEN6_WM_BARYCENTRIC_INTERPOLATION_MODE_SHIFT;
|
||||||
|
|
||||||
/* _NEW_COLOR, _NEW_MULTISAMPLE */
|
/* _NEW_COLOR, _NEW_MULTISAMPLE */
|
||||||
if (fp->program.UsesKill || ctx->Color.AlphaEnabled ||
|
if (fp->program.UsesKill || ctx->Color.AlphaEnabled ||
|
||||||
ctx->Multisample.SampleAlphaToCoverage ||
|
ctx->Multisample.SampleAlphaToCoverage ||
|
||||||
brw->wm.prog_data->uses_omask)
|
prog_data->uses_omask)
|
||||||
dw5 |= GEN6_WM_KILL_ENABLE;
|
dw5 |= GEN6_WM_KILL_ENABLE;
|
||||||
|
|
||||||
/* _NEW_BUFFERS | _NEW_COLOR */
|
/* _NEW_BUFFERS | _NEW_COLOR */
|
||||||
|
@ -209,11 +206,10 @@ upload_wm_state(struct brw_context *brw)
|
||||||
* Target Write messages. If present, the oMask data is used to mask off
|
* Target Write messages. If present, the oMask data is used to mask off
|
||||||
* samples."
|
* samples."
|
||||||
*/
|
*/
|
||||||
if(brw->wm.prog_data->uses_omask)
|
if (prog_data->uses_omask)
|
||||||
dw5 |= GEN6_WM_OMASK_TO_RENDER_TARGET;
|
dw5 |= GEN6_WM_OMASK_TO_RENDER_TARGET;
|
||||||
|
|
||||||
/* CACHE_NEW_WM_PROG */
|
dw6 |= prog_data->num_varying_inputs <<
|
||||||
dw6 |= brw->wm.prog_data->num_varying_inputs <<
|
|
||||||
GEN6_WM_NUM_SF_OUTPUTS_SHIFT;
|
GEN6_WM_NUM_SF_OUTPUTS_SHIFT;
|
||||||
if (multisampled_fbo) {
|
if (multisampled_fbo) {
|
||||||
/* _NEW_MULTISAMPLE */
|
/* _NEW_MULTISAMPLE */
|
||||||
|
@ -275,7 +271,7 @@ upload_wm_state(struct brw_context *brw)
|
||||||
* We only require XY sample offsets. So, this recommendation doesn't
|
* We only require XY sample offsets. So, this recommendation doesn't
|
||||||
* look useful at the moment. We might need this in future.
|
* look useful at the moment. We might need this in future.
|
||||||
*/
|
*/
|
||||||
if (brw->wm.prog_data->uses_pos_offset)
|
if (prog_data->uses_pos_offset)
|
||||||
dw6 |= GEN6_WM_POSOFFSET_SAMPLE;
|
dw6 |= GEN6_WM_POSOFFSET_SAMPLE;
|
||||||
else
|
else
|
||||||
dw6 |= GEN6_WM_POSOFFSET_NONE;
|
dw6 |= GEN6_WM_POSOFFSET_NONE;
|
||||||
|
@ -284,10 +280,10 @@ upload_wm_state(struct brw_context *brw)
|
||||||
OUT_BATCH(_3DSTATE_WM << 16 | (9 - 2));
|
OUT_BATCH(_3DSTATE_WM << 16 | (9 - 2));
|
||||||
OUT_BATCH(ksp0);
|
OUT_BATCH(ksp0);
|
||||||
OUT_BATCH(dw2);
|
OUT_BATCH(dw2);
|
||||||
if (brw->wm.prog_data->base.total_scratch) {
|
if (prog_data->base.total_scratch) {
|
||||||
OUT_RELOC(brw->wm.base.scratch_bo,
|
OUT_RELOC(brw->wm.base.scratch_bo,
|
||||||
I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
|
I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
|
||||||
ffs(brw->wm.prog_data->base.total_scratch) - 11);
|
ffs(prog_data->base.total_scratch) - 11);
|
||||||
} else {
|
} else {
|
||||||
OUT_BATCH(0);
|
OUT_BATCH(0);
|
||||||
}
|
}
|
||||||
|
|
|
@ -36,8 +36,11 @@ static void
|
||||||
upload_wm_state(struct brw_context *brw)
|
upload_wm_state(struct brw_context *brw)
|
||||||
{
|
{
|
||||||
struct gl_context *ctx = &brw->ctx;
|
struct gl_context *ctx = &brw->ctx;
|
||||||
|
/* BRW_NEW_FRAGMENT_PROGRAM */
|
||||||
const struct brw_fragment_program *fp =
|
const struct brw_fragment_program *fp =
|
||||||
brw_fragment_program_const(brw->fragment_program);
|
brw_fragment_program_const(brw->fragment_program);
|
||||||
|
/* CACHE_NEW_WM_PROG */
|
||||||
|
const struct brw_wm_prog_data *prog_data = brw->wm.prog_data;
|
||||||
bool writes_depth = false;
|
bool writes_depth = false;
|
||||||
uint32_t dw1, dw2;
|
uint32_t dw1, dw2;
|
||||||
|
|
||||||
|
@ -57,7 +60,6 @@ upload_wm_state(struct brw_context *brw)
|
||||||
if (ctx->Polygon.StippleFlag)
|
if (ctx->Polygon.StippleFlag)
|
||||||
dw1 |= GEN7_WM_POLYGON_STIPPLE_ENABLE;
|
dw1 |= GEN7_WM_POLYGON_STIPPLE_ENABLE;
|
||||||
|
|
||||||
/* BRW_NEW_FRAGMENT_PROGRAM */
|
|
||||||
if (fp->program.Base.InputsRead & VARYING_BIT_POS)
|
if (fp->program.Base.InputsRead & VARYING_BIT_POS)
|
||||||
dw1 |= GEN7_WM_USES_SOURCE_DEPTH | GEN7_WM_USES_SOURCE_W;
|
dw1 |= GEN7_WM_USES_SOURCE_DEPTH | GEN7_WM_USES_SOURCE_W;
|
||||||
if (fp->program.Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) {
|
if (fp->program.Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) {
|
||||||
|
@ -78,8 +80,7 @@ upload_wm_state(struct brw_context *brw)
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
/* CACHE_NEW_WM_PROG */
|
dw1 |= prog_data->barycentric_interp_modes <<
|
||||||
dw1 |= brw->wm.prog_data->barycentric_interp_modes <<
|
|
||||||
GEN7_WM_BARYCENTRIC_INTERPOLATION_MODE_SHIFT;
|
GEN7_WM_BARYCENTRIC_INTERPOLATION_MODE_SHIFT;
|
||||||
|
|
||||||
/* _NEW_COLOR, _NEW_MULTISAMPLE */
|
/* _NEW_COLOR, _NEW_MULTISAMPLE */
|
||||||
|
@ -87,7 +88,7 @@ upload_wm_state(struct brw_context *brw)
|
||||||
*/
|
*/
|
||||||
if (fp->program.UsesKill || ctx->Color.AlphaEnabled ||
|
if (fp->program.UsesKill || ctx->Color.AlphaEnabled ||
|
||||||
ctx->Multisample.SampleAlphaToCoverage ||
|
ctx->Multisample.SampleAlphaToCoverage ||
|
||||||
brw->wm.prog_data->uses_omask) {
|
prog_data->uses_omask) {
|
||||||
dw1 |= GEN7_WM_KILL_ENABLE;
|
dw1 |= GEN7_WM_KILL_ENABLE;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -143,13 +144,15 @@ upload_ps_state(struct brw_context *brw)
|
||||||
const int max_threads_shift = brw->is_haswell ?
|
const int max_threads_shift = brw->is_haswell ?
|
||||||
HSW_PS_MAX_THREADS_SHIFT : IVB_PS_MAX_THREADS_SHIFT;
|
HSW_PS_MAX_THREADS_SHIFT : IVB_PS_MAX_THREADS_SHIFT;
|
||||||
|
|
||||||
|
/* CACHE_NEW_WM_PROG */
|
||||||
|
const struct brw_wm_prog_data *prog_data = brw->wm.prog_data;
|
||||||
|
|
||||||
dw2 = dw4 = dw5 = ksp2 = 0;
|
dw2 = dw4 = dw5 = ksp2 = 0;
|
||||||
|
|
||||||
dw2 |=
|
dw2 |=
|
||||||
(ALIGN(brw->wm.base.sampler_count, 4) / 4) << GEN7_PS_SAMPLER_COUNT_SHIFT;
|
(ALIGN(brw->wm.base.sampler_count, 4) / 4) << GEN7_PS_SAMPLER_COUNT_SHIFT;
|
||||||
|
|
||||||
/* CACHE_NEW_WM_PROG */
|
dw2 |= ((prog_data->base.binding_table.size_bytes / 4) <<
|
||||||
dw2 |= ((brw->wm.prog_data->base.binding_table.size_bytes / 4) <<
|
|
||||||
GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT);
|
GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT);
|
||||||
|
|
||||||
/* Use ALT floating point mode for ARB fragment programs, because they
|
/* Use ALT floating point mode for ARB fragment programs, because they
|
||||||
|
@ -169,8 +172,7 @@ upload_ps_state(struct brw_context *brw)
|
||||||
|
|
||||||
dw4 |= (brw->max_wm_threads - 1) << max_threads_shift;
|
dw4 |= (brw->max_wm_threads - 1) << max_threads_shift;
|
||||||
|
|
||||||
/* CACHE_NEW_WM_PROG */
|
if (prog_data->base.nr_params > 0)
|
||||||
if (brw->wm.prog_data->base.nr_params > 0)
|
|
||||||
dw4 |= GEN7_PS_PUSH_CONSTANT_ENABLE;
|
dw4 |= GEN7_PS_PUSH_CONSTANT_ENABLE;
|
||||||
|
|
||||||
/* From the IVB PRM, volume 2 part 1, page 287:
|
/* From the IVB PRM, volume 2 part 1, page 287:
|
||||||
|
@ -180,7 +182,7 @@ upload_ps_state(struct brw_context *brw)
|
||||||
* Target Write messages. If present, the oMask data is used to mask off
|
* Target Write messages. If present, the oMask data is used to mask off
|
||||||
* samples."
|
* samples."
|
||||||
*/
|
*/
|
||||||
if (brw->wm.prog_data->uses_omask)
|
if (prog_data->uses_omask)
|
||||||
dw4 |= GEN7_PS_OMASK_TO_RENDER_TARGET;
|
dw4 |= GEN7_PS_OMASK_TO_RENDER_TARGET;
|
||||||
|
|
||||||
/* From the IVB PRM, volume 2 part 1, page 287:
|
/* From the IVB PRM, volume 2 part 1, page 287:
|
||||||
|
@ -194,7 +196,7 @@ upload_ps_state(struct brw_context *brw)
|
||||||
* We only require XY sample offsets. So, this recommendation doesn't
|
* We only require XY sample offsets. So, this recommendation doesn't
|
||||||
* look useful at the moment. We might need this in future.
|
* look useful at the moment. We might need this in future.
|
||||||
*/
|
*/
|
||||||
if (brw->wm.prog_data->uses_pos_offset)
|
if (prog_data->uses_pos_offset)
|
||||||
dw4 |= GEN7_PS_POSOFFSET_SAMPLE;
|
dw4 |= GEN7_PS_POSOFFSET_SAMPLE;
|
||||||
else
|
else
|
||||||
dw4 |= GEN7_PS_POSOFFSET_NONE;
|
dw4 |= GEN7_PS_POSOFFSET_NONE;
|
||||||
|
@ -204,14 +206,14 @@ upload_ps_state(struct brw_context *brw)
|
||||||
* The hardware wedges if you have this bit set but don't turn on any dual
|
* The hardware wedges if you have this bit set but don't turn on any dual
|
||||||
* source blend factors.
|
* source blend factors.
|
||||||
*/
|
*/
|
||||||
if (brw->wm.prog_data->dual_src_blend &&
|
if (prog_data->dual_src_blend &&
|
||||||
(ctx->Color.BlendEnabled & 1) &&
|
(ctx->Color.BlendEnabled & 1) &&
|
||||||
ctx->Color.Blend[0]._UsesDualSrc) {
|
ctx->Color.Blend[0]._UsesDualSrc) {
|
||||||
dw4 |= GEN7_PS_DUAL_SOURCE_BLEND_ENABLE;
|
dw4 |= GEN7_PS_DUAL_SOURCE_BLEND_ENABLE;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* CACHE_NEW_WM_PROG */
|
/* CACHE_NEW_WM_PROG */
|
||||||
if (brw->wm.prog_data->num_varying_inputs != 0)
|
if (prog_data->num_varying_inputs != 0)
|
||||||
dw4 |= GEN7_PS_ATTRIBUTE_ENABLE;
|
dw4 |= GEN7_PS_ATTRIBUTE_ENABLE;
|
||||||
|
|
||||||
/* In case of non 1x per sample shading, only one of SIMD8 and SIMD16
|
/* In case of non 1x per sample shading, only one of SIMD8 and SIMD16
|
||||||
|
@ -223,25 +225,25 @@ upload_ps_state(struct brw_context *brw)
|
||||||
_mesa_get_min_invocations_per_fragment(ctx, brw->fragment_program, false);
|
_mesa_get_min_invocations_per_fragment(ctx, brw->fragment_program, false);
|
||||||
assert(min_inv_per_frag >= 1);
|
assert(min_inv_per_frag >= 1);
|
||||||
|
|
||||||
if (brw->wm.prog_data->prog_offset_16 || brw->wm.prog_data->no_8) {
|
if (prog_data->prog_offset_16 || prog_data->no_8) {
|
||||||
dw4 |= GEN7_PS_16_DISPATCH_ENABLE;
|
dw4 |= GEN7_PS_16_DISPATCH_ENABLE;
|
||||||
if (!brw->wm.prog_data->no_8 && min_inv_per_frag == 1) {
|
if (!prog_data->no_8 && min_inv_per_frag == 1) {
|
||||||
dw4 |= GEN7_PS_8_DISPATCH_ENABLE;
|
dw4 |= GEN7_PS_8_DISPATCH_ENABLE;
|
||||||
dw5 |= (brw->wm.prog_data->base.dispatch_grf_start_reg <<
|
dw5 |= (prog_data->base.dispatch_grf_start_reg <<
|
||||||
GEN7_PS_DISPATCH_START_GRF_SHIFT_0);
|
GEN7_PS_DISPATCH_START_GRF_SHIFT_0);
|
||||||
dw5 |= (brw->wm.prog_data->dispatch_grf_start_reg_16 <<
|
dw5 |= (prog_data->dispatch_grf_start_reg_16 <<
|
||||||
GEN7_PS_DISPATCH_START_GRF_SHIFT_2);
|
GEN7_PS_DISPATCH_START_GRF_SHIFT_2);
|
||||||
ksp0 = brw->wm.base.prog_offset;
|
ksp0 = brw->wm.base.prog_offset;
|
||||||
ksp2 = brw->wm.base.prog_offset + brw->wm.prog_data->prog_offset_16;
|
ksp2 = brw->wm.base.prog_offset + prog_data->prog_offset_16;
|
||||||
} else {
|
} else {
|
||||||
dw5 |= (brw->wm.prog_data->dispatch_grf_start_reg_16 <<
|
dw5 |= (prog_data->dispatch_grf_start_reg_16 <<
|
||||||
GEN7_PS_DISPATCH_START_GRF_SHIFT_0);
|
GEN7_PS_DISPATCH_START_GRF_SHIFT_0);
|
||||||
ksp0 = brw->wm.base.prog_offset + brw->wm.prog_data->prog_offset_16;
|
ksp0 = brw->wm.base.prog_offset + prog_data->prog_offset_16;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
dw4 |= GEN7_PS_8_DISPATCH_ENABLE;
|
dw4 |= GEN7_PS_8_DISPATCH_ENABLE;
|
||||||
dw5 |= (brw->wm.prog_data->base.dispatch_grf_start_reg <<
|
dw5 |= (prog_data->base.dispatch_grf_start_reg <<
|
||||||
GEN7_PS_DISPATCH_START_GRF_SHIFT_0);
|
GEN7_PS_DISPATCH_START_GRF_SHIFT_0);
|
||||||
ksp0 = brw->wm.base.prog_offset;
|
ksp0 = brw->wm.base.prog_offset;
|
||||||
}
|
}
|
||||||
|
@ -252,10 +254,10 @@ upload_ps_state(struct brw_context *brw)
|
||||||
OUT_BATCH(_3DSTATE_PS << 16 | (8 - 2));
|
OUT_BATCH(_3DSTATE_PS << 16 | (8 - 2));
|
||||||
OUT_BATCH(ksp0);
|
OUT_BATCH(ksp0);
|
||||||
OUT_BATCH(dw2);
|
OUT_BATCH(dw2);
|
||||||
if (brw->wm.prog_data->base.total_scratch) {
|
if (prog_data->base.total_scratch) {
|
||||||
OUT_RELOC(brw->wm.base.scratch_bo,
|
OUT_RELOC(brw->wm.base.scratch_bo,
|
||||||
I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
|
I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
|
||||||
ffs(brw->wm.prog_data->base.total_scratch) - 11);
|
ffs(prog_data->base.total_scratch) - 11);
|
||||||
} else {
|
} else {
|
||||||
OUT_BATCH(0);
|
OUT_BATCH(0);
|
||||||
}
|
}
|
||||||
|
|
|
@ -34,6 +34,8 @@ upload_ps_extra(struct brw_context *brw)
|
||||||
/* BRW_NEW_FRAGMENT_PROGRAM */
|
/* BRW_NEW_FRAGMENT_PROGRAM */
|
||||||
const struct brw_fragment_program *fp =
|
const struct brw_fragment_program *fp =
|
||||||
brw_fragment_program_const(brw->fragment_program);
|
brw_fragment_program_const(brw->fragment_program);
|
||||||
|
/* CACHE_NEW_WM_PROG */
|
||||||
|
const struct brw_wm_prog_data *prog_data = brw->wm.prog_data;
|
||||||
uint32_t dw1 = 0;
|
uint32_t dw1 = 0;
|
||||||
|
|
||||||
dw1 |= GEN8_PSX_PIXEL_SHADER_VALID;
|
dw1 |= GEN8_PSX_PIXEL_SHADER_VALID;
|
||||||
|
@ -41,8 +43,7 @@ upload_ps_extra(struct brw_context *brw)
|
||||||
if (fp->program.UsesKill)
|
if (fp->program.UsesKill)
|
||||||
dw1 |= GEN8_PSX_KILL_ENABLE;
|
dw1 |= GEN8_PSX_KILL_ENABLE;
|
||||||
|
|
||||||
/* CACHE_NEW_WM_PROG */
|
if (prog_data->num_varying_inputs != 0)
|
||||||
if (brw->wm.prog_data->num_varying_inputs != 0)
|
|
||||||
dw1 |= GEN8_PSX_ATTRIBUTE_ENABLE;
|
dw1 |= GEN8_PSX_ATTRIBUTE_ENABLE;
|
||||||
|
|
||||||
if (fp->program.Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) {
|
if (fp->program.Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) {
|
||||||
|
@ -74,7 +75,7 @@ upload_ps_extra(struct brw_context *brw)
|
||||||
if (fp->program.Base.SystemValuesRead & SYSTEM_BIT_SAMPLE_MASK_IN)
|
if (fp->program.Base.SystemValuesRead & SYSTEM_BIT_SAMPLE_MASK_IN)
|
||||||
dw1 |= GEN8_PSX_SHADER_USES_INPUT_COVERAGE_MASK;
|
dw1 |= GEN8_PSX_SHADER_USES_INPUT_COVERAGE_MASK;
|
||||||
|
|
||||||
if (brw->wm.prog_data->uses_omask)
|
if (prog_data->uses_omask)
|
||||||
dw1 |= GEN8_PSX_OMASK_TO_RENDER_TARGET;
|
dw1 |= GEN8_PSX_OMASK_TO_RENDER_TARGET;
|
||||||
|
|
||||||
BEGIN_BATCH(2);
|
BEGIN_BATCH(2);
|
||||||
|
@ -136,6 +137,9 @@ upload_ps_state(struct brw_context *brw)
|
||||||
struct gl_context *ctx = &brw->ctx;
|
struct gl_context *ctx = &brw->ctx;
|
||||||
uint32_t dw3 = 0, dw6 = 0, dw7 = 0, ksp0, ksp2 = 0;
|
uint32_t dw3 = 0, dw6 = 0, dw7 = 0, ksp0, ksp2 = 0;
|
||||||
|
|
||||||
|
/* CACHE_NEW_WM_PROG */
|
||||||
|
const struct brw_wm_prog_data *prog_data = brw->wm.prog_data;
|
||||||
|
|
||||||
/* Initialize the execution mask with VMask. Otherwise, derivatives are
|
/* Initialize the execution mask with VMask. Otherwise, derivatives are
|
||||||
* incorrect for subspans where some of the pixels are unlit. We believe
|
* incorrect for subspans where some of the pixels are unlit. We believe
|
||||||
* the bit just didn't take effect in previous generations.
|
* the bit just didn't take effect in previous generations.
|
||||||
|
@ -147,7 +151,7 @@ upload_ps_state(struct brw_context *brw)
|
||||||
|
|
||||||
/* CACHE_NEW_WM_PROG */
|
/* CACHE_NEW_WM_PROG */
|
||||||
dw3 |=
|
dw3 |=
|
||||||
((brw->wm.prog_data->base.binding_table.size_bytes / 4) <<
|
((prog_data->base.binding_table.size_bytes / 4) <<
|
||||||
GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT);
|
GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT);
|
||||||
|
|
||||||
/* Use ALT floating point mode for ARB fragment programs, because they
|
/* Use ALT floating point mode for ARB fragment programs, because they
|
||||||
|
@ -163,8 +167,7 @@ upload_ps_state(struct brw_context *brw)
|
||||||
*/
|
*/
|
||||||
dw6 |= (64 - 2) << HSW_PS_MAX_THREADS_SHIFT;
|
dw6 |= (64 - 2) << HSW_PS_MAX_THREADS_SHIFT;
|
||||||
|
|
||||||
/* CACHE_NEW_WM_PROG */
|
if (prog_data->base.nr_params > 0)
|
||||||
if (brw->wm.prog_data->base.nr_params > 0)
|
|
||||||
dw6 |= GEN7_PS_PUSH_CONSTANT_ENABLE;
|
dw6 |= GEN7_PS_PUSH_CONSTANT_ENABLE;
|
||||||
|
|
||||||
/* From the documentation for this packet:
|
/* From the documentation for this packet:
|
||||||
|
@ -197,25 +200,25 @@ upload_ps_state(struct brw_context *brw)
|
||||||
_mesa_get_min_invocations_per_fragment(ctx, brw->fragment_program, false);
|
_mesa_get_min_invocations_per_fragment(ctx, brw->fragment_program, false);
|
||||||
assert(min_invocations_per_fragment >= 1);
|
assert(min_invocations_per_fragment >= 1);
|
||||||
|
|
||||||
if (brw->wm.prog_data->prog_offset_16 || brw->wm.prog_data->no_8) {
|
if (prog_data->prog_offset_16 || prog_data->no_8) {
|
||||||
dw6 |= GEN7_PS_16_DISPATCH_ENABLE;
|
dw6 |= GEN7_PS_16_DISPATCH_ENABLE;
|
||||||
if (!brw->wm.prog_data->no_8 && min_invocations_per_fragment == 1) {
|
if (!prog_data->no_8 && min_invocations_per_fragment == 1) {
|
||||||
dw6 |= GEN7_PS_8_DISPATCH_ENABLE;
|
dw6 |= GEN7_PS_8_DISPATCH_ENABLE;
|
||||||
dw7 |= (brw->wm.prog_data->base.dispatch_grf_start_reg <<
|
dw7 |= (prog_data->base.dispatch_grf_start_reg <<
|
||||||
GEN7_PS_DISPATCH_START_GRF_SHIFT_0);
|
GEN7_PS_DISPATCH_START_GRF_SHIFT_0);
|
||||||
dw7 |= (brw->wm.prog_data->dispatch_grf_start_reg_16 <<
|
dw7 |= (prog_data->dispatch_grf_start_reg_16 <<
|
||||||
GEN7_PS_DISPATCH_START_GRF_SHIFT_2);
|
GEN7_PS_DISPATCH_START_GRF_SHIFT_2);
|
||||||
ksp0 = brw->wm.base.prog_offset;
|
ksp0 = brw->wm.base.prog_offset;
|
||||||
ksp2 = brw->wm.base.prog_offset + brw->wm.prog_data->prog_offset_16;
|
ksp2 = brw->wm.base.prog_offset + prog_data->prog_offset_16;
|
||||||
} else {
|
} else {
|
||||||
dw7 |= (brw->wm.prog_data->dispatch_grf_start_reg_16 <<
|
dw7 |= (prog_data->dispatch_grf_start_reg_16 <<
|
||||||
GEN7_PS_DISPATCH_START_GRF_SHIFT_0);
|
GEN7_PS_DISPATCH_START_GRF_SHIFT_0);
|
||||||
|
|
||||||
ksp0 = brw->wm.base.prog_offset + brw->wm.prog_data->prog_offset_16;
|
ksp0 = brw->wm.base.prog_offset + prog_data->prog_offset_16;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
dw6 |= GEN7_PS_8_DISPATCH_ENABLE;
|
dw6 |= GEN7_PS_8_DISPATCH_ENABLE;
|
||||||
dw7 |= (brw->wm.prog_data->base.dispatch_grf_start_reg <<
|
dw7 |= (prog_data->base.dispatch_grf_start_reg <<
|
||||||
GEN7_PS_DISPATCH_START_GRF_SHIFT_0);
|
GEN7_PS_DISPATCH_START_GRF_SHIFT_0);
|
||||||
ksp0 = brw->wm.base.prog_offset;
|
ksp0 = brw->wm.base.prog_offset;
|
||||||
}
|
}
|
||||||
|
@ -225,10 +228,10 @@ upload_ps_state(struct brw_context *brw)
|
||||||
OUT_BATCH(ksp0);
|
OUT_BATCH(ksp0);
|
||||||
OUT_BATCH(0);
|
OUT_BATCH(0);
|
||||||
OUT_BATCH(dw3);
|
OUT_BATCH(dw3);
|
||||||
if (brw->wm.prog_data->base.total_scratch) {
|
if (prog_data->base.total_scratch) {
|
||||||
OUT_RELOC64(brw->wm.base.scratch_bo,
|
OUT_RELOC64(brw->wm.base.scratch_bo,
|
||||||
I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
|
I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
|
||||||
ffs(brw->wm.prog_data->base.total_scratch) - 11);
|
ffs(prog_data->base.total_scratch) - 11);
|
||||||
} else {
|
} else {
|
||||||
OUT_BATCH(0);
|
OUT_BATCH(0);
|
||||||
OUT_BATCH(0);
|
OUT_BATCH(0);
|
||||||
|
|
Loading…
Reference in New Issue