iris: Fix handling of SIMD32 fragment shaders

The brw_wm_prog_data_dispatch_grf_start_reg and
brw_wm_prog_data_prog_offset helpers read the _NPixelDispatchEnable
fields from 3DSTATE_PS to figure out which bits to pull out of the prog
data and which slots to put them in.  Therefore, they need to be called
with the final set of _NPixelDispatchEnable bits, after we've done the
workaround for SIMD32 and 16x MSAA.  Otherwise, if you end up with a
somewhat odd combination of enables, the GRF start reg and KSP data end
up in the wrong slots.  In particular, running SIMD32-only is broken,
but several other combinations are as well.

Fixes: 5445c176e2 "iris: Disable SIMD32 when using a 16x MSAA..."
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
This commit is contained in:
Jason Ekstrand 2019-08-03 11:37:34 -05:00
parent 9f37c9903b
commit aebca3961b
1 changed files with 50 additions and 44 deletions

View File

@ -3645,23 +3645,6 @@ iris_store_fs_state(struct iris_context *ice,
*/
ps.PositionXYOffsetSelect =
wm_prog_data->uses_pos_offset ? POSOFFSET_SAMPLE : POSOFFSET_NONE;
ps._8PixelDispatchEnable = wm_prog_data->dispatch_8;
ps._16PixelDispatchEnable = wm_prog_data->dispatch_16;
/* ps._32PixelDispatchEnable is filled in at draw time. */
ps.DispatchGRFStartRegisterForConstantSetupData0 =
brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 0);
ps.DispatchGRFStartRegisterForConstantSetupData1 =
brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 1);
ps.DispatchGRFStartRegisterForConstantSetupData2 =
brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 2);
ps.KernelStartPointer0 =
KSP(shader) + brw_wm_prog_data_prog_offset(wm_prog_data, ps, 0);
ps.KernelStartPointer1 =
KSP(shader) + brw_wm_prog_data_prog_offset(wm_prog_data, ps, 1);
ps.KernelStartPointer2 =
KSP(shader) + brw_wm_prog_data_prog_offset(wm_prog_data, ps, 2);
if (prog_data->total_scratch) {
struct iris_bo *bo =
@ -3686,8 +3669,6 @@ iris_store_fs_state(struct iris_context *ice,
#if GEN_GEN >= 9
psx.PixelShaderPullsBary = wm_prog_data->pulls_bary;
psx.PixelShaderComputesStencil = wm_prog_data->computed_stencil;
#else
psx.PixelShaderUsesInputCoverageMask = wm_prog_data->uses_sample_mask;
#endif
}
}
@ -4655,47 +4636,72 @@ iris_upload_dirty_render_state(struct iris_context *ice,
iris_get_scratch_space(ice, prog_data->total_scratch, stage);
iris_use_pinned_bo(batch, bo, true);
}
#if GEN_GEN >= 9
if (stage == MESA_SHADER_FRAGMENT && wm_prog_data->uses_sample_mask) {
uint32_t *shader_ps = (uint32_t *) shader->derived_data;
uint32_t *shader_psx = shader_ps + GENX(3DSTATE_PS_length);
uint32_t ps_state[GENX(3DSTATE_PS_length)] = {0};
uint32_t psx_state[GENX(3DSTATE_PS_EXTRA_length)] = {0};
struct iris_rasterizer_state *cso = ice->state.cso_rast;
if (stage == MESA_SHADER_FRAGMENT) {
UNUSED struct iris_rasterizer_state *cso = ice->state.cso_rast;
struct pipe_framebuffer_state *cso_fb = &ice->state.framebuffer;
/* The docs for 3DSTATE_PS::32 Pixel Dispatch Enable say:
*
* "When NUM_MULTISAMPLES = 16 or FORCE_SAMPLE_COUNT = 16,
* SIMD32 Dispatch must not be enabled for PER_PIXEL dispatch
* mode."
*
* 16x MSAA only exists on Gen9+, so we can skip this on Gen8.
*/
iris_pack_command(GENX(3DSTATE_PS), &ps_state, ps) {
ps._32PixelDispatchEnable = wm_prog_data->dispatch_32 &&
(cso_fb->samples != 16 || wm_prog_data->persample_dispatch);
uint32_t ps_state[GENX(3DSTATE_PS_length)] = {0};
iris_pack_command(GENX(3DSTATE_PS), ps_state, ps) {
ps._8PixelDispatchEnable = wm_prog_data->dispatch_8;
ps._16PixelDispatchEnable = wm_prog_data->dispatch_16;
ps._32PixelDispatchEnable = wm_prog_data->dispatch_32;
/* The docs for 3DSTATE_PS::32 Pixel Dispatch Enable say:
*
* "When NUM_MULTISAMPLES = 16 or FORCE_SAMPLE_COUNT = 16,
* SIMD32 Dispatch must not be enabled for PER_PIXEL dispatch
* mode."
*
* 16x MSAA only exists on Gen9+, so we can skip this on Gen8.
*/
if (GEN_GEN >= 9 && cso_fb->samples == 16 &&
!wm_prog_data->persample_dispatch) {
assert(ps._8PixelDispatchEnable || ps._16PixelDispatchEnable);
ps._32PixelDispatchEnable = false;
}
ps.DispatchGRFStartRegisterForConstantSetupData0 =
brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 0);
ps.DispatchGRFStartRegisterForConstantSetupData1 =
brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 1);
ps.DispatchGRFStartRegisterForConstantSetupData2 =
brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 2);
ps.KernelStartPointer0 = KSP(shader) +
brw_wm_prog_data_prog_offset(wm_prog_data, ps, 0);
ps.KernelStartPointer1 = KSP(shader) +
brw_wm_prog_data_prog_offset(wm_prog_data, ps, 1);
ps.KernelStartPointer2 = KSP(shader) +
brw_wm_prog_data_prog_offset(wm_prog_data, ps, 2);
}
iris_pack_command(GENX(3DSTATE_PS_EXTRA), &psx_state, psx) {
uint32_t psx_state[GENX(3DSTATE_PS_EXTRA_length)] = {0};
iris_pack_command(GENX(3DSTATE_PS_EXTRA), psx_state, psx) {
#if GEN_GEN >= 9
if (wm_prog_data->post_depth_coverage)
psx.InputCoverageMaskState = ICMS_DEPTH_COVERAGE;
else if (wm_prog_data->inner_coverage && cso->conservative_rasterization)
else if (wm_prog_data->inner_coverage &&
cso->conservative_rasterization)
psx.InputCoverageMaskState = ICMS_INNER_CONSERVATIVE;
else
psx.InputCoverageMaskState = ICMS_NORMAL;
#else
psx.PixelShaderUsesInputCoverageMask =
wm_prog_data->uses_sample_mask;
#endif
}
uint32_t *shader_ps = (uint32_t *) shader->derived_data;
uint32_t *shader_psx = shader_ps + GENX(3DSTATE_PS_length);
iris_emit_merge(batch, shader_ps, ps_state,
GENX(3DSTATE_PS_length));
iris_emit_merge(batch,
shader_psx,
psx_state,
iris_emit_merge(batch, shader_psx, psx_state,
GENX(3DSTATE_PS_EXTRA_length));
} else
#endif
} else {
iris_batch_emit(batch, shader->derived_data,
iris_derived_program_state_size(stage));
}
} else {
if (stage == MESA_SHADER_TESS_EVAL) {
iris_emit_cmd(batch, GENX(3DSTATE_HS), hs);