anv: Disable fs dispatch for depth/stencil only pipelines
Fixes most renderpass bugs.
This commit is contained in:
parent
a804d82ef6
commit
292031a1a5
|
@ -138,7 +138,7 @@ create_pipeline(struct anv_device *device,
|
|||
VK_NULL_HANDLE,
|
||||
&(VkGraphicsPipelineCreateInfo) {
|
||||
.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
|
||||
.stageCount = 2,
|
||||
.stageCount = fs_nir ? 2 : 1,
|
||||
.pStages = (VkPipelineShaderStageCreateInfo[]) {
|
||||
{
|
||||
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
|
||||
|
@ -430,17 +430,13 @@ emit_color_clear(struct anv_cmd_buffer *cmd_buffer,
|
|||
|
||||
|
||||
static void
|
||||
build_depthstencil_shaders(struct nir_shader **out_vs,
|
||||
struct nir_shader **out_fs)
|
||||
build_depthstencil_shader(struct nir_shader **out_vs)
|
||||
{
|
||||
nir_builder vs_b;
|
||||
nir_builder fs_b;
|
||||
|
||||
nir_builder_init_simple_shader(&vs_b, NULL, MESA_SHADER_VERTEX, NULL);
|
||||
nir_builder_init_simple_shader(&fs_b, NULL, MESA_SHADER_FRAGMENT, NULL);
|
||||
|
||||
vs_b.shader->info.name = ralloc_strdup(vs_b.shader, "meta_clear_depthstencil_vs");
|
||||
fs_b.shader->info.name = ralloc_strdup(fs_b.shader, "meta_clear_depthstencil_fs");
|
||||
|
||||
const struct glsl_type *position_type = glsl_vec4_type();
|
||||
|
||||
|
@ -457,7 +453,6 @@ build_depthstencil_shaders(struct nir_shader **out_vs,
|
|||
nir_copy_var(&vs_b, vs_out_pos, vs_in_pos);
|
||||
|
||||
*out_vs = vs_b.shader;
|
||||
*out_fs = fs_b.shader;
|
||||
}
|
||||
|
||||
static VkResult
|
||||
|
@ -466,9 +461,8 @@ create_depthstencil_pipeline(struct anv_device *device,
|
|||
struct anv_pipeline **pipeline)
|
||||
{
|
||||
struct nir_shader *vs_nir;
|
||||
struct nir_shader *fs_nir;
|
||||
|
||||
build_depthstencil_shaders(&vs_nir, &fs_nir);
|
||||
build_depthstencil_shader(&vs_nir);
|
||||
|
||||
const VkPipelineVertexInputStateCreateInfo vi_state = {
|
||||
.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
|
||||
|
@ -522,7 +516,7 @@ create_depthstencil_pipeline(struct anv_device *device,
|
|||
.pAttachments = NULL,
|
||||
};
|
||||
|
||||
return create_pipeline(device, vs_nir, fs_nir, &vi_state, &ds_state,
|
||||
return create_pipeline(device, vs_nir, NULL, &vi_state, &ds_state,
|
||||
&cb_state, &device->meta_state.alloc,
|
||||
/*use_repclear*/ true, pipeline);
|
||||
}
|
||||
|
|
|
@ -1091,6 +1091,7 @@ anv_pipeline_init(struct anv_pipeline *pipeline,
|
|||
pipeline->vs_simd8 = NO_KERNEL;
|
||||
pipeline->vs_vec4 = NO_KERNEL;
|
||||
pipeline->gs_kernel = NO_KERNEL;
|
||||
pipeline->ps_ksp0 = NO_KERNEL;
|
||||
|
||||
pipeline->active_stages = 0;
|
||||
pipeline->total_scratch = 0;
|
||||
|
|
|
@ -475,142 +475,150 @@ genX(graphics_pipeline_create)(
|
|||
|
||||
const struct brw_wm_prog_data *wm_prog_data = &pipeline->wm_prog_data;
|
||||
|
||||
/* TODO: We should clean this up. Among other things, this is mostly
|
||||
* shared with other gens.
|
||||
*/
|
||||
const struct brw_vue_map *fs_input_map;
|
||||
if (pipeline->gs_kernel == NO_KERNEL)
|
||||
fs_input_map = &vue_prog_data->vue_map;
|
||||
else
|
||||
fs_input_map = &gs_prog_data->base.vue_map;
|
||||
const int num_thread_bias = ANV_GEN == 8 ? 2 : 1;
|
||||
if (pipeline->ps_ksp0 == NO_KERNEL) {
|
||||
anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS));
|
||||
anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS_EXTRA),
|
||||
.PixelShaderValid = false);
|
||||
} else {
|
||||
/* TODO: We should clean this up. Among other things, this is mostly
|
||||
* shared with other gens.
|
||||
*/
|
||||
const struct brw_vue_map *fs_input_map;
|
||||
if (pipeline->gs_kernel == NO_KERNEL)
|
||||
fs_input_map = &vue_prog_data->vue_map;
|
||||
else
|
||||
fs_input_map = &gs_prog_data->base.vue_map;
|
||||
|
||||
struct GENX(3DSTATE_SBE_SWIZ) swiz = {
|
||||
GENX(3DSTATE_SBE_SWIZ_header),
|
||||
};
|
||||
struct GENX(3DSTATE_SBE_SWIZ) swiz = {
|
||||
GENX(3DSTATE_SBE_SWIZ_header),
|
||||
};
|
||||
|
||||
int max_source_attr = 0;
|
||||
for (int attr = 0; attr < VARYING_SLOT_MAX; attr++) {
|
||||
int input_index = wm_prog_data->urb_setup[attr];
|
||||
int max_source_attr = 0;
|
||||
for (int attr = 0; attr < VARYING_SLOT_MAX; attr++) {
|
||||
int input_index = wm_prog_data->urb_setup[attr];
|
||||
|
||||
if (input_index < 0)
|
||||
continue;
|
||||
if (input_index < 0)
|
||||
continue;
|
||||
|
||||
int source_attr = fs_input_map->varying_to_slot[attr];
|
||||
max_source_attr = MAX2(max_source_attr, source_attr);
|
||||
int source_attr = fs_input_map->varying_to_slot[attr];
|
||||
max_source_attr = MAX2(max_source_attr, source_attr);
|
||||
|
||||
if (input_index >= 16)
|
||||
continue;
|
||||
if (input_index >= 16)
|
||||
continue;
|
||||
|
||||
if (source_attr == -1) {
|
||||
/* This attribute does not exist in the VUE--that means that the
|
||||
* vertex shader did not write to it. It could be that it's a
|
||||
* regular varying read by the fragment shader but not written by the
|
||||
* vertex shader or it's gl_PrimitiveID. In the first case the value
|
||||
* is undefined, in the second it needs to be gl_PrimitiveID.
|
||||
*/
|
||||
swiz.Attribute[input_index].ConstantSource = PRIM_ID;
|
||||
swiz.Attribute[input_index].ComponentOverrideX = true;
|
||||
swiz.Attribute[input_index].ComponentOverrideY = true;
|
||||
swiz.Attribute[input_index].ComponentOverrideZ = true;
|
||||
swiz.Attribute[input_index].ComponentOverrideW = true;
|
||||
} else {
|
||||
/* We have to subtract two slots to account for the URB entry output
|
||||
* read offset in the VS and GS stages.
|
||||
*/
|
||||
swiz.Attribute[input_index].SourceAttribute = source_attr - 2;
|
||||
if (source_attr == -1) {
|
||||
/* This attribute does not exist in the VUE--that means that the
|
||||
* vertex shader did not write to it. It could be that it's a
|
||||
* regular varying read by the fragment shader but not written by
|
||||
* the vertex shader or it's gl_PrimitiveID. In the first case the
|
||||
* value is undefined, in the second it needs to be
|
||||
* gl_PrimitiveID.
|
||||
*/
|
||||
swiz.Attribute[input_index].ConstantSource = PRIM_ID;
|
||||
swiz.Attribute[input_index].ComponentOverrideX = true;
|
||||
swiz.Attribute[input_index].ComponentOverrideY = true;
|
||||
swiz.Attribute[input_index].ComponentOverrideZ = true;
|
||||
swiz.Attribute[input_index].ComponentOverrideW = true;
|
||||
} else {
|
||||
/* We have to subtract two slots to account for the URB entry output
|
||||
* read offset in the VS and GS stages.
|
||||
*/
|
||||
swiz.Attribute[input_index].SourceAttribute = source_attr - 2;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
anv_batch_emit(&pipeline->batch, GENX(3DSTATE_SBE),
|
||||
.AttributeSwizzleEnable = true,
|
||||
.ForceVertexURBEntryReadLength = false,
|
||||
.ForceVertexURBEntryReadOffset = false,
|
||||
.VertexURBEntryReadLength = DIV_ROUND_UP(max_source_attr + 1, 2),
|
||||
.PointSpriteTextureCoordinateOrigin = UPPERLEFT,
|
||||
.NumberofSFOutputAttributes =
|
||||
anv_batch_emit(&pipeline->batch, GENX(3DSTATE_SBE),
|
||||
.AttributeSwizzleEnable = true,
|
||||
.ForceVertexURBEntryReadLength = false,
|
||||
.ForceVertexURBEntryReadOffset = false,
|
||||
.VertexURBEntryReadLength =
|
||||
DIV_ROUND_UP(max_source_attr + 1, 2),
|
||||
.PointSpriteTextureCoordinateOrigin = UPPERLEFT,
|
||||
.NumberofSFOutputAttributes =
|
||||
wm_prog_data->num_varying_inputs,
|
||||
|
||||
#if ANV_GEN >= 9
|
||||
.Attribute0ActiveComponentFormat = ACF_XYZW,
|
||||
.Attribute1ActiveComponentFormat = ACF_XYZW,
|
||||
.Attribute2ActiveComponentFormat = ACF_XYZW,
|
||||
.Attribute3ActiveComponentFormat = ACF_XYZW,
|
||||
.Attribute4ActiveComponentFormat = ACF_XYZW,
|
||||
.Attribute5ActiveComponentFormat = ACF_XYZW,
|
||||
.Attribute6ActiveComponentFormat = ACF_XYZW,
|
||||
.Attribute7ActiveComponentFormat = ACF_XYZW,
|
||||
.Attribute8ActiveComponentFormat = ACF_XYZW,
|
||||
.Attribute9ActiveComponentFormat = ACF_XYZW,
|
||||
.Attribute10ActiveComponentFormat = ACF_XYZW,
|
||||
.Attribute11ActiveComponentFormat = ACF_XYZW,
|
||||
.Attribute12ActiveComponentFormat = ACF_XYZW,
|
||||
.Attribute13ActiveComponentFormat = ACF_XYZW,
|
||||
.Attribute14ActiveComponentFormat = ACF_XYZW,
|
||||
.Attribute15ActiveComponentFormat = ACF_XYZW,
|
||||
/* wow, much field, very attribute */
|
||||
.Attribute16ActiveComponentFormat = ACF_XYZW,
|
||||
.Attribute17ActiveComponentFormat = ACF_XYZW,
|
||||
.Attribute18ActiveComponentFormat = ACF_XYZW,
|
||||
.Attribute19ActiveComponentFormat = ACF_XYZW,
|
||||
.Attribute20ActiveComponentFormat = ACF_XYZW,
|
||||
.Attribute21ActiveComponentFormat = ACF_XYZW,
|
||||
.Attribute22ActiveComponentFormat = ACF_XYZW,
|
||||
.Attribute23ActiveComponentFormat = ACF_XYZW,
|
||||
.Attribute24ActiveComponentFormat = ACF_XYZW,
|
||||
.Attribute25ActiveComponentFormat = ACF_XYZW,
|
||||
.Attribute26ActiveComponentFormat = ACF_XYZW,
|
||||
.Attribute27ActiveComponentFormat = ACF_XYZW,
|
||||
.Attribute28ActiveComponentFormat = ACF_XYZW,
|
||||
.Attribute29ActiveComponentFormat = ACF_XYZW,
|
||||
.Attribute28ActiveComponentFormat = ACF_XYZW,
|
||||
.Attribute29ActiveComponentFormat = ACF_XYZW,
|
||||
.Attribute30ActiveComponentFormat = ACF_XYZW,
|
||||
.Attribute0ActiveComponentFormat = ACF_XYZW,
|
||||
.Attribute1ActiveComponentFormat = ACF_XYZW,
|
||||
.Attribute2ActiveComponentFormat = ACF_XYZW,
|
||||
.Attribute3ActiveComponentFormat = ACF_XYZW,
|
||||
.Attribute4ActiveComponentFormat = ACF_XYZW,
|
||||
.Attribute5ActiveComponentFormat = ACF_XYZW,
|
||||
.Attribute6ActiveComponentFormat = ACF_XYZW,
|
||||
.Attribute7ActiveComponentFormat = ACF_XYZW,
|
||||
.Attribute8ActiveComponentFormat = ACF_XYZW,
|
||||
.Attribute9ActiveComponentFormat = ACF_XYZW,
|
||||
.Attribute10ActiveComponentFormat = ACF_XYZW,
|
||||
.Attribute11ActiveComponentFormat = ACF_XYZW,
|
||||
.Attribute12ActiveComponentFormat = ACF_XYZW,
|
||||
.Attribute13ActiveComponentFormat = ACF_XYZW,
|
||||
.Attribute14ActiveComponentFormat = ACF_XYZW,
|
||||
.Attribute15ActiveComponentFormat = ACF_XYZW,
|
||||
/* wow, much field, very attribute */
|
||||
.Attribute16ActiveComponentFormat = ACF_XYZW,
|
||||
.Attribute17ActiveComponentFormat = ACF_XYZW,
|
||||
.Attribute18ActiveComponentFormat = ACF_XYZW,
|
||||
.Attribute19ActiveComponentFormat = ACF_XYZW,
|
||||
.Attribute20ActiveComponentFormat = ACF_XYZW,
|
||||
.Attribute21ActiveComponentFormat = ACF_XYZW,
|
||||
.Attribute22ActiveComponentFormat = ACF_XYZW,
|
||||
.Attribute23ActiveComponentFormat = ACF_XYZW,
|
||||
.Attribute24ActiveComponentFormat = ACF_XYZW,
|
||||
.Attribute25ActiveComponentFormat = ACF_XYZW,
|
||||
.Attribute26ActiveComponentFormat = ACF_XYZW,
|
||||
.Attribute27ActiveComponentFormat = ACF_XYZW,
|
||||
.Attribute28ActiveComponentFormat = ACF_XYZW,
|
||||
.Attribute29ActiveComponentFormat = ACF_XYZW,
|
||||
.Attribute28ActiveComponentFormat = ACF_XYZW,
|
||||
.Attribute29ActiveComponentFormat = ACF_XYZW,
|
||||
.Attribute30ActiveComponentFormat = ACF_XYZW,
|
||||
#endif
|
||||
);
|
||||
);
|
||||
|
||||
uint32_t *dw = anv_batch_emit_dwords(&pipeline->batch,
|
||||
GENX(3DSTATE_SBE_SWIZ_length));
|
||||
GENX(3DSTATE_SBE_SWIZ_pack)(&pipeline->batch, dw, &swiz);
|
||||
uint32_t *dw = anv_batch_emit_dwords(&pipeline->batch,
|
||||
GENX(3DSTATE_SBE_SWIZ_length));
|
||||
GENX(3DSTATE_SBE_SWIZ_pack)(&pipeline->batch, dw, &swiz);
|
||||
|
||||
const int num_thread_bias = ANV_GEN == 8 ? 2 : 1;
|
||||
anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS),
|
||||
.KernelStartPointer0 = pipeline->ps_ksp0,
|
||||
anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS),
|
||||
.KernelStartPointer0 = pipeline->ps_ksp0,
|
||||
|
||||
.SingleProgramFlow = false,
|
||||
.VectorMaskEnable = true,
|
||||
.SamplerCount = 1,
|
||||
.SingleProgramFlow = false,
|
||||
.VectorMaskEnable = true,
|
||||
.SamplerCount = 1,
|
||||
|
||||
.ScratchSpaceBasePointer = pipeline->scratch_start[MESA_SHADER_FRAGMENT],
|
||||
.PerThreadScratchSpace = ffs(wm_prog_data->base.total_scratch / 2048),
|
||||
.ScratchSpaceBasePointer = pipeline->scratch_start[MESA_SHADER_FRAGMENT],
|
||||
.PerThreadScratchSpace = ffs(wm_prog_data->base.total_scratch / 2048),
|
||||
|
||||
.MaximumNumberofThreadsPerPSD = 64 - num_thread_bias,
|
||||
.PositionXYOffsetSelect = wm_prog_data->uses_pos_offset ?
|
||||
POSOFFSET_SAMPLE: POSOFFSET_NONE,
|
||||
.PushConstantEnable = wm_prog_data->base.nr_params > 0,
|
||||
._8PixelDispatchEnable = pipeline->ps_simd8 != NO_KERNEL,
|
||||
._16PixelDispatchEnable = pipeline->ps_simd16 != NO_KERNEL,
|
||||
._32PixelDispatchEnable = false,
|
||||
.MaximumNumberofThreadsPerPSD = 64 - num_thread_bias,
|
||||
.PositionXYOffsetSelect = wm_prog_data->uses_pos_offset ?
|
||||
POSOFFSET_SAMPLE: POSOFFSET_NONE,
|
||||
.PushConstantEnable = wm_prog_data->base.nr_params > 0,
|
||||
._8PixelDispatchEnable = pipeline->ps_simd8 != NO_KERNEL,
|
||||
._16PixelDispatchEnable = pipeline->ps_simd16 != NO_KERNEL,
|
||||
._32PixelDispatchEnable = false,
|
||||
|
||||
.DispatchGRFStartRegisterForConstantSetupData0 = pipeline->ps_grf_start0,
|
||||
.DispatchGRFStartRegisterForConstantSetupData1 = 0,
|
||||
.DispatchGRFStartRegisterForConstantSetupData2 = pipeline->ps_grf_start2,
|
||||
.DispatchGRFStartRegisterForConstantSetupData0 = pipeline->ps_grf_start0,
|
||||
.DispatchGRFStartRegisterForConstantSetupData1 = 0,
|
||||
.DispatchGRFStartRegisterForConstantSetupData2 = pipeline->ps_grf_start2,
|
||||
|
||||
.KernelStartPointer1 = 0,
|
||||
.KernelStartPointer2 = pipeline->ps_ksp2);
|
||||
.KernelStartPointer1 = 0,
|
||||
.KernelStartPointer2 = pipeline->ps_ksp2);
|
||||
|
||||
bool per_sample_ps = false;
|
||||
anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS_EXTRA),
|
||||
.PixelShaderValid = true,
|
||||
.PixelShaderKillsPixel = wm_prog_data->uses_kill,
|
||||
.PixelShaderComputedDepthMode = wm_prog_data->computed_depth_mode,
|
||||
.AttributeEnable = wm_prog_data->num_varying_inputs > 0,
|
||||
.oMaskPresenttoRenderTarget = wm_prog_data->uses_omask,
|
||||
.PixelShaderIsPerSample = per_sample_ps,
|
||||
bool per_sample_ps = false;
|
||||
anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS_EXTRA),
|
||||
.PixelShaderValid = true,
|
||||
.PixelShaderKillsPixel = wm_prog_data->uses_kill,
|
||||
.PixelShaderComputedDepthMode = wm_prog_data->computed_depth_mode,
|
||||
.AttributeEnable = wm_prog_data->num_varying_inputs > 0,
|
||||
.oMaskPresenttoRenderTarget = wm_prog_data->uses_omask,
|
||||
.PixelShaderIsPerSample = per_sample_ps,
|
||||
#if ANV_GEN >= 9
|
||||
.PixelShaderPullsBary = wm_prog_data->pulls_bary,
|
||||
.InputCoverageMaskState = ICMS_NONE
|
||||
.PixelShaderPullsBary = wm_prog_data->pulls_bary,
|
||||
.InputCoverageMaskState = ICMS_NONE
|
||||
#endif
|
||||
);
|
||||
);
|
||||
}
|
||||
|
||||
*pPipeline = anv_pipeline_to_handle(pipeline);
|
||||
|
||||
|
|
Loading…
Reference in New Issue