vkd3d-shader: Implement sample explicit LOD override.

In control flow, we can force LOD 0.0 to avoid undefined results when
games sample with implicit LOD in non-quad-uniform control flow.

Behavior differs between implementations:
- Helper lanes come to life and interpolate shader input.
- LOD is clamped to 0.0 in divergent control flow.

This hack is not safe in general, since we force LOD 0.0 even when the
control flow is quad-uniform.

This is the most practical solution for the problem for now.

Signed-off-by: Hans-Kristian Arntzen <post@arntzen-software.no>
This commit is contained in:
Hans-Kristian Arntzen 2021-06-16 16:52:48 +02:00
parent a08e493a3a
commit cb61a4c83a
1 changed files with 38 additions and 5 deletions

View File

@ -9058,6 +9058,7 @@ static void vkd3d_dxbc_compiler_emit_sample(struct vkd3d_dxbc_compiler *compiler
struct vkd3d_shader_image image;
unsigned int num_coordinates;
uint32_t image_operands[4];
bool force_explicit_lod;
DWORD coordinate_mask;
bool is_sparse_op;
SpvOp op;
@ -9074,18 +9075,44 @@ static void vkd3d_dxbc_compiler_emit_sample(struct vkd3d_dxbc_compiler *compiler
if ((is_sparse_op = (instruction->dst_count > 1 && dst[1].reg.type != VKD3DSPR_NULL)))
vkd3d_spirv_enable_capability(builder, SpvCapabilitySparseResidency);
/* Workaround */
switch (instruction->handler_idx)
{
case VKD3DSIH_SAMPLE:
case VKD3DSIH_SAMPLE_FEEDBACK:
op = is_sparse_op ? SpvOpImageSparseSampleImplicitLod : SpvOpImageSampleImplicitLod;
case VKD3DSIH_SAMPLE_B:
case VKD3DSIH_SAMPLE_B_FEEDBACK:
force_explicit_lod = (compiler->control_flow_depth || compiler->control_flow_has_early_return) &&
vkd3d_dxbc_compiler_has_quirk(compiler, VKD3D_SHADER_QUIRK_FORCE_EXPLICIT_LOD_IN_CONTROL_FLOW);
break;
default:
force_explicit_lod = false;
break;
}
switch (instruction->handler_idx)
{
case VKD3DSIH_SAMPLE:
case VKD3DSIH_SAMPLE_FEEDBACK:
if (force_explicit_lod)
op = is_sparse_op ? SpvOpImageSparseSampleExplicitLod : SpvOpImageSampleExplicitLod;
else
op = is_sparse_op ? SpvOpImageSparseSampleImplicitLod : SpvOpImageSampleImplicitLod;
break;
case VKD3DSIH_SAMPLE_B:
case VKD3DSIH_SAMPLE_B_FEEDBACK:
op = is_sparse_op ? SpvOpImageSparseSampleImplicitLod : SpvOpImageSampleImplicitLod;
operands_mask |= SpvImageOperandsBiasMask;
image_operands[image_operand_count++] = vkd3d_dxbc_compiler_emit_load_src(compiler,
&src[3], VKD3DSP_WRITEMASK_0);
if (force_explicit_lod)
{
op = is_sparse_op ? SpvOpImageSparseSampleExplicitLod : SpvOpImageSampleExplicitLod;
}
else
{
op = is_sparse_op ? SpvOpImageSparseSampleImplicitLod : SpvOpImageSampleImplicitLod;
operands_mask |= SpvImageOperandsBiasMask;
image_operands[image_operand_count++] = vkd3d_dxbc_compiler_emit_load_src(compiler,
&src[3], VKD3DSP_WRITEMASK_0);
}
break;
case VKD3DSIH_SAMPLE_GRAD:
case VKD3DSIH_SAMPLE_GRAD_FEEDBACK:
@ -9109,6 +9136,12 @@ static void vkd3d_dxbc_compiler_emit_sample(struct vkd3d_dxbc_compiler *compiler
return;
}
if (force_explicit_lod)
{
operands_mask |= SpvImageOperandsLodMask;
image_operands[image_operand_count++] = vkd3d_dxbc_compiler_get_constant_float(compiler, 0.0f);
}
if (vkd3d_shader_instruction_has_texel_offset(instruction))
{
operands_mask |= SpvImageOperandsConstOffsetMask;