From 08ab13d3400891cd6a0e7d97ff1e292cc927c7c9 Mon Sep 17 00:00:00 2001 From: Samuel Pitoiset Date: Tue, 17 Sep 2019 11:01:01 +0200 Subject: [PATCH] radv/gfx10: fix storing/loading NGG stream outputs for GS The GS outputs are stored differently in the LDS storage: they are indexed by out_idx, which is incremented for each stored DWORD. Thus, we need a different path for exporting the stream outputs. This fixes a bunch of CTS failures when NGG GS is force-enabled. Signed-off-by: Samuel Pitoiset Reviewed-by: Bas Nieuwenhuizen --- src/amd/vulkan/radv_nir_to_llvm.c | 91 ++++++++++++++++++++++++++----- 1 file changed, 78 insertions(+), 13 deletions(-) diff --git a/src/amd/vulkan/radv_nir_to_llvm.c b/src/amd/vulkan/radv_nir_to_llvm.c index 69a5e0bb9f9..46d9ae97049 100644 --- a/src/amd/vulkan/radv_nir_to_llvm.c +++ b/src/amd/vulkan/radv_nir_to_llvm.c @@ -3175,6 +3175,17 @@ static void build_export_prim(struct radv_shader_context *ctx, ac_build_export(&ctx->ac, &args); } +static struct radv_stream_output * +radv_get_stream_output_by_loc(struct radv_streamout_info *so, unsigned location) +{ + for (unsigned i = 0; i < so->num_outputs; ++i) { + if (so->outputs[i].location == location) + return &so->outputs[i]; + } + + return NULL; +} + static void build_streamout_vertex(struct radv_shader_context *ctx, LLVMValueRef *so_buffer, LLVMValueRef *wg_offset_dw, unsigned stream, LLVMValueRef offset_vtx, @@ -3195,25 +3206,79 @@ static void build_streamout_vertex(struct radv_shader_context *ctx, offset[buffer] = LLVMBuildShl(builder, tmp, LLVMConstInt(ctx->ac.i32, 2, false), ""); } - for (unsigned i = 0; i < so->num_outputs; ++i) { - struct radv_stream_output *output = - &ctx->shader_info->so.outputs[i]; + if (ctx->stage == MESA_SHADER_GEOMETRY) { + struct radv_shader_output_values outputs[AC_LLVM_MAX_OUTPUTS]; + unsigned noutput = 0; + unsigned out_idx = 0; - if (stream != output->stream) - continue; + for (unsigned i = 0; i < AC_LLVM_MAX_OUTPUTS; ++i) { + unsigned output_usage_mask = 
ctx->shader_info->gs.output_usage_mask[i]; + uint8_t output_stream = + output_stream = ctx->shader_info->gs.output_streams[i]; - struct radv_shader_output_values out = {}; - - for (unsigned comp = 0; comp < 4; comp++) { - if (!(output->component_mask & (1 << comp))) + if (!(ctx->output_mask & (1ull << i)) || + output_stream != stream) continue; - tmp = ac_build_gep0(&ctx->ac, vertexptr, - LLVMConstInt(ctx->ac.i32, 4 * i + comp, false)); - out.values[comp] = LLVMBuildLoad(builder, tmp, ""); + outputs[noutput].slot_name = i; + outputs[noutput].slot_index = i == VARYING_SLOT_CLIP_DIST1; + outputs[noutput].usage_mask = output_usage_mask; + + int length = util_last_bit(output_usage_mask); + + for (unsigned j = 0; j < length; j++, out_idx++) { + if (!(output_usage_mask & (1 << j))) + continue; + + tmp = ac_build_gep0(&ctx->ac, vertexptr, + LLVMConstInt(ctx->ac.i32, out_idx, false)); + outputs[noutput].values[j] = LLVMBuildLoad(builder, tmp, ""); + } + + for (unsigned j = length; j < 4; j++) + outputs[noutput].values[j] = LLVMGetUndef(ctx->ac.f32); + + noutput++; } - radv_emit_stream_output(ctx, so_buffer, offset, output, &out); + for (unsigned i = 0; i < noutput; i++) { + struct radv_stream_output *output = + radv_get_stream_output_by_loc(so, outputs[i].slot_name); + + if (!output || + output->stream != stream) + continue; + + struct radv_shader_output_values out = {}; + + for (unsigned j = 0; j < 4; j++) { + out.values[j] = outputs[i].values[j]; + } + + radv_emit_stream_output(ctx, so_buffer, offset, output, &out); + } + } else { + for (unsigned i = 0; i < so->num_outputs; ++i) { + struct radv_stream_output *output = + &ctx->shader_info->so.outputs[i]; + + if (stream != output->stream) + continue; + + struct radv_shader_output_values out = {}; + + for (unsigned comp = 0; comp < 4; comp++) { + if (!(output->component_mask & (1 << comp))) + continue; + + tmp = ac_build_gep0(&ctx->ac, vertexptr, + LLVMConstInt(ctx->ac.i32, 4 * i + comp, false)); + out.values[comp] = 
LLVMBuildLoad(builder, tmp, ""); + } + + radv_emit_stream_output(ctx, so_buffer, offset, output, &out); + } } }