From 9fa0f5f1ba5671fd528c8809f4a60c381075c3bf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Sun, 14 Feb 2021 06:48:43 -0500 Subject: [PATCH] radeonsi: implement 16-bit VS inputs Acked-by: Pierre-Eric Pelloux-Prayer Part-of: --- .../drivers/radeonsi/si_shader_llvm_vs.c | 44 +++++++++++++------ src/gallium/drivers/radeonsi/si_shader_nir.c | 6 ++- 2 files changed, 34 insertions(+), 16 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm_vs.c b/src/gallium/drivers/radeonsi/si_shader_llvm_vs.c index 2072644013b..1b1067621cc 100644 --- a/src/gallium/drivers/radeonsi/si_shader_llvm_vs.c +++ b/src/gallium/drivers/radeonsi/si_shader_llvm_vs.c @@ -95,6 +95,9 @@ static void load_input_vs(struct si_shader_context *ctx, unsigned input_index, L return; } + unsigned bit_size = info->input_fp16_lo_hi_valid[input_index] & 0x1 ? 16 : 32; + LLVMTypeRef int_type = bit_size == 16 ? ctx->ac.i16 : ctx->ac.i32; + LLVMTypeRef float_type = bit_size == 16 ? ctx->ac.f16 : ctx->ac.f32; unsigned num_vbos_in_user_sgprs = ctx->shader->selector->num_vbos_in_user_sgprs; union si_vs_fix_fetch fix_fetch; LLVMValueRef vb_desc; @@ -129,6 +132,19 @@ static void load_input_vs(struct si_shader_context *ctx, unsigned input_index, L for (unsigned i = 0; i < 4; ++i) out[i] = LLVMBuildExtractElement(ctx->ac.builder, tmp, LLVMConstInt(ctx->ac.i32, i, false), ""); + + if (bit_size == 16) { + if (fix_fetch.u.format == AC_FETCH_FORMAT_UINT || + fix_fetch.u.format == AC_FETCH_FORMAT_SINT) { + for (unsigned i = 0; i < 4; i++) + out[i] = LLVMBuildTrunc(ctx->ac.builder, out[i], ctx->ac.i16, ""); + } else { + for (unsigned i = 0; i < 4; i++) { + out[i] = ac_to_float(&ctx->ac, out[i]); + out[i] = LLVMBuildFPTrunc(ctx->ac.builder, out[i], ctx->ac.f16, ""); + } + } + } return; } @@ -158,7 +174,8 @@ static void load_input_vs(struct si_shader_context *ctx, unsigned input_index, L for (unsigned i = 0; i < num_fetches; ++i) { LLVMValueRef voffset = LLVMConstInt(ctx->ac.i32, fetch_stride * i, 0); fetches[i] = ac_build_buffer_load_format(&ctx->ac, vb_desc, vertex_index, voffset, - channels_per_fetch, 0, true, false, false); + channels_per_fetch, 0, true, + bit_size == 16, false); } if (num_fetches == 1 && channels_per_fetch > 1) { @@ -172,27 +189,28 @@ static void load_input_vs(struct si_shader_context *ctx, unsigned input_index, L } for (unsigned i = num_fetches; i < 4; ++i) - fetches[i] = LLVMGetUndef(ctx->ac.f32); + fetches[i] = LLVMGetUndef(float_type); if (fix_fetch.u.log_size <= 1 && fix_fetch.u.num_channels_m1 == 2 && required_channels == 4) { if (fix_fetch.u.format == AC_FETCH_FORMAT_UINT || fix_fetch.u.format == AC_FETCH_FORMAT_SINT) - fetches[3] = ctx->ac.i32_1; + fetches[3] = LLVMConstInt(int_type, 1, 0); else - fetches[3] = ctx->ac.f32_1; + fetches[3] = LLVMConstReal(float_type, 1); } else if (fix_fetch.u.log_size == 3 && (fix_fetch.u.format == AC_FETCH_FORMAT_SNORM || fix_fetch.u.format == AC_FETCH_FORMAT_SSCALED || fix_fetch.u.format == AC_FETCH_FORMAT_SINT) && required_channels == 4) { + /* For 2_10_10_10, the hardware returns an unsigned value; * convert it to a signed one. */ LLVMValueRef tmp = fetches[3]; - LLVMValueRef c30 = LLVMConstInt(ctx->ac.i32, 30, 0); + LLVMValueRef c30 = LLVMConstInt(int_type, 30, 0); /* First, recover the sign-extended signed integer value. */ if (fix_fetch.u.format == AC_FETCH_FORMAT_SSCALED) - tmp = LLVMBuildFPToUI(ctx->ac.builder, tmp, ctx->ac.i32, ""); + tmp = LLVMBuildFPToUI(ctx->ac.builder, tmp, int_type, ""); else tmp = ac_to_integer(&ctx->ac, tmp); @@ -204,18 +222,18 @@ static void load_input_vs(struct si_shader_context *ctx, unsigned input_index, L */ tmp = LLVMBuildShl( ctx->ac.builder, tmp, - fix_fetch.u.format == AC_FETCH_FORMAT_SNORM ? LLVMConstInt(ctx->ac.i32, 7, 0) : c30, ""); + fix_fetch.u.format == AC_FETCH_FORMAT_SNORM ? LLVMConstInt(int_type, 7, 0) : c30, ""); tmp = LLVMBuildAShr(ctx->ac.builder, tmp, c30, ""); /* Convert back to the right type. */ if (fix_fetch.u.format == AC_FETCH_FORMAT_SNORM) { LLVMValueRef clamp; - LLVMValueRef neg_one = LLVMConstReal(ctx->ac.f32, -1.0); - tmp = LLVMBuildSIToFP(ctx->ac.builder, tmp, ctx->ac.f32, ""); + LLVMValueRef neg_one = LLVMConstReal(float_type, -1.0); + tmp = LLVMBuildSIToFP(ctx->ac.builder, tmp, float_type, ""); clamp = LLVMBuildFCmp(ctx->ac.builder, LLVMRealULT, tmp, neg_one, ""); tmp = LLVMBuildSelect(ctx->ac.builder, clamp, neg_one, tmp, ""); } else if (fix_fetch.u.format == AC_FETCH_FORMAT_SSCALED) { - tmp = LLVMBuildSIToFP(ctx->ac.builder, tmp, ctx->ac.f32, ""); + tmp = LLVMBuildSIToFP(ctx->ac.builder, tmp, float_type, ""); } fetches[3] = tmp; @@ -234,10 +252,8 @@ void si_llvm_load_vs_inputs(struct si_shader_context *ctx, struct nir_shader *ni load_input_vs(ctx, i, values); - for (unsigned chan = 0; chan < 4; chan++) { - ctx->inputs[i * 4 + chan] = - LLVMBuildBitCast(ctx->ac.builder, values[chan], ctx->ac.i32, ""); - } + for (unsigned chan = 0; chan < 4; chan++) + ctx->inputs[i * 4 + chan] = ac_to_integer(&ctx->ac, values[chan]); } } diff --git a/src/gallium/drivers/radeonsi/si_shader_nir.c b/src/gallium/drivers/radeonsi/si_shader_nir.c index 68f6e4295c0..393a0c01d60 100644 --- a/src/gallium/drivers/radeonsi/si_shader_nir.c +++ b/src/gallium/drivers/radeonsi/si_shader_nir.c @@ -72,8 +72,10 @@ static void scan_io_usage(struct si_shader_info *info, nir_intrinsic_instr *intr } assert(bit_size != 64 && !(mask & ~0xf) && "64-bit IO should have been lowered"); - /* Convert the 16-bit component mask to a 32-bit component mask. */ - if (bit_size == 16) { + /* Convert the 16-bit component mask to a 32-bit component mask except for VS inputs + * where the mask is untyped. + */ + if (bit_size == 16 && !is_input) { unsigned new_mask = 0; for (unsigned i = 0; i < 4; i++) { if (mask & (1 << i))