radeonsi: implement 16-bit VS inputs

Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/9051>
2021-02-14 06:48:43 -05:00 · 2021-02-14 06:48:43 -05:00 · 9fa0f5f1ba
parent 7db43960f6
commit 9fa0f5f1ba
2 changed files with 34 additions and 16 deletions
--- a/src/gallium/drivers/radeonsi/si_shader_llvm_vs.c
+++ b/src/gallium/drivers/radeonsi/si_shader_llvm_vs.c
@@ -95,6 +95,9 @@ static void load_input_vs(struct si_shader_context *ctx, unsigned input_index, L
      return;
   }

+   unsigned bit_size = info->input_fp16_lo_hi_valid[input_index] & 0x1 ? 16 : 32;
+   LLVMTypeRef int_type = bit_size == 16 ? ctx->ac.i16 : ctx->ac.i32;
+   LLVMTypeRef float_type = bit_size == 16 ? ctx->ac.f16 : ctx->ac.f32;
   unsigned num_vbos_in_user_sgprs = ctx->shader->selector->num_vbos_in_user_sgprs;
   union si_vs_fix_fetch fix_fetch;
   LLVMValueRef vb_desc;
@@ -129,6 +132,19 @@ static void load_input_vs(struct si_shader_context *ctx, unsigned input_index, L
      for (unsigned i = 0; i < 4; ++i)
         out[i] =
            LLVMBuildExtractElement(ctx->ac.builder, tmp, LLVMConstInt(ctx->ac.i32, i, false), "");
+
+      if (bit_size == 16) {
+         if (fix_fetch.u.format == AC_FETCH_FORMAT_UINT ||
+             fix_fetch.u.format == AC_FETCH_FORMAT_SINT) {
+            for (unsigned i = 0; i < 4; i++)
+               out[i] = LLVMBuildTrunc(ctx->ac.builder, out[i], ctx->ac.i16, "");
+         } else {
+            for (unsigned i = 0; i < 4; i++) {
+               out[i] = ac_to_float(&ctx->ac, out[i]);
+               out[i] = LLVMBuildFPTrunc(ctx->ac.builder, out[i], ctx->ac.f16, "");
+            }
+         }
+      }
      return;
   }

@@ -158,7 +174,8 @@ static void load_input_vs(struct si_shader_context *ctx, unsigned input_index, L
   for (unsigned i = 0; i < num_fetches; ++i) {
      LLVMValueRef voffset = LLVMConstInt(ctx->ac.i32, fetch_stride * i, 0);
      fetches[i] = ac_build_buffer_load_format(&ctx->ac, vb_desc, vertex_index, voffset,
-                                               channels_per_fetch, 0, true, false, false);
+                                               channels_per_fetch, 0, true,
+                                               bit_size == 16, false);
   }

   if (num_fetches == 1 && channels_per_fetch > 1) {
@@ -172,27 +189,28 @@ static void load_input_vs(struct si_shader_context *ctx, unsigned input_index, L
   }

   for (unsigned i = num_fetches; i < 4; ++i)
-      fetches[i] = LLVMGetUndef(ctx->ac.f32);
+      fetches[i] = LLVMGetUndef(float_type);

   if (fix_fetch.u.log_size <= 1 && fix_fetch.u.num_channels_m1 == 2 && required_channels == 4) {
      if (fix_fetch.u.format == AC_FETCH_FORMAT_UINT || fix_fetch.u.format == AC_FETCH_FORMAT_SINT)
-         fetches[3] = ctx->ac.i32_1;
+         fetches[3] = LLVMConstInt(int_type, 1, 0);
      else
-         fetches[3] = ctx->ac.f32_1;
+         fetches[3] = LLVMConstReal(float_type, 1);
   } else if (fix_fetch.u.log_size == 3 &&
              (fix_fetch.u.format == AC_FETCH_FORMAT_SNORM ||
               fix_fetch.u.format == AC_FETCH_FORMAT_SSCALED ||
               fix_fetch.u.format == AC_FETCH_FORMAT_SINT) &&
              required_channels == 4) {
+
      /* For 2_10_10_10, the hardware returns an unsigned value;
       * convert it to a signed one.
       */
      LLVMValueRef tmp = fetches[3];
-      LLVMValueRef c30 = LLVMConstInt(ctx->ac.i32, 30, 0);
+      LLVMValueRef c30 = LLVMConstInt(int_type, 30, 0);

      /* First, recover the sign-extended signed integer value. */
      if (fix_fetch.u.format == AC_FETCH_FORMAT_SSCALED)
-         tmp = LLVMBuildFPToUI(ctx->ac.builder, tmp, ctx->ac.i32, "");
+         tmp = LLVMBuildFPToUI(ctx->ac.builder, tmp, int_type, "");
      else
         tmp = ac_to_integer(&ctx->ac, tmp);

@@ -204,18 +222,18 @@ static void load_input_vs(struct si_shader_context *ctx, unsigned input_index, L
       */
      tmp = LLVMBuildShl(
         ctx->ac.builder, tmp,
-         fix_fetch.u.format == AC_FETCH_FORMAT_SNORM ? LLVMConstInt(ctx->ac.i32, 7, 0) : c30, "");
+         fix_fetch.u.format == AC_FETCH_FORMAT_SNORM ? LLVMConstInt(int_type, 7, 0) : c30, "");
      tmp = LLVMBuildAShr(ctx->ac.builder, tmp, c30, "");

      /* Convert back to the right type. */
      if (fix_fetch.u.format == AC_FETCH_FORMAT_SNORM) {
         LLVMValueRef clamp;
-         LLVMValueRef neg_one = LLVMConstReal(ctx->ac.f32, -1.0);
-         tmp = LLVMBuildSIToFP(ctx->ac.builder, tmp, ctx->ac.f32, "");
+         LLVMValueRef neg_one = LLVMConstReal(float_type, -1.0);
+         tmp = LLVMBuildSIToFP(ctx->ac.builder, tmp, float_type, "");
         clamp = LLVMBuildFCmp(ctx->ac.builder, LLVMRealULT, tmp, neg_one, "");
         tmp = LLVMBuildSelect(ctx->ac.builder, clamp, neg_one, tmp, "");
      } else if (fix_fetch.u.format == AC_FETCH_FORMAT_SSCALED) {
-         tmp = LLVMBuildSIToFP(ctx->ac.builder, tmp, ctx->ac.f32, "");
+         tmp = LLVMBuildSIToFP(ctx->ac.builder, tmp, float_type, "");
      }

      fetches[3] = tmp;
@@ -234,10 +252,8 @@ void si_llvm_load_vs_inputs(struct si_shader_context *ctx, struct nir_shader *ni

      load_input_vs(ctx, i, values);

-      for (unsigned chan = 0; chan < 4; chan++) {
-         ctx->inputs[i * 4 + chan] =
-            LLVMBuildBitCast(ctx->ac.builder, values[chan], ctx->ac.i32, "");
-      }
+      for (unsigned chan = 0; chan < 4; chan++)
+         ctx->inputs[i * 4 + chan] = ac_to_integer(&ctx->ac, values[chan]);
   }
 }

--- a/src/gallium/drivers/radeonsi/si_shader_nir.c
+++ b/src/gallium/drivers/radeonsi/si_shader_nir.c
@@ -72,8 +72,10 @@ static void scan_io_usage(struct si_shader_info *info, nir_intrinsic_instr *intr
   }
   assert(bit_size != 64 && !(mask & ~0xf) && "64-bit IO should have been lowered");

-   /* Convert the 16-bit component mask to a 32-bit component mask. */
-   if (bit_size == 16) {
+   /* Convert the 16-bit component mask to a 32-bit component mask except for VS inputs
+    * where the mask is untyped.
+    */
+   if (bit_size == 16 && !is_input) {
      unsigned new_mask = 0;
      for (unsigned i = 0; i < 4; i++) {
         if (mask & (1 << i))