gallivm: Allow conversions to/from registers of different sizes.

For example, this allows converting from 4 x float32 to 4 x unorm8 and vice versa. Uses code and ideas from Brian Paul.
2010-07-01 12:33:34 +01:00 · 2010-07-01 12:33:34 +01:00 · b919bb7f61
parent a70ec096aa
commit b919bb7f61
4 changed files with 141 additions and 25 deletions
--- a/src/gallium/auxiliary/gallivm/lp_bld_conv.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_conv.c
@ -83,6 +83,9 @@
 *
 * Although the result values can be scaled to an arbitrary bit width specified
 * by dst_width, the actual result type will have the same width.
 *
 * Ex: src = { float, float, float, float }
 * return { i32, i32, i32, i32 } where each value is in [0, 2^dst_width-1].
 */
 LLVMValueRef
 lp_build_clamped_float_to_unsigned_norm(LLVMBuilderRef builder,
@ -152,6 +155,8 @@ lp_build_clamped_float_to_unsigned_norm(LLVMBuilderRef builder,
 /**
 * Inverse of lp_build_clamped_float_to_unsigned_norm above.
 * Ex: src = { i32, i32, i32, i32 } with values in range [0, 2^src_width-1]
 * return {float, float, float, float} with values in range [0, 1].
 */
 LLVMValueRef
 lp_build_unsigned_norm_to_float(LLVMBuilderRef builder,
@ -219,14 +224,13 @@ lp_build_conv(LLVMBuilderRef builder,
   unsigned num_tmps;
   unsigned i;
   /* Register width must remain constant */
   assert(src_type.width * src_type.length == dst_type.width * dst_type.length);
   /* We must not lose or gain channels. Only precision */
   assert(src_type.length * num_srcs == dst_type.length * num_dsts);
   assert(src_type.length <= LP_MAX_VECTOR_LENGTH);
   assert(dst_type.length <= LP_MAX_VECTOR_LENGTH);
   assert(num_srcs <= LP_MAX_VECTOR_LENGTH);
   assert(num_dsts <= LP_MAX_VECTOR_LENGTH);
   tmp_type = src_type;
   for(i = 0; i < num_srcs; ++i)
@ -330,25 +334,11 @@ lp_build_conv(LLVMBuilderRef builder,
   assert(!tmp_type.floating || tmp_type.width == dst_type.width);
-   if(tmp_type.width > dst_type.width) {
+   lp_build_resize(builder, tmp_type, dst_type, tmp, num_srcs, tmp, num_dsts);
      assert(num_dsts == 1);
      tmp[0] = lp_build_pack(builder, tmp_type, dst_type, TRUE, tmp, num_tmps);
      tmp_type.width = dst_type.width;
      tmp_type.length = dst_type.length;
      num_tmps = 1;
   }
-   if(tmp_type.width < dst_type.width) {
+   tmp_type.width  = dst_type.width;
-      assert(num_tmps == 1);
+   tmp_type.length = dst_type.length;
-      lp_build_unpack(builder, tmp_type, dst_type, tmp[0], tmp, num_dsts);
+   num_tmps        = num_dsts;
      tmp_type.width = dst_type.width;
      tmp_type.length = dst_type.length;
      num_tmps = num_dsts;
   }
   assert(tmp_type.width == dst_type.width);
   assert(tmp_type.length == dst_type.length);
   assert(num_tmps == num_dsts);
   /*
    * Scale to the widest range
--- a/src/gallium/auxiliary/gallivm/lp_bld_pack.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_pack.c
@ -427,3 +427,109 @@ lp_build_pack(LLVMBuilderRef builder,
   return tmp[0];
 }
 /**
 * Narrow or widen the bit width of the elements held in src.
 *
 * Only 1:1, 1:N and M:1 vector counts are accepted, and the total number of
 * channels must be the same on both sides. Floating point types are accepted
 * only when the source and destination widths already match.
 *
 * If the total register width stays the same, narrowing is delegated to
 * lp_build_pack() and widening to lp_build_unpack(); otherwise each element
 * is truncated or extended individually. Results are gathered in a local
 * array and only copied to dst once the conversion is complete.
 */
 void
 lp_build_resize(LLVMBuilderRef builder,
                struct lp_type src_type,
                struct lp_type dst_type,
                const LLVMValueRef *src, unsigned num_srcs,
                LLVMValueRef *dst, unsigned num_dsts)
 {
   /* Non-zero when source and destination vectors occupy the same number of bits */
   const int same_register_width =
      src_type.width * src_type.length == dst_type.width * dst_type.length;
   LLVMValueRef result[LP_MAX_VECTOR_LENGTH];
   unsigned chan;
   unsigned n;
   /* Floating point data is only accepted when the width does not change */
   assert(!src_type.floating || src_type.width == dst_type.width);
   /* The channel count must be preserved; only the precision may change */
   assert(src_type.length * num_srcs == dst_type.length * num_dsts);
   /* M:N conversions are unsupported -- just 1:N, M:1 and 1:1 */
   assert(num_srcs == 1 || num_dsts == 1);
   assert(src_type.length <= LP_MAX_VECTOR_LENGTH);
   assert(dst_type.length <= LP_MAX_VECTOR_LENGTH);
   assert(num_srcs <= LP_MAX_VECTOR_LENGTH);
   assert(num_dsts <= LP_MAX_VECTOR_LENGTH);
   if (src_type.width == dst_type.width) {
      /*
       * Identical widths: hand the single vector through untouched.
       */
      assert(num_srcs == 1);
      assert(num_dsts == 1);
      result[0] = src[0];
   }
   else if (src_type.width > dst_type.width) {
      /*
       * Narrowing: always produces exactly one destination vector.
       */
      assert(num_dsts == 1);
      if (!same_register_width) {
         /*
          * Register width changes -- truncate one element at a time.
          */
         assert(src_type.length == dst_type.length);
         result[0] = lp_build_undef(dst_type);
         for (chan = 0; chan < dst_type.length; ++chan) {
            LLVMValueRef lane = LLVMConstInt(LLVMInt32Type(), chan, 0);
            LLVMValueRef elem = LLVMBuildExtractElement(builder, src[0], lane, "");
            elem = LLVMBuildTrunc(builder, elem, lp_build_elem_type(dst_type), "");
            result[0] = LLVMBuildInsertElement(builder, result[0], elem, lane, "");
         }
      }
      else {
         /*
          * Same register width -- let the pack helper do the work.
          */
         result[0] = lp_build_pack(builder, src_type, dst_type, TRUE, src, num_srcs);
      }
   }
   else {
      /*
       * Widening: always starts from exactly one source vector.
       */
      assert(num_srcs == 1);
      if (!same_register_width) {
         /*
          * Register width changes -- extend one element at a time.
          */
         assert(src_type.length == dst_type.length);
         result[0] = lp_build_undef(dst_type);
         for (chan = 0; chan < dst_type.length; ++chan) {
            LLVMValueRef lane = LLVMConstInt(LLVMInt32Type(), chan, 0);
            LLVMValueRef elem = LLVMBuildExtractElement(builder, src[0], lane, "");
            /* Sign-extend only when both types are signed; zero-extend otherwise */
            if (!src_type.sign || !dst_type.sign) {
               elem = LLVMBuildZExt(builder, elem, lp_build_elem_type(dst_type), "");
            } else {
               elem = LLVMBuildSExt(builder, elem, lp_build_elem_type(dst_type), "");
            }
            result[0] = LLVMBuildInsertElement(builder, result[0], elem, lane, "");
         }
      }
      else {
         /*
          * Same register width -- let the unpack helper do the work.
          */
         lp_build_unpack(builder, src_type, dst_type, src[0], result, num_dsts);
      }
   }
   /* Publish the staged results to the caller's array */
   for (n = 0; n != num_dsts; ++n) {
      dst[n] = result[n];
   }
 }
--- a/src/gallium/auxiliary/gallivm/lp_bld_pack.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_pack.h
@ -92,4 +92,12 @@ lp_build_pack(LLVMBuilderRef builder,
              const LLVMValueRef *src, unsigned num_srcs);
 /**
 * Truncate or expand the bit width of the elements of the src vectors,
 * writing the resulting vectors to dst (implemented in lp_bld_pack.c).
 */
 void
 lp_build_resize(LLVMBuilderRef builder,
                struct lp_type src_type,
                struct lp_type dst_type,
                const LLVMValueRef *src, unsigned num_srcs,
                LLVMValueRef *dst, unsigned num_dsts);
 #endif /* !LP_BLD_PACK_H */
--- a/src/gallium/drivers/llvmpipe/lp_test_conv.c
+++ b/src/gallium/drivers/llvmpipe/lp_test_conv.c
@ -167,19 +167,26 @@ test_one(unsigned verbose,
   unsigned i, j;
   void *code;
   if (src_type.width * src_type.length != dst_type.width * dst_type.length ||
       src_type.length != dst_type.length) {
      return TRUE;
   }
   if(verbose >= 1)
      dump_conv_types(stdout, src_type, dst_type);
-   if(src_type.length > dst_type.length) {
+   if (src_type.length > dst_type.length) {
      num_srcs = 1;
      num_dsts = src_type.length/dst_type.length;
   }
-   else  {
+   else if (src_type.length < dst_type.length) {
      num_dsts = 1;
      num_srcs = dst_type.length/src_type.length;
   }
-
+   else  {
-   assert(src_type.width * src_type.length == dst_type.width * dst_type.length);
+      num_dsts = 1;
      num_srcs = 1;
   }
   /* We must not lose or gain channels. Only precision */
   assert(src_type.length * num_srcs == dst_type.length * num_dsts);
@ -381,6 +388,11 @@ const struct lp_type conv_types[] = {
   {  FALSE, FALSE,  TRUE, FALSE,     8,  16 },
   {  FALSE, FALSE, FALSE,  TRUE,     8,  16 },
   {  FALSE, FALSE, FALSE, FALSE,     8,  16 },
   {  FALSE, FALSE,  TRUE,  TRUE,     8,   4 },
   {  FALSE, FALSE,  TRUE, FALSE,     8,   4 },
   {  FALSE, FALSE, FALSE,  TRUE,     8,   4 },
   {  FALSE, FALSE, FALSE, FALSE,     8,   4 },
 };