gallivm: generalize the compressed format soa fetch a bit

This can now handle rgtc (unorm) too - this path no longer handles plain formats, but that's unnecessary since they now all have their proper SoA unpack (this will still be dog-slow, though, due to the actual fetch being per-pixel util fallbacks). Reviewed-by: Jose Fonseca <jfonseca@vmware.com>
2016-12-21 04:30:02 +01:00 · 2016-12-21 04:30:02 +01:00 · cb81460dcc
parent 3c98e3cd63
commit cb81460dcc
1 changed files with 49 additions and 37 deletions
--- a/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c
@ -733,64 +733,69 @@ lp_build_fetch_rgba_soa(struct gallivm_state *gallivm,

   /*
    * Try calling lp_build_fetch_rgba_aos for all pixels.
+    * Should only really be hit by subsampled and compressed formats by now
+    * (for s3tc this includes srgb, for rgtc only the unorm ones).
+    * (This is invalid for plain 8unorm formats because we're lazy with
+    * the swizzle since some results would arrive swizzled, some not.)
    */

-   if (util_format_fits_8unorm(format_desc) &&
+   if ((format_desc->layout != UTIL_FORMAT_LAYOUT_PLAIN) &&
+       (util_format_fits_8unorm(format_desc) ||
+        format_desc->layout == UTIL_FORMAT_LAYOUT_S3TC) &&
       type.floating && type.width == 32 &&
       (type.length == 1 || (type.length % 4 == 0))) {
      struct lp_type tmp_type;
-      LLVMValueRef tmp;
+      struct lp_build_context bld;
+      LLVMValueRef packed, rgba[4];
+      const struct util_format_description *flinear_desc;
+      const struct util_format_description *frgba8_desc;
+      unsigned chan;

+      lp_build_context_init(&bld, gallivm, type);
+
+      /*
+       * Make sure the conversion in aos really only does convert to rgba8
+       * and not anything more (so use linear format, adjust type).
+       */
+      flinear_desc = util_format_description(util_format_linear(format));
      memset(&tmp_type, 0, sizeof tmp_type);
      tmp_type.width = 8;
      tmp_type.length = type.length * 4;
      tmp_type.norm = TRUE;

-      tmp = lp_build_fetch_rgba_aos(gallivm, format_desc, tmp_type,
-                                    aligned, base_ptr, offset, i, j, cache);
+      packed = lp_build_fetch_rgba_aos(gallivm, flinear_desc, tmp_type,
+                                       aligned, base_ptr, offset, i, j, cache);
+      packed = LLVMBuildBitCast(builder, packed, bld.int_vec_type, "");

-      lp_build_rgba8_to_fi32_soa(gallivm,
-                                type,
-                                tmp,
-                                rgba_out);
-
-      return;
-   }
-
-   if (format_desc->layout == UTIL_FORMAT_LAYOUT_S3TC &&
-       /* non-srgb case is already handled above */
-       format_desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB &&
-       type.floating && type.width == 32 &&
-       (type.length == 1 || (type.length % 4 == 0)) &&
-       cache) {
-      const struct util_format_description *format_decompressed;
-      const struct util_format_description *flinear_desc;
-      LLVMValueRef packed;
-      flinear_desc = util_format_description(util_format_linear(format_desc->format));
-      /* This probably only works with aligned data */
-      packed = lp_build_fetch_cached_texels(gallivm,
-                                            flinear_desc,
-                                            type.length,
-                                            base_ptr,
-                                            offset,
-                                            i, j,
-                                            cache);
-      packed = LLVMBuildBitCast(builder, packed,
-                                lp_build_int_vec_type(gallivm, type), "");
      /*
-       * The values are now packed so they match ordinary srgb RGBA8 format,
+       * The values are now packed so they match ordinary (srgb) RGBA8 format,
       * hence need to use matching format for unpack.
       */
-      format_decompressed = util_format_description(PIPE_FORMAT_R8G8B8A8_SRGB);
-
+      frgba8_desc = util_format_description(PIPE_FORMAT_R8G8B8A8_UNORM);
+      if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) {
+         assert(format_desc->layout == UTIL_FORMAT_LAYOUT_S3TC);
+         frgba8_desc = util_format_description(PIPE_FORMAT_R8G8B8A8_SRGB);
+      }
      lp_build_unpack_rgba_soa(gallivm,
-                               format_decompressed,
+                               frgba8_desc,
                               type,
-                               packed, rgba_out);
+                               packed, rgba);

+      /*
+       * We converted 4 channels. Make sure llvm can drop unneeded ones
+       * (luckily the rgba order is fixed, only LA needs special case).
+       */
+      for (chan = 0; chan < 4; chan++) {
+         enum pipe_swizzle swizzle = format_desc->swizzle[chan];
+         if (chan == 3 && util_format_is_luminance_alpha(format)) {
+            swizzle = PIPE_SWIZZLE_W;
+         }
+         rgba_out[chan] = lp_build_swizzle_soa_channel(&bld, rgba, swizzle);
+      }
      return;
   }

+
   /*
    * Fallback to calling lp_build_fetch_rgba_aos for each pixel.
    *
@ -798,6 +803,13 @@ lp_build_fetch_rgba_soa(struct gallivm_state *gallivm,
    * miss some opportunities to do vectorization, but this is
    * convenient for formats or scenarios for which there was no
    * opportunity or incentive to optimize.
+    *
+    * We do NOT want to end up here, this typically is quite terrible,
+    * in particular if the formats have less than 4 channels.
+    *
+    * Right now, this should only be hit for:
+    * - RGTC snorm formats
+    *   (those miss fast fetch functions hence they are terrible anyway)
    */

   {