llvmpipe: asst. clean-ups in lp_linear_interp.c

Comments, move var decls, fix code formatting. Signed-off-by: Brian Paul <brianp@vmware.com> Reviewed-by: Dave Airlie <airlied@redhat.com> Reviewed-by: Roland Scheidegger <sroland@vmware.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17561>
2022-06-22 12:27:44 -06:00 · 2022-06-22 12:27:44 -06:00 · ae9ec54f68
parent 49966374c8
commit ae9ec54f68
1 changed files with 20 additions and 23 deletions
--- a/src/gallium/drivers/llvmpipe/lp_linear_interp.c
+++ b/src/gallium/drivers/llvmpipe/lp_linear_interp.c
@ -71,22 +71,21 @@ interp_0_8(struct lp_linear_elem *elem)
   struct lp_linear_interp *interp = (struct lp_linear_interp *)elem;
   uint32_t *row = interp->row;
   __m128i a0 = interp->a0;
-   __m128i dadx = interp->dadx;
-   int width = (interp->width + 3) & ~3;
-   int i;
+   const __m128i dadx = interp->dadx;
+   const int width = (interp->width + 3) & ~3;

-   for (i = 0; i < width; i += 4) {
-      __m128i l, h;
+   for (int i = 0; i < width; i += 4) {
+      __m128i l = _mm_srai_epi16(a0, 7); // l = a0 >> 7
+      a0 = _mm_add_epi16(a0, dadx);      // a0 += dadx

-      l = _mm_srai_epi16(a0, 7);
-      a0 = _mm_add_epi16(a0, dadx);
-
-      h = _mm_srai_epi16(a0, 7);
-      a0 = _mm_add_epi16(a0, dadx);
+      __m128i h = _mm_srai_epi16(a0, 7); // h = a0 >> 7
+      a0 = _mm_add_epi16(a0, dadx);      // a0 += dadx

+      // pack l[0..7] and h[0..7] as 16 bytes
      *(__m128i *)&row[i] =  _mm_packus_epi16(l, h);
   }

+   // advance to next row
   interp->a0 = _mm_add_epi16(interp->a0, interp->dady);
   return interp->row;
 }
@ -133,7 +132,6 @@ lp_linear_init_interp(struct lp_linear_interp *interp,
   int16_t s0_fp[8];
   int16_t dsdx_fp[4];
   int16_t dsdy_fp[4];
-   int j;

   /* Zero coefficients to avoid using uninitialised values */
   memset(s0, 0, sizeof(s0));
@ -144,7 +142,7 @@ lp_linear_init_interp(struct lp_linear_interp *interp,
   memset(dsdy_fp, 0, sizeof(dsdy_fp));

   if (perspective) {
-      for (j = 0; j < 4; j++) {
+      for (unsigned j = 0; j < 4; j++) {
         if (usage_mask & (1<<j)) {
            s0[j]   =   a0[j] * oow;
            dsdx[j] = dadx[j] * oow;
@ -152,7 +150,7 @@ lp_linear_init_interp(struct lp_linear_interp *interp,
         }
      }
   } else {
-      for (j = 0; j < 4; j++) {
+      for (unsigned j = 0; j < 4; j++) {
         if (usage_mask & (1<<j)) {
            s0[j]   =   a0[j];
            dsdx[j] = dadx[j];
@ -175,31 +173,32 @@ lp_linear_init_interp(struct lp_linear_interp *interp,
    *   - check all interpolants for min/max 0..1 (else mark as
    *          non-fastpath)
    */
-   for (j = 0; j < 4; j++) {
+   for (unsigned j = 0; j < 4; j++) {
      if (usage_mask & (1<<j)) {
+         // compute texcoords at rect corners
         float a = s0[j];
         float b = s0[j] + (width  - 1) * dsdx[j];
         float c = s0[j] + (height - 1) * dsdy[j];
         float d = s0[j] + (height - 1) * dsdy[j] + (width - 1) * dsdx[j];

         if (MIN4(a,b,c,d) < 0.0)
-            FAIL("min < 0.0");
+            FAIL("min < 0.0"); // out of bounds

         if (MAX4(a,b,c,d) > 1.0)
-            FAIL("max > 1.0");
+            FAIL("max > 1.0"); // out of bounds

         dsdx_fp[j]   = float_to_sfixed_1_15(dsdx[j]);
         dsdy_fp[j]   = float_to_sfixed_1_15(dsdy[j]);

-         s0_fp[j]     = float_to_ufixed_1_15(s0[j]);
-         s0_fp[j + 4] = s0_fp[j] + dsdx_fp[j];
+         s0_fp[j]     = float_to_ufixed_1_15(s0[j]);  // first pixel
+         s0_fp[j + 4] = s0_fp[j] + dsdx_fp[j];        // second pixel

         dsdx_fp[j] *= 2;
      }
   }

   interp->width = align(width, 4);
-
+   /* RGBA->BGRA swizzle here */
   interp->a0    = _mm_setr_epi16(s0_fp[2], s0_fp[1], s0_fp[0], s0_fp[3],
                                  s0_fp[6], s0_fp[5], s0_fp[4], s0_fp[7]);

@ -215,12 +214,10 @@ lp_linear_init_interp(struct lp_linear_interp *interp,
   if (dsdy[0] == 0 &&
       dsdy[1] == 0 &&
       dsdy[2] == 0 &&
-       dsdy[3] == 0)
-   {
+       dsdy[3] == 0) {
      interp_0_8(&interp->base);
      interp->base.fetch = interp_noop;
-   }
-   else {
+   } else {
      interp->base.fetch = interp_0_8;
   }