llvmpipe: Call lp_rast_shade_quads from tri rasterizer.

2009-10-08 19:53:14 +01:00 · 2009-10-08 19:53:14 +01:00 · 7ef36171d5
parent 07ee87e664
commit 7ef36171d5
2 changed files with 96 additions and 195 deletions
--- a/src/gallium/drivers/llvmpipe/lp_rast.h
+++ b/src/gallium/drivers/llvmpipe/lp_rast.h
@ -92,6 +92,9 @@ struct lp_rast_triangle {
   float dx12;
   float dx23;
   float dx31;
   /* inputs for the shader */
   struct lp_rast_shader_inputs *inputs;
 };
 struct clear_tile {
--- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c
+++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c
@ -29,80 +29,46 @@
 * Rasterization for binned triangles within a tile
 */
 #include "util/u_math.h"
 #include "lp_quad.h"
 #include "lp_quad_pipe.h"
 #include "lp_rast_priv.h"
 #include "lp_tile_soa.h"
-#define BLOCKSIZE 4
+#define BLOCKSIZE 8
 /* Convert 8x8 block into four runs of quads and render each in turn.
 */
 #if (BLOCKSIZE == 8)
-static void block_full( struct lp_rast_triangle *tri, int x, int y )
+static void block_full( struct lp_rasterizer *rast,
                        const struct lp_rast_triangle *tri,
                        int x, int y )
 {
-   struct quad_header *ptrs[4];
+   const unsigned masks[4] = {~0, ~0, ~0, ~0};
   int i;
   tri->quad[0].input.x0 = x + 0;
   tri->quad[1].input.x0 = x + 2;
   tri->quad[2].input.x0 = x + 4;
   tri->quad[3].input.x0 = x + 6;
   for (i = 0; i < 4; i++, y += 2) {
      tri->quad[0].inout.mask = 0xf;
      tri->quad[1].inout.mask = 0xf;
      tri->quad[2].inout.mask = 0xf;
      tri->quad[3].inout.mask = 0xf;
      tri->quad[0].input.y0 = y;
      tri->quad[1].input.y0 = y;
      tri->quad[2].input.y0 = y;
      tri->quad[3].input.y0 = y;
      /* XXX: don't bother with this ptrs business */
      ptrs[0] = &tri->quad[0];
      ptrs[1] = &tri->quad[1];
      ptrs[2] = &tri->quad[2];
      ptrs[3] = &tri->quad[3];
      tri->llvmpipe->quad.first->run( tri->llvmpipe->quad.first, ptrs, 4 );
   }
 }
 #else
 static void block_full( struct lp_rast_triangle *tri, int x, int y )
 {
   struct quad_header *ptrs[4];
   int iy;
-   tri->quad[0].input.x0 = x + 0;
+   for (iy = 0; iy < 8; iy += 2)
-   tri->quad[1].input.x0 = x + 2;
+      lp_rast_shade_quads(rast, tri->inputs, x, y + iy, masks);
 }
 #else
 static void block_full( struct lp_rasterizer *rast,
                        const struct lp_rast_triangle *tri,
                        int x, int y )
 {
   const unsigned masks[4] = {~0, ~0, 0, 0}; /* FIXME: Wasting quads!!! */
   int iy;
-   for (iy = 0; iy < 4; iy += 2) {
+   for (iy = 0; iy < 4; iy += 2)
-      tri->quad[0].inout.mask = 0xf;
+      lp_rast_shade_quads(rast, tri->inputs, x, y + iy, masks);
      tri->quad[1].inout.mask = 0xf;
      tri->quad[0].input.y0 = y + iy;
      tri->quad[1].input.y0 = y + iy;
      /* XXX: don't bother with this ptrs business */
      ptrs[0] = &tri->quad[0];
      ptrs[1] = &tri->quad[1];
      tri->llvmpipe->quad.first->run( tri->llvmpipe->quad.first, ptrs, 2 );
   }
 }
 #endif
-static void
+static INLINE unsigned
-do_quad( struct lp_rasterizer *rast,
+do_quad( const struct lp_rast_triangle *tri,
 	 int x, int y,
 	 float c1, float c2, float c3 )
 {
   struct lp_rast_triangle *tri = rast->tri;
   struct quad_header *quad = &rast->quad[0];
   float xstep1 = -tri->dy12;
   float xstep2 = -tri->dy23;
   float xstep3 = -tri->dy31;
@ -111,43 +77,41 @@ do_quad( struct lp_rasterizer *rast,
   float ystep2 = tri->dx23;
   float ystep3 = tri->dx31;
-   quad->input.x0 = x;
+   unsigned mask = 0;
   quad->input.y0 = y;
   quad->inout.mask = 0;
   if (c1 > 0 &&
       c2 > 0 &&
       c3 > 0)
-      quad->inout.mask |= 1;
+      mask |= 1;
   if (c1 + xstep1 > 0 && 
       c2 + xstep2 > 0 && 
       c3 + xstep3 > 0)
-      quad->inout.mask |= 2;
+      mask |= 2;
   if (c1 + ystep1 > 0 && 
       c2 + ystep2 > 0 && 
       c3 + ystep3 > 0)
-      quad->inout.mask |= 4;
+      mask |= 4;
   if (c1 + ystep1 + xstep1 > 0 && 
       c2 + ystep2 + xstep2 > 0 && 
       c3 + ystep3 + xstep3 > 0)
-      quad->inout.mask |= 8;
+      mask |= 8;
-   if (quad->inout.mask)
+   return mask;
      rast->state->run( rast->state->state, &quad, 1 );
 }
 /* Evaluate each pixel in a block, generate a mask and possibly render
 * the quad:
 */
 static void
-do_block( struct lp_rast_triangle *tri,
+do_block( struct lp_rasterizer *rast,
-	 int x, int y,
+          const struct lp_rast_triangle *tri,
-	 float c1,
+          int x, int y,
-	 float c2,
+          float c1,
-	 float c3 )
+          float c2,
          float c3 )
 {
   const int step = 2;
@ -166,19 +130,24 @@ do_block( struct lp_rast_triangle *tri,
      float cx2 = c2;
      float cx3 = c3;
      unsigned masks[4] = {0, 0, 0, 0};
      for (ix = 0; ix < BLOCKSIZE; ix += 2) {
-	 do_quad(tri, x+ix, y+iy, cx1, cx2, cx3);
+	 masks[ix >> 1] = do_quad(tri, x + ix, y + iy, cx1, cx2, cx3);
 	 cx1 += xstep1;
 	 cx2 += xstep2;
 	 cx3 += xstep3;
      }
      lp_rast_shade_quads(rast, tri->inputs, x, y + iy, masks);
      c1 += ystep1;
      c2 += ystep2;
      c3 += ystep3;
   }
 }
@ -187,8 +156,9 @@ do_block( struct lp_rast_triangle *tri,
 * for this triangle:
 */
 void lp_rast_triangle( struct lp_rasterizer *rast,
-		       const struct lp_rast_triangle *tri )
+                       const union lp_rast_cmd_arg *arg )
 {
   const struct lp_rast_triangle *tri = arg->triangle;
   int minx, maxx, miny, maxy;
   /* Clamp to tile dimensions:
@ -205,136 +175,64 @@ void lp_rast_triangle( struct lp_rasterizer *rast,
      return;
   }
-   /* Bind parameter interpolants:
+   const int step = BLOCKSIZE;
    */
   for (i = 0; i < Elements(rast->quad); i++) {
      rast->quad[i].coef = tri->coef;
      rast->quad[i].posCoef = &tri->position_coef;
   }
-   /* Small area?
+   float ei1 = tri->ei1 * step;
-    */
+   float ei2 = tri->ei2 * step;
-   if (miny + 16 > maxy &&
+   float ei3 = tri->ei3 * step;
-       minx + 16 > maxx)
+
   float eo1 = tri->eo1 * step;
   float eo2 = tri->eo2 * step;
   float eo3 = tri->eo3 * step;
   float xstep1 = -step * tri->dy12;
   float xstep2 = -step * tri->dy23;
   float xstep3 = -step * tri->dy31;
   float ystep1 = step * tri->dx12;
   float ystep2 = step * tri->dx23;
   float ystep3 = step * tri->dx31;
   int x, y;
   minx &= ~(step-1);
   miny &= ~(step-1);
   for (y = miny; y < maxy; y += step)
   {
-      const int step = 2;
+      float cx1 = c1;
      float cx2 = c2;
      float cx3 = c3;
-      float xstep1 = -step * tri->dy12;
+      for (x = minx; x < maxx; x += step)
-      float xstep2 = -step * tri->dy23;
+      {
-      float xstep3 = -step * tri->dy31;
+         if (cx1 + eo1 < 0 ||
             cx2 + eo2 < 0 ||
             cx3 + eo3 < 0)
         {
         }
         else if (cx1 + ei1 > 0 &&
                  cx2 + ei2 > 0 &&
                  cx3 + ei3 > 0)
         {
            block_full(rast, tri, x, y); /* trivial accept */
         }
         else
         {
            do_block(rast, tri, x, y, cx1, cx2, cx3);
         }
-      float ystep1 = step * tri->dx12;
+         /* Iterate cx values across the region:
-      float ystep2 = step * tri->dx23;
+          */
-      float ystep3 = step * tri->dx31;
+         cx1 += xstep1;
         cx2 += xstep2;
         cx3 += xstep3;
      }
-      float eo1 = tri->eo1 * step;
+      /* Iterate c values down the region:
      float eo2 = tri->eo2 * step;
      float eo3 = tri->eo3 * step;
      int x, y;
      minx &= ~(step-1);
      maxx &= ~(step-1);
      /* Subdivide space into NxM blocks, where each block is square and
       * power-of-four in dimension.
       *
       * Trivially accept or reject blocks, else jump to per-pixel
       * examination above.
       */
-      for (y = miny; y < maxy; y += step)
+      c1 += ystep1;
-      {
+      c2 += ystep2;
-	 float cx1 = c1;
+      c3 += ystep3;
 	 float cx2 = c2;
 	 float cx3 = c3;
 	 for (x = minx; x < maxx; x += step)
 	 {
 	    if (cx1 + eo1 < 0 || 
 		cx2 + eo2 < 0 ||
 		cx3 + eo3 < 0) 
 	    {
 	    }
 	    else 
 	    {
 	       do_quad(&tri, x, y, cx1, cx2, cx3);
 	    }
 	    /* Iterate cx values across the region:
 	     */
 	    cx1 += xstep1;
 	    cx2 += xstep2;
 	    cx3 += xstep3;
 	 }
 	 /* Iterate c values down the region:
 	  */
 	 c1 += ystep1;
 	 c2 += ystep2;
 	 c3 += ystep3;    
      }
   }
   else 
   {
      const int step = BLOCKSIZE;
      float ei1 = tri->ei1 * step;
      float ei2 = tri->ei2 * step;
      float ei3 = tri->ei3 * step;
      float eo1 = tri->eo1 * step;
      float eo2 = tri->eo2 * step;
      float eo3 = tri->eo3 * step;
      float xstep1 = -step * tri->dy12;
      float xstep2 = -step * tri->dy23;
      float xstep3 = -step * tri->dy31;
      float ystep1 = step * tri->dx12;
      float ystep2 = step * tri->dx23;
      float ystep3 = step * tri->dx31;
      int x, y;
      minx &= ~(step-1);
      miny &= ~(step-1);
      for (y = miny; y < maxy; y += step)
      {
 	 float cx1 = c1;
 	 float cx2 = c2;
 	 float cx3 = c3;
 	 for (x = minx; x < maxx; x += step)
 	 {
 	    if (cx1 + eo1 < 0 || 
 		cx2 + eo2 < 0 ||
 		cx3 + eo3 < 0) 
 	    {
 	    }
 	    else if (cx1 + ei1 > 0 &&
 		     cx2 + ei2 > 0 &&
 		     cx3 + ei3 > 0) 
 	    {
 	       block_full(&tri, x, y); /* trivial accept */
 	    }
 	    else 
 	    {
 	       do_block(&tri, x, y, cx1, cx2, cx3);
 	    }
 	    /* Iterate cx values across the region:
 	     */
 	    cx1 += xstep1;
 	    cx2 += xstep2;
 	    cx3 += xstep3;
 	 }
 	 /* Iterate c values down the region:
 	  */
 	 c1 += ystep1;
 	 c2 += ystep2;
 	 c3 += ystep3;    
      }
   }
 }