llvmpipe: Call lp_rast_shade_quads from tri rasterizer.

This commit is contained in:
José Fonseca 2009-10-08 19:53:14 +01:00
parent 07ee87e664
commit 7ef36171d5
2 changed files with 96 additions and 195 deletions

View File

@ -92,6 +92,9 @@ struct lp_rast_triangle {
float dx12; float dx12;
float dx23; float dx23;
float dx31; float dx31;
/* inputs for the shader */
struct lp_rast_shader_inputs *inputs;
}; };
struct clear_tile { struct clear_tile {

View File

@ -29,80 +29,46 @@
* Rasterization for binned triangles within a tile * Rasterization for binned triangles within a tile
*/ */
#include "util/u_math.h"
#include "lp_quad.h" #include "lp_quad.h"
#include "lp_quad_pipe.h"
#include "lp_rast_priv.h" #include "lp_rast_priv.h"
#include "lp_tile_soa.h"
#define BLOCKSIZE 4 #define BLOCKSIZE 8
/* Convert 8x8 block into four runs of quads and render each in turn. /* Convert 8x8 block into four runs of quads and render each in turn.
*/ */
#if (BLOCKSIZE == 8) #if (BLOCKSIZE == 8)
static void block_full( struct lp_rast_triangle *tri, int x, int y ) static void block_full( struct lp_rasterizer *rast,
const struct lp_rast_triangle *tri,
int x, int y )
{ {
struct quad_header *ptrs[4]; const unsigned masks[4] = {~0, ~0, ~0, ~0};
int i;
tri->quad[0].input.x0 = x + 0;
tri->quad[1].input.x0 = x + 2;
tri->quad[2].input.x0 = x + 4;
tri->quad[3].input.x0 = x + 6;
for (i = 0; i < 4; i++, y += 2) {
tri->quad[0].inout.mask = 0xf;
tri->quad[1].inout.mask = 0xf;
tri->quad[2].inout.mask = 0xf;
tri->quad[3].inout.mask = 0xf;
tri->quad[0].input.y0 = y;
tri->quad[1].input.y0 = y;
tri->quad[2].input.y0 = y;
tri->quad[3].input.y0 = y;
/* XXX: don't bother with this ptrs business */
ptrs[0] = &tri->quad[0];
ptrs[1] = &tri->quad[1];
ptrs[2] = &tri->quad[2];
ptrs[3] = &tri->quad[3];
tri->llvmpipe->quad.first->run( tri->llvmpipe->quad.first, ptrs, 4 );
}
}
#else
static void block_full( struct lp_rast_triangle *tri, int x, int y )
{
struct quad_header *ptrs[4];
int iy; int iy;
tri->quad[0].input.x0 = x + 0; for (iy = 0; iy < 8; iy += 2)
tri->quad[1].input.x0 = x + 2; lp_rast_shade_quads(rast, tri->inputs, x, y + iy, masks);
}
#else
static void block_full( struct lp_rasterizer *rast,
const struct lp_rast_triangle *tri,
int x, int y )
{
const unsigned masks[4] = {~0, ~0, 0, 0}; /* FIXME: Wasting quads!!! */
int iy;
for (iy = 0; iy < 4; iy += 2) { for (iy = 0; iy < 4; iy += 2)
tri->quad[0].inout.mask = 0xf; lp_rast_shade_quads(rast, tri->inputs, x, y + iy, masks);
tri->quad[1].inout.mask = 0xf;
tri->quad[0].input.y0 = y + iy;
tri->quad[1].input.y0 = y + iy;
/* XXX: don't bother with this ptrs business */
ptrs[0] = &tri->quad[0];
ptrs[1] = &tri->quad[1];
tri->llvmpipe->quad.first->run( tri->llvmpipe->quad.first, ptrs, 2 );
}
} }
#endif #endif
static void static INLINE unsigned
do_quad( struct lp_rasterizer *rast, do_quad( const struct lp_rast_triangle *tri,
int x, int y, int x, int y,
float c1, float c2, float c3 ) float c1, float c2, float c3 )
{ {
struct lp_rast_triangle *tri = rast->tri;
struct quad_header *quad = &rast->quad[0];
float xstep1 = -tri->dy12; float xstep1 = -tri->dy12;
float xstep2 = -tri->dy23; float xstep2 = -tri->dy23;
float xstep3 = -tri->dy31; float xstep3 = -tri->dy31;
@ -111,43 +77,41 @@ do_quad( struct lp_rasterizer *rast,
float ystep2 = tri->dx23; float ystep2 = tri->dx23;
float ystep3 = tri->dx31; float ystep3 = tri->dx31;
quad->input.x0 = x; unsigned mask = 0;
quad->input.y0 = y;
quad->inout.mask = 0;
if (c1 > 0 && if (c1 > 0 &&
c2 > 0 && c2 > 0 &&
c3 > 0) c3 > 0)
quad->inout.mask |= 1; mask |= 1;
if (c1 + xstep1 > 0 && if (c1 + xstep1 > 0 &&
c2 + xstep2 > 0 && c2 + xstep2 > 0 &&
c3 + xstep3 > 0) c3 + xstep3 > 0)
quad->inout.mask |= 2; mask |= 2;
if (c1 + ystep1 > 0 && if (c1 + ystep1 > 0 &&
c2 + ystep2 > 0 && c2 + ystep2 > 0 &&
c3 + ystep3 > 0) c3 + ystep3 > 0)
quad->inout.mask |= 4; mask |= 4;
if (c1 + ystep1 + xstep1 > 0 && if (c1 + ystep1 + xstep1 > 0 &&
c2 + ystep2 + xstep2 > 0 && c2 + ystep2 + xstep2 > 0 &&
c3 + ystep3 + xstep3 > 0) c3 + ystep3 + xstep3 > 0)
quad->inout.mask |= 8; mask |= 8;
if (quad->inout.mask) return mask;
rast->state->run( rast->state->state, &quad, 1 );
} }
/* Evaluate each pixel in a block, generate a mask and possibly render /* Evaluate each pixel in a block, generate a mask and possibly render
* the quad: * the quad:
*/ */
static void static void
do_block( struct lp_rast_triangle *tri, do_block( struct lp_rasterizer *rast,
int x, int y, const struct lp_rast_triangle *tri,
float c1, int x, int y,
float c2, float c1,
float c3 ) float c2,
float c3 )
{ {
const int step = 2; const int step = 2;
@ -166,19 +130,24 @@ do_block( struct lp_rast_triangle *tri,
float cx2 = c2; float cx2 = c2;
float cx3 = c3; float cx3 = c3;
unsigned masks[4] = {0, 0, 0, 0};
for (ix = 0; ix < BLOCKSIZE; ix += 2) { for (ix = 0; ix < BLOCKSIZE; ix += 2) {
do_quad(tri, x+ix, y+iy, cx1, cx2, cx3); masks[ix >> 1] = do_quad(tri, x + ix, y + iy, cx1, cx2, cx3);
cx1 += xstep1; cx1 += xstep1;
cx2 += xstep2; cx2 += xstep2;
cx3 += xstep3; cx3 += xstep3;
} }
lp_rast_shade_quads(rast, tri->inputs, x, y + iy, masks);
c1 += ystep1; c1 += ystep1;
c2 += ystep2; c2 += ystep2;
c3 += ystep3; c3 += ystep3;
} }
} }
@ -187,8 +156,9 @@ do_block( struct lp_rast_triangle *tri,
* for this triangle: * for this triangle:
*/ */
void lp_rast_triangle( struct lp_rasterizer *rast, void lp_rast_triangle( struct lp_rasterizer *rast,
const struct lp_rast_triangle *tri ) const union lp_rast_cmd_arg *arg )
{ {
const struct lp_rast_triangle *tri = arg->triangle;
int minx, maxx, miny, maxy; int minx, maxx, miny, maxy;
/* Clamp to tile dimensions: /* Clamp to tile dimensions:
@ -205,136 +175,64 @@ void lp_rast_triangle( struct lp_rasterizer *rast,
return; return;
} }
/* Bind parameter interpolants: const int step = BLOCKSIZE;
*/
for (i = 0; i < Elements(rast->quad); i++) {
rast->quad[i].coef = tri->coef;
rast->quad[i].posCoef = &tri->position_coef;
}
/* Small area? float ei1 = tri->ei1 * step;
*/ float ei2 = tri->ei2 * step;
if (miny + 16 > maxy && float ei3 = tri->ei3 * step;
minx + 16 > maxx)
float eo1 = tri->eo1 * step;
float eo2 = tri->eo2 * step;
float eo3 = tri->eo3 * step;
float xstep1 = -step * tri->dy12;
float xstep2 = -step * tri->dy23;
float xstep3 = -step * tri->dy31;
float ystep1 = step * tri->dx12;
float ystep2 = step * tri->dx23;
float ystep3 = step * tri->dx31;
int x, y;
minx &= ~(step-1);
miny &= ~(step-1);
for (y = miny; y < maxy; y += step)
{ {
const int step = 2; float cx1 = c1;
float cx2 = c2;
float cx3 = c3;
float xstep1 = -step * tri->dy12; for (x = minx; x < maxx; x += step)
float xstep2 = -step * tri->dy23; {
float xstep3 = -step * tri->dy31; if (cx1 + eo1 < 0 ||
cx2 + eo2 < 0 ||
cx3 + eo3 < 0)
{
}
else if (cx1 + ei1 > 0 &&
cx2 + ei2 > 0 &&
cx3 + ei3 > 0)
{
block_full(rast, tri, x, y); /* trivial accept */
}
else
{
do_block(rast, tri, x, y, cx1, cx2, cx3);
}
float ystep1 = step * tri->dx12; /* Iterate cx values across the region:
float ystep2 = step * tri->dx23; */
float ystep3 = step * tri->dx31; cx1 += xstep1;
cx2 += xstep2;
cx3 += xstep3;
}
float eo1 = tri->eo1 * step; /* Iterate c values down the region:
float eo2 = tri->eo2 * step;
float eo3 = tri->eo3 * step;
int x, y;
minx &= ~(step-1);
maxx &= ~(step-1);
/* Subdivide space into NxM blocks, where each block is square and
* power-of-four in dimension.
*
* Trivially accept or reject blocks, else jump to per-pixel
* examination above.
*/ */
for (y = miny; y < maxy; y += step) c1 += ystep1;
{ c2 += ystep2;
float cx1 = c1; c3 += ystep3;
float cx2 = c2;
float cx3 = c3;
for (x = minx; x < maxx; x += step)
{
if (cx1 + eo1 < 0 ||
cx2 + eo2 < 0 ||
cx3 + eo3 < 0)
{
}
else
{
do_quad(&tri, x, y, cx1, cx2, cx3);
}
/* Iterate cx values across the region:
*/
cx1 += xstep1;
cx2 += xstep2;
cx3 += xstep3;
}
/* Iterate c values down the region:
*/
c1 += ystep1;
c2 += ystep2;
c3 += ystep3;
}
}
else
{
const int step = BLOCKSIZE;
float ei1 = tri->ei1 * step;
float ei2 = tri->ei2 * step;
float ei3 = tri->ei3 * step;
float eo1 = tri->eo1 * step;
float eo2 = tri->eo2 * step;
float eo3 = tri->eo3 * step;
float xstep1 = -step * tri->dy12;
float xstep2 = -step * tri->dy23;
float xstep3 = -step * tri->dy31;
float ystep1 = step * tri->dx12;
float ystep2 = step * tri->dx23;
float ystep3 = step * tri->dx31;
int x, y;
minx &= ~(step-1);
miny &= ~(step-1);
for (y = miny; y < maxy; y += step)
{
float cx1 = c1;
float cx2 = c2;
float cx3 = c3;
for (x = minx; x < maxx; x += step)
{
if (cx1 + eo1 < 0 ||
cx2 + eo2 < 0 ||
cx3 + eo3 < 0)
{
}
else if (cx1 + ei1 > 0 &&
cx2 + ei2 > 0 &&
cx3 + ei3 > 0)
{
block_full(&tri, x, y); /* trivial accept */
}
else
{
do_block(&tri, x, y, cx1, cx2, cx3);
}
/* Iterate cx values across the region:
*/
cx1 += xstep1;
cx2 += xstep2;
cx3 += xstep3;
}
/* Iterate c values down the region:
*/
c1 += ystep1;
c2 += ystep2;
c3 += ystep3;
}
} }
} }