llvmpipe: reorganize block4 loop, nice speedup

isosurf 95->115 fps just by exchanging the two inner loops in this
function...
This commit is contained in:
Keith Whitwell 2010-08-15 16:21:46 +01:00
parent 2d53dc873e
commit 510b035394
2 changed files with 19 additions and 12 deletions

View File

@ -113,6 +113,21 @@ block_full_16(struct lp_rasterizer_task *task,
block_full_4(task, tri, x + ix, y + iy);
}
/**
 * Build a 16-bit mask with one bit per entry of step[].
 *
 * Bit i of the result is set when (c + step[i]) is negative and clear
 * otherwise.
 *
 * \param c     value added to each step entry before the sign test
 * \param step  array from which entries 0..15 are read
 * \return      mask whose bits 0..15 are populated as described above
 */
static INLINE unsigned
build_mask(int c, const int *step)
{
   unsigned mask = 0;
   int i;

   for (i = 0; i < 16; i++) {
      /* The comparison yields exactly 0 or 1, so this stays branch-free
       * without depending on how right-shifting a negative int behaves
       * or on int being 32 bits wide.
       */
      mask |= (unsigned)((c + step[i]) < 0) << i;
   }

   return mask;
}
/* Configure and pull in lp_rast_tri_tmp.h for the single-plane case:
 * TAG() pastes the _1 suffix onto the name it is given and NR_PLANES is
 * set to 1 before the header is included.
 * NOTE(review): presumably the header's functions are emitted under
 * _1-suffixed names as a result -- confirm against lp_rast_tri_tmp.h.
 */
#define TAG(x) x##_1
#define NR_PLANES 1
#include "lp_rast_tri_tmp.h"

View File

@ -46,19 +46,11 @@ TAG(do_block_4)(struct lp_rasterizer_task *task,
int x, int y,
const int *c)
{
unsigned mask = 0;
int i;
unsigned mask = 0xffff;
int j;
for (i = 0; i < 16; i++) {
int any_negative = 0;
int j;
for (j = 0; j < NR_PLANES; j++)
any_negative |= (c[j] - 1 + plane[j].step[i]);
any_negative >>= 31;
mask |= (~any_negative) & (1 << i);
for (j = 0; j < NR_PLANES; j++) {
mask &= ~build_mask(c[j] - 1, plane[j].step);
}
/* Now pass to the shader: