llvmpipe: version of block4 which doesn't need the full step array

No noticeable slowdown with isosurf.
This commit is contained in:
Keith Whitwell 2010-08-15 16:32:45 +01:00
parent 510b035394
commit 515194968d
2 changed files with 27 additions and 7 deletions

View File

@ -115,19 +115,37 @@ block_full_16(struct lp_rasterizer_task *task,
/*
 * Build a 16-bit mask over a 4x4 grid of positions from a single starting
 * value and its per-column / per-row increments.
 *
 * NOTE(review): this listing is a diff rendered without +/- markers, so two
 * signatures and two bodies appear below.  Presumably the `const int *step`
 * signature and the 16-iteration loop are the removed code, and the
 * dcdx/dcdy signature with the unrolled body is what replaces them --
 * confirm against the real file.
 *
 * c     value at column 0, row 0 (result bit 0).
 * dcdx  amount added for each successive column (c0 -> c1 -> c2 -> c3).
 * dcdy  amount multiplied by the row index 0..3 and added to the column value.
 *
 * Returns a mask in which a bit is set when the value computed for that
 * position is negative.
 */
static INLINE unsigned
build_mask(int c, const int *step)
build_mask(int c, int dcdx, int dcdy)
{
int mask = 0;
int i;
/* Step-array form: bit i is set when c + step[i] is negative. */
for (i = 0; i < 16; i++) {
mask |= ((c + step[i]) >> 31) & (1 << i);
}
/* Column values: each successive column adds dcdx once more. */
int c0 = c;
int c1 = c0 + dcdx;
int c2 = c1 + dcdx;
int c3 = c2 + dcdx;
/*
 * `>> 31` is relied on to turn a negative value into all-ones so the AND
 * keeps exactly one bit (assumes 32-bit int and an arithmetic right shift
 * of negative values, which C leaves implementation-defined).
 *
 * Bit positions are not row-major: each 2x2 group of positions occupies
 * four consecutive bits -- bits 0-3 for columns 0-1 / rows 0-1, bits 4-7
 * for columns 2-3 / rows 0-1, bits 8-11 for columns 0-1 / rows 2-3 and
 * bits 12-15 for columns 2-3 / rows 2-3.
 * NOTE(review): presumably this matches the ordering of the step[] entries
 * used by the loop form -- verify.
 */
mask |= ((c0 + 0 * dcdy) >> 31) & (1 << 0);
mask |= ((c0 + 1 * dcdy) >> 31) & (1 << 2);
mask |= ((c0 + 2 * dcdy) >> 31) & (1 << 8);
mask |= ((c0 + 3 * dcdy) >> 31) & (1 << 10);
mask |= ((c1 + 0 * dcdy) >> 31) & (1 << 1);
mask |= ((c1 + 1 * dcdy) >> 31) & (1 << 3);
mask |= ((c1 + 2 * dcdy) >> 31) & (1 << 9);
mask |= ((c1 + 3 * dcdy) >> 31) & (1 << 11);
mask |= ((c2 + 0 * dcdy) >> 31) & (1 << 4);
mask |= ((c2 + 1 * dcdy) >> 31) & (1 << 6);
mask |= ((c2 + 2 * dcdy) >> 31) & (1 << 12);
mask |= ((c2 + 3 * dcdy) >> 31) & (1 << 14);
mask |= ((c3 + 0 * dcdy) >> 31) & (1 << 5);
mask |= ((c3 + 1 * dcdy) >> 31) & (1 << 7);
mask |= ((c3 + 2 * dcdy) >> 31) & (1 << 13);
mask |= ((c3 + 3 * dcdy) >> 31) & (1 << 15);
return mask;
}
#define TAG(x) x##_1
#define NR_PLANES 1
#include "lp_rast_tri_tmp.h"

View File

@ -50,7 +50,9 @@ TAG(do_block_4)(struct lp_rasterizer_task *task,
int j;
for (j = 0; j < NR_PLANES; j++) {
mask &= ~build_mask(c[j] - 1, plane[j].step);
mask &= ~build_mask(c[j] - 1,
plane[j].step[1],
plane[j].step[2]);
}
/* Now pass to the shader: