llvmpipe: reorganize block4 loop, nice speedup
isosurf 95->115 fps just by exchanging the two inner loops in this function...
This commit is contained in:
parent
2d53dc873e
commit
510b035394
|
@ -113,6 +113,21 @@ block_full_16(struct lp_rasterizer_task *task,
|
|||
block_full_4(task, tri, x + ix, y + iy);
|
||||
}
|
||||
|
||||
|
||||
/**
 * Build a 16-bit mask marking which of 16 stepped values are negative.
 *
 * For each i in [0, 16), bit i of the result is set when
 * (c + step[i]) is negative and clear otherwise.  Bits 16 and above
 * are always zero.
 *
 * \param c     base value added to every entry of \p step
 * \param step  array of offsets; entries 0..15 are read
 * \return      mask with bit i set iff c + step[i] < 0
 *
 * NOTE(review): the sum is computed in plain int, so callers are
 * presumably expected to keep c + step[i] within int range -- confirm.
 */
static INLINE unsigned
build_mask(int c, const int *step)
{
   unsigned mask = 0;
   int i;

   for (i = 0; i < 16; i++) {
      /* Test the sign with an explicit comparison rather than
       * ">> 31": right-shifting a negative int is implementation-
       * defined and bakes in a 32-bit int.  The comparison yields
       * 0 or 1, which is then moved to bit i without branching.
       */
      mask |= (unsigned)((c + step[i]) < 0) << i;
   }

   return mask;
}
|
||||
|
||||
|
||||
#define TAG(x) x##_1
|
||||
#define NR_PLANES 1
|
||||
#include "lp_rast_tri_tmp.h"
|
||||
|
|
|
@ -46,19 +46,11 @@ TAG(do_block_4)(struct lp_rasterizer_task *task,
|
|||
int x, int y,
|
||||
const int *c)
|
||||
{
|
||||
unsigned mask = 0;
|
||||
int i;
|
||||
unsigned mask = 0xffff;
|
||||
int j;
|
||||
|
||||
for (i = 0; i < 16; i++) {
|
||||
int any_negative = 0;
|
||||
int j;
|
||||
|
||||
for (j = 0; j < NR_PLANES; j++)
|
||||
any_negative |= (c[j] - 1 + plane[j].step[i]);
|
||||
|
||||
any_negative >>= 31;
|
||||
|
||||
mask |= (~any_negative) & (1 << i);
|
||||
for (j = 0; j < NR_PLANES; j++) {
|
||||
mask &= ~build_mask(c[j] - 1, plane[j].step);
|
||||
}
|
||||
|
||||
/* Now pass to the shader:
|
||||
|
|
Loading…
Reference in New Issue