r300: Fix cos & add scs to fragment program.

So this do :
 - Fixes COS.
 - Does range reductions for SIN & COS.
 - Adds SCS.
 - removes the optimized version of SIN & COS.
 - tweaked weight (should help on precision).
 - fixed a copy paste typo in emit_arith().
This commit is contained in:
Rune Peterson 2007-02-14 23:10:52 +01:00 committed by Jerome Glisse
parent 1e2b469635
commit e4c772bead
2 changed files with 185 additions and 89 deletions

View File

@ -731,7 +731,7 @@ struct r300_fragment_program {
int max_temp_idx;
/* the index of the sin constant is stored here */
GLint const_sin;
GLint const_sin[2];
GLuint optimization;
};

View File

@ -33,7 +33,6 @@
/*TODO'S
*
* - SCS instructions
* - Depth write, WPOS/FOGC inputs
* - FogOption
* - Verify results of opcodes for accuracy, I've only checked them
@ -1081,7 +1080,7 @@ static void emit_arith(struct r300_fragment_program *rp,
break;
}
if (emit_sop &&
(s_swiz[REG_GET_VSWZ(src[i])].flags & SLOT_VECTOR)) {
(s_swiz[REG_GET_SSWZ(src[i])].flags & SLOT_VECTOR)) {
vpos = spos = MAX2(vpos, spos);
break;
}
@ -1204,6 +1203,25 @@ static GLuint get_attrib(struct r300_fragment_program *rp, GLuint attr)
}
#endif
static void make_sin_const(struct r300_fragment_program *rp)
{
if(rp->const_sin[0] == -1){
GLfloat cnstv[4];
cnstv[0] = 1.273239545; // 4/PI
cnstv[1] =-0.405284735; // -4/(PI*PI)
cnstv[2] = 3.141592654; // PI
cnstv[3] = 0.2225; // weight
rp->const_sin[0] = emit_const4fv(rp, cnstv);
cnstv[0] = 0.5;
cnstv[1] = -1.5;
cnstv[2] = 0.159154943; // 1/(2*PI)
cnstv[3] = 6.283185307; // 2*PI
rp->const_sin[1] = emit_const4fv(rp, cnstv);
}
}
static GLboolean parse_program(struct r300_fragment_program *rp)
{
struct gl_fragment_program *mp = &rp->mesa_program;
@ -1260,84 +1278,68 @@ static GLboolean parse_program(struct r300_fragment_program *rp)
* cos using a parabola (see SIN):
* cos(x):
* x += PI/2
* x = (x < PI)?x : x-2*PI
* x = (x/(2*PI))+0.5
* x = frac(x)
* x = (x*2*PI)-PI
* result = sin(x)
*/
temp = get_temp_reg(rp);
if(rp->const_sin == -1){
cnstv[0] = 1.273239545;
cnstv[1] =-0.405284735;
cnstv[2] = 3.141592654;
cnstv[3] = 0.225;
rp->const_sin = emit_const4fv(rp, cnstv);
}
cnst = rp->const_sin;
make_sin_const(rp);
src[0] = t_scalar_src(rp, fpi->SrcReg[0]);
emit_arith(rp, PFS_OP_LG2, temp, WRITEMASK_W,
pfs_half,
undef,
undef,
0);
/* add 0.5*PI and do range reduction */
emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_X,
swizzle(cnst, Z, Z, Z, Z), //PI
swizzle(rp->const_sin[0], Z, Z, Z, Z), //PI
pfs_half,
swizzle(keep(src[0]), X, X, X, X),
0);
emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_W,
negate(swizzle(temp, W, W, W, W)), //-2
swizzle(cnst, Z, Z, Z, Z), //PI
emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_X,
swizzle(temp, X, X, X, X),
swizzle(rp->const_sin[1], Z, Z, Z, Z),
pfs_half,
0);
emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_Y,
swizzle(cnst, Z, Z, Z, Z), //PI
negate(pfs_half),
swizzle(src[0], X, X, X, X),
0);
emit_arith(rp, PFS_OP_CMP, temp, WRITEMASK_Z,
swizzle(temp, W, W, W, W),
emit_arith(rp, PFS_OP_FRC, temp, WRITEMASK_X,
swizzle(temp, X, X, X, X),
swizzle(temp, Y, Y, Y, Y),
undef,
undef,
0);
emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_Z,
swizzle(temp, X, X, X, X),
swizzle(rp->const_sin[1], W, W, W, W), //2*PI
negate(swizzle(rp->const_sin[0], Z, Z, Z, Z)), //-PI
0);
/* SIN */
emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_X | WRITEMASK_Y,
swizzle(temp, Z, Z, Z, Z),
cnst,
rp->const_sin[0],
pfs_zero,
0);
if(rp->optimization == DRI_CONF_FP_OPTIMIZATION_SPEED){
emit_arith(rp, PFS_OP_MAD, dest, mask,
swizzle(temp, Y, Y, Y, Y),
absolute(swizzle(temp, Z, Z, Z, Z)),
swizzle(temp, X, X, X, X),
flags);
}else{
emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_X,
swizzle(temp, Y, Y, Y, Y),
absolute(swizzle(temp, Z, Z, Z, Z)),
swizzle(temp, X, X, X, X),
0);
emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_X,
swizzle(temp, Y, Y, Y, Y),
absolute(swizzle(temp, Z, Z, Z, Z)),
swizzle(temp, X, X, X, X),
0);
emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_Y,
swizzle(temp, X, X, X, X),
absolute(swizzle(temp, X, X, X, X)),
negate(swizzle(temp, X, X, X, X)),
0);
emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_Y,
swizzle(temp, X, X, X, X),
absolute(swizzle(temp, X, X, X, X)),
negate(swizzle(temp, X, X, X, X)),
0);
emit_arith(rp, PFS_OP_MAD, dest, mask,
swizzle(temp, Y, Y, Y, Y),
swizzle(cnst, W, W, W, W),
swizzle(temp, X, X, X, X),
flags);
}
emit_arith(rp, PFS_OP_MAD, dest, mask,
swizzle(temp, Y, Y, Y, Y),
swizzle(rp->const_sin[0], W, W, W, W),
swizzle(temp, X, X, X, X),
flags);
free_temp(rp, temp);
break;
case OPCODE_DP3:
@ -1577,7 +1579,93 @@ static GLboolean parse_program(struct r300_fragment_program *rp)
flags);
break;
case OPCODE_SCS:
ERROR("SCS not implemented\n");
/*
* cos using a parabola (see SIN):
* cos(x):
* x += PI/2
* x = (x/(2*PI))+0.5
* x = frac(x)
* x = (x*2*PI)-PI
* result = sin(x)
*/
temp = get_temp_reg(rp);
make_sin_const(rp);
src[0] = t_scalar_src(rp, fpi->SrcReg[0]);
/* add 0.5*PI and do range reduction */
emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_X|WRITEMASK_Y,
swizzle(rp->const_sin[0], Z, Z, Z, Z),
rp->const_sin[1],
swizzle(keep(src[0]), X, X, X, X),
0);
emit_arith(rp, PFS_OP_CMP, temp, WRITEMASK_W,
swizzle(rp->const_sin[0], Z, Z, Z, Z),
negate(pfs_half),
swizzle(keep(src[0]), X, X, X, X),
0);
emit_arith(rp, PFS_OP_CMP, temp, WRITEMASK_Z,
swizzle(temp, X, X, X, X),
swizzle(temp, Y, Y, Y, Y),
swizzle(temp, W, W, W, W),
0);
emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_X | WRITEMASK_Y,
swizzle(temp, Z, Z, Z, Z),
rp->const_sin[0],
pfs_zero,
0);
emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_W,
swizzle(temp, Y, Y, Y, Y),
absolute(swizzle(temp, Z, Z, Z, Z)),
swizzle(temp, X, X, X, X),
0);
if(mask & WRITEMASK_Y)
{
emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_X | WRITEMASK_Y,
swizzle(keep(src[0]), X, X, X, X),
rp->const_sin[0],
pfs_zero,
0);
emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_X,
swizzle(temp, Y, Y, Y, Y),
absolute(swizzle(keep(src[0]), X, X, X, X)),
swizzle(temp, X, X, X, X),
0);
}
emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_Z,
swizzle(temp, W, W, W, W),
absolute(swizzle(temp, W, W, W, W)),
negate(swizzle(temp, W, W, W, W)),
0);
emit_arith(rp, PFS_OP_MAD, dest, WRITEMASK_X,
swizzle(temp, Z, Z, Z, Z),
swizzle(rp->const_sin[0], W, W, W, W),
swizzle(temp, W, W, W, W),
flags);
if(mask & WRITEMASK_Y)
{
emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_W,
swizzle(temp, X, X, X, X),
absolute(swizzle(temp, X, X, X, X)),
negate(swizzle(temp, X, X, X, X)),
0);
emit_arith(rp, PFS_OP_MAD, dest, WRITEMASK_Y,
swizzle(temp, W, W, W, W),
swizzle(rp->const_sin[0], W, W, W, W),
swizzle(temp, X, X, X, X),
flags);
}
free_temp(rp, temp);
break;
case OPCODE_SGE:
src[0] = t_src(rp, fpi->SrcReg[0]);
@ -1603,48 +1691,56 @@ static GLboolean parse_program(struct r300_fragment_program *rp)
*/
temp = get_temp_reg(rp);
if(rp->const_sin == -1){
cnstv[0] = 1.273239545;
cnstv[1] =-0.405284735;
cnstv[2] = 3.141592654;
cnstv[3] = 0.225;
rp->const_sin = emit_const4fv(rp, cnstv);
}
cnst = rp->const_sin;
make_sin_const(rp);
src[0] = t_scalar_src(rp, fpi->SrcReg[0]);
emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_X | WRITEMASK_Y,
/* do range reduction */
emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_X,
swizzle(keep(src[0]), X, X, X, X),
cnst,
swizzle(rp->const_sin[1], Z, Z, Z, Z),
pfs_half,
0);
emit_arith(rp, PFS_OP_FRC, temp, WRITEMASK_X,
swizzle(temp, X, X, X, X),
undef,
undef,
0);
emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_Z,
swizzle(temp, X, X, X, X),
swizzle(rp->const_sin[1], W, W, W, W), //2*PI
negate(swizzle(rp->const_sin[0], Z, Z, Z, Z)), //PI
0);
/* SIN */
emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_X | WRITEMASK_Y,
swizzle(temp, Z, Z, Z, Z),
rp->const_sin[0],
pfs_zero,
0);
if(rp->optimization == DRI_CONF_FP_OPTIMIZATION_SPEED){
emit_arith(rp, PFS_OP_MAD, dest, mask,
swizzle(temp, Y, Y, Y, Y),
absolute(swizzle(src[0], X, X, X, X)),
swizzle(temp, X, X, X, X),
flags);
}else{
emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_X,
swizzle(temp, Y, Y, Y, Y),
absolute(swizzle(src[0], X, X, X, X)),
swizzle(temp, X, X, X, X),
0);
emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_X,
swizzle(temp, Y, Y, Y, Y),
absolute(swizzle(temp, Z, Z, Z, Z)),
swizzle(temp, X, X, X, X),
0);
emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_Y,
swizzle(temp, X, X, X, X),
absolute(swizzle(temp, X, X, X, X)),
negate(swizzle(temp, X, X, X, X)),
0);
emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_Y,
swizzle(temp, X, X, X, X),
absolute(swizzle(temp, X, X, X, X)),
negate(swizzle(temp, X, X, X, X)),
0);
emit_arith(rp, PFS_OP_MAD, dest, mask,
swizzle(temp, Y, Y, Y, Y),
swizzle(cnst, W, W, W, W),
swizzle(temp, X, X, X, X),
flags);
}
emit_arith(rp, PFS_OP_MAD, dest, mask,
swizzle(temp, Y, Y, Y, Y),
swizzle(rp->const_sin[0], W, W, W, W),
swizzle(temp, X, X, X, X),
flags);
free_temp(rp, temp);
break;
case OPCODE_SLT:
@ -1739,7 +1835,7 @@ static void init_program(r300ContextPtr r300, struct r300_fragment_program *rp)
rp->max_temp_idx = 0;
rp->node[0].alu_end = -1;
rp->node[0].tex_end = -1;
rp->const_sin = -1;
rp->const_sin[0] = -1;
_mesa_memset(cs, 0, sizeof(*rp->cs));
for (i=0;i<PFS_MAX_ALU_INST;i++) {