r300_fragprog: Emulate trigonometric functions in radeon_program_alu

This commit is contained in:
Nicolai Haehnle 2008-07-05 23:54:31 +02:00
parent 4746752f16
commit 2b2cb56656
4 changed files with 153 additions and 207 deletions

View File

@ -408,12 +408,13 @@ void r300TranslateFragmentShader(r300ContextPtr r300,
struct radeon_program_transformation transformations[] = {
{ &transform_TEX, &compiler },
{ &radeonTransformALU, 0 }
{ &radeonTransformALU, 0 },
{ &radeonTransformTrigSimple, 0 }
};
radeonLocalTransform(
r300->radeon.glCtx,
compiler.program,
2, transformations);
3, transformations);
if (RADEON_DEBUG & DEBUG_PIXEL) {
_mesa_printf("Fragment Program: After transformations:\n");

View File

@ -1423,40 +1423,11 @@ static void emit_arith(struct r300_pfs_compile_state *cs,
return;
}
/*
 * Constant vectors for the parabola-based sin/cos approximation.
 *
 * Row 0 holds the two parabola coefficients, PI and the refinement weight;
 * row 1 holds the values used for range reduction.
 *
 * The table is only ever read (it is handed to
 * _mesa_add_unnamed_constant as input data), so it is declared const.
 */
static const GLfloat SinCosConsts[2][4] = {
	{
		1.273239545, // 4/PI
		-0.405284735, // -4/(PI*PI)
		3.141592654, // PI
		0.2225 // weight
	},
	{
		0.75, // offset added before FRC when computing COS
		0.0,
		0.159154943, // 1/(2*PI)
		6.283185307 // 2*PI
	}
};
static GLuint emit_sincosconsts(struct r300_pfs_compile_state *cs, int i)
{
struct prog_src_register srcreg;
GLuint constant_swizzle;
srcreg.File = PROGRAM_CONSTANT;
srcreg.Index = _mesa_add_unnamed_constant(cs->compiler->program->Parameters,
SinCosConsts[i], 4, &constant_swizzle);
srcreg.Swizzle = constant_swizzle;
return emit_const4fv(cs, srcreg);
}
static void emit_instruction(struct r300_pfs_compile_state *cs, struct prog_instruction *fpi)
{
COMPILE_STATE;
GLuint src[3], dest, temp[2];
GLuint src[3], dest;
int flags, mask = 0;
int const_sin[2];
if (fpi->SaturateMode == SATURATE_ZERO_ONE)
flags = PFS_FLAG_SAT;
@ -1485,60 +1456,6 @@ static void emit_instruction(struct r300_pfs_compile_state *cs, struct prog_inst
emit_arith(cs, PFS_OP_CMP, dest, mask,
src[2], src[1], src[0], flags);
break;
case OPCODE_COS:
/*
* cos using a parabola (see SIN):
* cos(x):
* x = (x/(2*PI))+0.75
* x = frac(x)
* x = (x*2*PI)-PI
* result = sin(x)
*/
temp[0] = get_temp_reg(cs);
const_sin[0] = emit_sincosconsts(cs, 0);
const_sin[1] = emit_sincosconsts(cs, 1);
src[0] = t_scalar_src(cs, fpi->SrcReg[0]);
/* add 0.5*PI and do range reduction */
emit_arith(cs, PFS_OP_MAD, temp[0], WRITEMASK_X,
swizzle(src[0], X, X, X, X),
swizzle(const_sin[1], Z, Z, Z, Z),
swizzle(const_sin[1], X, X, X, X), 0);
emit_arith(cs, PFS_OP_FRC, temp[0], WRITEMASK_X,
swizzle(temp[0], X, X, X, X),
undef, undef, 0);
emit_arith(cs, PFS_OP_MAD, temp[0], WRITEMASK_Z, swizzle(temp[0], X, X, X, X), swizzle(const_sin[1], W, W, W, W), //2*PI
negate(swizzle(const_sin[0], Z, Z, Z, Z)), //-PI
0);
/* SIN */
emit_arith(cs, PFS_OP_MAD, temp[0],
WRITEMASK_X | WRITEMASK_Y, swizzle(temp[0],
Z, Z, Z,
Z),
const_sin[0], pfs_zero, 0);
emit_arith(cs, PFS_OP_MAD, temp[0], WRITEMASK_X,
swizzle(temp[0], Y, Y, Y, Y),
absolute(swizzle(temp[0], Z, Z, Z, Z)),
swizzle(temp[0], X, X, X, X), 0);
emit_arith(cs, PFS_OP_MAD, temp[0], WRITEMASK_Y,
swizzle(temp[0], X, X, X, X),
absolute(swizzle(temp[0], X, X, X, X)),
negate(swizzle(temp[0], X, X, X, X)), 0);
emit_arith(cs, PFS_OP_MAD, dest, mask,
swizzle(temp[0], Y, Y, Y, Y),
swizzle(const_sin[0], W, W, W, W),
swizzle(temp[0], X, X, X, X), flags);
free_temp(cs, temp[0]);
break;
case OPCODE_DP3:
src[0] = t_src(cs, fpi->SrcReg[0]);
src[1] = t_src(cs, fpi->SrcReg[1]);
@ -1609,127 +1526,6 @@ static void emit_instruction(struct r300_pfs_compile_state *cs, struct prog_inst
emit_arith(cs, PFS_OP_RSQ, dest, mask,
absolute(src[0]), pfs_zero, pfs_zero, flags);
break;
case OPCODE_SCS:
/*
* scs using a parabola :
* scs(x):
* result.x = sin(-abs(x)+0.5*PI) (cos)
* result.y = sin(x) (sin)
*
*/
temp[0] = get_temp_reg(cs);
temp[1] = get_temp_reg(cs);
const_sin[0] = emit_sincosconsts(cs, 0);
const_sin[1] = emit_sincosconsts(cs, 1);
src[0] = t_scalar_src(cs, fpi->SrcReg[0]);
/* x = -abs(x)+0.5*PI */
emit_arith(cs, PFS_OP_MAD, temp[0], WRITEMASK_Z, swizzle(const_sin[0], Z, Z, Z, Z), //PI
pfs_half,
negate(abs
(swizzle(keep(src[0]), X, X, X, X))),
0);
/* C*x (sin) */
emit_arith(cs, PFS_OP_MAD, temp[0], WRITEMASK_W,
swizzle(const_sin[0], Y, Y, Y, Y),
swizzle(keep(src[0]), X, X, X, X),
pfs_zero, 0);
/* B*x, C*x (cos) */
emit_arith(cs, PFS_OP_MAD, temp[0],
WRITEMASK_X | WRITEMASK_Y, swizzle(temp[0],
Z, Z, Z,
Z),
const_sin[0], pfs_zero, 0);
/* B*x (sin) */
emit_arith(cs, PFS_OP_MAD, temp[1], WRITEMASK_W,
swizzle(const_sin[0], X, X, X, X),
keep(src[0]), pfs_zero, 0);
/* y = B*x + C*x*abs(x) (sin) */
emit_arith(cs, PFS_OP_MAD, temp[1], WRITEMASK_Z,
absolute(src[0]),
swizzle(temp[0], W, W, W, W),
swizzle(temp[1], W, W, W, W), 0);
/* y = B*x + C*x*abs(x) (cos) */
emit_arith(cs, PFS_OP_MAD, temp[1], WRITEMASK_W,
swizzle(temp[0], Y, Y, Y, Y),
absolute(swizzle(temp[0], Z, Z, Z, Z)),
swizzle(temp[0], X, X, X, X), 0);
/* y*abs(y) - y (cos), y*abs(y) - y (sin) */
emit_arith(cs, PFS_OP_MAD, temp[0],
WRITEMASK_X | WRITEMASK_Y, swizzle(temp[1],
W, Z, Y,
X),
absolute(swizzle(temp[1], W, Z, Y, X)),
negate(swizzle(temp[1], W, Z, Y, X)), 0);
/* dest.xy = mad(temp.xy, P, temp2.wz) */
emit_arith(cs, PFS_OP_MAD, dest,
mask & (WRITEMASK_X | WRITEMASK_Y), temp[0],
swizzle(const_sin[0], W, W, W, W),
swizzle(temp[1], W, Z, Y, X), flags);
free_temp(cs, temp[0]);
free_temp(cs, temp[1]);
break;
case OPCODE_SIN:
/*
* using a parabola:
* sin(x) = 4/pi * x + -4/(pi*pi) * x * abs(x)
* extra precision is obtained by weighting against
* itself squared.
*/
temp[0] = get_temp_reg(cs);
const_sin[0] = emit_sincosconsts(cs, 0);
const_sin[1] = emit_sincosconsts(cs, 1);
src[0] = t_scalar_src(cs, fpi->SrcReg[0]);
/* do range reduction */
emit_arith(cs, PFS_OP_MAD, temp[0], WRITEMASK_X,
swizzle(keep(src[0]), X, X, X, X),
swizzle(const_sin[1], Z, Z, Z, Z),
pfs_half, 0);
emit_arith(cs, PFS_OP_FRC, temp[0], WRITEMASK_X,
swizzle(temp[0], X, X, X, X),
undef, undef, 0);
emit_arith(cs, PFS_OP_MAD, temp[0], WRITEMASK_Z, swizzle(temp[0], X, X, X, X), swizzle(const_sin[1], W, W, W, W), //2*PI
negate(swizzle(const_sin[0], Z, Z, Z, Z)), //PI
0);
/* SIN */
emit_arith(cs, PFS_OP_MAD, temp[0],
WRITEMASK_X | WRITEMASK_Y, swizzle(temp[0],
Z, Z, Z,
Z),
const_sin[0], pfs_zero, 0);
emit_arith(cs, PFS_OP_MAD, temp[0], WRITEMASK_X,
swizzle(temp[0], Y, Y, Y, Y),
absolute(swizzle(temp[0], Z, Z, Z, Z)),
swizzle(temp[0], X, X, X, X), 0);
emit_arith(cs, PFS_OP_MAD, temp[0], WRITEMASK_Y,
swizzle(temp[0], X, X, X, X),
absolute(swizzle(temp[0], X, X, X, X)),
negate(swizzle(temp[0], X, X, X, X)), 0);
emit_arith(cs, PFS_OP_MAD, dest, mask,
swizzle(temp[0], Y, Y, Y, Y),
swizzle(const_sin[0], W, W, W, W),
swizzle(temp[0], X, X, X, X), flags);
free_temp(cs, temp[0]);
break;
case OPCODE_TEX:
emit_tex(cs, fpi, R300_TEX_OP_LD);
break;

View File

@ -149,6 +149,14 @@ static struct prog_src_register srcregswz(int file, int index, int swz)
return src;
}
/**
 * Build the "absolute value" variant of a source register.
 *
 * The argument is received by value, so it is edited in place and handed
 * back: the Abs flag is set and NegateAbs is cleared; every other field
 * is returned exactly as it was passed in.
 */
static struct prog_src_register absolute(struct prog_src_register reg)
{
	reg.NegateAbs = 0;
	reg.Abs = 1;
	return reg;
}
static struct prog_src_register negate(struct prog_src_register reg)
{
struct prog_src_register newreg = reg;
@ -412,3 +420,139 @@ GLboolean radeonTransformALU(struct radeon_transform_context* t,
return GL_FALSE;
}
}
/**
 * Add the two constant vectors needed by the trigonometric emulation to
 * the program's parameter list.
 *
 * \param t         transform context; constants go into t->Program->Parameters
 * \param constants output array of (at least) two entries; constants[i]
 *                  receives the value returned by _mesa_add_unnamed_constant
 *                  for row i of the table below
 */
static void sincos_constants(struct radeon_transform_context* t, GLuint *constants)
{
	static const GLfloat SinCosConsts[2][4] = {
		{
			1.273239545, // 4/PI
			-0.405284735, // -4/(PI*PI)
			3.141592654, // PI
			0.2225 // weight
		},
		{
			0.75, // range-reduction offset used for COS
			0.5, // range-reduction offset used for SIN
			0.159154943, // 1/(2*PI)
			6.283185307 // 2*PI
		}
	};
	const int num_rows = sizeof(SinCosConsts) / sizeof(SinCosConsts[0]);
	int row = 0;

	while (row < num_rows) {
		GLuint swz;

		constants[row] = _mesa_add_unnamed_constant(t->Program->Parameters, SinCosConsts[row], 4, &swz);
		/*
		 * The users of these constants address the components directly
		 * (.x/.y/.z/.w), so the vector must have been stored without
		 * any component remapping.
		 */
		ASSERT(swz == SWIZZLE_NOOP);
		row++;
	}
}
/**
* Approximate sin(x), where x is clamped to (-pi/2, pi/2).
*
* MUL tmp.xy, src, { 4/PI, -4/(PI^2) }
* MAD tmp.x, tmp.y, |src|, tmp.x
* MAD tmp.y, tmp.x, |tmp.x|, -tmp.x
* MAD dest, tmp.y, weight, tmp.x
*/
static void sin_approx(struct radeon_transform_context* t,
struct prog_dst_register dst, struct prog_src_register src, const GLuint* constants)
{
GLuint tempreg = radeonFindFreeTemporary(t);
emit2(t->Program, OPCODE_MUL, dstregtmpmask(tempreg, WRITEMASK_XY),
swizzle(src, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
srcreg(PROGRAM_CONSTANT, constants[0]));
emit3(t->Program, OPCODE_MAD, dstregtmpmask(tempreg, WRITEMASK_X),
swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y),
absolute(swizzle(src, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)),
swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X));
emit3(t->Program, OPCODE_MAD, dstregtmpmask(tempreg, WRITEMASK_Y),
swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
absolute(swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)),
negate(swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)));
emit3(t->Program, OPCODE_MAD, dst,
swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y),
swizzle(srcreg(PROGRAM_CONSTANT, constants[0]), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X));
}
/**
* Translate the trigonometric functions COS, SIN, and SCS
* using only the basic instructions
* MOV, ADD, MUL, MAD, FRC
*/
GLboolean radeonTransformTrigSimple(struct radeon_transform_context* t,
struct prog_instruction* inst,
void* unused)
{
if (inst->Opcode != OPCODE_COS &&
inst->Opcode != OPCODE_SIN &&
inst->Opcode != OPCODE_SCS)
return GL_FALSE;
GLuint constants[2];
GLuint tempreg = radeonFindFreeTemporary(t);
sincos_constants(t, constants);
if (inst->Opcode == OPCODE_COS) {
// MAD tmp.x, src, 1/(2*PI), 0.75
// FRC tmp.x, tmp.x
// MAD tmp.z, tmp.x, 2*PI, -PI
emit3(t->Program, OPCODE_MAD, dstregtmpmask(tempreg, WRITEMASK_W),
swizzle(inst->SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z),
swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X));
emit1(t->Program, OPCODE_FRC, dstregtmpmask(tempreg, WRITEMASK_W),
swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W));
emit3(t->Program, OPCODE_MAD, dstregtmpmask(tempreg, WRITEMASK_W),
swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
negate(swizzle(srcreg(PROGRAM_CONSTANT, constants[0]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z)));
sin_approx(t, inst->DstReg,
swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
constants);
} else if (inst->Opcode == OPCODE_SIN) {
emit3(t->Program, OPCODE_MAD, dstregtmpmask(tempreg, WRITEMASK_W),
swizzle(inst->SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z),
swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y));
emit1(t->Program, OPCODE_FRC, dstregtmpmask(tempreg, WRITEMASK_W),
swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W));
emit3(t->Program, OPCODE_MAD, dstregtmpmask(tempreg, WRITEMASK_W),
swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
negate(swizzle(srcreg(PROGRAM_CONSTANT, constants[0]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z)));
sin_approx(t, inst->DstReg,
swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
constants);
} else {
emit3(t->Program, OPCODE_MAD, dstregtmpmask(tempreg, WRITEMASK_XY),
swizzle(inst->SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z),
swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W));
emit1(t->Program, OPCODE_FRC, dstregtmpmask(tempreg, WRITEMASK_XY),
srcreg(PROGRAM_TEMPORARY, tempreg));
emit3(t->Program, OPCODE_MAD, dstregtmpmask(tempreg, WRITEMASK_XY),
srcreg(PROGRAM_TEMPORARY, tempreg),
swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
negate(swizzle(srcreg(PROGRAM_CONSTANT, constants[0]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z)));
struct prog_dst_register dst = inst->DstReg;
dst.WriteMask = inst->DstReg.WriteMask & WRITEMASK_X;
sin_approx(t, dst,
swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
constants);
dst.WriteMask = inst->DstReg.WriteMask & WRITEMASK_Y;
sin_approx(t, dst,
swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y),
constants);
}
return GL_TRUE;
}

View File

@ -35,4 +35,9 @@ GLboolean radeonTransformALU(
struct prog_instruction*,
void*);
/**
 * Program transformation that rewrites the COS, SIN and SCS opcodes into
 * sequences of basic ALU instructions.
 *
 * Returns GL_FALSE when the given instruction is none of those opcodes and
 * GL_TRUE after the replacement has been emitted. The trailing void*
 * argument is not used by this transformation.
 */
GLboolean radeonTransformTrigSimple(
struct radeon_transform_context *t,
struct prog_instruction*,
void*);
#endif /* __RADEON_PROGRAM_ALU_H_ */