r300/compiler: implement SIN+COS+SCS for vertex shaders
This commit is contained in:
parent
722db9fa77
commit
f855e16afa
|
@ -360,6 +360,7 @@ static void translate_vertex_program(struct r300_vertex_program_compiler * compi
|
||||||
switch (vpi->Opcode) {
|
switch (vpi->Opcode) {
|
||||||
case RC_OPCODE_ADD: ei_vector2(compiler->code, VE_ADD, vpi, inst); break;
|
case RC_OPCODE_ADD: ei_vector2(compiler->code, VE_ADD, vpi, inst); break;
|
||||||
case RC_OPCODE_ARL: ei_vector1(compiler->code, VE_FLT2FIX_DX, vpi, inst); break;
|
case RC_OPCODE_ARL: ei_vector1(compiler->code, VE_FLT2FIX_DX, vpi, inst); break;
|
||||||
|
case RC_OPCODE_COS: ei_math1(compiler->code, ME_COS, vpi, inst); break;
|
||||||
case RC_OPCODE_DP4: ei_vector2(compiler->code, VE_DOT_PRODUCT, vpi, inst); break;
|
case RC_OPCODE_DP4: ei_vector2(compiler->code, VE_DOT_PRODUCT, vpi, inst); break;
|
||||||
case RC_OPCODE_DST: ei_vector2(compiler->code, VE_DISTANCE_VECTOR, vpi, inst); break;
|
case RC_OPCODE_DST: ei_vector2(compiler->code, VE_DISTANCE_VECTOR, vpi, inst); break;
|
||||||
case RC_OPCODE_EX2: ei_math1(compiler->code, ME_EXP_BASE2_FULL_DX, vpi, inst); break;
|
case RC_OPCODE_EX2: ei_math1(compiler->code, ME_EXP_BASE2_FULL_DX, vpi, inst); break;
|
||||||
|
@ -378,6 +379,7 @@ static void translate_vertex_program(struct r300_vertex_program_compiler * compi
|
||||||
case RC_OPCODE_RSQ: ei_math1(compiler->code, ME_RECIP_SQRT_DX, vpi, inst); break;
|
case RC_OPCODE_RSQ: ei_math1(compiler->code, ME_RECIP_SQRT_DX, vpi, inst); break;
|
||||||
case RC_OPCODE_SEQ: ei_vector2(compiler->code, VE_SET_EQUAL, vpi, inst); break;
|
case RC_OPCODE_SEQ: ei_vector2(compiler->code, VE_SET_EQUAL, vpi, inst); break;
|
||||||
case RC_OPCODE_SGE: ei_vector2(compiler->code, VE_SET_GREATER_THAN_EQUAL, vpi, inst); break;
|
case RC_OPCODE_SGE: ei_vector2(compiler->code, VE_SET_GREATER_THAN_EQUAL, vpi, inst); break;
|
||||||
|
case RC_OPCODE_SIN: ei_math1(compiler->code, ME_SIN, vpi, inst); break;
|
||||||
case RC_OPCODE_SLT: ei_vector2(compiler->code, VE_SET_LESS_THAN, vpi, inst); break;
|
case RC_OPCODE_SLT: ei_vector2(compiler->code, VE_SET_LESS_THAN, vpi, inst); break;
|
||||||
case RC_OPCODE_SNE: ei_vector2(compiler->code, VE_SET_NOT_EQUAL, vpi, inst); break;
|
case RC_OPCODE_SNE: ei_vector2(compiler->code, VE_SET_NOT_EQUAL, vpi, inst); break;
|
||||||
default:
|
default:
|
||||||
|
@ -605,8 +607,9 @@ void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler)
|
||||||
{
|
{
|
||||||
struct radeon_program_transformation transformations[] = {
|
struct radeon_program_transformation transformations[] = {
|
||||||
{ &r300_transform_vertex_alu, 0 },
|
{ &r300_transform_vertex_alu, 0 },
|
||||||
|
{ &r300_transform_trig_scale_vertex, 0 }
|
||||||
};
|
};
|
||||||
radeonLocalTransform(&compiler->Base, 1, transformations);
|
radeonLocalTransform(&compiler->Base, 2, transformations);
|
||||||
}
|
}
|
||||||
|
|
||||||
debug_program_log(compiler, "after native rewrite");
|
debug_program_log(compiler, "after native rewrite");
|
||||||
|
|
|
@ -848,6 +848,34 @@ int radeonTransformTrigSimple(struct radeon_compiler* c,
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void r300_transform_SIN_COS_SCS(struct radeon_compiler *c,
|
||||||
|
struct rc_instruction *inst,
|
||||||
|
unsigned srctmp)
|
||||||
|
{
|
||||||
|
if (inst->U.I.Opcode == RC_OPCODE_COS) {
|
||||||
|
emit1(c, inst->Prev, RC_OPCODE_COS, inst->U.I.SaturateMode, inst->U.I.DstReg,
|
||||||
|
srcregswz(RC_FILE_TEMPORARY, srctmp, RC_SWIZZLE_WWWW));
|
||||||
|
} else if (inst->U.I.Opcode == RC_OPCODE_SIN) {
|
||||||
|
emit1(c, inst->Prev, RC_OPCODE_SIN, inst->U.I.SaturateMode,
|
||||||
|
inst->U.I.DstReg, srcregswz(RC_FILE_TEMPORARY, srctmp, RC_SWIZZLE_WWWW));
|
||||||
|
} else if (inst->U.I.Opcode == RC_OPCODE_SCS) {
|
||||||
|
struct rc_dst_register moddst = inst->U.I.DstReg;
|
||||||
|
|
||||||
|
if (inst->U.I.DstReg.WriteMask & RC_MASK_X) {
|
||||||
|
moddst.WriteMask = RC_MASK_X;
|
||||||
|
emit1(c, inst->Prev, RC_OPCODE_COS, inst->U.I.SaturateMode, moddst,
|
||||||
|
srcregswz(RC_FILE_TEMPORARY, srctmp, RC_SWIZZLE_WWWW));
|
||||||
|
}
|
||||||
|
if (inst->U.I.DstReg.WriteMask & RC_MASK_Y) {
|
||||||
|
moddst.WriteMask = RC_MASK_Y;
|
||||||
|
emit1(c, inst->Prev, RC_OPCODE_SIN, inst->U.I.SaturateMode, moddst,
|
||||||
|
srcregswz(RC_FILE_TEMPORARY, srctmp, RC_SWIZZLE_WWWW));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
rc_remove_instruction(inst);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Transform the trigonometric functions COS, SIN, and SCS
|
* Transform the trigonometric functions COS, SIN, and SCS
|
||||||
|
@ -880,29 +908,48 @@ int radeonTransformTrigScale(struct radeon_compiler* c,
|
||||||
emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstregtmpmask(temp, RC_MASK_W),
|
emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstregtmpmask(temp, RC_MASK_W),
|
||||||
srcreg(RC_FILE_TEMPORARY, temp));
|
srcreg(RC_FILE_TEMPORARY, temp));
|
||||||
|
|
||||||
if (inst->U.I.Opcode == RC_OPCODE_COS) {
|
r300_transform_SIN_COS_SCS(c, inst, temp);
|
||||||
emit1(c, inst->Prev, RC_OPCODE_COS, inst->U.I.SaturateMode, inst->U.I.DstReg,
|
return 1;
|
||||||
srcregswz(RC_FILE_TEMPORARY, temp, RC_SWIZZLE_WWWW));
|
}
|
||||||
} else if (inst->U.I.Opcode == RC_OPCODE_SIN) {
|
|
||||||
emit1(c, inst->Prev, RC_OPCODE_SIN, inst->U.I.SaturateMode,
|
|
||||||
inst->U.I.DstReg, srcregswz(RC_FILE_TEMPORARY, temp, RC_SWIZZLE_WWWW));
|
|
||||||
} else if (inst->U.I.Opcode == RC_OPCODE_SCS) {
|
|
||||||
struct rc_dst_register moddst = inst->U.I.DstReg;
|
|
||||||
|
|
||||||
if (inst->U.I.DstReg.WriteMask & RC_MASK_X) {
|
/**
|
||||||
moddst.WriteMask = RC_MASK_X;
|
* Transform the trigonometric functions COS, SIN, and SCS
|
||||||
emit1(c, inst->Prev, RC_OPCODE_COS, inst->U.I.SaturateMode, moddst,
|
* so that the input to COS and SIN is always in the range [-PI, PI].
|
||||||
srcregswz(RC_FILE_TEMPORARY, temp, RC_SWIZZLE_WWWW));
|
* SCS is replaced by one COS and one SIN instruction.
|
||||||
}
|
*/
|
||||||
if (inst->U.I.DstReg.WriteMask & RC_MASK_Y) {
|
int r300_transform_trig_scale_vertex(struct radeon_compiler *c,
|
||||||
moddst.WriteMask = RC_MASK_Y;
|
struct rc_instruction *inst,
|
||||||
emit1(c, inst->Prev, RC_OPCODE_SIN, inst->U.I.SaturateMode, moddst,
|
void *unused)
|
||||||
srcregswz(RC_FILE_TEMPORARY, temp, RC_SWIZZLE_WWWW));
|
{
|
||||||
}
|
if (inst->U.I.Opcode != RC_OPCODE_COS &&
|
||||||
}
|
inst->U.I.Opcode != RC_OPCODE_SIN &&
|
||||||
|
inst->U.I.Opcode != RC_OPCODE_SCS)
|
||||||
|
return 0;
|
||||||
|
|
||||||
rc_remove_instruction(inst);
|
/* Repeat x in the range [-PI, PI]:
|
||||||
|
*
|
||||||
|
* repeat(x) = frac(x / 2PI + 0.5) * 2PI - PI
|
||||||
|
*/
|
||||||
|
|
||||||
|
static const float cons[4] = {0.15915494309189535, 0.5, 6.28318530717959, -3.14159265358979};
|
||||||
|
unsigned int temp;
|
||||||
|
unsigned int constant;
|
||||||
|
|
||||||
|
temp = rc_find_free_temporary(c);
|
||||||
|
constant = rc_constants_add_immediate_vec4(&c->Program.Constants, cons);
|
||||||
|
|
||||||
|
emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(temp, RC_MASK_W),
|
||||||
|
swizzle_xxxx(inst->U.I.SrcReg[0]),
|
||||||
|
srcregswz(RC_FILE_CONSTANT, constant, RC_SWIZZLE_XXXX),
|
||||||
|
srcregswz(RC_FILE_CONSTANT, constant, RC_SWIZZLE_YYYY));
|
||||||
|
emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstregtmpmask(temp, RC_MASK_W),
|
||||||
|
srcreg(RC_FILE_TEMPORARY, temp));
|
||||||
|
emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(temp, RC_MASK_W),
|
||||||
|
srcreg(RC_FILE_TEMPORARY, temp),
|
||||||
|
srcregswz(RC_FILE_CONSTANT, constant, RC_SWIZZLE_ZZZZ),
|
||||||
|
srcregswz(RC_FILE_CONSTANT, constant, RC_SWIZZLE_WWWW));
|
||||||
|
|
||||||
|
r300_transform_SIN_COS_SCS(c, inst, temp);
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -50,6 +50,11 @@ int radeonTransformTrigScale(
|
||||||
struct rc_instruction * inst,
|
struct rc_instruction * inst,
|
||||||
void*);
|
void*);
|
||||||
|
|
||||||
|
int r300_transform_trig_scale_vertex(
|
||||||
|
struct radeon_compiler *c,
|
||||||
|
struct rc_instruction *inst,
|
||||||
|
void*);
|
||||||
|
|
||||||
int radeonTransformDeriv(
|
int radeonTransformDeriv(
|
||||||
struct radeon_compiler * c,
|
struct radeon_compiler * c,
|
||||||
struct rc_instruction * inst,
|
struct rc_instruction * inst,
|
||||||
|
|
Loading…
Reference in New Issue