r300/compiler: Enable presubtract sources
The r300 compiler can now emit instructions that select from the presubtract source. A peephole optimization has been added to convert instructions like: ADD Temp[0].x, none.1, -Temp[1].x into the INV (1 - src0) presubtract operation.
This commit is contained in:
parent
d8a3662008
commit
63432ecfce
|
@ -387,6 +387,7 @@ static void r300_translate_fragment_shader(
|
|||
compiler.Base.is_r500 = r300->screen->caps.is_r500;
|
||||
compiler.Base.disable_optimizations = DBG_ON(r300, DBG_NO_OPT);
|
||||
compiler.Base.has_half_swizzles = TRUE;
|
||||
compiler.Base.has_presub = TRUE;
|
||||
compiler.Base.max_temp_regs = compiler.Base.is_r500 ? 128 : 32;
|
||||
compiler.Base.max_constants = compiler.Base.is_r500 ? 256 : 32;
|
||||
compiler.Base.max_alu_insts = compiler.Base.is_r500 ? 512 : 64;
|
||||
|
|
|
@ -208,6 +208,7 @@ void r300_translate_vertex_shader(struct r300_context *r300,
|
|||
compiler.Base.is_r500 = r300->screen->caps.is_r500;
|
||||
compiler.Base.disable_optimizations = DBG_ON(r300, DBG_NO_OPT);
|
||||
compiler.Base.has_half_swizzles = FALSE;
|
||||
compiler.Base.has_presub = FALSE;
|
||||
compiler.Base.max_temp_regs = 32;
|
||||
compiler.Base.max_constants = 256;
|
||||
compiler.Base.max_alu_insts = r300->screen->caps.is_r500 ? 1024 : 256;
|
||||
|
|
|
@ -31,6 +31,24 @@
|
|||
|
||||
#include "../r300_reg.h"
|
||||
|
||||
static void presub_string(char out[10], unsigned int inst)
|
||||
{
|
||||
switch(inst & 0x600000){
|
||||
case R300_ALU_SRCP_1_MINUS_2_SRC0:
|
||||
sprintf(out, "bias");
|
||||
break;
|
||||
case R300_ALU_SRCP_SRC1_MINUS_SRC0:
|
||||
sprintf(out, "sub");
|
||||
break;
|
||||
case R300_ALU_SRCP_SRC1_PLUS_SRC0:
|
||||
sprintf(out, "add");
|
||||
break;
|
||||
case R300_ALU_SRCP_1_MINUS_SRC0:
|
||||
sprintf(out, "inv ");
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* just some random things... */
|
||||
void r300FragmentProgramDump(struct radeon_compiler *c, void *user)
|
||||
{
|
||||
|
@ -98,8 +116,8 @@ void r300FragmentProgramDump(struct radeon_compiler *c, void *user)
|
|||
|
||||
for (i = alu_offset;
|
||||
i <= alu_offset + alu_end; ++i) {
|
||||
char srcc[3][10], dstc[20];
|
||||
char srca[3][10], dsta[20];
|
||||
char srcc[4][10], dstc[20];
|
||||
char srca[4][10], dsta[20];
|
||||
char argc[3][20];
|
||||
char arga[3][20];
|
||||
char flags[5], tmp[10];
|
||||
|
@ -142,6 +160,9 @@ void r300FragmentProgramDump(struct radeon_compiler *c, void *user)
|
|||
flags);
|
||||
strcat(dstc, tmp);
|
||||
}
|
||||
/* Presub */
|
||||
presub_string(srcc[3], code->alu.inst[i].rgb_inst);
|
||||
presub_string(srca[3], code->alu.inst[i].alpha_inst);
|
||||
|
||||
dsta[0] = 0;
|
||||
if (code->alu.inst[i].alpha_addr & R300_ALU_DSTA_REG) {
|
||||
|
@ -160,11 +181,12 @@ void r300FragmentProgramDump(struct radeon_compiler *c, void *user)
|
|||
}
|
||||
|
||||
fprintf(stderr,
|
||||
"%3i: xyz: %3s %3s %3s -> %-20s (%08x)\n"
|
||||
" w: %3s %3s %3s -> %-20s (%08x)\n", i,
|
||||
srcc[0], srcc[1], srcc[2], dstc,
|
||||
"%3i: xyz: %3s %3s %3s %5s-> %-20s (%08x)\n"
|
||||
" w: %3s %3s %3s %5s-> %-20s (%08x)\n", i,
|
||||
srcc[0], srcc[1], srcc[2], srcc[3], dstc,
|
||||
code->alu.inst[i].rgb_addr, srca[0], srca[1],
|
||||
srca[2], dsta, code->alu.inst[i].alpha_addr);
|
||||
srca[2], srca[3], dsta,
|
||||
code->alu.inst[i].alpha_addr);
|
||||
|
||||
for (j = 0; j < 3; ++j) {
|
||||
int regc = code->alu.inst[i].rgb_inst >> (j * 7);
|
||||
|
@ -194,6 +216,24 @@ void r300FragmentProgramDump(struct radeon_compiler *c, void *user)
|
|||
}
|
||||
} else if (d < 15) {
|
||||
sprintf(buf, "%s.www", srca[d - 12]);
|
||||
} else if (d < 20 ) {
|
||||
switch(d) {
|
||||
case R300_ALU_ARGC_SRCP_XYZ:
|
||||
sprintf(buf, "srcp.xyz");
|
||||
break;
|
||||
case R300_ALU_ARGC_SRCP_XXX:
|
||||
sprintf(buf, "srcp.xxx");
|
||||
break;
|
||||
case R300_ALU_ARGC_SRCP_YYY:
|
||||
sprintf(buf, "srcp.yyy");
|
||||
break;
|
||||
case R300_ALU_ARGC_SRCP_ZZZ:
|
||||
sprintf(buf, "srcp.zzz");
|
||||
break;
|
||||
case R300_ALU_ARGC_SRCP_WWW:
|
||||
sprintf(buf, "srcp.www");
|
||||
break;
|
||||
}
|
||||
} else if (d == 20) {
|
||||
sprintf(buf, "0.0");
|
||||
} else if (d == 21) {
|
||||
|
@ -231,6 +271,21 @@ void r300FragmentProgramDump(struct radeon_compiler *c, void *user)
|
|||
'x' + (char)(d % 3));
|
||||
} else if (d < 12) {
|
||||
sprintf(buf, "%s.w", srca[d - 9]);
|
||||
} else if (d < 16) {
|
||||
switch(d) {
|
||||
case R300_ALU_ARGA_SRCP_X:
|
||||
sprintf(buf, "srcp.x");
|
||||
break;
|
||||
case R300_ALU_ARGA_SRCP_Y:
|
||||
sprintf(buf, "srcp.y");
|
||||
break;
|
||||
case R300_ALU_ARGA_SRCP_Z:
|
||||
sprintf(buf, "srcp.z");
|
||||
break;
|
||||
case R300_ALU_ARGA_SRCP_W:
|
||||
sprintf(buf, "srcp.w");
|
||||
break;
|
||||
}
|
||||
} else if (d == 16) {
|
||||
sprintf(buf, "0.0");
|
||||
} else if (d == 17) {
|
||||
|
@ -247,11 +302,14 @@ void r300FragmentProgramDump(struct radeon_compiler *c, void *user)
|
|||
buf, (rega & 64) ? "|" : "");
|
||||
}
|
||||
|
||||
fprintf(stderr, " xyz: %8s %8s %8s op: %08x\n"
|
||||
fprintf(stderr, " xyz: %8s %8s %8s op: %08x %s\n"
|
||||
" w: %8s %8s %8s op: %08x\n",
|
||||
argc[0], argc[1], argc[2],
|
||||
code->alu.inst[i].rgb_inst, arga[0], arga[1],
|
||||
arga[2], code->alu.inst[i].alpha_inst);
|
||||
code->alu.inst[i].rgb_inst,
|
||||
code->alu.inst[i].rgb_inst & R300_ALU_INSERT_NOP ?
|
||||
"NOP" : "",
|
||||
arga[0], arga[1],arga[2],
|
||||
code->alu.inst[i].alpha_inst);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -164,6 +164,53 @@ static int emit_alu(struct r300_emit_state * emit, struct rc_pair_instruction* i
|
|||
code->alu.inst[ip].alpha_inst |= arg << (7*j);
|
||||
}
|
||||
|
||||
/* Presubtract */
|
||||
if (inst->RGB.Src[RC_PAIR_PRESUB_SRC].Used) {
|
||||
switch(inst->RGB.Src[RC_PAIR_PRESUB_SRC].Index) {
|
||||
case RC_PRESUB_BIAS:
|
||||
code->alu.inst[ip].rgb_inst |=
|
||||
R300_ALU_SRCP_1_MINUS_2_SRC0;
|
||||
break;
|
||||
case RC_PRESUB_ADD:
|
||||
code->alu.inst[ip].rgb_inst |=
|
||||
R300_ALU_SRCP_SRC1_PLUS_SRC0;
|
||||
break;
|
||||
case RC_PRESUB_SUB:
|
||||
code->alu.inst[ip].rgb_inst |=
|
||||
R300_ALU_SRCP_SRC1_MINUS_SRC0;
|
||||
break;
|
||||
case RC_PRESUB_INV:
|
||||
code->alu.inst[ip].rgb_inst |=
|
||||
R300_ALU_SRCP_1_MINUS_SRC0;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Used) {
|
||||
switch(inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Index) {
|
||||
case RC_PRESUB_BIAS:
|
||||
code->alu.inst[ip].alpha_inst |=
|
||||
R300_ALU_SRCP_1_MINUS_2_SRC0;
|
||||
break;
|
||||
case RC_PRESUB_ADD:
|
||||
code->alu.inst[ip].alpha_inst |=
|
||||
R300_ALU_SRCP_SRC1_PLUS_SRC0;
|
||||
break;
|
||||
case RC_PRESUB_SUB:
|
||||
code->alu.inst[ip].alpha_inst |=
|
||||
R300_ALU_SRCP_SRC1_MINUS_SRC0;
|
||||
break;
|
||||
case RC_PRESUB_INV:
|
||||
code->alu.inst[ip].alpha_inst |=
|
||||
R300_ALU_SRCP_1_MINUS_SRC0;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (inst->RGB.Saturate)
|
||||
code->alu.inst[ip].rgb_inst |= R300_ALU_OUTC_CLAMP;
|
||||
if (inst->Alpha.Saturate)
|
||||
|
@ -198,6 +245,8 @@ static int emit_alu(struct r300_emit_state * emit, struct rc_pair_instruction* i
|
|||
emit->node_flags |= R300_W_OUT;
|
||||
c->code->writes_depth = 1;
|
||||
}
|
||||
if (inst->Nop)
|
||||
code->alu.inst[ip].rgb_inst |= R300_ALU_INSERT_NOP;
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
|
|
@ -44,25 +44,25 @@ struct swizzle_data {
|
|||
unsigned int hash; /**< swizzle value this matches */
|
||||
unsigned int base; /**< base value for hw swizzle */
|
||||
unsigned int stride; /**< difference in base between arg0/1/2 */
|
||||
unsigned int srcp_stride; /**< difference in base between arg0/scrp */
|
||||
};
|
||||
|
||||
static const struct swizzle_data native_swizzles[] = {
|
||||
{MAKE_SWZ3(X, Y, Z), R300_ALU_ARGC_SRC0C_XYZ, 4},
|
||||
{MAKE_SWZ3(X, X, X), R300_ALU_ARGC_SRC0C_XXX, 4},
|
||||
{MAKE_SWZ3(Y, Y, Y), R300_ALU_ARGC_SRC0C_YYY, 4},
|
||||
{MAKE_SWZ3(Z, Z, Z), R300_ALU_ARGC_SRC0C_ZZZ, 4},
|
||||
{MAKE_SWZ3(W, W, W), R300_ALU_ARGC_SRC0A, 1},
|
||||
{MAKE_SWZ3(Y, Z, X), R300_ALU_ARGC_SRC0C_YZX, 1},
|
||||
{MAKE_SWZ3(Z, X, Y), R300_ALU_ARGC_SRC0C_ZXY, 1},
|
||||
{MAKE_SWZ3(W, Z, Y), R300_ALU_ARGC_SRC0CA_WZY, 1},
|
||||
{MAKE_SWZ3(ONE, ONE, ONE), R300_ALU_ARGC_ONE, 0},
|
||||
{MAKE_SWZ3(ZERO, ZERO, ZERO), R300_ALU_ARGC_ZERO, 0},
|
||||
{MAKE_SWZ3(HALF, HALF, HALF), R300_ALU_ARGC_HALF, 0}
|
||||
{MAKE_SWZ3(X, Y, Z), R300_ALU_ARGC_SRC0C_XYZ, 4, 15},
|
||||
{MAKE_SWZ3(X, X, X), R300_ALU_ARGC_SRC0C_XXX, 4, 15},
|
||||
{MAKE_SWZ3(Y, Y, Y), R300_ALU_ARGC_SRC0C_YYY, 4, 15},
|
||||
{MAKE_SWZ3(Z, Z, Z), R300_ALU_ARGC_SRC0C_ZZZ, 4, 15},
|
||||
{MAKE_SWZ3(W, W, W), R300_ALU_ARGC_SRC0A, 1, 7},
|
||||
{MAKE_SWZ3(Y, Z, X), R300_ALU_ARGC_SRC0C_YZX, 1, 0},
|
||||
{MAKE_SWZ3(Z, X, Y), R300_ALU_ARGC_SRC0C_ZXY, 1, 0},
|
||||
{MAKE_SWZ3(W, Z, Y), R300_ALU_ARGC_SRC0CA_WZY, 1, 0},
|
||||
{MAKE_SWZ3(ONE, ONE, ONE), R300_ALU_ARGC_ONE, 0, 0},
|
||||
{MAKE_SWZ3(ZERO, ZERO, ZERO), R300_ALU_ARGC_ZERO, 0, 0},
|
||||
{MAKE_SWZ3(HALF, HALF, HALF), R300_ALU_ARGC_HALF, 0, 0}
|
||||
};
|
||||
|
||||
static const int num_native_swizzles = sizeof(native_swizzles)/sizeof(native_swizzles[0]);
|
||||
|
||||
|
||||
/**
|
||||
* Find a native RGB swizzle that matches the given swizzle.
|
||||
* Returns 0 if none found.
|
||||
|
@ -205,7 +205,11 @@ unsigned int r300FPTranslateRGBSwizzle(unsigned int src, unsigned int swizzle)
|
|||
return 0;
|
||||
}
|
||||
|
||||
return sd->base + src*sd->stride;
|
||||
if (src == RC_PAIR_PRESUB_SRC) {
|
||||
return sd->base + sd->srcp_stride;
|
||||
} else {
|
||||
return sd->base + src*sd->stride;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
@ -215,6 +219,9 @@ unsigned int r300FPTranslateRGBSwizzle(unsigned int src, unsigned int swizzle)
|
|||
*/
|
||||
unsigned int r300FPTranslateAlphaSwizzle(unsigned int src, unsigned int swizzle)
|
||||
{
|
||||
if (src == RC_PAIR_PRESUB_SRC) {
|
||||
return R300_ALU_ARGA_SRCP_X + swizzle;
|
||||
}
|
||||
if (swizzle < 3)
|
||||
return swizzle + 3*src;
|
||||
|
||||
|
|
|
@ -260,6 +260,9 @@ static void emit_paired(struct r300_fragment_program_compiler *c, struct rc_pair
|
|||
|
||||
code->inst[ip].inst0 |= (inst->RGB.WriteMask << 11) | (inst->Alpha.WriteMask << 14);
|
||||
code->inst[ip].inst0 |= (inst->RGB.OutputWriteMask << 15) | (inst->Alpha.OutputWriteMask << 18);
|
||||
if (inst->Nop) {
|
||||
code->inst[ip].inst0 |= R500_INST_NOP;
|
||||
}
|
||||
if (inst->Alpha.DepthWriteMask) {
|
||||
code->inst[ip].inst4 |= R500_ALPHA_W_OMASK;
|
||||
c->code->writes_depth = 1;
|
||||
|
@ -275,6 +278,40 @@ static void emit_paired(struct r300_fragment_program_compiler *c, struct rc_pair
|
|||
if (inst->Alpha.Saturate)
|
||||
code->inst[ip].inst0 |= R500_INST_ALPHA_CLAMP;
|
||||
|
||||
/* Set the presubtract operation. */
|
||||
switch(inst->RGB.Src[RC_PAIR_PRESUB_SRC].Index) {
|
||||
case RC_PRESUB_BIAS:
|
||||
code->inst[ip].inst1 |= R500_RGB_SRCP_OP_1_MINUS_2RGB0;
|
||||
break;
|
||||
case RC_PRESUB_SUB:
|
||||
code->inst[ip].inst1 |= R500_RGB_SRCP_OP_RGB1_MINUS_RGB0;
|
||||
break;
|
||||
case RC_PRESUB_ADD:
|
||||
code->inst[ip].inst1 |= R500_RGB_SRCP_OP_RGB1_PLUS_RGB0;
|
||||
break;
|
||||
case RC_PRESUB_INV:
|
||||
code->inst[ip].inst1 |= R500_RGB_SRCP_OP_1_MINUS_RGB0;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
switch(inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Index) {
|
||||
case RC_PRESUB_BIAS:
|
||||
code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_1_MINUS_2A0;
|
||||
break;
|
||||
case RC_PRESUB_SUB:
|
||||
code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_A1_MINUS_A0;
|
||||
break;
|
||||
case RC_PRESUB_ADD:
|
||||
code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_A1_PLUS_A0;
|
||||
break;
|
||||
case RC_PRESUB_INV:
|
||||
code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_1_MINUS_A0;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
code->inst[ip].inst1 |= R500_RGB_ADDR0(use_source(code, inst->RGB.Src[0]));
|
||||
code->inst[ip].inst1 |= R500_RGB_ADDR1(use_source(code, inst->RGB.Src[1]));
|
||||
code->inst[ip].inst1 |= R500_RGB_ADDR2(use_source(code, inst->RGB.Src[2]));
|
||||
|
|
|
@ -42,6 +42,7 @@ struct radeon_compiler {
|
|||
/* Hardware specification. */
|
||||
unsigned is_r500:1;
|
||||
unsigned has_half_swizzles:1;
|
||||
unsigned has_presub:1;
|
||||
unsigned disable_optimizations:1;
|
||||
unsigned max_temp_regs;
|
||||
unsigned max_constants;
|
||||
|
|
|
@ -29,6 +29,25 @@
|
|||
|
||||
#include "radeon_program.h"
|
||||
|
||||
static void reads_normal_callback(
|
||||
rc_read_write_chan_fn cb,
|
||||
struct rc_instruction * fullinst,
|
||||
struct rc_src_register src,
|
||||
void * userdata)
|
||||
{
|
||||
unsigned int refmask = 0;
|
||||
unsigned int chan;
|
||||
for(chan = 0; chan < 4; chan++) {
|
||||
refmask |= 1 << GET_SWZ(src.Swizzle, chan);
|
||||
}
|
||||
refmask &= RC_MASK_XYZW;
|
||||
|
||||
if (refmask)
|
||||
cb(userdata, fullinst, src.File, src.Index, refmask);
|
||||
|
||||
if (refmask && src.RelAddr)
|
||||
cb(userdata, fullinst, RC_FILE_ADDRESS, 0, RC_MASK_X);
|
||||
}
|
||||
|
||||
static void reads_normal(struct rc_instruction * fullinst, rc_read_write_chan_fn cb, void * userdata)
|
||||
{
|
||||
|
@ -36,21 +55,60 @@ static void reads_normal(struct rc_instruction * fullinst, rc_read_write_chan_fn
|
|||
const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Opcode);
|
||||
|
||||
for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) {
|
||||
unsigned int refmask = 0;
|
||||
|
||||
if (inst->SrcReg[src].File == RC_FILE_NONE)
|
||||
return;
|
||||
|
||||
for(unsigned int chan = 0; chan < 4; ++chan)
|
||||
refmask |= 1 << GET_SWZ(inst->SrcReg[src].Swizzle, chan);
|
||||
if (inst->SrcReg[src].File == RC_FILE_PRESUB) {
|
||||
unsigned int i;
|
||||
unsigned int srcp_regs = rc_presubtract_src_reg_count(
|
||||
inst->PreSub.Opcode);
|
||||
for( i = 0; i < srcp_regs; i++) {
|
||||
reads_normal_callback(cb, fullinst,
|
||||
inst->PreSub.SrcReg[i],
|
||||
userdata);
|
||||
}
|
||||
} else {
|
||||
reads_normal_callback(cb, fullinst,
|
||||
inst->SrcReg[src], userdata);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
refmask &= RC_MASK_XYZW;
|
||||
static void pair_get_src_refmasks(unsigned int * refmasks,
|
||||
struct rc_pair_instruction * inst,
|
||||
unsigned int swz, unsigned int src)
|
||||
{
|
||||
if (swz >= 4)
|
||||
return;
|
||||
|
||||
if (refmask)
|
||||
cb(userdata, fullinst, inst->SrcReg[src].File, inst->SrcReg[src].Index, refmask);
|
||||
if (swz == RC_SWIZZLE_X || swz == RC_SWIZZLE_Y || swz == RC_SWIZZLE_Z) {
|
||||
if(src == RC_PAIR_PRESUB_SRC) {
|
||||
unsigned int i;
|
||||
int srcp_regs =
|
||||
rc_presubtract_src_reg_count(
|
||||
inst->RGB.Src[src].Index);
|
||||
for(i = 0; i < srcp_regs; i++) {
|
||||
refmasks[i] |= 1 << swz;
|
||||
}
|
||||
}
|
||||
else {
|
||||
refmasks[src] |= 1 << swz;
|
||||
}
|
||||
}
|
||||
|
||||
if (refmask && inst->SrcReg[src].RelAddr)
|
||||
cb(userdata, fullinst, RC_FILE_ADDRESS, 0, RC_MASK_X);
|
||||
if (swz == RC_SWIZZLE_W) {
|
||||
if (src == RC_PAIR_PRESUB_SRC) {
|
||||
unsigned int i;
|
||||
int srcp_regs = rc_presubtract_src_reg_count(
|
||||
inst->Alpha.Src[src].Index);
|
||||
for(i = 0; i < srcp_regs; i++) {
|
||||
refmasks[i] |= 1 << swz;
|
||||
}
|
||||
}
|
||||
else {
|
||||
refmasks[src] |= 1 << swz;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -59,24 +117,19 @@ static void reads_pair(struct rc_instruction * fullinst, rc_read_write_mask_fn
|
|||
struct rc_pair_instruction * inst = &fullinst->U.P;
|
||||
unsigned int refmasks[3] = { 0, 0, 0 };
|
||||
|
||||
if (inst->RGB.Opcode != RC_OPCODE_NOP) {
|
||||
const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->RGB.Opcode);
|
||||
unsigned int arg;
|
||||
|
||||
for(unsigned int arg = 0; arg < opcode->NumSrcRegs; ++arg) {
|
||||
for(unsigned int chan = 0; chan < 3; ++chan) {
|
||||
unsigned int swz = GET_SWZ(inst->RGB.Arg[arg].Swizzle, chan);
|
||||
if (swz < 4)
|
||||
refmasks[inst->RGB.Arg[arg].Source] |= 1 << swz;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (inst->Alpha.Opcode != RC_OPCODE_NOP) {
|
||||
const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Alpha.Opcode);
|
||||
|
||||
for(unsigned int arg = 0; arg < opcode->NumSrcRegs; ++arg) {
|
||||
if (inst->Alpha.Arg[arg].Swizzle < 4)
|
||||
refmasks[inst->Alpha.Arg[arg].Source] |= 1 << inst->Alpha.Arg[arg].Swizzle;
|
||||
for(arg = 0; arg < 3; ++arg) {
|
||||
unsigned int chan;
|
||||
for(chan = 0; chan < 3; ++chan) {
|
||||
unsigned int swz_rgb =
|
||||
GET_SWZ(inst->RGB.Arg[arg].Swizzle, chan);
|
||||
unsigned int swz_alpha =
|
||||
GET_SWZ(inst->Alpha.Arg[arg].Swizzle, chan);
|
||||
pair_get_src_refmasks(refmasks, inst, swz_rgb,
|
||||
inst->RGB.Arg[arg].Source);
|
||||
pair_get_src_refmasks(refmasks, inst, swz_alpha,
|
||||
inst->Alpha.Arg[arg].Source);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -212,10 +265,25 @@ static void remap_normal_instruction(struct rc_instruction * fullinst,
|
|||
rc_register_file file = inst->SrcReg[src].File;
|
||||
unsigned int index = inst->SrcReg[src].Index;
|
||||
|
||||
cb(userdata, fullinst, &file, &index);
|
||||
if (file == RC_FILE_PRESUB) {
|
||||
unsigned int i;
|
||||
unsigned int srcp_srcs = rc_presubtract_src_reg_count(
|
||||
inst->PreSub.Opcode);
|
||||
for(i = 0; i < srcp_srcs; i++) {
|
||||
file = inst->PreSub.SrcReg[i].File;
|
||||
index = inst->PreSub.SrcReg[i].Index;
|
||||
cb(userdata, fullinst, &file, &index);
|
||||
inst->PreSub.SrcReg[i].File = file;
|
||||
inst->PreSub.SrcReg[i].Index = index;
|
||||
}
|
||||
|
||||
inst->SrcReg[src].File = file;
|
||||
inst->SrcReg[src].Index = index;
|
||||
}
|
||||
else {
|
||||
cb(userdata, fullinst, &file, &index);
|
||||
|
||||
inst->SrcReg[src].File = file;
|
||||
inst->SrcReg[src].Index = index;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -30,6 +30,13 @@
|
|||
#include "radeon_compiler.h"
|
||||
#include "radeon_swizzle.h"
|
||||
|
||||
struct peephole_state {
	/** Instruction whose destination register is being tracked. */
	struct rc_instruction *Inst;

	/**
	 * Bitmask of the destination components that are still "alive",
	 * i.e. that have not been written to since Inst was executed.
	 */
	unsigned int WriteMask;
};
|
||||
|
||||
static struct rc_src_register chain_srcregs(struct rc_src_register outer, struct rc_src_register inner)
|
||||
{
|
||||
|
@ -54,7 +61,7 @@ static struct rc_src_register chain_srcregs(struct rc_src_register outer, struct
|
|||
return combine;
|
||||
}
|
||||
|
||||
struct peephole_state {
|
||||
struct copy_propagate_state {
|
||||
struct radeon_compiler * C;
|
||||
struct rc_instruction * Mov;
|
||||
unsigned int Conflict:1;
|
||||
|
@ -84,10 +91,10 @@ struct peephole_state {
|
|||
* @param index The index of the source register.
|
||||
* @param mask The components of the source register that are being read from.
|
||||
*/
|
||||
static void peephole_scan_read(void * data, struct rc_instruction * inst,
|
||||
static void copy_propagate_scan_read(void * data, struct rc_instruction * inst,
|
||||
rc_register_file file, unsigned int index, unsigned int mask)
|
||||
{
|
||||
struct peephole_state * s = data;
|
||||
struct copy_propagate_state * s = data;
|
||||
|
||||
/* XXX This could probably be handled better. */
|
||||
if (file == RC_FILE_ADDRESS) {
|
||||
|
@ -123,10 +130,10 @@ static void peephole_scan_read(void * data, struct rc_instruction * inst,
|
|||
}
|
||||
}
|
||||
|
||||
static void peephole_scan_write(void * data, struct rc_instruction * inst,
|
||||
static void copy_propagate_scan_write(void * data, struct rc_instruction * inst,
|
||||
rc_register_file file, unsigned int index, unsigned int mask)
|
||||
{
|
||||
struct peephole_state * s = data;
|
||||
struct copy_propagate_state * s = data;
|
||||
|
||||
if (s->BranchDepth < 0)
|
||||
return;
|
||||
|
@ -146,9 +153,9 @@ static void peephole_scan_write(void * data, struct rc_instruction * inst,
|
|||
}
|
||||
}
|
||||
|
||||
static void peephole(struct radeon_compiler * c, struct rc_instruction * inst_mov)
|
||||
static void copy_propagate(struct radeon_compiler * c, struct rc_instruction * inst_mov)
|
||||
{
|
||||
struct peephole_state s;
|
||||
struct copy_propagate_state s;
|
||||
|
||||
if (inst_mov->U.I.DstReg.File != RC_FILE_TEMPORARY ||
|
||||
inst_mov->U.I.DstReg.RelAddr ||
|
||||
|
@ -170,14 +177,23 @@ static void peephole(struct radeon_compiler * c, struct rc_instruction * inst_mo
|
|||
for(struct rc_instruction * inst = inst_mov->Next;
|
||||
inst != &c->Program.Instructions;
|
||||
inst = inst->Next) {
|
||||
const struct rc_opcode_info * info = rc_get_opcode_info(inst->U.I.Opcode);
|
||||
/* XXX In the future we might be able to make the optimizer
|
||||
* smart enough to handle loops. */
|
||||
if(inst->U.I.Opcode == RC_OPCODE_BGNLOOP
|
||||
|| inst->U.I.Opcode == RC_OPCODE_ENDLOOP){
|
||||
return;
|
||||
}
|
||||
rc_for_all_reads_mask(inst, peephole_scan_read, &s);
|
||||
rc_for_all_writes_mask(inst, peephole_scan_write, &s);
|
||||
|
||||
/* It is possible to do copy propigation in this situation,
|
||||
* just not right now, see peephole_add_presub_inv() */
|
||||
if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE &&
|
||||
info->NumSrcRegs > 2) {
|
||||
return;
|
||||
}
|
||||
|
||||
rc_for_all_reads_mask(inst, copy_propagate_scan_read, &s);
|
||||
rc_for_all_writes_mask(inst, copy_propagate_scan_write, &s);
|
||||
if (s.Conflict)
|
||||
return;
|
||||
|
||||
|
@ -206,7 +222,6 @@ static void peephole(struct radeon_compiler * c, struct rc_instruction * inst_mo
|
|||
inst != &c->Program.Instructions;
|
||||
inst = inst->Next) {
|
||||
const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
|
||||
|
||||
for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) {
|
||||
if (inst->U.I.SrcReg[src].File == RC_FILE_TEMPORARY &&
|
||||
inst->U.I.SrcReg[src].Index == s.Mov->U.I.DstReg.Index) {
|
||||
|
@ -217,8 +232,11 @@ static void peephole(struct radeon_compiler * c, struct rc_instruction * inst_mo
|
|||
refmask |= (1 << swz) & RC_MASK_XYZW;
|
||||
}
|
||||
|
||||
if ((refmask & s.MovMask) == refmask)
|
||||
if ((refmask & s.MovMask) == refmask) {
|
||||
inst->U.I.SrcReg[src] = chain_srcregs(inst->U.I.SrcReg[src], s.Mov->U.I.SrcReg[0]);
|
||||
if (s.Mov->U.I.SrcReg[0].File == RC_FILE_PRESUB)
|
||||
inst->U.I.PreSub = s.Mov->U.I.PreSub;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -283,7 +301,6 @@ static int is_src_uniform_constant(struct rc_src_register src,
|
|||
return 1;
|
||||
}
|
||||
|
||||
|
||||
static void constant_folding_mad(struct rc_instruction * inst)
|
||||
{
|
||||
rc_swizzle swz;
|
||||
|
@ -379,7 +396,6 @@ static void constant_folding_add(struct rc_instruction * inst)
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Replace 0.0, 1.0 and 0.5 immediate constants by their
|
||||
* respective swizzles. Simplify instructions like ADD dst, src, 0;
|
||||
|
@ -454,6 +470,204 @@ static void constant_folding(struct radeon_compiler * c, struct rc_instruction *
|
|||
constant_folding_add(inst);
|
||||
}
|
||||
|
||||
/**
|
||||
* This function returns a writemask that indicates wich components are
|
||||
* read by src and also written by dst.
|
||||
*/
|
||||
static unsigned int src_reads_dst_mask(struct rc_src_register src,
|
||||
struct rc_dst_register dst)
|
||||
{
|
||||
unsigned int mask = 0;
|
||||
unsigned int i;
|
||||
if (dst.File != src.File || dst.Index != src.Index) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
for(i = 0; i < 4; i++) {
|
||||
mask |= 1 << GET_SWZ(src.Swizzle, i);
|
||||
}
|
||||
mask &= RC_MASK_XYZW;
|
||||
|
||||
return mask;
|
||||
}
|
||||
|
||||
/* Return 1 if the source registers has a constant swizzle (e.g. 0, 0.5, 1.0)
|
||||
* in any of its channels. Return 0 otherwise. */
|
||||
static int src_has_const_swz(struct rc_src_register src) {
|
||||
int chan;
|
||||
for(chan = 0; chan < 4; chan++) {
|
||||
unsigned int swz = GET_SWZ(src.Swizzle, chan);
|
||||
if (swz == RC_SWIZZLE_ZERO || swz == RC_SWIZZLE_HALF
|
||||
|| swz == RC_SWIZZLE_ONE) {
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void peephole_scan_write(void * data, struct rc_instruction * inst,
|
||||
rc_register_file file, unsigned int index, unsigned int mask)
|
||||
{
|
||||
struct peephole_state * s = data;
|
||||
if(s->Inst->U.I.DstReg.File == file
|
||||
&& s->Inst->U.I.DstReg.Index == index) {
|
||||
unsigned int common_mask = s->WriteMask & mask;
|
||||
s->WriteMask &= ~common_mask;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* PRESUB_INV: ADD TEMP[0], none.1, -TEMP[1]
|
||||
* Use the presubtract 1 - src0 for all readers of TEMP[0]. The first source
|
||||
* of the add instruction must have the constatnt 1 swizzle. This function
|
||||
* does not check const registers to see if their value is 1.0, so it should
|
||||
* be called after the constant_folding optimization.
|
||||
* @return
|
||||
* 0 if the ADD instruction is still part of the program.
|
||||
* 1 if the ADD instruction is no longer part of the program.
|
||||
*/
|
||||
static int peephole_add_presub_inv(
|
||||
struct radeon_compiler * c,
|
||||
struct rc_instruction * inst_add)
|
||||
{
|
||||
unsigned int i, swz, mask;
|
||||
unsigned int can_remove = 0;
|
||||
unsigned int cant_sub = 0;
|
||||
struct rc_instruction * inst;
|
||||
struct peephole_state s;
|
||||
|
||||
if (inst_add->U.I.SaturateMode)
|
||||
return 0;
|
||||
|
||||
mask = inst_add->U.I.DstReg.WriteMask;
|
||||
|
||||
/* Check if src0 is 1. */
|
||||
/* XXX It would be nice to use is_src_uniform_constant here, but that
|
||||
* function only works if the register's file is RC_FILE_NONE */
|
||||
for(i = 0; i < 4; i++ ) {
|
||||
swz = GET_SWZ(inst_add->U.I.SrcReg[0].Swizzle, i);
|
||||
if(((1 << i) & inst_add->U.I.DstReg.WriteMask)
|
||||
&& swz != RC_SWIZZLE_ONE) {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
/* Check src1. */
|
||||
if ((inst_add->U.I.SrcReg[1].Negate & inst_add->U.I.DstReg.WriteMask) !=
|
||||
inst_add->U.I.DstReg.WriteMask
|
||||
|| inst_add->U.I.SrcReg[1].Abs
|
||||
|| (inst_add->U.I.SrcReg[1].File != RC_FILE_TEMPORARY
|
||||
&& inst_add->U.I.SrcReg[1].File != RC_FILE_CONSTANT)
|
||||
|| src_has_const_swz(inst_add->U.I.SrcReg[1])) {
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Setup the peephole_state information. */
|
||||
s.Inst = inst_add;
|
||||
s.WriteMask = inst_add->U.I.DstReg.WriteMask;
|
||||
|
||||
/* For all instructions that read inst_add->U.I.DstReg before it is
|
||||
* written again, use the 1 - src0 presubtact instead. */
|
||||
for(inst = inst_add->Next; inst != &c->Program.Instructions;
|
||||
inst = inst->Next) {
|
||||
const struct rc_opcode_info * info =
|
||||
rc_get_opcode_info(inst->U.I.Opcode);
|
||||
|
||||
for(i = 0; i < info->NumSrcRegs; i++) {
|
||||
if(inst_add->U.I.DstReg.WriteMask !=
|
||||
src_reads_dst_mask(inst->U.I.SrcReg[i],
|
||||
inst_add->U.I.DstReg)) {
|
||||
continue;
|
||||
}
|
||||
if (cant_sub) {
|
||||
can_remove = 0;
|
||||
break;
|
||||
}
|
||||
/* XXX: There are some situations where instructions
|
||||
* with more than 2 src registers can use the
|
||||
* presubtract select, but to keep things simple we
|
||||
* will disable presubtract on these instructions for
|
||||
* now. Note: This if statement should not be pulled
|
||||
* outside of the loop, because it only applies to
|
||||
* instructions that could potentially use the
|
||||
* presubtract source. */
|
||||
if (info->NumSrcRegs > 2) {
|
||||
can_remove = 0;
|
||||
break;
|
||||
}
|
||||
|
||||
/* We can't use more than one presubtract value in an
|
||||
* instruction, unless the two prsubtract operations
|
||||
* are the same and read from the same registers. */
|
||||
if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE) {
|
||||
if (inst->U.I.PreSub.Opcode != RC_PRESUB_INV
|
||||
|| inst->U.I.PreSub.SrcReg[0].File !=
|
||||
inst_add->U.I.SrcReg[1].File
|
||||
|| inst->U.I.PreSub.SrcReg[0].Index !=
|
||||
inst_add->U.I.SrcReg[1].Index) {
|
||||
|
||||
can_remove = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
/* We must be careful not to modify inst_add, since it
|
||||
* is possible it will remain part of the program. */
|
||||
inst->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[1];
|
||||
inst->U.I.PreSub.SrcReg[0].Negate = 0;
|
||||
inst->U.I.PreSub.Opcode = RC_PRESUB_INV;
|
||||
inst->U.I.SrcReg[i] = chain_srcregs(inst->U.I.SrcReg[i],
|
||||
inst->U.I.PreSub.SrcReg[0]);
|
||||
|
||||
inst->U.I.SrcReg[i].File = RC_FILE_PRESUB;
|
||||
inst->U.I.SrcReg[i].Index = RC_PRESUB_INV;
|
||||
can_remove = 1;
|
||||
}
|
||||
if(!can_remove)
|
||||
break;
|
||||
rc_for_all_writes_mask(inst, peephole_scan_write, &s);
|
||||
/* If all components of inst_add's destination register have
|
||||
* been written to by subsequent instructions, the original
|
||||
* value of the destination register is no longer valid and
|
||||
* we can't keep doing substitutions. */
|
||||
if (!s.WriteMask){
|
||||
break;
|
||||
}
|
||||
/* Make this instruction doesn't write to the presubtract source. */
|
||||
if (inst->U.I.DstReg.WriteMask &
|
||||
src_reads_dst_mask(inst_add->U.I.SrcReg[1],
|
||||
inst->U.I.DstReg)
|
||||
|| info->IsFlowControl) {
|
||||
cant_sub = 1;
|
||||
}
|
||||
}
|
||||
if(can_remove) {
|
||||
rc_remove_instruction(inst_add);
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return
|
||||
* 0 if inst is still part of the program.
|
||||
* 1 if inst is no longer part of the program.
|
||||
*/
|
||||
static int peephole(struct radeon_compiler * c, struct rc_instruction * inst)
|
||||
{
|
||||
switch(inst->U.I.Opcode){
|
||||
case RC_OPCODE_ADD:
|
||||
if (c->has_presub) {
|
||||
if(peephole_add_presub_inv(c, inst))
|
||||
return 1;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
void rc_optimize(struct radeon_compiler * c, void *user)
|
||||
{
|
||||
struct rc_instruction * inst = c->Program.Instructions.Next;
|
||||
|
@ -463,8 +677,11 @@ void rc_optimize(struct radeon_compiler * c, void *user)
|
|||
|
||||
constant_folding(c, cur);
|
||||
|
||||
if(peephole(c, cur))
|
||||
continue;
|
||||
|
||||
if (cur->U.I.Opcode == RC_OPCODE_MOV) {
|
||||
peephole(c, cur);
|
||||
copy_propagate(c, cur);
|
||||
/* cur may no longer be part of the program */
|
||||
}
|
||||
}
|
||||
|
|
|
@ -279,11 +279,118 @@ static int destructive_merge_instructions(
|
|||
struct rc_pair_instruction * rgb,
|
||||
struct rc_pair_instruction * alpha)
|
||||
{
|
||||
const struct rc_opcode_info * opcode;
|
||||
assert(rgb->Alpha.Opcode == RC_OPCODE_NOP);
|
||||
assert(alpha->RGB.Opcode == RC_OPCODE_NOP);
|
||||
|
||||
/* Presubtract registers need to be merged first so that registers
|
||||
* needed by the presubtract operation can be placed in src0 and/or
|
||||
* src1. */
|
||||
|
||||
/* Merge the rgb presubtract registers. */
|
||||
const struct rc_opcode_info * rgb_info =
|
||||
rc_get_opcode_info(rgb->RGB.Opcode);
|
||||
if (alpha->RGB.Src[RC_PAIR_PRESUB_SRC].Used) {
|
||||
unsigned int srcp_src;
|
||||
unsigned int srcp_regs;
|
||||
if (rgb->RGB.Src[RC_PAIR_PRESUB_SRC].Used)
|
||||
return 0;
|
||||
srcp_regs = rc_presubtract_src_reg_count(
|
||||
alpha->RGB.Src[RC_PAIR_PRESUB_SRC].Index);
|
||||
for(srcp_src = 0; srcp_src < srcp_regs; srcp_src++) {
|
||||
unsigned int arg;
|
||||
int free_source;
|
||||
struct radeon_pair_instruction_source srcp =
|
||||
alpha->RGB.Src[srcp_src];
|
||||
struct radeon_pair_instruction_source temp;
|
||||
/* 2nd arg of 1 means this is an rgb source.
|
||||
* 3rd arg of 0 means this is not an alpha source. */
|
||||
free_source = rc_pair_alloc_source(rgb, 1, 0,
|
||||
srcp.File, srcp.Index);
|
||||
/* If free_source == srcp_src, then the
|
||||
* presubtract source is already in the correct place. */
|
||||
if (free_source == srcp_src)
|
||||
continue;
|
||||
/* If free_source < 0 then there are no free source
|
||||
* slots. */
|
||||
if (free_source < 0)
|
||||
return 0;
|
||||
/* Shuffle the sources, so we can put the
|
||||
* presubtract source in the correct place. */
|
||||
for (arg = 0; arg < rgb_info->NumSrcRegs; arg++) {
|
||||
/*If this arg does not read from an rgb source,
|
||||
* do nothing. */
|
||||
if (rc_source_type_that_arg_reads(
|
||||
rgb->RGB.Arg[arg].Source,
|
||||
rgb->RGB.Arg[arg].Swizzle, 3)
|
||||
!= RC_PAIR_SOURCE_RGB) {
|
||||
continue;
|
||||
}
|
||||
if (rgb->RGB.Arg[arg].Source == srcp_src)
|
||||
rgb->RGB.Arg[arg].Source = free_source;
|
||||
/* We need to do this just in case register
|
||||
* is one of the sources already, but in the
|
||||
* wrong spot. */
|
||||
else if(rgb->RGB.Arg[arg].Source == free_source)
|
||||
rgb->RGB.Arg[arg].Source = srcp_src;
|
||||
}
|
||||
temp = rgb->RGB.Src[srcp_src];
|
||||
rgb->RGB.Src[srcp_src] = rgb->RGB.Src[free_source];
|
||||
rgb->RGB.Src[free_source] = temp;
|
||||
}
|
||||
}
|
||||
|
||||
/* Merge the alpha presubtract registers */
|
||||
if (alpha->Alpha.Src[RC_PAIR_PRESUB_SRC].Used) {
|
||||
unsigned int srcp_src;
|
||||
unsigned int srcp_regs;
|
||||
if(rgb->Alpha.Src[RC_PAIR_PRESUB_SRC].Used)
|
||||
return 0;
|
||||
|
||||
srcp_regs = rc_presubtract_src_reg_count(
|
||||
alpha->Alpha.Src[RC_PAIR_PRESUB_SRC].Index);
|
||||
for(srcp_src = 0; srcp_src < srcp_regs; srcp_src++) {
|
||||
unsigned int arg;
|
||||
int free_source;
|
||||
struct radeon_pair_instruction_source srcp =
|
||||
alpha->Alpha.Src[srcp_src];
|
||||
struct radeon_pair_instruction_source temp;
|
||||
/* 2nd arg of 0 means this is not an rgb source.
|
||||
* 3rd arg of 1 means this is an alpha source. */
|
||||
free_source = rc_pair_alloc_source(rgb, 0, 1,
|
||||
srcp.File, srcp.Index);
|
||||
/* If free_source == srcp_src, then the
|
||||
* presubtract source is already in the correct place. */
|
||||
if (free_source == srcp_src)
|
||||
continue;
|
||||
/* If free_source < 0 then there are no free source
|
||||
* slots. */
|
||||
if (free_source < 0)
|
||||
return 0;
|
||||
/* Shuffle the sources, so we can put the
|
||||
* presubtract source in the correct place. */
|
||||
for(arg = 0; arg < rgb_info->NumSrcRegs; arg++) {
|
||||
/*If this arg does not read from an alpha
|
||||
* source, do nothing. */
|
||||
if (rc_source_type_that_arg_reads(
|
||||
rgb->RGB.Arg[arg].Source,
|
||||
rgb->RGB.Arg[arg].Swizzle, 3)
|
||||
!= RC_PAIR_SOURCE_ALPHA) {
|
||||
continue;
|
||||
}
|
||||
if (rgb->RGB.Arg[arg].Source == srcp_src)
|
||||
rgb->RGB.Arg[arg].Source = free_source;
|
||||
else if (rgb->RGB.Arg[arg].Source == free_source)
|
||||
rgb->RGB.Arg[arg].Source = srcp_src;
|
||||
}
|
||||
temp = rgb->Alpha.Src[srcp_src];
|
||||
rgb->Alpha.Src[srcp_src] = rgb->Alpha.Src[free_source];
|
||||
rgb->Alpha.Src[free_source] = temp;
|
||||
}
|
||||
}
|
||||
|
||||
/* Copy alpha args into rgb */
|
||||
const struct rc_opcode_info * opcode = rc_get_opcode_info(alpha->Alpha.Opcode);
|
||||
opcode = rc_get_opcode_info(alpha->Alpha.Opcode);
|
||||
|
||||
for(unsigned int arg = 0; arg < opcode->NumSrcRegs; ++arg) {
|
||||
unsigned int srcrgb = 0;
|
||||
|
@ -351,7 +458,52 @@ static int merge_instructions(struct rc_pair_instruction * rgb, struct rc_pair_i
|
|||
return 0;
|
||||
}
|
||||
|
||||
static void presub_nop(struct rc_instruction * emitted) {
|
||||
int prev_rgb_index, prev_alpha_index, i, num_src;
|
||||
|
||||
/* We don't need a nop if the previous instruction is a TEX. */
|
||||
if (emitted->Prev->Type != RC_INSTRUCTION_PAIR) {
|
||||
return;
|
||||
}
|
||||
if (emitted->Prev->U.P.RGB.WriteMask)
|
||||
prev_rgb_index = emitted->Prev->U.P.RGB.DestIndex;
|
||||
else
|
||||
prev_rgb_index = -1;
|
||||
if (emitted->Prev->U.P.Alpha.WriteMask)
|
||||
prev_alpha_index = emitted->Prev->U.P.Alpha.DestIndex;
|
||||
else
|
||||
prev_alpha_index = 1;
|
||||
|
||||
/* Check the previous rgb instruction */
|
||||
if (emitted->U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Used) {
|
||||
num_src = rc_presubtract_src_reg_count(
|
||||
emitted->U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Index);
|
||||
for (i = 0; i < num_src; i++) {
|
||||
unsigned int index = emitted->U.P.RGB.Src[i].Index;
|
||||
if (emitted->U.P.RGB.Src[i].File == RC_FILE_TEMPORARY
|
||||
&& (index == prev_rgb_index
|
||||
|| index == prev_alpha_index)) {
|
||||
emitted->Prev->U.P.Nop = 1;
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Check the previous alpha instruction. */
|
||||
if (!emitted->U.P.Alpha.Src[RC_PAIR_PRESUB_SRC].Used)
|
||||
return;
|
||||
|
||||
num_src = rc_presubtract_src_reg_count(
|
||||
emitted->U.P.Alpha.Src[RC_PAIR_PRESUB_SRC].Index);
|
||||
for (i = 0; i < num_src; i++) {
|
||||
unsigned int index = emitted->U.P.Alpha.Src[i].Index;
|
||||
if(emitted->U.P.Alpha.Src[i].File == RC_FILE_TEMPORARY
|
||||
&& (index == prev_rgb_index || index == prev_alpha_index)) {
|
||||
emitted->Prev->U.P.Nop = 1;
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
/**
|
||||
* Find a good ALU instruction or pair of ALU instruction and emit it.
|
||||
*
|
||||
|
@ -408,6 +560,10 @@ static void emit_one_alu(struct schedule_state *s, struct rc_instruction * befor
|
|||
commit_alu_instruction(s, sinst);
|
||||
success: ;
|
||||
}
|
||||
/* If the instruction we just emitted uses a presubtract value, and
|
||||
* the presubtract sources were written by the previous instruction,
|
||||
* the previous instruction needs a nop. */
|
||||
presub_nop(before->Prev);
|
||||
}
|
||||
|
||||
static void scan_read(void * data, struct rc_instruction * inst,
|
||||
|
|
|
@ -127,6 +127,18 @@ static void classify_instruction(struct rc_sub_instruction * inst,
|
|||
}
|
||||
}
|
||||
|
||||
static void src_uses(struct rc_src_register src, unsigned int * rgb,
|
||||
unsigned int * alpha)
|
||||
{
|
||||
int j;
|
||||
for(j = 0; j < 4; ++j) {
|
||||
unsigned int swz = GET_SWZ(src.Swizzle, j);
|
||||
if (swz < 3)
|
||||
*rgb = 1;
|
||||
else if (swz < 4)
|
||||
*alpha = 1;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Fill the given ALU instruction's opcodes and source operands into the given pair,
|
||||
|
@ -158,12 +170,51 @@ static void set_pair_instruction(struct r300_fragment_program_compiler *c,
|
|||
const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Opcode);
|
||||
int i;
|
||||
|
||||
/* Presubtract handling:
|
||||
* We need to make sure that the values used by the presubtract
|
||||
* operation end up in src0 or src1. */
|
||||
if(inst->PreSub.Opcode != RC_PRESUB_NONE) {
|
||||
/* rc_pair_alloc_source() will fill in data for
|
||||
* pair->{RGB,ALPHA}.Src[RC_PAIR_PRESUB_SRC] */
|
||||
int j;
|
||||
for(j = 0; j < 3; j++) {
|
||||
int src_regs;
|
||||
if(inst->SrcReg[j].File != RC_FILE_PRESUB)
|
||||
continue;
|
||||
|
||||
src_regs = rc_presubtract_src_reg_count(
|
||||
inst->PreSub.Opcode);
|
||||
for(i = 0; i < src_regs; i++) {
|
||||
unsigned int rgb = 0;
|
||||
unsigned int alpha = 0;
|
||||
src_uses(inst->SrcReg[j], &rgb, &alpha);
|
||||
if(rgb) {
|
||||
pair->RGB.Src[i].File =
|
||||
inst->PreSub.SrcReg[i].File;
|
||||
pair->RGB.Src[i].Index =
|
||||
inst->PreSub.SrcReg[i].Index;
|
||||
pair->RGB.Src[i].Used = 1;
|
||||
}
|
||||
if(alpha) {
|
||||
pair->Alpha.Src[i].File =
|
||||
inst->PreSub.SrcReg[i].File;
|
||||
pair->Alpha.Src[i].Index =
|
||||
inst->PreSub.SrcReg[i].Index;
|
||||
pair->Alpha.Src[i].Used = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for(i = 0; i < opcode->NumSrcRegs; ++i) {
|
||||
int source;
|
||||
if (needrgb && !istranscendent) {
|
||||
unsigned int srcrgb = 0;
|
||||
unsigned int srcalpha = 0;
|
||||
int j;
|
||||
/* We don't care about the alpha channel here. We only
|
||||
* want the part of the swizzle that writes to rgb,
|
||||
* since we are creating an rgb instruction. */
|
||||
for(j = 0; j < 3; ++j) {
|
||||
unsigned int swz = GET_SWZ(inst->SrcReg[i].Swizzle, j);
|
||||
if (swz < 3)
|
||||
|
@ -173,6 +224,7 @@ static void set_pair_instruction(struct r300_fragment_program_compiler *c,
|
|||
}
|
||||
source = rc_pair_alloc_source(pair, srcrgb, srcalpha,
|
||||
inst->SrcReg[i].File, inst->SrcReg[i].Index);
|
||||
assert(source != -1);
|
||||
pair->RGB.Arg[i].Source = source;
|
||||
pair->RGB.Arg[i].Swizzle = inst->SrcReg[i].Swizzle & 0x1ff;
|
||||
pair->RGB.Arg[i].Abs = inst->SrcReg[i].Abs;
|
||||
|
@ -188,6 +240,7 @@ static void set_pair_instruction(struct r300_fragment_program_compiler *c,
|
|||
srcalpha = 1;
|
||||
source = rc_pair_alloc_source(pair, srcrgb, srcalpha,
|
||||
inst->SrcReg[i].File, inst->SrcReg[i].Index);
|
||||
assert(source != -1);
|
||||
pair->Alpha.Arg[i].Source = source;
|
||||
pair->Alpha.Arg[i].Swizzle = swz;
|
||||
pair->Alpha.Arg[i].Abs = inst->SrcReg[i].Abs;
|
||||
|
|
|
@ -39,7 +39,7 @@
|
|||
struct radeon_compiler;
|
||||
|
||||
struct rc_src_register {
|
||||
unsigned int File:3;
|
||||
unsigned int File:4;
|
||||
|
||||
/** Negative values may be used for relative addressing. */
|
||||
signed int Index:(RC_REGISTER_INDEX_BITS+1);
|
||||
|
@ -64,6 +64,11 @@ struct rc_dst_register {
|
|||
unsigned int WriteMask:4;
|
||||
};
|
||||
|
||||
struct rc_presub_instruction {
|
||||
rc_presubtract_op Opcode;
|
||||
struct rc_src_register SrcReg[2];
|
||||
};
|
||||
|
||||
/**
|
||||
* Instructions are maintained by the compiler in a doubly linked list
|
||||
* of these structures.
|
||||
|
@ -108,6 +113,10 @@ struct rc_sub_instruction {
|
|||
/** True if tex instruction should do shadow comparison */
|
||||
unsigned int TexShadow:1;
|
||||
/*@}*/
|
||||
|
||||
/** This holds information about the presubtract operation used by
|
||||
* this instruction. */
|
||||
struct rc_presub_instruction PreSub;
|
||||
};
|
||||
|
||||
typedef enum {
|
||||
|
|
|
@ -79,7 +79,13 @@ typedef enum {
|
|||
/**
|
||||
* Indicates a special register, see RC_SPECIAL_xxx.
|
||||
*/
|
||||
RC_FILE_SPECIAL
|
||||
RC_FILE_SPECIAL,
|
||||
|
||||
/**
|
||||
* Indicates this register should use the result of the presubtract
|
||||
* operation.
|
||||
*/
|
||||
RC_FILE_PRESUB
|
||||
} rc_register_file;
|
||||
|
||||
enum {
|
||||
|
@ -147,4 +153,32 @@ typedef enum {
|
|||
RC_ALURESULT_W
|
||||
} rc_write_aluresult;
|
||||
|
||||
/**
 * Presubtract operations. src0 and src1 refer to the first and second
 * presubtract operand respectively.
 */
typedef enum {
	/** No presubtract operation. */
	RC_PRESUB_NONE = 0,

	/** 1 - 2 * src0 */
	RC_PRESUB_BIAS = 1,

	/** src1 - src0 */
	RC_PRESUB_SUB = 2,

	/** src1 + src0 */
	RC_PRESUB_ADD = 3,

	/** 1 - src0 */
	RC_PRESUB_INV = 4
} rc_presubtract_op;
|
||||
|
||||
/**
 * Number of register operands consumed by presubtract operation \p op:
 * 1 for BIAS and INV, 2 for ADD and SUB, 0 for anything else.
 */
static inline int rc_presubtract_src_reg_count(rc_presubtract_op op){
	if (op == RC_PRESUB_BIAS || op == RC_PRESUB_INV)
		return 1;

	if (op == RC_PRESUB_ADD || op == RC_PRESUB_SUB)
		return 2;

	return 0;
}
|
||||
#endif /* RADEON_PROGRAM_CONSTANTS_H */
|
||||
|
|
|
@ -38,26 +38,52 @@ int rc_pair_alloc_source(struct rc_pair_instruction *pair,
|
|||
{
|
||||
int candidate = -1;
|
||||
int candidate_quality = -1;
|
||||
unsigned int alpha_used = 0;
|
||||
unsigned int rgb_used = 0;
|
||||
int i;
|
||||
|
||||
if ((!rgb && !alpha) || file == RC_FILE_NONE)
|
||||
return 0;
|
||||
|
||||
if (rgb && pair->RGB.Src[RC_PAIR_PRESUB_SRC].Used) {
|
||||
if (file == RC_FILE_PRESUB) {
|
||||
if (index != pair->RGB.Src[RC_PAIR_PRESUB_SRC].Index) {
|
||||
return -1;
|
||||
}
|
||||
} else {
|
||||
rgb_used++;
|
||||
}
|
||||
}
|
||||
|
||||
if (alpha && pair->Alpha.Src[RC_PAIR_PRESUB_SRC].Used) {
|
||||
if (file == RC_FILE_PRESUB) {
|
||||
if (index != pair->Alpha.Src[RC_PAIR_PRESUB_SRC].Index) {
|
||||
return -1;
|
||||
}
|
||||
} else {
|
||||
alpha_used++;
|
||||
}
|
||||
}
|
||||
|
||||
for(i = 0; i < 3; ++i) {
|
||||
int q = 0;
|
||||
if (rgb) {
|
||||
if (pair->RGB.Src[i].Used) {
|
||||
if (pair->RGB.Src[i].File != file ||
|
||||
pair->RGB.Src[i].Index != index)
|
||||
pair->RGB.Src[i].Index != index) {
|
||||
rgb_used++;
|
||||
continue;
|
||||
}
|
||||
q++;
|
||||
}
|
||||
}
|
||||
if (alpha) {
|
||||
if (pair->Alpha.Src[i].Used) {
|
||||
if (pair->Alpha.Src[i].File != file ||
|
||||
pair->Alpha.Src[i].Index != index)
|
||||
pair->Alpha.Src[i].Index != index) {
|
||||
alpha_used++;
|
||||
continue;
|
||||
}
|
||||
q++;
|
||||
}
|
||||
}
|
||||
|
@ -66,19 +92,156 @@ int rc_pair_alloc_source(struct rc_pair_instruction *pair,
|
|||
candidate = i;
|
||||
}
|
||||
}
|
||||
if (candidate < 0 || (rgb && rgb_used > 2) || (alpha && alpha_used > 2))
|
||||
return -1;
|
||||
|
||||
if (candidate >= 0) {
|
||||
if (rgb) {
|
||||
pair->RGB.Src[candidate].Used = 1;
|
||||
pair->RGB.Src[candidate].File = file;
|
||||
pair->RGB.Src[candidate].Index = index;
|
||||
/* candidate >= 0 */
|
||||
|
||||
/* Even if we have a presub src, the above loop needs to run,
|
||||
* because we still need to make sure there is a free source.
|
||||
*/
|
||||
if (file == RC_FILE_PRESUB)
|
||||
candidate = RC_PAIR_PRESUB_SRC;
|
||||
|
||||
if (rgb) {
|
||||
pair->RGB.Src[candidate].Used = 1;
|
||||
pair->RGB.Src[candidate].File = file;
|
||||
pair->RGB.Src[candidate].Index = index;
|
||||
if (candidate == RC_PAIR_PRESUB_SRC) {
|
||||
/* For registers with the RC_FILE_PRESUB file,
|
||||
* the index stores the presubtract op. */
|
||||
int src_regs = rc_presubtract_src_reg_count(index);
|
||||
for(i = 0; i < src_regs; i++) {
|
||||
pair->RGB.Src[i].Used = 1;
|
||||
}
|
||||
}
|
||||
if (alpha) {
|
||||
pair->Alpha.Src[candidate].Used = 1;
|
||||
pair->Alpha.Src[candidate].File = file;
|
||||
pair->Alpha.Src[candidate].Index = index;
|
||||
}
|
||||
if (alpha) {
|
||||
pair->Alpha.Src[candidate].Used = 1;
|
||||
pair->Alpha.Src[candidate].File = file;
|
||||
pair->Alpha.Src[candidate].Index = index;
|
||||
if (candidate == RC_PAIR_PRESUB_SRC) {
|
||||
/* For registers with the RC_FILE_PRESUB file,
|
||||
* the index stores the presubtract op. */
|
||||
int src_regs = rc_presubtract_src_reg_count(index);
|
||||
for(i=0; i < src_regs; i++) {
|
||||
pair->Alpha.Src[i].Used = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return candidate;
|
||||
}
|
||||
|
||||
static void pair_foreach_source_callback(
|
||||
struct rc_pair_instruction * pair,
|
||||
void * data,
|
||||
rc_pair_foreach_src_fn cb,
|
||||
unsigned int swz,
|
||||
unsigned int src)
|
||||
{
|
||||
/* swz > 3 means that the swizzle is either not used, or a constant
|
||||
* swizzle (e.g. 0, 1, 0.5). */
|
||||
if(swz > 3)
|
||||
return;
|
||||
|
||||
if(swz == RC_SWIZZLE_W) {
|
||||
if (src == RC_PAIR_PRESUB_SRC) {
|
||||
unsigned int i;
|
||||
unsigned int src_count = rc_presubtract_src_reg_count(
|
||||
pair->Alpha.Src[RC_PAIR_PRESUB_SRC].Index);
|
||||
for(i = 0; i < src_count; i++) {
|
||||
cb(data, &pair->Alpha.Src[i]);
|
||||
}
|
||||
} else {
|
||||
cb(data, &pair->Alpha.Src[src]);
|
||||
}
|
||||
} else {
|
||||
if (src == RC_PAIR_PRESUB_SRC) {
|
||||
unsigned int i;
|
||||
unsigned int src_count = rc_presubtract_src_reg_count(
|
||||
pair->RGB.Src[RC_PAIR_PRESUB_SRC].Index);
|
||||
for(i = 0; i < src_count; i++) {
|
||||
cb(data, &pair->RGB.Src[i]);
|
||||
}
|
||||
}
|
||||
else {
|
||||
cb(data, &pair->RGB.Src[src]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void rc_pair_foreach_source_that_alpha_reads(
|
||||
struct rc_pair_instruction * pair,
|
||||
void * data,
|
||||
rc_pair_foreach_src_fn cb)
|
||||
{
|
||||
unsigned int i;
|
||||
const struct rc_opcode_info * info =
|
||||
rc_get_opcode_info(pair->Alpha.Opcode);
|
||||
for(i = 0; i < info->NumSrcRegs; i++) {
|
||||
pair_foreach_source_callback(pair, data, cb,
|
||||
GET_SWZ(pair->Alpha.Arg[i].Swizzle, 0),
|
||||
pair->Alpha.Arg[i].Source);
|
||||
}
|
||||
}
|
||||
|
||||
void rc_pair_foreach_source_that_rgb_reads(
|
||||
struct rc_pair_instruction * pair,
|
||||
void * data,
|
||||
rc_pair_foreach_src_fn cb)
|
||||
{
|
||||
unsigned int i;
|
||||
const struct rc_opcode_info * info =
|
||||
rc_get_opcode_info(pair->RGB.Opcode);
|
||||
for(i = 0; i < info->NumSrcRegs; i++) {
|
||||
unsigned int chan;
|
||||
unsigned int swz = RC_SWIZZLE_UNUSED;
|
||||
/* Find a swizzle that is either X,Y,Z,or W. We assume here
|
||||
* that if one channel swizzles X,Y, or Z, then none of the
|
||||
* other channels swizzle W, and vice-versa. */
|
||||
for(chan = 0; chan < 4; chan++) {
|
||||
swz = GET_SWZ(pair->RGB.Arg[i].Swizzle, chan);
|
||||
if(swz == RC_SWIZZLE_X || swz == RC_SWIZZLE_Y
|
||||
|| swz == RC_SWIZZLE_Z || swz == RC_SWIZZLE_W)
|
||||
continue;
|
||||
}
|
||||
pair_foreach_source_callback(pair, data, cb,
|
||||
swz,
|
||||
pair->RGB.Arg[i].Source);
|
||||
}
|
||||
}
|
||||
|
||||
/*return 0 for rgb, 1 for alpha -1 for error. */
|
||||
|
||||
rc_pair_source_type rc_source_type_that_arg_reads(
|
||||
unsigned int source,
|
||||
unsigned int swizzle,
|
||||
unsigned int channels)
|
||||
{
|
||||
unsigned int chan;
|
||||
unsigned int swz = RC_SWIZZLE_UNUSED;
|
||||
int isRGB = 0;
|
||||
int isAlpha = 0;
|
||||
/* Find a swizzle that is either X,Y,Z,or W. We assume here
|
||||
* that if one channel swizzles X,Y, or Z, then none of the
|
||||
* other channels swizzle W, and vice-versa. */
|
||||
for(chan = 0; chan < channels; chan++) {
|
||||
swz = GET_SWZ(swizzle, chan);
|
||||
if (swz == RC_SWIZZLE_W) {
|
||||
isAlpha = 1;
|
||||
} else if (swz == RC_SWIZZLE_X || swz == RC_SWIZZLE_Y
|
||||
|| swz == RC_SWIZZLE_Z) {
|
||||
isRGB = 1;
|
||||
}
|
||||
}
|
||||
assert(!isRGB || !isAlpha);
|
||||
|
||||
if(!isRGB && !isAlpha)
|
||||
return RC_PAIR_SOURCE_NONE;
|
||||
|
||||
if (isRGB)
|
||||
return RC_PAIR_SOURCE_RGB;
|
||||
/*isAlpha*/
|
||||
return RC_PAIR_SOURCE_ALPHA;
|
||||
}
|
||||
|
|
|
@ -49,6 +49,11 @@ struct radeon_compiler;
|
|||
* see \ref rc_pair_translate
|
||||
*/
|
||||
|
||||
/* For rgb and alpha instructions when arg[n].Source = RC_PAIR_PRESUB_SRC, then
|
||||
* the presubtract value will be used, and
|
||||
* {RGB,Alpha}.Src[RC_PAIR_PRESUB_SRC].File will be set to RC_FILE_PRESUB.
|
||||
*/
|
||||
#define RC_PAIR_PRESUB_SRC 3
|
||||
|
||||
struct radeon_pair_instruction_source {
|
||||
unsigned int Used:1;
|
||||
|
@ -64,7 +69,7 @@ struct radeon_pair_instruction_rgb {
|
|||
unsigned int OutputWriteMask:3;
|
||||
unsigned int Saturate:1;
|
||||
|
||||
struct radeon_pair_instruction_source Src[3];
|
||||
struct radeon_pair_instruction_source Src[4];
|
||||
|
||||
struct {
|
||||
unsigned int Source:2;
|
||||
|
@ -83,7 +88,7 @@ struct radeon_pair_instruction_alpha {
|
|||
unsigned int DepthWriteMask:1;
|
||||
unsigned int Saturate:1;
|
||||
|
||||
struct radeon_pair_instruction_source Src[3];
|
||||
struct radeon_pair_instruction_source Src[4];
|
||||
|
||||
struct {
|
||||
unsigned int Source:2;
|
||||
|
@ -99,8 +104,17 @@ struct rc_pair_instruction {
|
|||
|
||||
unsigned int WriteALUResult:2;
|
||||
unsigned int ALUResultCompare:3;
|
||||
unsigned int Nop:1;
|
||||
};
|
||||
|
||||
typedef void (*rc_pair_foreach_src_fn)
|
||||
(void *, struct radeon_pair_instruction_source *);
|
||||
|
||||
/** Kind of source slot an argument of a pair instruction reads. */
typedef enum {
	/** The argument reads no source slot. */
	RC_PAIR_SOURCE_NONE = 0,
	/** The argument reads an rgb source slot. */
	RC_PAIR_SOURCE_RGB = 1,
	/** The argument reads an alpha source slot. */
	RC_PAIR_SOURCE_ALPHA = 2
} rc_pair_source_type;
|
||||
|
||||
/**
|
||||
* General helper functions for dealing with the paired instruction format.
|
||||
|
@ -109,6 +123,21 @@ struct rc_pair_instruction {
|
|||
int rc_pair_alloc_source(struct rc_pair_instruction *pair,
|
||||
unsigned int rgb, unsigned int alpha,
|
||||
rc_register_file file, unsigned int index);
|
||||
|
||||
void rc_pair_foreach_source_that_alpha_reads(
|
||||
struct rc_pair_instruction * pair,
|
||||
void * data,
|
||||
rc_pair_foreach_src_fn cb);
|
||||
|
||||
void rc_pair_foreach_source_that_rgb_reads(
|
||||
struct rc_pair_instruction * pair,
|
||||
void * data,
|
||||
rc_pair_foreach_src_fn cb);
|
||||
|
||||
rc_pair_source_type rc_source_type_that_arg_reads(
|
||||
unsigned int source,
|
||||
unsigned int swizzle,
|
||||
unsigned int channels);
|
||||
/*@}*/
|
||||
|
||||
|
||||
|
|
|
@ -38,6 +38,24 @@ static const char * textarget_to_string(rc_texture_target target)
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Human-readable formula for a presubtract operation, used when dumping
 * programs. Values outside the known operations yield "BAD_PRESUBTRACT_OP".
 */
static const char * presubtract_op_to_string(rc_presubtract_op op)
{
	static const char * const formulas[] = {
		[RC_PRESUB_NONE] = "NONE",
		[RC_PRESUB_BIAS] = "(1 - 2 * src0)",
		[RC_PRESUB_SUB] = "(src1 - src0)",
		[RC_PRESUB_ADD] = "(src1 + src0)",
		[RC_PRESUB_INV] = "(1 - src0)",
	};
	const unsigned int slot = (unsigned int)op;

	/* The unsigned cast turns negative values into large ones, so a
	 * single bound check rejects everything outside the table. */
	if (slot < sizeof(formulas) / sizeof(formulas[0]) && formulas[slot])
		return formulas[slot];

	return "BAD_PRESUBTRACT_OP";
}
|
||||
|
||||
static void rc_print_comparefunc(FILE * f, const char * lhs, rc_compare_func func, const char * rhs)
|
||||
{
|
||||
if (func == RC_COMPARE_FUNC_NEVER) {
|
||||
|
@ -125,7 +143,43 @@ static void rc_print_swizzle(FILE * f, unsigned int swizzle, unsigned int negate
|
|||
}
|
||||
}
|
||||
|
||||
static void rc_print_src_register(FILE * f, struct rc_src_register src)
|
||||
static void rc_print_presub_instruction(FILE * f,
|
||||
struct rc_presub_instruction inst)
|
||||
{
|
||||
fprintf(f,"(");
|
||||
switch(inst.Opcode){
|
||||
case RC_PRESUB_BIAS:
|
||||
fprintf(f, "1 - 2 * ");
|
||||
rc_print_register(f, inst.SrcReg[0].File,
|
||||
inst.SrcReg[0].Index,inst.SrcReg[0].RelAddr);
|
||||
break;
|
||||
case RC_PRESUB_SUB:
|
||||
rc_print_register(f, inst.SrcReg[0].File,
|
||||
inst.SrcReg[0].Index,inst.SrcReg[0].RelAddr);
|
||||
fprintf(f, " - ");
|
||||
rc_print_register(f, inst.SrcReg[1].File,
|
||||
inst.SrcReg[1].Index,inst.SrcReg[1].RelAddr);
|
||||
break;
|
||||
case RC_PRESUB_ADD:
|
||||
rc_print_register(f, inst.SrcReg[0].File,
|
||||
inst.SrcReg[0].Index,inst.SrcReg[0].RelAddr);
|
||||
fprintf(f, " + ");
|
||||
rc_print_register(f, inst.SrcReg[1].File,
|
||||
inst.SrcReg[1].Index,inst.SrcReg[1].RelAddr);
|
||||
break;
|
||||
case RC_PRESUB_INV:
|
||||
fprintf(f, "1 - ");
|
||||
rc_print_register(f, inst.SrcReg[0].File,
|
||||
inst.SrcReg[0].Index,inst.SrcReg[0].RelAddr);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
fprintf(f, ")");
|
||||
}
|
||||
|
||||
static void rc_print_src_register(FILE * f, struct rc_instruction * inst,
|
||||
struct rc_src_register src)
|
||||
{
|
||||
int trivial_negate = (src.Negate == RC_MASK_NONE || src.Negate == RC_MASK_XYZW);
|
||||
|
||||
|
@ -134,7 +188,10 @@ static void rc_print_src_register(FILE * f, struct rc_src_register src)
|
|||
if (src.Abs)
|
||||
fprintf(f, "|");
|
||||
|
||||
rc_print_register(f, src.File, src.Index, src.RelAddr);
|
||||
if(src.File == RC_FILE_PRESUB)
|
||||
rc_print_presub_instruction(f, inst->U.I.PreSub);
|
||||
else
|
||||
rc_print_register(f, src.File, src.Index, src.RelAddr);
|
||||
|
||||
if (src.Abs && !trivial_negate)
|
||||
fprintf(f, "|");
|
||||
|
@ -198,7 +255,7 @@ static void rc_print_normal_instruction(FILE * f, struct rc_instruction * inst,
|
|||
if (reg > 0)
|
||||
fprintf(f, ",");
|
||||
fprintf(f, " ");
|
||||
rc_print_src_register(f, inst->U.I.SrcReg[reg]);
|
||||
rc_print_src_register(f, inst, inst->U.I.SrcReg[reg]);
|
||||
}
|
||||
|
||||
if (opcode->HasTexture) {
|
||||
|
@ -247,6 +304,16 @@ static void rc_print_pair_instruction(FILE * f, struct rc_instruction * fullinst
|
|||
printedsrc = 1;
|
||||
}
|
||||
}
|
||||
if(inst->RGB.Src[RC_PAIR_PRESUB_SRC].Used) {
|
||||
fprintf(f, ", srcp.xyz = %s",
|
||||
presubtract_op_to_string(
|
||||
inst->RGB.Src[RC_PAIR_PRESUB_SRC].Index));
|
||||
}
|
||||
if(inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Used) {
|
||||
fprintf(f, ", srcp.w = %s",
|
||||
presubtract_op_to_string(
|
||||
inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Index));
|
||||
}
|
||||
fprintf(f, "\n");
|
||||
|
||||
if (inst->RGB.Opcode != RC_OPCODE_NOP) {
|
||||
|
@ -272,7 +339,12 @@ static void rc_print_pair_instruction(FILE * f, struct rc_instruction * fullinst
|
|||
for(unsigned int arg = 0; arg < opcode->NumSrcRegs; ++arg) {
|
||||
const char* abs = inst->RGB.Arg[arg].Abs ? "|" : "";
|
||||
const char* neg = inst->RGB.Arg[arg].Negate ? "-" : "";
|
||||
fprintf(f, ", %s%ssrc%i.%c%c%c%s", neg, abs, inst->RGB.Arg[arg].Source,
|
||||
fprintf(f, ", %s%ssrc", neg, abs);
|
||||
if(inst->RGB.Arg[arg].Source == RC_PAIR_PRESUB_SRC)
|
||||
fprintf(f,"p");
|
||||
else
|
||||
fprintf(f,"%d", inst->RGB.Arg[arg].Source);
|
||||
fprintf(f,".%c%c%c%s",
|
||||
rc_swizzle_char(GET_SWZ(inst->RGB.Arg[arg].Swizzle, 0)),
|
||||
rc_swizzle_char(GET_SWZ(inst->RGB.Arg[arg].Swizzle, 1)),
|
||||
rc_swizzle_char(GET_SWZ(inst->RGB.Arg[arg].Swizzle, 2)),
|
||||
|
@ -300,7 +372,12 @@ static void rc_print_pair_instruction(FILE * f, struct rc_instruction * fullinst
|
|||
for(unsigned int arg = 0; arg < opcode->NumSrcRegs; ++arg) {
|
||||
const char* abs = inst->Alpha.Arg[arg].Abs ? "|" : "";
|
||||
const char* neg = inst->Alpha.Arg[arg].Negate ? "-" : "";
|
||||
fprintf(f, ", %s%ssrc%i.%c%s", neg, abs, inst->Alpha.Arg[arg].Source,
|
||||
fprintf(f, ", %s%ssrc", neg, abs);
|
||||
if(inst->Alpha.Arg[arg].Source == RC_PAIR_PRESUB_SRC)
|
||||
fprintf(f,"p");
|
||||
else
|
||||
fprintf(f,"%d", inst->Alpha.Arg[arg].Source);
|
||||
fprintf(f,".%c%s",
|
||||
rc_swizzle_char(inst->Alpha.Arg[arg].Swizzle), abs);
|
||||
}
|
||||
fprintf(f, "\n");
|
||||
|
|
|
@ -27,6 +27,16 @@
|
|||
|
||||
#include "radeon_remove_constants.h"
|
||||
|
||||
static void remap_regs(void * userdata, struct rc_instruction * inst,
|
||||
rc_register_file * pfile, unsigned int * pindex)
|
||||
{
|
||||
unsigned *inv_remap_table = userdata;
|
||||
|
||||
if (*pfile == RC_FILE_CONSTANT) {
|
||||
*pindex = inv_remap_table[*pindex];
|
||||
}
|
||||
}
|
||||
|
||||
void rc_remove_unused_constants(struct radeon_compiler *c, void *user)
|
||||
{
|
||||
unsigned **out_remap_table = (unsigned**)user;
|
||||
|
@ -51,6 +61,10 @@ void rc_remove_unused_constants(struct radeon_compiler *c, void *user)
|
|||
inst != &c->Program.Instructions; inst = inst->Next) {
|
||||
const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->U.I.Opcode);
|
||||
|
||||
/* XXX: This loop and the if statement after it should be
|
||||
* replaced by a call to one of the rc_for_all_reads_* functions.
|
||||
* The reason it does not use one of those functions now is
|
||||
* because none of them have RelAddr as an argument. */
|
||||
for (unsigned i = 0; i < opcode->NumSrcRegs; i++) {
|
||||
if (inst->U.I.SrcReg[i].File == RC_FILE_CONSTANT) {
|
||||
if (inst->U.I.SrcReg[i].RelAddr) {
|
||||
|
@ -60,6 +74,18 @@ void rc_remove_unused_constants(struct radeon_compiler *c, void *user)
|
|||
}
|
||||
}
|
||||
}
|
||||
if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE) {
|
||||
unsigned int i;
|
||||
unsigned int srcp_regs = rc_presubtract_src_reg_count(
|
||||
inst->U.I.PreSub.Opcode);
|
||||
for( i = 0; i < srcp_regs; i++) {
|
||||
if (inst->U.I.PreSub.SrcReg[i].File ==
|
||||
RC_FILE_CONSTANT) {
|
||||
const_used[
|
||||
inst->U.I.PreSub.SrcReg[i].Index] = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Pass 2: If there is relative addressing, mark all externals as used. */
|
||||
|
@ -100,13 +126,7 @@ void rc_remove_unused_constants(struct radeon_compiler *c, void *user)
|
|||
if (!is_identity) {
|
||||
for (struct rc_instruction *inst = c->Program.Instructions.Next;
|
||||
inst != &c->Program.Instructions; inst = inst->Next) {
|
||||
const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->U.I.Opcode);
|
||||
|
||||
for (unsigned i = 0; i < opcode->NumSrcRegs; i++) {
|
||||
if (inst->U.I.SrcReg[i].File == RC_FILE_CONSTANT) {
|
||||
inst->U.I.SrcReg[i].Index = inv_remap_table[inst->U.I.SrcReg[i].Index];
|
||||
}
|
||||
}
|
||||
rc_remap_registers(inst, remap_regs, inv_remap_table);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue