r300/compiler: Enable presubtract sources

The r300 compiler can now emit instructions that select from the presubtract
source.  A peephole optimization has been added to convert instructions like:
ADD Temp[0].x, none.1, -Temp[1].x into the INV (1 - src0) presubtract
operation.
This commit is contained in:
Tom Stellard 2010-07-13 21:25:27 -07:00
parent d8a3662008
commit 63432ecfce
17 changed files with 1072 additions and 92 deletions

View File

@ -387,6 +387,7 @@ static void r300_translate_fragment_shader(
compiler.Base.is_r500 = r300->screen->caps.is_r500;
compiler.Base.disable_optimizations = DBG_ON(r300, DBG_NO_OPT);
compiler.Base.has_half_swizzles = TRUE;
compiler.Base.has_presub = TRUE;
compiler.Base.max_temp_regs = compiler.Base.is_r500 ? 128 : 32;
compiler.Base.max_constants = compiler.Base.is_r500 ? 256 : 32;
compiler.Base.max_alu_insts = compiler.Base.is_r500 ? 512 : 64;

View File

@ -208,6 +208,7 @@ void r300_translate_vertex_shader(struct r300_context *r300,
compiler.Base.is_r500 = r300->screen->caps.is_r500;
compiler.Base.disable_optimizations = DBG_ON(r300, DBG_NO_OPT);
compiler.Base.has_half_swizzles = FALSE;
compiler.Base.has_presub = FALSE;
compiler.Base.max_temp_regs = 32;
compiler.Base.max_constants = 256;
compiler.Base.max_alu_insts = r300->screen->caps.is_r500 ? 1024 : 256;

View File

@ -31,6 +31,24 @@
#include "../r300_reg.h"
/**
 * Write a short mnemonic for the presubtract operation encoded in an R300
 * ALU instruction word.
 *
 * @param out  Destination buffer of at least 10 bytes. It is always left
 *             NUL-terminated, even when the operation bits match none of
 *             the known encodings (it then holds the empty string).
 * @param inst RGB or alpha ALU instruction word; only the bits selected by
 *             the 0x600000 mask are inspected.
 */
static void presub_string(char out[10], unsigned int inst)
{
	/* Empty by default so callers never print an uninitialized buffer. */
	const char * name = "";

	switch(inst & 0x600000){
	case R300_ALU_SRCP_1_MINUS_2_SRC0:
		name = "bias";
		break;
	case R300_ALU_SRCP_SRC1_MINUS_SRC0:
		name = "sub";
		break;
	case R300_ALU_SRCP_SRC1_PLUS_SRC0:
		name = "add";
		break;
	case R300_ALU_SRCP_1_MINUS_SRC0:
		name = "inv ";
		break;
	default:
		break;
	}

	/* 10 is the size promised by the parameter declaration; the array
	 * parameter decays to a pointer, so sizeof cannot be used here. */
	snprintf(out, 10, "%s", name);
}
/* just some random things... */
void r300FragmentProgramDump(struct radeon_compiler *c, void *user)
{
@ -98,8 +116,8 @@ void r300FragmentProgramDump(struct radeon_compiler *c, void *user)
for (i = alu_offset;
i <= alu_offset + alu_end; ++i) {
char srcc[3][10], dstc[20];
char srca[3][10], dsta[20];
char srcc[4][10], dstc[20];
char srca[4][10], dsta[20];
char argc[3][20];
char arga[3][20];
char flags[5], tmp[10];
@ -142,6 +160,9 @@ void r300FragmentProgramDump(struct radeon_compiler *c, void *user)
flags);
strcat(dstc, tmp);
}
/* Presub */
presub_string(srcc[3], code->alu.inst[i].rgb_inst);
presub_string(srca[3], code->alu.inst[i].alpha_inst);
dsta[0] = 0;
if (code->alu.inst[i].alpha_addr & R300_ALU_DSTA_REG) {
@ -160,11 +181,12 @@ void r300FragmentProgramDump(struct radeon_compiler *c, void *user)
}
fprintf(stderr,
"%3i: xyz: %3s %3s %3s -> %-20s (%08x)\n"
" w: %3s %3s %3s -> %-20s (%08x)\n", i,
srcc[0], srcc[1], srcc[2], dstc,
"%3i: xyz: %3s %3s %3s %5s-> %-20s (%08x)\n"
" w: %3s %3s %3s %5s-> %-20s (%08x)\n", i,
srcc[0], srcc[1], srcc[2], srcc[3], dstc,
code->alu.inst[i].rgb_addr, srca[0], srca[1],
srca[2], dsta, code->alu.inst[i].alpha_addr);
srca[2], srca[3], dsta,
code->alu.inst[i].alpha_addr);
for (j = 0; j < 3; ++j) {
int regc = code->alu.inst[i].rgb_inst >> (j * 7);
@ -194,6 +216,24 @@ void r300FragmentProgramDump(struct radeon_compiler *c, void *user)
}
} else if (d < 15) {
sprintf(buf, "%s.www", srca[d - 12]);
} else if (d < 20 ) {
switch(d) {
case R300_ALU_ARGC_SRCP_XYZ:
sprintf(buf, "srcp.xyz");
break;
case R300_ALU_ARGC_SRCP_XXX:
sprintf(buf, "srcp.xxx");
break;
case R300_ALU_ARGC_SRCP_YYY:
sprintf(buf, "srcp.yyy");
break;
case R300_ALU_ARGC_SRCP_ZZZ:
sprintf(buf, "srcp.zzz");
break;
case R300_ALU_ARGC_SRCP_WWW:
sprintf(buf, "srcp.www");
break;
}
} else if (d == 20) {
sprintf(buf, "0.0");
} else if (d == 21) {
@ -231,6 +271,21 @@ void r300FragmentProgramDump(struct radeon_compiler *c, void *user)
'x' + (char)(d % 3));
} else if (d < 12) {
sprintf(buf, "%s.w", srca[d - 9]);
} else if (d < 16) {
switch(d) {
case R300_ALU_ARGA_SRCP_X:
sprintf(buf, "srcp.x");
break;
case R300_ALU_ARGA_SRCP_Y:
sprintf(buf, "srcp.y");
break;
case R300_ALU_ARGA_SRCP_Z:
sprintf(buf, "srcp.z");
break;
case R300_ALU_ARGA_SRCP_W:
sprintf(buf, "srcp.w");
break;
}
} else if (d == 16) {
sprintf(buf, "0.0");
} else if (d == 17) {
@ -247,11 +302,14 @@ void r300FragmentProgramDump(struct radeon_compiler *c, void *user)
buf, (rega & 64) ? "|" : "");
}
fprintf(stderr, " xyz: %8s %8s %8s op: %08x\n"
fprintf(stderr, " xyz: %8s %8s %8s op: %08x %s\n"
" w: %8s %8s %8s op: %08x\n",
argc[0], argc[1], argc[2],
code->alu.inst[i].rgb_inst, arga[0], arga[1],
arga[2], code->alu.inst[i].alpha_inst);
code->alu.inst[i].rgb_inst,
code->alu.inst[i].rgb_inst & R300_ALU_INSERT_NOP ?
"NOP" : "",
arga[0], arga[1],arga[2],
code->alu.inst[i].alpha_inst);
}
}
}

View File

@ -164,6 +164,53 @@ static int emit_alu(struct r300_emit_state * emit, struct rc_pair_instruction* i
code->alu.inst[ip].alpha_inst |= arg << (7*j);
}
/* Presubtract */
if (inst->RGB.Src[RC_PAIR_PRESUB_SRC].Used) {
switch(inst->RGB.Src[RC_PAIR_PRESUB_SRC].Index) {
case RC_PRESUB_BIAS:
code->alu.inst[ip].rgb_inst |=
R300_ALU_SRCP_1_MINUS_2_SRC0;
break;
case RC_PRESUB_ADD:
code->alu.inst[ip].rgb_inst |=
R300_ALU_SRCP_SRC1_PLUS_SRC0;
break;
case RC_PRESUB_SUB:
code->alu.inst[ip].rgb_inst |=
R300_ALU_SRCP_SRC1_MINUS_SRC0;
break;
case RC_PRESUB_INV:
code->alu.inst[ip].rgb_inst |=
R300_ALU_SRCP_1_MINUS_SRC0;
break;
default:
break;
}
}
if (inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Used) {
switch(inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Index) {
case RC_PRESUB_BIAS:
code->alu.inst[ip].alpha_inst |=
R300_ALU_SRCP_1_MINUS_2_SRC0;
break;
case RC_PRESUB_ADD:
code->alu.inst[ip].alpha_inst |=
R300_ALU_SRCP_SRC1_PLUS_SRC0;
break;
case RC_PRESUB_SUB:
code->alu.inst[ip].alpha_inst |=
R300_ALU_SRCP_SRC1_MINUS_SRC0;
break;
case RC_PRESUB_INV:
code->alu.inst[ip].alpha_inst |=
R300_ALU_SRCP_1_MINUS_SRC0;
break;
default:
break;
}
}
if (inst->RGB.Saturate)
code->alu.inst[ip].rgb_inst |= R300_ALU_OUTC_CLAMP;
if (inst->Alpha.Saturate)
@ -198,6 +245,8 @@ static int emit_alu(struct r300_emit_state * emit, struct rc_pair_instruction* i
emit->node_flags |= R300_W_OUT;
c->code->writes_depth = 1;
}
if (inst->Nop)
code->alu.inst[ip].rgb_inst |= R300_ALU_INSERT_NOP;
return 1;
}

View File

@ -44,25 +44,25 @@ struct swizzle_data {
unsigned int hash; /**< swizzle value this matches */
unsigned int base; /**< base value for hw swizzle */
unsigned int stride; /**< difference in base between arg0/1/2 */
unsigned int srcp_stride; /**< difference in base between arg0/scrp */
};
static const struct swizzle_data native_swizzles[] = {
{MAKE_SWZ3(X, Y, Z), R300_ALU_ARGC_SRC0C_XYZ, 4},
{MAKE_SWZ3(X, X, X), R300_ALU_ARGC_SRC0C_XXX, 4},
{MAKE_SWZ3(Y, Y, Y), R300_ALU_ARGC_SRC0C_YYY, 4},
{MAKE_SWZ3(Z, Z, Z), R300_ALU_ARGC_SRC0C_ZZZ, 4},
{MAKE_SWZ3(W, W, W), R300_ALU_ARGC_SRC0A, 1},
{MAKE_SWZ3(Y, Z, X), R300_ALU_ARGC_SRC0C_YZX, 1},
{MAKE_SWZ3(Z, X, Y), R300_ALU_ARGC_SRC0C_ZXY, 1},
{MAKE_SWZ3(W, Z, Y), R300_ALU_ARGC_SRC0CA_WZY, 1},
{MAKE_SWZ3(ONE, ONE, ONE), R300_ALU_ARGC_ONE, 0},
{MAKE_SWZ3(ZERO, ZERO, ZERO), R300_ALU_ARGC_ZERO, 0},
{MAKE_SWZ3(HALF, HALF, HALF), R300_ALU_ARGC_HALF, 0}
{MAKE_SWZ3(X, Y, Z), R300_ALU_ARGC_SRC0C_XYZ, 4, 15},
{MAKE_SWZ3(X, X, X), R300_ALU_ARGC_SRC0C_XXX, 4, 15},
{MAKE_SWZ3(Y, Y, Y), R300_ALU_ARGC_SRC0C_YYY, 4, 15},
{MAKE_SWZ3(Z, Z, Z), R300_ALU_ARGC_SRC0C_ZZZ, 4, 15},
{MAKE_SWZ3(W, W, W), R300_ALU_ARGC_SRC0A, 1, 7},
{MAKE_SWZ3(Y, Z, X), R300_ALU_ARGC_SRC0C_YZX, 1, 0},
{MAKE_SWZ3(Z, X, Y), R300_ALU_ARGC_SRC0C_ZXY, 1, 0},
{MAKE_SWZ3(W, Z, Y), R300_ALU_ARGC_SRC0CA_WZY, 1, 0},
{MAKE_SWZ3(ONE, ONE, ONE), R300_ALU_ARGC_ONE, 0, 0},
{MAKE_SWZ3(ZERO, ZERO, ZERO), R300_ALU_ARGC_ZERO, 0, 0},
{MAKE_SWZ3(HALF, HALF, HALF), R300_ALU_ARGC_HALF, 0, 0}
};
static const int num_native_swizzles = sizeof(native_swizzles)/sizeof(native_swizzles[0]);
/**
* Find a native RGB swizzle that matches the given swizzle.
* Returns 0 if none found.
@ -205,7 +205,11 @@ unsigned int r300FPTranslateRGBSwizzle(unsigned int src, unsigned int swizzle)
return 0;
}
return sd->base + src*sd->stride;
if (src == RC_PAIR_PRESUB_SRC) {
return sd->base + sd->srcp_stride;
} else {
return sd->base + src*sd->stride;
}
}
@ -215,6 +219,9 @@ unsigned int r300FPTranslateRGBSwizzle(unsigned int src, unsigned int swizzle)
*/
unsigned int r300FPTranslateAlphaSwizzle(unsigned int src, unsigned int swizzle)
{
if (src == RC_PAIR_PRESUB_SRC) {
return R300_ALU_ARGA_SRCP_X + swizzle;
}
if (swizzle < 3)
return swizzle + 3*src;

View File

@ -260,6 +260,9 @@ static void emit_paired(struct r300_fragment_program_compiler *c, struct rc_pair
code->inst[ip].inst0 |= (inst->RGB.WriteMask << 11) | (inst->Alpha.WriteMask << 14);
code->inst[ip].inst0 |= (inst->RGB.OutputWriteMask << 15) | (inst->Alpha.OutputWriteMask << 18);
if (inst->Nop) {
code->inst[ip].inst0 |= R500_INST_NOP;
}
if (inst->Alpha.DepthWriteMask) {
code->inst[ip].inst4 |= R500_ALPHA_W_OMASK;
c->code->writes_depth = 1;
@ -275,6 +278,40 @@ static void emit_paired(struct r300_fragment_program_compiler *c, struct rc_pair
if (inst->Alpha.Saturate)
code->inst[ip].inst0 |= R500_INST_ALPHA_CLAMP;
/* Set the presubtract operation. */
switch(inst->RGB.Src[RC_PAIR_PRESUB_SRC].Index) {
case RC_PRESUB_BIAS:
code->inst[ip].inst1 |= R500_RGB_SRCP_OP_1_MINUS_2RGB0;
break;
case RC_PRESUB_SUB:
code->inst[ip].inst1 |= R500_RGB_SRCP_OP_RGB1_MINUS_RGB0;
break;
case RC_PRESUB_ADD:
code->inst[ip].inst1 |= R500_RGB_SRCP_OP_RGB1_PLUS_RGB0;
break;
case RC_PRESUB_INV:
code->inst[ip].inst1 |= R500_RGB_SRCP_OP_1_MINUS_RGB0;
break;
default:
break;
}
switch(inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Index) {
case RC_PRESUB_BIAS:
code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_1_MINUS_2A0;
break;
case RC_PRESUB_SUB:
code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_A1_MINUS_A0;
break;
case RC_PRESUB_ADD:
code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_A1_PLUS_A0;
break;
case RC_PRESUB_INV:
code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_1_MINUS_A0;
break;
default:
break;
}
code->inst[ip].inst1 |= R500_RGB_ADDR0(use_source(code, inst->RGB.Src[0]));
code->inst[ip].inst1 |= R500_RGB_ADDR1(use_source(code, inst->RGB.Src[1]));
code->inst[ip].inst1 |= R500_RGB_ADDR2(use_source(code, inst->RGB.Src[2]));

View File

@ -42,6 +42,7 @@ struct radeon_compiler {
/* Hardware specification. */
unsigned is_r500:1;
unsigned has_half_swizzles:1;
unsigned has_presub:1;
unsigned disable_optimizations:1;
unsigned max_temp_regs;
unsigned max_constants;

View File

@ -29,6 +29,25 @@
#include "radeon_program.h"
/**
 * Report the register channels that a single source operand reads.
 *
 * The four swizzle selectors of src are folded into a channel mask; any
 * selector outside X/Y/Z/W is discarded by masking with RC_MASK_XYZW.
 * Nothing is reported when no real channel is selected.  When the operand
 * is relatively addressed, a read of the X channel of address register 0
 * is reported as well.
 *
 * @param cb       Callback invoked for every register read.
 * @param fullinst Instruction the operand belongs to (passed through to cb).
 * @param src      Source operand to inspect.
 * @param userdata Opaque pointer passed through to cb.
 */
static void reads_normal_callback(
	rc_read_write_chan_fn cb,
	struct rc_instruction * fullinst,
	struct rc_src_register src,
	void * userdata)
{
	unsigned int used = 0;
	unsigned int c = 0;

	while (c < 4) {
		used |= 1 << GET_SWZ(src.Swizzle, c);
		c++;
	}
	used &= RC_MASK_XYZW;

	if (!used)
		return;

	cb(userdata, fullinst, src.File, src.Index, used);

	if (src.RelAddr)
		cb(userdata, fullinst, RC_FILE_ADDRESS, 0, RC_MASK_X);
}
static void reads_normal(struct rc_instruction * fullinst, rc_read_write_chan_fn cb, void * userdata)
{
@ -36,21 +55,60 @@ static void reads_normal(struct rc_instruction * fullinst, rc_read_write_chan_fn
const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Opcode);
for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) {
unsigned int refmask = 0;
if (inst->SrcReg[src].File == RC_FILE_NONE)
return;
for(unsigned int chan = 0; chan < 4; ++chan)
refmask |= 1 << GET_SWZ(inst->SrcReg[src].Swizzle, chan);
if (inst->SrcReg[src].File == RC_FILE_PRESUB) {
unsigned int i;
unsigned int srcp_regs = rc_presubtract_src_reg_count(
inst->PreSub.Opcode);
for( i = 0; i < srcp_regs; i++) {
reads_normal_callback(cb, fullinst,
inst->PreSub.SrcReg[i],
userdata);
}
} else {
reads_normal_callback(cb, fullinst,
inst->SrcReg[src], userdata);
}
}
}
refmask &= RC_MASK_XYZW;
static void pair_get_src_refmasks(unsigned int * refmasks,
struct rc_pair_instruction * inst,
unsigned int swz, unsigned int src)
{
if (swz >= 4)
return;
if (refmask)
cb(userdata, fullinst, inst->SrcReg[src].File, inst->SrcReg[src].Index, refmask);
if (swz == RC_SWIZZLE_X || swz == RC_SWIZZLE_Y || swz == RC_SWIZZLE_Z) {
if(src == RC_PAIR_PRESUB_SRC) {
unsigned int i;
int srcp_regs =
rc_presubtract_src_reg_count(
inst->RGB.Src[src].Index);
for(i = 0; i < srcp_regs; i++) {
refmasks[i] |= 1 << swz;
}
}
else {
refmasks[src] |= 1 << swz;
}
}
if (refmask && inst->SrcReg[src].RelAddr)
cb(userdata, fullinst, RC_FILE_ADDRESS, 0, RC_MASK_X);
if (swz == RC_SWIZZLE_W) {
if (src == RC_PAIR_PRESUB_SRC) {
unsigned int i;
int srcp_regs = rc_presubtract_src_reg_count(
inst->Alpha.Src[src].Index);
for(i = 0; i < srcp_regs; i++) {
refmasks[i] |= 1 << swz;
}
}
else {
refmasks[src] |= 1 << swz;
}
}
}
@ -59,24 +117,19 @@ static void reads_pair(struct rc_instruction * fullinst, rc_read_write_mask_fn
struct rc_pair_instruction * inst = &fullinst->U.P;
unsigned int refmasks[3] = { 0, 0, 0 };
if (inst->RGB.Opcode != RC_OPCODE_NOP) {
const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->RGB.Opcode);
unsigned int arg;
for(unsigned int arg = 0; arg < opcode->NumSrcRegs; ++arg) {
for(unsigned int chan = 0; chan < 3; ++chan) {
unsigned int swz = GET_SWZ(inst->RGB.Arg[arg].Swizzle, chan);
if (swz < 4)
refmasks[inst->RGB.Arg[arg].Source] |= 1 << swz;
}
}
}
if (inst->Alpha.Opcode != RC_OPCODE_NOP) {
const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Alpha.Opcode);
for(unsigned int arg = 0; arg < opcode->NumSrcRegs; ++arg) {
if (inst->Alpha.Arg[arg].Swizzle < 4)
refmasks[inst->Alpha.Arg[arg].Source] |= 1 << inst->Alpha.Arg[arg].Swizzle;
for(arg = 0; arg < 3; ++arg) {
unsigned int chan;
for(chan = 0; chan < 3; ++chan) {
unsigned int swz_rgb =
GET_SWZ(inst->RGB.Arg[arg].Swizzle, chan);
unsigned int swz_alpha =
GET_SWZ(inst->Alpha.Arg[arg].Swizzle, chan);
pair_get_src_refmasks(refmasks, inst, swz_rgb,
inst->RGB.Arg[arg].Source);
pair_get_src_refmasks(refmasks, inst, swz_alpha,
inst->Alpha.Arg[arg].Source);
}
}
@ -212,10 +265,25 @@ static void remap_normal_instruction(struct rc_instruction * fullinst,
rc_register_file file = inst->SrcReg[src].File;
unsigned int index = inst->SrcReg[src].Index;
cb(userdata, fullinst, &file, &index);
if (file == RC_FILE_PRESUB) {
unsigned int i;
unsigned int srcp_srcs = rc_presubtract_src_reg_count(
inst->PreSub.Opcode);
for(i = 0; i < srcp_srcs; i++) {
file = inst->PreSub.SrcReg[i].File;
index = inst->PreSub.SrcReg[i].Index;
cb(userdata, fullinst, &file, &index);
inst->PreSub.SrcReg[i].File = file;
inst->PreSub.SrcReg[i].Index = index;
}
inst->SrcReg[src].File = file;
inst->SrcReg[src].Index = index;
}
else {
cb(userdata, fullinst, &file, &index);
inst->SrcReg[src].File = file;
inst->SrcReg[src].Index = index;
}
}
}

View File

@ -30,6 +30,13 @@
#include "radeon_compiler.h"
#include "radeon_swizzle.h"
/**
* Scan state used by the peephole optimizations while walking the
* instructions that follow a candidate instruction.
*/
struct peephole_state {
/** The candidate instruction whose destination register is tracked. */
struct rc_instruction * Inst;
/** Stores a bitmask of the components that are still "alive" (i.e.
* they have not been written to since Inst was executed.)
*/
unsigned int WriteMask;
};
static struct rc_src_register chain_srcregs(struct rc_src_register outer, struct rc_src_register inner)
{
@ -54,7 +61,7 @@ static struct rc_src_register chain_srcregs(struct rc_src_register outer, struct
return combine;
}
struct peephole_state {
struct copy_propagate_state {
struct radeon_compiler * C;
struct rc_instruction * Mov;
unsigned int Conflict:1;
@ -84,10 +91,10 @@ struct peephole_state {
* @param index The index of the source register.
* @param mask The components of the source register that are being read from.
*/
static void peephole_scan_read(void * data, struct rc_instruction * inst,
static void copy_propagate_scan_read(void * data, struct rc_instruction * inst,
rc_register_file file, unsigned int index, unsigned int mask)
{
struct peephole_state * s = data;
struct copy_propagate_state * s = data;
/* XXX This could probably be handled better. */
if (file == RC_FILE_ADDRESS) {
@ -123,10 +130,10 @@ static void peephole_scan_read(void * data, struct rc_instruction * inst,
}
}
static void peephole_scan_write(void * data, struct rc_instruction * inst,
static void copy_propagate_scan_write(void * data, struct rc_instruction * inst,
rc_register_file file, unsigned int index, unsigned int mask)
{
struct peephole_state * s = data;
struct copy_propagate_state * s = data;
if (s->BranchDepth < 0)
return;
@ -146,9 +153,9 @@ static void peephole_scan_write(void * data, struct rc_instruction * inst,
}
}
static void peephole(struct radeon_compiler * c, struct rc_instruction * inst_mov)
static void copy_propagate(struct radeon_compiler * c, struct rc_instruction * inst_mov)
{
struct peephole_state s;
struct copy_propagate_state s;
if (inst_mov->U.I.DstReg.File != RC_FILE_TEMPORARY ||
inst_mov->U.I.DstReg.RelAddr ||
@ -170,14 +177,23 @@ static void peephole(struct radeon_compiler * c, struct rc_instruction * inst_mo
for(struct rc_instruction * inst = inst_mov->Next;
inst != &c->Program.Instructions;
inst = inst->Next) {
const struct rc_opcode_info * info = rc_get_opcode_info(inst->U.I.Opcode);
/* XXX In the future we might be able to make the optimizer
* smart enough to handle loops. */
if(inst->U.I.Opcode == RC_OPCODE_BGNLOOP
|| inst->U.I.Opcode == RC_OPCODE_ENDLOOP){
return;
}
rc_for_all_reads_mask(inst, peephole_scan_read, &s);
rc_for_all_writes_mask(inst, peephole_scan_write, &s);
/* It is possible to do copy propagation in this situation,
* just not right now, see peephole_add_presub_inv() */
if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE &&
info->NumSrcRegs > 2) {
return;
}
rc_for_all_reads_mask(inst, copy_propagate_scan_read, &s);
rc_for_all_writes_mask(inst, copy_propagate_scan_write, &s);
if (s.Conflict)
return;
@ -206,7 +222,6 @@ static void peephole(struct radeon_compiler * c, struct rc_instruction * inst_mo
inst != &c->Program.Instructions;
inst = inst->Next) {
const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) {
if (inst->U.I.SrcReg[src].File == RC_FILE_TEMPORARY &&
inst->U.I.SrcReg[src].Index == s.Mov->U.I.DstReg.Index) {
@ -217,8 +232,11 @@ static void peephole(struct radeon_compiler * c, struct rc_instruction * inst_mo
refmask |= (1 << swz) & RC_MASK_XYZW;
}
if ((refmask & s.MovMask) == refmask)
if ((refmask & s.MovMask) == refmask) {
inst->U.I.SrcReg[src] = chain_srcregs(inst->U.I.SrcReg[src], s.Mov->U.I.SrcReg[0]);
if (s.Mov->U.I.SrcReg[0].File == RC_FILE_PRESUB)
inst->U.I.PreSub = s.Mov->U.I.PreSub;
}
}
}
@ -283,7 +301,6 @@ static int is_src_uniform_constant(struct rc_src_register src,
return 1;
}
static void constant_folding_mad(struct rc_instruction * inst)
{
rc_swizzle swz;
@ -379,7 +396,6 @@ static void constant_folding_add(struct rc_instruction * inst)
}
}
/**
* Replace 0.0, 1.0 and 0.5 immediate constants by their
* respective swizzles. Simplify instructions like ADD dst, src, 0;
@ -454,6 +470,204 @@ static void constant_folding(struct radeon_compiler * c, struct rc_instruction *
constant_folding_add(inst);
}
/**
 * Compute which channels of dst's register are read by src.
 *
 * If src and dst do not name the same register (same File and Index) the
 * result is 0.  Otherwise the result is the set of X/Y/Z/W channels that
 * src's swizzle selects.  Note that dst.WriteMask is not consulted here;
 * callers that care about it must intersect the result themselves.
 */
static unsigned int src_reads_dst_mask(struct rc_src_register src,
						struct rc_dst_register dst)
{
	unsigned int chan;
	unsigned int read_mask = 0;

	if (dst.File == src.File && dst.Index == src.Index) {
		for (chan = 0; chan < 4; chan++)
			read_mask |= 1 << GET_SWZ(src.Swizzle, chan);
		/* Drop selectors that are not real register channels. */
		read_mask &= RC_MASK_XYZW;
	}

	return read_mask;
}
/* Tell whether any of the four swizzle selectors of src is one of the
 * constant swizzles (0, 0.5 or 1.0).
 * Returns 1 if at least one such selector is found, 0 otherwise. */
static int src_has_const_swz(struct rc_src_register src) {
	int chan = 0;

	while (chan < 4) {
		const unsigned int sel = GET_SWZ(src.Swizzle, chan);

		if (sel == RC_SWIZZLE_ZERO)
			return 1;
		if (sel == RC_SWIZZLE_HALF)
			return 1;
		if (sel == RC_SWIZZLE_ONE)
			return 1;
		chan++;
	}

	return 0;
}
/**
 * Write callback for the peephole scan.
 *
 * When the written register is the destination register of the tracked
 * instruction, the written components are removed from the set of
 * components that are still alive.
 *
 * @param data  Pointer to the struct peephole_state of the scan.
 * @param inst  Instruction performing the write (unused).
 * @param file  Register file being written.
 * @param index Register index being written.
 * @param mask  Components being written.
 */
static void peephole_scan_write(void * data, struct rc_instruction * inst,
		rc_register_file file, unsigned int index, unsigned int mask)
{
	struct peephole_state * state = data;

	if (state->Inst->U.I.DstReg.File != file
	    || state->Inst->U.I.DstReg.Index != index)
		return;

	/* Clearing the overwritten bits is the same as clearing the bits
	 * common to both masks. */
	state->WriteMask &= ~mask;
}
/**
* PRESUB_INV: ADD TEMP[0], none.1, -TEMP[1]
* Use the presubtract 1 - src0 for all readers of TEMP[0]. The first source
* of the add instruction must have the constant 1 swizzle. This function
* does not check const registers to see if their value is 1.0, so it should
* be called after the constant_folding optimization.
*
* @param c The compiler whose program contains inst_add.
* @param inst_add The ADD instruction to try to replace.
* @return
* 0 if the ADD instruction is still part of the program.
* 1 if the ADD instruction is no longer part of the program.
*/
static int peephole_add_presub_inv(
struct radeon_compiler * c,
struct rc_instruction * inst_add)
{
/* NOTE(review): mask is assigned below but not read again in this
* function. */
unsigned int i, swz, mask;
/* Set once at least one reader has been rewritten; cleared again when a
* reader is found that cannot be rewritten. */
unsigned int can_remove = 0;
/* Set once substitution is no longer safe for later instructions. */
unsigned int cant_sub = 0;
struct rc_instruction * inst;
struct peephole_state s;
/* An ADD with a saturate mode is never converted. */
if (inst_add->U.I.SaturateMode)
return 0;
mask = inst_add->U.I.DstReg.WriteMask;
/* Check if src0 is 1. */
/* XXX It would be nice to use is_src_uniform_constant here, but that
* function only works if the register's file is RC_FILE_NONE */
/* Only the channels that the ADD actually writes have to select the
* constant-one swizzle. */
for(i = 0; i < 4; i++ ) {
swz = GET_SWZ(inst_add->U.I.SrcReg[0].Swizzle, i);
if(((1 << i) & inst_add->U.I.DstReg.WriteMask)
&& swz != RC_SWIZZLE_ONE) {
return 0;
}
}
/* Check src1. */
/* src1 must be negated in every written channel, must not use Abs, must
* come from a temporary or constant register, and must not contain a
* constant swizzle. */
if ((inst_add->U.I.SrcReg[1].Negate & inst_add->U.I.DstReg.WriteMask) !=
inst_add->U.I.DstReg.WriteMask
|| inst_add->U.I.SrcReg[1].Abs
|| (inst_add->U.I.SrcReg[1].File != RC_FILE_TEMPORARY
&& inst_add->U.I.SrcReg[1].File != RC_FILE_CONSTANT)
|| src_has_const_swz(inst_add->U.I.SrcReg[1])) {
return 0;
}
/* Setup the peephole_state information. */
s.Inst = inst_add;
s.WriteMask = inst_add->U.I.DstReg.WriteMask;
/* For all instructions that read inst_add->U.I.DstReg before it is
* written again, use the 1 - src0 presubtract instead. */
for(inst = inst_add->Next; inst != &c->Program.Instructions;
inst = inst->Next) {
const struct rc_opcode_info * info =
rc_get_opcode_info(inst->U.I.Opcode);
for(i = 0; i < info->NumSrcRegs; i++) {
/* Only sources that read exactly the components written by the
* ADD are candidates; all other sources are skipped. */
if(inst_add->U.I.DstReg.WriteMask !=
src_reads_dst_mask(inst->U.I.SrcReg[i],
inst_add->U.I.DstReg)) {
continue;
}
/* A candidate reader was found after substitution became
* unsafe, so the ADD has to stay. */
if (cant_sub) {
can_remove = 0;
break;
}
/* XXX: There are some situations where instructions
* with more than 2 src registers can use the
* presubtract select, but to keep things simple we
* will disable presubtract on these instructions for
* now. Note: This if statement should not be pulled
* outside of the loop, because it only applies to
* instructions that could potentially use the
* presubtract source. */
if (info->NumSrcRegs > 2) {
can_remove = 0;
break;
}
/* We can't use more than one presubtract value in an
* instruction, unless the two presubtract operations
* are the same and read from the same registers. */
if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE) {
if (inst->U.I.PreSub.Opcode != RC_PRESUB_INV
|| inst->U.I.PreSub.SrcReg[0].File !=
inst_add->U.I.SrcReg[1].File
|| inst->U.I.PreSub.SrcReg[0].Index !=
inst_add->U.I.SrcReg[1].Index) {
can_remove = 0;
break;
}
}
/* We must be careful not to modify inst_add, since it
* is possible it will remain part of the program. */
/* The presubtract source is a copy of the ADD's src1 with the
* negation cleared. */
inst->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[1];
inst->U.I.PreSub.SrcReg[0].Negate = 0;
inst->U.I.PreSub.Opcode = RC_PRESUB_INV;
/* Redirect the reader's source to the presubtract result. */
inst->U.I.SrcReg[i] = chain_srcregs(inst->U.I.SrcReg[i],
inst->U.I.PreSub.SrcReg[0]);
inst->U.I.SrcReg[i].File = RC_FILE_PRESUB;
inst->U.I.SrcReg[i].Index = RC_PRESUB_INV;
can_remove = 1;
}
/* Stop scanning as soon as the ADD cannot be removed. Because
* can_remove starts at 0, this also stops the scan when the first
* following instruction has no source that could be rewritten. */
if(!can_remove)
break;
rc_for_all_writes_mask(inst, peephole_scan_write, &s);
/* If all components of inst_add's destination register have
* been written to by subsequent instructions, the original
* value of the destination register is no longer valid and
* we can't keep doing substitutions. */
if (!s.WriteMask){
break;
}
/* Make sure this instruction doesn't write to the presubtract
* source. Flow control instructions also end substitution. */
if (inst->U.I.DstReg.WriteMask &
src_reads_dst_mask(inst_add->U.I.SrcReg[1],
inst->U.I.DstReg)
|| info->IsFlowControl) {
cant_sub = 1;
}
}
/* The ADD is only removed when the scan ended with can_remove set. */
if(can_remove) {
rc_remove_instruction(inst_add);
return 1;
}
return 0;
}
/**
 * Try the peephole optimizations that apply to inst.
 *
 * Currently only ADD instructions are considered, and only when the
 * compiler target supports presubtract operations.
 *
 * @return
 * 0 if inst is still part of the program.
 * 1 if inst is no longer part of the program.
 */
static int peephole(struct radeon_compiler * c, struct rc_instruction * inst)
{
	if (inst->U.I.Opcode != RC_OPCODE_ADD)
		return 0;

	if (!c->has_presub)
		return 0;

	return peephole_add_presub_inv(c, inst) ? 1 : 0;
}
void rc_optimize(struct radeon_compiler * c, void *user)
{
struct rc_instruction * inst = c->Program.Instructions.Next;
@ -463,8 +677,11 @@ void rc_optimize(struct radeon_compiler * c, void *user)
constant_folding(c, cur);
if(peephole(c, cur))
continue;
if (cur->U.I.Opcode == RC_OPCODE_MOV) {
peephole(c, cur);
copy_propagate(c, cur);
/* cur may no longer be part of the program */
}
}

View File

@ -279,11 +279,118 @@ static int destructive_merge_instructions(
struct rc_pair_instruction * rgb,
struct rc_pair_instruction * alpha)
{
const struct rc_opcode_info * opcode;
assert(rgb->Alpha.Opcode == RC_OPCODE_NOP);
assert(alpha->RGB.Opcode == RC_OPCODE_NOP);
/* Presubtract registers need to be merged first so that registers
* needed by the presubtract operation can be placed in src0 and/or
* src1. */
/* Merge the rgb presubtract registers. */
const struct rc_opcode_info * rgb_info =
rc_get_opcode_info(rgb->RGB.Opcode);
if (alpha->RGB.Src[RC_PAIR_PRESUB_SRC].Used) {
unsigned int srcp_src;
unsigned int srcp_regs;
if (rgb->RGB.Src[RC_PAIR_PRESUB_SRC].Used)
return 0;
srcp_regs = rc_presubtract_src_reg_count(
alpha->RGB.Src[RC_PAIR_PRESUB_SRC].Index);
for(srcp_src = 0; srcp_src < srcp_regs; srcp_src++) {
unsigned int arg;
int free_source;
struct radeon_pair_instruction_source srcp =
alpha->RGB.Src[srcp_src];
struct radeon_pair_instruction_source temp;
/* 2nd arg of 1 means this is an rgb source.
* 3rd arg of 0 means this is not an alpha source. */
free_source = rc_pair_alloc_source(rgb, 1, 0,
srcp.File, srcp.Index);
/* If free_source == srcp_src, then either the
* presubtract source is already in the correct place. */
if (free_source == srcp_src)
continue;
/* If free_source < 0 then there are no free source
* slots. */
if (free_source < 0)
return 0;
/* Shuffle the sources, so we can put the
* presubtract source in the correct place. */
for (arg = 0; arg < rgb_info->NumSrcRegs; arg++) {
/*If this arg does not read from an rgb source,
* do nothing. */
if (rc_source_type_that_arg_reads(
rgb->RGB.Arg[arg].Source,
rgb->RGB.Arg[arg].Swizzle, 3)
!= RC_PAIR_SOURCE_RGB) {
continue;
}
if (rgb->RGB.Arg[arg].Source == srcp_src)
rgb->RGB.Arg[arg].Source = free_source;
/* We need to do this just in case register
* is one of the sources already, but in the
* wrong spot. */
else if(rgb->RGB.Arg[arg].Source == free_source)
rgb->RGB.Arg[arg].Source = srcp_src;
}
temp = rgb->RGB.Src[srcp_src];
rgb->RGB.Src[srcp_src] = rgb->RGB.Src[free_source];
rgb->RGB.Src[free_source] = temp;
}
}
/* Merge the alpha presubtract registers */
if (alpha->Alpha.Src[RC_PAIR_PRESUB_SRC].Used) {
unsigned int srcp_src;
unsigned int srcp_regs;
if(rgb->Alpha.Src[RC_PAIR_PRESUB_SRC].Used)
return 0;
srcp_regs = rc_presubtract_src_reg_count(
alpha->Alpha.Src[RC_PAIR_PRESUB_SRC].Index);
for(srcp_src = 0; srcp_src < srcp_regs; srcp_src++) {
unsigned int arg;
int free_source;
struct radeon_pair_instruction_source srcp =
alpha->Alpha.Src[srcp_src];
struct radeon_pair_instruction_source temp;
/* 2nd arg of 0 means this is not an rgb source.
* 3rd arg of 1 means this is an alpha source. */
free_source = rc_pair_alloc_source(rgb, 0, 1,
srcp.File, srcp.Index);
/* If free_source == srcp_src, then either the
* presubtract source is already in the correct place. */
if (free_source == srcp_src)
continue;
/* If free_source < 0 then there are no free source
* slots. */
if (free_source < 0)
return 0;
/* Shuffle the sources, so we can put the
* presubtract source in the correct place. */
for(arg = 0; arg < rgb_info->NumSrcRegs; arg++) {
/*If this arg does not read from an alpha
* source, do nothing. */
if (rc_source_type_that_arg_reads(
rgb->RGB.Arg[arg].Source,
rgb->RGB.Arg[arg].Swizzle, 3)
!= RC_PAIR_SOURCE_ALPHA) {
continue;
}
if (rgb->RGB.Arg[arg].Source == srcp_src)
rgb->RGB.Arg[arg].Source = free_source;
else if (rgb->RGB.Arg[arg].Source == free_source)
rgb->RGB.Arg[arg].Source = srcp_src;
}
temp = rgb->Alpha.Src[srcp_src];
rgb->Alpha.Src[srcp_src] = rgb->Alpha.Src[free_source];
rgb->Alpha.Src[free_source] = temp;
}
}
/* Copy alpha args into rgb */
const struct rc_opcode_info * opcode = rc_get_opcode_info(alpha->Alpha.Opcode);
opcode = rc_get_opcode_info(alpha->Alpha.Opcode);
for(unsigned int arg = 0; arg < opcode->NumSrcRegs; ++arg) {
unsigned int srcrgb = 0;
@ -351,7 +458,52 @@ static int merge_instructions(struct rc_pair_instruction * rgb, struct rc_pair_i
return 0;
}
/**
 * Mark the instruction preceding emitted as needing a NOP when emitted uses
 * a presubtract operation whose source registers are temporaries written by
 * that preceding instruction.
 *
 * @param emitted The instruction that has just been emitted; emitted->Prev
 *                is the instruction that gets the Nop flag.
 */
static void presub_nop(struct rc_instruction * emitted) {
	int prev_rgb_index, prev_alpha_index, i, num_src;

	/* We don't need a nop if the previous instruction is a TEX. */
	if (emitted->Prev->Type != RC_INSTRUCTION_PAIR) {
		return;
	}

	/* -1 marks "the previous instruction does not write this half".  It is
	 * compared against unsigned register indices below, where it converts
	 * to UINT_MAX -- presumably never a real register index. */
	if (emitted->Prev->U.P.RGB.WriteMask)
		prev_rgb_index = emitted->Prev->U.P.RGB.DestIndex;
	else
		prev_rgb_index = -1;
	if (emitted->Prev->U.P.Alpha.WriteMask)
		prev_alpha_index = emitted->Prev->U.P.Alpha.DestIndex;
	else
		prev_alpha_index = -1;

	/* Check the rgb presubtract sources against the previous writes. */
	if (emitted->U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Used) {
		num_src = rc_presubtract_src_reg_count(
			emitted->U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Index);
		for (i = 0; i < num_src; i++) {
			unsigned int index = emitted->U.P.RGB.Src[i].Index;
			if (emitted->U.P.RGB.Src[i].File == RC_FILE_TEMPORARY
			    && (index == prev_rgb_index
				|| index == prev_alpha_index)) {
				emitted->Prev->U.P.Nop = 1;
				return;
			}
		}
	}

	/* Check the alpha presubtract sources against the previous writes. */
	if (!emitted->U.P.Alpha.Src[RC_PAIR_PRESUB_SRC].Used)
		return;

	num_src = rc_presubtract_src_reg_count(
		emitted->U.P.Alpha.Src[RC_PAIR_PRESUB_SRC].Index);
	for (i = 0; i < num_src; i++) {
		unsigned int index = emitted->U.P.Alpha.Src[i].Index;
		if(emitted->U.P.Alpha.Src[i].File == RC_FILE_TEMPORARY
		   && (index == prev_rgb_index || index == prev_alpha_index)) {
			emitted->Prev->U.P.Nop = 1;
			return;
		}
	}
}
/**
* Find a good ALU instruction or pair of ALU instruction and emit it.
*
@ -408,6 +560,10 @@ static void emit_one_alu(struct schedule_state *s, struct rc_instruction * befor
commit_alu_instruction(s, sinst);
success: ;
}
/* If the instruction we just emitted uses a presubtract value, and
* the presubtract sources were written by the previous instruction,
* the previous instruction needs a nop. */
presub_nop(before->Prev);
}
static void scan_read(void * data, struct rc_instruction * inst,

View File

@ -127,6 +127,18 @@ static void classify_instruction(struct rc_sub_instruction * inst,
}
}
/**
 * Determine whether a source operand reads rgb and/or alpha channels.
 *
 * Each of the four swizzle selectors of src is examined: a selector below 3
 * sets *rgb to 1, a selector equal to 3 sets *alpha to 1, and any other
 * selector is ignored.  The outputs are only ever set, never cleared, so
 * the caller must initialize them.
 */
static void src_uses(struct rc_src_register src, unsigned int * rgb,
							unsigned int * alpha)
{
	int chan;

	for (chan = 0; chan < 4; chan++) {
		const unsigned int sel = GET_SWZ(src.Swizzle, chan);

		if (sel >= 4)
			continue;
		if (sel == 3)
			*alpha = 1;
		else
			*rgb = 1;
	}
}
/**
* Fill the given ALU instruction's opcodes and source operands into the given pair,
@ -158,12 +170,51 @@ static void set_pair_instruction(struct r300_fragment_program_compiler *c,
const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Opcode);
int i;
/* Presubtract handling:
* We need to make sure that the values used by the presubtract
* operation end up in src0 or src1. */
if(inst->PreSub.Opcode != RC_PRESUB_NONE) {
/* rc_pair_alloc_source() will fill in data for
* pair->{RGB,ALPHA}.Src[RC_PAIR_PRESUB_SRC] */
int j;
for(j = 0; j < 3; j++) {
int src_regs;
if(inst->SrcReg[j].File != RC_FILE_PRESUB)
continue;
src_regs = rc_presubtract_src_reg_count(
inst->PreSub.Opcode);
for(i = 0; i < src_regs; i++) {
unsigned int rgb = 0;
unsigned int alpha = 0;
src_uses(inst->SrcReg[j], &rgb, &alpha);
if(rgb) {
pair->RGB.Src[i].File =
inst->PreSub.SrcReg[i].File;
pair->RGB.Src[i].Index =
inst->PreSub.SrcReg[i].Index;
pair->RGB.Src[i].Used = 1;
}
if(alpha) {
pair->Alpha.Src[i].File =
inst->PreSub.SrcReg[i].File;
pair->Alpha.Src[i].Index =
inst->PreSub.SrcReg[i].Index;
pair->Alpha.Src[i].Used = 1;
}
}
}
}
for(i = 0; i < opcode->NumSrcRegs; ++i) {
int source;
if (needrgb && !istranscendent) {
unsigned int srcrgb = 0;
unsigned int srcalpha = 0;
int j;
/* We don't care about the alpha channel here. We only
* want the part of the swizzle that writes to rgb,
* since we are creating an rgb instruction. */
for(j = 0; j < 3; ++j) {
unsigned int swz = GET_SWZ(inst->SrcReg[i].Swizzle, j);
if (swz < 3)
@ -173,6 +224,7 @@ static void set_pair_instruction(struct r300_fragment_program_compiler *c,
}
source = rc_pair_alloc_source(pair, srcrgb, srcalpha,
inst->SrcReg[i].File, inst->SrcReg[i].Index);
assert(source != -1);
pair->RGB.Arg[i].Source = source;
pair->RGB.Arg[i].Swizzle = inst->SrcReg[i].Swizzle & 0x1ff;
pair->RGB.Arg[i].Abs = inst->SrcReg[i].Abs;
@ -188,6 +240,7 @@ static void set_pair_instruction(struct r300_fragment_program_compiler *c,
srcalpha = 1;
source = rc_pair_alloc_source(pair, srcrgb, srcalpha,
inst->SrcReg[i].File, inst->SrcReg[i].Index);
assert(source != -1);
pair->Alpha.Arg[i].Source = source;
pair->Alpha.Arg[i].Swizzle = swz;
pair->Alpha.Arg[i].Abs = inst->SrcReg[i].Abs;

View File

@ -39,7 +39,7 @@
struct radeon_compiler;
struct rc_src_register {
unsigned int File:3;
unsigned int File:4;
/** Negative values may be used for relative addressing. */
signed int Index:(RC_REGISTER_INDEX_BITS+1);
@ -64,6 +64,11 @@ struct rc_dst_register {
unsigned int WriteMask:4;
};
/**
 * Description of a presubtract operation: the operation itself plus the
 * registers it operates on.
 */
struct rc_presub_instruction {
/** Which presubtract operation is performed. */
rc_presubtract_op Opcode;
/** Operand registers of the operation; there is room for two. */
struct rc_src_register SrcReg[2];
};
/**
* Instructions are maintained by the compiler in a doubly linked list
* of these structures.
@ -108,6 +113,10 @@ struct rc_sub_instruction {
/** True if tex instruction should do shadow comparison */
unsigned int TexShadow:1;
/*@}*/
/** This holds information about the presubtract operation used by
* this instruction. */
struct rc_presub_instruction PreSub;
};
typedef enum {

View File

@ -79,7 +79,13 @@ typedef enum {
/**
* Indicates a special register, see RC_SPECIAL_xxx.
*/
RC_FILE_SPECIAL
RC_FILE_SPECIAL,
/**
* Indicates this register should use the result of the presubtract
* operation.
*/
RC_FILE_PRESUB
} rc_register_file;
enum {
@ -147,4 +153,32 @@ typedef enum {
RC_ALURESULT_W
} rc_write_aluresult;
/**
 * Presubtract operations.  The formula next to each value shows how the
 * result is computed from the operation's source operands.
 */
typedef enum {
	/** No presubtract operation. */
	RC_PRESUB_NONE = 0,

	/** 1 - 2 * src0 */
	RC_PRESUB_BIAS,

	/** src1 - src0 */
	RC_PRESUB_SUB,

	/** src1 + src0 */
	RC_PRESUB_ADD,

	/** 1 - src0 */
	RC_PRESUB_INV
} rc_presubtract_op;

/**
 * Number of source registers consumed by a presubtract operation.
 *
 * \return 1 for BIAS and INV, 2 for ADD and SUB, and 0 for every other
 * value (including RC_PRESUB_NONE).
 */
static inline int rc_presubtract_src_reg_count(rc_presubtract_op op){
	if (op == RC_PRESUB_BIAS || op == RC_PRESUB_INV)
		return 1;

	if (op == RC_PRESUB_ADD || op == RC_PRESUB_SUB)
		return 2;

	return 0;
}
#endif /* RADEON_PROGRAM_CONSTANTS_H */

View File

@ -38,26 +38,52 @@ int rc_pair_alloc_source(struct rc_pair_instruction *pair,
{
int candidate = -1;
int candidate_quality = -1;
unsigned int alpha_used = 0;
unsigned int rgb_used = 0;
int i;
if ((!rgb && !alpha) || file == RC_FILE_NONE)
return 0;
if (rgb && pair->RGB.Src[RC_PAIR_PRESUB_SRC].Used) {
if (file == RC_FILE_PRESUB) {
if (index != pair->RGB.Src[RC_PAIR_PRESUB_SRC].Index) {
return -1;
}
} else {
rgb_used++;
}
}
if (alpha && pair->Alpha.Src[RC_PAIR_PRESUB_SRC].Used) {
if (file == RC_FILE_PRESUB) {
if (index != pair->Alpha.Src[RC_PAIR_PRESUB_SRC].Index) {
return -1;
}
} else {
alpha_used++;
}
}
for(i = 0; i < 3; ++i) {
int q = 0;
if (rgb) {
if (pair->RGB.Src[i].Used) {
if (pair->RGB.Src[i].File != file ||
pair->RGB.Src[i].Index != index)
pair->RGB.Src[i].Index != index) {
rgb_used++;
continue;
}
q++;
}
}
if (alpha) {
if (pair->Alpha.Src[i].Used) {
if (pair->Alpha.Src[i].File != file ||
pair->Alpha.Src[i].Index != index)
pair->Alpha.Src[i].Index != index) {
alpha_used++;
continue;
}
q++;
}
}
@ -66,19 +92,156 @@ int rc_pair_alloc_source(struct rc_pair_instruction *pair,
candidate = i;
}
}
if (candidate < 0 || (rgb && rgb_used > 2) || (alpha && alpha_used > 2))
return -1;
if (candidate >= 0) {
if (rgb) {
pair->RGB.Src[candidate].Used = 1;
pair->RGB.Src[candidate].File = file;
pair->RGB.Src[candidate].Index = index;
/* candidate >= 0 */
/* Even if we have a presub src, the above loop needs to run,
* because we still need to make sure there is a free source.
*/
if (file == RC_FILE_PRESUB)
candidate = RC_PAIR_PRESUB_SRC;
if (rgb) {
pair->RGB.Src[candidate].Used = 1;
pair->RGB.Src[candidate].File = file;
pair->RGB.Src[candidate].Index = index;
if (candidate == RC_PAIR_PRESUB_SRC) {
/* For registers with the RC_FILE_PRESUB file,
* the index stores the presubtract op. */
int src_regs = rc_presubtract_src_reg_count(index);
for(i = 0; i < src_regs; i++) {
pair->RGB.Src[i].Used = 1;
}
}
if (alpha) {
pair->Alpha.Src[candidate].Used = 1;
pair->Alpha.Src[candidate].File = file;
pair->Alpha.Src[candidate].Index = index;
}
if (alpha) {
pair->Alpha.Src[candidate].Used = 1;
pair->Alpha.Src[candidate].File = file;
pair->Alpha.Src[candidate].Index = index;
if (candidate == RC_PAIR_PRESUB_SRC) {
/* For registers with the RC_FILE_PRESUB file,
* the index stores the presubtract op. */
int src_regs = rc_presubtract_src_reg_count(index);
for(i=0; i < src_regs; i++) {
pair->Alpha.Src[i].Used = 1;
}
}
}
return candidate;
}
/**
 * Invoke \p cb on the source slot(s) that one instruction argument reads.
 *
 * \param pair  Pair instruction whose source slots are visited.
 * \param data  Opaque pointer handed through to \p cb.
 * \param cb    Callback invoked once per visited source slot.
 * \param swz   Swizzle of the argument.  RC_SWIZZLE_W selects the alpha
 *              source slots, any other value <= 3 selects the rgb slots.
 * \param src   Source slot index used by the argument.
 *
 * If \p src is RC_PAIR_PRESUB_SRC, the callback is not invoked on that slot
 * itself.  Instead it is invoked on slots 0 .. n-1, where n is the source
 * count of the presubtract op stored in the presub slot's Index.
 */
static void pair_foreach_source_callback(
	struct rc_pair_instruction * pair,
	void * data,
	rc_pair_foreach_src_fn cb,
	unsigned int swz,
	unsigned int src)
{
	struct radeon_pair_instruction_source * slots;
	unsigned int count;
	unsigned int k;

	/* swz > 3 means that the swizzle is either not used, or a constant
	 * swizzle (e.g. 0, 1, 0.5), so no source slot is read. */
	if (swz > 3)
		return;

	slots = (swz == RC_SWIZZLE_W) ? pair->Alpha.Src : pair->RGB.Src;

	if (src != RC_PAIR_PRESUB_SRC) {
		cb(data, &slots[src]);
		return;
	}

	/* The presub slot's Index holds the presubtract op. */
	count = rc_presubtract_src_reg_count(slots[RC_PAIR_PRESUB_SRC].Index);
	for (k = 0; k < count; k++)
		cb(data, &slots[k]);
}
/**
 * Call \p cb for the source slots read by the alpha half of \p pair.
 *
 * Each argument of the alpha opcode is handed to
 * pair_foreach_source_callback() together with its first swizzle component
 * and its source slot index.
 *
 * \param pair  Pair instruction to inspect.
 * \param data  Opaque pointer handed through to \p cb.
 * \param cb    Callback invoked for each source slot that is read.
 */
void rc_pair_foreach_source_that_alpha_reads(
	struct rc_pair_instruction * pair,
	void * data,
	rc_pair_foreach_src_fn cb)
{
	const struct rc_opcode_info * opcode =
				rc_get_opcode_info(pair->Alpha.Opcode);
	unsigned int arg = 0;

	while (arg < opcode->NumSrcRegs) {
		const unsigned int swz =
				GET_SWZ(pair->Alpha.Arg[arg].Swizzle, 0);
		const unsigned int slot = pair->Alpha.Arg[arg].Source;

		pair_foreach_source_callback(pair, data, cb, swz, slot);
		arg++;
	}
}
/**
 * Call \p cb for the source slots read by the RGB half of \p pair.
 *
 * For every argument of the RGB opcode, the first swizzle component that
 * selects X, Y, Z or W decides whether the argument reads an rgb or an
 * alpha source slot.  Arguments whose three components are all constant or
 * unused swizzles read no source slot and trigger no callback.
 *
 * \param pair  Pair instruction to inspect.
 * \param data  Opaque pointer handed through to \p cb.
 * \param cb    Callback invoked for each source slot that is read.
 */
void rc_pair_foreach_source_that_rgb_reads(
	struct rc_pair_instruction * pair,
	void * data,
	rc_pair_foreach_src_fn cb)
{
	unsigned int i;
	const struct rc_opcode_info * info =
				rc_get_opcode_info(pair->RGB.Opcode);

	for(i = 0; i < info->NumSrcRegs; i++) {
		unsigned int chan;
		unsigned int swz = RC_SWIZZLE_UNUSED;
		/* Find the first swizzle that is either X,Y,Z,or W.  We
		 * assume here that if one channel swizzles X,Y, or Z, then
		 * none of the other channels swizzle W, and vice-versa.
		 *
		 * Only three channels are scanned: an RGB argument carries a
		 * three-component swizzle, so a fourth component would not
		 * describe anything the instruction reads.  The search has
		 * to stop at the first hit; otherwise swz would always end
		 * up as the value of the last channel scanned. */
		for(chan = 0; chan < 3; chan++) {
			swz = GET_SWZ(pair->RGB.Arg[i].Swizzle, chan);
			if(swz == RC_SWIZZLE_X || swz == RC_SWIZZLE_Y
			|| swz == RC_SWIZZLE_Z || swz == RC_SWIZZLE_W)
				break;
		}
		/* If nothing matched, swz holds a constant/unused swizzle
		 * and the helper ignores the argument. */
		pair_foreach_source_callback(pair, data, cb,
					swz,
					pair->RGB.Arg[i].Source);
	}
}
/**
 * Classify which kind of source an argument reads, based on its swizzle.
 *
 * The first \p channels components of \p swizzle are examined.  We assume
 * that if one channel swizzles X, Y or Z, then none of the other channels
 * swizzle W, and vice-versa; this is checked with an assert.
 *
 * \param source    Not used by this function.
 * \param swizzle   Swizzle of the argument.
 * \param channels  Number of swizzle components to examine.
 * \return RC_PAIR_SOURCE_RGB if any component selects X, Y or Z,
 *         RC_PAIR_SOURCE_ALPHA if any component selects W, and
 *         RC_PAIR_SOURCE_NONE if no component selects X, Y, Z or W.
 */
rc_pair_source_type rc_source_type_that_arg_reads(
	unsigned int source,
	unsigned int swizzle,
	unsigned int channels)
{
	int reads_rgb = 0;
	int reads_alpha = 0;
	unsigned int c;

	for (c = 0; c < channels; c++) {
		const unsigned int sel = GET_SWZ(swizzle, c);

		if (sel == RC_SWIZZLE_W)
			reads_alpha = 1;
		else if (sel == RC_SWIZZLE_X || sel == RC_SWIZZLE_Y
			|| sel == RC_SWIZZLE_Z)
			reads_rgb = 1;
	}

	/* An argument must not mix rgb and alpha reads. */
	assert(!(reads_rgb && reads_alpha));

	if (reads_rgb)
		return RC_PAIR_SOURCE_RGB;
	if (reads_alpha)
		return RC_PAIR_SOURCE_ALPHA;
	return RC_PAIR_SOURCE_NONE;
}

View File

@ -49,6 +49,11 @@ struct radeon_compiler;
* see \ref rc_pair_translate
*/
/* For rgb and alpha instructions when arg[n].Source = RC_PAIR_PRESUB_SRC, then
* the presubtract value will be used, and
* {RGB,Alpha}.Src[RC_PAIR_PRESUB_SRC].File will be set to RC_FILE_PRESUB.
*/
#define RC_PAIR_PRESUB_SRC 3
struct radeon_pair_instruction_source {
unsigned int Used:1;
@ -64,7 +69,7 @@ struct radeon_pair_instruction_rgb {
unsigned int OutputWriteMask:3;
unsigned int Saturate:1;
struct radeon_pair_instruction_source Src[3];
struct radeon_pair_instruction_source Src[4];
struct {
unsigned int Source:2;
@ -83,7 +88,7 @@ struct radeon_pair_instruction_alpha {
unsigned int DepthWriteMask:1;
unsigned int Saturate:1;
struct radeon_pair_instruction_source Src[3];
struct radeon_pair_instruction_source Src[4];
struct {
unsigned int Source:2;
@ -99,8 +104,17 @@ struct rc_pair_instruction {
unsigned int WriteALUResult:2;
unsigned int ALUResultCompare:3;
unsigned int Nop:1;
};
/**
 * Callback type taken by the rc_pair_foreach_source_that_*_reads() helpers.
 * The first argument is the caller-supplied data pointer, the second is
 * the source slot being visited.
 */
typedef void (*rc_pair_foreach_src_fn)
(void *, struct radeon_pair_instruction_source *);
/** Kind of source an argument reads; see rc_source_type_that_arg_reads(). */
typedef enum {
/** The argument reads neither an rgb nor an alpha source. */
RC_PAIR_SOURCE_NONE = 0,
/** The argument reads an rgb source. */
RC_PAIR_SOURCE_RGB,
/** The argument reads an alpha source. */
RC_PAIR_SOURCE_ALPHA
} rc_pair_source_type;
/**
* General helper functions for dealing with the paired instruction format.
@ -109,6 +123,21 @@ struct rc_pair_instruction {
int rc_pair_alloc_source(struct rc_pair_instruction *pair,
unsigned int rgb, unsigned int alpha,
rc_register_file file, unsigned int index);
/**
 * Invoke cb, passing data through, for the source slots read by the
 * arguments of the alpha half of the pair instruction.
 */
void rc_pair_foreach_source_that_alpha_reads(
struct rc_pair_instruction * pair,
void * data,
rc_pair_foreach_src_fn cb);
/**
 * Invoke cb, passing data through, for the source slots read by the
 * arguments of the rgb half of the pair instruction.
 */
void rc_pair_foreach_source_that_rgb_reads(
struct rc_pair_instruction * pair,
void * data,
rc_pair_foreach_src_fn cb);
/**
 * Classify, from the first `channels` components of `swizzle`, whether an
 * argument reads an rgb source, an alpha source, or neither.
 */
rc_pair_source_type rc_source_type_that_arg_reads(
unsigned int source,
unsigned int swizzle,
unsigned int channels);
/*@}*/

View File

@ -38,6 +38,24 @@ static const char * textarget_to_string(rc_texture_target target)
}
}
/**
 * Return a human-readable formula for a presubtract operation.
 *
 * \return A string literal describing \p op, or "BAD_PRESUBTRACT_OP" if
 * \p op is not one of the known presubtract operations.
 */
static const char * presubtract_op_to_string(rc_presubtract_op op)
{
	static const char * const formulas[] = {
		[RC_PRESUB_NONE] = "NONE",
		[RC_PRESUB_BIAS] = "(1 - 2 * src0)",
		[RC_PRESUB_SUB] = "(src1 - src0)",
		[RC_PRESUB_ADD] = "(src1 + src0)",
		[RC_PRESUB_INV] = "(1 - src0)"
	};
	/* Compare as unsigned so that any out-of-range value, whatever the
	 * signedness of the enum type, lands in the error case. */
	const unsigned int idx = (unsigned int)op;

	if (idx >= sizeof(formulas) / sizeof(formulas[0]))
		return "BAD_PRESUBTRACT_OP";

	return formulas[idx];
}
static void rc_print_comparefunc(FILE * f, const char * lhs, rc_compare_func func, const char * rhs)
{
if (func == RC_COMPARE_FUNC_NEVER) {
@ -125,7 +143,43 @@ static void rc_print_swizzle(FILE * f, unsigned int swizzle, unsigned int negate
}
}
static void rc_print_src_register(FILE * f, struct rc_src_register src)
/**
 * Print a presubtract operation, with its actual operand registers, as a
 * parenthesized expression.
 *
 * The operand order follows the definition of each operation:
 *   BIAS: (1 - 2 * src0)
 *   SUB:  (src1 - src0)
 *   ADD:  (src0 + src1)
 *   INV:  (1 - src0)
 * Any other opcode prints an empty pair of parentheses.
 *
 * \param f     Stream to print to.
 * \param inst  Presubtract operation to print.
 */
static void rc_print_presub_instruction(FILE * f,
					struct rc_presub_instruction inst)
{
	fprintf(f,"(");
	switch(inst.Opcode){
	case RC_PRESUB_BIAS:
		fprintf(f, "1 - 2 * ");
		rc_print_register(f, inst.SrcReg[0].File,
				inst.SrcReg[0].Index,inst.SrcReg[0].RelAddr);
		break;
	case RC_PRESUB_SUB:
		/* SUB computes src1 - src0, so src1 has to be printed on
		 * the left-hand side of the minus sign. */
		rc_print_register(f, inst.SrcReg[1].File,
				inst.SrcReg[1].Index,inst.SrcReg[1].RelAddr);
		fprintf(f, " - ");
		rc_print_register(f, inst.SrcReg[0].File,
				inst.SrcReg[0].Index,inst.SrcReg[0].RelAddr);
		break;
	case RC_PRESUB_ADD:
		rc_print_register(f, inst.SrcReg[0].File,
				inst.SrcReg[0].Index,inst.SrcReg[0].RelAddr);
		fprintf(f, " + ");
		rc_print_register(f, inst.SrcReg[1].File,
				inst.SrcReg[1].Index,inst.SrcReg[1].RelAddr);
		break;
	case RC_PRESUB_INV:
		fprintf(f, "1 - ");
		rc_print_register(f, inst.SrcReg[0].File,
				inst.SrcReg[0].Index,inst.SrcReg[0].RelAddr);
		break;
	default:
		break;
	}
	fprintf(f, ")");
}
static void rc_print_src_register(FILE * f, struct rc_instruction * inst,
struct rc_src_register src)
{
int trivial_negate = (src.Negate == RC_MASK_NONE || src.Negate == RC_MASK_XYZW);
@ -134,7 +188,10 @@ static void rc_print_src_register(FILE * f, struct rc_src_register src)
if (src.Abs)
fprintf(f, "|");
rc_print_register(f, src.File, src.Index, src.RelAddr);
if(src.File == RC_FILE_PRESUB)
rc_print_presub_instruction(f, inst->U.I.PreSub);
else
rc_print_register(f, src.File, src.Index, src.RelAddr);
if (src.Abs && !trivial_negate)
fprintf(f, "|");
@ -198,7 +255,7 @@ static void rc_print_normal_instruction(FILE * f, struct rc_instruction * inst,
if (reg > 0)
fprintf(f, ",");
fprintf(f, " ");
rc_print_src_register(f, inst->U.I.SrcReg[reg]);
rc_print_src_register(f, inst, inst->U.I.SrcReg[reg]);
}
if (opcode->HasTexture) {
@ -247,6 +304,16 @@ static void rc_print_pair_instruction(FILE * f, struct rc_instruction * fullinst
printedsrc = 1;
}
}
if(inst->RGB.Src[RC_PAIR_PRESUB_SRC].Used) {
fprintf(f, ", srcp.xyz = %s",
presubtract_op_to_string(
inst->RGB.Src[RC_PAIR_PRESUB_SRC].Index));
}
if(inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Used) {
fprintf(f, ", srcp.w = %s",
presubtract_op_to_string(
inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Index));
}
fprintf(f, "\n");
if (inst->RGB.Opcode != RC_OPCODE_NOP) {
@ -272,7 +339,12 @@ static void rc_print_pair_instruction(FILE * f, struct rc_instruction * fullinst
for(unsigned int arg = 0; arg < opcode->NumSrcRegs; ++arg) {
const char* abs = inst->RGB.Arg[arg].Abs ? "|" : "";
const char* neg = inst->RGB.Arg[arg].Negate ? "-" : "";
fprintf(f, ", %s%ssrc%i.%c%c%c%s", neg, abs, inst->RGB.Arg[arg].Source,
fprintf(f, ", %s%ssrc", neg, abs);
if(inst->RGB.Arg[arg].Source == RC_PAIR_PRESUB_SRC)
fprintf(f,"p");
else
fprintf(f,"%d", inst->RGB.Arg[arg].Source);
fprintf(f,".%c%c%c%s",
rc_swizzle_char(GET_SWZ(inst->RGB.Arg[arg].Swizzle, 0)),
rc_swizzle_char(GET_SWZ(inst->RGB.Arg[arg].Swizzle, 1)),
rc_swizzle_char(GET_SWZ(inst->RGB.Arg[arg].Swizzle, 2)),
@ -300,7 +372,12 @@ static void rc_print_pair_instruction(FILE * f, struct rc_instruction * fullinst
for(unsigned int arg = 0; arg < opcode->NumSrcRegs; ++arg) {
const char* abs = inst->Alpha.Arg[arg].Abs ? "|" : "";
const char* neg = inst->Alpha.Arg[arg].Negate ? "-" : "";
fprintf(f, ", %s%ssrc%i.%c%s", neg, abs, inst->Alpha.Arg[arg].Source,
fprintf(f, ", %s%ssrc", neg, abs);
if(inst->Alpha.Arg[arg].Source == RC_PAIR_PRESUB_SRC)
fprintf(f,"p");
else
fprintf(f,"%d", inst->Alpha.Arg[arg].Source);
fprintf(f,".%c%s",
rc_swizzle_char(inst->Alpha.Arg[arg].Swizzle), abs);
}
fprintf(f, "\n");

View File

@ -27,6 +27,16 @@
#include "radeon_remove_constants.h"
/**
 * Register-remapping callback that renumbers constant registers.
 *
 * \param userdata  Table of unsigned values, indexed by the current
 *                  constant index, giving the new constant index.
 * \param inst      Instruction being remapped (not used here).
 * \param pfile     Register file of the register being visited.
 * \param pindex    Index of the register; rewritten in place when the
 *                  register is in RC_FILE_CONSTANT.
 */
static void remap_regs(void * userdata, struct rc_instruction * inst,
			rc_register_file * pfile, unsigned int * pindex)
{
	const unsigned * table = userdata;

	/* Registers outside the constant file keep their index. */
	if (*pfile != RC_FILE_CONSTANT)
		return;

	*pindex = table[*pindex];
}
void rc_remove_unused_constants(struct radeon_compiler *c, void *user)
{
unsigned **out_remap_table = (unsigned**)user;
@ -51,6 +61,10 @@ void rc_remove_unused_constants(struct radeon_compiler *c, void *user)
inst != &c->Program.Instructions; inst = inst->Next) {
const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->U.I.Opcode);
/* XXX: This loop and the if statement after it should be
* replaced by a call to one of the rc_for_all_reads_* functions.
* The reason it does not use one of those functions now is
* because none of them have RelAddr as an argument. */
for (unsigned i = 0; i < opcode->NumSrcRegs; i++) {
if (inst->U.I.SrcReg[i].File == RC_FILE_CONSTANT) {
if (inst->U.I.SrcReg[i].RelAddr) {
@ -60,6 +74,18 @@ void rc_remove_unused_constants(struct radeon_compiler *c, void *user)
}
}
}
if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE) {
unsigned int i;
unsigned int srcp_regs = rc_presubtract_src_reg_count(
inst->U.I.PreSub.Opcode);
for( i = 0; i < srcp_regs; i++) {
if (inst->U.I.PreSub.SrcReg[i].File ==
RC_FILE_CONSTANT) {
const_used[
inst->U.I.PreSub.SrcReg[i].Index] = 1;
}
}
}
}
/* Pass 2: If there is relative addressing, mark all externals as used. */
@ -100,13 +126,7 @@ void rc_remove_unused_constants(struct radeon_compiler *c, void *user)
if (!is_identity) {
for (struct rc_instruction *inst = c->Program.Instructions.Next;
inst != &c->Program.Instructions; inst = inst->Next) {
const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->U.I.Opcode);
for (unsigned i = 0; i < opcode->NumSrcRegs; i++) {
if (inst->U.I.SrcReg[i].File == RC_FILE_CONSTANT) {
inst->U.I.SrcReg[i].Index = inv_remap_table[inst->U.I.SrcReg[i].Index];
}
}
rc_remap_registers(inst, remap_regs, inv_remap_table);
}
}