r300: fix vertex shader control flow in loops
This fixes 7 loop piglit tests when loop unrolling is disabled.

The problem was that we tried to be smart with breaks and, in some cases, to save one predicate instruction for the endif. This worked for simple loops but caused problems for more complex shaders. Instead, just switch to the standard VE_PRED_SNEQ_PUSH / ME_PRED_SET_POP combo everywhere.

Shader-db results on RV530 show three hurt glmark tests; however, I believe the simplification should be worth it.

total instructions in shared programs: 123715 -> 123718 (<.01%)
instructions in affected programs: 54 -> 57 (5.56%)
total predicate in shared programs: 118 -> 121 (2.54%)
predicate in affected programs: 6 -> 9 (50.00%)
total temps in shared programs: 17304 -> 17307 (0.02%)
temps in affected programs: 12 -> 15 (25.00%)

Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/6468
Signed-off-by: Pavel Ondračka <pavel.ondracka@gmail.com>
Reviewed-by: Filip Gawin <filip.gawin@zoho.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16470>
This commit is contained in:
parent
46b135738b
commit
23dfae4c81
|
@ -36,7 +36,6 @@ struct vert_fc_state {
|
||||||
unsigned LoopsReserved;
|
unsigned LoopsReserved;
|
||||||
int PredStack[R500_PVS_MAX_LOOP_DEPTH];
|
int PredStack[R500_PVS_MAX_LOOP_DEPTH];
|
||||||
int PredicateReg;
|
int PredicateReg;
|
||||||
unsigned InCFBreak;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
static void build_pred_src(
|
static void build_pred_src(
|
||||||
|
@ -161,7 +160,7 @@ static void lower_brk(
|
||||||
{
|
{
|
||||||
if (fc_state->LoopDepth == 1) {
|
if (fc_state->LoopDepth == 1) {
|
||||||
inst->U.I.Opcode = RC_OPCODE_RCP;
|
inst->U.I.Opcode = RC_OPCODE_RCP;
|
||||||
inst->U.I.DstReg.Pred = RC_PRED_INV;
|
inst->U.I.DstReg.Pred = RC_PRED_SET;
|
||||||
inst->U.I.SrcReg[0].Index = 0;
|
inst->U.I.SrcReg[0].Index = 0;
|
||||||
inst->U.I.SrcReg[0].File = RC_FILE_NONE;
|
inst->U.I.SrcReg[0].File = RC_FILE_NONE;
|
||||||
inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000;
|
inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000;
|
||||||
|
@ -203,17 +202,8 @@ static void lower_if(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (inst->Next->U.I.Opcode == RC_OPCODE_BRK) {
|
if (fc_state->BranchDepth == 0 && fc_state->LoopDepth == 0) {
|
||||||
fc_state->InCFBreak = 1;
|
inst->U.I.Opcode = RC_ME_PRED_SNEQ;
|
||||||
}
|
|
||||||
if ((fc_state->BranchDepth == 0 && fc_state->LoopDepth == 0)
|
|
||||||
|| (fc_state->LoopDepth == 1 && fc_state->InCFBreak)) {
|
|
||||||
if (fc_state->InCFBreak) {
|
|
||||||
inst->U.I.Opcode = RC_ME_PRED_SEQ;
|
|
||||||
inst->U.I.DstReg.Pred = RC_PRED_SET;
|
|
||||||
} else {
|
|
||||||
inst->U.I.Opcode = RC_ME_PRED_SNEQ;
|
|
||||||
}
|
|
||||||
} else {
|
} else {
|
||||||
unsigned swz;
|
unsigned swz;
|
||||||
inst->U.I.Opcode = RC_VE_PRED_SNEQ_PUSH;
|
inst->U.I.Opcode = RC_VE_PRED_SNEQ_PUSH;
|
||||||
|
@ -274,17 +264,13 @@ void rc_vert_fc(struct radeon_compiler *c, void *user)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case RC_OPCODE_ENDIF:
|
case RC_OPCODE_ENDIF:
|
||||||
if (fc_state.LoopDepth == 1 && fc_state.InCFBreak) {
|
/* TODO: If LoopDepth == 1 and there is only a single break
|
||||||
struct rc_instruction * to_delete = inst;
|
* we can optimize out the endif just after the break. However
|
||||||
inst = inst->Prev;
|
* previous attempts were buggy, so keep it simple for now.
|
||||||
rc_remove_instruction(to_delete);
|
*/
|
||||||
/* XXX: Delete the endif instruction */
|
inst->U.I.Opcode = RC_ME_PRED_SET_POP;
|
||||||
} else {
|
build_pred_dst(&inst->U.I.DstReg, &fc_state);
|
||||||
inst->U.I.Opcode = RC_ME_PRED_SET_POP;
|
build_pred_src(&inst->U.I.SrcReg[0], &fc_state);
|
||||||
build_pred_dst(&inst->U.I.DstReg, &fc_state);
|
|
||||||
build_pred_src(&inst->U.I.SrcReg[0], &fc_state);
|
|
||||||
}
|
|
||||||
fc_state.InCFBreak = 0;
|
|
||||||
fc_state.BranchDepth--;
|
fc_state.BranchDepth--;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue