r300: fix vertex shader control flow in loops

This fixes 7 loop piglit tests when loop unrolling is disabled.

The problem is that we were trying to be smart with breaks and
tried to save one predicate instruction for endif in some cases.
This worked for simple loops but brought problems for more complex
shaders, instead just switch to standard VE_PRED_SNEQ_PUSH
ME_PRED_SET_POP combo everywhere.

Shader-db results on RV530 show three hurt glmark tests, however
I believe the simplification should be worth it.

total instructions in shared programs: 123715 -> 123718 (<.01%)
instructions in affected programs: 54 -> 57 (5.56%)
total predicate in shared programs: 118 -> 121 (2.54%)
predicate in affected programs: 6 -> 9 (50.00%)
total temps in shared programs: 17304 -> 17307 (0.02%)
temps in affected programs: 12 -> 15 (25.00%)

Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/6468
Signed-off-by: Pavel Ondračka <pavel.ondracka@gmail.com>
Reviewed-by: Filip Gawin <filip.gawin@zoho.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16470>
This commit is contained in:
Pavel Ondračka 2022-05-11 21:52:44 +02:00 committed by Marge Bot
parent 46b135738b
commit 23dfae4c81
1 changed files with 10 additions and 24 deletions

View File

@ -36,7 +36,6 @@ struct vert_fc_state {
unsigned LoopsReserved;
int PredStack[R500_PVS_MAX_LOOP_DEPTH];
int PredicateReg;
unsigned InCFBreak;
};
static void build_pred_src(
@ -161,7 +160,7 @@ static void lower_brk(
{
if (fc_state->LoopDepth == 1) {
inst->U.I.Opcode = RC_OPCODE_RCP;
inst->U.I.DstReg.Pred = RC_PRED_INV;
inst->U.I.DstReg.Pred = RC_PRED_SET;
inst->U.I.SrcReg[0].Index = 0;
inst->U.I.SrcReg[0].File = RC_FILE_NONE;
inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000;
@ -203,17 +202,8 @@ static void lower_if(
}
}
if (inst->Next->U.I.Opcode == RC_OPCODE_BRK) {
fc_state->InCFBreak = 1;
}
if ((fc_state->BranchDepth == 0 && fc_state->LoopDepth == 0)
|| (fc_state->LoopDepth == 1 && fc_state->InCFBreak)) {
if (fc_state->InCFBreak) {
inst->U.I.Opcode = RC_ME_PRED_SEQ;
inst->U.I.DstReg.Pred = RC_PRED_SET;
} else {
inst->U.I.Opcode = RC_ME_PRED_SNEQ;
}
if (fc_state->BranchDepth == 0 && fc_state->LoopDepth == 0) {
inst->U.I.Opcode = RC_ME_PRED_SNEQ;
} else {
unsigned swz;
inst->U.I.Opcode = RC_VE_PRED_SNEQ_PUSH;
@ -274,17 +264,13 @@ void rc_vert_fc(struct radeon_compiler *c, void *user)
break;
case RC_OPCODE_ENDIF:
if (fc_state.LoopDepth == 1 && fc_state.InCFBreak) {
struct rc_instruction * to_delete = inst;
inst = inst->Prev;
rc_remove_instruction(to_delete);
/* XXX: Delete the endif instruction */
} else {
inst->U.I.Opcode = RC_ME_PRED_SET_POP;
build_pred_dst(&inst->U.I.DstReg, &fc_state);
build_pred_src(&inst->U.I.SrcReg[0], &fc_state);
}
fc_state.InCFBreak = 0;
/* TODO: If LoopDepth == 1 and there is only a single break
* we can optimize out the endif just after the break. However
* previous attempts were buggy, so keep it simple for now.
*/
inst->U.I.Opcode = RC_ME_PRED_SET_POP;
build_pred_dst(&inst->U.I.DstReg, &fc_state);
build_pred_src(&inst->U.I.SrcReg[0], &fc_state);
fc_state.BranchDepth--;
break;