vc4: Avoid false scheduling dependencies for LOAD_IMMs.
Noticed in shaders with branching, where we ended up scheduling delay slots near the start of a block for the uniforms reset setup.

total instructions in shared programs: 93970 -> 93951 (-0.02%)
instructions in affected programs: 3117 -> 3098 (-0.61%)
3DMMES performance +0.423087% +/- 0.133521% (n=9,10)
This commit is contained in:
parent
6c34084d8e
commit
51244859e3
|
@@ -477,6 +477,11 @@ reads_too_soon_after_write(struct choose_scoreboard *scoreboard, uint64_t inst)
|
|||
uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
|
||||
uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);
|
||||
uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
|
||||
|
||||
/* Full immediate loads don't read any registers. */
|
||||
if (sig == QPU_SIG_LOAD_IMM)
|
||||
return false;
|
||||
|
||||
uint32_t src_muxes[] = {
|
||||
QPU_GET_FIELD(inst, QPU_ADD_A),
|
||||
QPU_GET_FIELD(inst, QPU_ADD_B),
|
||||
|
|
|
@@ -58,6 +58,10 @@ _reads_reg(uint64_t inst, uint32_t r, bool ignore_a, bool ignore_b)
|
|||
if (QPU_GET_FIELD(inst, QPU_SIG) == QPU_SIG_BRANCH)
|
||||
return false;
|
||||
|
||||
/* Load immediates don't read any registers. */
|
||||
if (QPU_GET_FIELD(inst, QPU_SIG) == QPU_SIG_LOAD_IMM)
|
||||
return false;
|
||||
|
||||
for (int i = 0; i < ARRAY_SIZE(src_regs); i++) {
|
||||
if (!ignore_a &&
|
||||
src_regs[i].mux == QPU_MUX_A &&
|
||||
|
|
Loading…
Reference in New Issue