v3d: add shader-db stat to count SFU stalls
SFU operations have a latency of 2 cycles, so if their result is used in the cycle immediately following an SFU instruction, the GPU stalls for an extra cycle until the result is available. This adds the number of stalls to the shader-db debug output, together with the sum of instructions + stalls, to help evaluate optimizations that schedule instructions so as to avoid generating sfu-stalls. v2: Rename v3d_qpu_generates_sfu_stalls to v3d_qpu_instr_is_sfu (Eric) Reviewed-by: Eric Anholt <eric@anholt.net>
This commit is contained in:
parent
f7224014df
commit
c341ab7ffb
|
@ -440,6 +440,8 @@ struct choose_scoreboard {
|
|||
struct dag *dag;
|
||||
int tick;
|
||||
int last_magic_sfu_write_tick;
|
||||
int last_stallable_sfu_reg;
|
||||
int last_stallable_sfu_tick;
|
||||
int last_ldvary_tick;
|
||||
int last_uniforms_reset_tick;
|
||||
int last_thrsw_tick;
|
||||
|
@ -531,6 +533,33 @@ pixel_scoreboard_too_soon(struct choose_scoreboard *scoreboard,
|
|||
return (scoreboard->tick == 0 && qpu_inst_is_tlb(inst));
|
||||
}
|
||||
|
||||
static bool
|
||||
qpu_instruction_uses_rf(const struct v3d_qpu_instr *inst,
|
||||
uint32_t waddr) {
|
||||
|
||||
if (inst->type != V3D_QPU_INSTR_TYPE_ALU)
|
||||
return false;
|
||||
|
||||
if (v3d_qpu_uses_mux(inst, V3D_QPU_MUX_A) &&
|
||||
inst->raddr_a == waddr)
|
||||
return true;
|
||||
|
||||
if (v3d_qpu_uses_mux(inst, V3D_QPU_MUX_B) &&
|
||||
!inst->sig.small_imm && (inst->raddr_b == waddr))
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static bool
|
||||
mux_read_stalls(struct choose_scoreboard *scoreboard,
|
||||
const struct v3d_qpu_instr *inst)
|
||||
{
|
||||
return scoreboard->tick == scoreboard->last_stallable_sfu_tick + 1 &&
|
||||
qpu_instruction_uses_rf(inst,
|
||||
scoreboard->last_stallable_sfu_reg);
|
||||
}
|
||||
|
||||
static int
|
||||
get_instruction_priority(const struct v3d_qpu_instr *inst)
|
||||
{
|
||||
|
@ -851,6 +880,16 @@ update_scoreboard_for_magic_waddr(struct choose_scoreboard *scoreboard,
|
|||
scoreboard->last_magic_sfu_write_tick = scoreboard->tick;
|
||||
}
|
||||
|
||||
static void
|
||||
update_scoreboard_for_sfu_stall_waddr(struct choose_scoreboard *scoreboard,
|
||||
const struct v3d_qpu_instr *inst)
|
||||
{
|
||||
if (v3d_qpu_instr_is_sfu(inst)) {
|
||||
scoreboard->last_stallable_sfu_reg = inst->alu.add.waddr;
|
||||
scoreboard->last_stallable_sfu_tick = scoreboard->tick;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
update_scoreboard_for_chosen(struct choose_scoreboard *scoreboard,
|
||||
const struct v3d_qpu_instr *inst)
|
||||
|
@ -864,6 +903,9 @@ update_scoreboard_for_chosen(struct choose_scoreboard *scoreboard,
|
|||
if (inst->alu.add.magic_write) {
|
||||
update_scoreboard_for_magic_waddr(scoreboard,
|
||||
inst->alu.add.waddr);
|
||||
} else {
|
||||
update_scoreboard_for_sfu_stall_waddr(scoreboard,
|
||||
inst);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1298,6 +1340,8 @@ schedule_instructions(struct v3d_compile *c,
|
|||
fprintf(stderr, "\n");
|
||||
}
|
||||
}
|
||||
if (mux_read_stalls(scoreboard, inst))
|
||||
c->qpu_inst_stalled_count++;
|
||||
}
|
||||
|
||||
/* Update the uniform index for the rewritten location --
|
||||
|
@ -1481,6 +1525,7 @@ v3d_qpu_schedule_instructions(struct v3d_compile *c)
|
|||
scoreboard.last_magic_sfu_write_tick = -10;
|
||||
scoreboard.last_uniforms_reset_tick = -10;
|
||||
scoreboard.last_thrsw_tick = -10;
|
||||
scoreboard.last_stallable_sfu_tick = -10;
|
||||
|
||||
if (debug) {
|
||||
fprintf(stderr, "Pre-schedule instructions\n");
|
||||
|
|
|
@ -613,6 +613,7 @@ struct v3d_compile {
|
|||
uint64_t *qpu_insts;
|
||||
uint32_t qpu_inst_count;
|
||||
uint32_t qpu_inst_size;
|
||||
uint32_t qpu_inst_stalled_count;
|
||||
|
||||
/* For the FS, the number of varying inputs not counting the
|
||||
* point/line varyings payload
|
||||
|
|
|
@ -947,7 +947,8 @@ uint64_t *v3d_compile(const struct v3d_compiler *compiler,
|
|||
char *shaderdb;
|
||||
int ret = asprintf(&shaderdb,
|
||||
"%s shader: %d inst, %d threads, %d loops, "
|
||||
"%d uniforms, %d max-temps, %d:%d spills:fills",
|
||||
"%d uniforms, %d max-temps, %d:%d spills:fills, "
|
||||
"%d sfu-stalls, %d inst-and-stalls",
|
||||
vir_get_stage_name(c),
|
||||
c->qpu_inst_count,
|
||||
c->threads,
|
||||
|
@ -955,7 +956,9 @@ uint64_t *v3d_compile(const struct v3d_compiler *compiler,
|
|||
c->num_uniforms,
|
||||
vir_get_max_temps(c),
|
||||
c->spills,
|
||||
c->fills);
|
||||
c->fills,
|
||||
c->qpu_inst_stalled_count,
|
||||
c->qpu_inst_count + c->qpu_inst_stalled_count);
|
||||
if (ret >= 0) {
|
||||
if (V3D_DEBUG & V3D_DEBUG_SHADERDB)
|
||||
fprintf(stderr, "SHADER-DB: %s\n", shaderdb);
|
||||
|
|
|
@ -645,19 +645,10 @@ v3d_qpu_uses_tlb(const struct v3d_qpu_instr *inst)
|
|||
bool
|
||||
v3d_qpu_uses_sfu(const struct v3d_qpu_instr *inst)
|
||||
{
|
||||
if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {
|
||||
switch (inst->alu.add.op) {
|
||||
case V3D_QPU_A_RECIP:
|
||||
case V3D_QPU_A_RSQRT:
|
||||
case V3D_QPU_A_EXP:
|
||||
case V3D_QPU_A_LOG:
|
||||
case V3D_QPU_A_SIN:
|
||||
case V3D_QPU_A_RSQRT2:
|
||||
return true;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
if (v3d_qpu_instr_is_sfu(inst))
|
||||
return true;
|
||||
|
||||
if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {
|
||||
if (inst->alu.add.magic_write &&
|
||||
v3d_qpu_magic_waddr_is_sfu(inst->alu.add.waddr)) {
|
||||
return true;
|
||||
|
@ -672,6 +663,25 @@ v3d_qpu_uses_sfu(const struct v3d_qpu_instr *inst)
|
|||
return false;
|
||||
}
|
||||
|
||||
bool
|
||||
v3d_qpu_instr_is_sfu(const struct v3d_qpu_instr *inst)
|
||||
{
|
||||
if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {
|
||||
switch (inst->alu.add.op) {
|
||||
case V3D_QPU_A_RECIP:
|
||||
case V3D_QPU_A_RSQRT:
|
||||
case V3D_QPU_A_EXP:
|
||||
case V3D_QPU_A_LOG:
|
||||
case V3D_QPU_A_SIN:
|
||||
case V3D_QPU_A_RSQRT2:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool
|
||||
v3d_qpu_writes_tmu(const struct v3d_qpu_instr *inst)
|
||||
{
|
||||
|
|
|
@ -447,6 +447,7 @@ bool v3d_qpu_magic_waddr_is_vpm(enum v3d_qpu_waddr waddr) ATTRIBUTE_CONST;
|
|||
bool v3d_qpu_magic_waddr_is_tsy(enum v3d_qpu_waddr waddr) ATTRIBUTE_CONST;
|
||||
bool v3d_qpu_magic_waddr_loads_unif(enum v3d_qpu_waddr waddr) ATTRIBUTE_CONST;
|
||||
bool v3d_qpu_uses_tlb(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;
|
||||
bool v3d_qpu_instr_is_sfu(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;
|
||||
bool v3d_qpu_uses_sfu(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;
|
||||
bool v3d_qpu_writes_tmu(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;
|
||||
bool v3d_qpu_writes_tmu_not_tmuc(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;
|
||||
|
|
Loading…
Reference in New Issue