diff --git a/src/gallium/drivers/freedreno/a3xx/ir3_dump.c b/src/gallium/drivers/freedreno/a3xx/ir3_dump.c index 0afd04861a3..3984cd60e6e 100644 --- a/src/gallium/drivers/freedreno/a3xx/ir3_dump.c +++ b/src/gallium/drivers/freedreno/a3xx/ir3_dump.c @@ -48,6 +48,11 @@ static void dump_instr_name(struct ir3_dump_ctx *ctx, fprintf(ctx->f, "%03u: ", instr->depth); } + if (instr->flags & IR3_INSTR_SY) + fprintf(ctx->f, "(sy)"); + if (instr->flags & IR3_INSTR_SS) + fprintf(ctx->f, "(ss)"); + if (is_meta(instr)) { switch(instr->opc) { case OPC_META_PHI: diff --git a/src/gallium/drivers/freedreno/a3xx/ir3_ra.c b/src/gallium/drivers/freedreno/a3xx/ir3_ra.c index f4834a3778c..5df57e776f9 100644 --- a/src/gallium/drivers/freedreno/a3xx/ir3_ra.c +++ b/src/gallium/drivers/freedreno/a3xx/ir3_ra.c @@ -526,18 +526,41 @@ static void legalize(struct ir3_ra_ctx *ctx, struct ir3_block *block) } } + /* cat5+ does not have an (ss) bit, if needed we need to + * insert a nop to carry the sync flag. Would be kinda + * clever if we were aware of this during scheduling, but + * this should be a pretty rare case: + */ + if ((n->flags & IR3_INSTR_SS) && (n->category >= 5)) { + struct ir3_instruction *nop; + nop = ir3_instr_create(block, 0, OPC_NOP); + nop->flags |= IR3_INSTR_SS; + n->flags &= ~IR3_INSTR_SS; + } + + /* need to be able to set (ss) on first instruction: */ + if ((shader->instrs_count == 0) && (n->category >= 5)) + ir3_instr_create(block, 0, OPC_NOP); + shader->instrs[shader->instrs_count++] = n; if (is_sfu(n)) regmask_set(&needs_ss, n->regs[0]); - if (is_tex(n)) { + + if (is_tex(n)) regmask_set(&needs_sy, n->regs[0]); + + /* both tex/sfu appear to not always immediately consume + * their src register(s): + */ + if (is_tex(n) || is_sfu(n)) { for (i = 1; i < n->regs_count; i++) { reg = n->regs[i]; if (reg_gpr(reg)) regmask_set(&needs_ss_war, reg); } } + if (is_input(n)) last_input = n; } diff --git a/src/gallium/drivers/freedreno/a3xx/ir3_sched.c b/src/gallium/drivers/freedreno/a3xx/ir3_sched.c index 5ac33abc548..5e585271f92 100644 --- a/src/gallium/drivers/freedreno/a3xx/ir3_sched.c +++ b/src/gallium/drivers/freedreno/a3xx/ir3_sched.c @@ -77,7 +77,7 @@ static unsigned distance(struct ir3_sched_ctx *ctx, struct ir3_instruction *n = ctx->scheduled; unsigned d = 0; while (n && (n != instr) && (d < maxd)) { - if (!is_meta(n)) + if (is_alu(n) || is_flow(n)) d++; n = n->next; }