From 615117ce4efd041459f7d4b0c77aa8e248345e66 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Mon, 23 May 2011 09:12:07 -0700 Subject: [PATCH] i965/fs: Track fixed GRF regs separate from allocated GRF file in scheduling. There's an assumption here that fixed GRFs will never intersect with the allocated GRFs. That's true today, though it might change some day if we decide to register-allocate the regs containing push constants once they're dead. This fixes a Lightsmark regression introduced by 0f7325b89038937bd428f7c89ed9859189a0ab0b, which made the texture instructions contain explicit g0 references instead of leaving them implied. Performance is improved by 15.2% +/- 3.6% (n=3). Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=34968 --- src/mesa/drivers/dri/i965/brw_fs.cpp | 2 +- .../dri/i965/brw_fs_schedule_instructions.cpp | 21 +++++++++++++++++++ 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index c02237850e2..1cee37cfdb2 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -1570,7 +1570,7 @@ fs_visitor::visit(ir_texture *ir) * use the null register. Otherwise, we want an implied move from g0. 
*/ if (ir->offset != NULL || !inst->header_present) - inst->src[0] = fs_reg(brw_null_reg()); + inst->src[0] = reg_undef; else inst->src[0] = fs_reg(retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW)); diff --git a/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp index ed88aa689d0..d8218c26edb 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp @@ -263,6 +263,12 @@ instruction_scheduler::calculate_deps() schedule_node *last_grf_write[virtual_grf_count]; schedule_node *last_mrf_write[BRW_MAX_MRF]; schedule_node *last_conditional_mod = NULL; + /* Fixed HW registers are assumed to be separate from the virtual + * GRFs, so they can be tracked separately. We don't really write + * to fixed GRFs much, so don't bother tracking them on a more + * granular level. + */ + schedule_node *last_fixed_grf_write = NULL; /* The last instruction always needs to still be the last * instruction. 
Either it's flow control (IF, ELSE, ENDIF, DO, @@ -285,6 +291,10 @@ instruction_scheduler::calculate_deps() for (int i = 0; i < 3; i++) { if (inst->src[i].file == GRF) { add_dep(last_grf_write[inst->src[i].reg], n); + } else if (inst->src[i].file == FIXED_HW_REG && + (inst->src[i].fixed_hw_reg.file == + BRW_GENERAL_REGISTER_FILE)) { + add_dep(last_fixed_grf_write, n); } else if (inst->src[i].file != BAD_FILE && inst->src[i].file != IMM && inst->src[i].file != UNIFORM) { @@ -323,6 +333,9 @@ instruction_scheduler::calculate_deps() add_dep(last_mrf_write[reg], n); last_mrf_write[reg] = n; } + } else if (inst->dst.file == FIXED_HW_REG && + inst->dst.fixed_hw_reg.file == BRW_GENERAL_REGISTER_FILE) { + last_fixed_grf_write = n; } else if (inst->dst.file != BAD_FILE) { add_barrier_deps(n); } @@ -344,6 +357,7 @@ instruction_scheduler::calculate_deps() memset(last_grf_write, 0, sizeof(last_grf_write)); memset(last_mrf_write, 0, sizeof(last_mrf_write)); last_conditional_mod = NULL; + last_fixed_grf_write = NULL; exec_node *node; exec_node *prev; @@ -357,6 +371,10 @@ instruction_scheduler::calculate_deps() for (int i = 0; i < 3; i++) { if (inst->src[i].file == GRF) { add_dep(n, last_grf_write[inst->src[i].reg]); + } else if (inst->src[i].file == FIXED_HW_REG && + (inst->src[i].fixed_hw_reg.file == + BRW_GENERAL_REGISTER_FILE)) { + add_dep(n, last_fixed_grf_write); } else if (inst->src[i].file != BAD_FILE && inst->src[i].file != IMM && inst->src[i].file != UNIFORM) { @@ -395,6 +413,9 @@ instruction_scheduler::calculate_deps() last_mrf_write[reg] = n; } + } else if (inst->dst.file == FIXED_HW_REG && + inst->dst.fixed_hw_reg.file == BRW_GENERAL_REGISTER_FILE) { + last_fixed_grf_write = n; } else if (inst->dst.file != BAD_FILE) { add_barrier_deps(n); }