i965/fs: Track fixed GRF regs separate from allocated GRF file in scheduling.

There's an assumption here that fixed GRFs will never intersect with
the allocated GRFs.  That's true today, though it might change some
day if we decide to register-allocate the regs containing push
constants once they're dead.

This fixes a regression in 0f7325b890 in
Lightsmark from the texture instructions now containing g0 references
instead of having that be implied.  Performance is improved 15.2% +/-
3.6% (n=3).

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=34968
This commit is contained in:
Eric Anholt 2011-05-23 09:12:07 -07:00
parent 40540cc517
commit 615117ce4e
2 changed files with 22 additions and 1 deletions

View File

@ -1570,7 +1570,7 @@ fs_visitor::visit(ir_texture *ir)
* use the null register. Otherwise, we want an implied move from g0.
*/
if (ir->offset != NULL || !inst->header_present)
inst->src[0] = fs_reg(brw_null_reg());
inst->src[0] = reg_undef;
else
inst->src[0] = fs_reg(retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW));

View File

@ -263,6 +263,12 @@ instruction_scheduler::calculate_deps()
schedule_node *last_grf_write[virtual_grf_count];
schedule_node *last_mrf_write[BRW_MAX_MRF];
schedule_node *last_conditional_mod = NULL;
/* Fixed HW registers are assumed to be separate from the virtual
* GRFs, so they can be tracked separately. We don't really write
* to fixed GRFs much, so don't bother tracking them on a more
* granular level.
*/
schedule_node *last_fixed_grf_write = NULL;
/* The last instruction always needs to still be the last
* instruction. Either it's flow control (IF, ELSE, ENDIF, DO,
@ -285,6 +291,10 @@ instruction_scheduler::calculate_deps()
for (int i = 0; i < 3; i++) {
if (inst->src[i].file == GRF) {
add_dep(last_grf_write[inst->src[i].reg], n);
} else if (inst->src[i].file == FIXED_HW_REG &&
(inst->src[i].fixed_hw_reg.file ==
BRW_GENERAL_REGISTER_FILE)) {
add_dep(last_fixed_grf_write, n);
} else if (inst->src[i].file != BAD_FILE &&
inst->src[i].file != IMM &&
inst->src[i].file != UNIFORM) {
@ -323,6 +333,9 @@ instruction_scheduler::calculate_deps()
add_dep(last_mrf_write[reg], n);
last_mrf_write[reg] = n;
}
} else if (inst->dst.file == FIXED_HW_REG &&
inst->dst.fixed_hw_reg.file == BRW_GENERAL_REGISTER_FILE) {
last_fixed_grf_write = n;
} else if (inst->dst.file != BAD_FILE) {
add_barrier_deps(n);
}
@ -344,6 +357,7 @@ instruction_scheduler::calculate_deps()
memset(last_grf_write, 0, sizeof(last_grf_write));
memset(last_mrf_write, 0, sizeof(last_mrf_write));
last_conditional_mod = NULL;
last_fixed_grf_write = NULL;
exec_node *node;
exec_node *prev;
@ -357,6 +371,10 @@ instruction_scheduler::calculate_deps()
for (int i = 0; i < 3; i++) {
if (inst->src[i].file == GRF) {
add_dep(n, last_grf_write[inst->src[i].reg]);
} else if (inst->src[i].file == FIXED_HW_REG &&
(inst->src[i].fixed_hw_reg.file ==
BRW_GENERAL_REGISTER_FILE)) {
add_dep(n, last_fixed_grf_write);
} else if (inst->src[i].file != BAD_FILE &&
inst->src[i].file != IMM &&
inst->src[i].file != UNIFORM) {
@ -395,6 +413,9 @@ instruction_scheduler::calculate_deps()
last_mrf_write[reg] = n;
}
} else if (inst->dst.file == FIXED_HW_REG &&
inst->dst.fixed_hw_reg.file == BRW_GENERAL_REGISTER_FILE) {
last_fixed_grf_write = n;
} else if (inst->dst.file != BAD_FILE) {
add_barrier_deps(n);
}