intel/vec4: sel.cond writes the flags on Gfx4 and Gfx5
This is the equivalent of idr's intel/fs: sel.cond writes the flags on Gfx4 and Gfx5 except for the vec4 backend. This fixes buggy rendering seen with crocus on a qt trace. v2 (idr): Trivial whitespace change. Add unit tests. v3: Fix type in comment in unit tests. Noticed by Jason and Priit. Reviewed-by: Jason Ekstrand <jason@jlekstrand.net> Iron Lake total instructions in shared programs: 8183077 -> 8184543 (0.02%) instructions in affected programs: 198990 -> 200456 (0.74%) helped: 0 HURT: 1355 HURT stats (abs) min: 1 max: 8 x̄: 1.08 x̃: 1 HURT stats (rel) min: 0.29% max: 6.00% x̄: 0.99% x̃: 0.70% 95% mean confidence interval for instructions value: 1.04 1.12 95% mean confidence interval for instructions %-change: 0.96% 1.03% Instructions are HURT. total cycles in shared programs: 238967672 -> 238962784 (<.01%) cycles in affected programs: 4666014 -> 4661126 (-0.10%) helped: 406 HURT: 314 helped stats (abs) min: 4 max: 54 x̄: 22.46 x̃: 18 helped stats (rel) min: <.01% max: 12.80% x̄: 1.82% x̃: 0.65% HURT stats (abs) min: 2 max: 112 x̄: 13.48 x̃: 12 HURT stats (rel) min: <.01% max: 7.82% x̄: 0.81% x̃: 0.16% 95% mean confidence interval for cycles value: -8.60 -4.98 95% mean confidence interval for cycles %-change: -0.87% -0.49% Cycles are helped. GM45 total instructions in shared programs: 4986888 -> 4988354 (0.03%) instructions in affected programs: 198990 -> 200456 (0.74%) helped: 0 HURT: 1355 HURT stats (abs) min: 1 max: 8 x̄: 1.08 x̃: 1 HURT stats (rel) min: 0.29% max: 6.00% x̄: 0.99% x̃: 0.70% 95% mean confidence interval for instructions value: 1.04 1.12 95% mean confidence interval for instructions %-change: 0.96% 1.03% Instructions are HURT. total cycles in shared programs: 153577826 -> 153572938 (<.01%) cycles in affected programs: 4666014 -> 4661126 (-0.10%) helped: 406 HURT: 314 helped stats (abs) min: 4 max: 54 x̄: 22.46 x̃: 18 helped stats (rel) min: <.01% max: 12.80% x̄: 1.82% x̃: 0.65% HURT stats (abs) min: 2 max: 112 x̄: 13.48 x̃: 12 HURT stats (rel) min: <.01% max: 7.82% x̄: 0.81% x̃: 0.16% 95% mean confidence interval for cycles value: -8.60 -4.98 95% mean confidence interval for cycles %-change: -0.87% -0.49% Cycles are helped. Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/12191>
This commit is contained in:
parent
38807ceeae
commit
8a81d14271
|
@ -1493,7 +1493,7 @@ namespace {
|
|||
st, reg_dependency_id(devinfo, brw_acc_reg(8), j));
|
||||
}
|
||||
|
||||
if (inst->writes_flag())
|
||||
if (inst->writes_flag(devinfo))
|
||||
stall_on_dependency(st, dependency_id_flag0);
|
||||
}
|
||||
|
||||
|
@ -1531,7 +1531,7 @@ namespace {
|
|||
reg_dependency_id(devinfo, brw_acc_reg(8), j));
|
||||
}
|
||||
|
||||
if (inst->writes_flag())
|
||||
if (inst->writes_flag(devinfo))
|
||||
mark_write_dependency(st, perf, dependency_id_flag0);
|
||||
}
|
||||
|
||||
|
|
|
@ -330,9 +330,9 @@ public:
|
|||
}
|
||||
}
|
||||
|
||||
bool writes_flag() const
|
||||
bool writes_flag(const intel_device_info *devinfo) const
|
||||
{
|
||||
return (conditional_mod && (opcode != BRW_OPCODE_SEL &&
|
||||
return (conditional_mod && ((opcode != BRW_OPCODE_SEL || devinfo->ver <= 5) &&
|
||||
opcode != BRW_OPCODE_CSEL &&
|
||||
opcode != BRW_OPCODE_IF &&
|
||||
opcode != BRW_OPCODE_WHILE));
|
||||
|
|
|
@ -1489,7 +1489,7 @@ vec4_instruction_scheduler::calculate_deps()
|
|||
}
|
||||
}
|
||||
|
||||
if (inst->writes_flag()) {
|
||||
if (inst->writes_flag(v->devinfo)) {
|
||||
add_dep(last_conditional_mod, n, 0);
|
||||
last_conditional_mod = n;
|
||||
}
|
||||
|
@ -1565,7 +1565,7 @@ vec4_instruction_scheduler::calculate_deps()
|
|||
}
|
||||
}
|
||||
|
||||
if (inst->writes_flag()) {
|
||||
if (inst->writes_flag(v->devinfo)) {
|
||||
last_conditional_mod = n;
|
||||
}
|
||||
|
||||
|
|
|
@ -1141,7 +1141,7 @@ vec4_instruction::can_reswizzle(const struct intel_device_info *devinfo,
|
|||
/* If we write to the flag register changing the swizzle would change
|
||||
* what channels are written to the flag register.
|
||||
*/
|
||||
if (writes_flag())
|
||||
if (writes_flag(devinfo))
|
||||
return false;
|
||||
|
||||
/* We can't swizzle implicit accumulator access. We'd have to
|
||||
|
|
|
@ -278,7 +278,7 @@ opt_cmod_propagation_local(bblock_t *block, vec4_visitor *v)
|
|||
*/
|
||||
if (inst->conditional_mod == BRW_CONDITIONAL_NZ &&
|
||||
!inst->src[0].negate &&
|
||||
scan_inst->writes_flag()) {
|
||||
scan_inst->writes_flag(v->devinfo)) {
|
||||
inst->remove(block);
|
||||
progress = true;
|
||||
break;
|
||||
|
@ -337,7 +337,7 @@ opt_cmod_propagation_local(bblock_t *block, vec4_visitor *v)
|
|||
}
|
||||
|
||||
not_match:
|
||||
if (scan_inst->writes_flag())
|
||||
if (scan_inst->writes_flag(v->devinfo))
|
||||
break;
|
||||
|
||||
read_flag = read_flag || scan_inst->reads_flag();
|
||||
|
|
|
@ -261,9 +261,9 @@ vec4_visitor::opt_cse_local(bblock_t *block, const vec4_live_variables &live)
|
|||
/* Kill all AEB entries that write a different value to or read from
|
||||
* the flag register if we just wrote it.
|
||||
*/
|
||||
if (inst->writes_flag()) {
|
||||
if (inst->writes_flag(devinfo)) {
|
||||
if (entry->generator->reads_flag() ||
|
||||
(entry->generator->writes_flag() &&
|
||||
(entry->generator->writes_flag(devinfo) &&
|
||||
!instructions_match(inst, entry->generator))) {
|
||||
entry->remove();
|
||||
ralloc_free(entry);
|
||||
|
|
|
@ -54,7 +54,7 @@ vec4_visitor::dead_code_eliminate()
|
|||
|
||||
foreach_inst_in_block_reverse_safe(vec4_instruction, inst, block) {
|
||||
if ((inst->dst.file == VGRF && !inst->has_side_effects()) ||
|
||||
(inst->dst.is_null() && inst->writes_flag())){
|
||||
(inst->dst.is_null() && inst->writes_flag(devinfo))){
|
||||
bool result_live[4] = { false };
|
||||
if (inst->dst.file == VGRF) {
|
||||
for (unsigned i = 0; i < DIV_ROUND_UP(inst->size_written, 16); i++) {
|
||||
|
@ -80,7 +80,7 @@ vec4_visitor::dead_code_eliminate()
|
|||
result_live[3] = result;
|
||||
}
|
||||
|
||||
if (inst->writes_flag()) {
|
||||
if (inst->writes_flag(devinfo)) {
|
||||
/* Independently calculate the usage of the flag components and
|
||||
* the destination value components.
|
||||
*/
|
||||
|
@ -126,7 +126,7 @@ vec4_visitor::dead_code_eliminate()
|
|||
}
|
||||
}
|
||||
|
||||
if (inst->dst.is_null() && inst->writes_flag()) {
|
||||
if (inst->dst.is_null() && inst->writes_flag(devinfo)) {
|
||||
bool combined_live = false;
|
||||
for (unsigned c = 0; c < 4; c++)
|
||||
combined_live |= BITSET_TEST(flag_live, c);
|
||||
|
@ -149,7 +149,7 @@ vec4_visitor::dead_code_eliminate()
|
|||
}
|
||||
}
|
||||
|
||||
if (inst->writes_flag() && !inst->predicate && inst->exec_size == 8) {
|
||||
if (inst->writes_flag(devinfo) && !inst->predicate && inst->exec_size == 8) {
|
||||
for (unsigned c = 0; c < 4; c++)
|
||||
BITSET_CLEAR(flag_live, c);
|
||||
}
|
||||
|
|
|
@ -119,7 +119,7 @@ vec4_live_variables::setup_def_use()
|
|||
}
|
||||
}
|
||||
}
|
||||
if (inst->writes_flag()) {
|
||||
if (inst->writes_flag(devinfo)) {
|
||||
for (unsigned c = 0; c < 4; c++) {
|
||||
if ((inst->dst.writemask & (1 << c)) &&
|
||||
!BITSET_TEST(bd->flag_use, c)) {
|
||||
|
@ -229,6 +229,8 @@ vec4_live_variables::vec4_live_variables(const backend_shader *s)
|
|||
end[i] = -1;
|
||||
}
|
||||
|
||||
devinfo = s->compiler->devinfo;
|
||||
|
||||
block_data = rzalloc_array(mem_ctx, struct block_data, cfg->num_blocks);
|
||||
|
||||
bitset_words = BITSET_WORDS(num_vars);
|
||||
|
|
|
@ -81,6 +81,8 @@ public:
|
|||
int num_vars;
|
||||
int bitset_words;
|
||||
|
||||
const struct intel_device_info *devinfo;
|
||||
|
||||
/** Per-basic-block information on live variables */
|
||||
struct block_data *block_data;
|
||||
|
||||
|
|
|
@ -109,7 +109,7 @@ void cmod_propagation_test::SetUp()
|
|||
|
||||
v = new cmod_propagation_vec4_visitor(compiler, ctx, shader, prog_data);
|
||||
|
||||
devinfo->ver = 4;
|
||||
devinfo->ver = 7;
|
||||
devinfo->verx10 = devinfo->ver * 10;
|
||||
}
|
||||
|
||||
|
@ -905,3 +905,149 @@ TEST_F(cmod_propagation_test, add_cmp_different_dst_writemask)
|
|||
EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 1)->opcode);
|
||||
EXPECT_EQ(BRW_CONDITIONAL_GE, instruction(block0, 1)->conditional_mod);
|
||||
}
|
||||
|
||||
TEST_F(cmod_propagation_test, prop_across_sel_gfx7)
|
||||
{
|
||||
const vec4_builder bld = vec4_builder(v).at_end();
|
||||
dst_reg dest1 = dst_reg(v, glsl_type::float_type);
|
||||
dst_reg dest2 = dst_reg(v, glsl_type::float_type);
|
||||
src_reg src0 = src_reg(v, glsl_type::float_type);
|
||||
src_reg src1 = src_reg(v, glsl_type::float_type);
|
||||
src_reg src2 = src_reg(v, glsl_type::float_type);
|
||||
src_reg src3 = src_reg(v, glsl_type::float_type);
|
||||
src_reg zero(brw_imm_f(0.0f));
|
||||
dst_reg dest_null = bld.null_reg_f();
|
||||
dest_null.writemask = WRITEMASK_X;
|
||||
|
||||
bld.ADD(dest1, src0, src1);
|
||||
bld.SEL(dest2, src2, src3)
|
||||
->conditional_mod = BRW_CONDITIONAL_GE;
|
||||
bld.CMP(dest_null, src_reg(dest1), zero, BRW_CONDITIONAL_GE);
|
||||
|
||||
/* = Before =
|
||||
*
|
||||
* 0: add dest1.x src0.xxxx src1.xxxx
|
||||
* 1: sel.ge.f0 dest2.x src2.xxxx src3.xxxx
|
||||
* 2: cmp.ge.f0 null.x dest.xxxx 0.0f
|
||||
*
|
||||
* = After =
|
||||
* 0: add.ge.f0 dest.x src0.xxxx src1.xxxx
|
||||
* 1: sel.ge.f0 dest2.x src2.xxxx src3.xxxx
|
||||
*/
|
||||
|
||||
v->calculate_cfg();
|
||||
bblock_t *block0 = v->cfg->blocks[0];
|
||||
|
||||
EXPECT_EQ(0, block0->start_ip);
|
||||
EXPECT_EQ(2, block0->end_ip);
|
||||
|
||||
EXPECT_TRUE(cmod_propagation(v));
|
||||
|
||||
ASSERT_EQ(0, block0->start_ip);
|
||||
ASSERT_EQ(1, block0->end_ip);
|
||||
EXPECT_EQ(BRW_OPCODE_ADD, instruction(block0, 0)->opcode);
|
||||
EXPECT_EQ(BRW_CONDITIONAL_GE, instruction(block0, 0)->conditional_mod);
|
||||
EXPECT_EQ(BRW_OPCODE_SEL, instruction(block0, 1)->opcode);
|
||||
EXPECT_EQ(BRW_CONDITIONAL_GE, instruction(block0, 1)->conditional_mod);
|
||||
}
|
||||
|
||||
TEST_F(cmod_propagation_test, prop_across_sel_gfx5)
|
||||
{
|
||||
devinfo->ver = 5;
|
||||
devinfo->verx10 = devinfo->ver * 10;
|
||||
|
||||
const vec4_builder bld = vec4_builder(v).at_end();
|
||||
dst_reg dest1 = dst_reg(v, glsl_type::float_type);
|
||||
dst_reg dest2 = dst_reg(v, glsl_type::float_type);
|
||||
src_reg src0 = src_reg(v, glsl_type::float_type);
|
||||
src_reg src1 = src_reg(v, glsl_type::float_type);
|
||||
src_reg src2 = src_reg(v, glsl_type::float_type);
|
||||
src_reg src3 = src_reg(v, glsl_type::float_type);
|
||||
src_reg zero(brw_imm_f(0.0f));
|
||||
dst_reg dest_null = bld.null_reg_f();
|
||||
dest_null.writemask = WRITEMASK_X;
|
||||
|
||||
bld.ADD(dest1, src0, src1);
|
||||
bld.SEL(dest2, src2, src3)
|
||||
->conditional_mod = BRW_CONDITIONAL_GE;
|
||||
bld.CMP(dest_null, src_reg(dest1), zero, BRW_CONDITIONAL_GE);
|
||||
|
||||
/* = Before =
|
||||
*
|
||||
* 0: add dest1.x src0.xxxx src1.xxxx
|
||||
* 1: sel.ge.f0 dest2.x src2.xxxx src3.xxxx
|
||||
* 2: cmp.ge.f0 null.x dest.xxxx 0.0f
|
||||
*
|
||||
* = After =
|
||||
* (no changes)
|
||||
*
|
||||
* On Gfx4 and Gfx5, sel.l (for min) and sel.ge (for max) are implemented
|
||||
* using a separate cmpn and sel instruction. This lowering occurs in
|
||||
* fs_vistor::lower_minmax which is called a long time after the first
|
||||
* calls to cmod_propagation.
|
||||
*/
|
||||
|
||||
v->calculate_cfg();
|
||||
bblock_t *block0 = v->cfg->blocks[0];
|
||||
|
||||
EXPECT_EQ(0, block0->start_ip);
|
||||
EXPECT_EQ(2, block0->end_ip);
|
||||
|
||||
EXPECT_FALSE(cmod_propagation(v));
|
||||
|
||||
ASSERT_EQ(0, block0->start_ip);
|
||||
ASSERT_EQ(2, block0->end_ip);
|
||||
EXPECT_EQ(BRW_OPCODE_ADD, instruction(block0, 0)->opcode);
|
||||
EXPECT_EQ(BRW_CONDITIONAL_NONE, instruction(block0, 0)->conditional_mod);
|
||||
EXPECT_EQ(BRW_OPCODE_SEL, instruction(block0, 1)->opcode);
|
||||
EXPECT_EQ(BRW_CONDITIONAL_GE, instruction(block0, 1)->conditional_mod);
|
||||
EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 2)->opcode);
|
||||
EXPECT_EQ(BRW_CONDITIONAL_GE, instruction(block0, 2)->conditional_mod);
|
||||
}
|
||||
|
||||
TEST_F(cmod_propagation_test, prop_into_sel_gfx5)
|
||||
{
|
||||
devinfo->ver = 5;
|
||||
devinfo->verx10 = devinfo->ver * 10;
|
||||
|
||||
const vec4_builder bld = vec4_builder(v).at_end();
|
||||
dst_reg dest = dst_reg(v, glsl_type::float_type);
|
||||
src_reg src0 = src_reg(v, glsl_type::float_type);
|
||||
src_reg src1 = src_reg(v, glsl_type::float_type);
|
||||
src_reg zero(brw_imm_f(0.0f));
|
||||
dst_reg dest_null = bld.null_reg_f();
|
||||
dest_null.writemask = WRITEMASK_X;
|
||||
|
||||
bld.SEL(dest, src0, src1)
|
||||
->conditional_mod = BRW_CONDITIONAL_GE;
|
||||
bld.CMP(dest_null, src_reg(dest), zero, BRW_CONDITIONAL_GE);
|
||||
|
||||
/* = Before =
|
||||
*
|
||||
* 0: sel.ge.f0 dest.x src2.xxxx src3.xxxx
|
||||
* 1: cmp.ge.f0 null.x dest.xxxx 0.0f
|
||||
*
|
||||
* = After =
|
||||
* (no changes)
|
||||
*
|
||||
* Do not copy propagate into a sel.cond instruction. While it does modify
|
||||
* the flags, the flags are not based on the result compared with zero (as
|
||||
* with most other instructions). The result is based on the sources
|
||||
* compared with each other (like cmp.cond).
|
||||
*/
|
||||
|
||||
v->calculate_cfg();
|
||||
bblock_t *block0 = v->cfg->blocks[0];
|
||||
|
||||
EXPECT_EQ(0, block0->start_ip);
|
||||
EXPECT_EQ(1, block0->end_ip);
|
||||
|
||||
EXPECT_FALSE(cmod_propagation(v));
|
||||
|
||||
ASSERT_EQ(0, block0->start_ip);
|
||||
ASSERT_EQ(1, block0->end_ip);
|
||||
EXPECT_EQ(BRW_OPCODE_SEL, instruction(block0, 0)->opcode);
|
||||
EXPECT_EQ(BRW_CONDITIONAL_GE, instruction(block0, 0)->conditional_mod);
|
||||
EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 1)->opcode);
|
||||
EXPECT_EQ(BRW_CONDITIONAL_GE, instruction(block0, 1)->conditional_mod);
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue