i965/fs: Use conditional sends to do FB writes on HSW+.
This drops the MOVs for header setup, which are totally mis-scheduled.

total instructions in shared programs: 1590047 -> 1589331 (-0.05%)
instructions in affected programs: 43729 -> 43013 (-1.64%)
GAINED: 0
LOST: 0

glb27-trex:
x before
+ after
+-----------------------------------------------------------------------------+
| + x xx + + + |
| ++ + xxx ++x xx + ** *x+ + + + x * |
|+x xx x* x+++xx*x*xx+++*+*xx++** *x* x+***x*+xx+* + * + + *|
| |__|__________MA___A___________|___| |
+-----------------------------------------------------------------------------+
N Min Max Median Avg Stddev
x 49 62.33 65.41 63.49 63.53449 0.62757822
+ 50 62.28 65.4 63.7 63.6982 0.656564
No difference proven at 95.0% confidence

Reviewed-by: Matt Turner <mattst88@gmail.com>
This commit is contained in:
parent
4226798354
commit
d92f593d87
|
@ -2241,8 +2241,6 @@ void brw_fb_WRITE(struct brw_compile *p,
|
||||||
} else {
|
} else {
|
||||||
insn = next_insn(p, BRW_OPCODE_SEND);
|
insn = next_insn(p, BRW_OPCODE_SEND);
|
||||||
}
|
}
|
||||||
/* The execution mask is ignored for render target writes. */
|
|
||||||
insn->header.predicate_control = 0;
|
|
||||||
insn->header.compression_control = BRW_COMPRESSION_NONE;
|
insn->header.compression_control = BRW_COMPRESSION_NONE;
|
||||||
|
|
||||||
if (brw->gen >= 6) {
|
if (brw->gen >= 6) {
|
||||||
|
|
|
@ -114,7 +114,12 @@ fs_generator::generate_fb_write(fs_inst *inst)
|
||||||
brw_set_mask_control(p, BRW_MASK_DISABLE);
|
brw_set_mask_control(p, BRW_MASK_DISABLE);
|
||||||
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
|
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
|
||||||
|
|
||||||
if ((fp && fp->UsesKill) || c->key.alpha_test_func) {
|
if (inst->header_present) {
|
||||||
|
/* On HSW, the GPU will use the predicate on SENDC, unless the header is
|
||||||
|
* present.
|
||||||
|
*/
|
||||||
|
if (!brw->is_haswell && ((fp && fp->UsesKill) ||
|
||||||
|
c->key.alpha_test_func)) {
|
||||||
struct brw_reg pixel_mask;
|
struct brw_reg pixel_mask;
|
||||||
|
|
||||||
if (brw->gen >= 6)
|
if (brw->gen >= 6)
|
||||||
|
@ -125,7 +130,6 @@ fs_generator::generate_fb_write(fs_inst *inst)
|
||||||
brw_MOV(p, pixel_mask, brw_flag_reg(0, 1));
|
brw_MOV(p, pixel_mask, brw_flag_reg(0, 1));
|
||||||
}
|
}
|
||||||
|
|
||||||
if (inst->header_present) {
|
|
||||||
if (brw->gen >= 6) {
|
if (brw->gen >= 6) {
|
||||||
brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
|
brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
|
||||||
brw_MOV(p,
|
brw_MOV(p,
|
||||||
|
|
|
@ -2743,7 +2743,7 @@ fs_visitor::emit_fb_writes()
|
||||||
* thread message and on all dual-source messages."
|
* thread message and on all dual-source messages."
|
||||||
*/
|
*/
|
||||||
if (brw->gen >= 6 &&
|
if (brw->gen >= 6 &&
|
||||||
!this->fp->UsesKill &&
|
(brw->is_haswell || brw->gen >= 8 || !this->fp->UsesKill) &&
|
||||||
!do_dual_src &&
|
!do_dual_src &&
|
||||||
c->key.nr_color_regions == 1) {
|
c->key.nr_color_regions == 1) {
|
||||||
header_present = false;
|
header_present = false;
|
||||||
|
@ -2840,6 +2840,10 @@ fs_visitor::emit_fb_writes()
|
||||||
inst->mlen = nr - base_mrf;
|
inst->mlen = nr - base_mrf;
|
||||||
inst->eot = true;
|
inst->eot = true;
|
||||||
inst->header_present = header_present;
|
inst->header_present = header_present;
|
||||||
|
if ((brw->gen >= 8 || brw->is_haswell) && fp->UsesKill) {
|
||||||
|
inst->predicate = BRW_PREDICATE_NORMAL;
|
||||||
|
inst->flag_subreg = 1;
|
||||||
|
}
|
||||||
|
|
||||||
c->prog_data.dual_src_blend = true;
|
c->prog_data.dual_src_blend = true;
|
||||||
this->current_annotation = NULL;
|
this->current_annotation = NULL;
|
||||||
|
@ -2885,6 +2889,10 @@ fs_visitor::emit_fb_writes()
|
||||||
inst->mlen = nr - base_mrf;
|
inst->mlen = nr - base_mrf;
|
||||||
inst->eot = eot;
|
inst->eot = eot;
|
||||||
inst->header_present = header_present;
|
inst->header_present = header_present;
|
||||||
|
if ((brw->gen >= 8 || brw->is_haswell) && fp->UsesKill) {
|
||||||
|
inst->predicate = BRW_PREDICATE_NORMAL;
|
||||||
|
inst->flag_subreg = 1;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (c->key.nr_color_regions == 0) {
|
if (c->key.nr_color_regions == 0) {
|
||||||
|
@ -2902,6 +2910,10 @@ fs_visitor::emit_fb_writes()
|
||||||
inst->mlen = nr - base_mrf;
|
inst->mlen = nr - base_mrf;
|
||||||
inst->eot = true;
|
inst->eot = true;
|
||||||
inst->header_present = header_present;
|
inst->header_present = header_present;
|
||||||
|
if ((brw->gen >= 8 || brw->is_haswell) && fp->UsesKill) {
|
||||||
|
inst->predicate = BRW_PREDICATE_NORMAL;
|
||||||
|
inst->flag_subreg = 1;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
this->current_annotation = NULL;
|
this->current_annotation = NULL;
|
||||||
|
|
|
@ -61,6 +61,14 @@ gen8_fs_generator::mark_surface_used(unsigned surf_index)
|
||||||
void
|
void
|
||||||
gen8_fs_generator::generate_fb_write(fs_inst *ir)
|
gen8_fs_generator::generate_fb_write(fs_inst *ir)
|
||||||
{
|
{
|
||||||
|
/* Disable the discard condition while setting up the header. */
|
||||||
|
default_state.predicate = BRW_PREDICATE_NONE;
|
||||||
|
default_state.predicate_inverse = false;
|
||||||
|
default_state.flag_subreg_nr = 0;
|
||||||
|
|
||||||
|
if (ir->header_present) {
|
||||||
|
/* The GPU will use the predicate on SENDC, unless the header is present.
|
||||||
|
*/
|
||||||
if (fp && fp->UsesKill) {
|
if (fp && fp->UsesKill) {
|
||||||
gen8_instruction *mov =
|
gen8_instruction *mov =
|
||||||
MOV(retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UW),
|
MOV(retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UW),
|
||||||
|
@ -68,7 +76,6 @@ gen8_fs_generator::generate_fb_write(fs_inst *ir)
|
||||||
gen8_set_mask_control(mov, BRW_MASK_DISABLE);
|
gen8_set_mask_control(mov, BRW_MASK_DISABLE);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (ir->header_present) {
|
|
||||||
gen8_instruction *mov =
|
gen8_instruction *mov =
|
||||||
MOV_RAW(brw_message_reg(ir->base_mrf), brw_vec8_grf(0, 0));
|
MOV_RAW(brw_message_reg(ir->base_mrf), brw_vec8_grf(0, 0));
|
||||||
gen8_set_exec_size(mov, BRW_EXECUTE_16);
|
gen8_set_exec_size(mov, BRW_EXECUTE_16);
|
||||||
|
@ -88,6 +95,13 @@ gen8_fs_generator::generate_fb_write(fs_inst *ir)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Set the predicate back to get the conditional write if necessary for
|
||||||
|
* discards.
|
||||||
|
*/
|
||||||
|
default_state.predicate = ir->predicate;
|
||||||
|
default_state.predicate_inverse = ir->predicate_inverse;
|
||||||
|
default_state.flag_subreg_nr = ir->flag_subreg;
|
||||||
|
|
||||||
gen8_instruction *inst = next_inst(BRW_OPCODE_SENDC);
|
gen8_instruction *inst = next_inst(BRW_OPCODE_SENDC);
|
||||||
gen8_set_dst(brw, inst, retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW));
|
gen8_set_dst(brw, inst, retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW));
|
||||||
gen8_set_src0(brw, inst, brw_message_reg(ir->base_mrf));
|
gen8_set_src0(brw, inst, brw_message_reg(ir->base_mrf));
|
||||||
|
|
Loading…
Reference in New Issue