diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index 15f51b038cb..ebcdd52d668 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -734,14 +734,33 @@ fs_visitor::limit_dispatch_width(unsigned n, const char *msg) * it. */ bool -fs_inst::is_partial_write() const +fs_inst::is_partial_reg_write() const { return ((this->predicate && this->opcode != BRW_OPCODE_SEL) || - (this->exec_size * type_sz(this->dst.type)) < 32 || !this->dst.is_contiguous() || + (this->exec_size * type_sz(this->dst.type)) < REG_SIZE || this->dst.offset % REG_SIZE != 0); } +/** + * Returns true if the instruction has a flag that means it won't + * update an entire variable for the given dispatch width. + * + * This is only different from is_partial_reg_write() for SIMD8 + * dispatches of 16-bit (or smaller) instructions. + */ +bool +fs_inst::is_partial_var_write(uint32_t dispatch_width) const +{ + const uint32_t type_size = type_sz(this->dst.type); + uint32_t var_size = MIN2(REG_SIZE, dispatch_width * type_size); + + return ((this->predicate && this->opcode != BRW_OPCODE_SEL) || + !this->dst.is_contiguous() || + (this->exec_size * type_sz(this->dst.type)) < var_size || + this->dst.offset % var_size != 0); +} + unsigned fs_inst::components_read(unsigned i) const { @@ -2956,7 +2975,7 @@ fs_visitor::opt_register_renaming() if (depth == 0 && inst->dst.file == VGRF && alloc.sizes[inst->dst.nr] * REG_SIZE == inst->size_written && - !inst->is_partial_write()) { + !inst->is_partial_reg_write()) { if (remap[dst] == ~0u) { remap[dst] = dst; } else { @@ -3160,7 +3179,7 @@ fs_visitor::compute_to_mrf() next_ip++; if (inst->opcode != BRW_OPCODE_MOV || - inst->is_partial_write() || + inst->is_partial_reg_write() || inst->dst.file != MRF || inst->src[0].file != VGRF || inst->dst.type != inst->src[0].type || inst->src[0].abs || inst->src[0].negate || @@ -3193,7 +3212,7 @@ fs_visitor::compute_to_mrf() * that writes that reg, but it would require smarter * tracking. */ - if (scan_inst->is_partial_write()) + if (scan_inst->is_partial_reg_write()) break; /* Handling things not fully contained in the source of the copy @@ -3511,7 +3530,7 @@ fs_visitor::remove_duplicate_mrf_writes() if (inst->opcode == BRW_OPCODE_MOV && inst->dst.file == MRF && inst->src[0].file != ARF && - !inst->is_partial_write()) { + !inst->is_partial_reg_write()) { last_mrf_move[inst->dst.nr] = inst; } } diff --git a/src/intel/compiler/brw_fs_cmod_propagation.cpp b/src/intel/compiler/brw_fs_cmod_propagation.cpp index 14852e52e6e..a821d4e5481 100644 --- a/src/intel/compiler/brw_fs_cmod_propagation.cpp +++ b/src/intel/compiler/brw_fs_cmod_propagation.cpp @@ -50,13 +50,13 @@ static bool cmod_propagate_cmp_to_add(const gen_device_info *devinfo, bblock_t *block, - fs_inst *inst) + fs_inst *inst, unsigned dispatch_width) { bool read_flag = false; foreach_inst_in_block_reverse_starting_from(fs_inst, scan_inst, inst) { if (scan_inst->opcode == BRW_OPCODE_ADD && - !scan_inst->is_partial_write() && + !scan_inst->is_partial_var_write(dispatch_width) && scan_inst->exec_size == inst->exec_size) { bool negate; @@ -126,7 +126,7 @@ cmod_propagate_cmp_to_add(const gen_device_info *devinfo, bblock_t *block, */ static bool cmod_propagate_not(const gen_device_info *devinfo, bblock_t *block, - fs_inst *inst) + fs_inst *inst, unsigned dispatch_width) { const enum brw_conditional_mod cond = brw_negate_cmod(inst->conditional_mod); bool read_flag = false; @@ -141,7 +141,7 @@ cmod_propagate_not(const gen_device_info *devinfo, bblock_t *block, scan_inst->opcode != BRW_OPCODE_AND) break; - if (scan_inst->is_partial_write() || + if (scan_inst->is_partial_var_write(dispatch_width) || scan_inst->dst.offset != inst->src[0].offset || scan_inst->exec_size != inst->exec_size) break; @@ -166,7 +166,9 @@ cmod_propagate_not(const gen_device_info *devinfo, bblock_t *block, } static bool -opt_cmod_propagation_local(const gen_device_info *devinfo, bblock_t *block) +opt_cmod_propagation_local(const gen_device_info *devinfo, + bblock_t *block, + unsigned dispatch_width) { bool progress = false; int ip = block->end_ip + 1; @@ -219,14 +221,14 @@ opt_cmod_propagation_local(const gen_device_info *devinfo, bblock_t *block) */ if (inst->opcode == BRW_OPCODE_CMP && !inst->src[1].is_zero()) { if (brw_reg_type_is_floating_point(inst->src[0].type) && - cmod_propagate_cmp_to_add(devinfo, block, inst)) + cmod_propagate_cmp_to_add(devinfo, block, inst, dispatch_width)) progress = true; continue; } if (inst->opcode == BRW_OPCODE_NOT) { - progress = cmod_propagate_not(devinfo, block, inst) || progress; + progress = cmod_propagate_not(devinfo, block, inst, dispatch_width) || progress; continue; } @@ -234,7 +236,7 @@ opt_cmod_propagation_local(const gen_device_info *devinfo, bblock_t *block) foreach_inst_in_block_reverse_starting_from(fs_inst, scan_inst, inst) { if (regions_overlap(scan_inst->dst, scan_inst->size_written, inst->src[0], inst->size_read(0))) { - if (scan_inst->is_partial_write() || + if (scan_inst->is_partial_var_write(dispatch_width) || scan_inst->dst.offset != inst->src[0].offset || scan_inst->exec_size != inst->exec_size) break; @@ -364,7 +366,7 @@ fs_visitor::opt_cmod_propagation() bool progress = false; foreach_block_reverse(block, cfg) { - progress = opt_cmod_propagation_local(devinfo, block) || progress; + progress = opt_cmod_propagation_local(devinfo, block, dispatch_width) || progress; } if (progress) diff --git a/src/intel/compiler/brw_fs_copy_propagation.cpp b/src/intel/compiler/brw_fs_copy_propagation.cpp index 1f4e122e6c9..1670eedce77 100644 --- a/src/intel/compiler/brw_fs_copy_propagation.cpp +++ b/src/intel/compiler/brw_fs_copy_propagation.cpp @@ -515,7 +515,7 @@ fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry) /* Compute the first component of the copy that the instruction is * reading, and the base byte offset within that component. */ - assert(entry->dst.offset % REG_SIZE == 0 && entry->dst.stride == 1); + assert(entry->dst.stride == 1); const unsigned component = rel_offset / type_sz(entry->dst.type); const unsigned suboffset = rel_offset % type_sz(entry->dst.type); @@ -767,7 +767,7 @@ fs_visitor::try_constant_propagate(fs_inst *inst, acp_entry *entry) } static bool -can_propagate_from(fs_inst *inst) +can_propagate_from(fs_inst *inst, unsigned dispatch_width) { return (inst->opcode == BRW_OPCODE_MOV && inst->dst.file == VGRF && @@ -778,7 +778,7 @@ can_propagate_from(fs_inst *inst) inst->src[0].file == UNIFORM || inst->src[0].file == IMM) && inst->src[0].type == inst->dst.type && - !inst->is_partial_write()); + !inst->is_partial_var_write(dispatch_width)); } /* Walks a basic block and does copy propagation on it using the acp @@ -830,7 +830,7 @@ fs_visitor::opt_copy_propagation_local(void *copy_prop_ctx, bblock_t *block, /* If this instruction's source could potentially be folded into the * operand of another instruction, add it to the ACP. */ - if (can_propagate_from(inst)) { + if (can_propagate_from(inst, dispatch_width)) { acp_entry *entry = ralloc(copy_prop_ctx, acp_entry); entry->dst = inst->dst; entry->src = inst->src[0]; diff --git a/src/intel/compiler/brw_fs_cse.cpp b/src/intel/compiler/brw_fs_cse.cpp index 6efa111b1a4..e955c35e145 100644 --- a/src/intel/compiler/brw_fs_cse.cpp +++ b/src/intel/compiler/brw_fs_cse.cpp @@ -252,7 +252,8 @@ fs_visitor::opt_cse_local(bblock_t *block) int ip = block->start_ip; foreach_inst_in_block(fs_inst, inst, block) { /* Skip some cases. */ - if (is_expression(this, inst) && !inst->is_partial_write() && + if (is_expression(this, inst) && + !inst->is_partial_var_write(dispatch_width) && ((inst->dst.file != ARF && inst->dst.file != FIXED_GRF) || inst->dst.is_null())) { diff --git a/src/intel/compiler/brw_fs_dead_code_eliminate.cpp b/src/intel/compiler/brw_fs_dead_code_eliminate.cpp index 38ae1d41a6a..7c60a33eb58 100644 --- a/src/intel/compiler/brw_fs_dead_code_eliminate.cpp +++ b/src/intel/compiler/brw_fs_dead_code_eliminate.cpp @@ -107,7 +107,7 @@ fs_visitor::dead_code_eliminate() } if (inst->dst.file == VGRF) { - if (!inst->is_partial_write()) { + if (!inst->is_partial_reg_write()) { int var = live_intervals->var_from_reg(inst->dst); for (unsigned i = 0; i < regs_written(inst); i++) { BITSET_CLEAR(live, var + i); diff --git a/src/intel/compiler/brw_fs_live_variables.cpp b/src/intel/compiler/brw_fs_live_variables.cpp index 059f076fa51..30625aa586a 100644 --- a/src/intel/compiler/brw_fs_live_variables.cpp +++ b/src/intel/compiler/brw_fs_live_variables.cpp @@ -84,7 +84,7 @@ fs_live_variables::setup_one_write(struct block_data *bd, fs_inst *inst, * screens off previous updates of that variable (VGRF channel). */ if (inst->dst.file == VGRF) { - if (!inst->is_partial_write() && !BITSET_TEST(bd->use, var)) + if (!inst->is_partial_reg_write() && !BITSET_TEST(bd->use, var)) BITSET_SET(bd->def, var); BITSET_SET(bd->defout, var); diff --git a/src/intel/compiler/brw_fs_reg_allocate.cpp b/src/intel/compiler/brw_fs_reg_allocate.cpp index b3825f1ef8c..94b18153492 100644 --- a/src/intel/compiler/brw_fs_reg_allocate.cpp +++ b/src/intel/compiler/brw_fs_reg_allocate.cpp @@ -1054,7 +1054,7 @@ fs_visitor::spill_reg(unsigned spill_reg) * write, there should be no need for the unspill since the * instruction will be overwriting the whole destination in any case. */ - if (inst->is_partial_write() || + if (inst->is_partial_reg_write() || (!inst->force_writemask_all && !per_channel)) emit_unspill(ubld, spill_src, subset_spill_offset, regs_written(inst)); diff --git a/src/intel/compiler/brw_fs_register_coalesce.cpp b/src/intel/compiler/brw_fs_register_coalesce.cpp index 4fe6773da54..27234292c30 100644 --- a/src/intel/compiler/brw_fs_register_coalesce.cpp +++ b/src/intel/compiler/brw_fs_register_coalesce.cpp @@ -70,7 +70,7 @@ is_coalesce_candidate(const fs_visitor *v, const fs_inst *inst) { if ((inst->opcode != BRW_OPCODE_MOV && inst->opcode != SHADER_OPCODE_LOAD_PAYLOAD) || - inst->is_partial_write() || + inst->is_partial_reg_write() || inst->saturate || inst->src[0].file != VGRF || inst->src[0].negate || diff --git a/src/intel/compiler/brw_fs_saturate_propagation.cpp b/src/intel/compiler/brw_fs_saturate_propagation.cpp index d6cfa79a618..1e1461063ae 100644 --- a/src/intel/compiler/brw_fs_saturate_propagation.cpp +++ b/src/intel/compiler/brw_fs_saturate_propagation.cpp @@ -43,7 +43,8 @@ */ static bool -opt_saturate_propagation_local(fs_visitor *v, bblock_t *block) +opt_saturate_propagation_local(fs_visitor *v, bblock_t *block, + unsigned dispatch_width) { bool progress = false; int ip = block->end_ip + 1; @@ -66,7 +67,7 @@ opt_saturate_propagation_local(fs_visitor *v, bblock_t *block) foreach_inst_in_block_reverse_starting_from(fs_inst, scan_inst, inst) { if (regions_overlap(scan_inst->dst, scan_inst->size_written, inst->src[0], inst->size_read(0))) { - if (scan_inst->is_partial_write() || + if (scan_inst->is_partial_var_write(dispatch_width) || (scan_inst->dst.type != inst->dst.type && !scan_inst->can_change_types())) break; @@ -153,7 +154,7 @@ fs_visitor::opt_saturate_propagation() calculate_live_intervals(); foreach_block (block, cfg) { - progress = opt_saturate_propagation_local(this, block) || progress; + progress = opt_saturate_propagation_local(this, block, dispatch_width) || progress; } /* Live intervals are still valid. */ diff --git a/src/intel/compiler/brw_fs_sel_peephole.cpp b/src/intel/compiler/brw_fs_sel_peephole.cpp index 6395b409b7c..98d640a3bfe 100644 --- a/src/intel/compiler/brw_fs_sel_peephole.cpp +++ b/src/intel/compiler/brw_fs_sel_peephole.cpp @@ -167,8 +167,8 @@ fs_visitor::opt_peephole_sel() then_mov[i]->exec_size != else_mov[i]->exec_size || then_mov[i]->group != else_mov[i]->group || then_mov[i]->force_writemask_all != else_mov[i]->force_writemask_all || - then_mov[i]->is_partial_write() || - else_mov[i]->is_partial_write() || + then_mov[i]->is_partial_var_write(dispatch_width) || + else_mov[i]->is_partial_var_write(dispatch_width) || then_mov[i]->conditional_mod != BRW_CONDITIONAL_NONE || else_mov[i]->conditional_mod != BRW_CONDITIONAL_NONE) { movs = i; diff --git a/src/intel/compiler/brw_ir_fs.h b/src/intel/compiler/brw_ir_fs.h index 56a4bdc6e52..87e03258d93 100644 --- a/src/intel/compiler/brw_ir_fs.h +++ b/src/intel/compiler/brw_ir_fs.h @@ -348,7 +348,8 @@ public: void resize_sources(uint8_t num_sources); bool is_send_from_grf() const; - bool is_partial_write() const; + bool is_partial_reg_write() const; + bool is_partial_var_write(unsigned dispatch_width) const; bool is_copy_payload(const brw::simple_allocator &grf_alloc) const; unsigned components_read(unsigned i) const; unsigned size_read(int arg) const;