i965/fs: Add support for specifying register horizontal strides.
v2: Some improvements for copy propagation with non-contiguous register strides and mismatching types. v3: Add example of the situation that the copy propagation changes are intended to avoid. Clarify that 'fs_reg::apply_stride()' is expected to work with zero strides too. Reviewed-by: Matt Turner <mattst88@gmail.com> Reviewed-by: Paul Berry <stereotype441@gmail.com>
This commit is contained in:
parent
4c7206bafd
commit
756d37b1d6
|
@ -381,6 +381,7 @@ fs_reg::init()
|
||||||
{
|
{
|
||||||
memset(this, 0, sizeof(*this));
|
memset(this, 0, sizeof(*this));
|
||||||
this->smear = -1;
|
this->smear = -1;
|
||||||
|
stride = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Generic unset register constructor. */
|
/** Generic unset register constructor. */
|
||||||
|
@ -440,6 +441,7 @@ fs_reg::equals(const fs_reg &r) const
|
||||||
memcmp(&fixed_hw_reg, &r.fixed_hw_reg,
|
memcmp(&fixed_hw_reg, &r.fixed_hw_reg,
|
||||||
sizeof(fixed_hw_reg)) == 0 &&
|
sizeof(fixed_hw_reg)) == 0 &&
|
||||||
smear == r.smear &&
|
smear == r.smear &&
|
||||||
|
stride == r.stride &&
|
||||||
imm.u == r.imm.u);
|
imm.u == r.imm.u);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -451,6 +453,22 @@ fs_reg::retype(uint32_t type)
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fs_reg &
|
||||||
|
fs_reg::apply_stride(unsigned stride)
|
||||||
|
{
|
||||||
|
assert((this->stride * stride) <= 4 &&
|
||||||
|
(is_power_of_two(stride) || stride == 0) &&
|
||||||
|
file != HW_REG && file != IMM);
|
||||||
|
this->stride *= stride;
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool
|
||||||
|
fs_reg::is_contiguous() const
|
||||||
|
{
|
||||||
|
return stride == 1;
|
||||||
|
}
|
||||||
|
|
||||||
bool
|
bool
|
||||||
fs_reg::is_zero() const
|
fs_reg::is_zero() const
|
||||||
{
|
{
|
||||||
|
@ -708,7 +726,7 @@ fs_inst::is_partial_write()
|
||||||
{
|
{
|
||||||
return ((this->predicate && this->opcode != BRW_OPCODE_SEL) ||
|
return ((this->predicate && this->opcode != BRW_OPCODE_SEL) ||
|
||||||
this->force_uncompressed ||
|
this->force_uncompressed ||
|
||||||
this->force_sechalf);
|
this->force_sechalf || !this->dst.is_contiguous());
|
||||||
}
|
}
|
||||||
|
|
||||||
int
|
int
|
||||||
|
@ -2317,6 +2335,7 @@ fs_visitor::register_coalesce()
|
||||||
inst->src[0].negate ||
|
inst->src[0].negate ||
|
||||||
inst->src[0].abs ||
|
inst->src[0].abs ||
|
||||||
inst->src[0].smear != -1 ||
|
inst->src[0].smear != -1 ||
|
||||||
|
!inst->src[0].is_contiguous() ||
|
||||||
inst->dst.file != GRF ||
|
inst->dst.file != GRF ||
|
||||||
inst->dst.type != inst->src[0].type) {
|
inst->dst.type != inst->src[0].type) {
|
||||||
continue;
|
continue;
|
||||||
|
@ -2477,7 +2496,8 @@ fs_visitor::compute_to_mrf()
|
||||||
inst->dst.file != MRF || inst->src[0].file != GRF ||
|
inst->dst.file != MRF || inst->src[0].file != GRF ||
|
||||||
inst->dst.type != inst->src[0].type ||
|
inst->dst.type != inst->src[0].type ||
|
||||||
inst->src[0].abs || inst->src[0].negate ||
|
inst->src[0].abs || inst->src[0].negate ||
|
||||||
inst->src[0].smear != -1 || inst->src[0].subreg_offset)
|
inst->src[0].smear != -1 || !inst->src[0].is_contiguous() ||
|
||||||
|
inst->src[0].subreg_offset)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
/* Work out which hardware MRF registers are written by this
|
/* Work out which hardware MRF registers are written by this
|
||||||
|
|
|
@ -82,7 +82,9 @@ public:
|
||||||
bool is_one() const;
|
bool is_one() const;
|
||||||
bool is_null() const;
|
bool is_null() const;
|
||||||
bool is_valid_3src() const;
|
bool is_valid_3src() const;
|
||||||
|
bool is_contiguous() const;
|
||||||
fs_reg retype(uint32_t type);
|
fs_reg retype(uint32_t type);
|
||||||
|
fs_reg &apply_stride(unsigned stride);
|
||||||
|
|
||||||
/** Register file: GRF, MRF, IMM. */
|
/** Register file: GRF, MRF, IMM. */
|
||||||
enum register_file file;
|
enum register_file file;
|
||||||
|
@ -120,6 +122,9 @@ public:
|
||||||
*/
|
*/
|
||||||
int subreg_offset;
|
int subreg_offset;
|
||||||
|
|
||||||
|
/** Register region horizontal stride */
|
||||||
|
int stride;
|
||||||
|
|
||||||
fs_reg *reladdr;
|
fs_reg *reladdr;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -279,7 +279,9 @@ fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry)
|
||||||
if (entry->src.file == IMM)
|
if (entry->src.file == IMM)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
if (inst->regs_read(this, arg) > 1)
|
/* Bail if inst is reading more than entry is writing. */
|
||||||
|
if ((inst->regs_read(this, arg) * inst->src[arg].stride *
|
||||||
|
type_sz(inst->src[arg].type)) > type_sz(entry->dst.type))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
if (inst->src[arg].file != entry->dst.file ||
|
if (inst->src[arg].file != entry->dst.file ||
|
||||||
|
@ -298,7 +300,32 @@ fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry)
|
||||||
bool has_source_modifiers = entry->src.abs || entry->src.negate;
|
bool has_source_modifiers = entry->src.abs || entry->src.negate;
|
||||||
|
|
||||||
if ((has_source_modifiers || entry->src.file == UNIFORM ||
|
if ((has_source_modifiers || entry->src.file == UNIFORM ||
|
||||||
entry->src.smear != -1) && !can_do_source_mods(inst))
|
entry->src.smear != -1 || !entry->src.is_contiguous()) &&
|
||||||
|
!can_do_source_mods(inst))
|
||||||
|
return false;
|
||||||
|
|
||||||
|
/* Bail if the result of composing both strides would exceed the
|
||||||
|
* hardware limit.
|
||||||
|
*/
|
||||||
|
if (entry->src.stride * inst->src[arg].stride > 4)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
/* Bail if the result of composing both strides cannot be expressed
|
||||||
|
* as another stride. This avoids, for example, trying to transform
|
||||||
|
* this:
|
||||||
|
*
|
||||||
|
* MOV (8) rX<1>UD rY<0;1,0>UD
|
||||||
|
* FOO (8) ... rX<8;8,1>UW
|
||||||
|
*
|
||||||
|
* into this:
|
||||||
|
*
|
||||||
|
* FOO (8) ... rY<0;1,0>UW
|
||||||
|
*
|
||||||
|
* Which would have different semantics.
|
||||||
|
*/
|
||||||
|
if (entry->src.stride != 1 &&
|
||||||
|
(inst->src[arg].stride *
|
||||||
|
type_sz(inst->src[arg].type)) % type_sz(entry->src.type) != 0)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
if (has_source_modifiers && entry->dst.type != inst->src[arg].type)
|
if (has_source_modifiers && entry->dst.type != inst->src[arg].type)
|
||||||
|
@ -310,6 +337,7 @@ fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry)
|
||||||
if (entry->src.smear != -1)
|
if (entry->src.smear != -1)
|
||||||
inst->src[arg].smear = entry->src.smear;
|
inst->src[arg].smear = entry->src.smear;
|
||||||
inst->src[arg].subreg_offset = entry->src.subreg_offset;
|
inst->src[arg].subreg_offset = entry->src.subreg_offset;
|
||||||
|
inst->src[arg].stride *= entry->src.stride;
|
||||||
|
|
||||||
if (!inst->src[arg].abs) {
|
if (!inst->src[arg].abs) {
|
||||||
inst->src[arg].abs = entry->src.abs;
|
inst->src[arg].abs = entry->src.abs;
|
||||||
|
@ -332,7 +360,9 @@ fs_visitor::try_constant_propagate(fs_inst *inst, acp_entry *entry)
|
||||||
if (inst->src[i].file != entry->dst.file ||
|
if (inst->src[i].file != entry->dst.file ||
|
||||||
inst->src[i].reg != entry->dst.reg ||
|
inst->src[i].reg != entry->dst.reg ||
|
||||||
inst->src[i].reg_offset != entry->dst.reg_offset ||
|
inst->src[i].reg_offset != entry->dst.reg_offset ||
|
||||||
inst->src[i].subreg_offset != entry->dst.subreg_offset)
|
inst->src[i].subreg_offset != entry->dst.subreg_offset ||
|
||||||
|
inst->src[i].type != entry->dst.type ||
|
||||||
|
inst->src[i].stride > 1)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
/* Don't bother with cases that should have been taken care of by the
|
/* Don't bother with cases that should have been taken care of by the
|
||||||
|
|
|
@ -1030,11 +1030,13 @@ brw_reg_from_fs_reg(fs_reg *reg)
|
||||||
switch (reg->file) {
|
switch (reg->file) {
|
||||||
case GRF:
|
case GRF:
|
||||||
case MRF:
|
case MRF:
|
||||||
if (reg->smear == -1) {
|
if (reg->stride == 0 || reg->smear >= 0) {
|
||||||
brw_reg = brw_vec8_reg(brw_file_from_reg(reg), reg->reg, 0);
|
brw_reg = brw_vec1_reg(brw_file_from_reg(reg), reg->reg, reg->smear);
|
||||||
} else {
|
} else {
|
||||||
brw_reg = brw_vec1_reg(brw_file_from_reg(reg), reg->reg, reg->smear);
|
brw_reg = brw_vec8_reg(brw_file_from_reg(reg), reg->reg, 0);
|
||||||
|
brw_reg = stride(brw_reg, 8 * reg->stride, 8, reg->stride);
|
||||||
}
|
}
|
||||||
|
|
||||||
brw_reg = retype(brw_reg, reg->type);
|
brw_reg = retype(brw_reg, reg->type);
|
||||||
if (reg->sechalf)
|
if (reg->sechalf)
|
||||||
brw_reg = sechalf(brw_reg);
|
brw_reg = sechalf(brw_reg);
|
||||||
|
|
|
@ -85,7 +85,7 @@ fs_live_variables::setup_one_read(bblock_t *block, fs_inst *inst,
|
||||||
* would get stomped by the first decode as well.
|
* would get stomped by the first decode as well.
|
||||||
*/
|
*/
|
||||||
int end_ip = ip;
|
int end_ip = ip;
|
||||||
if (v->dispatch_width == 16 && (reg.smear != -1 ||
|
if (v->dispatch_width == 16 && (reg.smear != -1 || reg.stride == 0 ||
|
||||||
(v->pixel_x.reg == reg.reg ||
|
(v->pixel_x.reg == reg.reg ||
|
||||||
v->pixel_y.reg == reg.reg))) {
|
v->pixel_y.reg == reg.reg))) {
|
||||||
end_ip++;
|
end_ip++;
|
||||||
|
|
|
@ -592,7 +592,7 @@ fs_visitor::choose_spill_reg(struct ra_graph *g)
|
||||||
* loading pull constants, so spilling them is unlikely to reduce
|
* loading pull constants, so spilling them is unlikely to reduce
|
||||||
* register pressure anyhow.
|
* register pressure anyhow.
|
||||||
*/
|
*/
|
||||||
if (inst->src[i].smear >= 0) {
|
if (inst->src[i].smear >= 0 || !inst->src[i].is_contiguous()) {
|
||||||
no_spill[inst->src[i].reg] = true;
|
no_spill[inst->src[i].reg] = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -601,7 +601,7 @@ fs_visitor::choose_spill_reg(struct ra_graph *g)
|
||||||
if (inst->dst.file == GRF) {
|
if (inst->dst.file == GRF) {
|
||||||
spill_costs[inst->dst.reg] += inst->regs_written * loop_scale;
|
spill_costs[inst->dst.reg] += inst->regs_written * loop_scale;
|
||||||
|
|
||||||
if (inst->dst.smear >= 0) {
|
if (inst->dst.smear >= 0 || !inst->dst.is_contiguous()) {
|
||||||
no_spill[inst->dst.reg] = true;
|
no_spill[inst->dst.reg] = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue