i965/fs: Bake regs_written into the IR instead of recomputing it later.
For sampler messages, it depends on the target gen, and on gen4 SIMD16-sampler-on-SIMD8-execution we were returning 4 instead of 8 like we should. Reviewed-by: Kenneth Graunke <kenneth@whitecape.org> NOTE: This is a candidate for the 9.1 branch.
This commit is contained in:
parent
8edc7cbe64
commit
3cf69b2284
|
@ -60,6 +60,9 @@ fs_inst::init()
|
|||
this->src[0] = reg_undef;
|
||||
this->src[1] = reg_undef;
|
||||
this->src[2] = reg_undef;
|
||||
|
||||
/* This will be the case for almost all instructions. */
|
||||
this->regs_written = 1;
|
||||
}
|
||||
|
||||
fs_inst::fs_inst()
|
||||
|
@ -254,6 +257,7 @@ fs_visitor::VARYING_PULL_CONSTANT_LOAD(fs_reg dst, fs_reg surf_index,
|
|||
fs_reg vec4_result = fs_reg(GRF, virtual_grf_alloc(4), dst.type);
|
||||
inst = new(mem_ctx) fs_inst(FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7,
|
||||
vec4_result, surf_index, vec4_offset);
|
||||
inst->regs_written = 4;
|
||||
instructions.push_tail(inst);
|
||||
|
||||
vec4_result.reg_offset += const_offset & 3;
|
||||
|
@ -329,26 +333,13 @@ fs_inst::equals(fs_inst *inst)
|
|||
offset == inst->offset);
|
||||
}
|
||||
|
||||
int
|
||||
fs_inst::regs_written()
|
||||
{
|
||||
if (is_tex() || opcode == FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7)
|
||||
return 4;
|
||||
|
||||
/* The SINCOS and INT_DIV_QUOTIENT_AND_REMAINDER math functions return 2,
|
||||
* but we don't currently use them...nor do we have an opcode for them.
|
||||
*/
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
bool
|
||||
fs_inst::overwrites_reg(const fs_reg ®)
|
||||
{
|
||||
return (reg.file == dst.file &&
|
||||
reg.reg == dst.reg &&
|
||||
reg.reg_offset >= dst.reg_offset &&
|
||||
reg.reg_offset < dst.reg_offset + regs_written());
|
||||
reg.reg_offset < dst.reg_offset + regs_written);
|
||||
}
|
||||
|
||||
bool
|
||||
|
@ -1376,7 +1367,7 @@ fs_visitor::split_virtual_grfs()
|
|||
/* If there's a SEND message that requires contiguous destination
|
||||
* registers, no splitting is allowed.
|
||||
*/
|
||||
if (inst->regs_written() > 1) {
|
||||
if (inst->regs_written > 1) {
|
||||
split_grf[inst->dst.reg] = false;
|
||||
}
|
||||
|
||||
|
@ -2104,7 +2095,7 @@ fs_visitor::compute_to_mrf()
|
|||
/* Things returning more than one register would need us to
|
||||
* understand coalescing out more than one MOV at a time.
|
||||
*/
|
||||
if (scan_inst->regs_written() > 1)
|
||||
if (scan_inst->regs_written > 1)
|
||||
break;
|
||||
|
||||
/* SEND instructions can't have MRF as a destination. */
|
||||
|
@ -2321,7 +2312,7 @@ void
|
|||
fs_visitor::insert_gen4_pre_send_dependency_workarounds(fs_inst *inst)
|
||||
{
|
||||
int reg_size = dispatch_width / 8;
|
||||
int write_len = inst->regs_written() * reg_size;
|
||||
int write_len = inst->regs_written * reg_size;
|
||||
int first_write_grf = inst->dst.reg;
|
||||
bool needs_dep[BRW_MAX_MRF];
|
||||
assert(write_len < (int)sizeof(needs_dep) - 1);
|
||||
|
@ -2361,7 +2352,7 @@ fs_visitor::insert_gen4_pre_send_dependency_workarounds(fs_inst *inst)
|
|||
* dependency has more latency than a MOV.
|
||||
*/
|
||||
if (scan_inst->dst.file == GRF) {
|
||||
for (int i = 0; i < scan_inst->regs_written(); i++) {
|
||||
for (int i = 0; i < scan_inst->regs_written; i++) {
|
||||
int reg = scan_inst->dst.reg + i * reg_size;
|
||||
|
||||
if (reg >= first_write_grf &&
|
||||
|
@ -2400,7 +2391,7 @@ fs_visitor::insert_gen4_pre_send_dependency_workarounds(fs_inst *inst)
|
|||
void
|
||||
fs_visitor::insert_gen4_post_send_dependency_workarounds(fs_inst *inst)
|
||||
{
|
||||
int write_len = inst->regs_written() * dispatch_width / 8;
|
||||
int write_len = inst->regs_written * dispatch_width / 8;
|
||||
int first_write_grf = inst->dst.reg;
|
||||
bool needs_dep[BRW_MAX_MRF];
|
||||
assert(write_len < (int)sizeof(needs_dep) - 1);
|
||||
|
|
|
@ -174,7 +174,6 @@ public:
|
|||
fs_reg src0, fs_reg src1,fs_reg src2);
|
||||
|
||||
bool equals(fs_inst *inst);
|
||||
int regs_written();
|
||||
bool overwrites_reg(const fs_reg ®);
|
||||
bool is_tex();
|
||||
bool is_math();
|
||||
|
@ -192,6 +191,7 @@ public:
|
|||
uint8_t flag_subreg;
|
||||
|
||||
int mlen; /**< SEND message length */
|
||||
int regs_written; /**< Number of vgrfs written by a SEND message, or 1 */
|
||||
int base_mrf; /**< First MRF in the SEND message, if mlen is nonzero. */
|
||||
uint32_t texture_offset; /**< Texture offset bitfield */
|
||||
int sampler;
|
||||
|
|
|
@ -130,7 +130,7 @@ fs_visitor::opt_cse_local(bblock_t *block, exec_list *aeb)
|
|||
*/
|
||||
bool no_existing_temp = entry->tmp.file == BAD_FILE;
|
||||
if (no_existing_temp) {
|
||||
int written = entry->generator->regs_written();
|
||||
int written = entry->generator->regs_written;
|
||||
|
||||
fs_reg orig_dst = entry->generator->dst;
|
||||
fs_reg tmp = fs_reg(GRF, virtual_grf_alloc(written),
|
||||
|
@ -150,8 +150,8 @@ fs_visitor::opt_cse_local(bblock_t *block, exec_list *aeb)
|
|||
}
|
||||
|
||||
/* dest <- temp */
|
||||
int written = inst->regs_written();
|
||||
assert(written == entry->generator->regs_written());
|
||||
int written = inst->regs_written;
|
||||
assert(written == entry->generator->regs_written);
|
||||
assert(inst->dst.type == entry->tmp.type);
|
||||
fs_reg dst = inst->dst;
|
||||
fs_reg tmp = entry->tmp;
|
||||
|
|
|
@ -77,7 +77,7 @@ fs_live_variables::setup_def_use()
|
|||
* variable, and thus qualify for being in def[].
|
||||
*/
|
||||
if (inst->dst.file == GRF &&
|
||||
inst->regs_written() == v->virtual_grf_sizes[inst->dst.reg] &&
|
||||
inst->regs_written == v->virtual_grf_sizes[inst->dst.reg] &&
|
||||
!inst->predicate &&
|
||||
!inst->force_uncompressed &&
|
||||
!inst->force_sechalf) {
|
||||
|
|
|
@ -553,7 +553,7 @@ fs_visitor::choose_spill_reg(struct ra_graph *g)
|
|||
}
|
||||
|
||||
if (inst->dst.file == GRF) {
|
||||
spill_costs[inst->dst.reg] += inst->regs_written() * loop_scale;
|
||||
spill_costs[inst->dst.reg] += inst->regs_written * loop_scale;
|
||||
|
||||
if (inst->dst.smear >= 0) {
|
||||
no_spill[inst->dst.reg] = true;
|
||||
|
@ -622,7 +622,7 @@ fs_visitor::spill_reg(int spill_reg)
|
|||
inst->dst.reg == spill_reg) {
|
||||
int subset_spill_offset = (spill_offset +
|
||||
REG_SIZE * inst->dst.reg_offset);
|
||||
inst->dst.reg = virtual_grf_alloc(inst->regs_written());
|
||||
inst->dst.reg = virtual_grf_alloc(inst->regs_written);
|
||||
inst->dst.reg_offset = 0;
|
||||
|
||||
/* If our write is going to affect just part of the
|
||||
|
@ -631,7 +631,7 @@ fs_visitor::spill_reg(int spill_reg)
|
|||
*/
|
||||
if (inst->predicate || inst->force_uncompressed || inst->force_sechalf) {
|
||||
fs_reg unspill_reg = inst->dst;
|
||||
for (int chan = 0; chan < inst->regs_written(); chan++) {
|
||||
for (int chan = 0; chan < inst->regs_written; chan++) {
|
||||
emit_unspill(inst, unspill_reg,
|
||||
subset_spill_offset + REG_SIZE * chan);
|
||||
unspill_reg.reg_offset++;
|
||||
|
@ -644,7 +644,7 @@ fs_visitor::spill_reg(int spill_reg)
|
|||
spill_src.negate = false;
|
||||
spill_src.smear = -1;
|
||||
|
||||
for (int chan = 0; chan < inst->regs_written(); chan++) {
|
||||
for (int chan = 0; chan < inst->regs_written; chan++) {
|
||||
fs_inst *spill_inst = new(mem_ctx) fs_inst(FS_OPCODE_SPILL,
|
||||
reg_null_f, spill_src);
|
||||
spill_src.reg_offset++;
|
||||
|
|
|
@ -556,7 +556,7 @@ instruction_scheduler::calculate_deps()
|
|||
/* write-after-write deps. */
|
||||
if (inst->dst.file == GRF) {
|
||||
if (post_reg_alloc) {
|
||||
for (int r = 0; r < inst->regs_written() * reg_width; r++) {
|
||||
for (int r = 0; r < inst->regs_written * reg_width; r++) {
|
||||
add_dep(last_grf_write[inst->dst.reg + r], n);
|
||||
last_grf_write[inst->dst.reg + r] = n;
|
||||
}
|
||||
|
@ -663,7 +663,7 @@ instruction_scheduler::calculate_deps()
|
|||
*/
|
||||
if (inst->dst.file == GRF) {
|
||||
if (post_reg_alloc) {
|
||||
for (int r = 0; r < inst->regs_written() * reg_width; r++)
|
||||
for (int r = 0; r < inst->regs_written * reg_width; r++)
|
||||
last_grf_write[inst->dst.reg + r] = n;
|
||||
} else {
|
||||
last_grf_write[inst->dst.reg] = n;
|
||||
|
@ -762,7 +762,7 @@ instruction_scheduler::schedule_instructions(fs_inst *next_block_header)
|
|||
schedule_node *n = (schedule_node *)node;
|
||||
|
||||
chosen = n;
|
||||
if (chosen->inst->regs_written() <= 1)
|
||||
if (chosen->inst->regs_written <= 1)
|
||||
break;
|
||||
}
|
||||
|
||||
|
|
|
@ -261,7 +261,7 @@ fs_visitor::try_emit_saturate(ir_expression *ir)
|
|||
* src, generate a saturated MOV
|
||||
*/
|
||||
fs_inst *modify = get_instruction_generating_reg(pre_inst, last_inst, src);
|
||||
if (!modify || modify->regs_written() != 1) {
|
||||
if (!modify || modify->regs_written != 1) {
|
||||
this->result = fs_reg(this, ir->type);
|
||||
fs_inst *inst = emit(MOV(this->result, src));
|
||||
inst->saturate = true;
|
||||
|
@ -746,7 +746,7 @@ fs_visitor::try_rewrite_rhs_to_dst(ir_assignment *ir,
|
|||
/* If last_rhs_inst wrote a different number of components than our LHS,
|
||||
* we can't safely rewrite it.
|
||||
*/
|
||||
if (virtual_grf_sizes[dst.reg] != modify->regs_written())
|
||||
if (virtual_grf_sizes[dst.reg] != modify->regs_written)
|
||||
return false;
|
||||
|
||||
/* Success! Rewrite the instruction. */
|
||||
|
@ -948,6 +948,7 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate,
|
|||
inst->base_mrf = base_mrf;
|
||||
inst->mlen = mlen;
|
||||
inst->header_present = true;
|
||||
inst->regs_written = simd16 ? 8 : 4;
|
||||
|
||||
if (simd16) {
|
||||
for (int i = 0; i < 4; i++) {
|
||||
|
@ -1089,6 +1090,7 @@ fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate,
|
|||
inst->base_mrf = base_mrf;
|
||||
inst->mlen = mlen;
|
||||
inst->header_present = header_present;
|
||||
inst->regs_written = 4;
|
||||
|
||||
if (mlen > 11) {
|
||||
fail("Message length >11 disallowed by hardware\n");
|
||||
|
@ -1244,6 +1246,7 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate,
|
|||
inst->base_mrf = base_mrf;
|
||||
inst->mlen = mlen;
|
||||
inst->header_present = header_present;
|
||||
inst->regs_written = 4;
|
||||
|
||||
if (mlen > 11) {
|
||||
fail("Message length >11 disallowed by hardware\n");
|
||||
|
|
Loading…
Reference in New Issue