i965/vec4: support basic spilling of 64-bit registers
The current spilling code can't spill vgrf allocations larger than 1 but SIMD4x2 doubles require 2 vgrfs, so we need to permit this case (which is handled properly for DF data types by emitting 2 scratch messages and doing data shuffling). We accomplish this by not auto-disabling spilling for vgrf allocations with a size of 2, and then disable spilling on any register with an offset != 0B (which indicates array access). Disable spilling of partial DF reads/writes because these don't read/write data for both logical threads and our scratch messages for 64-bit data need data for both threads to be present. Reviewed-by: Matt Turner <mattst88@gmail.com>
This commit is contained in:
parent
c762809e49
commit
82c69426a5
|
@ -376,7 +376,7 @@ vec4_visitor::evaluate_spill_costs(float *spill_costs, bool *no_spill)
|
|||
|
||||
for (unsigned i = 0; i < this->alloc.count; i++) {
|
||||
spill_costs[i] = 0.0;
|
||||
no_spill[i] = alloc.sizes[i] != 1;
|
||||
no_spill[i] = alloc.sizes[i] != 1 && alloc.sizes[i] != 2;
|
||||
}
|
||||
|
||||
/* Calculate costs for spilling nodes. Call it a cost of 1 per
|
||||
|
@ -393,7 +393,17 @@ vec4_visitor::evaluate_spill_costs(float *spill_costs, bool *no_spill)
|
|||
if (!can_use_scratch_for_source(inst, i, inst->src[i].nr)) {
|
||||
spill_costs[inst->src[i].nr] += loop_scale;
|
||||
if (inst->src[i].reladdr ||
|
||||
inst->src[i].offset % REG_SIZE != 0)
|
||||
inst->src[i].offset >= REG_SIZE)
|
||||
no_spill[inst->src[i].nr] = true;
|
||||
|
||||
/* We don't support unspills of partial DF reads.
|
||||
*
|
||||
* Our 64-bit unspills are implemented with two 32-bit scratch
|
||||
* messages, each one reading that for both SIMD4x2 threads that
|
||||
* we need to shuffle into correct 64-bit data. Ensure that we
|
||||
* are reading data for both threads.
|
||||
*/
|
||||
if (type_sz(inst->src[i].type) == 8 && inst->exec_size != 8)
|
||||
no_spill[inst->src[i].nr] = true;
|
||||
}
|
||||
}
|
||||
|
@ -401,7 +411,16 @@ vec4_visitor::evaluate_spill_costs(float *spill_costs, bool *no_spill)
|
|||
|
||||
if (inst->dst.file == VGRF && !no_spill[inst->dst.nr]) {
|
||||
spill_costs[inst->dst.nr] += loop_scale;
|
||||
if (inst->dst.reladdr || inst->dst.offset % REG_SIZE != 0)
|
||||
if (inst->dst.reladdr || inst->dst.offset >= REG_SIZE)
|
||||
no_spill[inst->dst.nr] = true;
|
||||
|
||||
/* We don't support spills of partial DF writes.
|
||||
*
|
||||
* Our 64-bit spills are implemented with two 32-bit scratch messages,
|
||||
* each one writing that for both SIMD4x2 threads. Ensure that we
|
||||
* are writing data for both threads.
|
||||
*/
|
||||
if (type_sz(inst->dst.type) == 8 && inst->exec_size != 8)
|
||||
no_spill[inst->dst.nr] = true;
|
||||
}
|
||||
|
||||
|
@ -450,8 +469,9 @@ vec4_visitor::choose_spill_reg(struct ra_graph *g)
|
|||
void
|
||||
vec4_visitor::spill_reg(int spill_reg_nr)
|
||||
{
|
||||
assert(alloc.sizes[spill_reg_nr] == 1);
|
||||
unsigned int spill_offset = last_scratch++;
|
||||
assert(alloc.sizes[spill_reg_nr] == 1 || alloc.sizes[spill_reg_nr] == 2);
|
||||
unsigned int spill_offset = last_scratch;
|
||||
last_scratch += alloc.sizes[spill_reg_nr];
|
||||
|
||||
/* Generate spill/unspill instructions for the objects being spilled. */
|
||||
int scratch_reg = -1;
|
||||
|
@ -465,12 +485,14 @@ vec4_visitor::spill_reg(int spill_reg_nr)
|
|||
* for consecutive instructions that read different channels of
|
||||
* the same vec4.
|
||||
*/
|
||||
scratch_reg = alloc.allocate(1);
|
||||
scratch_reg = alloc.allocate(alloc.sizes[spill_reg_nr]);
|
||||
src_reg temp = inst->src[i];
|
||||
temp.nr = scratch_reg;
|
||||
temp.offset = 0;
|
||||
temp.swizzle = BRW_SWIZZLE_XYZW;
|
||||
emit_scratch_read(block, inst,
|
||||
dst_reg(temp), inst->src[i], spill_offset);
|
||||
temp.offset = inst->src[i].offset;
|
||||
}
|
||||
assert(scratch_reg != -1);
|
||||
inst->src[i].nr = scratch_reg;
|
||||
|
|
Loading…
Reference in New Issue