i965/fs: Hide varying pull constant load message setup behind logical opcode.
This will allow the SIMD lowering pass to split 32-wide varying pull constant loads (not natively supported by the hardware) into 16-wide instructions.

Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
This commit is contained in:
parent
0bc5ad8d19
commit
d8a3294ac2
|
@ -1116,6 +1116,7 @@ enum opcode {
|
|||
FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GEN7,
|
||||
FS_OPCODE_VARYING_PULL_CONSTANT_LOAD,
|
||||
FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7,
|
||||
FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_LOGICAL,
|
||||
FS_OPCODE_GET_BUFFER_SIZE,
|
||||
FS_OPCODE_MOV_DISPATCH_TO_FLAGS,
|
||||
FS_OPCODE_DISCARD_JUMP,
|
||||
|
|
|
@ -188,32 +188,16 @@ fs_visitor::VARYING_PULL_CONSTANT_LOAD(const fs_builder &bld,
|
|||
scale = 2;
|
||||
}
|
||||
|
||||
enum opcode op;
|
||||
if (devinfo->gen >= 7)
|
||||
op = FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7;
|
||||
else
|
||||
op = FS_OPCODE_VARYING_PULL_CONSTANT_LOAD;
|
||||
|
||||
/* The pull load message will load a vec4 (16 bytes). If we are loading
|
||||
* a double this means we are only loading 2 elements worth of data.
|
||||
* We also want to use a 32-bit data type for the dst of the load operation
|
||||
* so other parts of the driver don't get confused about the size of the
|
||||
* result.
|
||||
*/
|
||||
int regs_written = 4 * (bld.dispatch_width() / 8) * scale;
|
||||
fs_reg vec4_result = fs_reg(VGRF, alloc.allocate(regs_written),
|
||||
BRW_REGISTER_TYPE_F);
|
||||
fs_inst *inst = bld.emit(op, vec4_result, surf_index, vec4_offset);
|
||||
inst->regs_written = regs_written;
|
||||
|
||||
if (devinfo->gen < 7) {
|
||||
inst->base_mrf = FIRST_PULL_LOAD_MRF(devinfo->gen);
|
||||
inst->header_size = 1;
|
||||
if (devinfo->gen == 4)
|
||||
inst->mlen = 3;
|
||||
else
|
||||
inst->mlen = 1 + bld.dispatch_width() / 8;
|
||||
}
|
||||
fs_reg vec4_result = bld.vgrf(BRW_REGISTER_TYPE_F, 4 * scale);
|
||||
fs_inst *inst = bld.emit(FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_LOGICAL,
|
||||
vec4_result, surf_index, vec4_offset);
|
||||
inst->regs_written = 4 * (bld.dispatch_width() / 8) * scale;
|
||||
|
||||
if (type_sz(dst.type) == 8) {
|
||||
assert(scale == 1);
|
||||
|
@ -4439,6 +4423,28 @@ lower_surface_logical_send(const fs_builder &bld, fs_inst *inst, opcode op,
|
|||
delete[] components;
|
||||
}
|
||||
|
||||
/**
 * Rewrite a FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_LOGICAL instruction in place
 * into one of the hardware-specific varying pull constant load opcodes.
 *
 * \param bld   builder used to emit the extra payload MOV needed on the
 *              pre-gen7 path.
 * \param inst  the logical instruction to be rewritten; it is modified
 *              directly rather than replaced.
 *
 * On gen >= 7 only the opcode changes.  On earlier generations the message
 * payload is set up in MRFs here and the instruction is left with a single
 * source.
 */
static void
|
||||
lower_varying_pull_constant_logical_send(const fs_builder &bld, fs_inst *inst)
|
||||
{
|
||||
const brw_device_info *devinfo = bld.shader->devinfo;
|
||||
|
||||
if (devinfo->gen >= 7) {
|
||||
/* Sources and every other field of the instruction are left untouched. */
inst->opcode = FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7;
|
||||
|
||||
} else {
|
||||
/* The payload starts at the first pull-load MRF for this generation. */
const fs_reg payload(MRF, FIRST_PULL_LOAD_MRF(devinfo->gen),
|
||||
BRW_REGISTER_TYPE_UD);
|
||||
|
||||
/* Copy src[1] into the payload, REG_SIZE bytes past its start
 * (presumably the register right after the one-register header, and
 * presumably the per-channel offset operand -- confirm against the
 * emitter of the logical opcode).
 */
bld.MOV(byte_offset(payload, REG_SIZE), inst->src[1]);
|
||||
|
||||
inst->opcode = FS_OPCODE_VARYING_PULL_CONSTANT_LOAD;
|
||||
/* src[1] now lives in the MRF payload, so only src[0] is kept. */
inst->resize_sources(1);
|
||||
inst->base_mrf = payload.nr;
|
||||
inst->header_size = 1;
|
||||
/* Message length: 1 (matching header_size) plus exec_size / 8.
 * NOTE(review): the gen4 path visible in
 * fs_visitor::VARYING_PULL_CONSTANT_LOAD hard-codes mlen = 3, while here
 * gen4 follows the exec_size formula -- confirm this is intended.
 */
inst->mlen = 1 + inst->exec_size / 8;
|
||||
}
|
||||
}
|
||||
|
||||
bool
|
||||
fs_visitor::lower_logical_sends()
|
||||
{
|
||||
|
@ -4544,6 +4550,10 @@ fs_visitor::lower_logical_sends()
|
|||
ibld.sample_mask_reg());
|
||||
break;
|
||||
|
||||
case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_LOGICAL:
|
||||
lower_varying_pull_constant_logical_send(ibld, inst);
|
||||
break;
|
||||
|
||||
default:
|
||||
continue;
|
||||
}
|
||||
|
|
|
@ -463,9 +463,9 @@ private:
|
|||
struct brw_reg dst,
|
||||
struct brw_reg surf_index,
|
||||
struct brw_reg offset);
|
||||
void generate_varying_pull_constant_load(fs_inst *inst, struct brw_reg dst,
|
||||
struct brw_reg index,
|
||||
struct brw_reg offset);
|
||||
void generate_varying_pull_constant_load(fs_inst *inst,
|
||||
struct brw_reg dst,
|
||||
struct brw_reg index);
|
||||
void generate_varying_pull_constant_load_gen7(fs_inst *inst,
|
||||
struct brw_reg dst,
|
||||
struct brw_reg index,
|
||||
|
|
|
@ -72,8 +72,8 @@ is_expression(const fs_visitor *v, const fs_inst *const inst)
|
|||
case BRW_OPCODE_MAD:
|
||||
case BRW_OPCODE_LRP:
|
||||
case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD:
|
||||
case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_LOGICAL:
|
||||
case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7:
|
||||
case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD:
|
||||
case FS_OPCODE_CINTERP:
|
||||
case FS_OPCODE_LINTERP:
|
||||
case SHADER_OPCODE_FIND_LIVE_CHANNEL:
|
||||
|
|
|
@ -1347,8 +1347,7 @@ fs_generator::generate_uniform_pull_constant_load_gen7(fs_inst *inst,
|
|||
void
|
||||
fs_generator::generate_varying_pull_constant_load(fs_inst *inst,
|
||||
struct brw_reg dst,
|
||||
struct brw_reg index,
|
||||
struct brw_reg offset)
|
||||
struct brw_reg index)
|
||||
{
|
||||
assert(devinfo->gen < 7); /* Should use the gen7 variant. */
|
||||
assert(inst->header_size != 0);
|
||||
|
@ -1380,10 +1379,6 @@ fs_generator::generate_varying_pull_constant_load(fs_inst *inst,
|
|||
simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
|
||||
}
|
||||
|
||||
struct brw_reg offset_mrf = retype(brw_message_reg(inst->base_mrf + 1),
|
||||
BRW_REGISTER_TYPE_D);
|
||||
brw_MOV(p, offset_mrf, offset);
|
||||
|
||||
struct brw_reg header = brw_vec8_grf(0, 0);
|
||||
gen6_resolve_implied_move(p, &header, inst->base_mrf);
|
||||
|
||||
|
@ -2186,7 +2181,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
|
|||
break;
|
||||
|
||||
case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD:
|
||||
generate_varying_pull_constant_load(inst, dst, src[0], src[1]);
|
||||
generate_varying_pull_constant_load(inst, dst, src[0]);
|
||||
break;
|
||||
|
||||
case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7:
|
||||
|
|
|
@ -348,6 +348,8 @@ brw_instruction_name(const struct brw_device_info *devinfo, enum opcode op)
|
|||
return "varying_pull_const";
|
||||
case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7:
|
||||
return "varying_pull_const_gen7";
|
||||
case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_LOGICAL:
|
||||
return "varying_pull_const_logical";
|
||||
|
||||
case FS_OPCODE_MOV_DISPATCH_TO_FLAGS:
|
||||
return "mov_dispatch_to_flags";
|
||||
|
|
Loading…
Reference in New Issue