intel/eu/gen12: Set SWSB annotations in hand-crafted assembly.
Reviewers are encouraged to audit the code generation pass independently in case I missed some potential data hazard or new code has been added in the meantime. v2: Add SYNC instruction to cr0 workaround in brw_float_controls_mode(). v3: Drop likely redundant (and potentially harmful) RegDist SWSB annotation from ce0 read in brw_find_live_channel() (Caio). Reviewed-by: Caio Marcelo de Oliveira Filho <caio.oliveira@intel.com>
This commit is contained in:
parent
d3f3bdcd18
commit
15e3a0d9d2
|
@ -55,6 +55,7 @@ gen6_resolve_implied_move(struct brw_codegen *p,
|
|||
return;
|
||||
|
||||
if (src->file != BRW_ARCHITECTURE_REGISTER_FILE || src->nr != BRW_ARF_NULL) {
|
||||
assert(devinfo->gen < 12);
|
||||
brw_push_insn_state(p);
|
||||
brw_set_default_exec_size(p, BRW_EXECUTE_8);
|
||||
brw_set_default_mask_control(p, BRW_MASK_DISABLE);
|
||||
|
@ -1225,6 +1226,7 @@ brw_F32TO16(struct brw_codegen *p, struct brw_reg dst, struct brw_reg src)
|
|||
if (needs_zero_fill) {
|
||||
if (devinfo->gen < 12)
|
||||
brw_inst_set_no_dd_clear(devinfo, inst, true);
|
||||
brw_set_default_swsb(p, tgl_swsb_null());
|
||||
inst = brw_MOV(p, suboffset(dst, 1), brw_imm_w(0));
|
||||
if (devinfo->gen < 12)
|
||||
brw_inst_set_no_dd_check(devinfo, inst, true);
|
||||
|
@ -2057,6 +2059,7 @@ void brw_oword_block_write_scratch(struct brw_codegen *p,
|
|||
(devinfo->gen >= 7 ? GEN7_SFID_DATAPORT_DATA_CACHE :
|
||||
devinfo->gen >= 6 ? GEN6_SFID_DATAPORT_RENDER_CACHE :
|
||||
BRW_SFID_DATAPORT_WRITE);
|
||||
const struct tgl_swsb swsb = brw_get_default_swsb(p);
|
||||
uint32_t msg_type;
|
||||
|
||||
if (devinfo->gen >= 6)
|
||||
|
@ -2076,11 +2079,13 @@ void brw_oword_block_write_scratch(struct brw_codegen *p,
|
|||
brw_set_default_exec_size(p, BRW_EXECUTE_8);
|
||||
brw_set_default_mask_control(p, BRW_MASK_DISABLE);
|
||||
brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
|
||||
brw_set_default_swsb(p, tgl_swsb_src_dep(swsb));
|
||||
|
||||
brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
|
||||
|
||||
/* set message header global offset field (reg 0, element 2) */
|
||||
brw_set_default_exec_size(p, BRW_EXECUTE_1);
|
||||
brw_set_default_swsb(p, tgl_swsb_null());
|
||||
brw_MOV(p,
|
||||
retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
|
||||
mrf.nr,
|
||||
|
@ -2088,6 +2093,7 @@ void brw_oword_block_write_scratch(struct brw_codegen *p,
|
|||
brw_imm_ud(offset));
|
||||
|
||||
brw_pop_insn_state(p);
|
||||
brw_set_default_swsb(p, tgl_swsb_dst_dep(swsb, 1));
|
||||
}
|
||||
|
||||
{
|
||||
|
@ -2162,6 +2168,7 @@ brw_oword_block_read_scratch(struct brw_codegen *p,
|
|||
unsigned offset)
|
||||
{
|
||||
const struct gen_device_info *devinfo = p->devinfo;
|
||||
const struct tgl_swsb swsb = brw_get_default_swsb(p);
|
||||
|
||||
if (devinfo->gen >= 6)
|
||||
offset /= 16;
|
||||
|
@ -2188,6 +2195,7 @@ brw_oword_block_read_scratch(struct brw_codegen *p,
|
|||
|
||||
{
|
||||
brw_push_insn_state(p);
|
||||
brw_set_default_swsb(p, tgl_swsb_src_dep(swsb));
|
||||
brw_set_default_exec_size(p, BRW_EXECUTE_8);
|
||||
brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
|
||||
brw_set_default_mask_control(p, BRW_MASK_DISABLE);
|
||||
|
@ -2196,9 +2204,11 @@ brw_oword_block_read_scratch(struct brw_codegen *p,
|
|||
|
||||
/* set message header global offset field (reg 0, element 2) */
|
||||
brw_set_default_exec_size(p, BRW_EXECUTE_1);
|
||||
brw_set_default_swsb(p, tgl_swsb_null());
|
||||
brw_MOV(p, get_element_ud(mrf, 2), brw_imm_ud(offset));
|
||||
|
||||
brw_pop_insn_state(p);
|
||||
brw_set_default_swsb(p, tgl_swsb_dst_dep(swsb, 1));
|
||||
}
|
||||
|
||||
{
|
||||
|
@ -2275,6 +2285,7 @@ void brw_oword_block_read(struct brw_codegen *p,
|
|||
(devinfo->gen >= 6 ? GEN6_SFID_DATAPORT_CONSTANT_CACHE :
|
||||
BRW_SFID_DATAPORT_READ);
|
||||
const unsigned exec_size = 1 << brw_get_default_exec_size(p);
|
||||
const struct tgl_swsb swsb = brw_get_default_swsb(p);
|
||||
|
||||
/* On newer hardware, offset is in units of owords. */
|
||||
if (devinfo->gen >= 6)
|
||||
|
@ -2289,10 +2300,12 @@ void brw_oword_block_read(struct brw_codegen *p,
|
|||
|
||||
brw_push_insn_state(p);
|
||||
brw_set_default_exec_size(p, BRW_EXECUTE_8);
|
||||
brw_set_default_swsb(p, tgl_swsb_src_dep(swsb));
|
||||
brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
|
||||
|
||||
/* set message header global offset field (reg 0, element 2) */
|
||||
brw_set_default_exec_size(p, BRW_EXECUTE_1);
|
||||
brw_set_default_swsb(p, tgl_swsb_null());
|
||||
brw_MOV(p,
|
||||
retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
|
||||
mrf.nr,
|
||||
|
@ -2300,6 +2313,8 @@ void brw_oword_block_read(struct brw_codegen *p,
|
|||
brw_imm_ud(offset));
|
||||
brw_pop_insn_state(p);
|
||||
|
||||
brw_set_default_swsb(p, tgl_swsb_dst_dep(swsb, 1));
|
||||
|
||||
brw_inst *insn = next_insn(p, BRW_OPCODE_SEND);
|
||||
|
||||
brw_inst_set_sfid(devinfo, insn, target_cache);
|
||||
|
@ -2505,12 +2520,15 @@ void brw_adjust_sampler_state_pointer(struct brw_codegen *p,
|
|||
|
||||
struct brw_reg temp = get_element_ud(header, 3);
|
||||
|
||||
brw_push_insn_state(p);
|
||||
brw_AND(p, temp, get_element_ud(sampler_index, 0), brw_imm_ud(0x0f0));
|
||||
brw_set_default_swsb(p, tgl_swsb_regdist(1));
|
||||
brw_SHL(p, temp, temp, brw_imm_ud(4));
|
||||
brw_ADD(p,
|
||||
get_element_ud(header, 3),
|
||||
get_element_ud(brw_vec8_grf(0, 0), 3),
|
||||
temp);
|
||||
brw_pop_insn_state(p);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -2587,6 +2605,7 @@ brw_send_indirect_message(struct brw_codegen *p,
|
|||
brw_set_src0(p, send, retype(payload, BRW_REGISTER_TYPE_UD));
|
||||
brw_set_desc(p, send, desc.ud | desc_imm);
|
||||
} else {
|
||||
const struct tgl_swsb swsb = brw_get_default_swsb(p);
|
||||
struct brw_reg addr = retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD);
|
||||
|
||||
brw_push_insn_state(p);
|
||||
|
@ -2594,6 +2613,7 @@ brw_send_indirect_message(struct brw_codegen *p,
|
|||
brw_set_default_mask_control(p, BRW_MASK_DISABLE);
|
||||
brw_set_default_exec_size(p, BRW_EXECUTE_1);
|
||||
brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
|
||||
brw_set_default_swsb(p, tgl_swsb_src_dep(swsb));
|
||||
|
||||
/* Load the indirect descriptor to an address register using OR so the
|
||||
* caller can specify additional descriptor bits with the desc_imm
|
||||
|
@ -2603,6 +2623,7 @@ brw_send_indirect_message(struct brw_codegen *p,
|
|||
|
||||
brw_pop_insn_state(p);
|
||||
|
||||
brw_set_default_swsb(p, tgl_swsb_dst_dep(swsb, 1));
|
||||
send = next_insn(p, BRW_OPCODE_SEND);
|
||||
brw_set_src0(p, send, retype(payload, BRW_REGISTER_TYPE_UD));
|
||||
|
||||
|
@ -2639,6 +2660,7 @@ brw_send_indirect_split_message(struct brw_codegen *p,
|
|||
if (desc.file == BRW_IMMEDIATE_VALUE) {
|
||||
desc.ud |= desc_imm;
|
||||
} else {
|
||||
const struct tgl_swsb swsb = brw_get_default_swsb(p);
|
||||
struct brw_reg addr = retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD);
|
||||
|
||||
brw_push_insn_state(p);
|
||||
|
@ -2646,6 +2668,7 @@ brw_send_indirect_split_message(struct brw_codegen *p,
|
|||
brw_set_default_mask_control(p, BRW_MASK_DISABLE);
|
||||
brw_set_default_exec_size(p, BRW_EXECUTE_1);
|
||||
brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
|
||||
brw_set_default_swsb(p, tgl_swsb_src_dep(swsb));
|
||||
|
||||
/* Load the indirect descriptor to an address register using OR so the
|
||||
* caller can specify additional descriptor bits with the desc_imm
|
||||
|
@ -2655,12 +2678,15 @@ brw_send_indirect_split_message(struct brw_codegen *p,
|
|||
|
||||
brw_pop_insn_state(p);
|
||||
desc = addr;
|
||||
|
||||
brw_set_default_swsb(p, tgl_swsb_dst_dep(swsb, 1));
|
||||
}
|
||||
|
||||
if (ex_desc.file == BRW_IMMEDIATE_VALUE &&
|
||||
(ex_desc.ud & INTEL_MASK(15, 12)) == 0) {
|
||||
ex_desc.ud |= ex_desc_imm;
|
||||
} else {
|
||||
const struct tgl_swsb swsb = brw_get_default_swsb(p);
|
||||
struct brw_reg addr = retype(brw_address_reg(2), BRW_REGISTER_TYPE_UD);
|
||||
|
||||
brw_push_insn_state(p);
|
||||
|
@ -2668,6 +2694,7 @@ brw_send_indirect_split_message(struct brw_codegen *p,
|
|||
brw_set_default_mask_control(p, BRW_MASK_DISABLE);
|
||||
brw_set_default_exec_size(p, BRW_EXECUTE_1);
|
||||
brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
|
||||
brw_set_default_swsb(p, tgl_swsb_src_dep(swsb));
|
||||
|
||||
/* Load the indirect extended descriptor to an address register using OR
|
||||
* so the caller can specify additional descriptor bits with the
|
||||
|
@ -2692,6 +2719,8 @@ brw_send_indirect_split_message(struct brw_codegen *p,
|
|||
|
||||
brw_pop_insn_state(p);
|
||||
ex_desc = addr;
|
||||
|
||||
brw_set_default_swsb(p, tgl_swsb_dst_dep(swsb, 1));
|
||||
}
|
||||
|
||||
send = next_insn(p, devinfo->gen >= 12 ? BRW_OPCODE_SEND : BRW_OPCODE_SENDS);
|
||||
|
@ -2733,6 +2762,7 @@ brw_send_indirect_surface_message(struct brw_codegen *p,
|
|||
unsigned desc_imm)
|
||||
{
|
||||
if (surface.file != BRW_IMMEDIATE_VALUE) {
|
||||
const struct tgl_swsb swsb = brw_get_default_swsb(p);
|
||||
struct brw_reg addr = retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD);
|
||||
|
||||
brw_push_insn_state(p);
|
||||
|
@ -2740,6 +2770,7 @@ brw_send_indirect_surface_message(struct brw_codegen *p,
|
|||
brw_set_default_mask_control(p, BRW_MASK_DISABLE);
|
||||
brw_set_default_exec_size(p, BRW_EXECUTE_1);
|
||||
brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
|
||||
brw_set_default_swsb(p, tgl_swsb_src_dep(swsb));
|
||||
|
||||
/* Mask out invalid bits from the surface index to avoid hangs e.g. when
|
||||
* some surface array is accessed out of bounds.
|
||||
|
@ -2752,6 +2783,7 @@ brw_send_indirect_surface_message(struct brw_codegen *p,
|
|||
brw_pop_insn_state(p);
|
||||
|
||||
surface = addr;
|
||||
brw_set_default_swsb(p, tgl_swsb_dst_dep(swsb, 1));
|
||||
}
|
||||
|
||||
brw_send_indirect_message(p, sfid, dst, payload, surface, desc_imm, false);
|
||||
|
@ -3166,8 +3198,12 @@ brw_memory_fence(struct brw_codegen *p,
|
|||
brw_MOV(p, dst, offset(dst, 1));
|
||||
}
|
||||
|
||||
if (stall)
|
||||
if (stall) {
|
||||
brw_set_default_swsb(p, tgl_swsb_sbid(TGL_SBID_DST,
|
||||
brw_get_default_swsb(p).sbid));
|
||||
|
||||
brw_MOV(p, retype(brw_null_reg(), BRW_REGISTER_TYPE_UW), dst);
|
||||
}
|
||||
|
||||
brw_pop_insn_state(p);
|
||||
}
|
||||
|
@ -3248,6 +3284,7 @@ brw_find_live_channel(struct brw_codegen *p, struct brw_reg dst,
|
|||
* hardware.
|
||||
*/
|
||||
brw_SHR(p, vec1(dst), mask, brw_imm_ud(qtr_control * 8));
|
||||
brw_set_default_swsb(p, tgl_swsb_regdist(1));
|
||||
brw_AND(p, vec1(dst), exec_mask, vec1(dst));
|
||||
exec_mask = vec1(dst);
|
||||
}
|
||||
|
@ -3391,12 +3428,15 @@ brw_broadcast(struct brw_codegen *p,
|
|||
* register is above this limit.
|
||||
*/
|
||||
if (offset >= limit) {
|
||||
brw_set_default_swsb(p, tgl_swsb_regdist(1));
|
||||
brw_ADD(p, addr, addr, brw_imm_ud(offset - offset % limit));
|
||||
offset = offset % limit;
|
||||
}
|
||||
|
||||
brw_pop_insn_state(p);
|
||||
|
||||
brw_set_default_swsb(p, tgl_swsb_regdist(1));
|
||||
|
||||
/* Use indirect addressing to fetch the specified component. */
|
||||
if (type_sz(src.type) > 4 &&
|
||||
(devinfo->is_cherryview || gen_device_info_is_9lp(devinfo))) {
|
||||
|
@ -3415,6 +3455,7 @@ brw_broadcast(struct brw_codegen *p,
|
|||
brw_MOV(p, subscript(dst, BRW_REGISTER_TYPE_D, 0),
|
||||
retype(brw_vec1_indirect(addr.subnr, offset),
|
||||
BRW_REGISTER_TYPE_D));
|
||||
brw_set_default_swsb(p, tgl_swsb_null());
|
||||
brw_MOV(p, subscript(dst, BRW_REGISTER_TYPE_D, 1),
|
||||
retype(brw_vec1_indirect(addr.subnr, offset + 4),
|
||||
BRW_REGISTER_TYPE_D));
|
||||
|
@ -3548,17 +3589,20 @@ void
|
|||
brw_float_controls_mode(struct brw_codegen *p,
|
||||
unsigned mode, unsigned mask)
|
||||
{
|
||||
brw_inst *inst = brw_AND(p, brw_cr0_reg(0), brw_cr0_reg(0),
|
||||
brw_imm_ud(~mask));
|
||||
brw_inst_set_exec_size(p->devinfo, inst, BRW_EXECUTE_1);
|
||||
|
||||
/* From the Skylake PRM, Volume 7, page 760:
|
||||
* "Implementation Restriction on Register Access: When the control
|
||||
* register is used as an explicit source and/or destination, hardware
|
||||
* does not ensure execution pipeline coherency. Software must set the
|
||||
* thread control field to ‘switch’ for an instruction that uses
|
||||
* control register as an explicit operand."
|
||||
*
|
||||
* On Gen12+ this is implemented in terms of SWSB annotations instead.
|
||||
*/
|
||||
brw_set_default_swsb(p, tgl_swsb_regdist(1));
|
||||
|
||||
brw_inst *inst = brw_AND(p, brw_cr0_reg(0), brw_cr0_reg(0),
|
||||
brw_imm_ud(~mask));
|
||||
brw_inst_set_exec_size(p->devinfo, inst, BRW_EXECUTE_1);
|
||||
if (p->devinfo->gen < 12)
|
||||
brw_inst_set_thread_control(p->devinfo, inst, BRW_THREAD_SWITCH);
|
||||
|
||||
|
@ -3569,4 +3613,7 @@ brw_float_controls_mode(struct brw_codegen *p,
|
|||
if (p->devinfo->gen < 12)
|
||||
brw_inst_set_thread_control(p->devinfo, inst_or, BRW_THREAD_SWITCH);
|
||||
}
|
||||
|
||||
if (p->devinfo->gen >= 12)
|
||||
brw_SYNC(p, TGL_SYNC_NOP);
|
||||
}
|
||||
|
|
|
@ -454,6 +454,7 @@ fs_generator::generate_mov_indirect(fs_inst *inst,
|
|||
* of case-by-case work. It's just not worth it.
|
||||
*/
|
||||
brw_ADD(p, addr, indirect_byte_offset, brw_imm_uw(imm_byte_offset));
|
||||
brw_set_default_swsb(p, tgl_swsb_regdist(1));
|
||||
|
||||
if (type_sz(reg.type) > 4 &&
|
||||
((devinfo->gen == 7 && !devinfo->is_haswell) ||
|
||||
|
@ -476,6 +477,7 @@ fs_generator::generate_mov_indirect(fs_inst *inst,
|
|||
*/
|
||||
brw_MOV(p, subscript(dst, BRW_REGISTER_TYPE_D, 0),
|
||||
retype(brw_VxH_indirect(0, 0), BRW_REGISTER_TYPE_D));
|
||||
brw_set_default_swsb(p, tgl_swsb_null());
|
||||
brw_MOV(p, subscript(dst, BRW_REGISTER_TYPE_D, 1),
|
||||
retype(brw_VxH_indirect(0, 4), BRW_REGISTER_TYPE_D));
|
||||
} else {
|
||||
|
@ -564,6 +566,7 @@ fs_generator::generate_shuffle(fs_inst *inst,
|
|||
src.hstride - 1));
|
||||
|
||||
/* Add on the register start offset */
|
||||
brw_set_default_swsb(p, tgl_swsb_regdist(1));
|
||||
brw_ADD(p, addr, addr, brw_imm_uw(src.nr * REG_SIZE + src.subnr));
|
||||
|
||||
if (type_sz(src.type) > 4 &&
|
||||
|
@ -591,6 +594,7 @@ fs_generator::generate_shuffle(fs_inst *inst,
|
|||
assert(dst.hstride == 1);
|
||||
brw_MOV(p, dst_d,
|
||||
retype(brw_VxH_indirect(0, 0), BRW_REGISTER_TYPE_D));
|
||||
brw_set_default_swsb(p, tgl_swsb_null());
|
||||
brw_MOV(p, byte_offset(dst_d, 4),
|
||||
retype(brw_VxH_indirect(0, 4), BRW_REGISTER_TYPE_D));
|
||||
} else {
|
||||
|
@ -598,6 +602,8 @@ fs_generator::generate_shuffle(fs_inst *inst,
|
|||
retype(brw_VxH_indirect(0, 0), src.type));
|
||||
}
|
||||
}
|
||||
|
||||
brw_set_default_swsb(p, tgl_swsb_null());
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -662,6 +668,8 @@ fs_generator::generate_quad_swizzle(const fs_inst *inst,
|
|||
brw_inst_set_no_dd_clear(devinfo, insn, c < 3);
|
||||
brw_inst_set_no_dd_check(devinfo, insn, c > 0);
|
||||
}
|
||||
|
||||
brw_set_default_swsb(p, tgl_swsb_null());
|
||||
}
|
||||
|
||||
break;
|
||||
|
@ -775,6 +783,7 @@ void
|
|||
fs_generator::generate_barrier(fs_inst *, struct brw_reg src)
|
||||
{
|
||||
brw_barrier(p, src);
|
||||
brw_set_default_swsb(p, tgl_swsb_null());
|
||||
brw_WAIT(p);
|
||||
}
|
||||
|
||||
|
@ -1109,15 +1118,18 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst,
|
|||
/* Set up an implied move from g0 to the MRF. */
|
||||
src = retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW);
|
||||
} else {
|
||||
const tgl_swsb swsb = brw_get_default_swsb(p);
|
||||
assert(inst->base_mrf != -1);
|
||||
struct brw_reg header_reg = brw_message_reg(inst->base_mrf);
|
||||
|
||||
brw_push_insn_state(p);
|
||||
brw_set_default_swsb(p, tgl_swsb_src_dep(swsb));
|
||||
brw_set_default_exec_size(p, BRW_EXECUTE_8);
|
||||
brw_set_default_mask_control(p, BRW_MASK_DISABLE);
|
||||
brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
|
||||
/* Explicitly set up the message header by copying g0 to the MRF. */
|
||||
brw_MOV(p, header_reg, brw_vec8_grf(0, 0));
|
||||
brw_set_default_swsb(p, tgl_swsb_regdist(1));
|
||||
|
||||
brw_set_default_exec_size(p, BRW_EXECUTE_1);
|
||||
if (inst->offset) {
|
||||
|
@ -1127,6 +1139,7 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst,
|
|||
}
|
||||
|
||||
brw_pop_insn_state(p);
|
||||
brw_set_default_swsb(p, tgl_swsb_dst_dep(swsb, 1));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1273,6 +1286,7 @@ fs_generator::generate_ddy(const fs_inst *inst,
|
|||
brw_ADD(p, byte_offset(dst, g * type_size),
|
||||
negate(byte_offset(src, g * type_size)),
|
||||
byte_offset(src, (g + 2) * type_size));
|
||||
brw_set_default_swsb(p, tgl_swsb_null());
|
||||
}
|
||||
brw_pop_insn_state(p);
|
||||
} else {
|
||||
|
@ -1337,6 +1351,7 @@ fs_generator::generate_scratch_write(fs_inst *inst, struct brw_reg src)
|
|||
const unsigned lower_size = inst->force_writemask_all ? inst->exec_size :
|
||||
MIN2(16, inst->exec_size);
|
||||
const unsigned block_size = 4 * lower_size / REG_SIZE;
|
||||
const tgl_swsb swsb = brw_get_default_swsb(p);
|
||||
assert(inst->mlen != 0);
|
||||
|
||||
brw_push_insn_state(p);
|
||||
|
@ -1346,9 +1361,21 @@ fs_generator::generate_scratch_write(fs_inst *inst, struct brw_reg src)
|
|||
for (unsigned i = 0; i < inst->exec_size / lower_size; i++) {
|
||||
brw_set_default_group(p, inst->group + lower_size * i);
|
||||
|
||||
if (i > 0) {
|
||||
brw_set_default_swsb(p, tgl_swsb_null());
|
||||
brw_SYNC(p, TGL_SYNC_ALLRD);
|
||||
} else {
|
||||
brw_set_default_swsb(p, tgl_swsb_src_dep(swsb));
|
||||
}
|
||||
|
||||
brw_MOV(p, brw_uvec_mrf(lower_size, inst->base_mrf + 1, 0),
|
||||
retype(offset(src, block_size * i), BRW_REGISTER_TYPE_UD));
|
||||
|
||||
if (i + 1 < inst->exec_size / lower_size)
|
||||
brw_set_default_swsb(p, tgl_swsb_regdist(1));
|
||||
else
|
||||
brw_set_default_swsb(p, tgl_swsb_dst_dep(swsb, 1));
|
||||
|
||||
brw_oword_block_write_scratch(p, brw_message_reg(inst->base_mrf),
|
||||
block_size,
|
||||
inst->offset + block_size * REG_SIZE * i);
|
||||
|
@ -1426,12 +1453,14 @@ fs_generator::generate_uniform_pull_constant_load_gen7(fs_inst *inst,
|
|||
BRW_DATAPORT_READ_TARGET_DATA_CACHE));
|
||||
|
||||
} else {
|
||||
const tgl_swsb swsb = brw_get_default_swsb(p);
|
||||
struct brw_reg addr = vec1(retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD));
|
||||
|
||||
brw_push_insn_state(p);
|
||||
brw_set_default_mask_control(p, BRW_MASK_DISABLE);
|
||||
|
||||
/* a0.0 = surf_index & 0xff */
|
||||
brw_set_default_swsb(p, tgl_swsb_src_dep(swsb));
|
||||
brw_inst *insn_and = brw_next_insn(p, BRW_OPCODE_AND);
|
||||
brw_inst_set_exec_size(p->devinfo, insn_and, BRW_EXECUTE_1);
|
||||
brw_set_dest(p, insn_and, addr);
|
||||
|
@ -1439,6 +1468,7 @@ fs_generator::generate_uniform_pull_constant_load_gen7(fs_inst *inst,
|
|||
brw_set_src1(p, insn_and, brw_imm_ud(0x0ff));
|
||||
|
||||
/* dst = send(payload, a0.0 | <descriptor>) */
|
||||
brw_set_default_swsb(p, tgl_swsb_dst_dep(swsb, 1));
|
||||
brw_send_indirect_message(
|
||||
p, GEN6_SFID_DATAPORT_CONSTANT_CACHE,
|
||||
retype(dst, BRW_REGISTER_TYPE_UD),
|
||||
|
@ -1562,6 +1592,7 @@ fs_generator::generate_set_sample_id(fs_inst *inst,
|
|||
brw_inst_set_exec_size(devinfo, insn, cvt(lower_size) - 1);
|
||||
brw_inst_set_group(devinfo, insn, inst->group + lower_size * i);
|
||||
brw_inst_set_compression(devinfo, insn, lower_size > 8);
|
||||
brw_set_default_swsb(p, tgl_swsb_null());
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1596,6 +1627,7 @@ fs_generator::generate_pack_half_2x16_split(fs_inst *,
|
|||
/* Now the form:
|
||||
* 0xhhhh0000
|
||||
*/
|
||||
brw_set_default_swsb(p, tgl_swsb_regdist(1));
|
||||
brw_SHL(p, dst, dst, brw_imm_ud(16u));
|
||||
|
||||
/* And, finally the form of packHalf2x16's output:
|
||||
|
@ -1610,9 +1642,12 @@ fs_generator::generate_shader_time_add(fs_inst *,
|
|||
struct brw_reg offset,
|
||||
struct brw_reg value)
|
||||
{
|
||||
const tgl_swsb swsb = brw_get_default_swsb(p);
|
||||
|
||||
assert(devinfo->gen >= 7);
|
||||
brw_push_insn_state(p);
|
||||
brw_set_default_mask_control(p, true);
|
||||
brw_set_default_swsb(p, tgl_swsb_src_dep(swsb));
|
||||
|
||||
assert(payload.file == BRW_GENERAL_REGISTER_FILE);
|
||||
struct brw_reg payload_offset = retype(brw_vec1_grf(payload.nr, 0),
|
||||
|
@ -1634,7 +1669,9 @@ fs_generator::generate_shader_time_add(fs_inst *,
|
|||
* out of this path, so we just emit the MOVs from here.
|
||||
*/
|
||||
brw_MOV(p, payload_offset, offset);
|
||||
brw_set_default_swsb(p, tgl_swsb_null());
|
||||
brw_MOV(p, payload_value, value);
|
||||
brw_set_default_swsb(p, tgl_swsb_dst_dep(swsb, 1));
|
||||
brw_shader_time_add(p, payload,
|
||||
prog_data->binding_table.shader_time_start);
|
||||
brw_pop_insn_state(p);
|
||||
|
@ -2134,6 +2171,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width,
|
|||
brw_set_default_mask_control(p, BRW_MASK_DISABLE);
|
||||
brw_MOV(p, dst, src[1]);
|
||||
brw_set_default_mask_control(p, BRW_MASK_ENABLE);
|
||||
brw_set_default_swsb(p, tgl_swsb_null());
|
||||
brw_MOV(p, dst, src[0]);
|
||||
break;
|
||||
|
||||
|
@ -2183,6 +2221,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width,
|
|||
assert(src[0].type == dst.type);
|
||||
brw_MOV(p, subscript(dst, BRW_REGISTER_TYPE_D, 0),
|
||||
subscript(strided, BRW_REGISTER_TYPE_D, 0));
|
||||
brw_set_default_swsb(p, tgl_swsb_null());
|
||||
brw_MOV(p, subscript(dst, BRW_REGISTER_TYPE_D, 1),
|
||||
subscript(strided, BRW_REGISTER_TYPE_D, 1));
|
||||
} else {
|
||||
|
|
Loading…
Reference in New Issue