intel/fs: Eliminate "masked" and "per slot offset" URB messages
All of this information can be inferred from the sources.

v2: Fix "error: unused variable 'opcode'" detected by marge-bot.

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17605>
This commit is contained in:
parent
b21b901b46
commit
377246318a
|
@ -467,15 +467,10 @@ enum opcode {
|
|||
SHADER_OPCODE_SCRATCH_HEADER,
|
||||
|
||||
/**
|
||||
* Gfx8+ SIMD8 URB Read messages.
|
||||
* Gfx8+ SIMD8 URB messages.
|
||||
*/
|
||||
SHADER_OPCODE_URB_READ_LOGICAL,
|
||||
SHADER_OPCODE_URB_READ_PER_SLOT_LOGICAL,
|
||||
|
||||
SHADER_OPCODE_URB_WRITE_LOGICAL,
|
||||
SHADER_OPCODE_URB_WRITE_PER_SLOT_LOGICAL,
|
||||
SHADER_OPCODE_URB_WRITE_MASKED_LOGICAL,
|
||||
SHADER_OPCODE_URB_WRITE_MASKED_PER_SLOT_LOGICAL,
|
||||
|
||||
/**
|
||||
* Return the index of the first enabled live channel and assign it to
|
||||
|
|
|
@ -864,9 +864,6 @@ fs_inst::components_read(unsigned i) const
|
|||
}
|
||||
|
||||
case SHADER_OPCODE_URB_WRITE_LOGICAL:
|
||||
case SHADER_OPCODE_URB_WRITE_PER_SLOT_LOGICAL:
|
||||
case SHADER_OPCODE_URB_WRITE_MASKED_LOGICAL:
|
||||
case SHADER_OPCODE_URB_WRITE_MASKED_PER_SLOT_LOGICAL:
|
||||
if (i == URB_LOGICAL_SRC_DATA)
|
||||
return mlen - 1 -
|
||||
unsigned(src[URB_LOGICAL_SRC_PER_SLOT_OFFSETS].file != BAD_FILE) -
|
||||
|
@ -1534,10 +1531,7 @@ fs_visitor::emit_gs_thread_end()
|
|||
|
||||
if (gs_prog_data->static_vertex_count != -1) {
|
||||
foreach_in_list_reverse(fs_inst, prev, &this->instructions) {
|
||||
if (prev->opcode == SHADER_OPCODE_URB_WRITE_LOGICAL ||
|
||||
prev->opcode == SHADER_OPCODE_URB_WRITE_MASKED_LOGICAL ||
|
||||
prev->opcode == SHADER_OPCODE_URB_WRITE_PER_SLOT_LOGICAL ||
|
||||
prev->opcode == SHADER_OPCODE_URB_WRITE_MASKED_PER_SLOT_LOGICAL) {
|
||||
if (prev->opcode == SHADER_OPCODE_URB_WRITE_LOGICAL) {
|
||||
prev->eot = true;
|
||||
|
||||
/* Delete now dead instructions. */
|
||||
|
@ -5070,11 +5064,7 @@ get_lowered_simd_width(const struct brw_compiler *compiler,
|
|||
return 8;
|
||||
|
||||
case SHADER_OPCODE_URB_READ_LOGICAL:
|
||||
case SHADER_OPCODE_URB_READ_PER_SLOT_LOGICAL:
|
||||
case SHADER_OPCODE_URB_WRITE_LOGICAL:
|
||||
case SHADER_OPCODE_URB_WRITE_PER_SLOT_LOGICAL:
|
||||
case SHADER_OPCODE_URB_WRITE_MASKED_LOGICAL:
|
||||
case SHADER_OPCODE_URB_WRITE_MASKED_PER_SLOT_LOGICAL:
|
||||
return MIN2(8, inst->exec_size);
|
||||
|
||||
case SHADER_OPCODE_QUAD_SWIZZLE: {
|
||||
|
@ -6685,7 +6675,7 @@ fs_visitor::run_tcs()
|
|||
srcs[URB_LOGICAL_SRC_HANDLE] = get_tcs_output_urb_handle();
|
||||
srcs[URB_LOGICAL_SRC_CHANNEL_MASK] = brw_imm_ud(WRITEMASK_X << 16);
|
||||
srcs[URB_LOGICAL_SRC_DATA] = brw_imm_ud(0);
|
||||
fs_inst *inst = bld.emit(SHADER_OPCODE_URB_WRITE_MASKED_LOGICAL,
|
||||
fs_inst *inst = bld.emit(SHADER_OPCODE_URB_WRITE_LOGICAL,
|
||||
reg_undef, srcs, ARRAY_SIZE(srcs));
|
||||
inst->mlen = 3;
|
||||
inst->eot = true;
|
||||
|
|
|
@ -2292,19 +2292,13 @@ fs_visitor::emit_gs_control_data_bits(const fs_reg &vertex_count)
|
|||
* Similarly, if the control data header is <= 32 bits, there is only one
|
||||
* DWord, so we can skip channel masks.
|
||||
*/
|
||||
enum opcode opcode = SHADER_OPCODE_URB_WRITE_LOGICAL;
|
||||
|
||||
fs_reg channel_mask, per_slot_offset;
|
||||
|
||||
if (gs_compile->control_data_header_size_bits > 32) {
|
||||
opcode = SHADER_OPCODE_URB_WRITE_MASKED_LOGICAL;
|
||||
if (gs_compile->control_data_header_size_bits > 32)
|
||||
channel_mask = vgrf(glsl_type::uint_type);
|
||||
}
|
||||
|
||||
if (gs_compile->control_data_header_size_bits > 128) {
|
||||
opcode = SHADER_OPCODE_URB_WRITE_MASKED_PER_SLOT_LOGICAL;
|
||||
if (gs_compile->control_data_header_size_bits > 128)
|
||||
per_slot_offset = vgrf(glsl_type::uint_type);
|
||||
}
|
||||
|
||||
/* Figure out which DWord we're trying to write to using the formula:
|
||||
*
|
||||
|
@ -2315,7 +2309,7 @@ fs_visitor::emit_gs_control_data_bits(const fs_reg &vertex_count)
|
|||
*
|
||||
* dword_index = (vertex_count - 1) >> (6 - log2(bits_per_vertex))
|
||||
*/
|
||||
if (opcode != SHADER_OPCODE_URB_WRITE_LOGICAL) {
|
||||
if (channel_mask.file != BAD_FILE || per_slot_offset.file != BAD_FILE) {
|
||||
fs_reg dword_index = bld.vgrf(BRW_REGISTER_TYPE_UD, 1);
|
||||
fs_reg prev_count = bld.vgrf(BRW_REGISTER_TYPE_UD, 1);
|
||||
abld.ADD(prev_count, vertex_count, brw_imm_ud(0xffffffffu));
|
||||
|
@ -2360,7 +2354,8 @@ fs_visitor::emit_gs_control_data_bits(const fs_reg &vertex_count)
|
|||
BRW_REGISTER_TYPE_F);
|
||||
abld.LOAD_PAYLOAD(srcs[URB_LOGICAL_SRC_DATA], sources, length, 0);
|
||||
|
||||
fs_inst *inst = abld.emit(opcode, reg_undef, srcs, ARRAY_SIZE(srcs));
|
||||
fs_inst *inst = abld.emit(SHADER_OPCODE_URB_WRITE_LOGICAL, reg_undef,
|
||||
srcs, ARRAY_SIZE(srcs));
|
||||
inst->mlen = header_size + length;
|
||||
/* We need to increment Global Offset by 256-bits to make room for
|
||||
* Broadwell's extra "Vertex Count" payload at the beginning of the
|
||||
|
@ -2652,7 +2647,7 @@ fs_visitor::emit_gs_input_load(const fs_reg &dst,
|
|||
srcs[URB_LOGICAL_SRC_PER_SLOT_OFFSETS] = indirect_offset;
|
||||
|
||||
if (first_component != 0) {
|
||||
inst = bld.emit(SHADER_OPCODE_URB_READ_PER_SLOT_LOGICAL, tmp,
|
||||
inst = bld.emit(SHADER_OPCODE_URB_READ_LOGICAL, tmp,
|
||||
srcs, ARRAY_SIZE(srcs));
|
||||
inst->size_written = read_components *
|
||||
tmp.component_size(inst->exec_size);
|
||||
|
@ -2661,7 +2656,7 @@ fs_visitor::emit_gs_input_load(const fs_reg &dst,
|
|||
offset(tmp, bld, i + first_component));
|
||||
}
|
||||
} else {
|
||||
inst = bld.emit(SHADER_OPCODE_URB_READ_PER_SLOT_LOGICAL, dst,
|
||||
inst = bld.emit(SHADER_OPCODE_URB_READ_LOGICAL, dst,
|
||||
srcs, ARRAY_SIZE(srcs));
|
||||
inst->size_written = num_components *
|
||||
dst.component_size(inst->exec_size);
|
||||
|
@ -2958,14 +2953,14 @@ fs_visitor::nir_emit_tcs_intrinsic(const fs_builder &bld,
|
|||
if (first_component != 0) {
|
||||
unsigned read_components = num_components + first_component;
|
||||
fs_reg tmp = bld.vgrf(dst.type, read_components);
|
||||
inst = bld.emit(SHADER_OPCODE_URB_READ_PER_SLOT_LOGICAL, tmp,
|
||||
inst = bld.emit(SHADER_OPCODE_URB_READ_LOGICAL, tmp,
|
||||
srcs, ARRAY_SIZE(srcs));
|
||||
for (unsigned i = 0; i < num_components; i++) {
|
||||
bld.MOV(offset(dst, bld, i),
|
||||
offset(tmp, bld, i + first_component));
|
||||
}
|
||||
} else {
|
||||
inst = bld.emit(SHADER_OPCODE_URB_READ_PER_SLOT_LOGICAL, dst,
|
||||
inst = bld.emit(SHADER_OPCODE_URB_READ_LOGICAL, dst,
|
||||
srcs, ARRAY_SIZE(srcs));
|
||||
}
|
||||
inst->offset = imm_offset;
|
||||
|
@ -3037,7 +3032,7 @@ fs_visitor::nir_emit_tcs_intrinsic(const fs_builder &bld,
|
|||
unsigned read_components =
|
||||
instr->num_components + first_component;
|
||||
fs_reg tmp = bld.vgrf(dst.type, read_components);
|
||||
inst = bld.emit(SHADER_OPCODE_URB_READ_PER_SLOT_LOGICAL, tmp,
|
||||
inst = bld.emit(SHADER_OPCODE_URB_READ_LOGICAL, tmp,
|
||||
srcs, ARRAY_SIZE(srcs));
|
||||
inst->size_written = read_components * REG_SIZE;
|
||||
for (unsigned i = 0; i < instr->num_components; i++) {
|
||||
|
@ -3045,7 +3040,7 @@ fs_visitor::nir_emit_tcs_intrinsic(const fs_builder &bld,
|
|||
offset(tmp, bld, i + first_component));
|
||||
}
|
||||
} else {
|
||||
inst = bld.emit(SHADER_OPCODE_URB_READ_PER_SLOT_LOGICAL, dst,
|
||||
inst = bld.emit(SHADER_OPCODE_URB_READ_LOGICAL, dst,
|
||||
srcs, ARRAY_SIZE(srcs));
|
||||
inst->size_written = instr->num_components * REG_SIZE;
|
||||
}
|
||||
|
@ -3067,7 +3062,6 @@ fs_visitor::nir_emit_tcs_intrinsic(const fs_builder &bld,
|
|||
break;
|
||||
|
||||
unsigned num_components = util_last_bit(mask);
|
||||
enum opcode opcode;
|
||||
|
||||
/* We can only pack two 64-bit components in a single message, so send
|
||||
* 2 messages if we have more components
|
||||
|
@ -3076,16 +3070,8 @@ fs_visitor::nir_emit_tcs_intrinsic(const fs_builder &bld,
|
|||
mask = mask << first_component;
|
||||
|
||||
fs_reg mask_reg;
|
||||
if (mask != WRITEMASK_XYZW) {
|
||||
if (mask != WRITEMASK_XYZW)
|
||||
mask_reg = brw_imm_ud(mask << 16);
|
||||
opcode = indirect_offset.file != BAD_FILE ?
|
||||
SHADER_OPCODE_URB_WRITE_MASKED_PER_SLOT_LOGICAL :
|
||||
SHADER_OPCODE_URB_WRITE_MASKED_LOGICAL;
|
||||
} else {
|
||||
opcode = indirect_offset.file != BAD_FILE ?
|
||||
SHADER_OPCODE_URB_WRITE_PER_SLOT_LOGICAL :
|
||||
SHADER_OPCODE_URB_WRITE_LOGICAL;
|
||||
}
|
||||
|
||||
fs_reg sources[4];
|
||||
|
||||
|
@ -3108,7 +3094,8 @@ fs_visitor::nir_emit_tcs_intrinsic(const fs_builder &bld,
|
|||
BRW_REGISTER_TYPE_F);
|
||||
bld.LOAD_PAYLOAD(srcs[URB_LOGICAL_SRC_DATA], sources, length, 0);
|
||||
|
||||
fs_inst *inst = bld.emit(opcode, reg_undef, srcs, ARRAY_SIZE(srcs));
|
||||
fs_inst *inst = bld.emit(SHADER_OPCODE_URB_WRITE_LOGICAL, reg_undef,
|
||||
srcs, ARRAY_SIZE(srcs));
|
||||
inst->offset = imm_offset;
|
||||
inst->mlen = header_size + length;
|
||||
break;
|
||||
|
@ -3208,14 +3195,14 @@ fs_visitor::nir_emit_tes_intrinsic(const fs_builder &bld,
|
|||
unsigned read_components =
|
||||
num_components + first_component;
|
||||
fs_reg tmp = bld.vgrf(dest.type, read_components);
|
||||
inst = bld.emit(SHADER_OPCODE_URB_READ_PER_SLOT_LOGICAL, tmp,
|
||||
inst = bld.emit(SHADER_OPCODE_URB_READ_LOGICAL, tmp,
|
||||
srcs, ARRAY_SIZE(srcs));
|
||||
for (unsigned i = 0; i < num_components; i++) {
|
||||
bld.MOV(offset(dest, bld, i),
|
||||
offset(tmp, bld, i + first_component));
|
||||
}
|
||||
} else {
|
||||
inst = bld.emit(SHADER_OPCODE_URB_READ_PER_SLOT_LOGICAL, dest,
|
||||
inst = bld.emit(SHADER_OPCODE_URB_READ_LOGICAL, dest,
|
||||
srcs, ARRAY_SIZE(srcs));
|
||||
}
|
||||
inst->mlen = 2;
|
||||
|
|
|
@ -774,7 +774,6 @@ fs_visitor::emit_urb_writes(const fs_reg &gs_vertex_count)
|
|||
else
|
||||
urb_handle = fs_reg(retype(brw_vec8_grf(1, 0), BRW_REGISTER_TYPE_UD));
|
||||
|
||||
opcode opcode = SHADER_OPCODE_URB_WRITE_LOGICAL;
|
||||
int header_size = 1;
|
||||
fs_reg per_slot_offsets;
|
||||
|
||||
|
@ -794,7 +793,6 @@ fs_visitor::emit_urb_writes(const fs_reg &gs_vertex_count)
|
|||
* Vertex Count. SIMD8 mode processes 8 different primitives at a
|
||||
* time; each may output a different number of vertices.
|
||||
*/
|
||||
opcode = SHADER_OPCODE_URB_WRITE_PER_SLOT_LOGICAL;
|
||||
header_size++;
|
||||
|
||||
/* The URB offset is in 128-bit units, so we need to multiply by 2 */
|
||||
|
@ -943,7 +941,8 @@ fs_visitor::emit_urb_writes(const fs_reg &gs_vertex_count)
|
|||
BRW_REGISTER_TYPE_F);
|
||||
abld.LOAD_PAYLOAD(srcs[URB_LOGICAL_SRC_DATA], sources, length, 0);
|
||||
|
||||
fs_inst *inst = abld.emit(opcode, reg_undef, srcs, ARRAY_SIZE(srcs));
|
||||
fs_inst *inst = abld.emit(SHADER_OPCODE_URB_WRITE_LOGICAL, reg_undef,
|
||||
srcs, ARRAY_SIZE(srcs));
|
||||
|
||||
/* For ICL WA 1805992985 one needs additional write in the end. */
|
||||
if (devinfo->ver == 11 && stage == MESA_SHADER_TESS_EVAL)
|
||||
|
@ -1038,7 +1037,7 @@ fs_visitor::emit_urb_writes(const fs_reg &gs_vertex_count)
|
|||
srcs[URB_LOGICAL_SRC_CHANNEL_MASK] = uniform_mask;
|
||||
srcs[URB_LOGICAL_SRC_DATA] = payload;
|
||||
|
||||
fs_inst *inst = bld.exec_all().emit(SHADER_OPCODE_URB_WRITE_MASKED_LOGICAL,
|
||||
fs_inst *inst = bld.exec_all().emit(SHADER_OPCODE_URB_WRITE_LOGICAL,
|
||||
reg_undef, srcs, ARRAY_SIZE(srcs));
|
||||
inst->eot = true;
|
||||
inst->mlen = 6;
|
||||
|
|
|
@ -2728,14 +2728,10 @@ fs_visitor::lower_logical_sends()
|
|||
break;
|
||||
|
||||
case SHADER_OPCODE_URB_READ_LOGICAL:
|
||||
case SHADER_OPCODE_URB_READ_PER_SLOT_LOGICAL:
|
||||
lower_urb_read_logical_send(ibld, inst);
|
||||
break;
|
||||
|
||||
case SHADER_OPCODE_URB_WRITE_LOGICAL:
|
||||
case SHADER_OPCODE_URB_WRITE_PER_SLOT_LOGICAL:
|
||||
case SHADER_OPCODE_URB_WRITE_MASKED_LOGICAL:
|
||||
case SHADER_OPCODE_URB_WRITE_MASKED_PER_SLOT_LOGICAL:
|
||||
lower_urb_write_logical_send(ibld, inst);
|
||||
break;
|
||||
|
||||
|
|
|
@ -908,7 +908,7 @@ emit_urb_direct_writes(const fs_builder &bld, nir_intrinsic_instr *instr,
|
|||
BRW_REGISTER_TYPE_F);
|
||||
bld.LOAD_PAYLOAD(srcs[URB_LOGICAL_SRC_DATA], payload_srcs, length, 0);
|
||||
|
||||
fs_inst *inst = bld8.emit(SHADER_OPCODE_URB_WRITE_MASKED_LOGICAL,
|
||||
fs_inst *inst = bld8.emit(SHADER_OPCODE_URB_WRITE_LOGICAL,
|
||||
reg_undef, srcs, ARRAY_SIZE(srcs));
|
||||
inst->mlen = 2 + length;
|
||||
inst->offset = urb_global_offset;
|
||||
|
@ -936,7 +936,7 @@ emit_urb_direct_writes(const fs_builder &bld, nir_intrinsic_instr *instr,
|
|||
BRW_REGISTER_TYPE_F);
|
||||
bld.LOAD_PAYLOAD(srcs[URB_LOGICAL_SRC_DATA], payload_srcs, length, 0);
|
||||
|
||||
fs_inst *inst = bld8.emit(SHADER_OPCODE_URB_WRITE_MASKED_LOGICAL,
|
||||
fs_inst *inst = bld8.emit(SHADER_OPCODE_URB_WRITE_LOGICAL,
|
||||
reg_undef, srcs, ARRAY_SIZE(srcs));
|
||||
inst->mlen = 2 + length;
|
||||
inst->offset = urb_global_offset;
|
||||
|
@ -1002,7 +1002,7 @@ emit_urb_indirect_writes(const fs_builder &bld, nir_intrinsic_instr *instr,
|
|||
BRW_REGISTER_TYPE_F);
|
||||
bld.LOAD_PAYLOAD(srcs[URB_LOGICAL_SRC_DATA], payload_srcs, length, 0);
|
||||
|
||||
fs_inst *inst = bld8.emit(SHADER_OPCODE_URB_WRITE_MASKED_LOGICAL,
|
||||
fs_inst *inst = bld8.emit(SHADER_OPCODE_URB_WRITE_LOGICAL,
|
||||
reg_undef, srcs, ARRAY_SIZE(srcs));
|
||||
inst->mlen = 3 + length;
|
||||
inst->offset = 0;
|
||||
|
@ -1102,7 +1102,7 @@ emit_urb_indirect_reads(const fs_builder &bld, nir_intrinsic_instr *instr,
|
|||
|
||||
fs_reg data = bld8.vgrf(BRW_REGISTER_TYPE_UD, 4);
|
||||
|
||||
fs_inst *inst = bld8.emit(SHADER_OPCODE_URB_READ_PER_SLOT_LOGICAL,
|
||||
fs_inst *inst = bld8.emit(SHADER_OPCODE_URB_READ_LOGICAL,
|
||||
data, srcs, ARRAY_SIZE(srcs));
|
||||
inst->mlen = 2;
|
||||
inst->offset = 0;
|
||||
|
|
|
@ -374,16 +374,8 @@ brw_instruction_name(const struct brw_isa_info *isa, enum opcode op)
|
|||
|
||||
case SHADER_OPCODE_URB_WRITE_LOGICAL:
|
||||
return "urb_write_logical";
|
||||
case SHADER_OPCODE_URB_WRITE_PER_SLOT_LOGICAL:
|
||||
return "urb_write_per_slot_logical";
|
||||
case SHADER_OPCODE_URB_WRITE_MASKED_LOGICAL:
|
||||
return "urb_write_masked_logical";
|
||||
case SHADER_OPCODE_URB_WRITE_MASKED_PER_SLOT_LOGICAL:
|
||||
return "urb_write_masked_per_slot_logical";
|
||||
case SHADER_OPCODE_URB_READ_LOGICAL:
|
||||
return "urb_read_logical";
|
||||
case SHADER_OPCODE_URB_READ_PER_SLOT_LOGICAL:
|
||||
return "urb_read_per_slot_logical";
|
||||
|
||||
case SHADER_OPCODE_FIND_LIVE_CHANNEL:
|
||||
return "find_live_channel";
|
||||
|
@ -1137,9 +1129,6 @@ backend_instruction::has_side_effects() const
|
|||
case SHADER_OPCODE_MEMORY_FENCE:
|
||||
case SHADER_OPCODE_INTERLOCK:
|
||||
case SHADER_OPCODE_URB_WRITE_LOGICAL:
|
||||
case SHADER_OPCODE_URB_WRITE_PER_SLOT_LOGICAL:
|
||||
case SHADER_OPCODE_URB_WRITE_MASKED_LOGICAL:
|
||||
case SHADER_OPCODE_URB_WRITE_MASKED_PER_SLOT_LOGICAL:
|
||||
case FS_OPCODE_FB_WRITE:
|
||||
case FS_OPCODE_FB_WRITE_LOGICAL:
|
||||
case FS_OPCODE_REP_FB_WRITE:
|
||||
|
|
Loading…
Reference in New Issue