intel/fs: Add surface OWORD BLOCK opcodes
Reviewed-by: Jason Ekstrand <jason@jlekstrand.net> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7448>
This commit is contained in:
parent
296137df53
commit
d372abe397
|
@ -781,6 +781,26 @@ brw_dp_dword_scattered_rw_desc(const struct gen_device_info *devinfo,
|
||||||
return brw_dp_surface_desc(devinfo, msg_type, msg_control);
|
return brw_dp_surface_desc(devinfo, msg_type, msg_control);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline uint32_t
|
||||||
|
brw_dp_oword_block_rw_desc(const struct gen_device_info *devinfo,
|
||||||
|
bool align_16B,
|
||||||
|
unsigned num_dwords,
|
||||||
|
bool write)
|
||||||
|
{
|
||||||
|
/* Writes can only have addresses aligned by OWORDs (16 Bytes). */
|
||||||
|
assert(!write || align_16B);
|
||||||
|
|
||||||
|
const unsigned msg_type =
|
||||||
|
write ? GEN7_DATAPORT_DC_OWORD_BLOCK_WRITE :
|
||||||
|
align_16B ? GEN7_DATAPORT_DC_OWORD_BLOCK_READ :
|
||||||
|
GEN7_DATAPORT_DC_UNALIGNED_OWORD_BLOCK_READ;
|
||||||
|
|
||||||
|
const unsigned msg_control =
|
||||||
|
SET_BITS(BRW_DATAPORT_OWORD_BLOCK_DWORDS(num_dwords), 2, 0);
|
||||||
|
|
||||||
|
return brw_dp_surface_desc(devinfo, msg_type, msg_control);
|
||||||
|
}
|
||||||
|
|
||||||
static inline uint32_t
|
static inline uint32_t
|
||||||
brw_dp_a64_untyped_surface_rw_desc(const struct gen_device_info *devinfo,
|
brw_dp_a64_untyped_surface_rw_desc(const struct gen_device_info *devinfo,
|
||||||
unsigned exec_size, /**< 0 for SIMD4x2 */
|
unsigned exec_size, /**< 0 for SIMD4x2 */
|
||||||
|
|
|
@ -415,6 +415,10 @@ enum opcode {
|
||||||
VEC4_OPCODE_UNTYPED_SURFACE_WRITE,
|
VEC4_OPCODE_UNTYPED_SURFACE_WRITE,
|
||||||
SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL,
|
SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL,
|
||||||
|
|
||||||
|
SHADER_OPCODE_OWORD_BLOCK_READ_LOGICAL,
|
||||||
|
SHADER_OPCODE_UNALIGNED_OWORD_BLOCK_READ_LOGICAL,
|
||||||
|
SHADER_OPCODE_OWORD_BLOCK_WRITE_LOGICAL,
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Untyped A64 surface access opcodes.
|
* Untyped A64 surface access opcodes.
|
||||||
*
|
*
|
||||||
|
|
|
@ -839,6 +839,21 @@ fs_inst::components_read(unsigned i) const
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
case SHADER_OPCODE_OWORD_BLOCK_READ_LOGICAL:
|
||||||
|
case SHADER_OPCODE_UNALIGNED_OWORD_BLOCK_READ_LOGICAL:
|
||||||
|
assert(src[SURFACE_LOGICAL_SRC_IMM_ARG].file == IMM);
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
case SHADER_OPCODE_OWORD_BLOCK_WRITE_LOGICAL:
|
||||||
|
assert(src[SURFACE_LOGICAL_SRC_IMM_ARG].file == IMM);
|
||||||
|
if (i == SURFACE_LOGICAL_SRC_DATA) {
|
||||||
|
const unsigned comps = src[SURFACE_LOGICAL_SRC_IMM_ARG].ud / exec_size;
|
||||||
|
assert(comps > 0);
|
||||||
|
return comps;
|
||||||
|
} else {
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
case SHADER_OPCODE_A64_UNTYPED_WRITE_LOGICAL:
|
case SHADER_OPCODE_A64_UNTYPED_WRITE_LOGICAL:
|
||||||
assert(src[2].file == IMM);
|
assert(src[2].file == IMM);
|
||||||
return i == 1 ? src[2].ud : 1;
|
return i == 1 ? src[2].ud : 1;
|
||||||
|
@ -5367,6 +5382,39 @@ emit_predicate_on_sample_mask(const fs_builder &bld, fs_inst *inst)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
setup_surface_descriptors(const fs_builder &bld, fs_inst *inst, uint32_t desc,
|
||||||
|
const fs_reg &surface, const fs_reg &surface_handle)
|
||||||
|
{
|
||||||
|
const gen_device_info *devinfo = bld.shader->devinfo;
|
||||||
|
|
||||||
|
/* We must have exactly one of surface and surface_handle */
|
||||||
|
assert((surface.file == BAD_FILE) != (surface_handle.file == BAD_FILE));
|
||||||
|
|
||||||
|
if (surface.file == IMM) {
|
||||||
|
inst->desc = desc | (surface.ud & 0xff);
|
||||||
|
inst->src[0] = brw_imm_ud(0);
|
||||||
|
inst->src[1] = brw_imm_ud(0); /* ex_desc */
|
||||||
|
} else if (surface_handle.file != BAD_FILE) {
|
||||||
|
/* Bindless surface */
|
||||||
|
assert(devinfo->gen >= 9);
|
||||||
|
inst->desc = desc | GEN9_BTI_BINDLESS;
|
||||||
|
inst->src[0] = brw_imm_ud(0);
|
||||||
|
|
||||||
|
/* We assume that the driver provided the handle in the top 20 bits so
|
||||||
|
* we can use the surface handle directly as the extended descriptor.
|
||||||
|
*/
|
||||||
|
inst->src[1] = retype(surface_handle, BRW_REGISTER_TYPE_UD);
|
||||||
|
} else {
|
||||||
|
inst->desc = desc;
|
||||||
|
const fs_builder ubld = bld.exec_all().group(1, 0);
|
||||||
|
fs_reg tmp = ubld.vgrf(BRW_REGISTER_TYPE_UD);
|
||||||
|
ubld.AND(tmp, surface, brw_imm_ud(0xff));
|
||||||
|
inst->src[0] = component(tmp, 0);
|
||||||
|
inst->src[1] = brw_imm_ud(0); /* ex_desc */
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
lower_surface_logical_send(const fs_builder &bld, fs_inst *inst)
|
lower_surface_logical_send(const fs_builder &bld, fs_inst *inst)
|
||||||
{
|
{
|
||||||
|
@ -5384,9 +5432,6 @@ lower_surface_logical_send(const fs_builder &bld, fs_inst *inst)
|
||||||
assert(arg.file == IMM);
|
assert(arg.file == IMM);
|
||||||
assert(allow_sample_mask.file == IMM);
|
assert(allow_sample_mask.file == IMM);
|
||||||
|
|
||||||
/* We must have exactly one of surface and surface_handle */
|
|
||||||
assert((surface.file == BAD_FILE) != (surface_handle.file == BAD_FILE));
|
|
||||||
|
|
||||||
/* Calculate the total number of components of the payload. */
|
/* Calculate the total number of components of the payload. */
|
||||||
const unsigned addr_sz = inst->components_read(SURFACE_LOGICAL_SRC_ADDRESS);
|
const unsigned addr_sz = inst->components_read(SURFACE_LOGICAL_SRC_ADDRESS);
|
||||||
const unsigned src_sz = inst->components_read(SURFACE_LOGICAL_SRC_DATA);
|
const unsigned src_sz = inst->components_read(SURFACE_LOGICAL_SRC_DATA);
|
||||||
|
@ -5608,28 +5653,7 @@ lower_surface_logical_send(const fs_builder &bld, fs_inst *inst)
|
||||||
|
|
||||||
/* Set up SFID and descriptors */
|
/* Set up SFID and descriptors */
|
||||||
inst->sfid = sfid;
|
inst->sfid = sfid;
|
||||||
inst->desc = desc;
|
setup_surface_descriptors(bld, inst, desc, surface, surface_handle);
|
||||||
if (surface.file == IMM) {
|
|
||||||
inst->desc |= surface.ud & 0xff;
|
|
||||||
inst->src[0] = brw_imm_ud(0);
|
|
||||||
inst->src[1] = brw_imm_ud(0); /* ex_desc */
|
|
||||||
} else if (surface_handle.file != BAD_FILE) {
|
|
||||||
/* Bindless surface */
|
|
||||||
assert(devinfo->gen >= 9);
|
|
||||||
inst->desc |= GEN9_BTI_BINDLESS;
|
|
||||||
inst->src[0] = brw_imm_ud(0);
|
|
||||||
|
|
||||||
/* We assume that the driver provided the handle in the top 20 bits so
|
|
||||||
* we can use the surface handle directly as the extended descriptor.
|
|
||||||
*/
|
|
||||||
inst->src[1] = retype(surface_handle, BRW_REGISTER_TYPE_UD);
|
|
||||||
} else {
|
|
||||||
const fs_builder ubld = bld.exec_all().group(1, 0);
|
|
||||||
fs_reg tmp = ubld.vgrf(BRW_REGISTER_TYPE_UD);
|
|
||||||
ubld.AND(tmp, surface, brw_imm_ud(0xff));
|
|
||||||
inst->src[0] = component(tmp, 0);
|
|
||||||
inst->src[1] = brw_imm_ud(0); /* ex_desc */
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Finally, the payload */
|
/* Finally, the payload */
|
||||||
inst->src[2] = payload;
|
inst->src[2] = payload;
|
||||||
|
@ -5638,6 +5662,75 @@ lower_surface_logical_send(const fs_builder &bld, fs_inst *inst)
|
||||||
inst->resize_sources(4);
|
inst->resize_sources(4);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
lower_surface_block_logical_send(const fs_builder &bld, fs_inst *inst)
|
||||||
|
{
|
||||||
|
const gen_device_info *devinfo = bld.shader->devinfo;
|
||||||
|
assert(devinfo->gen >= 9);
|
||||||
|
|
||||||
|
/* Get the logical send arguments. */
|
||||||
|
const fs_reg &addr = inst->src[SURFACE_LOGICAL_SRC_ADDRESS];
|
||||||
|
const fs_reg &src = inst->src[SURFACE_LOGICAL_SRC_DATA];
|
||||||
|
const fs_reg &surface = inst->src[SURFACE_LOGICAL_SRC_SURFACE];
|
||||||
|
const fs_reg &surface_handle = inst->src[SURFACE_LOGICAL_SRC_SURFACE_HANDLE];
|
||||||
|
const fs_reg &arg = inst->src[SURFACE_LOGICAL_SRC_IMM_ARG];
|
||||||
|
assert(arg.file == IMM);
|
||||||
|
assert(inst->src[SURFACE_LOGICAL_SRC_IMM_DIMS].file == BAD_FILE);
|
||||||
|
assert(inst->src[SURFACE_LOGICAL_SRC_ALLOW_SAMPLE_MASK].file == BAD_FILE);
|
||||||
|
|
||||||
|
const bool is_stateless =
|
||||||
|
surface.file == IMM && (surface.ud == BRW_BTI_STATELESS ||
|
||||||
|
surface.ud == GEN8_BTI_STATELESS_NON_COHERENT);
|
||||||
|
|
||||||
|
const bool has_side_effects = inst->has_side_effects();
|
||||||
|
|
||||||
|
const bool align_16B =
|
||||||
|
inst->opcode != SHADER_OPCODE_UNALIGNED_OWORD_BLOCK_READ_LOGICAL;
|
||||||
|
|
||||||
|
const bool write = inst->opcode == SHADER_OPCODE_OWORD_BLOCK_WRITE_LOGICAL;
|
||||||
|
|
||||||
|
/* The address is stored in the header. See MH_A32_GO and MH_BTS_GO. */
|
||||||
|
fs_builder ubld = bld.exec_all().group(8, 0);
|
||||||
|
fs_reg header = ubld.vgrf(BRW_REGISTER_TYPE_UD);
|
||||||
|
|
||||||
|
if (is_stateless)
|
||||||
|
ubld.emit(SHADER_OPCODE_SCRATCH_HEADER, header);
|
||||||
|
else
|
||||||
|
ubld.MOV(header, brw_imm_d(0));
|
||||||
|
|
||||||
|
/* Address in OWord units when aligned to OWords. */
|
||||||
|
if (align_16B)
|
||||||
|
ubld.group(1, 0).SHR(component(header, 2), addr, brw_imm_ud(4));
|
||||||
|
else
|
||||||
|
ubld.group(1, 0).MOV(component(header, 2), addr);
|
||||||
|
|
||||||
|
fs_reg data;
|
||||||
|
unsigned ex_mlen = 0;
|
||||||
|
if (write) {
|
||||||
|
const unsigned src_sz = inst->components_read(SURFACE_LOGICAL_SRC_DATA);
|
||||||
|
data = retype(bld.move_to_vgrf(src, src_sz), BRW_REGISTER_TYPE_UD);
|
||||||
|
ex_mlen = src_sz * type_sz(src.type) * inst->exec_size / REG_SIZE;
|
||||||
|
}
|
||||||
|
|
||||||
|
inst->opcode = SHADER_OPCODE_SEND;
|
||||||
|
inst->mlen = 1;
|
||||||
|
inst->ex_mlen = ex_mlen;
|
||||||
|
inst->header_size = 1;
|
||||||
|
inst->send_has_side_effects = has_side_effects;
|
||||||
|
inst->send_is_volatile = !has_side_effects;
|
||||||
|
|
||||||
|
inst->sfid = GEN7_SFID_DATAPORT_DATA_CACHE;
|
||||||
|
|
||||||
|
const uint32_t desc = brw_dp_oword_block_rw_desc(devinfo, align_16B,
|
||||||
|
arg.ud, write);
|
||||||
|
setup_surface_descriptors(bld, inst, desc, surface, surface_handle);
|
||||||
|
|
||||||
|
inst->src[2] = header;
|
||||||
|
inst->src[3] = data;
|
||||||
|
|
||||||
|
inst->resize_sources(4);
|
||||||
|
}
|
||||||
|
|
||||||
static fs_reg
|
static fs_reg
|
||||||
emit_a64_oword_block_header(const fs_builder &bld, const fs_reg &addr)
|
emit_a64_oword_block_header(const fs_builder &bld, const fs_reg &addr)
|
||||||
{
|
{
|
||||||
|
@ -6019,6 +6112,12 @@ fs_visitor::lower_logical_sends()
|
||||||
lower_surface_logical_send(ibld, inst);
|
lower_surface_logical_send(ibld, inst);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case SHADER_OPCODE_OWORD_BLOCK_READ_LOGICAL:
|
||||||
|
case SHADER_OPCODE_UNALIGNED_OWORD_BLOCK_READ_LOGICAL:
|
||||||
|
case SHADER_OPCODE_OWORD_BLOCK_WRITE_LOGICAL:
|
||||||
|
lower_surface_block_logical_send(ibld, inst);
|
||||||
|
break;
|
||||||
|
|
||||||
case SHADER_OPCODE_A64_UNTYPED_WRITE_LOGICAL:
|
case SHADER_OPCODE_A64_UNTYPED_WRITE_LOGICAL:
|
||||||
case SHADER_OPCODE_A64_UNTYPED_READ_LOGICAL:
|
case SHADER_OPCODE_A64_UNTYPED_READ_LOGICAL:
|
||||||
case SHADER_OPCODE_A64_OWORD_BLOCK_READ_LOGICAL:
|
case SHADER_OPCODE_A64_OWORD_BLOCK_READ_LOGICAL:
|
||||||
|
|
|
@ -425,6 +425,7 @@ schedule_node::set_latency_gen7(bool is_haswell)
|
||||||
case GEN7_SFID_DATAPORT_DATA_CACHE:
|
case GEN7_SFID_DATAPORT_DATA_CACHE:
|
||||||
switch ((inst->desc >> 14) & 0x1f) {
|
switch ((inst->desc >> 14) & 0x1f) {
|
||||||
case BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ:
|
case BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ:
|
||||||
|
case GEN7_DATAPORT_DC_UNALIGNED_OWORD_BLOCK_READ:
|
||||||
case GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE:
|
case GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE:
|
||||||
/* We have no data for this but assume it's a little faster than
|
/* We have no data for this but assume it's a little faster than
|
||||||
* untyped surface read/write.
|
* untyped surface read/write.
|
||||||
|
|
|
@ -301,6 +301,12 @@ brw_instruction_name(const struct gen_device_info *devinfo, enum opcode op)
|
||||||
return "untyped_surface_write";
|
return "untyped_surface_write";
|
||||||
case SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL:
|
case SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL:
|
||||||
return "untyped_surface_write_logical";
|
return "untyped_surface_write_logical";
|
||||||
|
case SHADER_OPCODE_OWORD_BLOCK_READ_LOGICAL:
|
||||||
|
return "oword_block_read_logical";
|
||||||
|
case SHADER_OPCODE_UNALIGNED_OWORD_BLOCK_READ_LOGICAL:
|
||||||
|
return "unaligned_oword_block_read_logical";
|
||||||
|
case SHADER_OPCODE_OWORD_BLOCK_WRITE_LOGICAL:
|
||||||
|
return "oword_block_write_logical";
|
||||||
case SHADER_OPCODE_A64_UNTYPED_READ_LOGICAL:
|
case SHADER_OPCODE_A64_UNTYPED_READ_LOGICAL:
|
||||||
return "a64_untyped_read_logical";
|
return "a64_untyped_read_logical";
|
||||||
case SHADER_OPCODE_A64_OWORD_BLOCK_READ_LOGICAL:
|
case SHADER_OPCODE_A64_OWORD_BLOCK_READ_LOGICAL:
|
||||||
|
@ -1094,6 +1100,7 @@ backend_instruction::has_side_effects() const
|
||||||
case SHADER_OPCODE_RND_MODE:
|
case SHADER_OPCODE_RND_MODE:
|
||||||
case SHADER_OPCODE_FLOAT_CONTROL_MODE:
|
case SHADER_OPCODE_FLOAT_CONTROL_MODE:
|
||||||
case FS_OPCODE_SCHEDULING_FENCE:
|
case FS_OPCODE_SCHEDULING_FENCE:
|
||||||
|
case SHADER_OPCODE_OWORD_BLOCK_WRITE_LOGICAL:
|
||||||
case SHADER_OPCODE_A64_OWORD_BLOCK_WRITE_LOGICAL:
|
case SHADER_OPCODE_A64_OWORD_BLOCK_WRITE_LOGICAL:
|
||||||
return true;
|
return true;
|
||||||
default:
|
default:
|
||||||
|
|
Loading…
Reference in New Issue