intel/fs: Add support for bindless texture ops
We add two new texture sources for bindless surface and sampler handles. Bindless surface handles are expected to be pre-shifted so that the 20-bit surface state table index is in the top 20 bits of the 32-bit handle. This lets us avoid any extra shifts in the shader. Bindless sampler handles are 32-byte aligned byte offsets from the general state base address. We use 32-byte alignment instead of 16-byte alignment to avoid having to use more indirect messages than needed. This means we can't tightly pack samplers, but that's probably not a big deal.

Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Caio Marcelo de Oliveira Filho <caio.oliveira@intel.com>
This commit is contained in:
parent
2edf29b933
commit
843286d324
|
@ -835,6 +835,10 @@ enum tex_logical_srcs {
|
|||
TEX_LOGICAL_SRC_SURFACE,
|
||||
/** Texture sampler index */
|
||||
TEX_LOGICAL_SRC_SAMPLER,
|
||||
/** Texture surface bindless handle */
|
||||
TEX_LOGICAL_SRC_SURFACE_HANDLE,
|
||||
/** Texture sampler bindless handle */
|
||||
TEX_LOGICAL_SRC_SAMPLER_HANDLE,
|
||||
/** Texel offset for gathers */
|
||||
TEX_LOGICAL_SRC_TG4_OFFSET,
|
||||
/** REQUIRED: Number of coordinate components (as UD immediate) */
|
||||
|
@ -1224,6 +1228,7 @@ enum brw_message_target {
|
|||
*/
|
||||
#define GEN8_BTI_STATELESS_IA_COHERENT 255
|
||||
#define GEN8_BTI_STATELESS_NON_COHERENT 253
|
||||
#define GEN9_BTI_BINDLESS 252
|
||||
|
||||
/* Dataport atomic operations for Untyped Atomic Integer Operation message
|
||||
* (and others).
|
||||
|
|
|
@ -4685,6 +4685,8 @@ lower_sampler_logical_send_gen7(const fs_builder &bld, fs_inst *inst, opcode op,
|
|||
const fs_reg &mcs,
|
||||
const fs_reg &surface,
|
||||
const fs_reg &sampler,
|
||||
const fs_reg &surface_handle,
|
||||
const fs_reg &sampler_handle,
|
||||
const fs_reg &tg4_offset,
|
||||
unsigned coord_components,
|
||||
unsigned grad_components)
|
||||
|
@ -4697,9 +4699,14 @@ lower_sampler_logical_send_gen7(const fs_builder &bld, fs_inst *inst, opcode op,
|
|||
for (unsigned i = 0; i < ARRAY_SIZE(sources); i++)
|
||||
sources[i] = bld.vgrf(BRW_REGISTER_TYPE_F);
|
||||
|
||||
/* We must have exactly one of surface and surface_handle, and exactly one
 * of sampler and sampler_handle.
 */
|
||||
assert((surface.file == BAD_FILE) != (surface_handle.file == BAD_FILE));
|
||||
assert((sampler.file == BAD_FILE) != (sampler_handle.file == BAD_FILE));
|
||||
|
||||
if (op == SHADER_OPCODE_TG4 || op == SHADER_OPCODE_TG4_OFFSET ||
|
||||
inst->offset != 0 || inst->eot ||
|
||||
op == SHADER_OPCODE_SAMPLEINFO ||
|
||||
sampler_handle.file != BAD_FILE ||
|
||||
is_high_sampler(devinfo, sampler)) {
|
||||
/* For general texture offsets (no txf workaround), we need a header to
|
||||
* put them in.
|
||||
|
@ -4739,7 +4746,21 @@ lower_sampler_logical_send_gen7(const fs_builder &bld, fs_inst *inst, opcode op,
|
|||
ubld1.MOV(component(header, 2), brw_imm_ud(0));
|
||||
}
|
||||
|
||||
if (is_high_sampler(devinfo, sampler)) {
|
||||
if (sampler_handle.file != BAD_FILE) {
|
||||
/* Bindless sampler handles aren't relative to the sampler state
|
||||
* pointer passed into the shader through SAMPLER_STATE_POINTERS_*.
|
||||
* Instead, each handle is a byte offset relative to dynamic state base
|
||||
* address.
|
||||
*
|
||||
* Sampler states are 16 bytes each and the pointer we give here has
|
||||
* to be 32-byte aligned. In order to avoid more indirect messages
|
||||
* than required, we assume that all bindless sampler states are
|
||||
* 32-byte aligned. This sacrifices a bit of general state base
|
||||
* address space but means we can do something more efficient in the
|
||||
* shader.
|
||||
*/
|
||||
ubld1.MOV(component(header, 3), sampler_handle);
|
||||
} else if (is_high_sampler(devinfo, sampler)) {
|
||||
if (sampler.file == BRW_IMMEDIATE_VALUE) {
|
||||
assert(sampler.ud >= 16);
|
||||
const int sampler_state_size = 16; /* 16 bytes */
|
||||
|
@ -4942,14 +4963,42 @@ lower_sampler_logical_send_gen7(const fs_builder &bld, fs_inst *inst, opcode op,
|
|||
}
|
||||
|
||||
inst->sfid = BRW_SFID_SAMPLER;
|
||||
if (surface.file == IMM && sampler.file == IMM) {
|
||||
if (surface.file == IMM &&
|
||||
(sampler.file == IMM || sampler_handle.file != BAD_FILE)) {
|
||||
inst->desc = brw_sampler_desc(devinfo,
|
||||
surface.ud + base_binding_table_index,
|
||||
sampler.ud % 16,
|
||||
sampler.file == IMM ? sampler.ud % 16 : 0,
|
||||
msg_type,
|
||||
simd_mode,
|
||||
0 /* return_format unused on gen7+ */);
|
||||
inst->src[0] = brw_imm_ud(0);
|
||||
inst->src[1] = brw_imm_ud(0); /* ex_desc */
|
||||
} else if (surface_handle.file != BAD_FILE) {
|
||||
/* Bindless surface */
|
||||
assert(devinfo->gen >= 9);
|
||||
inst->desc = brw_sampler_desc(devinfo,
|
||||
GEN9_BTI_BINDLESS,
|
||||
sampler.file == IMM ? sampler.ud % 16 : 0,
|
||||
msg_type,
|
||||
simd_mode,
|
||||
0 /* return_format unused on gen7+ */);
|
||||
|
||||
/* For bindless samplers, the entire address is included in the message
|
||||
* header so we can leave the portion in the message descriptor 0.
|
||||
*/
|
||||
if (sampler_handle.file != BAD_FILE || sampler.file == IMM) {
|
||||
inst->src[0] = brw_imm_ud(0);
|
||||
} else {
|
||||
const fs_builder ubld = bld.group(1, 0).exec_all();
|
||||
fs_reg desc = ubld.vgrf(BRW_REGISTER_TYPE_UD);
|
||||
ubld.SHL(desc, sampler, brw_imm_ud(8));
|
||||
inst->src[0] = desc;
|
||||
}
|
||||
|
||||
/* We assume that the driver provided the handle in the top 20 bits so
|
||||
* we can use the surface handle directly as the extended descriptor.
|
||||
*/
|
||||
inst->src[1] = retype(surface_handle, BRW_REGISTER_TYPE_UD);
|
||||
} else {
|
||||
/* Immediate portion of the descriptor */
|
||||
inst->desc = brw_sampler_desc(devinfo,
|
||||
|
@ -4964,7 +5013,9 @@ lower_sampler_logical_send_gen7(const fs_builder &bld, fs_inst *inst, opcode op,
|
|||
/* This case is common in GL */
|
||||
ubld.MUL(desc, surface, brw_imm_ud(0x101));
|
||||
} else {
|
||||
if (sampler.file == IMM) {
|
||||
if (sampler_handle.file != BAD_FILE) {
|
||||
ubld.MOV(desc, surface);
|
||||
} else if (sampler.file == IMM) {
|
||||
ubld.OR(desc, surface, brw_imm_ud(sampler.ud << 8));
|
||||
} else {
|
||||
ubld.SHL(desc, sampler, brw_imm_ud(8));
|
||||
|
@ -4976,8 +5027,8 @@ lower_sampler_logical_send_gen7(const fs_builder &bld, fs_inst *inst, opcode op,
|
|||
ubld.AND(desc, desc, brw_imm_ud(0xfff));
|
||||
|
||||
inst->src[0] = component(desc, 0);
|
||||
inst->src[1] = brw_imm_ud(0); /* ex_desc */
|
||||
}
|
||||
inst->src[1] = brw_imm_ud(0); /* ex_desc */
|
||||
|
||||
inst->src[2] = src_payload;
|
||||
inst->resize_sources(3);
|
||||
|
@ -5009,6 +5060,8 @@ lower_sampler_logical_send(const fs_builder &bld, fs_inst *inst, opcode op)
|
|||
const fs_reg &mcs = inst->src[TEX_LOGICAL_SRC_MCS];
|
||||
const fs_reg &surface = inst->src[TEX_LOGICAL_SRC_SURFACE];
|
||||
const fs_reg &sampler = inst->src[TEX_LOGICAL_SRC_SAMPLER];
|
||||
const fs_reg &surface_handle = inst->src[TEX_LOGICAL_SRC_SURFACE_HANDLE];
|
||||
const fs_reg &sampler_handle = inst->src[TEX_LOGICAL_SRC_SAMPLER_HANDLE];
|
||||
const fs_reg &tg4_offset = inst->src[TEX_LOGICAL_SRC_TG4_OFFSET];
|
||||
assert(inst->src[TEX_LOGICAL_SRC_COORD_COMPONENTS].file == IMM);
|
||||
const unsigned coord_components = inst->src[TEX_LOGICAL_SRC_COORD_COMPONENTS].ud;
|
||||
|
@ -5019,7 +5072,9 @@ lower_sampler_logical_send(const fs_builder &bld, fs_inst *inst, opcode op)
|
|||
lower_sampler_logical_send_gen7(bld, inst, op, coordinate,
|
||||
shadow_c, lod, lod2, min_lod,
|
||||
sample_index,
|
||||
mcs, surface, sampler, tg4_offset,
|
||||
mcs, surface, sampler,
|
||||
surface_handle, sampler_handle,
|
||||
tg4_offset,
|
||||
coord_components, grad_components);
|
||||
} else if (devinfo->gen >= 5) {
|
||||
lower_sampler_logical_send_gen5(bld, inst, op, coordinate,
|
||||
|
|
|
@ -183,7 +183,8 @@ public:
|
|||
void emit_interpolation_setup_gen6();
|
||||
void compute_sample_position(fs_reg dst, fs_reg int_sample_pos);
|
||||
fs_reg emit_mcs_fetch(const fs_reg &coordinate, unsigned components,
|
||||
const fs_reg &sampler);
|
||||
const fs_reg &texture,
|
||||
const fs_reg &texture_handle);
|
||||
void emit_gen6_gather_wa(uint8_t wa, fs_reg dst);
|
||||
fs_reg resolve_source_modifiers(const fs_reg &src);
|
||||
void emit_discard_jump();
|
||||
|
|
|
@ -3201,7 +3201,7 @@ fs_visitor::emit_non_coherent_fb_read(const fs_builder &bld, const fs_reg &dst,
|
|||
|
||||
const fs_reg sample = nir_system_values[SYSTEM_VALUE_SAMPLE_ID];
|
||||
const fs_reg mcs = wm_key->multisample_fbo ?
|
||||
emit_mcs_fetch(coords, 3, brw_imm_ud(surface)) : fs_reg();
|
||||
emit_mcs_fetch(coords, 3, brw_imm_ud(surface), fs_reg()) : fs_reg();
|
||||
|
||||
/* Use either a normal or a CMS texel fetch message depending on whether
|
||||
* the framebuffer is single or multisample. On SKL+ use the wide CMS
|
||||
|
@ -5237,6 +5237,18 @@ fs_visitor::nir_emit_texture(const fs_builder &bld, nir_tex_instr *instr)
|
|||
break;
|
||||
}
|
||||
|
||||
case nir_tex_src_texture_handle:
|
||||
assert(nir_tex_instr_src_index(instr, nir_tex_src_texture_offset) == -1);
|
||||
srcs[TEX_LOGICAL_SRC_SURFACE] = fs_reg();
|
||||
srcs[TEX_LOGICAL_SRC_SURFACE_HANDLE] = bld.emit_uniformize(src);
|
||||
break;
|
||||
|
||||
case nir_tex_src_sampler_handle:
|
||||
assert(nir_tex_instr_src_index(instr, nir_tex_src_sampler_offset) == -1);
|
||||
srcs[TEX_LOGICAL_SRC_SAMPLER] = fs_reg();
|
||||
srcs[TEX_LOGICAL_SRC_SAMPLER_HANDLE] = bld.emit_uniformize(src);
|
||||
break;
|
||||
|
||||
case nir_tex_src_ms_mcs:
|
||||
assert(instr->op == nir_texop_txf_ms);
|
||||
srcs[TEX_LOGICAL_SRC_MCS] = retype(src, BRW_REGISTER_TYPE_D);
|
||||
|
@ -5266,7 +5278,8 @@ fs_visitor::nir_emit_texture(const fs_builder &bld, nir_tex_instr *instr)
|
|||
srcs[TEX_LOGICAL_SRC_MCS] =
|
||||
emit_mcs_fetch(srcs[TEX_LOGICAL_SRC_COORDINATE],
|
||||
instr->coord_components,
|
||||
srcs[TEX_LOGICAL_SRC_SURFACE]);
|
||||
srcs[TEX_LOGICAL_SRC_SURFACE],
|
||||
srcs[TEX_LOGICAL_SRC_SURFACE_HANDLE]);
|
||||
} else {
|
||||
srcs[TEX_LOGICAL_SRC_MCS] = brw_imm_ud(0u);
|
||||
}
|
||||
|
|
|
@ -35,7 +35,8 @@ using namespace brw;
|
|||
/* Sample from the MCS surface attached to this multisample texture. */
|
||||
fs_reg
|
||||
fs_visitor::emit_mcs_fetch(const fs_reg &coordinate, unsigned components,
|
||||
const fs_reg &texture)
|
||||
const fs_reg &texture,
|
||||
const fs_reg &texture_handle)
|
||||
{
|
||||
const fs_reg dest = vgrf(glsl_type::uvec4_type);
|
||||
|
||||
|
@ -43,6 +44,7 @@ fs_visitor::emit_mcs_fetch(const fs_reg &coordinate, unsigned components,
|
|||
srcs[TEX_LOGICAL_SRC_COORDINATE] = coordinate;
|
||||
srcs[TEX_LOGICAL_SRC_SURFACE] = texture;
|
||||
srcs[TEX_LOGICAL_SRC_SAMPLER] = brw_imm_ud(0);
|
||||
srcs[TEX_LOGICAL_SRC_SURFACE_HANDLE] = texture_handle;
|
||||
srcs[TEX_LOGICAL_SRC_COORD_COMPONENTS] = brw_imm_d(components);
|
||||
srcs[TEX_LOGICAL_SRC_GRAD_COMPONENTS] = brw_imm_d(0);
|
||||
|
||||
|
|
Loading…
Reference in New Issue