diff --git a/src/intel/compiler/brw_eu_defines.h b/src/intel/compiler/brw_eu_defines.h index b14e33024b8..66e6c53c2b7 100644 --- a/src/intel/compiler/brw_eu_defines.h +++ b/src/intel/compiler/brw_eu_defines.h @@ -852,6 +852,8 @@ enum tex_logical_srcs { enum surface_logical_srcs { /** Surface binding table index */ SURFACE_LOGICAL_SRC_SURFACE, + /** Surface bindless handle */ + SURFACE_LOGICAL_SRC_SURFACE_HANDLE, /** Surface address; could be multi-dimensional for typed opcodes */ SURFACE_LOGICAL_SRC_ADDRESS, /** Data to be written or used in an atomic op */ diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index 856e2ef815d..e5ec2cbc450 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -5112,10 +5112,14 @@ lower_surface_logical_send(const fs_builder &bld, fs_inst *inst) const fs_reg &addr = inst->src[SURFACE_LOGICAL_SRC_ADDRESS]; const fs_reg &src = inst->src[SURFACE_LOGICAL_SRC_DATA]; const fs_reg &surface = inst->src[SURFACE_LOGICAL_SRC_SURFACE]; + const fs_reg &surface_handle = inst->src[SURFACE_LOGICAL_SRC_SURFACE_HANDLE]; const UNUSED fs_reg &dims = inst->src[SURFACE_LOGICAL_SRC_IMM_DIMS]; const fs_reg &arg = inst->src[SURFACE_LOGICAL_SRC_IMM_ARG]; assert(arg.file == IMM); + /* We must have exactly one of surface and surface_handle */ + assert((surface.file == BAD_FILE) != (surface_handle.file == BAD_FILE)); + /* Calculate the total number of components of the payload. */ const unsigned addr_sz = inst->components_read(SURFACE_LOGICAL_SRC_ADDRESS); const unsigned src_sz = inst->components_read(SURFACE_LOGICAL_SRC_DATA); @@ -5308,13 +5312,24 @@ lower_surface_logical_send(const fs_builder &bld, fs_inst *inst) if (surface.file == IMM) { inst->desc |= surface.ud & 0xff; inst->src[0] = brw_imm_ud(0); + inst->src[1] = brw_imm_ud(0); /* ex_desc */ + } else if (surface_handle.file != BAD_FILE) { + /* Bindless surface */ + assert(devinfo->gen >= 9); + inst->desc |= GEN9_BTI_BINDLESS; + inst->src[0] = brw_imm_ud(0); + + /* We assume that the driver provided the handle in the top 20 bits so + * we can use the surface handle directly as the extended descriptor. + */ + inst->src[1] = retype(surface_handle, BRW_REGISTER_TYPE_UD); } else { const fs_builder ubld = bld.exec_all().group(1, 0); fs_reg tmp = ubld.vgrf(BRW_REGISTER_TYPE_UD); ubld.AND(tmp, surface, brw_imm_ud(0xff)); inst->src[0] = component(tmp, 0); + inst->src[1] = brw_imm_ud(0); /* ex_desc */ } - inst->src[1] = brw_imm_ud(0); /* ex_desc */ /* Finally, the payload */ inst->src[2] = payload; diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp index 28c6e0e209a..de76da32c5d 100644 --- a/src/intel/compiler/brw_fs_nir.cpp +++ b/src/intel/compiler/brw_fs_nir.cpp @@ -3940,7 +3940,17 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr case nir_intrinsic_image_atomic_or: case nir_intrinsic_image_atomic_xor: case nir_intrinsic_image_atomic_exchange: - case nir_intrinsic_image_atomic_comp_swap: { + case nir_intrinsic_image_atomic_comp_swap: + case nir_intrinsic_bindless_image_load: + case nir_intrinsic_bindless_image_store: + case nir_intrinsic_bindless_image_atomic_add: + case nir_intrinsic_bindless_image_atomic_min: + case nir_intrinsic_bindless_image_atomic_max: + case nir_intrinsic_bindless_image_atomic_and: + case nir_intrinsic_bindless_image_atomic_or: + case nir_intrinsic_bindless_image_atomic_xor: + case nir_intrinsic_bindless_image_atomic_exchange: + case nir_intrinsic_bindless_image_atomic_comp_swap: { if (stage == MESA_SHADER_FRAGMENT && instr->intrinsic != nir_intrinsic_image_load) brw_wm_prog_data(prog_data)->has_side_effects = true; @@ -3950,20 +3960,43 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr const GLenum format = nir_intrinsic_format(instr); fs_reg srcs[SURFACE_LOGICAL_NUM_SRCS]; - srcs[SURFACE_LOGICAL_SRC_SURFACE] = - get_nir_image_intrinsic_image(bld, instr); + + switch (instr->intrinsic) { + case nir_intrinsic_image_load: + case nir_intrinsic_image_store: + case nir_intrinsic_image_atomic_add: + case nir_intrinsic_image_atomic_min: + case nir_intrinsic_image_atomic_max: + case nir_intrinsic_image_atomic_and: + case nir_intrinsic_image_atomic_or: + case nir_intrinsic_image_atomic_xor: + case nir_intrinsic_image_atomic_exchange: + case nir_intrinsic_image_atomic_comp_swap: + srcs[SURFACE_LOGICAL_SRC_SURFACE] = + get_nir_image_intrinsic_image(bld, instr); + break; + + default: + /* Bindless */ + srcs[SURFACE_LOGICAL_SRC_SURFACE_HANDLE] = + bld.emit_uniformize(get_nir_src(instr->src[0])); + break; + } + srcs[SURFACE_LOGICAL_SRC_ADDRESS] = get_nir_src(instr->src[1]); srcs[SURFACE_LOGICAL_SRC_IMM_DIMS] = brw_imm_ud(image_intrinsic_coord_components(instr)); /* Emit an image load, store or atomic op. */ - if (instr->intrinsic == nir_intrinsic_image_load) { + if (instr->intrinsic == nir_intrinsic_image_load || + instr->intrinsic == nir_intrinsic_bindless_image_load) { srcs[SURFACE_LOGICAL_SRC_IMM_ARG] = brw_imm_ud(instr->num_components); fs_inst *inst = bld.emit(SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL, dest, srcs, SURFACE_LOGICAL_NUM_SRCS); inst->size_written = instr->num_components * dispatch_width * 4; - } else if (instr->intrinsic == nir_intrinsic_image_store) { + } else if (instr->intrinsic == nir_intrinsic_image_store || + instr->intrinsic == nir_intrinsic_bindless_image_store) { srcs[SURFACE_LOGICAL_SRC_IMM_ARG] = brw_imm_ud(instr->num_components); srcs[SURFACE_LOGICAL_SRC_DATA] = get_nir_src(instr->src[3]); bld.emit(SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL, @@ -3974,6 +4007,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr switch (instr->intrinsic) { case nir_intrinsic_image_atomic_add: + case nir_intrinsic_bindless_image_atomic_add: assert(num_srcs == 4); op = get_op_for_atomic_add(instr, 3); @@ -3982,26 +4016,33 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr num_srcs = 3; break; case nir_intrinsic_image_atomic_min: + case nir_intrinsic_bindless_image_atomic_min: assert(format == GL_R32UI || format == GL_R32I); op = (format == GL_R32I) ? BRW_AOP_IMIN : BRW_AOP_UMIN; break; case nir_intrinsic_image_atomic_max: + case nir_intrinsic_bindless_image_atomic_max: assert(format == GL_R32UI || format == GL_R32I); op = (format == GL_R32I) ? BRW_AOP_IMAX : BRW_AOP_UMAX; break; case nir_intrinsic_image_atomic_and: + case nir_intrinsic_bindless_image_atomic_and: op = BRW_AOP_AND; break; case nir_intrinsic_image_atomic_or: + case nir_intrinsic_bindless_image_atomic_or: op = BRW_AOP_OR; break; case nir_intrinsic_image_atomic_xor: + case nir_intrinsic_bindless_image_atomic_xor: op = BRW_AOP_XOR; break; case nir_intrinsic_image_atomic_exchange: + case nir_intrinsic_bindless_image_atomic_exchange: op = BRW_AOP_MOV; break; case nir_intrinsic_image_atomic_comp_swap: + case nir_intrinsic_bindless_image_atomic_comp_swap: op = BRW_AOP_CMPWR; break; default: @@ -4027,16 +4068,22 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr break; } - case nir_intrinsic_image_size: { + case nir_intrinsic_image_size: + case nir_intrinsic_bindless_image_size: { /* Unlike the [un]typed load and store opcodes, the TXS that this turns * into will handle the binding table index for us in the geneerator. + * Incidentally, this means that we can handle bindless with exactly the + * same code. */ fs_reg image = retype(get_nir_src_imm(instr->src[0]), BRW_REGISTER_TYPE_UD); image = bld.emit_uniformize(image); fs_reg srcs[TEX_LOGICAL_NUM_SRCS]; - srcs[TEX_LOGICAL_SRC_SURFACE] = image; + if (instr->intrinsic == nir_intrinsic_image_size) + srcs[TEX_LOGICAL_SRC_SURFACE] = image; + else + srcs[TEX_LOGICAL_SRC_SURFACE_HANDLE] = image; srcs[TEX_LOGICAL_SRC_SAMPLER] = brw_imm_d(0); srcs[TEX_LOGICAL_SRC_COORD_COMPONENTS] = brw_imm_d(0); srcs[TEX_LOGICAL_SRC_GRAD_COMPONENTS] = brw_imm_d(0);