From cb98e0755f8d05a5a7f9134e39c625e8933746ea Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 11 Oct 2018 15:57:50 -0500 Subject: [PATCH] intel/fs: Support min_lod parameters on texture instructions We have to lower some shadow instructions because they don't exist in hardware and we have to lower txd+offset+clamp because the message gets too big and we run into the sampler message length limit of 11 regs. Acked-by: Ian Romanick --- src/intel/compiler/brw_eu_defines.h | 2 ++ src/intel/compiler/brw_fs.cpp | 22 +++++++++++++++++++++- src/intel/compiler/brw_fs_nir.cpp | 6 +++++- src/intel/compiler/brw_nir.c | 3 +++ 4 files changed, 31 insertions(+), 2 deletions(-) diff --git a/src/intel/compiler/brw_eu_defines.h b/src/intel/compiler/brw_eu_defines.h index 52957882b10..affe977835b 100644 --- a/src/intel/compiler/brw_eu_defines.h +++ b/src/intel/compiler/brw_eu_defines.h @@ -811,6 +811,8 @@ enum tex_logical_srcs { TEX_LOGICAL_SRC_LOD, /** dPdy if the operation takes explicit derivatives */ TEX_LOGICAL_SRC_LOD2, + /** Min LOD */ + TEX_LOGICAL_SRC_MIN_LOD, /** Sample index */ TEX_LOGICAL_SRC_SAMPLE_INDEX, /** MCS data */ diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index 713a6c7f40a..3125e5feb1d 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -4472,6 +4472,7 @@ lower_sampler_logical_send_gen7(const fs_builder &bld, fs_inst *inst, opcode op, const fs_reg &coordinate, const fs_reg &shadow_c, fs_reg lod, const fs_reg &lod2, + const fs_reg &min_lod, const fs_reg &sample_index, const fs_reg &mcs, const fs_reg &surface, @@ -4682,6 +4683,15 @@ lower_sampler_logical_send_gen7(const fs_builder &bld, fs_inst *inst, opcode op, bld.MOV(sources[length++], offset(coordinate, bld, i)); } + if (min_lod.file != BAD_FILE) { + /* Account for all of the missing coordinate sources */ + length += 4 - coord_components; + if (op == SHADER_OPCODE_TXD) + length += (3 - grad_components) * 2; + + bld.MOV(sources[length++], 
min_lod); + } + int mlen; if (reg_width == 2) mlen = length * reg_width - header_size; @@ -4713,6 +4723,7 @@ lower_sampler_logical_send(const fs_builder &bld, fs_inst *inst, opcode op) const fs_reg &shadow_c = inst->src[TEX_LOGICAL_SRC_SHADOW_C]; const fs_reg &lod = inst->src[TEX_LOGICAL_SRC_LOD]; const fs_reg &lod2 = inst->src[TEX_LOGICAL_SRC_LOD2]; + const fs_reg &min_lod = inst->src[TEX_LOGICAL_SRC_MIN_LOD]; const fs_reg &sample_index = inst->src[TEX_LOGICAL_SRC_SAMPLE_INDEX]; const fs_reg &mcs = inst->src[TEX_LOGICAL_SRC_MCS]; const fs_reg &surface = inst->src[TEX_LOGICAL_SRC_SURFACE]; @@ -4725,7 +4736,8 @@ lower_sampler_logical_send(const fs_builder &bld, fs_inst *inst, opcode op) if (devinfo->gen >= 7) { lower_sampler_logical_send_gen7(bld, inst, op, coordinate, - shadow_c, lod, lod2, sample_index, + shadow_c, lod, lod2, min_lod, + sample_index, mcs, surface, sampler, tg4_offset, coord_components, grad_components); } else if (devinfo->gen >= 5) { @@ -5262,6 +5274,14 @@ static unsigned get_sampler_lowered_simd_width(const struct gen_device_info *devinfo, const fs_inst *inst) { + /* If we have a min_lod parameter on anything other than a simple sample + * message, it will push it over 5 arguments and we have to fall back to + * SIMD8. + */ + if (inst->opcode != SHADER_OPCODE_TEX && + inst->components_read(TEX_LOGICAL_SRC_MIN_LOD)) + return 8; + /* Calculate the number of coordinate components that have to be present * assuming that additional arguments follow the texel coordinates in the * message payload. On IVB+ there is no need for padding, on ILK-SNB we diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp index 165c70c7c29..d5a05aacdf5 100644 --- a/src/intel/compiler/brw_fs_nir.cpp +++ b/src/intel/compiler/brw_fs_nir.cpp @@ -2968,7 +2968,7 @@ fs_visitor::emit_non_coherent_fb_read(const fs_builder &bld, const fs_reg &dst, /* Emit the instruction. 
*/ const fs_reg srcs[] = { coords, fs_reg(), brw_imm_ud(0), fs_reg(), - sample, mcs, + fs_reg(), sample, mcs, brw_imm_ud(surface), brw_imm_ud(0), fs_reg(), brw_imm_ud(3), brw_imm_ud(0) }; STATIC_ASSERT(ARRAY_SIZE(srcs) == TEX_LOGICAL_NUM_SRCS); @@ -4792,6 +4792,10 @@ fs_visitor::nir_emit_texture(const fs_builder &bld, nir_tex_instr *instr) break; } break; + case nir_tex_src_min_lod: + srcs[TEX_LOGICAL_SRC_MIN_LOD] = + retype(get_nir_src_imm(instr->src[i].src), BRW_REGISTER_TYPE_F); + break; case nir_tex_src_ms_index: srcs[TEX_LOGICAL_SRC_SAMPLE_INDEX] = retype(src, BRW_REGISTER_TYPE_UD); break; diff --git a/src/intel/compiler/brw_nir.c b/src/intel/compiler/brw_nir.c index aa6788b9fe5..2723ab6d4fb 100644 --- a/src/intel/compiler/brw_nir.c +++ b/src/intel/compiler/brw_nir.c @@ -656,6 +656,9 @@ brw_preprocess_nir(const struct brw_compiler *compiler, nir_shader *nir) .lower_txf_offset = true, .lower_rect_offset = true, .lower_txd_cube_map = true, + .lower_txb_shadow_clamp = true, + .lower_txd_shadow_clamp = true, + .lower_txd_offset_clamp = true, }; OPT(nir_lower_tex, &tex_options);