radv,aco: lower texture descriptor loads in NIR
fossil-db (Sienna Cichlid):
Totals from 39445 (24.30% of 162293) affected shaders:
MaxWaves: 875988 -> 875972 (-0.00%)
Instrs: 35372561 -> 35234909 (-0.39%); split: -0.41%, +0.03%
CodeSize: 190237480 -> 189379240 (-0.45%); split: -0.47%, +0.02%
VGPRs: 1889856 -> 1889928 (+0.00%); split: -0.00%, +0.01%
SpillSGPRs: 10764 -> 10857 (+0.86%); split: -2.04%, +2.91%
SpillVGPRs: 1891 -> 1907 (+0.85%); split: -0.32%, +1.16%
Scratch: 260096 -> 261120 (+0.39%)
Latency: 477701150 -> 477578466 (-0.03%); split: -0.06%, +0.03%
InvThroughput: 87819847 -> 87830346 (+0.01%); split: -0.03%, +0.04%
VClause: 673353 -> 673829 (+0.07%); split: -0.04%, +0.11%
SClause: 1385396 -> 1366478 (-1.37%); split: -1.65%, +0.29%
Copies: 2327965 -> 2229134 (-4.25%); split: -4.58%, +0.34%
Branches: 906707 -> 906434 (-0.03%); split: -0.13%, +0.10%
PreSGPRs: 1874153 -> 1862698 (-0.61%); split: -1.34%, +0.73%
PreVGPRs: 1691382 -> 1691383 (+0.00%); split: -0.00%, +0.00%

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Timur Kristóf <timur.kristof@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/12773>
This commit is contained in:
parent
52f850238a
commit
15640e58d9
|
@ -79,6 +79,17 @@ enum ac_fetch_format
|
|||
AC_FETCH_FORMAT_NONE,
|
||||
};
|
||||
|
||||
/* Kind of descriptor requested for a texture/image access.
 *
 * The enumerator values are spelled out because code performs arithmetic on
 * them (e.g. `AC_DESC_PLANE_0 + plane`), so the three PLANE entries must stay
 * consecutive and in this order.
 */
enum ac_descriptor_type
{
   AC_DESC_IMAGE = 0,
   AC_DESC_FMASK = 1,
   AC_DESC_SAMPLER = 2,
   AC_DESC_BUFFER = 3,
   AC_DESC_PLANE_0 = 4,
   AC_DESC_PLANE_1 = 5, /* must equal AC_DESC_PLANE_0 + 1 */
   AC_DESC_PLANE_2 = 6, /* must equal AC_DESC_PLANE_0 + 2 */
};
|
||||
|
||||
unsigned ac_get_spi_shader_z_format(bool writes_z, bool writes_stencil, bool writes_samplemask);
|
||||
|
||||
unsigned ac_get_cb_shader_mask(unsigned spi_shader_col_format);
|
||||
|
|
|
@ -1355,7 +1355,9 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
|
|||
case nir_op_vec2:
|
||||
case nir_op_vec3:
|
||||
case nir_op_vec4:
|
||||
case nir_op_vec5: {
|
||||
case nir_op_vec5:
|
||||
case nir_op_vec8:
|
||||
case nir_op_vec16: {
|
||||
std::array<Temp, NIR_MAX_VEC_COMPONENTS> elems;
|
||||
unsigned num = instr->dest.dest.ssa.num_components;
|
||||
for (unsigned i = 0; i < num; ++i)
|
||||
|
@ -8967,70 +8969,6 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
|
|||
}
|
||||
}
|
||||
|
||||
void
|
||||
tex_fetch_ptrs(isel_context* ctx, nir_tex_instr* instr, Temp* res_ptr, Temp* samp_ptr,
|
||||
enum glsl_base_type* stype)
|
||||
{
|
||||
nir_deref_instr* texture_deref_instr = NULL;
|
||||
nir_deref_instr* sampler_deref_instr = NULL;
|
||||
int plane = -1;
|
||||
|
||||
for (unsigned i = 0; i < instr->num_srcs; i++) {
|
||||
switch (instr->src[i].src_type) {
|
||||
case nir_tex_src_texture_deref:
|
||||
texture_deref_instr = nir_src_as_deref(instr->src[i].src);
|
||||
break;
|
||||
case nir_tex_src_sampler_deref:
|
||||
sampler_deref_instr = nir_src_as_deref(instr->src[i].src);
|
||||
break;
|
||||
case nir_tex_src_plane: plane = nir_src_as_int(instr->src[i].src); break;
|
||||
default: break;
|
||||
}
|
||||
}
|
||||
|
||||
*stype = glsl_get_sampler_result_type(texture_deref_instr->type);
|
||||
|
||||
if (!sampler_deref_instr)
|
||||
sampler_deref_instr = texture_deref_instr;
|
||||
|
||||
if (plane >= 0) {
|
||||
assert(instr->sampler_dim != GLSL_SAMPLER_DIM_BUF);
|
||||
*res_ptr = get_sampler_desc(ctx, texture_deref_instr,
|
||||
(aco_descriptor_type)(ACO_DESC_PLANE_0 + plane), instr, false);
|
||||
} else if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) {
|
||||
*res_ptr = get_sampler_desc(ctx, texture_deref_instr, ACO_DESC_BUFFER, instr, false);
|
||||
} else if (instr->op == nir_texop_fragment_mask_fetch_amd) {
|
||||
*res_ptr = get_sampler_desc(ctx, texture_deref_instr, ACO_DESC_FMASK, instr, false);
|
||||
} else {
|
||||
*res_ptr = get_sampler_desc(ctx, texture_deref_instr, ACO_DESC_IMAGE, instr, false);
|
||||
}
|
||||
if (samp_ptr) {
|
||||
*samp_ptr = get_sampler_desc(ctx, sampler_deref_instr, ACO_DESC_SAMPLER, instr, false);
|
||||
|
||||
if (ctx->options->disable_aniso_single_level &&
|
||||
instr->sampler_dim < GLSL_SAMPLER_DIM_RECT && ctx->options->chip_class < GFX8) {
|
||||
/* fix sampler aniso on SI/CI: samp[0] = samp[0] & img[7] */
|
||||
Builder bld(ctx->program, ctx->block);
|
||||
|
||||
/* to avoid unnecessary moves, we split and recombine sampler and image */
|
||||
Temp img[8] = {bld.tmp(s1), bld.tmp(s1), bld.tmp(s1), bld.tmp(s1),
|
||||
bld.tmp(s1), bld.tmp(s1), bld.tmp(s1), bld.tmp(s1)};
|
||||
Temp samp[4] = {bld.tmp(s1), bld.tmp(s1), bld.tmp(s1), bld.tmp(s1)};
|
||||
bld.pseudo(aco_opcode::p_split_vector, Definition(img[0]), Definition(img[1]),
|
||||
Definition(img[2]), Definition(img[3]), Definition(img[4]), Definition(img[5]),
|
||||
Definition(img[6]), Definition(img[7]), *res_ptr);
|
||||
bld.pseudo(aco_opcode::p_split_vector, Definition(samp[0]), Definition(samp[1]),
|
||||
Definition(samp[2]), Definition(samp[3]), *samp_ptr);
|
||||
|
||||
samp[0] = bld.sop2(aco_opcode::s_and_b32, bld.def(s1), bld.def(s1, scc), samp[0], img[7]);
|
||||
*res_ptr = bld.pseudo(aco_opcode::p_create_vector, bld.def(s8), img[0], img[1], img[2],
|
||||
img[3], img[4], img[5], img[6], img[7]);
|
||||
*samp_ptr = bld.pseudo(aco_opcode::p_create_vector, bld.def(s4), samp[0], samp[1], samp[2],
|
||||
samp[3]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
build_cube_select(isel_context* ctx, Temp ma, Temp id, Temp deriv, Temp* out_ma, Temp* out_sc,
|
||||
Temp* out_tc)
|
||||
|
@ -9178,11 +9116,21 @@ visit_tex(isel_context* ctx, nir_tex_instr* instr)
|
|||
std::vector<Temp> coords;
|
||||
std::vector<Temp> derivs;
|
||||
nir_const_value* const_offset[4] = {NULL, NULL, NULL, NULL};
|
||||
enum glsl_base_type stype;
|
||||
tex_fetch_ptrs(ctx, instr, &resource, &sampler, &stype);
|
||||
|
||||
for (unsigned i = 0; i < instr->num_srcs; i++) {
|
||||
switch (instr->src[i].src_type) {
|
||||
case nir_tex_src_texture_handle:
|
||||
resource = bld.as_uniform(get_ssa_temp(ctx, instr->src[i].src.ssa));
|
||||
break;
|
||||
case nir_tex_src_sampler_handle:
|
||||
sampler = bld.as_uniform(get_ssa_temp(ctx, instr->src[i].src.ssa));
|
||||
break;
|
||||
default: break;
|
||||
}
|
||||
}
|
||||
|
||||
bool tg4_integer_workarounds = ctx->options->chip_class <= GFX8 && instr->op == nir_texop_tg4 &&
|
||||
(stype == GLSL_TYPE_UINT || stype == GLSL_TYPE_INT);
|
||||
(instr->dest_type & (nir_type_int | nir_type_uint));
|
||||
bool tg4_integer_cube_workaround =
|
||||
tg4_integer_workarounds && instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE;
|
||||
|
||||
|
@ -9476,7 +9424,7 @@ visit_tex(isel_context* ctx, nir_tex_instr* instr)
|
|||
Operand::c32(V_008F14_IMG_DATA_FORMAT_8_8_8_8));
|
||||
|
||||
Temp nfmt;
|
||||
if (stype == GLSL_TYPE_UINT) {
|
||||
if (instr->dest_type & nir_type_uint) {
|
||||
nfmt = bld.sop2(aco_opcode::s_cselect_b32, bld.def(s1),
|
||||
Operand::c32(V_008F14_IMG_NUM_FORMAT_USCALED),
|
||||
Operand::c32(V_008F14_IMG_NUM_FORMAT_UINT), bld.scc(compare_cube_wa));
|
||||
|
@ -9753,7 +9701,7 @@ visit_tex(isel_context* ctx, nir_tex_instr* instr)
|
|||
for (unsigned i = 0; i < 4; i++) {
|
||||
val[i] = emit_extract_vector(ctx, tmp_dst, i, v1);
|
||||
Temp cvt_val;
|
||||
if (stype == GLSL_TYPE_UINT)
|
||||
if (instr->dest_type & nir_type_uint)
|
||||
cvt_val = bld.vop1(aco_opcode::v_cvt_u32_f32, bld.def(v1), val[i]);
|
||||
else
|
||||
cvt_val = bld.vop1(aco_opcode::v_cvt_i32_f32, bld.def(v1), val[i]);
|
||||
|
|
|
@ -559,7 +559,7 @@ static LLVMValueRef exit_waterfall(struct ac_nir_context *ctx, struct waterfall_
|
|||
|
||||
static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
|
||||
{
|
||||
LLVMValueRef src[4], result = NULL;
|
||||
LLVMValueRef src[16], result = NULL;
|
||||
unsigned num_components = instr->dest.dest.ssa.num_components;
|
||||
unsigned src_components;
|
||||
LLVMTypeRef def_type = get_def_type(ctx, &instr->dest.dest.ssa);
|
||||
|
@ -570,6 +570,8 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
|
|||
case nir_op_vec3:
|
||||
case nir_op_vec4:
|
||||
case nir_op_vec5:
|
||||
case nir_op_vec8:
|
||||
case nir_op_vec16:
|
||||
case nir_op_unpack_32_2x16:
|
||||
case nir_op_unpack_64_2x32:
|
||||
case nir_op_unpack_64_4x16:
|
||||
|
@ -957,6 +959,8 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
|
|||
case nir_op_vec3:
|
||||
case nir_op_vec4:
|
||||
case nir_op_vec5:
|
||||
case nir_op_vec8:
|
||||
case nir_op_vec16:
|
||||
for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++)
|
||||
src[i] = ac_to_integer(&ctx->ac, src[i]);
|
||||
result = ac_build_gather_values(&ctx->ac, src, num_components);
|
||||
|
@ -4486,7 +4490,7 @@ static LLVMValueRef sici_fix_sampler_aniso(struct ac_nir_context *ctx, LLVMValue
|
|||
LLVMBuilderRef builder = ctx->ac.builder;
|
||||
LLVMValueRef img7, samp0;
|
||||
|
||||
if (ctx->ac.chip_class >= GFX8 || !ctx->abi->disable_aniso_single_level)
|
||||
if (ctx->ac.chip_class >= GFX8)
|
||||
return samp;
|
||||
|
||||
img7 = LLVMBuildExtractElement(builder, res, LLVMConstInt(ctx->ac.i32, 7, 0), "");
|
||||
|
|
|
@ -25,6 +25,7 @@
|
|||
#define AC_SHADER_ABI_H
|
||||
|
||||
#include "ac_shader_args.h"
|
||||
#include "ac_shader_util.h"
|
||||
#include "compiler/shader_enums.h"
|
||||
#include <llvm-c/Core.h>
|
||||
|
||||
|
@ -34,17 +35,6 @@
|
|||
|
||||
#define AC_MAX_INLINE_PUSH_CONSTS 8
|
||||
|
||||
/* Kind of descriptor requested for a texture/image access.
 *
 * The enumerator values are spelled out because code performs arithmetic on
 * them (e.g. `AC_DESC_PLANE_0 + plane`), so the three PLANE entries must stay
 * consecutive and in this order.
 */
enum ac_descriptor_type
{
   AC_DESC_IMAGE = 0,
   AC_DESC_FMASK = 1,
   AC_DESC_SAMPLER = 2,
   AC_DESC_BUFFER = 3,
   AC_DESC_PLANE_0 = 4,
   AC_DESC_PLANE_1 = 5, /* must equal AC_DESC_PLANE_0 + 1 */
   AC_DESC_PLANE_2 = 6, /* must equal AC_DESC_PLANE_0 + 2 */
};
|
||||
|
||||
/* Document the shader ABI during compilation. This is what allows radeonsi and
|
||||
* radv to share a compiler backend.
|
||||
*/
|
||||
|
@ -159,11 +149,6 @@ struct ac_shader_abi {
|
|||
*/
|
||||
bool adjust_frag_coord_z;
|
||||
|
||||
/* Whether anisotropic filtering should be disabled for single level
|
||||
* images.
|
||||
*/
|
||||
bool disable_aniso_single_level;
|
||||
|
||||
/* Whether to inline the compute dispatch size in user sgprs. */
|
||||
bool load_grid_size_from_user_sgpr;
|
||||
};
|
||||
|
|
|
@ -21,6 +21,7 @@
|
|||
* IN THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
#include "ac_shader_util.h"
|
||||
#include "nir.h"
|
||||
#include "nir_builder.h"
|
||||
#include "radv_private.h"
|
||||
|
@ -30,6 +31,7 @@
|
|||
typedef struct {
|
||||
enum chip_class chip_class;
|
||||
uint32_t address32_hi;
|
||||
bool disable_aniso_single_level;
|
||||
|
||||
const struct radv_shader_args *args;
|
||||
const struct radv_shader_info *info;
|
||||
|
@ -218,6 +220,122 @@ visit_get_ssbo_size(nir_builder *b, apply_layout_state *state, nir_intrinsic_ins
|
|||
nir_instr_remove(&intrin->instr);
|
||||
}
|
||||
|
||||
static nir_ssa_def *
|
||||
get_sampler_desc(nir_builder *b, apply_layout_state *state, nir_deref_instr *deref,
|
||||
enum ac_descriptor_type desc_type, bool non_uniform, nir_tex_instr *tex)
|
||||
{
|
||||
nir_variable *var = nir_deref_instr_get_variable(deref);
|
||||
assert(var);
|
||||
unsigned desc_set = var->data.descriptor_set;
|
||||
unsigned binding_index = var->data.binding;
|
||||
bool indirect = nir_deref_instr_has_indirect(deref);
|
||||
|
||||
struct radv_descriptor_set_layout *layout = state->pipeline_layout->set[desc_set].layout;
|
||||
struct radv_descriptor_set_binding_layout *binding = &layout->binding[binding_index];
|
||||
|
||||
/* Handle immutable (compile-time) samplers (VkDescriptorSetLayoutBinding::pImmutableSamplers)
|
||||
* We can only do this for constant array index or if all samplers in the array are the same.
|
||||
*/
|
||||
if (desc_type == AC_DESC_SAMPLER && binding->immutable_samplers_offset &&
|
||||
(!indirect || binding->immutable_samplers_equal)) {
|
||||
unsigned constant_index = 0;
|
||||
if (!binding->immutable_samplers_equal) {
|
||||
while (deref->deref_type != nir_deref_type_var) {
|
||||
assert(deref->deref_type == nir_deref_type_array);
|
||||
unsigned array_size = MAX2(glsl_get_aoa_size(deref->type), 1);
|
||||
constant_index += nir_src_as_uint(deref->arr.index) * array_size;
|
||||
deref = nir_deref_instr_parent(deref);
|
||||
}
|
||||
}
|
||||
|
||||
const uint32_t *samplers = radv_immutable_samplers(layout, binding);
|
||||
return nir_imm_ivec4(b, samplers[constant_index * 4 + 0], samplers[constant_index * 4 + 1],
|
||||
samplers[constant_index * 4 + 2], samplers[constant_index * 4 + 3]);
|
||||
}
|
||||
|
||||
unsigned size = 8;
|
||||
unsigned offset = binding->offset;
|
||||
switch (desc_type) {
|
||||
case AC_DESC_IMAGE:
|
||||
case AC_DESC_PLANE_0:
|
||||
break;
|
||||
case AC_DESC_FMASK:
|
||||
case AC_DESC_PLANE_1:
|
||||
offset += 32;
|
||||
break;
|
||||
case AC_DESC_SAMPLER:
|
||||
size = 4;
|
||||
if (binding->type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER)
|
||||
offset += radv_combined_image_descriptor_sampler_offset(binding);
|
||||
break;
|
||||
case AC_DESC_BUFFER:
|
||||
size = 4;
|
||||
break;
|
||||
case AC_DESC_PLANE_2:
|
||||
size = 4;
|
||||
offset += 64;
|
||||
break;
|
||||
}
|
||||
|
||||
nir_ssa_def *index = NULL;
|
||||
while (deref->deref_type != nir_deref_type_var) {
|
||||
assert(deref->deref_type == nir_deref_type_array);
|
||||
unsigned array_size = MAX2(glsl_get_aoa_size(deref->type), 1);
|
||||
array_size *= binding->size;
|
||||
|
||||
nir_ssa_def *tmp = nir_imul_imm(b, deref->arr.index.ssa, array_size);
|
||||
if (tmp != deref->arr.index.ssa)
|
||||
nir_instr_as_alu(tmp->parent_instr)->no_unsigned_wrap = true;
|
||||
|
||||
if (index) {
|
||||
index = nir_iadd(b, tmp, index);
|
||||
nir_instr_as_alu(index->parent_instr)->no_unsigned_wrap = true;
|
||||
} else {
|
||||
index = tmp;
|
||||
}
|
||||
|
||||
deref = nir_deref_instr_parent(deref);
|
||||
}
|
||||
|
||||
nir_ssa_def *index_offset = index ? nir_iadd_imm(b, index, offset) : nir_imm_int(b, offset);
|
||||
if (index && index_offset != index)
|
||||
nir_instr_as_alu(index_offset->parent_instr)->no_unsigned_wrap = true;
|
||||
|
||||
if (non_uniform)
|
||||
return nir_iadd(b, load_desc_ptr(b, state, desc_set), index_offset);
|
||||
|
||||
nir_ssa_def *addr = convert_pointer_to_64_bit(b, state, load_desc_ptr(b, state, desc_set));
|
||||
nir_ssa_def *desc = nir_load_smem_amd(b, size, addr, index_offset, .align_mul = size * 4u);
|
||||
|
||||
/* 3 plane formats always have same size and format for plane 1 & 2, so
|
||||
* use the tail from plane 1 so that we can store only the first 16 bytes
|
||||
* of the last plane. */
|
||||
if (desc_type == AC_DESC_PLANE_2) {
|
||||
nir_ssa_def *desc2 = get_sampler_desc(b, state, deref, AC_DESC_PLANE_1, non_uniform, tex);
|
||||
|
||||
nir_ssa_def *comp[8];
|
||||
for (unsigned i = 0; i < 4; i++)
|
||||
comp[i] = nir_channel(b, desc, i);
|
||||
for (unsigned i = 4; i < 8; i++)
|
||||
comp[i] = nir_channel(b, desc2, i);
|
||||
|
||||
return nir_vec(b, comp, 8);
|
||||
} else if (desc_type == AC_DESC_SAMPLER && tex->op == nir_texop_tg4) {
|
||||
nir_ssa_def *comp[4];
|
||||
for (unsigned i = 0; i < 4; i++)
|
||||
comp[i] = nir_channel(b, desc, i);
|
||||
|
||||
/* We want to always use the linear filtering truncation behaviour for
|
||||
* nir_texop_tg4, even if the sampler uses nearest/point filtering.
|
||||
*/
|
||||
comp[0] = nir_iand_imm(b, comp[0], C_008F30_TRUNC_COORD);
|
||||
|
||||
return nir_vec(b, comp, 4);
|
||||
}
|
||||
|
||||
return desc;
|
||||
}
|
||||
|
||||
static void
|
||||
apply_layout_to_intrin(nir_builder *b, apply_layout_state *state, nir_intrinsic_instr *intrin)
|
||||
{
|
||||
|
@ -263,6 +381,94 @@ apply_layout_to_intrin(nir_builder *b, apply_layout_state *state, nir_intrinsic_
|
|||
}
|
||||
}
|
||||
|
||||
static void
|
||||
apply_layout_to_tex(nir_builder *b, apply_layout_state *state, nir_tex_instr *tex)
|
||||
{
|
||||
b->cursor = nir_before_instr(&tex->instr);
|
||||
|
||||
nir_deref_instr *texture_deref_instr = NULL;
|
||||
nir_deref_instr *sampler_deref_instr = NULL;
|
||||
int plane = -1;
|
||||
|
||||
for (unsigned i = 0; i < tex->num_srcs; i++) {
|
||||
switch (tex->src[i].src_type) {
|
||||
case nir_tex_src_texture_deref:
|
||||
texture_deref_instr = nir_src_as_deref(tex->src[i].src);
|
||||
break;
|
||||
case nir_tex_src_sampler_deref:
|
||||
sampler_deref_instr = nir_src_as_deref(tex->src[i].src);
|
||||
break;
|
||||
case nir_tex_src_plane:
|
||||
plane = nir_src_as_int(tex->src[i].src);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
nir_ssa_def *image = NULL;
|
||||
nir_ssa_def *sampler = NULL;
|
||||
if (plane >= 0) {
|
||||
assert(tex->op != nir_texop_txf_ms && tex->op != nir_texop_samples_identical);
|
||||
assert(tex->sampler_dim != GLSL_SAMPLER_DIM_BUF);
|
||||
image = get_sampler_desc(b, state, texture_deref_instr, AC_DESC_PLANE_0 + plane,
|
||||
tex->texture_non_uniform, tex);
|
||||
} else if (tex->sampler_dim == GLSL_SAMPLER_DIM_BUF) {
|
||||
image = get_sampler_desc(b, state, texture_deref_instr, AC_DESC_BUFFER,
|
||||
tex->texture_non_uniform, tex);
|
||||
} else if (tex->op == nir_texop_fragment_mask_fetch_amd ||
|
||||
tex->op == nir_texop_samples_identical) {
|
||||
image = get_sampler_desc(b, state, texture_deref_instr, AC_DESC_FMASK,
|
||||
tex->texture_non_uniform, tex);
|
||||
} else {
|
||||
image = get_sampler_desc(b, state, texture_deref_instr, AC_DESC_IMAGE,
|
||||
tex->texture_non_uniform, tex);
|
||||
}
|
||||
|
||||
if (sampler_deref_instr) {
|
||||
sampler = get_sampler_desc(b, state, sampler_deref_instr, AC_DESC_SAMPLER,
|
||||
tex->sampler_non_uniform, tex);
|
||||
|
||||
if (state->disable_aniso_single_level && tex->sampler_dim < GLSL_SAMPLER_DIM_RECT &&
|
||||
state->chip_class < GFX8) {
|
||||
/* Disable anisotropic filtering if BASE_LEVEL == LAST_LEVEL.
|
||||
*
|
||||
* GFX6-GFX7:
|
||||
* If BASE_LEVEL == LAST_LEVEL, the shader must disable anisotropic
|
||||
* filtering manually. The driver sets img7 to a mask clearing
|
||||
* MAX_ANISO_RATIO if BASE_LEVEL == LAST_LEVEL. The shader must do:
|
||||
* s_and_b32 samp0, samp0, img7
|
||||
*
|
||||
* GFX8:
|
||||
* The ANISO_OVERRIDE sampler field enables this fix in TA.
|
||||
*/
|
||||
/* TODO: This is unnecessary for combined image+sampler.
|
||||
* We can do this when updating the desc set. */
|
||||
nir_ssa_def *comp[4];
|
||||
for (unsigned i = 0; i < 4; i++)
|
||||
comp[i] = nir_channel(b, sampler, i);
|
||||
comp[0] = nir_iand(b, comp[0], nir_channel(b, image, 7));
|
||||
|
||||
sampler = nir_vec(b, comp, 4);
|
||||
}
|
||||
}
|
||||
|
||||
for (unsigned i = 0; i < tex->num_srcs; i++) {
|
||||
switch (tex->src[i].src_type) {
|
||||
case nir_tex_src_texture_deref:
|
||||
tex->src[i].src_type = nir_tex_src_texture_handle;
|
||||
nir_instr_rewrite_src_ssa(&tex->instr, &tex->src[i].src, image);
|
||||
break;
|
||||
case nir_tex_src_sampler_deref:
|
||||
tex->src[i].src_type = nir_tex_src_sampler_handle;
|
||||
nir_instr_rewrite_src_ssa(&tex->instr, &tex->src[i].src, sampler);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
radv_nir_apply_pipeline_layout(nir_shader *shader, struct radv_device *device,
|
||||
const struct radv_pipeline_layout *layout,
|
||||
|
@ -272,6 +478,7 @@ radv_nir_apply_pipeline_layout(nir_shader *shader, struct radv_device *device,
|
|||
apply_layout_state state = {
|
||||
.chip_class = device->physical_device->rad_info.chip_class,
|
||||
.address32_hi = device->physical_device->rad_info.address32_hi,
|
||||
.disable_aniso_single_level = device->instance->disable_aniso_single_level,
|
||||
.args = args,
|
||||
.info = info,
|
||||
.pipeline_layout = layout,
|
||||
|
@ -291,7 +498,9 @@ radv_nir_apply_pipeline_layout(nir_shader *shader, struct radv_device *device,
|
|||
*/
|
||||
nir_foreach_block_reverse (block, function->impl) {
|
||||
nir_foreach_instr_reverse_safe (instr, block) {
|
||||
if (instr->type == nir_instr_type_intrinsic)
|
||||
if (instr->type == nir_instr_type_tex)
|
||||
apply_layout_to_tex(&b, &state, nir_instr_as_tex(instr));
|
||||
else if (instr->type == nir_instr_type_intrinsic)
|
||||
apply_layout_to_intrin(&b, &state, nir_instr_as_intrinsic(instr));
|
||||
}
|
||||
}
|
||||
|
|
|
@ -403,118 +403,30 @@ radv_get_sampler_desc(struct ac_shader_abi *abi, unsigned descriptor_set, unsign
|
|||
enum ac_descriptor_type desc_type, bool image, bool write, bool bindless)
|
||||
{
|
||||
struct radv_shader_context *ctx = radv_shader_context_from_abi(abi);
|
||||
LLVMValueRef list = ctx->descriptor_sets[descriptor_set];
|
||||
struct radv_descriptor_set_layout *layout =
|
||||
ctx->options->layout->set[descriptor_set].layout;
|
||||
struct radv_descriptor_set_binding_layout *binding = layout->binding + base_index;
|
||||
unsigned offset = binding->offset;
|
||||
unsigned stride = binding->size;
|
||||
unsigned type_size;
|
||||
LLVMBuilderRef builder = ctx->ac.builder;
|
||||
LLVMTypeRef type;
|
||||
|
||||
assert(base_index < layout->binding_count);
|
||||
|
||||
if (binding->type == VK_DESCRIPTOR_TYPE_STORAGE_IMAGE && desc_type == AC_DESC_FMASK)
|
||||
if (image && desc_type == AC_DESC_FMASK)
|
||||
return NULL;
|
||||
|
||||
switch (desc_type) {
|
||||
case AC_DESC_IMAGE:
|
||||
type = ctx->ac.v8i32;
|
||||
type_size = 32;
|
||||
break;
|
||||
case AC_DESC_FMASK:
|
||||
type = ctx->ac.v8i32;
|
||||
offset += 32;
|
||||
type_size = 32;
|
||||
break;
|
||||
case AC_DESC_SAMPLER:
|
||||
type = ctx->ac.v4i32;
|
||||
if (binding->type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) {
|
||||
offset += radv_combined_image_descriptor_sampler_offset(binding);
|
||||
}
|
||||
|
||||
type_size = 16;
|
||||
break;
|
||||
case AC_DESC_BUFFER:
|
||||
type = ctx->ac.v4i32;
|
||||
type_size = 16;
|
||||
break;
|
||||
case AC_DESC_PLANE_0:
|
||||
case AC_DESC_PLANE_1:
|
||||
case AC_DESC_PLANE_2:
|
||||
type = ctx->ac.v8i32;
|
||||
type_size = 32;
|
||||
offset += 32 * (desc_type - AC_DESC_PLANE_0);
|
||||
break;
|
||||
default:
|
||||
unreachable("invalid desc_type\n");
|
||||
}
|
||||
|
||||
offset += constant_index * stride;
|
||||
|
||||
if (desc_type == AC_DESC_SAMPLER && binding->immutable_samplers_offset &&
|
||||
(!index || binding->immutable_samplers_equal)) {
|
||||
if (binding->immutable_samplers_equal)
|
||||
constant_index = 0;
|
||||
|
||||
const uint32_t *samplers = radv_immutable_samplers(layout, binding);
|
||||
|
||||
LLVMValueRef constants[] = {
|
||||
LLVMConstInt(ctx->ac.i32, samplers[constant_index * 4 + 0], 0),
|
||||
LLVMConstInt(ctx->ac.i32, samplers[constant_index * 4 + 1], 0),
|
||||
LLVMConstInt(ctx->ac.i32, samplers[constant_index * 4 + 2], 0),
|
||||
LLVMConstInt(ctx->ac.i32, samplers[constant_index * 4 + 3], 0),
|
||||
};
|
||||
return ac_build_gather_values(&ctx->ac, constants, 4);
|
||||
}
|
||||
|
||||
assert(stride % type_size == 0);
|
||||
|
||||
LLVMValueRef adjusted_index = index;
|
||||
if (!adjusted_index)
|
||||
adjusted_index = ctx->ac.i32_0;
|
||||
|
||||
adjusted_index =
|
||||
LLVMBuildMul(builder, adjusted_index, LLVMConstInt(ctx->ac.i32, stride / type_size, 0), "");
|
||||
|
||||
LLVMValueRef val_offset = LLVMConstInt(ctx->ac.i32, offset, 0);
|
||||
list = LLVMBuildGEP(builder, list, &val_offset, 1, "");
|
||||
list = LLVMBuildPointerCast(builder, list, ac_array_in_const32_addr_space(type), "");
|
||||
|
||||
LLVMValueRef descriptor = ac_build_load_to_sgpr(&ctx->ac, list, adjusted_index);
|
||||
|
||||
/* 3 plane formats always have same size and format for plane 1 & 2, so
|
||||
* use the tail from plane 1 so that we can store only the first 16 bytes
|
||||
* of the last plane. */
|
||||
if (desc_type == AC_DESC_PLANE_2) {
|
||||
LLVMValueRef descriptor2 =
|
||||
radv_get_sampler_desc(abi, descriptor_set, base_index, constant_index, index,
|
||||
AC_DESC_PLANE_1, image, write, bindless);
|
||||
if (desc_type == AC_DESC_PLANE_2 && index && LLVMTypeOf(index) == ctx->ac.i32) {
|
||||
LLVMValueRef plane1_addr =
|
||||
LLVMBuildSub(ctx->ac.builder, index, LLVMConstInt(ctx->ac.i32, 32, false), "");
|
||||
LLVMValueRef descriptor1 = radv_load_rsrc(ctx, plane1_addr, ctx->ac.v8i32);
|
||||
LLVMValueRef descriptor2 = radv_load_rsrc(ctx, index, ctx->ac.v4i32);
|
||||
|
||||
LLVMValueRef components[8];
|
||||
for (unsigned i = 0; i < 4; ++i)
|
||||
components[i] = ac_llvm_extract_elem(&ctx->ac, descriptor, i);
|
||||
components[i] = ac_llvm_extract_elem(&ctx->ac, descriptor2, i);
|
||||
|
||||
for (unsigned i = 4; i < 8; ++i)
|
||||
components[i] = ac_llvm_extract_elem(&ctx->ac, descriptor2, i);
|
||||
descriptor = ac_build_gather_values(&ctx->ac, components, 8);
|
||||
} else if (desc_type == AC_DESC_IMAGE &&
|
||||
ctx->options->has_image_load_dcc_bug &&
|
||||
image && !write) {
|
||||
LLVMValueRef components[8];
|
||||
|
||||
for (unsigned i = 0; i < 8; i++)
|
||||
components[i] = ac_llvm_extract_elem(&ctx->ac, descriptor, i);
|
||||
|
||||
/* WRITE_COMPRESS_ENABLE must be 0 for all image loads to workaround a hardware bug. */
|
||||
components[6] = LLVMBuildAnd(ctx->ac.builder, components[6],
|
||||
LLVMConstInt(ctx->ac.i32, C_00A018_WRITE_COMPRESS_ENABLE, false), "");
|
||||
|
||||
descriptor = ac_build_gather_values(&ctx->ac, components, 8);
|
||||
components[i] = ac_llvm_extract_elem(&ctx->ac, descriptor1, i);
|
||||
return ac_build_gather_values(&ctx->ac, components, 8);
|
||||
}
|
||||
|
||||
return descriptor;
|
||||
bool v4 = desc_type == AC_DESC_BUFFER || desc_type == AC_DESC_SAMPLER;
|
||||
return radv_load_rsrc(ctx, index, v4 ? ctx->ac.v4i32 : ctx->ac.v8i32);
|
||||
}
|
||||
|
||||
static LLVMValueRef
|
||||
|
@ -2223,7 +2135,6 @@ ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm,
|
|||
ctx.abi.clamp_shadow_reference = false;
|
||||
ctx.abi.adjust_frag_coord_z = options->adjust_frag_coord_z;
|
||||
ctx.abi.robust_buffer_access = options->robust_buffer_access;
|
||||
ctx.abi.disable_aniso_single_level = options->disable_aniso_single_level;
|
||||
ctx.abi.load_grid_size_from_user_sgpr = args->load_grid_size_from_user_sgpr;
|
||||
|
||||
bool is_ngg = is_pre_gs_stage(shaders[0]->info.stage) && info->is_ngg;
|
||||
|
|
|
@ -1889,7 +1889,6 @@ shader_compile(struct radv_device *device, struct vk_shader_module *module,
|
|||
options->enable_mrt_output_nan_fixup =
|
||||
module && !is_meta_shader(module->nir) && options->key.ps.enable_mrt_output_nan_fixup;
|
||||
options->adjust_frag_coord_z = options->key.adjust_frag_coord_z;
|
||||
options->disable_aniso_single_level = options->key.disable_aniso_single_level;
|
||||
options->has_image_load_dcc_bug = device->physical_device->rad_info.has_image_load_dcc_bug;
|
||||
options->debug.func = radv_compiler_debug;
|
||||
options->debug.private_data = &debug_data;
|
||||
|
|
|
@ -125,7 +125,6 @@ struct radv_nir_compiler_options {
|
|||
bool has_image_load_dcc_bug;
|
||||
bool enable_mrt_output_nan_fixup;
|
||||
bool wgp_mode;
|
||||
bool disable_aniso_single_level;
|
||||
enum radeon_family family;
|
||||
enum chip_class chip_class;
|
||||
const struct radeon_info *info;
|
||||
|
|
|
@ -520,7 +520,6 @@ static bool si_nir_build_llvm(struct si_shader_context *ctx, struct nir_shader *
|
|||
ctx->abi.robust_buffer_access = true;
|
||||
ctx->abi.convert_undef_to_zero = true;
|
||||
ctx->abi.adjust_frag_coord_z = false;
|
||||
ctx->abi.disable_aniso_single_level = true;
|
||||
ctx->abi.load_grid_size_from_user_sgpr = true;
|
||||
|
||||
const struct si_shader_info *info = &ctx->shader->selector->info;
|
||||
|
|
Loading…
Reference in New Issue