radv,aco: lower texture descriptor loads in NIR

fossil-db (Sienna Cichlid):
Totals from 39445 (24.30% of 162293) affected shaders:
MaxWaves: 875988 -> 875972 (-0.00%)
Instrs: 35372561 -> 35234909 (-0.39%); split: -0.41%, +0.03%
CodeSize: 190237480 -> 189379240 (-0.45%); split: -0.47%, +0.02%
VGPRs: 1889856 -> 1889928 (+0.00%); split: -0.00%, +0.01%
SpillSGPRs: 10764 -> 10857 (+0.86%); split: -2.04%, +2.91%
SpillVGPRs: 1891 -> 1907 (+0.85%); split: -0.32%, +1.16%
Scratch: 260096 -> 261120 (+0.39%)
Latency: 477701150 -> 477578466 (-0.03%); split: -0.06%, +0.03%
InvThroughput: 87819847 -> 87830346 (+0.01%); split: -0.03%, +0.04%
VClause: 673353 -> 673829 (+0.07%); split: -0.04%, +0.11%
SClause: 1385396 -> 1366478 (-1.37%); split: -1.65%, +0.29%
Copies: 2327965 -> 2229134 (-4.25%); split: -4.58%, +0.34%
Branches: 906707 -> 906434 (-0.03%); split: -0.13%, +0.10%
PreSGPRs: 1874153 -> 1862698 (-0.61%); split: -1.34%, +0.73%
PreVGPRs: 1691382 -> 1691383 (+0.00%); split: -0.00%, +0.00%

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Timur Kristóf <timur.kristof@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/12773>
This commit is contained in:
Rhys Perry 2021-08-12 15:36:56 +01:00 committed by Marge Bot
parent 52f850238a
commit 15640e58d9
9 changed files with 257 additions and 192 deletions

View File

@ -79,6 +79,17 @@ enum ac_fetch_format
AC_FETCH_FORMAT_NONE,
};
/* Kinds of hardware descriptors that can be loaded for an image/sampler
 * binding. Used to select the size and offset of the descriptor within a
 * combined descriptor slot. */
enum ac_descriptor_type
{
AC_DESC_IMAGE,
AC_DESC_FMASK,
AC_DESC_SAMPLER,
AC_DESC_BUFFER,
AC_DESC_PLANE_0, /* per-plane descriptors for multi-plane (e.g. YCbCr) formats */
AC_DESC_PLANE_1,
AC_DESC_PLANE_2,
};
unsigned ac_get_spi_shader_z_format(bool writes_z, bool writes_stencil, bool writes_samplemask);
unsigned ac_get_cb_shader_mask(unsigned spi_shader_col_format);

View File

@ -1355,7 +1355,9 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
case nir_op_vec2:
case nir_op_vec3:
case nir_op_vec4:
case nir_op_vec5: {
case nir_op_vec5:
case nir_op_vec8:
case nir_op_vec16: {
std::array<Temp, NIR_MAX_VEC_COMPONENTS> elems;
unsigned num = instr->dest.dest.ssa.num_components;
for (unsigned i = 0; i < num; ++i)
@ -8967,70 +8969,6 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
}
}
/* Fetch the resource (texture/buffer/FMASK) and optionally the sampler
 * descriptor for a NIR tex instruction, by walking its sources for the
 * texture/sampler deref and plane index.
 *
 * Outputs:
 *   res_ptr  - resource descriptor temp (image s8, or buffer/FMASK variant)
 *   samp_ptr - sampler descriptor temp (s4); only written if non-NULL
 *   stype    - GLSL result type of the sampled texture
 */
void
tex_fetch_ptrs(isel_context* ctx, nir_tex_instr* instr, Temp* res_ptr, Temp* samp_ptr,
enum glsl_base_type* stype)
{
nir_deref_instr* texture_deref_instr = NULL;
nir_deref_instr* sampler_deref_instr = NULL;
int plane = -1;
/* Scan the tex sources for the texture deref, sampler deref and plane. */
for (unsigned i = 0; i < instr->num_srcs; i++) {
switch (instr->src[i].src_type) {
case nir_tex_src_texture_deref:
texture_deref_instr = nir_src_as_deref(instr->src[i].src);
break;
case nir_tex_src_sampler_deref:
sampler_deref_instr = nir_src_as_deref(instr->src[i].src);
break;
case nir_tex_src_plane: plane = nir_src_as_int(instr->src[i].src); break;
default: break;
}
}
*stype = glsl_get_sampler_result_type(texture_deref_instr->type);
/* With no separate sampler deref, the sampler lives with the texture. */
if (!sampler_deref_instr)
sampler_deref_instr = texture_deref_instr;
/* Pick the descriptor kind matching the access: plane, buffer, FMASK or image. */
if (plane >= 0) {
assert(instr->sampler_dim != GLSL_SAMPLER_DIM_BUF);
*res_ptr = get_sampler_desc(ctx, texture_deref_instr,
(aco_descriptor_type)(ACO_DESC_PLANE_0 + plane), instr, false);
} else if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) {
*res_ptr = get_sampler_desc(ctx, texture_deref_instr, ACO_DESC_BUFFER, instr, false);
} else if (instr->op == nir_texop_fragment_mask_fetch_amd) {
*res_ptr = get_sampler_desc(ctx, texture_deref_instr, ACO_DESC_FMASK, instr, false);
} else {
*res_ptr = get_sampler_desc(ctx, texture_deref_instr, ACO_DESC_IMAGE, instr, false);
}
if (samp_ptr) {
*samp_ptr = get_sampler_desc(ctx, sampler_deref_instr, ACO_DESC_SAMPLER, instr, false);
/* Pre-GFX8 chips lack the ANISO_OVERRIDE sampler field, so the aniso
 * fixup must be applied in the shader. */
if (ctx->options->disable_aniso_single_level &&
instr->sampler_dim < GLSL_SAMPLER_DIM_RECT && ctx->options->chip_class < GFX8) {
/* fix sampler aniso on SI/CI: samp[0] = samp[0] & img[7] */
Builder bld(ctx->program, ctx->block);
/* to avoid unnecessary moves, we split and recombine sampler and image */
Temp img[8] = {bld.tmp(s1), bld.tmp(s1), bld.tmp(s1), bld.tmp(s1),
bld.tmp(s1), bld.tmp(s1), bld.tmp(s1), bld.tmp(s1)};
Temp samp[4] = {bld.tmp(s1), bld.tmp(s1), bld.tmp(s1), bld.tmp(s1)};
bld.pseudo(aco_opcode::p_split_vector, Definition(img[0]), Definition(img[1]),
Definition(img[2]), Definition(img[3]), Definition(img[4]), Definition(img[5]),
Definition(img[6]), Definition(img[7]), *res_ptr);
bld.pseudo(aco_opcode::p_split_vector, Definition(samp[0]), Definition(samp[1]),
Definition(samp[2]), Definition(samp[3]), *samp_ptr);
/* img[7] is the driver-provided mask clearing MAX_ANISO_RATIO when needed. */
samp[0] = bld.sop2(aco_opcode::s_and_b32, bld.def(s1), bld.def(s1, scc), samp[0], img[7]);
*res_ptr = bld.pseudo(aco_opcode::p_create_vector, bld.def(s8), img[0], img[1], img[2],
img[3], img[4], img[5], img[6], img[7]);
*samp_ptr = bld.pseudo(aco_opcode::p_create_vector, bld.def(s4), samp[0], samp[1], samp[2],
samp[3]);
}
}
}
void
build_cube_select(isel_context* ctx, Temp ma, Temp id, Temp deriv, Temp* out_ma, Temp* out_sc,
Temp* out_tc)
@ -9178,11 +9116,21 @@ visit_tex(isel_context* ctx, nir_tex_instr* instr)
std::vector<Temp> coords;
std::vector<Temp> derivs;
nir_const_value* const_offset[4] = {NULL, NULL, NULL, NULL};
enum glsl_base_type stype;
tex_fetch_ptrs(ctx, instr, &resource, &sampler, &stype);
for (unsigned i = 0; i < instr->num_srcs; i++) {
switch (instr->src[i].src_type) {
case nir_tex_src_texture_handle:
resource = bld.as_uniform(get_ssa_temp(ctx, instr->src[i].src.ssa));
break;
case nir_tex_src_sampler_handle:
sampler = bld.as_uniform(get_ssa_temp(ctx, instr->src[i].src.ssa));
break;
default: break;
}
}
bool tg4_integer_workarounds = ctx->options->chip_class <= GFX8 && instr->op == nir_texop_tg4 &&
(stype == GLSL_TYPE_UINT || stype == GLSL_TYPE_INT);
(instr->dest_type & (nir_type_int | nir_type_uint));
bool tg4_integer_cube_workaround =
tg4_integer_workarounds && instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE;
@ -9476,7 +9424,7 @@ visit_tex(isel_context* ctx, nir_tex_instr* instr)
Operand::c32(V_008F14_IMG_DATA_FORMAT_8_8_8_8));
Temp nfmt;
if (stype == GLSL_TYPE_UINT) {
if (instr->dest_type & nir_type_uint) {
nfmt = bld.sop2(aco_opcode::s_cselect_b32, bld.def(s1),
Operand::c32(V_008F14_IMG_NUM_FORMAT_USCALED),
Operand::c32(V_008F14_IMG_NUM_FORMAT_UINT), bld.scc(compare_cube_wa));
@ -9753,7 +9701,7 @@ visit_tex(isel_context* ctx, nir_tex_instr* instr)
for (unsigned i = 0; i < 4; i++) {
val[i] = emit_extract_vector(ctx, tmp_dst, i, v1);
Temp cvt_val;
if (stype == GLSL_TYPE_UINT)
if (instr->dest_type & nir_type_uint)
cvt_val = bld.vop1(aco_opcode::v_cvt_u32_f32, bld.def(v1), val[i]);
else
cvt_val = bld.vop1(aco_opcode::v_cvt_i32_f32, bld.def(v1), val[i]);

View File

@ -559,7 +559,7 @@ static LLVMValueRef exit_waterfall(struct ac_nir_context *ctx, struct waterfall_
static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
{
LLVMValueRef src[4], result = NULL;
LLVMValueRef src[16], result = NULL;
unsigned num_components = instr->dest.dest.ssa.num_components;
unsigned src_components;
LLVMTypeRef def_type = get_def_type(ctx, &instr->dest.dest.ssa);
@ -570,6 +570,8 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
case nir_op_vec3:
case nir_op_vec4:
case nir_op_vec5:
case nir_op_vec8:
case nir_op_vec16:
case nir_op_unpack_32_2x16:
case nir_op_unpack_64_2x32:
case nir_op_unpack_64_4x16:
@ -957,6 +959,8 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
case nir_op_vec3:
case nir_op_vec4:
case nir_op_vec5:
case nir_op_vec8:
case nir_op_vec16:
for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++)
src[i] = ac_to_integer(&ctx->ac, src[i]);
result = ac_build_gather_values(&ctx->ac, src, num_components);
@ -4486,7 +4490,7 @@ static LLVMValueRef sici_fix_sampler_aniso(struct ac_nir_context *ctx, LLVMValue
LLVMBuilderRef builder = ctx->ac.builder;
LLVMValueRef img7, samp0;
if (ctx->ac.chip_class >= GFX8 || !ctx->abi->disable_aniso_single_level)
if (ctx->ac.chip_class >= GFX8)
return samp;
img7 = LLVMBuildExtractElement(builder, res, LLVMConstInt(ctx->ac.i32, 7, 0), "");

View File

@ -25,6 +25,7 @@
#define AC_SHADER_ABI_H
#include "ac_shader_args.h"
#include "ac_shader_util.h"
#include "compiler/shader_enums.h"
#include <llvm-c/Core.h>
@ -34,17 +35,6 @@
#define AC_MAX_INLINE_PUSH_CONSTS 8
/* Descriptor kinds for image/sampler bindings; selects which part of a
 * combined descriptor slot is loaded. (Old location in this header; the
 * enum is duplicated in ac_shader_util.h in this view.) */
enum ac_descriptor_type
{
AC_DESC_IMAGE,
AC_DESC_FMASK,
AC_DESC_SAMPLER,
AC_DESC_BUFFER,
AC_DESC_PLANE_0, /* per-plane descriptors for multi-plane (e.g. YCbCr) formats */
AC_DESC_PLANE_1,
AC_DESC_PLANE_2,
};
/* Document the shader ABI during compilation. This is what allows radeonsi and
* radv to share a compiler backend.
*/
@ -159,11 +149,6 @@ struct ac_shader_abi {
*/
bool adjust_frag_coord_z;
/* Whether anisotropic filtering should be disabled for single level
* images.
*/
bool disable_aniso_single_level;
/* Whether to inline the compute dispatch size in user sgprs. */
bool load_grid_size_from_user_sgpr;
};

View File

@ -21,6 +21,7 @@
* IN THE SOFTWARE.
*
*/
#include "ac_shader_util.h"
#include "nir.h"
#include "nir_builder.h"
#include "radv_private.h"
@ -30,6 +31,7 @@
typedef struct {
enum chip_class chip_class;
uint32_t address32_hi;
bool disable_aniso_single_level;
const struct radv_shader_args *args;
const struct radv_shader_info *info;
@ -218,6 +220,122 @@ visit_get_ssbo_size(nir_builder *b, apply_layout_state *state, nir_intrinsic_ins
nir_instr_remove(&intrin->instr);
}
/* Build NIR that loads the requested descriptor (image/FMASK/sampler/buffer/
 * plane) for a texture/sampler deref from the descriptor set memory.
 *
 * Returns either the loaded descriptor vector (4 or 8 x 32-bit components),
 * an immediate vec4 for immutable samplers, or — for non-uniform accesses —
 * a 32-bit address (descriptor-set pointer + offset) to be waterfalled later.
 *
 * b           - NIR builder, cursor already positioned by the caller
 * state       - per-shader lowering state (pipeline layout, chip info)
 * deref       - texture or sampler variable deref (possibly an array chain)
 * desc_type   - which descriptor within the binding to load
 * non_uniform - true if the index is divergent; return an address instead
 * tex         - the tex instruction being lowered (used for the tg4 fixup)
 */
static nir_ssa_def *
get_sampler_desc(nir_builder *b, apply_layout_state *state, nir_deref_instr *deref,
enum ac_descriptor_type desc_type, bool non_uniform, nir_tex_instr *tex)
{
nir_variable *var = nir_deref_instr_get_variable(deref);
assert(var);
unsigned desc_set = var->data.descriptor_set;
unsigned binding_index = var->data.binding;
bool indirect = nir_deref_instr_has_indirect(deref);
struct radv_descriptor_set_layout *layout = state->pipeline_layout->set[desc_set].layout;
struct radv_descriptor_set_binding_layout *binding = &layout->binding[binding_index];
/* Handle immutable (compile-time) samplers (VkDescriptorSetLayoutBinding::pImmutableSamplers)
 * We can only do this for constant array index or if all samplers in the array are the same.
 */
if (desc_type == AC_DESC_SAMPLER && binding->immutable_samplers_offset &&
(!indirect || binding->immutable_samplers_equal)) {
unsigned constant_index = 0;
/* Fold the (constant) array deref chain into a flat sampler index. */
if (!binding->immutable_samplers_equal) {
while (deref->deref_type != nir_deref_type_var) {
assert(deref->deref_type == nir_deref_type_array);
unsigned array_size = MAX2(glsl_get_aoa_size(deref->type), 1);
constant_index += nir_src_as_uint(deref->arr.index) * array_size;
deref = nir_deref_instr_parent(deref);
}
}
const uint32_t *samplers = radv_immutable_samplers(layout, binding);
/* Immutable sampler words are known at compile time: emit an immediate. */
return nir_imm_ivec4(b, samplers[constant_index * 4 + 0], samplers[constant_index * 4 + 1],
samplers[constant_index * 4 + 2], samplers[constant_index * 4 + 3]);
}
/* Descriptor size in 32-bit components (8 = image, 4 = sampler/buffer) and
 * byte offset of this descriptor kind within the binding. */
unsigned size = 8;
unsigned offset = binding->offset;
switch (desc_type) {
case AC_DESC_IMAGE:
case AC_DESC_PLANE_0:
break;
case AC_DESC_FMASK:
case AC_DESC_PLANE_1:
/* FMASK / plane 1 follow the 32-byte image descriptor. */
offset += 32;
break;
case AC_DESC_SAMPLER:
size = 4;
if (binding->type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER)
offset += radv_combined_image_descriptor_sampler_offset(binding);
break;
case AC_DESC_BUFFER:
size = 4;
break;
case AC_DESC_PLANE_2:
size = 4;
offset += 64;
break;
}
/* Accumulate the dynamic array index (in bytes) from the deref chain,
 * innermost dimension first; mark the adds/muls no_unsigned_wrap so later
 * optimizations can reassociate them. */
nir_ssa_def *index = NULL;
while (deref->deref_type != nir_deref_type_var) {
assert(deref->deref_type == nir_deref_type_array);
unsigned array_size = MAX2(glsl_get_aoa_size(deref->type), 1);
array_size *= binding->size;
nir_ssa_def *tmp = nir_imul_imm(b, deref->arr.index.ssa, array_size);
if (tmp != deref->arr.index.ssa)
nir_instr_as_alu(tmp->parent_instr)->no_unsigned_wrap = true;
if (index) {
index = nir_iadd(b, tmp, index);
nir_instr_as_alu(index->parent_instr)->no_unsigned_wrap = true;
} else {
index = tmp;
}
deref = nir_deref_instr_parent(deref);
}
nir_ssa_def *index_offset = index ? nir_iadd_imm(b, index, offset) : nir_imm_int(b, offset);
if (index && index_offset != index)
nir_instr_as_alu(index_offset->parent_instr)->no_unsigned_wrap = true;
/* Divergent index: return the address so the backend can waterfall the load. */
if (non_uniform)
return nir_iadd(b, load_desc_ptr(b, state, desc_set), index_offset);
nir_ssa_def *addr = convert_pointer_to_64_bit(b, state, load_desc_ptr(b, state, desc_set));
nir_ssa_def *desc = nir_load_smem_amd(b, size, addr, index_offset, .align_mul = size * 4u);
/* 3 plane formats always have same size and format for plane 1 & 2, so
 * use the tail from plane 1 so that we can store only the first 16 bytes
 * of the last plane. */
if (desc_type == AC_DESC_PLANE_2) {
nir_ssa_def *desc2 = get_sampler_desc(b, state, deref, AC_DESC_PLANE_1, non_uniform, tex);
nir_ssa_def *comp[8];
for (unsigned i = 0; i < 4; i++)
comp[i] = nir_channel(b, desc, i);
for (unsigned i = 4; i < 8; i++)
comp[i] = nir_channel(b, desc2, i);
return nir_vec(b, comp, 8);
} else if (desc_type == AC_DESC_SAMPLER && tex->op == nir_texop_tg4) {
nir_ssa_def *comp[4];
for (unsigned i = 0; i < 4; i++)
comp[i] = nir_channel(b, desc, i);
/* We want to always use the linear filtering truncation behaviour for
 * nir_texop_tg4, even if the sampler uses nearest/point filtering.
 */
comp[0] = nir_iand_imm(b, comp[0], C_008F30_TRUNC_COORD);
return nir_vec(b, comp, 4);
}
return desc;
}
static void
apply_layout_to_intrin(nir_builder *b, apply_layout_state *state, nir_intrinsic_instr *intrin)
{
@ -263,6 +381,94 @@ apply_layout_to_intrin(nir_builder *b, apply_layout_state *state, nir_intrinsic_
}
}
/* Lower a tex instruction's texture/sampler derefs to loaded descriptor
 * handles: find the derefs (and plane), load the matching descriptors via
 * get_sampler_desc(), then rewrite the deref sources in place as
 * nir_tex_src_texture_handle / nir_tex_src_sampler_handle. */
static void
apply_layout_to_tex(nir_builder *b, apply_layout_state *state, nir_tex_instr *tex)
{
/* Emit the descriptor loads immediately before the tex instruction. */
b->cursor = nir_before_instr(&tex->instr);
nir_deref_instr *texture_deref_instr = NULL;
nir_deref_instr *sampler_deref_instr = NULL;
int plane = -1;
/* Pass 1: locate the texture deref, sampler deref and plane sources. */
for (unsigned i = 0; i < tex->num_srcs; i++) {
switch (tex->src[i].src_type) {
case nir_tex_src_texture_deref:
texture_deref_instr = nir_src_as_deref(tex->src[i].src);
break;
case nir_tex_src_sampler_deref:
sampler_deref_instr = nir_src_as_deref(tex->src[i].src);
break;
case nir_tex_src_plane:
plane = nir_src_as_int(tex->src[i].src);
break;
default:
break;
}
}
nir_ssa_def *image = NULL;
nir_ssa_def *sampler = NULL;
/* Choose the descriptor kind matching the access: plane, buffer, FMASK or image. */
if (plane >= 0) {
assert(tex->op != nir_texop_txf_ms && tex->op != nir_texop_samples_identical);
assert(tex->sampler_dim != GLSL_SAMPLER_DIM_BUF);
image = get_sampler_desc(b, state, texture_deref_instr, AC_DESC_PLANE_0 + plane,
tex->texture_non_uniform, tex);
} else if (tex->sampler_dim == GLSL_SAMPLER_DIM_BUF) {
image = get_sampler_desc(b, state, texture_deref_instr, AC_DESC_BUFFER,
tex->texture_non_uniform, tex);
} else if (tex->op == nir_texop_fragment_mask_fetch_amd ||
tex->op == nir_texop_samples_identical) {
image = get_sampler_desc(b, state, texture_deref_instr, AC_DESC_FMASK,
tex->texture_non_uniform, tex);
} else {
image = get_sampler_desc(b, state, texture_deref_instr, AC_DESC_IMAGE,
tex->texture_non_uniform, tex);
}
if (sampler_deref_instr) {
sampler = get_sampler_desc(b, state, sampler_deref_instr, AC_DESC_SAMPLER,
tex->sampler_non_uniform, tex);
if (state->disable_aniso_single_level && tex->sampler_dim < GLSL_SAMPLER_DIM_RECT &&
state->chip_class < GFX8) {
/* Disable anisotropic filtering if BASE_LEVEL == LAST_LEVEL.
 *
 * GFX6-GFX7:
 * If BASE_LEVEL == LAST_LEVEL, the shader must disable anisotropic
 * filtering manually. The driver sets img7 to a mask clearing
 * MAX_ANISO_RATIO if BASE_LEVEL == LAST_LEVEL. The shader must do:
 * s_and_b32 samp0, samp0, img7
 *
 * GFX8:
 * The ANISO_OVERRIDE sampler field enables this fix in TA.
 */
/* TODO: This is unnecessary for combined image+sampler.
 * We can do this when updating the desc set. */
nir_ssa_def *comp[4];
for (unsigned i = 0; i < 4; i++)
comp[i] = nir_channel(b, sampler, i);
comp[0] = nir_iand(b, comp[0], nir_channel(b, image, 7));
sampler = nir_vec(b, comp, 4);
}
}
/* Pass 2: replace the deref sources with the loaded descriptor handles. */
for (unsigned i = 0; i < tex->num_srcs; i++) {
switch (tex->src[i].src_type) {
case nir_tex_src_texture_deref:
tex->src[i].src_type = nir_tex_src_texture_handle;
nir_instr_rewrite_src_ssa(&tex->instr, &tex->src[i].src, image);
break;
case nir_tex_src_sampler_deref:
tex->src[i].src_type = nir_tex_src_sampler_handle;
nir_instr_rewrite_src_ssa(&tex->instr, &tex->src[i].src, sampler);
break;
default:
break;
}
}
}
void
radv_nir_apply_pipeline_layout(nir_shader *shader, struct radv_device *device,
const struct radv_pipeline_layout *layout,
@ -272,6 +478,7 @@ radv_nir_apply_pipeline_layout(nir_shader *shader, struct radv_device *device,
apply_layout_state state = {
.chip_class = device->physical_device->rad_info.chip_class,
.address32_hi = device->physical_device->rad_info.address32_hi,
.disable_aniso_single_level = device->instance->disable_aniso_single_level,
.args = args,
.info = info,
.pipeline_layout = layout,
@ -291,7 +498,9 @@ radv_nir_apply_pipeline_layout(nir_shader *shader, struct radv_device *device,
*/
nir_foreach_block_reverse (block, function->impl) {
nir_foreach_instr_reverse_safe (instr, block) {
if (instr->type == nir_instr_type_intrinsic)
if (instr->type == nir_instr_type_tex)
apply_layout_to_tex(&b, &state, nir_instr_as_tex(instr));
else if (instr->type == nir_instr_type_intrinsic)
apply_layout_to_intrin(&b, &state, nir_instr_as_intrinsic(instr));
}
}

View File

@ -403,118 +403,30 @@ radv_get_sampler_desc(struct ac_shader_abi *abi, unsigned descriptor_set, unsign
enum ac_descriptor_type desc_type, bool image, bool write, bool bindless)
{
struct radv_shader_context *ctx = radv_shader_context_from_abi(abi);
LLVMValueRef list = ctx->descriptor_sets[descriptor_set];
struct radv_descriptor_set_layout *layout =
ctx->options->layout->set[descriptor_set].layout;
struct radv_descriptor_set_binding_layout *binding = layout->binding + base_index;
unsigned offset = binding->offset;
unsigned stride = binding->size;
unsigned type_size;
LLVMBuilderRef builder = ctx->ac.builder;
LLVMTypeRef type;
assert(base_index < layout->binding_count);
if (binding->type == VK_DESCRIPTOR_TYPE_STORAGE_IMAGE && desc_type == AC_DESC_FMASK)
if (image && desc_type == AC_DESC_FMASK)
return NULL;
switch (desc_type) {
case AC_DESC_IMAGE:
type = ctx->ac.v8i32;
type_size = 32;
break;
case AC_DESC_FMASK:
type = ctx->ac.v8i32;
offset += 32;
type_size = 32;
break;
case AC_DESC_SAMPLER:
type = ctx->ac.v4i32;
if (binding->type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) {
offset += radv_combined_image_descriptor_sampler_offset(binding);
}
type_size = 16;
break;
case AC_DESC_BUFFER:
type = ctx->ac.v4i32;
type_size = 16;
break;
case AC_DESC_PLANE_0:
case AC_DESC_PLANE_1:
case AC_DESC_PLANE_2:
type = ctx->ac.v8i32;
type_size = 32;
offset += 32 * (desc_type - AC_DESC_PLANE_0);
break;
default:
unreachable("invalid desc_type\n");
}
offset += constant_index * stride;
if (desc_type == AC_DESC_SAMPLER && binding->immutable_samplers_offset &&
(!index || binding->immutable_samplers_equal)) {
if (binding->immutable_samplers_equal)
constant_index = 0;
const uint32_t *samplers = radv_immutable_samplers(layout, binding);
LLVMValueRef constants[] = {
LLVMConstInt(ctx->ac.i32, samplers[constant_index * 4 + 0], 0),
LLVMConstInt(ctx->ac.i32, samplers[constant_index * 4 + 1], 0),
LLVMConstInt(ctx->ac.i32, samplers[constant_index * 4 + 2], 0),
LLVMConstInt(ctx->ac.i32, samplers[constant_index * 4 + 3], 0),
};
return ac_build_gather_values(&ctx->ac, constants, 4);
}
assert(stride % type_size == 0);
LLVMValueRef adjusted_index = index;
if (!adjusted_index)
adjusted_index = ctx->ac.i32_0;
adjusted_index =
LLVMBuildMul(builder, adjusted_index, LLVMConstInt(ctx->ac.i32, stride / type_size, 0), "");
LLVMValueRef val_offset = LLVMConstInt(ctx->ac.i32, offset, 0);
list = LLVMBuildGEP(builder, list, &val_offset, 1, "");
list = LLVMBuildPointerCast(builder, list, ac_array_in_const32_addr_space(type), "");
LLVMValueRef descriptor = ac_build_load_to_sgpr(&ctx->ac, list, adjusted_index);
/* 3 plane formats always have same size and format for plane 1 & 2, so
* use the tail from plane 1 so that we can store only the first 16 bytes
* of the last plane. */
if (desc_type == AC_DESC_PLANE_2) {
LLVMValueRef descriptor2 =
radv_get_sampler_desc(abi, descriptor_set, base_index, constant_index, index,
AC_DESC_PLANE_1, image, write, bindless);
if (desc_type == AC_DESC_PLANE_2 && index && LLVMTypeOf(index) == ctx->ac.i32) {
LLVMValueRef plane1_addr =
LLVMBuildSub(ctx->ac.builder, index, LLVMConstInt(ctx->ac.i32, 32, false), "");
LLVMValueRef descriptor1 = radv_load_rsrc(ctx, plane1_addr, ctx->ac.v8i32);
LLVMValueRef descriptor2 = radv_load_rsrc(ctx, index, ctx->ac.v4i32);
LLVMValueRef components[8];
for (unsigned i = 0; i < 4; ++i)
components[i] = ac_llvm_extract_elem(&ctx->ac, descriptor, i);
components[i] = ac_llvm_extract_elem(&ctx->ac, descriptor2, i);
for (unsigned i = 4; i < 8; ++i)
components[i] = ac_llvm_extract_elem(&ctx->ac, descriptor2, i);
descriptor = ac_build_gather_values(&ctx->ac, components, 8);
} else if (desc_type == AC_DESC_IMAGE &&
ctx->options->has_image_load_dcc_bug &&
image && !write) {
LLVMValueRef components[8];
for (unsigned i = 0; i < 8; i++)
components[i] = ac_llvm_extract_elem(&ctx->ac, descriptor, i);
/* WRITE_COMPRESS_ENABLE must be 0 for all image loads to workaround a hardware bug. */
components[6] = LLVMBuildAnd(ctx->ac.builder, components[6],
LLVMConstInt(ctx->ac.i32, C_00A018_WRITE_COMPRESS_ENABLE, false), "");
descriptor = ac_build_gather_values(&ctx->ac, components, 8);
components[i] = ac_llvm_extract_elem(&ctx->ac, descriptor1, i);
return ac_build_gather_values(&ctx->ac, components, 8);
}
return descriptor;
bool v4 = desc_type == AC_DESC_BUFFER || desc_type == AC_DESC_SAMPLER;
return radv_load_rsrc(ctx, index, v4 ? ctx->ac.v4i32 : ctx->ac.v8i32);
}
static LLVMValueRef
@ -2223,7 +2135,6 @@ ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm,
ctx.abi.clamp_shadow_reference = false;
ctx.abi.adjust_frag_coord_z = options->adjust_frag_coord_z;
ctx.abi.robust_buffer_access = options->robust_buffer_access;
ctx.abi.disable_aniso_single_level = options->disable_aniso_single_level;
ctx.abi.load_grid_size_from_user_sgpr = args->load_grid_size_from_user_sgpr;
bool is_ngg = is_pre_gs_stage(shaders[0]->info.stage) && info->is_ngg;

View File

@ -1889,7 +1889,6 @@ shader_compile(struct radv_device *device, struct vk_shader_module *module,
options->enable_mrt_output_nan_fixup =
module && !is_meta_shader(module->nir) && options->key.ps.enable_mrt_output_nan_fixup;
options->adjust_frag_coord_z = options->key.adjust_frag_coord_z;
options->disable_aniso_single_level = options->key.disable_aniso_single_level;
options->has_image_load_dcc_bug = device->physical_device->rad_info.has_image_load_dcc_bug;
options->debug.func = radv_compiler_debug;
options->debug.private_data = &debug_data;

View File

@ -125,7 +125,6 @@ struct radv_nir_compiler_options {
bool has_image_load_dcc_bug;
bool enable_mrt_output_nan_fixup;
bool wgp_mode;
bool disable_aniso_single_level;
enum radeon_family family;
enum chip_class chip_class;
const struct radeon_info *info;

View File

@ -520,7 +520,6 @@ static bool si_nir_build_llvm(struct si_shader_context *ctx, struct nir_shader *
ctx->abi.robust_buffer_access = true;
ctx->abi.convert_undef_to_zero = true;
ctx->abi.adjust_frag_coord_z = false;
ctx->abi.disable_aniso_single_level = true;
ctx->abi.load_grid_size_from_user_sgpr = true;
const struct si_shader_info *info = &ctx->shader->selector->info;