ac,radeonsi: lower 64-bit IO to 32 bits and remove all dead code
Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6623>
This commit is contained in:
parent
e7d606289c
commit
9fc9615274
|
@ -2424,10 +2424,8 @@ static void visit_store_output(struct ac_nir_context *ctx, nir_intrinsic_instr *
|
||||||
case 32:
|
case 32:
|
||||||
break;
|
break;
|
||||||
case 64:
|
case 64:
|
||||||
writemask = widen_mask(writemask, 2);
|
unreachable("64-bit IO should have been lowered to 32 bits");
|
||||||
src = LLVMBuildBitCast(ctx->ac.builder, src,
|
return;
|
||||||
LLVMVectorType(ctx->ac.f32, ac_get_llvm_num_components(src) * 2), "");
|
|
||||||
break;
|
|
||||||
default:
|
default:
|
||||||
unreachable("unhandled store_output bit size");
|
unreachable("unhandled store_output bit size");
|
||||||
return;
|
return;
|
||||||
|
@ -3404,12 +3402,24 @@ static LLVMValueRef visit_load(struct ac_nir_context *ctx, nir_intrinsic_instr *
|
||||||
LLVMTypeRef component_type;
|
LLVMTypeRef component_type;
|
||||||
unsigned base = nir_intrinsic_base(instr);
|
unsigned base = nir_intrinsic_base(instr);
|
||||||
unsigned component = nir_intrinsic_component(instr);
|
unsigned component = nir_intrinsic_component(instr);
|
||||||
unsigned count = instr->dest.ssa.num_components * (instr->dest.ssa.bit_size == 64 ? 2 : 1);
|
unsigned count = instr->dest.ssa.num_components;
|
||||||
nir_src *vertex_index_src = nir_get_io_vertex_index_src(instr);
|
nir_src *vertex_index_src = nir_get_io_vertex_index_src(instr);
|
||||||
LLVMValueRef vertex_index = vertex_index_src ? get_src(ctx, *vertex_index_src) : NULL;
|
LLVMValueRef vertex_index = vertex_index_src ? get_src(ctx, *vertex_index_src) : NULL;
|
||||||
nir_src offset = *nir_get_io_offset_src(instr);
|
nir_src offset = *nir_get_io_offset_src(instr);
|
||||||
LLVMValueRef indir_index = NULL;
|
LLVMValueRef indir_index = NULL;
|
||||||
|
|
||||||
|
switch (instr->dest.ssa.bit_size) {
|
||||||
|
case 16:
|
||||||
|
case 32:
|
||||||
|
break;
|
||||||
|
case 64:
|
||||||
|
unreachable("64-bit IO should have been lowered");
|
||||||
|
return NULL;
|
||||||
|
default:
|
||||||
|
unreachable("unhandled load type");
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
if (LLVMGetTypeKind(dest_type) == LLVMVectorTypeKind)
|
if (LLVMGetTypeKind(dest_type) == LLVMVectorTypeKind)
|
||||||
component_type = LLVMGetElementType(dest_type);
|
component_type = LLVMGetElementType(dest_type);
|
||||||
else
|
else
|
||||||
|
@ -3420,10 +3430,13 @@ static LLVMValueRef visit_load(struct ac_nir_context *ctx, nir_intrinsic_instr *
|
||||||
else
|
else
|
||||||
indir_index = get_src(ctx, offset);
|
indir_index = get_src(ctx, offset);
|
||||||
|
|
||||||
if (ctx->stage == MESA_SHADER_TESS_CTRL || (ctx->stage == MESA_SHADER_TESS_EVAL && !is_output)) {
|
if (ctx->stage == MESA_SHADER_TESS_CTRL ||
|
||||||
LLVMValueRef result = ctx->abi->load_tess_varyings(
|
(ctx->stage == MESA_SHADER_TESS_EVAL && !is_output)) {
|
||||||
ctx->abi, component_type, vertex_index, indir_index, 0, 0, base * 4, component,
|
LLVMValueRef result = ctx->abi->load_tess_varyings(ctx->abi, component_type,
|
||||||
instr->num_components, false, false, !is_output);
|
vertex_index, indir_index,
|
||||||
|
0, 0, base * 4,
|
||||||
|
component, count,
|
||||||
|
false, false, !is_output);
|
||||||
if (instr->dest.ssa.bit_size == 16) {
|
if (instr->dest.ssa.bit_size == 16) {
|
||||||
result = ac_to_integer(&ctx->ac, result);
|
result = ac_to_integer(&ctx->ac, result);
|
||||||
result = LLVMBuildTrunc(ctx->ac.builder, result, dest_type, "");
|
result = LLVMBuildTrunc(ctx->ac.builder, result, dest_type, "");
|
||||||
|
@ -3435,11 +3448,10 @@ static LLVMValueRef visit_load(struct ac_nir_context *ctx, nir_intrinsic_instr *
|
||||||
assert(!indir_index);
|
assert(!indir_index);
|
||||||
|
|
||||||
if (ctx->stage == MESA_SHADER_GEOMETRY) {
|
if (ctx->stage == MESA_SHADER_GEOMETRY) {
|
||||||
LLVMTypeRef type = LLVMIntTypeInContext(ctx->ac.context, instr->dest.ssa.bit_size);
|
|
||||||
assert(nir_src_is_const(*vertex_index_src));
|
assert(nir_src_is_const(*vertex_index_src));
|
||||||
|
|
||||||
return ctx->abi->load_inputs(ctx->abi, 0, base * 4, component, instr->num_components,
|
return ctx->abi->load_inputs(ctx->abi, 0, base * 4, component, count,
|
||||||
nir_src_as_uint(*vertex_index_src), 0, type);
|
nir_src_as_uint(*vertex_index_src), 0, component_type);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (ctx->stage == MESA_SHADER_FRAGMENT && is_output &&
|
if (ctx->stage == MESA_SHADER_FRAGMENT && is_output &&
|
||||||
|
@ -3485,8 +3497,6 @@ static LLVMValueRef visit_load(struct ac_nir_context *ctx, nir_intrinsic_instr *
|
||||||
LLVMValueRef attr_number = LLVMConstInt(ctx->ac.i32, base, false);
|
LLVMValueRef attr_number = LLVMConstInt(ctx->ac.i32, base, false);
|
||||||
|
|
||||||
for (unsigned chan = 0; chan < count; chan++) {
|
for (unsigned chan = 0; chan < count; chan++) {
|
||||||
if (component + chan > 4)
|
|
||||||
attr_number = LLVMConstInt(ctx->ac.i32, base + 1, false);
|
|
||||||
LLVMValueRef llvm_chan = LLVMConstInt(ctx->ac.i32, (component + chan) % 4, false);
|
LLVMValueRef llvm_chan = LLVMConstInt(ctx->ac.i32, (component + chan) % 4, false);
|
||||||
values[chan] =
|
values[chan] =
|
||||||
ac_build_fs_interp_mov(&ctx->ac, LLVMConstInt(ctx->ac.i32, vertex_id, false), llvm_chan,
|
ac_build_fs_interp_mov(&ctx->ac, LLVMConstInt(ctx->ac.i32, vertex_id, false), llvm_chan,
|
||||||
|
|
|
@ -242,8 +242,6 @@ LLVMValueRef si_insert_input_ret_float(struct si_shader_context *ctx, LLVMValueR
|
||||||
LLVMValueRef si_insert_input_ptr(struct si_shader_context *ctx, LLVMValueRef ret,
|
LLVMValueRef si_insert_input_ptr(struct si_shader_context *ctx, LLVMValueRef ret,
|
||||||
struct ac_arg param, unsigned return_index);
|
struct ac_arg param, unsigned return_index);
|
||||||
LLVMValueRef si_prolog_get_rw_buffers(struct si_shader_context *ctx);
|
LLVMValueRef si_prolog_get_rw_buffers(struct si_shader_context *ctx);
|
||||||
LLVMValueRef si_build_gather_64bit(struct si_shader_context *ctx, LLVMTypeRef type,
|
|
||||||
LLVMValueRef val1, LLVMValueRef val2);
|
|
||||||
void si_llvm_emit_barrier(struct si_shader_context *ctx);
|
void si_llvm_emit_barrier(struct si_shader_context *ctx);
|
||||||
void si_llvm_declare_esgs_ring(struct si_shader_context *ctx);
|
void si_llvm_declare_esgs_ring(struct si_shader_context *ctx);
|
||||||
void si_init_exec_from_input(struct si_shader_context *ctx, struct ac_arg param,
|
void si_init_exec_from_input(struct si_shader_context *ctx, struct ac_arg param,
|
||||||
|
|
|
@ -262,17 +262,6 @@ LLVMValueRef si_prolog_get_rw_buffers(struct si_shader_context *ctx)
|
||||||
return list;
|
return list;
|
||||||
}
|
}
|
||||||
|
|
||||||
LLVMValueRef si_build_gather_64bit(struct si_shader_context *ctx, LLVMTypeRef type,
|
|
||||||
LLVMValueRef val1, LLVMValueRef val2)
|
|
||||||
{
|
|
||||||
LLVMValueRef values[2] = {
|
|
||||||
ac_to_integer(&ctx->ac, val1),
|
|
||||||
ac_to_integer(&ctx->ac, val2),
|
|
||||||
};
|
|
||||||
LLVMValueRef result = ac_build_gather_values(&ctx->ac, values, 2);
|
|
||||||
return LLVMBuildBitCast(ctx->ac.builder, result, type, "");
|
|
||||||
}
|
|
||||||
|
|
||||||
void si_llvm_emit_barrier(struct si_shader_context *ctx)
|
void si_llvm_emit_barrier(struct si_shader_context *ctx)
|
||||||
{
|
{
|
||||||
/* GFX6 only (thanks to a hw bug workaround):
|
/* GFX6 only (thanks to a hw bug workaround):
|
||||||
|
|
|
@ -79,11 +79,6 @@ static LLVMValueRef si_llvm_load_input_gs(struct ac_shader_abi *abi, unsigned in
|
||||||
|
|
||||||
LLVMValueRef ptr = ac_build_gep0(&ctx->ac, ctx->esgs_ring, vtx_offset);
|
LLVMValueRef ptr = ac_build_gep0(&ctx->ac, ctx->esgs_ring, vtx_offset);
|
||||||
LLVMValueRef value = LLVMBuildLoad(ctx->ac.builder, ptr, "");
|
LLVMValueRef value = LLVMBuildLoad(ctx->ac.builder, ptr, "");
|
||||||
if (ac_get_type_size(type) == 8) {
|
|
||||||
ptr = LLVMBuildGEP(ctx->ac.builder, ptr, &ctx->ac.i32_1, 1, "");
|
|
||||||
LLVMValueRef values[2] = {value, LLVMBuildLoad(ctx->ac.builder, ptr, "")};
|
|
||||||
value = ac_build_gather_values(&ctx->ac, values, 2);
|
|
||||||
}
|
|
||||||
return LLVMBuildBitCast(ctx->ac.builder, value, type, "");
|
return LLVMBuildBitCast(ctx->ac.builder, value, type, "");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -97,14 +92,6 @@ static LLVMValueRef si_llvm_load_input_gs(struct ac_shader_abi *abi, unsigned in
|
||||||
|
|
||||||
value = ac_build_buffer_load(&ctx->ac, ctx->esgs_ring, 1, ctx->ac.i32_0, vtx_offset, soffset, 0,
|
value = ac_build_buffer_load(&ctx->ac, ctx->esgs_ring, 1, ctx->ac.i32_0, vtx_offset, soffset, 0,
|
||||||
ac_glc, true, false);
|
ac_glc, true, false);
|
||||||
if (ac_get_type_size(type) == 8) {
|
|
||||||
LLVMValueRef value2;
|
|
||||||
soffset = LLVMConstInt(ctx->ac.i32, (param * 4 + swizzle + 1) * 256, 0);
|
|
||||||
|
|
||||||
value2 = ac_build_buffer_load(&ctx->ac, ctx->esgs_ring, 1, ctx->ac.i32_0, vtx_offset, soffset,
|
|
||||||
0, ac_glc, true, false);
|
|
||||||
return si_build_gather_64bit(ctx, type, value, value2);
|
|
||||||
}
|
|
||||||
return LLVMBuildBitCast(ctx->ac.builder, value, type, "");
|
return LLVMBuildBitCast(ctx->ac.builder, value, type, "");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -116,14 +103,9 @@ static LLVMValueRef si_nir_load_input_gs(struct ac_shader_abi *abi, unsigned loc
|
||||||
struct si_shader_context *ctx = si_shader_context_from_abi(abi);
|
struct si_shader_context *ctx = si_shader_context_from_abi(abi);
|
||||||
|
|
||||||
LLVMValueRef value[4];
|
LLVMValueRef value[4];
|
||||||
for (unsigned i = 0; i < num_components; i++) {
|
for (unsigned i = component; i < component + num_components; i++) {
|
||||||
unsigned offset = i;
|
value[i] = si_llvm_load_input_gs(&ctx->abi, driver_location / 4 + const_index,
|
||||||
if (ac_get_type_size(type) == 8)
|
vertex_index, type, i);
|
||||||
offset *= 2;
|
|
||||||
|
|
||||||
offset += component;
|
|
||||||
value[i + component] = si_llvm_load_input_gs(&ctx->abi, driver_location / 4 + const_index,
|
|
||||||
vertex_index, type, offset);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return ac_build_varying_gather_values(&ctx->ac, value, num_components, component);
|
return ac_build_varying_gather_values(&ctx->ac, value, num_components, component);
|
||||||
|
|
|
@ -269,7 +269,7 @@ static LLVMValueRef buffer_load(struct si_shader_context *ctx, LLVMTypeRef type,
|
||||||
LLVMValueRef buffer, LLVMValueRef offset, LLVMValueRef base,
|
LLVMValueRef buffer, LLVMValueRef offset, LLVMValueRef base,
|
||||||
bool can_speculate)
|
bool can_speculate)
|
||||||
{
|
{
|
||||||
LLVMValueRef value, value2;
|
LLVMValueRef value;
|
||||||
LLVMTypeRef vec_type = LLVMVectorType(type, 4);
|
LLVMTypeRef vec_type = LLVMVectorType(type, 4);
|
||||||
|
|
||||||
if (swizzle == ~0) {
|
if (swizzle == ~0) {
|
||||||
|
@ -279,22 +279,12 @@ static LLVMValueRef buffer_load(struct si_shader_context *ctx, LLVMTypeRef type,
|
||||||
return LLVMBuildBitCast(ctx->ac.builder, value, vec_type, "");
|
return LLVMBuildBitCast(ctx->ac.builder, value, vec_type, "");
|
||||||
}
|
}
|
||||||
|
|
||||||
if (ac_get_type_size(type) != 8) {
|
value = ac_build_buffer_load(&ctx->ac, buffer, 4, NULL, base, offset, 0, ac_glc,
|
||||||
value = ac_build_buffer_load(&ctx->ac, buffer, 4, NULL, base, offset, 0, ac_glc,
|
|
||||||
can_speculate, false);
|
|
||||||
|
|
||||||
value = LLVMBuildBitCast(ctx->ac.builder, value, vec_type, "");
|
|
||||||
return LLVMBuildExtractElement(ctx->ac.builder, value, LLVMConstInt(ctx->ac.i32, swizzle, 0),
|
|
||||||
"");
|
|
||||||
}
|
|
||||||
|
|
||||||
value = ac_build_buffer_load(&ctx->ac, buffer, 1, NULL, base, offset, swizzle * 4, ac_glc,
|
|
||||||
can_speculate, false);
|
can_speculate, false);
|
||||||
|
|
||||||
value2 = ac_build_buffer_load(&ctx->ac, buffer, 1, NULL, base, offset, swizzle * 4 + 4, ac_glc,
|
value = LLVMBuildBitCast(ctx->ac.builder, value, vec_type, "");
|
||||||
can_speculate, false);
|
return LLVMBuildExtractElement(ctx->ac.builder, value, LLVMConstInt(ctx->ac.i32, swizzle, 0),
|
||||||
|
"");
|
||||||
return si_build_gather_64bit(ctx, type, value, value2);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -318,19 +308,8 @@ static LLVMValueRef lshs_lds_load(struct si_shader_context *ctx, LLVMTypeRef typ
|
||||||
return ac_build_gather_values(&ctx->ac, values, 4);
|
return ac_build_gather_values(&ctx->ac, values, 4);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Split 64-bit loads. */
|
|
||||||
if (ac_get_type_size(type) == 8) {
|
|
||||||
LLVMValueRef lo, hi;
|
|
||||||
|
|
||||||
lo = lshs_lds_load(ctx, ctx->ac.i32, swizzle, dw_addr);
|
|
||||||
hi = lshs_lds_load(ctx, ctx->ac.i32, swizzle + 1, dw_addr);
|
|
||||||
return si_build_gather_64bit(ctx, type, lo, hi);
|
|
||||||
}
|
|
||||||
|
|
||||||
dw_addr = LLVMBuildAdd(ctx->ac.builder, dw_addr, LLVMConstInt(ctx->ac.i32, swizzle, 0), "");
|
dw_addr = LLVMBuildAdd(ctx->ac.builder, dw_addr, LLVMConstInt(ctx->ac.i32, swizzle, 0), "");
|
||||||
|
|
||||||
value = ac_lds_load(&ctx->ac, dw_addr);
|
value = ac_lds_load(&ctx->ac, dw_addr);
|
||||||
|
|
||||||
return LLVMBuildBitCast(ctx->ac.builder, value, type, "");
|
return LLVMBuildBitCast(ctx->ac.builder, value, type, "");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -443,14 +422,8 @@ static LLVMValueRef si_nir_load_tcs_varyings(struct ac_shader_abi *abi, LLVMType
|
||||||
semantic);
|
semantic);
|
||||||
|
|
||||||
LLVMValueRef value[4];
|
LLVMValueRef value[4];
|
||||||
for (unsigned i = 0; i < num_components; i++) {
|
for (unsigned i = component; i < component + num_components; i++)
|
||||||
unsigned offset = i;
|
value[i] = lshs_lds_load(ctx, type, i, dw_addr);
|
||||||
if (ac_get_type_size(type) == 8)
|
|
||||||
offset *= 2;
|
|
||||||
|
|
||||||
offset += component;
|
|
||||||
value[i + component] = lshs_lds_load(ctx, type, offset, dw_addr);
|
|
||||||
}
|
|
||||||
|
|
||||||
return ac_build_varying_gather_values(&ctx->ac, value, num_components, component);
|
return ac_build_varying_gather_values(&ctx->ac, value, num_components, component);
|
||||||
}
|
}
|
||||||
|
@ -487,23 +460,8 @@ static LLVMValueRef si_nir_load_input_tes(struct ac_shader_abi *abi, LLVMTypeRef
|
||||||
* to refactor buffer_load().
|
* to refactor buffer_load().
|
||||||
*/
|
*/
|
||||||
LLVMValueRef value[4];
|
LLVMValueRef value[4];
|
||||||
for (unsigned i = 0; i < num_components; i++) {
|
for (unsigned i = component; i < component + num_components; i++)
|
||||||
unsigned offset = i;
|
value[i] = buffer_load(ctx, type, i, ctx->tess_offchip_ring, base, addr, true);
|
||||||
if (ac_get_type_size(type) == 8) {
|
|
||||||
offset *= 2;
|
|
||||||
if (offset == 4) {
|
|
||||||
ubyte semantic = info->input_semantic[driver_location + 1];
|
|
||||||
addr = get_tcs_tes_buffer_address_from_generic_indices(ctx, vertex_index, param_index,
|
|
||||||
semantic);
|
|
||||||
}
|
|
||||||
|
|
||||||
offset = offset % 4;
|
|
||||||
}
|
|
||||||
|
|
||||||
offset += component;
|
|
||||||
value[i + component] =
|
|
||||||
buffer_load(ctx, type, offset, ctx->tess_offchip_ring, base, addr, true);
|
|
||||||
}
|
|
||||||
|
|
||||||
return ac_build_varying_gather_values(&ctx->ac, value, num_components, component);
|
return ac_build_varying_gather_values(&ctx->ac, value, num_components, component);
|
||||||
}
|
}
|
||||||
|
@ -563,20 +521,13 @@ static void si_nir_store_output_tcs(struct ac_shader_abi *abi, const struct nir_
|
||||||
addr =
|
addr =
|
||||||
get_tcs_tes_buffer_address_from_generic_indices(ctx, vertex_index, param_index, semantic);
|
get_tcs_tes_buffer_address_from_generic_indices(ctx, vertex_index, param_index, semantic);
|
||||||
|
|
||||||
for (unsigned chan = component; chan < 8; chan++) {
|
for (unsigned chan = component; chan < 4; chan++) {
|
||||||
if (!(writemask & (1 << chan)))
|
if (!(writemask & (1 << chan)))
|
||||||
continue;
|
continue;
|
||||||
LLVMValueRef value = ac_llvm_extract_elem(&ctx->ac, src, chan - component);
|
LLVMValueRef value = ac_llvm_extract_elem(&ctx->ac, src, chan - component);
|
||||||
|
|
||||||
unsigned buffer_store_offset = chan % 4;
|
|
||||||
if (chan == 4) {
|
|
||||||
ubyte semantic = info->output_semantic[driver_location + 1];
|
|
||||||
addr = get_tcs_tes_buffer_address_from_generic_indices(ctx, vertex_index, param_index,
|
|
||||||
semantic);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Skip LDS stores if there is no LDS read of this output. */
|
/* Skip LDS stores if there is no LDS read of this output. */
|
||||||
if (info->output_readmask[driver_location + chan / 4] & (1 << (chan % 4)) ||
|
if (info->output_readmask[driver_location] & (1 << chan) ||
|
||||||
/* The epilog reads LDS if invocation 0 doesn't define tess factors. */
|
/* The epilog reads LDS if invocation 0 doesn't define tess factors. */
|
||||||
(is_tess_factor &&
|
(is_tess_factor &&
|
||||||
!ctx->shader->selector->info.tessfactors_are_def_in_all_invocs))
|
!ctx->shader->selector->info.tessfactors_are_def_in_all_invocs))
|
||||||
|
@ -587,7 +538,7 @@ static void si_nir_store_output_tcs(struct ac_shader_abi *abi, const struct nir_
|
||||||
|
|
||||||
if (writemask != 0xF && !is_tess_factor) {
|
if (writemask != 0xF && !is_tess_factor) {
|
||||||
ac_build_buffer_store_dword(&ctx->ac, buffer, value, 1, addr, base,
|
ac_build_buffer_store_dword(&ctx->ac, buffer, value, 1, addr, base,
|
||||||
4 * buffer_store_offset, ac_glc);
|
4 * chan, ac_glc);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Write tess factors into VGPRs for the epilog. */
|
/* Write tess factors into VGPRs for the epilog. */
|
||||||
|
|
|
@ -64,29 +64,18 @@ static void scan_io_usage(struct si_shader_info *info, nir_intrinsic_instr *intr
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned mask, bit_size;
|
unsigned mask, bit_size;
|
||||||
bool dual_slot, is_output_load;
|
bool is_output_load;
|
||||||
|
|
||||||
if (nir_intrinsic_infos[intr->intrinsic].index_map[NIR_INTRINSIC_WRMASK] > 0) {
|
if (nir_intrinsic_infos[intr->intrinsic].index_map[NIR_INTRINSIC_WRMASK] > 0) {
|
||||||
mask = nir_intrinsic_write_mask(intr); /* store */
|
mask = nir_intrinsic_write_mask(intr); /* store */
|
||||||
bit_size = nir_src_bit_size(intr->src[0]);
|
bit_size = nir_src_bit_size(intr->src[0]);
|
||||||
dual_slot = bit_size == 64 && nir_src_num_components(intr->src[0]) >= 3;
|
|
||||||
is_output_load = false;
|
is_output_load = false;
|
||||||
} else {
|
} else {
|
||||||
mask = nir_ssa_def_components_read(&intr->dest.ssa); /* load */
|
mask = nir_ssa_def_components_read(&intr->dest.ssa); /* load */
|
||||||
bit_size = intr->dest.ssa.bit_size;
|
bit_size = intr->dest.ssa.bit_size;
|
||||||
dual_slot = bit_size == 64 && intr->dest.ssa.num_components >= 3;
|
|
||||||
is_output_load = !is_input;
|
is_output_load = !is_input;
|
||||||
}
|
}
|
||||||
|
assert(bit_size != 64 && !(mask & ~0xf) && "64-bit IO should have been lowered");
|
||||||
/* Convert the 64-bit component mask to a 32-bit component mask. */
|
|
||||||
if (bit_size == 64) {
|
|
||||||
unsigned new_mask = 0;
|
|
||||||
for (unsigned i = 0; i < 4; i++) {
|
|
||||||
if (mask & (1 << i))
|
|
||||||
new_mask |= 0x3 << (2 * i);
|
|
||||||
}
|
|
||||||
mask = new_mask;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Convert the 16-bit component mask to a 32-bit component mask. */
|
/* Convert the 16-bit component mask to a 32-bit component mask. */
|
||||||
if (bit_size == 16) {
|
if (bit_size == 16) {
|
||||||
|
@ -120,20 +109,19 @@ static void scan_io_usage(struct si_shader_info *info, nir_intrinsic_instr *intr
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned driver_location = nir_intrinsic_base(intr);
|
unsigned driver_location = nir_intrinsic_base(intr);
|
||||||
unsigned num_slots = indirect ? nir_intrinsic_io_semantics(intr).num_slots : (1 + dual_slot);
|
unsigned num_slots = indirect ? nir_intrinsic_io_semantics(intr).num_slots : 1;
|
||||||
|
|
||||||
if (is_input) {
|
if (is_input) {
|
||||||
assert(driver_location + num_slots <= ARRAY_SIZE(info->input_usage_mask));
|
assert(driver_location + num_slots <= ARRAY_SIZE(info->input_usage_mask));
|
||||||
|
|
||||||
for (unsigned i = 0; i < num_slots; i++) {
|
for (unsigned i = 0; i < num_slots; i++) {
|
||||||
unsigned loc = driver_location + i;
|
unsigned loc = driver_location + i;
|
||||||
unsigned slot_mask = (dual_slot && i % 2 ? mask >> 4 : mask) & 0xf;
|
|
||||||
|
|
||||||
info->input_semantic[loc] = semantic + i;
|
info->input_semantic[loc] = semantic + i;
|
||||||
info->input_interpolate[loc] = interp;
|
info->input_interpolate[loc] = interp;
|
||||||
|
|
||||||
if (slot_mask) {
|
if (mask) {
|
||||||
info->input_usage_mask[loc] |= slot_mask;
|
info->input_usage_mask[loc] |= mask;
|
||||||
info->num_inputs = MAX2(info->num_inputs, loc + 1);
|
info->num_inputs = MAX2(info->num_inputs, loc + 1);
|
||||||
|
|
||||||
if (semantic == VARYING_SLOT_PRIMITIVE_ID)
|
if (semantic == VARYING_SLOT_PRIMITIVE_ID)
|
||||||
|
@ -147,24 +135,23 @@ static void scan_io_usage(struct si_shader_info *info, nir_intrinsic_instr *intr
|
||||||
|
|
||||||
for (unsigned i = 0; i < num_slots; i++) {
|
for (unsigned i = 0; i < num_slots; i++) {
|
||||||
unsigned loc = driver_location + i;
|
unsigned loc = driver_location + i;
|
||||||
unsigned slot_mask = (dual_slot && i % 2 ? mask >> 4 : mask) & 0xf;
|
|
||||||
|
|
||||||
info->output_semantic[loc] = semantic + i;
|
info->output_semantic[loc] = semantic + i;
|
||||||
info->output_semantic_to_slot[semantic + i] = loc;
|
info->output_semantic_to_slot[semantic + i] = loc;
|
||||||
|
|
||||||
if (is_output_load) {
|
if (is_output_load) {
|
||||||
/* Output loads have only a few things that we need to track. */
|
/* Output loads have only a few things that we need to track. */
|
||||||
info->output_readmask[loc] |= slot_mask;
|
info->output_readmask[loc] |= mask;
|
||||||
|
|
||||||
if (info->stage == MESA_SHADER_FRAGMENT &&
|
if (info->stage == MESA_SHADER_FRAGMENT &&
|
||||||
nir_intrinsic_io_semantics(intr).fb_fetch_output)
|
nir_intrinsic_io_semantics(intr).fb_fetch_output)
|
||||||
info->uses_fbfetch = true;
|
info->uses_fbfetch = true;
|
||||||
} else if (slot_mask) {
|
} else if (mask) {
|
||||||
/* Output stores. */
|
/* Output stores. */
|
||||||
if (info->stage == MESA_SHADER_GEOMETRY) {
|
if (info->stage == MESA_SHADER_GEOMETRY) {
|
||||||
unsigned gs_streams = (uint32_t)nir_intrinsic_io_semantics(intr).gs_streams <<
|
unsigned gs_streams = (uint32_t)nir_intrinsic_io_semantics(intr).gs_streams <<
|
||||||
(nir_intrinsic_component(intr) * 2);
|
(nir_intrinsic_component(intr) * 2);
|
||||||
unsigned new_mask = slot_mask & ~info->output_usagemask[loc];
|
unsigned new_mask = mask & ~info->output_usagemask[loc];
|
||||||
|
|
||||||
for (unsigned i = 0; i < 4; i++) {
|
for (unsigned i = 0; i < 4; i++) {
|
||||||
unsigned stream = (gs_streams >> (i * 2)) & 0x3;
|
unsigned stream = (gs_streams >> (i * 2)) & 0x3;
|
||||||
|
@ -176,7 +163,7 @@ static void scan_io_usage(struct si_shader_info *info, nir_intrinsic_instr *intr
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
info->output_usagemask[loc] |= slot_mask;
|
info->output_usagemask[loc] |= mask;
|
||||||
info->num_outputs = MAX2(info->num_outputs, loc + 1);
|
info->num_outputs = MAX2(info->num_outputs, loc + 1);
|
||||||
|
|
||||||
if (info->stage == MESA_SHADER_FRAGMENT) {
|
if (info->stage == MESA_SHADER_FRAGMENT) {
|
||||||
|
@ -632,7 +619,7 @@ static void si_lower_io(struct nir_shader *nir)
|
||||||
si_nir_lower_color(nir);
|
si_nir_lower_color(nir);
|
||||||
|
|
||||||
NIR_PASS_V(nir, nir_lower_io, nir_var_shader_out | nir_var_shader_in,
|
NIR_PASS_V(nir, nir_lower_io, nir_var_shader_out | nir_var_shader_in,
|
||||||
type_size_vec4, 0);
|
type_size_vec4, nir_lower_io_lower_64bit_to_32);
|
||||||
nir->info.io_lowered = true;
|
nir->info.io_lowered = true;
|
||||||
|
|
||||||
/* This pass needs actual constants */
|
/* This pass needs actual constants */
|
||||||
|
|
Loading…
Reference in New Issue