ac: set swizzled bit in cache policy as a hint not to merge loads/stores
LLVM now merges loads and stores for all opcodes, so the swizzled bit must be set on swizzled accesses to keep them from being merged.

Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
This commit is contained in:
parent
8afab607ac
commit
f671cc4d95
|
@ -1237,8 +1237,7 @@ ac_build_buffer_store_dword(struct ac_llvm_context *ctx,
|
||||||
LLVMValueRef voffset,
|
LLVMValueRef voffset,
|
||||||
LLVMValueRef soffset,
|
LLVMValueRef soffset,
|
||||||
unsigned inst_offset,
|
unsigned inst_offset,
|
||||||
unsigned cache_policy,
|
unsigned cache_policy)
|
||||||
bool swizzle_enable_hint)
|
|
||||||
{
|
{
|
||||||
/* Split 3 channel stores, because only LLVM 9+ support 3-channel
|
/* Split 3 channel stores, because only LLVM 9+ support 3-channel
|
||||||
* intrinsics. */
|
* intrinsics. */
|
||||||
|
@ -1252,12 +1251,10 @@ ac_build_buffer_store_dword(struct ac_llvm_context *ctx,
|
||||||
v01 = ac_build_gather_values(ctx, v, 2);
|
v01 = ac_build_gather_values(ctx, v, 2);
|
||||||
|
|
||||||
ac_build_buffer_store_dword(ctx, rsrc, v01, 2, voffset,
|
ac_build_buffer_store_dword(ctx, rsrc, v01, 2, voffset,
|
||||||
soffset, inst_offset, cache_policy,
|
soffset, inst_offset, cache_policy);
|
||||||
swizzle_enable_hint);
|
|
||||||
ac_build_buffer_store_dword(ctx, rsrc, v[2], 1, voffset,
|
ac_build_buffer_store_dword(ctx, rsrc, v[2], 1, voffset,
|
||||||
soffset, inst_offset + 8,
|
soffset, inst_offset + 8,
|
||||||
cache_policy,
|
cache_policy);
|
||||||
swizzle_enable_hint);
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1265,7 +1262,7 @@ ac_build_buffer_store_dword(struct ac_llvm_context *ctx,
|
||||||
* (voffset is swizzled, but soffset isn't swizzled).
|
* (voffset is swizzled, but soffset isn't swizzled).
|
||||||
* llvm.amdgcn.buffer.store doesn't have a separate soffset parameter.
|
* llvm.amdgcn.buffer.store doesn't have a separate soffset parameter.
|
||||||
*/
|
*/
|
||||||
if (!swizzle_enable_hint) {
|
if (!(cache_policy & ac_swizzled)) {
|
||||||
LLVMValueRef offset = soffset;
|
LLVMValueRef offset = soffset;
|
||||||
|
|
||||||
if (inst_offset)
|
if (inst_offset)
|
||||||
|
|
|
@ -299,8 +299,7 @@ ac_build_buffer_store_dword(struct ac_llvm_context *ctx,
|
||||||
LLVMValueRef voffset,
|
LLVMValueRef voffset,
|
||||||
LLVMValueRef soffset,
|
LLVMValueRef soffset,
|
||||||
unsigned inst_offset,
|
unsigned inst_offset,
|
||||||
unsigned cache_policy,
|
unsigned cache_policy);
|
||||||
bool swizzle_enable_hint);
|
|
||||||
|
|
||||||
void
|
void
|
||||||
ac_build_buffer_store_format(struct ac_llvm_context *ctx,
|
ac_build_buffer_store_format(struct ac_llvm_context *ctx,
|
||||||
|
@ -533,6 +532,7 @@ enum ac_image_cache_policy {
|
||||||
ac_glc = 1 << 0, /* per-CU cache control */
|
ac_glc = 1 << 0, /* per-CU cache control */
|
||||||
ac_slc = 1 << 1, /* global L2 cache control */
|
ac_slc = 1 << 1, /* global L2 cache control */
|
||||||
ac_dlc = 1 << 2, /* per-shader-array cache control */
|
ac_dlc = 1 << 2, /* per-shader-array cache control */
|
||||||
|
ac_swizzled = 1 << 3, /* the access is swizzled, disabling load/store merging */
|
||||||
};
|
};
|
||||||
|
|
||||||
struct ac_image_args {
|
struct ac_image_args {
|
||||||
|
|
|
@ -1650,7 +1650,7 @@ static void visit_store_ssbo(struct ac_nir_context *ctx,
|
||||||
ac_build_buffer_store_dword(&ctx->ac, rsrc, data,
|
ac_build_buffer_store_dword(&ctx->ac, rsrc, data,
|
||||||
num_channels, offset,
|
num_channels, offset,
|
||||||
ctx->ac.i32_0, 0,
|
ctx->ac.i32_0, 0,
|
||||||
cache_policy, false);
|
cache_policy);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -742,13 +742,13 @@ store_tcs_output(struct ac_shader_abi *abi,
|
||||||
if (!is_tess_factor && writemask != 0xF)
|
if (!is_tess_factor && writemask != 0xF)
|
||||||
ac_build_buffer_store_dword(&ctx->ac, ctx->hs_ring_tess_offchip, value, 1,
|
ac_build_buffer_store_dword(&ctx->ac, ctx->hs_ring_tess_offchip, value, 1,
|
||||||
buf_addr, oc_lds,
|
buf_addr, oc_lds,
|
||||||
4 * (base + chan), ac_glc, false);
|
4 * (base + chan), ac_glc);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (writemask == 0xF) {
|
if (writemask == 0xF) {
|
||||||
ac_build_buffer_store_dword(&ctx->ac, ctx->hs_ring_tess_offchip, src, 4,
|
ac_build_buffer_store_dword(&ctx->ac, ctx->hs_ring_tess_offchip, src, 4,
|
||||||
buf_addr, oc_lds,
|
buf_addr, oc_lds,
|
||||||
(base * 4), ac_glc, false);
|
(base * 4), ac_glc);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1037,7 +1037,7 @@ visit_emit_vertex(struct ac_shader_abi *abi, unsigned stream, LLVMValueRef *addr
|
||||||
voffset,
|
voffset,
|
||||||
ac_get_arg(&ctx->ac,
|
ac_get_arg(&ctx->ac,
|
||||||
ctx->args->gs2vs_offset),
|
ctx->args->gs2vs_offset),
|
||||||
0, ac_glc | ac_slc, true);
|
0, ac_glc | ac_slc | ac_swizzled);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1768,7 +1768,7 @@ radv_emit_stream_output(struct radv_shader_context *ctx,
|
||||||
ac_build_buffer_store_dword(&ctx->ac, so_buffers[buf],
|
ac_build_buffer_store_dword(&ctx->ac, so_buffers[buf],
|
||||||
vdata, num_comps, so_write_offsets[buf],
|
vdata, num_comps, so_write_offsets[buf],
|
||||||
ctx->ac.i32_0, offset,
|
ctx->ac.i32_0, offset,
|
||||||
ac_glc | ac_slc, false);
|
ac_glc | ac_slc);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
|
@ -2173,7 +2173,7 @@ handle_es_outputs_post(struct radv_shader_context *ctx,
|
||||||
NULL,
|
NULL,
|
||||||
ac_get_arg(&ctx->ac, ctx->args->es2gs_offset),
|
ac_get_arg(&ctx->ac, ctx->args->es2gs_offset),
|
||||||
(4 * param_index + j) * 4,
|
(4 * param_index + j) * 4,
|
||||||
ac_glc | ac_slc, true);
|
ac_glc | ac_slc | ac_swizzled);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -3635,7 +3635,7 @@ write_tess_factors(struct radv_shader_context *ctx)
|
||||||
ac_build_buffer_store_dword(&ctx->ac, buffer,
|
ac_build_buffer_store_dword(&ctx->ac, buffer,
|
||||||
LLVMConstInt(ctx->ac.i32, 0x80000000, false),
|
LLVMConstInt(ctx->ac.i32, 0x80000000, false),
|
||||||
1, ctx->ac.i32_0, tf_base,
|
1, ctx->ac.i32_0, tf_base,
|
||||||
0, ac_glc, false);
|
0, ac_glc);
|
||||||
tf_offset += 4;
|
tf_offset += 4;
|
||||||
|
|
||||||
ac_build_endif(&ctx->ac, 6504);
|
ac_build_endif(&ctx->ac, 6504);
|
||||||
|
@ -3644,11 +3644,11 @@ write_tess_factors(struct radv_shader_context *ctx)
|
||||||
/* Store the tessellation factors. */
|
/* Store the tessellation factors. */
|
||||||
ac_build_buffer_store_dword(&ctx->ac, buffer, vec0,
|
ac_build_buffer_store_dword(&ctx->ac, buffer, vec0,
|
||||||
MIN2(stride, 4), byteoffset, tf_base,
|
MIN2(stride, 4), byteoffset, tf_base,
|
||||||
tf_offset, ac_glc, false);
|
tf_offset, ac_glc);
|
||||||
if (vec1)
|
if (vec1)
|
||||||
ac_build_buffer_store_dword(&ctx->ac, buffer, vec1,
|
ac_build_buffer_store_dword(&ctx->ac, buffer, vec1,
|
||||||
stride - 4, byteoffset, tf_base,
|
stride - 4, byteoffset, tf_base,
|
||||||
16 + tf_offset, ac_glc, false);
|
16 + tf_offset, ac_glc);
|
||||||
|
|
||||||
//store to offchip for TES to read - only if TES reads them
|
//store to offchip for TES to read - only if TES reads them
|
||||||
if (ctx->args->options->key.tcs.tes_reads_tess_factors) {
|
if (ctx->args->options->key.tcs.tes_reads_tess_factors) {
|
||||||
|
@ -3666,7 +3666,7 @@ write_tess_factors(struct radv_shader_context *ctx)
|
||||||
ac_build_buffer_store_dword(&ctx->ac, ctx->hs_ring_tess_offchip, outer_vec,
|
ac_build_buffer_store_dword(&ctx->ac, ctx->hs_ring_tess_offchip, outer_vec,
|
||||||
outer_comps, tf_outer_offset,
|
outer_comps, tf_outer_offset,
|
||||||
ac_get_arg(&ctx->ac, ctx->args->oc_lds),
|
ac_get_arg(&ctx->ac, ctx->args->oc_lds),
|
||||||
0, ac_glc, false);
|
0, ac_glc);
|
||||||
if (inner_comps) {
|
if (inner_comps) {
|
||||||
param_inner = shader_io_get_unique_index(VARYING_SLOT_TESS_LEVEL_INNER);
|
param_inner = shader_io_get_unique_index(VARYING_SLOT_TESS_LEVEL_INNER);
|
||||||
tf_inner_offset = get_tcs_tes_buffer_address(ctx, NULL,
|
tf_inner_offset = get_tcs_tes_buffer_address(ctx, NULL,
|
||||||
|
@ -3677,7 +3677,7 @@ write_tess_factors(struct radv_shader_context *ctx)
|
||||||
ac_build_buffer_store_dword(&ctx->ac, ctx->hs_ring_tess_offchip, inner_vec,
|
ac_build_buffer_store_dword(&ctx->ac, ctx->hs_ring_tess_offchip, inner_vec,
|
||||||
inner_comps, tf_inner_offset,
|
inner_comps, tf_inner_offset,
|
||||||
ac_get_arg(&ctx->ac, ctx->args->oc_lds),
|
ac_get_arg(&ctx->ac, ctx->args->oc_lds),
|
||||||
0, ac_glc, false);
|
0, ac_glc);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -822,7 +822,7 @@ void si_build_prim_discard_compute_shader(struct si_shader_context *ctx)
|
||||||
};
|
};
|
||||||
LLVMValueRef rsrc = ac_build_gather_values(&ctx->ac, desc, 4);
|
LLVMValueRef rsrc = ac_build_gather_values(&ctx->ac, desc, 4);
|
||||||
ac_build_buffer_store_dword(&ctx->ac, rsrc, count, 1, ctx->i32_0,
|
ac_build_buffer_store_dword(&ctx->ac, rsrc, count, 1, ctx->i32_0,
|
||||||
ctx->i32_0, 0, ac_glc | ac_slc, false);
|
ctx->i32_0, 0, ac_glc | ac_slc);
|
||||||
} else {
|
} else {
|
||||||
LLVMBuildStore(builder, count,
|
LLVMBuildStore(builder, count,
|
||||||
si_expand_32bit_pointer(ctx,
|
si_expand_32bit_pointer(ctx,
|
||||||
|
|
|
@ -1309,7 +1309,7 @@ static void store_output_tcs(struct lp_build_tgsi_context *bld_base,
|
||||||
if (reg->Register.WriteMask != 0xF && !is_tess_factor) {
|
if (reg->Register.WriteMask != 0xF && !is_tess_factor) {
|
||||||
ac_build_buffer_store_dword(&ctx->ac, buffer, value, 1,
|
ac_build_buffer_store_dword(&ctx->ac, buffer, value, 1,
|
||||||
buf_addr, base,
|
buf_addr, base,
|
||||||
4 * chan_index, ac_glc, false);
|
4 * chan_index, ac_glc);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Write tess factors into VGPRs for the epilog. */
|
/* Write tess factors into VGPRs for the epilog. */
|
||||||
|
@ -1329,7 +1329,7 @@ static void store_output_tcs(struct lp_build_tgsi_context *bld_base,
|
||||||
LLVMValueRef value = ac_build_gather_values(&ctx->ac,
|
LLVMValueRef value = ac_build_gather_values(&ctx->ac,
|
||||||
values, 4);
|
values, 4);
|
||||||
ac_build_buffer_store_dword(&ctx->ac, buffer, value, 4, buf_addr,
|
ac_build_buffer_store_dword(&ctx->ac, buffer, value, 4, buf_addr,
|
||||||
base, 0, ac_glc, false);
|
base, 0, ac_glc);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1432,7 +1432,7 @@ static void si_nir_store_output_tcs(struct ac_shader_abi *abi,
|
||||||
ac_build_buffer_store_dword(&ctx->ac, buffer, value, 1,
|
ac_build_buffer_store_dword(&ctx->ac, buffer, value, 1,
|
||||||
addr, base,
|
addr, base,
|
||||||
4 * buffer_store_offset,
|
4 * buffer_store_offset,
|
||||||
ac_glc, false);
|
ac_glc);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Write tess factors into VGPRs for the epilog. */
|
/* Write tess factors into VGPRs for the epilog. */
|
||||||
|
@ -1452,7 +1452,7 @@ static void si_nir_store_output_tcs(struct ac_shader_abi *abi,
|
||||||
LLVMValueRef value = ac_build_gather_values(&ctx->ac,
|
LLVMValueRef value = ac_build_gather_values(&ctx->ac,
|
||||||
values, 4);
|
values, 4);
|
||||||
ac_build_buffer_store_dword(&ctx->ac, buffer, value, 4, addr,
|
ac_build_buffer_store_dword(&ctx->ac, buffer, value, 4, addr,
|
||||||
base, 0, ac_glc, false);
|
base, 0, ac_glc);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2661,7 +2661,7 @@ void si_emit_streamout_output(struct si_shader_context *ctx,
|
||||||
vdata, num_comps,
|
vdata, num_comps,
|
||||||
so_write_offsets[buf_idx],
|
so_write_offsets[buf_idx],
|
||||||
ctx->i32_0,
|
ctx->i32_0,
|
||||||
stream_out->dst_offset * 4, ac_glc | ac_slc, false);
|
stream_out->dst_offset * 4, ac_glc | ac_slc);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -3066,7 +3066,7 @@ static void si_copy_tcs_inputs(struct lp_build_tgsi_context *bld_base)
|
||||||
LLVMValueRef value = lshs_lds_load(bld_base, ctx->ac.i32, ~0, lds_ptr);
|
LLVMValueRef value = lshs_lds_load(bld_base, ctx->ac.i32, ~0, lds_ptr);
|
||||||
|
|
||||||
ac_build_buffer_store_dword(&ctx->ac, buffer, value, 4, buffer_addr,
|
ac_build_buffer_store_dword(&ctx->ac, buffer, value, 4, buffer_addr,
|
||||||
buffer_offset, 0, ac_glc, false);
|
buffer_offset, 0, ac_glc);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -3191,7 +3191,7 @@ static void si_write_tess_factors(struct lp_build_tgsi_context *bld_base,
|
||||||
ac_build_buffer_store_dword(&ctx->ac, buffer,
|
ac_build_buffer_store_dword(&ctx->ac, buffer,
|
||||||
LLVMConstInt(ctx->i32, 0x80000000, 0),
|
LLVMConstInt(ctx->i32, 0x80000000, 0),
|
||||||
1, ctx->i32_0, tf_base,
|
1, ctx->i32_0, tf_base,
|
||||||
offset, ac_glc, false);
|
offset, ac_glc);
|
||||||
offset += 4;
|
offset += 4;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -3200,12 +3200,12 @@ static void si_write_tess_factors(struct lp_build_tgsi_context *bld_base,
|
||||||
/* Store the tessellation factors. */
|
/* Store the tessellation factors. */
|
||||||
ac_build_buffer_store_dword(&ctx->ac, buffer, vec0,
|
ac_build_buffer_store_dword(&ctx->ac, buffer, vec0,
|
||||||
MIN2(stride, 4), byteoffset, tf_base,
|
MIN2(stride, 4), byteoffset, tf_base,
|
||||||
offset, ac_glc, false);
|
offset, ac_glc);
|
||||||
offset += 16;
|
offset += 16;
|
||||||
if (vec1)
|
if (vec1)
|
||||||
ac_build_buffer_store_dword(&ctx->ac, buffer, vec1,
|
ac_build_buffer_store_dword(&ctx->ac, buffer, vec1,
|
||||||
stride - 4, byteoffset, tf_base,
|
stride - 4, byteoffset, tf_base,
|
||||||
offset, ac_glc, false);
|
offset, ac_glc);
|
||||||
|
|
||||||
/* Store the tess factors into the offchip buffer if TES reads them. */
|
/* Store the tess factors into the offchip buffer if TES reads them. */
|
||||||
if (shader->key.part.tcs.epilog.tes_reads_tess_factors) {
|
if (shader->key.part.tcs.epilog.tes_reads_tess_factors) {
|
||||||
|
@ -3228,7 +3228,7 @@ static void si_write_tess_factors(struct lp_build_tgsi_context *bld_base,
|
||||||
|
|
||||||
ac_build_buffer_store_dword(&ctx->ac, buf, outer_vec,
|
ac_build_buffer_store_dword(&ctx->ac, buf, outer_vec,
|
||||||
outer_comps, tf_outer_offset,
|
outer_comps, tf_outer_offset,
|
||||||
base, 0, ac_glc, false);
|
base, 0, ac_glc);
|
||||||
if (inner_comps) {
|
if (inner_comps) {
|
||||||
param_inner = si_shader_io_get_unique_index_patch(
|
param_inner = si_shader_io_get_unique_index_patch(
|
||||||
TGSI_SEMANTIC_TESSINNER, 0);
|
TGSI_SEMANTIC_TESSINNER, 0);
|
||||||
|
@ -3239,7 +3239,7 @@ static void si_write_tess_factors(struct lp_build_tgsi_context *bld_base,
|
||||||
ac_build_gather_values(&ctx->ac, inner, inner_comps);
|
ac_build_gather_values(&ctx->ac, inner, inner_comps);
|
||||||
ac_build_buffer_store_dword(&ctx->ac, buf, inner_vec,
|
ac_build_buffer_store_dword(&ctx->ac, buf, inner_vec,
|
||||||
inner_comps, tf_inner_offset,
|
inner_comps, tf_inner_offset,
|
||||||
base, 0, ac_glc, false);
|
base, 0, ac_glc);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -3554,7 +3554,7 @@ static void si_llvm_emit_es_epilogue(struct ac_shader_abi *abi,
|
||||||
out_val, 1, NULL,
|
out_val, 1, NULL,
|
||||||
ac_get_arg(&ctx->ac, ctx->es2gs_offset),
|
ac_get_arg(&ctx->ac, ctx->es2gs_offset),
|
||||||
(4 * param + chan) * 4,
|
(4 * param + chan) * 4,
|
||||||
ac_glc | ac_slc, true);
|
ac_glc | ac_slc | ac_swizzled);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -4283,7 +4283,7 @@ static void si_llvm_emit_vertex(struct ac_shader_abi *abi,
|
||||||
ctx->gsvs_ring[stream],
|
ctx->gsvs_ring[stream],
|
||||||
out_val, 1,
|
out_val, 1,
|
||||||
voffset, soffset, 0,
|
voffset, soffset, 0,
|
||||||
ac_glc | ac_slc, true);
|
ac_glc | ac_slc | ac_swizzled);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -649,8 +649,7 @@ static void store_emit_buffer(struct si_shader_context *ctx,
|
||||||
}
|
}
|
||||||
|
|
||||||
ac_build_buffer_store_dword(&ctx->ac, resource, data, count,
|
ac_build_buffer_store_dword(&ctx->ac, resource, data, count,
|
||||||
voff, ctx->i32_0, 0, cache_policy,
|
voff, ctx->i32_0, 0, cache_policy);
|
||||||
false);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue