anv: Patch constant data pointers into shaders with using softpin

When we have softpin, we know the address of the shader constant data at
shader upload time because it's sitting at the end of the shader.  This
commit changes ANV to use patch constants to embed the address in the
shader patch the right address in at upload time.  This allows us to
avoid having to set up a UBO binding on-the-fly for shader constants.

This commit uses an A64 message but it's quite possible that we could
also use an A32 message and make the dataport do the 64-bit add for us.
However, load_global is what we have right now so it was easier to just
use that.

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6244>
This commit is contained in:
Jason Ekstrand 2020-08-08 16:29:36 -05:00 committed by Marge Bot
parent a06955f4ef
commit 5ee3242837
3 changed files with 60 additions and 18 deletions

View File

@ -784,31 +784,51 @@ lower_load_constant(nir_intrinsic_instr *intrin,
{
nir_builder *b = &state->builder;
b->cursor = nir_before_instr(&intrin->instr);
b->cursor = nir_instr_remove(&intrin->instr);
/* Any constant-offset load_constant instructions should have been removed
* by constant folding.
*/
assert(!nir_src_is_const(intrin->src[0]));
nir_ssa_def *offset = nir_iadd_imm(b, nir_ssa_for_src(b, intrin->src[0], 1),
nir_intrinsic_base(intrin));
nir_ssa_def *index = nir_imm_int(b, state->constants_offset);
nir_ssa_def *offset = nir_iadd(b, nir_ssa_for_src(b, intrin->src[0], 1),
nir_imm_int(b, nir_intrinsic_base(intrin)));
nir_ssa_def *data;
if (state->pdevice->use_softpin) {
unsigned load_size = intrin->dest.ssa.num_components *
intrin->dest.ssa.bit_size / 8;
unsigned load_align = intrin->dest.ssa.bit_size / 8;
nir_intrinsic_instr *load_ubo =
nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_ubo);
load_ubo->num_components = intrin->num_components;
load_ubo->src[0] = nir_src_for_ssa(index);
load_ubo->src[1] = nir_src_for_ssa(offset);
nir_intrinsic_set_align(load_ubo, intrin->dest.ssa.bit_size / 8, 0);
nir_ssa_dest_init(&load_ubo->instr, &load_ubo->dest,
intrin->dest.ssa.num_components,
intrin->dest.ssa.bit_size, NULL);
nir_builder_instr_insert(b, &load_ubo->instr);
assert(load_size < b->shader->constant_data_size);
unsigned max_offset = b->shader->constant_data_size - load_size;
offset = nir_umin(b, offset, nir_imm_int(b, max_offset));
nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
nir_src_for_ssa(&load_ubo->dest.ssa));
nir_instr_remove(&intrin->instr);
nir_ssa_def *const_data_base_addr = nir_pack_64_2x32_split(b,
nir_load_reloc_const_intel(b, ANV_SHADER_RELOC_CONST_DATA_ADDR_LOW),
nir_load_reloc_const_intel(b, ANV_SHADER_RELOC_CONST_DATA_ADDR_HIGH));
data = nir_load_global(b, nir_iadd(b, const_data_base_addr,
nir_u2u64(b, offset)),
load_align,
intrin->dest.ssa.num_components,
intrin->dest.ssa.bit_size);
} else {
nir_ssa_def *index = nir_imm_int(b, state->constants_offset);
nir_intrinsic_instr *load_ubo =
nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_ubo);
load_ubo->num_components = intrin->num_components;
load_ubo->src[0] = nir_src_for_ssa(index);
load_ubo->src[1] = nir_src_for_ssa(offset);
nir_intrinsic_set_align(load_ubo, intrin->dest.ssa.bit_size / 8, 0);
nir_ssa_dest_init(&load_ubo->instr, &load_ubo->dest,
intrin->dest.ssa.num_components,
intrin->dest.ssa.bit_size, NULL);
nir_builder_instr_insert(b, &load_ubo->instr);
data = &load_ubo->dest.ssa;
}
nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(data));
}
static void
@ -1147,7 +1167,7 @@ anv_nir_apply_pipeline_layout(const struct anv_physical_device *pdevice,
}
}
if (state.uses_constants) {
if (state.uses_constants && !pdevice->use_softpin) {
state.constants_offset = map->surface_count;
map->surface_to_descriptor[map->surface_count].set =
ANV_DESCRIPTOR_SET_SHADER_CONSTANTS;

View File

@ -82,6 +82,23 @@ anv_shader_bin_create(struct anv_device *device,
memcpy(shader->kernel.map, kernel_data, kernel_size);
shader->kernel_size = kernel_size;
uint64_t shader_data_addr = INSTRUCTION_STATE_POOL_MIN_ADDRESS +
shader->kernel.offset +
prog_data_in->const_data_offset;
struct brw_shader_reloc_value reloc_values[] = {
{
.id = ANV_SHADER_RELOC_CONST_DATA_ADDR_LOW,
.value = shader_data_addr,
},
{
.id = ANV_SHADER_RELOC_CONST_DATA_ADDR_HIGH,
.value = shader_data_addr >> 32,
},
};
brw_write_shader_relocs(&device->info, shader->kernel.map, prog_data_in,
reloc_values, ARRAY_SIZE(reloc_values));
memcpy(prog_data, prog_data_in, prog_data_size);
typed_memcpy(prog_data_relocs, prog_data_in->relocs,
prog_data_in->num_relocs);

View File

@ -3318,6 +3318,11 @@ mesa_to_vk_shader_stage(gl_shader_stage mesa_stage)
stage = __builtin_ffs(__tmp) - 1, __tmp; \
__tmp &= ~(1 << (stage)))
enum anv_shader_reloc {
ANV_SHADER_RELOC_CONST_DATA_ADDR_LOW,
ANV_SHADER_RELOC_CONST_DATA_ADDR_HIGH,
};
struct anv_pipeline_bind_map {
unsigned char surface_sha1[20];
unsigned char sampler_sha1[20];