anv: Put image params in the descriptor set buffer on gen8 and earlier
This is really where they belong — not in push constants. The one downside here is that we can't push them anymore for compute shaders. However, that's a general problem, and we should figure out how to push descriptor sets for compute shaders. This also lets us bump MAX_IMAGES to 64 on BDW and earlier platforms, because we no longer have to worry about push-constant size limits. Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Reviewed-by: Caio Marcelo de Oliveira Filho <caio.oliveira@intel.com>
This commit is contained in:
parent
83b943cc2f
commit
3b755b52e8
|
@ -90,7 +90,12 @@ anv_descriptor_data_for_type(const struct anv_physical_device *device,
|
|||
static unsigned
|
||||
anv_descriptor_data_size(enum anv_descriptor_data data)
|
||||
{
|
||||
return 0;
|
||||
unsigned size = 0;
|
||||
|
||||
if (data & ANV_DESCRIPTOR_IMAGE_PARAM)
|
||||
size += BRW_IMAGE_PARAM_SIZE * 4;
|
||||
|
||||
return size;
|
||||
}
|
||||
|
||||
/** Returns the size in bytes of each descriptor with the given layout */
|
||||
|
@ -902,6 +907,24 @@ VkResult anv_FreeDescriptorSets(
|
|||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
static void
|
||||
anv_descriptor_set_write_image_param(uint32_t *param_desc_map,
|
||||
const struct brw_image_param *param)
|
||||
{
|
||||
#define WRITE_PARAM_FIELD(field, FIELD) \
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(param->field); i++) \
|
||||
param_desc_map[BRW_IMAGE_PARAM_##FIELD##_OFFSET + i] = param->field[i]
|
||||
|
||||
WRITE_PARAM_FIELD(offset, OFFSET);
|
||||
WRITE_PARAM_FIELD(size, SIZE);
|
||||
WRITE_PARAM_FIELD(stride, STRIDE);
|
||||
WRITE_PARAM_FIELD(tiling, TILING);
|
||||
WRITE_PARAM_FIELD(swizzling, SWIZZLING);
|
||||
WRITE_PARAM_FIELD(size, SIZE);
|
||||
|
||||
#undef WRITE_PARAM_FIELD
|
||||
}
|
||||
|
||||
void
|
||||
anv_descriptor_set_write_image_view(struct anv_device *device,
|
||||
struct anv_descriptor_set *set,
|
||||
|
@ -952,6 +975,18 @@ anv_descriptor_set_write_image_view(struct anv_device *device,
|
|||
.image_view = image_view,
|
||||
.sampler = sampler,
|
||||
};
|
||||
|
||||
void *desc_map = set->desc_mem.map + bind_layout->descriptor_offset +
|
||||
element * anv_descriptor_size(bind_layout);
|
||||
|
||||
if (bind_layout->data & ANV_DESCRIPTOR_IMAGE_PARAM) {
|
||||
/* Storage images can only ever have one plane */
|
||||
assert(image_view->n_planes == 1);
|
||||
const struct brw_image_param *image_param =
|
||||
&image_view->planes[0].storage_image_param;
|
||||
|
||||
anv_descriptor_set_write_image_param(desc_map, image_param);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
|
@ -973,6 +1008,14 @@ anv_descriptor_set_write_buffer_view(struct anv_device *device,
|
|||
.type = type,
|
||||
.buffer_view = buffer_view,
|
||||
};
|
||||
|
||||
void *desc_map = set->desc_mem.map + bind_layout->descriptor_offset +
|
||||
element * anv_descriptor_size(bind_layout);
|
||||
|
||||
if (bind_layout->data & ANV_DESCRIPTOR_IMAGE_PARAM) {
|
||||
anv_descriptor_set_write_image_param(desc_map,
|
||||
&buffer_view->storage_image_param);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
|
|
|
@ -1096,8 +1096,6 @@ void anv_GetPhysicalDeviceProperties(
|
|||
const uint32_t max_samplers = (devinfo->gen >= 8 || devinfo->is_haswell) ?
|
||||
128 : 16;
|
||||
|
||||
const uint32_t max_images = devinfo->gen < 9 ? MAX_GEN8_IMAGES : MAX_IMAGES;
|
||||
|
||||
VkSampleCountFlags sample_counts =
|
||||
isl_device_get_sample_counts(&pdevice->isl_dev);
|
||||
|
||||
|
@ -1121,7 +1119,7 @@ void anv_GetPhysicalDeviceProperties(
|
|||
.maxPerStageDescriptorUniformBuffers = 64,
|
||||
.maxPerStageDescriptorStorageBuffers = 64,
|
||||
.maxPerStageDescriptorSampledImages = max_samplers,
|
||||
.maxPerStageDescriptorStorageImages = max_images,
|
||||
.maxPerStageDescriptorStorageImages = MAX_IMAGES,
|
||||
.maxPerStageDescriptorInputAttachments = 64,
|
||||
.maxPerStageResources = 250,
|
||||
.maxDescriptorSetSamplers = 6 * max_samplers, /* number of stages * maxPerStageDescriptorSamplers */
|
||||
|
@ -1130,7 +1128,7 @@ void anv_GetPhysicalDeviceProperties(
|
|||
.maxDescriptorSetStorageBuffers = 6 * 64, /* number of stages * maxPerStageDescriptorStorageBuffers */
|
||||
.maxDescriptorSetStorageBuffersDynamic = MAX_DYNAMIC_BUFFERS / 2,
|
||||
.maxDescriptorSetSampledImages = 6 * max_samplers, /* number of stages * maxPerStageDescriptorSampledImages */
|
||||
.maxDescriptorSetStorageImages = 6 * max_images, /* number of stages * maxPerStageDescriptorStorageImages */
|
||||
.maxDescriptorSetStorageImages = 6 * MAX_IMAGES, /* number of stages * maxPerStageDescriptorStorageImages */
|
||||
.maxDescriptorSetInputAttachments = 256,
|
||||
.maxVertexInputAttributes = MAX_VBS,
|
||||
.maxVertexInputBindings = MAX_VBS,
|
||||
|
|
|
@ -35,8 +35,6 @@ struct apply_pipeline_layout_state {
|
|||
struct anv_pipeline_layout *layout;
|
||||
bool add_bounds_checks;
|
||||
|
||||
unsigned first_image_uniform;
|
||||
|
||||
bool uses_constants;
|
||||
uint8_t constants_offset;
|
||||
struct {
|
||||
|
@ -46,7 +44,6 @@ struct apply_pipeline_layout_state {
|
|||
BITSET_WORD *used;
|
||||
uint8_t *surface_offsets;
|
||||
uint8_t *sampler_offsets;
|
||||
uint8_t *image_offsets;
|
||||
} set[MAX_SETS];
|
||||
};
|
||||
|
||||
|
@ -239,11 +236,11 @@ lower_get_buffer_size(nir_intrinsic_instr *intrin,
|
|||
nir_src_for_ssa(nir_channel(b, index, 0)));
|
||||
}
|
||||
|
||||
static void
|
||||
lower_image_intrinsic(nir_intrinsic_instr *intrin,
|
||||
static nir_ssa_def *
|
||||
build_descriptor_load(nir_deref_instr *deref, unsigned offset,
|
||||
unsigned num_components, unsigned bit_size,
|
||||
struct apply_pipeline_layout_state *state)
|
||||
{
|
||||
nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
|
||||
nir_variable *var = nir_deref_instr_get_variable(deref);
|
||||
|
||||
unsigned set = var->data.descriptor_set;
|
||||
|
@ -251,47 +248,80 @@ lower_image_intrinsic(nir_intrinsic_instr *intrin,
|
|||
unsigned array_size =
|
||||
state->layout->set[set].layout->binding[binding].array_size;
|
||||
|
||||
nir_builder *b = &state->builder;
|
||||
b->cursor = nir_before_instr(&intrin->instr);
|
||||
const struct anv_descriptor_set_binding_layout *bind_layout =
|
||||
&state->layout->set[set].layout->binding[binding];
|
||||
|
||||
nir_ssa_def *index = NULL;
|
||||
nir_builder *b = &state->builder;
|
||||
|
||||
nir_ssa_def *desc_buffer_index =
|
||||
nir_imm_int(b, state->set[set].desc_offset);
|
||||
|
||||
nir_ssa_def *desc_offset =
|
||||
nir_imm_int(b, bind_layout->descriptor_offset + offset);
|
||||
if (deref->deref_type != nir_deref_type_var) {
|
||||
assert(deref->deref_type == nir_deref_type_array);
|
||||
index = nir_ssa_for_src(b, deref->arr.index, 1);
|
||||
|
||||
const unsigned descriptor_size = anv_descriptor_size(bind_layout);
|
||||
nir_ssa_def *arr_index = nir_ssa_for_src(b, deref->arr.index, 1);
|
||||
if (state->add_bounds_checks)
|
||||
index = nir_umin(b, index, nir_imm_int(b, array_size - 1));
|
||||
} else {
|
||||
index = nir_imm_int(b, 0);
|
||||
arr_index = nir_umin(b, arr_index, nir_imm_int(b, array_size - 1));
|
||||
|
||||
desc_offset = nir_iadd(b, desc_offset,
|
||||
nir_imul_imm(b, arr_index, descriptor_size));
|
||||
}
|
||||
|
||||
nir_intrinsic_instr *desc_load =
|
||||
nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_ubo);
|
||||
desc_load->src[0] = nir_src_for_ssa(desc_buffer_index);
|
||||
desc_load->src[1] = nir_src_for_ssa(desc_offset);
|
||||
desc_load->num_components = num_components;
|
||||
nir_ssa_dest_init(&desc_load->instr, &desc_load->dest,
|
||||
num_components, bit_size, NULL);
|
||||
nir_builder_instr_insert(b, &desc_load->instr);
|
||||
|
||||
return &desc_load->dest.ssa;
|
||||
}
|
||||
|
||||
static void
|
||||
lower_image_intrinsic(nir_intrinsic_instr *intrin,
|
||||
struct apply_pipeline_layout_state *state)
|
||||
{
|
||||
nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
|
||||
|
||||
nir_builder *b = &state->builder;
|
||||
b->cursor = nir_before_instr(&intrin->instr);
|
||||
|
||||
if (intrin->intrinsic == nir_intrinsic_image_deref_load_param_intel) {
|
||||
b->cursor = nir_instr_remove(&intrin->instr);
|
||||
|
||||
nir_intrinsic_instr *load =
|
||||
nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_uniform);
|
||||
|
||||
nir_intrinsic_set_base(load, state->first_image_uniform +
|
||||
state->set[set].image_offsets[binding] *
|
||||
BRW_IMAGE_PARAM_SIZE * 4);
|
||||
nir_intrinsic_set_range(load, array_size * BRW_IMAGE_PARAM_SIZE * 4);
|
||||
|
||||
const unsigned param = nir_intrinsic_base(intrin);
|
||||
nir_ssa_def *offset =
|
||||
nir_imul(b, index, nir_imm_int(b, BRW_IMAGE_PARAM_SIZE * 4));
|
||||
offset = nir_iadd(b, offset, nir_imm_int(b, param * 16));
|
||||
load->src[0] = nir_src_for_ssa(offset);
|
||||
|
||||
load->num_components = intrin->dest.ssa.num_components;
|
||||
nir_ssa_dest_init(&load->instr, &load->dest,
|
||||
intrin->dest.ssa.num_components,
|
||||
intrin->dest.ssa.bit_size, NULL);
|
||||
nir_builder_instr_insert(b, &load->instr);
|
||||
nir_ssa_def *desc =
|
||||
build_descriptor_load(deref, param * 16,
|
||||
intrin->dest.ssa.num_components,
|
||||
intrin->dest.ssa.bit_size, state);
|
||||
|
||||
nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
|
||||
nir_src_for_ssa(&load->dest.ssa));
|
||||
nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(desc));
|
||||
} else {
|
||||
nir_variable *var = nir_deref_instr_get_variable(deref);
|
||||
|
||||
unsigned set = var->data.descriptor_set;
|
||||
unsigned binding = var->data.binding;
|
||||
unsigned binding_offset = state->set[set].surface_offsets[binding];
|
||||
index = nir_iadd(b, index, nir_imm_int(b, binding_offset));
|
||||
unsigned array_size =
|
||||
state->layout->set[set].layout->binding[binding].array_size;
|
||||
|
||||
nir_ssa_def *index = NULL;
|
||||
if (deref->deref_type != nir_deref_type_var) {
|
||||
assert(deref->deref_type == nir_deref_type_array);
|
||||
index = nir_ssa_for_src(b, deref->arr.index, 1);
|
||||
if (state->add_bounds_checks)
|
||||
index = nir_umin(b, index, nir_imm_int(b, array_size - 1));
|
||||
} else {
|
||||
index = nir_imm_int(b, 0);
|
||||
}
|
||||
|
||||
index = nir_iadd_imm(b, index, binding_offset);
|
||||
nir_rewrite_image_intrinsic(intrin, index, false);
|
||||
}
|
||||
}
|
||||
|
@ -475,16 +505,6 @@ apply_pipeline_layout_block(nir_block *block,
|
|||
}
|
||||
}
|
||||
|
||||
static void
|
||||
setup_vec4_uniform_value(uint32_t *params, uint32_t offset, unsigned n)
|
||||
{
|
||||
for (unsigned i = 0; i < n; ++i)
|
||||
params[i] = ANV_PARAM_PUSH(offset + i * sizeof(uint32_t));
|
||||
|
||||
for (unsigned i = n; i < 4; ++i)
|
||||
params[i] = BRW_PARAM_BUILTIN_ZERO;
|
||||
}
|
||||
|
||||
void
|
||||
anv_nir_apply_pipeline_layout(const struct anv_physical_device *pdevice,
|
||||
bool robust_buffer_access,
|
||||
|
@ -508,7 +528,6 @@ anv_nir_apply_pipeline_layout(const struct anv_physical_device *pdevice,
|
|||
state.set[s].used = rzalloc_array(mem_ctx, BITSET_WORD, words);
|
||||
state.set[s].surface_offsets = rzalloc_array(mem_ctx, uint8_t, count);
|
||||
state.set[s].sampler_offsets = rzalloc_array(mem_ctx, uint8_t, count);
|
||||
state.set[s].image_offsets = rzalloc_array(mem_ctx, uint8_t, count);
|
||||
}
|
||||
|
||||
nir_foreach_function(function, shader) {
|
||||
|
@ -583,45 +602,9 @@ anv_nir_apply_pipeline_layout(const struct anv_physical_device *pdevice,
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (binding->data & ANV_DESCRIPTOR_IMAGE_PARAM) {
|
||||
state.set[set].image_offsets[b] = map->image_param_count;
|
||||
map->image_param_count += binding->array_size;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (map->image_param_count > 0) {
|
||||
assert(map->image_param_count <= MAX_GEN8_IMAGES);
|
||||
assert(shader->num_uniforms == prog_data->nr_params * 4);
|
||||
state.first_image_uniform = shader->num_uniforms;
|
||||
uint32_t *param = brw_stage_prog_data_add_params(prog_data,
|
||||
map->image_param_count *
|
||||
BRW_IMAGE_PARAM_SIZE);
|
||||
struct anv_push_constants *null_data = NULL;
|
||||
const struct brw_image_param *image_param = null_data->images;
|
||||
for (uint32_t i = 0; i < map->image_param_count; i++) {
|
||||
setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_OFFSET_OFFSET,
|
||||
(uintptr_t)image_param->offset, 2);
|
||||
setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SIZE_OFFSET,
|
||||
(uintptr_t)image_param->size, 3);
|
||||
setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_STRIDE_OFFSET,
|
||||
(uintptr_t)image_param->stride, 4);
|
||||
setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_TILING_OFFSET,
|
||||
(uintptr_t)image_param->tiling, 3);
|
||||
setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SWIZZLING_OFFSET,
|
||||
(uintptr_t)image_param->swizzling, 2);
|
||||
|
||||
param += BRW_IMAGE_PARAM_SIZE;
|
||||
image_param ++;
|
||||
}
|
||||
assert(param == prog_data->param + prog_data->nr_params);
|
||||
|
||||
shader->num_uniforms += map->image_param_count *
|
||||
BRW_IMAGE_PARAM_SIZE * 4;
|
||||
assert(shader->num_uniforms == prog_data->nr_params * 4);
|
||||
}
|
||||
|
||||
nir_foreach_variable(var, &shader->uniforms) {
|
||||
const struct glsl_type *glsl_type = glsl_without_array(var->type);
|
||||
|
||||
|
|
|
@ -154,7 +154,6 @@ anv_shader_bin_write_to_blob(const struct anv_shader_bin *shader,
|
|||
|
||||
blob_write_uint32(blob, shader->bind_map.surface_count);
|
||||
blob_write_uint32(blob, shader->bind_map.sampler_count);
|
||||
blob_write_uint32(blob, shader->bind_map.image_param_count);
|
||||
blob_write_bytes(blob, shader->bind_map.surface_to_descriptor,
|
||||
shader->bind_map.surface_count *
|
||||
sizeof(*shader->bind_map.surface_to_descriptor));
|
||||
|
@ -194,7 +193,6 @@ anv_shader_bin_create_from_blob(struct anv_device *device,
|
|||
struct anv_pipeline_bind_map bind_map;
|
||||
bind_map.surface_count = blob_read_uint32(blob);
|
||||
bind_map.sampler_count = blob_read_uint32(blob);
|
||||
bind_map.image_param_count = blob_read_uint32(blob);
|
||||
bind_map.surface_to_descriptor = (void *)
|
||||
blob_read_bytes(blob, bind_map.surface_count *
|
||||
sizeof(*bind_map.surface_to_descriptor));
|
||||
|
|
|
@ -158,7 +158,6 @@ struct gen_l3_config;
|
|||
#define MAX_PUSH_CONSTANTS_SIZE 128
|
||||
#define MAX_DYNAMIC_BUFFERS 16
|
||||
#define MAX_IMAGES 64
|
||||
#define MAX_GEN8_IMAGES 8
|
||||
#define MAX_PUSH_DESCRIPTORS 32 /* Minimum requirement */
|
||||
#define MAX_INLINE_UNIFORM_BLOCK_SIZE 4096
|
||||
#define MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS 32
|
||||
|
@ -2077,9 +2076,6 @@ struct anv_push_constants {
|
|||
|
||||
/* Used for vkCmdDispatchBase */
|
||||
uint32_t base_work_group_id[3];
|
||||
|
||||
/* Image data for image_load_store on pre-SKL */
|
||||
struct brw_image_param images[MAX_GEN8_IMAGES];
|
||||
};
|
||||
|
||||
struct anv_dynamic_state {
|
||||
|
@ -2587,7 +2583,6 @@ mesa_to_vk_shader_stage(gl_shader_stage mesa_stage)
|
|||
struct anv_pipeline_bind_map {
|
||||
uint32_t surface_count;
|
||||
uint32_t sampler_count;
|
||||
uint32_t image_param_count;
|
||||
|
||||
struct anv_pipeline_binding * surface_to_descriptor;
|
||||
struct anv_pipeline_binding * sampler_to_descriptor;
|
||||
|
|
|
@ -2010,7 +2010,6 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
|
|||
gl_shader_stage stage,
|
||||
struct anv_state *bt_state)
|
||||
{
|
||||
const struct gen_device_info *devinfo = &cmd_buffer->device->info;
|
||||
struct anv_subpass *subpass = cmd_buffer->state.subpass;
|
||||
struct anv_cmd_pipeline_state *pipe_state;
|
||||
struct anv_pipeline *pipeline;
|
||||
|
@ -2051,17 +2050,6 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
|
|||
const bool need_client_mem_relocs =
|
||||
!cmd_buffer->device->instance->physicalDevice.use_softpin;
|
||||
|
||||
/* We only use push constant space for images before gen9 */
|
||||
if (map->image_param_count > 0) {
|
||||
VkResult result =
|
||||
anv_cmd_buffer_ensure_push_constant_field(cmd_buffer, stage, images);
|
||||
if (result != VK_SUCCESS)
|
||||
return result;
|
||||
|
||||
cmd_buffer->state.push_constants_dirty |= 1 << stage;
|
||||
}
|
||||
|
||||
uint32_t image = 0;
|
||||
for (uint32_t s = 0; s < map->surface_count; s++) {
|
||||
struct anv_pipeline_binding *binding = &map->surface_to_descriptor[s];
|
||||
|
||||
|
@ -2201,18 +2189,6 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
|
|||
assert(surface_state.alloc_size);
|
||||
if (need_client_mem_relocs)
|
||||
add_surface_state_relocs(cmd_buffer, sstate);
|
||||
if (devinfo->gen < 9) {
|
||||
/* We only need the image params on gen8 and earlier. No image
|
||||
* workarounds that require tiling information are required on
|
||||
* SKL and above.
|
||||
*/
|
||||
assert(image < MAX_GEN8_IMAGES);
|
||||
struct brw_image_param *image_param =
|
||||
&cmd_buffer->state.push_constants[stage]->images[image++];
|
||||
|
||||
*image_param =
|
||||
desc->image_view->planes[binding->plane].storage_image_param;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -2262,13 +2238,6 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
|
|||
add_surface_reloc(cmd_buffer, surface_state,
|
||||
desc->buffer_view->address);
|
||||
}
|
||||
if (devinfo->gen < 9) {
|
||||
assert(image < MAX_GEN8_IMAGES);
|
||||
struct brw_image_param *image_param =
|
||||
&cmd_buffer->state.push_constants[stage]->images[image++];
|
||||
|
||||
*image_param = desc->buffer_view->storage_image_param;
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
|
@ -2278,7 +2247,6 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
|
|||
|
||||
bt_map[s] = surface_state.offset + state_offset;
|
||||
}
|
||||
assert(image == map->image_param_count);
|
||||
|
||||
#if GEN_GEN >= 11
|
||||
/* The PIPE_CONTROL command description says:
|
||||
|
|
Loading…
Reference in New Issue