anv: Put image params in the descriptor set buffer on gen8 and earlier

This is really where they belong; not push constants.  The one downside
here is that we can't push them anymore for compute shaders.  However,
that's a general problem and we should figure out how to push descriptor
sets for compute shaders.  This lets us bump MAX_IMAGES to 64 on BDW and
earlier platforms because we no longer have to worry about push constant
overhead limits.

Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Caio Marcelo de Oliveira Filho <caio.oliveira@intel.com>
This commit is contained in:
Jason Ekstrand 2018-11-21 18:26:27 -06:00 committed by Jason Ekstrand
parent 83b943cc2f
commit 3b755b52e8
6 changed files with 109 additions and 124 deletions

View File

@ -90,7 +90,12 @@ anv_descriptor_data_for_type(const struct anv_physical_device *device,
/* Returns the number of bytes of out-of-band data stored in the descriptor
 * set buffer for one descriptor with the given data flags.
 *
 * Image params are currently the only such data: BRW_IMAGE_PARAM_SIZE
 * dwords (hence the * 4 to convert to bytes) per descriptor, used for the
 * pre-gen9 image_load_store workarounds.
 */
static unsigned
anv_descriptor_data_size(enum anv_descriptor_data data)
{
   unsigned size = 0;

   if (data & ANV_DESCRIPTOR_IMAGE_PARAM)
      size += BRW_IMAGE_PARAM_SIZE * 4;

   return size;
}
/** Returns the size in bytes of each descriptor with the given layout */
@ -902,6 +907,24 @@ VkResult anv_FreeDescriptorSets(
return VK_SUCCESS;
}
/* Serializes a brw_image_param into the descriptor set buffer.
 *
 * Each array field of the param struct is copied dword-by-dword to its
 * BRW_IMAGE_PARAM_*_OFFSET slot in param_desc_map so shaders can read it
 * back from the descriptor buffer.
 *
 * Fix: the original invoked WRITE_PARAM_FIELD(size, SIZE) twice; the
 * second invocation rewrote identical data and is removed.
 */
static void
anv_descriptor_set_write_image_param(uint32_t *param_desc_map,
                                     const struct brw_image_param *param)
{
#define WRITE_PARAM_FIELD(field, FIELD) \
   for (unsigned i = 0; i < ARRAY_SIZE(param->field); i++) \
      param_desc_map[BRW_IMAGE_PARAM_##FIELD##_OFFSET + i] = param->field[i]

   WRITE_PARAM_FIELD(offset, OFFSET);
   WRITE_PARAM_FIELD(size, SIZE);
   WRITE_PARAM_FIELD(stride, STRIDE);
   WRITE_PARAM_FIELD(tiling, TILING);
   WRITE_PARAM_FIELD(swizzling, SWIZZLING);

#undef WRITE_PARAM_FIELD
}
void
anv_descriptor_set_write_image_view(struct anv_device *device,
struct anv_descriptor_set *set,
@ -952,6 +975,18 @@ anv_descriptor_set_write_image_view(struct anv_device *device,
.image_view = image_view,
.sampler = sampler,
};
void *desc_map = set->desc_mem.map + bind_layout->descriptor_offset +
element * anv_descriptor_size(bind_layout);
if (bind_layout->data & ANV_DESCRIPTOR_IMAGE_PARAM) {
/* Storage images can only ever have one plane */
assert(image_view->n_planes == 1);
const struct brw_image_param *image_param =
&image_view->planes[0].storage_image_param;
anv_descriptor_set_write_image_param(desc_map, image_param);
}
}
void
@ -973,6 +1008,14 @@ anv_descriptor_set_write_buffer_view(struct anv_device *device,
.type = type,
.buffer_view = buffer_view,
};
void *desc_map = set->desc_mem.map + bind_layout->descriptor_offset +
element * anv_descriptor_size(bind_layout);
if (bind_layout->data & ANV_DESCRIPTOR_IMAGE_PARAM) {
anv_descriptor_set_write_image_param(desc_map,
&buffer_view->storage_image_param);
}
}
void

View File

@ -1096,8 +1096,6 @@ void anv_GetPhysicalDeviceProperties(
const uint32_t max_samplers = (devinfo->gen >= 8 || devinfo->is_haswell) ?
128 : 16;
const uint32_t max_images = devinfo->gen < 9 ? MAX_GEN8_IMAGES : MAX_IMAGES;
VkSampleCountFlags sample_counts =
isl_device_get_sample_counts(&pdevice->isl_dev);
@ -1121,7 +1119,7 @@ void anv_GetPhysicalDeviceProperties(
.maxPerStageDescriptorUniformBuffers = 64,
.maxPerStageDescriptorStorageBuffers = 64,
.maxPerStageDescriptorSampledImages = max_samplers,
.maxPerStageDescriptorStorageImages = max_images,
.maxPerStageDescriptorStorageImages = MAX_IMAGES,
.maxPerStageDescriptorInputAttachments = 64,
.maxPerStageResources = 250,
.maxDescriptorSetSamplers = 6 * max_samplers, /* number of stages * maxPerStageDescriptorSamplers */
@ -1130,7 +1128,7 @@ void anv_GetPhysicalDeviceProperties(
.maxDescriptorSetStorageBuffers = 6 * 64, /* number of stages * maxPerStageDescriptorStorageBuffers */
.maxDescriptorSetStorageBuffersDynamic = MAX_DYNAMIC_BUFFERS / 2,
.maxDescriptorSetSampledImages = 6 * max_samplers, /* number of stages * maxPerStageDescriptorSampledImages */
.maxDescriptorSetStorageImages = 6 * max_images, /* number of stages * maxPerStageDescriptorStorageImages */
.maxDescriptorSetStorageImages = 6 * MAX_IMAGES, /* number of stages * maxPerStageDescriptorStorageImages */
.maxDescriptorSetInputAttachments = 256,
.maxVertexInputAttributes = MAX_VBS,
.maxVertexInputBindings = MAX_VBS,

View File

@ -35,8 +35,6 @@ struct apply_pipeline_layout_state {
struct anv_pipeline_layout *layout;
bool add_bounds_checks;
unsigned first_image_uniform;
bool uses_constants;
uint8_t constants_offset;
struct {
@ -46,7 +44,6 @@ struct apply_pipeline_layout_state {
BITSET_WORD *used;
uint8_t *surface_offsets;
uint8_t *sampler_offsets;
uint8_t *image_offsets;
} set[MAX_SETS];
};
@ -239,11 +236,11 @@ lower_get_buffer_size(nir_intrinsic_instr *intrin,
nir_src_for_ssa(nir_channel(b, index, 0)));
}
/* Emits a UBO load of descriptor data from the descriptor set buffer.
 *
 * The load reads num_components x bit_size bits from the set's descriptor
 * buffer (bound at the constant surface index state->set[set].desc_offset)
 * at bind_layout->descriptor_offset + offset.  For arrayed bindings the
 * array index scaled by the per-descriptor size is added to the offset,
 * clamped to the binding's array_size when add_bounds_checks is set.
 *
 * Fix: stale pre-change lines from the old lower_image_intrinsic body
 * (referencing undeclared `intrin`/`index`) were interleaved here; only
 * the coherent new implementation is kept.
 */
static nir_ssa_def *
build_descriptor_load(nir_deref_instr *deref, unsigned offset,
                      unsigned num_components, unsigned bit_size,
                      struct apply_pipeline_layout_state *state)
{
   nir_variable *var = nir_deref_instr_get_variable(deref);

   unsigned set = var->data.descriptor_set;
   unsigned binding = var->data.binding;
   unsigned array_size =
      state->layout->set[set].layout->binding[binding].array_size;

   const struct anv_descriptor_set_binding_layout *bind_layout =
      &state->layout->set[set].layout->binding[binding];

   nir_builder *b = &state->builder;

   nir_ssa_def *desc_buffer_index =
      nir_imm_int(b, state->set[set].desc_offset);

   nir_ssa_def *desc_offset =
      nir_imm_int(b, bind_layout->descriptor_offset + offset);
   if (deref->deref_type != nir_deref_type_var) {
      assert(deref->deref_type == nir_deref_type_array);

      const unsigned descriptor_size = anv_descriptor_size(bind_layout);
      nir_ssa_def *arr_index = nir_ssa_for_src(b, deref->arr.index, 1);
      if (state->add_bounds_checks)
         arr_index = nir_umin(b, arr_index, nir_imm_int(b, array_size - 1));

      desc_offset = nir_iadd(b, desc_offset,
                             nir_imul_imm(b, arr_index, descriptor_size));
   }

   nir_intrinsic_instr *desc_load =
      nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_ubo);
   desc_load->src[0] = nir_src_for_ssa(desc_buffer_index);
   desc_load->src[1] = nir_src_for_ssa(desc_offset);
   desc_load->num_components = num_components;
   nir_ssa_dest_init(&desc_load->instr, &desc_load->dest,
                     num_components, bit_size, NULL);
   nir_builder_instr_insert(b, &desc_load->instr);

   return &desc_load->dest.ssa;
}
/* Lowers image intrinsics for the flat binding-table model.
 *
 * image_deref_load_param_intel is replaced by a load of the corresponding
 * brw_image_param data straight from the descriptor set buffer (param * 16
 * is the byte offset of that param's slot within the descriptor — matches
 * the BRW_IMAGE_PARAM_*_OFFSET layout used on the write side).  Every
 * other image intrinsic has its deref source rewritten to the binding
 * table surface index, bounds-clamped for arrayed bindings when requested.
 *
 * Fix: the old push-constant load_uniform path (using undeclared `set`
 * and `index` variables) was left interleaved with the new code; only the
 * coherent descriptor-buffer path is kept.
 */
static void
lower_image_intrinsic(nir_intrinsic_instr *intrin,
                      struct apply_pipeline_layout_state *state)
{
   nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);

   nir_builder *b = &state->builder;
   b->cursor = nir_before_instr(&intrin->instr);

   if (intrin->intrinsic == nir_intrinsic_image_deref_load_param_intel) {
      b->cursor = nir_instr_remove(&intrin->instr);

      const unsigned param = nir_intrinsic_base(intrin);

      nir_ssa_def *desc =
         build_descriptor_load(deref, param * 16,
                               intrin->dest.ssa.num_components,
                               intrin->dest.ssa.bit_size, state);

      nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(desc));
   } else {
      nir_variable *var = nir_deref_instr_get_variable(deref);
      unsigned set = var->data.descriptor_set;
      unsigned binding = var->data.binding;
      unsigned binding_offset = state->set[set].surface_offsets[binding];
      unsigned array_size =
         state->layout->set[set].layout->binding[binding].array_size;

      nir_ssa_def *index = NULL;
      if (deref->deref_type != nir_deref_type_var) {
         assert(deref->deref_type == nir_deref_type_array);
         index = nir_ssa_for_src(b, deref->arr.index, 1);
         if (state->add_bounds_checks)
            index = nir_umin(b, index, nir_imm_int(b, array_size - 1));
      } else {
         index = nir_imm_int(b, 0);
      }

      index = nir_iadd_imm(b, index, binding_offset);
      nir_rewrite_image_intrinsic(intrin, index, false);
   }
}
@ -475,16 +505,6 @@ apply_pipeline_layout_block(nir_block *block,
}
}
/* Fills one vec4 worth of uniform params: the first n components point at
 * consecutive push-constant dwords starting at `offset`, and any remaining
 * components up to 4 are padded with the built-in zero param.
 */
static void
setup_vec4_uniform_value(uint32_t *params, uint32_t offset, unsigned n)
{
   unsigned comp = 0;

   while (comp < n) {
      params[comp] = ANV_PARAM_PUSH(offset + comp * sizeof(uint32_t));
      comp++;
   }

   while (comp < 4) {
      params[comp] = BRW_PARAM_BUILTIN_ZERO;
      comp++;
   }
}
void
anv_nir_apply_pipeline_layout(const struct anv_physical_device *pdevice,
bool robust_buffer_access,
@ -508,7 +528,6 @@ anv_nir_apply_pipeline_layout(const struct anv_physical_device *pdevice,
state.set[s].used = rzalloc_array(mem_ctx, BITSET_WORD, words);
state.set[s].surface_offsets = rzalloc_array(mem_ctx, uint8_t, count);
state.set[s].sampler_offsets = rzalloc_array(mem_ctx, uint8_t, count);
state.set[s].image_offsets = rzalloc_array(mem_ctx, uint8_t, count);
}
nir_foreach_function(function, shader) {
@ -583,45 +602,9 @@ anv_nir_apply_pipeline_layout(const struct anv_physical_device *pdevice,
}
}
}
if (binding->data & ANV_DESCRIPTOR_IMAGE_PARAM) {
state.set[set].image_offsets[b] = map->image_param_count;
map->image_param_count += binding->array_size;
}
}
}
if (map->image_param_count > 0) {
assert(map->image_param_count <= MAX_GEN8_IMAGES);
assert(shader->num_uniforms == prog_data->nr_params * 4);
state.first_image_uniform = shader->num_uniforms;
uint32_t *param = brw_stage_prog_data_add_params(prog_data,
map->image_param_count *
BRW_IMAGE_PARAM_SIZE);
struct anv_push_constants *null_data = NULL;
const struct brw_image_param *image_param = null_data->images;
for (uint32_t i = 0; i < map->image_param_count; i++) {
setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_OFFSET_OFFSET,
(uintptr_t)image_param->offset, 2);
setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SIZE_OFFSET,
(uintptr_t)image_param->size, 3);
setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_STRIDE_OFFSET,
(uintptr_t)image_param->stride, 4);
setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_TILING_OFFSET,
(uintptr_t)image_param->tiling, 3);
setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SWIZZLING_OFFSET,
(uintptr_t)image_param->swizzling, 2);
param += BRW_IMAGE_PARAM_SIZE;
image_param ++;
}
assert(param == prog_data->param + prog_data->nr_params);
shader->num_uniforms += map->image_param_count *
BRW_IMAGE_PARAM_SIZE * 4;
assert(shader->num_uniforms == prog_data->nr_params * 4);
}
nir_foreach_variable(var, &shader->uniforms) {
const struct glsl_type *glsl_type = glsl_without_array(var->type);

View File

@ -154,7 +154,6 @@ anv_shader_bin_write_to_blob(const struct anv_shader_bin *shader,
blob_write_uint32(blob, shader->bind_map.surface_count);
blob_write_uint32(blob, shader->bind_map.sampler_count);
blob_write_uint32(blob, shader->bind_map.image_param_count);
blob_write_bytes(blob, shader->bind_map.surface_to_descriptor,
shader->bind_map.surface_count *
sizeof(*shader->bind_map.surface_to_descriptor));
@ -194,7 +193,6 @@ anv_shader_bin_create_from_blob(struct anv_device *device,
struct anv_pipeline_bind_map bind_map;
bind_map.surface_count = blob_read_uint32(blob);
bind_map.sampler_count = blob_read_uint32(blob);
bind_map.image_param_count = blob_read_uint32(blob);
bind_map.surface_to_descriptor = (void *)
blob_read_bytes(blob, bind_map.surface_count *
sizeof(*bind_map.surface_to_descriptor));

View File

@ -158,7 +158,6 @@ struct gen_l3_config;
#define MAX_PUSH_CONSTANTS_SIZE 128
#define MAX_DYNAMIC_BUFFERS 16
#define MAX_IMAGES 64
#define MAX_GEN8_IMAGES 8
#define MAX_PUSH_DESCRIPTORS 32 /* Minimum requirement */
#define MAX_INLINE_UNIFORM_BLOCK_SIZE 4096
#define MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS 32
@ -2077,9 +2076,6 @@ struct anv_push_constants {
/* Used for vkCmdDispatchBase */
uint32_t base_work_group_id[3];
/* Image data for image_load_store on pre-SKL */
struct brw_image_param images[MAX_GEN8_IMAGES];
};
struct anv_dynamic_state {
@ -2587,7 +2583,6 @@ mesa_to_vk_shader_stage(gl_shader_stage mesa_stage)
struct anv_pipeline_bind_map {
uint32_t surface_count;
uint32_t sampler_count;
uint32_t image_param_count;
struct anv_pipeline_binding * surface_to_descriptor;
struct anv_pipeline_binding * sampler_to_descriptor;

View File

@ -2010,7 +2010,6 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
gl_shader_stage stage,
struct anv_state *bt_state)
{
const struct gen_device_info *devinfo = &cmd_buffer->device->info;
struct anv_subpass *subpass = cmd_buffer->state.subpass;
struct anv_cmd_pipeline_state *pipe_state;
struct anv_pipeline *pipeline;
@ -2051,17 +2050,6 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
const bool need_client_mem_relocs =
!cmd_buffer->device->instance->physicalDevice.use_softpin;
/* We only use push constant space for images before gen9 */
if (map->image_param_count > 0) {
VkResult result =
anv_cmd_buffer_ensure_push_constant_field(cmd_buffer, stage, images);
if (result != VK_SUCCESS)
return result;
cmd_buffer->state.push_constants_dirty |= 1 << stage;
}
uint32_t image = 0;
for (uint32_t s = 0; s < map->surface_count; s++) {
struct anv_pipeline_binding *binding = &map->surface_to_descriptor[s];
@ -2201,18 +2189,6 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
assert(surface_state.alloc_size);
if (need_client_mem_relocs)
add_surface_state_relocs(cmd_buffer, sstate);
if (devinfo->gen < 9) {
/* We only need the image params on gen8 and earlier. No image
* workarounds that require tiling information are required on
* SKL and above.
*/
assert(image < MAX_GEN8_IMAGES);
struct brw_image_param *image_param =
&cmd_buffer->state.push_constants[stage]->images[image++];
*image_param =
desc->image_view->planes[binding->plane].storage_image_param;
}
break;
}
@ -2262,13 +2238,6 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
add_surface_reloc(cmd_buffer, surface_state,
desc->buffer_view->address);
}
if (devinfo->gen < 9) {
assert(image < MAX_GEN8_IMAGES);
struct brw_image_param *image_param =
&cmd_buffer->state.push_constants[stage]->images[image++];
*image_param = desc->buffer_view->storage_image_param;
}
break;
default:
@ -2278,7 +2247,6 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
bt_map[s] = surface_state.offset + state_offset;
}
assert(image == map->image_param_count);
#if GEN_GEN >= 11
/* The PIPE_CONTROL command description says: