v3dv: implement layered texel buffer copies using a geometry shader

Instead of specifying a separate framebuffer per layer which is expected
to be much slower.

Reviewed-by: Alejandro Piñeiro <apinheiro@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11843>
This commit is contained in:
Iago Toral Quiroga 2021-07-12 13:16:06 +02:00 committed by Marge Bot
parent 8c16b48009
commit 738e7106dd
2 changed files with 187 additions and 79 deletions

View File

@ -170,13 +170,21 @@ create_texel_buffer_copy_pipeline_layout(struct v3dv_device *device,
}
assert(*p_layout == 0);
/* FIXME: this is abusing a bit the API, since not all of our copy
* pipelines have a geometry shader. We could create 2 different pipeline
* layouts, but this works for us for now.
*/
VkPushConstantRange ranges[2] = {
{ VK_SHADER_STAGE_FRAGMENT_BIT, 0, 24 },
{ VK_SHADER_STAGE_GEOMETRY_BIT, 24, 4 },
};
VkPipelineLayoutCreateInfo p_layout_info = {
.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
.setLayoutCount = 1,
.pSetLayouts = ds_layout,
.pushConstantRangeCount = 1,
.pPushConstantRanges =
&(VkPushConstantRange) { VK_SHADER_STAGE_FRAGMENT_BIT, 0, 24 },
.pushConstantRangeCount = 2,
.pPushConstantRanges = ranges,
};
result =
@ -1689,6 +1697,7 @@ static void
get_texel_buffer_copy_pipeline_cache_key(VkFormat format,
VkColorComponentFlags cmask,
VkComponentMapping *cswizzle,
bool is_layered,
uint8_t *key)
{
memset(key, 0, V3DV_META_TEXEL_BUFFER_COPY_CACHE_KEY_SIZE);
@ -1701,6 +1710,12 @@ get_texel_buffer_copy_pipeline_cache_key(VkFormat format,
*p = cmask;
p++;
/* Note that that we are using a single byte for this, so we could pack
* more data into this 32-bit slot in the future.
*/
*p = is_layered ? 1 : 0;
p++;
memcpy(p, cswizzle, sizeof(VkComponentMapping));
p += sizeof(VkComponentMapping) / sizeof(uint32_t);
@ -1720,6 +1735,7 @@ static bool
create_pipeline(struct v3dv_device *device,
struct v3dv_render_pass *pass,
struct nir_shader *vs_nir,
struct nir_shader *gs_nir,
struct nir_shader *fs_nir,
const VkPipelineVertexInputStateCreateInfo *vi_state,
const VkPipelineDepthStencilStateCreateInfo *ds_state,
@ -1745,6 +1761,70 @@ get_texel_buffer_copy_vs()
return b.shader;
}
static nir_shader *
get_texel_buffer_copy_gs()
{
/* FIXME: this creates a geometry shader that takes the index of a single
* layer to clear from push constants, so we need to emit a draw call for
* each layer that we want to clear. We could actually do better and have it
* take a range of layers however, if we were to do this, we would need to
* be careful not to exceed the maximum number of output vertices allowed in
* a geometry shader.
*/
const nir_shader_compiler_options *options = v3dv_pipeline_get_nir_options();
nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_GEOMETRY, options,
"meta texel buffer copy gs");
nir_shader *nir = b.shader;
nir->info.inputs_read = 1ull << VARYING_SLOT_POS;
nir->info.outputs_written = (1ull << VARYING_SLOT_POS) |
(1ull << VARYING_SLOT_LAYER);
nir->info.gs.input_primitive = GL_TRIANGLES;
nir->info.gs.output_primitive = GL_TRIANGLE_STRIP;
nir->info.gs.vertices_in = 3;
nir->info.gs.vertices_out = 3;
nir->info.gs.invocations = 1;
nir->info.gs.active_stream_mask = 0x1;
/* in vec4 gl_Position[3] */
nir_variable *gs_in_pos =
nir_variable_create(b.shader, nir_var_shader_in,
glsl_array_type(glsl_vec4_type(), 3, 0),
"in_gl_Position");
gs_in_pos->data.location = VARYING_SLOT_POS;
/* out vec4 gl_Position */
nir_variable *gs_out_pos =
nir_variable_create(b.shader, nir_var_shader_out, glsl_vec4_type(),
"out_gl_Position");
gs_out_pos->data.location = VARYING_SLOT_POS;
/* out float gl_Layer */
nir_variable *gs_out_layer =
nir_variable_create(b.shader, nir_var_shader_out, glsl_float_type(),
"out_gl_Layer");
gs_out_layer->data.location = VARYING_SLOT_LAYER;
/* Emit output triangle */
for (uint32_t i = 0; i < 3; i++) {
/* gl_Position from shader input */
nir_deref_instr *in_pos_i =
nir_build_deref_array_imm(&b, nir_build_deref_var(&b, gs_in_pos), i);
nir_copy_deref(&b, nir_build_deref_var(&b, gs_out_pos), in_pos_i);
/* gl_Layer from push constants */
nir_ssa_def *layer =
nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0),
.base = 24, .range = 4);
nir_store_var(&b, gs_out_layer, layer, 0x1);
nir_emit_vertex(&b, 0);
}
nir_end_primitive(&b, 0);
return nir;
}
static nir_ssa_def *
load_frag_coord(nir_builder *b)
{
@ -1874,6 +1954,7 @@ create_texel_buffer_copy_pipeline(struct v3dv_device *device,
VkFormat format,
VkColorComponentFlags cmask,
VkComponentMapping *cswizzle,
bool is_layered,
VkRenderPass _pass,
VkPipelineLayout pipeline_layout,
VkPipeline *pipeline)
@ -1884,6 +1965,7 @@ create_texel_buffer_copy_pipeline(struct v3dv_device *device,
nir_shader *vs_nir = get_texel_buffer_copy_vs();
nir_shader *fs_nir = get_texel_buffer_copy_fs(device, format, cswizzle);
nir_shader *gs_nir = is_layered ? get_texel_buffer_copy_gs() : NULL;
const VkPipelineVertexInputStateCreateInfo vi_state = {
.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
@ -1919,7 +2001,7 @@ create_texel_buffer_copy_pipeline(struct v3dv_device *device,
return create_pipeline(device,
pass,
vs_nir, fs_nir,
vs_nir, gs_nir, fs_nir,
&vi_state,
&ds_state,
&cb_state,
@ -1935,12 +2017,14 @@ get_copy_texel_buffer_pipeline(
VkColorComponentFlags cmask,
VkComponentMapping *cswizzle,
VkImageType image_type,
bool is_layered,
struct v3dv_meta_texel_buffer_copy_pipeline **pipeline)
{
bool ok = true;
uint8_t key[V3DV_META_TEXEL_BUFFER_COPY_CACHE_KEY_SIZE];
get_texel_buffer_copy_pipeline_cache_key(format, cmask, cswizzle, key);
get_texel_buffer_copy_pipeline_cache_key(format, cmask, cswizzle, is_layered,
key);
mtx_lock(&device->meta.mtx);
struct hash_entry *entry =
@ -1966,7 +2050,8 @@ get_copy_texel_buffer_pipeline(
goto fail;
ok =
create_texel_buffer_copy_pipeline(device, format, cmask, cswizzle,
create_texel_buffer_copy_pipeline(device,
format, cmask, cswizzle, is_layered,
(*pipeline)->pass,
device->meta.texel_buffer_copy.p_layout,
&(*pipeline)->pipeline);
@ -2058,11 +2143,28 @@ texel_buffer_shader_copy(struct v3dv_cmd_buffer *cmd_buffer,
*/
handled = true;
/* Compute the number of layers to copy.
*
* If we are batching (region_count > 1) all our regions have the same
* image subresource so we can take this from the first region.
*/
const VkImageSubresourceLayers *resource = &regions[0].imageSubresource;
uint32_t num_layers;
if (image->type != VK_IMAGE_TYPE_3D) {
num_layers = resource->layerCount;
} else {
assert(region_count == 1);
num_layers = regions[0].imageExtent.depth;
}
assert(num_layers > 0);
/* Get the texel buffer copy pipeline */
struct v3dv_meta_texel_buffer_copy_pipeline *pipeline = NULL;
bool ok = get_copy_texel_buffer_pipeline(cmd_buffer->device,
dst_format, cmask, cswizzle,
image->type, &pipeline);
image->type, num_layers > 1,
&pipeline);
if (!ok)
return handled;
assert(pipeline && pipeline->pipeline && pipeline->pass);
@ -2132,78 +2234,58 @@ texel_buffer_shader_copy(struct v3dv_cmd_buffer *cmd_buffer,
0, 1, &set,
0, NULL);
/* Compute the number of layers to copy.
/* Setup framebuffer.
*
* If we are batching (region_count > 1) all our regions have the same
* image subresource so we can take this from the first region.
* For 3D images, this creates a layered framebuffer with a number of
* layers matching the depth extent of the 3D image.
*/
const VkImageSubresourceLayers *resource = &regions[0].imageSubresource;
uint32_t num_layers;
if (image->type != VK_IMAGE_TYPE_3D) {
num_layers = resource->layerCount;
} else {
assert(region_count == 1);
num_layers = regions[0].imageExtent.depth;
}
assert(num_layers > 0);
uint32_t fb_width = u_minify(image->extent.width, resource->mipLevel);
uint32_t fb_height = u_minify(image->extent.height, resource->mipLevel);
VkImageViewCreateInfo image_view_info = {
.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
.image = v3dv_image_to_handle(image),
.viewType = v3dv_image_type_to_view_type(image->type),
.format = dst_format,
.subresourceRange = {
.aspectMask = aspect,
.baseMipLevel = resource->mipLevel,
.levelCount = 1,
.baseArrayLayer = resource->baseArrayLayer,
.layerCount = num_layers,
},
};
VkImageView image_view;
result = v3dv_CreateImageView(_device, &image_view_info,
&cmd_buffer->device->vk.alloc, &image_view);
if (result != VK_SUCCESS)
goto fail;
/* Sanity check: we can only batch multiple regions together if they have
* the same framebuffer (so the same layer).
*/
assert(num_layers == 1 || region_count == 1);
v3dv_cmd_buffer_add_private_obj(
cmd_buffer, (uintptr_t)image_view,
(v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyImageView);
VkFramebufferCreateInfo fb_info = {
.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
.renderPass = pipeline->pass,
.attachmentCount = 1,
.pAttachments = &image_view,
.width = fb_width,
.height = fb_height,
.layers = num_layers,
};
VkFramebuffer fb;
result = v3dv_CreateFramebuffer(_device, &fb_info,
&cmd_buffer->device->vk.alloc, &fb);
if (result != VK_SUCCESS)
goto fail;
v3dv_cmd_buffer_add_private_obj(
cmd_buffer, (uintptr_t)fb,
(v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyFramebuffer);
/* For each layer */
for (uint32_t l = 0; l < num_layers; l++) {
/* Setup framebuffer for this layer.
*
* FIXME: once we support geometry shaders, we should be able to have
* one layered framebuffer and emit just one draw call for
* all layers using layered rendering. At that point, we should
* also be able to batch multi-layered regions as well.
*/
VkImageViewCreateInfo image_view_info = {
.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
.image = v3dv_image_to_handle(image),
.viewType = v3dv_image_type_to_view_type(image->type),
.format = dst_format,
.subresourceRange = {
.aspectMask = aspect,
.baseMipLevel = resource->mipLevel,
.levelCount = 1,
.baseArrayLayer = resource->baseArrayLayer + l,
.layerCount = 1
},
};
VkImageView image_view;
result = v3dv_CreateImageView(_device, &image_view_info,
&cmd_buffer->device->vk.alloc, &image_view);
if (result != VK_SUCCESS)
goto fail;
v3dv_cmd_buffer_add_private_obj(
cmd_buffer, (uintptr_t)image_view,
(v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyImageView);
VkFramebufferCreateInfo fb_info = {
.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
.renderPass = pipeline->pass,
.attachmentCount = 1,
.pAttachments = &image_view,
.width = u_minify(image->extent.width, resource->mipLevel),
.height = u_minify(image->extent.height, resource->mipLevel),
.layers = 1,
};
VkFramebuffer fb;
result = v3dv_CreateFramebuffer(_device, &fb_info,
&cmd_buffer->device->vk.alloc, &fb);
if (result != VK_SUCCESS)
goto fail;
v3dv_cmd_buffer_add_private_obj(
cmd_buffer, (uintptr_t)fb,
(v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyFramebuffer);
/* Start render pass for this layer.
*
* If the we only have one region to copy, then we might be able to
@ -2230,8 +2312,8 @@ texel_buffer_shader_copy(struct v3dv_cmd_buffer *cmd_buffer,
} else {
render_area.offset.x = 0;
render_area.offset.y = 0;
render_area.extent.width = fb_info.width;
render_area.extent.height = fb_info.height;
render_area.extent.width = fb_width;
render_area.extent.height = fb_height;
}
VkRenderPassBeginInfo rp_info = {
@ -2248,6 +2330,17 @@ texel_buffer_shader_copy(struct v3dv_cmd_buffer *cmd_buffer,
if (!job)
goto fail;
/* If we are using a layered copy we need to specify the layer for the
* Geometry Shader.
*/
if (num_layers > 1) {
uint32_t layer = resource->baseArrayLayer + l;
v3dv_CmdPushConstants(_cmd_buffer,
cmd_buffer->device->meta.texel_buffer_copy.p_layout,
VK_SHADER_STAGE_GEOMETRY_BIT,
24, 4, &layer);
}
/* For each region */
dirty_dynamic_state = V3DV_CMD_DIRTY_VIEWPORT | V3DV_CMD_DIRTY_SCISSOR;
for (uint32_t r = 0; r < region_count; r++) {
@ -3390,6 +3483,7 @@ static bool
create_pipeline(struct v3dv_device *device,
struct v3dv_render_pass *pass,
struct nir_shader *vs_nir,
struct nir_shader *gs_nir,
struct nir_shader *fs_nir,
const VkPipelineVertexInputStateCreateInfo *vi_state,
const VkPipelineDepthStencilStateCreateInfo *ds_state,
@ -3399,12 +3493,15 @@ create_pipeline(struct v3dv_device *device,
VkPipeline *pipeline)
{
struct vk_shader_module vs_m;
struct vk_shader_module gs_m;
struct vk_shader_module fs_m;
uint32_t num_stages = gs_nir ? 3 : 2;
v3dv_shader_module_internal_init(device, &vs_m, vs_nir);
v3dv_shader_module_internal_init(device, &fs_m, fs_nir);
VkPipelineShaderStageCreateInfo stages[2] = {
VkPipelineShaderStageCreateInfo stages[3] = {
{
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
.stage = VK_SHADER_STAGE_VERTEX_BIT,
@ -3417,12 +3514,23 @@ create_pipeline(struct v3dv_device *device,
.module = vk_shader_module_to_handle(&fs_m),
.pName = "main",
},
{
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
.stage = VK_SHADER_STAGE_GEOMETRY_BIT,
.module = VK_NULL_HANDLE,
.pName = "main",
},
};
if (gs_nir) {
v3dv_shader_module_internal_init(device, &gs_m, gs_nir);
stages[2].module = vk_shader_module_to_handle(&gs_m);
}
VkGraphicsPipelineCreateInfo info = {
.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
.stageCount = 2,
.stageCount = num_stages,
.pStages = stages,
.pVertexInputState = vi_state,
@ -3574,7 +3682,7 @@ create_blit_pipeline(struct v3dv_device *device,
return create_pipeline(device,
pass,
vs_nir, fs_nir,
vs_nir, NULL, fs_nir,
&vi_state,
&ds_state,
&cb_state,

View File

@ -217,7 +217,7 @@ struct v3dv_queue {
};
#define V3DV_META_BLIT_CACHE_KEY_SIZE (4 * sizeof(uint32_t))
#define V3DV_META_TEXEL_BUFFER_COPY_CACHE_KEY_SIZE (2 * sizeof(uint32_t) + \
#define V3DV_META_TEXEL_BUFFER_COPY_CACHE_KEY_SIZE (3 * sizeof(uint32_t) + \
sizeof(VkComponentMapping))
struct v3dv_meta_color_clear_pipeline {