/*
* Copyright © 2015 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include <assert.h>
#include <stdbool.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#include "anv_private.h"
#include "anv_measure.h"
#include "vk_util.h"
/** \file anv_cmd_buffer.c
*
* This file contains all of the stuff for emitting commands into a command
* buffer. This includes implementations of most of the vkCmd*
* entrypoints. This file is concerned entirely with state emission and
* not with the command buffer data structure itself. As far as this file
* is concerned, most of anv_cmd_buffer is magic.
*/
static void
anv_cmd_state_init(struct anv_cmd_buffer *cmd_buffer)
{
struct anv_cmd_state *state = &cmd_buffer->state;
memset(state, 0, sizeof(*state));
state->current_pipeline = UINT32_MAX;
state->gfx.restart_index = UINT32_MAX;
state->gfx.dirty = 0;
}
static void
anv_cmd_pipeline_state_finish(struct anv_cmd_buffer *cmd_buffer,
struct anv_cmd_pipeline_state *pipe_state)
{
for (uint32_t i = 0; i < ARRAY_SIZE(pipe_state->push_descriptors); i++) {
if (pipe_state->push_descriptors[i]) {
anv_descriptor_set_layout_unref(cmd_buffer->device,
pipe_state->push_descriptors[i]->set.layout);
vk_free(&cmd_buffer->vk.pool->alloc, pipe_state->push_descriptors[i]);
}
}
}
static void
anv_cmd_state_finish(struct anv_cmd_buffer *cmd_buffer)
{
struct anv_cmd_state *state = &cmd_buffer->state;
anv_cmd_pipeline_state_finish(cmd_buffer, &state->gfx.base);
anv_cmd_pipeline_state_finish(cmd_buffer, &state->compute.base);
}
static void
anv_cmd_state_reset(struct anv_cmd_buffer *cmd_buffer)
{
anv_cmd_state_finish(cmd_buffer);
anv_cmd_state_init(cmd_buffer);
}
static void anv_cmd_buffer_destroy(struct vk_command_buffer *vk_cmd_buffer);
static VkResult anv_create_cmd_buffer(
struct anv_device * device,
struct vk_command_pool * pool,
VkCommandBufferLevel level,
VkCommandBuffer* pCommandBuffer)
{
struct anv_cmd_buffer *cmd_buffer;
VkResult result;
cmd_buffer = vk_alloc(&pool->alloc, sizeof(*cmd_buffer), 8,
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
if (cmd_buffer == NULL)
return vk_error(pool, VK_ERROR_OUT_OF_HOST_MEMORY);
result = vk_command_buffer_init(&cmd_buffer->vk, pool, level);
if (result != VK_SUCCESS)
goto fail_alloc;
cmd_buffer->vk.destroy = anv_cmd_buffer_destroy;
cmd_buffer->vk.dynamic_graphics_state.ms.sample_locations =
&cmd_buffer->state.gfx.sample_locations;
cmd_buffer->batch.status = VK_SUCCESS;
cmd_buffer->device = device;
assert(pool->queue_family_index < device->physical->queue.family_count);
cmd_buffer->queue_family =
&device->physical->queue.families[pool->queue_family_index];
result = anv_cmd_buffer_init_batch_bo_chain(cmd_buffer);
if (result != VK_SUCCESS)
goto fail_vk;
anv_state_stream_init(&cmd_buffer->surface_state_stream,
&device->surface_state_pool, 4096);
anv_state_stream_init(&cmd_buffer->dynamic_state_stream,
&device->dynamic_state_pool, 16384);
anv_state_stream_init(&cmd_buffer->general_state_stream,
&device->general_state_pool, 16384);
cmd_buffer->self_mod_locations = NULL;
anv_cmd_state_init(cmd_buffer);
anv_measure_init(cmd_buffer);
u_trace_init(&cmd_buffer->trace, &device->ds.trace_context);
*pCommandBuffer = anv_cmd_buffer_to_handle(cmd_buffer);
return VK_SUCCESS;
fail_vk:
vk_command_buffer_finish(&cmd_buffer->vk);
fail_alloc:
vk_free2(&device->vk.alloc, &pool->alloc, cmd_buffer);
return result;
}
VkResult anv_AllocateCommandBuffers(
VkDevice _device,
const VkCommandBufferAllocateInfo* pAllocateInfo,
VkCommandBuffer* pCommandBuffers)
{
ANV_FROM_HANDLE(anv_device, device, _device);
VK_FROM_HANDLE(vk_command_pool, pool, pAllocateInfo->commandPool);
VkResult result = VK_SUCCESS;
uint32_t i;
for (i = 0; i < pAllocateInfo->commandBufferCount; i++) {
result = anv_create_cmd_buffer(device, pool, pAllocateInfo->level,
&pCommandBuffers[i]);
if (result != VK_SUCCESS)
break;
}
if (result != VK_SUCCESS) {
while (i--) {
VK_FROM_HANDLE(vk_command_buffer, cmd_buffer, pCommandBuffers[i]);
anv_cmd_buffer_destroy(cmd_buffer);
}
for (i = 0; i < pAllocateInfo->commandBufferCount; i++)
pCommandBuffers[i] = VK_NULL_HANDLE;
}
return result;
}
static void
anv_cmd_buffer_destroy(struct vk_command_buffer *vk_cmd_buffer)
{
struct anv_cmd_buffer *cmd_buffer =
container_of(vk_cmd_buffer, struct anv_cmd_buffer, vk);
u_trace_fini(&cmd_buffer->trace);
anv_measure_destroy(cmd_buffer);
anv_cmd_buffer_fini_batch_bo_chain(cmd_buffer);
anv_state_stream_finish(&cmd_buffer->surface_state_stream);
anv_state_stream_finish(&cmd_buffer->dynamic_state_stream);
anv_state_stream_finish(&cmd_buffer->general_state_stream);
anv_cmd_state_finish(cmd_buffer);
vk_free(&cmd_buffer->vk.pool->alloc, cmd_buffer->self_mod_locations);
vk_command_buffer_finish(&cmd_buffer->vk);
vk_free(&cmd_buffer->vk.pool->alloc, cmd_buffer);
}
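
/* Reset a command buffer to its freshly-allocated state: the batch BO chain,
* command state, state streams and u_trace data are all torn down and
* reinitialized so the buffer can be recorded again.
*/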
VkResult
anv_cmd_buffer_reset(struct anv_cmd_buffer *cmd_buffer)
{
vk_command_buffer_reset(&cmd_buffer->vk);
cmd_buffer->usage_flags = 0;
cmd_buffer->perf_query_pool = NULL;
anv_cmd_buffer_reset_batch_bo_chain(cmd_buffer);
anv_cmd_state_reset(cmd_buffer);
anv_state_stream_finish(&cmd_buffer->surface_state_stream);
anv_state_stream_init(&cmd_buffer->surface_state_stream,
&cmd_buffer->device->surface_state_pool, 4096);
anv_state_stream_finish(&cmd_buffer->dynamic_state_stream);
anv_state_stream_init(&cmd_buffer->dynamic_state_stream,
&cmd_buffer->device->dynamic_state_pool, 16384);
anv_state_stream_finish(&cmd_buffer->general_state_stream);
anv_state_stream_init(&cmd_buffer->general_state_stream,
&cmd_buffer->device->general_state_pool, 16384);
anv_measure_reset(cmd_buffer);
u_trace_fini(&cmd_buffer->trace);
u_trace_init(&cmd_buffer->trace, &cmd_buffer->device->ds.trace_context);
return VK_SUCCESS;
}
VkResult anv_ResetCommandBuffer(
VkCommandBuffer commandBuffer,
VkCommandBufferResetFlags flags)
{
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
return anv_cmd_buffer_reset(cmd_buffer);
}
void
anv_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer)
{
const struct intel_device_info *devinfo = &cmd_buffer->device->info;
anv_genX(devinfo, cmd_buffer_emit_state_base_address)(cmd_buffer);
}
void
anv_cmd_buffer_mark_image_written(struct anv_cmd_buffer *cmd_buffer,
const struct anv_image *image,
VkImageAspectFlagBits aspect,
enum isl_aux_usage aux_usage,
uint32_t level,
uint32_t base_layer,
uint32_t layer_count)
{
const struct intel_device_info *devinfo = &cmd_buffer->device->info;
anv_genX(devinfo, cmd_buffer_mark_image_written)(cmd_buffer, image,
aspect, aux_usage,
level, base_layer,
layer_count);
}
void
anv_cmd_emit_conditional_render_predicate(struct anv_cmd_buffer *cmd_buffer)
{
const struct intel_device_info *devinfo = &cmd_buffer->device->info;
anv_genX(devinfo, cmd_emit_conditional_render_predicate)(cmd_buffer);
}
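
/* Copy src into dst and report whether the destination contents actually
* changed. Used below to detect when a pipeline's bind map hashes differ
* from the ones currently recorded in the command buffer state.
*/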
static bool
mem_update(void *dst, const void *src, size_t size)
{
if (memcmp(dst, src, size) == 0)
return false;
memcpy(dst, src, size);
return true;
}
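
/* Compare the pipeline's surface, sampler and push-constant layout hashes
* for the given stage against the ones last seen by this command buffer and
* flag the corresponding descriptor/push-constant state dirty when they
* differ.
*/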
static void
set_dirty_for_bind_map(struct anv_cmd_buffer *cmd_buffer,
gl_shader_stage stage,
const struct anv_pipeline_bind_map *map)
{
assert(stage < ARRAY_SIZE(cmd_buffer->state.surface_sha1s));
if (mem_update(cmd_buffer->state.surface_sha1s[stage],
map->surface_sha1, sizeof(map->surface_sha1)))
cmd_buffer->state.descriptors_dirty |= mesa_to_vk_shader_stage(stage);
assert(stage < ARRAY_SIZE(cmd_buffer->state.sampler_sha1s));
if (mem_update(cmd_buffer->state.sampler_sha1s[stage],
map->sampler_sha1, sizeof(map->sampler_sha1)))
cmd_buffer->state.descriptors_dirty |= mesa_to_vk_shader_stage(stage);
assert(stage < ARRAY_SIZE(cmd_buffer->state.push_sha1s));
if (mem_update(cmd_buffer->state.push_sha1s[stage],
map->push_sha1, sizeof(map->push_sha1)))
cmd_buffer->state.push_constants_dirty |= mesa_to_vk_shader_stage(stage);
}
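
/* Return the base-2 logarithm of value, rounded up. */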
static inline uint32_t
ilog2_round_up(uint32_t value)
{
assert(value != 0);
return 32 - __builtin_clz(value - 1);
}
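
/* Make sure the command buffer has a shadow buffer large enough for the
* pipeline's ray queries (allocated lazily and cached on the device per
* power-of-two size bucket), add the shadow and HW ray-query BOs to the
* batch's BO list, and point the push constants at the ray-query globals.
*/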
static void
anv_cmd_buffer_set_ray_query_buffer(struct anv_cmd_buffer *cmd_buffer,
struct anv_cmd_pipeline_state *pipeline_state,
struct anv_pipeline *pipeline,
VkShaderStageFlags stages)
{
struct anv_device *device = cmd_buffer->device;
uint64_t ray_shadow_size =
align_u64(brw_rt_ray_queries_shadow_stacks_size(&device->info,
pipeline->ray_queries),
4096);
if (ray_shadow_size > 0 &&
(!cmd_buffer->state.ray_query_shadow_bo ||
cmd_buffer->state.ray_query_shadow_bo->size < ray_shadow_size)) {
unsigned shadow_size_log2 = MAX2(ilog2_round_up(ray_shadow_size), 16);
unsigned bucket = shadow_size_log2 - 16;
assert(bucket < ARRAY_SIZE(device->ray_query_shadow_bos));
struct anv_bo *bo = p_atomic_read(&device->ray_query_shadow_bos[bucket]);
if (bo == NULL) {
struct anv_bo *new_bo;
VkResult result = anv_device_alloc_bo(device, "RT queries shadow",
ray_shadow_size,
ANV_BO_ALLOC_LOCAL_MEM, /* alloc_flags */
0, /* explicit_address */
&new_bo);
if (result != VK_SUCCESS) {
anv_batch_set_error(&cmd_buffer->batch, result);
return;
}
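/* Multiple threads may race to populate this bucket. If somebody else
* installed a BO first, release the one we just allocated and use theirs.
*/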
bo = p_atomic_cmpxchg(&device->ray_query_shadow_bos[bucket], NULL, new_bo);
if (bo != NULL) {
anv_device_release_bo(device, new_bo);
} else {
bo = new_bo;
}
}
cmd_buffer->state.ray_query_shadow_bo = bo;
/* Add the ray query buffers to the batch list. */
anv_reloc_list_add_bo(cmd_buffer->batch.relocs,
cmd_buffer->batch.alloc,
cmd_buffer->state.ray_query_shadow_bo);
}
/* Add the HW ray query buffer to the list of BOs used. */
anv_reloc_list_add_bo(cmd_buffer->batch.relocs,
cmd_buffer->batch.alloc,
device->ray_query_bo);
/* Fill the push constants & mark them dirty. */
struct anv_state ray_query_global_state =
anv_genX(&device->info, cmd_buffer_ray_query_globals)(cmd_buffer);
struct anv_address ray_query_globals_addr = (struct anv_address) {
.bo = device->dynamic_state_pool.block_pool.bo,
.offset = ray_query_global_state.offset,
};
pipeline_state->push_constants.ray_query_globals =
anv_address_physical(ray_query_globals_addr);
cmd_buffer->state.push_constants_dirty |= stages;
}
void anv_CmdBindPipeline(
VkCommandBuffer commandBuffer,
VkPipelineBindPoint pipelineBindPoint,
VkPipeline _pipeline)
{
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline);
struct anv_cmd_pipeline_state *state;
VkShaderStageFlags stages = 0;
switch (pipelineBindPoint) {
case VK_PIPELINE_BIND_POINT_COMPUTE: {
struct anv_compute_pipeline *compute_pipeline =
anv_pipeline_to_compute(pipeline);
if (cmd_buffer->state.compute.pipeline == compute_pipeline)
return;
cmd_buffer->state.compute.pipeline = compute_pipeline;
cmd_buffer->state.compute.pipeline_dirty = true;
set_dirty_for_bind_map(cmd_buffer, MESA_SHADER_COMPUTE,
&compute_pipeline->cs->bind_map);
state = &cmd_buffer->state.compute.base;
stages = VK_SHADER_STAGE_COMPUTE_BIT;
break;
}
case VK_PIPELINE_BIND_POINT_GRAPHICS: {
struct anv_graphics_pipeline *gfx_pipeline =
anv_pipeline_to_graphics(pipeline);
if (cmd_buffer->state.gfx.pipeline == gfx_pipeline)
return;
cmd_buffer->state.gfx.pipeline = gfx_pipeline;
cmd_buffer->state.gfx.vb_dirty |= gfx_pipeline->vb_used;
cmd_buffer->state.gfx.dirty |= ANV_CMD_DIRTY_PIPELINE;
anv_foreach_stage(stage, gfx_pipeline->active_stages) {
set_dirty_for_bind_map(cmd_buffer, stage,
&gfx_pipeline->shaders[stage]->bind_map);
}
/* Apply the non-dynamic state from the pipeline. */
vk_cmd_set_dynamic_graphics_state(&cmd_buffer->vk,
&gfx_pipeline->dynamic_state);
state = &cmd_buffer->state.gfx.base;
stages = gfx_pipeline->active_stages;
break;
}
case VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR: {
struct anv_ray_tracing_pipeline *rt_pipeline =
anv_pipeline_to_ray_tracing(pipeline);
if (cmd_buffer->state.rt.pipeline == rt_pipeline)
return;
cmd_buffer->state.rt.pipeline = rt_pipeline;
cmd_buffer->state.rt.pipeline_dirty = true;
if (rt_pipeline->stack_size > 0) {
anv_CmdSetRayTracingPipelineStackSizeKHR(commandBuffer,
rt_pipeline->stack_size);
}
state = &cmd_buffer->state.rt.base;
break;
}
default:
unreachable("invalid bind point");
break;
}
if (pipeline->ray_queries > 0)
anv_cmd_buffer_set_ray_query_buffer(cmd_buffer, state, pipeline, stages);
}
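
/* Bind a descriptor set at the given index for a bind point, recording the
* set address (and any dynamic offsets) into the push constants where
* required and flagging the affected shader stages dirty.
*/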
static void
anv_cmd_buffer_bind_descriptor_set(struct anv_cmd_buffer *cmd_buffer,
VkPipelineBindPoint bind_point,
struct anv_pipeline_layout *layout,
uint32_t set_index,
struct anv_descriptor_set *set,
uint32_t *dynamic_offset_count,
const uint32_t **dynamic_offsets)
{
/* Either we have no pool because it's a push descriptor set, or the pool
* is not host-only:
*
* VUID-vkCmdBindDescriptorSets-pDescriptorSets-04616:
*
* "Each element of pDescriptorSets must not have been allocated from a
* VkDescriptorPool with the
* VK_DESCRIPTOR_POOL_CREATE_HOST_ONLY_BIT_VALVE flag set"
*/
assert(!set->pool || !set->pool->host_only);
struct anv_descriptor_set_layout *set_layout =
layout->set[set_index].layout;
VkShaderStageFlags stages = set_layout->shader_stages;
struct anv_cmd_pipeline_state *pipe_state;
switch (bind_point) {
case VK_PIPELINE_BIND_POINT_GRAPHICS:
stages &= VK_SHADER_STAGE_ALL_GRAPHICS |
(cmd_buffer->device->vk.enabled_extensions.NV_mesh_shader ?
(VK_SHADER_STAGE_TASK_BIT_NV |
VK_SHADER_STAGE_MESH_BIT_NV) : 0);
pipe_state = &cmd_buffer->state.gfx.base;
break;
case VK_PIPELINE_BIND_POINT_COMPUTE:
stages &= VK_SHADER_STAGE_COMPUTE_BIT;
pipe_state = &cmd_buffer->state.compute.base;
break;
case VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR:
stages &= VK_SHADER_STAGE_RAYGEN_BIT_KHR |
VK_SHADER_STAGE_ANY_HIT_BIT_KHR |
VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR |
VK_SHADER_STAGE_MISS_BIT_KHR |
VK_SHADER_STAGE_INTERSECTION_BIT_KHR |
VK_SHADER_STAGE_CALLABLE_BIT_KHR;
pipe_state = &cmd_buffer->state.rt.base;
break;
default:
unreachable("invalid bind point");
}
VkShaderStageFlags dirty_stages = 0;
/* If it's a push descriptor set, we have to flag things as dirty
* regardless of whether or not the CPU-side data structure changed as we
* may have edited in-place.
*/
if (pipe_state->descriptors[set_index] != set ||
anv_descriptor_set_is_push(set)) {
pipe_state->descriptors[set_index] = set;
/* These stages don't have access to HW binding tables.
* This means that we have to upload the descriptor set
* address as a 64-bit value in the push constants.
*/
bool update_desc_sets = stages & (VK_SHADER_STAGE_TASK_BIT_NV |
VK_SHADER_STAGE_MESH_BIT_NV |
VK_SHADER_STAGE_RAYGEN_BIT_KHR |
VK_SHADER_STAGE_ANY_HIT_BIT_KHR |
VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR |
VK_SHADER_STAGE_MISS_BIT_KHR |
VK_SHADER_STAGE_INTERSECTION_BIT_KHR |
VK_SHADER_STAGE_CALLABLE_BIT_KHR);
if (update_desc_sets) {
struct anv_push_constants *push = &pipe_state->push_constants;
struct anv_address addr = anv_descriptor_set_address(set);
push->desc_sets[set_index] = anv_address_physical(addr);
if (addr.bo) {
anv_reloc_list_add_bo(cmd_buffer->batch.relocs,
cmd_buffer->batch.alloc,
addr.bo);
}
}
dirty_stages |= stages;
}
if (dynamic_offsets) {
if (set_layout->dynamic_offset_count > 0) {
struct anv_push_constants *push = &pipe_state->push_constants;
uint32_t dynamic_offset_start =
layout->set[set_index].dynamic_offset_start;
uint32_t *push_offsets =
&push->dynamic_offsets[dynamic_offset_start];
/* Assert that everything is in range */
assert(set_layout->dynamic_offset_count <= *dynamic_offset_count);
assert(dynamic_offset_start + set_layout->dynamic_offset_count <=
ARRAY_SIZE(push->dynamic_offsets));
for (uint32_t i = 0; i < set_layout->dynamic_offset_count; i++) {
if (push_offsets[i] != (*dynamic_offsets)[i]) {
push_offsets[i] = (*dynamic_offsets)[i];
/* dynamic_offset_stages[] elements could contain blanket
* values like VK_SHADER_STAGE_ALL, so limit this to the
* binding point's bits.
*/
dirty_stages |= set_layout->dynamic_offset_stages[i] & stages;
}
}
*dynamic_offsets += set_layout->dynamic_offset_count;
*dynamic_offset_count -= set_layout->dynamic_offset_count;
}
}
cmd_buffer->state.descriptors_dirty |= dirty_stages;
cmd_buffer->state.push_constants_dirty |= dirty_stages;
}
void anv_CmdBindDescriptorSets(
VkCommandBuffer commandBuffer,
VkPipelineBindPoint pipelineBindPoint,
VkPipelineLayout _layout,
uint32_t firstSet,
uint32_t descriptorSetCount,
const VkDescriptorSet* pDescriptorSets,
uint32_t dynamicOffsetCount,
const uint32_t* pDynamicOffsets)
{
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
ANV_FROM_HANDLE(anv_pipeline_layout, layout, _layout);
assert(firstSet + descriptorSetCount <= MAX_SETS);
for (uint32_t i = 0; i < descriptorSetCount; i++) {
ANV_FROM_HANDLE(anv_descriptor_set, set, pDescriptorSets[i]);
anv_cmd_buffer_bind_descriptor_set(cmd_buffer, pipelineBindPoint,
layout, firstSet + i, set,
&dynamicOffsetCount,
&pDynamicOffsets);
}
}
void anv_CmdBindVertexBuffers2(
VkCommandBuffer commandBuffer,
uint32_t firstBinding,
uint32_t bindingCount,
const VkBuffer* pBuffers,
const VkDeviceSize* pOffsets,
const VkDeviceSize* pSizes,
const VkDeviceSize* pStrides)
{
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
struct anv_vertex_binding *vb = cmd_buffer->state.vertex_bindings;
/* We have to defer setting up the vertex buffers since we need the buffer
* stride from the pipeline. */
assert(firstBinding + bindingCount <= MAX_VBS);
for (uint32_t i = 0; i < bindingCount; i++) {
ANV_FROM_HANDLE(anv_buffer, buffer, pBuffers[i]);
if (buffer == NULL) {
vb[firstBinding + i] = (struct anv_vertex_binding) {
.buffer = NULL,
};
} else {
vb[firstBinding + i] = (struct anv_vertex_binding) {
.buffer = buffer,
.offset = pOffsets[i],
.size = vk_buffer_range(&buffer->vk, pOffsets[i],
pSizes ? pSizes[i] : VK_WHOLE_SIZE),
};
}
cmd_buffer->state.gfx.vb_dirty |= 1 << (firstBinding + i);
}
if (pStrides != NULL) {
vk_cmd_set_vertex_binding_strides(&cmd_buffer->vk, firstBinding,
bindingCount, pStrides);
}
}
void anv_CmdBindTransformFeedbackBuffersEXT(
VkCommandBuffer commandBuffer,
uint32_t firstBinding,
uint32_t bindingCount,
const VkBuffer* pBuffers,
const VkDeviceSize* pOffsets,
const VkDeviceSize* pSizes)
{
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
struct anv_xfb_binding *xfb = cmd_buffer->state.xfb_bindings;
/* Just record the bindings here; the actual transform feedback buffer
* setup is deferred. */
assert(firstBinding + bindingCount <= MAX_XFB_BUFFERS);
for (uint32_t i = 0; i < bindingCount; i++) {
if (pBuffers[i] == VK_NULL_HANDLE) {
xfb[firstBinding + i].buffer = NULL;
} else {
ANV_FROM_HANDLE(anv_buffer, buffer, pBuffers[i]);
xfb[firstBinding + i].buffer = buffer;
xfb[firstBinding + i].offset = pOffsets[i];
xfb[firstBinding + i].size =
vk_buffer_range(&buffer->vk, pOffsets[i],
pSizes ? pSizes[i] : VK_WHOLE_SIZE);
}
}
}
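
/* Pick the ISL format used for the surface state of a buffer descriptor of
* the given type.
*/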
enum isl_format
anv_isl_format_for_descriptor_type(const struct anv_device *device,
VkDescriptorType type)
{
switch (type) {
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
return device->physical->compiler->indirect_ubos_use_sampler ?
ISL_FORMAT_R32G32B32A32_FLOAT : ISL_FORMAT_RAW;
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
return ISL_FORMAT_RAW;
default:
unreachable("Invalid descriptor type");
}
}
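
/* Copy data into freshly allocated dynamic state and return it. */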
struct anv_state
anv_cmd_buffer_emit_dynamic(struct anv_cmd_buffer *cmd_buffer,
const void *data, uint32_t size, uint32_t alignment)
{
struct anv_state state;
state = anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, size, alignment);
memcpy(state.map, data, size);
VG(VALGRIND_CHECK_MEM_IS_DEFINED(state.map, size));
return state;
}
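
/* Allocate dynamic state and fill it with the DWord-by-DWord OR of the two
* input arrays (typically a pipeline's pre-packed state merged with
* dynamically packed fields).
*/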
struct anv_state
anv_cmd_buffer_merge_dynamic(struct anv_cmd_buffer *cmd_buffer,
uint32_t *a, uint32_t *b,
uint32_t dwords, uint32_t alignment)
{
struct anv_state state;
uint32_t *p;
state = anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
dwords * 4, alignment);
p = state.map;
for (uint32_t i = 0; i < dwords; i++)
p[i] = a[i] | b[i];
VG(VALGRIND_CHECK_MEM_IS_DEFINED(p, dwords * 4));
return state;
}
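
/* Allocate dynamic state for the graphics push constants and copy the
* current CPU-side values into it.
*/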
struct anv_state
anv_cmd_buffer_gfx_push_constants(struct anv_cmd_buffer *cmd_buffer)
{
struct anv_push_constants *data =
&cmd_buffer->state.gfx.base.push_constants;
struct anv_state state =
anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
sizeof(struct anv_push_constants),
32 /* bottom 5 bits MBZ */);
memcpy(state.map, data, sizeof(struct anv_push_constants));
return state;
}
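
/* Build the compute push constant buffer: the cross-thread portion is
* written once, then the per-thread portion is replicated for each thread
* in the dispatch with its subgroup ID patched in.
*/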
struct anv_state
anv_cmd_buffer_cs_push_constants(struct anv_cmd_buffer *cmd_buffer)
{
const struct intel_device_info *devinfo = &cmd_buffer->device->info;
struct anv_push_constants *data =
&cmd_buffer->state.compute.base.push_constants;
struct anv_compute_pipeline *pipeline = cmd_buffer->state.compute.pipeline;
const struct brw_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline);
const struct anv_push_range *range = &pipeline->cs->bind_map.push_ranges[0];
const struct brw_cs_dispatch_info dispatch =
brw_cs_get_dispatch_info(devinfo, cs_prog_data, NULL);
const unsigned total_push_constants_size =
brw_cs_push_const_total_size(cs_prog_data, dispatch.threads);
if (total_push_constants_size == 0)
return (struct anv_state) { .offset = 0 };
const unsigned push_constant_alignment =
cmd_buffer->device->info.ver < 8 ? 32 : 64;
const unsigned aligned_total_push_constants_size =
ALIGN(total_push_constants_size, push_constant_alignment);
struct anv_state state;
if (devinfo->verx10 >= 125) {
state = anv_state_stream_alloc(&cmd_buffer->general_state_stream,
aligned_total_push_constants_size,
push_constant_alignment);
} else {
state = anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
aligned_total_push_constants_size,
push_constant_alignment);
}
void *dst = state.map;
const void *src = (char *)data + (range->start * 32);
if (cs_prog_data->push.cross_thread.size > 0) {
memcpy(dst, src, cs_prog_data->push.cross_thread.size);
dst += cs_prog_data->push.cross_thread.size;
src += cs_prog_data->push.cross_thread.size;
}
if (cs_prog_data->push.per_thread.size > 0) {
for (unsigned t = 0; t < dispatch.threads; t++) {
memcpy(dst, src, cs_prog_data->push.per_thread.size);
uint32_t *subgroup_id = dst +
offsetof(struct anv_push_constants, cs.subgroup_id) -
(range->start * 32 + cs_prog_data->push.cross_thread.size);
*subgroup_id = t;
dst += cs_prog_data->push.per_thread.size;
}
}
return state;
}
void anv_CmdPushConstants(
VkCommandBuffer commandBuffer,
VkPipelineLayout layout,
VkShaderStageFlags stageFlags,
uint32_t offset,
uint32_t size,
const void* pValues)
{
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
if (stageFlags & (VK_SHADER_STAGE_ALL_GRAPHICS |
VK_SHADER_STAGE_TASK_BIT_NV |
VK_SHADER_STAGE_MESH_BIT_NV)) {
struct anv_cmd_pipeline_state *pipe_state =
&cmd_buffer->state.gfx.base;
memcpy(pipe_state->push_constants.client_data + offset, pValues, size);
}
if (stageFlags & VK_SHADER_STAGE_COMPUTE_BIT) {
struct anv_cmd_pipeline_state *pipe_state =
&cmd_buffer->state.compute.base;
memcpy(pipe_state->push_constants.client_data + offset, pValues, size);
}
if (stageFlags & (VK_SHADER_STAGE_RAYGEN_BIT_KHR |
VK_SHADER_STAGE_ANY_HIT_BIT_KHR |
VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR |
VK_SHADER_STAGE_MISS_BIT_KHR |
VK_SHADER_STAGE_INTERSECTION_BIT_KHR |
VK_SHADER_STAGE_CALLABLE_BIT_KHR)) {
struct anv_cmd_pipeline_state *pipe_state =
&cmd_buffer->state.rt.base;
memcpy(pipe_state->push_constants.client_data + offset, pValues, size);
}
cmd_buffer->state.push_constants_dirty |= stageFlags;
}
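
/* Return the push descriptor set for the given bind point and set index,
* allocating the CPU-side structure and (re)allocating its descriptor
* buffer and surface state when needed.
*/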
static struct anv_descriptor_set *
anv_cmd_buffer_push_descriptor_set(struct anv_cmd_buffer *cmd_buffer,
VkPipelineBindPoint bind_point,
struct anv_descriptor_set_layout *layout,
uint32_t _set)
{
struct anv_cmd_pipeline_state *pipe_state;
switch (bind_point) {
case VK_PIPELINE_BIND_POINT_GRAPHICS:
pipe_state = &cmd_buffer->state.gfx.base;
break;
case VK_PIPELINE_BIND_POINT_COMPUTE:
pipe_state = &cmd_buffer->state.compute.base;
break;
case VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR:
pipe_state = &cmd_buffer->state.rt.base;
break;
default:
unreachable("invalid bind point");
}
struct anv_push_descriptor_set **push_set =
&pipe_state->push_descriptors[_set];
if (*push_set == NULL) {
*push_set = vk_zalloc(&cmd_buffer->vk.pool->alloc,
sizeof(struct anv_push_descriptor_set), 8,
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
if (*push_set == NULL) {
anv_batch_set_error(&cmd_buffer->batch, VK_ERROR_OUT_OF_HOST_MEMORY);
return NULL;
}
}
struct anv_descriptor_set *set = &(*push_set)->set;
if (set->layout != layout) {
if (set->layout)
anv_descriptor_set_layout_unref(cmd_buffer->device, set->layout);
anv_descriptor_set_layout_ref(layout);
set->layout = layout;
}
set->size = anv_descriptor_set_layout_size(layout, 0);
set->buffer_view_count = layout->buffer_view_count;
set->descriptor_count = layout->descriptor_count;
set->buffer_views = (*push_set)->buffer_views;
if (layout->descriptor_buffer_size &&
((*push_set)->set_used_on_gpu ||
set->desc_mem.alloc_size < layout->descriptor_buffer_size)) {
/* The previous buffer is either actively used by some GPU command (so
* we can't modify it) or is too small. Allocate a new one.
*/
struct anv_state desc_mem =
anv_state_stream_alloc(&cmd_buffer->dynamic_state_stream,
anv_descriptor_set_layout_descriptor_buffer_size(layout, 0),
ANV_UBO_ALIGNMENT);
if (set->desc_mem.alloc_size) {
/* TODO: Do we really need to copy all the time? */
memcpy(desc_mem.map, set->desc_mem.map,
MIN2(desc_mem.alloc_size, set->desc_mem.alloc_size));
}
set->desc_mem = desc_mem;
set->desc_addr = (struct anv_address) {
.bo = cmd_buffer->dynamic_state_stream.state_pool->block_pool.bo,
.offset = set->desc_mem.offset,
};
enum isl_format format =
anv_isl_format_for_descriptor_type(cmd_buffer->device,
VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER);
const struct isl_device *isl_dev = &cmd_buffer->device->isl_dev;
set->desc_surface_state =
anv_state_stream_alloc(&cmd_buffer->surface_state_stream,
isl_dev->ss.size, isl_dev->ss.align);
anv_fill_buffer_surface_state(cmd_buffer->device,
set->desc_surface_state,
format, ISL_SWIZZLE_IDENTITY,
ISL_SURF_USAGE_CONSTANT_BUFFER_BIT,
set->desc_addr,
layout->descriptor_buffer_size, 1);
}
return set;
}
void anv_CmdPushDescriptorSetKHR(
VkCommandBuffer commandBuffer,
VkPipelineBindPoint pipelineBindPoint,
VkPipelineLayout _layout,
uint32_t _set,
uint32_t descriptorWriteCount,
const VkWriteDescriptorSet* pDescriptorWrites)
{
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
ANV_FROM_HANDLE(anv_pipeline_layout, layout, _layout);
assert(_set < MAX_SETS);
struct anv_descriptor_set_layout *set_layout = layout->set[_set].layout;
struct anv_descriptor_set *set =
anv_cmd_buffer_push_descriptor_set(cmd_buffer, pipelineBindPoint,
set_layout, _set);
if (!set)
return;
/* Go through the user-supplied descriptors. */
for (uint32_t i = 0; i < descriptorWriteCount; i++) {
const VkWriteDescriptorSet *write = &pDescriptorWrites[i];
switch (write->descriptorType) {
case VK_DESCRIPTOR_TYPE_SAMPLER:
case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
for (uint32_t j = 0; j < write->descriptorCount; j++) {
anv_descriptor_set_write_image_view(cmd_buffer->device, set,
write->pImageInfo + j,
write->descriptorType,
write->dstBinding,
write->dstArrayElement + j);
}
break;
case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
for (uint32_t j = 0; j < write->descriptorCount; j++) {
ANV_FROM_HANDLE(anv_buffer_view, bview,
write->pTexelBufferView[j]);
anv_descriptor_set_write_buffer_view(cmd_buffer->device, set,
write->descriptorType,
bview,
write->dstBinding,
write->dstArrayElement + j);
}
break;
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
for (uint32_t j = 0; j < write->descriptorCount; j++) {
ANV_FROM_HANDLE(anv_buffer, buffer, write->pBufferInfo[j].buffer);
anv_descriptor_set_write_buffer(cmd_buffer->device, set,
&cmd_buffer->surface_state_stream,
write->descriptorType,
buffer,
write->dstBinding,
write->dstArrayElement + j,
write->pBufferInfo[j].offset,
write->pBufferInfo[j].range);
}
break;
case VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR: {
const VkWriteDescriptorSetAccelerationStructureKHR *accel_write =
vk_find_struct_const(write, WRITE_DESCRIPTOR_SET_ACCELERATION_STRUCTURE_KHR);
assert(accel_write->accelerationStructureCount ==
write->descriptorCount);
for (uint32_t j = 0; j < write->descriptorCount; j++) {
ANV_FROM_HANDLE(anv_acceleration_structure, accel,
accel_write->pAccelerationStructures[j]);
anv_descriptor_set_write_acceleration_structure(cmd_buffer->device,
set, accel,
write->dstBinding,
write->dstArrayElement + j);
}
break;
}
default:
break;
}
}
anv_cmd_buffer_bind_descriptor_set(cmd_buffer, pipelineBindPoint,
layout, _set, set, NULL, NULL);
}
void anv_CmdPushDescriptorSetWithTemplateKHR(
VkCommandBuffer commandBuffer,
VkDescriptorUpdateTemplate descriptorUpdateTemplate,
VkPipelineLayout _layout,
uint32_t _set,
const void* pData)
{
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
ANV_FROM_HANDLE(anv_descriptor_update_template, template,
descriptorUpdateTemplate);
ANV_FROM_HANDLE(anv_pipeline_layout, layout, _layout);
assert(_set < MAX_PUSH_DESCRIPTORS);
struct anv_descriptor_set_layout *set_layout = layout->set[_set].layout;
struct anv_descriptor_set *set =
anv_cmd_buffer_push_descriptor_set(cmd_buffer, template->bind_point,
set_layout, _set);
if (!set)
return;
anv_descriptor_set_write_template(cmd_buffer->device, set,
&cmd_buffer->surface_state_stream,
template,
pData);
anv_cmd_buffer_bind_descriptor_set(cmd_buffer, template->bind_point,
layout, _set, set, NULL, NULL);
}
void anv_CmdSetDeviceMask(
VkCommandBuffer commandBuffer,
uint32_t deviceMask)
{
/* No-op */
}
void anv_CmdSetRayTracingPipelineStackSizeKHR(
VkCommandBuffer commandBuffer,
uint32_t pipelineStackSize)
{
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
struct anv_cmd_ray_tracing_state *rt = &cmd_buffer->state.rt;
struct anv_device *device = cmd_buffer->device;
if (anv_batch_has_error(&cmd_buffer->batch))
return;
uint32_t stack_ids_per_dss = 2048; /* TODO */
unsigned stack_size_log2 = ilog2_round_up(pipelineStackSize);
if (stack_size_log2 < 10)
stack_size_log2 = 10;
if (rt->scratch.layout.total_size == 1 << stack_size_log2)
return;
brw_rt_compute_scratch_layout(&rt->scratch.layout, &device->info,
stack_ids_per_dss, 1 << stack_size_log2);
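/* RT scratch BOs are cached on the device per power-of-two stack size;
* bucket 0 corresponds to the 1 KiB minimum enforced above.
*/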
unsigned bucket = stack_size_log2 - 10;
assert(bucket < ARRAY_SIZE(device->rt_scratch_bos));
struct anv_bo *bo = p_atomic_read(&device->rt_scratch_bos[bucket]);
if (bo == NULL) {
struct anv_bo *new_bo;
VkResult result = anv_device_alloc_bo(device, "RT scratch",
rt->scratch.layout.total_size,
ANV_BO_ALLOC_LOCAL_MEM,
0, /* explicit_address */
&new_bo);
if (result != VK_SUCCESS) {
rt->scratch.layout.total_size = 0;
anv_batch_set_error(&cmd_buffer->batch, result);
return;
}
bo = p_atomic_cmpxchg(&device->rt_scratch_bos[bucket], NULL, new_bo);
if (bo != NULL) {
anv_device_release_bo(device, new_bo);
} else {
bo = new_bo;
}
}
rt->scratch.bo = bo;
}