From 792a0ab0b1460e7b0b85e6fd9b74291bd8b78173 Mon Sep 17 00:00:00 2001
From: Boris Brezillon
Date: Wed, 7 Jul 2021 16:19:16 +0200
Subject: [PATCH] panvk: Prepare per-gen split

Signed-off-by: Boris Brezillon
Part-of:
---
 src/panfrost/vulkan/meson.build               |   39 +-
 src/panfrost/vulkan/panvk_cmd_buffer.c        | 1097 ---------------
 src/panfrost/vulkan/panvk_cs.c                |  798 -----------
 src/panfrost/vulkan/panvk_cs.h                |  127 +-
 src/panfrost/vulkan/panvk_descriptor_set.c    |  352 +----
 src/panfrost/vulkan/panvk_device.c            |  478 +------
 src/panfrost/vulkan/panvk_image.c             |  115 --
 src/panfrost/vulkan/panvk_pipeline.c          |  948 -------------
 src/panfrost/vulkan/panvk_private.h           |  167 +--
 src/panfrost/vulkan/panvk_shader.c            |  347 -----
 src/panfrost/vulkan/panvk_vX_cmd_buffer.c     | 1173 +++++++++++++++++
 ...panvk_varyings.c => panvk_vX_cmd_buffer.h} |   52 +-
 src/panfrost/vulkan/panvk_vX_cs.c             |  919 +++++++++++++
 src/panfrost/vulkan/panvk_vX_cs.h             |  137 ++
 src/panfrost/vulkan/panvk_vX_descriptor_set.c |  375 ++++++
 src/panfrost/vulkan/panvk_vX_device.c         |  317 +++++
 src/panfrost/vulkan/panvk_vX_image.c          |  149 +++
 .../vulkan/{panvk_meta.c => panvk_vX_meta.c}  |  335 ++---
 src/panfrost/vulkan/panvk_vX_meta.h           |   36 +
 src/panfrost/vulkan/panvk_vX_pipeline.c       |  991 ++++++++++++++
 src/panfrost/vulkan/panvk_vX_shader.c         |  386 ++++++
 src/panfrost/vulkan/panvk_varyings.h          |   44 +-
 22 files changed, 4864 insertions(+), 4518 deletions(-)
 create mode 100644 src/panfrost/vulkan/panvk_vX_cmd_buffer.c
 rename src/panfrost/vulkan/{panvk_varyings.c => panvk_vX_cmd_buffer.h} (53%)
 create mode 100644 src/panfrost/vulkan/panvk_vX_cs.c
 create mode 100644 src/panfrost/vulkan/panvk_vX_cs.h
 create mode 100644 src/panfrost/vulkan/panvk_vX_descriptor_set.c
 create mode 100644 src/panfrost/vulkan/panvk_vX_device.c
 create mode 100644 src/panfrost/vulkan/panvk_vX_image.c
 rename src/panfrost/vulkan/{panvk_meta.c => panvk_vX_meta.c} (60%)
 create mode 100644 src/panfrost/vulkan/panvk_vX_meta.h
 create mode 100644 src/panfrost/vulkan/panvk_vX_pipeline.c
 create mode 100644 src/panfrost/vulkan/panvk_vX_shader.c

diff --git a/src/panfrost/vulkan/meson.build b/src/panfrost/vulkan/meson.build
index 773fb48d933..2da622f7860 100644
--- a/src/panfrost/vulkan/meson.build
+++ b/src/panfrost/vulkan/meson.build
@@ -28,7 +28,9 @@ panvk_entrypoints = custom_target(
   command : [
     prog_python, '@INPUT0@', '--xml', '@INPUT1@',
     '--proto', '--weak', '--out-h', '@OUTPUT0@', '--out-c', '@OUTPUT1@',
     '--prefix', 'panvk',
+    '--device-prefix', 'panvk_v5', '--device-prefix', 'panvk_v6',
+    '--device-prefix', 'panvk_v7',
   ],
+  depend_files : vk_entrypoints_gen_depend_files,
 )
 
 libpanvk_files = files(
@@ -39,7 +41,6 @@ libpanvk_files = files(
   'panvk_formats.c',
   'panvk_image.c',
   'panvk_mempool.c',
-  'panvk_meta.c',
   'panvk_pass.c',
   'panvk_pipeline.c',
   'panvk_pipeline_cache.c',
@@ -48,13 +49,46 @@ libpanvk_files = files(
   'panvk_shader.c',
   'panvk_sync.c',
   'panvk_util.c',
-  'panvk_varyings.c',
   'panvk_wsi.c',
   'panvk_wsi_display.c',
 )
 
 panvk_deps = []
 panvk_flags = []
+panvk_per_arch_libs = []
+
+foreach arch : ['5', '6', '7']
+  panvk_per_arch_libs += static_library(
+    'panvk_v@0@'.format(arch),
+    [
+      'panvk_vX_cmd_buffer.c',
+      'panvk_vX_cs.c',
+      'panvk_vX_descriptor_set.c',
+      'panvk_vX_device.c',
+      'panvk_vX_image.c',
+      'panvk_vX_meta.c',
+      'panvk_vX_pipeline.c',
+      'panvk_vX_shader.c',
+    ],
+    include_directories : [
+      inc_include,
+      inc_src,
+      inc_compiler,
+      inc_gallium, # XXX: pipe/p_format.h
+      inc_gallium_aux, # XXX: renderonly
+      inc_vulkan_wsi,
+      inc_panfrost,
+    ],
+    dependencies : [
+      idep_nir_headers,
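      # Each arch iteration compiles the same panvk_vX_*.c sources with its
      # own -DPAN_ARCH value, so one driver binary carries v5/v6/v7 copies of
      # every per-arch symbol. A minimal sketch of the name mangling this
      # relies on (illustrative macro, not necessarily the exact header
      # contents):
      #
      #   #if PAN_ARCH == 5
      #   #define panvk_per_arch(name) panvk_v5_ ## name
      #   #elif PAN_ARCH == 6
      #   #define panvk_per_arch(name) panvk_v6_ ## name
      #   #elif PAN_ARCH == 7
      #   #define panvk_per_arch(name) panvk_v7_ ## name
      #   #endif
      #
      # The '--device-prefix' arguments passed to the entrypoint generator
      # above make the generated dispatch tables resolve to these
      # panvk_v5_/panvk_v6_/panvk_v7_ symbols.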
idep_pan_packers, + idep_vulkan_util_headers, + dep_libdrm, + dep_valgrind, + ], + c_args : [no_override_init_args, panvk_flags, '-DPAN_ARCH=@0@'.format(arch)], + ) +endforeach if system_has_kms_drm panvk_flags += '-DVK_USE_PLATFORM_DISPLAY_KHR' @@ -80,6 +114,7 @@ libvulkan_panfrost = shared_library( inc_vulkan_wsi, inc_panfrost, ], + link_whole : [panvk_per_arch_libs], link_with : [ libvulkan_wsi, libpanfrost_shared, diff --git a/src/panfrost/vulkan/panvk_cmd_buffer.c b/src/panfrost/vulkan/panvk_cmd_buffer.c index c505db6c87c..0b1d0721793 100644 --- a/src/panfrost/vulkan/panvk_cmd_buffer.c +++ b/src/panfrost/vulkan/panvk_cmd_buffer.c @@ -26,210 +26,14 @@ * DEALINGS IN THE SOFTWARE. */ -#include "panvk_cs.h" #include "panvk_private.h" #include "panfrost-quirks.h" -#include "pan_blitter.h" #include "pan_encoder.h" #include "util/rounding.h" #include "vk_format.h" -static VkResult -panvk_reset_cmdbuf(struct panvk_cmd_buffer *cmdbuf) -{ - struct panfrost_device *pdev = &cmdbuf->device->physical_device->pdev; - - cmdbuf->record_result = VK_SUCCESS; - - list_for_each_entry_safe(struct panvk_batch, batch, &cmdbuf->batches, node) { - list_del(&batch->node); - util_dynarray_fini(&batch->jobs); - if (!pan_is_bifrost(pdev)) - panfrost_bo_unreference(batch->tiler.ctx.midgard.polygon_list); - - util_dynarray_fini(&batch->event_ops); - - vk_free(&cmdbuf->pool->alloc, batch); - } - - panvk_pool_reset(&cmdbuf->desc_pool); - panvk_pool_reset(&cmdbuf->tls_pool); - panvk_pool_reset(&cmdbuf->varying_pool); - cmdbuf->status = PANVK_CMD_BUFFER_STATUS_INITIAL; - - for (unsigned i = 0; i < MAX_BIND_POINTS; i++) - memset(&cmdbuf->descriptors[i].sets, 0, sizeof(cmdbuf->descriptors[i].sets)); - - return cmdbuf->record_result; -} - -static VkResult -panvk_create_cmdbuf(struct panvk_device *device, - struct panvk_cmd_pool *pool, - VkCommandBufferLevel level, - struct panvk_cmd_buffer **cmdbuf_out) -{ - struct panvk_cmd_buffer *cmdbuf; - - cmdbuf = vk_object_zalloc(&device->vk, NULL, sizeof(*cmdbuf), - VK_OBJECT_TYPE_COMMAND_BUFFER); - if (!cmdbuf) - return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); - - cmdbuf->device = device; - cmdbuf->level = level; - cmdbuf->pool = pool; - - if (pool) { - list_addtail(&cmdbuf->pool_link, &pool->active_cmd_buffers); - cmdbuf->queue_family_index = pool->queue_family_index; - } else { - /* Init the pool_link so we can safely call list_del when we destroy - * the command buffer - */ - list_inithead(&cmdbuf->pool_link); - cmdbuf->queue_family_index = PANVK_QUEUE_GENERAL; - } - - panvk_pool_init(&cmdbuf->desc_pool, &device->physical_device->pdev, - pool ? &pool->desc_bo_pool : NULL, 0, 64 * 1024, - "Command buffer descriptor pool", true); - panvk_pool_init(&cmdbuf->tls_pool, &device->physical_device->pdev, - pool ? &pool->tls_bo_pool : NULL, - PAN_BO_INVISIBLE, 64 * 1024, "TLS pool", false); - panvk_pool_init(&cmdbuf->varying_pool, &device->physical_device->pdev, - pool ? 
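      /* The three suballocators have different CPU-visibility needs:
       * descriptors are written by the CPU, so the descriptor pool stays
       * mapped, while TLS and varying memory is only ever touched by the
       * GPU and can live in PAN_BO_INVISIBLE BOs. When the command buffer
       * belongs to a pool, returned BOs are recycled through the pool's
       * per-usage bo_pools instead of going back to the kernel. */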
&pool->varying_bo_pool : NULL, - PAN_BO_INVISIBLE, 64 * 1024, "Varyings pool", false); - list_inithead(&cmdbuf->batches); - cmdbuf->status = PANVK_CMD_BUFFER_STATUS_INITIAL; - *cmdbuf_out = cmdbuf; - return VK_SUCCESS; -} - -static void -panvk_destroy_cmdbuf(struct panvk_cmd_buffer *cmdbuf) -{ - struct panfrost_device *pdev = &cmdbuf->device->physical_device->pdev; - struct panvk_device *device = cmdbuf->device; - - list_del(&cmdbuf->pool_link); - - list_for_each_entry_safe(struct panvk_batch, batch, &cmdbuf->batches, node) { - list_del(&batch->node); - util_dynarray_fini(&batch->jobs); - if (!pan_is_bifrost(pdev)) - panfrost_bo_unreference(batch->tiler.ctx.midgard.polygon_list); - - util_dynarray_fini(&batch->event_ops); - - vk_free(&cmdbuf->pool->alloc, batch); - } - - panvk_pool_cleanup(&cmdbuf->desc_pool); - panvk_pool_cleanup(&cmdbuf->tls_pool); - panvk_pool_cleanup(&cmdbuf->varying_pool); - vk_object_free(&device->vk, NULL, cmdbuf); -} - -VkResult -panvk_AllocateCommandBuffers(VkDevice _device, - const VkCommandBufferAllocateInfo *pAllocateInfo, - VkCommandBuffer *pCommandBuffers) -{ - VK_FROM_HANDLE(panvk_device, device, _device); - VK_FROM_HANDLE(panvk_cmd_pool, pool, pAllocateInfo->commandPool); - - VkResult result = VK_SUCCESS; - unsigned i; - - for (i = 0; i < pAllocateInfo->commandBufferCount; i++) { - struct panvk_cmd_buffer *cmdbuf = NULL; - - if (!list_is_empty(&pool->free_cmd_buffers)) { - cmdbuf = list_first_entry( - &pool->free_cmd_buffers, struct panvk_cmd_buffer, pool_link); - - list_del(&cmdbuf->pool_link); - list_addtail(&cmdbuf->pool_link, &pool->active_cmd_buffers); - - cmdbuf->level = pAllocateInfo->level; - vk_object_base_reset(&cmdbuf->base); - } else { - result = panvk_create_cmdbuf(device, pool, pAllocateInfo->level, &cmdbuf); - } - - if (result != VK_SUCCESS) - goto err_free_cmd_bufs; - - pCommandBuffers[i] = panvk_cmd_buffer_to_handle(cmdbuf); - } - - return VK_SUCCESS; - -err_free_cmd_bufs: - panvk_FreeCommandBuffers(_device, pAllocateInfo->commandPool, i, - pCommandBuffers); - for (unsigned j = 0; j < i; j++) - pCommandBuffers[j] = VK_NULL_HANDLE; - - return result; -} - -void -panvk_FreeCommandBuffers(VkDevice device, - VkCommandPool commandPool, - uint32_t commandBufferCount, - const VkCommandBuffer *pCommandBuffers) -{ - for (uint32_t i = 0; i < commandBufferCount; i++) { - VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, pCommandBuffers[i]); - - if (cmdbuf) { - if (cmdbuf->pool) { - list_del(&cmdbuf->pool_link); - panvk_reset_cmdbuf(cmdbuf); - list_addtail(&cmdbuf->pool_link, - &cmdbuf->pool->free_cmd_buffers); - } else - panvk_destroy_cmdbuf(cmdbuf); - } - } -} - -VkResult -panvk_ResetCommandBuffer(VkCommandBuffer commandBuffer, - VkCommandBufferResetFlags flags) -{ - VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer); - - return panvk_reset_cmdbuf(cmdbuf); -} - -VkResult -panvk_BeginCommandBuffer(VkCommandBuffer commandBuffer, - const VkCommandBufferBeginInfo *pBeginInfo) -{ - VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer); - VkResult result = VK_SUCCESS; - - if (cmdbuf->status != PANVK_CMD_BUFFER_STATUS_INITIAL) { - /* If the command buffer has already been reset with - * vkResetCommandBuffer, no need to do it again. 
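    * (The Vulkan spec makes vkBeginCommandBuffer on a command buffer in
    * the recording or executable state behave as an implicit reset when
    * the pool allows per-buffer resets, which is what this check covers.)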
- */ - result = panvk_reset_cmdbuf(cmdbuf); - if (result != VK_SUCCESS) - return result; - } - - memset(&cmdbuf->state, 0, sizeof(cmdbuf->state)); - - cmdbuf->status = PANVK_CMD_BUFFER_STATUS_RECORDING; - - return VK_SUCCESS; -} - void panvk_CmdBindVertexBuffers(VkCommandBuffer commandBuffer, uint32_t firstBinding, @@ -321,19 +125,6 @@ panvk_CmdPushConstants(VkCommandBuffer commandBuffer, panvk_stub(); } -VkResult -panvk_EndCommandBuffer(VkCommandBuffer commandBuffer) -{ - VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer); - - if (cmdbuf->state.batch) - panvk_cmd_close_batch(cmdbuf); - - cmdbuf->status = PANVK_CMD_BUFFER_STATUS_EXECUTABLE; - - return cmdbuf->record_result; -} - void panvk_CmdBindPipeline(VkCommandBuffer commandBuffer, VkPipelineBindPoint pipelineBindPoint, @@ -520,62 +311,6 @@ panvk_CreateCommandPool(VkDevice _device, return VK_SUCCESS; } -void -panvk_DestroyCommandPool(VkDevice _device, - VkCommandPool commandPool, - const VkAllocationCallbacks *pAllocator) -{ - VK_FROM_HANDLE(panvk_device, device, _device); - VK_FROM_HANDLE(panvk_cmd_pool, pool, commandPool); - - list_for_each_entry_safe(struct panvk_cmd_buffer, cmdbuf, - &pool->active_cmd_buffers, pool_link) - panvk_destroy_cmdbuf(cmdbuf); - - list_for_each_entry_safe(struct panvk_cmd_buffer, cmdbuf, - &pool->free_cmd_buffers, pool_link) - panvk_destroy_cmdbuf(cmdbuf); - - panvk_bo_pool_cleanup(&pool->desc_bo_pool); - panvk_bo_pool_cleanup(&pool->varying_bo_pool); - panvk_bo_pool_cleanup(&pool->tls_bo_pool); - vk_object_free(&device->vk, pAllocator, pool); -} - -VkResult -panvk_ResetCommandPool(VkDevice device, - VkCommandPool commandPool, - VkCommandPoolResetFlags flags) -{ - VK_FROM_HANDLE(panvk_cmd_pool, pool, commandPool); - VkResult result; - - list_for_each_entry(struct panvk_cmd_buffer, cmdbuf, &pool->active_cmd_buffers, - pool_link) - { - result = panvk_reset_cmdbuf(cmdbuf); - if (result != VK_SUCCESS) - return result; - } - - return VK_SUCCESS; -} - -void -panvk_TrimCommandPool(VkDevice device, - VkCommandPool commandPool, - VkCommandPoolTrimFlags flags) -{ - VK_FROM_HANDLE(panvk_cmd_pool, pool, commandPool); - - if (!pool) - return; - - list_for_each_entry_safe(struct panvk_cmd_buffer, cmdbuf, - &pool->free_cmd_buffers, pool_link) - panvk_destroy_cmdbuf(cmdbuf); -} - static void panvk_cmd_prepare_clear_values(struct panvk_cmd_buffer *cmdbuf, const VkClearValue *in) @@ -638,153 +373,6 @@ panvk_CmdBeginRenderPass(VkCommandBuffer cmd, return panvk_CmdBeginRenderPass2(cmd, info, &subpass_info); } -static void -panvk_cmd_prepare_fragment_job(struct panvk_cmd_buffer *cmdbuf) -{ - assert(cmdbuf->state.bind_point == VK_PIPELINE_BIND_POINT_GRAPHICS); - - struct panvk_batch *batch = cmdbuf->state.batch; - struct panfrost_ptr job_ptr = - pan_pool_alloc_desc(&cmdbuf->desc_pool.base, FRAGMENT_JOB); - - panvk_emit_fragment_job(cmdbuf->device, cmdbuf->state.framebuffer, - cmdbuf->state.batch->fb.desc.gpu, - job_ptr.cpu); - cmdbuf->state.batch->fragment_job = job_ptr.gpu; - util_dynarray_append(&batch->jobs, void *, job_ptr.cpu); -} - -void -panvk_cmd_get_midgard_polygon_list(struct panvk_cmd_buffer *cmdbuf, - unsigned width, unsigned height, - bool has_draws) -{ - struct panfrost_device *pdev = &cmdbuf->device->physical_device->pdev; - struct panvk_batch *batch = cmdbuf->state.batch; - - assert(!pan_is_bifrost(pdev)); - - if (batch->tiler.ctx.midgard.polygon_list) - return; - - unsigned size = - panfrost_tiler_get_polygon_list_size(pdev, width, height, has_draws); - size = util_next_power_of_two(size); - - /* Create 
the BO as invisible if we can. In the non-hierarchical tiler case, - * we need to write the polygon list manually because there's not WRITE_VALUE - * job in the chain. */ - bool init_polygon_list = !has_draws && (pdev->quirks & MIDGARD_NO_HIER_TILING); - batch->tiler.ctx.midgard.polygon_list = - panfrost_bo_create(pdev, size, - init_polygon_list ? 0 : PAN_BO_INVISIBLE, - "Polygon list"); - - - if (init_polygon_list) { - assert(batch->tiler.ctx.midgard.polygon_list->ptr.cpu); - uint32_t *polygon_list_body = - batch->tiler.ctx.midgard.polygon_list->ptr.cpu + - MALI_MIDGARD_TILER_MINIMUM_HEADER_SIZE; - polygon_list_body[0] = 0xa0000000; - } - - batch->tiler.ctx.midgard.disable = !has_draws; -} - -void -panvk_cmd_close_batch(struct panvk_cmd_buffer *cmdbuf) -{ - struct panvk_batch *batch = cmdbuf->state.batch; - - assert(batch); - - if (!batch->fragment_job && !batch->scoreboard.first_job) { - if (util_dynarray_num_elements(&batch->event_ops, struct panvk_event_op) == 0) { - /* Content-less batch, let's drop it */ - vk_free(&cmdbuf->pool->alloc, batch); - } else { - /* Batch has no jobs but is needed for synchronization, let's add a - * NULL job so the SUBMIT ioctl doesn't choke on it. - */ - struct panfrost_ptr ptr = pan_pool_alloc_desc(&cmdbuf->desc_pool.base, - JOB_HEADER); - util_dynarray_append(&batch->jobs, void *, ptr.cpu); - panfrost_add_job(&cmdbuf->desc_pool.base, &batch->scoreboard, - MALI_JOB_TYPE_NULL, false, false, 0, 0, - &ptr, false); - list_addtail(&batch->node, &cmdbuf->batches); - } - cmdbuf->state.batch = NULL; - return; - } - - struct panfrost_device *pdev = &cmdbuf->device->physical_device->pdev; - - list_addtail(&cmdbuf->state.batch->node, &cmdbuf->batches); - - struct pan_tls_info tlsinfo = {}; - if (cmdbuf->state.pipeline) { - tlsinfo.tls.size = cmdbuf->state.pipeline->tls_size; - tlsinfo.wls.size = cmdbuf->state.pipeline->wls_size; - } - - if (tlsinfo.tls.size) { - tlsinfo.tls.ptr = - pan_pool_alloc_aligned(&cmdbuf->tls_pool.base, tlsinfo.tls.size, 4096).gpu; - } - - if (tlsinfo.wls.size) { - unsigned wls_size = - pan_wls_mem_size(pdev, &cmdbuf->state.compute.wg_count, tlsinfo.wls.size); - tlsinfo.wls.ptr = - pan_pool_alloc_aligned(&cmdbuf->tls_pool.base, wls_size, 4096).gpu; - } - - if ((pan_is_bifrost(pdev) || !cmdbuf->state.batch->fb.desc.cpu) && - cmdbuf->state.batch->tls.cpu) { - pan_emit_tls(pdev, &tlsinfo, cmdbuf->state.batch->tls.cpu); - } - - if (cmdbuf->state.batch->fb.desc.cpu) { - if (!pan_is_bifrost(pdev)) { - panvk_cmd_get_midgard_polygon_list(cmdbuf, - batch->fb.info->width, - batch->fb.info->height, - false); - - mali_ptr polygon_list = - cmdbuf->state.batch->tiler.ctx.midgard.polygon_list->ptr.gpu; - struct panfrost_ptr writeval_job = - panfrost_scoreboard_initialize_tiler(&cmdbuf->desc_pool.base, - &cmdbuf->state.batch->scoreboard, - polygon_list); - if (writeval_job.cpu) - util_dynarray_append(&cmdbuf->state.batch->jobs, void *, writeval_job.cpu); - } - - cmdbuf->state.batch->fb.desc.gpu |= - panvk_emit_fb(cmdbuf->device, - cmdbuf->state.batch, - cmdbuf->state.subpass, - cmdbuf->state.framebuffer, - cmdbuf->state.clear, - &tlsinfo, &cmdbuf->state.batch->tiler.ctx, - cmdbuf->state.batch->fb.desc.cpu); - - if (!pan_is_bifrost(pdev)) { - memcpy(&cmdbuf->state.batch->tiler.templ.midgard, - pan_section_ptr(cmdbuf->state.batch->fb.desc.cpu, - MULTI_TARGET_FRAMEBUFFER, TILER), - sizeof(cmdbuf->state.batch->tiler.templ.midgard)); - } - - panvk_cmd_prepare_fragment_job(cmdbuf); - } - - cmdbuf->state.batch = NULL; -} - void panvk_cmd_open_batch(struct 
panvk_cmd_buffer *cmdbuf) { @@ -795,526 +383,6 @@ panvk_cmd_open_batch(struct panvk_cmd_buffer *cmdbuf) assert(cmdbuf->state.batch); } -void -panvk_CmdNextSubpass2(VkCommandBuffer commandBuffer, - const VkSubpassBeginInfo *pSubpassBeginInfo, - const VkSubpassEndInfo *pSubpassEndInfo) -{ - VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer); - - panvk_cmd_close_batch(cmdbuf); - - cmdbuf->state.subpass++; - panvk_cmd_open_batch(cmdbuf); - memset(&cmdbuf->state.compute, 0, sizeof(cmdbuf->state.compute)); -} - -void -panvk_CmdNextSubpass(VkCommandBuffer cmd, VkSubpassContents contents) -{ - VkSubpassBeginInfo binfo = { - .sType = VK_STRUCTURE_TYPE_SUBPASS_BEGIN_INFO, - .contents = contents - }; - VkSubpassEndInfo einfo = { - .sType = VK_STRUCTURE_TYPE_SUBPASS_END_INFO, - }; - - panvk_CmdNextSubpass2(cmd, &binfo, &einfo); -} - - -void -panvk_cmd_alloc_fb_desc(struct panvk_cmd_buffer *cmdbuf) -{ - struct panvk_batch *batch = cmdbuf->state.batch; - - if (batch->fb.desc.gpu) - return; - - const struct panvk_subpass *subpass = cmdbuf->state.subpass; - bool has_zs_ext = subpass->zs_attachment.idx != VK_ATTACHMENT_UNUSED; - unsigned tags = MALI_FBD_TAG_IS_MFBD; - - batch->fb.info = cmdbuf->state.framebuffer; - batch->fb.desc = - pan_pool_alloc_desc_aggregate(&cmdbuf->desc_pool.base, - PAN_DESC(MULTI_TARGET_FRAMEBUFFER), - PAN_DESC_ARRAY(has_zs_ext ? 1 : 0, ZS_CRC_EXTENSION), - PAN_DESC_ARRAY(MAX2(subpass->color_count, 1), RENDER_TARGET)); - - /* Tag the pointer */ - batch->fb.desc.gpu |= tags; -} - -void -panvk_cmd_alloc_tls_desc(struct panvk_cmd_buffer *cmdbuf) -{ - const struct panfrost_device *pdev = - &cmdbuf->device->physical_device->pdev; - struct panvk_batch *batch = cmdbuf->state.batch; - - assert(batch); - if (batch->tls.gpu) - return; - - if (!pan_is_bifrost(pdev) && - cmdbuf->state.bind_point == VK_PIPELINE_BIND_POINT_GRAPHICS) { - panvk_cmd_alloc_fb_desc(cmdbuf); - batch->tls = batch->fb.desc; - batch->tls.gpu &= ~63ULL; - } else { - batch->tls = - pan_pool_alloc_desc(&cmdbuf->desc_pool.base, LOCAL_STORAGE); - } -} - -static void -panvk_cmd_upload_sysval(struct panvk_cmd_buffer *cmdbuf, - unsigned id, union panvk_sysval_data *data) -{ - switch (PAN_SYSVAL_TYPE(id)) { - case PAN_SYSVAL_VIEWPORT_SCALE: - panvk_sysval_upload_viewport_scale(&cmdbuf->state.viewport, data); - break; - case PAN_SYSVAL_VIEWPORT_OFFSET: - panvk_sysval_upload_viewport_offset(&cmdbuf->state.viewport, data); - break; - case PAN_SYSVAL_VERTEX_INSTANCE_OFFSETS: - /* TODO: support base_{vertex,instance} */ - data->u32[0] = data->u32[1] = data->u32[2] = 0; - break; - default: - unreachable("Invalid static sysval"); - } -} - -static void -panvk_cmd_prepare_sysvals(struct panvk_cmd_buffer *cmdbuf) -{ - struct panvk_descriptor_state *desc_state = - &cmdbuf->descriptors[cmdbuf->state.bind_point]; - const struct panvk_pipeline *pipeline = cmdbuf->state.pipeline; - - if (!pipeline->num_sysvals) - return; - - for (unsigned i = 0; i < ARRAY_SIZE(desc_state->sysvals); i++) { - unsigned sysval_count = pipeline->sysvals[i].ids.sysval_count; - if (!sysval_count || - (desc_state->sysvals[i] && - !(cmdbuf->state.dirty & pipeline->sysvals[i].dirty_mask))) - continue; - - struct panfrost_ptr sysvals = - pan_pool_alloc_aligned(&cmdbuf->desc_pool.base, sysval_count * 16, 16); - union panvk_sysval_data *data = sysvals.cpu; - - for (unsigned s = 0; s < pipeline->sysvals[i].ids.sysval_count; s++) { - panvk_cmd_upload_sysval(cmdbuf, pipeline->sysvals[i].ids.sysvals[s], - &data[s]); - } - - desc_state->sysvals[i] = sysvals.gpu; - } -} - 
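/* Sysval tables are rebuilt lazily: panvk_cmd_prepare_sysvals() only
 * re-uploads a stage's table when the pipeline flags it as depending on
 * command-buffer state that was dirtied since the last draw. As an
 * illustration of what lands in those tables, the viewport scale sysval is
 * the NDC-to-window half of the transform whose offset half is computed in
 * panvk_cs.c; a sketch of the scale math, assuming the same union
 * panvk_sysval_data layout used above:
 *
 *   data->f32[0] = 0.5f * viewport->width;
 *   data->f32[1] = 0.5f * viewport->height;
 *   data->f32[2] = 0.5f * (viewport->maxDepth - viewport->minDepth);
 */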
-static void -panvk_cmd_prepare_ubos(struct panvk_cmd_buffer *cmdbuf) -{ - struct panvk_descriptor_state *desc_state = - &cmdbuf->descriptors[cmdbuf->state.bind_point]; - const struct panvk_pipeline *pipeline = - cmdbuf->state.pipeline; - - if (!pipeline->num_ubos || desc_state->ubos) - return; - - panvk_cmd_prepare_sysvals(cmdbuf); - - struct panfrost_ptr ubos = - pan_pool_alloc_desc_array(&cmdbuf->desc_pool.base, - pipeline->num_ubos, - UNIFORM_BUFFER); - - panvk_emit_ubos(pipeline, desc_state, ubos.cpu); - - desc_state->ubos = ubos.gpu; -} - -static void -panvk_cmd_prepare_textures(struct panvk_cmd_buffer *cmdbuf) -{ - struct panvk_descriptor_state *desc_state = - &cmdbuf->descriptors[cmdbuf->state.bind_point]; - const struct panvk_pipeline *pipeline = cmdbuf->state.pipeline; - unsigned num_textures = pipeline->layout->num_textures; - - if (!num_textures || desc_state->textures) - return; - - const struct panfrost_device *pdev = &cmdbuf->device->physical_device->pdev; - unsigned tex_entry_size = pan_is_bifrost(pdev) ? - sizeof(struct mali_bifrost_texture_packed) : - sizeof(mali_ptr); - struct panfrost_ptr textures = - pan_pool_alloc_aligned(&cmdbuf->desc_pool.base, - num_textures * tex_entry_size, - tex_entry_size); - - void *texture = textures.cpu; - - for (unsigned i = 0; i < ARRAY_SIZE(desc_state->sets); i++) { - if (!desc_state->sets[i].set) continue; - - memcpy(texture, - desc_state->sets[i].set->textures.midgard, - desc_state->sets[i].set->layout->num_textures * - tex_entry_size); - - texture += desc_state->sets[i].set->layout->num_textures * - tex_entry_size; - } - - desc_state->textures = textures.gpu; -} - -static void -panvk_cmd_prepare_samplers(struct panvk_cmd_buffer *cmdbuf) -{ - struct panvk_descriptor_state *desc_state = - &cmdbuf->descriptors[cmdbuf->state.bind_point]; - const struct panvk_pipeline *pipeline = cmdbuf->state.pipeline; - unsigned num_samplers = pipeline->layout->num_samplers; - - if (!num_samplers || desc_state->samplers) - return; - - struct panfrost_ptr samplers = - pan_pool_alloc_desc_array(&cmdbuf->desc_pool.base, - num_samplers, - MIDGARD_SAMPLER); - - struct mali_midgard_sampler_packed *sampler = samplers.cpu; - - for (unsigned i = 0; i < ARRAY_SIZE(desc_state->sets); i++) { - if (!desc_state->sets[i].set) continue; - - memcpy(sampler, - desc_state->sets[i].set->samplers, - desc_state->sets[i].set->layout->num_samplers * - sizeof(*sampler)); - - sampler += desc_state->sets[i].set->layout->num_samplers; - } - - desc_state->samplers = samplers.gpu; -} - -static void -panvk_draw_prepare_fs_rsd(struct panvk_cmd_buffer *cmdbuf, - struct panvk_draw_info *draw) -{ - const struct panvk_pipeline *pipeline = cmdbuf->state.pipeline; - - if (!pipeline->fs.dynamic_rsd) { - draw->fs_rsd = pipeline->rsds[MESA_SHADER_FRAGMENT]; - return; - } - - if (!cmdbuf->state.fs_rsd) { - struct panfrost_ptr rsd = - pan_pool_alloc_desc_aggregate(&cmdbuf->desc_pool.base, - PAN_DESC(RENDERER_STATE), - PAN_DESC_ARRAY(pipeline->blend.state.rt_count, - BLEND)); - - struct mali_renderer_state_packed rsd_dyn; - - panvk_emit_dyn_fs_rsd(cmdbuf->device, pipeline, &cmdbuf->state, &rsd_dyn); - pan_merge(rsd_dyn, pipeline->fs.rsd_template, RENDERER_STATE); - memcpy(rsd.cpu, &rsd_dyn, sizeof(rsd_dyn)); - - void *bd = rsd.cpu + pan_size(RENDERER_STATE); - for (unsigned i = 0; i < pipeline->blend.state.rt_count; i++) { - if (pipeline->blend.constant[i].index != ~0) { - struct mali_blend_packed bd_dyn; - - panvk_emit_blend_constant(cmdbuf->device, pipeline, i, - cmdbuf->state.blend.constants[i], 
- &bd_dyn); - pan_merge(bd_dyn, pipeline->blend.bd_template[i], BLEND); - memcpy(bd, &bd_dyn, sizeof(bd_dyn)); - } - bd += pan_size(BLEND); - } - - cmdbuf->state.fs_rsd = rsd.gpu; - } - - draw->fs_rsd = cmdbuf->state.fs_rsd; -} - -void -panvk_cmd_get_bifrost_tiler_context(struct panvk_cmd_buffer *cmdbuf, - unsigned width, unsigned height) -{ - struct panvk_batch *batch = cmdbuf->state.batch; - - if (batch->tiler.bifrost_descs.cpu) - return; - - batch->tiler.bifrost_descs = - pan_pool_alloc_desc_aggregate(&cmdbuf->desc_pool.base, - PAN_DESC(BIFROST_TILER), - PAN_DESC(BIFROST_TILER_HEAP)); - - panvk_emit_bifrost_tiler_context(cmdbuf->device, width, height, - &batch->tiler.bifrost_descs); - memcpy(&batch->tiler.templ.bifrost, batch->tiler.bifrost_descs.cpu, - sizeof(batch->tiler.templ.bifrost)); - batch->tiler.ctx.bifrost = batch->tiler.bifrost_descs.gpu; -} - -static void -panvk_draw_prepare_tiler_context(struct panvk_cmd_buffer *cmdbuf, - struct panvk_draw_info *draw) -{ - const struct panfrost_device *pdev = &cmdbuf->device->physical_device->pdev; - struct panvk_batch *batch = cmdbuf->state.batch; - - if (pan_is_bifrost(pdev)) { - panvk_cmd_get_bifrost_tiler_context(cmdbuf, - batch->fb.info->width, - batch->fb.info->height); - } else { - panvk_cmd_get_midgard_polygon_list(cmdbuf, - batch->fb.info->width, - batch->fb.info->height, - true); - } - - draw->tiler_ctx = &batch->tiler.ctx; -} - -static void -panvk_draw_prepare_varyings(struct panvk_cmd_buffer *cmdbuf, - struct panvk_draw_info *draw) -{ - const struct panvk_pipeline *pipeline = cmdbuf->state.pipeline; - struct panvk_varyings_info *varyings = &cmdbuf->state.varyings; - - panvk_varyings_alloc(varyings, &cmdbuf->varying_pool.base, - draw->vertex_count); - - unsigned buf_count = panvk_varyings_buf_count(cmdbuf->device, varyings); - struct panfrost_ptr bufs = - pan_pool_alloc_desc_array(&cmdbuf->desc_pool.base, - buf_count, ATTRIBUTE_BUFFER); - - panvk_emit_varying_bufs(cmdbuf->device, varyings, bufs.cpu); - if (BITSET_TEST(varyings->active, VARYING_SLOT_POS)) { - draw->position = varyings->buf[varyings->varying[VARYING_SLOT_POS].buf].address + - varyings->varying[VARYING_SLOT_POS].offset; - } - - if (BITSET_TEST(varyings->active, VARYING_SLOT_PSIZ)) { - draw->psiz = varyings->buf[varyings->varying[VARYING_SLOT_PSIZ].buf].address + - varyings->varying[VARYING_SLOT_POS].offset; - } else if (pipeline->ia.topology == MALI_DRAW_MODE_LINES || - pipeline->ia.topology == MALI_DRAW_MODE_LINE_STRIP || - pipeline->ia.topology == MALI_DRAW_MODE_LINE_LOOP) { - draw->line_width = pipeline->dynamic_state_mask & PANVK_DYNAMIC_LINE_WIDTH ? 
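      /* VK_DYNAMIC_STATE_LINE_WIDTH: prefer the vkCmdSetLineWidth value
       * recorded in the command buffer over the width baked into the
       * pipeline. */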
- cmdbuf->state.rast.line_width : pipeline->rast.line_width; - } else { - draw->line_width = 1.0f; - } - draw->varying_bufs = bufs.gpu; - - for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) { - if (!varyings->stage[s].count) continue; - - struct panfrost_ptr attribs = - pan_pool_alloc_desc_array(&cmdbuf->desc_pool.base, - varyings->stage[s].count, - ATTRIBUTE); - - panvk_emit_varyings(cmdbuf->device, varyings, s, attribs.cpu); - draw->stages[s].varyings = attribs.gpu; - } -} - -static void -panvk_draw_prepare_attributes(struct panvk_cmd_buffer *cmdbuf, - struct panvk_draw_info *draw) -{ - /* TODO: images */ - const struct panfrost_device *pdev = &cmdbuf->device->physical_device->pdev; - - if (!cmdbuf->state.pipeline->attribs.buf_count) - return; - - if (cmdbuf->state.vb.attribs) { - draw->stages[MESA_SHADER_VERTEX].attributes = cmdbuf->state.vb.attribs; - draw->attribute_bufs = cmdbuf->state.vb.attrib_bufs; - return; - } - - unsigned buf_count = cmdbuf->state.pipeline->attribs.buf_count + - (pan_is_bifrost(pdev) ? 1 : 0); - struct panfrost_ptr bufs = - pan_pool_alloc_desc_array(&cmdbuf->desc_pool.base, - buf_count * 2, ATTRIBUTE_BUFFER); - - panvk_emit_attrib_bufs(cmdbuf->device, - &cmdbuf->state.pipeline->attribs, - cmdbuf->state.vb.bufs, cmdbuf->state.vb.count, - draw, bufs.cpu); - cmdbuf->state.vb.attrib_bufs = bufs.gpu; - - struct panfrost_ptr attribs = - pan_pool_alloc_desc_array(&cmdbuf->desc_pool.base, - cmdbuf->state.pipeline->attribs.attrib_count, - ATTRIBUTE); - - panvk_emit_attribs(cmdbuf->device, &cmdbuf->state.pipeline->attribs, - cmdbuf->state.vb.bufs, cmdbuf->state.vb.count, - attribs.cpu); - cmdbuf->state.vb.attribs = attribs.gpu; - draw->stages[MESA_SHADER_VERTEX].attributes = cmdbuf->state.vb.attribs; - draw->attribute_bufs = cmdbuf->state.vb.attrib_bufs; -} - -static void -panvk_draw_prepare_viewport(struct panvk_cmd_buffer *cmdbuf, - struct panvk_draw_info *draw) -{ - const struct panvk_pipeline *pipeline = cmdbuf->state.pipeline; - - if (pipeline->vpd) { - draw->viewport = pipeline->vpd; - } else if (cmdbuf->state.vpd) { - draw->viewport = cmdbuf->state.vpd; - } else { - struct panfrost_ptr vp = - pan_pool_alloc_desc(&cmdbuf->desc_pool.base, VIEWPORT); - - const VkViewport *viewport = - pipeline->dynamic_state_mask & PANVK_DYNAMIC_VIEWPORT ? - &cmdbuf->state.viewport : &pipeline->viewport; - const VkRect2D *scissor = - pipeline->dynamic_state_mask & PANVK_DYNAMIC_SCISSOR ? - &cmdbuf->state.scissor : &pipeline->scissor; - - panvk_emit_viewport(viewport, scissor, vp.cpu); - draw->viewport = cmdbuf->state.vpd = vp.gpu; - } -} - -static void -panvk_draw_prepare_vertex_job(struct panvk_cmd_buffer *cmdbuf, - struct panvk_draw_info *draw) -{ - struct panvk_batch *batch = cmdbuf->state.batch; - struct panfrost_ptr ptr = - pan_pool_alloc_desc(&cmdbuf->desc_pool.base, COMPUTE_JOB); - - util_dynarray_append(&batch->jobs, void *, ptr.cpu); - draw->jobs.vertex = ptr; - panvk_emit_vertex_job(cmdbuf->device, - cmdbuf->state.pipeline, - draw, ptr.cpu); - -} - -static void -panvk_draw_prepare_tiler_job(struct panvk_cmd_buffer *cmdbuf, - struct panvk_draw_info *draw) -{ - const struct panfrost_device *pdev = &cmdbuf->device->physical_device->pdev; - struct panvk_batch *batch = cmdbuf->state.batch; - struct panfrost_ptr ptr = - pan_is_bifrost(pdev) ? 
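      /* Midgard and Bifrost tiler jobs are different descriptors with
       * different sizes, so the allocation must pick the right template.
       * Per-arch branches like this one are what the PAN_ARCH split of
       * this series compiles out. */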
- pan_pool_alloc_desc(&cmdbuf->desc_pool.base, BIFROST_TILER_JOB) : - pan_pool_alloc_desc(&cmdbuf->desc_pool.base, MIDGARD_TILER_JOB); - - util_dynarray_append(&batch->jobs, void *, ptr.cpu); - draw->jobs.tiler = ptr; - panvk_emit_tiler_job(cmdbuf->device, - cmdbuf->state.pipeline, - draw, ptr.cpu); -} - -void -panvk_CmdDraw(VkCommandBuffer commandBuffer, - uint32_t vertexCount, - uint32_t instanceCount, - uint32_t firstVertex, - uint32_t firstInstance) -{ - VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer); - - struct panvk_batch *batch = cmdbuf->state.batch; - - /* There are only 16 bits in the descriptor for the job ID, make sure all - * the 3 (2 in Bifrost) jobs in this draw are in the same batch. - */ - if (batch->scoreboard.job_index >= (UINT16_MAX - 3)) { - panvk_cmd_close_batch(cmdbuf); - panvk_cmd_open_batch(cmdbuf); - batch = cmdbuf->state.batch; - } - - if (cmdbuf->state.pipeline->fs.required) - panvk_cmd_alloc_fb_desc(cmdbuf); - - panvk_cmd_alloc_tls_desc(cmdbuf); - panvk_cmd_prepare_ubos(cmdbuf); - panvk_cmd_prepare_textures(cmdbuf); - panvk_cmd_prepare_samplers(cmdbuf); - - /* TODO: indexed draws */ - - struct panvk_draw_info draw = { - .first_vertex = firstVertex, - .vertex_count = vertexCount, - .first_instance = firstInstance, - .instance_count = instanceCount, - .padded_vertex_count = panfrost_padded_vertex_count(vertexCount), - .offset_start = firstVertex, - .tls = batch->tls.gpu, - .fb = batch->fb.desc.gpu, - .ubos = cmdbuf->descriptors[VK_PIPELINE_BIND_POINT_GRAPHICS].ubos, - .textures = cmdbuf->descriptors[VK_PIPELINE_BIND_POINT_GRAPHICS].textures, - .samplers = cmdbuf->descriptors[VK_PIPELINE_BIND_POINT_GRAPHICS].samplers, - }; - - panfrost_pack_work_groups_compute(&draw.invocation, 1, vertexCount, - instanceCount, 1, 1, 1, true, false); - panvk_draw_prepare_fs_rsd(cmdbuf, &draw); - panvk_draw_prepare_varyings(cmdbuf, &draw); - panvk_draw_prepare_attributes(cmdbuf, &draw); - panvk_draw_prepare_viewport(cmdbuf, &draw); - panvk_draw_prepare_tiler_context(cmdbuf, &draw); - panvk_draw_prepare_vertex_job(cmdbuf, &draw); - panvk_draw_prepare_tiler_job(cmdbuf, &draw); - - const struct panvk_pipeline *pipeline = cmdbuf->state.pipeline; - unsigned vjob_id = - panfrost_add_job(&cmdbuf->desc_pool.base, &batch->scoreboard, - MALI_JOB_TYPE_VERTEX, false, false, 0, 0, - &draw.jobs.vertex, false); - - if (pipeline->fs.required) { - panfrost_add_job(&cmdbuf->desc_pool.base, &batch->scoreboard, - MALI_JOB_TYPE_TILER, false, false, vjob_id, 0, - &draw.jobs.tiler, false); - } - - /* Clear the dirty flags all at once */ - cmdbuf->state.dirty = 0; -} - void panvk_CmdDrawIndexed(VkCommandBuffer commandBuffer, uint32_t indexCount, @@ -1375,171 +443,6 @@ panvk_CmdDispatchIndirect(VkCommandBuffer commandBuffer, panvk_stub(); } -void -panvk_CmdEndRenderPass2(VkCommandBuffer commandBuffer, - const VkSubpassEndInfoKHR *pSubpassEndInfo) -{ - VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer); - - panvk_cmd_close_batch(cmdbuf); - vk_free(&cmdbuf->pool->alloc, cmdbuf->state.clear); - cmdbuf->state.batch = NULL; - cmdbuf->state.pass = NULL; - cmdbuf->state.subpass = NULL; - cmdbuf->state.framebuffer = NULL; - cmdbuf->state.clear = NULL; - memset(&cmdbuf->state.compute, 0, sizeof(cmdbuf->state.compute)); -} - -void -panvk_CmdEndRenderPass(VkCommandBuffer cmd) -{ - VkSubpassEndInfoKHR einfo = { - .sType = VK_STRUCTURE_TYPE_SUBPASS_END_INFO, - }; - - panvk_CmdEndRenderPass2(cmd, &einfo); -} - - -void -panvk_CmdPipelineBarrier(VkCommandBuffer commandBuffer, - VkPipelineStageFlags 
srcStageMask, - VkPipelineStageFlags destStageMask, - VkDependencyFlags dependencyFlags, - uint32_t memoryBarrierCount, - const VkMemoryBarrier *pMemoryBarriers, - uint32_t bufferMemoryBarrierCount, - const VkBufferMemoryBarrier *pBufferMemoryBarriers, - uint32_t imageMemoryBarrierCount, - const VkImageMemoryBarrier *pImageMemoryBarriers) -{ - VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer); - - /* Caches are flushed/invalidated at batch boundaries for now, nothing to do - * for memory barriers assuming we implement barriers with the creation of a - * new batch. - * FIXME: We can probably do better with a CacheFlush job that has the - * barrier flag set to true. - */ - if (cmdbuf->state.batch) { - panvk_cmd_close_batch(cmdbuf); - panvk_cmd_open_batch(cmdbuf); - } -} - -static void -panvk_add_set_event_operation(struct panvk_cmd_buffer *cmdbuf, - struct panvk_event *event, - enum panvk_event_op_type type) -{ - struct panvk_event_op op = { - .type = type, - .event = event, - }; - - if (cmdbuf->state.batch == NULL) { - /* No open batch, let's create a new one so this operation happens in - * the right order. - */ - panvk_cmd_open_batch(cmdbuf); - util_dynarray_append(&cmdbuf->state.batch->event_ops, - struct panvk_event_op, - op); - panvk_cmd_close_batch(cmdbuf); - } else { - /* Let's close the current batch so the operation executes before any - * future commands. - */ - util_dynarray_append(&cmdbuf->state.batch->event_ops, - struct panvk_event_op, - op); - panvk_cmd_close_batch(cmdbuf); - panvk_cmd_open_batch(cmdbuf); - } -} - -static void -panvk_add_wait_event_operation(struct panvk_cmd_buffer *cmdbuf, - struct panvk_event *event) -{ - struct panvk_event_op op = { - .type = PANVK_EVENT_OP_WAIT, - .event = event, - }; - - if (cmdbuf->state.batch == NULL) { - /* No open batch, let's create a new one and have it wait for this event. */ - panvk_cmd_open_batch(cmdbuf); - util_dynarray_append(&cmdbuf->state.batch->event_ops, - struct panvk_event_op, - op); - } else { - /* Let's close the current batch so any future commands wait on the - * event signal operation. 
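    * Each batch is submitted to the kernel separately, with its event_ops
    * attached, so cutting the batch here is what orders the wait against
    * the jobs recorded before it.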
- */ - if (cmdbuf->state.batch->fragment_job || - cmdbuf->state.batch->scoreboard.first_job) { - panvk_cmd_close_batch(cmdbuf); - panvk_cmd_open_batch(cmdbuf); - } - util_dynarray_append(&cmdbuf->state.batch->event_ops, - struct panvk_event_op, - op); - } -} - -void -panvk_CmdSetEvent(VkCommandBuffer commandBuffer, - VkEvent _event, - VkPipelineStageFlags stageMask) -{ - VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer); - VK_FROM_HANDLE(panvk_event, event, _event); - - /* vkCmdSetEvent cannot be called inside a render pass */ - assert(cmdbuf->state.pass == NULL); - - panvk_add_set_event_operation(cmdbuf, event, PANVK_EVENT_OP_SET); -} - -void -panvk_CmdResetEvent(VkCommandBuffer commandBuffer, - VkEvent _event, - VkPipelineStageFlags stageMask) -{ - VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer); - VK_FROM_HANDLE(panvk_event, event, _event); - - /* vkCmdResetEvent cannot be called inside a render pass */ - assert(cmdbuf->state.pass == NULL); - - panvk_add_set_event_operation(cmdbuf, event, PANVK_EVENT_OP_RESET); -} - -void -panvk_CmdWaitEvents(VkCommandBuffer commandBuffer, - uint32_t eventCount, - const VkEvent *pEvents, - VkPipelineStageFlags srcStageMask, - VkPipelineStageFlags dstStageMask, - uint32_t memoryBarrierCount, - const VkMemoryBarrier *pMemoryBarriers, - uint32_t bufferMemoryBarrierCount, - const VkBufferMemoryBarrier *pBufferMemoryBarriers, - uint32_t imageMemoryBarrierCount, - const VkImageMemoryBarrier *pImageMemoryBarriers) -{ - VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer); - - assert(eventCount > 0); - - for (uint32_t i = 0; i < eventCount; i++) { - VK_FROM_HANDLE(panvk_event, event, pEvents[i]); - panvk_add_wait_event_operation(cmdbuf, event); - } -} - void panvk_CmdSetDeviceMask(VkCommandBuffer commandBuffer, uint32_t deviceMask) { diff --git a/src/panfrost/vulkan/panvk_cs.c b/src/panfrost/vulkan/panvk_cs.c index 8ab9d8d767c..46186e224a0 100644 --- a/src/panfrost/vulkan/panvk_cs.c +++ b/src/panfrost/vulkan/panvk_cs.c @@ -26,443 +26,10 @@ #include "panfrost-quirks.h" #include "pan_cs.h" -#include "pan_encoder.h" #include "pan_pool.h" #include "panvk_cs.h" #include "panvk_private.h" -#include "panvk_varyings.h" - -static mali_pixel_format -panvk_varying_hw_format(const struct panvk_device *dev, - const struct panvk_varyings_info *varyings, - gl_shader_stage stage, unsigned idx) -{ - const struct panfrost_device *pdev = &dev->physical_device->pdev; - gl_varying_slot loc = varyings->stage[stage].loc[idx]; - bool fs = stage == MESA_SHADER_FRAGMENT; - - switch (loc) { - case VARYING_SLOT_PNTC: - case VARYING_SLOT_PSIZ: - return (MALI_R16F << 12) | - (pdev->quirks & HAS_SWIZZLES ? - panfrost_get_default_swizzle(1) : 0); - case VARYING_SLOT_POS: - return ((fs ? MALI_RGBA32F : MALI_SNAP_4) << 12) | - (pdev->quirks & HAS_SWIZZLES ? 
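      /* On GPUs with the HAS_SWIZZLES quirk (Midgard), the low bits of a
       * pixel format carry an explicit component swizzle; later GPUs
       * dropped those bits, hence the 0. */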
- panfrost_get_default_swizzle(4) : 0); - default: - assert(!panvk_varying_is_builtin(stage, loc)); - return pdev->formats[varyings->varying[loc].format].hw; - } -} - -static void -panvk_emit_varying(const struct panvk_device *dev, - const struct panvk_varyings_info *varyings, - gl_shader_stage stage, unsigned idx, - void *attrib) -{ - const struct panfrost_device *pdev = &dev->physical_device->pdev; - gl_varying_slot loc = varyings->stage[stage].loc[idx]; - bool fs = stage == MESA_SHADER_FRAGMENT; - - pan_pack(attrib, ATTRIBUTE, cfg) { - if (!panvk_varying_is_builtin(stage, loc)) { - cfg.buffer_index = varyings->varying[loc].buf; - cfg.offset = varyings->varying[loc].offset; - } else { - cfg.buffer_index = - panvk_varying_buf_index(varyings, - panvk_varying_buf_id(fs, loc)); - } - cfg.offset_enable = !pan_is_bifrost(pdev); - cfg.format = panvk_varying_hw_format(dev, varyings, stage, idx); - } -} - -void -panvk_emit_varyings(const struct panvk_device *dev, - const struct panvk_varyings_info *varyings, - gl_shader_stage stage, - void *descs) -{ - struct mali_attribute_packed *attrib = descs; - - for (unsigned i = 0; i < varyings->stage[stage].count; i++) - panvk_emit_varying(dev, varyings, stage, i, attrib++); -} - -static void -panvk_emit_varying_buf(const struct panvk_device *dev, - const struct panvk_varyings_info *varyings, - enum panvk_varying_buf_id id, void *buf) -{ - unsigned buf_idx = panvk_varying_buf_index(varyings, id); - enum mali_attribute_special special_id = panvk_varying_special_buf_id(id); - - pan_pack(buf, ATTRIBUTE_BUFFER, cfg) { - if (special_id) { - cfg.type = 0; - cfg.special = special_id; - } else { - unsigned offset = varyings->buf[buf_idx].address & 63; - - cfg.stride = varyings->buf[buf_idx].stride; - cfg.size = varyings->buf[buf_idx].size + offset; - cfg.pointer = varyings->buf[buf_idx].address & ~63ULL; - } - } -} - -void -panvk_emit_varying_bufs(const struct panvk_device *dev, - const struct panvk_varyings_info *varyings, - void *descs) -{ - const struct panfrost_device *pdev = &dev->physical_device->pdev; - struct mali_attribute_buffer_packed *buf = descs; - - for (unsigned i = 0; i < PANVK_VARY_BUF_MAX; i++) { - if (varyings->buf_mask & (1 << i)) - panvk_emit_varying_buf(dev, varyings, i, buf++); - } - - if (pan_is_bifrost(pdev)) - memset(buf, 0, sizeof(*buf)); -} - -static void -panvk_emit_attrib_buf(const struct panvk_device *dev, - const struct panvk_attribs_info *info, - const struct panvk_draw_info *draw, - const struct panvk_attrib_buf *bufs, - unsigned buf_count, - unsigned idx, void *desc) -{ - ASSERTED const struct panfrost_device *pdev = &dev->physical_device->pdev; - const struct panvk_attrib_buf_info *buf_info = &info->buf[idx]; - - if (buf_info->special) { - assert(!pan_is_bifrost(pdev)); - switch (buf_info->special_id) { - case PAN_VERTEX_ID: - panfrost_vertex_id(draw->padded_vertex_count, desc, - draw->instance_count > 1); - return; - case PAN_INSTANCE_ID: - panfrost_instance_id(draw->padded_vertex_count, desc, - draw->instance_count > 1); - return; - default: - unreachable("Invalid attribute ID"); - } - } - - assert(idx < buf_count); - const struct panvk_attrib_buf *buf = &bufs[idx]; - unsigned divisor = buf_info->per_instance ? - draw->padded_vertex_count : 0; - unsigned stride = divisor && draw->instance_count == 1 ? 
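      /* A per-instance attribute drawn with instance_count == 1 can use a
       * stride of 0: every invocation then reads element 0, and the
       * divisor path is skipped entirely. */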
- 0 : buf_info->stride; - mali_ptr addr = buf->address & ~63ULL; - unsigned size = buf->size + (buf->address & 63); - - /* TODO: support instanced arrays */ - pan_pack(desc, ATTRIBUTE_BUFFER, cfg) { - if (draw->instance_count > 1 && divisor) { - cfg.type = MALI_ATTRIBUTE_TYPE_1D_MODULUS; - cfg.divisor = divisor; - } - - cfg.pointer = addr; - cfg.stride = stride; - cfg.size = size; - } -} - -void -panvk_emit_attrib_bufs(const struct panvk_device *dev, - const struct panvk_attribs_info *info, - const struct panvk_attrib_buf *bufs, - unsigned buf_count, - const struct panvk_draw_info *draw, - void *descs) -{ - const struct panfrost_device *pdev = &dev->physical_device->pdev; - struct mali_attribute_buffer_packed *buf = descs; - - for (unsigned i = 0; i < info->buf_count; i++) - panvk_emit_attrib_buf(dev, info, draw, bufs, buf_count, i, buf++); - - /* A NULL entry is needed to stop prefecting on Bifrost */ - if (pan_is_bifrost(pdev)) - memset(buf, 0, sizeof(*buf)); -} - -static void -panvk_emit_attrib(const struct panvk_device *dev, - const struct panvk_attribs_info *attribs, - const struct panvk_attrib_buf *bufs, - unsigned buf_count, - unsigned idx, void *attrib) -{ - const struct panfrost_device *pdev = &dev->physical_device->pdev; - - pan_pack(attrib, ATTRIBUTE, cfg) { - cfg.buffer_index = attribs->attrib[idx].buf; - cfg.offset = attribs->attrib[idx].offset + - (bufs[cfg.buffer_index].address & 63); - cfg.format = pdev->formats[attribs->attrib[idx].format].hw; - } -} - -void -panvk_emit_attribs(const struct panvk_device *dev, - const struct panvk_attribs_info *attribs, - const struct panvk_attrib_buf *bufs, - unsigned buf_count, - void *descs) -{ - struct mali_attribute_packed *attrib = descs; - - for (unsigned i = 0; i < attribs->attrib_count; i++) - panvk_emit_attrib(dev, attribs, bufs, buf_count, i, attrib++); -} - -void -panvk_emit_ubos(const struct panvk_pipeline *pipeline, - const struct panvk_descriptor_state *state, - void *descs) -{ - struct mali_uniform_buffer_packed *ubos = descs; - - for (unsigned i = 0; i < ARRAY_SIZE(state->sets); i++) { - const struct panvk_descriptor_set_layout *set_layout = - pipeline->layout->sets[i].layout; - const struct panvk_descriptor_set *set = state->sets[i].set; - unsigned offset = pipeline->layout->sets[i].ubo_offset; - - if (!set_layout) - continue; - - if (!set) { - unsigned num_ubos = (set_layout->num_dynoffsets != 0) + set_layout->num_ubos; - memset(&ubos[offset], 0, num_ubos * sizeof(*ubos)); - } else { - memcpy(&ubos[offset], set->ubos, set_layout->num_ubos * sizeof(*ubos)); - if (set_layout->num_dynoffsets) { - pan_pack(&ubos[offset + set_layout->num_ubos], UNIFORM_BUFFER, cfg) { - cfg.pointer = state->sets[i].dynoffsets.gpu; - cfg.entries = DIV_ROUND_UP(set->layout->num_dynoffsets, 16); - } - } - } - } - - for (unsigned i = 0; i < ARRAY_SIZE(pipeline->sysvals); i++) { - if (!pipeline->sysvals[i].ids.sysval_count) - continue; - - pan_pack(&ubos[pipeline->sysvals[i].ubo_idx], UNIFORM_BUFFER, cfg) { - cfg.pointer = pipeline->sysvals[i].ubo ? 
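      /* GNU "?:" operator: pipelines whose sysvals are all known at create
       * time bake them into a pipeline-owned UBO; otherwise this falls
       * back to the per-draw copy uploaded by
       * panvk_cmd_prepare_sysvals(). */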
: - state->sysvals[i]; - cfg.entries = pipeline->sysvals[i].ids.sysval_count; - } - } -} - -void -panvk_emit_vertex_job(const struct panvk_device *dev, - const struct panvk_pipeline *pipeline, - const struct panvk_draw_info *draw, - void *job) -{ - const struct panfrost_device *pdev = &dev->physical_device->pdev; - void *section = pan_section_ptr(job, COMPUTE_JOB, INVOCATION); - - memcpy(section, &draw->invocation, pan_size(INVOCATION)); - - pan_section_pack(job, COMPUTE_JOB, PARAMETERS, cfg) { - cfg.job_task_split = 5; - } - - pan_section_pack(job, COMPUTE_JOB, DRAW, cfg) { - cfg.draw_descriptor_is_64b = true; - if (!pan_is_bifrost(pdev)) - cfg.texture_descriptor_is_64b = true; - cfg.state = pipeline->rsds[MESA_SHADER_VERTEX]; - cfg.attributes = draw->stages[MESA_SHADER_VERTEX].attributes; - cfg.attribute_buffers = draw->attribute_bufs; - cfg.varyings = draw->stages[MESA_SHADER_VERTEX].varyings; - cfg.varying_buffers = draw->varying_bufs; - cfg.thread_storage = draw->tls; - cfg.offset_start = draw->offset_start; - cfg.instance_size = draw->instance_count > 1 ? - draw->padded_vertex_count : 1; - cfg.uniform_buffers = draw->ubos; - cfg.push_uniforms = draw->stages[PIPE_SHADER_VERTEX].push_constants; - cfg.textures = draw->textures; - cfg.samplers = draw->samplers; - } - - pan_section_pack(job, COMPUTE_JOB, DRAW_PADDING, cfg); -} - -void -panvk_emit_tiler_job(const struct panvk_device *dev, - const struct panvk_pipeline *pipeline, - const struct panvk_draw_info *draw, - void *job) -{ - const struct panfrost_device *pdev = &dev->physical_device->pdev; - void *section = pan_is_bifrost(pdev) ? - pan_section_ptr(job, BIFROST_TILER_JOB, INVOCATION) : - pan_section_ptr(job, MIDGARD_TILER_JOB, INVOCATION); - - memcpy(section, &draw->invocation, pan_size(INVOCATION)); - - section = pan_is_bifrost(pdev) ? - pan_section_ptr(job, BIFROST_TILER_JOB, PRIMITIVE) : - pan_section_ptr(job, MIDGARD_TILER_JOB, PRIMITIVE); - - pan_pack(section, PRIMITIVE, cfg) { - cfg.draw_mode = pipeline->ia.topology; - if (pipeline->ia.writes_point_size) - cfg.point_size_array_format = MALI_POINT_SIZE_ARRAY_FORMAT_FP16; - - cfg.first_provoking_vertex = true; - if (pipeline->ia.primitive_restart) - cfg.primitive_restart = MALI_PRIMITIVE_RESTART_IMPLICIT; - cfg.job_task_split = 6; - /* TODO: indexed draws */ - cfg.index_count = draw->vertex_count; - } - - section = pan_is_bifrost(pdev) ? - pan_section_ptr(job, BIFROST_TILER_JOB, PRIMITIVE_SIZE) : - pan_section_ptr(job, MIDGARD_TILER_JOB, PRIMITIVE_SIZE); - pan_pack(section, PRIMITIVE_SIZE, cfg) { - if (pipeline->ia.writes_point_size) { - cfg.size_array = draw->psiz; - } else { - cfg.constant = draw->line_width; - } - } - - section = pan_is_bifrost(pdev) ? - pan_section_ptr(job, BIFROST_TILER_JOB, DRAW) : - pan_section_ptr(job, MIDGARD_TILER_JOB, DRAW); - - pan_pack(section, DRAW, cfg) { - cfg.four_components_per_vertex = true; - cfg.draw_descriptor_is_64b = true; - if (!pan_is_bifrost(pdev)) - cfg.texture_descriptor_is_64b = true; - cfg.front_face_ccw = pipeline->rast.front_ccw; - cfg.cull_front_face = pipeline->rast.cull_front_face; - cfg.cull_back_face = pipeline->rast.cull_back_face; - cfg.position = draw->position; - cfg.state = draw->fs_rsd; - cfg.attributes = draw->stages[MESA_SHADER_FRAGMENT].attributes; - cfg.attribute_buffers = draw->attribute_bufs; - cfg.viewport = draw->viewport; - cfg.varyings = draw->stages[MESA_SHADER_FRAGMENT].varyings; - cfg.varying_buffers = cfg.varyings ? 
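      /* A fragment shader that consumes no varyings gets a NULL buffer
       * table as well, rather than a pointer to descriptors nothing will
       * read. */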
draw->varying_bufs : 0;
-      if (pan_is_bifrost(pdev))
-         cfg.thread_storage = draw->tls;
-      else
-         cfg.fbd = draw->fb;
-
-      /* For all primitives but lines DRAW.flat_shading_vertex must
-       * be set to 0 and the provoking vertex is selected with the
-       * PRIMITIVE.first_provoking_vertex field.
-       */
-      if (pipeline->ia.topology == MALI_DRAW_MODE_LINES ||
-          pipeline->ia.topology == MALI_DRAW_MODE_LINE_STRIP ||
-          pipeline->ia.topology == MALI_DRAW_MODE_LINE_LOOP) {
-         /* The logic is inverted on bifrost. */
-         cfg.flat_shading_vertex = pan_is_bifrost(pdev) ?
-                                   true : false;
-      }
-
-      cfg.offset_start = draw->offset_start;
-      cfg.instance_size = draw->instance_count > 1 ?
-                          draw->padded_vertex_count : 1;
-      cfg.uniform_buffers = draw->ubos;
-      cfg.push_uniforms = draw->stages[PIPE_SHADER_FRAGMENT].push_constants;
-      cfg.textures = draw->textures;
-      cfg.samplers = draw->samplers;
-
-      /* TODO: occlusion queries */
-   }
-
-   if (pan_is_bifrost(pdev)) {
-      pan_section_pack(job, BIFROST_TILER_JOB, TILER, cfg) {
-         cfg.address = draw->tiler_ctx->bifrost;
-      }
-      pan_section_pack(job, BIFROST_TILER_JOB, DRAW_PADDING, padding);
-      pan_section_pack(job, BIFROST_TILER_JOB, PADDING, padding);
-   }
-}
-
-void
-panvk_emit_fragment_job(const struct panvk_device *dev,
-                        const struct panvk_framebuffer *fb,
-                        mali_ptr fbdesc,
-                        void *job)
-{
-   pan_section_pack(job, FRAGMENT_JOB, HEADER, header) {
-      header.type = MALI_JOB_TYPE_FRAGMENT;
-      header.index = 1;
-   }
-
-   pan_section_pack(job, FRAGMENT_JOB, PAYLOAD, payload) {
-      payload.bound_min_x = 0;
-      payload.bound_min_y = 0;
-
-      payload.bound_max_x = (fb->width - 1) >> MALI_TILE_SHIFT;
-      payload.bound_max_y = (fb->height - 1) >> MALI_TILE_SHIFT;
-      payload.framebuffer = fbdesc;
-   }
-}
-
-void
-panvk_emit_viewport(const VkViewport *viewport, const VkRect2D *scissor,
-                    void *vpd)
-{
-   /* The spec says "width must be greater than 0.0" */
-   assert(viewport->x >= 0);
-   int minx = (int)viewport->x;
-   int maxx = (int)(viewport->x + viewport->width);
-
-   /* Viewport height can be negative */
-   int miny = MIN2((int)viewport->y, (int)(viewport->y + viewport->height));
-   int maxy = MAX2((int)viewport->y, (int)(viewport->y + viewport->height));
-
-   assert(scissor->offset.x >= 0 && scissor->offset.y >= 0);
-   minx = MAX2(scissor->offset.x, minx);
-   miny = MAX2(scissor->offset.y, miny);
-   maxx = MIN2(scissor->offset.x + scissor->extent.width, maxx);
-   maxy = MIN2(scissor->offset.y + scissor->extent.height, maxy);
-
-   /* Make sure we don't end up with a max < min when width/height is 0 */
-   maxx = maxx > minx ? maxx - 1 : maxx;
-   maxy = maxy > miny ?
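      /* The scissor_maximum_{x,y} fields packed below are inclusive
       * coordinates, hence the minus one on non-empty boxes. */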
maxy - 1 : maxy; - - assert(viewport->minDepth >= 0.0f && viewport->minDepth <= 1.0f); - assert(viewport->maxDepth >= 0.0f && viewport->maxDepth <= 1.0f); - - pan_pack(vpd, VIEWPORT, cfg) { - cfg.scissor_minimum_x = minx; - cfg.scissor_minimum_y = miny; - cfg.scissor_maximum_x = maxx; - cfg.scissor_maximum_y = maxy; - cfg.minimum_z = MIN2(viewport->minDepth, viewport->maxDepth); - cfg.maximum_z = MAX2(viewport->minDepth, viewport->maxDepth); - } -} void panvk_sysval_upload_viewport_scale(const VkViewport *viewport, @@ -481,368 +48,3 @@ panvk_sysval_upload_viewport_offset(const VkViewport *viewport, data->f32[1] = (0.5f * viewport->height) + viewport->y; data->f32[2] = (0.5f * (viewport->maxDepth - viewport->minDepth)) + viewport->minDepth; } - -static enum mali_bifrost_register_file_format -bifrost_blend_type_from_nir(nir_alu_type nir_type) -{ - switch(nir_type) { - case 0: /* Render target not in use */ - return 0; - case nir_type_float16: - return MALI_BIFROST_REGISTER_FILE_FORMAT_F16; - case nir_type_float32: - return MALI_BIFROST_REGISTER_FILE_FORMAT_F32; - case nir_type_int32: - return MALI_BIFROST_REGISTER_FILE_FORMAT_I32; - case nir_type_uint32: - return MALI_BIFROST_REGISTER_FILE_FORMAT_U32; - case nir_type_int16: - return MALI_BIFROST_REGISTER_FILE_FORMAT_I16; - case nir_type_uint16: - return MALI_BIFROST_REGISTER_FILE_FORMAT_U16; - default: - unreachable("Unsupported blend shader type for NIR alu type"); - } -} - -static void -panvk_emit_bifrost_blend(const struct panvk_device *dev, - const struct panvk_pipeline *pipeline, - unsigned rt, void *bd) -{ - const struct pan_blend_state *blend = &pipeline->blend.state; - const struct panfrost_device *pdev = &dev->physical_device->pdev; - const struct pan_blend_rt_state *rts = &blend->rts[rt]; - bool dithered = false; - - pan_pack(bd, BLEND, cfg) { - if (!blend->rt_count || !rts->equation.color_mask) { - cfg.enable = false; - cfg.bifrost.internal.mode = MALI_BIFROST_BLEND_MODE_OFF; - continue; - } - - cfg.srgb = util_format_is_srgb(rts->format); - cfg.load_destination = pan_blend_reads_dest(blend->rts[rt].equation); - cfg.round_to_fb_precision = !dithered; - - const struct util_format_description *format_desc = - util_format_description(rts->format); - unsigned chan_size = 0; - for (unsigned i = 0; i < format_desc->nr_channels; i++) - chan_size = MAX2(format_desc->channel[0].size, chan_size); - - pan_blend_to_fixed_function_equation(blend->rts[rt].equation, - &cfg.bifrost.equation); - - /* Fixed point constant */ - float fconst = - pan_blend_get_constant(pan_blend_constant_mask(blend->rts[rt].equation), - blend->constants); - u16 constant = fconst * ((1 << chan_size) - 1); - constant <<= 16 - chan_size; - cfg.bifrost.constant = constant; - - if (pan_blend_is_opaque(blend->rts[rt].equation)) - cfg.bifrost.internal.mode = MALI_BIFROST_BLEND_MODE_OPAQUE; - else - cfg.bifrost.internal.mode = MALI_BIFROST_BLEND_MODE_FIXED_FUNCTION; - - /* If we want the conversion to work properly, - * num_comps must be set to 4 - */ - cfg.bifrost.internal.fixed_function.num_comps = 4; - cfg.bifrost.internal.fixed_function.conversion.memory_format = - panfrost_format_to_bifrost_blend(pdev, rts->format, dithered); - cfg.bifrost.internal.fixed_function.conversion.register_format = - bifrost_blend_type_from_nir(pipeline->fs.info.bifrost.blend[rt].type); - cfg.bifrost.internal.fixed_function.rt = rt; - } -} - -static void -panvk_emit_midgard_blend(const struct panvk_device *dev, - const struct panvk_pipeline *pipeline, - unsigned rt, void *bd) -{ - const struct 
pan_blend_state *blend = &pipeline->blend.state; - const struct pan_blend_rt_state *rts = &blend->rts[rt]; - - pan_pack(bd, BLEND, cfg) { - if (!blend->rt_count || !rts->equation.color_mask) { - cfg.enable = false; - continue; - } - - cfg.srgb = util_format_is_srgb(rts->format); - cfg.load_destination = pan_blend_reads_dest(blend->rts[rt].equation); - cfg.round_to_fb_precision = true; - cfg.midgard.blend_shader = false; - pan_blend_to_fixed_function_equation(blend->rts[rt].equation, - &cfg.midgard.equation); - cfg.midgard.constant = - pan_blend_get_constant(pan_blend_constant_mask(blend->rts[rt].equation), - blend->constants); - } -} - -void -panvk_emit_blend(const struct panvk_device *dev, - const struct panvk_pipeline *pipeline, - unsigned rt, void *bd) -{ - const struct panfrost_device *pdev = &dev->physical_device->pdev; - - if (pan_is_bifrost(pdev)) - panvk_emit_bifrost_blend(dev, pipeline, rt, bd); - else - panvk_emit_midgard_blend(dev, pipeline, rt, bd); -} - -void -panvk_emit_blend_constant(const struct panvk_device *dev, - const struct panvk_pipeline *pipeline, - unsigned rt, const float *constants, void *bd) -{ - const struct panfrost_device *pdev = &dev->physical_device->pdev; - float constant = constants[pipeline->blend.constant[rt].index]; - - pan_pack(bd, BLEND, cfg) { - cfg.enable = false; - if (pan_is_bifrost(pdev)) { - cfg.bifrost.constant = constant * pipeline->blend.constant[rt].bifrost_factor; - } else { - cfg.midgard.constant = constant; - } - } -} - -void -panvk_emit_dyn_fs_rsd(const struct panvk_device *dev, - const struct panvk_pipeline *pipeline, - const struct panvk_cmd_state *state, - void *rsd) -{ - pan_pack(rsd, RENDERER_STATE, cfg) { - if (pipeline->dynamic_state_mask & (1 << VK_DYNAMIC_STATE_DEPTH_BIAS)) { - cfg.depth_units = state->rast.depth_bias.constant_factor * 2.0f; - cfg.depth_factor = state->rast.depth_bias.slope_factor; - cfg.depth_bias_clamp = state->rast.depth_bias.clamp; - } - - if (pipeline->dynamic_state_mask & (1 << VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK)) { - cfg.stencil_front.mask = state->zs.s_front.compare_mask; - cfg.stencil_back.mask = state->zs.s_back.compare_mask; - } - - if (pipeline->dynamic_state_mask & (1 << VK_DYNAMIC_STATE_STENCIL_WRITE_MASK)) { - cfg.stencil_mask_misc.stencil_mask_front = state->zs.s_front.write_mask; - cfg.stencil_mask_misc.stencil_mask_back = state->zs.s_back.write_mask; - } - - if (pipeline->dynamic_state_mask & (1 << VK_DYNAMIC_STATE_STENCIL_REFERENCE)) { - cfg.stencil_front.reference_value = state->zs.s_front.ref; - cfg.stencil_back.reference_value = state->zs.s_back.ref; - } - } -} - -void -panvk_emit_base_fs_rsd(const struct panvk_device *dev, - const struct panvk_pipeline *pipeline, - void *rsd) -{ - const struct panfrost_device *pdev = &dev->physical_device->pdev; - const struct pan_shader_info *info = &pipeline->fs.info; - - pan_pack(rsd, RENDERER_STATE, cfg) { - if (pipeline->fs.required) { - pan_shader_prepare_rsd(pdev, info, pipeline->fs.address, &cfg); - if (pan_is_bifrost(pdev)) { - cfg.properties.bifrost.allow_forward_pixel_to_kill = info->fs.can_fpk; - } else { - /* If either depth or stencil is enabled, discard matters */ - bool zs_enabled = - (pipeline->zs.z_test && pipeline->zs.z_compare_func != MALI_FUNC_ALWAYS) || - pipeline->zs.s_test; - - cfg.properties.midgard.work_register_count = info->work_reg_count; - cfg.properties.midgard.force_early_z = - info->fs.can_early_z && !pipeline->ms.alpha_to_coverage && - pipeline->zs.z_compare_func == MALI_FUNC_ALWAYS; - - - /* Workaround a hardware 
errata where early-z cannot be enabled - * when discarding even when the depth buffer is read-only, by - * lying to the hardware about the discard and setting the - * reads tilebuffer? flag to compensate */ - cfg.properties.midgard.shader_reads_tilebuffer = - info->fs.outputs_read || - (!zs_enabled && info->fs.can_discard); - cfg.properties.midgard.shader_contains_discard = - zs_enabled && info->fs.can_discard; - } - } else { - if (pan_is_bifrost(pdev)) { - cfg.properties.bifrost.shader_modifies_coverage = true; - cfg.properties.bifrost.allow_forward_pixel_to_kill = true; - cfg.properties.bifrost.allow_forward_pixel_to_be_killed = true; - cfg.properties.bifrost.zs_update_operation = MALI_PIXEL_KILL_STRONG_EARLY; - } else { - cfg.shader.shader = 0x1; - cfg.properties.midgard.work_register_count = 1; - cfg.properties.depth_source = MALI_DEPTH_SOURCE_FIXED_FUNCTION; - cfg.properties.midgard.force_early_z = true; - } - } - - bool msaa = pipeline->ms.rast_samples > 1; - cfg.multisample_misc.multisample_enable = msaa; - cfg.multisample_misc.sample_mask = - msaa ? pipeline->ms.sample_mask : UINT16_MAX; - - cfg.multisample_misc.depth_function = - pipeline->zs.z_test ? pipeline->zs.z_compare_func : MALI_FUNC_ALWAYS; - - cfg.multisample_misc.depth_write_mask = pipeline->zs.z_write; - cfg.multisample_misc.fixed_function_near_discard = !pipeline->rast.clamp_depth; - cfg.multisample_misc.fixed_function_far_discard = !pipeline->rast.clamp_depth; - cfg.multisample_misc.shader_depth_range_fixed = true; - - cfg.stencil_mask_misc.stencil_enable = pipeline->zs.s_test; - cfg.stencil_mask_misc.alpha_to_coverage = pipeline->ms.alpha_to_coverage; - cfg.stencil_mask_misc.alpha_test_compare_function = MALI_FUNC_ALWAYS; - cfg.stencil_mask_misc.depth_range_1 = pipeline->rast.depth_bias.enable; - cfg.stencil_mask_misc.depth_range_2 = pipeline->rast.depth_bias.enable; - cfg.stencil_mask_misc.single_sampled_lines = pipeline->ms.rast_samples <= 1; - - if (!(pipeline->dynamic_state_mask & (1 << VK_DYNAMIC_STATE_DEPTH_BIAS))) { - cfg.depth_units = pipeline->rast.depth_bias.constant_factor * 2.0f; - cfg.depth_factor = pipeline->rast.depth_bias.slope_factor; - cfg.depth_bias_clamp = pipeline->rast.depth_bias.clamp; - } - - if (!(pipeline->dynamic_state_mask & (1 << VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK))) { - cfg.stencil_front.mask = pipeline->zs.s_front.compare_mask; - cfg.stencil_back.mask = pipeline->zs.s_back.compare_mask; - } - - if (!(pipeline->dynamic_state_mask & (1 << VK_DYNAMIC_STATE_STENCIL_WRITE_MASK))) { - cfg.stencil_mask_misc.stencil_mask_front = pipeline->zs.s_front.write_mask; - cfg.stencil_mask_misc.stencil_mask_back = pipeline->zs.s_back.write_mask; - } - - if (!(pipeline->dynamic_state_mask & (1 << VK_DYNAMIC_STATE_STENCIL_REFERENCE))) { - cfg.stencil_front.reference_value = pipeline->zs.s_front.ref; - cfg.stencil_back.reference_value = pipeline->zs.s_back.ref; - } - - cfg.stencil_front.compare_function = pipeline->zs.s_front.compare_func; - cfg.stencil_front.stencil_fail = pipeline->zs.s_front.fail_op; - cfg.stencil_front.depth_fail = pipeline->zs.s_front.z_fail_op; - cfg.stencil_front.depth_pass = pipeline->zs.s_front.pass_op; - cfg.stencil_back.compare_function = pipeline->zs.s_back.compare_func; - cfg.stencil_back.stencil_fail = pipeline->zs.s_back.fail_op; - cfg.stencil_back.depth_fail = pipeline->zs.s_back.z_fail_op; - cfg.stencil_back.depth_pass = pipeline->zs.s_back.pass_op; - } -} - -void -panvk_emit_non_fs_rsd(const struct panvk_device *dev, - const struct pan_shader_info *shader_info, - 
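/*
 * Editor's note: the Midgard early-z workaround above reduces to a small
 * truth table (can_discard x zs_enabled), assuming outputs_read is false:
 *
 *    can_discard  zs_enabled  reads_tilebuffer  contains_discard
 *    false        false       false             false
 *    false        true        false             false
 *    true         false       true              false   <- "lie": hide the discard
 *    true         true        false             true    <- discard affects Z/S
 *
 * i.e. a discarding shader only reports its discard when depth/stencil tests
 * are live; otherwise it claims a tilebuffer read so early-z stays legal.
 */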
mali_ptr shader_ptr,
-                      void *rsd)
-{
-   const struct panfrost_device *pdev = &dev->physical_device->pdev;
-
-   assert(shader_info->stage != MESA_SHADER_FRAGMENT);
-
-   pan_pack(rsd, RENDERER_STATE, cfg) {
-      pan_shader_prepare_rsd(pdev, shader_info, shader_ptr, &cfg);
-   }
-}
-
-void
-panvk_emit_bifrost_tiler_context(const struct panvk_device *dev,
-                                 unsigned width, unsigned height,
-                                 const struct panfrost_ptr *descs)
-{
-   const struct panfrost_device *pdev = &dev->physical_device->pdev;
-
-   pan_pack(descs->cpu + pan_size(BIFROST_TILER), BIFROST_TILER_HEAP, cfg) {
-      cfg.size = pdev->tiler_heap->size;
-      cfg.base = pdev->tiler_heap->ptr.gpu;
-      cfg.bottom = pdev->tiler_heap->ptr.gpu;
-      cfg.top = pdev->tiler_heap->ptr.gpu + pdev->tiler_heap->size;
-   }
-
-   pan_pack(descs->cpu, BIFROST_TILER, cfg) {
-      cfg.hierarchy_mask = 0x28;
-      cfg.fb_width = width;
-      cfg.fb_height = height;
-      cfg.heap = descs->gpu + pan_size(BIFROST_TILER);
-   }
-}
-
-unsigned
-panvk_emit_fb(const struct panvk_device *dev,
-              const struct panvk_batch *batch,
-              const struct panvk_subpass *subpass,
-              const struct panvk_framebuffer *fb,
-              const struct panvk_clear_value *clears,
-              const struct pan_tls_info *tlsinfo,
-              const struct pan_tiler_context *tilerctx,
-              void *desc)
-{
-   const struct panfrost_device *pdev = &dev->physical_device->pdev;
-   struct panvk_image_view *view;
-   bool crc_valid[8] = { false };
-   struct pan_fb_info fbinfo = {
-      .width = fb->width,
-      .height = fb->height,
-      .extent.maxx = fb->width - 1,
-      .extent.maxy = fb->height - 1,
-      .nr_samples = 1,
-   };
-
-   for (unsigned cb = 0; cb < subpass->color_count; cb++) {
-      int idx = subpass->color_attachments[cb].idx;
-      view = idx != VK_ATTACHMENT_UNUSED ?
-             fb->attachments[idx].iview : NULL;
-      if (!view)
-         continue;
-      fbinfo.rts[cb].view = &view->pview;
-      fbinfo.rts[cb].clear = subpass->color_attachments[idx].clear;
-      fbinfo.rts[cb].crc_valid = &crc_valid[cb];
-
-      memcpy(fbinfo.rts[cb].clear_value, clears[idx].color,
-             sizeof(fbinfo.rts[cb].clear_value));
-      fbinfo.nr_samples =
-         MAX2(fbinfo.nr_samples, view->pview.image->layout.nr_samples);
-   }
-
-   if (subpass->zs_attachment.idx != VK_ATTACHMENT_UNUSED) {
-      view = fb->attachments[subpass->zs_attachment.idx].iview;
-      const struct util_format_description *fdesc =
-         util_format_description(view->pview.format);
-
-      fbinfo.nr_samples =
-         MAX2(fbinfo.nr_samples, view->pview.image->layout.nr_samples);
-
-      if (util_format_has_depth(fdesc)) {
-         fbinfo.zs.clear.z = subpass->zs_attachment.clear;
-         fbinfo.zs.clear_value.depth = clears[subpass->zs_attachment.idx].depth;
-         fbinfo.zs.view.zs = &view->pview;
-      }
-
-      if (util_format_has_stencil(fdesc)) {
-         fbinfo.zs.clear.s = subpass->zs_attachment.clear;
-         fbinfo.zs.clear_value.stencil = clears[subpass->zs_attachment.idx].stencil;
-         if (!fbinfo.zs.view.zs)
-            fbinfo.zs.view.s = &view->pview;
-      }
-   }
-
-   return pan_emit_fbd(pdev, &fbinfo, tlsinfo, tilerctx, desc);
-}
diff --git a/src/panfrost/vulkan/panvk_cs.h b/src/panfrost/vulkan/panvk_cs.h
index 79a9eac541c..894da75d6a4 100644
--- a/src/panfrost/vulkan/panvk_cs.h
+++ b/src/panfrost/vulkan/panvk_cs.h
@@ -24,12 +24,18 @@
 #ifndef PANVK_CS_H
 #define PANVK_CS_H
 
+#include "pan_encoder.h"
+
 #include
 
 #include "compiler/shader_enums.h"
 #include "panfrost-job.h"
 
 #include "pan_cs.h"
+#include "vk_util.h"
+
+#include "panvk_private.h"
+
 struct pan_blend_state;
 struct pan_shader_info;
 struct panfrost_ptr;
@@ -50,107 +56,32 @@ struct panvk_descriptor_state;
 struct panvk_subpass;
 struct panvk_clear_value;
 
-void
-panvk_emit_varyings(const struct panvk_device 
*dev, - const struct panvk_varyings_info *varyings, - gl_shader_stage stage, - void *descs); +#ifdef PAN_ARCH +static inline enum mali_func +panvk_per_arch(translate_compare_func)(VkCompareOp comp) +{ + STATIC_ASSERT(VK_COMPARE_OP_NEVER == (VkCompareOp)MALI_FUNC_NEVER); + STATIC_ASSERT(VK_COMPARE_OP_LESS == (VkCompareOp)MALI_FUNC_LESS); + STATIC_ASSERT(VK_COMPARE_OP_EQUAL == (VkCompareOp)MALI_FUNC_EQUAL); + STATIC_ASSERT(VK_COMPARE_OP_LESS_OR_EQUAL == (VkCompareOp)MALI_FUNC_LEQUAL); + STATIC_ASSERT(VK_COMPARE_OP_GREATER == (VkCompareOp)MALI_FUNC_GREATER); + STATIC_ASSERT(VK_COMPARE_OP_NOT_EQUAL == (VkCompareOp)MALI_FUNC_NOT_EQUAL); + STATIC_ASSERT(VK_COMPARE_OP_GREATER_OR_EQUAL == (VkCompareOp)MALI_FUNC_GEQUAL); + STATIC_ASSERT(VK_COMPARE_OP_ALWAYS == (VkCompareOp)MALI_FUNC_ALWAYS); -void -panvk_emit_varying_bufs(const struct panvk_device *dev, - const struct panvk_varyings_info *varyings, - void *descs); + return (enum mali_func)comp; +} -void -panvk_emit_attrib_bufs(const struct panvk_device *dev, - const struct panvk_attribs_info *info, - const struct panvk_attrib_buf *bufs, - unsigned buf_count, - const struct panvk_draw_info *draw, - void *descs); +static inline enum mali_func +panvk_per_arch(translate_sampler_compare_func)(const VkSamplerCreateInfo *pCreateInfo) +{ + if (!pCreateInfo->compareEnable) + return MALI_FUNC_NEVER; -void -panvk_emit_attribs(const struct panvk_device *dev, - const struct panvk_attribs_info *attribs, - const struct panvk_attrib_buf *bufs, - unsigned buf_count, - void *descs); - -void -panvk_emit_ubos(const struct panvk_pipeline *pipeline, - const struct panvk_descriptor_state *state, - void *descs); - -void -panvk_emit_vertex_job(const struct panvk_device *dev, - const struct panvk_pipeline *pipeline, - const struct panvk_draw_info *draw, - void *job); - -void -panvk_emit_tiler_job(const struct panvk_device *dev, - const struct panvk_pipeline *pipeline, - const struct panvk_draw_info *draw, - void *job); - -void -panvk_emit_fragment_job(const struct panvk_device *dev, - const struct panvk_framebuffer *fb, - mali_ptr fbdesc, - void *job); - -void -panvk_emit_viewport(const VkViewport *viewport, const VkRect2D *scissor, - void *vpd); - -void -panvk_emit_blend(const struct panvk_device *dev, - const struct panvk_pipeline *pipeline, - unsigned rt, void *bd); - -void -panvk_emit_blend_constant(const struct panvk_device *dev, - const struct panvk_pipeline *pipeline, - unsigned rt, const float *constants, void *bd); - -void -panvk_emit_dyn_fs_rsd(const struct panvk_device *dev, - const struct panvk_pipeline *pipeline, - const struct panvk_cmd_state *state, - void *rsd); - -void -panvk_emit_base_fs_rsd(const struct panvk_device *dev, - const struct panvk_pipeline *pipeline, - void *rsd); - -void -panvk_emit_non_fs_rsd(const struct panvk_device *dev, - const struct pan_shader_info *shader_info, - mali_ptr shader_ptr, - void *rsd); - -void -panvk_emit_bifrost_tiler_context(const struct panvk_device *dev, - unsigned width, unsigned height, - const struct panfrost_ptr *descs); - -unsigned -panvk_emit_fb(const struct panvk_device *dev, - const struct panvk_batch *batch, - const struct panvk_subpass *subpass, - const struct panvk_framebuffer *fb, - const struct panvk_clear_value *clears, - const struct pan_tls_info *tlsinfo, - const struct pan_tiler_context *tilerctx, - void *desc); - -void -panvk_emit_tls(const struct panvk_device *dev, - const struct panvk_pipeline *pipeline, - const struct pan_compute_dim *wg_count, - struct pan_pool *tls_pool, - void *desc); + enum 
mali_func f = panvk_per_arch(translate_compare_func)(pCreateInfo->compareOp); + return panfrost_flip_compare_func(f); +} +#endif void panvk_sysval_upload_viewport_scale(const VkViewport *viewport, diff --git a/src/panfrost/vulkan/panvk_descriptor_set.c b/src/panfrost/vulkan/panvk_descriptor_set.c index e10a6cbdab2..10cfda5a077 100644 --- a/src/panfrost/vulkan/panvk_descriptor_set.c +++ b/src/panfrost/vulkan/panvk_descriptor_set.c @@ -37,7 +37,6 @@ #include "vk_util.h" #include "pan_bo.h" -#include "gen_macros.h" VkResult panvk_CreateDescriptorSetLayout(VkDevice _device, @@ -418,128 +417,18 @@ panvk_ResetDescriptorPool(VkDevice _device, return VK_SUCCESS; } -static VkResult -panvk_descriptor_set_create(struct panvk_device *device, - struct panvk_descriptor_pool *pool, - const struct panvk_descriptor_set_layout *layout, - struct panvk_descriptor_set **out_set) -{ - const struct panfrost_device *pdev = &device->physical_device->pdev; - struct panvk_descriptor_set *set; - - /* TODO: Allocate from the pool! */ - set = vk_object_zalloc(&device->vk, NULL, - sizeof(struct panvk_descriptor_set), - VK_OBJECT_TYPE_DESCRIPTOR_SET); - if (!set) - return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); - - set->layout = layout; - set->descs = vk_alloc(&device->vk.alloc, - sizeof(*set->descs) * layout->num_descs, 8, - VK_OBJECT_TYPE_DESCRIPTOR_SET); - if (!set->descs) - goto err_free_set; - - if (layout->num_ubos) { - set->ubos = vk_zalloc(&device->vk.alloc, - sizeof(*set->ubos) * layout->num_ubos, 8, - VK_OBJECT_TYPE_DESCRIPTOR_SET); - if (!set->ubos) - goto err_free_set; - } - - if (layout->num_samplers) { - set->samplers = vk_zalloc(&device->vk.alloc, - sizeof(*set->samplers) * layout->num_samplers, 8, - VK_OBJECT_TYPE_DESCRIPTOR_SET); - if (!set->samplers) - goto err_free_set; - } - - if (layout->num_textures) { - if (pan_is_bifrost(pdev)) { - set->textures.bifrost = vk_zalloc(&device->vk.alloc, - sizeof(*set->textures.bifrost) * - layout->num_textures, - 8, VK_OBJECT_TYPE_DESCRIPTOR_SET); - } else { - set->textures.midgard = vk_zalloc(&device->vk.alloc, - sizeof(*set->textures.midgard) * - layout->num_textures, - 8, VK_OBJECT_TYPE_DESCRIPTOR_SET); - } - - if (!set->textures.midgard) - goto err_free_set; - } - - for (unsigned i = 0; i < layout->binding_count; i++) { - if (!layout->bindings[i].immutable_samplers) - continue; - - for (unsigned j = 0; j < layout->bindings[i].array_size; j++) { - set->descs[layout->bindings[i].desc_idx].image.sampler = - layout->bindings[i].immutable_samplers[j]; - } - } - - *out_set = set; - return VK_SUCCESS; - -err_free_set: - vk_free(&device->vk.alloc, set->textures.midgard); - vk_free(&device->vk.alloc, set->samplers); - vk_free(&device->vk.alloc, set->ubos); - vk_free(&device->vk.alloc, set->descs); - vk_object_free(&device->vk, NULL, set); - return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); -} - static void panvk_descriptor_set_destroy(struct panvk_device *device, struct panvk_descriptor_pool *pool, struct panvk_descriptor_set *set) { - vk_free(&device->vk.alloc, set->textures.midgard); + vk_free(&device->vk.alloc, set->textures); vk_free(&device->vk.alloc, set->samplers); vk_free(&device->vk.alloc, set->ubos); vk_free(&device->vk.alloc, set->descs); vk_object_free(&device->vk, NULL, set); } -VkResult -panvk_AllocateDescriptorSets(VkDevice _device, - const VkDescriptorSetAllocateInfo *pAllocateInfo, - VkDescriptorSet *pDescriptorSets) -{ - VK_FROM_HANDLE(panvk_device, device, _device); - VK_FROM_HANDLE(panvk_descriptor_pool, pool, 
pAllocateInfo->descriptorPool); - VkResult result; - unsigned i; - - for (i = 0; i < pAllocateInfo->descriptorSetCount; i++) { - VK_FROM_HANDLE(panvk_descriptor_set_layout, layout, - pAllocateInfo->pSetLayouts[i]); - struct panvk_descriptor_set *set = NULL; - - result = panvk_descriptor_set_create(device, pool, layout, &set); - if (result != VK_SUCCESS) - goto err_free_sets; - - pDescriptorSets[i] = panvk_descriptor_set_to_handle(set); - } - - return VK_SUCCESS; - -err_free_sets: - panvk_FreeDescriptorSets(_device, pAllocateInfo->descriptorPool, i, pDescriptorSets); - for (i = 0; i < pAllocateInfo->descriptorSetCount; i++) - pDescriptorSets[i] = VK_NULL_HANDLE; - - return result; -} - VkResult panvk_FreeDescriptorSets(VkDevice _device, VkDescriptorPool descriptorPool, @@ -558,245 +447,6 @@ panvk_FreeDescriptorSets(VkDevice _device, return VK_SUCCESS; } -static void -panvk_set_image_desc(struct panvk_descriptor *desc, - const VkDescriptorImageInfo *pImageInfo) -{ - VK_FROM_HANDLE(panvk_sampler, sampler, pImageInfo->sampler); - VK_FROM_HANDLE(panvk_image_view, image_view, pImageInfo->imageView); - desc->image.sampler = sampler; - desc->image.view = image_view; - desc->image.layout = pImageInfo->imageLayout; -} - -static void -panvk_set_texel_buffer_view_desc(struct panvk_descriptor *desc, - const VkBufferView *pTexelBufferView) -{ - VK_FROM_HANDLE(panvk_buffer_view, buffer_view, *pTexelBufferView); - desc->buffer_view = buffer_view; -} - -static void -panvk_set_buffer_info_desc(struct panvk_descriptor *desc, - const VkDescriptorBufferInfo *pBufferInfo) -{ - VK_FROM_HANDLE(panvk_buffer, buffer, pBufferInfo->buffer); - desc->buffer_info.buffer = buffer; - desc->buffer_info.offset = pBufferInfo->offset; - desc->buffer_info.range = pBufferInfo->range; -} - -static void -panvk_set_ubo_desc(void *ubo, - const VkDescriptorBufferInfo *pBufferInfo) -{ - VK_FROM_HANDLE(panvk_buffer, buffer, pBufferInfo->buffer); - size_t size = pBufferInfo->range == VK_WHOLE_SIZE ? 
- (buffer->bo->size - pBufferInfo->offset) : - pBufferInfo->range; - - pan_pack(ubo, UNIFORM_BUFFER, cfg) { - cfg.pointer = buffer->bo->ptr.gpu + pBufferInfo->offset; - cfg.entries = DIV_ROUND_UP(size, 16); - } -} - -static void -panvk_set_sampler_desc(void *desc, - const VkDescriptorImageInfo *pImageInfo) -{ - VK_FROM_HANDLE(panvk_sampler, sampler, pImageInfo->sampler); - - memcpy(desc, &sampler->desc, sizeof(sampler->desc)); -} - -static void -panvk_set_bifrost_texture_desc(struct mali_bifrost_texture_packed *desc, - const VkDescriptorImageInfo *pImageInfo) -{ - VK_FROM_HANDLE(panvk_image_view, view, pImageInfo->imageView); - - *desc = view->bifrost.tex_desc; -} - -static void -panvk_set_midgard_texture_desc(mali_ptr *desc, - const VkDescriptorImageInfo *pImageInfo) -{ - VK_FROM_HANDLE(panvk_image_view, view, pImageInfo->imageView); - - *desc = view->bo->ptr.gpu; -} - -static void -panvk_write_descriptor_set(struct panvk_device *dev, - const VkWriteDescriptorSet *pDescriptorWrite) -{ - const struct panfrost_device *pdev = &dev->physical_device->pdev; - VK_FROM_HANDLE(panvk_descriptor_set, set, pDescriptorWrite->dstSet); - const struct panvk_descriptor_set_layout *layout = set->layout; - unsigned dest_offset = pDescriptorWrite->dstArrayElement; - unsigned binding = pDescriptorWrite->dstBinding; - unsigned src_offset = 0; - - while (src_offset < pDescriptorWrite->descriptorCount && - binding < layout->binding_count) { - const struct panvk_descriptor_set_binding_layout *binding_layout = - &layout->bindings[binding]; - - if (!binding_layout->array_size) { - binding++; - dest_offset = 0; - continue; - } - - assert(pDescriptorWrite->descriptorType == binding_layout->type); - unsigned ndescs = MIN2(pDescriptorWrite->descriptorCount - src_offset, - binding_layout->array_size - dest_offset); - struct panvk_descriptor *descs = &set->descs[binding_layout->desc_idx + dest_offset]; - assert(binding_layout->desc_idx + dest_offset + ndescs <= set->layout->num_descs); - - switch (pDescriptorWrite->descriptorType) { - case VK_DESCRIPTOR_TYPE_SAMPLER: - case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: - case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: - for (unsigned i = 0; i < ndescs; i++) { - const VkDescriptorImageInfo *info = &pDescriptorWrite->pImageInfo[src_offset + i]; - - if (pDescriptorWrite->descriptorType == VK_DESCRIPTOR_TYPE_SAMPLER || - pDescriptorWrite->descriptorType == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) { - - if (binding_layout->immutable_samplers == NULL) { - unsigned sampler = binding_layout->sampler_idx + dest_offset + i; - panvk_set_sampler_desc(&set->samplers[sampler], info); - } - } - - if (pDescriptorWrite->descriptorType == VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE || - pDescriptorWrite->descriptorType == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) { - unsigned tex = binding_layout->tex_idx + dest_offset + i; - - if (pan_is_bifrost(pdev)) - panvk_set_bifrost_texture_desc(&set->textures.bifrost[tex], info); - else - panvk_set_midgard_texture_desc(&set->textures.midgard[tex], info); - } - } - break; - - case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: - case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: - for (unsigned i = 0; i < ndescs; i++) - panvk_set_image_desc(&descs[i], &pDescriptorWrite->pImageInfo[src_offset + i]); - break; - - case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: - case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: - for (unsigned i = 0; i < ndescs; i++) - panvk_set_texel_buffer_view_desc(&descs[i], &pDescriptorWrite->pTexelBufferView[src_offset + i]); - break; - - case 
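/*
 * Editor's sketch of the binding walk implemented above: Vulkan lets a single
 * VkWriteDescriptorSet spill past the end of one binding's array into the
 * next binding. E.g. writing descriptorCount=8 at dstBinding=2,
 * dstArrayElement=1, with array_size=4 for bindings 2..4, splits into:
 *
 *    binding 2: elements 1..3  (3 descriptors, ndescs = MIN2(8, 4 - 1))
 *    binding 3: elements 0..3  (4 descriptors, ndescs = MIN2(5, 4 - 0))
 *    binding 4: element  0     (1 descriptor,  ndescs = MIN2(1, 4 - 0))
 *
 * which is exactly what the MIN2() and the dest_offset reset in the loop
 * compute.
 */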
VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
-      case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
-         for (unsigned i = 0; i < ndescs; i++) {
-            unsigned ubo = binding_layout->ubo_idx + dest_offset + i;
-            panvk_set_ubo_desc(&set->ubos[ubo],
-                               &pDescriptorWrite->pBufferInfo[src_offset + i]);
-         }
-         break;
-
-      case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
-      case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
-         for (unsigned i = 0; i < ndescs; i++)
-            panvk_set_buffer_info_desc(&descs[i], &pDescriptorWrite->pBufferInfo[src_offset + i]);
-         break;
-      default:
-         unreachable("Invalid type");
-      }
-
-      src_offset += ndescs;
-      binding++;
-      dest_offset = 0;
-   }
-}
-
-static void
-panvk_copy_descriptor_set(struct panvk_device *dev,
-                          const VkCopyDescriptorSet *pDescriptorCopy)
-{
-   VK_FROM_HANDLE(panvk_descriptor_set, dest_set, pDescriptorCopy->dstSet);
-   VK_FROM_HANDLE(panvk_descriptor_set, src_set, pDescriptorCopy->srcSet);
-   const struct panvk_descriptor_set_layout *dest_layout = dest_set->layout;
-   const struct panvk_descriptor_set_layout *src_layout = src_set->layout;
-   unsigned dest_offset = pDescriptorCopy->dstArrayElement;
-   unsigned src_offset = pDescriptorCopy->srcArrayElement;
-   unsigned dest_binding = pDescriptorCopy->dstBinding;
-   unsigned src_binding = pDescriptorCopy->srcBinding;
-   unsigned desc_count = pDescriptorCopy->descriptorCount;
-
-   while (desc_count && src_binding < src_layout->binding_count &&
-          dest_binding < dest_layout->binding_count) {
-      const struct panvk_descriptor_set_binding_layout *dest_binding_layout =
-         &dest_layout->bindings[dest_binding];
-
-      if (!dest_binding_layout->array_size) {
-         dest_binding++;
-         dest_offset = 0;
-         continue;
-      }
-
-      const struct panvk_descriptor_set_binding_layout *src_binding_layout =
-         &src_layout->bindings[src_binding];
-
-      if (!src_binding_layout->array_size) {
-         src_binding++;
-         src_offset = 0;
-         continue;
-      }
-
-      assert(dest_binding_layout->type == src_binding_layout->type);
-
-      unsigned ndescs = MIN3(desc_count,
-                             dest_binding_layout->array_size - dest_offset,
-                             src_binding_layout->array_size - src_offset);
-
-      struct panvk_descriptor *dest_descs = dest_set->descs + dest_binding_layout->desc_idx + dest_offset;
-      struct panvk_descriptor *src_descs = src_set->descs + src_binding_layout->desc_idx + src_offset;
-      memcpy(dest_descs, src_descs, ndescs * sizeof(*dest_descs));
-      desc_count -= ndescs;
-      dest_offset += ndescs;
-      src_offset += ndescs;
-      if (dest_offset == dest_binding_layout->array_size) {
-         dest_binding++;
-         dest_offset = 0;
-      }
-      if (src_offset == src_binding_layout->array_size) {
-         src_binding++;
-         src_offset = 0;
-      }
-   }
-
-   assert(!desc_count);
-}
-
-void
-panvk_UpdateDescriptorSets(VkDevice _device,
-                           uint32_t descriptorWriteCount,
-                           const VkWriteDescriptorSet *pDescriptorWrites,
-                           uint32_t descriptorCopyCount,
-                           const VkCopyDescriptorSet *pDescriptorCopies)
-{
-   VK_FROM_HANDLE(panvk_device, dev, _device);
-
-   for (unsigned i = 0; i < descriptorWriteCount; i++)
-      panvk_write_descriptor_set(dev, &pDescriptorWrites[i]);
-   for (unsigned i = 0; i < descriptorCopyCount; i++)
-      panvk_copy_descriptor_set(dev, &pDescriptorCopies[i]);
-}
-
 VkResult
 panvk_CreateDescriptorUpdateTemplate(VkDevice _device,
                                      const VkDescriptorUpdateTemplateCreateInfo *pCreateInfo,
diff --git a/src/panfrost/vulkan/panvk_device.c b/src/panfrost/vulkan/panvk_device.c
index 9321525f373..8b73a7efa21 100644
--- a/src/panfrost/vulkan/panvk_device.c
+++ b/src/panfrost/vulkan/panvk_device.c
@@ -32,7 +32,6 @@
 #include "pan_bo.h"
 #include "pan_encoder.h"
 #include 
"pan_util.h" -#include "decode.h" #include #include @@ -199,7 +198,7 @@ panvk_physical_device_finish(struct panvk_physical_device *device) { panvk_wsi_finish(device); - panvk_meta_cleanup(device); + panvk_arch_dispatch(device->pdev.arch, meta_cleanup, device); panfrost_close_device(&device->pdev); if (device->master_fd != -1) close(device->master_fd); @@ -306,7 +305,7 @@ panvk_physical_device_init(struct panvk_physical_device *device, goto fail; } - panvk_meta_init(device); + panvk_arch_dispatch(device->pdev.arch, meta_init, device); memset(device->name, 0, sizeof(device->name)); sprintf(device->name, "%s", panfrost_model_name(device->pdev.gpu_id)); @@ -955,10 +954,29 @@ panvk_CreateDevice(VkPhysicalDevice physicalDevice, if (!device) return vk_error(physical_device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); + const struct vk_device_entrypoint_table *dev_entrypoints; struct vk_device_dispatch_table dispatch_table; + + switch (physical_device->pdev.arch) { + case 5: + dev_entrypoints = &panvk_v5_device_entrypoints; + break; + case 6: + dev_entrypoints = &panvk_v6_device_entrypoints; + break; + case 7: + dev_entrypoints = &panvk_v7_device_entrypoints; + break; + default: + unreachable("Unsupported architecture"); + } + + vk_device_dispatch_table_from_entrypoints(&dispatch_table, + dev_entrypoints, + true); vk_device_dispatch_table_from_entrypoints(&dispatch_table, &panvk_device_entrypoints, - true); + false); result = vk_device_init(&device->vk, &physical_device->vk, &dispatch_table, pCreateInfo, pAllocator); if (result != VK_SUCCESS) { @@ -1076,262 +1094,6 @@ panvk_GetDeviceQueue(VkDevice _device, panvk_GetDeviceQueue2(_device, &info, pQueue); } -static void -panvk_queue_submit_batch(struct panvk_queue *queue, - struct panvk_batch *batch, - uint32_t *bos, unsigned nr_bos, - uint32_t *in_fences, - unsigned nr_in_fences) -{ - const struct panvk_device *dev = queue->device; - unsigned debug = dev->physical_device->instance->debug_flags; - const struct panfrost_device *pdev = &dev->physical_device->pdev; - int ret; - - /* Reset the batch if it's already been issued */ - if (batch->issued) { - util_dynarray_foreach(&batch->jobs, void *, job) - memset((*job), 0, 4 * 4); - - /* Reset the tiler before re-issuing the batch */ - if (pan_is_bifrost(pdev) && batch->tiler.bifrost_descs.cpu) { - memcpy(batch->tiler.bifrost_descs.cpu, &batch->tiler.templ.bifrost, - sizeof(batch->tiler.templ.bifrost)); - } else if (!pan_is_bifrost(pdev) && batch->fb.desc.cpu) { - void *tiler = pan_section_ptr(batch->fb.desc.cpu, MULTI_TARGET_FRAMEBUFFER, TILER); - memcpy(tiler, &batch->tiler.templ.midgard, sizeof(batch->tiler.templ.midgard)); - /* All weights set to 0, nothing to do here */ - pan_section_pack(batch->fb.desc.cpu, MULTI_TARGET_FRAMEBUFFER, TILER_WEIGHTS, w); - } - } - - if (batch->scoreboard.first_job) { - struct drm_panfrost_submit submit = { - .bo_handles = (uintptr_t)bos, - .bo_handle_count = nr_bos, - .in_syncs = (uintptr_t)in_fences, - .in_sync_count = nr_in_fences, - .out_sync = queue->sync, - .jc = batch->scoreboard.first_job, - }; - - ret = drmIoctl(pdev->fd, DRM_IOCTL_PANFROST_SUBMIT, &submit); - assert(!ret); - - if (debug & (PANVK_DEBUG_TRACE | PANVK_DEBUG_SYNC)) { - ret = drmSyncobjWait(pdev->fd, &submit.out_sync, 1, INT64_MAX, 0, NULL); - assert(!ret); - } - - if (debug & PANVK_DEBUG_TRACE) - pandecode_jc(batch->scoreboard.first_job, pan_is_bifrost(pdev), pdev->gpu_id); - } - - if (batch->fragment_job) { - struct drm_panfrost_submit submit = { - .bo_handles = (uintptr_t)bos, - .bo_handle_count = 
nr_bos,
-         .out_sync = queue->sync,
-         .jc = batch->fragment_job,
-         .requirements = PANFROST_JD_REQ_FS,
-      };
-
-      if (batch->scoreboard.first_job) {
-         submit.in_syncs = (uintptr_t)(&queue->sync);
-         submit.in_sync_count = 1;
-      } else {
-         submit.in_syncs = (uintptr_t)in_fences;
-         submit.in_sync_count = nr_in_fences;
-      }
-
-      ret = drmIoctl(pdev->fd, DRM_IOCTL_PANFROST_SUBMIT, &submit);
-      assert(!ret);
-      if (debug & (PANVK_DEBUG_TRACE | PANVK_DEBUG_SYNC)) {
-         ret = drmSyncobjWait(pdev->fd, &submit.out_sync, 1, INT64_MAX, 0, NULL);
-         assert(!ret);
-      }
-
-      if (debug & PANVK_DEBUG_TRACE)
-         pandecode_jc(batch->fragment_job, pan_is_bifrost(pdev), pdev->gpu_id);
-   }
-
-   if (debug & PANVK_DEBUG_TRACE)
-      pandecode_next_frame();
-
-   batch->issued = true;
-}
-
-static void
-panvk_queue_transfer_sync(struct panvk_queue *queue, uint32_t syncobj)
-{
-   const struct panfrost_device *pdev = &queue->device->physical_device->pdev;
-   int ret;
-
-   struct drm_syncobj_handle handle = {
-      .handle = queue->sync,
-      .flags = DRM_SYNCOBJ_HANDLE_TO_FD_FLAGS_EXPORT_SYNC_FILE,
-      .fd = -1,
-   };
-
-   ret = drmIoctl(pdev->fd, DRM_IOCTL_SYNCOBJ_HANDLE_TO_FD, &handle);
-   assert(!ret);
-   assert(handle.fd >= 0);
-
-   handle.handle = syncobj;
-   ret = drmIoctl(pdev->fd, DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE, &handle);
-   assert(!ret);
-
-   close(handle.fd);
-}
-
-static void
-panvk_add_wait_event_syncobjs(struct panvk_batch *batch, uint32_t *in_fences, unsigned *nr_in_fences)
-{
-   util_dynarray_foreach(&batch->event_ops, struct panvk_event_op, op) {
-      switch (op->type) {
-      case PANVK_EVENT_OP_SET:
-         /* Nothing to do yet */
-         break;
-      case PANVK_EVENT_OP_RESET:
-         /* Nothing to do yet */
-         break;
-      case PANVK_EVENT_OP_WAIT:
-         in_fences[(*nr_in_fences)++] = op->event->syncobj;
-         break;
-      default:
-         unreachable("bad panvk_event_op type\n");
-      }
-   }
-}
-
-static void
-panvk_signal_event_syncobjs(struct panvk_queue *queue, struct panvk_batch *batch)
-{
-   const struct panfrost_device *pdev = &queue->device->physical_device->pdev;
-
-   util_dynarray_foreach(&batch->event_ops, struct panvk_event_op, op) {
-      switch (op->type) {
-      case PANVK_EVENT_OP_SET: {
-         panvk_queue_transfer_sync(queue, op->event->syncobj);
-         break;
-      }
-      case PANVK_EVENT_OP_RESET: {
-         struct panvk_event *event = op->event;
-
-         struct drm_syncobj_array objs = {
-            .handles = (uint64_t) (uintptr_t) &event->syncobj,
-            .count_handles = 1
-         };
-
-         int ret = drmIoctl(pdev->fd, DRM_IOCTL_SYNCOBJ_RESET, &objs);
-         assert(!ret);
-         break;
-      }
-      case PANVK_EVENT_OP_WAIT:
-         /* Nothing left to do */
-         break;
-      default:
-         unreachable("bad panvk_event_op type\n");
-      }
-   }
-}
-
-VkResult
-panvk_QueueSubmit(VkQueue _queue,
-                  uint32_t submitCount,
-                  const VkSubmitInfo *pSubmits,
-                  VkFence _fence)
-{
-   VK_FROM_HANDLE(panvk_queue, queue, _queue);
-   VK_FROM_HANDLE(panvk_fence, fence, _fence);
-   const struct panfrost_device *pdev = &queue->device->physical_device->pdev;
-
-   for (uint32_t i = 0; i < submitCount; ++i) {
-      const VkSubmitInfo *submit = pSubmits + i;
-      unsigned nr_semaphores = submit->waitSemaphoreCount + 1;
-      uint32_t semaphores[nr_semaphores];
-
-      semaphores[0] = queue->sync;
-      for (unsigned i = 0; i < submit->waitSemaphoreCount; i++) {
-         VK_FROM_HANDLE(panvk_semaphore, sem, submit->pWaitSemaphores[i]);
-
-         semaphores[i + 1] = sem->syncobj.temporary ? 
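/*
 * Editor's note on the submit flow above: each batch becomes up to two
 * DRM_IOCTL_PANFROST_SUBMIT calls chained through queue->sync:
 *
 *    in_fences (wait semaphores + event waits)
 *         |
 *         v
 *    geometry/compute job chain (scoreboard.first_job) -> signals queue->sync
 *         |
 *         v
 *    fragment job (PANFROST_JD_REQ_FS) waits on queue->sync -> signals queue->sync
 *
 * When there is no geometry work, the fragment submit waits on the original
 * in_fences directly, as the else branch above shows.
 */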
: sem->syncobj.permanent; - } - - for (uint32_t j = 0; j < submit->commandBufferCount; ++j) { - VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, (submit->pCommandBuffers[j])); - - list_for_each_entry(struct panvk_batch, batch, &cmdbuf->batches, node) { - /* FIXME: should be done at the batch level */ - unsigned nr_bos = - panvk_pool_num_bos(&cmdbuf->desc_pool) + - panvk_pool_num_bos(&cmdbuf->varying_pool) + - panvk_pool_num_bos(&cmdbuf->tls_pool) + - (batch->fb.info ? batch->fb.info->attachment_count : 0) + - (batch->blit.src ? 1 : 0) + - (batch->blit.dst ? 1 : 0) + - (batch->scoreboard.first_tiler ? 1 : 0) + 1; - unsigned bo_idx = 0; - uint32_t bos[nr_bos]; - - panvk_pool_get_bo_handles(&cmdbuf->desc_pool, &bos[bo_idx]); - bo_idx += panvk_pool_num_bos(&cmdbuf->desc_pool); - - panvk_pool_get_bo_handles(&cmdbuf->varying_pool, &bos[bo_idx]); - bo_idx += panvk_pool_num_bos(&cmdbuf->varying_pool); - - panvk_pool_get_bo_handles(&cmdbuf->tls_pool, &bos[bo_idx]); - bo_idx += panvk_pool_num_bos(&cmdbuf->tls_pool); - - if (batch->fb.info) { - for (unsigned i = 0; i < batch->fb.info->attachment_count; i++) { - bos[bo_idx++] = batch->fb.info->attachments[i].iview->pview.image->data.bo->gem_handle; - } - } - - if (batch->blit.src) - bos[bo_idx++] = batch->blit.src->gem_handle; - - if (batch->blit.dst) - bos[bo_idx++] = batch->blit.dst->gem_handle; - - if (batch->scoreboard.first_tiler) - bos[bo_idx++] = pdev->tiler_heap->gem_handle; - - bos[bo_idx++] = pdev->sample_positions->gem_handle; - assert(bo_idx == nr_bos); - - unsigned nr_in_fences = 0; - unsigned max_wait_event_syncobjs = - util_dynarray_num_elements(&batch->event_ops, - struct panvk_event_op); - uint32_t in_fences[nr_semaphores + max_wait_event_syncobjs]; - memcpy(in_fences, semaphores, nr_semaphores * sizeof(*in_fences)); - nr_in_fences += nr_semaphores; - - panvk_add_wait_event_syncobjs(batch, in_fences, &nr_in_fences); - - panvk_queue_submit_batch(queue, batch, bos, nr_bos, in_fences, nr_in_fences); - - panvk_signal_event_syncobjs(queue, batch); - } - } - - /* Transfer the out fence to signal semaphores */ - for (unsigned i = 0; i < submit->signalSemaphoreCount; i++) { - VK_FROM_HANDLE(panvk_semaphore, sem, submit->pSignalSemaphores[i]); - panvk_queue_transfer_sync(queue, sem->syncobj.temporary ? : sem->syncobj.permanent); - } - } - - if (fence) { - /* Transfer the last out fence to the fence object */ - panvk_queue_transfer_sync(queue, fence->syncobj.temporary ? 
: fence->syncobj.permanent); - } - - return VK_SUCCESS; -} - VkResult panvk_QueueWaitIdle(VkQueue _queue) { @@ -1891,202 +1653,6 @@ panvk_DestroyFramebuffer(VkDevice _device, vk_object_free(&device->vk, pAllocator, fb); } -static enum mali_mipmap_mode -panvk_translate_sampler_mipmap_mode(VkSamplerMipmapMode mode) -{ - switch (mode) { - case VK_SAMPLER_MIPMAP_MODE_NEAREST: return MALI_MIPMAP_MODE_NEAREST; - case VK_SAMPLER_MIPMAP_MODE_LINEAR: return MALI_MIPMAP_MODE_TRILINEAR; - default: unreachable("Invalid mipmap mode"); - } -} - -static unsigned -panvk_translate_sampler_address_mode(VkSamplerAddressMode mode) -{ - switch (mode) { - case VK_SAMPLER_ADDRESS_MODE_REPEAT: return MALI_WRAP_MODE_REPEAT; - case VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT: return MALI_WRAP_MODE_MIRRORED_REPEAT; - case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE: return MALI_WRAP_MODE_CLAMP_TO_EDGE; - case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER: return MALI_WRAP_MODE_CLAMP_TO_BORDER; - case VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE: return MALI_WRAP_MODE_MIRRORED_CLAMP_TO_EDGE; - default: unreachable("Invalid wrap"); - } -} - -static enum mali_func -panvk_translate_sampler_compare_func(const VkSamplerCreateInfo *pCreateInfo) -{ - if (!pCreateInfo->compareEnable) - return MALI_FUNC_NEVER; - - enum mali_func f = panvk_translate_compare_func(pCreateInfo->compareOp); - return panfrost_flip_compare_func(f); -} - -static void -panvk_init_midgard_sampler(struct panvk_sampler *sampler, - const VkSamplerCreateInfo *pCreateInfo) -{ - const VkSamplerCustomBorderColorCreateInfoEXT *pBorderColor = - vk_find_struct_const(pCreateInfo->pNext, SAMPLER_CUSTOM_BORDER_COLOR_CREATE_INFO_EXT); - - pan_pack(&sampler->desc, MIDGARD_SAMPLER, cfg) { - cfg.magnify_nearest = pCreateInfo->magFilter == VK_FILTER_NEAREST; - cfg.minify_nearest = pCreateInfo->minFilter == VK_FILTER_NEAREST; - cfg.mipmap_mode = panvk_translate_sampler_mipmap_mode(pCreateInfo->mipmapMode); - cfg.normalized_coordinates = !pCreateInfo->unnormalizedCoordinates; - cfg.lod_bias = FIXED_16(pCreateInfo->mipLodBias, true); - cfg.minimum_lod = FIXED_16(pCreateInfo->minLod, false); - cfg.maximum_lod = FIXED_16(pCreateInfo->maxLod, false); - - cfg.wrap_mode_s = panvk_translate_sampler_address_mode(pCreateInfo->addressModeU); - cfg.wrap_mode_t = panvk_translate_sampler_address_mode(pCreateInfo->addressModeV); - cfg.wrap_mode_r = panvk_translate_sampler_address_mode(pCreateInfo->addressModeW); - cfg.compare_function = panvk_translate_sampler_compare_func(pCreateInfo); - - switch (pCreateInfo->borderColor) { - case VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK: - case VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK: - cfg.border_color_r = fui(0.0); - cfg.border_color_g = fui(0.0); - cfg.border_color_b = fui(0.0); - cfg.border_color_a = - pCreateInfo->borderColor == VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK ? - fui(1.0) : fui(0.0); - break; - case VK_BORDER_COLOR_INT_OPAQUE_BLACK: - case VK_BORDER_COLOR_INT_TRANSPARENT_BLACK: - cfg.border_color_r = 0; - cfg.border_color_g = 0; - cfg.border_color_b = 0; - cfg.border_color_a = - pCreateInfo->borderColor == VK_BORDER_COLOR_INT_OPAQUE_BLACK ? 
- UINT_MAX : 0; - break; - case VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE: - cfg.border_color_r = fui(1.0); - cfg.border_color_g = fui(1.0); - cfg.border_color_b = fui(1.0); - cfg.border_color_a = fui(1.0); - break; - case VK_BORDER_COLOR_INT_OPAQUE_WHITE: - cfg.border_color_r = UINT_MAX; - cfg.border_color_g = UINT_MAX; - cfg.border_color_b = UINT_MAX; - cfg.border_color_a = UINT_MAX; - break; - case VK_BORDER_COLOR_FLOAT_CUSTOM_EXT: - case VK_BORDER_COLOR_INT_CUSTOM_EXT: - cfg.border_color_r = pBorderColor->customBorderColor.int32[0]; - cfg.border_color_g = pBorderColor->customBorderColor.int32[1]; - cfg.border_color_b = pBorderColor->customBorderColor.int32[2]; - cfg.border_color_a = pBorderColor->customBorderColor.int32[3]; - break; - default: - unreachable("Invalid border color"); - } - } -} - -static void -panvk_init_bifrost_sampler(struct panvk_sampler *sampler, - const VkSamplerCreateInfo *pCreateInfo) -{ - const VkSamplerCustomBorderColorCreateInfoEXT *pBorderColor = - vk_find_struct_const(pCreateInfo->pNext, SAMPLER_CUSTOM_BORDER_COLOR_CREATE_INFO_EXT); - - pan_pack(&sampler->desc, BIFROST_SAMPLER, cfg) { - cfg.magnify_nearest = pCreateInfo->magFilter == VK_FILTER_NEAREST; - cfg.minify_nearest = pCreateInfo->minFilter == VK_FILTER_NEAREST; - cfg.mipmap_mode = panvk_translate_sampler_mipmap_mode(pCreateInfo->mipmapMode); - cfg.normalized_coordinates = !pCreateInfo->unnormalizedCoordinates; - - cfg.lod_bias = FIXED_16(pCreateInfo->mipLodBias, true); - cfg.minimum_lod = FIXED_16(pCreateInfo->minLod, false); - cfg.maximum_lod = FIXED_16(pCreateInfo->maxLod, false); - cfg.wrap_mode_s = panvk_translate_sampler_address_mode(pCreateInfo->addressModeU); - cfg.wrap_mode_t = panvk_translate_sampler_address_mode(pCreateInfo->addressModeV); - cfg.wrap_mode_r = panvk_translate_sampler_address_mode(pCreateInfo->addressModeW); - cfg.compare_function = panvk_translate_sampler_compare_func(pCreateInfo); - - switch (pCreateInfo->borderColor) { - case VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK: - case VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK: - cfg.border_color_r = fui(0.0); - cfg.border_color_g = fui(0.0); - cfg.border_color_b = fui(0.0); - cfg.border_color_a = - pCreateInfo->borderColor == VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK ? - fui(1.0) : fui(0.0); - break; - case VK_BORDER_COLOR_INT_OPAQUE_BLACK: - case VK_BORDER_COLOR_INT_TRANSPARENT_BLACK: - cfg.border_color_r = 0; - cfg.border_color_g = 0; - cfg.border_color_b = 0; - cfg.border_color_a = - pCreateInfo->borderColor == VK_BORDER_COLOR_INT_OPAQUE_BLACK ? 
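/*
 * Editor's sketch (an assumption, not from the patch): judging by its use in
 * the sampler descriptors here, FIXED_16() converts a float LOD to 16-bit
 * 8.8 fixed point, with the boolean selecting whether negative values are
 * allowed (true only for lod_bias). Under that assumption:
 *
 *    FIXED_16(0.25f, false) == 0x0040   (0.25 * 256)
 *    FIXED_16(1.5f,  false) == 0x0180   (1.5  * 256)
 *    FIXED_16(-0.5f, true)  == 0xFF80   (-128 in two's complement)
 *    FIXED_16(-0.5f, false) would clamp to 0
 *
 * See the helper's definition in the panfrost headers for the exact rounding
 * and clamping rules.
 */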
- UINT_MAX : 0; - break; - case VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE: - cfg.border_color_r = fui(1.0); - cfg.border_color_g = fui(1.0); - cfg.border_color_b = fui(1.0); - cfg.border_color_a = fui(1.0); - break; - case VK_BORDER_COLOR_INT_OPAQUE_WHITE: - cfg.border_color_r = UINT_MAX; - cfg.border_color_g = UINT_MAX; - cfg.border_color_b = UINT_MAX; - cfg.border_color_a = UINT_MAX; - break; - case VK_BORDER_COLOR_FLOAT_CUSTOM_EXT: - case VK_BORDER_COLOR_INT_CUSTOM_EXT: - cfg.border_color_r = pBorderColor->customBorderColor.int32[0]; - cfg.border_color_g = pBorderColor->customBorderColor.int32[1]; - cfg.border_color_b = pBorderColor->customBorderColor.int32[2]; - cfg.border_color_a = pBorderColor->customBorderColor.int32[3]; - break; - default: - unreachable("Invalid border color"); - } - } -} - -static void -panvk_init_sampler(struct panvk_device *device, - struct panvk_sampler *sampler, - const VkSamplerCreateInfo *pCreateInfo) -{ - if (pan_is_bifrost(&device->physical_device->pdev)) - panvk_init_bifrost_sampler(sampler, pCreateInfo); - else - panvk_init_midgard_sampler(sampler, pCreateInfo); -} - -VkResult -panvk_CreateSampler(VkDevice _device, - const VkSamplerCreateInfo *pCreateInfo, - const VkAllocationCallbacks *pAllocator, - VkSampler *pSampler) -{ - VK_FROM_HANDLE(panvk_device, device, _device); - struct panvk_sampler *sampler; - - assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO); - - sampler = vk_object_alloc(&device->vk, pAllocator, sizeof(*sampler), - VK_OBJECT_TYPE_SAMPLER); - if (!sampler) - return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); - - panvk_init_sampler(device, sampler, pCreateInfo); - *pSampler = panvk_sampler_to_handle(sampler); - - return VK_SUCCESS; -} - void panvk_DestroySampler(VkDevice _device, VkSampler _sampler, diff --git a/src/panfrost/vulkan/panvk_image.c b/src/panfrost/vulkan/panvk_image.c index 35cba69b399..6ef706b86ba 100644 --- a/src/panfrost/vulkan/panvk_image.c +++ b/src/panfrost/vulkan/panvk_image.c @@ -276,121 +276,6 @@ panvk_GetImageSubresourceLayout(VkDevice _device, pLayout->depthPitch = slice_layout->surface_stride; } -static enum mali_texture_dimension -panvk_view_type_to_mali_tex_dim(VkImageViewType type) -{ - switch (type) { - case VK_IMAGE_VIEW_TYPE_1D: - case VK_IMAGE_VIEW_TYPE_1D_ARRAY: - return MALI_TEXTURE_DIMENSION_1D; - case VK_IMAGE_VIEW_TYPE_2D: - case VK_IMAGE_VIEW_TYPE_2D_ARRAY: - return MALI_TEXTURE_DIMENSION_2D; - case VK_IMAGE_VIEW_TYPE_3D: - return MALI_TEXTURE_DIMENSION_3D; - case VK_IMAGE_VIEW_TYPE_CUBE: - case VK_IMAGE_VIEW_TYPE_CUBE_ARRAY: - return MALI_TEXTURE_DIMENSION_CUBE; - default: - unreachable("Invalid view type"); - } -} - -static void -panvk_convert_swizzle(const VkComponentMapping *in, - unsigned char *out) -{ - const VkComponentSwizzle *comp = &in->r; - for (unsigned i = 0; i < 4; i++) { - switch (comp[i]) { - case VK_COMPONENT_SWIZZLE_IDENTITY: - out[i] = PIPE_SWIZZLE_X + i; - break; - case VK_COMPONENT_SWIZZLE_ZERO: - out[i] = PIPE_SWIZZLE_0; - break; - case VK_COMPONENT_SWIZZLE_ONE: - out[i] = PIPE_SWIZZLE_1; - break; - case VK_COMPONENT_SWIZZLE_R: - out[i] = PIPE_SWIZZLE_X; - break; - case VK_COMPONENT_SWIZZLE_G: - out[i] = PIPE_SWIZZLE_Y; - break; - case VK_COMPONENT_SWIZZLE_B: - out[i] = PIPE_SWIZZLE_Z; - break; - case VK_COMPONENT_SWIZZLE_A: - out[i] = PIPE_SWIZZLE_W; - break; - default: - unreachable("Invalid swizzle"); - } - } -} - - - -VkResult -panvk_CreateImageView(VkDevice _device, - const VkImageViewCreateInfo *pCreateInfo, - const VkAllocationCallbacks *pAllocator, 
- VkImageView *pView) -{ - VK_FROM_HANDLE(panvk_device, device, _device); - VK_FROM_HANDLE(panvk_image, image, pCreateInfo->image); - struct panvk_image_view *view; - - view = vk_object_zalloc(&device->vk, pAllocator, sizeof(*view), - VK_OBJECT_TYPE_IMAGE_VIEW); - if (view == NULL) - return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); - - view->pview.format = vk_format_to_pipe_format(pCreateInfo->format); - - if (pCreateInfo->subresourceRange.aspectMask == VK_IMAGE_ASPECT_DEPTH_BIT) - view->pview.format = util_format_get_depth_only(view->pview.format); - else if (pCreateInfo->subresourceRange.aspectMask == VK_IMAGE_ASPECT_STENCIL_BIT) - view->pview.format = util_format_stencil_only(view->pview.format); - - view->pview.dim = panvk_view_type_to_mali_tex_dim(pCreateInfo->viewType); - view->pview.first_level = pCreateInfo->subresourceRange.baseMipLevel; - view->pview.last_level = pCreateInfo->subresourceRange.baseMipLevel + - pCreateInfo->subresourceRange.levelCount - 1; - view->pview.first_layer = pCreateInfo->subresourceRange.baseArrayLayer; - view->pview.last_layer = pCreateInfo->subresourceRange.baseArrayLayer + - pCreateInfo->subresourceRange.layerCount - 1; - panvk_convert_swizzle(&pCreateInfo->components, view->pview.swizzle); - view->pview.image = &image->pimage; - view->pview.nr_samples = image->pimage.layout.nr_samples; - view->vk_format = pCreateInfo->format; - - struct panfrost_device *pdev = &device->physical_device->pdev; - unsigned bo_size = - panfrost_estimate_texture_payload_size(pdev, &view->pview); - - unsigned surf_descs_offset = 0; - if (!pan_is_bifrost(pdev)) { - bo_size += pan_size(MIDGARD_TEXTURE); - surf_descs_offset = pan_size(MIDGARD_TEXTURE); - } - - view->bo = panfrost_bo_create(pdev, bo_size, 0, "Texture descriptor"); - - struct panfrost_ptr surf_descs = { - .cpu = view->bo->ptr.cpu + surf_descs_offset, - .gpu = view->bo->ptr.gpu + surf_descs_offset, - }; - void *tex_desc = pan_is_bifrost(pdev) ? 
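/*
 * Editor's sketch of the BO layout built above:
 *
 *    Midgard:  view->bo = [ MIDGARD_TEXTURE descriptor | surface payload... ]
 *              the texture descriptor lives in the BO and the surface
 *              descriptors follow it.
 *    Bifrost:  view->bo = [ surface payload... ]
 *              the fixed-size texture descriptor is kept CPU-side in
 *              view->bifrost.tex_desc and copied into descriptor sets later.
 *
 * This is why surf_descs_offset is pan_size(MIDGARD_TEXTURE) on Midgard and
 * 0 on Bifrost.
 */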
- &view->bifrost.tex_desc : view->bo->ptr.cpu; - - panfrost_new_texture(pdev, &view->pview, tex_desc, &surf_descs); - - *pView = panvk_image_view_to_handle(view); - return VK_SUCCESS; -} - void panvk_DestroyImageView(VkDevice _device, VkImageView _view, diff --git a/src/panfrost/vulkan/panvk_pipeline.c b/src/panfrost/vulkan/panvk_pipeline.c index f2e6484d5b7..d62fd2df41b 100644 --- a/src/panfrost/vulkan/panvk_pipeline.c +++ b/src/panfrost/vulkan/panvk_pipeline.c @@ -40,956 +40,8 @@ #include "vk_format.h" #include "vk_util.h" -#include "panfrost/util/pan_lower_framebuffer.h" - #include "panfrost-quirks.h" -struct panvk_pipeline_builder -{ - struct panvk_device *device; - struct panvk_pipeline_cache *cache; - const VkAllocationCallbacks *alloc; - const VkGraphicsPipelineCreateInfo *create_info; - const struct panvk_pipeline_layout *layout; - - struct panvk_shader *shaders[MESA_SHADER_STAGES]; - struct { - uint32_t shader_offset; - uint32_t rsd_offset; - uint32_t sysvals_offset; - } stages[MESA_SHADER_STAGES]; - uint32_t blend_shader_offsets[MAX_RTS]; - uint32_t shader_total_size; - uint32_t static_state_size; - uint32_t vpd_offset; - - bool rasterizer_discard; - /* these states are affectd by rasterizer_discard */ - VkSampleCountFlagBits samples; - bool use_depth_stencil_attachment; - uint8_t active_color_attachments; - enum pipe_format color_attachment_formats[MAX_RTS]; -}; - -static VkResult -panvk_pipeline_builder_create_pipeline(struct panvk_pipeline_builder *builder, - struct panvk_pipeline **out_pipeline) -{ - struct panvk_device *dev = builder->device; - - struct panvk_pipeline *pipeline = - vk_object_zalloc(&dev->vk, builder->alloc, - sizeof(*pipeline), VK_OBJECT_TYPE_PIPELINE); - if (!pipeline) - return VK_ERROR_OUT_OF_HOST_MEMORY; - - pipeline->layout = builder->layout; - *out_pipeline = pipeline; - return VK_SUCCESS; -} - -static void -panvk_pipeline_builder_finish(struct panvk_pipeline_builder *builder) -{ - for (uint32_t i = 0; i < MESA_SHADER_STAGES; i++) { - if (!builder->shaders[i]) - continue; - panvk_shader_destroy(builder->device, builder->shaders[i], builder->alloc); - } -} - -static bool -panvk_pipeline_static_state(struct panvk_pipeline *pipeline, uint32_t id) -{ - return !(pipeline->dynamic_state_mask & (1 << id)); -} - -static VkResult -panvk_pipeline_builder_compile_shaders(struct panvk_pipeline_builder *builder, - struct panvk_pipeline *pipeline) -{ - const VkPipelineShaderStageCreateInfo *stage_infos[MESA_SHADER_STAGES] = { - NULL - }; - for (uint32_t i = 0; i < builder->create_info->stageCount; i++) { - gl_shader_stage stage = vk_to_mesa_shader_stage(builder->create_info->pStages[i].stage); - stage_infos[stage] = &builder->create_info->pStages[i]; - } - - /* compile shaders in reverse order */ - unsigned sysval_ubo = builder->layout->num_ubos; - - for (gl_shader_stage stage = MESA_SHADER_STAGES - 1; - stage > MESA_SHADER_NONE; stage--) { - const VkPipelineShaderStageCreateInfo *stage_info = stage_infos[stage]; - if (!stage_info) - continue; - - struct panvk_shader *shader; - - shader = panvk_shader_create(builder->device, stage, stage_info, - builder->layout, sysval_ubo, - &pipeline->blend.state, - panvk_pipeline_static_state(pipeline, - VK_DYNAMIC_STATE_BLEND_CONSTANTS), - builder->alloc); - if (!shader) - return VK_ERROR_OUT_OF_HOST_MEMORY; - - if (shader->info.sysvals.sysval_count) - sysval_ubo++; - - builder->shaders[stage] = shader; - builder->shader_total_size = ALIGN_POT(builder->shader_total_size, 128); - builder->stages[stage].shader_offset = 
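/*
 * Editor's example of the sysval UBO numbering in the compile loop above:
 * with layout->num_ubos == 3 and both stages using sysvals, the
 * reverse-order compile assigns
 *
 *    fragment shader: sysval UBO index 3  (compiled first)
 *    vertex shader:   sysval UBO index 4
 *
 * so driver-internal sysval UBOs always sit after the app-visible UBOs.
 */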
builder->shader_total_size;
-      builder->shader_total_size +=
-         util_dynarray_num_elements(&shader->binary, uint8_t);
-   }
-
-   return VK_SUCCESS;
-}
-
-static VkResult
-panvk_pipeline_builder_upload_shaders(struct panvk_pipeline_builder *builder,
-                                      struct panvk_pipeline *pipeline)
-{
-   struct panfrost_bo *bin_bo =
-      panfrost_bo_create(&builder->device->physical_device->pdev,
-                         builder->shader_total_size, PAN_BO_EXECUTE,
-                         "Shader");
-
-   pipeline->binary_bo = bin_bo;
-   panfrost_bo_mmap(bin_bo);
-
-   for (uint32_t i = 0; i < MESA_SHADER_STAGES; i++) {
-      const struct panvk_shader *shader = builder->shaders[i];
-      if (!shader)
-         continue;
-
-      memcpy(pipeline->binary_bo->ptr.cpu + builder->stages[i].shader_offset,
-             util_dynarray_element(&shader->binary, uint8_t, 0),
-             util_dynarray_num_elements(&shader->binary, uint8_t));
-   }
-
-   return VK_SUCCESS;
-}
-
-static bool
-panvk_pipeline_static_sysval(struct panvk_pipeline *pipeline,
-                             unsigned id)
-{
-   switch (id) {
-   case PAN_SYSVAL_VIEWPORT_SCALE:
-   case PAN_SYSVAL_VIEWPORT_OFFSET:
-      return panvk_pipeline_static_state(pipeline, VK_DYNAMIC_STATE_VIEWPORT);
-   default:
-      return false;
-   }
-}
-
-static void
-panvk_pipeline_builder_alloc_static_state_bo(struct panvk_pipeline_builder *builder,
-                                             struct panvk_pipeline *pipeline)
-{
-   struct panfrost_device *pdev =
-      &builder->device->physical_device->pdev;
-   unsigned bo_size = 0;
-
-   for (uint32_t i = 0; i < MESA_SHADER_STAGES; i++) {
-      const struct panvk_shader *shader = builder->shaders[i];
-      if (!shader)
-         continue;
-
-      if (pipeline->fs.dynamic_rsd && i == MESA_SHADER_FRAGMENT)
-         continue;
-
-      bo_size = ALIGN_POT(bo_size, pan_alignment(RENDERER_STATE));
-      builder->stages[i].rsd_offset = bo_size;
-      bo_size += pan_size(RENDERER_STATE);
-      if (i == MESA_SHADER_FRAGMENT)
-         bo_size += pan_size(BLEND) * pipeline->blend.state.rt_count;
-   }
-
-   if (panvk_pipeline_static_state(pipeline, VK_DYNAMIC_STATE_VIEWPORT) &&
-       panvk_pipeline_static_state(pipeline, VK_DYNAMIC_STATE_SCISSOR)) {
-      bo_size = ALIGN_POT(bo_size, pan_alignment(VIEWPORT));
-      builder->vpd_offset = bo_size;
-      bo_size += pan_size(VIEWPORT);
-   }
-
-   for (uint32_t i = 0; i < MESA_SHADER_STAGES; i++) {
-      const struct panvk_shader *shader = builder->shaders[i];
-      if (!shader || !shader->info.sysvals.sysval_count)
-         continue;
-
-      bool static_sysvals = true;
-      for (unsigned s = 0; s < shader->info.sysvals.sysval_count; s++) {
-         unsigned id = shader->info.sysvals.sysvals[s];
-         static_sysvals &= panvk_pipeline_static_sysval(pipeline, id);
-         switch (PAN_SYSVAL_TYPE(id)) {
-         case PAN_SYSVAL_VIEWPORT_SCALE:
-         case PAN_SYSVAL_VIEWPORT_OFFSET:
-            pipeline->sysvals[i].dirty_mask |= PANVK_DYNAMIC_VIEWPORT;
-            break;
-         default:
-            break;
-         }
-      }
-
-      if (!static_sysvals) {
-         builder->stages[i].sysvals_offset = ~0;
-         continue;
-      }
-
-      bo_size = ALIGN_POT(bo_size, 16);
-      builder->stages[i].sysvals_offset = bo_size;
-      bo_size += shader->info.sysvals.sysval_count * 16;
-   }
-
-   if (bo_size) {
-      pipeline->state_bo =
-         panfrost_bo_create(pdev, bo_size, 0, "Pipeline descriptors");
-      panfrost_bo_mmap(pipeline->state_bo);
-   }
-}
-
-static void
-panvk_pipeline_builder_upload_sysval(struct panvk_pipeline_builder *builder,
-                                     struct panvk_pipeline *pipeline,
-                                     unsigned id, union panvk_sysval_data *data)
-{
-   switch (PAN_SYSVAL_TYPE(id)) {
-   case PAN_SYSVAL_VIEWPORT_SCALE:
-      panvk_sysval_upload_viewport_scale(builder->create_info->pViewportState->pViewports,
-                                         data);
-      break;
-   case PAN_SYSVAL_VIEWPORT_OFFSET:
-      panvk_sysval_upload_viewport_offset(builder->create_info->pViewportState->pViewports,
-                                          data);
-      break;
-   default:
-      unreachable("Invalid static sysval");
-   }
-}
-
-static void
-panvk_pipeline_builder_init_sysvals(struct panvk_pipeline_builder *builder,
-                                    struct panvk_pipeline *pipeline,
-                                    gl_shader_stage stage)
-{
-   const struct panvk_shader *shader = builder->shaders[stage];
-
-   pipeline->sysvals[stage].ids = shader->info.sysvals;
-   pipeline->sysvals[stage].ubo_idx = shader->sysval_ubo;
-
-   if (!shader->info.sysvals.sysval_count ||
-       builder->stages[stage].sysvals_offset == ~0)
-      return;
-
-   union panvk_sysval_data *static_data =
-      pipeline->state_bo->ptr.cpu + builder->stages[stage].sysvals_offset;
-
-   pipeline->sysvals[stage].ubo =
-      pipeline->state_bo->ptr.gpu + builder->stages[stage].sysvals_offset;
-
-   for (unsigned i = 0; i < shader->info.sysvals.sysval_count; i++) {
-      unsigned id = shader->info.sysvals.sysvals[i];
-
-      panvk_pipeline_builder_upload_sysval(builder,
-                                           pipeline,
-                                           id, &static_data[i]);
-   }
-}
-
-static void
-panvk_pipeline_builder_init_shaders(struct panvk_pipeline_builder *builder,
-                                    struct panvk_pipeline *pipeline)
-{
-   for (uint32_t i = 0; i < MESA_SHADER_STAGES; i++) {
-      const struct panvk_shader *shader = builder->shaders[i];
-      if (!shader)
-         continue;
-
-      pipeline->tls_size = MAX2(pipeline->tls_size, shader->info.tls_size);
-      pipeline->wls_size = MAX2(pipeline->wls_size, shader->info.wls_size);
-
-      if (i == MESA_SHADER_VERTEX && shader->info.vs.writes_point_size)
-         pipeline->ia.writes_point_size = true;
-
-      mali_ptr shader_ptr = pipeline->binary_bo->ptr.gpu +
-                            builder->stages[i].shader_offset;
-
-      void *rsd = pipeline->state_bo->ptr.cpu + builder->stages[i].rsd_offset;
-      mali_ptr gpu_rsd = pipeline->state_bo->ptr.gpu + builder->stages[i].rsd_offset;
-
-      if (i != MESA_SHADER_FRAGMENT) {
-         panvk_emit_non_fs_rsd(builder->device, &shader->info, shader_ptr, rsd);
-      } else if (!pipeline->fs.dynamic_rsd) {
-         void *bd = rsd + pan_size(RENDERER_STATE);
-
-         panvk_emit_base_fs_rsd(builder->device, pipeline, rsd);
-         for (unsigned rt = 0; rt < MAX2(pipeline->blend.state.rt_count, 1); rt++) {
-            panvk_emit_blend(builder->device, pipeline, rt, bd);
-            bd += pan_size(BLEND);
-         }
-      } else {
-         gpu_rsd = 0;
-         panvk_emit_base_fs_rsd(builder->device, pipeline, &pipeline->fs.rsd_template);
-         for (unsigned rt = 0; rt < MAX2(pipeline->blend.state.rt_count, 1); rt++) {
-            panvk_emit_blend(builder->device, pipeline, rt,
-                             &pipeline->blend.bd_template[rt]);
-         }
-      }
-
-      pipeline->rsds[i] = gpu_rsd;
-      panvk_pipeline_builder_init_sysvals(builder, pipeline, i);
-   }
-
-   pipeline->num_ubos = builder->layout->num_ubos;
-   for (unsigned i = 0; i < ARRAY_SIZE(pipeline->sysvals); i++) {
-      if (pipeline->sysvals[i].ids.sysval_count)
-         pipeline->num_ubos = MAX2(pipeline->num_ubos, pipeline->sysvals[i].ubo_idx + 1);
-   }
-
-   pipeline->num_sysvals = 0;
-   for (unsigned i = 0; i < ARRAY_SIZE(pipeline->sysvals); i++)
-      pipeline->num_sysvals += pipeline->sysvals[i].ids.sysval_count;
-}
-
-
-static void
-panvk_pipeline_builder_parse_viewport(struct panvk_pipeline_builder *builder,
-                                      struct panvk_pipeline *pipeline)
-{
-   /* The spec says:
-    *
-    *    pViewportState is a pointer to an instance of the
-    *    VkPipelineViewportStateCreateInfo structure, and is ignored if the
-    *    pipeline has rasterization disabled. 
- */ - if (!builder->rasterizer_discard && - panvk_pipeline_static_state(pipeline, VK_DYNAMIC_STATE_VIEWPORT) && - panvk_pipeline_static_state(pipeline, VK_DYNAMIC_STATE_SCISSOR)) { - void *vpd = pipeline->state_bo->ptr.cpu + builder->vpd_offset; - panvk_emit_viewport(builder->create_info->pViewportState->pViewports, - builder->create_info->pViewportState->pScissors, - vpd); - pipeline->vpd = pipeline->state_bo->ptr.gpu + - builder->vpd_offset; - } else { - if (builder->create_info->pViewportState->pViewports) - pipeline->viewport = builder->create_info->pViewportState->pViewports[0]; - - if (builder->create_info->pViewportState->pScissors) - pipeline->scissor = builder->create_info->pViewportState->pScissors[0]; - } -} - -static void -panvk_pipeline_builder_parse_dynamic(struct panvk_pipeline_builder *builder, - struct panvk_pipeline *pipeline) -{ - const VkPipelineDynamicStateCreateInfo *dynamic_info = - builder->create_info->pDynamicState; - - if (!dynamic_info) - return; - - for (uint32_t i = 0; i < dynamic_info->dynamicStateCount; i++) { - VkDynamicState state = dynamic_info->pDynamicStates[i]; - switch (state) { - case VK_DYNAMIC_STATE_VIEWPORT ... VK_DYNAMIC_STATE_STENCIL_REFERENCE: - pipeline->dynamic_state_mask |= 1 << state; - break; - default: - unreachable("unsupported dynamic state"); - } - } - -} - -static enum mali_draw_mode -translate_prim_topology(VkPrimitiveTopology in) -{ - switch (in) { - case VK_PRIMITIVE_TOPOLOGY_POINT_LIST: - return MALI_DRAW_MODE_POINTS; - case VK_PRIMITIVE_TOPOLOGY_LINE_LIST: - return MALI_DRAW_MODE_LINES; - case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP: - return MALI_DRAW_MODE_LINE_STRIP; - case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST: - return MALI_DRAW_MODE_TRIANGLES; - case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP: - return MALI_DRAW_MODE_TRIANGLE_STRIP; - case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN: - return MALI_DRAW_MODE_TRIANGLE_FAN; - case VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY: - case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY: - case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY: - case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY: - case VK_PRIMITIVE_TOPOLOGY_PATCH_LIST: - default: - unreachable("Invalid primitive type"); - } -} - -static void -panvk_pipeline_builder_parse_input_assembly(struct panvk_pipeline_builder *builder, - struct panvk_pipeline *pipeline) -{ - pipeline->ia.primitive_restart = - builder->create_info->pInputAssemblyState->primitiveRestartEnable; - pipeline->ia.topology = - translate_prim_topology(builder->create_info->pInputAssemblyState->topology); -} - -static enum pipe_logicop -translate_logicop(VkLogicOp in) -{ - switch (in) { - case VK_LOGIC_OP_CLEAR: return PIPE_LOGICOP_CLEAR; - case VK_LOGIC_OP_AND: return PIPE_LOGICOP_AND; - case VK_LOGIC_OP_AND_REVERSE: return PIPE_LOGICOP_AND_REVERSE; - case VK_LOGIC_OP_COPY: return PIPE_LOGICOP_COPY; - case VK_LOGIC_OP_AND_INVERTED: return PIPE_LOGICOP_AND_INVERTED; - case VK_LOGIC_OP_NO_OP: return PIPE_LOGICOP_NOOP; - case VK_LOGIC_OP_XOR: return PIPE_LOGICOP_XOR; - case VK_LOGIC_OP_OR: return PIPE_LOGICOP_OR; - case VK_LOGIC_OP_NOR: return PIPE_LOGICOP_NOR; - case VK_LOGIC_OP_EQUIVALENT: return PIPE_LOGICOP_EQUIV; - case VK_LOGIC_OP_INVERT: return PIPE_LOGICOP_INVERT; - case VK_LOGIC_OP_OR_REVERSE: return PIPE_LOGICOP_OR_REVERSE; - case VK_LOGIC_OP_COPY_INVERTED: return PIPE_LOGICOP_COPY_INVERTED; - case VK_LOGIC_OP_OR_INVERTED: return PIPE_LOGICOP_OR_INVERTED; - case VK_LOGIC_OP_NAND: return PIPE_LOGICOP_NAND; - case VK_LOGIC_OP_SET: return PIPE_LOGICOP_SET; - 
default: unreachable("Invalid logicop"); - } -} - -static enum blend_func -translate_blend_op(VkBlendOp in) -{ - switch (in) { - case VK_BLEND_OP_ADD: return BLEND_FUNC_ADD; - case VK_BLEND_OP_SUBTRACT: return BLEND_FUNC_SUBTRACT; - case VK_BLEND_OP_REVERSE_SUBTRACT: return BLEND_FUNC_REVERSE_SUBTRACT; - case VK_BLEND_OP_MIN: return BLEND_FUNC_MIN; - case VK_BLEND_OP_MAX: return BLEND_FUNC_MAX; - default: unreachable("Invalid blend op"); - } -} - -static enum blend_factor -translate_blend_factor(VkBlendFactor in, bool dest_has_alpha) -{ - switch (in) { - case VK_BLEND_FACTOR_ZERO: - case VK_BLEND_FACTOR_ONE: - return BLEND_FACTOR_ZERO; - case VK_BLEND_FACTOR_SRC_COLOR: - case VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR: - return BLEND_FACTOR_SRC_COLOR; - case VK_BLEND_FACTOR_DST_COLOR: - case VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR: - return BLEND_FACTOR_DST_COLOR; - case VK_BLEND_FACTOR_SRC_ALPHA: - case VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA: - return BLEND_FACTOR_SRC_ALPHA; - case VK_BLEND_FACTOR_DST_ALPHA: - case VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA: - return dest_has_alpha ? BLEND_FACTOR_DST_ALPHA : BLEND_FACTOR_ZERO; - case VK_BLEND_FACTOR_CONSTANT_COLOR: - case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR: - return BLEND_FACTOR_CONSTANT_COLOR; - case VK_BLEND_FACTOR_CONSTANT_ALPHA: - case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA: - return BLEND_FACTOR_CONSTANT_ALPHA; - case VK_BLEND_FACTOR_SRC1_COLOR: - case VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR: - return BLEND_FACTOR_SRC1_COLOR; - case VK_BLEND_FACTOR_SRC1_ALPHA: - case VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA: - return BLEND_FACTOR_SRC1_ALPHA; - case VK_BLEND_FACTOR_SRC_ALPHA_SATURATE: - return BLEND_FACTOR_SRC_ALPHA_SATURATE; - default: unreachable("Invalid blend factor"); - } -} - -static bool -inverted_blend_factor(VkBlendFactor in, bool dest_has_alpha) -{ - switch (in) { - case VK_BLEND_FACTOR_ONE: - case VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR: - case VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR: - case VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA: - case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR: - case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA: - case VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR: - case VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA: - return true; - case VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA: - return dest_has_alpha ? true : false; - case VK_BLEND_FACTOR_DST_ALPHA: - return !dest_has_alpha ? true : false; - default: - return false; - } -} - -bool -panvk_blend_needs_lowering(const struct panfrost_device *dev, - const struct pan_blend_state *state, - unsigned rt) -{ - /* LogicOp requires a blend shader */ - if (state->logicop_enable) - return true; - - /* Not all formats can be blended by fixed-function hardware */ - if (!panfrost_blendable_formats_v7[state->rts[rt].format].internal) - return true; - - unsigned constant_mask = pan_blend_constant_mask(state->rts[rt].equation); - - /* v6 doesn't support blend constants in FF blend equations. - * v7 only uses the constant from RT 0 (TODO: what if it's the same - * constant? or a constant is shared?) 
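Note how translate_blend_factor and inverted_blend_factor above deliberately split each Vulkan blend factor into a base factor plus an invert flag: the hardware equation has no ONE or ONE_MINUS_x factors, so VK_BLEND_FACTOR_ONE becomes an inverted ZERO and VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR an inverted SRC_COLOR. A sketch of how a caller combines the two halves (hypothetical wrapper over the two functions above):

   struct example_hw_factor {
      enum blend_factor factor; /* base factor */
      bool invert;              /* true means the hardware applies 1 - factor */
   };

   static struct example_hw_factor
   example_translate_factor(VkBlendFactor in, bool dest_has_alpha)
   {
      return (struct example_hw_factor) {
         .factor = translate_blend_factor(in, dest_has_alpha),
         .invert = inverted_blend_factor(in, dest_has_alpha),
      };
   }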
- */ - if (constant_mask && (dev->arch == 6 || (dev->arch == 7 && rt > 0))) - return true; - - if (!pan_blend_is_homogenous_constant(constant_mask, state->constants)) - return true; - - bool supports_2src = pan_blend_supports_2src(dev->arch); - return !pan_blend_can_fixed_function(state->rts[rt].equation, supports_2src); -} - -static void -panvk_pipeline_builder_parse_color_blend(struct panvk_pipeline_builder *builder, - struct panvk_pipeline *pipeline) -{ - struct panfrost_device *pdev = &builder->device->physical_device->pdev; - pipeline->blend.state.logicop_enable = - builder->create_info->pColorBlendState->logicOpEnable; - pipeline->blend.state.logicop_func = - translate_logicop(builder->create_info->pColorBlendState->logicOp); - pipeline->blend.state.rt_count = util_last_bit(builder->active_color_attachments); - memcpy(pipeline->blend.state.constants, - builder->create_info->pColorBlendState->blendConstants, - sizeof(pipeline->blend.state.constants)); - - for (unsigned i = 0; i < pipeline->blend.state.rt_count; i++) { - const VkPipelineColorBlendAttachmentState *in = - &builder->create_info->pColorBlendState->pAttachments[i]; - struct pan_blend_rt_state *out = &pipeline->blend.state.rts[i]; - - out->format = builder->color_attachment_formats[i]; - - bool dest_has_alpha = util_format_has_alpha(out->format); - - out->nr_samples = builder->create_info->pMultisampleState->rasterizationSamples; - out->equation.blend_enable = in->blendEnable; - out->equation.color_mask = in->colorWriteMask; - out->equation.rgb_func = translate_blend_op(in->colorBlendOp); - out->equation.rgb_src_factor = translate_blend_factor(in->srcColorBlendFactor, dest_has_alpha); - out->equation.rgb_invert_src_factor = inverted_blend_factor(in->srcColorBlendFactor, dest_has_alpha); - out->equation.rgb_dst_factor = translate_blend_factor(in->dstColorBlendFactor, dest_has_alpha); - out->equation.rgb_invert_dst_factor = inverted_blend_factor(in->dstColorBlendFactor, dest_has_alpha); - out->equation.alpha_func = translate_blend_op(in->alphaBlendOp); - out->equation.alpha_src_factor = translate_blend_factor(in->srcAlphaBlendFactor, dest_has_alpha); - out->equation.alpha_invert_src_factor = inverted_blend_factor(in->srcAlphaBlendFactor, dest_has_alpha); - out->equation.alpha_dst_factor = translate_blend_factor(in->dstAlphaBlendFactor, dest_has_alpha); - out->equation.alpha_invert_dst_factor = inverted_blend_factor(in->dstAlphaBlendFactor, dest_has_alpha); - - unsigned constant_mask = - panvk_blend_needs_lowering(pdev, &pipeline->blend.state, i) ? - 0 : pan_blend_constant_mask(out->equation); - pipeline->blend.constant[i].index = ffs(constant_mask) - 1; - if (constant_mask && pan_is_bifrost(pdev)) { - /* On Bifrost, the blend constant is expressed with a UNORM of the - * size of the target format. The value is then shifted such that - * used bits are in the MSB. Here we calculate the factor at pipeline - * creation time so we only have to do a - * hw_constant = float_constant * factor; - * at descriptor emission time. 
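A worked instance of the hw_constant = float_constant * factor packing described above, for an 8-bit-per-channel target: factor = ((1 << 8) - 1) << (16 - 8) = 0xff00, so a blend constant of 1.0 packs to 0xff00 and 0.5 to 0x7f80. A small self-contained check of that arithmetic:

   #include <stdint.h>
   #include <stdio.h>

   int main(void)
   {
      unsigned chan_size = 8; /* e.g. an RGBA8 render target */
      uint16_t factor = ((1u << chan_size) - 1) << (16 - chan_size); /* 0xff00 */
      uint16_t hw_constant = (uint16_t)(0.5f * factor);              /* 0x7f80 */
      printf("factor=0x%04x hw_constant=0x%04x\n", factor, hw_constant);
      return 0;
   }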
- */ - const struct util_format_description *format_desc = - util_format_description(out->format); - unsigned chan_size = 0; - for (unsigned c = 0; c < format_desc->nr_channels; c++) - chan_size = MAX2(format_desc->channel[c].size, chan_size); - pipeline->blend.constant[i].bifrost_factor = - ((1 << chan_size) - 1) << (16 - chan_size); - } - } -} - -static void -panvk_pipeline_builder_parse_multisample(struct panvk_pipeline_builder *builder, - struct panvk_pipeline *pipeline) -{ - unsigned nr_samples = - MAX2(builder->create_info->pMultisampleState->rasterizationSamples, 1); - - pipeline->ms.rast_samples = - builder->create_info->pMultisampleState->rasterizationSamples; - pipeline->ms.sample_mask = - builder->create_info->pMultisampleState->pSampleMask ? - builder->create_info->pMultisampleState->pSampleMask[0] : UINT16_MAX; - pipeline->ms.min_samples = - MAX2(builder->create_info->pMultisampleState->minSampleShading * nr_samples, 1); -} - -static enum mali_stencil_op -translate_stencil_op(VkStencilOp in) -{ - switch (in) { - case VK_STENCIL_OP_KEEP: return MALI_STENCIL_OP_KEEP; - case VK_STENCIL_OP_ZERO: return MALI_STENCIL_OP_ZERO; - case VK_STENCIL_OP_REPLACE: return MALI_STENCIL_OP_REPLACE; - case VK_STENCIL_OP_INCREMENT_AND_CLAMP: return MALI_STENCIL_OP_INCR_SAT; - case VK_STENCIL_OP_DECREMENT_AND_CLAMP: return MALI_STENCIL_OP_DECR_SAT; - case VK_STENCIL_OP_INCREMENT_AND_WRAP: return MALI_STENCIL_OP_INCR_WRAP; - case VK_STENCIL_OP_DECREMENT_AND_WRAP: return MALI_STENCIL_OP_DECR_WRAP; - case VK_STENCIL_OP_INVERT: return MALI_STENCIL_OP_INVERT; - default: unreachable("Invalid stencil op"); - } -} - -static void -panvk_pipeline_builder_parse_zs(struct panvk_pipeline_builder *builder, - struct panvk_pipeline *pipeline) -{ - pipeline->zs.z_test = builder->create_info->pDepthStencilState->depthTestEnable; - pipeline->zs.z_write = builder->create_info->pDepthStencilState->depthWriteEnable; - pipeline->zs.z_compare_func = - panvk_translate_compare_func(builder->create_info->pDepthStencilState->depthCompareOp); - pipeline->zs.s_test = builder->create_info->pDepthStencilState->stencilTestEnable; - pipeline->zs.s_front.fail_op = - translate_stencil_op(builder->create_info->pDepthStencilState->front.failOp); - pipeline->zs.s_front.pass_op = - translate_stencil_op(builder->create_info->pDepthStencilState->front.passOp); - pipeline->zs.s_front.z_fail_op = - translate_stencil_op(builder->create_info->pDepthStencilState->front.depthFailOp); - pipeline->zs.s_front.compare_func = - panvk_translate_compare_func(builder->create_info->pDepthStencilState->front.compareOp); - pipeline->zs.s_front.compare_mask = - builder->create_info->pDepthStencilState->front.compareMask; - pipeline->zs.s_front.write_mask = - builder->create_info->pDepthStencilState->front.writeMask; - pipeline->zs.s_front.ref = - builder->create_info->pDepthStencilState->front.reference; - pipeline->zs.s_back.fail_op = - translate_stencil_op(builder->create_info->pDepthStencilState->back.failOp); - pipeline->zs.s_back.pass_op = - translate_stencil_op(builder->create_info->pDepthStencilState->back.passOp); - pipeline->zs.s_back.z_fail_op = - translate_stencil_op(builder->create_info->pDepthStencilState->back.depthFailOp); - pipeline->zs.s_back.compare_func = - panvk_translate_compare_func(builder->create_info->pDepthStencilState->back.compareOp); - pipeline->zs.s_back.compare_mask = - builder->create_info->pDepthStencilState->back.compareMask; - pipeline->zs.s_back.write_mask = - builder->create_info->pDepthStencilState->back.writeMask; - 
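One concrete instance of the min_samples derivation in panvk_pipeline_builder_parse_multisample earlier in this hunk: with rasterizationSamples = 4 and minSampleShading = 0.5, MAX2(0.5 * 4, 1) yields 2 shaded samples per pixel. A tiny sketch mirroring that expression (illustrative, not the driver's code):

   static unsigned
   example_min_samples(float min_sample_shading, unsigned rast_samples)
   {
      unsigned n = (unsigned)(min_sample_shading * rast_samples);
      return n > 1 ? n : 1; /* MAX2(..., 1): at least one shaded sample */
   }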
pipeline->zs.s_back.ref = - builder->create_info->pDepthStencilState->back.reference; -} - -static void -panvk_pipeline_builder_parse_rast(struct panvk_pipeline_builder *builder, - struct panvk_pipeline *pipeline) -{ - pipeline->rast.clamp_depth = builder->create_info->pRasterizationState->depthClampEnable; - pipeline->rast.depth_bias.enable = builder->create_info->pRasterizationState->depthBiasEnable; - pipeline->rast.depth_bias.constant_factor = - builder->create_info->pRasterizationState->depthBiasConstantFactor; - pipeline->rast.depth_bias.clamp = builder->create_info->pRasterizationState->depthBiasClamp; - pipeline->rast.depth_bias.slope_factor = builder->create_info->pRasterizationState->depthBiasSlopeFactor; - pipeline->rast.front_ccw = builder->create_info->pRasterizationState->frontFace == VK_FRONT_FACE_COUNTER_CLOCKWISE; - pipeline->rast.cull_front_face = builder->create_info->pRasterizationState->cullMode & VK_CULL_MODE_FRONT_BIT; - pipeline->rast.cull_back_face = builder->create_info->pRasterizationState->cullMode & VK_CULL_MODE_BACK_BIT; -} - -static bool -panvk_fs_required(struct panvk_pipeline *pipeline) -{ - const struct pan_shader_info *info = &pipeline->fs.info; - - /* If we generally have side effects */ - if (info->fs.sidefx) - return true; - - /* If colour is written we need to execute */ - const struct pan_blend_state *blend = &pipeline->blend.state; - for (unsigned i = 0; i < blend->rt_count; ++i) { - if (blend->rts[i].equation.color_mask) - return true; - } - - /* If depth is written and not implied we need to execute. - * TODO: Predicate on Z/S writes being enabled */ - return (info->fs.writes_depth || info->fs.writes_stencil); -} - -#define PANVK_DYNAMIC_FS_RSD_MASK \ - ((1 << VK_DYNAMIC_STATE_DEPTH_BIAS) | \ - (1 << VK_DYNAMIC_STATE_BLEND_CONSTANTS) | \ - (1 << VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK) | \ - (1 << VK_DYNAMIC_STATE_STENCIL_WRITE_MASK) | \ - (1 << VK_DYNAMIC_STATE_STENCIL_REFERENCE)) - -static void -panvk_pipeline_builder_init_fs_state(struct panvk_pipeline_builder *builder, - struct panvk_pipeline *pipeline) -{ - if (!builder->shaders[MESA_SHADER_FRAGMENT]) - return; - - pipeline->fs.dynamic_rsd = - pipeline->dynamic_state_mask & PANVK_DYNAMIC_FS_RSD_MASK; - pipeline->fs.address = pipeline->binary_bo->ptr.gpu + - builder->stages[MESA_SHADER_FRAGMENT].shader_offset; - pipeline->fs.info = builder->shaders[MESA_SHADER_FRAGMENT]->info; - pipeline->fs.required = panvk_fs_required(pipeline); -} - -static void -panvk_pipeline_update_varying_slot(struct panvk_varyings_info *varyings, - gl_shader_stage stage, - const struct pan_shader_varying *varying, - bool input) -{ - bool fs = stage == MESA_SHADER_FRAGMENT; - gl_varying_slot loc = varying->location; - enum panvk_varying_buf_id buf_id = - panvk_varying_buf_id(fs, loc); - - varyings->stage[stage].loc[varyings->stage[stage].count++] = loc; - - if (panvk_varying_is_builtin(stage, loc)) { - varyings->buf_mask |= 1 << buf_id; - return; - } - - assert(loc < ARRAY_SIZE(varyings->varying)); - - enum pipe_format new_fmt = varying->format; - enum pipe_format old_fmt = varyings->varying[loc].format; - - BITSET_SET(varyings->active, loc); - - /* We expect inputs to either be set by a previous stage or be built - * in, skip the entry if that's not the case, we'll emit a const - * varying returning zero for those entries. 
- */ - if (input && old_fmt == PIPE_FORMAT_NONE) - return; - - unsigned new_size = util_format_get_blocksize(new_fmt); - unsigned old_size = util_format_get_blocksize(old_fmt); - - if (old_size < new_size) - varyings->varying[loc].format = new_fmt; - - varyings->buf_mask |= 1 << buf_id; -} - -static void -panvk_pipeline_builder_collect_varyings(struct panvk_pipeline_builder *builder, - struct panvk_pipeline *pipeline) -{ - for (uint32_t s = 0; s < MESA_SHADER_STAGES; s++) { - if (!builder->shaders[s]) - continue; - - const struct pan_shader_info *info = &builder->shaders[s]->info; - - for (unsigned i = 0; i < info->varyings.input_count; i++) { - panvk_pipeline_update_varying_slot(&pipeline->varyings, s, - &info->varyings.input[i], - true); - } - - for (unsigned i = 0; i < info->varyings.output_count; i++) { - panvk_pipeline_update_varying_slot(&pipeline->varyings, s, - &info->varyings.output[i], - false); - } - } - - /* TODO: Xfb */ - gl_varying_slot loc; - BITSET_FOREACH_SET(loc, pipeline->varyings.active, VARYING_SLOT_MAX) { - enum panvk_varying_buf_id buf_id = - panvk_varying_buf_id(false, loc); - unsigned buf_idx = panvk_varying_buf_index(&pipeline->varyings, buf_id); - unsigned varying_sz = panvk_varying_size(&pipeline->varyings, loc); - - pipeline->varyings.varying[loc].buf = buf_idx; - pipeline->varyings.varying[loc].offset = - pipeline->varyings.buf[buf_idx].stride; - pipeline->varyings.buf[buf_idx].stride += varying_sz; - } -} - -static void -panvk_pipeline_builder_parse_vertex_input(struct panvk_pipeline_builder *builder, - struct panvk_pipeline *pipeline) -{ - struct panvk_attribs_info *attribs = &pipeline->attribs; - const VkPipelineVertexInputStateCreateInfo *info = - builder->create_info->pVertexInputState; - - for (unsigned i = 0; i < info->vertexBindingDescriptionCount; i++) { - const VkVertexInputBindingDescription *desc = - &info->pVertexBindingDescriptions[i]; - attribs->buf_count = MAX2(desc->binding + 1, attribs->buf_count); - attribs->buf[desc->binding].stride = desc->stride; - attribs->buf[desc->binding].special = false; - } - - for (unsigned i = 0; i < info->vertexAttributeDescriptionCount; i++) { - const VkVertexInputAttributeDescription *desc = - &info->pVertexAttributeDescriptions[i]; - attribs->attrib[desc->location].buf = desc->binding; - attribs->attrib[desc->location].format = - vk_format_to_pipe_format(desc->format); - attribs->attrib[desc->location].offset = desc->offset; - } - - const struct pan_shader_info *vs = - &builder->shaders[MESA_SHADER_VERTEX]->info; - - if (vs->attribute_count >= PAN_VERTEX_ID) { - attribs->buf[attribs->buf_count].special = true; - attribs->buf[attribs->buf_count].special_id = PAN_VERTEX_ID; - attribs->attrib[PAN_VERTEX_ID].buf = attribs->buf_count++; - attribs->attrib[PAN_VERTEX_ID].format = PIPE_FORMAT_R32_UINT; - } - - if (vs->attribute_count >= PAN_INSTANCE_ID) { - attribs->buf[attribs->buf_count].special = true; - attribs->buf[attribs->buf_count].special_id = PAN_INSTANCE_ID; - attribs->attrib[PAN_INSTANCE_ID].buf = attribs->buf_count++; - attribs->attrib[PAN_INSTANCE_ID].format = PIPE_FORMAT_R32_UINT; - } - - attribs->attrib_count = MAX2(attribs->attrib_count, vs->attribute_count); -} - -static VkResult -panvk_pipeline_builder_build(struct panvk_pipeline_builder *builder, - struct panvk_pipeline **pipeline) -{ - VkResult result = panvk_pipeline_builder_create_pipeline(builder, pipeline); - if (result != VK_SUCCESS) - return result; - - /* TODO: make those functions return a result and handle errors */ - 
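The varying-layout loop in panvk_pipeline_builder_collect_varyings above is a simple bump allocation: each active slot is placed at its buffer's current stride, and the stride then grows by the varying's size. A minimal standalone sketch of that packing rule (types simplified for illustration):

   struct example_varying { unsigned offset, size; };

   static unsigned
   example_pack_varyings(struct example_varying *v, unsigned count)
   {
      unsigned stride = 0;
      for (unsigned i = 0; i < count; i++) {
         v[i].offset = stride; /* appended at the current end of the record */
         stride += v[i].size;
      }
      return stride; /* per-vertex stride of the varying buffer */
   }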
panvk_pipeline_builder_parse_dynamic(builder, *pipeline); - panvk_pipeline_builder_parse_color_blend(builder, *pipeline); - panvk_pipeline_builder_compile_shaders(builder, *pipeline); - panvk_pipeline_builder_collect_varyings(builder, *pipeline); - panvk_pipeline_builder_parse_input_assembly(builder, *pipeline); - panvk_pipeline_builder_parse_multisample(builder, *pipeline); - panvk_pipeline_builder_parse_zs(builder, *pipeline); - panvk_pipeline_builder_parse_rast(builder, *pipeline); - panvk_pipeline_builder_parse_vertex_input(builder, *pipeline); - - - panvk_pipeline_builder_upload_shaders(builder, *pipeline); - panvk_pipeline_builder_init_fs_state(builder, *pipeline); - panvk_pipeline_builder_alloc_static_state_bo(builder, *pipeline); - panvk_pipeline_builder_init_shaders(builder, *pipeline); - panvk_pipeline_builder_parse_viewport(builder, *pipeline); - - return VK_SUCCESS; -} - -static void -panvk_pipeline_builder_init_graphics(struct panvk_pipeline_builder *builder, - struct panvk_device *dev, - struct panvk_pipeline_cache *cache, - const VkGraphicsPipelineCreateInfo *create_info, - const VkAllocationCallbacks *alloc) -{ - VK_FROM_HANDLE(panvk_pipeline_layout, layout, create_info->layout); - assert(layout); - *builder = (struct panvk_pipeline_builder) { - .device = dev, - .cache = cache, - .layout = layout, - .create_info = create_info, - .alloc = alloc, - }; - - builder->rasterizer_discard = - create_info->pRasterizationState->rasterizerDiscardEnable; - - if (builder->rasterizer_discard) { - builder->samples = VK_SAMPLE_COUNT_1_BIT; - } else { - builder->samples = create_info->pMultisampleState->rasterizationSamples; - - const struct panvk_render_pass *pass = panvk_render_pass_from_handle(create_info->renderPass); - const struct panvk_subpass *subpass = &pass->subpasses[create_info->subpass]; - - builder->use_depth_stencil_attachment = - subpass->zs_attachment.idx != VK_ATTACHMENT_UNUSED; - - assert(subpass->color_count == create_info->pColorBlendState->attachmentCount); - builder->active_color_attachments = 0; - for (uint32_t i = 0; i < subpass->color_count; i++) { - uint32_t idx = subpass->color_attachments[i].idx; - if (idx == VK_ATTACHMENT_UNUSED) - continue; - - builder->active_color_attachments |= 1 << i; - builder->color_attachment_formats[i] = pass->attachments[idx].format; - } - } -} - -VkResult -panvk_CreateGraphicsPipelines(VkDevice device, - VkPipelineCache pipelineCache, - uint32_t count, - const VkGraphicsPipelineCreateInfo *pCreateInfos, - const VkAllocationCallbacks *pAllocator, - VkPipeline *pPipelines) -{ - VK_FROM_HANDLE(panvk_device, dev, device); - VK_FROM_HANDLE(panvk_pipeline_cache, cache, pipelineCache); - - for (uint32_t i = 0; i < count; i++) { - struct panvk_pipeline_builder builder; - panvk_pipeline_builder_init_graphics(&builder, dev, cache, - &pCreateInfos[i], pAllocator); - - struct panvk_pipeline *pipeline; - VkResult result = panvk_pipeline_builder_build(&builder, &pipeline); - panvk_pipeline_builder_finish(&builder); - - if (result != VK_SUCCESS) { - for (uint32_t j = 0; j < i; j++) { - panvk_DestroyPipeline(device, pPipelines[j], pAllocator); - pPipelines[j] = VK_NULL_HANDLE; - } - - return result; - } - - pPipelines[i] = panvk_pipeline_to_handle(pipeline); - } - - return VK_SUCCESS; -} - VkResult panvk_CreateComputePipelines(VkDevice _device, VkPipelineCache pipelineCache, diff --git a/src/panfrost/vulkan/panvk_private.h b/src/panfrost/vulkan/panvk_private.h index 30d0b6284fc..95678d04652 100644 --- a/src/panfrost/vulkan/panvk_private.h +++ 
b/src/panfrost/vulkan/panvk_private.h @@ -57,16 +57,12 @@ #include "drm-uapi/panfrost_drm.h" -#include "midgard/midgard_compile.h" - #include "pan_blend.h" -#include "pan_blitter.h" #include "pan_cs.h" #include "pan_device.h" #include "panvk_mempool.h" #include "pan_texture.h" #include "pan_scoreboard.h" -#include "pan_shader.h" #include "vk_extensions.h" #include "panvk_varyings.h" @@ -172,13 +168,6 @@ struct panvk_physical_device { int master_fd; }; -void -panvk_meta_init(struct panvk_physical_device *dev); - -void -panvk_meta_cleanup(struct panvk_physical_device *dev); - - enum panvk_debug_flags { PANVK_DEBUG_STARTUP = 1 << 0, PANVK_DEBUG_NIR = 1 << 1, @@ -253,6 +242,8 @@ panvk_device_is_lost(struct panvk_device *device) return unlikely(p_atomic_read(&device->_lost)); } +#define TILER_DESC_WORDS 56 + struct panvk_batch { struct list_head node; struct util_dynarray jobs; @@ -269,14 +260,8 @@ struct panvk_batch { mali_ptr fragment_job; struct { struct pan_tiler_context ctx; - struct panfrost_ptr bifrost_descs; - union { - struct { - struct mali_bifrost_tiler_heap_packed heap; - struct mali_bifrost_tiler_packed tiler; - } bifrost; - struct mali_midgard_tiler_packed midgard; - } templ; + struct panfrost_ptr descs; + uint32_t templ[TILER_DESC_WORDS]; } tiler; bool issued; }; @@ -343,12 +328,9 @@ struct panvk_descriptor_set { struct panvk_descriptor_pool *pool; const struct panvk_descriptor_set_layout *layout; struct panvk_descriptor *descs; - struct mali_uniform_buffer_packed *ubos; - struct mali_midgard_sampler_packed *samplers; - union { - struct mali_bifrost_texture_packed *bifrost; - mali_ptr *midgard; - } textures; + void *ubos; + void *samplers; + void *textures; }; #define MAX_SETS 4 @@ -483,6 +465,8 @@ struct panvk_descriptor_state { mali_ptr samplers; }; +#define INVOCATION_DESC_WORDS 2 + struct panvk_draw_info { unsigned first_index; unsigned index_count; @@ -493,7 +477,7 @@ struct panvk_draw_info { unsigned instance_count; int vertex_offset; unsigned offset_start; - struct mali_invocation_packed invocation; + uint32_t invocation[INVOCATION_DESC_WORDS]; struct { mali_ptr varyings; mali_ptr attributes; @@ -665,24 +649,6 @@ struct panvk_cmd_buffer { void panvk_cmd_open_batch(struct panvk_cmd_buffer *cmdbuf); -void -panvk_cmd_close_batch(struct panvk_cmd_buffer *cmdbuf); - -void -panvk_cmd_get_midgard_polygon_list(struct panvk_cmd_buffer *cmdbuf, - unsigned width, unsigned height, - bool has_draws); - -void -panvk_cmd_get_bifrost_tiler_context(struct panvk_cmd_buffer *cmdbuf, - unsigned width, unsigned height); - -void -panvk_cmd_alloc_fb_desc(struct panvk_cmd_buffer *cmdbuf); - -void -panvk_cmd_alloc_tls_desc(struct panvk_cmd_buffer *cmdbuf); - void panvk_pack_color(struct panvk_clear_value *out, const VkClearColorValue *in, @@ -729,6 +695,9 @@ union panvk_sysval_data { uint64_t u64[2]; }; +#define RSD_WORDS 16 +#define BLEND_DESC_WORDS 4 + struct panvk_pipeline { struct vk_object_base base; @@ -763,13 +732,13 @@ struct panvk_pipeline { struct { mali_ptr address; struct pan_shader_info info; - struct mali_renderer_state_packed rsd_template; + uint32_t rsd_template[RSD_WORDS]; bool required; bool dynamic_rsd; } fs; struct { - enum mali_draw_mode topology; + unsigned topology; bool writes_point_size; bool primitive_restart; } ia; @@ -791,13 +760,13 @@ struct panvk_pipeline { struct { bool z_test; bool z_write; - enum mali_func z_compare_func; + unsigned z_compare_func; bool s_test; struct { - enum mali_stencil_op fail_op; - enum mali_stencil_op pass_op; - enum mali_stencil_op 
z_fail_op; - enum mali_func compare_func; + unsigned fail_op; + unsigned pass_op; + unsigned z_fail_op; + unsigned compare_func; uint8_t compare_mask; uint8_t write_mask; uint8_t ref; @@ -814,7 +783,7 @@ struct panvk_pipeline { struct { struct pan_blend_state state; - struct mali_blend_packed bd_template[8]; + uint32_t bd_template[8][BLEND_DESC_WORDS]; struct { uint8_t index; uint16_t bifrost_factor; @@ -825,11 +794,6 @@ struct panvk_pipeline { VkRect2D scissor; }; -bool -panvk_blend_needs_lowering(const struct panfrost_device *dev, - const struct pan_blend_state *state, - unsigned rt); - struct panvk_image_level { VkDeviceSize offset; VkDeviceSize size; @@ -901,20 +865,22 @@ panvk_image_get_plane_size(const struct panvk_image *image, unsigned plane); unsigned panvk_image_get_total_size(const struct panvk_image *image); +#define TEXTURE_DESC_WORDS 8 + struct panvk_image_view { struct vk_object_base base; struct pan_image_view pview; VkFormat vk_format; struct panfrost_bo *bo; - struct { - struct mali_bifrost_texture_packed tex_desc; - } bifrost; + uint32_t desc[TEXTURE_DESC_WORDS]; }; +#define SAMPLER_DESC_WORDS 8 + struct panvk_sampler { struct vk_object_base base; - struct mali_midgard_sampler_packed desc; + uint32_t desc[SAMPLER_DESC_WORDS]; }; struct panvk_buffer_view { @@ -988,21 +954,6 @@ struct panvk_render_pass { struct panvk_subpass subpasses[0]; }; -static inline enum mali_func -panvk_translate_compare_func(VkCompareOp comp) -{ - STATIC_ASSERT(VK_COMPARE_OP_NEVER == (VkCompareOp)MALI_FUNC_NEVER); - STATIC_ASSERT(VK_COMPARE_OP_LESS == (VkCompareOp)MALI_FUNC_LESS); - STATIC_ASSERT(VK_COMPARE_OP_EQUAL == (VkCompareOp)MALI_FUNC_EQUAL); - STATIC_ASSERT(VK_COMPARE_OP_LESS_OR_EQUAL == (VkCompareOp)MALI_FUNC_LEQUAL); - STATIC_ASSERT(VK_COMPARE_OP_GREATER == (VkCompareOp)MALI_FUNC_GREATER); - STATIC_ASSERT(VK_COMPARE_OP_NOT_EQUAL == (VkCompareOp)MALI_FUNC_NOT_EQUAL); - STATIC_ASSERT(VK_COMPARE_OP_GREATER_OR_EQUAL == (VkCompareOp)MALI_FUNC_GEQUAL); - STATIC_ASSERT(VK_COMPARE_OP_ALWAYS == (VkCompareOp)MALI_FUNC_ALWAYS); - - return (enum mali_func)comp; -} - VK_DEFINE_HANDLE_CASTS(panvk_cmd_buffer, base, VkCommandBuffer, VK_OBJECT_TYPE_COMMAND_BUFFER) VK_DEFINE_HANDLE_CASTS(panvk_device, vk.base, VkDevice, VK_OBJECT_TYPE_DEVICE) VK_DEFINE_HANDLE_CASTS(panvk_instance, vk.base, VkInstance, VK_OBJECT_TYPE_INSTANCE) @@ -1030,4 +981,68 @@ VK_DEFINE_NONDISP_HANDLE_CASTS(panvk_sampler, base, VkSampler, VK_OBJECT_TYPE_SA VK_DEFINE_NONDISP_HANDLE_CASTS(panvk_shader_module, base, VkShaderModule, VK_OBJECT_TYPE_SHADER_MODULE) VK_DEFINE_NONDISP_HANDLE_CASTS(panvk_semaphore, base, VkSemaphore, VK_OBJECT_TYPE_SEMAPHORE) +#define panvk_arch_name(name, version) panvk_## version ## _ ## name + +#define panvk_arch_dispatch(arch, name, ...) 
\
+do { \
+   switch (arch) { \
+   case 5: panvk_arch_name(name, v5)(__VA_ARGS__); break; \
+   case 6: panvk_arch_name(name, v6)(__VA_ARGS__); break; \
+   case 7: panvk_arch_name(name, v7)(__VA_ARGS__); break; \
+   default: unreachable("Invalid arch"); \
+   } \
+} while (0)
+
+#ifdef PAN_ARCH
+#if PAN_ARCH == 5
+#define panvk_per_arch(name) panvk_arch_name(name, v5)
+#elif PAN_ARCH == 6
+#define panvk_per_arch(name) panvk_arch_name(name, v6)
+#elif PAN_ARCH == 7
+#define panvk_per_arch(name) panvk_arch_name(name, v7)
+#endif
+#include "panvk_vX_cmd_buffer.h"
+#include "panvk_vX_cs.h"
+#include "panvk_vX_meta.h"
+#else
+#define PAN_ARCH 5
+#define panvk_per_arch(name) panvk_arch_name(name, v5)
+#include "panvk_vX_cmd_buffer.h"
+#include "panvk_vX_cs.h"
+#include "panvk_vX_meta.h"
+#undef PAN_ARCH
+#undef panvk_per_arch
+#define PAN_ARCH 6
+#define panvk_per_arch(name) panvk_arch_name(name, v6)
+#include "panvk_vX_cmd_buffer.h"
+#include "panvk_vX_cs.h"
+#include "panvk_vX_meta.h"
+#undef PAN_ARCH
+#undef panvk_per_arch
+#define PAN_ARCH 7
+#define panvk_per_arch(name) panvk_arch_name(name, v7)
+#include "panvk_vX_cmd_buffer.h"
+#include "panvk_vX_cs.h"
+#include "panvk_vX_meta.h"
+#undef PAN_ARCH
+#undef panvk_per_arch
+#endif
+
+#ifdef PAN_ARCH
+bool
+panvk_per_arch(blend_needs_lowering)(const struct panfrost_device *dev,
+                                     const struct pan_blend_state *state,
+                                     unsigned rt);
+
+struct panvk_shader *
+panvk_per_arch(shader_create)(struct panvk_device *dev,
+                              gl_shader_stage stage,
+                              const VkPipelineShaderStageCreateInfo *stage_info,
+                              const struct panvk_pipeline_layout *layout,
+                              unsigned sysval_ubo,
+                              struct pan_blend_state *blend_state,
+                              bool static_blend_constants,
+                              const VkAllocationCallbacks *alloc);
+#endif
+
 #endif /* PANVK_PRIVATE_H */
diff --git a/src/panfrost/vulkan/panvk_shader.c b/src/panfrost/vulkan/panvk_shader.c
index a34e7f50da8..3832dc762a6 100644
--- a/src/panfrost/vulkan/panvk_shader.c
+++ b/src/panfrost/vulkan/panvk_shader.c
@@ -36,353 +36,6 @@
 #include "vk_util.h"
-
-static nir_shader *
-panvk_spirv_to_nir(const void *code,
-                   size_t codesize,
-                   gl_shader_stage stage,
-                   const char *entry_point_name,
-                   const VkSpecializationInfo *spec_info,
-                   const nir_shader_compiler_options *nir_options)
-{
-   /* TODO these are made-up */
-   const struct spirv_to_nir_options spirv_options = {
-      .caps = { false },
-      .ubo_addr_format = nir_address_format_32bit_index_offset,
-      .ssbo_addr_format = nir_address_format_32bit_index_offset,
-   };
-
-   /* convert VkSpecializationInfo */
-   uint32_t num_spec = 0;
-   struct nir_spirv_specialization *spec =
-      vk_spec_info_to_nir_spirv(spec_info, &num_spec);
-
-   nir_shader *nir = spirv_to_nir(code, codesize / sizeof(uint32_t), spec,
-                                  num_spec, stage, entry_point_name,
-                                  &spirv_options, nir_options);
-
-   free(spec);
-
-   assert(nir->info.stage == stage);
-   nir_validate_shader(nir, "after spirv_to_nir");
-
-   return nir;
-}
-
-struct panvk_lower_misc_ctx {
-   struct panvk_shader *shader;
-   const struct panvk_pipeline_layout *layout;
-};
-
-static unsigned
-get_fixed_sampler_index(nir_deref_instr *deref,
-                        const struct panvk_lower_misc_ctx *ctx)
-{
-   nir_variable *var = nir_deref_instr_get_variable(deref);
-   unsigned set = var->data.descriptor_set;
-   unsigned binding = var->data.binding;
-   const struct panvk_descriptor_set_binding_layout *bind_layout =
-      &ctx->layout->sets[set].layout->bindings[binding];
-
-   return bind_layout->sampler_idx + ctx->layout->sets[set].sampler_offset;
-}
-
-static unsigned
-get_fixed_texture_index(nir_deref_instr *deref,
-                        const struct 
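With the panvk_arch_name/panvk_arch_dispatch macros added to panvk_private.h above, gen-agnostic code can fan out to the per-gen static libraries built with -DPAN_ARCH, while per-gen sources get a stable panvk_per_arch() spelling. A sketch of a call site, assuming a hypothetical per-arch entry point init_device that each library would define as panvk_v5_init_device and so on:

   /* Hypothetical gen-agnostic caller: expands to a switch on pdev->arch that
    * calls panvk_v5_init_device()/panvk_v6_init_device()/panvk_v7_init_device(). */
   static void
   example_init_device(struct panvk_device *dev)
   {
      const struct panfrost_device *pdev = &dev->physical_device->pdev;

      panvk_arch_dispatch(pdev->arch, init_device, dev);
   }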
panvk_lower_misc_ctx *ctx) -{ - nir_variable *var = nir_deref_instr_get_variable(deref); - unsigned set = var->data.descriptor_set; - unsigned binding = var->data.binding; - const struct panvk_descriptor_set_binding_layout *bind_layout = - &ctx->layout->sets[set].layout->bindings[binding]; - - return bind_layout->tex_idx + ctx->layout->sets[set].tex_offset; -} - -static bool -lower_tex(nir_builder *b, nir_tex_instr *tex, - const struct panvk_lower_misc_ctx *ctx) -{ - bool progress = false; - int sampler_src_idx = nir_tex_instr_src_index(tex, nir_tex_src_sampler_deref); - - b->cursor = nir_before_instr(&tex->instr); - - if (sampler_src_idx >= 0) { - nir_deref_instr *deref = nir_src_as_deref(tex->src[sampler_src_idx].src); - tex->sampler_index = get_fixed_sampler_index(deref, ctx); - nir_tex_instr_remove_src(tex, sampler_src_idx); - progress = true; - } - - int tex_src_idx = nir_tex_instr_src_index(tex, nir_tex_src_texture_deref); - if (tex_src_idx >= 0) { - nir_deref_instr *deref = nir_src_as_deref(tex->src[tex_src_idx].src); - tex->texture_index = get_fixed_texture_index(deref, ctx); - nir_tex_instr_remove_src(tex, tex_src_idx); - progress = true; - } - - return progress; -} - -static void -lower_vulkan_resource_index(nir_builder *b, nir_intrinsic_instr *intr, - const struct panvk_lower_misc_ctx *ctx) -{ - nir_ssa_def *vulkan_idx = intr->src[0].ssa; - - unsigned set = nir_intrinsic_desc_set(intr); - unsigned binding = nir_intrinsic_binding(intr); - struct panvk_descriptor_set_layout *set_layout = ctx->layout->sets[set].layout; - struct panvk_descriptor_set_binding_layout *binding_layout = - &set_layout->bindings[binding]; - unsigned base; - - switch (binding_layout->type) { - case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: - case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: - base = binding_layout->ubo_idx + ctx->layout->sets[set].ubo_offset; - break; - case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: - case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: - base = binding_layout->ssbo_idx + ctx->layout->sets[set].ssbo_offset; - break; - default: - unreachable("Invalid descriptor type"); - break; - } - - b->cursor = nir_before_instr(&intr->instr); - nir_ssa_def *idx = nir_iadd(b, nir_imm_int(b, base), vulkan_idx); - nir_ssa_def_rewrite_uses(&intr->dest.ssa, idx); - nir_instr_remove(&intr->instr); -} - -static void -lower_load_vulkan_descriptor(nir_builder *b, nir_intrinsic_instr *intrin) -{ - /* Loading the descriptor happens as part of the load/store instruction so - * this is a no-op. 
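The lowering in lower_vulkan_resource_index above flattens Vulkan's (set, binding) addressing into the single linear index space the backend expects: the pipeline layout assigns each set a fixed offset, the binding contributes its slot within the set, and the shader's dynamic array index is added last. Schematically (illustrative helper mirroring the UBO case in the switch above):

   /* flat index = per-set offset + binding's slot within the set
    *            + dynamic array index from the shader */
   static unsigned
   example_flat_ubo_index(const struct panvk_pipeline_layout *layout,
                          unsigned set, unsigned binding, unsigned array_index)
   {
      const struct panvk_descriptor_set_binding_layout *bl =
         &layout->sets[set].layout->bindings[binding];

      return layout->sets[set].ubo_offset + bl->ubo_idx + array_index;
   }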
- */ - b->cursor = nir_before_instr(&intrin->instr); - nir_ssa_def *val = nir_vec2(b, intrin->src[0].ssa, nir_imm_int(b, 0)); - nir_ssa_def_rewrite_uses(&intrin->dest.ssa, val); - nir_instr_remove(&intrin->instr); -} - -static bool -lower_intrinsic(nir_builder *b, nir_intrinsic_instr *intr, - const struct panvk_lower_misc_ctx *ctx) -{ - switch (intr->intrinsic) { - case nir_intrinsic_vulkan_resource_index: - lower_vulkan_resource_index(b, intr, ctx); - return true; - case nir_intrinsic_load_vulkan_descriptor: - lower_load_vulkan_descriptor(b, intr); - return true; - default: - return false; - } - -} - -static bool -panvk_lower_misc_instr(nir_builder *b, - nir_instr *instr, - void *data) -{ - const struct panvk_lower_misc_ctx *ctx = data; - - switch (instr->type) { - case nir_instr_type_tex: - return lower_tex(b, nir_instr_as_tex(instr), ctx); - case nir_instr_type_intrinsic: - return lower_intrinsic(b, nir_instr_as_intrinsic(instr), ctx); - default: - return false; - } -} - -static bool -panvk_lower_misc(nir_shader *nir, const struct panvk_lower_misc_ctx *ctx) -{ - return nir_shader_instructions_pass(nir, panvk_lower_misc_instr, - nir_metadata_block_index | - nir_metadata_dominance, - (void *)ctx); -} - -static void -panvk_lower_blend(struct panfrost_device *pdev, - nir_shader *nir, - struct pan_blend_state *blend_state, - bool static_blend_constants) -{ - nir_lower_blend_options options = { - .logicop_enable = blend_state->logicop_enable, - .logicop_func = blend_state->logicop_func, - }; - - bool lower_blend = false; - for (unsigned rt = 0; rt < blend_state->rt_count; rt++) { - if (!panvk_blend_needs_lowering(pdev, blend_state, rt)) - continue; - - const struct pan_blend_rt_state *rt_state = &blend_state->rts[rt]; - options.rt[rt].colormask = rt_state->equation.color_mask; - options.format[rt] = rt_state->format; - if (!rt_state->equation.blend_enable) { - static const nir_lower_blend_channel replace = { - .func = BLEND_FUNC_ADD, - .src_factor = BLEND_FACTOR_ZERO, - .invert_src_factor = true, - .dst_factor = BLEND_FACTOR_ZERO, - .invert_dst_factor = false, - }; - - options.rt[rt].rgb = replace; - options.rt[rt].alpha = replace; - } else { - options.rt[rt].rgb.func = rt_state->equation.rgb_func; - options.rt[rt].rgb.src_factor = rt_state->equation.rgb_src_factor; - options.rt[rt].rgb.invert_src_factor = rt_state->equation.rgb_invert_src_factor; - options.rt[rt].rgb.dst_factor = rt_state->equation.rgb_dst_factor; - options.rt[rt].rgb.invert_dst_factor = rt_state->equation.rgb_invert_dst_factor; - options.rt[rt].alpha.func = rt_state->equation.alpha_func; - options.rt[rt].alpha.src_factor = rt_state->equation.alpha_src_factor; - options.rt[rt].alpha.invert_src_factor = rt_state->equation.alpha_invert_src_factor; - options.rt[rt].alpha.dst_factor = rt_state->equation.alpha_dst_factor; - options.rt[rt].alpha.invert_dst_factor = rt_state->equation.alpha_invert_dst_factor; - } - - lower_blend = true; - } - - /* FIXME: currently untested */ - assert(!lower_blend); - - if (lower_blend) - NIR_PASS_V(nir, nir_lower_blend, options); -} - -struct panvk_shader * -panvk_shader_create(struct panvk_device *dev, - gl_shader_stage stage, - const VkPipelineShaderStageCreateInfo *stage_info, - const struct panvk_pipeline_layout *layout, - unsigned sysval_ubo, - struct pan_blend_state *blend_state, - bool static_blend_constants, - const VkAllocationCallbacks *alloc) -{ - const struct panvk_shader_module *module = panvk_shader_module_from_handle(stage_info->module); - struct panfrost_device *pdev = 
&dev->physical_device->pdev; - struct panvk_shader *shader; - - shader = vk_zalloc2(&dev->vk.alloc, alloc, sizeof(*shader), 8, - VK_SYSTEM_ALLOCATION_SCOPE_COMMAND); - if (!shader) - return NULL; - - util_dynarray_init(&shader->binary, NULL); - - /* translate SPIR-V to NIR */ - assert(module->code_size % 4 == 0); - nir_shader *nir = panvk_spirv_to_nir(module->code, - module->code_size, - stage, stage_info->pName, - stage_info->pSpecializationInfo, - pan_shader_get_compiler_options(pdev)); - if (!nir) { - vk_free2(&dev->vk.alloc, alloc, shader); - return NULL; - } - - if (stage == MESA_SHADER_FRAGMENT) - panvk_lower_blend(pdev, nir, blend_state, static_blend_constants); - - /* multi step inlining procedure */ - NIR_PASS_V(nir, nir_lower_variable_initializers, nir_var_function_temp); - NIR_PASS_V(nir, nir_lower_returns); - NIR_PASS_V(nir, nir_inline_functions); - NIR_PASS_V(nir, nir_copy_prop); - NIR_PASS_V(nir, nir_opt_deref); - foreach_list_typed_safe(nir_function, func, node, &nir->functions) { - if (!func->is_entrypoint) - exec_node_remove(&func->node); - } - assert(exec_list_length(&nir->functions) == 1); - NIR_PASS_V(nir, nir_lower_variable_initializers, ~nir_var_function_temp); - - /* Split member structs. We do this before lower_io_to_temporaries so that - * it doesn't lower system values to temporaries by accident. - */ - NIR_PASS_V(nir, nir_split_var_copies); - NIR_PASS_V(nir, nir_split_per_member_structs); - - NIR_PASS_V(nir, nir_remove_dead_variables, - nir_var_shader_in | nir_var_shader_out | - nir_var_system_value | nir_var_mem_shared, - NULL); - - NIR_PASS_V(nir, nir_lower_io_to_temporaries, - nir_shader_get_entrypoint(nir), true, true); - - NIR_PASS_V(nir, nir_lower_indirect_derefs, - nir_var_shader_in | nir_var_shader_out, - UINT32_MAX); - - NIR_PASS_V(nir, nir_opt_copy_prop_vars); - NIR_PASS_V(nir, nir_opt_combine_stores, nir_var_all); - - NIR_PASS_V(nir, nir_lower_uniforms_to_ubo, true, false); - NIR_PASS_V(nir, nir_lower_explicit_io, - nir_var_mem_ubo | nir_var_mem_ssbo, - nir_address_format_32bit_index_offset); - - nir_assign_io_var_locations(nir, nir_var_shader_in, &nir->num_inputs, stage); - nir_assign_io_var_locations(nir, nir_var_shader_out, &nir->num_outputs, stage); - - NIR_PASS_V(nir, nir_lower_system_values); - NIR_PASS_V(nir, nir_lower_compute_system_values, NULL); - - NIR_PASS_V(nir, nir_lower_var_copies); - - struct panvk_lower_misc_ctx ctx = { - .shader = shader, - .layout = layout, - }; - NIR_PASS_V(nir, panvk_lower_misc, &ctx); - - nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir)); - if (unlikely(dev->physical_device->instance->debug_flags & PANVK_DEBUG_NIR)) { - fprintf(stderr, "translated nir:\n"); - nir_print_shader(nir, stderr); - } - - struct panfrost_compile_inputs inputs = { - .gpu_id = pdev->gpu_id, - .no_ubo_to_push = true, - .sysval_ubo = sysval_ubo, - }; - - pan_shader_compile(pdev, nir, &inputs, &shader->binary, &shader->info); - - /* Patch the descriptor count */ - shader->info.ubo_count = - shader->info.sysvals.sysval_count ? 
sysval_ubo + 1 : layout->num_ubos;
-   shader->info.sampler_count = layout->num_samplers;
-   shader->info.texture_count = layout->num_textures;
-
-   shader->sysval_ubo = sysval_ubo;
-
-   ralloc_free(nir);
-
-   return shader;
-}
-
 void
 panvk_shader_destroy(struct panvk_device *dev,
                      struct panvk_shader *shader,
diff --git a/src/panfrost/vulkan/panvk_vX_cmd_buffer.c b/src/panfrost/vulkan/panvk_vX_cmd_buffer.c
new file mode 100644
index 00000000000..ef991dd02cc
--- /dev/null
+++ b/src/panfrost/vulkan/panvk_vX_cmd_buffer.c
@@ -0,0 +1,1173 @@
+/*
+ * Copyright © 2021 Collabora Ltd.
+ *
+ * Derived from tu_cmd_buffer.c which is:
+ * Copyright © 2016 Red Hat.
+ * Copyright © 2016 Bas Nieuwenhuizen
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include "gen_macros.h"
+
+#include "panvk_cs.h"
+#include "panvk_private.h"
+#include "panfrost-quirks.h"
+
+#include "pan_blitter.h"
+#include "pan_cs.h"
+#include "pan_encoder.h"
+
+#include "util/rounding.h"
+#include "util/u_pack_color.h"
+#include "vk_format.h"
+
+static void
+panvk_cmd_prepare_fragment_job(struct panvk_cmd_buffer *cmdbuf)
+{
+   assert(cmdbuf->state.bind_point == VK_PIPELINE_BIND_POINT_GRAPHICS);
+
+   struct panvk_batch *batch = cmdbuf->state.batch;
+   struct panfrost_ptr job_ptr =
+      pan_pool_alloc_desc(&cmdbuf->desc_pool.base, FRAGMENT_JOB);
+
+   panvk_per_arch(emit_fragment_job)(cmdbuf->state.framebuffer,
+                                     cmdbuf->state.batch->fb.desc.gpu,
+                                     job_ptr.cpu);
+   cmdbuf->state.batch->fragment_job = job_ptr.gpu;
+   util_dynarray_append(&batch->jobs, void *, job_ptr.cpu);
+}
+
+#if PAN_ARCH == 5
+void
+panvk_per_arch(cmd_get_polygon_list)(struct panvk_cmd_buffer *cmdbuf,
+                                     unsigned width, unsigned height,
+                                     bool has_draws)
+{
+   struct panfrost_device *pdev = &cmdbuf->device->physical_device->pdev;
+   struct panvk_batch *batch = cmdbuf->state.batch;
+
+   if (batch->tiler.ctx.midgard.polygon_list)
+      return;
+
+   unsigned size =
+      panfrost_tiler_get_polygon_list_size(pdev, width, height, has_draws);
+   size = util_next_power_of_two(size);
+
+   /* Create the BO as invisible if we can. In the non-hierarchical tiler case,
+    * we need to write the polygon list manually because there's no WRITE_VALUE
+    * job in the chain. */
+   bool init_polygon_list = !has_draws && (pdev->quirks & MIDGARD_NO_HIER_TILING);
+   batch->tiler.ctx.midgard.polygon_list =
+      panfrost_bo_create(pdev, size,
+                         init_polygon_list ? 
0 : PAN_BO_INVISIBLE, + "Polygon list"); + + + if (init_polygon_list) { + assert(batch->tiler.ctx.midgard.polygon_list->ptr.cpu); + uint32_t *polygon_list_body = + batch->tiler.ctx.midgard.polygon_list->ptr.cpu + + MALI_MIDGARD_TILER_MINIMUM_HEADER_SIZE; + polygon_list_body[0] = 0xa0000000; + } + + batch->tiler.ctx.midgard.disable = !has_draws; +} +#endif + +#if PAN_ARCH <= 5 +static void +panvk_copy_fb_desc(struct panvk_cmd_buffer *cmdbuf, void *src) +{ + struct panvk_batch *batch = cmdbuf->state.batch; + + if (batch->fb.desc.gpu) + return; + + const struct panvk_subpass *subpass = cmdbuf->state.subpass; + uint32_t size = pan_size(MULTI_TARGET_FRAMEBUFFER); + + if (subpass->zs_attachment.idx != VK_ATTACHMENT_UNUSED) + size += pan_size(ZS_CRC_EXTENSION); + + size += MAX2(subpass->color_count, 1) * pan_size(RENDER_TARGET); + + memcpy(batch->fb.desc.cpu, src, size); +} +#endif + +void +panvk_per_arch(cmd_close_batch)(struct panvk_cmd_buffer *cmdbuf) +{ + struct panvk_batch *batch = cmdbuf->state.batch; +#if PAN_ARCH <= 5 + uint32_t tmp_fbd[(pan_size(MULTI_TARGET_FRAMEBUFFER) + + pan_size(ZS_CRC_EXTENSION) + + (MAX_RTS * pan_size(RENDER_TARGET))) / 4]; +#endif + + assert(batch); + + if (!batch->fragment_job && !batch->scoreboard.first_job) { + if (util_dynarray_num_elements(&batch->event_ops, struct panvk_event_op) == 0) { + /* Content-less batch, let's drop it */ + vk_free(&cmdbuf->pool->alloc, batch); + } else { + /* Batch has no jobs but is needed for synchronization, let's add a + * NULL job so the SUBMIT ioctl doesn't choke on it. + */ + struct panfrost_ptr ptr = pan_pool_alloc_desc(&cmdbuf->desc_pool.base, + JOB_HEADER); + util_dynarray_append(&batch->jobs, void *, ptr.cpu); + panfrost_add_job(&cmdbuf->desc_pool.base, &batch->scoreboard, + MALI_JOB_TYPE_NULL, false, false, 0, 0, + &ptr, false); + list_addtail(&batch->node, &cmdbuf->batches); + } + cmdbuf->state.batch = NULL; + return; + } + + struct panfrost_device *pdev = &cmdbuf->device->physical_device->pdev; + + list_addtail(&cmdbuf->state.batch->node, &cmdbuf->batches); + + struct pan_tls_info tlsinfo = { 0 }; + + if (cmdbuf->state.pipeline) { + tlsinfo.tls.size = cmdbuf->state.pipeline->tls_size; + tlsinfo.wls.size = cmdbuf->state.pipeline->wls_size; + } + + if (tlsinfo.tls.size) { + tlsinfo.tls.ptr = + pan_pool_alloc_aligned(&cmdbuf->tls_pool.base, tlsinfo.tls.size, 4096).gpu; + } + + if (tlsinfo.wls.size) { + unsigned wls_size = + pan_wls_mem_size(pdev, &cmdbuf->state.compute.wg_count, tlsinfo.wls.size); + tlsinfo.wls.ptr = + pan_pool_alloc_aligned(&cmdbuf->tls_pool.base, wls_size, 4096).gpu; + } + + if ((PAN_ARCH >= 6 || !cmdbuf->state.batch->fb.desc.cpu) && + cmdbuf->state.batch->tls.cpu) { + pan_emit_tls(pdev, &tlsinfo, cmdbuf->state.batch->tls.cpu); + } + + if (cmdbuf->state.batch->fb.desc.cpu) { +#if PAN_ARCH == 5 + panvk_per_arch(cmd_get_polygon_list)(cmdbuf, + batch->fb.info->width, + batch->fb.info->height, + false); + + mali_ptr polygon_list = + cmdbuf->state.batch->tiler.ctx.midgard.polygon_list->ptr.gpu; + struct panfrost_ptr writeval_job = + panfrost_scoreboard_initialize_tiler(&cmdbuf->desc_pool.base, + &cmdbuf->state.batch->scoreboard, + polygon_list); + if (writeval_job.cpu) + util_dynarray_append(&cmdbuf->state.batch->jobs, void *, writeval_job.cpu); +#endif + +#if PAN_ARCH <= 5 + void *fbd = tmp_fbd; +#else + void *fbd = cmdbuf->state.batch->fb.desc.cpu; +#endif + + cmdbuf->state.batch->fb.desc.gpu |= + panvk_per_arch(emit_fb)(cmdbuf->device, + cmdbuf->state.batch, + cmdbuf->state.subpass, + 
cmdbuf->state.framebuffer, + cmdbuf->state.clear, + &tlsinfo, &cmdbuf->state.batch->tiler.ctx, + fbd); + +#if PAN_ARCH <= 5 + panvk_copy_fb_desc(cmdbuf, tmp_fbd); + memcpy(cmdbuf->state.batch->tiler.templ, + pan_section_ptr(fbd, MULTI_TARGET_FRAMEBUFFER, TILER), + pan_size(TILER_CONTEXT)); +#endif + + panvk_cmd_prepare_fragment_job(cmdbuf); + } + + cmdbuf->state.batch = NULL; +} + +void +panvk_per_arch(CmdNextSubpass2)(VkCommandBuffer commandBuffer, + const VkSubpassBeginInfo *pSubpassBeginInfo, + const VkSubpassEndInfo *pSubpassEndInfo) +{ + VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer); + + panvk_per_arch(cmd_close_batch)(cmdbuf); + + cmdbuf->state.subpass++; + panvk_cmd_open_batch(cmdbuf); + memset(&cmdbuf->state.compute, 0, sizeof(cmdbuf->state.compute)); +} + +void +panvk_per_arch(CmdNextSubpass)(VkCommandBuffer cmd, VkSubpassContents contents) +{ + VkSubpassBeginInfo binfo = { + .sType = VK_STRUCTURE_TYPE_SUBPASS_BEGIN_INFO, + .contents = contents + }; + VkSubpassEndInfo einfo = { + .sType = VK_STRUCTURE_TYPE_SUBPASS_END_INFO, + }; + + panvk_per_arch(CmdNextSubpass2)(cmd, &binfo, &einfo); +} + +void +panvk_per_arch(cmd_alloc_fb_desc)(struct panvk_cmd_buffer *cmdbuf) +{ + struct panvk_batch *batch = cmdbuf->state.batch; + + if (batch->fb.desc.gpu) + return; + + const struct panvk_subpass *subpass = cmdbuf->state.subpass; + bool has_zs_ext = subpass->zs_attachment.idx != VK_ATTACHMENT_UNUSED; + unsigned tags = MALI_FBD_TAG_IS_MFBD; + + batch->fb.info = cmdbuf->state.framebuffer; + batch->fb.desc = + pan_pool_alloc_desc_aggregate(&cmdbuf->desc_pool.base, + PAN_DESC(MULTI_TARGET_FRAMEBUFFER), + PAN_DESC_ARRAY(has_zs_ext ? 1 : 0, ZS_CRC_EXTENSION), + PAN_DESC_ARRAY(MAX2(subpass->color_count, 1), RENDER_TARGET)); + + /* Tag the pointer */ + batch->fb.desc.gpu |= tags; +} + +void +panvk_per_arch(cmd_alloc_tls_desc)(struct panvk_cmd_buffer *cmdbuf) +{ + struct panvk_batch *batch = cmdbuf->state.batch; + + assert(batch); + if (batch->tls.gpu) + return; + + if (PAN_ARCH == 5 && + cmdbuf->state.bind_point == VK_PIPELINE_BIND_POINT_GRAPHICS) { + panvk_per_arch(cmd_alloc_fb_desc)(cmdbuf); + batch->tls = batch->fb.desc; + batch->tls.gpu &= ~63ULL; + } else { + batch->tls = + pan_pool_alloc_desc(&cmdbuf->desc_pool.base, LOCAL_STORAGE); + } +} + +static void +panvk_cmd_upload_sysval(struct panvk_cmd_buffer *cmdbuf, + unsigned id, union panvk_sysval_data *data) +{ + switch (PAN_SYSVAL_TYPE(id)) { + case PAN_SYSVAL_VIEWPORT_SCALE: + panvk_sysval_upload_viewport_scale(&cmdbuf->state.viewport, data); + break; + case PAN_SYSVAL_VIEWPORT_OFFSET: + panvk_sysval_upload_viewport_offset(&cmdbuf->state.viewport, data); + break; + case PAN_SYSVAL_VERTEX_INSTANCE_OFFSETS: + /* TODO: support base_{vertex,instance} */ + data->u32[0] = data->u32[1] = data->u32[2] = 0; + break; + default: + unreachable("Invalid static sysval"); + } +} + +static void +panvk_cmd_prepare_sysvals(struct panvk_cmd_buffer *cmdbuf) +{ + struct panvk_descriptor_state *desc_state = + &cmdbuf->descriptors[cmdbuf->state.bind_point]; + const struct panvk_pipeline *pipeline = cmdbuf->state.pipeline; + + if (!pipeline->num_sysvals) + return; + + for (unsigned i = 0; i < ARRAY_SIZE(desc_state->sysvals); i++) { + unsigned sysval_count = pipeline->sysvals[i].ids.sysval_count; + if (!sysval_count || + (desc_state->sysvals[i] && + !(cmdbuf->state.dirty & pipeline->sysvals[i].dirty_mask))) + continue; + + struct panfrost_ptr sysvals = + pan_pool_alloc_aligned(&cmdbuf->desc_pool.base, sysval_count * 16, 16); + union panvk_sysval_data *data = 
sysvals.cpu;
+
+      for (unsigned s = 0; s < pipeline->sysvals[i].ids.sysval_count; s++) {
+         panvk_cmd_upload_sysval(cmdbuf, pipeline->sysvals[i].ids.sysvals[s],
+                                 &data[s]);
+      }
+
+      desc_state->sysvals[i] = sysvals.gpu;
+   }
+}
+
+static void
+panvk_cmd_prepare_ubos(struct panvk_cmd_buffer *cmdbuf)
+{
+   struct panvk_descriptor_state *desc_state =
+      &cmdbuf->descriptors[cmdbuf->state.bind_point];
+   const struct panvk_pipeline *pipeline =
+      cmdbuf->state.pipeline;
+
+   if (!pipeline->num_ubos || desc_state->ubos)
+      return;
+
+   panvk_cmd_prepare_sysvals(cmdbuf);
+
+   struct panfrost_ptr ubos =
+      pan_pool_alloc_desc_array(&cmdbuf->desc_pool.base,
+                                pipeline->num_ubos,
+                                UNIFORM_BUFFER);
+
+   panvk_per_arch(emit_ubos)(pipeline, desc_state, ubos.cpu);
+
+   desc_state->ubos = ubos.gpu;
+}
+
+static void
+panvk_cmd_prepare_textures(struct panvk_cmd_buffer *cmdbuf)
+{
+   struct panvk_descriptor_state *desc_state =
+      &cmdbuf->descriptors[cmdbuf->state.bind_point];
+   const struct panvk_pipeline *pipeline = cmdbuf->state.pipeline;
+   unsigned num_textures = pipeline->layout->num_textures;
+
+   if (!num_textures || desc_state->textures)
+      return;
+
+   unsigned tex_entry_size = PAN_ARCH >= 6 ?
+                             pan_size(TEXTURE) :
+                             sizeof(mali_ptr);
+   struct panfrost_ptr textures =
+      pan_pool_alloc_aligned(&cmdbuf->desc_pool.base,
+                             num_textures * tex_entry_size,
+                             tex_entry_size);
+
+   void *texture = textures.cpu;
+
+   for (unsigned i = 0; i < ARRAY_SIZE(desc_state->sets); i++) {
+      if (!desc_state->sets[i].set) continue;
+
+      memcpy(texture,
+             desc_state->sets[i].set->textures,
+             desc_state->sets[i].set->layout->num_textures *
+             tex_entry_size);
+
+      texture += desc_state->sets[i].set->layout->num_textures *
+                 tex_entry_size;
+   }
+
+   desc_state->textures = textures.gpu;
+}
+
+static void
+panvk_cmd_prepare_samplers(struct panvk_cmd_buffer *cmdbuf)
+{
+   struct panvk_descriptor_state *desc_state =
+      &cmdbuf->descriptors[cmdbuf->state.bind_point];
+   const struct panvk_pipeline *pipeline = cmdbuf->state.pipeline;
+   unsigned num_samplers = pipeline->layout->num_samplers;
+
+   if (!num_samplers || desc_state->samplers)
+      return;
+
+   struct panfrost_ptr samplers =
+      pan_pool_alloc_desc_array(&cmdbuf->desc_pool.base,
+                                num_samplers,
+                                SAMPLER);
+
+   void *sampler = samplers.cpu;
+
+   for (unsigned i = 0; i < ARRAY_SIZE(desc_state->sets); i++) {
+      if (!desc_state->sets[i].set) continue;
+
+      memcpy(sampler,
+             desc_state->sets[i].set->samplers,
+             desc_state->sets[i].set->layout->num_samplers *
+             pan_size(SAMPLER));
+
+      /* sampler is a void *, so advance by bytes, like the texture copy
+       * above does. */
+      sampler += desc_state->sets[i].set->layout->num_samplers *
+                 pan_size(SAMPLER);
+   }
+
+   desc_state->samplers = samplers.gpu;
+}
+
+static void
+panvk_draw_prepare_fs_rsd(struct panvk_cmd_buffer *cmdbuf,
+                          struct panvk_draw_info *draw)
+{
+   const struct panvk_pipeline *pipeline = cmdbuf->state.pipeline;
+
+   if (!pipeline->fs.dynamic_rsd) {
+      draw->fs_rsd = pipeline->rsds[MESA_SHADER_FRAGMENT];
+      return;
+   }
+
+   if (!cmdbuf->state.fs_rsd) {
+      struct panfrost_ptr rsd =
+         pan_pool_alloc_desc_aggregate(&cmdbuf->desc_pool.base,
+                                       PAN_DESC(RENDERER_STATE),
+                                       PAN_DESC_ARRAY(pipeline->blend.state.rt_count,
+                                                      BLEND));
+
+      struct mali_renderer_state_packed rsd_dyn;
+      struct mali_renderer_state_packed *rsd_templ =
+         (struct mali_renderer_state_packed *)&pipeline->fs.rsd_template;
+
+      STATIC_ASSERT(sizeof(pipeline->fs.rsd_template) >= sizeof(*rsd_templ));
+
+      panvk_per_arch(emit_dyn_fs_rsd)(pipeline, &cmdbuf->state, &rsd_dyn);
+      pan_merge(rsd_dyn, (*rsd_templ), RENDERER_STATE);
+      memcpy(rsd.cpu, &rsd_dyn, sizeof(rsd_dyn));
+
+      void *bd = rsd.cpu + pan_size(RENDERER_STATE);
+      for (unsigned i = 0; i < pipeline->blend.state.rt_count; i++) {
+         if (pipeline->blend.constant[i].index != ~0) {
+            struct mali_blend_packed bd_dyn;
+            struct mali_blend_packed *bd_templ =
+               (struct mali_blend_packed *)&pipeline->blend.bd_template[i];
+
+            STATIC_ASSERT(sizeof(pipeline->blend.bd_template[0]) >= sizeof(*bd_templ));
+            panvk_per_arch(emit_blend_constant)(cmdbuf->device, pipeline, i,
+                                                cmdbuf->state.blend.constants[i],
+                                                &bd_dyn);
+            pan_merge(bd_dyn, (*bd_templ), BLEND);
+            memcpy(bd, &bd_dyn, sizeof(bd_dyn));
+         }
+         bd += pan_size(BLEND);
+      }
+
+      cmdbuf->state.fs_rsd = rsd.gpu;
+   }
+
+   draw->fs_rsd = cmdbuf->state.fs_rsd;
+}
+
+#if PAN_ARCH >= 6
+void
+panvk_per_arch(cmd_get_tiler_context)(struct panvk_cmd_buffer *cmdbuf,
+                                      unsigned width, unsigned height)
+{
+   struct panvk_batch *batch = cmdbuf->state.batch;
+
+   if (batch->tiler.descs.cpu)
+      return;
+
+   batch->tiler.descs =
+      pan_pool_alloc_desc_aggregate(&cmdbuf->desc_pool.base,
+                                    PAN_DESC(TILER_CONTEXT),
+                                    PAN_DESC(TILER_HEAP));
+   STATIC_ASSERT(sizeof(batch->tiler.templ) >=
+                 pan_size(TILER_CONTEXT) + pan_size(TILER_HEAP));
+
+   struct panfrost_ptr desc = {
+      .gpu = batch->tiler.descs.gpu,
+      .cpu = batch->tiler.templ,
+   };
+
+   panvk_per_arch(emit_tiler_context)(cmdbuf->device, width, height, &desc);
+   memcpy(batch->tiler.descs.cpu, batch->tiler.templ,
+          pan_size(TILER_CONTEXT) + pan_size(TILER_HEAP));
+   batch->tiler.ctx.bifrost = batch->tiler.descs.gpu;
+}
+#endif
+
+static void
+panvk_draw_prepare_tiler_context(struct panvk_cmd_buffer *cmdbuf,
+                                 struct panvk_draw_info *draw)
+{
+   struct panvk_batch *batch = cmdbuf->state.batch;
+
+#if PAN_ARCH == 5
+   panvk_per_arch(cmd_get_polygon_list)(cmdbuf,
+                                        batch->fb.info->width,
+                                        batch->fb.info->height,
+                                        true);
+#else
+   panvk_per_arch(cmd_get_tiler_context)(cmdbuf,
+                                         batch->fb.info->width,
+                                         batch->fb.info->height);
+#endif
+
+   draw->tiler_ctx = &batch->tiler.ctx;
+}
+
+static void
+panvk_draw_prepare_varyings(struct panvk_cmd_buffer *cmdbuf,
+                            struct panvk_draw_info *draw)
+{
+   const struct panvk_pipeline *pipeline = cmdbuf->state.pipeline;
+   struct panvk_varyings_info *varyings = &cmdbuf->state.varyings;
+
+   panvk_varyings_alloc(varyings, &cmdbuf->varying_pool.base,
+                        draw->vertex_count);
+
+   unsigned buf_count = panvk_varyings_buf_count(varyings);
+   struct panfrost_ptr bufs =
+      pan_pool_alloc_desc_array(&cmdbuf->desc_pool.base,
+                                buf_count, ATTRIBUTE_BUFFER);
+
+   panvk_per_arch(emit_varying_bufs)(varyings, bufs.cpu);
+   if (BITSET_TEST(varyings->active, VARYING_SLOT_POS)) {
+      draw->position = varyings->buf[varyings->varying[VARYING_SLOT_POS].buf].address +
+                       varyings->varying[VARYING_SLOT_POS].offset;
+   }
+
+   if (BITSET_TEST(varyings->active, VARYING_SLOT_PSIZ)) {
+      draw->psiz = varyings->buf[varyings->varying[VARYING_SLOT_PSIZ].buf].address +
+                   varyings->varying[VARYING_SLOT_PSIZ].offset;
+   } else if (pipeline->ia.topology == MALI_DRAW_MODE_LINES ||
+              pipeline->ia.topology == MALI_DRAW_MODE_LINE_STRIP ||
+              pipeline->ia.topology == MALI_DRAW_MODE_LINE_LOOP) {
+      draw->line_width = pipeline->dynamic_state_mask & PANVK_DYNAMIC_LINE_WIDTH ? 
+ cmdbuf->state.rast.line_width : pipeline->rast.line_width; + } else { + draw->line_width = 1.0f; + } + draw->varying_bufs = bufs.gpu; + + for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) { + if (!varyings->stage[s].count) continue; + + struct panfrost_ptr attribs = + pan_pool_alloc_desc_array(&cmdbuf->desc_pool.base, + varyings->stage[s].count, + ATTRIBUTE); + + panvk_per_arch(emit_varyings)(cmdbuf->device, varyings, s, attribs.cpu); + draw->stages[s].varyings = attribs.gpu; + } +} + +static void +panvk_draw_prepare_attributes(struct panvk_cmd_buffer *cmdbuf, + struct panvk_draw_info *draw) +{ + /* TODO: images */ + if (!cmdbuf->state.pipeline->attribs.buf_count) + return; + + if (cmdbuf->state.vb.attribs) { + draw->stages[MESA_SHADER_VERTEX].attributes = cmdbuf->state.vb.attribs; + draw->attribute_bufs = cmdbuf->state.vb.attrib_bufs; + return; + } + + unsigned buf_count = cmdbuf->state.pipeline->attribs.buf_count + + (PAN_ARCH >= 6 ? 1 : 0); + struct panfrost_ptr bufs = + pan_pool_alloc_desc_array(&cmdbuf->desc_pool.base, + buf_count * 2, ATTRIBUTE_BUFFER); + + panvk_per_arch(emit_attrib_bufs)(&cmdbuf->state.pipeline->attribs, + cmdbuf->state.vb.bufs, + cmdbuf->state.vb.count, + draw, bufs.cpu); + cmdbuf->state.vb.attrib_bufs = bufs.gpu; + + struct panfrost_ptr attribs = + pan_pool_alloc_desc_array(&cmdbuf->desc_pool.base, + cmdbuf->state.pipeline->attribs.attrib_count, + ATTRIBUTE); + + panvk_per_arch(emit_attribs)(cmdbuf->device, &cmdbuf->state.pipeline->attribs, + cmdbuf->state.vb.bufs, cmdbuf->state.vb.count, + attribs.cpu); + cmdbuf->state.vb.attribs = attribs.gpu; + draw->stages[MESA_SHADER_VERTEX].attributes = cmdbuf->state.vb.attribs; + draw->attribute_bufs = cmdbuf->state.vb.attrib_bufs; +} + +static void +panvk_draw_prepare_viewport(struct panvk_cmd_buffer *cmdbuf, + struct panvk_draw_info *draw) +{ + const struct panvk_pipeline *pipeline = cmdbuf->state.pipeline; + + if (pipeline->vpd) { + draw->viewport = pipeline->vpd; + } else if (cmdbuf->state.vpd) { + draw->viewport = cmdbuf->state.vpd; + } else { + struct panfrost_ptr vp = + pan_pool_alloc_desc(&cmdbuf->desc_pool.base, VIEWPORT); + + const VkViewport *viewport = + pipeline->dynamic_state_mask & PANVK_DYNAMIC_VIEWPORT ? + &cmdbuf->state.viewport : &pipeline->viewport; + const VkRect2D *scissor = + pipeline->dynamic_state_mask & PANVK_DYNAMIC_SCISSOR ? 
+ &cmdbuf->state.scissor : &pipeline->scissor; + + panvk_per_arch(emit_viewport)(viewport, scissor, vp.cpu); + draw->viewport = cmdbuf->state.vpd = vp.gpu; + } +} + +static void +panvk_draw_prepare_vertex_job(struct panvk_cmd_buffer *cmdbuf, + struct panvk_draw_info *draw) +{ + struct panvk_batch *batch = cmdbuf->state.batch; + struct panfrost_ptr ptr = + pan_pool_alloc_desc(&cmdbuf->desc_pool.base, COMPUTE_JOB); + + util_dynarray_append(&batch->jobs, void *, ptr.cpu); + draw->jobs.vertex = ptr; + panvk_per_arch(emit_vertex_job)(cmdbuf->state.pipeline, + draw, ptr.cpu); + +} + +static void +panvk_draw_prepare_tiler_job(struct panvk_cmd_buffer *cmdbuf, + struct panvk_draw_info *draw) +{ + struct panvk_batch *batch = cmdbuf->state.batch; + struct panfrost_ptr ptr = + pan_pool_alloc_desc(&cmdbuf->desc_pool.base, TILER_JOB); + + util_dynarray_append(&batch->jobs, void *, ptr.cpu); + draw->jobs.tiler = ptr; + panvk_per_arch(emit_tiler_job)(cmdbuf->state.pipeline, + draw, ptr.cpu); +} + +void +panvk_per_arch(CmdDraw)(VkCommandBuffer commandBuffer, + uint32_t vertexCount, + uint32_t instanceCount, + uint32_t firstVertex, + uint32_t firstInstance) +{ + VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer); + + struct panvk_batch *batch = cmdbuf->state.batch; + + /* There are only 16 bits in the descriptor for the job ID, make sure all + * the 3 (2 in Bifrost) jobs in this draw are in the same batch. + */ + if (batch->scoreboard.job_index >= (UINT16_MAX - 3)) { + panvk_per_arch(cmd_close_batch)(cmdbuf); + panvk_cmd_open_batch(cmdbuf); + batch = cmdbuf->state.batch; + } + + if (cmdbuf->state.pipeline->fs.required) + panvk_per_arch(cmd_alloc_fb_desc)(cmdbuf); + + panvk_per_arch(cmd_alloc_tls_desc)(cmdbuf); + panvk_cmd_prepare_ubos(cmdbuf); + panvk_cmd_prepare_textures(cmdbuf); + panvk_cmd_prepare_samplers(cmdbuf); + + /* TODO: indexed draws */ + + struct panvk_draw_info draw = { + .first_vertex = firstVertex, + .vertex_count = vertexCount, + .first_instance = firstInstance, + .instance_count = instanceCount, + .padded_vertex_count = panfrost_padded_vertex_count(vertexCount), + .offset_start = firstVertex, + .tls = batch->tls.gpu, + .fb = batch->fb.desc.gpu, + .ubos = cmdbuf->descriptors[VK_PIPELINE_BIND_POINT_GRAPHICS].ubos, + .textures = cmdbuf->descriptors[VK_PIPELINE_BIND_POINT_GRAPHICS].textures, + .samplers = cmdbuf->descriptors[VK_PIPELINE_BIND_POINT_GRAPHICS].samplers, + }; + + STATIC_ASSERT(sizeof(draw.invocation) >= sizeof(struct mali_invocation_packed)); + panfrost_pack_work_groups_compute((struct mali_invocation_packed *)&draw.invocation, + 1, vertexCount, instanceCount, 1, 1, 1, true, false); + panvk_draw_prepare_fs_rsd(cmdbuf, &draw); + panvk_draw_prepare_varyings(cmdbuf, &draw); + panvk_draw_prepare_attributes(cmdbuf, &draw); + panvk_draw_prepare_viewport(cmdbuf, &draw); + panvk_draw_prepare_tiler_context(cmdbuf, &draw); + panvk_draw_prepare_vertex_job(cmdbuf, &draw); + panvk_draw_prepare_tiler_job(cmdbuf, &draw); + + const struct panvk_pipeline *pipeline = cmdbuf->state.pipeline; + unsigned vjob_id = + panfrost_add_job(&cmdbuf->desc_pool.base, &batch->scoreboard, + MALI_JOB_TYPE_VERTEX, false, false, 0, 0, + &draw.jobs.vertex, false); + + if (pipeline->fs.required) { + panfrost_add_job(&cmdbuf->desc_pool.base, &batch->scoreboard, + MALI_JOB_TYPE_TILER, false, false, vjob_id, 0, + &draw.jobs.tiler, false); + } + + /* Clear the dirty flags all at once */ + cmdbuf->state.dirty = 0; +} + +VkResult +panvk_per_arch(EndCommandBuffer)(VkCommandBuffer commandBuffer) +{ + 
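/* Close any still-open batch so every recorded job belongs to a submittable batch. */
+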
VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer); + + if (cmdbuf->state.batch) + panvk_per_arch(cmd_close_batch)(cmdbuf); + + cmdbuf->status = PANVK_CMD_BUFFER_STATUS_EXECUTABLE; + + return cmdbuf->record_result; +} + +void +panvk_per_arch(CmdEndRenderPass2)(VkCommandBuffer commandBuffer, + const VkSubpassEndInfoKHR *pSubpassEndInfo) +{ + VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer); + + panvk_per_arch(cmd_close_batch)(cmdbuf); + vk_free(&cmdbuf->pool->alloc, cmdbuf->state.clear); + cmdbuf->state.batch = NULL; + cmdbuf->state.pass = NULL; + cmdbuf->state.subpass = NULL; + cmdbuf->state.framebuffer = NULL; + cmdbuf->state.clear = NULL; + memset(&cmdbuf->state.compute, 0, sizeof(cmdbuf->state.compute)); +} + +void +panvk_per_arch(CmdEndRenderPass)(VkCommandBuffer cmd) +{ + VkSubpassEndInfoKHR einfo = { + .sType = VK_STRUCTURE_TYPE_SUBPASS_END_INFO, + }; + + panvk_per_arch(CmdEndRenderPass2)(cmd, &einfo); +} + + +void +panvk_per_arch(CmdPipelineBarrier)(VkCommandBuffer commandBuffer, + VkPipelineStageFlags srcStageMask, + VkPipelineStageFlags destStageMask, + VkDependencyFlags dependencyFlags, + uint32_t memoryBarrierCount, + const VkMemoryBarrier *pMemoryBarriers, + uint32_t bufferMemoryBarrierCount, + const VkBufferMemoryBarrier *pBufferMemoryBarriers, + uint32_t imageMemoryBarrierCount, + const VkImageMemoryBarrier *pImageMemoryBarriers) +{ + VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer); + + /* Caches are flushed/invalidated at batch boundaries for now, nothing to do + * for memory barriers assuming we implement barriers with the creation of a + * new batch. + * FIXME: We can probably do better with a CacheFlush job that has the + * barrier flag set to true. + */ + if (cmdbuf->state.batch) { + panvk_per_arch(cmd_close_batch)(cmdbuf); + panvk_cmd_open_batch(cmdbuf); + } +} + +static void +panvk_add_set_event_operation(struct panvk_cmd_buffer *cmdbuf, + struct panvk_event *event, + enum panvk_event_op_type type) +{ + struct panvk_event_op op = { + .type = type, + .event = event, + }; + + if (cmdbuf->state.batch == NULL) { + /* No open batch, let's create a new one so this operation happens in + * the right order. + */ + panvk_cmd_open_batch(cmdbuf); + util_dynarray_append(&cmdbuf->state.batch->event_ops, + struct panvk_event_op, + op); + panvk_per_arch(cmd_close_batch)(cmdbuf); + } else { + /* Let's close the current batch so the operation executes before any + * future commands. + */ + util_dynarray_append(&cmdbuf->state.batch->event_ops, + struct panvk_event_op, + op); + panvk_per_arch(cmd_close_batch)(cmdbuf); + panvk_cmd_open_batch(cmdbuf); + } +} + +static void +panvk_add_wait_event_operation(struct panvk_cmd_buffer *cmdbuf, + struct panvk_event *event) +{ + struct panvk_event_op op = { + .type = PANVK_EVENT_OP_WAIT, + .event = event, + }; + + if (cmdbuf->state.batch == NULL) { + /* No open batch, let's create a new one and have it wait for this event. */ + panvk_cmd_open_batch(cmdbuf); + util_dynarray_append(&cmdbuf->state.batch->event_ops, + struct panvk_event_op, + op); + } else { + /* Let's close the current batch so any future commands wait on the + * event signal operation. 
+ */ + if (cmdbuf->state.batch->fragment_job || + cmdbuf->state.batch->scoreboard.first_job) { + panvk_per_arch(cmd_close_batch)(cmdbuf); + panvk_cmd_open_batch(cmdbuf); + } + util_dynarray_append(&cmdbuf->state.batch->event_ops, + struct panvk_event_op, + op); + } +} + +void +panvk_per_arch(CmdSetEvent)(VkCommandBuffer commandBuffer, + VkEvent _event, + VkPipelineStageFlags stageMask) +{ + VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer); + VK_FROM_HANDLE(panvk_event, event, _event); + + /* vkCmdSetEvent cannot be called inside a render pass */ + assert(cmdbuf->state.pass == NULL); + + panvk_add_set_event_operation(cmdbuf, event, PANVK_EVENT_OP_SET); +} + +void +panvk_per_arch(CmdResetEvent)(VkCommandBuffer commandBuffer, + VkEvent _event, + VkPipelineStageFlags stageMask) +{ + VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer); + VK_FROM_HANDLE(panvk_event, event, _event); + + /* vkCmdResetEvent cannot be called inside a render pass */ + assert(cmdbuf->state.pass == NULL); + + panvk_add_set_event_operation(cmdbuf, event, PANVK_EVENT_OP_RESET); +} + +void +panvk_per_arch(CmdWaitEvents)(VkCommandBuffer commandBuffer, + uint32_t eventCount, + const VkEvent *pEvents, + VkPipelineStageFlags srcStageMask, + VkPipelineStageFlags dstStageMask, + uint32_t memoryBarrierCount, + const VkMemoryBarrier *pMemoryBarriers, + uint32_t bufferMemoryBarrierCount, + const VkBufferMemoryBarrier *pBufferMemoryBarriers, + uint32_t imageMemoryBarrierCount, + const VkImageMemoryBarrier *pImageMemoryBarriers) +{ + VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer); + + assert(eventCount > 0); + + for (uint32_t i = 0; i < eventCount; i++) { + VK_FROM_HANDLE(panvk_event, event, pEvents[i]); + panvk_add_wait_event_operation(cmdbuf, event); + } +} + +static VkResult +panvk_reset_cmdbuf(struct panvk_cmd_buffer *cmdbuf) +{ + struct panfrost_device *pdev = &cmdbuf->device->physical_device->pdev; + + cmdbuf->record_result = VK_SUCCESS; + + list_for_each_entry_safe(struct panvk_batch, batch, &cmdbuf->batches, node) { + list_del(&batch->node); + util_dynarray_fini(&batch->jobs); + if (!pan_is_bifrost(pdev)) + panfrost_bo_unreference(batch->tiler.ctx.midgard.polygon_list); + + util_dynarray_fini(&batch->event_ops); + + vk_free(&cmdbuf->pool->alloc, batch); + } + + panvk_pool_reset(&cmdbuf->desc_pool); + panvk_pool_reset(&cmdbuf->tls_pool); + panvk_pool_reset(&cmdbuf->varying_pool); + cmdbuf->status = PANVK_CMD_BUFFER_STATUS_INITIAL; + + for (unsigned i = 0; i < MAX_BIND_POINTS; i++) + memset(&cmdbuf->descriptors[i].sets, 0, sizeof(cmdbuf->descriptors[i].sets)); + + return cmdbuf->record_result; +} + +static void +panvk_destroy_cmdbuf(struct panvk_cmd_buffer *cmdbuf) +{ + struct panfrost_device *pdev = &cmdbuf->device->physical_device->pdev; + struct panvk_device *device = cmdbuf->device; + + list_del(&cmdbuf->pool_link); + + list_for_each_entry_safe(struct panvk_batch, batch, &cmdbuf->batches, node) { + list_del(&batch->node); + util_dynarray_fini(&batch->jobs); + if (!pan_is_bifrost(pdev)) + panfrost_bo_unreference(batch->tiler.ctx.midgard.polygon_list); + + util_dynarray_fini(&batch->event_ops); + + vk_free(&cmdbuf->pool->alloc, batch); + } + + panvk_pool_cleanup(&cmdbuf->desc_pool); + panvk_pool_cleanup(&cmdbuf->tls_pool); + panvk_pool_cleanup(&cmdbuf->varying_pool); + vk_object_free(&device->vk, NULL, cmdbuf); +} + +static VkResult +panvk_create_cmdbuf(struct panvk_device *device, + struct panvk_cmd_pool *pool, + VkCommandBufferLevel level, + struct panvk_cmd_buffer **cmdbuf_out) +{ + struct 
panvk_cmd_buffer *cmdbuf; + + cmdbuf = vk_object_zalloc(&device->vk, NULL, sizeof(*cmdbuf), + VK_OBJECT_TYPE_COMMAND_BUFFER); + if (!cmdbuf) + return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); + + cmdbuf->device = device; + cmdbuf->level = level; + cmdbuf->pool = pool; + + if (pool) { + list_addtail(&cmdbuf->pool_link, &pool->active_cmd_buffers); + cmdbuf->queue_family_index = pool->queue_family_index; + } else { + /* Init the pool_link so we can safely call list_del when we destroy + * the command buffer + */ + list_inithead(&cmdbuf->pool_link); + cmdbuf->queue_family_index = PANVK_QUEUE_GENERAL; + } + + panvk_pool_init(&cmdbuf->desc_pool, &device->physical_device->pdev, + pool ? &pool->desc_bo_pool : NULL, 0, 64 * 1024, + "Command buffer descriptor pool", true); + panvk_pool_init(&cmdbuf->tls_pool, &device->physical_device->pdev, + pool ? &pool->tls_bo_pool : NULL, + PAN_BO_INVISIBLE, 64 * 1024, "TLS pool", false); + panvk_pool_init(&cmdbuf->varying_pool, &device->physical_device->pdev, + pool ? &pool->varying_bo_pool : NULL, + PAN_BO_INVISIBLE, 64 * 1024, "Varyings pool", false); + list_inithead(&cmdbuf->batches); + cmdbuf->status = PANVK_CMD_BUFFER_STATUS_INITIAL; + *cmdbuf_out = cmdbuf; + return VK_SUCCESS; +} + +VkResult +panvk_per_arch(AllocateCommandBuffers)(VkDevice _device, + const VkCommandBufferAllocateInfo *pAllocateInfo, + VkCommandBuffer *pCommandBuffers) +{ + VK_FROM_HANDLE(panvk_device, device, _device); + VK_FROM_HANDLE(panvk_cmd_pool, pool, pAllocateInfo->commandPool); + + VkResult result = VK_SUCCESS; + unsigned i; + + for (i = 0; i < pAllocateInfo->commandBufferCount; i++) { + struct panvk_cmd_buffer *cmdbuf = NULL; + + if (!list_is_empty(&pool->free_cmd_buffers)) { + cmdbuf = list_first_entry( + &pool->free_cmd_buffers, struct panvk_cmd_buffer, pool_link); + + list_del(&cmdbuf->pool_link); + list_addtail(&cmdbuf->pool_link, &pool->active_cmd_buffers); + + cmdbuf->level = pAllocateInfo->level; + vk_object_base_reset(&cmdbuf->base); + } else { + result = panvk_create_cmdbuf(device, pool, pAllocateInfo->level, &cmdbuf); + } + + if (result != VK_SUCCESS) + goto err_free_cmd_bufs; + + pCommandBuffers[i] = panvk_cmd_buffer_to_handle(cmdbuf); + } + + return VK_SUCCESS; + +err_free_cmd_bufs: + panvk_per_arch(FreeCommandBuffers)(_device, pAllocateInfo->commandPool, i, + pCommandBuffers); + for (unsigned j = 0; j < i; j++) + pCommandBuffers[j] = VK_NULL_HANDLE; + + return result; +} + +void +panvk_per_arch(FreeCommandBuffers)(VkDevice device, + VkCommandPool commandPool, + uint32_t commandBufferCount, + const VkCommandBuffer *pCommandBuffers) +{ + for (uint32_t i = 0; i < commandBufferCount; i++) { + VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, pCommandBuffers[i]); + + if (cmdbuf) { + if (cmdbuf->pool) { + list_del(&cmdbuf->pool_link); + panvk_reset_cmdbuf(cmdbuf); + list_addtail(&cmdbuf->pool_link, + &cmdbuf->pool->free_cmd_buffers); + } else + panvk_destroy_cmdbuf(cmdbuf); + } + } +} + +VkResult +panvk_per_arch(ResetCommandBuffer)(VkCommandBuffer commandBuffer, + VkCommandBufferResetFlags flags) +{ + VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer); + + return panvk_reset_cmdbuf(cmdbuf); +} + +VkResult +panvk_per_arch(BeginCommandBuffer)(VkCommandBuffer commandBuffer, + const VkCommandBufferBeginInfo *pBeginInfo) +{ + VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer); + VkResult result = VK_SUCCESS; + + if (cmdbuf->status != PANVK_CMD_BUFFER_STATUS_INITIAL) { + /* If the command buffer has already been reset with + * vkResetCommandBuffer, no need to do 
it again. + */ + result = panvk_reset_cmdbuf(cmdbuf); + if (result != VK_SUCCESS) + return result; + } + + memset(&cmdbuf->state, 0, sizeof(cmdbuf->state)); + + cmdbuf->status = PANVK_CMD_BUFFER_STATUS_RECORDING; + + return VK_SUCCESS; +} + +void +panvk_per_arch(DestroyCommandPool)(VkDevice _device, + VkCommandPool commandPool, + const VkAllocationCallbacks *pAllocator) +{ + VK_FROM_HANDLE(panvk_device, device, _device); + VK_FROM_HANDLE(panvk_cmd_pool, pool, commandPool); + + list_for_each_entry_safe(struct panvk_cmd_buffer, cmdbuf, + &pool->active_cmd_buffers, pool_link) + panvk_destroy_cmdbuf(cmdbuf); + + list_for_each_entry_safe(struct panvk_cmd_buffer, cmdbuf, + &pool->free_cmd_buffers, pool_link) + panvk_destroy_cmdbuf(cmdbuf); + + panvk_bo_pool_cleanup(&pool->desc_bo_pool); + panvk_bo_pool_cleanup(&pool->varying_bo_pool); + panvk_bo_pool_cleanup(&pool->tls_bo_pool); + vk_object_free(&device->vk, pAllocator, pool); +} + +VkResult +panvk_per_arch(ResetCommandPool)(VkDevice device, + VkCommandPool commandPool, + VkCommandPoolResetFlags flags) +{ + VK_FROM_HANDLE(panvk_cmd_pool, pool, commandPool); + VkResult result; + + list_for_each_entry(struct panvk_cmd_buffer, cmdbuf, &pool->active_cmd_buffers, + pool_link) + { + result = panvk_reset_cmdbuf(cmdbuf); + if (result != VK_SUCCESS) + return result; + } + + return VK_SUCCESS; +} + +void +panvk_per_arch(TrimCommandPool)(VkDevice device, + VkCommandPool commandPool, + VkCommandPoolTrimFlags flags) +{ + VK_FROM_HANDLE(panvk_cmd_pool, pool, commandPool); + + if (!pool) + return; + + list_for_each_entry_safe(struct panvk_cmd_buffer, cmdbuf, + &pool->free_cmd_buffers, pool_link) + panvk_destroy_cmdbuf(cmdbuf); +} diff --git a/src/panfrost/vulkan/panvk_varyings.c b/src/panfrost/vulkan/panvk_vX_cmd_buffer.h similarity index 53% rename from src/panfrost/vulkan/panvk_varyings.c rename to src/panfrost/vulkan/panvk_vX_cmd_buffer.h index 4d450a15b09..836f04db9c5 100644 --- a/src/panfrost/vulkan/panvk_varyings.c +++ b/src/panfrost/vulkan/panvk_vX_cmd_buffer.h @@ -21,38 +21,34 @@ * DEALINGS IN THE SOFTWARE. */ -#include "panvk_private.h" -#include "panvk_varyings.h" +#ifndef PANVK_PRIVATE_H +#error "Must be included from panvk_private.h" +#endif -#include "pan_pool.h" +#ifndef PAN_ARCH +#error "no arch" +#endif -unsigned -panvk_varyings_buf_count(const struct panvk_device *dev, - struct panvk_varyings_info *varyings) -{ - const struct panfrost_device *pdev = &dev->physical_device->pdev; - - return util_bitcount(varyings->buf_mask) + (pan_is_bifrost(pdev) ? 
1 : 0); -} +#include +#include "compiler/shader_enums.h" void -panvk_varyings_alloc(struct panvk_varyings_info *varyings, - struct pan_pool *varying_mem_pool, - unsigned vertex_count) -{ - for (unsigned i = 0; i < PANVK_VARY_BUF_MAX; i++) { - if (!(varyings->buf_mask & (1 << i))) continue; +panvk_per_arch(cmd_close_batch)(struct panvk_cmd_buffer *cmdbuf); - unsigned buf_idx = panvk_varying_buf_index(varyings, i); - unsigned size = varyings->buf[buf_idx].stride * vertex_count; - if (!size) - continue; - struct panfrost_ptr ptr = - pan_pool_alloc_aligned(varying_mem_pool, size, 64); +#if PAN_ARCH <= 5 +void +panvk_per_arch(cmd_get_polygon_list)(struct panvk_cmd_buffer *cmdbuf, + unsigned width, unsigned height, + bool has_draws); +#else +void +panvk_per_arch(cmd_get_tiler_context)(struct panvk_cmd_buffer *cmdbuf, + unsigned width, unsigned height); +#endif - varyings->buf[buf_idx].size = size; - varyings->buf[buf_idx].address = ptr.gpu; - varyings->buf[buf_idx].cpu = ptr.cpu; - } -} +void +panvk_per_arch(cmd_alloc_fb_desc)(struct panvk_cmd_buffer *cmdbuf); + +void +panvk_per_arch(cmd_alloc_tls_desc)(struct panvk_cmd_buffer *cmdbuf); diff --git a/src/panfrost/vulkan/panvk_vX_cs.c b/src/panfrost/vulkan/panvk_vX_cs.c new file mode 100644 index 00000000000..1fe98a3ad47 --- /dev/null +++ b/src/panfrost/vulkan/panvk_vX_cs.c @@ -0,0 +1,919 @@ +/* + * Copyright (C) 2021 Collabora Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include "gen_macros.h" + +#include "util/macros.h" +#include "compiler/shader_enums.h" + +#include "vk_util.h" + +#include "panfrost-quirks.h" +#include "pan_cs.h" +#include "pan_encoder.h" +#include "pan_pool.h" +#include "pan_shader.h" + +#include "panvk_cs.h" +#include "panvk_private.h" +#include "panvk_varyings.h" + +static enum mali_mipmap_mode +panvk_translate_sampler_mipmap_mode(VkSamplerMipmapMode mode) +{ + switch (mode) { + case VK_SAMPLER_MIPMAP_MODE_NEAREST: return MALI_MIPMAP_MODE_NEAREST; + case VK_SAMPLER_MIPMAP_MODE_LINEAR: return MALI_MIPMAP_MODE_TRILINEAR; + default: unreachable("Invalid mipmap mode"); + } +} + +static unsigned +panvk_translate_sampler_address_mode(VkSamplerAddressMode mode) +{ + switch (mode) { + case VK_SAMPLER_ADDRESS_MODE_REPEAT: return MALI_WRAP_MODE_REPEAT; + case VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT: return MALI_WRAP_MODE_MIRRORED_REPEAT; + case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE: return MALI_WRAP_MODE_CLAMP_TO_EDGE; + case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER: return MALI_WRAP_MODE_CLAMP_TO_BORDER; + case VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE: return MALI_WRAP_MODE_MIRRORED_CLAMP_TO_EDGE; + default: unreachable("Invalid wrap"); + } +} + +static void +panvk_translate_sampler_border_color(const VkSamplerCreateInfo *pCreateInfo, + uint32_t border_color[4]) +{ + const VkSamplerCustomBorderColorCreateInfoEXT *pBorderColor = + vk_find_struct_const(pCreateInfo->pNext, SAMPLER_CUSTOM_BORDER_COLOR_CREATE_INFO_EXT); + + switch (pCreateInfo->borderColor) { + case VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK: + case VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK: + border_color[0] = border_color[1] = border_color[2] = fui(0.0); + border_color[3] = + pCreateInfo->borderColor == VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK ? + fui(1.0) : fui(0.0); + break; + case VK_BORDER_COLOR_INT_OPAQUE_BLACK: + case VK_BORDER_COLOR_INT_TRANSPARENT_BLACK: + border_color[0] = border_color[1] = border_color[2] = 0; + border_color[3] = + pCreateInfo->borderColor == VK_BORDER_COLOR_INT_OPAQUE_BLACK ? + UINT_MAX : 0; + break; + case VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE: + border_color[0] = border_color[1] = border_color[2] = border_color[3] = fui(1.0); + break; + case VK_BORDER_COLOR_INT_OPAQUE_WHITE: + border_color[0] = border_color[1] = border_color[2] = border_color[3] = UINT_MAX; + break; + case VK_BORDER_COLOR_FLOAT_CUSTOM_EXT: + case VK_BORDER_COLOR_INT_CUSTOM_EXT: + memcpy(border_color, pBorderColor->customBorderColor.int32, sizeof(uint32_t) * 4); + break; + default: + unreachable("Invalid border color"); + } +} + +static mali_pixel_format +panvk_varying_hw_format(const struct panvk_device *dev, + const struct panvk_varyings_info *varyings, + gl_shader_stage stage, unsigned idx) +{ + const struct panfrost_device *pdev = &dev->physical_device->pdev; + gl_varying_slot loc = varyings->stage[stage].loc[idx]; + bool fs = stage == MESA_SHADER_FRAGMENT; + + switch (loc) { + case VARYING_SLOT_PNTC: + case VARYING_SLOT_PSIZ: + return (MALI_R16F << 12) | panfrost_get_default_swizzle(1); + case VARYING_SLOT_POS: + return ((fs ? 
MALI_RGBA32F : MALI_SNAP_4) << 12) | + panfrost_get_default_swizzle(4); + default: + assert(!panvk_varying_is_builtin(stage, loc)); + return pdev->formats[varyings->varying[loc].format].hw; + } +} + +static void +panvk_emit_varying(const struct panvk_device *dev, + const struct panvk_varyings_info *varyings, + gl_shader_stage stage, unsigned idx, + void *attrib) +{ + gl_varying_slot loc = varyings->stage[stage].loc[idx]; + bool fs = stage == MESA_SHADER_FRAGMENT; + + pan_pack(attrib, ATTRIBUTE, cfg) { + if (!panvk_varying_is_builtin(stage, loc)) { + cfg.buffer_index = varyings->varying[loc].buf; + cfg.offset = varyings->varying[loc].offset; + } else { + cfg.buffer_index = + panvk_varying_buf_index(varyings, + panvk_varying_buf_id(fs, loc)); + } + cfg.offset_enable = PAN_ARCH == 5; + cfg.format = panvk_varying_hw_format(dev, varyings, stage, idx); + } +} + +void +panvk_per_arch(emit_varyings)(const struct panvk_device *dev, + const struct panvk_varyings_info *varyings, + gl_shader_stage stage, + void *descs) +{ + struct mali_attribute_packed *attrib = descs; + + for (unsigned i = 0; i < varyings->stage[stage].count; i++) + panvk_emit_varying(dev, varyings, stage, i, attrib++); +} + +static void +panvk_emit_varying_buf(const struct panvk_varyings_info *varyings, + enum panvk_varying_buf_id id, void *buf) +{ + unsigned buf_idx = panvk_varying_buf_index(varyings, id); + + pan_pack(buf, ATTRIBUTE_BUFFER, cfg) { +#if PAN_ARCH == 5 + enum mali_attribute_special special_id = panvk_varying_special_buf_id(id); + if (special_id) { + cfg.type = 0; + cfg.special = special_id; + continue; + } +#endif + unsigned offset = varyings->buf[buf_idx].address & 63; + + cfg.stride = varyings->buf[buf_idx].stride; + cfg.size = varyings->buf[buf_idx].size + offset; + cfg.pointer = varyings->buf[buf_idx].address & ~63ULL; + } +} + +void +panvk_per_arch(emit_varying_bufs)(const struct panvk_varyings_info *varyings, + void *descs) +{ + struct mali_attribute_buffer_packed *buf = descs; + + for (unsigned i = 0; i < PANVK_VARY_BUF_MAX; i++) { + if (varyings->buf_mask & (1 << i)) + panvk_emit_varying_buf(varyings, i, buf++); + } + + /* We need an empty entry to stop prefetching on Bifrost */ +#if PAN_ARCH >= 6 + memset(buf, 0, sizeof(*buf)); +#endif +} + +static void +panvk_emit_attrib_buf(const struct panvk_attribs_info *info, + const struct panvk_draw_info *draw, + const struct panvk_attrib_buf *bufs, + unsigned buf_count, + unsigned idx, void *desc) +{ + const struct panvk_attrib_buf_info *buf_info = &info->buf[idx]; + +#if PAN_ARCH == 5 + if (buf_info->special) { + switch (buf_info->special_id) { + case PAN_VERTEX_ID: + panfrost_vertex_id(draw->padded_vertex_count, desc, + draw->instance_count > 1); + return; + case PAN_INSTANCE_ID: + panfrost_instance_id(draw->padded_vertex_count, desc, + draw->instance_count > 1); + return; + default: + unreachable("Invalid attribute ID"); + } + } +#endif + + assert(idx < buf_count); + const struct panvk_attrib_buf *buf = &bufs[idx]; + unsigned divisor = buf_info->per_instance ? + draw->padded_vertex_count : 0; + unsigned stride = divisor && draw->instance_count == 1 ? 
+                       0 : buf_info->stride;
+   mali_ptr addr = buf->address & ~63ULL;
+   unsigned size = buf->size + (buf->address & 63);
+
+   /* TODO: support instanced arrays */
+   pan_pack(desc, ATTRIBUTE_BUFFER, cfg) {
+      if (draw->instance_count > 1 && divisor) {
+         cfg.type = MALI_ATTRIBUTE_TYPE_1D_MODULUS;
+         cfg.divisor = divisor;
+      }
+
+      cfg.pointer = addr;
+      cfg.stride = stride;
+      cfg.size = size;
+   }
+}
+
+void
+panvk_per_arch(emit_attrib_bufs)(const struct panvk_attribs_info *info,
+                                 const struct panvk_attrib_buf *bufs,
+                                 unsigned buf_count,
+                                 const struct panvk_draw_info *draw,
+                                 void *descs)
+{
+   struct mali_attribute_buffer_packed *buf = descs;
+
+   for (unsigned i = 0; i < info->buf_count; i++)
+      panvk_emit_attrib_buf(info, draw, bufs, buf_count, i, buf++);
+
+   /* A NULL entry is needed to stop prefetching on Bifrost */
+#if PAN_ARCH >= 6
+   memset(buf, 0, sizeof(*buf));
+#endif
+}
+
+void
+panvk_per_arch(emit_sampler)(const VkSamplerCreateInfo *pCreateInfo,
+                             void *desc)
+{
+   uint32_t border_color[4];
+
+   panvk_translate_sampler_border_color(pCreateInfo, border_color);
+
+   pan_pack(desc, SAMPLER, cfg) {
+      cfg.magnify_nearest = pCreateInfo->magFilter == VK_FILTER_NEAREST;
+      cfg.minify_nearest = pCreateInfo->minFilter == VK_FILTER_NEAREST;
+      cfg.mipmap_mode = panvk_translate_sampler_mipmap_mode(pCreateInfo->mipmapMode);
+      cfg.normalized_coordinates = !pCreateInfo->unnormalizedCoordinates;
+
+      cfg.lod_bias = FIXED_16(pCreateInfo->mipLodBias, true);
+      cfg.minimum_lod = FIXED_16(pCreateInfo->minLod, false);
+      cfg.maximum_lod = FIXED_16(pCreateInfo->maxLod, false);
+      cfg.wrap_mode_s = panvk_translate_sampler_address_mode(pCreateInfo->addressModeU);
+      cfg.wrap_mode_t = panvk_translate_sampler_address_mode(pCreateInfo->addressModeV);
+      cfg.wrap_mode_r = panvk_translate_sampler_address_mode(pCreateInfo->addressModeW);
+      cfg.compare_function = panvk_per_arch(translate_sampler_compare_func)(pCreateInfo);
+      cfg.border_color_r = border_color[0];
+      cfg.border_color_g = border_color[1];
+      cfg.border_color_b = border_color[2];
+      cfg.border_color_a = border_color[3];
+   }
+}
+
+static void
+panvk_emit_attrib(const struct panvk_device *dev,
+                  const struct panvk_attribs_info *attribs,
+                  const struct panvk_attrib_buf *bufs,
+                  unsigned buf_count,
+                  unsigned idx, void *attrib)
+{
+   const struct panfrost_device *pdev = &dev->physical_device->pdev;
+
+   pan_pack(attrib, ATTRIBUTE, cfg) {
+      cfg.buffer_index = attribs->attrib[idx].buf;
+      cfg.offset = attribs->attrib[idx].offset +
+                   (bufs[cfg.buffer_index].address & 63);
+      cfg.format = pdev->formats[attribs->attrib[idx].format].hw;
+   }
+}
+
+void
+panvk_per_arch(emit_attribs)(const struct panvk_device *dev,
+                             const struct panvk_attribs_info *attribs,
+                             const struct panvk_attrib_buf *bufs,
+                             unsigned buf_count,
+                             void *descs)
+{
+   struct mali_attribute_packed *attrib = descs;
+
+   for (unsigned i = 0; i < attribs->attrib_count; i++)
+      panvk_emit_attrib(dev, attribs, bufs, buf_count, i, attrib++);
+}
+
+void
+panvk_per_arch(emit_ubo)(mali_ptr address, size_t size, void *desc)
+{
+   pan_pack(desc, UNIFORM_BUFFER, cfg) {
+      cfg.pointer = address;
+      cfg.entries = DIV_ROUND_UP(size, 16);
+   }
+}
+
+void
+panvk_per_arch(emit_ubos)(const struct panvk_pipeline *pipeline,
+                          const struct panvk_descriptor_state *state,
+                          void *descs)
+{
+   struct mali_uniform_buffer_packed *ubos = descs;
+
+   for (unsigned i = 0; i < ARRAY_SIZE(state->sets); i++) {
+      const struct panvk_descriptor_set_layout *set_layout =
+         pipeline->layout->sets[i].layout;
+      const struct panvk_descriptor_set *set =
state->sets[i].set;
+      unsigned offset = pipeline->layout->sets[i].ubo_offset;
+
+      if (!set_layout)
+         continue;
+
+      if (!set) {
+         unsigned num_ubos = (set_layout->num_dynoffsets != 0) + set_layout->num_ubos;
+         memset(&ubos[offset], 0, num_ubos * sizeof(*ubos));
+      } else {
+         memcpy(&ubos[offset], set->ubos, set_layout->num_ubos * sizeof(*ubos));
+         if (set_layout->num_dynoffsets) {
+            panvk_per_arch(emit_ubo)(state->sets[i].dynoffsets.gpu,
+                                     set->layout->num_dynoffsets * sizeof(uint32_t),
+                                     &ubos[offset + set_layout->num_ubos]);
+         }
+      }
+   }
+
+   for (unsigned i = 0; i < ARRAY_SIZE(pipeline->sysvals); i++) {
+      if (!pipeline->sysvals[i].ids.sysval_count)
+         continue;
+
+      panvk_per_arch(emit_ubo)(pipeline->sysvals[i].ubo ? : state->sysvals[i],
+                               pipeline->sysvals[i].ids.sysval_count * 16,
+                               &ubos[pipeline->sysvals[i].ubo_idx]);
+   }
+}
+
+void
+panvk_per_arch(emit_vertex_job)(const struct panvk_pipeline *pipeline,
+                                const struct panvk_draw_info *draw,
+                                void *job)
+{
+   void *section = pan_section_ptr(job, COMPUTE_JOB, INVOCATION);
+
+   memcpy(section, &draw->invocation, pan_size(INVOCATION));
+
+   pan_section_pack(job, COMPUTE_JOB, PARAMETERS, cfg) {
+      cfg.job_task_split = 5;
+   }
+
+   pan_section_pack(job, COMPUTE_JOB, DRAW, cfg) {
+      cfg.draw_descriptor_is_64b = true;
+#if PAN_ARCH == 5
+      cfg.texture_descriptor_is_64b = true;
+#endif
+      cfg.state = pipeline->rsds[MESA_SHADER_VERTEX];
+      cfg.attributes = draw->stages[MESA_SHADER_VERTEX].attributes;
+      cfg.attribute_buffers = draw->attribute_bufs;
+      cfg.varyings = draw->stages[MESA_SHADER_VERTEX].varyings;
+      cfg.varying_buffers = draw->varying_bufs;
+      cfg.thread_storage = draw->tls;
+      cfg.offset_start = draw->offset_start;
+      cfg.instance_size = draw->instance_count > 1 ?
+                          draw->padded_vertex_count : 1;
+      cfg.uniform_buffers = draw->ubos;
+      cfg.push_uniforms = draw->stages[MESA_SHADER_VERTEX].push_constants;
+      cfg.textures = draw->textures;
+      cfg.samplers = draw->samplers;
+   }
+
+   pan_section_pack(job, COMPUTE_JOB, DRAW_PADDING, cfg);
+}
+
+static void
+panvk_emit_tiler_primitive(const struct panvk_pipeline *pipeline,
+                           const struct panvk_draw_info *draw,
+                           void *prim)
+{
+   pan_pack(prim, PRIMITIVE, cfg) {
+      cfg.draw_mode = pipeline->ia.topology;
+      if (pipeline->ia.writes_point_size)
+         cfg.point_size_array_format = MALI_POINT_SIZE_ARRAY_FORMAT_FP16;
+
+      cfg.first_provoking_vertex = true;
+      if (pipeline->ia.primitive_restart)
+         cfg.primitive_restart = MALI_PRIMITIVE_RESTART_IMPLICIT;
+      cfg.job_task_split = 6;
+      /* TODO: indexed draws */
+      cfg.index_count = draw->vertex_count;
+   }
+}
+
+static void
+panvk_emit_tiler_primitive_size(const struct panvk_pipeline *pipeline,
+                                const struct panvk_draw_info *draw,
+                                void *primsz)
+{
+   pan_pack(primsz, PRIMITIVE_SIZE, cfg) {
+      if (pipeline->ia.writes_point_size) {
+         cfg.size_array = draw->psiz;
+      } else {
+         cfg.constant = draw->line_width;
+      }
+   }
+}
+
+static void
+panvk_emit_tiler_dcd(const struct panvk_pipeline *pipeline,
+                     const struct panvk_draw_info *draw,
+                     void *dcd)
+{
+   pan_pack(dcd, DRAW, cfg) {
+      cfg.four_components_per_vertex = true;
+      cfg.draw_descriptor_is_64b = true;
+#if PAN_ARCH == 5
+      cfg.texture_descriptor_is_64b = true;
+#endif
+      cfg.front_face_ccw = pipeline->rast.front_ccw;
+      cfg.cull_front_face = pipeline->rast.cull_front_face;
+      cfg.cull_back_face = pipeline->rast.cull_back_face;
+      cfg.position = draw->position;
+      cfg.state = draw->fs_rsd;
+      cfg.attributes = draw->stages[MESA_SHADER_FRAGMENT].attributes;
+      cfg.attribute_buffers = draw->attribute_bufs;
+      cfg.viewport = draw->viewport;
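+      /* Varying buffers are only referenced when the FS actually consumes varyings */
+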
cfg.varyings = draw->stages[MESA_SHADER_FRAGMENT].varyings;
+      cfg.varying_buffers = cfg.varyings ? draw->varying_bufs : 0;
+#if PAN_ARCH == 5
+      cfg.fbd = draw->fb;
+#else
+      cfg.thread_storage = draw->tls;
+#endif
+
+      /* For all primitives but lines DRAW.flat_shading_vertex must
+       * be set to 0 and the provoking vertex is selected with the
+       * PRIMITIVE.first_provoking_vertex field.
+       */
+      if (pipeline->ia.topology == MALI_DRAW_MODE_LINES ||
+          pipeline->ia.topology == MALI_DRAW_MODE_LINE_STRIP ||
+          pipeline->ia.topology == MALI_DRAW_MODE_LINE_LOOP) {
+         /* The logic is inverted on bifrost. */
+#if PAN_ARCH == 5
+         cfg.flat_shading_vertex = false;
+#else
+         cfg.flat_shading_vertex = true;
+#endif
+      }
+
+      cfg.offset_start = draw->offset_start;
+      cfg.instance_size = draw->instance_count > 1 ?
+                          draw->padded_vertex_count : 1;
+      cfg.uniform_buffers = draw->ubos;
+      cfg.push_uniforms = draw->stages[MESA_SHADER_FRAGMENT].push_constants;
+      cfg.textures = draw->textures;
+      cfg.samplers = draw->samplers;
+
+      /* TODO: occlusion queries */
+   }
+}
+
+void
+panvk_per_arch(emit_tiler_job)(const struct panvk_pipeline *pipeline,
+                               const struct panvk_draw_info *draw,
+                               void *job)
+{
+   void *section;
+
+   section = pan_section_ptr(job, TILER_JOB, INVOCATION);
+   memcpy(section, &draw->invocation, pan_size(INVOCATION));
+
+   section = pan_section_ptr(job, TILER_JOB, PRIMITIVE);
+   panvk_emit_tiler_primitive(pipeline, draw, section);
+
+   section = pan_section_ptr(job, TILER_JOB, PRIMITIVE_SIZE);
+   panvk_emit_tiler_primitive_size(pipeline, draw, section);
+
+   section = pan_section_ptr(job, TILER_JOB, DRAW);
+   panvk_emit_tiler_dcd(pipeline, draw, section);
+
+#if PAN_ARCH >= 6
+   pan_section_pack(job, TILER_JOB, TILER, cfg) {
+      cfg.address = draw->tiler_ctx->bifrost;
+   }
+   pan_section_pack(job, TILER_JOB, DRAW_PADDING, padding);
+   pan_section_pack(job, TILER_JOB, PADDING, padding);
+#endif
+}
+
+void
+panvk_per_arch(emit_fragment_job)(const struct panvk_framebuffer *fb,
+                                  mali_ptr fbdesc,
+                                  void *job)
+{
+   pan_section_pack(job, FRAGMENT_JOB, HEADER, header) {
+      header.type = MALI_JOB_TYPE_FRAGMENT;
+      header.index = 1;
+   }
+
+   pan_section_pack(job, FRAGMENT_JOB, PAYLOAD, payload) {
+      payload.bound_min_x = 0;
+      payload.bound_min_y = 0;
+
+      payload.bound_max_x = (fb->width - 1) >> MALI_TILE_SHIFT;
+      payload.bound_max_y = (fb->height - 1) >> MALI_TILE_SHIFT;
+      payload.framebuffer = fbdesc;
+   }
+}
+
+void
+panvk_per_arch(emit_viewport)(const VkViewport *viewport,
+                              const VkRect2D *scissor,
+                              void *vpd)
+{
+   /* The spec says "width must be greater than 0.0" */
+   assert(viewport->x >= 0);
+   int minx = (int)viewport->x;
+   int maxx = (int)(viewport->x + viewport->width);
+
+   /* Viewport height can be negative */
+   int miny = MIN2((int)viewport->y, (int)(viewport->y + viewport->height));
+   int maxy = MAX2((int)viewport->y, (int)(viewport->y + viewport->height));
+
+   assert(scissor->offset.x >= 0 && scissor->offset.y >= 0);
+   minx = MAX2(scissor->offset.x, minx);
+   miny = MAX2(scissor->offset.y, miny);
+   maxx = MIN2(scissor->offset.x + scissor->extent.width, maxx);
+   maxy = MIN2(scissor->offset.y + scissor->extent.height, maxy);
+
+   /* Make sure we don't end up with a max < min when width/height is 0 */
+   maxx = maxx > minx ? maxx - 1 : maxx;
+   maxy = maxy > miny ?
maxy - 1 : maxy;
+
+   assert(viewport->minDepth >= 0.0f && viewport->minDepth <= 1.0f);
+   assert(viewport->maxDepth >= 0.0f && viewport->maxDepth <= 1.0f);
+
+   pan_pack(vpd, VIEWPORT, cfg) {
+      cfg.scissor_minimum_x = minx;
+      cfg.scissor_minimum_y = miny;
+      cfg.scissor_maximum_x = maxx;
+      cfg.scissor_maximum_y = maxy;
+      cfg.minimum_z = MIN2(viewport->minDepth, viewport->maxDepth);
+      cfg.maximum_z = MAX2(viewport->minDepth, viewport->maxDepth);
+   }
+}
+
+#if PAN_ARCH >= 6
+static enum mali_bifrost_register_file_format
+bifrost_blend_type_from_nir(nir_alu_type nir_type)
+{
+   switch(nir_type) {
+   case 0: /* Render target not in use */
+      return 0;
+   case nir_type_float16:
+      return MALI_BIFROST_REGISTER_FILE_FORMAT_F16;
+   case nir_type_float32:
+      return MALI_BIFROST_REGISTER_FILE_FORMAT_F32;
+   case nir_type_int32:
+      return MALI_BIFROST_REGISTER_FILE_FORMAT_I32;
+   case nir_type_uint32:
+      return MALI_BIFROST_REGISTER_FILE_FORMAT_U32;
+   case nir_type_int16:
+      return MALI_BIFROST_REGISTER_FILE_FORMAT_I16;
+   case nir_type_uint16:
+      return MALI_BIFROST_REGISTER_FILE_FORMAT_U16;
+   default:
+      unreachable("Unsupported blend shader type for NIR alu type");
+   }
+}
+#endif
+
+void
+panvk_per_arch(emit_blend)(const struct panvk_device *dev,
+                           const struct panvk_pipeline *pipeline,
+                           unsigned rt, void *bd)
+{
+   const struct pan_blend_state *blend = &pipeline->blend.state;
+   const struct pan_blend_rt_state *rts = &blend->rts[rt];
+   bool dithered = false;
+
+   pan_pack(bd, BLEND, cfg) {
+      if (!blend->rt_count || !rts->equation.color_mask) {
+         cfg.enable = false;
+#if PAN_ARCH >= 6
+         cfg.bifrost.internal.mode = MALI_BIFROST_BLEND_MODE_OFF;
+#endif
+         continue;
+      }
+
+      cfg.srgb = util_format_is_srgb(rts->format);
+      cfg.load_destination = pan_blend_reads_dest(blend->rts[rt].equation);
+      cfg.round_to_fb_precision = dithered;
+
+#if PAN_ARCH <= 5
+      cfg.midgard.blend_shader = false;
+      pan_blend_to_fixed_function_equation(blend->rts[rt].equation,
+                                           &cfg.midgard.equation);
+      cfg.midgard.constant =
+         pan_blend_get_constant(pan_blend_constant_mask(blend->rts[rt].equation),
+                                blend->constants);
+#else
+      const struct panfrost_device *pdev = &dev->physical_device->pdev;
+      const struct util_format_description *format_desc =
+         util_format_description(rts->format);
+      unsigned chan_size = 0;
+      for (unsigned i = 0; i < format_desc->nr_channels; i++)
+         chan_size = MAX2(format_desc->channel[i].size, chan_size);
+
+      pan_blend_to_fixed_function_equation(blend->rts[rt].equation,
+                                           &cfg.bifrost.equation);
+
+      /* Fixed point constant */
+      float fconst =
+         pan_blend_get_constant(pan_blend_constant_mask(blend->rts[rt].equation),
+                                blend->constants);
+      uint16_t constant = fconst * ((1 << chan_size) - 1);
+      constant <<= 16 - chan_size;
+      cfg.bifrost.constant = constant;
+
+      if (pan_blend_is_opaque(blend->rts[rt].equation))
+         cfg.bifrost.internal.mode = MALI_BIFROST_BLEND_MODE_OPAQUE;
+      else
+         cfg.bifrost.internal.mode = MALI_BIFROST_BLEND_MODE_FIXED_FUNCTION;
+
+      /* If we want the conversion to work properly,
+       * num_comps must be set to 4
+       */
+      cfg.bifrost.internal.fixed_function.num_comps = 4;
+      cfg.bifrost.internal.fixed_function.conversion.memory_format =
+         panfrost_format_to_bifrost_blend(pdev, rts->format, dithered);
+      cfg.bifrost.internal.fixed_function.conversion.register_format =
+         bifrost_blend_type_from_nir(pipeline->fs.info.bifrost.blend[rt].type);
+      cfg.bifrost.internal.fixed_function.rt = rt;
+#endif
+   }
+}
+
+void
+panvk_per_arch(emit_blend_constant)(const struct panvk_device *dev,
+                                    const struct panvk_pipeline *pipeline,
+
unsigned rt, const float *constants, + void *bd) +{ + float constant = constants[pipeline->blend.constant[rt].index]; + + pan_pack(bd, BLEND, cfg) { + cfg.enable = false; +#if PAN_ARCH == 5 + cfg.midgard.constant = constant; +#else + cfg.bifrost.constant = constant * pipeline->blend.constant[rt].bifrost_factor; +#endif + } +} + +void +panvk_per_arch(emit_dyn_fs_rsd)(const struct panvk_pipeline *pipeline, + const struct panvk_cmd_state *state, + void *rsd) +{ + pan_pack(rsd, RENDERER_STATE, cfg) { + if (pipeline->dynamic_state_mask & (1 << VK_DYNAMIC_STATE_DEPTH_BIAS)) { + cfg.depth_units = state->rast.depth_bias.constant_factor * 2.0f; + cfg.depth_factor = state->rast.depth_bias.slope_factor; + cfg.depth_bias_clamp = state->rast.depth_bias.clamp; + } + + if (pipeline->dynamic_state_mask & (1 << VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK)) { + cfg.stencil_front.mask = state->zs.s_front.compare_mask; + cfg.stencil_back.mask = state->zs.s_back.compare_mask; + } + + if (pipeline->dynamic_state_mask & (1 << VK_DYNAMIC_STATE_STENCIL_WRITE_MASK)) { + cfg.stencil_mask_misc.stencil_mask_front = state->zs.s_front.write_mask; + cfg.stencil_mask_misc.stencil_mask_back = state->zs.s_back.write_mask; + } + + if (pipeline->dynamic_state_mask & (1 << VK_DYNAMIC_STATE_STENCIL_REFERENCE)) { + cfg.stencil_front.reference_value = state->zs.s_front.ref; + cfg.stencil_back.reference_value = state->zs.s_back.ref; + } + } +} + +void +panvk_per_arch(emit_base_fs_rsd)(const struct panvk_device *dev, + const struct panvk_pipeline *pipeline, + void *rsd) +{ + const struct panfrost_device *pdev = &dev->physical_device->pdev; + const struct pan_shader_info *info = &pipeline->fs.info; + + pan_pack(rsd, RENDERER_STATE, cfg) { + if (pipeline->fs.required) { + pan_shader_prepare_rsd(pdev, info, pipeline->fs.address, &cfg); + +#if PAN_ARCH == 5 + /* If either depth or stencil is enabled, discard matters */ + bool zs_enabled = + (pipeline->zs.z_test && pipeline->zs.z_compare_func != MALI_FUNC_ALWAYS) || + pipeline->zs.s_test; + + cfg.properties.midgard.work_register_count = info->work_reg_count; + cfg.properties.midgard.force_early_z = + info->fs.can_early_z && !pipeline->ms.alpha_to_coverage && + pipeline->zs.z_compare_func == MALI_FUNC_ALWAYS; + + + /* Workaround a hardware errata where early-z cannot be enabled + * when discarding even when the depth buffer is read-only, by + * lying to the hardware about the discard and setting the + * reads tilebuffer? flag to compensate */ + cfg.properties.midgard.shader_reads_tilebuffer = + info->fs.outputs_read || + (!zs_enabled && info->fs.can_discard); + cfg.properties.midgard.shader_contains_discard = + zs_enabled && info->fs.can_discard; +#else + cfg.properties.bifrost.allow_forward_pixel_to_kill = info->fs.can_fpk; +#endif + } else { +#if PAN_ARCH == 5 + cfg.shader.shader = 0x1; + cfg.properties.midgard.work_register_count = 1; + cfg.properties.depth_source = MALI_DEPTH_SOURCE_FIXED_FUNCTION; + cfg.properties.midgard.force_early_z = true; +#else + cfg.properties.bifrost.shader_modifies_coverage = true; + cfg.properties.bifrost.allow_forward_pixel_to_kill = true; + cfg.properties.bifrost.allow_forward_pixel_to_be_killed = true; + cfg.properties.bifrost.zs_update_operation = MALI_PIXEL_KILL_STRONG_EARLY; +#endif + } + + bool msaa = pipeline->ms.rast_samples > 1; + cfg.multisample_misc.multisample_enable = msaa; + cfg.multisample_misc.sample_mask = + msaa ? pipeline->ms.sample_mask : UINT16_MAX; + + cfg.multisample_misc.depth_function = + pipeline->zs.z_test ? 
pipeline->zs.z_compare_func : MALI_FUNC_ALWAYS; + + cfg.multisample_misc.depth_write_mask = pipeline->zs.z_write; + cfg.multisample_misc.fixed_function_near_discard = !pipeline->rast.clamp_depth; + cfg.multisample_misc.fixed_function_far_discard = !pipeline->rast.clamp_depth; + cfg.multisample_misc.shader_depth_range_fixed = true; + + cfg.stencil_mask_misc.stencil_enable = pipeline->zs.s_test; + cfg.stencil_mask_misc.alpha_to_coverage = pipeline->ms.alpha_to_coverage; + cfg.stencil_mask_misc.alpha_test_compare_function = MALI_FUNC_ALWAYS; + cfg.stencil_mask_misc.depth_range_1 = pipeline->rast.depth_bias.enable; + cfg.stencil_mask_misc.depth_range_2 = pipeline->rast.depth_bias.enable; + cfg.stencil_mask_misc.single_sampled_lines = pipeline->ms.rast_samples <= 1; + + if (!(pipeline->dynamic_state_mask & (1 << VK_DYNAMIC_STATE_DEPTH_BIAS))) { + cfg.depth_units = pipeline->rast.depth_bias.constant_factor * 2.0f; + cfg.depth_factor = pipeline->rast.depth_bias.slope_factor; + cfg.depth_bias_clamp = pipeline->rast.depth_bias.clamp; + } + + if (!(pipeline->dynamic_state_mask & (1 << VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK))) { + cfg.stencil_front.mask = pipeline->zs.s_front.compare_mask; + cfg.stencil_back.mask = pipeline->zs.s_back.compare_mask; + } + + if (!(pipeline->dynamic_state_mask & (1 << VK_DYNAMIC_STATE_STENCIL_WRITE_MASK))) { + cfg.stencil_mask_misc.stencil_mask_front = pipeline->zs.s_front.write_mask; + cfg.stencil_mask_misc.stencil_mask_back = pipeline->zs.s_back.write_mask; + } + + if (!(pipeline->dynamic_state_mask & (1 << VK_DYNAMIC_STATE_STENCIL_REFERENCE))) { + cfg.stencil_front.reference_value = pipeline->zs.s_front.ref; + cfg.stencil_back.reference_value = pipeline->zs.s_back.ref; + } + + cfg.stencil_front.compare_function = pipeline->zs.s_front.compare_func; + cfg.stencil_front.stencil_fail = pipeline->zs.s_front.fail_op; + cfg.stencil_front.depth_fail = pipeline->zs.s_front.z_fail_op; + cfg.stencil_front.depth_pass = pipeline->zs.s_front.pass_op; + cfg.stencil_back.compare_function = pipeline->zs.s_back.compare_func; + cfg.stencil_back.stencil_fail = pipeline->zs.s_back.fail_op; + cfg.stencil_back.depth_fail = pipeline->zs.s_back.z_fail_op; + cfg.stencil_back.depth_pass = pipeline->zs.s_back.pass_op; + } +} + +void +panvk_per_arch(emit_non_fs_rsd)(const struct panvk_device *dev, + const struct pan_shader_info *shader_info, + mali_ptr shader_ptr, + void *rsd) +{ + const struct panfrost_device *pdev = &dev->physical_device->pdev; + + assert(shader_info->stage != MESA_SHADER_FRAGMENT); + + pan_pack(rsd, RENDERER_STATE, cfg) { + pan_shader_prepare_rsd(pdev, shader_info, shader_ptr, &cfg); + } +} + +void +panvk_per_arch(emit_tiler_context)(const struct panvk_device *dev, + unsigned width, unsigned height, + const struct panfrost_ptr *descs) +{ +#if PAN_ARCH == 5 + unreachable("Not supported on v5"); +#else + const struct panfrost_device *pdev = &dev->physical_device->pdev; + + pan_pack(descs->cpu + pan_size(TILER_CONTEXT), TILER_HEAP, cfg) { + cfg.size = pdev->tiler_heap->size; + cfg.base = pdev->tiler_heap->ptr.gpu; + cfg.bottom = pdev->tiler_heap->ptr.gpu; + cfg.top = pdev->tiler_heap->ptr.gpu + pdev->tiler_heap->size; + } + + pan_pack(descs->cpu, TILER_CONTEXT, cfg) { + cfg.hierarchy_mask = 0x28; + cfg.fb_width = width; + cfg.fb_height = height; + cfg.heap = descs->gpu + pan_size(TILER_CONTEXT); + } +#endif +} + +unsigned +panvk_per_arch(emit_fb)(const struct panvk_device *dev, + const struct panvk_batch *batch, + const struct panvk_subpass *subpass, + const struct 
panvk_framebuffer *fb,
+                        const struct panvk_clear_value *clears,
+                        const struct pan_tls_info *tlsinfo,
+                        const struct pan_tiler_context *tilerctx,
+                        void *desc)
+{
+   const struct panfrost_device *pdev = &dev->physical_device->pdev;
+   struct panvk_image_view *view;
+   bool crc_valid[8] = { false };
+   struct pan_fb_info fbinfo = {
+      .width = fb->width,
+      .height = fb->height,
+      .extent.maxx = fb->width - 1,
+      .extent.maxy = fb->height - 1,
+      .nr_samples = 1,
+   };
+
+   for (unsigned cb = 0; cb < subpass->color_count; cb++) {
+      int idx = subpass->color_attachments[cb].idx;
+      view = idx != VK_ATTACHMENT_UNUSED ?
+             fb->attachments[idx].iview : NULL;
+      if (!view)
+         continue;
+      fbinfo.rts[cb].view = &view->pview;
+      fbinfo.rts[cb].clear = subpass->color_attachments[cb].clear;
+      fbinfo.rts[cb].crc_valid = &crc_valid[cb];
+
+      memcpy(fbinfo.rts[cb].clear_value, clears[idx].color,
+             sizeof(fbinfo.rts[cb].clear_value));
+      fbinfo.nr_samples =
+         MAX2(fbinfo.nr_samples, view->pview.image->layout.nr_samples);
+   }
+
+   if (subpass->zs_attachment.idx != VK_ATTACHMENT_UNUSED) {
+      view = fb->attachments[subpass->zs_attachment.idx].iview;
+      const struct util_format_description *fdesc =
+         util_format_description(view->pview.format);
+
+      fbinfo.nr_samples =
+         MAX2(fbinfo.nr_samples, view->pview.image->layout.nr_samples);
+
+      if (util_format_has_depth(fdesc)) {
+         fbinfo.zs.clear.z = subpass->zs_attachment.clear;
+         fbinfo.zs.clear_value.depth = clears[subpass->zs_attachment.idx].depth;
+         fbinfo.zs.view.zs = &view->pview;
+      }
+
+      if (util_format_has_stencil(fdesc)) {
+         fbinfo.zs.clear.s = subpass->zs_attachment.clear;
+         fbinfo.zs.clear_value.stencil = clears[subpass->zs_attachment.idx].stencil;
+         if (!fbinfo.zs.view.zs)
+            fbinfo.zs.view.s = &view->pview;
+      }
+   }
+
+   return pan_emit_fbd(pdev, &fbinfo, tlsinfo, tilerctx, desc);
+}
diff --git a/src/panfrost/vulkan/panvk_vX_cs.h b/src/panfrost/vulkan/panvk_vX_cs.h
new file mode 100644
index 00000000000..b8933ce79b2
--- /dev/null
+++ b/src/panfrost/vulkan/panvk_vX_cs.h
@@ -0,0 +1,137 @@
+/*
+ * Copyright (C) 2021 Collabora Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef PANVK_PRIVATE_H
+#error "Must be included from panvk_private.h"
+#endif
+
+#ifndef PAN_ARCH
+#error "no arch"
+#endif
+
+#include <vulkan/vulkan.h>
+#include "compiler/shader_enums.h"
+
+void
+panvk_per_arch(emit_varyings)(const struct panvk_device *dev,
+                              const struct panvk_varyings_info *varyings,
+                              gl_shader_stage stage,
+                              void *descs);
+
+void
+panvk_per_arch(emit_varying_bufs)(const struct panvk_varyings_info *varyings,
+                                  void *descs);
+
+void
+panvk_per_arch(emit_attrib_bufs)(const struct panvk_attribs_info *info,
+                                 const struct panvk_attrib_buf *bufs,
+                                 unsigned buf_count,
+                                 const struct panvk_draw_info *draw,
+                                 void *descs);
+
+void
+panvk_per_arch(emit_attribs)(const struct panvk_device *dev,
+                             const struct panvk_attribs_info *attribs,
+                             const struct panvk_attrib_buf *bufs,
+                             unsigned buf_count,
+                             void *descs);
+
+void
+panvk_per_arch(emit_ubo)(mali_ptr address, size_t size, void *desc);
+
+void
+panvk_per_arch(emit_ubos)(const struct panvk_pipeline *pipeline,
+                          const struct panvk_descriptor_state *state,
+                          void *descs);
+
+void
+panvk_per_arch(emit_sampler)(const VkSamplerCreateInfo *pCreateInfo,
+                             void *desc);
+
+void
+panvk_per_arch(emit_vertex_job)(const struct panvk_pipeline *pipeline,
+                                const struct panvk_draw_info *draw,
+                                void *job);
+
+void
+panvk_per_arch(emit_tiler_job)(const struct panvk_pipeline *pipeline,
+                               const struct panvk_draw_info *draw,
+                               void *job);
+
+void
+panvk_per_arch(emit_fragment_job)(const struct panvk_framebuffer *fb,
+                                  mali_ptr fbdesc,
+                                  void *job);
+
+void
+panvk_per_arch(emit_viewport)(const VkViewport *viewport,
+                              const VkRect2D *scissor,
+                              void *vpd);
+
+void
+panvk_per_arch(emit_blend)(const struct panvk_device *dev,
+                           const struct panvk_pipeline *pipeline,
+                           unsigned rt, void *bd);
+
+void
+panvk_per_arch(emit_blend_constant)(const struct panvk_device *dev,
+                                    const struct panvk_pipeline *pipeline,
+                                    unsigned rt, const float *constants,
+                                    void *bd);
+
+void
+panvk_per_arch(emit_dyn_fs_rsd)(const struct panvk_pipeline *pipeline,
+                                const struct panvk_cmd_state *state,
+                                void *rsd);
+
+void
+panvk_per_arch(emit_base_fs_rsd)(const struct panvk_device *dev,
+                                 const struct panvk_pipeline *pipeline,
+                                 void *rsd);
+
+void
+panvk_per_arch(emit_non_fs_rsd)(const struct panvk_device *dev,
+                                const struct pan_shader_info *shader_info,
+                                mali_ptr shader_ptr,
+                                void *rsd);
+
+void
+panvk_per_arch(emit_tiler_context)(const struct panvk_device *dev,
+                                   unsigned width, unsigned height,
+                                   const struct panfrost_ptr *descs);
+
+unsigned
+panvk_per_arch(emit_fb)(const struct panvk_device *dev,
+                        const struct panvk_batch *batch,
+                        const struct panvk_subpass *subpass,
+                        const struct panvk_framebuffer *fb,
+                        const struct panvk_clear_value *clears,
+                        const struct pan_tls_info *tlsinfo,
+                        const struct pan_tiler_context *tilerctx,
+                        void *desc);
diff --git a/src/panfrost/vulkan/panvk_vX_descriptor_set.c b/src/panfrost/vulkan/panvk_vX_descriptor_set.c
new file mode 100644
index 00000000000..18e1b994d1a
--- /dev/null
+++ b/src/panfrost/vulkan/panvk_vX_descriptor_set.c
@@ -0,0 +1,375 @@
+/*
+ * Copyright © 2021 Collabora Ltd.
+ *
+ * Derived from:
+ * Copyright © 2016 Red Hat.
+ * Copyright © 2016 Bas Nieuwenhuizen + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include "gen_macros.h" + +#include "panvk_private.h" + +#include +#include +#include +#include +#include + +#include "util/mesa-sha1.h" +#include "vk_descriptors.h" +#include "vk_util.h" + +#include "pan_bo.h" +#include "panvk_cs.h" + +static VkResult +panvk_per_arch(descriptor_set_create)(struct panvk_device *device, + struct panvk_descriptor_pool *pool, + const struct panvk_descriptor_set_layout *layout, + struct panvk_descriptor_set **out_set) +{ + struct panvk_descriptor_set *set; + + /* TODO: Allocate from the pool! */ + set = vk_object_zalloc(&device->vk, NULL, + sizeof(struct panvk_descriptor_set), + VK_OBJECT_TYPE_DESCRIPTOR_SET); + if (!set) + return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); + + set->layout = layout; + set->descs = vk_alloc(&device->vk.alloc, + sizeof(*set->descs) * layout->num_descs, 8, + VK_OBJECT_TYPE_DESCRIPTOR_SET); + if (!set->descs) + goto err_free_set; + + if (layout->num_ubos) { + set->ubos = vk_zalloc(&device->vk.alloc, + pan_size(UNIFORM_BUFFER) * layout->num_ubos, 8, + VK_OBJECT_TYPE_DESCRIPTOR_SET); + if (!set->ubos) + goto err_free_set; + } + + if (layout->num_samplers) { + set->samplers = vk_zalloc(&device->vk.alloc, + pan_size(SAMPLER) * layout->num_samplers, 8, + VK_OBJECT_TYPE_DESCRIPTOR_SET); + if (!set->samplers) + goto err_free_set; + } + + if (layout->num_textures) { + set->textures = + vk_zalloc(&device->vk.alloc, + (PAN_ARCH >= 6 ? 
pan_size(TEXTURE) : sizeof(mali_ptr)) *
+                   layout->num_textures,
+                   8, VK_OBJECT_TYPE_DESCRIPTOR_SET);
+      if (!set->textures)
+         goto err_free_set;
+   }
+
+   for (unsigned i = 0; i < layout->binding_count; i++) {
+      if (!layout->bindings[i].immutable_samplers)
+         continue;
+
+      for (unsigned j = 0; j < layout->bindings[i].array_size; j++) {
+         set->descs[layout->bindings[i].desc_idx + j].image.sampler =
+            layout->bindings[i].immutable_samplers[j];
+      }
+   }
+
+   *out_set = set;
+   return VK_SUCCESS;
+
+err_free_set:
+   vk_free(&device->vk.alloc, set->textures);
+   vk_free(&device->vk.alloc, set->samplers);
+   vk_free(&device->vk.alloc, set->ubos);
+   vk_free(&device->vk.alloc, set->descs);
+   vk_object_free(&device->vk, NULL, set);
+   return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+}
+
+VkResult
+panvk_per_arch(AllocateDescriptorSets)(VkDevice _device,
+                                       const VkDescriptorSetAllocateInfo *pAllocateInfo,
+                                       VkDescriptorSet *pDescriptorSets)
+{
+   VK_FROM_HANDLE(panvk_device, device, _device);
+   VK_FROM_HANDLE(panvk_descriptor_pool, pool, pAllocateInfo->descriptorPool);
+   VkResult result;
+   unsigned i;
+
+   for (i = 0; i < pAllocateInfo->descriptorSetCount; i++) {
+      VK_FROM_HANDLE(panvk_descriptor_set_layout, layout,
+                     pAllocateInfo->pSetLayouts[i]);
+      struct panvk_descriptor_set *set = NULL;
+
+      result = panvk_per_arch(descriptor_set_create)(device, pool, layout, &set);
+      if (result != VK_SUCCESS)
+         goto err_free_sets;
+
+      pDescriptorSets[i] = panvk_descriptor_set_to_handle(set);
+   }
+
+   return VK_SUCCESS;
+
+err_free_sets:
+   panvk_FreeDescriptorSets(_device, pAllocateInfo->descriptorPool, i, pDescriptorSets);
+   for (i = 0; i < pAllocateInfo->descriptorSetCount; i++)
+      pDescriptorSets[i] = VK_NULL_HANDLE;
+
+   return result;
+}
+
+static void
+panvk_set_image_desc(struct panvk_descriptor *desc,
+                     const VkDescriptorImageInfo *pImageInfo)
+{
+   VK_FROM_HANDLE(panvk_sampler, sampler, pImageInfo->sampler);
+   VK_FROM_HANDLE(panvk_image_view, image_view, pImageInfo->imageView);
+   desc->image.sampler = sampler;
+   desc->image.view = image_view;
+   desc->image.layout = pImageInfo->imageLayout;
+}
+
+static void
+panvk_set_texel_buffer_view_desc(struct panvk_descriptor *desc,
+                                 const VkBufferView *pTexelBufferView)
+{
+   VK_FROM_HANDLE(panvk_buffer_view, buffer_view, *pTexelBufferView);
+   desc->buffer_view = buffer_view;
+}
+
+static void
+panvk_set_buffer_info_desc(struct panvk_descriptor *desc,
+                           const VkDescriptorBufferInfo *pBufferInfo)
+{
+   VK_FROM_HANDLE(panvk_buffer, buffer, pBufferInfo->buffer);
+   desc->buffer_info.buffer = buffer;
+   desc->buffer_info.offset = pBufferInfo->offset;
+   desc->buffer_info.range = pBufferInfo->range;
+}
+
+static void
+panvk_per_arch(set_ubo_desc)(void *ubo,
+                             const VkDescriptorBufferInfo *pBufferInfo)
+{
+   VK_FROM_HANDLE(panvk_buffer, buffer, pBufferInfo->buffer);
+   size_t size = pBufferInfo->range == VK_WHOLE_SIZE ?
+ (buffer->bo->size - pBufferInfo->offset) : + pBufferInfo->range; + + panvk_per_arch(emit_ubo)(buffer->bo->ptr.gpu + pBufferInfo->offset, size, ubo); +} + +static void +panvk_set_sampler_desc(void *desc, + const VkDescriptorImageInfo *pImageInfo) +{ + VK_FROM_HANDLE(panvk_sampler, sampler, pImageInfo->sampler); + + memcpy(desc, &sampler->desc, sizeof(sampler->desc)); +} + +static void +panvk_per_arch(set_texture_desc)(struct panvk_descriptor_set *set, + unsigned idx, + const VkDescriptorImageInfo *pImageInfo) +{ + VK_FROM_HANDLE(panvk_image_view, view, pImageInfo->imageView); + +#if PAN_ARCH > 5 + memcpy(&((struct mali_bifrost_texture_packed *)set->textures)[idx], + view->desc, pan_size(TEXTURE)); +#else + ((mali_ptr *)set->textures)[idx] = view->bo->ptr.gpu; +#endif +} + +static void +panvk_per_arch(write_descriptor_set)(struct panvk_device *dev, + const VkWriteDescriptorSet *pDescriptorWrite) +{ + VK_FROM_HANDLE(panvk_descriptor_set, set, pDescriptorWrite->dstSet); + const struct panvk_descriptor_set_layout *layout = set->layout; + unsigned dest_offset = pDescriptorWrite->dstArrayElement; + unsigned binding = pDescriptorWrite->dstBinding; + struct mali_uniform_buffer_packed *ubos = set->ubos; + struct mali_midgard_sampler_packed *samplers = set->samplers; + unsigned src_offset = 0; + + while (src_offset < pDescriptorWrite->descriptorCount && + binding < layout->binding_count) { + const struct panvk_descriptor_set_binding_layout *binding_layout = + &layout->bindings[binding]; + + if (!binding_layout->array_size) { + binding++; + dest_offset = 0; + continue; + } + + assert(pDescriptorWrite->descriptorType == binding_layout->type); + unsigned ndescs = MIN2(pDescriptorWrite->descriptorCount - src_offset, + binding_layout->array_size - dest_offset); + struct panvk_descriptor *descs = &set->descs[binding_layout->desc_idx + dest_offset]; + assert(binding_layout->desc_idx + dest_offset + ndescs <= set->layout->num_descs); + + switch (pDescriptorWrite->descriptorType) { + case VK_DESCRIPTOR_TYPE_SAMPLER: + case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: + case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: + for (unsigned i = 0; i < ndescs; i++) { + const VkDescriptorImageInfo *info = &pDescriptorWrite->pImageInfo[src_offset + i]; + + if ((pDescriptorWrite->descriptorType == VK_DESCRIPTOR_TYPE_SAMPLER || + pDescriptorWrite->descriptorType == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) && + !binding_layout->immutable_samplers) { + unsigned sampler = binding_layout->sampler_idx + dest_offset + i; + + panvk_set_sampler_desc(&samplers[sampler], info); + } + + if (pDescriptorWrite->descriptorType == VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE || + pDescriptorWrite->descriptorType == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) { + unsigned tex = binding_layout->tex_idx + dest_offset + i; + + panvk_per_arch(set_texture_desc)(set, tex, info); + } + } + break; + + case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: + case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: + for (unsigned i = 0; i < ndescs; i++) + panvk_set_image_desc(&descs[i], &pDescriptorWrite->pImageInfo[src_offset + i]); + break; + + case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: + for (unsigned i = 0; i < ndescs; i++) + panvk_set_texel_buffer_view_desc(&descs[i], &pDescriptorWrite->pTexelBufferView[src_offset + i]); + break; + + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: + for (unsigned i = 0; i < ndescs; i++) { + unsigned ubo = binding_layout->ubo_idx + dest_offset + i; + 
panvk_per_arch(set_ubo_desc)(&ubos[ubo],
+                                         &pDescriptorWrite->pBufferInfo[src_offset + i]);
+         }
+         break;
+
+      case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
+      case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
+         for (unsigned i = 0; i < ndescs; i++)
+            panvk_set_buffer_info_desc(&descs[i], &pDescriptorWrite->pBufferInfo[src_offset + i]);
+         break;
+      default:
+         unreachable("Invalid type");
+      }
+
+      src_offset += ndescs;
+      binding++;
+      dest_offset = 0;
+   }
+}
+
+static void
+panvk_copy_descriptor_set(struct panvk_device *dev,
+                          const VkCopyDescriptorSet *pDescriptorCopy)
+{
+   VK_FROM_HANDLE(panvk_descriptor_set, dest_set, pDescriptorCopy->dstSet);
+   VK_FROM_HANDLE(panvk_descriptor_set, src_set, pDescriptorCopy->srcSet);
+   const struct panvk_descriptor_set_layout *dest_layout = dest_set->layout;
+   const struct panvk_descriptor_set_layout *src_layout = src_set->layout;
+   unsigned dest_offset = pDescriptorCopy->dstArrayElement;
+   unsigned src_offset = pDescriptorCopy->srcArrayElement;
+   unsigned dest_binding = pDescriptorCopy->dstBinding;
+   unsigned src_binding = pDescriptorCopy->srcBinding;
+   unsigned desc_count = pDescriptorCopy->descriptorCount;
+
+   while (desc_count && src_binding < src_layout->binding_count &&
+          dest_binding < dest_layout->binding_count) {
+      const struct panvk_descriptor_set_binding_layout *dest_binding_layout =
+         &dest_layout->bindings[dest_binding];
+
+      if (!dest_binding_layout->array_size) {
+         dest_binding++;
+         dest_offset = 0;
+         continue;
+      }
+
+      const struct panvk_descriptor_set_binding_layout *src_binding_layout =
+         &src_layout->bindings[src_binding];
+
+      if (!src_binding_layout->array_size) {
+         src_binding++;
+         src_offset = 0;
+         continue;
+      }
+
+      assert(dest_binding_layout->type == src_binding_layout->type);
+
+      unsigned ndescs = MIN3(desc_count,
+                             dest_binding_layout->array_size - dest_offset,
+                             src_binding_layout->array_size - src_offset);
+
+      struct panvk_descriptor *dest_descs = dest_set->descs + dest_binding_layout->desc_idx + dest_offset;
+      struct panvk_descriptor *src_descs = src_set->descs + src_binding_layout->desc_idx + src_offset;
+      memcpy(dest_descs, src_descs, ndescs * sizeof(*dest_descs));
+      desc_count -= ndescs;
+      dest_offset += ndescs;
+      src_offset += ndescs;
+      if (dest_offset == dest_binding_layout->array_size) {
+         dest_binding++;
+         dest_offset = 0;
+      }
+      if (src_offset == src_binding_layout->array_size) {
+         src_binding++;
+         src_offset = 0;
+      }
+   }
+
+   assert(!desc_count);
+}
+
+void
+panvk_per_arch(UpdateDescriptorSets)(VkDevice _device,
+                                     uint32_t descriptorWriteCount,
+                                     const VkWriteDescriptorSet *pDescriptorWrites,
+                                     uint32_t descriptorCopyCount,
+                                     const VkCopyDescriptorSet *pDescriptorCopies)
+{
+   VK_FROM_HANDLE(panvk_device, dev, _device);
+
+   for (unsigned i = 0; i < descriptorWriteCount; i++)
+      panvk_per_arch(write_descriptor_set)(dev, &pDescriptorWrites[i]);
+   for (unsigned i = 0; i < descriptorCopyCount; i++)
+      panvk_copy_descriptor_set(dev, &pDescriptorCopies[i]);
+}
diff --git a/src/panfrost/vulkan/panvk_vX_device.c b/src/panfrost/vulkan/panvk_vX_device.c
new file mode 100644
index 00000000000..d6f269d3a0d
--- /dev/null
+++ b/src/panfrost/vulkan/panvk_vX_device.c
@@ -0,0 +1,317 @@
+/*
+ * Copyright © 2021 Collabora Ltd.
+ *
+ * Derived from tu_device.c which is:
+ * Copyright © 2016 Red Hat.
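/* Illustrative sketch, not part of the patch: the descriptor-copy loop
 * above walks two binding tables at once, and the per-iteration copy size
 * must be the minimum of (a) the descriptors still requested, (b) the room
 * left in the destination binding and (c) the descriptors left in the
 * source binding. The toy types below (no panvk structures) model just
 * that walk, including the independent wrap into the next binding on each
 * side.
 */
#include <assert.h>
#include <stdio.h>
#include <string.h>

#define MIN3(a, b, c) ((a) < (b) ? ((a) < (c) ? (a) : (c)) : ((b) < (c) ? (b) : (c)))

struct toy_binding { unsigned array_size; unsigned first_desc; };

static void
toy_copy(int *dst_descs, const struct toy_binding *dst_bindings,
         const int *src_descs, const struct toy_binding *src_bindings,
         unsigned dst_binding, unsigned dst_offset,
         unsigned src_binding, unsigned src_offset,
         unsigned count)
{
   while (count) {
      const struct toy_binding *d = &dst_bindings[dst_binding];
      const struct toy_binding *s = &src_bindings[src_binding];
      unsigned n = MIN3(count, d->array_size - dst_offset,
                        s->array_size - src_offset);

      memcpy(dst_descs + d->first_desc + dst_offset,
             src_descs + s->first_desc + src_offset, n * sizeof(int));

      count -= n;
      dst_offset += n;
      src_offset += n;
      /* Wrap into the next binding on either side, independently. */
      if (dst_offset == d->array_size) { dst_binding++; dst_offset = 0; }
      if (src_offset == s->array_size) { src_binding++; src_offset = 0; }
   }
}

int
main(void)
{
   /* Two bindings of 2 and 3 descriptors on each side. */
   struct toy_binding b[] = { { 2, 0 }, { 3, 2 } };
   int src[5] = { 10, 11, 12, 13, 14 }, dst[5] = { 0 };

   toy_copy(dst, b, src, b, 0, 1, 0, 0, 4); /* dst[1..4] = src[0..3] */
   assert(dst[1] == 10 && dst[4] == 13);
   printf("copied across binding boundaries\n");
   return 0;
}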
+ * Copyright © 2016 Bas Nieuwenhuizen + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include "gen_macros.h" + +#include "decode.h" + +#include "panvk_private.h" +#include "panvk_cs.h" + +static void +panvk_queue_submit_batch(struct panvk_queue *queue, + struct panvk_batch *batch, + uint32_t *bos, unsigned nr_bos, + uint32_t *in_fences, + unsigned nr_in_fences) +{ + const struct panvk_device *dev = queue->device; + unsigned debug = dev->physical_device->instance->debug_flags; + const struct panfrost_device *pdev = &dev->physical_device->pdev; + int ret; + + /* Reset the batch if it's already been issued */ + if (batch->issued) { + util_dynarray_foreach(&batch->jobs, void *, job) + memset((*job), 0, 4 * 4); + + /* Reset the tiler before re-issuing the batch */ +#if PAN_ARCH >= 6 + if (batch->tiler.descs.cpu) { + memcpy(batch->tiler.descs.cpu, batch->tiler.templ, + pan_size(TILER_CONTEXT) + pan_size(TILER_HEAP)); + } +#else + if (batch->fb.desc.cpu) { + void *tiler = pan_section_ptr(batch->fb.desc.cpu, MULTI_TARGET_FRAMEBUFFER, TILER); + memcpy(tiler, batch->tiler.templ, pan_size(TILER_CONTEXT)); + /* All weights set to 0, nothing to do here */ + pan_section_pack(batch->fb.desc.cpu, MULTI_TARGET_FRAMEBUFFER, TILER_WEIGHTS, w); + } +#endif + } + + if (batch->scoreboard.first_job) { + struct drm_panfrost_submit submit = { + .bo_handles = (uintptr_t)bos, + .bo_handle_count = nr_bos, + .in_syncs = (uintptr_t)in_fences, + .in_sync_count = nr_in_fences, + .out_sync = queue->sync, + .jc = batch->scoreboard.first_job, + }; + + ret = drmIoctl(pdev->fd, DRM_IOCTL_PANFROST_SUBMIT, &submit); + assert(!ret); + + if (debug & (PANVK_DEBUG_TRACE | PANVK_DEBUG_SYNC)) { + ret = drmSyncobjWait(pdev->fd, &submit.out_sync, 1, INT64_MAX, 0, NULL); + assert(!ret); + } + + if (debug & PANVK_DEBUG_TRACE) + pandecode_jc(batch->scoreboard.first_job, PAN_ARCH >= 6, pdev->gpu_id); + } + + if (batch->fragment_job) { + struct drm_panfrost_submit submit = { + .bo_handles = (uintptr_t)bos, + .bo_handle_count = nr_bos, + .out_sync = queue->sync, + .jc = batch->fragment_job, + .requirements = PANFROST_JD_REQ_FS, + }; + + if (batch->scoreboard.first_job) { + submit.in_syncs = (uintptr_t)(&queue->sync); + submit.in_sync_count = 1; + } else { + submit.in_syncs = (uintptr_t)in_fences; + submit.in_sync_count = nr_in_fences; + } + + ret = drmIoctl(pdev->fd, DRM_IOCTL_PANFROST_SUBMIT, &submit); + assert(!ret); + if (debug & 
(PANVK_DEBUG_TRACE | PANVK_DEBUG_SYNC)) {
+         ret = drmSyncobjWait(pdev->fd, &submit.out_sync, 1, INT64_MAX, 0, NULL);
+         assert(!ret);
+      }
+
+      if (debug & PANVK_DEBUG_TRACE)
+         pandecode_jc(batch->fragment_job, PAN_ARCH >= 6, pdev->gpu_id);
+   }
+
+   if (debug & PANVK_DEBUG_TRACE)
+      pandecode_next_frame();
+
+   batch->issued = true;
+}
+
+static void
+panvk_queue_transfer_sync(struct panvk_queue *queue, uint32_t syncobj)
+{
+   const struct panfrost_device *pdev = &queue->device->physical_device->pdev;
+   int ret;
+
+   struct drm_syncobj_handle handle = {
+      .handle = queue->sync,
+      .flags = DRM_SYNCOBJ_HANDLE_TO_FD_FLAGS_EXPORT_SYNC_FILE,
+      .fd = -1,
+   };
+
+   ret = drmIoctl(pdev->fd, DRM_IOCTL_SYNCOBJ_HANDLE_TO_FD, &handle);
+   assert(!ret);
+   assert(handle.fd >= 0);
+
+   handle.handle = syncobj;
+   ret = drmIoctl(pdev->fd, DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE, &handle);
+   assert(!ret);
+
+   close(handle.fd);
+}
+
+static void
+panvk_add_wait_event_syncobjs(struct panvk_batch *batch, uint32_t *in_fences, unsigned *nr_in_fences)
+{
+   util_dynarray_foreach(&batch->event_ops, struct panvk_event_op, op) {
+      switch (op->type) {
+      case PANVK_EVENT_OP_SET:
+         /* Nothing to do yet */
+         break;
+      case PANVK_EVENT_OP_RESET:
+         /* Nothing to do yet */
+         break;
+      case PANVK_EVENT_OP_WAIT:
+         in_fences[(*nr_in_fences)++] = op->event->syncobj;
+         break;
+      default:
+         unreachable("bad panvk_event_op type\n");
+      }
+   }
+}
+
+static void
+panvk_signal_event_syncobjs(struct panvk_queue *queue, struct panvk_batch *batch)
+{
+   const struct panfrost_device *pdev = &queue->device->physical_device->pdev;
+
+   util_dynarray_foreach(&batch->event_ops, struct panvk_event_op, op) {
+      switch (op->type) {
+      case PANVK_EVENT_OP_SET: {
+         panvk_queue_transfer_sync(queue, op->event->syncobj);
+         break;
+      }
+      case PANVK_EVENT_OP_RESET: {
+         struct panvk_event *event = op->event;
+
+         struct drm_syncobj_array objs = {
+            .handles = (uint64_t) (uintptr_t) &event->syncobj,
+            .count_handles = 1
+         };
+
+         int ret = drmIoctl(pdev->fd, DRM_IOCTL_SYNCOBJ_RESET, &objs);
+         assert(!ret);
+         break;
+      }
+      case PANVK_EVENT_OP_WAIT:
+         /* Nothing left to do */
+         break;
+      default:
+         unreachable("bad panvk_event_op type\n");
+      }
+   }
+}
+
+VkResult
+panvk_per_arch(QueueSubmit)(VkQueue _queue,
+                            uint32_t submitCount,
+                            const VkSubmitInfo *pSubmits,
+                            VkFence _fence)
+{
+   VK_FROM_HANDLE(panvk_queue, queue, _queue);
+   VK_FROM_HANDLE(panvk_fence, fence, _fence);
+   const struct panfrost_device *pdev = &queue->device->physical_device->pdev;
+
+   for (uint32_t i = 0; i < submitCount; ++i) {
+      const VkSubmitInfo *submit = pSubmits + i;
+      unsigned nr_semaphores = submit->waitSemaphoreCount + 1;
+      uint32_t semaphores[nr_semaphores];
+
+      semaphores[0] = queue->sync;
+      for (unsigned i = 0; i < submit->waitSemaphoreCount; i++) {
+         VK_FROM_HANDLE(panvk_semaphore, sem, submit->pWaitSemaphores[i]);
+
+         semaphores[i + 1] = sem->syncobj.temporary ? : sem->syncobj.permanent;
+      }
+
+      for (uint32_t j = 0; j < submit->commandBufferCount; ++j) {
+         VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, (submit->pCommandBuffers[j]));
+
+         list_for_each_entry(struct panvk_batch, batch, &cmdbuf->batches, node) {
+            /* FIXME: should be done at the batch level */
+            unsigned nr_bos =
+               panvk_pool_num_bos(&cmdbuf->desc_pool) +
+               panvk_pool_num_bos(&cmdbuf->varying_pool) +
+               panvk_pool_num_bos(&cmdbuf->tls_pool) +
+               (batch->fb.info ? batch->fb.info->attachment_count : 0) +
+               (batch->blit.src ? 1 : 0) +
+               (batch->blit.dst ? 1 : 0) +
+               (batch->scoreboard.first_tiler ?
1 : 0) + 1; + unsigned bo_idx = 0; + uint32_t bos[nr_bos]; + + panvk_pool_get_bo_handles(&cmdbuf->desc_pool, &bos[bo_idx]); + bo_idx += panvk_pool_num_bos(&cmdbuf->desc_pool); + + panvk_pool_get_bo_handles(&cmdbuf->varying_pool, &bos[bo_idx]); + bo_idx += panvk_pool_num_bos(&cmdbuf->varying_pool); + + panvk_pool_get_bo_handles(&cmdbuf->tls_pool, &bos[bo_idx]); + bo_idx += panvk_pool_num_bos(&cmdbuf->tls_pool); + + if (batch->fb.info) { + for (unsigned i = 0; i < batch->fb.info->attachment_count; i++) { + bos[bo_idx++] = batch->fb.info->attachments[i].iview->pview.image->data.bo->gem_handle; + } + } + + if (batch->blit.src) + bos[bo_idx++] = batch->blit.src->gem_handle; + + if (batch->blit.dst) + bos[bo_idx++] = batch->blit.dst->gem_handle; + + if (batch->scoreboard.first_tiler) + bos[bo_idx++] = pdev->tiler_heap->gem_handle; + + bos[bo_idx++] = pdev->sample_positions->gem_handle; + assert(bo_idx == nr_bos); + + unsigned nr_in_fences = 0; + unsigned max_wait_event_syncobjs = + util_dynarray_num_elements(&batch->event_ops, + struct panvk_event_op); + uint32_t in_fences[nr_semaphores + max_wait_event_syncobjs]; + memcpy(in_fences, semaphores, nr_semaphores * sizeof(*in_fences)); + nr_in_fences += nr_semaphores; + + panvk_add_wait_event_syncobjs(batch, in_fences, &nr_in_fences); + + panvk_queue_submit_batch(queue, batch, bos, nr_bos, in_fences, nr_in_fences); + + panvk_signal_event_syncobjs(queue, batch); + } + } + + /* Transfer the out fence to signal semaphores */ + for (unsigned i = 0; i < submit->signalSemaphoreCount; i++) { + VK_FROM_HANDLE(panvk_semaphore, sem, submit->pSignalSemaphores[i]); + panvk_queue_transfer_sync(queue, sem->syncobj.temporary ? : sem->syncobj.permanent); + } + } + + if (fence) { + /* Transfer the last out fence to the fence object */ + panvk_queue_transfer_sync(queue, fence->syncobj.temporary ? : fence->syncobj.permanent); + } + + return VK_SUCCESS; +} + +VkResult +panvk_per_arch(CreateSampler)(VkDevice _device, + const VkSamplerCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkSampler *pSampler) +{ + VK_FROM_HANDLE(panvk_device, device, _device); + struct panvk_sampler *sampler; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO); + + sampler = vk_object_alloc(&device->vk, pAllocator, sizeof(*sampler), + VK_OBJECT_TYPE_SAMPLER); + if (!sampler) + return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); + + STATIC_ASSERT(sizeof(sampler->desc) >= pan_size(SAMPLER)); + panvk_per_arch(emit_sampler)(pCreateInfo, &sampler->desc); + *pSampler = panvk_sampler_to_handle(sampler); + + return VK_SUCCESS; +} diff --git a/src/panfrost/vulkan/panvk_vX_image.c b/src/panfrost/vulkan/panvk_vX_image.c new file mode 100644 index 00000000000..f75f7b21305 --- /dev/null +++ b/src/panfrost/vulkan/panvk_vX_image.c @@ -0,0 +1,149 @@ +/* + * Copyright © 2021 Collabora Ltd. + * + * Derived from tu_image.c which is: + * Copyright © 2016 Red Hat. 
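/* Illustrative sketch, not part of the patch: panvk_queue_transfer_sync()
 * above copies the payload of one DRM syncobj into another by bouncing it
 * through a sync-file fd. The same round trip can be written with the
 * libdrm wrappers for those two ioctls; this sketch assumes `fd` is an
 * open DRM device fd and both handles came from drmSyncobjCreate().
 */
#include <assert.h>
#include <unistd.h>
#include <xf86drm.h>

static void
transfer_syncobj_payload(int fd, uint32_t src_handle, uint32_t dst_handle)
{
   int sync_file = -1;

   /* Export the current fence of src as a sync file... */
   int ret = drmSyncobjExportSyncFile(fd, src_handle, &sync_file);
   assert(!ret && sync_file >= 0);

   /* ...and import it as the new fence of dst. */
   ret = drmSyncobjImportSyncFile(fd, dst_handle, sync_file);
   assert(!ret);

   close(sync_file);
}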
+ * Copyright © 2016 Bas Nieuwenhuizen + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include "gen_macros.h" +#include "panvk_private.h" +#include "panfrost-quirks.h" + +#include "util/debug.h" +#include "util/u_atomic.h" +#include "vk_format.h" +#include "vk_object.h" +#include "vk_util.h" +#include "drm-uapi/drm_fourcc.h" + +static enum mali_texture_dimension +panvk_view_type_to_mali_tex_dim(VkImageViewType type) +{ + switch (type) { + case VK_IMAGE_VIEW_TYPE_1D: + case VK_IMAGE_VIEW_TYPE_1D_ARRAY: + return MALI_TEXTURE_DIMENSION_1D; + case VK_IMAGE_VIEW_TYPE_2D: + case VK_IMAGE_VIEW_TYPE_2D_ARRAY: + return MALI_TEXTURE_DIMENSION_2D; + case VK_IMAGE_VIEW_TYPE_3D: + return MALI_TEXTURE_DIMENSION_3D; + case VK_IMAGE_VIEW_TYPE_CUBE: + case VK_IMAGE_VIEW_TYPE_CUBE_ARRAY: + return MALI_TEXTURE_DIMENSION_CUBE; + default: + unreachable("Invalid view type"); + } +} + +static void +panvk_convert_swizzle(const VkComponentMapping *in, + unsigned char *out) +{ + const VkComponentSwizzle *comp = &in->r; + for (unsigned i = 0; i < 4; i++) { + switch (comp[i]) { + case VK_COMPONENT_SWIZZLE_IDENTITY: + out[i] = PIPE_SWIZZLE_X + i; + break; + case VK_COMPONENT_SWIZZLE_ZERO: + out[i] = PIPE_SWIZZLE_0; + break; + case VK_COMPONENT_SWIZZLE_ONE: + out[i] = PIPE_SWIZZLE_1; + break; + case VK_COMPONENT_SWIZZLE_R: + out[i] = PIPE_SWIZZLE_X; + break; + case VK_COMPONENT_SWIZZLE_G: + out[i] = PIPE_SWIZZLE_Y; + break; + case VK_COMPONENT_SWIZZLE_B: + out[i] = PIPE_SWIZZLE_Z; + break; + case VK_COMPONENT_SWIZZLE_A: + out[i] = PIPE_SWIZZLE_W; + break; + default: + unreachable("Invalid swizzle"); + } + } +} + +VkResult +panvk_per_arch(CreateImageView)(VkDevice _device, + const VkImageViewCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkImageView *pView) +{ + VK_FROM_HANDLE(panvk_device, device, _device); + VK_FROM_HANDLE(panvk_image, image, pCreateInfo->image); + struct panvk_image_view *view; + + view = vk_object_zalloc(&device->vk, pAllocator, sizeof(*view), + VK_OBJECT_TYPE_IMAGE_VIEW); + if (view == NULL) + return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); + + view->pview.format = vk_format_to_pipe_format(pCreateInfo->format); + + if (pCreateInfo->subresourceRange.aspectMask == VK_IMAGE_ASPECT_DEPTH_BIT) + view->pview.format = util_format_get_depth_only(view->pview.format); + else if (pCreateInfo->subresourceRange.aspectMask == VK_IMAGE_ASPECT_STENCIL_BIT) + 
view->pview.format = util_format_stencil_only(view->pview.format); + + view->pview.dim = panvk_view_type_to_mali_tex_dim(pCreateInfo->viewType); + view->pview.first_level = pCreateInfo->subresourceRange.baseMipLevel; + view->pview.last_level = pCreateInfo->subresourceRange.baseMipLevel + + pCreateInfo->subresourceRange.levelCount - 1; + view->pview.first_layer = pCreateInfo->subresourceRange.baseArrayLayer; + view->pview.last_layer = pCreateInfo->subresourceRange.baseArrayLayer + + pCreateInfo->subresourceRange.layerCount - 1; + panvk_convert_swizzle(&pCreateInfo->components, view->pview.swizzle); + view->pview.image = &image->pimage; + view->pview.nr_samples = image->pimage.layout.nr_samples; + view->vk_format = pCreateInfo->format; + + struct panfrost_device *pdev = &device->physical_device->pdev; + unsigned bo_size = + panfrost_estimate_texture_payload_size(pdev, &view->pview) + + pan_size(TEXTURE); + + unsigned surf_descs_offset = PAN_ARCH <= 5 ? pan_size(TEXTURE) : 0; + + view->bo = panfrost_bo_create(pdev, bo_size, 0, "Texture descriptor"); + + struct panfrost_ptr surf_descs = { + .cpu = view->bo->ptr.cpu + surf_descs_offset, + .gpu = view->bo->ptr.gpu + surf_descs_offset, + }; + void *tex_desc = PAN_ARCH >= 6 ? + &view->desc : view->bo->ptr.cpu; + + STATIC_ASSERT(sizeof(view->desc) >= pan_size(TEXTURE)); + panfrost_new_texture(pdev, &view->pview, tex_desc, &surf_descs); + + *pView = panvk_image_view_to_handle(view); + return VK_SUCCESS; +} diff --git a/src/panfrost/vulkan/panvk_meta.c b/src/panfrost/vulkan/panvk_vX_meta.c similarity index 60% rename from src/panfrost/vulkan/panvk_meta.c rename to src/panfrost/vulkan/panvk_vX_meta.c index 672414abf80..c9a68764a36 100644 --- a/src/panfrost/vulkan/panvk_meta.c +++ b/src/panfrost/vulkan/panvk_vX_meta.c @@ -21,122 +21,125 @@ * DEALINGS IN THE SOFTWARE. 
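/* Illustrative sketch, not part of the patch: panvk_convert_swizzle() above
 * collapses VK_COMPONENT_SWIZZLE_IDENTITY into explicit per-channel
 * selectors at view-creation time, so later descriptor emission never has
 * to special-case identity. The same resolution with toy enums (no Mesa
 * headers):
 */
#include <assert.h>

enum toy_swizzle { TOY_IDENTITY, TOY_R, TOY_G, TOY_B, TOY_A, TOY_ZERO, TOY_ONE };
enum toy_channel { TOY_CH_X, TOY_CH_Y, TOY_CH_Z, TOY_CH_W, TOY_CH_0, TOY_CH_1 };

static enum toy_channel
resolve_swizzle(enum toy_swizzle s, unsigned component_idx)
{
   switch (s) {
   case TOY_IDENTITY: return TOY_CH_X + component_idx; /* r->x, g->y, ... */
   case TOY_R:        return TOY_CH_X;
   case TOY_G:        return TOY_CH_Y;
   case TOY_B:        return TOY_CH_Z;
   case TOY_A:        return TOY_CH_W;
   case TOY_ZERO:     return TOY_CH_0;
   default:           return TOY_CH_1;
   }
}

int
main(void)
{
   /* Identity on the green (index 1) component selects channel Y. */
   assert(resolve_swizzle(TOY_IDENTITY, 1) == TOY_CH_Y);
   return 0;
}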
*/ +#include "gen_macros.h" + #include "nir/nir_builder.h" #include "pan_blitter.h" #include "pan_encoder.h" +#include "pan_shader.h" #include "panvk_private.h" #include "vk_format.h" void -panvk_CmdBlitImage(VkCommandBuffer commandBuffer, - VkImage srcImage, - VkImageLayout srcImageLayout, - VkImage destImage, - VkImageLayout destImageLayout, - uint32_t regionCount, - const VkImageBlit *pRegions, - VkFilter filter) +panvk_per_arch(CmdBlitImage)(VkCommandBuffer commandBuffer, + VkImage srcImage, + VkImageLayout srcImageLayout, + VkImage destImage, + VkImageLayout destImageLayout, + uint32_t regionCount, + const VkImageBlit *pRegions, + VkFilter filter) { panvk_stub(); } void -panvk_CmdCopyImage(VkCommandBuffer commandBuffer, - VkImage srcImage, - VkImageLayout srcImageLayout, - VkImage destImage, - VkImageLayout destImageLayout, - uint32_t regionCount, - const VkImageCopy *pRegions) +panvk_per_arch(CmdCopyImage)(VkCommandBuffer commandBuffer, + VkImage srcImage, + VkImageLayout srcImageLayout, + VkImage destImage, + VkImageLayout destImageLayout, + uint32_t regionCount, + const VkImageCopy *pRegions) { panvk_stub(); } void -panvk_CmdCopyBufferToImage(VkCommandBuffer commandBuffer, - VkBuffer srcBuffer, - VkImage destImage, - VkImageLayout destImageLayout, - uint32_t regionCount, - const VkBufferImageCopy *pRegions) +panvk_per_arch(CmdCopyBufferToImage)(VkCommandBuffer commandBuffer, + VkBuffer srcBuffer, + VkImage destImage, + VkImageLayout destImageLayout, + uint32_t regionCount, + const VkBufferImageCopy *pRegions) { panvk_stub(); } void -panvk_CmdCopyImageToBuffer(VkCommandBuffer commandBuffer, - VkImage srcImage, - VkImageLayout srcImageLayout, - VkBuffer destBuffer, - uint32_t regionCount, - const VkBufferImageCopy *pRegions) +panvk_per_arch(CmdCopyImageToBuffer)(VkCommandBuffer commandBuffer, + VkImage srcImage, + VkImageLayout srcImageLayout, + VkBuffer destBuffer, + uint32_t regionCount, + const VkBufferImageCopy *pRegions) { panvk_stub(); } void -panvk_CmdCopyBuffer(VkCommandBuffer commandBuffer, - VkBuffer srcBuffer, - VkBuffer destBuffer, - uint32_t regionCount, - const VkBufferCopy *pRegions) +panvk_per_arch(CmdCopyBuffer)(VkCommandBuffer commandBuffer, + VkBuffer srcBuffer, + VkBuffer destBuffer, + uint32_t regionCount, + const VkBufferCopy *pRegions) { panvk_stub(); } void -panvk_CmdResolveImage(VkCommandBuffer cmd_buffer_h, - VkImage src_image_h, - VkImageLayout src_image_layout, - VkImage dest_image_h, - VkImageLayout dest_image_layout, - uint32_t region_count, - const VkImageResolve *regions) +panvk_per_arch(CmdResolveImage)(VkCommandBuffer cmd_buffer_h, + VkImage src_image_h, + VkImageLayout src_image_layout, + VkImage dest_image_h, + VkImageLayout dest_image_layout, + uint32_t region_count, + const VkImageResolve *regions) { panvk_stub(); } void -panvk_CmdFillBuffer(VkCommandBuffer commandBuffer, - VkBuffer dstBuffer, - VkDeviceSize dstOffset, - VkDeviceSize fillSize, - uint32_t data) +panvk_per_arch(CmdFillBuffer)(VkCommandBuffer commandBuffer, + VkBuffer dstBuffer, + VkDeviceSize dstOffset, + VkDeviceSize fillSize, + uint32_t data) { panvk_stub(); } void -panvk_CmdUpdateBuffer(VkCommandBuffer commandBuffer, - VkBuffer dstBuffer, - VkDeviceSize dstOffset, - VkDeviceSize dataSize, - const void *pData) +panvk_per_arch(CmdUpdateBuffer)(VkCommandBuffer commandBuffer, + VkBuffer dstBuffer, + VkDeviceSize dstOffset, + VkDeviceSize dataSize, + const void *pData) { panvk_stub(); } void -panvk_CmdClearColorImage(VkCommandBuffer commandBuffer, - VkImage image, - VkImageLayout 
imageLayout, - const VkClearColorValue *pColor, - uint32_t rangeCount, - const VkImageSubresourceRange *pRanges) +panvk_per_arch(CmdClearColorImage)(VkCommandBuffer commandBuffer, + VkImage image, + VkImageLayout imageLayout, + const VkClearColorValue *pColor, + uint32_t rangeCount, + const VkImageSubresourceRange *pRanges) { panvk_stub(); } void -panvk_CmdClearDepthStencilImage(VkCommandBuffer commandBuffer, - VkImage image_h, - VkImageLayout imageLayout, - const VkClearDepthStencilValue *pDepthStencil, - uint32_t rangeCount, - const VkImageSubresourceRange *pRanges) +panvk_per_arch(CmdClearDepthStencilImage)(VkCommandBuffer commandBuffer, + VkImage image_h, + VkImageLayout imageLayout, + const VkClearDepthStencilValue *pDepthStencil, + uint32_t rangeCount, + const VkImageSubresourceRange *pRanges) { panvk_stub(); } @@ -203,7 +206,7 @@ panvk_meta_clear_attachments_shader(struct panfrost_device *pdev, mali_ptr shader = pan_pool_upload_aligned(bin_pool, binary.data, binary.size, - pan_is_bifrost(pdev) ? 128 : 64); + PAN_ARCH >= 6 ? 128 : 64); util_dynarray_fini(&binary); ralloc_free(b.shader); @@ -224,8 +227,6 @@ panvk_meta_clear_attachments_emit_rsd(struct panfrost_device *pdev, PAN_DESC(RENDERER_STATE), PAN_DESC(BLEND)); - unsigned fullmask = (1 << util_format_get_nr_components(format)) - 1; - /* TODO: Support multiple render targets */ assert(rt == 0); @@ -243,47 +244,48 @@ panvk_meta_clear_attachments_emit_rsd(struct panfrost_device *pdev, cfg.stencil_front.mask = 0xFF; cfg.stencil_back = cfg.stencil_front; - if (pan_is_bifrost(pdev)) { - cfg.properties.bifrost.allow_forward_pixel_to_be_killed = true; - cfg.properties.bifrost.allow_forward_pixel_to_kill = true; - cfg.properties.bifrost.zs_update_operation = - MALI_PIXEL_KILL_STRONG_EARLY; - cfg.properties.bifrost.pixel_kill_operation = - MALI_PIXEL_KILL_FORCE_EARLY; - } else { - cfg.properties.midgard.shader_reads_tilebuffer = false; - cfg.properties.midgard.work_register_count = shader_info->work_reg_count; - cfg.properties.midgard.force_early_z = true; - cfg.stencil_mask_misc.alpha_test_compare_function = MALI_FUNC_ALWAYS; - } +#if PAN_ARCH >= 6 + cfg.properties.bifrost.allow_forward_pixel_to_be_killed = true; + cfg.properties.bifrost.allow_forward_pixel_to_kill = true; + cfg.properties.bifrost.zs_update_operation = + MALI_PIXEL_KILL_STRONG_EARLY; + cfg.properties.bifrost.pixel_kill_operation = + MALI_PIXEL_KILL_FORCE_EARLY; +#else + cfg.properties.midgard.shader_reads_tilebuffer = false; + cfg.properties.midgard.work_register_count = shader_info->work_reg_count; + cfg.properties.midgard.force_early_z = true; + cfg.stencil_mask_misc.alpha_test_compare_function = MALI_FUNC_ALWAYS; +#endif } pan_pack(rsd_ptr.cpu + pan_size(RENDERER_STATE), BLEND, cfg) { cfg.round_to_fb_precision = true; cfg.load_destination = false; - if (pan_is_bifrost(pdev)) { - cfg.bifrost.internal.mode = MALI_BIFROST_BLEND_MODE_OPAQUE; - cfg.bifrost.equation.rgb.a = MALI_BLEND_OPERAND_A_SRC; - cfg.bifrost.equation.rgb.b = MALI_BLEND_OPERAND_B_SRC; - cfg.bifrost.equation.rgb.c = MALI_BLEND_OPERAND_C_ZERO; - cfg.bifrost.equation.alpha.a = MALI_BLEND_OPERAND_A_SRC; - cfg.bifrost.equation.alpha.b = MALI_BLEND_OPERAND_B_SRC; - cfg.bifrost.equation.alpha.c = MALI_BLEND_OPERAND_C_ZERO; - cfg.bifrost.equation.color_mask = 0xf; - cfg.bifrost.internal.fixed_function.num_comps = 4; - cfg.bifrost.internal.fixed_function.conversion.memory_format = - panfrost_format_to_bifrost_blend(pdev, format, false); - cfg.bifrost.internal.fixed_function.conversion.register_format = - 
shader_info->bifrost.blend[rt].format; - } else { - cfg.midgard.equation.rgb.a = MALI_BLEND_OPERAND_A_SRC; - cfg.midgard.equation.rgb.b = MALI_BLEND_OPERAND_B_SRC; - cfg.midgard.equation.rgb.c = MALI_BLEND_OPERAND_C_ZERO; - cfg.midgard.equation.alpha.a = MALI_BLEND_OPERAND_A_SRC; - cfg.midgard.equation.alpha.b = MALI_BLEND_OPERAND_B_SRC; - cfg.midgard.equation.alpha.c = MALI_BLEND_OPERAND_C_ZERO; - cfg.midgard.equation.color_mask = fullmask; - } +#if PAN_ARCH >= 6 + cfg.bifrost.internal.mode = MALI_BIFROST_BLEND_MODE_OPAQUE; + cfg.bifrost.equation.rgb.a = MALI_BLEND_OPERAND_A_SRC; + cfg.bifrost.equation.rgb.b = MALI_BLEND_OPERAND_B_SRC; + cfg.bifrost.equation.rgb.c = MALI_BLEND_OPERAND_C_ZERO; + cfg.bifrost.equation.alpha.a = MALI_BLEND_OPERAND_A_SRC; + cfg.bifrost.equation.alpha.b = MALI_BLEND_OPERAND_B_SRC; + cfg.bifrost.equation.alpha.c = MALI_BLEND_OPERAND_C_ZERO; + cfg.bifrost.equation.color_mask = 0xf; + cfg.bifrost.internal.fixed_function.num_comps = 4; + cfg.bifrost.internal.fixed_function.conversion.memory_format = + panfrost_format_to_bifrost_blend(pdev, format, false); + cfg.bifrost.internal.fixed_function.conversion.register_format = + shader_info->bifrost.blend[rt].format; +#else + cfg.midgard.equation.rgb.a = MALI_BLEND_OPERAND_A_SRC; + cfg.midgard.equation.rgb.b = MALI_BLEND_OPERAND_B_SRC; + cfg.midgard.equation.rgb.c = MALI_BLEND_OPERAND_C_ZERO; + cfg.midgard.equation.alpha.a = MALI_BLEND_OPERAND_A_SRC; + cfg.midgard.equation.alpha.b = MALI_BLEND_OPERAND_B_SRC; + cfg.midgard.equation.alpha.c = MALI_BLEND_OPERAND_C_ZERO; + cfg.midgard.equation.color_mask = + (1 << util_format_get_nr_components(format)) - 1; +#endif } return rsd_ptr.gpu; @@ -342,85 +344,49 @@ panvk_meta_clear_attachment_emit_dcd(struct pan_pool *pool, cfg.push_uniforms = push_constants; cfg.position = coords; cfg.viewport = vpd; - cfg.texture_descriptor_is_64b = !pan_is_bifrost(pool->dev); + cfg.texture_descriptor_is_64b = PAN_ARCH <= 5; } } static struct panfrost_ptr -panvk_meta_clear_attachment_emit_bifrost_tiler_job(struct pan_pool *desc_pool, - struct pan_scoreboard *scoreboard, - mali_ptr coords, - mali_ptr ubo, mali_ptr push_constants, - mali_ptr vpd, mali_ptr rsd, - mali_ptr tsd, mali_ptr tiler) +panvk_meta_clear_attachment_emit_tiler_job(struct pan_pool *desc_pool, + struct pan_scoreboard *scoreboard, + mali_ptr coords, + mali_ptr ubo, mali_ptr push_constants, + mali_ptr vpd, mali_ptr rsd, + mali_ptr tsd, mali_ptr tiler) { struct panfrost_ptr job = - pan_pool_alloc_desc(desc_pool, BIFROST_TILER_JOB); + pan_pool_alloc_desc(desc_pool, TILER_JOB); panvk_meta_clear_attachment_emit_dcd(desc_pool, coords, ubo, push_constants, vpd, tsd, rsd, - pan_section_ptr(job.cpu, BIFROST_TILER_JOB, DRAW)); + pan_section_ptr(job.cpu, TILER_JOB, DRAW)); - pan_section_pack(job.cpu, BIFROST_TILER_JOB, PRIMITIVE, cfg) { + pan_section_pack(job.cpu, TILER_JOB, PRIMITIVE, cfg) { cfg.draw_mode = MALI_DRAW_MODE_TRIANGLE_STRIP; cfg.index_count = 4; cfg.job_task_split = 6; } - pan_section_pack(job.cpu, BIFROST_TILER_JOB, PRIMITIVE_SIZE, cfg) { + pan_section_pack(job.cpu, TILER_JOB, PRIMITIVE_SIZE, cfg) { cfg.constant = 1.0f; } void *invoc = pan_section_ptr(job.cpu, - BIFROST_TILER_JOB, + TILER_JOB, INVOCATION); panfrost_pack_work_groups_compute(invoc, 1, 4, 1, 1, 1, 1, true, false); - pan_section_pack(job.cpu, BIFROST_TILER_JOB, PADDING, cfg); - pan_section_pack(job.cpu, BIFROST_TILER_JOB, TILER, cfg) { +#if PAN_ARCH >= 6 + pan_section_pack(job.cpu, TILER_JOB, PADDING, cfg); + pan_section_pack(job.cpu, TILER_JOB, TILER, cfg) { 
cfg.address = tiler; } - - panfrost_add_job(desc_pool, scoreboard, MALI_JOB_TYPE_TILER, - false, false, 0, 0, &job, false); - return job; -} - -static struct panfrost_ptr -panvk_meta_clear_attachment_emit_midgard_tiler_job(struct pan_pool *desc_pool, - struct pan_scoreboard *scoreboard, - mali_ptr coords, - mali_ptr ubo, mali_ptr push_constants, - mali_ptr vpd, mali_ptr rsd, - mali_ptr tsd) -{ - struct panfrost_ptr job = - pan_pool_alloc_desc(desc_pool, MIDGARD_TILER_JOB); - - panvk_meta_clear_attachment_emit_dcd(desc_pool, - coords, - ubo, push_constants, - vpd, tsd, rsd, - pan_section_ptr(job.cpu, MIDGARD_TILER_JOB, DRAW)); - - pan_section_pack(job.cpu, MIDGARD_TILER_JOB, PRIMITIVE, cfg) { - cfg.draw_mode = MALI_DRAW_MODE_TRIANGLE_STRIP; - cfg.index_count = 4; - cfg.job_task_split = 6; - } - - pan_section_pack(job.cpu, MIDGARD_TILER_JOB, PRIMITIVE_SIZE, cfg) { - cfg.constant = 1.0f; - } - - void *invoc = pan_section_ptr(job.cpu, - MIDGARD_TILER_JOB, - INVOCATION); - panfrost_pack_work_groups_compute(invoc, 1, 4, - 1, 1, 1, 1, true, false); +#endif panfrost_add_job(desc_pool, scoreboard, MALI_JOB_TYPE_TILER, false, false, 0, 0, &job, false); @@ -477,19 +443,19 @@ panvk_meta_clear_attachment(struct panvk_cmd_buffer *cmdbuf, /* TODO: Support depth/stencil */ assert(mask == VK_IMAGE_ASPECT_COLOR_BIT); - panvk_cmd_alloc_fb_desc(cmdbuf); - panvk_cmd_alloc_tls_desc(cmdbuf); + panvk_per_arch(cmd_alloc_fb_desc)(cmdbuf); + panvk_per_arch(cmd_alloc_tls_desc)(cmdbuf); - if (pan_is_bifrost(pdev)) { - panvk_cmd_get_bifrost_tiler_context(cmdbuf, - batch->fb.info->width, - batch->fb.info->height); - } else { - panvk_cmd_get_midgard_polygon_list(cmdbuf, +#if PAN_ARCH <= 5 + panvk_per_arch(cmd_get_polygon_list)(cmdbuf, + batch->fb.info->width, + batch->fb.info->height, + true); +#else + panvk_per_arch(cmd_get_tiler_context)(cmdbuf, batch->fb.info->width, - batch->fb.info->height, - true); - } + batch->fb.info->height); +#endif mali_ptr vpd = panvk_meta_emit_viewport(&cmdbuf->desc_pool.base, minx, miny, maxx, maxy); @@ -525,31 +491,16 @@ panvk_meta_clear_attachment(struct panvk_cmd_buffer *cmdbuf, &cmdbuf->desc_pool.base, clear_value); - mali_ptr tsd, tiler; - - if (pan_is_bifrost(pdev)) { - tsd = batch->tls.gpu; - tiler = batch->tiler.bifrost_descs.gpu; - } else { - tsd = batch->fb.desc.gpu; - tiler = 0; - } + mali_ptr tsd = PAN_ARCH >= 6 ? batch->tls.gpu : batch->fb.desc.gpu; + mali_ptr tiler = PAN_ARCH >= 6 ? 
batch->tiler.descs.gpu : 0; struct panfrost_ptr job; - if (pan_is_bifrost(pdev)) { - job = panvk_meta_clear_attachment_emit_bifrost_tiler_job(&cmdbuf->desc_pool.base, - &batch->scoreboard, - coordinates, - ubo, pushconsts, - vpd, rsd, tsd, tiler); - } else { - job = panvk_meta_clear_attachment_emit_midgard_tiler_job(&cmdbuf->desc_pool.base, - &batch->scoreboard, - coordinates, - ubo, pushconsts, - vpd, rsd, tsd); - } + job = panvk_meta_clear_attachment_emit_tiler_job(&cmdbuf->desc_pool.base, + &batch->scoreboard, + coordinates, + ubo, pushconsts, + vpd, rsd, tsd, tiler); util_dynarray_append(&batch->jobs, void *, job.cpu); } @@ -585,11 +536,11 @@ panvk_meta_clear_attachment_init(struct panvk_physical_device *dev) } void -panvk_CmdClearAttachments(VkCommandBuffer commandBuffer, - uint32_t attachmentCount, - const VkClearAttachment *pAttachments, - uint32_t rectCount, - const VkClearRect *pRects) +panvk_per_arch(CmdClearAttachments)(VkCommandBuffer commandBuffer, + uint32_t attachmentCount, + const VkClearAttachment *pAttachments, + uint32_t rectCount, + const VkClearRect *pRects) { VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer); const struct panvk_subpass *subpass = cmdbuf->state.subpass; @@ -617,7 +568,7 @@ panvk_CmdClearAttachments(VkCommandBuffer commandBuffer, } void -panvk_meta_init(struct panvk_physical_device *dev) +panvk_per_arch(meta_init)(struct panvk_physical_device *dev) { panvk_pool_init(&dev->meta.bin_pool, &dev->pdev, NULL, PAN_BO_EXECUTE, 16 * 1024, "panvk_meta binary pool", false); @@ -635,7 +586,7 @@ panvk_meta_init(struct panvk_physical_device *dev) } void -panvk_meta_cleanup(struct panvk_physical_device *dev) +panvk_per_arch(meta_cleanup)(struct panvk_physical_device *dev) { pan_blitter_cleanup(&dev->pdev); panvk_pool_cleanup(&dev->meta.blitter.desc_pool); diff --git a/src/panfrost/vulkan/panvk_vX_meta.h b/src/panfrost/vulkan/panvk_vX_meta.h new file mode 100644 index 00000000000..126d45c5948 --- /dev/null +++ b/src/panfrost/vulkan/panvk_vX_meta.h @@ -0,0 +1,36 @@ +/* + * Copyright (C) 2021 Collabora Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
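/* Illustrative sketch, not part of the patch: every panvk_per_arch(name)
 * definition in these panvk_vX_*.c files compiles to an arch-qualified
 * symbol because the source is built once per PAN_ARCH value and a
 * token-pasting macro mangles the name. In Mesa that macro lives in
 * gen_macros.h; this is a simplified stand-in, not the real header.
 */
#include <stdio.h>

#ifndef PAN_ARCH
#define PAN_ARCH 7 /* normally injected by the build, e.g. -DPAN_ARCH=7 */
#endif

/* Two-level paste so PAN_ARCH is expanded before ## is applied. */
#define PANVK_CONCAT2(prefix, name) prefix##name
#define PANVK_CONCAT(prefix, name) PANVK_CONCAT2(prefix, name)

#if PAN_ARCH == 5
#define panvk_per_arch(name) PANVK_CONCAT(panvk_v5_, name)
#elif PAN_ARCH == 6
#define panvk_per_arch(name) PANVK_CONCAT(panvk_v6_, name)
#elif PAN_ARCH == 7
#define panvk_per_arch(name) PANVK_CONCAT(panvk_v7_, name)
#endif

/* The same source line defines panvk_v5_init, panvk_v6_init or
 * panvk_v7_init depending on which per-arch library is being built. */
void
panvk_per_arch(init)(void)
{
   printf("built for arch v%d\n", PAN_ARCH);
}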
+ */ + +#ifndef PANVK_PRIVATE_H +#error "Must be included from panvk_private.h" +#endif + +#ifndef PAN_ARCH +#error "no arch" +#endif + +void +panvk_per_arch(meta_init)(struct panvk_physical_device *dev); + +void +panvk_per_arch(meta_cleanup)(struct panvk_physical_device *dev); diff --git a/src/panfrost/vulkan/panvk_vX_pipeline.c b/src/panfrost/vulkan/panvk_vX_pipeline.c new file mode 100644 index 00000000000..67e46293f0b --- /dev/null +++ b/src/panfrost/vulkan/panvk_vX_pipeline.c @@ -0,0 +1,991 @@ +/* + * Copyright © 2021 Collabora Ltd. + * + * Derived from tu_pipeline.c which is: + * Copyright © 2016 Red Hat. + * Copyright © 2016 Bas Nieuwenhuizen + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
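/* Illustrative sketch, not part of the patch: panvk_vX_meta.h above refuses
 * direct inclusion, erroring out unless PANVK_PRIVATE_H and PAN_ARCH are
 * already defined, which forces every consumer through the umbrella header
 * with an arch selected. The pattern in isolation, with hypothetical
 * WIDGET_* names (this compiles as a single translation unit because the
 * "umbrella" defines come first):
 */

/* widget_private.h (umbrella) establishes the required context... */
#define WIDGET_PRIVATE_H
#define WIDGET_ARCH 7

/* ...and widget_vX.h, pulled in afterwards, enforces it: */
#ifndef WIDGET_PRIVATE_H
#error "Must be included from widget_private.h"
#endif

#ifndef WIDGET_ARCH
#error "no arch"
#endif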
+ */
+
+#include "panvk_cs.h"
+#include "panvk_private.h"
+
+#include "pan_bo.h"
+
+#include "nir/nir.h"
+#include "nir/nir_builder.h"
+#include "spirv/nir_spirv.h"
+#include "util/debug.h"
+#include "util/mesa-sha1.h"
+#include "util/u_atomic.h"
+#include "vk_format.h"
+#include "vk_util.h"
+
+#include "panfrost/util/pan_lower_framebuffer.h"
+
+#include "panfrost-quirks.h"
+
+struct panvk_pipeline_builder
+{
+   struct panvk_device *device;
+   struct panvk_pipeline_cache *cache;
+   const VkAllocationCallbacks *alloc;
+   const VkGraphicsPipelineCreateInfo *create_info;
+   const struct panvk_pipeline_layout *layout;
+
+   struct panvk_shader *shaders[MESA_SHADER_STAGES];
+   struct {
+      uint32_t shader_offset;
+      uint32_t rsd_offset;
+      uint32_t sysvals_offset;
+   } stages[MESA_SHADER_STAGES];
+   uint32_t blend_shader_offsets[MAX_RTS];
+   uint32_t shader_total_size;
+   uint32_t static_state_size;
+   uint32_t vpd_offset;
+
+   bool rasterizer_discard;
+   /* these states are affected by rasterizer_discard */
+   VkSampleCountFlagBits samples;
+   bool use_depth_stencil_attachment;
+   uint8_t active_color_attachments;
+   enum pipe_format color_attachment_formats[MAX_RTS];
+};
+
+static VkResult
+panvk_pipeline_builder_create_pipeline(struct panvk_pipeline_builder *builder,
+                                       struct panvk_pipeline **out_pipeline)
+{
+   struct panvk_device *dev = builder->device;
+
+   struct panvk_pipeline *pipeline =
+      vk_object_zalloc(&dev->vk, builder->alloc,
+                       sizeof(*pipeline), VK_OBJECT_TYPE_PIPELINE);
+   if (!pipeline)
+      return VK_ERROR_OUT_OF_HOST_MEMORY;
+
+   pipeline->layout = builder->layout;
+   *out_pipeline = pipeline;
+   return VK_SUCCESS;
+}
+
+static void
+panvk_pipeline_builder_finish(struct panvk_pipeline_builder *builder)
+{
+   for (uint32_t i = 0; i < MESA_SHADER_STAGES; i++) {
+      if (!builder->shaders[i])
+         continue;
+      panvk_shader_destroy(builder->device, builder->shaders[i], builder->alloc);
+   }
+}
+
+static bool
+panvk_pipeline_static_state(struct panvk_pipeline *pipeline, uint32_t id)
+{
+   return !(pipeline->dynamic_state_mask & (1 << id));
+}
+
+static VkResult
+panvk_pipeline_builder_compile_shaders(struct panvk_pipeline_builder *builder,
+                                       struct panvk_pipeline *pipeline)
+{
+   const VkPipelineShaderStageCreateInfo *stage_infos[MESA_SHADER_STAGES] = {
+      NULL
+   };
+   for (uint32_t i = 0; i < builder->create_info->stageCount; i++) {
+      gl_shader_stage stage = vk_to_mesa_shader_stage(builder->create_info->pStages[i].stage);
+      stage_infos[stage] = &builder->create_info->pStages[i];
+   }
+
+   /* compile shaders in reverse order */
+   unsigned sysval_ubo = builder->layout->num_ubos;
+
+   for (gl_shader_stage stage = MESA_SHADER_STAGES - 1;
+        stage > MESA_SHADER_NONE; stage--) {
+      const VkPipelineShaderStageCreateInfo *stage_info = stage_infos[stage];
+      if (!stage_info)
+         continue;
+
+      struct panvk_shader *shader;
+
+      shader = panvk_per_arch(shader_create)(builder->device, stage, stage_info,
+                                             builder->layout, sysval_ubo,
+                                             &pipeline->blend.state,
+                                             panvk_pipeline_static_state(pipeline,
+                                                                         VK_DYNAMIC_STATE_BLEND_CONSTANTS),
+                                             builder->alloc);
+      if (!shader)
+         return VK_ERROR_OUT_OF_HOST_MEMORY;
+
+      if (shader->info.sysvals.sysval_count)
+         sysval_ubo++;
+
+      builder->shaders[stage] = shader;
+      builder->shader_total_size = ALIGN_POT(builder->shader_total_size, 128);
+      builder->stages[stage].shader_offset = builder->shader_total_size;
+      builder->shader_total_size +=
+         util_dynarray_num_elements(&shader->binary, uint8_t);
+   }
+
+   return VK_SUCCESS;
+}
+
+static VkResult
+panvk_pipeline_builder_upload_shaders(struct panvk_pipeline_builder 
*builder,
+                                      struct panvk_pipeline *pipeline)
+{
+   struct panfrost_bo *bin_bo =
+      panfrost_bo_create(&builder->device->physical_device->pdev,
+                         builder->shader_total_size, PAN_BO_EXECUTE,
+                         "Shader");
+
+   pipeline->binary_bo = bin_bo;
+   panfrost_bo_mmap(bin_bo);
+
+   for (uint32_t i = 0; i < MESA_SHADER_STAGES; i++) {
+      const struct panvk_shader *shader = builder->shaders[i];
+      if (!shader)
+         continue;
+
+      memcpy(pipeline->binary_bo->ptr.cpu + builder->stages[i].shader_offset,
+             util_dynarray_element(&shader->binary, uint8_t, 0),
+             util_dynarray_num_elements(&shader->binary, uint8_t));
+   }
+
+   return VK_SUCCESS;
+}
+
+static bool
+panvk_pipeline_static_sysval(struct panvk_pipeline *pipeline,
+                             unsigned id)
+{
+   switch (id) {
+   case PAN_SYSVAL_VIEWPORT_SCALE:
+   case PAN_SYSVAL_VIEWPORT_OFFSET:
+      return panvk_pipeline_static_state(pipeline, VK_DYNAMIC_STATE_VIEWPORT);
+   default:
+      return false;
+   }
+}
+
+static void
+panvk_pipeline_builder_alloc_static_state_bo(struct panvk_pipeline_builder *builder,
+                                             struct panvk_pipeline *pipeline)
+{
+   struct panfrost_device *pdev =
+      &builder->device->physical_device->pdev;
+   unsigned bo_size = 0;
+
+   for (uint32_t i = 0; i < MESA_SHADER_STAGES; i++) {
+      const struct panvk_shader *shader = builder->shaders[i];
+      if (!shader)
+         continue;
+
+      if (pipeline->fs.dynamic_rsd && i == MESA_SHADER_FRAGMENT)
+         continue;
+
+      bo_size = ALIGN_POT(bo_size, pan_alignment(RENDERER_STATE));
+      builder->stages[i].rsd_offset = bo_size;
+      bo_size += pan_size(RENDERER_STATE);
+      if (i == MESA_SHADER_FRAGMENT)
+         bo_size += pan_size(BLEND) * pipeline->blend.state.rt_count;
+   }
+
+   if (panvk_pipeline_static_state(pipeline, VK_DYNAMIC_STATE_VIEWPORT) &&
+       panvk_pipeline_static_state(pipeline, VK_DYNAMIC_STATE_SCISSOR)) {
+      bo_size = ALIGN_POT(bo_size, pan_alignment(VIEWPORT));
+      builder->vpd_offset = bo_size;
+      bo_size += pan_size(VIEWPORT);
+   }
+
+   for (uint32_t i = 0; i < MESA_SHADER_STAGES; i++) {
+      const struct panvk_shader *shader = builder->shaders[i];
+      if (!shader || !shader->info.sysvals.sysval_count)
+         continue;
+
+      bool static_sysvals = true;
+      for (unsigned s = 0; s < shader->info.sysvals.sysval_count; s++) {
+         unsigned id = shader->info.sysvals.sysvals[s];
+         static_sysvals &= panvk_pipeline_static_sysval(pipeline, id);
+         switch (PAN_SYSVAL_TYPE(id)) {
+         case PAN_SYSVAL_VIEWPORT_SCALE:
+         case PAN_SYSVAL_VIEWPORT_OFFSET:
+            pipeline->sysvals[i].dirty_mask |= PANVK_DYNAMIC_VIEWPORT;
+            break;
+         default:
+            break;
+         }
+      }
+
+      if (!static_sysvals) {
+         builder->stages[i].sysvals_offset = ~0;
+         continue;
+      }
+
+      bo_size = ALIGN_POT(bo_size, 16);
+      builder->stages[i].sysvals_offset = bo_size;
+      bo_size += shader->info.sysvals.sysval_count * 16;
+   }
+
+   if (bo_size) {
+      pipeline->state_bo =
+         panfrost_bo_create(pdev, bo_size, 0, "Pipeline descriptors");
+      panfrost_bo_mmap(pipeline->state_bo);
+   }
+}
+
+static void
+panvk_pipeline_builder_upload_sysval(struct panvk_pipeline_builder *builder,
+                                     struct panvk_pipeline *pipeline,
+                                     unsigned id, union panvk_sysval_data *data)
+{
+   switch (PAN_SYSVAL_TYPE(id)) {
+   case PAN_SYSVAL_VIEWPORT_SCALE:
+      panvk_sysval_upload_viewport_scale(builder->create_info->pViewportState->pViewports,
+                                         data);
+      break;
+   case PAN_SYSVAL_VIEWPORT_OFFSET:
+      panvk_sysval_upload_viewport_offset(builder->create_info->pViewportState->pViewports,
+                                          data);
+      break;
+   default:
+      unreachable("Invalid static sysval");
+   }
+}
+
+static void
+panvk_pipeline_builder_init_sysvals(struct panvk_pipeline_builder *builder,
+                                    struct panvk_pipeline *pipeline,
+                                    gl_shader_stage 
stage)
+{
+   const struct panvk_shader *shader = builder->shaders[stage];
+
+   pipeline->sysvals[stage].ids = shader->info.sysvals;
+   pipeline->sysvals[stage].ubo_idx = shader->sysval_ubo;
+
+   if (!shader->info.sysvals.sysval_count ||
+       builder->stages[stage].sysvals_offset == ~0)
+      return;
+
+   union panvk_sysval_data *static_data =
+      pipeline->state_bo->ptr.cpu + builder->stages[stage].sysvals_offset;
+
+   pipeline->sysvals[stage].ubo =
+      pipeline->state_bo->ptr.gpu + builder->stages[stage].sysvals_offset;
+
+   for (unsigned i = 0; i < shader->info.sysvals.sysval_count; i++) {
+      unsigned id = shader->info.sysvals.sysvals[i];
+
+      panvk_pipeline_builder_upload_sysval(builder,
+                                           pipeline,
+                                           id, &static_data[i]);
+   }
+}
+
+static void
+panvk_pipeline_builder_init_shaders(struct panvk_pipeline_builder *builder,
+                                    struct panvk_pipeline *pipeline)
+{
+   for (uint32_t i = 0; i < MESA_SHADER_STAGES; i++) {
+      const struct panvk_shader *shader = builder->shaders[i];
+      if (!shader)
+         continue;
+
+      pipeline->tls_size = MAX2(pipeline->tls_size, shader->info.tls_size);
+      pipeline->wls_size = MAX2(pipeline->wls_size, shader->info.wls_size);
+
+      if (i == MESA_SHADER_VERTEX && shader->info.vs.writes_point_size)
+         pipeline->ia.writes_point_size = true;
+
+      mali_ptr shader_ptr = pipeline->binary_bo->ptr.gpu +
+                            builder->stages[i].shader_offset;
+
+      void *rsd = pipeline->state_bo->ptr.cpu + builder->stages[i].rsd_offset;
+      mali_ptr gpu_rsd = pipeline->state_bo->ptr.gpu + builder->stages[i].rsd_offset;
+
+      if (i != MESA_SHADER_FRAGMENT) {
+         panvk_per_arch(emit_non_fs_rsd)(builder->device, &shader->info, shader_ptr, rsd);
+      } else if (!pipeline->fs.dynamic_rsd) {
+         void *bd = rsd + pan_size(RENDERER_STATE);
+
+         panvk_per_arch(emit_base_fs_rsd)(builder->device, pipeline, rsd);
+         for (unsigned rt = 0; rt < MAX2(pipeline->blend.state.rt_count, 1); rt++) {
+            panvk_per_arch(emit_blend)(builder->device, pipeline, rt, bd);
+            bd += pan_size(BLEND);
+         }
+      } else {
+         gpu_rsd = 0;
+         panvk_per_arch(emit_base_fs_rsd)(builder->device, pipeline, &pipeline->fs.rsd_template);
+         for (unsigned rt = 0; rt < MAX2(pipeline->blend.state.rt_count, 1); rt++) {
+            panvk_per_arch(emit_blend)(builder->device, pipeline, rt,
+                                       &pipeline->blend.bd_template[rt]);
+         }
+      }
+
+      pipeline->rsds[i] = gpu_rsd;
+      panvk_pipeline_builder_init_sysvals(builder, pipeline, i);
+   }
+
+   pipeline->num_ubos = builder->layout->num_ubos;
+   for (unsigned i = 0; i < ARRAY_SIZE(pipeline->sysvals); i++) {
+      if (pipeline->sysvals[i].ids.sysval_count)
+         pipeline->num_ubos = MAX2(pipeline->num_ubos, pipeline->sysvals[i].ubo_idx + 1);
+   }
+
+   pipeline->num_sysvals = 0;
+   for (unsigned i = 0; i < ARRAY_SIZE(pipeline->sysvals); i++)
+      pipeline->num_sysvals += pipeline->sysvals[i].ids.sysval_count;
+}
+
+
+static void
+panvk_pipeline_builder_parse_viewport(struct panvk_pipeline_builder *builder,
+                                      struct panvk_pipeline *pipeline)
+{
+   /* The spec says:
+    *
+    *    pViewportState is a pointer to an instance of the
+    *    VkPipelineViewportStateCreateInfo structure, and is ignored if the
+    *    pipeline has rasterization disabled.
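/* Illustrative sketch, not part of the patch: panvk_pipeline_static_state()
 * above treats a pipeline state as "static" (bakeable at pipeline-build
 * time) exactly when its VkDynamicState bit is absent from
 * dynamic_state_mask. A toy version of that test:
 */
#include <assert.h>
#include <stdint.h>

#define TOY_DYNAMIC_VIEWPORT 0
#define TOY_DYNAMIC_SCISSOR  1

struct toy_pipeline { uint32_t dynamic_state_mask; };

static int
toy_static_state(const struct toy_pipeline *p, uint32_t id)
{
   return !(p->dynamic_state_mask & (1u << id));
}

int
main(void)
{
   struct toy_pipeline p = { .dynamic_state_mask = 1u << TOY_DYNAMIC_SCISSOR };

   assert(toy_static_state(&p, TOY_DYNAMIC_VIEWPORT));  /* baked at build */
   assert(!toy_static_state(&p, TOY_DYNAMIC_SCISSOR));  /* set per-draw */
   return 0;
}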
+ */ + if (!builder->rasterizer_discard && + panvk_pipeline_static_state(pipeline, VK_DYNAMIC_STATE_VIEWPORT) && + panvk_pipeline_static_state(pipeline, VK_DYNAMIC_STATE_SCISSOR)) { + void *vpd = pipeline->state_bo->ptr.cpu + builder->vpd_offset; + panvk_per_arch(emit_viewport)(builder->create_info->pViewportState->pViewports, + builder->create_info->pViewportState->pScissors, + vpd); + pipeline->vpd = pipeline->state_bo->ptr.gpu + + builder->vpd_offset; + } else { + if (builder->create_info->pViewportState->pViewports) + pipeline->viewport = builder->create_info->pViewportState->pViewports[0]; + + if (builder->create_info->pViewportState->pScissors) + pipeline->scissor = builder->create_info->pViewportState->pScissors[0]; + } +} + +static void +panvk_pipeline_builder_parse_dynamic(struct panvk_pipeline_builder *builder, + struct panvk_pipeline *pipeline) +{ + const VkPipelineDynamicStateCreateInfo *dynamic_info = + builder->create_info->pDynamicState; + + if (!dynamic_info) + return; + + for (uint32_t i = 0; i < dynamic_info->dynamicStateCount; i++) { + VkDynamicState state = dynamic_info->pDynamicStates[i]; + switch (state) { + case VK_DYNAMIC_STATE_VIEWPORT ... VK_DYNAMIC_STATE_STENCIL_REFERENCE: + pipeline->dynamic_state_mask |= 1 << state; + break; + default: + unreachable("unsupported dynamic state"); + } + } + +} + +static enum mali_draw_mode +translate_prim_topology(VkPrimitiveTopology in) +{ + switch (in) { + case VK_PRIMITIVE_TOPOLOGY_POINT_LIST: + return MALI_DRAW_MODE_POINTS; + case VK_PRIMITIVE_TOPOLOGY_LINE_LIST: + return MALI_DRAW_MODE_LINES; + case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP: + return MALI_DRAW_MODE_LINE_STRIP; + case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST: + return MALI_DRAW_MODE_TRIANGLES; + case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP: + return MALI_DRAW_MODE_TRIANGLE_STRIP; + case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN: + return MALI_DRAW_MODE_TRIANGLE_FAN; + case VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY: + case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY: + case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY: + case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY: + case VK_PRIMITIVE_TOPOLOGY_PATCH_LIST: + default: + unreachable("Invalid primitive type"); + } +} + +static void +panvk_pipeline_builder_parse_input_assembly(struct panvk_pipeline_builder *builder, + struct panvk_pipeline *pipeline) +{ + pipeline->ia.primitive_restart = + builder->create_info->pInputAssemblyState->primitiveRestartEnable; + pipeline->ia.topology = + translate_prim_topology(builder->create_info->pInputAssemblyState->topology); +} + +static enum pipe_logicop +translate_logicop(VkLogicOp in) +{ + switch (in) { + case VK_LOGIC_OP_CLEAR: return PIPE_LOGICOP_CLEAR; + case VK_LOGIC_OP_AND: return PIPE_LOGICOP_AND; + case VK_LOGIC_OP_AND_REVERSE: return PIPE_LOGICOP_AND_REVERSE; + case VK_LOGIC_OP_COPY: return PIPE_LOGICOP_COPY; + case VK_LOGIC_OP_AND_INVERTED: return PIPE_LOGICOP_AND_INVERTED; + case VK_LOGIC_OP_NO_OP: return PIPE_LOGICOP_NOOP; + case VK_LOGIC_OP_XOR: return PIPE_LOGICOP_XOR; + case VK_LOGIC_OP_OR: return PIPE_LOGICOP_OR; + case VK_LOGIC_OP_NOR: return PIPE_LOGICOP_NOR; + case VK_LOGIC_OP_EQUIVALENT: return PIPE_LOGICOP_EQUIV; + case VK_LOGIC_OP_INVERT: return PIPE_LOGICOP_INVERT; + case VK_LOGIC_OP_OR_REVERSE: return PIPE_LOGICOP_OR_REVERSE; + case VK_LOGIC_OP_COPY_INVERTED: return PIPE_LOGICOP_COPY_INVERTED; + case VK_LOGIC_OP_OR_INVERTED: return PIPE_LOGICOP_OR_INVERTED; + case VK_LOGIC_OP_NAND: return PIPE_LOGICOP_NAND; + case VK_LOGIC_OP_SET: return 
PIPE_LOGICOP_SET; + default: unreachable("Invalid logicop"); + } +} + +static enum blend_func +translate_blend_op(VkBlendOp in) +{ + switch (in) { + case VK_BLEND_OP_ADD: return BLEND_FUNC_ADD; + case VK_BLEND_OP_SUBTRACT: return BLEND_FUNC_SUBTRACT; + case VK_BLEND_OP_REVERSE_SUBTRACT: return BLEND_FUNC_REVERSE_SUBTRACT; + case VK_BLEND_OP_MIN: return BLEND_FUNC_MIN; + case VK_BLEND_OP_MAX: return BLEND_FUNC_MAX; + default: unreachable("Invalid blend op"); + } +} + +static enum blend_factor +translate_blend_factor(VkBlendFactor in, bool dest_has_alpha) +{ + switch (in) { + case VK_BLEND_FACTOR_ZERO: + case VK_BLEND_FACTOR_ONE: + return BLEND_FACTOR_ZERO; + case VK_BLEND_FACTOR_SRC_COLOR: + case VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR: + return BLEND_FACTOR_SRC_COLOR; + case VK_BLEND_FACTOR_DST_COLOR: + case VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR: + return BLEND_FACTOR_DST_COLOR; + case VK_BLEND_FACTOR_SRC_ALPHA: + case VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA: + return BLEND_FACTOR_SRC_ALPHA; + case VK_BLEND_FACTOR_DST_ALPHA: + case VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA: + return dest_has_alpha ? BLEND_FACTOR_DST_ALPHA : BLEND_FACTOR_ZERO; + case VK_BLEND_FACTOR_CONSTANT_COLOR: + case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR: + return BLEND_FACTOR_CONSTANT_COLOR; + case VK_BLEND_FACTOR_CONSTANT_ALPHA: + case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA: + return BLEND_FACTOR_CONSTANT_ALPHA; + case VK_BLEND_FACTOR_SRC1_COLOR: + case VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR: + return BLEND_FACTOR_SRC1_COLOR; + case VK_BLEND_FACTOR_SRC1_ALPHA: + case VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA: + return BLEND_FACTOR_SRC1_ALPHA; + case VK_BLEND_FACTOR_SRC_ALPHA_SATURATE: + return BLEND_FACTOR_SRC_ALPHA_SATURATE; + default: unreachable("Invalid blend factor"); + } +} + +static bool +inverted_blend_factor(VkBlendFactor in, bool dest_has_alpha) +{ + switch (in) { + case VK_BLEND_FACTOR_ONE: + case VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR: + case VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR: + case VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA: + case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR: + case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA: + case VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR: + case VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA: + return true; + case VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA: + return dest_has_alpha ? true : false; + case VK_BLEND_FACTOR_DST_ALPHA: + return !dest_has_alpha ? true : false; + default: + return false; + } +} + +bool +panvk_per_arch(blend_needs_lowering)(const struct panfrost_device *dev, + const struct pan_blend_state *state, + unsigned rt) +{ + /* LogicOp requires a blend shader */ + if (state->logicop_enable) + return true; + + /* Not all formats can be blended by fixed-function hardware */ + if (!panfrost_blendable_formats_v7[state->rts[rt].format].internal) + return true; + + unsigned constant_mask = pan_blend_constant_mask(state->rts[rt].equation); + + /* v6 doesn't support blend constants in FF blend equations. + * v7 only uses the constant from RT 0 (TODO: what if it's the same + * constant? or a constant is shared?) 
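/* Illustrative sketch, not part of the patch: translate_blend_factor() and
 * inverted_blend_factor() above encode Mali blend factors as a base factor
 * plus an "invert" bit: VK_BLEND_FACTOR_ONE is "ZERO, inverted" (1 - 0 = 1),
 * and every ONE_MINUS_* factor is its base factor with the invert bit set.
 * A toy evaluation of that encoding:
 */
#include <assert.h>

struct toy_factor { float base; int invert; };

static float
toy_factor_value(struct toy_factor f)
{
   return f.invert ? 1.0f - f.base : f.base;
}

int
main(void)
{
   struct toy_factor one = { .base = 0.0f, .invert = 1 };            /* VK ONE */
   struct toy_factor one_minus_src = { .base = 0.25f, .invert = 1 }; /* 1 - src */

   assert(toy_factor_value(one) == 1.0f);
   assert(toy_factor_value(one_minus_src) == 0.75f);
   return 0;
}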
+ */ + if (constant_mask && (PAN_ARCH == 6 || (PAN_ARCH == 7 && rt > 0))) + return true; + + if (!pan_blend_is_homogenous_constant(constant_mask, state->constants)) + return true; + + bool supports_2src = pan_blend_supports_2src(dev->arch); + return !pan_blend_can_fixed_function(state->rts[rt].equation, supports_2src); +} + +static void +panvk_pipeline_builder_parse_color_blend(struct panvk_pipeline_builder *builder, + struct panvk_pipeline *pipeline) +{ + struct panfrost_device *pdev = &builder->device->physical_device->pdev; + pipeline->blend.state.logicop_enable = + builder->create_info->pColorBlendState->logicOpEnable; + pipeline->blend.state.logicop_func = + translate_logicop(builder->create_info->pColorBlendState->logicOp); + pipeline->blend.state.rt_count = util_last_bit(builder->active_color_attachments); + memcpy(pipeline->blend.state.constants, + builder->create_info->pColorBlendState->blendConstants, + sizeof(pipeline->blend.state.constants)); + + for (unsigned i = 0; i < pipeline->blend.state.rt_count; i++) { + const VkPipelineColorBlendAttachmentState *in = + &builder->create_info->pColorBlendState->pAttachments[i]; + struct pan_blend_rt_state *out = &pipeline->blend.state.rts[i]; + + out->format = builder->color_attachment_formats[i]; + + bool dest_has_alpha = util_format_has_alpha(out->format); + + out->nr_samples = builder->create_info->pMultisampleState->rasterizationSamples; + out->equation.blend_enable = in->blendEnable; + out->equation.color_mask = in->colorWriteMask; + out->equation.rgb_func = translate_blend_op(in->colorBlendOp); + out->equation.rgb_src_factor = translate_blend_factor(in->srcColorBlendFactor, dest_has_alpha); + out->equation.rgb_invert_src_factor = inverted_blend_factor(in->srcColorBlendFactor, dest_has_alpha); + out->equation.rgb_dst_factor = translate_blend_factor(in->dstColorBlendFactor, dest_has_alpha); + out->equation.rgb_invert_dst_factor = inverted_blend_factor(in->dstColorBlendFactor, dest_has_alpha); + out->equation.alpha_func = translate_blend_op(in->alphaBlendOp); + out->equation.alpha_src_factor = translate_blend_factor(in->srcAlphaBlendFactor, dest_has_alpha); + out->equation.alpha_invert_src_factor = inverted_blend_factor(in->srcAlphaBlendFactor, dest_has_alpha); + out->equation.alpha_dst_factor = translate_blend_factor(in->dstAlphaBlendFactor, dest_has_alpha); + out->equation.alpha_invert_dst_factor = inverted_blend_factor(in->dstAlphaBlendFactor, dest_has_alpha); + + unsigned constant_mask = + panvk_per_arch(blend_needs_lowering)(pdev, &pipeline->blend.state, i) ? + 0 : pan_blend_constant_mask(out->equation); + pipeline->blend.constant[i].index = ffs(constant_mask) - 1; + if (constant_mask && PAN_ARCH >= 6) { + /* On Bifrost, the blend constant is expressed with a UNORM of the + * size of the target format. The value is then shifted such that + * used bits are in the MSB. Here we calculate the factor at pipeline + * creation time so we only have to do a + * hw_constant = float_constant * factor; + * at descriptor emission time. 
+          */
+         const struct util_format_description *format_desc =
+            util_format_description(out->format);
+         unsigned chan_size = 0;
+         for (unsigned c = 0; c < format_desc->nr_channels; c++)
+            chan_size = MAX2(format_desc->channel[c].size, chan_size);
+         pipeline->blend.constant[i].bifrost_factor =
+            ((1 << chan_size) - 1) << (16 - chan_size);
+      }
+   }
+}
+
+static void
+panvk_pipeline_builder_parse_multisample(struct panvk_pipeline_builder *builder,
+                                         struct panvk_pipeline *pipeline)
+{
+   unsigned nr_samples =
+      MAX2(builder->create_info->pMultisampleState->rasterizationSamples, 1);
+
+   pipeline->ms.rast_samples =
+      builder->create_info->pMultisampleState->rasterizationSamples;
+   pipeline->ms.sample_mask =
+      builder->create_info->pMultisampleState->pSampleMask ?
+      builder->create_info->pMultisampleState->pSampleMask[0] : UINT16_MAX;
+   pipeline->ms.min_samples =
+      MAX2(builder->create_info->pMultisampleState->minSampleShading * nr_samples, 1);
+}
+
+static enum mali_stencil_op
+translate_stencil_op(VkStencilOp in)
+{
+   switch (in) {
+   case VK_STENCIL_OP_KEEP: return MALI_STENCIL_OP_KEEP;
+   case VK_STENCIL_OP_ZERO: return MALI_STENCIL_OP_ZERO;
+   case VK_STENCIL_OP_REPLACE: return MALI_STENCIL_OP_REPLACE;
+   case VK_STENCIL_OP_INCREMENT_AND_CLAMP: return MALI_STENCIL_OP_INCR_SAT;
+   case VK_STENCIL_OP_DECREMENT_AND_CLAMP: return MALI_STENCIL_OP_DECR_SAT;
+   case VK_STENCIL_OP_INCREMENT_AND_WRAP: return MALI_STENCIL_OP_INCR_WRAP;
+   case VK_STENCIL_OP_DECREMENT_AND_WRAP: return MALI_STENCIL_OP_DECR_WRAP;
+   case VK_STENCIL_OP_INVERT: return MALI_STENCIL_OP_INVERT;
+   default: unreachable("Invalid stencil op");
+   }
+}
+
+static void
+panvk_pipeline_builder_parse_zs(struct panvk_pipeline_builder *builder,
+                                struct panvk_pipeline *pipeline)
+{
+   pipeline->zs.z_test = builder->create_info->pDepthStencilState->depthTestEnable;
+   pipeline->zs.z_write = builder->create_info->pDepthStencilState->depthWriteEnable;
+   pipeline->zs.z_compare_func =
+      panvk_per_arch(translate_compare_func)(builder->create_info->pDepthStencilState->depthCompareOp);
+   pipeline->zs.s_test = builder->create_info->pDepthStencilState->stencilTestEnable;
+   pipeline->zs.s_front.fail_op =
+      translate_stencil_op(builder->create_info->pDepthStencilState->front.failOp);
+   pipeline->zs.s_front.pass_op =
+      translate_stencil_op(builder->create_info->pDepthStencilState->front.passOp);
+   pipeline->zs.s_front.z_fail_op =
+      translate_stencil_op(builder->create_info->pDepthStencilState->front.depthFailOp);
+   pipeline->zs.s_front.compare_func =
+      panvk_per_arch(translate_compare_func)(builder->create_info->pDepthStencilState->front.compareOp);
+   pipeline->zs.s_front.compare_mask =
+      builder->create_info->pDepthStencilState->front.compareMask;
+   pipeline->zs.s_front.write_mask =
+      builder->create_info->pDepthStencilState->front.writeMask;
+   pipeline->zs.s_front.ref =
+      builder->create_info->pDepthStencilState->front.reference;
+   pipeline->zs.s_back.fail_op =
+      translate_stencil_op(builder->create_info->pDepthStencilState->back.failOp);
+   pipeline->zs.s_back.pass_op =
+      translate_stencil_op(builder->create_info->pDepthStencilState->back.passOp);
+   pipeline->zs.s_back.z_fail_op =
+      translate_stencil_op(builder->create_info->pDepthStencilState->back.depthFailOp);
+   pipeline->zs.s_back.compare_func =
+      panvk_per_arch(translate_compare_func)(builder->create_info->pDepthStencilState->back.compareOp);
+   pipeline->zs.s_back.compare_mask =
+      builder->create_info->pDepthStencilState->back.compareMask;
+   pipeline->zs.s_back.write_mask =
+      builder->create_info->pDepthStencilState->back.writeMask;
+   pipeline->zs.s_back.ref =
+      builder->create_info->pDepthStencilState->back.reference;
+}
+
+static void
+panvk_pipeline_builder_parse_rast(struct panvk_pipeline_builder *builder,
+                                  struct panvk_pipeline *pipeline)
+{
+   pipeline->rast.clamp_depth = builder->create_info->pRasterizationState->depthClampEnable;
+   pipeline->rast.depth_bias.enable = builder->create_info->pRasterizationState->depthBiasEnable;
+   pipeline->rast.depth_bias.constant_factor =
+      builder->create_info->pRasterizationState->depthBiasConstantFactor;
+   pipeline->rast.depth_bias.clamp = builder->create_info->pRasterizationState->depthBiasClamp;
+   pipeline->rast.depth_bias.slope_factor = builder->create_info->pRasterizationState->depthBiasSlopeFactor;
+   pipeline->rast.front_ccw = builder->create_info->pRasterizationState->frontFace == VK_FRONT_FACE_COUNTER_CLOCKWISE;
+   pipeline->rast.cull_front_face = builder->create_info->pRasterizationState->cullMode & VK_CULL_MODE_FRONT_BIT;
+   pipeline->rast.cull_back_face = builder->create_info->pRasterizationState->cullMode & VK_CULL_MODE_BACK_BIT;
+}
+
+static bool
+panvk_fs_required(struct panvk_pipeline *pipeline)
+{
+   const struct pan_shader_info *info = &pipeline->fs.info;
+
+   /* If the fragment shader has side effects, we need to execute it */
+   if (info->fs.sidefx)
+      return true;
+
+   /* If colour is written we need to execute */
+   const struct pan_blend_state *blend = &pipeline->blend.state;
+   for (unsigned i = 0; i < blend->rt_count; ++i) {
+      if (blend->rts[i].equation.color_mask)
+         return true;
+   }
+
+   /* If depth is written and not implied we need to execute.
+    * TODO: Predicate on Z/S writes being enabled */
+   return (info->fs.writes_depth || info->fs.writes_stencil);
+}
+
+#define PANVK_DYNAMIC_FS_RSD_MASK \
+   ((1 << VK_DYNAMIC_STATE_DEPTH_BIAS) | \
+    (1 << VK_DYNAMIC_STATE_BLEND_CONSTANTS) | \
+    (1 << VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK) | \
+    (1 << VK_DYNAMIC_STATE_STENCIL_WRITE_MASK) | \
+    (1 << VK_DYNAMIC_STATE_STENCIL_REFERENCE))
+
+static void
+panvk_pipeline_builder_init_fs_state(struct panvk_pipeline_builder *builder,
+                                     struct panvk_pipeline *pipeline)
+{
+   if (!builder->shaders[MESA_SHADER_FRAGMENT])
+      return;
+
+   pipeline->fs.dynamic_rsd =
+      pipeline->dynamic_state_mask & PANVK_DYNAMIC_FS_RSD_MASK;
+   pipeline->fs.address = pipeline->binary_bo->ptr.gpu +
+                          builder->stages[MESA_SHADER_FRAGMENT].shader_offset;
+   pipeline->fs.info = builder->shaders[MESA_SHADER_FRAGMENT]->info;
+   pipeline->fs.required = panvk_fs_required(pipeline);
+}
+
+static void
+panvk_pipeline_update_varying_slot(struct panvk_varyings_info *varyings,
+                                   gl_shader_stage stage,
+                                   const struct pan_shader_varying *varying,
+                                   bool input)
+{
+   bool fs = stage == MESA_SHADER_FRAGMENT;
+   gl_varying_slot loc = varying->location;
+   enum panvk_varying_buf_id buf_id =
+      panvk_varying_buf_id(fs, loc);
+
+   varyings->stage[stage].loc[varyings->stage[stage].count++] = loc;
+
+   if (panvk_varying_is_builtin(stage, loc)) {
+      varyings->buf_mask |= 1 << buf_id;
+      return;
+   }
+
+   assert(loc < ARRAY_SIZE(varyings->varying));
+
+   enum pipe_format new_fmt = varying->format;
+   enum pipe_format old_fmt = varyings->varying[loc].format;
+
+   BITSET_SET(varyings->active, loc);
+
+   /* We expect inputs to either be set by a previous stage or be built
+    * in, skip the entry if that's not the case, we'll emit a const
+    * varying returning zero for those entries.
+    */
+   if (input && old_fmt == PIPE_FORMAT_NONE)
+      return;
+
+   unsigned new_size = util_format_get_blocksize(new_fmt);
+   unsigned old_size = util_format_get_blocksize(old_fmt);
+
+   if (old_size < new_size)
+      varyings->varying[loc].format = new_fmt;
+
+   varyings->buf_mask |= 1 << buf_id;
+}
+
+static void
+panvk_pipeline_builder_collect_varyings(struct panvk_pipeline_builder *builder,
+                                        struct panvk_pipeline *pipeline)
+{
+   for (uint32_t s = 0; s < MESA_SHADER_STAGES; s++) {
+      if (!builder->shaders[s])
+         continue;
+
+      const struct pan_shader_info *info = &builder->shaders[s]->info;
+
+      for (unsigned i = 0; i < info->varyings.input_count; i++) {
+         panvk_pipeline_update_varying_slot(&pipeline->varyings, s,
+                                            &info->varyings.input[i],
+                                            true);
+      }
+
+      for (unsigned i = 0; i < info->varyings.output_count; i++) {
+         panvk_pipeline_update_varying_slot(&pipeline->varyings, s,
+                                            &info->varyings.output[i],
+                                            false);
+      }
+   }
+
+   /* TODO: Xfb */
+   gl_varying_slot loc;
+   BITSET_FOREACH_SET(loc, pipeline->varyings.active, VARYING_SLOT_MAX) {
+      enum panvk_varying_buf_id buf_id =
+         panvk_varying_buf_id(false, loc);
+      unsigned buf_idx = panvk_varying_buf_index(&pipeline->varyings, buf_id);
+      unsigned varying_sz = panvk_varying_size(&pipeline->varyings, loc);
+
+      pipeline->varyings.varying[loc].buf = buf_idx;
+      pipeline->varyings.varying[loc].offset =
+         pipeline->varyings.buf[buf_idx].stride;
+      pipeline->varyings.buf[buf_idx].stride += varying_sz;
+   }
+}
+
+static void
+panvk_pipeline_builder_parse_vertex_input(struct panvk_pipeline_builder *builder,
+                                          struct panvk_pipeline *pipeline)
+{
+   struct panvk_attribs_info *attribs = &pipeline->attribs;
+   const VkPipelineVertexInputStateCreateInfo *info =
+      builder->create_info->pVertexInputState;
+
+   for (unsigned i = 0; i < info->vertexBindingDescriptionCount; i++) {
+      const VkVertexInputBindingDescription *desc =
+         &info->pVertexBindingDescriptions[i];
+      attribs->buf_count = MAX2(desc->binding + 1, attribs->buf_count);
+      attribs->buf[desc->binding].stride = desc->stride;
+      attribs->buf[desc->binding].special = false;
+   }
+
+   for (unsigned i = 0; i < info->vertexAttributeDescriptionCount; i++) {
+      const VkVertexInputAttributeDescription *desc =
+         &info->pVertexAttributeDescriptions[i];
+      attribs->attrib[desc->location].buf = desc->binding;
+      attribs->attrib[desc->location].format =
+         vk_format_to_pipe_format(desc->format);
+      attribs->attrib[desc->location].offset = desc->offset;
+   }
+
+   const struct pan_shader_info *vs =
+      &builder->shaders[MESA_SHADER_VERTEX]->info;
+
+   if (vs->attribute_count >= PAN_VERTEX_ID) {
+      attribs->buf[attribs->buf_count].special = true;
+      attribs->buf[attribs->buf_count].special_id = PAN_VERTEX_ID;
+      attribs->attrib[PAN_VERTEX_ID].buf = attribs->buf_count++;
+      attribs->attrib[PAN_VERTEX_ID].format = PIPE_FORMAT_R32_UINT;
+   }
+
+   if (vs->attribute_count >= PAN_INSTANCE_ID) {
+      attribs->buf[attribs->buf_count].special = true;
+      attribs->buf[attribs->buf_count].special_id = PAN_INSTANCE_ID;
+      attribs->attrib[PAN_INSTANCE_ID].buf = attribs->buf_count++;
+      attribs->attrib[PAN_INSTANCE_ID].format = PIPE_FORMAT_R32_UINT;
+   }
+
+   attribs->attrib_count = MAX2(attribs->attrib_count, vs->attribute_count);
+}
+
+static VkResult
+panvk_pipeline_builder_build(struct panvk_pipeline_builder *builder,
+                             struct panvk_pipeline **pipeline)
+{
+   VkResult result = panvk_pipeline_builder_create_pipeline(builder, pipeline);
+   if (result != VK_SUCCESS)
+      return result;
+
+   /* TODO: make those functions return a result and handle errors */
+   panvk_pipeline_builder_parse_dynamic(builder, *pipeline);
+   panvk_pipeline_builder_parse_color_blend(builder, *pipeline);
+   panvk_pipeline_builder_compile_shaders(builder, *pipeline);
+   panvk_pipeline_builder_collect_varyings(builder, *pipeline);
+   panvk_pipeline_builder_parse_input_assembly(builder, *pipeline);
+   panvk_pipeline_builder_parse_multisample(builder, *pipeline);
+   panvk_pipeline_builder_parse_zs(builder, *pipeline);
+   panvk_pipeline_builder_parse_rast(builder, *pipeline);
+   panvk_pipeline_builder_parse_vertex_input(builder, *pipeline);
+
+   panvk_pipeline_builder_upload_shaders(builder, *pipeline);
+   panvk_pipeline_builder_init_fs_state(builder, *pipeline);
+   panvk_pipeline_builder_alloc_static_state_bo(builder, *pipeline);
+   panvk_pipeline_builder_init_shaders(builder, *pipeline);
+   panvk_pipeline_builder_parse_viewport(builder, *pipeline);
+
+   return VK_SUCCESS;
+}
+
+static void
+panvk_pipeline_builder_init_graphics(struct panvk_pipeline_builder *builder,
+                                     struct panvk_device *dev,
+                                     struct panvk_pipeline_cache *cache,
+                                     const VkGraphicsPipelineCreateInfo *create_info,
+                                     const VkAllocationCallbacks *alloc)
+{
+   VK_FROM_HANDLE(panvk_pipeline_layout, layout, create_info->layout);
+   assert(layout);
+   *builder = (struct panvk_pipeline_builder) {
+      .device = dev,
+      .cache = cache,
+      .layout = layout,
+      .create_info = create_info,
+      .alloc = alloc,
+   };
+
+   builder->rasterizer_discard =
+      create_info->pRasterizationState->rasterizerDiscardEnable;
+
+   if (builder->rasterizer_discard) {
+      builder->samples = VK_SAMPLE_COUNT_1_BIT;
+   } else {
+      builder->samples = create_info->pMultisampleState->rasterizationSamples;
+
+      const struct panvk_render_pass *pass = panvk_render_pass_from_handle(create_info->renderPass);
+      const struct panvk_subpass *subpass = &pass->subpasses[create_info->subpass];
+
+      builder->use_depth_stencil_attachment =
+         subpass->zs_attachment.idx != VK_ATTACHMENT_UNUSED;
+
+      assert(subpass->color_count == create_info->pColorBlendState->attachmentCount);
+      builder->active_color_attachments = 0;
+      for (uint32_t i = 0; i < subpass->color_count; i++) {
+         uint32_t idx = subpass->color_attachments[i].idx;
+         if (idx == VK_ATTACHMENT_UNUSED)
+            continue;
+
+         builder->active_color_attachments |= 1 << i;
+         builder->color_attachment_formats[i] = pass->attachments[idx].format;
+      }
+   }
+}
+
+VkResult
+panvk_per_arch(CreateGraphicsPipelines)(VkDevice device,
+                                        VkPipelineCache pipelineCache,
+                                        uint32_t count,
+                                        const VkGraphicsPipelineCreateInfo *pCreateInfos,
+                                        const VkAllocationCallbacks *pAllocator,
+                                        VkPipeline *pPipelines)
+{
+   VK_FROM_HANDLE(panvk_device, dev, device);
+   VK_FROM_HANDLE(panvk_pipeline_cache, cache, pipelineCache);
+
+   for (uint32_t i = 0; i < count; i++) {
+      struct panvk_pipeline_builder builder;
+      panvk_pipeline_builder_init_graphics(&builder, dev, cache,
+                                           &pCreateInfos[i], pAllocator);
+
+      struct panvk_pipeline *pipeline;
+      VkResult result = panvk_pipeline_builder_build(&builder, &pipeline);
+      panvk_pipeline_builder_finish(&builder);
+
+      if (result != VK_SUCCESS) {
+         for (uint32_t j = 0; j < i; j++) {
+            panvk_DestroyPipeline(device, pPipelines[j], pAllocator);
+            pPipelines[j] = VK_NULL_HANDLE;
+         }
+
+         return result;
+      }
+
+      pPipelines[i] = panvk_pipeline_to_handle(pipeline);
+   }
+
+   return VK_SUCCESS;
+}
diff --git a/src/panfrost/vulkan/panvk_vX_shader.c b/src/panfrost/vulkan/panvk_vX_shader.c
new file mode 100644
index 00000000000..f0245919cb5
--- /dev/null
+++ b/src/panfrost/vulkan/panvk_vX_shader.c
@@ -0,0 +1,386 @@
+/*
+ * Copyright © 2021 Collabora Ltd.
+ *
+ * Derived from tu_shader.c which is:
+ * Copyright © 2019 Google LLC
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include "gen_macros.h"
+
+#include "panvk_private.h"
+
+#include "nir_builder.h"
+#include "nir_lower_blend.h"
+#include "spirv/nir_spirv.h"
+#include "util/mesa-sha1.h"
+
+#include "panfrost-quirks.h"
+#include "pan_shader.h"
+
+#include "vk_util.h"
+
+static nir_shader *
+panvk_spirv_to_nir(const void *code,
+                   size_t codesize,
+                   gl_shader_stage stage,
+                   const char *entry_point_name,
+                   const VkSpecializationInfo *spec_info,
+                   const nir_shader_compiler_options *nir_options)
+{
+   /* TODO these are made-up */
+   const struct spirv_to_nir_options spirv_options = {
+      .caps = { false },
+      .ubo_addr_format = nir_address_format_32bit_index_offset,
+      .ssbo_addr_format = nir_address_format_32bit_index_offset,
+   };
+
+   /* convert VkSpecializationInfo */
+   uint32_t num_spec = 0;
+   struct nir_spirv_specialization *spec =
+      vk_spec_info_to_nir_spirv(spec_info, &num_spec);
+
+   nir_shader *nir = spirv_to_nir(code, codesize / sizeof(uint32_t), spec,
+                                  num_spec, stage, entry_point_name,
+                                  &spirv_options, nir_options);
+
+   free(spec);
+
+   assert(nir->info.stage == stage);
+   nir_validate_shader(nir, "after spirv_to_nir");
+
+   return nir;
+}
+
+struct panvk_lower_misc_ctx {
+   struct panvk_shader *shader;
+   const struct panvk_pipeline_layout *layout;
+};
+
+static unsigned
+get_fixed_sampler_index(nir_deref_instr *deref,
+                        const struct panvk_lower_misc_ctx *ctx)
+{
+   nir_variable *var = nir_deref_instr_get_variable(deref);
+   unsigned set = var->data.descriptor_set;
+   unsigned binding = var->data.binding;
+   const struct panvk_descriptor_set_binding_layout *bind_layout =
+      &ctx->layout->sets[set].layout->bindings[binding];
+
+   return bind_layout->sampler_idx + ctx->layout->sets[set].sampler_offset;
+}
+
+static unsigned
+get_fixed_texture_index(nir_deref_instr *deref,
+                        const struct panvk_lower_misc_ctx *ctx)
+{
+   nir_variable *var = nir_deref_instr_get_variable(deref);
+   unsigned set = var->data.descriptor_set;
+   unsigned binding = var->data.binding;
+   const struct panvk_descriptor_set_binding_layout *bind_layout =
+      &ctx->layout->sets[set].layout->bindings[binding];
+
+   return bind_layout->tex_idx + ctx->layout->sets[set].tex_offset;
+}
+
+static bool
+lower_tex(nir_builder *b, nir_tex_instr *tex,
+          const struct panvk_lower_misc_ctx *ctx)
+{
+   bool progress = false;
+   int sampler_src_idx = nir_tex_instr_src_index(tex, nir_tex_src_sampler_deref);
+
+   b->cursor = nir_before_instr(&tex->instr);
+
+   if (sampler_src_idx >= 0) {
+      nir_deref_instr *deref = nir_src_as_deref(tex->src[sampler_src_idx].src);
+      tex->sampler_index = get_fixed_sampler_index(deref, ctx);
+      nir_tex_instr_remove_src(tex, sampler_src_idx);
+      progress = true;
+   }
+
+   int tex_src_idx = nir_tex_instr_src_index(tex, nir_tex_src_texture_deref);
+   if (tex_src_idx >= 0) {
+      nir_deref_instr *deref = nir_src_as_deref(tex->src[tex_src_idx].src);
+      tex->texture_index = get_fixed_texture_index(deref, ctx);
+      nir_tex_instr_remove_src(tex, tex_src_idx);
+      progress = true;
+   }
+
+   return progress;
+}
+
+static void
+lower_vulkan_resource_index(nir_builder *b, nir_intrinsic_instr *intr,
+                            const struct panvk_lower_misc_ctx *ctx)
+{
+   nir_ssa_def *vulkan_idx = intr->src[0].ssa;
+
+   unsigned set = nir_intrinsic_desc_set(intr);
+   unsigned binding = nir_intrinsic_binding(intr);
+   struct panvk_descriptor_set_layout *set_layout = ctx->layout->sets[set].layout;
+   struct panvk_descriptor_set_binding_layout *binding_layout =
+      &set_layout->bindings[binding];
+   unsigned base;
+
+   switch (binding_layout->type) {
+   case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
+   case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
+      base = binding_layout->ubo_idx + ctx->layout->sets[set].ubo_offset;
+      break;
+   case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
+   case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
+      base = binding_layout->ssbo_idx + ctx->layout->sets[set].ssbo_offset;
+      break;
+   default:
+      unreachable("Invalid descriptor type");
+      break;
+   }
+
+   b->cursor = nir_before_instr(&intr->instr);
+   nir_ssa_def *idx = nir_iadd(b, nir_imm_int(b, base), vulkan_idx);
+   nir_ssa_def_rewrite_uses(&intr->dest.ssa, idx);
+   nir_instr_remove(&intr->instr);
+}
+
+static void
+lower_load_vulkan_descriptor(nir_builder *b, nir_intrinsic_instr *intrin)
+{
+   /* Loading the descriptor happens as part of the load/store instruction so
+    * this is a no-op.
+    */
+   b->cursor = nir_before_instr(&intrin->instr);
+   nir_ssa_def *val = nir_vec2(b, intrin->src[0].ssa, nir_imm_int(b, 0));
+   nir_ssa_def_rewrite_uses(&intrin->dest.ssa, val);
+   nir_instr_remove(&intrin->instr);
+}
+
+static bool
+lower_intrinsic(nir_builder *b, nir_intrinsic_instr *intr,
+                const struct panvk_lower_misc_ctx *ctx)
+{
+   switch (intr->intrinsic) {
+   case nir_intrinsic_vulkan_resource_index:
+      lower_vulkan_resource_index(b, intr, ctx);
+      return true;
+   case nir_intrinsic_load_vulkan_descriptor:
+      lower_load_vulkan_descriptor(b, intr);
+      return true;
+   default:
+      return false;
+   }
+}
+
+static bool
+panvk_lower_misc_instr(nir_builder *b,
+                       nir_instr *instr,
+                       void *data)
+{
+   const struct panvk_lower_misc_ctx *ctx = data;
+
+   switch (instr->type) {
+   case nir_instr_type_tex:
+      return lower_tex(b, nir_instr_as_tex(instr), ctx);
+   case nir_instr_type_intrinsic:
+      return lower_intrinsic(b, nir_instr_as_intrinsic(instr), ctx);
+   default:
+      return false;
+   }
+}
+
+static bool
+panvk_lower_misc(nir_shader *nir, const struct panvk_lower_misc_ctx *ctx)
+{
+   return nir_shader_instructions_pass(nir, panvk_lower_misc_instr,
+                                       nir_metadata_block_index |
+                                       nir_metadata_dominance,
+                                       (void *)ctx);
+}
+
+static void
+panvk_lower_blend(struct panfrost_device *pdev,
+                  nir_shader *nir,
+                  struct pan_blend_state *blend_state,
+                  bool static_blend_constants)
+{
+   nir_lower_blend_options options = {
+      .logicop_enable = blend_state->logicop_enable,
+      .logicop_func = blend_state->logicop_func,
+   };
+
+   bool lower_blend = false;
+   for (unsigned rt = 0; rt < blend_state->rt_count; rt++) {
+      if (!panvk_per_arch(blend_needs_lowering)(pdev, blend_state, rt))
+         continue;
+
+      const struct pan_blend_rt_state *rt_state = &blend_state->rts[rt];
+      options.rt[rt].colormask = rt_state->equation.color_mask;
+      options.format[rt] = rt_state->format;
+      if (!rt_state->equation.blend_enable) {
+         static const nir_lower_blend_channel replace = {
+            .func = BLEND_FUNC_ADD,
+            .src_factor = BLEND_FACTOR_ZERO,
+            .invert_src_factor = true,
+            .dst_factor = BLEND_FACTOR_ZERO,
+            .invert_dst_factor = false,
+         };
+
+         options.rt[rt].rgb = replace;
+         options.rt[rt].alpha = replace;
+      } else {
+         options.rt[rt].rgb.func = rt_state->equation.rgb_func;
+         options.rt[rt].rgb.src_factor = rt_state->equation.rgb_src_factor;
+         options.rt[rt].rgb.invert_src_factor = rt_state->equation.rgb_invert_src_factor;
+         options.rt[rt].rgb.dst_factor = rt_state->equation.rgb_dst_factor;
+         options.rt[rt].rgb.invert_dst_factor = rt_state->equation.rgb_invert_dst_factor;
+         options.rt[rt].alpha.func = rt_state->equation.alpha_func;
+         options.rt[rt].alpha.src_factor = rt_state->equation.alpha_src_factor;
+         options.rt[rt].alpha.invert_src_factor = rt_state->equation.alpha_invert_src_factor;
+         options.rt[rt].alpha.dst_factor = rt_state->equation.alpha_dst_factor;
+         options.rt[rt].alpha.invert_dst_factor = rt_state->equation.alpha_invert_dst_factor;
+      }
+
+      lower_blend = true;
+   }
+
+   /* FIXME: currently untested */
+   assert(!lower_blend);
+
+   if (lower_blend)
+      NIR_PASS_V(nir, nir_lower_blend, options);
+}
+
+struct panvk_shader *
+panvk_per_arch(shader_create)(struct panvk_device *dev,
+                              gl_shader_stage stage,
+                              const VkPipelineShaderStageCreateInfo *stage_info,
+                              const struct panvk_pipeline_layout *layout,
+                              unsigned sysval_ubo,
+                              struct pan_blend_state *blend_state,
+                              bool static_blend_constants,
+                              const VkAllocationCallbacks *alloc)
+{
+   const struct panvk_shader_module *module = panvk_shader_module_from_handle(stage_info->module);
+   struct panfrost_device *pdev = &dev->physical_device->pdev;
+   struct panvk_shader *shader;
+
+   shader = vk_zalloc2(&dev->vk.alloc, alloc, sizeof(*shader), 8,
+                       VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
+   if (!shader)
+      return NULL;
+
+   util_dynarray_init(&shader->binary, NULL);
+
+   /* translate SPIR-V to NIR */
+   assert(module->code_size % 4 == 0);
+   nir_shader *nir = panvk_spirv_to_nir(module->code,
+                                        module->code_size,
+                                        stage, stage_info->pName,
+                                        stage_info->pSpecializationInfo,
+                                        pan_shader_get_compiler_options(pdev));
+   if (!nir) {
+      vk_free2(&dev->vk.alloc, alloc, shader);
+      return NULL;
+   }
+
+   if (stage == MESA_SHADER_FRAGMENT)
+      panvk_lower_blend(pdev, nir, blend_state, static_blend_constants);
+
+   /* multi step inlining procedure */
+   NIR_PASS_V(nir, nir_lower_variable_initializers, nir_var_function_temp);
+   NIR_PASS_V(nir, nir_lower_returns);
+   NIR_PASS_V(nir, nir_inline_functions);
+   NIR_PASS_V(nir, nir_copy_prop);
+   NIR_PASS_V(nir, nir_opt_deref);
+   foreach_list_typed_safe(nir_function, func, node, &nir->functions) {
+      if (!func->is_entrypoint)
+         exec_node_remove(&func->node);
+   }
+   assert(exec_list_length(&nir->functions) == 1);
+   NIR_PASS_V(nir, nir_lower_variable_initializers, ~nir_var_function_temp);
+
+   /* Split member structs. We do this before lower_io_to_temporaries so that
+    * it doesn't lower system values to temporaries by accident.
+    */
+   NIR_PASS_V(nir, nir_split_var_copies);
+   NIR_PASS_V(nir, nir_split_per_member_structs);
+
+   NIR_PASS_V(nir, nir_remove_dead_variables,
+              nir_var_shader_in | nir_var_shader_out |
+              nir_var_system_value | nir_var_mem_shared,
+              NULL);
+
+   NIR_PASS_V(nir, nir_lower_io_to_temporaries,
+              nir_shader_get_entrypoint(nir), true, true);
+
+   NIR_PASS_V(nir, nir_lower_indirect_derefs,
+              nir_var_shader_in | nir_var_shader_out,
+              UINT32_MAX);
+
+   NIR_PASS_V(nir, nir_opt_copy_prop_vars);
+   NIR_PASS_V(nir, nir_opt_combine_stores, nir_var_all);
+
+   NIR_PASS_V(nir, nir_lower_uniforms_to_ubo, true, false);
+   NIR_PASS_V(nir, nir_lower_explicit_io,
+              nir_var_mem_ubo | nir_var_mem_ssbo,
+              nir_address_format_32bit_index_offset);
+
+   nir_assign_io_var_locations(nir, nir_var_shader_in, &nir->num_inputs, stage);
+   nir_assign_io_var_locations(nir, nir_var_shader_out, &nir->num_outputs, stage);
+
+   NIR_PASS_V(nir, nir_lower_system_values);
+   NIR_PASS_V(nir, nir_lower_compute_system_values, NULL);
+
+   NIR_PASS_V(nir, nir_lower_var_copies);
+
+   struct panvk_lower_misc_ctx ctx = {
+      .shader = shader,
+      .layout = layout,
+   };
+   NIR_PASS_V(nir, panvk_lower_misc, &ctx);
+
+   nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
+   if (unlikely(dev->physical_device->instance->debug_flags & PANVK_DEBUG_NIR)) {
+      fprintf(stderr, "translated nir:\n");
+      nir_print_shader(nir, stderr);
+   }
+
+   struct panfrost_compile_inputs inputs = {
+      .gpu_id = pdev->gpu_id,
+      .no_ubo_to_push = true,
+      .sysval_ubo = sysval_ubo,
+   };
+
+   pan_shader_compile(pdev, nir, &inputs, &shader->binary, &shader->info);
+
+   /* Patch the descriptor count */
+   shader->info.ubo_count =
+      shader->info.sysvals.sysval_count ? sysval_ubo + 1 : layout->num_ubos;
+   shader->info.sampler_count = layout->num_samplers;
+   shader->info.texture_count = layout->num_textures;
+
+   shader->sysval_ubo = sysval_ubo;
+
+   ralloc_free(nir);
+
+   return shader;
+}
diff --git a/src/panfrost/vulkan/panvk_varyings.h b/src/panfrost/vulkan/panvk_varyings.h
index 14dd5327ca7..01bb7499f56 100644
--- a/src/panfrost/vulkan/panvk_varyings.h
+++ b/src/panfrost/vulkan/panvk_varyings.h
@@ -28,9 +28,10 @@
 #include "util/format/u_format.h"
 
 #include "compiler/shader_enums.h"
-#include "gen_macros.h"
 #include "panfrost-job.h"
 
+#include "pan_pool.h"
+
 struct pan_pool;
 struct panvk_device;
 
@@ -69,15 +70,6 @@ struct panvk_varyings_info {
    unsigned buf_mask;
 };
 
-void
-panvk_varyings_alloc(struct panvk_varyings_info *varyings,
-                     struct pan_pool *varying_mem_pool,
-                     unsigned vertex_count);
-
-unsigned
-panvk_varyings_buf_count(const struct panvk_device *dev,
-                         struct panvk_varyings_info *varyings);
-
 static inline unsigned
 panvk_varying_buf_index(const struct panvk_varyings_info *varyings,
                         enum panvk_varying_buf_id b)
@@ -114,6 +106,7 @@ panvk_varying_is_builtin(gl_shader_stage stage, gl_varying_slot loc)
    }
 }
 
+#if defined(PAN_ARCH) && PAN_ARCH <= 5
 static inline enum mali_attribute_special
 panvk_varying_special_buf_id(enum panvk_varying_buf_id buf_id)
 {
@@ -126,6 +119,7 @@ panvk_varying_special_buf_id(enum panvk_varying_buf_id buf_id)
       return 0;
    }
 }
+#endif
 
 static inline unsigned
 panvk_varying_size(const struct panvk_varyings_info *varyings,
@@ -141,4 +135,34 @@ panvk_varying_size(const struct panvk_varyings_info *varyings,
    }
 }
 
+#ifdef PAN_ARCH
+static inline unsigned
+panvk_varyings_buf_count(struct panvk_varyings_info *varyings)
+{
+   return util_bitcount(varyings->buf_mask) + (PAN_ARCH >= 6 ? 1 : 0);
+}
+#endif
+
+static inline void
+panvk_varyings_alloc(struct panvk_varyings_info *varyings,
+                     struct pan_pool *varying_mem_pool,
+                     unsigned vertex_count)
+{
+   for (unsigned i = 0; i < PANVK_VARY_BUF_MAX; i++) {
+      if (!(varyings->buf_mask & (1 << i)))
+         continue;
+
+      unsigned buf_idx = panvk_varying_buf_index(varyings, i);
+      unsigned size = varyings->buf[buf_idx].stride * vertex_count;
+      if (!size)
+         continue;
+
+      struct panfrost_ptr ptr =
+         pan_pool_alloc_aligned(varying_mem_pool, size, 64);
+
+      varyings->buf[buf_idx].size = size;
+      varyings->buf[buf_idx].address = ptr.gpu;
+      varyings->buf[buf_idx].cpu = ptr.cpu;
+   }
+}
+
 #endif
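
A few illustrative notes on the code above follow; the sketches are commentary only and not part of the diff.

On translate_blend_factor()/inverted_blend_factor(): the hardware factor is stored as a (factor, invert) pair, so VK_BLEND_FACTOR_ONE is encoded as an inverted ZERO (1 - 0), and DST_ALPHA folds to a plain or inverted ZERO when the target format has no alpha channel, since a missing destination alpha reads as 1.0. A minimal sketch of the encoding, assuming the two static helpers from panvk_vX_pipeline.c are in scope:

    #include <assert.h>

    static void
    check_blend_factor_encoding(void)
    {
       /* VK_BLEND_FACTOR_ONE: 1.0 == 1 - 0 -> (ZERO, inverted) */
       assert(translate_blend_factor(VK_BLEND_FACTOR_ONE, true) == BLEND_FACTOR_ZERO);
       assert(inverted_blend_factor(VK_BLEND_FACTOR_ONE, true));

       /* DST_ALPHA with no alpha channel reads as 1.0 -> (ZERO, inverted),
        * while ONE_MINUS_DST_ALPHA becomes 0.0 -> (ZERO, not inverted). */
       assert(translate_blend_factor(VK_BLEND_FACTOR_DST_ALPHA, false) == BLEND_FACTOR_ZERO);
       assert(inverted_blend_factor(VK_BLEND_FACTOR_DST_ALPHA, false));
       assert(!inverted_blend_factor(VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA, false));
    }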
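
The bifrost_factor precomputed in panvk_pipeline_builder_parse_color_blend() implements the UNORM encoding described in the comment there. A sketch of the descriptor-emission-time conversion it enables; the helper name is made up and the truncating cast is an assumption about rounding:

    #include <stdint.h>

    static uint16_t
    blend_constant_to_hw(float constant, unsigned chan_size)
    {
       /* Same formula as pipeline->blend.constant[i].bifrost_factor. */
       uint16_t factor = ((1u << chan_size) - 1) << (16 - chan_size);

       /* hw_constant = float_constant * factor; e.g. chan_size == 8 gives
        * factor == 0xff00, so a constant of 1.0 maps to 0xff00 with the
        * 8 used bits sitting in the MSBs of the 16-bit field. */
       return (uint16_t)(constant * factor);
    }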
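
panvk_pipeline_builder_parse_dynamic() can record states with "1 << state" because the core VK_DYNAMIC_STATE_* values it accepts (VIEWPORT through STENCIL_REFERENCE) are small consecutive integers. panvk_pipeline_static_state() is referenced but not defined in this diff; presumably it is just the inverse bit test, roughly:

    static bool
    pipeline_static_state_sketch(const struct panvk_pipeline *pipeline,
                                 VkDynamicState state)
    {
       /* A state is static (baked at pipeline-creation time) when its bit
        * is not set in dynamic_state_mask. */
       return !(pipeline->dynamic_state_mask & (1u << state));
    }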
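
lower_vulkan_resource_index() rewrites a (set, binding, array index) triple into a single flat UBO/SSBO table slot. Outside NIR, the same arithmetic looks like the hypothetical helper below (UBO case only; the SSBO case is identical with ssbo_idx/ssbo_offset):

    static unsigned
    flatten_ubo_index_sketch(const struct panvk_pipeline_layout *layout,
                             unsigned set, unsigned binding, unsigned array_index)
    {
       const struct panvk_descriptor_set_binding_layout *bind_layout =
          &layout->sets[set].layout->bindings[binding];

       /* base (the binding's slot plus the per-set offset) is the constant
        * the pass folds into a nir_iadd with the dynamic array index. */
       return bind_layout->ubo_idx + layout->sets[set].ubo_offset + array_index;
    }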
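
Finally, the loop at the end of panvk_pipeline_builder_collect_varyings() packs varyings back to back: each active slot takes the buffer's current stride as its offset, then bumps the stride by the slot size. A standalone toy version of the same scheme with made-up sizes:

    static void
    pack_varyings_sketch(void)
    {
       /* Two fp32 vec4 varyings in one buffer: offsets become 0 and 16,
        * and the final per-vertex stride is 32. */
       unsigned offset[2], stride = 0;
       const unsigned size[2] = { 16, 16 };

       for (unsigned i = 0; i < 2; i++) {
          offset[i] = stride;
          stride += size[i];
       }

       (void)offset;
    }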