diff --git a/meson.build b/meson.build index ff333961a0d..d05c8a26558 100644 --- a/meson.build +++ b/meson.build @@ -278,6 +278,7 @@ endif with_intel_vk = _vulkan_drivers.contains('intel') with_amd_vk = _vulkan_drivers.contains('amd') with_freedreno_vk = _vulkan_drivers.contains('freedreno') +with_panfrost_vk = _vulkan_drivers.contains('panfrost') with_swrast_vk = _vulkan_drivers.contains('swrast') with_virtio_vk = _vulkan_drivers.contains('virtio-experimental') with_freedreno_kgsl = get_option('freedreno-kgsl') diff --git a/meson_options.txt b/meson_options.txt index 54e15e9f850..1e0986b5eba 100644 --- a/meson_options.txt +++ b/meson_options.txt @@ -184,7 +184,7 @@ option( 'vulkan-drivers', type : 'array', value : ['auto'], - choices : ['auto', 'amd', 'broadcom', 'freedreno', 'intel', 'swrast', 'virtio-experimental'], + choices : ['auto', 'amd', 'broadcom', 'freedreno', 'intel', 'panfrost', 'swrast', 'virtio-experimental'], description : 'List of vulkan drivers to build. If this is set to auto all drivers applicable to the target OS/architecture will be built' ) option( diff --git a/src/meson.build b/src/meson.build index 190041207bf..acc7fe94abb 100644 --- a/src/meson.build +++ b/src/meson.build @@ -86,7 +86,7 @@ endif if with_gallium_freedreno or with_freedreno_vk subdir('freedreno') endif -if with_gallium_panfrost or with_gallium_lima +if with_gallium_panfrost or with_gallium_lima or with_panfrost_vk subdir('panfrost') endif if with_gallium_virgl or with_virtio_vk diff --git a/src/panfrost/meson.build b/src/panfrost/meson.build index dc0473b634a..cd6febe7877 100644 --- a/src/panfrost/meson.build +++ b/src/panfrost/meson.build @@ -66,3 +66,7 @@ bifrost_compiler = executable( ], build_by_default : with_tools.contains('panfrost') ) + +if with_panfrost_vk + subdir('vulkan') +endif diff --git a/src/panfrost/vulkan/meson.build b/src/panfrost/vulkan/meson.build new file mode 100644 index 00000000000..06d95d4c6b0 --- /dev/null +++ b/src/panfrost/vulkan/meson.build @@ -0,0 +1,122 @@ +# Copyright © 2021 Collabora Ltd. +# +# Derived from the freedreno driver which is: +# Copyright © 2017 Intel Corporation + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
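+
+# panvk_entrypoints.[ch] are generated at build time: @INPUT0@ is the
+# vk_entrypoints_gen script, @INPUT1@ the Vulkan registry XML, and
+# '--prefix panvk' puts the generated entrypoints in the panvk_* namespace.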
+
+panvk_entrypoints = custom_target(
+  'panvk_entrypoints.[ch]',
+  input : [vk_entrypoints_gen, vk_api_xml],
+  output : ['panvk_entrypoints.h', 'panvk_entrypoints.c'],
+  command : [
+    prog_python, '@INPUT0@', '--xml', '@INPUT1@', '--proto', '--weak',
+    '--out-h', '@OUTPUT0@', '--out-c', '@OUTPUT1@', '--prefix', 'panvk',
+  ],
+)
+
+libpanvk_files = files(
+  'panvk_cmd_buffer.c',
+  'panvk_cs.c',
+  'panvk_device.c',
+  'panvk_descriptor_set.c',
+  'panvk_formats.c',
+  'panvk_image.c',
+  'panvk_meta.c',
+  'panvk_pass.c',
+  'panvk_pipeline.c',
+  'panvk_pipeline_cache.c',
+  'panvk_private.h',
+  'panvk_query.c',
+  'panvk_shader.c',
+  'panvk_sync.c',
+  'panvk_util.c',
+  'panvk_varyings.c',
+  'panvk_wsi.c',
+)
+
+panvk_deps = []
+panvk_flags = []
+
+if system_has_kms_drm
+  panvk_flags += '-DVK_USE_PLATFORM_DISPLAY_KHR'
+  libpanvk_files += files('panvk_wsi_display.c')
+endif
+
+if with_platform_wayland
+  panvk_deps += [dep_wayland_client, dep_wl_protocols]
+  panvk_flags += '-DVK_USE_PLATFORM_WAYLAND_KHR'
+  libpanvk_files += files('panvk_wsi_wayland.c')
+  libpanvk_files += [wayland_drm_client_protocol_h, wayland_drm_protocol_c]
+endif
+
+libvulkan_panfrost = shared_library(
+  'vulkan_panfrost',
+  [libpanvk_files, panvk_entrypoints],
+  include_directories : [
+    inc_include,
+    inc_src,
+    inc_compiler,
+    inc_gallium, # XXX: pipe/p_format.h
+    inc_gallium_aux, # XXX: renderonly
+    inc_vulkan_wsi,
+    inc_panfrost,
+  ],
+  link_with : [
+    libvulkan_wsi,
+    libpanfrost_shared,
+    libpanfrost_midgard,
+    libpanfrost_bifrost,
+    libpanfrost_decode,
+    libpanfrost_lib,
+    libpanfrost_util,
+  ],
+  dependencies : [
+    dep_dl,
+    dep_elf,
+    dep_libdrm,
+    dep_m,
+    dep_thread,
+    dep_valgrind,
+    idep_nir,
+    panvk_deps,
+    idep_vulkan_util,
+    idep_mesautil,
+  ],
+  c_args : [no_override_init_args, panvk_flags],
+  link_args : [ld_args_bsymbolic, ld_args_gc_sections],
+  install : true,
+)
+
+panfrost_icd = custom_target(
+  'panfrost_icd',
+  input : [vk_icd_gen, vk_api_xml],
+  output : 'panfrost_icd.@0@.json'.format(host_machine.cpu()),
+  command : [
+    prog_python, '@INPUT0@',
+    '--api-version', '1.1', '--xml', '@INPUT1@',
+    '--lib-path', join_paths(get_option('prefix'), get_option('libdir'),
+                             'libvulkan_panfrost.so'),
+    '--out', '@OUTPUT@',
+  ],
+  build_by_default : true,
+  install_dir : with_vulkan_icd_dir,
+  install : true,
+)
diff --git a/src/panfrost/vulkan/panvk_cmd_buffer.c b/src/panfrost/vulkan/panvk_cmd_buffer.c
new file mode 100644
index 00000000000..b6d159aec40
--- /dev/null
+++ b/src/panfrost/vulkan/panvk_cmd_buffer.c
@@ -0,0 +1,1467 @@
+/*
+ * Copyright © 2021 Collabora Ltd.
+ *
+ * Derived from tu_cmd_buffer.c which is:
+ * Copyright © 2016 Red Hat.
+ * Copyright © 2016 Bas Nieuwenhuizen
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include "panvk_cs.h" +#include "panvk_private.h" +#include "panfrost-quirks.h" + +#include "pan_blitter.h" +#include "pan_encoder.h" + +#include "util/rounding.h" +#include "util/u_pack_color.h" +#include "vk_format.h" + +static VkResult +panvk_reset_cmdbuf(struct panvk_cmd_buffer *cmdbuf) +{ + struct panvk_device *device = cmdbuf->device; + struct panfrost_device *pdev = &cmdbuf->device->physical_device->pdev; + + cmdbuf->record_result = VK_SUCCESS; + + list_for_each_entry_safe(struct panvk_batch, batch, &cmdbuf->batches, node) { + list_del(&batch->node); + util_dynarray_fini(&batch->jobs); + if (!pan_is_bifrost(pdev)) + panfrost_bo_unreference(batch->tiler.ctx.midgard.polygon_list); + vk_free(&cmdbuf->pool->alloc, batch); + } + + panfrost_pool_cleanup(&cmdbuf->desc_pool); + panfrost_pool_cleanup(&cmdbuf->tls_pool); + panfrost_pool_cleanup(&cmdbuf->varying_pool); + panfrost_pool_init(&cmdbuf->desc_pool, NULL, &device->physical_device->pdev, + 0, 64 * 1024, "Command buffer descriptor pool", + true, true); + panfrost_pool_init(&cmdbuf->tls_pool, NULL, &device->physical_device->pdev, + PAN_BO_INVISIBLE, 64 * 1024, "TLS pool", false, true); + panfrost_pool_init(&cmdbuf->varying_pool, NULL, &device->physical_device->pdev, + PAN_BO_INVISIBLE, 64 * 1024, "Varyings pool", false, true); + cmdbuf->status = PANVK_CMD_BUFFER_STATUS_INITIAL; + + for (unsigned i = 0; i < MAX_BIND_POINTS; i++) + memset(&cmdbuf->descriptors[i].sets, 0, sizeof(cmdbuf->descriptors[i].sets)); + + return cmdbuf->record_result; +} + +static VkResult +panvk_create_cmdbuf(struct panvk_device *device, + struct panvk_cmd_pool *pool, + VkCommandBufferLevel level, + struct panvk_cmd_buffer **cmdbuf_out) +{ + struct panvk_cmd_buffer *cmdbuf; + + cmdbuf = vk_object_zalloc(&device->vk, NULL, sizeof(*cmdbuf), + VK_OBJECT_TYPE_COMMAND_BUFFER); + if (!cmdbuf) + return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); + + cmdbuf->device = device; + cmdbuf->level = level; + cmdbuf->pool = pool; + panfrost_pool_init(&cmdbuf->desc_pool, NULL, &device->physical_device->pdev, + 0, 64 * 1024, "Command buffer descriptor pool", + true, true); + panfrost_pool_init(&cmdbuf->tls_pool, NULL, &device->physical_device->pdev, + PAN_BO_INVISIBLE, 64 * 1024, "TLS pool", false, true); + panfrost_pool_init(&cmdbuf->varying_pool, NULL, &device->physical_device->pdev, + PAN_BO_INVISIBLE, 64 * 1024, "Varyings pool", false, true); + list_inithead(&cmdbuf->batches); + cmdbuf->status = PANVK_CMD_BUFFER_STATUS_INITIAL; + *cmdbuf_out = cmdbuf; + return VK_SUCCESS; +} + +static void +panvk_destroy_cmdbuf(struct panvk_cmd_buffer *cmdbuf) +{ + struct panfrost_device *pdev = &cmdbuf->device->physical_device->pdev; + struct panvk_device *device = cmdbuf->device; + + list_for_each_entry_safe(struct panvk_batch, batch, &cmdbuf->batches, node) { + list_del(&batch->node); + util_dynarray_fini(&batch->jobs); + if (!pan_is_bifrost(pdev)) + panfrost_bo_unreference(batch->tiler.ctx.midgard.polygon_list); + vk_free(&cmdbuf->pool->alloc, batch); + } + + 
panfrost_pool_cleanup(&cmdbuf->desc_pool); + panfrost_pool_cleanup(&cmdbuf->tls_pool); + panfrost_pool_cleanup(&cmdbuf->varying_pool); + vk_object_free(&device->vk, NULL, cmdbuf); +} + +VkResult +panvk_AllocateCommandBuffers(VkDevice _device, + const VkCommandBufferAllocateInfo *pAllocateInfo, + VkCommandBuffer *pCommandBuffers) +{ + VK_FROM_HANDLE(panvk_device, device, _device); + VK_FROM_HANDLE(panvk_cmd_pool, pool, pAllocateInfo->commandPool); + + VkResult result = VK_SUCCESS; + unsigned i; + + for (i = 0; i < pAllocateInfo->commandBufferCount; i++) { + struct panvk_cmd_buffer *cmdbuf = NULL; + + result = panvk_create_cmdbuf(device, pool, pAllocateInfo->level, &cmdbuf); + if (result != VK_SUCCESS) + goto err_free_cmd_bufs; + + pCommandBuffers[i] = panvk_cmd_buffer_to_handle(cmdbuf); + } + + return VK_SUCCESS; + +err_free_cmd_bufs: + panvk_FreeCommandBuffers(_device, pAllocateInfo->commandPool, i, + pCommandBuffers); + for (unsigned j = 0; j < i; j++) + pCommandBuffers[j] = VK_NULL_HANDLE; + + return result; +} + +void +panvk_FreeCommandBuffers(VkDevice device, + VkCommandPool commandPool, + uint32_t commandBufferCount, + const VkCommandBuffer *pCommandBuffers) +{ + for (uint32_t i = 0; i < commandBufferCount; i++) { + VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, pCommandBuffers[i]); + + panvk_destroy_cmdbuf(cmdbuf); + } +} + +VkResult +panvk_ResetCommandBuffer(VkCommandBuffer commandBuffer, + VkCommandBufferResetFlags flags) +{ + VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer); + + return panvk_reset_cmdbuf(cmdbuf); +} + +VkResult +panvk_BeginCommandBuffer(VkCommandBuffer commandBuffer, + const VkCommandBufferBeginInfo *pBeginInfo) +{ + VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer); + VkResult result = VK_SUCCESS; + + if (cmdbuf->status != PANVK_CMD_BUFFER_STATUS_INITIAL) { + /* If the command buffer has already been reset with + * vkResetCommandBuffer, no need to do it again. 
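+    * Otherwise this is the implicit reset: vkBeginCommandBuffer on a
+    * command buffer in the recording or executable state resets it first.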
+ */ + result = panvk_reset_cmdbuf(cmdbuf); + if (result != VK_SUCCESS) + return result; + } + + memset(&cmdbuf->state, 0, sizeof(cmdbuf->state)); + + cmdbuf->status = PANVK_CMD_BUFFER_STATUS_RECORDING; + + return VK_SUCCESS; +} + +void +panvk_CmdBindVertexBuffers(VkCommandBuffer commandBuffer, + uint32_t firstBinding, + uint32_t bindingCount, + const VkBuffer *pBuffers, + const VkDeviceSize *pOffsets) +{ + VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer); + + assert(firstBinding + bindingCount <= MAX_VBS); + + for (uint32_t i = 0; i < bindingCount; i++) { + struct panvk_buffer *buf = panvk_buffer_from_handle(pBuffers[i]); + + cmdbuf->state.vb.bufs[firstBinding + i].address = buf->bo->ptr.gpu + pOffsets[i]; + cmdbuf->state.vb.bufs[firstBinding + i].size = buf->size - pOffsets[i]; + } + cmdbuf->state.vb.count = MAX2(cmdbuf->state.vb.count, firstBinding + bindingCount); + cmdbuf->state.vb.attrib_bufs = cmdbuf->state.vb.attribs = 0; +} + +void +panvk_CmdBindIndexBuffer(VkCommandBuffer commandBuffer, + VkBuffer buffer, + VkDeviceSize offset, + VkIndexType indexType) +{ + panvk_stub(); +} + +void +panvk_CmdBindDescriptorSets(VkCommandBuffer commandBuffer, + VkPipelineBindPoint pipelineBindPoint, + VkPipelineLayout _layout, + uint32_t firstSet, + uint32_t descriptorSetCount, + const VkDescriptorSet *pDescriptorSets, + uint32_t dynamicOffsetCount, + const uint32_t *pDynamicOffsets) +{ + VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer); + VK_FROM_HANDLE(panvk_pipeline_layout, layout, _layout); + + struct panvk_descriptor_state *descriptors_state = + &cmdbuf->descriptors[pipelineBindPoint]; + + for (unsigned i = 0; i < descriptorSetCount; ++i) { + unsigned idx = i + firstSet; + VK_FROM_HANDLE(panvk_descriptor_set, set, pDescriptorSets[i]); + + descriptors_state->sets[idx].set = set; + + if (layout->num_dynoffsets) { + assert(dynamicOffsetCount >= set->layout->num_dynoffsets); + + descriptors_state->sets[idx].dynoffsets = + panfrost_pool_alloc_aligned(&cmdbuf->desc_pool, + ALIGN(layout->num_dynoffsets, 4) * + sizeof(*pDynamicOffsets), + 16); + memcpy(descriptors_state->sets[idx].dynoffsets.cpu, + pDynamicOffsets, + sizeof(*pDynamicOffsets) * set->layout->num_dynoffsets); + dynamicOffsetCount -= set->layout->num_dynoffsets; + pDynamicOffsets += set->layout->num_dynoffsets; + } + + if (set->layout->num_ubos || set->layout->num_dynoffsets) + descriptors_state->ubos = 0; + + if (set->layout->num_textures) + descriptors_state->textures = 0; + + if (set->layout->num_samplers) + descriptors_state->samplers = 0; + } + + assert(!dynamicOffsetCount); +} + +void +panvk_CmdPushConstants(VkCommandBuffer commandBuffer, + VkPipelineLayout layout, + VkShaderStageFlags stageFlags, + uint32_t offset, + uint32_t size, + const void *pValues) +{ + panvk_stub(); +} + +VkResult +panvk_EndCommandBuffer(VkCommandBuffer commandBuffer) +{ + VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer); + + cmdbuf->status = PANVK_CMD_BUFFER_STATUS_EXECUTABLE; + + return cmdbuf->record_result; +} + +void +panvk_CmdBindPipeline(VkCommandBuffer commandBuffer, + VkPipelineBindPoint pipelineBindPoint, + VkPipeline _pipeline) +{ + VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer); + VK_FROM_HANDLE(panvk_pipeline, pipeline, _pipeline); + + cmdbuf->state.bind_point = pipelineBindPoint; + cmdbuf->state.pipeline = pipeline; + cmdbuf->state.varyings = pipeline->varyings; + cmdbuf->state.vb.attrib_bufs = cmdbuf->state.vb.attribs = 0; + cmdbuf->state.fs_rsd = 0; + memset(cmdbuf->descriptors[pipelineBindPoint].sysvals, 0, + 
sizeof(cmdbuf->descriptors[pipelineBindPoint].sysvals));
+
+   /* Sysvals are passed through UBOs, so we need to dirty the UBO array if
+    * the pipeline contains shaders using sysvals.
+    */
+   if (pipeline->num_sysvals)
+      cmdbuf->descriptors[pipelineBindPoint].ubos = 0;
+}
+
+void
+panvk_CmdSetViewport(VkCommandBuffer commandBuffer,
+                     uint32_t firstViewport,
+                     uint32_t viewportCount,
+                     const VkViewport *pViewports)
+{
+   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
+   assert(viewportCount == 1);
+   assert(!firstViewport);
+
+   cmdbuf->state.viewport = pViewports[0];
+   cmdbuf->state.vpd = 0;
+   cmdbuf->state.dirty |= PANVK_DYNAMIC_VIEWPORT;
+}
+
+void
+panvk_CmdSetScissor(VkCommandBuffer commandBuffer,
+                    uint32_t firstScissor,
+                    uint32_t scissorCount,
+                    const VkRect2D *pScissors)
+{
+   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
+   assert(scissorCount == 1);
+   assert(!firstScissor);
+
+   cmdbuf->state.scissor = pScissors[0];
+   cmdbuf->state.vpd = 0;
+   cmdbuf->state.dirty |= PANVK_DYNAMIC_SCISSOR;
+}
+
+void
+panvk_CmdSetLineWidth(VkCommandBuffer commandBuffer, float lineWidth)
+{
+   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
+
+   cmdbuf->state.rast.line_width = lineWidth;
+   cmdbuf->state.dirty |= PANVK_DYNAMIC_LINE_WIDTH;
+}
+
+void
+panvk_CmdSetDepthBias(VkCommandBuffer commandBuffer,
+                      float depthBiasConstantFactor,
+                      float depthBiasClamp,
+                      float depthBiasSlopeFactor)
+{
+   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
+
+   cmdbuf->state.rast.depth_bias.constant_factor = depthBiasConstantFactor;
+   cmdbuf->state.rast.depth_bias.clamp = depthBiasClamp;
+   cmdbuf->state.rast.depth_bias.slope_factor = depthBiasSlopeFactor;
+   cmdbuf->state.dirty |= PANVK_DYNAMIC_DEPTH_BIAS;
+   cmdbuf->state.fs_rsd = 0;
+}
+
+void
+panvk_CmdSetBlendConstants(VkCommandBuffer commandBuffer,
+                           const float blendConstants[4])
+{
+   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
+
+   memcpy(cmdbuf->state.blend.constants, blendConstants,
+          sizeof(cmdbuf->state.blend.constants));
+   cmdbuf->state.dirty |= PANVK_DYNAMIC_BLEND_CONSTANTS;
+   cmdbuf->state.fs_rsd = 0;
+}
+
+void
+panvk_CmdSetDepthBounds(VkCommandBuffer commandBuffer,
+                        float minDepthBounds,
+                        float maxDepthBounds)
+{
+   panvk_stub();
+}
+
+void
+panvk_CmdSetStencilCompareMask(VkCommandBuffer commandBuffer,
+                               VkStencilFaceFlags faceMask,
+                               uint32_t compareMask)
+{
+   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
+
+   if (faceMask & VK_STENCIL_FACE_FRONT_BIT)
+      cmdbuf->state.zs.s_front.compare_mask = compareMask;
+
+   if (faceMask & VK_STENCIL_FACE_BACK_BIT)
+      cmdbuf->state.zs.s_back.compare_mask = compareMask;
+
+   cmdbuf->state.dirty |= PANVK_DYNAMIC_STENCIL_COMPARE_MASK;
+   cmdbuf->state.fs_rsd = 0;
+}
+
+void
+panvk_CmdSetStencilWriteMask(VkCommandBuffer commandBuffer,
+                             VkStencilFaceFlags faceMask,
+                             uint32_t writeMask)
+{
+   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
+
+   if (faceMask & VK_STENCIL_FACE_FRONT_BIT)
+      cmdbuf->state.zs.s_front.write_mask = writeMask;
+
+   if (faceMask & VK_STENCIL_FACE_BACK_BIT)
+      cmdbuf->state.zs.s_back.write_mask = writeMask;
+
+   cmdbuf->state.dirty |= PANVK_DYNAMIC_STENCIL_WRITE_MASK;
+   cmdbuf->state.fs_rsd = 0;
+}
+
+void
+panvk_CmdSetStencilReference(VkCommandBuffer commandBuffer,
+                             VkStencilFaceFlags faceMask,
+                             uint32_t reference)
+{
+   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
+
+   if (faceMask & VK_STENCIL_FACE_FRONT_BIT)
+      cmdbuf->state.zs.s_front.ref = reference;
+
+   if (faceMask & VK_STENCIL_FACE_BACK_BIT)
+      cmdbuf->state.zs.s_back.ref = reference;
+
+ cmdbuf->state.dirty |= PANVK_DYNAMIC_STENCIL_REFERENCE; + cmdbuf->state.fs_rsd = 0; +} + +void +panvk_CmdExecuteCommands(VkCommandBuffer commandBuffer, + uint32_t commandBufferCount, + const VkCommandBuffer *pCmdBuffers) +{ + panvk_stub(); +} + +VkResult +panvk_CreateCommandPool(VkDevice _device, + const VkCommandPoolCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkCommandPool *pCmdPool) +{ + VK_FROM_HANDLE(panvk_device, device, _device); + struct panvk_cmd_pool *pool; + + pool = vk_object_alloc(&device->vk, pAllocator, sizeof(*pool), + VK_OBJECT_TYPE_COMMAND_POOL); + if (pool == NULL) + return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); + + if (pAllocator) + pool->alloc = *pAllocator; + else + pool->alloc = device->vk.alloc; + + pool->queue_family_index = pCreateInfo->queueFamilyIndex; + *pCmdPool = panvk_cmd_pool_to_handle(pool); + return VK_SUCCESS; +} + +void +panvk_DestroyCommandPool(VkDevice _device, + VkCommandPool commandPool, + const VkAllocationCallbacks *pAllocator) +{ + VK_FROM_HANDLE(panvk_device, device, _device); + VK_FROM_HANDLE(panvk_cmd_pool, pool, commandPool); + vk_object_free(&device->vk, pAllocator, pool); +} + +VkResult +panvk_ResetCommandPool(VkDevice device, + VkCommandPool commandPool, + VkCommandPoolResetFlags flags) +{ + panvk_stub(); + return VK_SUCCESS; +} + +void +panvk_TrimCommandPool(VkDevice device, + VkCommandPool commandPool, + VkCommandPoolTrimFlags flags) +{ + panvk_stub(); +} + +static void +panvk_pack_color_32(uint32_t *packed, uint32_t v) +{ + for (unsigned i = 0; i < 4; ++i) + packed[i] = v; +} + +static void +panvk_pack_color_64(uint32_t *packed, uint32_t lo, uint32_t hi) +{ + for (unsigned i = 0; i < 4; i += 2) { + packed[i + 0] = lo; + packed[i + 1] = hi; + } +} + +void +panvk_pack_color(struct panvk_clear_value *out, + const VkClearColorValue *in, + enum pipe_format format) +{ + const struct util_format_description *desc = util_format_description(format); + + /* Alpha magicked to 1.0 if there is no alpha */ + bool has_alpha = util_format_has_alpha(format); + float clear_alpha = has_alpha ? in->float32[3] : 1.0f; + uint32_t *packed = out->color; + + if (util_format_is_rgba8_variant(desc) && desc->colorspace != UTIL_FORMAT_COLORSPACE_SRGB) { + panvk_pack_color_32(packed, + ((uint32_t) float_to_ubyte(clear_alpha) << 24) | + ((uint32_t) float_to_ubyte(in->float32[2]) << 16) | + ((uint32_t) float_to_ubyte(in->float32[1]) << 8) | + ((uint32_t) float_to_ubyte(in->float32[0]) << 0)); + } else if (format == PIPE_FORMAT_B5G6R5_UNORM) { + /* First, we convert the components to R5, G6, B5 separately */ + unsigned r5 = _mesa_roundevenf(SATURATE(in->float32[0]) * 31.0); + unsigned g6 = _mesa_roundevenf(SATURATE(in->float32[1]) * 63.0); + unsigned b5 = _mesa_roundevenf(SATURATE(in->float32[2]) * 31.0); + + /* Then we pack into a sparse u32. TODO: Why these shifts? 
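+    * Best guess: each component is MSB-aligned in a 10-bit-per-channel
+    * tile-buffer lane (r5 at bits 5-9, g6 at 14-19, b5 at 25-29), i.e. a
+    * raw RGB30-style internal layout; unverified.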
*/ + panvk_pack_color_32(packed, (b5 << 25) | (g6 << 14) | (r5 << 5)); + } else if (format == PIPE_FORMAT_B4G4R4A4_UNORM) { + /* Convert to 4-bits */ + unsigned r4 = _mesa_roundevenf(SATURATE(in->float32[0]) * 15.0); + unsigned g4 = _mesa_roundevenf(SATURATE(in->float32[1]) * 15.0); + unsigned b4 = _mesa_roundevenf(SATURATE(in->float32[2]) * 15.0); + unsigned a4 = _mesa_roundevenf(SATURATE(clear_alpha) * 15.0); + + /* Pack on *byte* intervals */ + panvk_pack_color_32(packed, (a4 << 28) | (b4 << 20) | (g4 << 12) | (r4 << 4)); + } else if (format == PIPE_FORMAT_B5G5R5A1_UNORM) { + /* Scale as expected but shift oddly */ + unsigned r5 = _mesa_roundevenf(SATURATE(in->float32[0]) * 31.0); + unsigned g5 = _mesa_roundevenf(SATURATE(in->float32[1]) * 31.0); + unsigned b5 = _mesa_roundevenf(SATURATE(in->float32[2]) * 31.0); + unsigned a1 = _mesa_roundevenf(SATURATE(clear_alpha) * 1.0); + + panvk_pack_color_32(packed, (a1 << 31) | (b5 << 25) | (g5 << 15) | (r5 << 5)); + } else { + /* Otherwise, it's generic subject to replication */ + + union util_color out = { 0 }; + unsigned size = util_format_get_blocksize(format); + + util_pack_color(in->float32, format, &out); + + if (size == 1) { + unsigned b = out.ui[0]; + unsigned s = b | (b << 8); + panvk_pack_color_32(packed, s | (s << 16)); + } else if (size == 2) + panvk_pack_color_32(packed, out.ui[0] | (out.ui[0] << 16)); + else if (size == 3 || size == 4) + panvk_pack_color_32(packed, out.ui[0]); + else if (size == 6 || size == 8) + panvk_pack_color_64(packed, out.ui[0], out.ui[1]); + else if (size == 12 || size == 16) + memcpy(packed, out.ui, 16); + else + unreachable("Unknown generic format size packing clear colour"); + } +} + +static void +panvk_cmd_prepare_clear_values(struct panvk_cmd_buffer *cmdbuf, + const VkClearValue *in) +{ + for (unsigned i = 0; i < cmdbuf->state.pass->attachment_count; i++) { + const struct panvk_render_pass_attachment *attachment = + &cmdbuf->state.pass->attachments[i]; + enum pipe_format fmt = attachment->format; + + if (util_format_is_depth_or_stencil(fmt)) { + if (attachment->load_op == VK_ATTACHMENT_LOAD_OP_CLEAR || + attachment->stencil_load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) { + cmdbuf->state.clear[i].depth = in[i].depthStencil.depth; + cmdbuf->state.clear[i].stencil = in[i].depthStencil.stencil; + } + } else if (attachment->load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) { + panvk_pack_color(&cmdbuf->state.clear[i], &in[i].color, fmt); + } + } +} + +void +panvk_CmdBeginRenderPass2(VkCommandBuffer commandBuffer, + const VkRenderPassBeginInfo *pRenderPassBegin, + const VkSubpassBeginInfo *pSubpassBeginInfo) +{ + VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer); + VK_FROM_HANDLE(panvk_render_pass, pass, pRenderPassBegin->renderPass); + VK_FROM_HANDLE(panvk_framebuffer, fb, pRenderPassBegin->framebuffer); + + cmdbuf->state.pass = pass; + cmdbuf->state.subpass = pass->subpasses; + cmdbuf->state.framebuffer = fb; + cmdbuf->state.render_area = pRenderPassBegin->renderArea; + cmdbuf->state.batch = vk_zalloc(&cmdbuf->pool->alloc, + sizeof(*cmdbuf->state.batch), 8, + VK_SYSTEM_ALLOCATION_SCOPE_COMMAND); + util_dynarray_init(&cmdbuf->state.batch->jobs, NULL); + cmdbuf->state.clear = vk_zalloc(&cmdbuf->pool->alloc, + sizeof(*cmdbuf->state.clear) * + pRenderPassBegin->clearValueCount, 8, + VK_SYSTEM_ALLOCATION_SCOPE_COMMAND); + assert(pRenderPassBegin->clearValueCount == pass->attachment_count); + panvk_cmd_prepare_clear_values(cmdbuf, pRenderPassBegin->pClearValues); + memset(&cmdbuf->state.compute, 0, 
sizeof(cmdbuf->state.compute));
+}
+
+void
+panvk_CmdBeginRenderPass(VkCommandBuffer cmd,
+                         const VkRenderPassBeginInfo *info,
+                         VkSubpassContents contents)
+{
+   VkSubpassBeginInfo subpass_info = {
+      .sType = VK_STRUCTURE_TYPE_SUBPASS_BEGIN_INFO,
+      .contents = contents
+   };
+
+   return panvk_CmdBeginRenderPass2(cmd, info, &subpass_info);
+}
+
+static void
+panvk_cmd_prepare_fragment_job(struct panvk_cmd_buffer *cmdbuf)
+{
+   assert(cmdbuf->state.bind_point == VK_PIPELINE_BIND_POINT_GRAPHICS);
+
+   struct panvk_batch *batch = cmdbuf->state.batch;
+   struct panfrost_ptr job_ptr =
+      panfrost_pool_alloc_aligned(&cmdbuf->desc_pool,
+                                  MALI_FRAGMENT_JOB_LENGTH, 64);
+
+   panvk_emit_fragment_job(cmdbuf->device, cmdbuf->state.framebuffer,
+                           cmdbuf->state.batch->fb.desc.gpu,
+                           job_ptr.cpu);
+   cmdbuf->state.batch->fragment_job = job_ptr.gpu;
+   util_dynarray_append(&batch->jobs, void *, job_ptr.cpu);
+}
+
+void
+panvk_cmd_get_midgard_polygon_list(struct panvk_cmd_buffer *cmdbuf,
+                                   unsigned width, unsigned height,
+                                   bool has_draws)
+{
+   struct panfrost_device *pdev = &cmdbuf->device->physical_device->pdev;
+   struct panvk_batch *batch = cmdbuf->state.batch;
+
+   assert(!pan_is_bifrost(pdev));
+
+   if (batch->tiler.ctx.midgard.polygon_list)
+      return;
+
+   unsigned size =
+      panfrost_tiler_get_polygon_list_size(pdev, width, height, has_draws);
+   size = util_next_power_of_two(size);
+
+   /* Create the BO as invisible if we can. In the non-hierarchical tiler case,
+    * we need to write the polygon list manually because there's no WRITE_VALUE
+    * job in the chain.
+    */
+   bool init_polygon_list = !has_draws && (pdev->quirks & MIDGARD_NO_HIER_TILING);
+   batch->tiler.ctx.midgard.polygon_list =
+      panfrost_bo_create(pdev, size,
+                         init_polygon_list ? 0 : PAN_BO_INVISIBLE,
+                         "Polygon list");
+
+   if (init_polygon_list) {
+      assert(batch->tiler.ctx.midgard.polygon_list->ptr.cpu);
+      uint32_t *polygon_list_body =
+         batch->tiler.ctx.midgard.polygon_list->ptr.cpu +
+         MALI_MIDGARD_TILER_MINIMUM_HEADER_SIZE;
+      polygon_list_body[0] = 0xa0000000;
+   }
+
+   batch->tiler.ctx.midgard.disable = !has_draws;
+}
+
+void
+panvk_cmd_close_batch(struct panvk_cmd_buffer *cmdbuf)
+{
+   assert(cmdbuf->state.batch);
+
+   if (!cmdbuf->state.batch->fragment_job &&
+       !cmdbuf->state.batch->scoreboard.first_job) {
+      vk_free(&cmdbuf->pool->alloc, cmdbuf->state.batch);
+      cmdbuf->state.batch = NULL;
+      return;
+   }
+
+   const struct panfrost_device *pdev = cmdbuf->desc_pool.dev;
+   struct panvk_batch *batch = cmdbuf->state.batch;
+
+   list_addtail(&cmdbuf->state.batch->node, &cmdbuf->batches);
+
+   struct pan_tls_info tlsinfo = {
+      .tls.size = cmdbuf->state.pipeline->tls_size,
+      .wls.size = cmdbuf->state.pipeline->wls_size,
+   };
+
+   if (tlsinfo.tls.size) {
+      tlsinfo.tls.ptr =
+         panfrost_pool_alloc_aligned(&cmdbuf->tls_pool, tlsinfo.tls.size, 4096).gpu;
+   }
+
+   if (tlsinfo.wls.size) {
+      unsigned wls_size =
+         pan_wls_mem_size(pdev, &cmdbuf->state.compute.wg_count, tlsinfo.wls.size);
+      tlsinfo.wls.ptr =
+         panfrost_pool_alloc_aligned(&cmdbuf->tls_pool, wls_size, 4096).gpu;
+   }
+
+   if ((pan_is_bifrost(pdev) || !cmdbuf->state.batch->fb.desc.cpu) &&
+       cmdbuf->state.batch->tls.cpu) {
+      pan_emit_tls(pdev, &tlsinfo, cmdbuf->state.batch->tls.cpu);
+   }
+
+   if (cmdbuf->state.batch->fb.desc.cpu) {
+      if (!pan_is_bifrost(pdev)) {
+         panvk_cmd_get_midgard_polygon_list(cmdbuf,
+                                            batch->fb.info->width,
+                                            batch->fb.info->height,
+                                            false);
+
+         mali_ptr polygon_list =
+            cmdbuf->state.batch->tiler.ctx.midgard.polygon_list->ptr.gpu;
+         struct panfrost_ptr writeval_job =
panfrost_scoreboard_initialize_tiler(&cmdbuf->desc_pool, + &cmdbuf->state.batch->scoreboard, + polygon_list); + if (writeval_job.cpu) + util_dynarray_append(&cmdbuf->state.batch->jobs, void *, writeval_job.cpu); + } + + cmdbuf->state.batch->fb.desc.gpu |= + panvk_emit_fb(cmdbuf->device, + cmdbuf->state.batch, + cmdbuf->state.subpass, + cmdbuf->state.pipeline, + cmdbuf->state.framebuffer, + cmdbuf->state.clear, + &tlsinfo, &cmdbuf->state.batch->tiler.ctx, + cmdbuf->state.batch->fb.desc.cpu); + + if (!pan_is_bifrost(pdev)) { + memcpy(&cmdbuf->state.batch->tiler.templ.midgard, + pan_section_ptr(cmdbuf->state.batch->fb.desc.cpu, + MULTI_TARGET_FRAMEBUFFER, TILER), + sizeof(cmdbuf->state.batch->tiler.templ.midgard)); + } + + panvk_cmd_prepare_fragment_job(cmdbuf); + } + + cmdbuf->state.batch = NULL; +} + +void +panvk_cmd_open_batch(struct panvk_cmd_buffer *cmdbuf) +{ + assert(!cmdbuf->state.batch); + cmdbuf->state.batch = vk_zalloc(&cmdbuf->pool->alloc, + sizeof(*cmdbuf->state.batch), 8, + VK_SYSTEM_ALLOCATION_SCOPE_COMMAND); + assert(cmdbuf->state.batch); +} + +void +panvk_CmdNextSubpass2(VkCommandBuffer commandBuffer, + const VkSubpassBeginInfo *pSubpassBeginInfo, + const VkSubpassEndInfo *pSubpassEndInfo) +{ + VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer); + + panvk_cmd_close_batch(cmdbuf); + + cmdbuf->state.subpass++; + panvk_cmd_open_batch(cmdbuf); + memset(&cmdbuf->state.compute, 0, sizeof(cmdbuf->state.compute)); +} + +void +panvk_CmdNextSubpass(VkCommandBuffer cmd, VkSubpassContents contents) +{ + VkSubpassBeginInfo binfo = { + .sType = VK_STRUCTURE_TYPE_SUBPASS_BEGIN_INFO, + .contents = contents + }; + VkSubpassEndInfo einfo = { + .sType = VK_STRUCTURE_TYPE_SUBPASS_END_INFO, + }; + + panvk_CmdNextSubpass2(cmd, &binfo, &einfo); +} + + +static void +panvk_cmd_alloc_fb_desc(struct panvk_cmd_buffer *cmdbuf) +{ + if (!cmdbuf->state.pipeline->fs.required) + return; + + struct panvk_batch *batch = cmdbuf->state.batch; + + if (batch->fb.desc.gpu) + return; + + const struct panvk_subpass *subpass = cmdbuf->state.subpass; + unsigned size = MALI_MULTI_TARGET_FRAMEBUFFER_LENGTH + + (MALI_RENDER_TARGET_LENGTH * subpass->color_count) + + (subpass->zs_attachment.idx != VK_ATTACHMENT_UNUSED ? 
+ MALI_ZS_CRC_EXTENSION_LENGTH : 0); + unsigned tags = MALI_FBD_TAG_IS_MFBD; + + batch->fb.info = cmdbuf->state.framebuffer; + batch->fb.desc = + panfrost_pool_alloc_aligned(&cmdbuf->desc_pool, size, 64); + + /* Tag the pointer */ + batch->fb.desc.gpu |= tags; +} + +static void +panvk_cmd_alloc_tls_desc(struct panvk_cmd_buffer *cmdbuf) +{ + const struct panfrost_device *pdev = + &cmdbuf->device->physical_device->pdev; + struct panvk_batch *batch = cmdbuf->state.batch; + + assert(batch); + if (batch->tls.gpu) + return; + + if (!pan_is_bifrost(pdev) && + cmdbuf->state.bind_point == VK_PIPELINE_BIND_POINT_GRAPHICS) { + panvk_cmd_alloc_fb_desc(cmdbuf); + batch->tls = batch->fb.desc; + batch->tls.gpu &= ~63ULL; + } else { + batch->tls = + panfrost_pool_alloc_aligned(&cmdbuf->desc_pool, + MALI_LOCAL_STORAGE_LENGTH, 64); + } +} + +static void +panvk_cmd_upload_sysval(struct panvk_cmd_buffer *cmdbuf, + unsigned id, union panvk_sysval_data *data) +{ + switch (PAN_SYSVAL_TYPE(id)) { + case PAN_SYSVAL_VIEWPORT_SCALE: + panvk_sysval_upload_viewport_scale(&cmdbuf->state.viewport, data); + break; + case PAN_SYSVAL_VIEWPORT_OFFSET: + panvk_sysval_upload_viewport_offset(&cmdbuf->state.viewport, data); + break; + case PAN_SYSVAL_VERTEX_INSTANCE_OFFSETS: + /* TODO: support base_{vertex,instance} */ + data->u32[0] = data->u32[1] = data->u32[2] = 0; + break; + default: + unreachable("Invalid static sysval"); + } +} + +static void +panvk_cmd_prepare_sysvals(struct panvk_cmd_buffer *cmdbuf) +{ + struct panvk_descriptor_state *desc_state = + &cmdbuf->descriptors[cmdbuf->state.bind_point]; + const struct panvk_pipeline *pipeline = cmdbuf->state.pipeline; + + if (!pipeline->num_sysvals) + return; + + for (unsigned i = 0; i < ARRAY_SIZE(desc_state->sysvals); i++) { + unsigned sysval_count = pipeline->sysvals[i].ids.sysval_count; + if (!sysval_count || + (desc_state->sysvals[i] && + !(cmdbuf->state.dirty & pipeline->sysvals[i].dirty_mask))) + continue; + + struct panfrost_ptr sysvals = + panfrost_pool_alloc_aligned(&cmdbuf->desc_pool, sysval_count * 16, 16); + union panvk_sysval_data *data = sysvals.cpu; + + for (unsigned s = 0; s < pipeline->sysvals[i].ids.sysval_count; s++) { + panvk_cmd_upload_sysval(cmdbuf, pipeline->sysvals[i].ids.sysvals[s], + &data[s]); + } + + desc_state->sysvals[i] = sysvals.gpu; + } +} + +static void +panvk_cmd_prepare_ubos(struct panvk_cmd_buffer *cmdbuf) +{ + struct panvk_descriptor_state *desc_state = + &cmdbuf->descriptors[cmdbuf->state.bind_point]; + const struct panvk_pipeline *pipeline = + cmdbuf->state.pipeline; + + if (!pipeline->num_ubos || desc_state->ubos) + return; + + panvk_cmd_prepare_sysvals(cmdbuf); + + struct panfrost_ptr ubos = + panfrost_pool_alloc_desc_array(&cmdbuf->desc_pool, + pipeline->num_ubos, + UNIFORM_BUFFER); + + panvk_emit_ubos(pipeline, desc_state, ubos.cpu); + + desc_state->ubos = ubos.gpu; +} + +static void +panvk_cmd_prepare_textures(struct panvk_cmd_buffer *cmdbuf) +{ + struct panvk_descriptor_state *desc_state = + &cmdbuf->descriptors[cmdbuf->state.bind_point]; + const struct panvk_pipeline *pipeline = cmdbuf->state.pipeline; + unsigned num_textures = pipeline->layout->num_textures; + + if (!num_textures || desc_state->textures) + return; + + const struct panfrost_device *pdev = &cmdbuf->device->physical_device->pdev; + unsigned tex_entry_size = pan_is_bifrost(pdev) ? 
+ sizeof(struct mali_bifrost_texture_packed) : + sizeof(mali_ptr); + struct panfrost_ptr textures = + panfrost_pool_alloc_aligned(&cmdbuf->desc_pool, + num_textures * tex_entry_size, + tex_entry_size); + + void *texture = textures.cpu; + + for (unsigned i = 0; i < ARRAY_SIZE(desc_state->sets); i++) { + if (!desc_state->sets[i].set) continue; + + memcpy(texture, + desc_state->sets[i].set->textures.midgard, + desc_state->sets[i].set->layout->num_textures * + tex_entry_size); + + texture += desc_state->sets[i].set->layout->num_textures * + tex_entry_size; + } + + desc_state->textures = textures.gpu; +} + +static void +panvk_cmd_prepare_samplers(struct panvk_cmd_buffer *cmdbuf) +{ + struct panvk_descriptor_state *desc_state = + &cmdbuf->descriptors[cmdbuf->state.bind_point]; + const struct panvk_pipeline *pipeline = cmdbuf->state.pipeline; + unsigned num_samplers = pipeline->layout->num_samplers; + + if (!num_samplers || desc_state->samplers) + return; + + struct panfrost_ptr samplers = + panfrost_pool_alloc_desc_array(&cmdbuf->desc_pool, + num_samplers, + MIDGARD_SAMPLER); + + struct mali_midgard_sampler_packed *sampler = samplers.cpu; + + for (unsigned i = 0; i < ARRAY_SIZE(desc_state->sets); i++) { + if (!desc_state->sets[i].set) continue; + + memcpy(sampler, + desc_state->sets[i].set->samplers, + desc_state->sets[i].set->layout->num_samplers * + sizeof(*sampler)); + + sampler += desc_state->sets[i].set->layout->num_samplers; + } + + desc_state->samplers = samplers.gpu; +} + +static void +panvk_draw_prepare_fs_rsd(struct panvk_cmd_buffer *cmdbuf, + struct panvk_draw_info *draw) +{ + const struct panvk_pipeline *pipeline = cmdbuf->state.pipeline; + + if (!pipeline->fs.dynamic_rsd) { + draw->fs_rsd = pipeline->rsds[MESA_SHADER_FRAGMENT]; + return; + } + + if (!cmdbuf->state.fs_rsd) { + struct panfrost_ptr rsd = + panfrost_pool_alloc_desc_aggregate(&cmdbuf->desc_pool, + PAN_DESC(RENDERER_STATE), + PAN_DESC_ARRAY(pipeline->blend.state.rt_count, + BLEND)); + + struct mali_renderer_state_packed rsd_dyn; + + panvk_emit_dyn_fs_rsd(cmdbuf->device, pipeline, &cmdbuf->state, &rsd_dyn); + pan_merge(rsd_dyn, pipeline->fs.rsd_template, RENDERER_STATE); + memcpy(rsd.cpu, &rsd_dyn, sizeof(rsd_dyn)); + + void *bd = rsd.cpu + MALI_RENDERER_STATE_LENGTH; + for (unsigned i = 0; i < pipeline->blend.state.rt_count; i++) { + if (pipeline->blend.constant[i].index != ~0) { + struct mali_blend_packed bd_dyn; + + panvk_emit_blend_constant(cmdbuf->device, pipeline, i, + cmdbuf->state.blend.constants[i], + &bd_dyn); + pan_merge(bd_dyn, pipeline->blend.bd_template[i], BLEND); + memcpy(bd, &bd_dyn, sizeof(bd_dyn)); + } + bd += MALI_BLEND_LENGTH; + } + + cmdbuf->state.fs_rsd = rsd.gpu; + } + + draw->fs_rsd = cmdbuf->state.fs_rsd; +} + +void +panvk_cmd_get_bifrost_tiler_context(struct panvk_cmd_buffer *cmdbuf, + unsigned width, unsigned height) +{ + struct panvk_batch *batch = cmdbuf->state.batch; + + if (batch->tiler.bifrost_descs.cpu) + return; + + batch->tiler.bifrost_descs = + panfrost_pool_alloc_aligned(&cmdbuf->desc_pool, + MALI_BIFROST_TILER_LENGTH + + MALI_BIFROST_TILER_HEAP_LENGTH, + 64); + + panvk_emit_bifrost_tiler_context(cmdbuf->device, width, height, + &batch->tiler.bifrost_descs); + memcpy(&batch->tiler.templ.bifrost, batch->tiler.bifrost_descs.cpu, + sizeof(batch->tiler.templ.bifrost)); + batch->tiler.ctx.bifrost = batch->tiler.bifrost_descs.gpu; +} + +static void +panvk_draw_prepare_tiler_context(struct panvk_cmd_buffer *cmdbuf, + struct panvk_draw_info *draw) +{ + const struct panfrost_device *pdev = 
&cmdbuf->device->physical_device->pdev;
+   struct panvk_batch *batch = cmdbuf->state.batch;
+
+   if (pan_is_bifrost(pdev)) {
+      panvk_cmd_get_bifrost_tiler_context(cmdbuf,
+                                          batch->fb.info->width,
+                                          batch->fb.info->height);
+   } else {
+      panvk_cmd_get_midgard_polygon_list(cmdbuf,
+                                         batch->fb.info->width,
+                                         batch->fb.info->height,
+                                         true);
+   }
+
+   draw->tiler_ctx = &batch->tiler.ctx;
+}
+
+static void
+panvk_draw_prepare_varyings(struct panvk_cmd_buffer *cmdbuf,
+                            struct panvk_draw_info *draw)
+{
+   const struct panvk_pipeline *pipeline = cmdbuf->state.pipeline;
+   struct panvk_varyings_info *varyings = &cmdbuf->state.varyings;
+
+   panvk_varyings_alloc(varyings, &cmdbuf->varying_pool,
+                        draw->vertex_count);
+
+   unsigned buf_count = panvk_varyings_buf_count(cmdbuf->device, varyings);
+   struct panfrost_ptr bufs =
+      panfrost_pool_alloc_desc_array(&cmdbuf->desc_pool,
+                                     buf_count, ATTRIBUTE_BUFFER);
+
+   panvk_emit_varying_bufs(cmdbuf->device, varyings, bufs.cpu);
+   if (BITSET_TEST(varyings->active, VARYING_SLOT_POS)) {
+      draw->position = varyings->buf[varyings->varying[VARYING_SLOT_POS].buf].address +
+                       varyings->varying[VARYING_SLOT_POS].offset;
+   }
+
+   if (BITSET_TEST(varyings->active, VARYING_SLOT_PSIZ)) {
+      draw->psiz = varyings->buf[varyings->varying[VARYING_SLOT_PSIZ].buf].address +
+                   varyings->varying[VARYING_SLOT_PSIZ].offset;
+   } else if (pipeline->ia.topology == MALI_DRAW_MODE_LINES ||
+              pipeline->ia.topology == MALI_DRAW_MODE_LINE_STRIP ||
+              pipeline->ia.topology == MALI_DRAW_MODE_LINE_LOOP) {
+      draw->line_width = pipeline->dynamic_state_mask & PANVK_DYNAMIC_LINE_WIDTH ?
+                         cmdbuf->state.rast.line_width : pipeline->rast.line_width;
+   } else {
+      draw->line_width = 1.0f;
+   }
+   draw->varying_bufs = bufs.gpu;
+
+   for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) {
+      if (!varyings->stage[s].count) continue;
+
+      struct panfrost_ptr attribs =
+         panfrost_pool_alloc_desc_array(&cmdbuf->desc_pool,
+                                        varyings->stage[s].count,
+                                        ATTRIBUTE);
+
+      panvk_emit_varyings(cmdbuf->device, varyings, s, attribs.cpu);
+      draw->stages[s].varyings = attribs.gpu;
+   }
+}
+
+static void
+panvk_draw_prepare_attributes(struct panvk_cmd_buffer *cmdbuf,
+                              struct panvk_draw_info *draw)
+{
+   /* TODO: images */
+   struct panfrost_device *pdev = cmdbuf->desc_pool.dev;
+
+   if (!cmdbuf->state.pipeline->attribs.buf_count)
+      return;
+
+   if (cmdbuf->state.vb.attribs) {
+      draw->stages[MESA_SHADER_VERTEX].attributes = cmdbuf->state.vb.attribs;
+      draw->attribute_bufs = cmdbuf->state.vb.attrib_bufs;
+      return;
+   }
+
+   unsigned buf_count = cmdbuf->state.pipeline->attribs.buf_count +
+                        (pan_is_bifrost(pdev) ?
1 : 0); + struct panfrost_ptr bufs = + panfrost_pool_alloc_aligned(&cmdbuf->desc_pool, + buf_count * 2 * + MALI_ATTRIBUTE_BUFFER_LENGTH, + MALI_ATTRIBUTE_BUFFER_LENGTH * 2); + + panvk_emit_attrib_bufs(cmdbuf->device, + &cmdbuf->state.pipeline->attribs, + cmdbuf->state.vb.bufs, cmdbuf->state.vb.count, + draw, bufs.cpu); + cmdbuf->state.vb.attrib_bufs = bufs.gpu; + + struct panfrost_ptr attribs = + panfrost_pool_alloc_aligned(&cmdbuf->desc_pool, + cmdbuf->state.pipeline->attribs.attrib_count * + MALI_ATTRIBUTE_LENGTH, + MALI_ATTRIBUTE_LENGTH); + + panvk_emit_attribs(cmdbuf->device, &cmdbuf->state.pipeline->attribs, + cmdbuf->state.vb.bufs, cmdbuf->state.vb.count, + attribs.cpu); + cmdbuf->state.vb.attribs = attribs.gpu; + draw->stages[MESA_SHADER_VERTEX].attributes = cmdbuf->state.vb.attribs; + draw->attribute_bufs = cmdbuf->state.vb.attrib_bufs; +} + +static void +panvk_draw_prepare_viewport(struct panvk_cmd_buffer *cmdbuf, + struct panvk_draw_info *draw) +{ + const struct panvk_pipeline *pipeline = cmdbuf->state.pipeline; + + if (pipeline->vpd) { + draw->viewport = pipeline->vpd; + } else if (cmdbuf->state.vpd) { + draw->viewport = cmdbuf->state.vpd; + } else { + struct panfrost_ptr vp = + panfrost_pool_alloc_aligned(&cmdbuf->desc_pool, + MALI_VIEWPORT_LENGTH, + MALI_VIEWPORT_LENGTH); + + const VkViewport *viewport = + pipeline->dynamic_state_mask & PANVK_DYNAMIC_VIEWPORT ? + &cmdbuf->state.viewport : &pipeline->viewport; + const VkRect2D *scissor = + pipeline->dynamic_state_mask & PANVK_DYNAMIC_SCISSOR ? + &cmdbuf->state.scissor : &pipeline->scissor; + + panvk_emit_viewport(viewport, scissor, vp.cpu); + draw->viewport = cmdbuf->state.vpd = vp.gpu; + } +} + +static void +panvk_draw_prepare_vertex_job(struct panvk_cmd_buffer *cmdbuf, + struct panvk_draw_info *draw) +{ + struct panvk_batch *batch = cmdbuf->state.batch; + struct panfrost_ptr ptr = + panfrost_pool_alloc_aligned(&cmdbuf->desc_pool, + MALI_COMPUTE_JOB_LENGTH, 64); + + util_dynarray_append(&batch->jobs, void *, ptr.cpu); + draw->jobs.vertex = ptr; + panvk_emit_vertex_job(cmdbuf->device, + cmdbuf->state.pipeline, + draw, ptr.cpu); + +} + +static void +panvk_draw_prepare_tiler_job(struct panvk_cmd_buffer *cmdbuf, + struct panvk_draw_info *draw) +{ + const struct panfrost_device *pdev = &cmdbuf->device->physical_device->pdev; + struct panvk_batch *batch = cmdbuf->state.batch; + struct panfrost_ptr ptr = + panfrost_pool_alloc_aligned(&cmdbuf->desc_pool, + pan_is_bifrost(pdev) ? 
+ MALI_BIFROST_TILER_JOB_LENGTH : + MALI_MIDGARD_TILER_JOB_LENGTH, + 64); + + util_dynarray_append(&batch->jobs, void *, ptr.cpu); + draw->jobs.tiler = ptr; + panvk_emit_tiler_job(cmdbuf->device, + cmdbuf->state.pipeline, + draw, ptr.cpu); +} + +void +panvk_CmdDraw(VkCommandBuffer commandBuffer, + uint32_t vertexCount, + uint32_t instanceCount, + uint32_t firstVertex, + uint32_t firstInstance) +{ + VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer); + + struct panvk_batch *batch = cmdbuf->state.batch; + + panvk_cmd_alloc_fb_desc(cmdbuf); + panvk_cmd_alloc_tls_desc(cmdbuf); + panvk_cmd_prepare_ubos(cmdbuf); + panvk_cmd_prepare_textures(cmdbuf); + panvk_cmd_prepare_samplers(cmdbuf); + + /* TODO: indexed draws */ + + struct panvk_draw_info draw = { + .first_vertex = firstVertex, + .vertex_count = vertexCount, + .first_instance = firstInstance, + .instance_count = instanceCount, + .padded_vertex_count = panfrost_padded_vertex_count(vertexCount), + .offset_start = firstVertex, + .tls = batch->tls.gpu, + .fb = batch->fb.desc.gpu, + .ubos = cmdbuf->descriptors[VK_PIPELINE_BIND_POINT_GRAPHICS].ubos, + .textures = cmdbuf->descriptors[VK_PIPELINE_BIND_POINT_GRAPHICS].textures, + .samplers = cmdbuf->descriptors[VK_PIPELINE_BIND_POINT_GRAPHICS].samplers, + }; + + panfrost_pack_work_groups_compute(&draw.invocation, 1, vertexCount, + instanceCount, 1, 1, 1, true, false); + panvk_draw_prepare_fs_rsd(cmdbuf, &draw); + panvk_draw_prepare_varyings(cmdbuf, &draw); + panvk_draw_prepare_attributes(cmdbuf, &draw); + panvk_draw_prepare_viewport(cmdbuf, &draw); + panvk_draw_prepare_tiler_context(cmdbuf, &draw); + panvk_draw_prepare_vertex_job(cmdbuf, &draw); + panvk_draw_prepare_tiler_job(cmdbuf, &draw); + + const struct panvk_pipeline *pipeline = cmdbuf->state.pipeline; + unsigned vjob_id = + panfrost_add_job(&cmdbuf->desc_pool, &batch->scoreboard, + MALI_JOB_TYPE_VERTEX, false, false, 0, 0, + &draw.jobs.vertex, false); + + if (pipeline->fs.required) { + panfrost_add_job(&cmdbuf->desc_pool, &batch->scoreboard, + MALI_JOB_TYPE_TILER, false, false, vjob_id, 0, + &draw.jobs.tiler, false); + } + + /* Clear the dirty flags all at once */ + cmdbuf->state.dirty = 0; +} + +void +panvk_CmdDrawIndexed(VkCommandBuffer commandBuffer, + uint32_t indexCount, + uint32_t instanceCount, + uint32_t firstIndex, + int32_t vertexOffset, + uint32_t firstInstance) +{ + panvk_stub(); +} + +void +panvk_CmdDrawIndirect(VkCommandBuffer commandBuffer, + VkBuffer _buffer, + VkDeviceSize offset, + uint32_t drawCount, + uint32_t stride) +{ + panvk_stub(); +} + +void +panvk_CmdDrawIndexedIndirect(VkCommandBuffer commandBuffer, + VkBuffer _buffer, + VkDeviceSize offset, + uint32_t drawCount, + uint32_t stride) +{ + panvk_stub(); +} + +void +panvk_CmdDispatchBase(VkCommandBuffer commandBuffer, + uint32_t base_x, + uint32_t base_y, + uint32_t base_z, + uint32_t x, + uint32_t y, + uint32_t z) +{ + panvk_stub(); +} + +void +panvk_CmdDispatch(VkCommandBuffer commandBuffer, + uint32_t x, + uint32_t y, + uint32_t z) +{ + panvk_stub(); +} + +void +panvk_CmdDispatchIndirect(VkCommandBuffer commandBuffer, + VkBuffer _buffer, + VkDeviceSize offset) +{ + panvk_stub(); +} + +void +panvk_CmdEndRenderPass2(VkCommandBuffer commandBuffer, + const VkSubpassEndInfoKHR *pSubpassEndInfo) +{ + VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer); + + panvk_cmd_close_batch(cmdbuf); + vk_free(&cmdbuf->pool->alloc, cmdbuf->state.clear); + cmdbuf->state.batch = NULL; + cmdbuf->state.pass = NULL; + cmdbuf->state.subpass = NULL; + cmdbuf->state.framebuffer = NULL; 
+ cmdbuf->state.clear = NULL; + memset(&cmdbuf->state.compute, 0, sizeof(cmdbuf->state.compute)); +} + +void +panvk_CmdEndRenderPass(VkCommandBuffer cmd) +{ + VkSubpassEndInfoKHR einfo = { + .sType = VK_STRUCTURE_TYPE_SUBPASS_END_INFO, + }; + + panvk_CmdEndRenderPass2(cmd, &einfo); +} + + +void +panvk_CmdPipelineBarrier(VkCommandBuffer commandBuffer, + VkPipelineStageFlags srcStageMask, + VkPipelineStageFlags destStageMask, + VkDependencyFlags dependencyFlags, + uint32_t memoryBarrierCount, + const VkMemoryBarrier *pMemoryBarriers, + uint32_t bufferMemoryBarrierCount, + const VkBufferMemoryBarrier *pBufferMemoryBarriers, + uint32_t imageMemoryBarrierCount, + const VkImageMemoryBarrier *pImageMemoryBarriers) +{ + VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer); + + /* Caches are flushed/invalidated at batch boundaries for now, nothing to do + * for memory barriers assuming we implement barriers with the creation of a + * new batch. + * FIXME: We can probably do better with a CacheFlush job that has the + * barrier flag set to true. + */ + if (cmdbuf->state.batch) { + panvk_cmd_close_batch(cmdbuf); + panvk_cmd_open_batch(cmdbuf); + } +} + +void +panvk_CmdSetEvent(VkCommandBuffer commandBuffer, + VkEvent _event, + VkPipelineStageFlags stageMask) +{ + panvk_stub(); +} + +void +panvk_CmdResetEvent(VkCommandBuffer commandBuffer, + VkEvent _event, + VkPipelineStageFlags stageMask) +{ + panvk_stub(); +} + +void +panvk_CmdWaitEvents(VkCommandBuffer commandBuffer, + uint32_t eventCount, + const VkEvent *pEvents, + VkPipelineStageFlags srcStageMask, + VkPipelineStageFlags dstStageMask, + uint32_t memoryBarrierCount, + const VkMemoryBarrier *pMemoryBarriers, + uint32_t bufferMemoryBarrierCount, + const VkBufferMemoryBarrier *pBufferMemoryBarriers, + uint32_t imageMemoryBarrierCount, + const VkImageMemoryBarrier *pImageMemoryBarriers) +{ + panvk_stub(); +} + +void +panvk_CmdSetDeviceMask(VkCommandBuffer commandBuffer, uint32_t deviceMask) +{ + panvk_stub(); +} diff --git a/src/panfrost/vulkan/panvk_cs.c b/src/panfrost/vulkan/panvk_cs.c new file mode 100644 index 00000000000..aa8dd5a286d --- /dev/null +++ b/src/panfrost/vulkan/panvk_cs.c @@ -0,0 +1,850 @@ +/* + * Copyright (C) 2021 Collabora Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include "util/macros.h" +#include "compiler/shader_enums.h" + +#include "panfrost-quirks.h" +#include "pan_cs.h" +#include "pan_encoder.h" +#include "pan_pool.h" + +#include "panvk_cs.h" +#include "panvk_private.h" +#include "panvk_varyings.h" + +static mali_pixel_format +panvk_varying_hw_format(const struct panvk_device *dev, + const struct panvk_varyings_info *varyings, + gl_shader_stage stage, unsigned idx) +{ + const struct panfrost_device *pdev = &dev->physical_device->pdev; + gl_varying_slot loc = varyings->stage[stage].loc[idx]; + bool fs = stage == MESA_SHADER_FRAGMENT; + + switch (loc) { + case VARYING_SLOT_PNTC: + case VARYING_SLOT_PSIZ: + return (MALI_R16F << 12) | + (pdev->quirks & HAS_SWIZZLES ? + panfrost_get_default_swizzle(1) : + panfrost_bifrost_swizzle(1)); + case VARYING_SLOT_POS: + return ((fs ? MALI_RGBA32F : MALI_SNAP_4) << 12) | + (pdev->quirks & HAS_SWIZZLES ? + panfrost_get_default_swizzle(4) : + panfrost_bifrost_swizzle(4)); + default: + assert(!panvk_varying_is_builtin(stage, loc)); + return pdev->formats[varyings->varying[loc].format].hw; + } +} + +static void +panvk_emit_varying(const struct panvk_device *dev, + const struct panvk_varyings_info *varyings, + gl_shader_stage stage, unsigned idx, + void *attrib) +{ + const struct panfrost_device *pdev = &dev->physical_device->pdev; + gl_varying_slot loc = varyings->stage[stage].loc[idx]; + bool fs = stage == MESA_SHADER_FRAGMENT; + + pan_pack(attrib, ATTRIBUTE, cfg) { + if (!panvk_varying_is_builtin(stage, loc)) { + cfg.buffer_index = varyings->varying[loc].buf; + cfg.offset = varyings->varying[loc].offset; + } else { + cfg.buffer_index = + panvk_varying_buf_index(varyings, + panvk_varying_buf_id(fs, loc)); + } + cfg.offset_enable = !pan_is_bifrost(pdev); + cfg.format = panvk_varying_hw_format(dev, varyings, stage, idx); + } +} + +void +panvk_emit_varyings(const struct panvk_device *dev, + const struct panvk_varyings_info *varyings, + gl_shader_stage stage, + void *descs) +{ + struct mali_attribute_packed *attrib = descs; + + for (unsigned i = 0; i < varyings->stage[stage].count; i++) + panvk_emit_varying(dev, varyings, stage, i, attrib++); +} + +static void +panvk_emit_varying_buf(const struct panvk_device *dev, + const struct panvk_varyings_info *varyings, + enum panvk_varying_buf_id id, void *buf) +{ + unsigned buf_idx = panvk_varying_buf_index(varyings, id); + enum mali_attribute_special special_id = panvk_varying_special_buf_id(id); + + pan_pack(buf, ATTRIBUTE_BUFFER, cfg) { + if (special_id) { + cfg.type = 0; + cfg.special = special_id; + } else { + unsigned offset = varyings->buf[buf_idx].address & 63; + + cfg.stride = varyings->buf[buf_idx].stride; + cfg.size = varyings->buf[buf_idx].size + offset; + cfg.pointer = varyings->buf[buf_idx].address & ~63ULL; + } + } +} + +void +panvk_emit_varying_bufs(const struct panvk_device *dev, + const struct panvk_varyings_info *varyings, + void *descs) +{ + const struct panfrost_device *pdev = &dev->physical_device->pdev; + struct mali_attribute_buffer_packed *buf = descs; + + for (unsigned i = 0; i < PANVK_VARY_BUF_MAX; i++) { + if (varyings->buf_mask & (1 << i)) + panvk_emit_varying_buf(dev, varyings, i, buf++); + } + + if (pan_is_bifrost(pdev)) + memset(buf, 0, sizeof(*buf)); +} + +static void +panvk_emit_attrib_buf(const struct panvk_device *dev, + const struct panvk_attribs_info *info, + const struct panvk_draw_info *draw, + const struct panvk_attrib_buf *bufs, + unsigned buf_count, + unsigned idx, void *desc) +{ + ASSERTED const struct panfrost_device 
*pdev = &dev->physical_device->pdev;
+   const struct panvk_attrib_buf_info *buf_info = &info->buf[idx];
+
+   if (buf_info->special) {
+      assert(!pan_is_bifrost(pdev));
+      switch (buf_info->special_id) {
+      case PAN_VERTEX_ID:
+         panfrost_vertex_id(draw->padded_vertex_count, desc,
+                            draw->instance_count > 1);
+         return;
+      case PAN_INSTANCE_ID:
+         panfrost_instance_id(draw->padded_vertex_count, desc,
+                              draw->instance_count > 1);
+         return;
+      default:
+         unreachable("Invalid attribute ID");
+      }
+   }
+
+   assert(idx < buf_count);
+   const struct panvk_attrib_buf *buf = &bufs[idx];
+   unsigned divisor = buf_info->per_instance ?
+                      draw->padded_vertex_count : 0;
+   unsigned stride = divisor && draw->instance_count == 1 ?
+                     0 : buf_info->stride;
+   mali_ptr addr = buf->address & ~63ULL;
+   unsigned size = buf->size + (buf->address & 63);
+
+   /* TODO: support instanced arrays */
+   pan_pack(desc, ATTRIBUTE_BUFFER, cfg) {
+      if (draw->instance_count > 1 && divisor) {
+         cfg.type = MALI_ATTRIBUTE_TYPE_1D_MODULUS;
+         cfg.divisor = divisor;
+      }
+
+      cfg.pointer = addr;
+      cfg.stride = stride;
+      cfg.size = size;
+   }
+}
+
+void
+panvk_emit_attrib_bufs(const struct panvk_device *dev,
+                       const struct panvk_attribs_info *info,
+                       const struct panvk_attrib_buf *bufs,
+                       unsigned buf_count,
+                       const struct panvk_draw_info *draw,
+                       void *descs)
+{
+   const struct panfrost_device *pdev = &dev->physical_device->pdev;
+   struct mali_attribute_buffer_packed *buf = descs;
+
+   for (unsigned i = 0; i < info->buf_count; i++)
+      panvk_emit_attrib_buf(dev, info, draw, bufs, buf_count, i, buf++);
+
+   /* A NULL entry is needed to stop prefetching on Bifrost */
+   if (pan_is_bifrost(pdev))
+      memset(buf, 0, sizeof(*buf));
+}
+
+static void
+panvk_emit_attrib(const struct panvk_device *dev,
+                  const struct panvk_attribs_info *attribs,
+                  const struct panvk_attrib_buf *bufs,
+                  unsigned buf_count,
+                  unsigned idx, void *attrib)
+{
+   const struct panfrost_device *pdev = &dev->physical_device->pdev;
+
+   pan_pack(attrib, ATTRIBUTE, cfg) {
+      cfg.buffer_index = attribs->attrib[idx].buf;
+      cfg.offset = attribs->attrib[idx].offset +
+                   (bufs[cfg.buffer_index].address & 63);
+      cfg.format = pdev->formats[attribs->attrib[idx].format].hw;
+   }
+}
+
+void
+panvk_emit_attribs(const struct panvk_device *dev,
+                   const struct panvk_attribs_info *attribs,
+                   const struct panvk_attrib_buf *bufs,
+                   unsigned buf_count,
+                   void *descs)
+{
+   struct mali_attribute_packed *attrib = descs;
+
+   for (unsigned i = 0; i < attribs->attrib_count; i++)
+      panvk_emit_attrib(dev, attribs, bufs, buf_count, i, attrib++);
+}
+
+void
+panvk_emit_ubos(const struct panvk_pipeline *pipeline,
+                const struct panvk_descriptor_state *state,
+                void *descs)
+{
+   struct mali_uniform_buffer_packed *ubos = descs;
+
+   for (unsigned i = 0; i < ARRAY_SIZE(state->sets); i++) {
+      const struct panvk_descriptor_set_layout *set_layout =
+         pipeline->layout->sets[i].layout;
+      const struct panvk_descriptor_set *set = state->sets[i].set;
+      unsigned offset = pipeline->layout->sets[i].ubo_offset;
+
+      if (!set_layout)
+         continue;
+
+      if (!set) {
+         unsigned num_ubos = (set_layout->num_dynoffsets != 0) + set_layout->num_ubos;
+         memset(&ubos[offset], 0, num_ubos * sizeof(*ubos));
+      } else {
+         memcpy(&ubos[offset], set->ubos, set_layout->num_ubos * sizeof(*ubos));
+         if (set_layout->num_dynoffsets) {
+            pan_pack(&ubos[offset + set_layout->num_ubos], UNIFORM_BUFFER, cfg) {
+               cfg.pointer = state->sets[i].dynoffsets.gpu;
+               cfg.entries = DIV_ROUND_UP(set->layout->num_dynoffsets, 16);
+            }
+         }
+      }
+   }
+
+   for (unsigned i = 0; i <
+static void
+panvk_emit_attrib(const struct panvk_device *dev,
+                  const struct panvk_attribs_info *attribs,
+                  const struct panvk_attrib_buf *bufs,
+                  unsigned buf_count,
+                  unsigned idx, void *attrib)
+{
+   const struct panfrost_device *pdev = &dev->physical_device->pdev;
+
+   pan_pack(attrib, ATTRIBUTE, cfg) {
+      cfg.buffer_index = attribs->attrib[idx].buf;
+      cfg.offset = attribs->attrib[idx].offset +
+                   (bufs[cfg.buffer_index].address & 63);
+      cfg.format = pdev->formats[attribs->attrib[idx].format].hw;
+   }
+}
+
+void
+panvk_emit_attribs(const struct panvk_device *dev,
+                   const struct panvk_attribs_info *attribs,
+                   const struct panvk_attrib_buf *bufs,
+                   unsigned buf_count,
+                   void *descs)
+{
+   struct mali_attribute_packed *attrib = descs;
+
+   for (unsigned i = 0; i < attribs->attrib_count; i++)
+      panvk_emit_attrib(dev, attribs, bufs, buf_count, i, attrib++);
+}
+
+void
+panvk_emit_ubos(const struct panvk_pipeline *pipeline,
+                const struct panvk_descriptor_state *state,
+                void *descs)
+{
+   struct mali_uniform_buffer_packed *ubos = descs;
+
+   for (unsigned i = 0; i < ARRAY_SIZE(state->sets); i++) {
+      const struct panvk_descriptor_set_layout *set_layout =
+         pipeline->layout->sets[i].layout;
+      const struct panvk_descriptor_set *set = state->sets[i].set;
+      unsigned offset = pipeline->layout->sets[i].ubo_offset;
+
+      if (!set_layout)
+         continue;
+
+      if (!set) {
+         unsigned num_ubos = (set_layout->num_dynoffsets != 0) + set_layout->num_ubos;
+
+         memset(&ubos[offset], 0, num_ubos * sizeof(*ubos));
+      } else {
+         memcpy(&ubos[offset], set->ubos, set_layout->num_ubos * sizeof(*ubos));
+         if (set_layout->num_dynoffsets) {
+            pan_pack(&ubos[offset + set_layout->num_ubos], UNIFORM_BUFFER, cfg) {
+               cfg.pointer = state->sets[i].dynoffsets.gpu;
+               cfg.entries = DIV_ROUND_UP(set->layout->num_dynoffsets, 16);
+            }
+         }
+      }
+   }
+
+   for (unsigned i = 0; i < ARRAY_SIZE(pipeline->sysvals); i++) {
+      if (!pipeline->sysvals[i].ids.sysval_count)
+         continue;
+
+      pan_pack(&ubos[pipeline->sysvals[i].ubo_idx], UNIFORM_BUFFER, cfg) {
+         cfg.pointer = pipeline->sysvals[i].ubo ? :
+                       state->sysvals[i];
+         cfg.entries = pipeline->sysvals[i].ids.sysval_count;
+      }
+   }
+}
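+/* Illustration (not part of the original patch): the UBO table emitted
+ * above is flat: each set's UBOs start at that set's ubo_offset, one
+ * extra slot per set holds the dynamic offsets when present, and sysval
+ * UBOs come last. E.g. two sets with 2 and 3 UBOs (the second with
+ * dynamic offsets) would use slots 0-1, 2-4 and 5, with sysvals after
+ * that.
+ */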
+void
+panvk_emit_vertex_job(const struct panvk_device *dev,
+                      const struct panvk_pipeline *pipeline,
+                      const struct panvk_draw_info *draw,
+                      void *job)
+{
+   const struct panfrost_device *pdev = &dev->physical_device->pdev;
+   void *section = pan_section_ptr(job, COMPUTE_JOB, INVOCATION);
+
+   memcpy(section, &draw->invocation, MALI_INVOCATION_LENGTH);
+
+   pan_section_pack(job, COMPUTE_JOB, PARAMETERS, cfg) {
+      cfg.job_task_split = 5;
+   }
+
+   pan_section_pack(job, COMPUTE_JOB, DRAW, cfg) {
+      cfg.draw_descriptor_is_64b = true;
+      if (!pan_is_bifrost(pdev))
+         cfg.texture_descriptor_is_64b = true;
+      cfg.state = pipeline->rsds[MESA_SHADER_VERTEX];
+      cfg.attributes = draw->stages[MESA_SHADER_VERTEX].attributes;
+      cfg.attribute_buffers = draw->attribute_bufs;
+      cfg.varyings = draw->stages[MESA_SHADER_VERTEX].varyings;
+      cfg.varying_buffers = draw->varying_bufs;
+      cfg.thread_storage = draw->tls;
+      cfg.offset_start = draw->offset_start;
+      cfg.instance_size = draw->instance_count > 1 ?
+                          draw->padded_vertex_count : 1;
+      cfg.uniform_buffers = draw->ubos;
+      cfg.push_uniforms = draw->stages[MESA_SHADER_VERTEX].push_constants;
+      cfg.textures = draw->textures;
+      cfg.samplers = draw->samplers;
+   }
+
+   pan_section_pack(job, COMPUTE_JOB, DRAW_PADDING, cfg);
+}
+
+void
+panvk_emit_tiler_job(const struct panvk_device *dev,
+                     const struct panvk_pipeline *pipeline,
+                     const struct panvk_draw_info *draw,
+                     void *job)
+{
+   const struct panfrost_device *pdev = &dev->physical_device->pdev;
+   void *section = pan_is_bifrost(pdev) ?
+                   pan_section_ptr(job, BIFROST_TILER_JOB, INVOCATION) :
+                   pan_section_ptr(job, MIDGARD_TILER_JOB, INVOCATION);
+
+   memcpy(section, &draw->invocation, MALI_INVOCATION_LENGTH);
+
+   section = pan_is_bifrost(pdev) ?
+             pan_section_ptr(job, BIFROST_TILER_JOB, PRIMITIVE) :
+             pan_section_ptr(job, MIDGARD_TILER_JOB, PRIMITIVE);
+
+   pan_pack(section, PRIMITIVE, cfg) {
+      cfg.draw_mode = pipeline->ia.topology;
+      if (pipeline->ia.writes_point_size)
+         cfg.point_size_array_format = MALI_POINT_SIZE_ARRAY_FORMAT_FP16;
+
+      cfg.first_provoking_vertex = true;
+      if (pipeline->ia.primitive_restart)
+         cfg.primitive_restart = MALI_PRIMITIVE_RESTART_IMPLICIT;
+      cfg.job_task_split = 6;
+      /* TODO: indexed draws */
+      cfg.index_count = draw->vertex_count;
+   }
+
+   section = pan_is_bifrost(pdev) ?
+             pan_section_ptr(job, BIFROST_TILER_JOB, PRIMITIVE_SIZE) :
+             pan_section_ptr(job, MIDGARD_TILER_JOB, PRIMITIVE_SIZE);
+   pan_pack(section, PRIMITIVE_SIZE, cfg) {
+      if (pipeline->ia.writes_point_size) {
+         cfg.size_array = draw->psiz;
+      } else {
+         cfg.constant = draw->line_width;
+      }
+   }
+
+   section = pan_is_bifrost(pdev) ?
+             pan_section_ptr(job, BIFROST_TILER_JOB, DRAW) :
+             pan_section_ptr(job, MIDGARD_TILER_JOB, DRAW);
+
+   pan_pack(section, DRAW, cfg) {
+      cfg.four_components_per_vertex = true;
+      cfg.draw_descriptor_is_64b = true;
+      if (!pan_is_bifrost(pdev))
+         cfg.texture_descriptor_is_64b = true;
+      cfg.front_face_ccw = pipeline->rast.front_ccw;
+      cfg.cull_front_face = pipeline->rast.cull_front_face;
+      cfg.cull_back_face = pipeline->rast.cull_back_face;
+      cfg.position = draw->position;
+      cfg.state = draw->fs_rsd;
+      cfg.attributes = draw->stages[MESA_SHADER_FRAGMENT].attributes;
+      cfg.attribute_buffers = draw->attribute_bufs;
+      cfg.viewport = draw->viewport;
+      cfg.varyings = draw->stages[MESA_SHADER_FRAGMENT].varyings;
+      cfg.varying_buffers = cfg.varyings ? draw->varying_bufs : 0;
+      if (pan_is_bifrost(pdev))
+         cfg.thread_storage = draw->tls;
+      else
+         cfg.fbd = draw->fb;
+
+      /* For all primitives but lines, DRAW.flat_shading_vertex must be
+       * set to 0 and the provoking vertex is selected with the
+       * PRIMITIVE.first_provoking_vertex field.
+       */
+      if (pipeline->ia.topology == MALI_DRAW_MODE_LINES ||
+          pipeline->ia.topology == MALI_DRAW_MODE_LINE_STRIP ||
+          pipeline->ia.topology == MALI_DRAW_MODE_LINE_LOOP) {
+         /* The logic is inverted on Bifrost. */
+         cfg.flat_shading_vertex = pan_is_bifrost(pdev);
+      }
+
+      cfg.offset_start = draw->offset_start;
+      cfg.instance_size = draw->instance_count > 1 ?
+                          draw->padded_vertex_count : 1;
+      cfg.uniform_buffers = draw->ubos;
+      cfg.push_uniforms = draw->stages[MESA_SHADER_FRAGMENT].push_constants;
+      cfg.textures = draw->textures;
+      cfg.samplers = draw->samplers;
+
+      /* TODO: occlusion queries */
+   }
+
+   if (pan_is_bifrost(pdev)) {
+      pan_section_pack(job, BIFROST_TILER_JOB, TILER, cfg) {
+         cfg.address = draw->tiler_ctx->bifrost;
+      }
+      pan_section_pack(job, BIFROST_TILER_JOB, DRAW_PADDING, padding);
+      pan_section_pack(job, BIFROST_TILER_JOB, PADDING, padding);
+   }
+}
+
+void
+panvk_emit_fragment_job(const struct panvk_device *dev,
+                        const struct panvk_framebuffer *fb,
+                        mali_ptr fbdesc,
+                        void *job)
+{
+   pan_section_pack(job, FRAGMENT_JOB, HEADER, header) {
+      header.type = MALI_JOB_TYPE_FRAGMENT;
+      header.index = 1;
+   }
+
+   pan_section_pack(job, FRAGMENT_JOB, PAYLOAD, payload) {
+      payload.bound_min_x = 0;
+      payload.bound_min_y = 0;
+
+      payload.bound_max_x = (fb->width - 1) >> MALI_TILE_SHIFT;
+      payload.bound_max_y = (fb->height - 1) >> MALI_TILE_SHIFT;
+      payload.framebuffer = fbdesc;
+   }
+}
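+/* Illustration (not part of the original patch): fragment-job bounds are
+ * in tile units; assuming MALI_TILE_SHIFT is 4 (16x16-pixel tiles), a
+ * 1920x1080 framebuffer gives bound_max_x = 1919 >> 4 = 119 and
+ * bound_max_y = 1079 >> 4 = 67.
+ */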
+void
+panvk_emit_viewport(const VkViewport *viewport, const VkRect2D *scissor,
+                    void *vpd)
+{
+   /* The spec says "width must be greater than 0.0" */
+   assert(viewport->x >= 0);
+   int minx = (int)viewport->x;
+   int maxx = (int)(viewport->x + viewport->width);
+
+   /* Viewport height can be negative */
+   int miny = MIN2((int)viewport->y, (int)(viewport->y + viewport->height));
+   int maxy = MAX2((int)viewport->y, (int)(viewport->y + viewport->height));
+
+   assert(scissor->offset.x >= 0 && scissor->offset.y >= 0);
+   minx = MAX2(scissor->offset.x, minx);
+   miny = MAX2(scissor->offset.y, miny);
+   maxx = MIN2(scissor->offset.x + scissor->extent.width, maxx);
+   maxy = MIN2(scissor->offset.y + scissor->extent.height, maxy);
+
+   /* Make sure we don't end up with a max < min when width/height is 0 */
+   maxx = maxx > minx ? maxx - 1 : maxx;
+   maxy = maxy > miny ? maxy - 1 : maxy;
+
+   assert(viewport->minDepth >= 0.0f && viewport->minDepth <= 1.0f);
+   assert(viewport->maxDepth >= 0.0f && viewport->maxDepth <= 1.0f);
+
+   pan_pack(vpd, VIEWPORT, cfg) {
+      cfg.scissor_minimum_x = minx;
+      cfg.scissor_minimum_y = miny;
+      cfg.scissor_maximum_x = maxx;
+      cfg.scissor_maximum_y = maxy;
+      cfg.minimum_z = MIN2(viewport->minDepth, viewport->maxDepth);
+      cfg.maximum_z = MAX2(viewport->minDepth, viewport->maxDepth);
+   }
+}
+
+void
+panvk_sysval_upload_viewport_scale(const VkViewport *viewport,
+                                   union panvk_sysval_data *data)
+{
+   data->f32[0] = 0.5f * viewport->width;
+   data->f32[1] = 0.5f * viewport->height;
+   data->f32[2] = 0.5f * (viewport->maxDepth - viewport->minDepth);
+}
+
+void
+panvk_sysval_upload_viewport_offset(const VkViewport *viewport,
+                                    union panvk_sysval_data *data)
+{
+   data->f32[0] = (0.5f * viewport->width) + viewport->x;
+   data->f32[1] = (0.5f * viewport->height) + viewport->y;
+   data->f32[2] = (0.5f * (viewport->maxDepth - viewport->minDepth)) + viewport->minDepth;
+}
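+/* Illustration (not part of the original patch): the two helpers above
+ * feed the standard viewport transform x_fb = (0.5 * w) * x_ndc +
+ * (0.5 * w + x). For a 1920x1080 viewport at (0, 0) with depth range
+ * [0.0, 1.0], both the scale and offset sysvals come out as
+ * (960, 540, 0.5).
+ */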
+static enum mali_bifrost_register_file_format
+bifrost_blend_type_from_nir(nir_alu_type nir_type)
+{
+   switch (nir_type) {
+   case 0: /* Render target not in use */
+      return 0;
+   case nir_type_float16:
+      return MALI_BIFROST_REGISTER_FILE_FORMAT_F16;
+   case nir_type_float32:
+      return MALI_BIFROST_REGISTER_FILE_FORMAT_F32;
+   case nir_type_int32:
+      return MALI_BIFROST_REGISTER_FILE_FORMAT_I32;
+   case nir_type_uint32:
+      return MALI_BIFROST_REGISTER_FILE_FORMAT_U32;
+   case nir_type_int16:
+      return MALI_BIFROST_REGISTER_FILE_FORMAT_I16;
+   case nir_type_uint16:
+      return MALI_BIFROST_REGISTER_FILE_FORMAT_U16;
+   default:
+      unreachable("Unsupported blend shader type for NIR alu type");
+   }
+}
+
+static void
+panvk_emit_bifrost_blend(const struct panvk_device *dev,
+                         const struct panvk_pipeline *pipeline,
+                         unsigned rt, void *bd)
+{
+   const struct pan_blend_state *blend = &pipeline->blend.state;
+   const struct panfrost_device *pdev = &dev->physical_device->pdev;
+   const struct pan_blend_rt_state *rts = &blend->rts[rt];
+
+   pan_pack(bd, BLEND, cfg) {
+      if (!blend->rt_count || !rts->equation.color_mask) {
+         cfg.enable = false;
+         cfg.bifrost.internal.mode = MALI_BIFROST_BLEND_MODE_OFF;
+         continue;
+      }
+
+      cfg.srgb = util_format_is_srgb(rts->format);
+      cfg.load_destination = pan_blend_reads_dest(blend->rts[rt].equation);
+      cfg.round_to_fb_precision = true;
+
+      const struct util_format_description *format_desc =
+         util_format_description(rts->format);
+      unsigned chan_size = 0;
+      for (unsigned i = 0; i < format_desc->nr_channels; i++)
+         chan_size = MAX2(format_desc->channel[i].size, chan_size);
+
+      pan_blend_to_fixed_function_equation(blend->rts[rt].equation,
+                                           &cfg.bifrost.equation);
+
+      /* Fixed point constant */
+      float fconst =
+         pan_blend_get_constant(pan_blend_constant_mask(blend->rts[rt].equation),
+                                blend->constants);
+      uint16_t constant = fconst * ((1 << chan_size) - 1);
+      constant <<= 16 - chan_size;
+      cfg.bifrost.constant = constant;
+
+      if (pan_blend_is_opaque(blend->rts[rt].equation))
+         cfg.bifrost.internal.mode = MALI_BIFROST_BLEND_MODE_OPAQUE;
+      else
+         cfg.bifrost.internal.mode = MALI_BIFROST_BLEND_MODE_FIXED_FUNCTION;
+
+      /* If we want the conversion to work properly,
+       * num_comps must be set to 4
+       */
+      cfg.bifrost.internal.fixed_function.num_comps = 4;
+      cfg.bifrost.internal.fixed_function.conversion.memory_format =
+         panfrost_format_to_bifrost_blend(pdev, rts->format);
+      cfg.bifrost.internal.fixed_function.conversion.register_format =
+         bifrost_blend_type_from_nir(pipeline->fs.info.bifrost.blend[rt].type);
+      cfg.bifrost.internal.fixed_function.rt = rt;
+   }
+}
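+/* Illustration (not part of the original patch): the constant conversion
+ * above packs the float blend constant as 16-bit fixed point scaled to
+ * the widest channel: 0.5 on an RGBA8 target gives 0.5 * 255 = 127,
+ * shifted left by 16 - 8 bits to 32512.
+ */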
+static void
+panvk_emit_midgard_blend(const struct panvk_device *dev,
+                         const struct panvk_pipeline *pipeline,
+                         unsigned rt, void *bd)
+{
+   const struct pan_blend_state *blend = &pipeline->blend.state;
+   const struct pan_blend_rt_state *rts = &blend->rts[rt];
+
+   pan_pack(bd, BLEND, cfg) {
+      if (!blend->rt_count || !rts->equation.color_mask) {
+         cfg.enable = false;
+         continue;
+      }
+
+      cfg.srgb = util_format_is_srgb(rts->format);
+      cfg.load_destination = pan_blend_reads_dest(blend->rts[rt].equation);
+      cfg.round_to_fb_precision = true;
+      cfg.midgard.blend_shader = false;
+      pan_blend_to_fixed_function_equation(blend->rts[rt].equation,
+                                           &cfg.midgard.equation);
+      cfg.midgard.constant =
+         pan_blend_get_constant(pan_blend_constant_mask(blend->rts[rt].equation),
+                                blend->constants);
+   }
+}
+
+void
+panvk_emit_blend(const struct panvk_device *dev,
+                 const struct panvk_pipeline *pipeline,
+                 unsigned rt, void *bd)
+{
+   const struct panfrost_device *pdev = &dev->physical_device->pdev;
+
+   if (pan_is_bifrost(pdev))
+      panvk_emit_bifrost_blend(dev, pipeline, rt, bd);
+   else
+      panvk_emit_midgard_blend(dev, pipeline, rt, bd);
+}
+
+void
+panvk_emit_blend_constant(const struct panvk_device *dev,
+                          const struct panvk_pipeline *pipeline,
+                          unsigned rt, const float *constants, void *bd)
+{
+   const struct panfrost_device *pdev = &dev->physical_device->pdev;
+   float constant = constants[pipeline->blend.constant[rt].index];
+
+   pan_pack(bd, BLEND, cfg) {
+      cfg.enable = false;
+      if (pan_is_bifrost(pdev)) {
+         cfg.bifrost.constant = constant * pipeline->blend.constant[rt].bifrost_factor;
+      } else {
+         cfg.midgard.constant = constant;
+      }
+   }
+}
+
+void
+panvk_emit_dyn_fs_rsd(const struct panvk_device *dev,
+                      const struct panvk_pipeline *pipeline,
+                      const struct panvk_cmd_state *state,
+                      void *rsd)
+{
+   pan_pack(rsd, RENDERER_STATE, cfg) {
+      if (pipeline->dynamic_state_mask & (1 << VK_DYNAMIC_STATE_DEPTH_BIAS)) {
+         cfg.depth_units = state->rast.depth_bias.constant_factor * 2.0f;
+         cfg.depth_factor = state->rast.depth_bias.slope_factor;
+         cfg.depth_bias_clamp = state->rast.depth_bias.clamp;
+      }
+
+      if (pipeline->dynamic_state_mask & (1 << VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK)) {
+         cfg.stencil_front.mask = state->zs.s_front.compare_mask;
+         cfg.stencil_back.mask = state->zs.s_back.compare_mask;
+      }
+
+      if (pipeline->dynamic_state_mask & (1 << VK_DYNAMIC_STATE_STENCIL_WRITE_MASK)) {
+         cfg.stencil_mask_misc.stencil_mask_front = state->zs.s_front.write_mask;
+         cfg.stencil_mask_misc.stencil_mask_back = state->zs.s_back.write_mask;
+      }
+
+      if (pipeline->dynamic_state_mask & (1 << VK_DYNAMIC_STATE_STENCIL_REFERENCE)) {
+         cfg.stencil_front.reference_value = state->zs.s_front.ref;
+         cfg.stencil_back.reference_value = state->zs.s_back.ref;
+      }
+   }
+}
+
+void
+panvk_emit_base_fs_rsd(const struct panvk_device *dev,
+                       const struct panvk_pipeline *pipeline,
+                       void *rsd)
+{
+   const struct panfrost_device *pdev = &dev->physical_device->pdev;
+   const struct pan_shader_info *info = &pipeline->fs.info;
+
+   pan_pack(rsd, RENDERER_STATE, cfg) {
+      if (pipeline->fs.required) {
+         pan_shader_prepare_rsd(pdev, info, pipeline->fs.address, &cfg);
+         if (pan_is_bifrost(pdev)) {
+            cfg.properties.bifrost.allow_forward_pixel_to_kill = info->fs.can_fpk;
+         } else {
+            /* If either depth or stencil is enabled, discard matters */
+            bool zs_enabled =
+               (pipeline->zs.z_test && pipeline->zs.z_compare_func != MALI_FUNC_ALWAYS) ||
+               pipeline->zs.s_test;
+
+            cfg.properties.midgard.work_register_count = info->work_reg_count;
+            cfg.properties.midgard.force_early_z =
+               info->fs.can_early_z && !pipeline->ms.alpha_to_coverage &&
+               pipeline->zs.z_compare_func == MALI_FUNC_ALWAYS;
+
+            /* Work around a hardware erratum where early-z cannot be
+             * enabled when discarding, even when the depth buffer is
+             * read-only, by lying to the hardware about the discard and
+             * setting the "shader reads tilebuffer" flag to compensate.
+             */
+            cfg.properties.midgard.shader_reads_tilebuffer =
+               info->fs.outputs_read ||
+               (!zs_enabled && info->fs.can_discard);
+            cfg.properties.midgard.shader_contains_discard =
+               zs_enabled && info->fs.can_discard;
+         }
+      } else {
+         if (pan_is_bifrost(pdev)) {
+            cfg.properties.bifrost.shader_modifies_coverage = true;
+            cfg.properties.bifrost.allow_forward_pixel_to_kill = true;
+            cfg.properties.bifrost.allow_forward_pixel_to_be_killed = true;
+            cfg.properties.bifrost.zs_update_operation = MALI_PIXEL_KILL_STRONG_EARLY;
+         } else {
+            cfg.shader.shader = 0x1;
+            cfg.properties.midgard.work_register_count = 1;
+            cfg.properties.depth_source = MALI_DEPTH_SOURCE_FIXED_FUNCTION;
+            cfg.properties.midgard.force_early_z = true;
+         }
+      }
+
+      bool msaa = pipeline->ms.rast_samples > 1;
+      cfg.multisample_misc.multisample_enable = msaa;
+      cfg.multisample_misc.sample_mask =
+         msaa ? pipeline->ms.sample_mask : UINT16_MAX;
+
+      cfg.multisample_misc.depth_function =
+         pipeline->zs.z_test ? pipeline->zs.z_compare_func : MALI_FUNC_ALWAYS;
+
+      cfg.multisample_misc.depth_write_mask = pipeline->zs.z_write;
+      cfg.multisample_misc.fixed_function_near_discard = !pipeline->rast.clamp_depth;
+      cfg.multisample_misc.fixed_function_far_discard = !pipeline->rast.clamp_depth;
+      cfg.multisample_misc.shader_depth_range_fixed = true;
+
+      cfg.stencil_mask_misc.stencil_enable = pipeline->zs.s_test;
+      cfg.stencil_mask_misc.alpha_to_coverage = pipeline->ms.alpha_to_coverage;
+      cfg.stencil_mask_misc.alpha_test_compare_function = MALI_FUNC_ALWAYS;
+      cfg.stencil_mask_misc.depth_range_1 = pipeline->rast.depth_bias.enable;
+      cfg.stencil_mask_misc.depth_range_2 = pipeline->rast.depth_bias.enable;
+      cfg.stencil_mask_misc.single_sampled_lines = pipeline->ms.rast_samples <= 1;
+
+      if (!(pipeline->dynamic_state_mask & (1 << VK_DYNAMIC_STATE_DEPTH_BIAS))) {
+         cfg.depth_units = pipeline->rast.depth_bias.constant_factor * 2.0f;
+         cfg.depth_factor = pipeline->rast.depth_bias.slope_factor;
+         cfg.depth_bias_clamp = pipeline->rast.depth_bias.clamp;
+      }
+
+      if (!(pipeline->dynamic_state_mask & (1 << VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK))) {
+         cfg.stencil_front.mask = pipeline->zs.s_front.compare_mask;
+         cfg.stencil_back.mask = pipeline->zs.s_back.compare_mask;
+      }
+
+      if (!(pipeline->dynamic_state_mask & (1 << VK_DYNAMIC_STATE_STENCIL_WRITE_MASK))) {
+         cfg.stencil_mask_misc.stencil_mask_front = pipeline->zs.s_front.write_mask;
+         cfg.stencil_mask_misc.stencil_mask_back = pipeline->zs.s_back.write_mask;
+      }
+
+      if (!(pipeline->dynamic_state_mask & (1 << VK_DYNAMIC_STATE_STENCIL_REFERENCE))) {
+         cfg.stencil_front.reference_value = pipeline->zs.s_front.ref;
+         cfg.stencil_back.reference_value = pipeline->zs.s_back.ref;
+      }
+
+      cfg.stencil_front.compare_function = pipeline->zs.s_front.compare_func;
+      cfg.stencil_front.stencil_fail = pipeline->zs.s_front.fail_op;
+      cfg.stencil_front.depth_fail = pipeline->zs.s_front.z_fail_op;
+      cfg.stencil_front.depth_pass = pipeline->zs.s_front.pass_op;
+      cfg.stencil_back.compare_function = pipeline->zs.s_back.compare_func;
+      cfg.stencil_back.stencil_fail = pipeline->zs.s_back.fail_op;
+      cfg.stencil_back.depth_fail = pipeline->zs.s_back.z_fail_op;
+      cfg.stencil_back.depth_pass = pipeline->zs.s_back.pass_op;
+   }
+}
+
+void
+panvk_emit_non_fs_rsd(const struct panvk_device *dev,
+                      const struct pan_shader_info *shader_info,
+                      mali_ptr shader_ptr,
+                      void *rsd)
+{
+   const struct panfrost_device *pdev = &dev->physical_device->pdev;
+
+   assert(shader_info->stage != MESA_SHADER_FRAGMENT);
+
+   pan_pack(rsd, RENDERER_STATE, cfg) {
+      pan_shader_prepare_rsd(pdev, shader_info, shader_ptr, &cfg);
+   }
+}
+
+void
+panvk_emit_bifrost_tiler_context(const struct panvk_device *dev,
+                                 unsigned width, unsigned height,
+                                 const struct panfrost_ptr *descs)
+{
+   const struct panfrost_device *pdev = &dev->physical_device->pdev;
+
+   pan_pack(descs->cpu + MALI_BIFROST_TILER_LENGTH, BIFROST_TILER_HEAP, cfg) {
+      cfg.size = pdev->tiler_heap->size;
+      cfg.base = pdev->tiler_heap->ptr.gpu;
+      cfg.bottom = pdev->tiler_heap->ptr.gpu;
+      cfg.top = pdev->tiler_heap->ptr.gpu + pdev->tiler_heap->size;
+   }
+
+   pan_pack(descs->cpu, BIFROST_TILER, cfg) {
+      cfg.hierarchy_mask = 0x28;
+      cfg.fb_width = width;
+      cfg.fb_height = height;
+      cfg.heap = descs->gpu + MALI_BIFROST_TILER_LENGTH;
+   }
+}
+
+unsigned
+panvk_emit_fb(const struct panvk_device *dev,
+              const struct panvk_batch *batch,
+              const struct panvk_subpass *subpass,
+              const struct panvk_pipeline *pipeline,
+              const struct panvk_framebuffer *fb,
+              const struct panvk_clear_value *clears,
+              const struct pan_tls_info *tlsinfo,
+              const struct pan_tiler_context *tilerctx,
+              void *desc)
+{
+   const struct panfrost_device *pdev = &dev->physical_device->pdev;
+   struct panvk_image_view *view;
+   bool crc_valid[8] = { false };
+   struct pan_fb_info fbinfo = {
+      .width = fb->width,
+      .height = fb->height,
+      .extent.maxx = fb->width - 1,
+      .extent.maxy = fb->height - 1,
+      .nr_samples = 1,
+   };
+
+   for (unsigned cb = 0; cb < subpass->color_count; cb++) {
+      int idx = subpass->color_attachments[cb].idx;
+      view = idx != VK_ATTACHMENT_UNUSED ?
+             fb->attachments[idx].iview : NULL;
+      if (!view)
+         continue;
+      fbinfo.rts[cb].view = &view->pview;
+      fbinfo.rts[cb].clear = subpass->color_attachments[cb].clear;
+      fbinfo.rts[cb].crc_valid = &crc_valid[cb];
+
+      memcpy(fbinfo.rts[cb].clear_value, clears[idx].color,
+             sizeof(fbinfo.rts[cb].clear_value));
+      fbinfo.nr_samples =
+         MAX2(fbinfo.nr_samples, view->pview.image->layout.nr_samples);
+   }
+
+   if (subpass->zs_attachment.idx != VK_ATTACHMENT_UNUSED) {
+      view = fb->attachments[subpass->zs_attachment.idx].iview;
+      const struct util_format_description *fdesc =
+         util_format_description(view->pview.format);
+
+      fbinfo.nr_samples =
+         MAX2(fbinfo.nr_samples, view->pview.image->layout.nr_samples);
+
+      if (util_format_has_depth(fdesc)) {
+         fbinfo.zs.clear.z = subpass->zs_attachment.clear;
+         fbinfo.zs.clear_value.depth = clears[subpass->zs_attachment.idx].depth;
+         fbinfo.zs.view.zs = &view->pview;
+      }
+
+      if (util_format_has_stencil(fdesc)) {
+         fbinfo.zs.clear.s = subpass->zs_attachment.clear;
+         fbinfo.zs.clear_value.stencil = clears[subpass->zs_attachment.idx].stencil;
+         if (!fbinfo.zs.view.zs)
+            fbinfo.zs.view.s = &view->pview;
+      }
+   }
+
+   return pan_emit_fbd(pdev, &fbinfo, tlsinfo, tilerctx, desc);
+}
diff --git a/src/panfrost/vulkan/panvk_cs.h b/src/panfrost/vulkan/panvk_cs.h
new file mode 100644
index 00000000000..5bd03883b3f
--- /dev/null
+++ b/src/panfrost/vulkan/panvk_cs.h
@@ -0,0 +1,164 @@
+/*
+ * Copyright (C) 2021 Collabora Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef PANVK_CS_H
+#define PANVK_CS_H
+
+#include <vulkan/vulkan.h>
+
+#include "compiler/shader_enums.h"
+#include "panfrost-job.h"
+
+#include "pan_cs.h"
+
+struct pan_blend_state;
+struct pan_shader_info;
+struct panfrost_ptr;
+struct pan_pool;
+
+union panvk_sysval_data;
+struct panvk_framebuffer;
+struct panvk_cmd_state;
+struct panvk_compute_dim;
+struct panvk_device;
+struct panvk_batch;
+struct panvk_varyings_info;
+struct panvk_attrib_buf;
+struct panvk_attribs_info;
+struct panvk_pipeline;
+struct panvk_draw_info;
+struct panvk_descriptor_state;
+struct panvk_subpass;
+struct panvk_clear_value;
+
+void
+panvk_emit_varyings(const struct panvk_device *dev,
+                    const struct panvk_varyings_info *varyings,
+                    gl_shader_stage stage,
+                    void *descs);
+
+void
+panvk_emit_varying_bufs(const struct panvk_device *dev,
+                        const struct panvk_varyings_info *varyings,
+                        void *descs);
+
+void
+panvk_emit_attrib_bufs(const struct panvk_device *dev,
+                       const struct panvk_attribs_info *info,
+                       const struct panvk_attrib_buf *bufs,
+                       unsigned buf_count,
+                       const struct panvk_draw_info *draw,
+                       void *descs);
+
+void
+panvk_emit_attribs(const struct panvk_device *dev,
+                   const struct panvk_attribs_info *attribs,
+                   const struct panvk_attrib_buf *bufs,
+                   unsigned buf_count,
+                   void *descs);
+
+void
+panvk_emit_ubos(const struct panvk_pipeline *pipeline,
+                const struct panvk_descriptor_state *state,
+                void *descs);
+
+void
+panvk_emit_vertex_job(const struct panvk_device *dev,
+                      const struct panvk_pipeline *pipeline,
+                      const struct panvk_draw_info *draw,
+                      void *job);
+
+void
+panvk_emit_tiler_job(const struct panvk_device *dev,
+                     const struct panvk_pipeline *pipeline,
+                     const struct panvk_draw_info *draw,
+                     void *job);
+
+void
+panvk_emit_fragment_job(const struct panvk_device *dev,
+                        const struct panvk_framebuffer *fb,
+                        mali_ptr fbdesc,
+                        void *job);
+
+void
+panvk_emit_viewport(const VkViewport *viewport, const VkRect2D *scissor,
+                    void *vpd);
+
+void
+panvk_emit_blend(const struct panvk_device *dev,
+                 const struct panvk_pipeline *pipeline,
+                 unsigned rt, void *bd);
+
+void
+panvk_emit_blend_constant(const struct panvk_device *dev,
+                          const struct panvk_pipeline *pipeline,
+                          unsigned rt, const float *constants, void *bd);
+
+void
+panvk_emit_dyn_fs_rsd(const struct panvk_device *dev,
+                      const struct panvk_pipeline *pipeline,
+                      const struct panvk_cmd_state *state,
+                      void *rsd);
+
+void
+panvk_emit_base_fs_rsd(const struct panvk_device *dev,
+                       const struct panvk_pipeline *pipeline,
+                       void *rsd);
+
+void
+panvk_emit_non_fs_rsd(const struct panvk_device *dev,
+                      const struct pan_shader_info *shader_info,
+                      mali_ptr shader_ptr,
+                      void *rsd);
+
+void
+panvk_emit_bifrost_tiler_context(const struct panvk_device *dev,
+                                 unsigned width, unsigned height,
+                                 const struct panfrost_ptr *descs);
+
+unsigned
+panvk_emit_fb(const struct panvk_device *dev,
+              const struct panvk_batch *batch,
+              const struct panvk_subpass *subpass,
+              const struct panvk_pipeline *pipeline,
+              const struct panvk_framebuffer *fb,
+              const struct panvk_clear_value *clears,
+              const struct pan_tls_info *tlsinfo,
+              const struct pan_tiler_context *tilerctx,
+              void *desc);
+
+void
+panvk_emit_tls(const struct panvk_device *dev,
+               const struct panvk_pipeline *pipeline,
+               const struct pan_compute_dim *wg_count,
+               struct pan_pool *tls_pool,
+               void *desc);
+
+void
+panvk_sysval_upload_viewport_scale(const VkViewport *viewport,
+                                   union panvk_sysval_data *data);
+
+void
+panvk_sysval_upload_viewport_offset(const VkViewport *viewport,
+                                    union panvk_sysval_data *data);
+
+#endif
diff --git a/src/panfrost/vulkan/panvk_descriptor_set.c b/src/panfrost/vulkan/panvk_descriptor_set.c
new file mode 100644
index 00000000000..ed260476d0f
--- /dev/null
+++ b/src/panfrost/vulkan/panvk_descriptor_set.c
@@ -0,0 +1,838 @@
+/*
+ * Copyright © 2021 Collabora Ltd.
+ *
+ * Derived from:
+ * Copyright © 2016 Red Hat.
+ * Copyright © 2016 Bas Nieuwenhuizen
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+#include "panvk_private.h"
+
+#include <assert.h>
+#include <fcntl.h>
+#include <stdbool.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "util/mesa-sha1.h"
+#include "vk_descriptors.h"
+#include "vk_util.h"
+
+#include "pan_bo.h"
+#include "midgard_pack.h"
+
+VkResult
+panvk_CreateDescriptorSetLayout(VkDevice _device,
+                                const VkDescriptorSetLayoutCreateInfo *pCreateInfo,
+                                const VkAllocationCallbacks *pAllocator,
+                                VkDescriptorSetLayout *pSetLayout)
+{
+   VK_FROM_HANDLE(panvk_device, device, _device);
+   struct panvk_descriptor_set_layout *set_layout;
+   VkDescriptorSetLayoutBinding *bindings;
+
+   assert(pCreateInfo->bindingCount);
+
+   VkResult result =
+      vk_create_sorted_bindings(pCreateInfo->pBindings,
+                                pCreateInfo->bindingCount,
+                                &bindings);
+   if (result != VK_SUCCESS)
+      return vk_error(device->instance, result);
+
+   unsigned num_immutable_samplers = 0;
+   for (unsigned i = 0; i < pCreateInfo->bindingCount; i++) {
+      if (bindings[i].pImmutableSamplers)
+         num_immutable_samplers += bindings[i].descriptorCount;
+   }
+
+   unsigned max_binding = bindings[pCreateInfo->bindingCount - 1].binding;
+   size_t size = sizeof(*set_layout) +
+                 (sizeof(struct panvk_descriptor_set_binding_layout) *
+                  (max_binding + 1)) +
+                 (sizeof(struct panvk_sampler *) * num_immutable_samplers);
+   set_layout = vk_object_zalloc(&device->vk, pAllocator, size,
+                                 VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT);
+   if (!set_layout) {
+      result = VK_ERROR_OUT_OF_HOST_MEMORY;
+      goto err_free_bindings;
+   }
+
+   struct panvk_sampler **immutable_samplers =
+      (struct panvk_sampler **)((uint8_t *)set_layout + sizeof(*set_layout) +
+                                (sizeof(struct panvk_descriptor_set_binding_layout) *
+                                 (max_binding + 1)));
+
+   set_layout->flags = pCreateInfo->flags;
+   set_layout->binding_count = max_binding + 1;
+
+   unsigned sampler_idx = 0, tex_idx = 0, ubo_idx = 0, ssbo_idx = 0;
+   unsigned dynoffset_idx = 0, desc_idx = 0;
+
+   for (unsigned i = 0; i < pCreateInfo->bindingCount; i++) {
+      const VkDescriptorSetLayoutBinding *binding = &bindings[i];
+      struct panvk_descriptor_set_binding_layout *binding_layout =
+         &set_layout->bindings[binding->binding];
+
+      binding_layout->type = binding->descriptorType;
+      binding_layout->array_size = binding->descriptorCount;
+      binding_layout->shader_stages = binding->stageFlags;
+      if (binding->pImmutableSamplers) {
+         binding_layout->immutable_samplers = immutable_samplers;
+         immutable_samplers += binding_layout->array_size;
+         for (unsigned j = 0; j < binding_layout->array_size; j++) {
+            VK_FROM_HANDLE(panvk_sampler, sampler, binding->pImmutableSamplers[j]);
+            binding_layout->immutable_samplers[j] = sampler;
+         }
+      }
+
+      binding_layout->desc_idx = desc_idx;
+      desc_idx += binding->descriptorCount;
+      switch (binding_layout->type) {
+      case VK_DESCRIPTOR_TYPE_SAMPLER:
+         binding_layout->sampler_idx = sampler_idx;
+         sampler_idx += binding_layout->array_size;
+         break;
+      case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
+         binding_layout->sampler_idx = sampler_idx;
+         binding_layout->tex_idx = tex_idx;
+         sampler_idx += binding_layout->array_size;
+         tex_idx += binding_layout->array_size;
+         break;
+      case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
+      case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
+      case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
+      case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
+      case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
+         binding_layout->tex_idx = tex_idx;
+         tex_idx += binding_layout->array_size;
+         break;
+      case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
+         binding_layout->dynoffset_idx = dynoffset_idx;
+         dynoffset_idx += binding_layout->array_size;
+         FALLTHROUGH;
+      case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
+         binding_layout->ubo_idx = ubo_idx;
+         ubo_idx += binding_layout->array_size;
+         break;
+      case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
+         binding_layout->dynoffset_idx = dynoffset_idx;
+         dynoffset_idx += binding_layout->array_size;
+         FALLTHROUGH;
+      case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
+         binding_layout->ssbo_idx = ssbo_idx;
+         ssbo_idx += binding_layout->array_size;
+         break;
+      default:
+         unreachable("Invalid descriptor type");
+      }
+   }
+
+   set_layout->num_descs = desc_idx;
+   set_layout->num_samplers = sampler_idx;
+   set_layout->num_textures = tex_idx;
+   set_layout->num_ubos = ubo_idx;
+   set_layout->num_ssbos = ssbo_idx;
+   set_layout->num_dynoffsets = dynoffset_idx;
+
+   free(bindings);
+   *pSetLayout = panvk_descriptor_set_layout_to_handle(set_layout);
+   return VK_SUCCESS;
+
+err_free_bindings:
+   free(bindings);
+   return vk_error(device->instance, result);
+}
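+/* Illustration (not part of the original patch): the loop above hands out
+ * contiguous per-class indices. For bindings {0: UNIFORM_BUFFER x2,
+ * 1: COMBINED_IMAGE_SAMPLER x3, 2: UNIFORM_BUFFER_DYNAMIC x1}, binding 0
+ * gets ubo_idx 0, binding 1 gets sampler_idx/tex_idx 0, and binding 2
+ * gets ubo_idx 2 and dynoffset_idx 0, yielding one flat table per
+ * descriptor class.
+ */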
+void
+panvk_DestroyDescriptorSetLayout(VkDevice _device,
+                                 VkDescriptorSetLayout _set_layout,
+                                 const VkAllocationCallbacks *pAllocator)
+{
+   VK_FROM_HANDLE(panvk_device, device, _device);
+   VK_FROM_HANDLE(panvk_descriptor_set_layout, set_layout, _set_layout);
+
+   if (!set_layout)
+      return;
+
+   vk_object_free(&device->vk, pAllocator, set_layout);
+}
+
+/* FIXME: make sure those values are correct */
+#define PANVK_MAX_TEXTURES (1 << 16)
+#define PANVK_MAX_SAMPLERS (1 << 16)
+#define PANVK_MAX_UBOS 255
+
+void
+panvk_GetDescriptorSetLayoutSupport(VkDevice _device,
+                                    const VkDescriptorSetLayoutCreateInfo *pCreateInfo,
+                                    VkDescriptorSetLayoutSupport *pSupport)
+{
+   VK_FROM_HANDLE(panvk_device, device, _device);
+
+   pSupport->supported = false;
+
+   VkDescriptorSetLayoutBinding *bindings;
+   VkResult result =
+      vk_create_sorted_bindings(pCreateInfo->pBindings,
+                                pCreateInfo->bindingCount,
+                                &bindings);
+   if (result != VK_SUCCESS) {
+      vk_error(device->instance, result);
+      return;
+   }
+
+   unsigned sampler_idx = 0, tex_idx = 0, ubo_idx = 0, ssbo_idx = 0, dynoffset_idx = 0;
+   for (unsigned i = 0; i < pCreateInfo->bindingCount; i++) {
+      const VkDescriptorSetLayoutBinding *binding = &bindings[i];
+
+      switch (binding->descriptorType) {
+      case VK_DESCRIPTOR_TYPE_SAMPLER:
+         sampler_idx += binding->descriptorCount;
+         break;
+      case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
+         sampler_idx += binding->descriptorCount;
+         tex_idx += binding->descriptorCount;
+         break;
+      case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
+      case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
+      case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
+      case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
+      case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
+         tex_idx += binding->descriptorCount;
+         break;
+      case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
+         dynoffset_idx += binding->descriptorCount;
+         FALLTHROUGH;
+      case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
+         ubo_idx += binding->descriptorCount;
+         break;
+      case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
+         dynoffset_idx += binding->descriptorCount;
+         FALLTHROUGH;
+      case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
+         ssbo_idx += binding->descriptorCount;
+         break;
+      default:
+         unreachable("Invalid descriptor type");
+      }
+   }
+
+   free(bindings);
+
+   /* The maximum values apply to all sets attached to a pipeline since all
+    * sets descriptors have to be merged in a single array.
+    */
+   if (tex_idx > PANVK_MAX_TEXTURES / MAX_SETS ||
+       sampler_idx > PANVK_MAX_SAMPLERS / MAX_SETS ||
+       ubo_idx > PANVK_MAX_UBOS / MAX_SETS)
+      return;
+
+   pSupport->supported = true;
+}
+/*
+ * Pipeline layouts. These have nothing to do with the pipeline. They are
+ * just multiple descriptor set layouts pasted together.
+ */
+
+VkResult
+panvk_CreatePipelineLayout(VkDevice _device,
+                           const VkPipelineLayoutCreateInfo *pCreateInfo,
+                           const VkAllocationCallbacks *pAllocator,
+                           VkPipelineLayout *pPipelineLayout)
+{
+   VK_FROM_HANDLE(panvk_device, device, _device);
+   struct panvk_pipeline_layout *layout;
+   struct mesa_sha1 ctx;
+
+   layout = vk_object_zalloc(&device->vk, pAllocator, sizeof(*layout),
+                             VK_OBJECT_TYPE_PIPELINE_LAYOUT);
+   if (layout == NULL)
+      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   layout->num_sets = pCreateInfo->setLayoutCount;
+   _mesa_sha1_init(&ctx);
+
+   unsigned sampler_idx = 0, tex_idx = 0, ssbo_idx = 0, ubo_idx = 0, dynoffset_idx = 0;
+   for (unsigned set = 0; set < pCreateInfo->setLayoutCount; set++) {
+      VK_FROM_HANDLE(panvk_descriptor_set_layout, set_layout,
+                     pCreateInfo->pSetLayouts[set]);
+      layout->sets[set].layout = set_layout;
+      layout->sets[set].sampler_offset = sampler_idx;
+      layout->sets[set].tex_offset = tex_idx;
+      layout->sets[set].ubo_offset = ubo_idx;
+      layout->sets[set].ssbo_offset = ssbo_idx;
+      layout->sets[set].dynoffset_offset = dynoffset_idx;
+      sampler_idx += set_layout->num_samplers;
+      tex_idx += set_layout->num_textures;
+      ubo_idx += set_layout->num_ubos + (set_layout->num_dynoffsets != 0);
+      ssbo_idx += set_layout->num_ssbos;
+      dynoffset_idx += set_layout->num_dynoffsets;
+
+      for (unsigned b = 0; b < set_layout->binding_count; b++) {
+         struct panvk_descriptor_set_binding_layout *binding_layout =
+            &set_layout->bindings[b];
+
+         if (binding_layout->immutable_samplers) {
+            for (unsigned s = 0; s < binding_layout->array_size; s++) {
+               struct panvk_sampler *sampler = binding_layout->immutable_samplers[s];
+
+               _mesa_sha1_update(&ctx, &sampler->desc, sizeof(sampler->desc));
+            }
+         }
+         _mesa_sha1_update(&ctx, &binding_layout->type, sizeof(binding_layout->type));
+         _mesa_sha1_update(&ctx, &binding_layout->array_size, sizeof(binding_layout->array_size));
+         _mesa_sha1_update(&ctx, &binding_layout->desc_idx, sizeof(binding_layout->desc_idx));
+         _mesa_sha1_update(&ctx, &binding_layout->shader_stages, sizeof(binding_layout->shader_stages));
+      }
+   }
+
+   layout->num_samplers = sampler_idx;
+   layout->num_textures = tex_idx;
+   layout->num_ubos = ubo_idx;
+   layout->num_ssbos = ssbo_idx;
+   layout->num_dynoffsets = dynoffset_idx;
+
+   _mesa_sha1_final(&ctx, layout->sha1);
+
+   *pPipelineLayout = panvk_pipeline_layout_to_handle(layout);
+   return VK_SUCCESS;
+}
+
+void
+panvk_DestroyPipelineLayout(VkDevice _device,
+                            VkPipelineLayout _pipelineLayout,
+                            const VkAllocationCallbacks *pAllocator)
+{
+   VK_FROM_HANDLE(panvk_device, device, _device);
+   VK_FROM_HANDLE(panvk_pipeline_layout, pipeline_layout, _pipelineLayout);
+
+   if (!pipeline_layout)
+      return;
+
+   vk_object_free(&device->vk, pAllocator, pipeline_layout);
+}
+
+VkResult
+panvk_CreateDescriptorPool(VkDevice _device,
+                           const VkDescriptorPoolCreateInfo *pCreateInfo,
+                           const VkAllocationCallbacks *pAllocator,
+                           VkDescriptorPool *pDescriptorPool)
+{
+   VK_FROM_HANDLE(panvk_device, device, _device);
+   struct panvk_descriptor_pool *pool;
+
+   pool = vk_object_zalloc(&device->vk, pAllocator,
+                           sizeof(struct panvk_descriptor_pool),
+                           VK_OBJECT_TYPE_DESCRIPTOR_POOL);
+   if (!pool)
+      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   pool->max.sets = pCreateInfo->maxSets;
+
+   for (unsigned i = 0; i < pCreateInfo->poolSizeCount; ++i) {
+      unsigned desc_count = pCreateInfo->pPoolSizes[i].descriptorCount;
+
+      switch (pCreateInfo->pPoolSizes[i].type) {
+      case VK_DESCRIPTOR_TYPE_SAMPLER:
+         pool->max.samplers += desc_count;
+         break;
+      case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
+         pool->max.combined_image_samplers += desc_count;
+         break;
+      case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
+         pool->max.sampled_images += desc_count;
+         break;
+      case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
+         pool->max.storage_images += desc_count;
+         break;
+      case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
+         pool->max.uniform_texel_bufs += desc_count;
+         break;
+      case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
+         pool->max.storage_texel_bufs += desc_count;
+         break;
+      case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
+         pool->max.input_attachments += desc_count;
+         break;
+      case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
+         pool->max.uniform_bufs += desc_count;
+         break;
+      case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
+         pool->max.storage_bufs += desc_count;
+         break;
+      case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
+         pool->max.uniform_dyn_bufs += desc_count;
+         break;
+      case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
+         pool->max.storage_dyn_bufs += desc_count;
+         break;
+      default:
+         unreachable("Invalid descriptor type");
+      }
+   }
+
+   *pDescriptorPool = panvk_descriptor_pool_to_handle(pool);
+   return VK_SUCCESS;
+}
+void
+panvk_DestroyDescriptorPool(VkDevice _device,
+                            VkDescriptorPool _pool,
+                            const VkAllocationCallbacks *pAllocator)
+{
+   VK_FROM_HANDLE(panvk_device, device, _device);
+   VK_FROM_HANDLE(panvk_descriptor_pool, pool, _pool);
+
+   if (pool)
+      vk_object_free(&device->vk, pAllocator, pool);
+}
+
+VkResult
+panvk_ResetDescriptorPool(VkDevice _device,
+                          VkDescriptorPool _pool,
+                          VkDescriptorPoolResetFlags flags)
+{
+   VK_FROM_HANDLE(panvk_descriptor_pool, pool, _pool);
+
+   memset(&pool->cur, 0, sizeof(pool->cur));
+   return VK_SUCCESS;
+}
+static VkResult
+panvk_descriptor_set_create(struct panvk_device *device,
+                            struct panvk_descriptor_pool *pool,
+                            const struct panvk_descriptor_set_layout *layout,
+                            struct panvk_descriptor_set **out_set)
+{
+   const struct panfrost_device *pdev = &device->physical_device->pdev;
+   struct panvk_descriptor_set *set;
+
+   /* TODO: Allocate from the pool! */
+   set = vk_object_zalloc(&device->vk, NULL,
+                          sizeof(struct panvk_descriptor_set),
+                          VK_OBJECT_TYPE_DESCRIPTOR_SET);
+   if (!set)
+      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   set->layout = layout;
+   set->descs = vk_alloc(&device->vk.alloc,
+                         sizeof(*set->descs) * layout->num_descs, 8,
+                         VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+   if (!set->descs)
+      goto err_free_set;
+
+   if (layout->num_ubos) {
+      set->ubos = vk_zalloc(&device->vk.alloc,
+                            sizeof(*set->ubos) * layout->num_ubos, 8,
+                            VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+      if (!set->ubos)
+         goto err_free_set;
+   }
+
+   if (layout->num_samplers) {
+      set->samplers = vk_zalloc(&device->vk.alloc,
+                                sizeof(*set->samplers) * layout->num_samplers, 8,
+                                VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+      if (!set->samplers)
+         goto err_free_set;
+   }
+
+   if (layout->num_textures) {
+      if (pan_is_bifrost(pdev)) {
+         set->textures.bifrost = vk_zalloc(&device->vk.alloc,
+                                           sizeof(*set->textures.bifrost) *
+                                           layout->num_textures,
+                                           8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+      } else {
+         set->textures.midgard = vk_zalloc(&device->vk.alloc,
+                                           sizeof(*set->textures.midgard) *
+                                           layout->num_textures,
+                                           8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+      }
+
+      if (!set->textures.midgard)
+         goto err_free_set;
+   }
+
+   for (unsigned i = 0; i < layout->binding_count; i++) {
+      if (!layout->bindings[i].immutable_samplers)
+         continue;
+
+      for (unsigned j = 0; j < layout->bindings[i].array_size; j++) {
+         set->descs[layout->bindings[i].desc_idx + j].image.sampler =
+            layout->bindings[i].immutable_samplers[j];
+      }
+   }
+
+   *out_set = set;
+   return VK_SUCCESS;
+
+err_free_set:
+   vk_free(&device->vk.alloc, set->textures.midgard);
+   vk_free(&device->vk.alloc, set->samplers);
+   vk_free(&device->vk.alloc, set->ubos);
+   vk_free(&device->vk.alloc, set->descs);
+   vk_object_free(&device->vk, NULL, set);
+   return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+}
+
+static void
+panvk_descriptor_set_destroy(struct panvk_device *device,
+                             struct panvk_descriptor_pool *pool,
+                             struct panvk_descriptor_set *set)
+{
+   vk_free(&device->vk.alloc, set->textures.midgard);
+   vk_free(&device->vk.alloc, set->samplers);
+   vk_free(&device->vk.alloc, set->ubos);
+   vk_free(&device->vk.alloc, set->descs);
+   vk_object_free(&device->vk, NULL, set);
+}
+
+VkResult
+panvk_AllocateDescriptorSets(VkDevice _device,
+                             const VkDescriptorSetAllocateInfo *pAllocateInfo,
+                             VkDescriptorSet *pDescriptorSets)
+{
+   VK_FROM_HANDLE(panvk_device, device, _device);
+   VK_FROM_HANDLE(panvk_descriptor_pool, pool, pAllocateInfo->descriptorPool);
+   VkResult result;
+   unsigned i;
+
+   for (i = 0; i < pAllocateInfo->descriptorSetCount; i++) {
+      VK_FROM_HANDLE(panvk_descriptor_set_layout, layout,
+                     pAllocateInfo->pSetLayouts[i]);
+      struct panvk_descriptor_set *set = NULL;
+
+      result = panvk_descriptor_set_create(device, pool, layout, &set);
+      if (result != VK_SUCCESS)
+         goto err_free_sets;
+
+      pDescriptorSets[i] = panvk_descriptor_set_to_handle(set);
+   }
+
+   return VK_SUCCESS;
+
+err_free_sets:
+   panvk_FreeDescriptorSets(_device, pAllocateInfo->descriptorPool, i, pDescriptorSets);
+   for (i = 0; i < pAllocateInfo->descriptorSetCount; i++)
+      pDescriptorSets[i] = VK_NULL_HANDLE;
+
+   return result;
+}
+VkResult
+panvk_FreeDescriptorSets(VkDevice _device,
+                         VkDescriptorPool descriptorPool,
+                         uint32_t count,
+                         const VkDescriptorSet *pDescriptorSets)
+{
+   VK_FROM_HANDLE(panvk_device, device, _device);
+   VK_FROM_HANDLE(panvk_descriptor_pool, pool, descriptorPool);
+
+   for (unsigned i = 0; i < count; i++) {
+      VK_FROM_HANDLE(panvk_descriptor_set, set, pDescriptorSets[i]);
+
+      if (set)
+         panvk_descriptor_set_destroy(device, pool, set);
+   }
+   return VK_SUCCESS;
+}
+
+static void
+panvk_set_image_desc(struct panvk_descriptor *desc,
+                     const VkDescriptorImageInfo *pImageInfo)
+{
+   VK_FROM_HANDLE(panvk_sampler, sampler, pImageInfo->sampler);
+   VK_FROM_HANDLE(panvk_image_view, image_view, pImageInfo->imageView);
+
+   desc->image.sampler = sampler;
+   desc->image.view = image_view;
+   desc->image.layout = pImageInfo->imageLayout;
+}
+
+static void
+panvk_set_texel_buffer_view_desc(struct panvk_descriptor *desc,
+                                 const VkBufferView *pTexelBufferView)
+{
+   VK_FROM_HANDLE(panvk_buffer_view, buffer_view, *pTexelBufferView);
+
+   desc->buffer_view = buffer_view;
+}
+
+static void
+panvk_set_buffer_info_desc(struct panvk_descriptor *desc,
+                           const VkDescriptorBufferInfo *pBufferInfo)
+{
+   VK_FROM_HANDLE(panvk_buffer, buffer, pBufferInfo->buffer);
+
+   desc->buffer_info.buffer = buffer;
+   desc->buffer_info.offset = pBufferInfo->offset;
+   desc->buffer_info.range = pBufferInfo->range;
+}
+
+static void
+panvk_set_ubo_desc(void *ubo,
+                   const VkDescriptorBufferInfo *pBufferInfo)
+{
+   VK_FROM_HANDLE(panvk_buffer, buffer, pBufferInfo->buffer);
+   size_t size = pBufferInfo->range == VK_WHOLE_SIZE ?
+                 (buffer->bo->size - pBufferInfo->offset) :
+                 pBufferInfo->range;
+
+   pan_pack(ubo, UNIFORM_BUFFER, cfg) {
+      cfg.pointer = buffer->bo->ptr.gpu + pBufferInfo->offset;
+      cfg.entries = DIV_ROUND_UP(size, 16);
+   }
+}
+
+static void
+panvk_set_sampler_desc(void *desc,
+                       const VkDescriptorImageInfo *pImageInfo)
+{
+   VK_FROM_HANDLE(panvk_sampler, sampler, pImageInfo->sampler);
+
+   memcpy(desc, &sampler->desc, sizeof(sampler->desc));
+}
+
+static void
+panvk_set_bifrost_texture_desc(struct mali_bifrost_texture_packed *desc,
+                               const VkDescriptorImageInfo *pImageInfo)
+{
+   VK_FROM_HANDLE(panvk_image_view, view, pImageInfo->imageView);
+
+   *desc = view->bifrost.tex_desc;
+}
+
+static void
+panvk_set_midgard_texture_desc(mali_ptr *desc,
+                               const VkDescriptorImageInfo *pImageInfo)
+{
+   VK_FROM_HANDLE(panvk_image_view, view, pImageInfo->imageView);
+
+   *desc = view->bo->ptr.gpu;
+}
+static void
+panvk_write_descriptor_set(struct panvk_device *dev,
+                           const VkWriteDescriptorSet *pDescriptorWrite)
+{
+   const struct panfrost_device *pdev = &dev->physical_device->pdev;
+   VK_FROM_HANDLE(panvk_descriptor_set, set, pDescriptorWrite->dstSet);
+   const struct panvk_descriptor_set_layout *layout = set->layout;
+   unsigned dest_offset = pDescriptorWrite->dstArrayElement;
+   unsigned binding = pDescriptorWrite->dstBinding;
+   unsigned src_offset = 0;
+
+   while (src_offset < pDescriptorWrite->descriptorCount &&
+          binding < layout->binding_count) {
+      const struct panvk_descriptor_set_binding_layout *binding_layout =
+         &layout->bindings[binding];
+
+      if (!binding_layout->array_size) {
+         binding++;
+         dest_offset = 0;
+         continue;
+      }
+
+      assert(pDescriptorWrite->descriptorType == binding_layout->type);
+      unsigned ndescs = MIN2(pDescriptorWrite->descriptorCount - src_offset,
+                             binding_layout->array_size - dest_offset);
+      struct panvk_descriptor *descs = &set->descs[binding_layout->desc_idx + dest_offset];
+      assert(binding_layout->desc_idx + dest_offset + ndescs <= set->layout->num_descs);
+
+      switch (pDescriptorWrite->descriptorType) {
+      case VK_DESCRIPTOR_TYPE_SAMPLER:
+      case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
+      case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
+         for (unsigned i = 0; i < ndescs; i++) {
+            const VkDescriptorImageInfo *info = &pDescriptorWrite->pImageInfo[src_offset + i];
+
+            if (pDescriptorWrite->descriptorType == VK_DESCRIPTOR_TYPE_SAMPLER ||
+                pDescriptorWrite->descriptorType == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) {
+               unsigned sampler = binding_layout->sampler_idx + dest_offset + i;
+
+               panvk_set_sampler_desc(&set->samplers[sampler], info);
+            }
+
+            if (pDescriptorWrite->descriptorType == VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE ||
+                pDescriptorWrite->descriptorType == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) {
+               unsigned tex = binding_layout->tex_idx + dest_offset + i;
+
+               if (pan_is_bifrost(pdev))
+                  panvk_set_bifrost_texture_desc(&set->textures.bifrost[tex], info);
+               else
+                  panvk_set_midgard_texture_desc(&set->textures.midgard[tex], info);
+            }
+         }
+         break;
+
+      case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
+      case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
+         for (unsigned i = 0; i < ndescs; i++)
+            panvk_set_image_desc(&descs[i], &pDescriptorWrite->pImageInfo[src_offset + i]);
+         break;
+
+      case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
+      case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
+         for (unsigned i = 0; i < ndescs; i++)
+            panvk_set_texel_buffer_view_desc(&descs[i], &pDescriptorWrite->pTexelBufferView[src_offset + i]);
+         break;
+
+      case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
+      case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
+         for (unsigned i = 0; i < ndescs; i++) {
+            unsigned ubo = binding_layout->ubo_idx + dest_offset + i;
+
+            panvk_set_ubo_desc(&set->ubos[ubo],
+                               &pDescriptorWrite->pBufferInfo[src_offset + i]);
+         }
+         break;
+
+      case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
+      case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
+         for (unsigned i = 0; i < ndescs; i++)
+            panvk_set_buffer_info_desc(&descs[i], &pDescriptorWrite->pBufferInfo[src_offset + i]);
+         break;
+      default:
+         unreachable("Invalid type");
+      }
+
+      src_offset += ndescs;
+      binding++;
+      dest_offset = 0;
+   }
+}
+
+static void
+panvk_copy_descriptor_set(struct panvk_device *dev,
+                          const VkCopyDescriptorSet *pDescriptorCopy)
+{
+   VK_FROM_HANDLE(panvk_descriptor_set, dest_set, pDescriptorCopy->dstSet);
+   VK_FROM_HANDLE(panvk_descriptor_set, src_set, pDescriptorCopy->srcSet);
+   const struct panvk_descriptor_set_layout *dest_layout = dest_set->layout;
+   const struct panvk_descriptor_set_layout *src_layout = src_set->layout;
+   unsigned dest_offset = pDescriptorCopy->dstArrayElement;
+   unsigned src_offset = pDescriptorCopy->srcArrayElement;
+   unsigned dest_binding = pDescriptorCopy->dstBinding;
+   unsigned src_binding = pDescriptorCopy->srcBinding;
+   unsigned desc_count = pDescriptorCopy->descriptorCount;
+
+   while (desc_count && src_binding < src_layout->binding_count &&
+          dest_binding < dest_layout->binding_count) {
+      const struct panvk_descriptor_set_binding_layout *dest_binding_layout =
+         &dest_layout->bindings[dest_binding];
+
+      if (!dest_binding_layout->array_size) {
+         dest_binding++;
+         dest_offset = 0;
+         continue;
+      }
+
+      const struct panvk_descriptor_set_binding_layout *src_binding_layout =
+         &src_layout->bindings[src_binding];
+
+      if (!src_binding_layout->array_size) {
+         src_binding++;
+         src_offset = 0;
+         continue;
+      }
+
+      assert(dest_binding_layout->type == src_binding_layout->type);
+
+      unsigned ndescs = MIN3(desc_count,
+                             dest_binding_layout->array_size - dest_offset,
+                             src_binding_layout->array_size - src_offset);
+
+      struct panvk_descriptor *dest_descs = dest_set->descs + dest_binding_layout->desc_idx + dest_offset;
+      struct panvk_descriptor *src_descs = src_set->descs + src_binding_layout->desc_idx + src_offset;
+
+      memcpy(dest_descs, src_descs, ndescs * sizeof(*dest_descs));
+      desc_count -= ndescs;
+
+      dest_offset += ndescs;
+      if (dest_offset == dest_binding_layout->array_size) {
+         dest_binding++;
+         dest_offset = 0;
+      }
+
+      src_offset += ndescs;
+      if (src_offset == src_binding_layout->array_size) {
+         src_binding++;
+         src_offset = 0;
+      }
+   }
+
+   assert(!desc_count);
+}
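+/* Illustration (not part of the original patch): a single
+ * vkUpdateDescriptorSets() copy may straddle bindings, hence the MIN3
+ * above: copying 4 descriptors out of a 3-element source binding starting
+ * at element 1 first moves 2 descriptors, then advances to the next
+ * binding for the remaining 2.
+ */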
+void
+panvk_UpdateDescriptorSets(VkDevice _device,
+                           uint32_t descriptorWriteCount,
+                           const VkWriteDescriptorSet *pDescriptorWrites,
+                           uint32_t descriptorCopyCount,
+                           const VkCopyDescriptorSet *pDescriptorCopies)
+{
+   VK_FROM_HANDLE(panvk_device, dev, _device);
+
+   for (unsigned i = 0; i < descriptorWriteCount; i++)
+      panvk_write_descriptor_set(dev, &pDescriptorWrites[i]);
+   for (unsigned i = 0; i < descriptorCopyCount; i++)
+      panvk_copy_descriptor_set(dev, &pDescriptorCopies[i]);
+}
+
+VkResult
+panvk_CreateDescriptorUpdateTemplate(VkDevice _device,
+                                     const VkDescriptorUpdateTemplateCreateInfo *pCreateInfo,
+                                     const VkAllocationCallbacks *pAllocator,
+                                     VkDescriptorUpdateTemplate *pDescriptorUpdateTemplate)
+{
+   panvk_stub();
+   return VK_SUCCESS;
+}
+
+void
+panvk_DestroyDescriptorUpdateTemplate(VkDevice _device,
+                                      VkDescriptorUpdateTemplate descriptorUpdateTemplate,
+                                      const VkAllocationCallbacks *pAllocator)
+{
+   panvk_stub();
+}
+
+void
+panvk_UpdateDescriptorSetWithTemplate(VkDevice _device,
+                                      VkDescriptorSet descriptorSet,
+                                      VkDescriptorUpdateTemplate descriptorUpdateTemplate,
+                                      const void *pData)
+{
+   panvk_stub();
+}
+
+VkResult
+panvk_CreateSamplerYcbcrConversion(VkDevice device,
+                                   const VkSamplerYcbcrConversionCreateInfo *pCreateInfo,
+                                   const VkAllocationCallbacks *pAllocator,
+                                   VkSamplerYcbcrConversion *pYcbcrConversion)
+{
+   panvk_stub();
+   return VK_SUCCESS;
+}
+
+void
+panvk_DestroySamplerYcbcrConversion(VkDevice device,
+                                    VkSamplerYcbcrConversion ycbcrConversion,
+                                    const VkAllocationCallbacks *pAllocator)
+{
+   panvk_stub();
+}
diff --git a/src/panfrost/vulkan/panvk_device.c b/src/panfrost/vulkan/panvk_device.c
new file mode 100644
index 00000000000..6457a393b06
--- /dev/null
+++ b/src/panfrost/vulkan/panvk_device.c
@@ -0,0 +1,2094 @@
+/*
+ * Copyright © 2021 Collabora Ltd.
+ *
+ * Derived from tu_device.c which is:
+ * Copyright © 2016 Red Hat.
+ * Copyright © 2016 Bas Nieuwenhuizen
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include "panvk_private.h"
+
+#include "panfrost-quirks.h"
+#include "pan_blitter.h"
+#include "pan_bo.h"
+#include "pan_encoder.h"
+#include "pan_util.h"
+#include "decode.h"
+
+#include <fcntl.h>
+#include <libsync.h>
+#include <stdbool.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/sysinfo.h>
+#include <unistd.h>
+#include <xf86drm.h>
+
+#include "drm-uapi/panfrost_drm.h"
+
+#include "util/debug.h"
+#include "util/strtod.h"
+#include "vk_format.h"
+#include "vk_util.h"
+
+#ifdef VK_USE_PLATFORM_WAYLAND_KHR
+#include <wayland-client.h>
+#include "wayland-drm-client-protocol.h"
+#endif
+
+#include "panvk_cs.h"
+
+VkResult
+_panvk_device_set_lost(struct panvk_device *device,
+                       const char *file, int line,
+                       const char *msg, ...)
+{
+   /* Set the flag indicating that waits should return in finite time even
+    * after device loss.
+    */
+   p_atomic_inc(&device->_lost);
+
+   /* TODO: Report the log message through VkDebugReportCallbackEXT instead */
+   fprintf(stderr, "%s:%d: ", file, line);
+   va_list ap;
+   va_start(ap, msg);
+   vfprintf(stderr, msg, ap);
+   va_end(ap);
+
+   if (env_var_as_boolean("PANVK_ABORT_ON_DEVICE_LOSS", false))
+      abort();
+
+   return VK_ERROR_DEVICE_LOST;
+}
+
+static int
+panvk_device_get_cache_uuid(uint16_t family, void *uuid)
+{
+   uint32_t mesa_timestamp = 0;
+   uint16_t f = family;
+
+   memset(uuid, 0, VK_UUID_SIZE);
+   memcpy(uuid, &mesa_timestamp, 4);
+   memcpy((char *) uuid + 4, &f, 2);
+   snprintf((char *) uuid + 6, VK_UUID_SIZE - 10, "pan");
+   return 0;
+}
+
+static void
+panvk_get_driver_uuid(void *uuid)
+{
+   memset(uuid, 0, VK_UUID_SIZE);
+   snprintf(uuid, VK_UUID_SIZE, "panfrost");
+}
+
+static void
+panvk_get_device_uuid(void *uuid)
+{
+   memset(uuid, 0, VK_UUID_SIZE);
+}
+
+static const struct debug_control panvk_debug_options[] = {
+   { "startup", PANVK_DEBUG_STARTUP },
+   { "nir", PANVK_DEBUG_NIR },
+   { "trace", PANVK_DEBUG_TRACE },
+   { "sync", PANVK_DEBUG_SYNC },
+   { "afbc", PANVK_DEBUG_AFBC },
+   { "linear", PANVK_DEBUG_LINEAR },
+   { NULL, 0 }
+};
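+/* Illustration (not part of the original patch): this table is consumed
+ * by parse_debug_string() in panvk_CreateInstance() below, so something
+ * like PANVK_DEBUG=startup,trace would enable both the startup log and
+ * job tracing.
+ */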
+#if defined(VK_USE_PLATFORM_WAYLAND_KHR)
+#define PANVK_USE_WSI_PLATFORM
+#endif
+
+#define PANVK_API_VERSION VK_MAKE_VERSION(1, 1, VK_HEADER_VERSION)
+
+VkResult
+panvk_EnumerateInstanceVersion(uint32_t *pApiVersion)
+{
+   *pApiVersion = PANVK_API_VERSION;
+   return VK_SUCCESS;
+}
+
+static const struct vk_instance_extension_table panvk_instance_extensions = {
+#ifdef PANVK_USE_WSI_PLATFORM
+   .KHR_surface = true,
+#endif
+#ifdef VK_USE_PLATFORM_WAYLAND_KHR
+   .KHR_wayland_surface = true,
+#endif
+};
+
+static void
+panvk_get_device_extensions(const struct panvk_physical_device *device,
+                            struct vk_device_extension_table *ext)
+{
+   *ext = (struct vk_device_extension_table) {
+#ifdef PANVK_USE_WSI_PLATFORM
+      .KHR_swapchain = true,
+#endif
+      .EXT_custom_border_color = true,
+   };
+}
+
+VkResult
+panvk_CreateInstance(const VkInstanceCreateInfo *pCreateInfo,
+                     const VkAllocationCallbacks *pAllocator,
+                     VkInstance *pInstance)
+{
+   struct panvk_instance *instance;
+   VkResult result;
+
+   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO);
+
+   pAllocator = pAllocator ? : vk_default_allocator();
+   instance = vk_zalloc(pAllocator, sizeof(*instance), 8,
+                        VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
+   if (!instance)
+      return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   struct vk_instance_dispatch_table dispatch_table;
+
+   vk_instance_dispatch_table_from_entrypoints(&dispatch_table,
+                                               &panvk_instance_entrypoints,
+                                               true);
+   result = vk_instance_init(&instance->vk,
+                             &panvk_instance_extensions,
+                             &dispatch_table,
+                             pCreateInfo,
+                             pAllocator);
+   if (result != VK_SUCCESS) {
+      vk_free(pAllocator, instance);
+      return vk_error(NULL, result);
+   }
+
+   instance->physical_device_count = -1;
+   instance->debug_flags = parse_debug_string(getenv("PANVK_DEBUG"),
+                                              panvk_debug_options);
+
+   if (instance->debug_flags & PANVK_DEBUG_STARTUP)
+      panvk_logi("Created an instance");
+
+   VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false));
+
+   *pInstance = panvk_instance_to_handle(instance);
+
+   return VK_SUCCESS;
+}
+
+static void
+panvk_physical_device_finish(struct panvk_physical_device *device)
+{
+   panvk_wsi_finish(device);
+
+   panvk_meta_cleanup(device);
+   pan_blitter_cleanup(&device->pdev);
+   panfrost_close_device(&device->pdev);
+   close(device->local_fd);
+   if (device->master_fd != -1)
+      close(device->master_fd);
+
+   vk_physical_device_finish(&device->vk);
+}
+
+void
+panvk_DestroyInstance(VkInstance _instance,
+                      const VkAllocationCallbacks *pAllocator)
+{
+   VK_FROM_HANDLE(panvk_instance, instance, _instance);
+
+   if (!instance)
+      return;
+
+   for (int i = 0; i < instance->physical_device_count; ++i) {
+      panvk_physical_device_finish(instance->physical_devices + i);
+   }
+
+   vk_instance_finish(&instance->vk);
+   vk_free(&instance->vk.alloc, instance);
+}
+           ARRAY_SIZE(device->path));
+
+   if (instance->vk.enabled_extensions.KHR_display) {
+      master_fd = open(drm_device->nodes[DRM_NODE_PRIMARY], O_RDWR | O_CLOEXEC);
+      if (master_fd >= 0) {
+         /* TODO: close master_fd if accel is not working? */
+      }
+   }
+
+   device->master_fd = master_fd;
+   device->local_fd = fd;
+   device->pdev.debug = PAN_DBG_TRACE;
+   panfrost_open_device(NULL, fd, &device->pdev);
+
+   if (device->pdev.quirks & MIDGARD_SFBD) {
+      result = vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
+                         "%s not supported",
+                         panfrost_model_name(device->pdev.gpu_id));
+      goto fail;
+   }
+
+   pan_blitter_init(&device->pdev);
+   panvk_meta_init(device);
+
+   memset(device->name, 0, sizeof(device->name));
+   snprintf(device->name, sizeof(device->name), "%s",
+            panfrost_model_name(device->pdev.gpu_id));
+
+   if (panvk_device_get_cache_uuid(device->pdev.gpu_id, device->cache_uuid)) {
+      result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
+                         "cannot generate UUID");
+      goto fail;
+   }
+
+   fprintf(stderr, "WARNING: panvk is not a conformant vulkan implementation, "
+                   "testing use only.\n");
+
+   panvk_get_driver_uuid(&device->driver_uuid);
+   panvk_get_device_uuid(&device->device_uuid);
+
+   result = panvk_wsi_init(device);
+   if (result != VK_SUCCESS) {
+      vk_error(instance, result);
+      goto fail;
+   }
+
+   return VK_SUCCESS;
+
+fail:
+   close(fd);
+   if (master_fd != -1)
+      close(master_fd);
+   return result;
+}
+
+static VkResult
+panvk_enumerate_devices(struct panvk_instance *instance)
+{
+   /* TODO: Check for more devices ? */
+   drmDevicePtr devices[8];
+   VkResult result = VK_ERROR_INCOMPATIBLE_DRIVER;
+   int max_devices;
+
+   instance->physical_device_count = 0;
+
+   max_devices = drmGetDevices2(0, devices, ARRAY_SIZE(devices));
+
+   if (instance->debug_flags & PANVK_DEBUG_STARTUP)
+      panvk_logi("Found %d drm nodes", max_devices);
+
+   if (max_devices < 1)
+      return vk_error(instance, VK_ERROR_INCOMPATIBLE_DRIVER);
+
+   for (unsigned i = 0; i < (unsigned) max_devices; i++) {
+      if ((devices[i]->available_nodes & (1 << DRM_NODE_RENDER)) &&
+          devices[i]->bustype == DRM_BUS_PLATFORM) {
+
+         result = panvk_physical_device_init(instance->physical_devices +
+                                             instance->physical_device_count,
+                                             instance, devices[i]);
+         if (result == VK_SUCCESS)
+            ++instance->physical_device_count;
+         else if (result != VK_ERROR_INCOMPATIBLE_DRIVER)
+            break;
+      }
+   }
+   drmFreeDevices(devices, max_devices);
+
+   return result;
+}
+
+VkResult
+panvk_EnumeratePhysicalDevices(VkInstance _instance,
+                               uint32_t *pPhysicalDeviceCount,
+                               VkPhysicalDevice *pPhysicalDevices)
+{
+   VK_FROM_HANDLE(panvk_instance, instance, _instance);
+   VK_OUTARRAY_MAKE(out, pPhysicalDevices, pPhysicalDeviceCount);
+
+   VkResult result;
+
+   if (instance->physical_device_count < 0) {
+      result = panvk_enumerate_devices(instance);
+      if (result != VK_SUCCESS && result != VK_ERROR_INCOMPATIBLE_DRIVER)
+         return result;
+   }
+
+   for (uint32_t i = 0; i < instance->physical_device_count; ++i) {
+      vk_outarray_append(&out, p)
+      {
+         *p = panvk_physical_device_to_handle(instance->physical_devices + i);
+      }
+   }
+
+   return vk_outarray_status(&out);
+}
+
+VkResult
+panvk_EnumeratePhysicalDeviceGroups(VkInstance _instance,
+                                    uint32_t *pPhysicalDeviceGroupCount,
+                                    VkPhysicalDeviceGroupProperties *pPhysicalDeviceGroupProperties)
+{
+   VK_FROM_HANDLE(panvk_instance, instance, _instance);
+   VK_OUTARRAY_MAKE(out, pPhysicalDeviceGroupProperties,
+                    pPhysicalDeviceGroupCount);
+   VkResult result;
+
+   if (instance->physical_device_count < 0) {
+      result = panvk_enumerate_devices(instance);
+      if (result != VK_SUCCESS && result !=
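+          /* Here and in vkEnumeratePhysicalDevices() above,
+           * VK_ERROR_INCOMPATIBLE_DRIVER from probing only means no
+           * panfrost device was found; that is not an enumeration failure,
+           * just an empty device list. */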
VK_ERROR_INCOMPATIBLE_DRIVER) + return result; + } + + for (uint32_t i = 0; i < instance->physical_device_count; ++i) { + vk_outarray_append(&out, p) + { + p->physicalDeviceCount = 1; + p->physicalDevices[0] = + panvk_physical_device_to_handle(instance->physical_devices + i); + p->subsetAllocation = false; + } + } + + return VK_SUCCESS; +} + +void +panvk_GetPhysicalDeviceFeatures2(VkPhysicalDevice physicalDevice, + VkPhysicalDeviceFeatures2 *pFeatures) +{ + vk_foreach_struct(ext, pFeatures->pNext) + { + switch (ext->sType) { + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES: { + VkPhysicalDeviceVulkan11Features *features = (void *) ext; + features->storageBuffer16BitAccess = false; + features->uniformAndStorageBuffer16BitAccess = false; + features->storagePushConstant16 = false; + features->storageInputOutput16 = false; + features->multiview = false; + features->multiviewGeometryShader = false; + features->multiviewTessellationShader = false; + features->variablePointersStorageBuffer = true; + features->variablePointers = true; + features->protectedMemory = false; + features->samplerYcbcrConversion = false; + features->shaderDrawParameters = false; + break; + } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES: { + VkPhysicalDeviceVulkan12Features *features = (void *) ext; + features->samplerMirrorClampToEdge = false; + features->drawIndirectCount = false; + features->storageBuffer8BitAccess = false; + features->uniformAndStorageBuffer8BitAccess = false; + features->storagePushConstant8 = false; + features->shaderBufferInt64Atomics = false; + features->shaderSharedInt64Atomics = false; + features->shaderFloat16 = false; + features->shaderInt8 = false; + + features->descriptorIndexing = false; + features->shaderInputAttachmentArrayDynamicIndexing = false; + features->shaderUniformTexelBufferArrayDynamicIndexing = false; + features->shaderStorageTexelBufferArrayDynamicIndexing = false; + features->shaderUniformBufferArrayNonUniformIndexing = false; + features->shaderSampledImageArrayNonUniformIndexing = false; + features->shaderStorageBufferArrayNonUniformIndexing = false; + features->shaderStorageImageArrayNonUniformIndexing = false; + features->shaderInputAttachmentArrayNonUniformIndexing = false; + features->shaderUniformTexelBufferArrayNonUniformIndexing = false; + features->shaderStorageTexelBufferArrayNonUniformIndexing = false; + features->descriptorBindingUniformBufferUpdateAfterBind = false; + features->descriptorBindingSampledImageUpdateAfterBind = false; + features->descriptorBindingStorageImageUpdateAfterBind = false; + features->descriptorBindingStorageBufferUpdateAfterBind = false; + features->descriptorBindingUniformTexelBufferUpdateAfterBind = false; + features->descriptorBindingStorageTexelBufferUpdateAfterBind = false; + features->descriptorBindingUpdateUnusedWhilePending = false; + features->descriptorBindingPartiallyBound = false; + features->descriptorBindingVariableDescriptorCount = false; + features->runtimeDescriptorArray = false; + + features->samplerFilterMinmax = false; + features->scalarBlockLayout = false; + features->imagelessFramebuffer = false; + features->uniformBufferStandardLayout = false; + features->shaderSubgroupExtendedTypes = false; + features->separateDepthStencilLayouts = false; + features->hostQueryReset = false; + features->timelineSemaphore = false; + features->bufferDeviceAddress = false; + features->bufferDeviceAddressCaptureReplay = false; + features->bufferDeviceAddressMultiDevice = false; + features->vulkanMemoryModel = 
false; + features->vulkanMemoryModelDeviceScope = false; + features->vulkanMemoryModelAvailabilityVisibilityChains = false; + features->shaderOutputViewportIndex = false; + features->shaderOutputLayer = false; + features->subgroupBroadcastDynamicId = false; + break; + } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VARIABLE_POINTERS_FEATURES: { + VkPhysicalDeviceVariablePointersFeatures *features = (void *) ext; + features->variablePointersStorageBuffer = true; + features->variablePointers = true; + break; + } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_FEATURES: { + VkPhysicalDeviceMultiviewFeatures *features = + (VkPhysicalDeviceMultiviewFeatures *) ext; + features->multiview = false; + features->multiviewGeometryShader = false; + features->multiviewTessellationShader = false; + break; + } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DRAW_PARAMETERS_FEATURES: { + VkPhysicalDeviceShaderDrawParametersFeatures *features = + (VkPhysicalDeviceShaderDrawParametersFeatures *) ext; + features->shaderDrawParameters = false; + break; + } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROTECTED_MEMORY_FEATURES: { + VkPhysicalDeviceProtectedMemoryFeatures *features = + (VkPhysicalDeviceProtectedMemoryFeatures *) ext; + features->protectedMemory = false; + break; + } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES: { + VkPhysicalDevice16BitStorageFeatures *features = + (VkPhysicalDevice16BitStorageFeatures *) ext; + features->storageBuffer16BitAccess = false; + features->uniformAndStorageBuffer16BitAccess = false; + features->storagePushConstant16 = false; + features->storageInputOutput16 = false; + break; + } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLER_YCBCR_CONVERSION_FEATURES: { + VkPhysicalDeviceSamplerYcbcrConversionFeatures *features = + (VkPhysicalDeviceSamplerYcbcrConversionFeatures *) ext; + features->samplerYcbcrConversion = false; + break; + } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_FEATURES_EXT: { + VkPhysicalDeviceDescriptorIndexingFeaturesEXT *features = + (VkPhysicalDeviceDescriptorIndexingFeaturesEXT *) ext; + features->shaderInputAttachmentArrayDynamicIndexing = false; + features->shaderUniformTexelBufferArrayDynamicIndexing = false; + features->shaderStorageTexelBufferArrayDynamicIndexing = false; + features->shaderUniformBufferArrayNonUniformIndexing = false; + features->shaderSampledImageArrayNonUniformIndexing = false; + features->shaderStorageBufferArrayNonUniformIndexing = false; + features->shaderStorageImageArrayNonUniformIndexing = false; + features->shaderInputAttachmentArrayNonUniformIndexing = false; + features->shaderUniformTexelBufferArrayNonUniformIndexing = false; + features->shaderStorageTexelBufferArrayNonUniformIndexing = false; + features->descriptorBindingUniformBufferUpdateAfterBind = false; + features->descriptorBindingSampledImageUpdateAfterBind = false; + features->descriptorBindingStorageImageUpdateAfterBind = false; + features->descriptorBindingStorageBufferUpdateAfterBind = false; + features->descriptorBindingUniformTexelBufferUpdateAfterBind = false; + features->descriptorBindingStorageTexelBufferUpdateAfterBind = false; + features->descriptorBindingUpdateUnusedWhilePending = false; + features->descriptorBindingPartiallyBound = false; + features->descriptorBindingVariableDescriptorCount = false; + features->runtimeDescriptorArray = false; + break; + } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CONDITIONAL_RENDERING_FEATURES_EXT: { + VkPhysicalDeviceConditionalRenderingFeaturesEXT *features = + 
(VkPhysicalDeviceConditionalRenderingFeaturesEXT *) ext; + features->conditionalRendering = false; + features->inheritedConditionalRendering = false; + break; + } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_FEATURES_EXT: { + VkPhysicalDeviceTransformFeedbackFeaturesEXT *features = + (VkPhysicalDeviceTransformFeedbackFeaturesEXT *) ext; + features->transformFeedback = false; + features->geometryStreams = false; + break; + } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INDEX_TYPE_UINT8_FEATURES_EXT: { + VkPhysicalDeviceIndexTypeUint8FeaturesEXT *features = + (VkPhysicalDeviceIndexTypeUint8FeaturesEXT *)ext; + features->indexTypeUint8 = true; + break; + } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_FEATURES_EXT: { + VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT *features = + (VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT *)ext; + features->vertexAttributeInstanceRateDivisor = true; + features->vertexAttributeInstanceRateZeroDivisor = true; + break; + } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PRIVATE_DATA_FEATURES_EXT: { + VkPhysicalDevicePrivateDataFeaturesEXT *features = + (VkPhysicalDevicePrivateDataFeaturesEXT *)ext; + features->privateData = true; + break; + } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEPTH_CLIP_ENABLE_FEATURES_EXT: { + VkPhysicalDeviceDepthClipEnableFeaturesEXT *features = + (VkPhysicalDeviceDepthClipEnableFeaturesEXT *)ext; + features->depthClipEnable = true; + break; + } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_4444_FORMATS_FEATURES_EXT: { + VkPhysicalDevice4444FormatsFeaturesEXT *features = (void *)ext; + features->formatA4R4G4B4 = true; + features->formatA4B4G4R4 = true; + break; + } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_FEATURES_EXT: { + VkPhysicalDeviceCustomBorderColorFeaturesEXT *features = (void *) ext; + features->customBorderColors = true; + features->customBorderColorWithoutFormat = true; + break; + } + default: + break; + } + } + + pFeatures->features = (VkPhysicalDeviceFeatures) { + .fullDrawIndexUint32 = true, + .independentBlend = true, + .wideLines = true, + .largePoints = true, + .textureCompressionETC2 = true, + .textureCompressionASTC_LDR = true, + .shaderUniformBufferArrayDynamicIndexing = true, + .shaderSampledImageArrayDynamicIndexing = true, + .shaderStorageBufferArrayDynamicIndexing = true, + .shaderStorageImageArrayDynamicIndexing = true, + }; +} + +void +panvk_GetPhysicalDeviceProperties2(VkPhysicalDevice physicalDevice, + VkPhysicalDeviceProperties2 *pProperties) +{ + VK_FROM_HANDLE(panvk_physical_device, pdevice, physicalDevice); + + vk_foreach_struct(ext, pProperties->pNext) + { + switch (ext->sType) { + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PUSH_DESCRIPTOR_PROPERTIES_KHR: { + VkPhysicalDevicePushDescriptorPropertiesKHR *properties = (VkPhysicalDevicePushDescriptorPropertiesKHR *)ext; + properties->maxPushDescriptors = MAX_PUSH_DESCRIPTORS; + break; + } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES: { + VkPhysicalDeviceIDProperties *properties = (VkPhysicalDeviceIDProperties *)ext; + memcpy(properties->driverUUID, pdevice->driver_uuid, VK_UUID_SIZE); + memcpy(properties->deviceUUID, pdevice->device_uuid, VK_UUID_SIZE); + properties->deviceLUIDValid = false; + break; + } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_PROPERTIES: { + VkPhysicalDeviceMultiviewProperties *properties = (VkPhysicalDeviceMultiviewProperties *)ext; + properties->maxMultiviewViewCount = 0; + properties->maxMultiviewInstanceIndex = 0; + break; + } + case 
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_POINT_CLIPPING_PROPERTIES: { + VkPhysicalDevicePointClippingProperties *properties = (VkPhysicalDevicePointClippingProperties *)ext; + properties->pointClippingBehavior = + VK_POINT_CLIPPING_BEHAVIOR_ALL_CLIP_PLANES; + break; + } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MAINTENANCE_3_PROPERTIES: { + VkPhysicalDeviceMaintenance3Properties *properties = (VkPhysicalDeviceMaintenance3Properties *)ext; + /* Make sure everything is addressable by a signed 32-bit int, and + * our largest descriptors are 96 bytes. */ + properties->maxPerSetDescriptors = (1ull << 31) / 96; + /* Our buffer size fields allow only this much */ + properties->maxMemoryAllocationSize = 0xFFFFFFFFull; + break; + } + default: + break; + } + } + + VkSampleCountFlags sample_counts = 0xf; + + /* make sure that the entire descriptor set is addressable with a signed + * 32-bit int. So the sum of all limits scaled by descriptor size has to + * be at most 2 GiB. the combined image & samples object count as one of + * both. This limit is for the pipeline layout, not for the set layout, but + * there is no set limit, so we just set a pipeline limit. I don't think + * any app is going to hit this soon. */ + size_t max_descriptor_set_size = + ((1ull << 31) - 16 * MAX_DYNAMIC_BUFFERS) / + (32 /* uniform buffer, 32 due to potential space wasted on alignment */ + + 32 /* storage buffer, 32 due to potential space wasted on alignment */ + + 32 /* sampler, largest when combined with image */ + + 64 /* sampled image */ + 64 /* storage image */); + + VkPhysicalDeviceLimits limits = { + .maxImageDimension1D = (1 << 14), + .maxImageDimension2D = (1 << 14), + .maxImageDimension3D = (1 << 11), + .maxImageDimensionCube = (1 << 14), + .maxImageArrayLayers = (1 << 11), + .maxTexelBufferElements = 128 * 1024 * 1024, + .maxUniformBufferRange = UINT32_MAX, + .maxStorageBufferRange = UINT32_MAX, + .maxPushConstantsSize = MAX_PUSH_CONSTANTS_SIZE, + .maxMemoryAllocationCount = UINT32_MAX, + .maxSamplerAllocationCount = 64 * 1024, + .bufferImageGranularity = 64, /* A cache line */ + .sparseAddressSpaceSize = 0xffffffffu, /* buffer max size */ + .maxBoundDescriptorSets = MAX_SETS, + .maxPerStageDescriptorSamplers = max_descriptor_set_size, + .maxPerStageDescriptorUniformBuffers = max_descriptor_set_size, + .maxPerStageDescriptorStorageBuffers = max_descriptor_set_size, + .maxPerStageDescriptorSampledImages = max_descriptor_set_size, + .maxPerStageDescriptorStorageImages = max_descriptor_set_size, + .maxPerStageDescriptorInputAttachments = max_descriptor_set_size, + .maxPerStageResources = max_descriptor_set_size, + .maxDescriptorSetSamplers = max_descriptor_set_size, + .maxDescriptorSetUniformBuffers = max_descriptor_set_size, + .maxDescriptorSetUniformBuffersDynamic = MAX_DYNAMIC_UNIFORM_BUFFERS, + .maxDescriptorSetStorageBuffers = max_descriptor_set_size, + .maxDescriptorSetStorageBuffersDynamic = MAX_DYNAMIC_STORAGE_BUFFERS, + .maxDescriptorSetSampledImages = max_descriptor_set_size, + .maxDescriptorSetStorageImages = max_descriptor_set_size, + .maxDescriptorSetInputAttachments = max_descriptor_set_size, + .maxVertexInputAttributes = 32, + .maxVertexInputBindings = 32, + .maxVertexInputAttributeOffset = 2047, + .maxVertexInputBindingStride = 2048, + .maxVertexOutputComponents = 128, + .maxTessellationGenerationLevel = 64, + .maxTessellationPatchSize = 32, + .maxTessellationControlPerVertexInputComponents = 128, + .maxTessellationControlPerVertexOutputComponents = 128, + .maxTessellationControlPerPatchOutputComponents = 
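+      /* The tessellation and geometry limits in this table appear to be
+       * inherited from the turnip template this file was derived from;
+       * panvk does not expose tessellationShader/geometryShader in
+       * VkPhysicalDeviceFeatures, so these entries are placeholders rather
+       * than measured hardware limits. */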
120, + .maxTessellationControlTotalOutputComponents = 4096, + .maxTessellationEvaluationInputComponents = 128, + .maxTessellationEvaluationOutputComponents = 128, + .maxGeometryShaderInvocations = 127, + .maxGeometryInputComponents = 64, + .maxGeometryOutputComponents = 128, + .maxGeometryOutputVertices = 256, + .maxGeometryTotalOutputComponents = 1024, + .maxFragmentInputComponents = 128, + .maxFragmentOutputAttachments = 8, + .maxFragmentDualSrcAttachments = 1, + .maxFragmentCombinedOutputResources = 8, + .maxComputeSharedMemorySize = 32768, + .maxComputeWorkGroupCount = { 65535, 65535, 65535 }, + .maxComputeWorkGroupInvocations = 2048, + .maxComputeWorkGroupSize = { 2048, 2048, 2048 }, + .subPixelPrecisionBits = 4 /* FIXME */, + .subTexelPrecisionBits = 4 /* FIXME */, + .mipmapPrecisionBits = 4 /* FIXME */, + .maxDrawIndexedIndexValue = UINT32_MAX, + .maxDrawIndirectCount = UINT32_MAX, + .maxSamplerLodBias = 16, + .maxSamplerAnisotropy = 16, + .maxViewports = MAX_VIEWPORTS, + .maxViewportDimensions = { (1 << 14), (1 << 14) }, + .viewportBoundsRange = { INT16_MIN, INT16_MAX }, + .viewportSubPixelBits = 8, + .minMemoryMapAlignment = 4096, /* A page */ + .minTexelBufferOffsetAlignment = 1, + .minUniformBufferOffsetAlignment = 4, + .minStorageBufferOffsetAlignment = 4, + .minTexelOffset = -32, + .maxTexelOffset = 31, + .minTexelGatherOffset = -32, + .maxTexelGatherOffset = 31, + .minInterpolationOffset = -2, + .maxInterpolationOffset = 2, + .subPixelInterpolationOffsetBits = 8, + .maxFramebufferWidth = (1 << 14), + .maxFramebufferHeight = (1 << 14), + .maxFramebufferLayers = (1 << 10), + .framebufferColorSampleCounts = sample_counts, + .framebufferDepthSampleCounts = sample_counts, + .framebufferStencilSampleCounts = sample_counts, + .framebufferNoAttachmentsSampleCounts = sample_counts, + .maxColorAttachments = MAX_RTS, + .sampledImageColorSampleCounts = sample_counts, + .sampledImageIntegerSampleCounts = VK_SAMPLE_COUNT_1_BIT, + .sampledImageDepthSampleCounts = sample_counts, + .sampledImageStencilSampleCounts = sample_counts, + .storageImageSampleCounts = VK_SAMPLE_COUNT_1_BIT, + .maxSampleMaskWords = 1, + .timestampComputeAndGraphics = true, + .timestampPeriod = 1, + .maxClipDistances = 8, + .maxCullDistances = 8, + .maxCombinedClipAndCullDistances = 8, + .discreteQueuePriorities = 1, + .pointSizeRange = { 0.125, 255.875 }, + .lineWidthRange = { 0.0, 7.9921875 }, + .pointSizeGranularity = (1.0 / 8.0), + .lineWidthGranularity = (1.0 / 128.0), + .strictLines = false, /* FINISHME */ + .standardSampleLocations = true, + .optimalBufferCopyOffsetAlignment = 128, + .optimalBufferCopyRowPitchAlignment = 128, + .nonCoherentAtomSize = 64, + }; + + pProperties->properties = (VkPhysicalDeviceProperties) { + .apiVersion = PANVK_API_VERSION, + .driverVersion = vk_get_driver_version(), + .vendorID = 0, /* TODO */ + .deviceID = 0, + .deviceType = VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU, + .limits = limits, + .sparseProperties = { 0 }, + }; + + strcpy(pProperties->properties.deviceName, pdevice->name); + memcpy(pProperties->properties.pipelineCacheUUID, pdevice->cache_uuid, VK_UUID_SIZE); +} + +static const VkQueueFamilyProperties panvk_queue_family_properties = { + .queueFlags = VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT, + .queueCount = 1, + .timestampValidBits = 64, + .minImageTransferGranularity = { 1, 1, 1 }, +}; + +void +panvk_GetPhysicalDeviceQueueFamilyProperties(VkPhysicalDevice physicalDevice, + uint32_t *pQueueFamilyPropertyCount, + VkQueueFamilyProperties 
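+   /* VK_OUTARRAY_MAKE/vk_outarray_append below implement the standard
+    * Vulkan two-call idiom: with a NULL output array only the count is
+    * written back, otherwise at most *pQueueFamilyPropertyCount entries
+    * are filled in. Typical usage from an application:
+    *
+    *    uint32_t count;
+    *    vkGetPhysicalDeviceQueueFamilyProperties(pdev, &count, NULL);
+    *    VkQueueFamilyProperties props[count];    // count == 1 on panvk
+    *    vkGetPhysicalDeviceQueueFamilyProperties(pdev, &count, props);
+    */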
*pQueueFamilyProperties) +{ + VK_OUTARRAY_MAKE(out, pQueueFamilyProperties, pQueueFamilyPropertyCount); + + vk_outarray_append(&out, p) { *p = panvk_queue_family_properties; } +} + +void +panvk_GetPhysicalDeviceQueueFamilyProperties2(VkPhysicalDevice physicalDevice, + uint32_t *pQueueFamilyPropertyCount, + VkQueueFamilyProperties2 *pQueueFamilyProperties) +{ + VK_OUTARRAY_MAKE(out, pQueueFamilyProperties, pQueueFamilyPropertyCount); + + vk_outarray_append(&out, p) + { + p->queueFamilyProperties = panvk_queue_family_properties; + } +} + +static uint64_t +panvk_get_system_heap_size() +{ + struct sysinfo info; + sysinfo(&info); + + uint64_t total_ram = (uint64_t)info.totalram * info.mem_unit; + + /* We don't want to burn too much ram with the GPU. If the user has 4GiB + * or less, we use at most half. If they have more than 4GiB, we use 3/4. + */ + uint64_t available_ram; + if (total_ram <= 4ull * 1024 * 1024 * 1024) + available_ram = total_ram / 2; + else + available_ram = total_ram * 3 / 4; + + return available_ram; +} + +void +panvk_GetPhysicalDeviceMemoryProperties2(VkPhysicalDevice physicalDevice, + VkPhysicalDeviceMemoryProperties2 *pMemoryProperties) +{ + pMemoryProperties->memoryProperties = (VkPhysicalDeviceMemoryProperties) { + .memoryHeapCount = 1, + .memoryHeaps[0].size = panvk_get_system_heap_size(), + .memoryHeaps[0].flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT, + .memoryTypeCount = 1, + .memoryTypes[0].propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | + VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, + .memoryTypes[0].heapIndex = 0, + }; +} + +static VkResult +panvk_queue_init(struct panvk_device *device, + struct panvk_queue *queue, + uint32_t queue_family_index, + int idx, + VkDeviceQueueCreateFlags flags) +{ + const struct panfrost_device *pdev = &device->physical_device->pdev; + + vk_object_base_init(&device->vk, &queue->base, VK_OBJECT_TYPE_QUEUE); + queue->device = device; + queue->queue_family_index = queue_family_index; + queue->flags = flags; + + struct drm_syncobj_create create = { + .flags = DRM_SYNCOBJ_CREATE_SIGNALED, + }; + + int ret = drmIoctl(pdev->fd, DRM_IOCTL_SYNCOBJ_CREATE, &create); + if (ret) + return VK_ERROR_OUT_OF_HOST_MEMORY; + + queue->sync = create.handle; + return VK_SUCCESS; +} + +static void +panvk_queue_finish(struct panvk_queue *queue) +{ +} + +VkResult +panvk_CreateDevice(VkPhysicalDevice physicalDevice, + const VkDeviceCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkDevice *pDevice) +{ + VK_FROM_HANDLE(panvk_physical_device, physical_device, physicalDevice); + VkResult result; + struct panvk_device *device; + + /* Check enabled features */ + if (pCreateInfo->pEnabledFeatures) { + VkPhysicalDeviceFeatures2 supported_features = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2, + }; + panvk_GetPhysicalDeviceFeatures2(physicalDevice, &supported_features); + VkBool32 *supported_feature = (VkBool32 *) &supported_features.features; + VkBool32 *enabled_feature = (VkBool32 *) pCreateInfo->pEnabledFeatures; + unsigned num_features = + sizeof(VkPhysicalDeviceFeatures) / sizeof(VkBool32); + for (uint32_t i = 0; i < num_features; i++) { + if (enabled_feature[i] && !supported_feature[i]) + return vk_error(physical_device->instance, + VK_ERROR_FEATURE_NOT_PRESENT); + } + } + + device = vk_zalloc2(&physical_device->instance->vk.alloc, pAllocator, + sizeof(*device), 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); + if (!device) + return vk_error(physical_device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); + + 
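+   /* panvk_device_entrypoints is generated from the Vulkan API XML at
+    * build time; loading it into the vk_device_dispatch_table here is
+    * what routes dispatch (including vkGetDeviceProcAddr() lookups) to
+    * the panvk_* implementations in this file. */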
struct vk_device_dispatch_table dispatch_table; + vk_device_dispatch_table_from_entrypoints(&dispatch_table, + &panvk_device_entrypoints, + true); + result = vk_device_init(&device->vk, &physical_device->vk, &dispatch_table, + pCreateInfo, pAllocator); + if (result != VK_SUCCESS) { + vk_free(&device->vk.alloc, device); + return vk_errorf(physical_device->instance, result, "vk_device_init failed"); + } + + device->instance = physical_device->instance; + device->physical_device = physical_device; + + for (unsigned i = 0; i < pCreateInfo->queueCreateInfoCount; i++) { + const VkDeviceQueueCreateInfo *queue_create = + &pCreateInfo->pQueueCreateInfos[i]; + uint32_t qfi = queue_create->queueFamilyIndex; + device->queues[qfi] = + vk_alloc(&device->vk.alloc, + queue_create->queueCount * sizeof(struct panvk_queue), + 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); + if (!device->queues[qfi]) { + result = VK_ERROR_OUT_OF_HOST_MEMORY; + goto fail; + } + + memset(device->queues[qfi], 0, + queue_create->queueCount * sizeof(struct panvk_queue)); + + device->queue_count[qfi] = queue_create->queueCount; + + for (unsigned q = 0; q < queue_create->queueCount; q++) { + result = panvk_queue_init(device, &device->queues[qfi][q], qfi, q, + queue_create->flags); + if (result != VK_SUCCESS) + goto fail; + } + } + + *pDevice = panvk_device_to_handle(device); + return VK_SUCCESS; + +fail: + for (unsigned i = 0; i < PANVK_MAX_QUEUE_FAMILIES; i++) { + for (unsigned q = 0; q < device->queue_count[i]; q++) + panvk_queue_finish(&device->queues[i][q]); + if (device->queue_count[i]) + vk_object_free(&device->vk, NULL, device->queues[i]); + } + + vk_free(&device->vk.alloc, device); + return result; +} + +void +panvk_DestroyDevice(VkDevice _device, const VkAllocationCallbacks *pAllocator) +{ + VK_FROM_HANDLE(panvk_device, device, _device); + + if (!device) + return; + + for (unsigned i = 0; i < PANVK_MAX_QUEUE_FAMILIES; i++) { + for (unsigned q = 0; q < device->queue_count[i]; q++) + panvk_queue_finish(&device->queues[i][q]); + if (device->queue_count[i]) + vk_object_free(&device->vk, NULL, device->queues[i]); + } + + vk_free(&device->vk.alloc, device); +} + +VkResult +panvk_EnumerateInstanceLayerProperties(uint32_t *pPropertyCount, + VkLayerProperties *pProperties) +{ + *pPropertyCount = 0; + return VK_SUCCESS; +} + +void +panvk_GetDeviceQueue2(VkDevice _device, + const VkDeviceQueueInfo2 *pQueueInfo, + VkQueue *pQueue) +{ + VK_FROM_HANDLE(panvk_device, device, _device); + struct panvk_queue *queue; + + queue = &device->queues[pQueueInfo->queueFamilyIndex][pQueueInfo->queueIndex]; + if (pQueueInfo->flags != queue->flags) { + /* From the Vulkan 1.1.70 spec: + * + * "The queue returned by vkGetDeviceQueue2 must have the same + * flags value from this structure as that used at device + * creation time in a VkDeviceQueueCreateInfo instance. If no + * matching flags were specified at device creation time then + * pQueue will return VK_NULL_HANDLE." 
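+    *
+    * panvk stores the create-time flags on each panvk_queue, so the
+    * equality check below is all that is needed to honour that rule.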
+ */ + *pQueue = VK_NULL_HANDLE; + return; + } + + *pQueue = panvk_queue_to_handle(queue); +} + +void +panvk_GetDeviceQueue(VkDevice _device, + uint32_t queueFamilyIndex, + uint32_t queueIndex, + VkQueue *pQueue) +{ + const VkDeviceQueueInfo2 info = (VkDeviceQueueInfo2) { + .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_INFO_2, + .queueFamilyIndex = queueFamilyIndex, + .queueIndex = queueIndex + }; + + panvk_GetDeviceQueue2(_device, &info, pQueue); +} + +static void +panvk_queue_submit_batch(struct panvk_queue *queue, + struct panvk_batch *batch, + uint32_t *bos, unsigned nr_bos, + uint32_t *in_fences, + unsigned nr_in_fences) +{ + const struct panvk_device *dev = queue->device; + unsigned debug = dev->physical_device->instance->debug_flags; + const struct panfrost_device *pdev = &dev->physical_device->pdev; + int ret; + + /* Reset the batch if it's already been issued */ + if (batch->issued) { + util_dynarray_foreach(&batch->jobs, void *, job) + memset((*job), 0, 4 * 4); + + /* Reset the tiler before re-issuing the batch */ + if (pan_is_bifrost(pdev) && batch->tiler.bifrost_descs.cpu) { + memcpy(batch->tiler.bifrost_descs.cpu, &batch->tiler.templ.bifrost, + sizeof(batch->tiler.templ.bifrost)); + } else if (!pan_is_bifrost(pdev) && batch->fb.desc.cpu) { + void *tiler = pan_section_ptr(batch->fb.desc.cpu, MULTI_TARGET_FRAMEBUFFER, TILER); + memcpy(tiler, &batch->tiler.templ.midgard, sizeof(batch->tiler.templ.midgard)); + /* All weights set to 0, nothing to do here */ + pan_section_pack(batch->fb.desc.cpu, MULTI_TARGET_FRAMEBUFFER, TILER_WEIGHTS, w); + } + } + + if (batch->scoreboard.first_job) { + struct drm_panfrost_submit submit = { + .bo_handles = (uintptr_t)bos, + .bo_handle_count = nr_bos, + .in_syncs = (uintptr_t)in_fences, + .in_sync_count = nr_in_fences, + .out_sync = queue->sync, + .jc = batch->scoreboard.first_job, + }; + + ret = drmIoctl(pdev->fd, DRM_IOCTL_PANFROST_SUBMIT, &submit); + assert(!ret); + + if (debug & (PANVK_DEBUG_TRACE | PANVK_DEBUG_SYNC)) { + ret = drmSyncobjWait(pdev->fd, &submit.out_sync, 1, INT64_MAX, 0, NULL); + assert(!ret); + } + + if (debug & PANVK_DEBUG_TRACE) + pandecode_jc(batch->scoreboard.first_job, pan_is_bifrost(pdev), pdev->gpu_id); + } + + if (batch->fragment_job) { + struct drm_panfrost_submit submit = { + .bo_handles = (uintptr_t)bos, + .bo_handle_count = nr_bos, + .out_sync = queue->sync, + .jc = batch->fragment_job, + .requirements = PANFROST_JD_REQ_FS, + }; + + if (batch->scoreboard.first_job) { + submit.in_syncs = (uintptr_t)(&queue->sync); + submit.in_sync_count = 1; + } else { + submit.in_syncs = (uintptr_t)in_fences; + submit.in_sync_count = nr_in_fences; + } + + ret = drmIoctl(pdev->fd, DRM_IOCTL_PANFROST_SUBMIT, &submit); + assert(!ret); + if (debug & (PANVK_DEBUG_TRACE | PANVK_DEBUG_SYNC)) { + ret = drmSyncobjWait(pdev->fd, &submit.out_sync, 1, INT64_MAX, 0, NULL); + assert(!ret); + } + + if (debug & PANVK_DEBUG_TRACE) + pandecode_jc(batch->fragment_job, pan_is_bifrost(pdev), pdev->gpu_id); + } + + if (debug & PANVK_DEBUG_TRACE) + pandecode_next_frame(); + + batch->issued = true; +} + +static void +panvk_queue_transfer_sync(struct panvk_queue *queue, + struct panvk_syncobj *dst) +{ + const struct panfrost_device *pdev = &queue->device->physical_device->pdev; + int ret; + + struct drm_syncobj_handle handle = { + .handle = queue->sync, + .flags = DRM_SYNCOBJ_HANDLE_TO_FD_FLAGS_EXPORT_SYNC_FILE, + .fd = -1, + }; + + ret = drmIoctl(pdev->fd, DRM_IOCTL_SYNCOBJ_HANDLE_TO_FD, &handle); + assert(!ret); + assert(handle.fd >= 0); + + handle.handle = 
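+   /* Import the sync_file we just exported into the destination syncobj:
+    * the temporary payload if one is active, the permanent one otherwise.
+    * On libdrm versions that expose it, drmSyncobjTransfer() could replace
+    * the whole fd round-trip with a single call, roughly (dst_handle being
+    * the handle picked below):
+    *
+    *    drmSyncobjTransfer(pdev->fd, dst_handle, 0, queue->sync, 0, 0);
+    */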
dst->temporary ? : dst->permanent; + ret = drmIoctl(pdev->fd, DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE, &handle); + assert(!ret); + + close(handle.fd); +} + +VkResult +panvk_QueueSubmit(VkQueue _queue, + uint32_t submitCount, + const VkSubmitInfo *pSubmits, + VkFence _fence) +{ + VK_FROM_HANDLE(panvk_queue, queue, _queue); + VK_FROM_HANDLE(panvk_fence, fence, _fence); + const struct panfrost_device *pdev = &queue->device->physical_device->pdev; + + for (uint32_t i = 0; i < submitCount; ++i) { + const VkSubmitInfo *submit = pSubmits + i; + unsigned nr_in_fences = submit->waitSemaphoreCount + 1; + uint32_t in_fences[nr_in_fences]; + + in_fences[0] = queue->sync; + for (unsigned i = 0; i < submit->waitSemaphoreCount; i++) { + VK_FROM_HANDLE(panvk_semaphore, sem, submit->pWaitSemaphores[i]); + + in_fences[i + 1] = sem->syncobj.temporary ? : sem->syncobj.permanent; + } + + for (uint32_t j = 0; j < submit->commandBufferCount; ++j) { + VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, (submit->pCommandBuffers[j])); + + list_for_each_entry(struct panvk_batch, batch, &cmdbuf->batches, node) { + /* FIXME: should be done at the batch level */ + unsigned nr_bos = + util_dynarray_num_elements(&cmdbuf->desc_pool.bos, struct panfrost_bo *) + + util_dynarray_num_elements(&cmdbuf->varying_pool.bos, struct panfrost_bo *) + + util_dynarray_num_elements(&cmdbuf->tls_pool.bos, struct panfrost_bo *) + + (batch->fb.info ? batch->fb.info->attachment_count : 0) + + (batch->blit.src ? 1 : 0) + + (batch->blit.dst ? 1 : 0) + + (batch->scoreboard.first_tiler ? 1 : 0) + 1; + unsigned bo_idx = 0; + uint32_t bos[nr_bos]; + + util_dynarray_foreach(&cmdbuf->desc_pool.bos, struct panfrost_bo *, bo) { + bos[bo_idx++] = (*bo)->gem_handle; + } + + util_dynarray_foreach(&cmdbuf->varying_pool.bos, struct panfrost_bo *, bo) { + bos[bo_idx++] = (*bo)->gem_handle; + } + + util_dynarray_foreach(&cmdbuf->tls_pool.bos, struct panfrost_bo *, bo) { + bos[bo_idx++] = (*bo)->gem_handle; + } + + if (batch->fb.info) { + for (unsigned i = 0; i < batch->fb.info->attachment_count; i++) { + bos[bo_idx++] = batch->fb.info->attachments[i].iview->pview.image->data.bo->gem_handle; + } + } + + if (batch->blit.src) + bos[bo_idx++] = batch->blit.src->gem_handle; + + if (batch->blit.dst) + bos[bo_idx++] = batch->blit.dst->gem_handle; + + if (batch->scoreboard.first_tiler) + bos[bo_idx++] = pdev->tiler_heap->gem_handle; + + bos[bo_idx++] = pdev->sample_positions->gem_handle; + assert(bo_idx == nr_bos); + panvk_queue_submit_batch(queue, batch, bos, nr_bos, in_fences, nr_in_fences); + } + } + + /* Transfer the out fence to signal semaphores */ + for (unsigned i = 0; i < submit->signalSemaphoreCount; i++) { + VK_FROM_HANDLE(panvk_semaphore, sem, submit->pSignalSemaphores[i]); + panvk_queue_transfer_sync(queue, &sem->syncobj); + } + } + + if (fence) { + /* Transfer the last out fence to the fence object */ + panvk_queue_transfer_sync(queue, &fence->syncobj); + } + + return VK_SUCCESS; +} + +VkResult +panvk_QueueWaitIdle(VkQueue _queue) +{ + VK_FROM_HANDLE(panvk_queue, queue, _queue); + + if (panvk_device_is_lost(queue->device)) + return VK_ERROR_DEVICE_LOST; + + const struct panfrost_device *pdev = &queue->device->physical_device->pdev; + struct drm_syncobj_wait wait = { + .handles = (uint64_t) (uintptr_t)(&queue->sync), + .count_handles = 1, + .timeout_nsec = INT64_MAX, + .flags = DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL, + }; + int ret; + + ret = drmIoctl(pdev->fd, DRM_IOCTL_SYNCOBJ_WAIT, &wait); + assert(!ret); + + return VK_SUCCESS; +} + +VkResult +panvk_DeviceWaitIdle(VkDevice 
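+   /* Device-level idle just means every queue is idle: all jobs submitted
+    * on a panvk queue signal that queue's out-syncobj, so waiting on each
+    * queue->sync in turn (see panvk_QueueWaitIdle() above) is sufficient. */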
_device) +{ + VK_FROM_HANDLE(panvk_device, device, _device); + + if (panvk_device_is_lost(device)) + return VK_ERROR_DEVICE_LOST; + + for (unsigned i = 0; i < PANVK_MAX_QUEUE_FAMILIES; i++) { + for (unsigned q = 0; q < device->queue_count[i]; q++) { + panvk_QueueWaitIdle(panvk_queue_to_handle(&device->queues[i][q])); + } + } + return VK_SUCCESS; +} + +VkResult +panvk_EnumerateInstanceExtensionProperties(const char *pLayerName, + uint32_t *pPropertyCount, + VkExtensionProperties *pProperties) +{ + if (pLayerName) + return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT); + + return vk_enumerate_instance_extension_properties(&panvk_instance_extensions, + pPropertyCount, pProperties); +} + +PFN_vkVoidFunction +panvk_GetInstanceProcAddr(VkInstance _instance, const char *pName) +{ + VK_FROM_HANDLE(panvk_instance, instance, _instance); + return vk_instance_get_proc_addr(&instance->vk, + &panvk_instance_entrypoints, + pName); +} + +/* The loader wants us to expose a second GetInstanceProcAddr function + * to work around certain LD_PRELOAD issues seen in apps. + */ +PUBLIC +VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL +vk_icdGetInstanceProcAddr(VkInstance instance, const char *pName); + +PUBLIC +VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL +vk_icdGetInstanceProcAddr(VkInstance instance, const char *pName) +{ + return panvk_GetInstanceProcAddr(instance, pName); +} + +VkResult +panvk_AllocateMemory(VkDevice _device, + const VkMemoryAllocateInfo *pAllocateInfo, + const VkAllocationCallbacks *pAllocator, + VkDeviceMemory *pMem) +{ + VK_FROM_HANDLE(panvk_device, device, _device); + struct panvk_device_memory *mem; + + assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO); + + if (pAllocateInfo->allocationSize == 0) { + /* Apparently, this is allowed */ + *pMem = VK_NULL_HANDLE; + return VK_SUCCESS; + } + + mem = vk_object_alloc(&device->vk, pAllocator, sizeof(*mem), + VK_OBJECT_TYPE_DEVICE_MEMORY); + if (mem == NULL) + return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); + + const VkImportMemoryFdInfoKHR *fd_info = + vk_find_struct_const(pAllocateInfo->pNext, + IMPORT_MEMORY_FD_INFO_KHR); + + if (fd_info && !fd_info->handleType) + fd_info = NULL; + + if (fd_info) { + assert(fd_info->handleType == + VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT || + fd_info->handleType == + VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT); + + /* + * TODO Importing the same fd twice gives us the same handle without + * reference counting. We need to maintain a per-instance handle-to-bo + * table and add reference count to panvk_bo. 
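+    *
+    * Until that lands, importing the same fd twice and then freeing both
+    * VkDeviceMemory objects unreferences the underlying BO twice.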
+ */ + mem->bo = panfrost_bo_import(&device->physical_device->pdev, fd_info->fd); + /* take ownership and close the fd */ + close(fd_info->fd); + } else { + mem->bo = panfrost_bo_create(&device->physical_device->pdev, + pAllocateInfo->allocationSize, 0, + "User-requested memory"); + } + + assert(mem->bo); + + *pMem = panvk_device_memory_to_handle(mem); + + return VK_SUCCESS; +} + +void +panvk_FreeMemory(VkDevice _device, + VkDeviceMemory _mem, + const VkAllocationCallbacks *pAllocator) +{ + VK_FROM_HANDLE(panvk_device, device, _device); + VK_FROM_HANDLE(panvk_device_memory, mem, _mem); + + if (mem == NULL) + return; + + panfrost_bo_unreference(mem->bo); + vk_object_free(&device->vk, pAllocator, mem); +} + +VkResult +panvk_MapMemory(VkDevice _device, + VkDeviceMemory _memory, + VkDeviceSize offset, + VkDeviceSize size, + VkMemoryMapFlags flags, + void **ppData) +{ + VK_FROM_HANDLE(panvk_device, device, _device); + VK_FROM_HANDLE(panvk_device_memory, mem, _memory); + + if (mem == NULL) { + *ppData = NULL; + return VK_SUCCESS; + } + + if (!mem->bo->ptr.cpu) + panfrost_bo_mmap(mem->bo); + + *ppData = mem->bo->ptr.cpu; + + if (*ppData) { + *ppData += offset; + return VK_SUCCESS; + } + + return vk_error(device->instance, VK_ERROR_MEMORY_MAP_FAILED); +} + +void +panvk_UnmapMemory(VkDevice _device, VkDeviceMemory _memory) +{ +} + +VkResult +panvk_FlushMappedMemoryRanges(VkDevice _device, + uint32_t memoryRangeCount, + const VkMappedMemoryRange *pMemoryRanges) +{ + return VK_SUCCESS; +} + +VkResult +panvk_InvalidateMappedMemoryRanges(VkDevice _device, + uint32_t memoryRangeCount, + const VkMappedMemoryRange *pMemoryRanges) +{ + return VK_SUCCESS; +} + +void +panvk_GetBufferMemoryRequirements(VkDevice _device, + VkBuffer _buffer, + VkMemoryRequirements *pMemoryRequirements) +{ + VK_FROM_HANDLE(panvk_buffer, buffer, _buffer); + + pMemoryRequirements->memoryTypeBits = 1; + pMemoryRequirements->alignment = 64; + pMemoryRequirements->size = + align64(buffer->size, pMemoryRequirements->alignment); +} + +void +panvk_GetBufferMemoryRequirements2(VkDevice device, + const VkBufferMemoryRequirementsInfo2 *pInfo, + VkMemoryRequirements2 *pMemoryRequirements) +{ + panvk_GetBufferMemoryRequirements(device, pInfo->buffer, + &pMemoryRequirements->memoryRequirements); +} + +void +panvk_GetImageMemoryRequirements(VkDevice _device, + VkImage _image, + VkMemoryRequirements *pMemoryRequirements) +{ + VK_FROM_HANDLE(panvk_image, image, _image); + + pMemoryRequirements->memoryTypeBits = 1; + pMemoryRequirements->size = panvk_image_get_total_size(image); + pMemoryRequirements->alignment = 4096; +} + +void +panvk_GetImageMemoryRequirements2(VkDevice device, + const VkImageMemoryRequirementsInfo2 *pInfo, + VkMemoryRequirements2 *pMemoryRequirements) +{ + panvk_GetImageMemoryRequirements(device, pInfo->image, + &pMemoryRequirements->memoryRequirements); +} + +void +panvk_GetImageSparseMemoryRequirements(VkDevice device, VkImage image, + uint32_t *pSparseMemoryRequirementCount, + VkSparseImageMemoryRequirements *pSparseMemoryRequirements) +{ + panvk_stub(); +} + +void +panvk_GetImageSparseMemoryRequirements2(VkDevice device, + const VkImageSparseMemoryRequirementsInfo2 *pInfo, + uint32_t *pSparseMemoryRequirementCount, + VkSparseImageMemoryRequirements2 *pSparseMemoryRequirements) +{ + panvk_stub(); +} + +void +panvk_GetDeviceMemoryCommitment(VkDevice device, + VkDeviceMemory memory, + VkDeviceSize *pCommittedMemoryInBytes) +{ + *pCommittedMemoryInBytes = 0; +} + +VkResult +panvk_BindBufferMemory2(VkDevice device, + uint32_t 
+                        bindInfoCount,
+                        const VkBindBufferMemoryInfo *pBindInfos)
+{
+   for (uint32_t i = 0; i < bindInfoCount; ++i) {
+      VK_FROM_HANDLE(panvk_device_memory, mem, pBindInfos[i].memory);
+      VK_FROM_HANDLE(panvk_buffer, buffer, pBindInfos[i].buffer);
+
+      if (mem) {
+         buffer->bo = mem->bo;
+         buffer->bo_offset = pBindInfos[i].memoryOffset;
+      } else {
+         buffer->bo = NULL;
+      }
+   }
+   return VK_SUCCESS;
+}
+
+VkResult
+panvk_BindBufferMemory(VkDevice device,
+                       VkBuffer buffer,
+                       VkDeviceMemory memory,
+                       VkDeviceSize memoryOffset)
+{
+   const VkBindBufferMemoryInfo info = {
+      .sType = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO,
+      .buffer = buffer,
+      .memory = memory,
+      .memoryOffset = memoryOffset
+   };
+
+   return panvk_BindBufferMemory2(device, 1, &info);
+}
+
+VkResult
+panvk_BindImageMemory2(VkDevice device,
+                       uint32_t bindInfoCount,
+                       const VkBindImageMemoryInfo *pBindInfos)
+{
+   for (uint32_t i = 0; i < bindInfoCount; ++i) {
+      VK_FROM_HANDLE(panvk_image, image, pBindInfos[i].image);
+      VK_FROM_HANDLE(panvk_device_memory, mem, pBindInfos[i].memory);
+
+      if (mem) {
+         panfrost_bo_reference(mem->bo);
+         image->pimage.data.bo = mem->bo;
+         image->pimage.data.offset = pBindInfos[i].memoryOffset;
+         /* Reset the AFBC headers */
+         if (drm_is_afbc(image->pimage.layout.modifier)) {
+            void *base = image->pimage.data.bo->ptr.cpu + image->pimage.data.offset;
+
+            for (unsigned layer = 0; layer < image->pimage.layout.array_size; layer++) {
+               for (unsigned level = 0; level < image->pimage.layout.nr_slices; level++) {
+                  void *header = base +
+                                 (layer * image->pimage.layout.array_stride) +
+                                 image->pimage.layout.slices[level].offset;
+                  memset(header, 0, image->pimage.layout.slices[level].afbc.header_size);
+               }
+            }
+         }
+      } else {
+         panfrost_bo_unreference(image->pimage.data.bo);
+         image->pimage.data.bo = NULL;
+         image->pimage.data.offset = pBindInfos[i].memoryOffset;
+      }
+   }
+
+   return VK_SUCCESS;
+}
+
+VkResult
+panvk_BindImageMemory(VkDevice device,
+                      VkImage image,
+                      VkDeviceMemory memory,
+                      VkDeviceSize memoryOffset)
+{
+   const VkBindImageMemoryInfo info = {
+      .sType = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO,
+      .image = image,
+      .memory = memory,
+      .memoryOffset = memoryOffset
+   };
+
+   return panvk_BindImageMemory2(device, 1, &info);
+}
+
+VkResult
+panvk_QueueBindSparse(VkQueue _queue,
+                      uint32_t bindInfoCount,
+                      const VkBindSparseInfo *pBindInfo,
+                      VkFence _fence)
+{
+   return VK_SUCCESS;
+}
+
+VkResult
+panvk_CreateEvent(VkDevice _device,
+                  const VkEventCreateInfo *pCreateInfo,
+                  const VkAllocationCallbacks *pAllocator,
+                  VkEvent *pEvent)
+{
+   panvk_stub();
+   return VK_SUCCESS;
+}
+
+void
+panvk_DestroyEvent(VkDevice _device,
+                   VkEvent _event,
+                   const VkAllocationCallbacks *pAllocator)
+{
+   panvk_stub();
+}
+
+VkResult
+panvk_GetEventStatus(VkDevice _device, VkEvent _event)
+{
+   panvk_stub();
+   return VK_EVENT_RESET;
+}
+
+VkResult
+panvk_SetEvent(VkDevice _device, VkEvent _event)
+{
+   panvk_stub();
+   return VK_SUCCESS;
+}
+
+VkResult
+panvk_ResetEvent(VkDevice _device, VkEvent _event)
+{
+   panvk_stub();
+   return VK_SUCCESS;
+}
+
+VkResult
+panvk_CreateBuffer(VkDevice _device,
+                   const VkBufferCreateInfo *pCreateInfo,
+                   const VkAllocationCallbacks *pAllocator,
+                   VkBuffer *pBuffer)
+{
+   VK_FROM_HANDLE(panvk_device, device, _device);
+   struct panvk_buffer *buffer;
+
+   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO);
+
+   buffer = vk_object_alloc(&device->vk, pAllocator, sizeof(*buffer),
+                            VK_OBJECT_TYPE_BUFFER);
+   if (buffer == NULL)
+      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+
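+   /* A VkBuffer owns no storage of its own: the BO is attached later by
+    * vkBindBufferMemory(), so creation only records the immutable
+    * size/usage/flags state. */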
buffer->size = pCreateInfo->size; + buffer->usage = pCreateInfo->usage; + buffer->flags = pCreateInfo->flags; + + *pBuffer = panvk_buffer_to_handle(buffer); + + return VK_SUCCESS; +} + +void +panvk_DestroyBuffer(VkDevice _device, + VkBuffer _buffer, + const VkAllocationCallbacks *pAllocator) +{ + VK_FROM_HANDLE(panvk_device, device, _device); + VK_FROM_HANDLE(panvk_buffer, buffer, _buffer); + + if (!buffer) + return; + + vk_object_free(&device->vk, pAllocator, buffer); +} + +VkResult +panvk_CreateFramebuffer(VkDevice _device, + const VkFramebufferCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkFramebuffer *pFramebuffer) +{ + VK_FROM_HANDLE(panvk_device, device, _device); + struct panvk_framebuffer *framebuffer; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO); + + size_t size = sizeof(*framebuffer) + sizeof(struct panvk_attachment_info) * + pCreateInfo->attachmentCount; + framebuffer = vk_object_alloc(&device->vk, pAllocator, size, + VK_OBJECT_TYPE_FRAMEBUFFER); + if (framebuffer == NULL) + return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); + + framebuffer->attachment_count = pCreateInfo->attachmentCount; + framebuffer->width = pCreateInfo->width; + framebuffer->height = pCreateInfo->height; + framebuffer->layers = pCreateInfo->layers; + for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) { + VkImageView _iview = pCreateInfo->pAttachments[i]; + struct panvk_image_view *iview = panvk_image_view_from_handle(_iview); + framebuffer->attachments[i].iview = iview; + } + + *pFramebuffer = panvk_framebuffer_to_handle(framebuffer); + return VK_SUCCESS; +} + +void +panvk_DestroyFramebuffer(VkDevice _device, + VkFramebuffer _fb, + const VkAllocationCallbacks *pAllocator) +{ + VK_FROM_HANDLE(panvk_device, device, _device); + VK_FROM_HANDLE(panvk_framebuffer, fb, _fb); + + if (fb) + vk_object_free(&device->vk, pAllocator, fb); +} + +static enum mali_mipmap_mode +panvk_translate_sampler_mipmap_mode(VkSamplerMipmapMode mode) +{ + switch (mode) { + case VK_SAMPLER_MIPMAP_MODE_NEAREST: return MALI_MIPMAP_MODE_NEAREST; + case VK_SAMPLER_MIPMAP_MODE_LINEAR: return MALI_MIPMAP_MODE_TRILINEAR; + default: unreachable("Invalid mipmap mode"); + } +} + +static unsigned +panvk_translate_sampler_address_mode(VkSamplerAddressMode mode) +{ + switch (mode) { + case VK_SAMPLER_ADDRESS_MODE_REPEAT: return MALI_WRAP_MODE_REPEAT; + case VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT: return MALI_WRAP_MODE_MIRRORED_REPEAT; + case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE: return MALI_WRAP_MODE_CLAMP_TO_EDGE; + case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER: return MALI_WRAP_MODE_CLAMP_TO_BORDER; + case VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE: return MALI_WRAP_MODE_MIRRORED_CLAMP_TO_EDGE; + default: unreachable("Invalid wrap"); + } +} + +static enum mali_func +panvk_translate_sampler_compare_func(const VkSamplerCreateInfo *pCreateInfo) +{ + if (!pCreateInfo->compareEnable) + return MALI_FUNC_NEVER; + + enum mali_func f = panvk_translate_compare_func(pCreateInfo->compareOp); + return panfrost_flip_compare_func(f); +} + +static void +panvk_init_midgard_sampler(struct panvk_sampler *sampler, + const VkSamplerCreateInfo *pCreateInfo) +{ + const VkSamplerCustomBorderColorCreateInfoEXT *pBorderColor = + vk_find_struct_const(pCreateInfo->pNext, SAMPLER_CUSTOM_BORDER_COLOR_CREATE_INFO_EXT); + + pan_pack(&sampler->desc, MIDGARD_SAMPLER, cfg) { + cfg.magnify_nearest = pCreateInfo->magFilter == VK_FILTER_NEAREST; + cfg.minify_nearest = pCreateInfo->minFilter == 
VK_FILTER_NEAREST; + cfg.mipmap_mode = panvk_translate_sampler_mipmap_mode(pCreateInfo->mipmapMode); + cfg.normalized_coordinates = !pCreateInfo->unnormalizedCoordinates; + cfg.lod_bias = FIXED_16(pCreateInfo->mipLodBias, true); + cfg.minimum_lod = FIXED_16(pCreateInfo->minLod, false); + cfg.maximum_lod = FIXED_16(pCreateInfo->maxLod, false); + + cfg.wrap_mode_s = panvk_translate_sampler_address_mode(pCreateInfo->addressModeU); + cfg.wrap_mode_t = panvk_translate_sampler_address_mode(pCreateInfo->addressModeV); + cfg.wrap_mode_r = panvk_translate_sampler_address_mode(pCreateInfo->addressModeW); + cfg.compare_function = panvk_translate_sampler_compare_func(pCreateInfo); + + switch (pCreateInfo->borderColor) { + case VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK: + case VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK: + cfg.border_color_r = fui(0.0); + cfg.border_color_g = fui(0.0); + cfg.border_color_b = fui(0.0); + cfg.border_color_a = + pCreateInfo->borderColor == VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK ? + fui(1.0) : fui(0.0); + break; + case VK_BORDER_COLOR_INT_OPAQUE_BLACK: + case VK_BORDER_COLOR_INT_TRANSPARENT_BLACK: + cfg.border_color_r = 0; + cfg.border_color_g = 0; + cfg.border_color_b = 0; + cfg.border_color_a = + pCreateInfo->borderColor == VK_BORDER_COLOR_INT_OPAQUE_BLACK ? + UINT_MAX : 0; + break; + case VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE: + cfg.border_color_r = fui(1.0); + cfg.border_color_g = fui(1.0); + cfg.border_color_b = fui(1.0); + cfg.border_color_a = fui(1.0); + break; + case VK_BORDER_COLOR_INT_OPAQUE_WHITE: + cfg.border_color_r = UINT_MAX; + cfg.border_color_g = UINT_MAX; + cfg.border_color_b = UINT_MAX; + cfg.border_color_a = UINT_MAX; + break; + case VK_BORDER_COLOR_FLOAT_CUSTOM_EXT: + case VK_BORDER_COLOR_INT_CUSTOM_EXT: + cfg.border_color_r = pBorderColor->customBorderColor.int32[0]; + cfg.border_color_g = pBorderColor->customBorderColor.int32[1]; + cfg.border_color_b = pBorderColor->customBorderColor.int32[2]; + cfg.border_color_a = pBorderColor->customBorderColor.int32[3]; + break; + default: + unreachable("Invalid border color"); + } + } +} + +static void +panvk_init_bifrost_sampler(struct panvk_sampler *sampler, + const VkSamplerCreateInfo *pCreateInfo) +{ + const VkSamplerCustomBorderColorCreateInfoEXT *pBorderColor = + vk_find_struct_const(pCreateInfo->pNext, SAMPLER_CUSTOM_BORDER_COLOR_CREATE_INFO_EXT); + + pan_pack(&sampler->desc, BIFROST_SAMPLER, cfg) { + cfg.point_sample_magnify = pCreateInfo->magFilter == VK_FILTER_LINEAR; + cfg.point_sample_minify = pCreateInfo->minFilter == VK_FILTER_LINEAR; + cfg.mipmap_mode = panvk_translate_sampler_mipmap_mode(pCreateInfo->mipmapMode); + cfg.normalized_coordinates = !pCreateInfo->unnormalizedCoordinates; + + cfg.lod_bias = FIXED_16(pCreateInfo->mipLodBias, true); + cfg.minimum_lod = FIXED_16(pCreateInfo->minLod, false); + cfg.maximum_lod = FIXED_16(pCreateInfo->maxLod, false); + cfg.wrap_mode_s = panvk_translate_sampler_address_mode(pCreateInfo->addressModeU); + cfg.wrap_mode_t = panvk_translate_sampler_address_mode(pCreateInfo->addressModeV); + cfg.wrap_mode_r = panvk_translate_sampler_address_mode(pCreateInfo->addressModeW); + cfg.compare_function = panvk_translate_sampler_compare_func(pCreateInfo); + + switch (pCreateInfo->borderColor) { + case VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK: + case VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK: + cfg.border_color_r = fui(0.0); + cfg.border_color_g = fui(0.0); + cfg.border_color_b = fui(0.0); + cfg.border_color_a = + pCreateInfo->borderColor == VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK ? 
+ fui(1.0) : fui(0.0); + break; + case VK_BORDER_COLOR_INT_OPAQUE_BLACK: + case VK_BORDER_COLOR_INT_TRANSPARENT_BLACK: + cfg.border_color_r = 0; + cfg.border_color_g = 0; + cfg.border_color_b = 0; + cfg.border_color_a = + pCreateInfo->borderColor == VK_BORDER_COLOR_INT_OPAQUE_BLACK ? + UINT_MAX : 0; + break; + case VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE: + cfg.border_color_r = fui(1.0); + cfg.border_color_g = fui(1.0); + cfg.border_color_b = fui(1.0); + cfg.border_color_a = fui(1.0); + break; + case VK_BORDER_COLOR_INT_OPAQUE_WHITE: + cfg.border_color_r = UINT_MAX; + cfg.border_color_g = UINT_MAX; + cfg.border_color_b = UINT_MAX; + cfg.border_color_a = UINT_MAX; + break; + case VK_BORDER_COLOR_FLOAT_CUSTOM_EXT: + case VK_BORDER_COLOR_INT_CUSTOM_EXT: + cfg.border_color_r = pBorderColor->customBorderColor.int32[0]; + cfg.border_color_g = pBorderColor->customBorderColor.int32[1]; + cfg.border_color_b = pBorderColor->customBorderColor.int32[2]; + cfg.border_color_a = pBorderColor->customBorderColor.int32[3]; + break; + default: + unreachable("Invalid border color"); + } + } +} + +static void +panvk_init_sampler(struct panvk_device *device, + struct panvk_sampler *sampler, + const VkSamplerCreateInfo *pCreateInfo) +{ + if (pan_is_bifrost(&device->physical_device->pdev)) + panvk_init_bifrost_sampler(sampler, pCreateInfo); + else + panvk_init_midgard_sampler(sampler, pCreateInfo); +} + +VkResult +panvk_CreateSampler(VkDevice _device, + const VkSamplerCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkSampler *pSampler) +{ + VK_FROM_HANDLE(panvk_device, device, _device); + struct panvk_sampler *sampler; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO); + + sampler = vk_object_alloc(&device->vk, pAllocator, sizeof(*sampler), + VK_OBJECT_TYPE_SAMPLER); + if (!sampler) + return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); + + panvk_init_sampler(device, sampler, pCreateInfo); + *pSampler = panvk_sampler_to_handle(sampler); + + return VK_SUCCESS; +} + +void +panvk_DestroySampler(VkDevice _device, + VkSampler _sampler, + const VkAllocationCallbacks *pAllocator) +{ + VK_FROM_HANDLE(panvk_device, device, _device); + VK_FROM_HANDLE(panvk_sampler, sampler, _sampler); + + if (!sampler) + return; + + vk_object_free(&device->vk, pAllocator, sampler); +} + +/* vk_icd.h does not declare this function, so we declare it here to + * suppress Wmissing-prototypes. + */ +PUBLIC VKAPI_ATTR VkResult VKAPI_CALL +vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion); + +PUBLIC VKAPI_ATTR VkResult VKAPI_CALL +vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion) +{ + /* For the full details on loader interface versioning, see + * . + * What follows is a condensed summary, to help you navigate the large and + * confusing official doc. + * + * - Loader interface v0 is incompatible with later versions. We don't + * support it. + * + * - In loader interface v1: + * - The first ICD entrypoint called by the loader is + * vk_icdGetInstanceProcAddr(). The ICD must statically expose this + * entrypoint. + * - The ICD must statically expose no other Vulkan symbol unless it + * is linked with -Bsymbolic. + * - Each dispatchable Vulkan handle created by the ICD must be + * a pointer to a struct whose first member is VK_LOADER_DATA. The + * ICD must initialize VK_LOADER_DATA.loadMagic to + * ICD_LOADER_MAGIC. + * - The loader implements vkCreate{PLATFORM}SurfaceKHR() and + * vkDestroySurfaceKHR(). 
The ICD must be capable of working with + * such loader-managed surfaces. + * + * - Loader interface v2 differs from v1 in: + * - The first ICD entrypoint called by the loader is + * vk_icdNegotiateLoaderICDInterfaceVersion(). The ICD must + * statically expose this entrypoint. + * + * - Loader interface v3 differs from v2 in: + * - The ICD must implement vkCreate{PLATFORM}SurfaceKHR(), + * vkDestroySurfaceKHR(), and other API which uses VKSurfaceKHR, + * because the loader no longer does so. + */ + *pSupportedVersion = MIN2(*pSupportedVersion, 3u); + return VK_SUCCESS; +} + +VkResult +panvk_GetMemoryFdKHR(VkDevice _device, + const VkMemoryGetFdInfoKHR *pGetFdInfo, + int *pFd) +{ + VK_FROM_HANDLE(panvk_device, device, _device); + VK_FROM_HANDLE(panvk_device_memory, memory, pGetFdInfo->memory); + + assert(pGetFdInfo->sType == VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR); + + /* At the moment, we support only the below handle types. */ + assert(pGetFdInfo->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT || + pGetFdInfo->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT); + + int prime_fd = panfrost_bo_export(memory->bo); + if (prime_fd < 0) + return vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY); + + *pFd = prime_fd; + return VK_SUCCESS; +} + +VkResult +panvk_GetMemoryFdPropertiesKHR(VkDevice _device, + VkExternalMemoryHandleTypeFlagBits handleType, + int fd, + VkMemoryFdPropertiesKHR *pMemoryFdProperties) +{ + assert(handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT); + pMemoryFdProperties->memoryTypeBits = 1; + return VK_SUCCESS; +} + +void +panvk_GetPhysicalDeviceExternalSemaphoreProperties(VkPhysicalDevice physicalDevice, + const VkPhysicalDeviceExternalSemaphoreInfo *pExternalSemaphoreInfo, + VkExternalSemaphoreProperties *pExternalSemaphoreProperties) +{ + if ((pExternalSemaphoreInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT || + pExternalSemaphoreInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT)) { + pExternalSemaphoreProperties->exportFromImportedHandleTypes = + VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT | + VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT; + pExternalSemaphoreProperties->compatibleHandleTypes = + VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT | + VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT; + pExternalSemaphoreProperties->externalSemaphoreFeatures = + VK_EXTERNAL_SEMAPHORE_FEATURE_EXPORTABLE_BIT | + VK_EXTERNAL_SEMAPHORE_FEATURE_IMPORTABLE_BIT; + } else { + pExternalSemaphoreProperties->exportFromImportedHandleTypes = 0; + pExternalSemaphoreProperties->compatibleHandleTypes = 0; + pExternalSemaphoreProperties->externalSemaphoreFeatures = 0; + } +} + +void +panvk_GetPhysicalDeviceExternalFenceProperties(VkPhysicalDevice physicalDevice, + const VkPhysicalDeviceExternalFenceInfo *pExternalFenceInfo, + VkExternalFenceProperties *pExternalFenceProperties) +{ + pExternalFenceProperties->exportFromImportedHandleTypes = 0; + pExternalFenceProperties->compatibleHandleTypes = 0; + pExternalFenceProperties->externalFenceFeatures = 0; +} + +void +panvk_GetDeviceGroupPeerMemoryFeatures(VkDevice device, + uint32_t heapIndex, + uint32_t localDeviceIndex, + uint32_t remoteDeviceIndex, + VkPeerMemoryFeatureFlags *pPeerMemoryFeatures) +{ + assert(localDeviceIndex == remoteDeviceIndex); + + *pPeerMemoryFeatures = VK_PEER_MEMORY_FEATURE_COPY_SRC_BIT | + VK_PEER_MEMORY_FEATURE_COPY_DST_BIT | + VK_PEER_MEMORY_FEATURE_GENERIC_SRC_BIT | + VK_PEER_MEMORY_FEATURE_GENERIC_DST_BIT; +} diff --git 
a/src/panfrost/vulkan/panvk_formats.c b/src/panfrost/vulkan/panvk_formats.c new file mode 100644 index 00000000000..b45b6afc38a --- /dev/null +++ b/src/panfrost/vulkan/panvk_formats.c @@ -0,0 +1,484 @@ +/* + * Copyright © 2021 Collabora Ltd. + * + * Derived from tu_formats.c which is: + * Copyright © 2016 Red Hat. + * Copyright © 2016 Bas Nieuwenhuizen + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include "panvk_private.h" + +#include "util/format_r11g11b10f.h" +#include "util/format_srgb.h" +#include "util/half_float.h" +#include "vulkan/util/vk_format.h" +#include "vk_format.h" +#include "vk_util.h" +#include "panfrost/lib/pan_texture.h" + +static void +get_format_properties(struct panvk_physical_device *physical_device, + VkFormat format, + VkFormatProperties *out_properties) +{ + struct panfrost_device *pdev = &physical_device->pdev; + VkFormatFeatureFlags tex = 0, buffer = 0; + enum pipe_format pfmt = vk_format_to_pipe_format(format); + const struct panfrost_format fmt = pdev->formats[pfmt]; + + if (!pfmt || !fmt.hw) + goto end; + + /* 3-byte formats are not supported by the buffer <-> image copy helpers. */ + if (util_format_get_blocksize(pfmt) == 3) + goto end; + + /* We don't support compressed formats yet: this is causing trouble when + * doing a vkCmdCopyImage() between a compressed and a non-compressed format + * on a tiled/AFBC resource.
+ */ + if (util_format_is_compressed(pfmt)) + goto end; + + buffer |= VK_FORMAT_FEATURE_TRANSFER_SRC_BIT | + VK_FORMAT_FEATURE_TRANSFER_DST_BIT; + + if (fmt.bind & PIPE_BIND_VERTEX_BUFFER) + buffer |= VK_FORMAT_FEATURE_VERTEX_BUFFER_BIT; + + if (fmt.bind & PIPE_BIND_SAMPLER_VIEW) { + tex |= VK_FORMAT_FEATURE_TRANSFER_SRC_BIT | + VK_FORMAT_FEATURE_TRANSFER_DST_BIT | + VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT | + VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT | + VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_MINMAX_BIT | + VK_FORMAT_FEATURE_COSITED_CHROMA_SAMPLES_BIT | + VK_FORMAT_FEATURE_MIDPOINT_CHROMA_SAMPLES_BIT; + + buffer |= VK_FORMAT_FEATURE_UNIFORM_TEXEL_BUFFER_BIT; + + tex |= VK_FORMAT_FEATURE_BLIT_SRC_BIT; + } + + if (fmt.bind & PIPE_BIND_RENDER_TARGET) { + tex |= VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT | + VK_FORMAT_FEATURE_BLIT_DST_BIT; + + tex |= VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT; + buffer |= VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_BIT; + + /* Can always blend via blend shaders */ + tex |= VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BLEND_BIT; + } + + if (fmt.bind & PIPE_BIND_DEPTH_STENCIL) + tex |= VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT; + +end: + out_properties->linearTilingFeatures = tex; + out_properties->optimalTilingFeatures = tex; + out_properties->bufferFeatures = buffer; +} + +void +panvk_GetPhysicalDeviceFormatProperties(VkPhysicalDevice physicalDevice, + VkFormat format, + VkFormatProperties *pFormatProperties) +{ + VK_FROM_HANDLE(panvk_physical_device, physical_device, physicalDevice); + + get_format_properties(physical_device, format, pFormatProperties); +} + +void +panvk_GetPhysicalDeviceFormatProperties2(VkPhysicalDevice physicalDevice, + VkFormat format, + VkFormatProperties2 *pFormatProperties) +{ + VK_FROM_HANDLE(panvk_physical_device, physical_device, physicalDevice); + + get_format_properties(physical_device, format, + &pFormatProperties->formatProperties); + + VkDrmFormatModifierPropertiesListEXT *list = + vk_find_struct(pFormatProperties->pNext, DRM_FORMAT_MODIFIER_PROPERTIES_LIST_EXT); + if (list) { + VK_OUTARRAY_MAKE(out, list->pDrmFormatModifierProperties, + &list->drmFormatModifierCount); + + vk_outarray_append(&out, mod_props) { + mod_props->drmFormatModifier = DRM_FORMAT_MOD_LINEAR; + mod_props->drmFormatModifierPlaneCount = 1; + } + } +} + +static VkResult +get_image_format_properties(struct panvk_physical_device *physical_device, + const VkPhysicalDeviceImageFormatInfo2 *info, + VkImageFormatProperties *pImageFormatProperties, + VkFormatFeatureFlags *p_feature_flags) +{ + VkFormatProperties format_props; + VkFormatFeatureFlags format_feature_flags; + VkExtent3D maxExtent; + uint32_t maxMipLevels; + uint32_t maxArraySize; + VkSampleCountFlags sampleCounts = VK_SAMPLE_COUNT_1_BIT; + enum pipe_format format = vk_format_to_pipe_format(info->format); + + get_format_properties(physical_device, info->format, &format_props); + + switch (info->tiling) { + case VK_IMAGE_TILING_LINEAR: + format_feature_flags = format_props.linearTilingFeatures; + break; + + case VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT: + /* The only difference between optimal and linear is currently whether + * depth/stencil attachments are allowed on depth/stencil formats. + * There's no reason to allow importing depth/stencil textures, so just + * disallow it and then this annoying edge case goes away. + * + * TODO: If anyone cares, we could enable this by looking at the + * modifier and checking if it's LINEAR or not. 
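+    *
+    * A rough sketch of what that check could look like (hypothetical,
+    * untested), assuming the modifier reaches us through the usual
+    * pNext chain:
+    *
+    *    const VkPhysicalDeviceImageDrmFormatModifierInfoEXT *mod_info =
+    *       vk_find_struct_const(info->pNext,
+    *                            PHYSICAL_DEVICE_IMAGE_DRM_FORMAT_MODIFIER_INFO_EXT);
+    *    if (mod_info && mod_info->drmFormatModifier == DRM_FORMAT_MOD_LINEAR)
+    *       ... take the linearTilingFeatures path for depth/stencil too ...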
+ */ + if (util_format_is_depth_or_stencil(format)) + goto unsupported; + + assert(format_props.optimalTilingFeatures == format_props.linearTilingFeatures); + /* fallthrough */ + case VK_IMAGE_TILING_OPTIMAL: + format_feature_flags = format_props.optimalTilingFeatures; + break; + default: + unreachable("bad VkPhysicalDeviceImageFormatInfo2"); + } + + if (format_feature_flags == 0) + goto unsupported; + + if (info->type != VK_IMAGE_TYPE_2D && + util_format_is_depth_or_stencil(format)) + goto unsupported; + + switch (info->type) { + default: + unreachable("bad vkimage type"); + case VK_IMAGE_TYPE_1D: + maxExtent.width = 16384; + maxExtent.height = 1; + maxExtent.depth = 1; + maxMipLevels = 15; /* log2(maxWidth) + 1 */ + maxArraySize = 2048; + break; + case VK_IMAGE_TYPE_2D: + maxExtent.width = 16384; + maxExtent.height = 16384; + maxExtent.depth = 1; + maxMipLevels = 15; /* log2(maxWidth) + 1 */ + maxArraySize = 2048; + break; + case VK_IMAGE_TYPE_3D: + maxExtent.width = 2048; + maxExtent.height = 2048; + maxExtent.depth = 2048; + maxMipLevels = 12; /* log2(maxWidth) + 1 */ + maxArraySize = 1; + break; + } + + if (info->tiling == VK_IMAGE_TILING_OPTIMAL && + info->type == VK_IMAGE_TYPE_2D && + (format_feature_flags & + (VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT | + VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT)) && + !(info->flags & VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT) && + !(info->usage & VK_IMAGE_USAGE_STORAGE_BIT)) { + sampleCounts |= VK_SAMPLE_COUNT_4_BIT; + } + + if (info->usage & VK_IMAGE_USAGE_SAMPLED_BIT) { + if (!(format_feature_flags & VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT)) { + goto unsupported; + } + } + + if (info->usage & VK_IMAGE_USAGE_STORAGE_BIT) { + if (!(format_feature_flags & VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT)) { + goto unsupported; + } + } + + if (info->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) { + if (!(format_feature_flags & VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT)) { + goto unsupported; + } + } + + if (info->usage & VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) { + if (!(format_feature_flags & + VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT)) { + goto unsupported; + } + } + + *pImageFormatProperties = (VkImageFormatProperties) { + .maxExtent = maxExtent, + .maxMipLevels = maxMipLevels, + .maxArrayLayers = maxArraySize, + .sampleCounts = sampleCounts, + + /* FINISHME: Accurately calculate + * VkImageFormatProperties::maxResourceSize. 
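+    * A tighter bound would presumably sum width * height * depth *
+    * arrayLayers * samples * blocksize over all mip levels, clamped to
+    * whatever the kernel can back with a single BO; the UINT32_MAX below
+    * is just a placeholder until someone does that math.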
+ */ + .maxResourceSize = UINT32_MAX, + }; + + if (p_feature_flags) + *p_feature_flags = format_feature_flags; + + return VK_SUCCESS; +unsupported: + *pImageFormatProperties = (VkImageFormatProperties) { + .maxExtent = { 0, 0, 0 }, + .maxMipLevels = 0, + .maxArrayLayers = 0, + .sampleCounts = 0, + .maxResourceSize = 0, + }; + + return VK_ERROR_FORMAT_NOT_SUPPORTED; +} + + +VkResult +panvk_GetPhysicalDeviceImageFormatProperties(VkPhysicalDevice physicalDevice, + VkFormat format, + VkImageType type, + VkImageTiling tiling, + VkImageUsageFlags usage, + VkImageCreateFlags createFlags, + VkImageFormatProperties *pImageFormatProperties) +{ + VK_FROM_HANDLE(panvk_physical_device, physical_device, physicalDevice); + + const VkPhysicalDeviceImageFormatInfo2 info = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_FORMAT_INFO_2, + .pNext = NULL, + .format = format, + .type = type, + .tiling = tiling, + .usage = usage, + .flags = createFlags, + }; + + return get_image_format_properties(physical_device, &info, + pImageFormatProperties, NULL); +} + +static VkResult +panvk_get_external_image_format_properties(const struct panvk_physical_device *physical_device, + const VkPhysicalDeviceImageFormatInfo2 *pImageFormatInfo, + VkExternalMemoryHandleTypeFlagBits handleType, + VkExternalMemoryProperties *external_properties) +{ + VkExternalMemoryFeatureFlagBits flags = 0; + VkExternalMemoryHandleTypeFlags export_flags = 0; + VkExternalMemoryHandleTypeFlags compat_flags = 0; + + /* From the Vulkan 1.1.98 spec: + * + * If handleType is not compatible with the format, type, tiling, + * usage, and flags specified in VkPhysicalDeviceImageFormatInfo2, + * then vkGetPhysicalDeviceImageFormatProperties2 returns + * VK_ERROR_FORMAT_NOT_SUPPORTED. + */ + switch (handleType) { + case VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT: + case VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT: + switch (pImageFormatInfo->type) { + case VK_IMAGE_TYPE_2D: + flags = VK_EXTERNAL_MEMORY_FEATURE_DEDICATED_ONLY_BIT | + VK_EXTERNAL_MEMORY_FEATURE_EXPORTABLE_BIT | + VK_EXTERNAL_MEMORY_FEATURE_IMPORTABLE_BIT; + compat_flags = export_flags = + VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT | + VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT; + break; + default: + return vk_errorf(physical_device->instance, VK_ERROR_FORMAT_NOT_SUPPORTED, + "VkExternalMemoryTypeFlagBits(0x%x) unsupported for VkImageType(%d)", + handleType, pImageFormatInfo->type); + } + break; + case VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT: + flags = VK_EXTERNAL_MEMORY_FEATURE_IMPORTABLE_BIT; + compat_flags = VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT; + break; + default: + return vk_errorf(physical_device->instance, VK_ERROR_FORMAT_NOT_SUPPORTED, + "VkExternalMemoryTypeFlagBits(0x%x) unsupported", + handleType); + } + + *external_properties = (VkExternalMemoryProperties) { + .externalMemoryFeatures = flags, + .exportFromImportedHandleTypes = export_flags, + .compatibleHandleTypes = compat_flags, + }; + + return VK_SUCCESS; +} + +VkResult +panvk_GetPhysicalDeviceImageFormatProperties2(VkPhysicalDevice physicalDevice, + const VkPhysicalDeviceImageFormatInfo2 *base_info, + VkImageFormatProperties2 *base_props) +{ + VK_FROM_HANDLE(panvk_physical_device, physical_device, physicalDevice); + const VkPhysicalDeviceExternalImageFormatInfo *external_info = NULL; + const VkPhysicalDeviceImageViewImageFormatInfoEXT *image_view_info = NULL; + VkExternalImageFormatProperties *external_props = NULL; + VkFilterCubicImageViewImageFormatPropertiesEXT *cubic_props = 
NULL; + VkFormatFeatureFlags format_feature_flags; + VkSamplerYcbcrConversionImageFormatProperties *ycbcr_props = NULL; + VkResult result; + + result = get_image_format_properties(physical_device, base_info, + &base_props->imageFormatProperties, + &format_feature_flags); + if (result != VK_SUCCESS) + return result; + + /* Extract input structs */ + vk_foreach_struct_const(s, base_info->pNext) + { + switch (s->sType) { + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_IMAGE_FORMAT_INFO: + external_info = (const void *) s; + break; + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_VIEW_IMAGE_FORMAT_INFO_EXT: + image_view_info = (const void *) s; + break; + default: + break; + } + } + + /* Extract output structs */ + vk_foreach_struct(s, base_props->pNext) + { + switch (s->sType) { + case VK_STRUCTURE_TYPE_EXTERNAL_IMAGE_FORMAT_PROPERTIES: + external_props = (void *) s; + break; + case VK_STRUCTURE_TYPE_FILTER_CUBIC_IMAGE_VIEW_IMAGE_FORMAT_PROPERTIES_EXT: + cubic_props = (void *) s; + break; + case VK_STRUCTURE_TYPE_SAMPLER_YCBCR_CONVERSION_IMAGE_FORMAT_PROPERTIES: + ycbcr_props = (void *) s; + break; + default: + break; + } + } + + /* From the Vulkan 1.0.42 spec: + * + * If handleType is 0, vkGetPhysicalDeviceImageFormatProperties2 will + * behave as if VkPhysicalDeviceExternalImageFormatInfo was not + * present and VkExternalImageFormatProperties will be ignored. + */ + if (external_info && external_info->handleType != 0) { + result = panvk_get_external_image_format_properties(physical_device, + base_info, + external_info->handleType, + &external_props->externalMemoryProperties); + if (result != VK_SUCCESS) + goto fail; + } + + if (cubic_props) { + /* Note: the blob only allows cubic filtering for 2D and 2D array views; + * it's likely we could enable it for 1D and CUBE too, but that needs testing. + */ + if ((image_view_info->imageViewType == VK_IMAGE_VIEW_TYPE_2D || + image_view_info->imageViewType == VK_IMAGE_VIEW_TYPE_2D_ARRAY) && + (format_feature_flags & VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_CUBIC_BIT_EXT)) { + cubic_props->filterCubic = true; + cubic_props->filterCubicMinmax = true; + } else { + cubic_props->filterCubic = false; + cubic_props->filterCubicMinmax = false; + } + } + + if (ycbcr_props) + ycbcr_props->combinedImageSamplerDescriptorCount = 1; + + return VK_SUCCESS; + +fail: + if (result == VK_ERROR_FORMAT_NOT_SUPPORTED) { + /* From the Vulkan 1.0.42 spec: + * + * If the combination of parameters to + * vkGetPhysicalDeviceImageFormatProperties2 is not supported by + * the implementation for use in vkCreateImage, then all members of + * imageFormatProperties will be filled with zero.
+ */ + base_props->imageFormatProperties = (VkImageFormatProperties) {}; + } + + return result; +} + +void +panvk_GetPhysicalDeviceSparseImageFormatProperties(VkPhysicalDevice physicalDevice, + VkFormat format, + VkImageType type, + uint32_t samples, + VkImageUsageFlags usage, + VkImageTiling tiling, + uint32_t *pNumProperties, + VkSparseImageFormatProperties *pProperties) +{ + panvk_stub(); +} + +void +panvk_GetPhysicalDeviceSparseImageFormatProperties2(VkPhysicalDevice physicalDevice, + const VkPhysicalDeviceSparseImageFormatInfo2 *pFormatInfo, + uint32_t *pPropertyCount, + VkSparseImageFormatProperties2 *pProperties) +{ + panvk_stub(); +} + +void +panvk_GetPhysicalDeviceExternalBufferProperties(VkPhysicalDevice physicalDevice, + const VkPhysicalDeviceExternalBufferInfo *pExternalBufferInfo, + VkExternalBufferProperties *pExternalBufferProperties) +{ + panvk_stub(); +} diff --git a/src/panfrost/vulkan/panvk_image.c b/src/panfrost/vulkan/panvk_image.c new file mode 100644 index 00000000000..52a984dfda0 --- /dev/null +++ b/src/panfrost/vulkan/panvk_image.c @@ -0,0 +1,438 @@ +/* + * Copyright © 2021 Collabora Ltd. + * + * Derived from tu_image.c which is: + * Copyright © 2016 Red Hat. + * Copyright © 2016 Bas Nieuwenhuizen + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include "panvk_private.h" +#include "panfrost-quirks.h" + +#include "util/debug.h" +#include "util/u_atomic.h" +#include "vk_format.h" +#include "vk_object.h" +#include "vk_util.h" +#include "drm-uapi/drm_fourcc.h" + +unsigned +panvk_image_get_plane_size(const struct panvk_image *image, unsigned plane) +{ + assert(!plane); + return image->pimage.layout.data_size; +} + +unsigned +panvk_image_get_total_size(const struct panvk_image *image) +{ + assert(util_format_get_num_planes(image->pimage.layout.format) == 1); + return image->pimage.layout.data_size; +} + +static enum mali_texture_dimension +panvk_image_type_to_mali_tex_dim(VkImageType type) +{ + switch (type) { + case VK_IMAGE_TYPE_1D: return MALI_TEXTURE_DIMENSION_1D; + case VK_IMAGE_TYPE_2D: return MALI_TEXTURE_DIMENSION_2D; + case VK_IMAGE_TYPE_3D: return MALI_TEXTURE_DIMENSION_3D; + default: unreachable("Invalid image type"); + } +} + +static VkResult +panvk_image_create(VkDevice _device, + const VkImageCreateInfo *pCreateInfo, + const VkAllocationCallbacks *alloc, + VkImage *pImage, + uint64_t modifier, + const VkSubresourceLayout *plane_layouts) +{ + VK_FROM_HANDLE(panvk_device, device, _device); + const struct panfrost_device *pdev = &device->physical_device->pdev; + struct panvk_image *image = NULL; + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO); + + assert(pCreateInfo->mipLevels > 0); + assert(pCreateInfo->arrayLayers > 0); + assert(pCreateInfo->samples > 0); + assert(pCreateInfo->extent.width > 0); + assert(pCreateInfo->extent.height > 0); + assert(pCreateInfo->extent.depth > 0); + + image = vk_object_zalloc(&device->vk, alloc, sizeof(*image), + VK_OBJECT_TYPE_IMAGE); + if (!image) + return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); + + image->type = pCreateInfo->imageType; + + image->vk_format = pCreateInfo->format; + image->tiling = pCreateInfo->tiling; + image->usage = pCreateInfo->usage; + image->flags = pCreateInfo->flags; + image->extent = pCreateInfo->extent; + pan_image_layout_init(pdev, &image->pimage.layout, modifier, + vk_format_to_pipe_format(pCreateInfo->format), + panvk_image_type_to_mali_tex_dim(pCreateInfo->imageType), + pCreateInfo->extent.width, pCreateInfo->extent.height, + pCreateInfo->extent.depth, pCreateInfo->arrayLayers, + pCreateInfo->samples, pCreateInfo->mipLevels, + PAN_IMAGE_CRC_NONE, NULL); + + image->exclusive = pCreateInfo->sharingMode == VK_SHARING_MODE_EXCLUSIVE; + if (pCreateInfo->sharingMode == VK_SHARING_MODE_CONCURRENT) { + for (uint32_t i = 0; i < pCreateInfo->queueFamilyIndexCount; ++i) { + if (pCreateInfo->pQueueFamilyIndices[i] == VK_QUEUE_FAMILY_EXTERNAL) + image->queue_family_mask |= (1u << PANVK_MAX_QUEUE_FAMILIES) - 1u; + else + image->queue_family_mask |= 1u << pCreateInfo->pQueueFamilyIndices[i]; + } + } + + if (vk_find_struct_const(pCreateInfo->pNext, EXTERNAL_MEMORY_IMAGE_CREATE_INFO)) + image->shareable = true; + + *pImage = panvk_image_to_handle(image); + return VK_SUCCESS; +} + +static uint64_t +panvk_image_select_mod(VkDevice _device, + const VkImageCreateInfo *pCreateInfo, + const VkSubresourceLayout **plane_layouts) +{ + VK_FROM_HANDLE(panvk_device, device, _device); + const struct panfrost_device *pdev = &device->physical_device->pdev; + enum pipe_format fmt = vk_format_to_pipe_format(pCreateInfo->format); + bool noafbc = !(device->physical_device->instance->debug_flags & PANVK_DEBUG_AFBC); + bool linear = device->physical_device->instance->debug_flags & PANVK_DEBUG_LINEAR; + + *plane_layouts = NULL; + + if (pCreateInfo->tiling 
== VK_IMAGE_TILING_LINEAR) + return DRM_FORMAT_MOD_LINEAR; + + if (pCreateInfo->tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT) { + const VkImageDrmFormatModifierListCreateInfoEXT *mod_info = + vk_find_struct_const(pCreateInfo->pNext, + IMAGE_DRM_FORMAT_MODIFIER_LIST_CREATE_INFO_EXT); + const VkImageDrmFormatModifierExplicitCreateInfoEXT *drm_explicit_info = + vk_find_struct_const(pCreateInfo->pNext, + IMAGE_DRM_FORMAT_MODIFIER_EXPLICIT_CREATE_INFO_EXT); + + assert(mod_info || drm_explicit_info); + + uint64_t modifier; + + if (mod_info) { + modifier = DRM_FORMAT_MOD_LINEAR; + for (unsigned i = 0; i < mod_info->drmFormatModifierCount; i++) { + if (drm_is_afbc(mod_info->pDrmFormatModifiers[i]) && !noafbc) { + modifier = mod_info->pDrmFormatModifiers[i]; + break; + } + } + } else { + modifier = drm_explicit_info->drmFormatModifier; + assert(modifier == DRM_FORMAT_MOD_LINEAR || + modifier == DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED || + (drm_is_afbc(modifier) && !noafbc)); + *plane_layouts = drm_explicit_info->pPlaneLayouts; + } + + return modifier; + } + + const struct wsi_image_create_info *wsi_info = + vk_find_struct_const(pCreateInfo->pNext, WSI_IMAGE_CREATE_INFO_MESA); + if (wsi_info && wsi_info->scanout) + return DRM_FORMAT_MOD_LINEAR; + + assert(pCreateInfo->tiling == VK_IMAGE_TILING_OPTIMAL); + + if (linear) + return DRM_FORMAT_MOD_LINEAR; + + /* Image stores don't work on AFBC images */ + if (pCreateInfo->usage & VK_IMAGE_USAGE_STORAGE_BIT) + return DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED; + + /* AFBC does not support layered multisampling */ + if (pCreateInfo->samples > 1) + return DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED; + + if (pdev->quirks & MIDGARD_NO_AFBC) + return DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED; + + /* Only a small selection of formats are AFBC'able */ + if (!panfrost_format_supports_afbc(pdev, fmt)) + return DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED; + + /* 3D AFBC is only supported on Bifrost v7+. It's supposed to + * be supported on Midgard but it doesn't seem to work.
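+    *
+    * Zooming out, the fallback chain of this whole function is roughly
+    * (a summary, not an exhaustive list of the conditions):
+    *
+    *    LINEAR tiling, scanout, or PANVK_DEBUG_LINEAR   -> linear
+    *    storage usage, MSAA, MIDGARD_NO_AFBC quirk,
+    *    non-AFBC-able format, 3D on pre-v7, <= 16x16    -> 16x16 u-interleaved
+    *    everything else                                 -> AFBC 16x16 sparse,
+    *                                                       plus YTR when the
+    *                                                       format allows it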
+ */ + if (pCreateInfo->imageType == VK_IMAGE_TYPE_3D && pdev->arch < 7) + return DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED; + + /* For one tile, AFBC is a loss compared to u-interleaved */ + if (pCreateInfo->extent.width <= 16 && pCreateInfo->extent.height <= 16) + return DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED; + + if (noafbc) + return DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED; + + uint64_t afbc_type = AFBC_FORMAT_MOD_BLOCK_SIZE_16x16 | + AFBC_FORMAT_MOD_SPARSE; + + if (panfrost_afbc_can_ytr(fmt)) + afbc_type |= AFBC_FORMAT_MOD_YTR; + + return DRM_FORMAT_MOD_ARM_AFBC(afbc_type); +} + +VkResult +panvk_CreateImage(VkDevice device, + const VkImageCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkImage *pImage) +{ + const VkSubresourceLayout *plane_layouts; + uint64_t modifier = panvk_image_select_mod(device, pCreateInfo, &plane_layouts); + + return panvk_image_create(device, pCreateInfo, pAllocator, pImage, modifier, plane_layouts); +} + +void +panvk_DestroyImage(VkDevice _device, + VkImage _image, + const VkAllocationCallbacks *pAllocator) +{ + VK_FROM_HANDLE(panvk_device, device, _device); + VK_FROM_HANDLE(panvk_image, image, _image); + + if (!image) + return; + + vk_object_free(&device->vk, pAllocator, image); +} + +static unsigned +panvk_plane_index(VkFormat format, VkImageAspectFlags aspect_mask) +{ + switch (aspect_mask) { + default: + return 0; + case VK_IMAGE_ASPECT_PLANE_1_BIT: + return 1; + case VK_IMAGE_ASPECT_PLANE_2_BIT: + return 2; + case VK_IMAGE_ASPECT_STENCIL_BIT: + return format == VK_FORMAT_D32_SFLOAT_S8_UINT; + } +} + +void +panvk_GetImageSubresourceLayout(VkDevice _device, + VkImage _image, + const VkImageSubresource *pSubresource, + VkSubresourceLayout *pLayout) +{ + VK_FROM_HANDLE(panvk_image, image, _image); + + unsigned plane = panvk_plane_index(image->vk_format, pSubresource->aspectMask); + assert(plane < PANVK_MAX_PLANES); + + const struct pan_image_slice_layout *slice_layout = + &image->pimage.layout.slices[pSubresource->mipLevel]; + + pLayout->offset = slice_layout->offset + + (pSubresource->arrayLayer * + image->pimage.layout.array_stride); + pLayout->size = slice_layout->size; + pLayout->rowPitch = slice_layout->line_stride; + pLayout->arrayPitch = image->pimage.layout.array_stride; + pLayout->depthPitch = slice_layout->surface_stride; +} + +static enum mali_texture_dimension +panvk_view_type_to_mali_tex_dim(VkImageViewType type) +{ + switch (type) { + case VK_IMAGE_VIEW_TYPE_1D: + case VK_IMAGE_VIEW_TYPE_1D_ARRAY: + return MALI_TEXTURE_DIMENSION_1D; + case VK_IMAGE_VIEW_TYPE_2D: + case VK_IMAGE_VIEW_TYPE_2D_ARRAY: + return MALI_TEXTURE_DIMENSION_2D; + case VK_IMAGE_VIEW_TYPE_3D: + return MALI_TEXTURE_DIMENSION_3D; + case VK_IMAGE_VIEW_TYPE_CUBE: + case VK_IMAGE_VIEW_TYPE_CUBE_ARRAY: + return MALI_TEXTURE_DIMENSION_CUBE; + default: + unreachable("Invalid view type"); + } +} + +static void +panvk_convert_swizzle(const VkComponentMapping *in, + unsigned char *out) +{ + const VkComponentSwizzle *comp = &in->r; + for (unsigned i = 0; i < 4; i++) { + switch (comp[i]) { + case VK_COMPONENT_SWIZZLE_IDENTITY: + out[i] = PIPE_SWIZZLE_X + i; + break; + case VK_COMPONENT_SWIZZLE_ZERO: + out[i] = PIPE_SWIZZLE_0; + break; + case VK_COMPONENT_SWIZZLE_ONE: + out[i] = PIPE_SWIZZLE_1; + break; + case VK_COMPONENT_SWIZZLE_R: + out[i] = PIPE_SWIZZLE_X; + break; + case VK_COMPONENT_SWIZZLE_G: + out[i] = PIPE_SWIZZLE_Y; + break; + case VK_COMPONENT_SWIZZLE_B: + out[i] = PIPE_SWIZZLE_Z; + break; + case VK_COMPONENT_SWIZZLE_A: + out[i] = 
PIPE_SWIZZLE_W; + break; + default: + unreachable("Invalid swizzle"); + } + } +} + + + +VkResult +panvk_CreateImageView(VkDevice _device, + const VkImageViewCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkImageView *pView) +{ + VK_FROM_HANDLE(panvk_device, device, _device); + VK_FROM_HANDLE(panvk_image, image, pCreateInfo->image); + struct panvk_image_view *view; + + view = vk_object_zalloc(&device->vk, pAllocator, sizeof(*view), + VK_OBJECT_TYPE_IMAGE_VIEW); + if (view == NULL) + return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); + + view->pview.format = vk_format_to_pipe_format(pCreateInfo->format); + + if (pCreateInfo->subresourceRange.aspectMask == VK_IMAGE_ASPECT_DEPTH_BIT) + view->pview.format = util_format_get_depth_only(view->pview.format); + else if (pCreateInfo->subresourceRange.aspectMask == VK_IMAGE_ASPECT_STENCIL_BIT) + view->pview.format = util_format_stencil_only(view->pview.format); + + view->pview.dim = panvk_view_type_to_mali_tex_dim(pCreateInfo->viewType); + view->pview.first_level = pCreateInfo->subresourceRange.baseMipLevel; + view->pview.last_level = pCreateInfo->subresourceRange.baseMipLevel + + pCreateInfo->subresourceRange.levelCount - 1; + view->pview.first_layer = pCreateInfo->subresourceRange.baseArrayLayer; + view->pview.last_layer = pCreateInfo->subresourceRange.baseArrayLayer + + pCreateInfo->subresourceRange.layerCount - 1; + panvk_convert_swizzle(&pCreateInfo->components, view->pview.swizzle); + view->pview.image = &image->pimage; + view->pview.nr_samples = image->pimage.layout.nr_samples; + view->vk_format = pCreateInfo->format; + + struct panfrost_device *pdev = &device->physical_device->pdev; + unsigned bo_size = + panfrost_estimate_texture_payload_size(pdev, &view->pview); + + unsigned surf_descs_offset = 0; + if (!pan_is_bifrost(pdev)) { + bo_size += MALI_MIDGARD_TEXTURE_LENGTH; + surf_descs_offset = MALI_MIDGARD_TEXTURE_LENGTH; + } + + view->bo = panfrost_bo_create(pdev, bo_size, 0, "Texture descriptor"); + + struct panfrost_ptr surf_descs = { + .cpu = view->bo->ptr.cpu + surf_descs_offset, + .gpu = view->bo->ptr.gpu + surf_descs_offset, + }; + void *tex_desc = pan_is_bifrost(pdev) ? 
+ &view->bifrost.tex_desc : view->bo->ptr.cpu; + + panfrost_new_texture(pdev, &view->pview, tex_desc, &surf_descs); + + *pView = panvk_image_view_to_handle(view); + return VK_SUCCESS; +} + +void +panvk_DestroyImageView(VkDevice _device, + VkImageView _view, + const VkAllocationCallbacks *pAllocator) +{ + VK_FROM_HANDLE(panvk_device, device, _device); + VK_FROM_HANDLE(panvk_image_view, view, _view); + + if (!view) + return; + + panfrost_bo_unreference(view->bo); + vk_object_free(&device->vk, pAllocator, view); +} + +VkResult +panvk_CreateBufferView(VkDevice _device, + const VkBufferViewCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkBufferView *pView) +{ + panvk_stub(); + return VK_SUCCESS; +} + +void +panvk_DestroyBufferView(VkDevice _device, + VkBufferView bufferView, + const VkAllocationCallbacks *pAllocator) +{ + panvk_stub(); +} + +VkResult +panvk_GetImageDrmFormatModifierPropertiesEXT(VkDevice device, + VkImage _image, + VkImageDrmFormatModifierPropertiesEXT *pProperties) +{ + VK_FROM_HANDLE(panvk_image, image, _image); + + assert(pProperties->sType == VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_PROPERTIES_EXT); + + pProperties->drmFormatModifier = image->pimage.layout.modifier; + return VK_SUCCESS; +} diff --git a/src/panfrost/vulkan/panvk_meta.c b/src/panfrost/vulkan/panvk_meta.c new file mode 100644 index 00000000000..5ef96b0a682 --- /dev/null +++ b/src/panfrost/vulkan/panvk_meta.c @@ -0,0 +1,167 @@ +/* + * Copyright © 2021 Collabora Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include "nir/nir_builder.h" +#include "pan_encoder.h" + +#include "panvk_private.h" + +#include "vk_format.h" + +void +panvk_CmdBlitImage(VkCommandBuffer commandBuffer, + VkImage srcImage, + VkImageLayout srcImageLayout, + VkImage destImage, + VkImageLayout destImageLayout, + uint32_t regionCount, + const VkImageBlit *pRegions, + VkFilter filter) + +{ + panvk_stub(); +} + +void +panvk_CmdCopyImage(VkCommandBuffer commandBuffer, + VkImage srcImage, + VkImageLayout srcImageLayout, + VkImage destImage, + VkImageLayout destImageLayout, + uint32_t regionCount, + const VkImageCopy *pRegions) +{ + panvk_stub(); +} + +void +panvk_CmdCopyBufferToImage(VkCommandBuffer commandBuffer, + VkBuffer srcBuffer, + VkImage destImage, + VkImageLayout destImageLayout, + uint32_t regionCount, + const VkBufferImageCopy *pRegions) +{ + panvk_stub(); +} + +void +panvk_CmdCopyImageToBuffer(VkCommandBuffer commandBuffer, + VkImage srcImage, + VkImageLayout srcImageLayout, + VkBuffer destBuffer, + uint32_t regionCount, + const VkBufferImageCopy *pRegions) +{ + panvk_stub(); +} + +void +panvk_CmdCopyBuffer(VkCommandBuffer commandBuffer, + VkBuffer srcBuffer, + VkBuffer destBuffer, + uint32_t regionCount, + const VkBufferCopy *pRegions) +{ + panvk_stub(); +} + +void +panvk_CmdResolveImage(VkCommandBuffer cmd_buffer_h, + VkImage src_image_h, + VkImageLayout src_image_layout, + VkImage dest_image_h, + VkImageLayout dest_image_layout, + uint32_t region_count, + const VkImageResolve *regions) +{ + panvk_stub(); +} + +void +panvk_CmdFillBuffer(VkCommandBuffer commandBuffer, + VkBuffer dstBuffer, + VkDeviceSize dstOffset, + VkDeviceSize fillSize, + uint32_t data) +{ + panvk_stub(); +} + +void +panvk_CmdUpdateBuffer(VkCommandBuffer commandBuffer, + VkBuffer dstBuffer, + VkDeviceSize dstOffset, + VkDeviceSize dataSize, + const void *pData) +{ + panvk_stub(); +} + +void +panvk_CmdClearColorImage(VkCommandBuffer commandBuffer, + VkImage image, + VkImageLayout imageLayout, + const VkClearColorValue *pColor, + uint32_t rangeCount, + const VkImageSubresourceRange *pRanges) +{ + panvk_stub(); +} + +void +panvk_CmdClearDepthStencilImage(VkCommandBuffer commandBuffer, + VkImage image_h, + VkImageLayout imageLayout, + const VkClearDepthStencilValue *pDepthStencil, + uint32_t rangeCount, + const VkImageSubresourceRange *pRanges) +{ + panvk_stub(); +} + +void +panvk_CmdClearAttachments(VkCommandBuffer commandBuffer, + uint32_t attachmentCount, + const VkClearAttachment *pAttachments, + uint32_t rectCount, + const VkClearRect *pRects) +{ + panvk_stub(); +} + +void +panvk_meta_init(struct panvk_physical_device *dev) +{ + panfrost_pool_init(&dev->meta.bin_pool, NULL, &dev->pdev, PAN_BO_EXECUTE, + 16 * 1024, "panvk_meta binary pool", false, true); + panfrost_pool_init(&dev->meta.desc_pool, NULL, &dev->pdev, 0, + 16 * 1024, "panvk_meta descriptor pool", false, true); +} + +void +panvk_meta_cleanup(struct panvk_physical_device *dev) +{ + panfrost_pool_cleanup(&dev->meta.desc_pool); + panfrost_pool_cleanup(&dev->meta.bin_pool); +} diff --git a/src/panfrost/vulkan/panvk_pass.c b/src/panfrost/vulkan/panvk_pass.c new file mode 100644 index 00000000000..82ed878bdbc --- /dev/null +++ b/src/panfrost/vulkan/panvk_pass.c @@ -0,0 +1,211 @@ +/* + * Copyright © 2021 Collabora Ltd. + * + * Derived from tu_pass.c which is: + * Copyright © 2016 Red Hat. 
+ * Copyright © 2016 Bas Nieuwenhuizen + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ +#include "panvk_private.h" + +#include "vk_format.h" +#include "vk_util.h" + +VkResult +panvk_CreateRenderPass2(VkDevice _device, + const VkRenderPassCreateInfo2 *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkRenderPass *pRenderPass) +{ + VK_FROM_HANDLE(panvk_device, device, _device); + struct panvk_render_pass *pass; + size_t size; + size_t attachments_offset; + VkRenderPassMultiviewCreateInfo *multiview_info = NULL; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2); + + size = sizeof(*pass); + size += pCreateInfo->subpassCount * sizeof(pass->subpasses[0]); + attachments_offset = size; + size += pCreateInfo->attachmentCount * sizeof(pass->attachments[0]); + + pass = vk_object_zalloc(&device->vk, pAllocator, size, + VK_OBJECT_TYPE_RENDER_PASS); + if (pass == NULL) + return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); + + pass->attachment_count = pCreateInfo->attachmentCount; + pass->subpass_count = pCreateInfo->subpassCount; + pass->attachments = (void *) pass + attachments_offset; + + vk_foreach_struct(ext, pCreateInfo->pNext) { + switch (ext->sType) { + case VK_STRUCTURE_TYPE_RENDER_PASS_MULTIVIEW_CREATE_INFO: + multiview_info = (VkRenderPassMultiviewCreateInfo *) ext; + break; + default: + break; + } + } + + for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) { + struct panvk_render_pass_attachment *att = &pass->attachments[i]; + + att->format = vk_format_to_pipe_format(pCreateInfo->pAttachments[i].format); + att->samples = pCreateInfo->pAttachments[i].samples; + att->load_op = pCreateInfo->pAttachments[i].loadOp; + att->stencil_load_op = pCreateInfo->pAttachments[i].stencilLoadOp; + att->initial_layout = pCreateInfo->pAttachments[i].initialLayout; + att->final_layout = pCreateInfo->pAttachments[i].finalLayout; + att->store_op = pCreateInfo->pAttachments[i].storeOp; + att->stencil_store_op = pCreateInfo->pAttachments[i].stencilStoreOp; + att->clear_subpass = ~0; + } + + uint32_t subpass_attachment_count = 0; + struct panvk_subpass_attachment *p; + for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) { + const VkSubpassDescription2 *desc = &pCreateInfo->pSubpasses[i]; + + subpass_attachment_count += + desc->inputAttachmentCount + desc->colorAttachmentCount + + (desc->pResolveAttachments ? 
desc->colorAttachmentCount : 0) + + (desc->pDepthStencilAttachment != NULL); + } + + if (subpass_attachment_count) { + pass->subpass_attachments = + vk_alloc2(&device->vk.alloc, pAllocator, + subpass_attachment_count * + sizeof(struct panvk_subpass_attachment), + 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (pass->subpass_attachments == NULL) { + vk_object_free(&device->vk, pAllocator, pass); + return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); + } + } + + p = pass->subpass_attachments; + for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) { + const VkSubpassDescription2 *desc = &pCreateInfo->pSubpasses[i]; + struct panvk_subpass *subpass = &pass->subpasses[i]; + + subpass->input_count = desc->inputAttachmentCount; + subpass->color_count = desc->colorAttachmentCount; + if (multiview_info) + subpass->view_mask = multiview_info->pViewMasks[i]; + + if (desc->inputAttachmentCount > 0) { + subpass->input_attachments = p; + p += desc->inputAttachmentCount; + + for (uint32_t j = 0; j < desc->inputAttachmentCount; j++) { + subpass->input_attachments[j] = (struct panvk_subpass_attachment) { + .idx = desc->pInputAttachments[j].attachment, + .layout = desc->pInputAttachments[j].layout, + }; + if (desc->pInputAttachments[j].attachment != VK_ATTACHMENT_UNUSED) + pass->attachments[desc->pInputAttachments[j].attachment] + .view_mask |= subpass->view_mask; + } + } + + if (desc->colorAttachmentCount > 0) { + subpass->color_attachments = p; + p += desc->colorAttachmentCount; + + for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) { + uint32_t idx = desc->pColorAttachments[j].attachment; + + subpass->color_attachments[j] = (struct panvk_subpass_attachment) { + .idx = idx, + .layout = desc->pColorAttachments[j].layout, + }; + + if (idx != VK_ATTACHMENT_UNUSED) { + pass->attachments[idx].view_mask |= subpass->view_mask; + if (pass->attachments[idx].clear_subpass == ~0) { + pass->attachments[idx].clear_subpass = i; + subpass->color_attachments[j].clear = true; + } + } + } + } + + if (desc->pResolveAttachments) { + subpass->resolve_attachments = p; + p += desc->colorAttachmentCount; + + for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) { + uint32_t idx = desc->pResolveAttachments[j].attachment; + + subpass->resolve_attachments[j] = (struct panvk_subpass_attachment) { + .idx = idx, + .layout = desc->pResolveAttachments[j].layout, + }; + + if (idx != VK_ATTACHMENT_UNUSED) + pass->attachments[idx].view_mask |= subpass->view_mask; + } + } + + unsigned idx = desc->pDepthStencilAttachment ? 
+ desc->pDepthStencilAttachment->attachment : + VK_ATTACHMENT_UNUSED; + subpass->zs_attachment.idx = idx; + if (idx != VK_ATTACHMENT_UNUSED) { + subpass->zs_attachment.layout = desc->pDepthStencilAttachment->layout; + pass->attachments[idx].view_mask |= subpass->view_mask; + if (pass->attachments[idx].clear_subpass == ~0) { + pass->attachments[idx].clear_subpass = i; + subpass->zs_attachment.clear = true; + } + } + } + + *pRenderPass = panvk_render_pass_to_handle(pass); + return VK_SUCCESS; +} + +void +panvk_DestroyRenderPass(VkDevice _device, + VkRenderPass _pass, + const VkAllocationCallbacks *pAllocator) +{ + VK_FROM_HANDLE(panvk_device, device, _device); + VK_FROM_HANDLE(panvk_render_pass, pass, _pass); + + if (!pass) + return; + + vk_free2(&device->vk.alloc, pAllocator, pass->subpass_attachments); + vk_object_free(&device->vk, pAllocator, pass); +} + +void +panvk_GetRenderAreaGranularity(VkDevice _device, + VkRenderPass renderPass, + VkExtent2D *pGranularity) +{ + panvk_stub(); +} diff --git a/src/panfrost/vulkan/panvk_pipeline.c b/src/panfrost/vulkan/panvk_pipeline.c new file mode 100644 index 00000000000..e51e9ec9464 --- /dev/null +++ b/src/panfrost/vulkan/panvk_pipeline.c @@ -0,0 +1,1015 @@ +/* + * Copyright © 2021 Collabora Ltd. + * + * Derived from tu_pipeline.c which is: + * Copyright © 2016 Red Hat. + * Copyright © 2016 Bas Nieuwenhuizen + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include "panvk_cs.h" +#include "panvk_private.h" + +#include "pan_bo.h" + +#include "nir/nir.h" +#include "nir/nir_builder.h" +#include "spirv/nir_spirv.h" +#include "util/debug.h" +#include "util/mesa-sha1.h" +#include "util/u_atomic.h" +#include "vk_format.h" +#include "vk_util.h" + +#include "panfrost/util/pan_lower_framebuffer.h" + +#include "panfrost-quirks.h" + +struct panvk_pipeline_builder +{ + struct panvk_device *device; + struct panvk_pipeline_cache *cache; + const VkAllocationCallbacks *alloc; + const VkGraphicsPipelineCreateInfo *create_info; + const struct panvk_pipeline_layout *layout; + + struct panvk_shader *shaders[MESA_SHADER_STAGES]; + struct { + uint32_t shader_offset; + uint32_t rsd_offset; + uint32_t sysvals_offset; + } stages[MESA_SHADER_STAGES]; + uint32_t blend_shader_offsets[MAX_RTS]; + uint32_t shader_total_size; + uint32_t static_state_size; + uint32_t vpd_offset; + + bool rasterizer_discard; + /* these states are affectd by rasterizer_discard */ + VkSampleCountFlagBits samples; + bool use_depth_stencil_attachment; + uint8_t active_color_attachments; + enum pipe_format color_attachment_formats[MAX_RTS]; +}; + +static VkResult +panvk_pipeline_builder_create_pipeline(struct panvk_pipeline_builder *builder, + struct panvk_pipeline **out_pipeline) +{ + struct panvk_device *dev = builder->device; + + struct panvk_pipeline *pipeline = + vk_object_zalloc(&dev->vk, builder->alloc, + sizeof(*pipeline), VK_OBJECT_TYPE_PIPELINE); + if (!pipeline) + return VK_ERROR_OUT_OF_HOST_MEMORY; + + pipeline->layout = builder->layout; + *out_pipeline = pipeline; + return VK_SUCCESS; +} + +static void +panvk_pipeline_builder_finish(struct panvk_pipeline_builder *builder) +{ + for (uint32_t i = 0; i < MESA_SHADER_STAGES; i++) { + if (!builder->shaders[i]) + continue; + panvk_shader_destroy(builder->device, builder->shaders[i], builder->alloc); + } +} + +static bool +panvk_pipeline_static_state(struct panvk_pipeline *pipeline, uint32_t id) +{ + return !(pipeline->dynamic_state_mask & (1 << id)); +} + +static VkResult +panvk_pipeline_builder_compile_shaders(struct panvk_pipeline_builder *builder, + struct panvk_pipeline *pipeline) +{ + const VkPipelineShaderStageCreateInfo *stage_infos[MESA_SHADER_STAGES] = { + NULL + }; + for (uint32_t i = 0; i < builder->create_info->stageCount; i++) { + gl_shader_stage stage = vk_to_mesa_shader_stage(builder->create_info->pStages[i].stage); + stage_infos[stage] = &builder->create_info->pStages[i]; + } + + /* compile shaders in reverse order */ + unsigned sysval_ubo = builder->layout->num_ubos; + + for (gl_shader_stage stage = MESA_SHADER_STAGES - 1; + stage > MESA_SHADER_NONE; stage--) { + const VkPipelineShaderStageCreateInfo *stage_info = stage_infos[stage]; + if (!stage_info) + continue; + + struct panvk_shader *shader; + + shader = panvk_shader_create(builder->device, stage, stage_info, + builder->layout, sysval_ubo, + &pipeline->blend.state, + panvk_pipeline_static_state(pipeline, + VK_DYNAMIC_STATE_BLEND_CONSTANTS), + builder->alloc); + if (!shader) + return VK_ERROR_OUT_OF_HOST_MEMORY; + + if (shader->info.sysvals.sysval_count) + sysval_ubo++; + + builder->shaders[stage] = shader; + builder->shader_total_size = ALIGN_POT(builder->shader_total_size, 128); + builder->stages[stage].shader_offset = builder->shader_total_size; + builder->shader_total_size += + util_dynarray_num_elements(&shader->binary, uint8_t); + } + + return VK_SUCCESS; +} + +static VkResult +panvk_pipeline_builder_upload_shaders(struct panvk_pipeline_builder *builder, + 
struct panvk_pipeline *pipeline) +{ + struct panfrost_bo *bin_bo = + panfrost_bo_create(&builder->device->physical_device->pdev, + builder->shader_total_size, PAN_BO_EXECUTE, + "Shader"); + + pipeline->binary_bo = bin_bo; + panfrost_bo_mmap(bin_bo); + + for (uint32_t i = 0; i < MESA_SHADER_STAGES; i++) { + const struct panvk_shader *shader = builder->shaders[i]; + if (!shader) + continue; + + memcpy(pipeline->binary_bo->ptr.cpu + builder->stages[i].shader_offset, + util_dynarray_element(&shader->binary, uint8_t, 0), + util_dynarray_num_elements(&shader->binary, uint8_t)); + } + + return VK_SUCCESS; +} + +static bool +panvk_pipeline_static_sysval(struct panvk_pipeline *pipeline, + unsigned id) +{ + switch (id) { + case PAN_SYSVAL_VIEWPORT_SCALE: + case PAN_SYSVAL_VIEWPORT_OFFSET: + return panvk_pipeline_static_state(pipeline, VK_DYNAMIC_STATE_VIEWPORT); + default: + return false; + } +} + +static void +panvk_pipeline_builder_alloc_static_state_bo(struct panvk_pipeline_builder *builder, + struct panvk_pipeline *pipeline) +{ + struct panfrost_device *pdev = + &builder->device->physical_device->pdev; + unsigned bo_size = 0; + + for (uint32_t i = 0; i < MESA_SHADER_STAGES; i++) { + const struct panvk_shader *shader = builder->shaders[i]; + if (!shader) + continue; + + if (pipeline->fs.dynamic_rsd && i == MESA_SHADER_FRAGMENT) + continue; + + bo_size = ALIGN_POT(bo_size, MALI_RENDERER_STATE_ALIGN); + builder->stages[i].rsd_offset = bo_size; + bo_size += MALI_RENDERER_STATE_LENGTH; + if (i == MESA_SHADER_FRAGMENT) + bo_size += MALI_BLEND_LENGTH * pipeline->blend.state.rt_count; + } + + if (panvk_pipeline_static_state(pipeline, VK_DYNAMIC_STATE_VIEWPORT) && + panvk_pipeline_static_state(pipeline, VK_DYNAMIC_STATE_SCISSOR)) { + bo_size = ALIGN_POT(bo_size, MALI_VIEWPORT_ALIGN); + builder->vpd_offset = bo_size; + bo_size += MALI_VIEWPORT_LENGTH; + } + + for (uint32_t i = 0; i < MESA_SHADER_STAGES; i++) { + const struct panvk_shader *shader = builder->shaders[i]; + if (!shader || !shader->info.sysvals.sysval_count) + continue; + + bool static_sysvals = true; + for (unsigned s = 0; s < shader->info.sysvals.sysval_count; s++) { + unsigned id = shader->info.sysvals.sysvals[s]; + static_sysvals &= panvk_pipeline_static_sysval(pipeline, id); + switch (PAN_SYSVAL_TYPE(id)) { + case PAN_SYSVAL_VIEWPORT_SCALE: + case PAN_SYSVAL_VIEWPORT_OFFSET: + pipeline->sysvals[i].dirty_mask |= PANVK_DYNAMIC_VIEWPORT; + break; + default: + break; + } + } + + if (!static_sysvals) { + builder->stages[i].sysvals_offset = ~0; + continue; + } + + bo_size = ALIGN_POT(bo_size, 16); + builder->stages[i].sysvals_offset = bo_size; + bo_size += shader->info.sysvals.sysval_count * 16; + } + + if (bo_size) { + pipeline->state_bo = + panfrost_bo_create(pdev, bo_size, 0, "Pipeline descriptors"); + panfrost_bo_mmap(pipeline->state_bo); + } +} + +static void +panvk_pipeline_builder_upload_sysval(struct panvk_pipeline_builder *builder, + struct panvk_pipeline *pipeline, + unsigned id, union panvk_sysval_data *data) +{ + switch (PAN_SYSVAL_TYPE(id)) { + case PAN_SYSVAL_VIEWPORT_SCALE: + panvk_sysval_upload_viewport_scale(builder->create_info->pViewportState->pViewports, + data); + break; + case PAN_SYSVAL_VIEWPORT_OFFSET: + panvk_sysval_upload_viewport_offset(builder->create_info->pViewportState->pViewports, + data); + break; + default: + unreachable("Invalid static sysval"); + } +} + +static void +panvk_pipeline_builder_init_sysvals(struct panvk_pipeline_builder *builder, + struct panvk_pipeline *pipeline, + gl_shader_stage stage) +{
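+   /* Sysvals whose value only depends on static pipeline state were given
+    * a slot in state_bo by panvk_pipeline_builder_alloc_static_state_bo();
+    * here we point the stage's sysval UBO at that pre-baked copy. A
+    * sysvals_offset of ~0 means at least one sysval is dynamic, so the UBO
+    * has to be built at draw time instead and pipeline->sysvals[stage].ubo
+    * is left at 0.
+    */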
+ const struct panvk_shader *shader = builder->shaders[stage]; + + pipeline->sysvals[stage].ids = shader->info.sysvals; + pipeline->sysvals[stage].ubo_idx = shader->sysval_ubo; + + if (!shader->info.sysvals.sysval_count || + builder->stages[stage].sysvals_offset == ~0) + return; + + union panvk_sysval_data *static_data = + pipeline->state_bo->ptr.cpu + builder->stages[stage].sysvals_offset; + + pipeline->sysvals[stage].ubo = + pipeline->state_bo->ptr.gpu + builder->stages[stage].sysvals_offset; + + for (unsigned i = 0; i < shader->info.sysvals.sysval_count; i++) { + unsigned id = shader->info.sysvals.sysvals[i]; + + panvk_pipeline_builder_upload_sysval(builder, + pipeline, + id, &static_data[i]); + } +} + +static void +panvk_pipeline_builder_init_shaders(struct panvk_pipeline_builder *builder, + struct panvk_pipeline *pipeline) +{ + for (uint32_t i = 0; i < MESA_SHADER_STAGES; i++) { + const struct panvk_shader *shader = builder->shaders[i]; + if (!shader) + continue; + + pipeline->tls_size = MAX2(pipeline->tls_size, shader->info.tls_size); + pipeline->wls_size = MAX2(pipeline->wls_size, shader->info.wls_size); + + if (i == MESA_SHADER_VERTEX && shader->info.vs.writes_point_size) + pipeline->ia.writes_point_size = true; + + mali_ptr shader_ptr = pipeline->binary_bo->ptr.gpu + + builder->stages[i].shader_offset; + + void *rsd = pipeline->state_bo->ptr.cpu + builder->stages[i].rsd_offset; + mali_ptr gpu_rsd = pipeline->state_bo->ptr.gpu + builder->stages[i].rsd_offset; + + if (i != MESA_SHADER_FRAGMENT) { + panvk_emit_non_fs_rsd(builder->device, &shader->info, shader_ptr, rsd); + } else if (!pipeline->fs.dynamic_rsd) { + void *bd = rsd + MALI_RENDERER_STATE_LENGTH; + + panvk_emit_base_fs_rsd(builder->device, pipeline, rsd); + for (unsigned rt = 0; rt < MAX2(pipeline->blend.state.rt_count, 1); rt++) { + panvk_emit_blend(builder->device, pipeline, rt, bd); + bd += MALI_BLEND_LENGTH; + } + } else { + gpu_rsd = 0; + panvk_emit_base_fs_rsd(builder->device, pipeline, &pipeline->fs.rsd_template); + for (unsigned rt = 0; rt < MAX2(pipeline->blend.state.rt_count, 1); rt++) { + panvk_emit_blend(builder->device, pipeline, rt, + &pipeline->blend.bd_template[rt]); + } + } + + pipeline->rsds[i] = gpu_rsd; + panvk_pipeline_builder_init_sysvals(builder, pipeline, i); + } + + pipeline->num_ubos = builder->layout->num_ubos; + for (unsigned i = 0; i < ARRAY_SIZE(pipeline->sysvals); i++) { + if (pipeline->sysvals[i].ids.sysval_count) + pipeline->num_ubos = MAX2(pipeline->num_ubos, pipeline->sysvals[i].ubo_idx + 1); + } + + pipeline->num_sysvals = 0; + for (unsigned i = 0; i < ARRAY_SIZE(pipeline->sysvals); i++) + pipeline->num_sysvals += pipeline->sysvals[i].ids.sysval_count; +} + + +static void +panvk_pipeline_builder_parse_viewport(struct panvk_pipeline_builder *builder, + struct panvk_pipeline *pipeline) +{ + /* The spec says: + * + * pViewportState is a pointer to an instance of the + * VkPipelineViewportStateCreateInfo structure, and is ignored if the + * pipeline has rasterization disabled.
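+    *
+    * On top of that, we only pre-bake the Mali viewport descriptor when
+    * both viewport and scissor are static; if either one is dynamic, the
+    * descriptor is emitted at draw time from the dynamic state, and we
+    * just stash whatever static values we were given here.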
+ */ + if (!builder->rasterizer_discard && + panvk_pipeline_static_state(pipeline, VK_DYNAMIC_STATE_VIEWPORT) && + panvk_pipeline_static_state(pipeline, VK_DYNAMIC_STATE_SCISSOR)) { + void *vpd = pipeline->state_bo->ptr.cpu + builder->vpd_offset; + panvk_emit_viewport(builder->create_info->pViewportState->pViewports, + builder->create_info->pViewportState->pScissors, + vpd); + pipeline->vpd = pipeline->state_bo->ptr.gpu + + builder->vpd_offset; + } else { + if (builder->create_info->pViewportState->pViewports) + pipeline->viewport = builder->create_info->pViewportState->pViewports[0]; + + if (builder->create_info->pViewportState->pScissors) + pipeline->scissor = builder->create_info->pViewportState->pScissors[0]; + } +} + +static void +panvk_pipeline_builder_parse_dynamic(struct panvk_pipeline_builder *builder, + struct panvk_pipeline *pipeline) +{ + const VkPipelineDynamicStateCreateInfo *dynamic_info = + builder->create_info->pDynamicState; + + if (!dynamic_info) + return; + + for (uint32_t i = 0; i < dynamic_info->dynamicStateCount; i++) { + VkDynamicState state = dynamic_info->pDynamicStates[i]; + switch (state) { + case VK_DYNAMIC_STATE_VIEWPORT ... VK_DYNAMIC_STATE_STENCIL_REFERENCE: + pipeline->dynamic_state_mask |= 1 << state; + break; + default: + unreachable("unsupported dynamic state"); + } + } + +} + +static enum mali_draw_mode +translate_prim_topology(VkPrimitiveTopology in) +{ + switch (in) { + case VK_PRIMITIVE_TOPOLOGY_POINT_LIST: + return MALI_DRAW_MODE_POINTS; + case VK_PRIMITIVE_TOPOLOGY_LINE_LIST: + return MALI_DRAW_MODE_LINES; + case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP: + return MALI_DRAW_MODE_LINE_STRIP; + case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST: + return MALI_DRAW_MODE_TRIANGLES; + case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP: + return MALI_DRAW_MODE_TRIANGLE_STRIP; + case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN: + return MALI_DRAW_MODE_TRIANGLE_FAN; + case VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY: + case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY: + case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY: + case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY: + case VK_PRIMITIVE_TOPOLOGY_PATCH_LIST: + default: + unreachable("Invalid primitive type"); + } +} + +static void +panvk_pipeline_builder_parse_input_assembly(struct panvk_pipeline_builder *builder, + struct panvk_pipeline *pipeline) +{ + pipeline->ia.primitive_restart = + builder->create_info->pInputAssemblyState->primitiveRestartEnable; + pipeline->ia.topology = + translate_prim_topology(builder->create_info->pInputAssemblyState->topology); +} + +static enum pipe_logicop +translate_logicop(VkLogicOp in) +{ + switch (in) { + case VK_LOGIC_OP_CLEAR: return PIPE_LOGICOP_CLEAR; + case VK_LOGIC_OP_AND: return PIPE_LOGICOP_AND; + case VK_LOGIC_OP_AND_REVERSE: return PIPE_LOGICOP_AND_REVERSE; + case VK_LOGIC_OP_COPY: return PIPE_LOGICOP_COPY; + case VK_LOGIC_OP_AND_INVERTED: return PIPE_LOGICOP_AND_INVERTED; + case VK_LOGIC_OP_NO_OP: return PIPE_LOGICOP_NOOP; + case VK_LOGIC_OP_XOR: return PIPE_LOGICOP_XOR; + case VK_LOGIC_OP_OR: return PIPE_LOGICOP_OR; + case VK_LOGIC_OP_NOR: return PIPE_LOGICOP_NOR; + case VK_LOGIC_OP_EQUIVALENT: return PIPE_LOGICOP_EQUIV; + case VK_LOGIC_OP_INVERT: return PIPE_LOGICOP_INVERT; + case VK_LOGIC_OP_OR_REVERSE: return PIPE_LOGICOP_OR_REVERSE; + case VK_LOGIC_OP_COPY_INVERTED: return PIPE_LOGICOP_COPY_INVERTED; + case VK_LOGIC_OP_OR_INVERTED: return PIPE_LOGICOP_OR_INVERTED; + case VK_LOGIC_OP_NAND: return PIPE_LOGICOP_NAND; + case VK_LOGIC_OP_SET: return PIPE_LOGICOP_SET; + 
default: unreachable("Invalid logicop"); + } +} + +static enum blend_func +translate_blend_op(VkBlendOp in) +{ + switch (in) { + case VK_BLEND_OP_ADD: return BLEND_FUNC_ADD; + case VK_BLEND_OP_SUBTRACT: return BLEND_FUNC_SUBTRACT; + case VK_BLEND_OP_REVERSE_SUBTRACT: return BLEND_FUNC_REVERSE_SUBTRACT; + case VK_BLEND_OP_MIN: return BLEND_FUNC_MIN; + case VK_BLEND_OP_MAX: return BLEND_FUNC_MAX; + default: unreachable("Invalid blend op"); + } +} + +static enum blend_factor +translate_blend_factor(VkBlendFactor in, bool dest_has_alpha) +{ + switch (in) { + case VK_BLEND_FACTOR_ZERO: + case VK_BLEND_FACTOR_ONE: + return BLEND_FACTOR_ZERO; + case VK_BLEND_FACTOR_SRC_COLOR: + case VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR: + return BLEND_FACTOR_SRC_COLOR; + case VK_BLEND_FACTOR_DST_COLOR: + case VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR: + return BLEND_FACTOR_DST_COLOR; + case VK_BLEND_FACTOR_SRC_ALPHA: + case VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA: + return BLEND_FACTOR_SRC_ALPHA; + case VK_BLEND_FACTOR_DST_ALPHA: + case VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA: + return dest_has_alpha ? BLEND_FACTOR_DST_ALPHA : BLEND_FACTOR_ZERO; + case VK_BLEND_FACTOR_CONSTANT_COLOR: + case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR: + return BLEND_FACTOR_CONSTANT_COLOR; + case VK_BLEND_FACTOR_CONSTANT_ALPHA: + case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA: + return BLEND_FACTOR_CONSTANT_ALPHA; + case VK_BLEND_FACTOR_SRC1_COLOR: + case VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR: + return BLEND_FACTOR_SRC1_COLOR; + case VK_BLEND_FACTOR_SRC1_ALPHA: + case VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA: + return BLEND_FACTOR_SRC1_ALPHA; + case VK_BLEND_FACTOR_SRC_ALPHA_SATURATE: + return BLEND_FACTOR_SRC_ALPHA_SATURATE; + default: unreachable("Invalid blend factor"); + } +} + +static bool +inverted_blend_factor(VkBlendFactor in, bool dest_has_alpha) +{ + switch (in) { + case VK_BLEND_FACTOR_ONE: + case VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR: + case VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR: + case VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA: + case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR: + case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA: + case VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR: + case VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA: + return true; + case VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA: + return dest_has_alpha ? true : false; + case VK_BLEND_FACTOR_DST_ALPHA: + return !dest_has_alpha ? true : false; + default: + return false; + } +} + +bool +panvk_blend_needs_lowering(const struct panfrost_device *dev, + const struct pan_blend_state *state, + unsigned rt) +{ + /* LogicOp requires a blend shader */ + if (state->logicop_enable) + return true; + + /* Not all formats can be blended by fixed-function hardware */ + if (!panfrost_blendable_formats[state->rts[rt].format].internal) + return true; + + unsigned constant_mask = pan_blend_constant_mask(state->rts[rt].equation); + + /* v6 doesn't support blend constants in FF blend equations. + * v7 only uses the constant from RT 0 (TODO: what if it's the same + * constant? or a constant is shared?) 
+ */ + if (constant_mask && (dev->arch == 6 || (dev->arch == 7 && rt > 0))) + return true; + + if (!pan_blend_is_homogenous_constant(constant_mask, state->constants)) + return true; + + return !pan_blend_can_fixed_function(state->rts[rt].equation); +} + +static void +panvk_pipeline_builder_parse_color_blend(struct panvk_pipeline_builder *builder, + struct panvk_pipeline *pipeline) +{ + struct panfrost_device *pdev = &builder->device->physical_device->pdev; + pipeline->blend.state.logicop_enable = + builder->create_info->pColorBlendState->logicOpEnable; + pipeline->blend.state.logicop_func = + translate_logicop(builder->create_info->pColorBlendState->logicOp); + pipeline->blend.state.rt_count = util_last_bit(builder->active_color_attachments); + memcpy(pipeline->blend.state.constants, + builder->create_info->pColorBlendState->blendConstants, + sizeof(pipeline->blend.state.constants)); + + for (unsigned i = 0; i < pipeline->blend.state.rt_count; i++) { + const VkPipelineColorBlendAttachmentState *in = + &builder->create_info->pColorBlendState->pAttachments[i]; + struct pan_blend_rt_state *out = &pipeline->blend.state.rts[i]; + + out->format = builder->color_attachment_formats[i]; + + bool dest_has_alpha = util_format_has_alpha(out->format); + + out->nr_samples = builder->create_info->pMultisampleState->rasterizationSamples; + out->equation.blend_enable = in->blendEnable; + out->equation.color_mask = in->colorWriteMask; + out->equation.rgb_func = translate_blend_op(in->colorBlendOp); + out->equation.rgb_src_factor = translate_blend_factor(in->srcColorBlendFactor, dest_has_alpha); + out->equation.rgb_invert_src_factor = inverted_blend_factor(in->srcColorBlendFactor, dest_has_alpha); + out->equation.rgb_dst_factor = translate_blend_factor(in->dstColorBlendFactor, dest_has_alpha); + out->equation.rgb_invert_dst_factor = inverted_blend_factor(in->dstColorBlendFactor, dest_has_alpha); + out->equation.alpha_func = translate_blend_op(in->alphaBlendOp); + out->equation.alpha_src_factor = translate_blend_factor(in->srcAlphaBlendFactor, dest_has_alpha); + out->equation.alpha_invert_src_factor = inverted_blend_factor(in->srcAlphaBlendFactor, dest_has_alpha); + out->equation.alpha_dst_factor = translate_blend_factor(in->dstAlphaBlendFactor, dest_has_alpha); + out->equation.alpha_invert_dst_factor = inverted_blend_factor(in->dstAlphaBlendFactor, dest_has_alpha); + + unsigned constant_mask = + panvk_blend_needs_lowering(pdev, &pipeline->blend.state, i) ? + 0 : pan_blend_constant_mask(out->equation); + pipeline->blend.constant[i].index = ffs(constant_mask) - 1; + if (constant_mask && pan_is_bifrost(pdev)) { + /* On Bifrost, the blend constant is expressed with a UNORM of the + * size of the target format. The value is then shifted such that + * used bits are in the MSB. Here we calculate the factor at pipeline + * creation time so we only have to do a + * hw_constant = float_constant * factor; + * at descriptor emission time. 
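+ * For example, for a format whose widest channel is 6 bits
+ * (chan_size = 6):
+ *   factor = ((1 << 6) - 1) << (16 - 6) = 63 << 10 = 0xfc00
+ * so a blend constant of 1.0 maps to 0xfc00, i.e. 63 placed in the
+ * 6 MSBs of a 16-bit value.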
+ */ + const struct util_format_description *format_desc = + util_format_description(out->format); + unsigned chan_size = 0; + for (unsigned c = 0; c < format_desc->nr_channels; c++) + chan_size = MAX2(format_desc->channel[c].size, chan_size); + pipeline->blend.constant[i].bifrost_factor = + ((1 << chan_size) - 1) << (16 - chan_size); + } + } +} + +static void +panvk_pipeline_builder_parse_multisample(struct panvk_pipeline_builder *builder, + struct panvk_pipeline *pipeline) +{ + unsigned nr_samples = + MAX2(builder->create_info->pMultisampleState->rasterizationSamples, 1); + + pipeline->ms.rast_samples = + builder->create_info->pMultisampleState->rasterizationSamples; + pipeline->ms.sample_mask = + builder->create_info->pMultisampleState->pSampleMask ? + builder->create_info->pMultisampleState->pSampleMask[0] : UINT16_MAX; + pipeline->ms.min_samples = + MAX2(builder->create_info->pMultisampleState->minSampleShading * nr_samples, 1); +} + +static enum mali_stencil_op +translate_stencil_op(VkStencilOp in) +{ + switch (in) { + case VK_STENCIL_OP_KEEP: return MALI_STENCIL_OP_KEEP; + case VK_STENCIL_OP_ZERO: return MALI_STENCIL_OP_ZERO; + case VK_STENCIL_OP_REPLACE: return MALI_STENCIL_OP_REPLACE; + case VK_STENCIL_OP_INCREMENT_AND_CLAMP: return MALI_STENCIL_OP_INCR_SAT; + case VK_STENCIL_OP_DECREMENT_AND_CLAMP: return MALI_STENCIL_OP_DECR_SAT; + case VK_STENCIL_OP_INCREMENT_AND_WRAP: return MALI_STENCIL_OP_INCR_WRAP; + case VK_STENCIL_OP_DECREMENT_AND_WRAP: return MALI_STENCIL_OP_DECR_WRAP; + case VK_STENCIL_OP_INVERT: return MALI_STENCIL_OP_INVERT; + default: unreachable("Invalid stencil op"); + } +} + +static void +panvk_pipeline_builder_parse_zs(struct panvk_pipeline_builder *builder, + struct panvk_pipeline *pipeline) +{ + pipeline->zs.z_test = builder->create_info->pDepthStencilState->depthTestEnable; + pipeline->zs.z_write = builder->create_info->pDepthStencilState->depthWriteEnable; + pipeline->zs.z_compare_func = + panvk_translate_compare_func(builder->create_info->pDepthStencilState->depthCompareOp); + pipeline->zs.s_test = builder->create_info->pDepthStencilState->stencilTestEnable; + pipeline->zs.s_front.fail_op = + translate_stencil_op(builder->create_info->pDepthStencilState->front.failOp); + pipeline->zs.s_front.pass_op = + translate_stencil_op(builder->create_info->pDepthStencilState->front.passOp); + pipeline->zs.s_front.z_fail_op = + translate_stencil_op(builder->create_info->pDepthStencilState->front.depthFailOp); + pipeline->zs.s_front.compare_func = + panvk_translate_compare_func(builder->create_info->pDepthStencilState->front.compareOp); + pipeline->zs.s_front.compare_mask = + builder->create_info->pDepthStencilState->front.compareMask; + pipeline->zs.s_front.write_mask = + builder->create_info->pDepthStencilState->front.writeMask; + pipeline->zs.s_front.ref = + builder->create_info->pDepthStencilState->front.reference; + pipeline->zs.s_back.fail_op = + translate_stencil_op(builder->create_info->pDepthStencilState->back.failOp); + pipeline->zs.s_back.pass_op = + translate_stencil_op(builder->create_info->pDepthStencilState->back.passOp); + pipeline->zs.s_back.z_fail_op = + translate_stencil_op(builder->create_info->pDepthStencilState->back.depthFailOp); + pipeline->zs.s_back.compare_func = + panvk_translate_compare_func(builder->create_info->pDepthStencilState->back.compareOp); + pipeline->zs.s_back.compare_mask = + builder->create_info->pDepthStencilState->back.compareMask; + pipeline->zs.s_back.write_mask = + builder->create_info->pDepthStencilState->back.writeMask; + 
pipeline->zs.s_back.ref = + builder->create_info->pDepthStencilState->back.reference; +} + +static void +panvk_pipeline_builder_parse_rast(struct panvk_pipeline_builder *builder, + struct panvk_pipeline *pipeline) +{ + pipeline->rast.clamp_depth = builder->create_info->pRasterizationState->depthClampEnable; + pipeline->rast.depth_bias.enable = builder->create_info->pRasterizationState->depthBiasEnable; + pipeline->rast.depth_bias.constant_factor = + builder->create_info->pRasterizationState->depthBiasConstantFactor; + pipeline->rast.depth_bias.clamp = builder->create_info->pRasterizationState->depthBiasClamp; + pipeline->rast.depth_bias.slope_factor = builder->create_info->pRasterizationState->depthBiasSlopeFactor; + pipeline->rast.front_ccw = builder->create_info->pRasterizationState->frontFace == VK_FRONT_FACE_COUNTER_CLOCKWISE; + pipeline->rast.cull_front_face = builder->create_info->pRasterizationState->cullMode & VK_CULL_MODE_FRONT_BIT; + pipeline->rast.cull_back_face = builder->create_info->pRasterizationState->cullMode & VK_CULL_MODE_BACK_BIT; +} + +static bool +panvk_fs_required(struct panvk_pipeline *pipeline) +{ + const struct pan_shader_info *info = &pipeline->fs.info; + + /* If we generally have side effects */ + if (info->fs.sidefx) + return true; + + /* If colour is written we need to execute */ + const struct pan_blend_state *blend = &pipeline->blend.state; + for (unsigned i = 0; i < blend->rt_count; ++i) { + if (blend->rts[i].equation.color_mask) + return true; + } + + /* If depth is written and not implied we need to execute. + * TODO: Predicate on Z/S writes being enabled */ + return (info->fs.writes_depth || info->fs.writes_stencil); +} + +#define PANVK_DYNAMIC_FS_RSD_MASK \ + ((1 << VK_DYNAMIC_STATE_DEPTH_BIAS) | \ + (1 << VK_DYNAMIC_STATE_BLEND_CONSTANTS) | \ + (1 << VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK) | \ + (1 << VK_DYNAMIC_STATE_STENCIL_WRITE_MASK) | \ + (1 << VK_DYNAMIC_STATE_STENCIL_REFERENCE)) + +static void +panvk_pipeline_builder_init_fs_state(struct panvk_pipeline_builder *builder, + struct panvk_pipeline *pipeline) +{ + if (!builder->shaders[MESA_SHADER_FRAGMENT]) + return; + + pipeline->fs.dynamic_rsd = + pipeline->dynamic_state_mask & PANVK_DYNAMIC_FS_RSD_MASK; + pipeline->fs.address = pipeline->binary_bo->ptr.gpu + + builder->stages[MESA_SHADER_FRAGMENT].shader_offset; + pipeline->fs.info = builder->shaders[MESA_SHADER_FRAGMENT]->info; + pipeline->fs.required = panvk_fs_required(pipeline); +} + +static void +panvk_pipeline_update_varying_slot(struct panvk_varyings_info *varyings, + gl_shader_stage stage, + const struct pan_shader_varying *varying, + bool input) +{ + bool fs = stage == MESA_SHADER_FRAGMENT; + gl_varying_slot loc = varying->location; + enum panvk_varying_buf_id buf_id = + panvk_varying_buf_id(fs, loc); + + varyings->stage[stage].loc[varyings->stage[stage].count++] = loc; + + if (panvk_varying_is_builtin(stage, loc)) { + varyings->buf_mask |= 1 << buf_id; + return; + } + + assert(loc < ARRAY_SIZE(varyings->varying)); + + enum pipe_format new_fmt = varying->format; + enum pipe_format old_fmt = varyings->varying[loc].format; + + BITSET_SET(varyings->active, loc); + + /* We expect inputs to either be set by a previous stage or be built + * in, skip the entry if that's not the case, we'll emit a const + * varying returning zero for those entries. 
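+ * (The PIPE_FORMAT_NONE check below is what skips such entries.)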
+ */ + if (input && old_fmt == PIPE_FORMAT_NONE) + return; + + unsigned new_size = util_format_get_blocksize(new_fmt); + unsigned old_size = util_format_get_blocksize(old_fmt); + + if (old_size < new_size) + varyings->varying[loc].format = new_fmt; + + varyings->buf_mask |= 1 << buf_id; +} + +static void +panvk_pipeline_builder_collect_varyings(struct panvk_pipeline_builder *builder, + struct panvk_pipeline *pipeline) +{ + for (uint32_t s = 0; s < MESA_SHADER_STAGES; s++) { + if (!builder->shaders[s]) + continue; + + const struct pan_shader_info *info = &builder->shaders[s]->info; + + for (unsigned i = 0; i < info->varyings.input_count; i++) { + panvk_pipeline_update_varying_slot(&pipeline->varyings, s, + &info->varyings.input[i], + true); + } + + for (unsigned i = 0; i < info->varyings.output_count; i++) { + panvk_pipeline_update_varying_slot(&pipeline->varyings, s, + &info->varyings.output[i], + false); + } + } + + /* TODO: Xfb */ + gl_varying_slot loc; + BITSET_FOREACH_SET(loc, pipeline->varyings.active, VARYING_SLOT_MAX) { + enum panvk_varying_buf_id buf_id = + panvk_varying_buf_id(false, loc); + unsigned buf_idx = panvk_varying_buf_index(&pipeline->varyings, buf_id); + unsigned varying_sz = panvk_varying_size(&pipeline->varyings, loc); + + pipeline->varyings.varying[loc].buf = buf_idx; + pipeline->varyings.varying[loc].offset = + pipeline->varyings.buf[buf_idx].stride; + pipeline->varyings.buf[buf_idx].stride += varying_sz; + } +} + +static void +panvk_pipeline_builder_parse_vertex_input(struct panvk_pipeline_builder *builder, + struct panvk_pipeline *pipeline) +{ + struct panvk_attribs_info *attribs = &pipeline->attribs; + const VkPipelineVertexInputStateCreateInfo *info = + builder->create_info->pVertexInputState; + + for (unsigned i = 0; i < info->vertexBindingDescriptionCount; i++) { + const VkVertexInputBindingDescription *desc = + &info->pVertexBindingDescriptions[i]; + attribs->buf_count = MAX2(desc->binding + 1, attribs->buf_count); + attribs->buf[desc->binding].stride = desc->stride; + attribs->buf[desc->binding].special = false; + } + + for (unsigned i = 0; i < info->vertexAttributeDescriptionCount; i++) { + const VkVertexInputAttributeDescription *desc = + &info->pVertexAttributeDescriptions[i]; + attribs->attrib[desc->location].buf = desc->binding; + attribs->attrib[desc->location].format = + vk_format_to_pipe_format(desc->format); + attribs->attrib[desc->location].offset = desc->offset; + } + + const struct pan_shader_info *vs = + &builder->shaders[MESA_SHADER_VERTEX]->info; + + if (vs->attribute_count >= PAN_VERTEX_ID) { + attribs->buf[attribs->buf_count].special = true; + attribs->buf[attribs->buf_count].special_id = PAN_VERTEX_ID; + attribs->attrib[PAN_VERTEX_ID].buf = attribs->buf_count++; + attribs->attrib[PAN_VERTEX_ID].format = PIPE_FORMAT_R32_UINT; + } + + if (vs->attribute_count >= PAN_INSTANCE_ID) { + attribs->buf[attribs->buf_count].special = true; + attribs->buf[attribs->buf_count].special_id = PAN_INSTANCE_ID; + attribs->attrib[PAN_INSTANCE_ID].buf = attribs->buf_count++; + attribs->attrib[PAN_INSTANCE_ID].format = PIPE_FORMAT_R32_UINT; + } + + attribs->attrib_count = MAX2(attribs->attrib_count, vs->attribute_count); +} + +static VkResult +panvk_pipeline_builder_build(struct panvk_pipeline_builder *builder, + struct panvk_pipeline **pipeline) +{ + VkResult result = panvk_pipeline_builder_create_pipeline(builder, pipeline); + if (result != VK_SUCCESS) + return result; + + /* TODO: make those functions return a result and handle errors */ + 
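+   /* Note the ordering: blend state is parsed before the shaders are
+    * compiled, since fragment shaders may need blend lowering applied,
+    * and varyings can only be collected once all shaders are compiled.
+    */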
panvk_pipeline_builder_parse_dynamic(builder, *pipeline); + panvk_pipeline_builder_parse_color_blend(builder, *pipeline); + panvk_pipeline_builder_compile_shaders(builder, *pipeline); + panvk_pipeline_builder_collect_varyings(builder, *pipeline); + panvk_pipeline_builder_parse_input_assembly(builder, *pipeline); + panvk_pipeline_builder_parse_multisample(builder, *pipeline); + panvk_pipeline_builder_parse_zs(builder, *pipeline); + panvk_pipeline_builder_parse_rast(builder, *pipeline); + panvk_pipeline_builder_parse_vertex_input(builder, *pipeline); + + + panvk_pipeline_builder_upload_shaders(builder, *pipeline); + panvk_pipeline_builder_init_fs_state(builder, *pipeline); + panvk_pipeline_builder_alloc_static_state_bo(builder, *pipeline); + panvk_pipeline_builder_init_shaders(builder, *pipeline); + panvk_pipeline_builder_parse_viewport(builder, *pipeline); + + return VK_SUCCESS; +} + +static void +panvk_pipeline_builder_init_graphics(struct panvk_pipeline_builder *builder, + struct panvk_device *dev, + struct panvk_pipeline_cache *cache, + const VkGraphicsPipelineCreateInfo *create_info, + const VkAllocationCallbacks *alloc) +{ + VK_FROM_HANDLE(panvk_pipeline_layout, layout, create_info->layout); + assert(layout); + *builder = (struct panvk_pipeline_builder) { + .device = dev, + .cache = cache, + .layout = layout, + .create_info = create_info, + .alloc = alloc, + }; + + builder->rasterizer_discard = + create_info->pRasterizationState->rasterizerDiscardEnable; + + if (builder->rasterizer_discard) { + builder->samples = VK_SAMPLE_COUNT_1_BIT; + } else { + builder->samples = create_info->pMultisampleState->rasterizationSamples; + + const struct panvk_render_pass *pass = panvk_render_pass_from_handle(create_info->renderPass); + const struct panvk_subpass *subpass = &pass->subpasses[create_info->subpass]; + + builder->use_depth_stencil_attachment = + subpass->zs_attachment.idx != VK_ATTACHMENT_UNUSED; + + assert(subpass->color_count == create_info->pColorBlendState->attachmentCount); + builder->active_color_attachments = 0; + for (uint32_t i = 0; i < subpass->color_count; i++) { + uint32_t idx = subpass->color_attachments[i].idx; + if (idx == VK_ATTACHMENT_UNUSED) + continue; + + builder->active_color_attachments |= 1 << i; + builder->color_attachment_formats[i] = pass->attachments[idx].format; + } + } +} + +VkResult +panvk_CreateGraphicsPipelines(VkDevice device, + VkPipelineCache pipelineCache, + uint32_t count, + const VkGraphicsPipelineCreateInfo *pCreateInfos, + const VkAllocationCallbacks *pAllocator, + VkPipeline *pPipelines) +{ + VK_FROM_HANDLE(panvk_device, dev, device); + VK_FROM_HANDLE(panvk_pipeline_cache, cache, pipelineCache); + + for (uint32_t i = 0; i < count; i++) { + struct panvk_pipeline_builder builder; + panvk_pipeline_builder_init_graphics(&builder, dev, cache, + &pCreateInfos[i], pAllocator); + + struct panvk_pipeline *pipeline; + VkResult result = panvk_pipeline_builder_build(&builder, &pipeline); + panvk_pipeline_builder_finish(&builder); + + if (result != VK_SUCCESS) { + for (uint32_t j = 0; j < i; j++) { + panvk_DestroyPipeline(device, pPipelines[j], pAllocator); + pPipelines[j] = VK_NULL_HANDLE; + } + + return result; + } + + pPipelines[i] = panvk_pipeline_to_handle(pipeline); + } + + return VK_SUCCESS; +} + +VkResult +panvk_CreateComputePipelines(VkDevice _device, + VkPipelineCache pipelineCache, + uint32_t count, + const VkComputePipelineCreateInfo *pCreateInfos, + const VkAllocationCallbacks *pAllocator, + VkPipeline *pPipelines) +{ + panvk_stub(); + return 
VK_SUCCESS; +} + +void +panvk_DestroyPipeline(VkDevice _device, + VkPipeline _pipeline, + const VkAllocationCallbacks *pAllocator) +{ + VK_FROM_HANDLE(panvk_device, device, _device); + VK_FROM_HANDLE(panvk_pipeline, pipeline, _pipeline); + + panfrost_bo_unreference(pipeline->binary_bo); + panfrost_bo_unreference(pipeline->state_bo); + vk_object_free(&device->vk, pAllocator, pipeline); +} diff --git a/src/panfrost/vulkan/panvk_pipeline_cache.c b/src/panfrost/vulkan/panvk_pipeline_cache.c new file mode 100644 index 00000000000..6b86aab874e --- /dev/null +++ b/src/panfrost/vulkan/panvk_pipeline_cache.c @@ -0,0 +1,86 @@ +/* + * Copyright © 2021 Collabora Ltd. + * + * Derived from tu_pipeline_cache.c which is: + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include "panvk_private.h" + +#include "util/debug.h" +#include "util/disk_cache.h" +#include "util/mesa-sha1.h" +#include "util/u_atomic.h" + +VkResult +panvk_CreatePipelineCache(VkDevice _device, + const VkPipelineCacheCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkPipelineCache *pPipelineCache) +{ + VK_FROM_HANDLE(panvk_device, device, _device); + struct panvk_pipeline_cache *cache; + + cache = vk_object_alloc(&device->vk, pAllocator, sizeof(*cache), + VK_OBJECT_TYPE_PIPELINE_CACHE); + if (cache == NULL) + return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); + + if (pAllocator) + cache->alloc = *pAllocator; + else + cache->alloc = device->vk.alloc; + + *pPipelineCache = panvk_pipeline_cache_to_handle(cache); + return VK_SUCCESS; +} + +void +panvk_DestroyPipelineCache(VkDevice _device, + VkPipelineCache _cache, + const VkAllocationCallbacks *pAllocator) +{ + VK_FROM_HANDLE(panvk_device, device, _device); + VK_FROM_HANDLE(panvk_pipeline_cache, cache, _cache); + + vk_object_free(&device->vk, pAllocator, cache); +} + +VkResult +panvk_GetPipelineCacheData(VkDevice _device, + VkPipelineCache _cache, + size_t *pDataSize, + void *pData) +{ + panvk_stub(); + return VK_SUCCESS; +} + +VkResult +panvk_MergePipelineCaches(VkDevice _device, + VkPipelineCache destCache, + uint32_t srcCacheCount, + const VkPipelineCache *pSrcCaches) +{ + panvk_stub(); + return VK_SUCCESS; +} diff --git a/src/panfrost/vulkan/panvk_private.h b/src/panfrost/vulkan/panvk_private.h new file mode 100644 index 00000000000..8d74748bd0a --- /dev/null +++ b/src/panfrost/vulkan/panvk_private.h @@ -0,0 +1,994 @@ +/* + * Copyright © 2021 Collabora Ltd. + * + * derived from tu_private.h driver which is: + * Copyright © 2016 Red Hat. + * Copyright © 2016 Bas Nieuwenhuizen + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */
+
+#ifndef PANVK_PRIVATE_H
+#define PANVK_PRIVATE_H
+
+#include <assert.h>
+#include <pthread.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#ifdef HAVE_VALGRIND
+#include <valgrind.h>
+#include <memcheck.h>
+#define VG(x) x
+#else
+#define VG(x)
+#endif
+
+#include "c11/threads.h"
+#include "compiler/shader_enums.h"
+#include "util/list.h"
+#include "util/macros.h"
+#include "vk_alloc.h"
+#include "vk_device.h"
+#include "vk_instance.h"
+#include "vk_object.h"
+#include "vk_physical_device.h"
+#include "wsi_common.h"
+
+#include "drm-uapi/panfrost_drm.h"
+
+#include "midgard/midgard_compile.h"
+
+#include "pan_blend.h"
+#include "pan_blitter.h"
+#include "pan_cs.h"
+#include "pan_device.h"
+#include "pan_pool.h"
+#include "pan_texture.h"
+#include "pan_scoreboard.h"
+#include "pan_shader.h"
+#include "panvk_varyings.h"
+
+/* Pre-declarations needed for WSI entrypoints */
+struct wl_surface;
+struct wl_display;
+typedef struct xcb_connection_t xcb_connection_t;
+typedef uint32_t xcb_visualid_t;
+typedef uint32_t xcb_window_t;
+
+#include <vulkan/vulkan.h>
+#include <vulkan/vk_android_native_buffer.h>
+#include <vulkan/vk_icd.h>
+
+#include "panvk_entrypoints.h"
+
+#define MAX_BIND_POINTS 2 /* compute + graphics */
+#define MAX_VBS 16
+#define MAX_VERTEX_ATTRIBS 16
+#define MAX_RTS 8
+#define MAX_VSC_PIPES 32
+#define MAX_VIEWPORTS 1
+#define MAX_SCISSORS 16
+#define MAX_DISCARD_RECTANGLES 4
+#define MAX_PUSH_CONSTANTS_SIZE 128
+#define MAX_PUSH_DESCRIPTORS 32
+#define MAX_DYNAMIC_UNIFORM_BUFFERS 16
+#define MAX_DYNAMIC_STORAGE_BUFFERS 8
+#define MAX_DYNAMIC_BUFFERS \
+   (MAX_DYNAMIC_UNIFORM_BUFFERS + MAX_DYNAMIC_STORAGE_BUFFERS)
+#define MAX_SAMPLES_LOG2 4
+#define NUM_META_FS_KEYS 13
+#define PANVK_MAX_DRM_DEVICES 1
+#define MAX_VIEWS 8
+
+#define NUM_DEPTH_CLEAR_PIPELINES 3
+
+#define panvk_printflike(a, b) __attribute__((__format__(__printf__, a, b)))
+
+/* Whenever we generate an error, pass it through this function. Useful for
+ * debugging, where we can break on it. Only call at error site, not when
+ * propagating errors. Might be useful to plug in a stack trace here.
+ */
+
+struct panvk_instance;
+
+VkResult
+__vk_errorf(struct panvk_instance *instance,
+            VkResult error,
+            const char *file,
+            int line,
+            const char *format,
+            ...);
+
+#define vk_error(instance, error) \
+   __vk_errorf(instance, error, __FILE__, __LINE__, NULL);
+#define vk_errorf(instance, error, format, ...) \
+   __vk_errorf(instance, error, __FILE__, __LINE__, format, ##__VA_ARGS__);
+
+void
+panvk_logi(const char *format, ...) panvk_printflike(1, 2);
+void
+panvk_logi_v(const char *format, va_list va);
+
+#define panvk_stub() assert(!"stub")
+
+struct panvk_meta {
+   struct pan_pool bin_pool;
+   struct pan_pool desc_pool;
+};
+
+struct panvk_physical_device {
+   struct vk_physical_device vk;
+
+   /* The API agnostic device object.
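+    * It is shared with the Gallium driver and owns the DRM fd used by
+    * the rest of the panfrost code (see pan_device.h).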
*/ + struct panfrost_device pdev; + + struct panvk_instance *instance; + + char path[20]; + char name[VK_MAX_PHYSICAL_DEVICE_NAME_SIZE]; + uint8_t driver_uuid[VK_UUID_SIZE]; + uint8_t device_uuid[VK_UUID_SIZE]; + uint8_t cache_uuid[VK_UUID_SIZE]; + + struct wsi_device wsi_device; + struct panvk_meta meta; + + int local_fd; + int master_fd; +}; + +void +panvk_meta_init(struct panvk_physical_device *dev); + +void +panvk_meta_cleanup(struct panvk_physical_device *dev); + + +enum panvk_debug_flags { + PANVK_DEBUG_STARTUP = 1 << 0, + PANVK_DEBUG_NIR = 1 << 1, + PANVK_DEBUG_TRACE = 1 << 2, + PANVK_DEBUG_SYNC = 1 << 3, + PANVK_DEBUG_AFBC = 1 << 4, + PANVK_DEBUG_LINEAR = 1 << 5, +}; + +struct panvk_instance { + struct vk_instance vk; + + uint32_t api_version; + int physical_device_count; + struct panvk_physical_device physical_devices[PANVK_MAX_DRM_DEVICES]; + + enum panvk_debug_flags debug_flags; +}; + +VkResult +panvk_wsi_init(struct panvk_physical_device *physical_device); +void +panvk_wsi_finish(struct panvk_physical_device *physical_device); + +bool +panvk_instance_extension_supported(const char *name); +uint32_t +panvk_physical_device_api_version(struct panvk_physical_device *dev); +bool +panvk_physical_device_extension_supported(struct panvk_physical_device *dev, + const char *name); + +struct panvk_pipeline_cache { + struct vk_object_base base; + VkAllocationCallbacks alloc; +}; + +/* queue types */ +#define PANVK_QUEUE_GENERAL 0 + +#define PANVK_MAX_QUEUE_FAMILIES 1 + +struct panvk_queue { + struct vk_object_base base; + struct panvk_device *device; + uint32_t queue_family_index; + VkDeviceQueueCreateFlags flags; + uint32_t sync; +}; + +struct panvk_device { + struct vk_device vk; + + struct panvk_instance *instance; + + struct panvk_queue *queues[PANVK_MAX_QUEUE_FAMILIES]; + int queue_count[PANVK_MAX_QUEUE_FAMILIES]; + + struct panvk_physical_device *physical_device; + int _lost; +}; + +VkResult _panvk_device_set_lost(struct panvk_device *device, + const char *file, int line, + const char *msg, ...) PRINTFLIKE(4, 5); +#define panvk_device_set_lost(dev, ...) 
\ + _panvk_device_set_lost(dev, __FILE__, __LINE__, __VA_ARGS__) + +static inline bool +panvk_device_is_lost(struct panvk_device *device) +{ + return unlikely(p_atomic_read(&device->_lost)); +} + +struct panvk_batch { + struct list_head node; + struct util_dynarray jobs; + struct pan_scoreboard scoreboard; + struct { + const struct panvk_framebuffer *info; + struct panfrost_ptr desc; + } fb; + struct { + struct panfrost_bo *src, *dst; + } blit; + struct panfrost_ptr tls; + mali_ptr fragment_job; + struct { + struct pan_tiler_context ctx; + struct panfrost_ptr bifrost_descs; + union { + struct { + struct mali_bifrost_tiler_heap_packed heap; + struct mali_bifrost_tiler_packed tiler; + } bifrost; + struct mali_midgard_tiler_packed midgard; + } templ; + } tiler; + bool issued; +}; + +struct panvk_syncobj { + uint32_t permanent, temporary; +}; + +struct panvk_fence { + struct vk_object_base base; + struct panvk_syncobj syncobj; +}; + +struct panvk_semaphore { + struct vk_object_base base; + struct panvk_syncobj syncobj; +}; + +int +panvk_signal_syncobjs(struct panvk_device *device, + struct panvk_syncobj *syncobj1, + struct panvk_syncobj *syncobj2); + +int +panvk_syncobj_to_fd(struct panvk_device *device, + struct panvk_syncobj *sync); + +struct panvk_device_memory { + struct vk_object_base base; + struct panfrost_bo *bo; +}; + +struct panvk_descriptor { + union { + struct { + VkImageLayout layout; + struct panvk_image_view *view; + struct panvk_sampler *sampler; + } image; + + struct { + struct panvk_buffer *buffer; + uint64_t offset; + uint64_t range; + } buffer_info; + + struct panvk_buffer_view *buffer_view; + }; +}; + +struct panvk_descriptor_set { + struct vk_object_base base; + struct panvk_descriptor_pool *pool; + const struct panvk_descriptor_set_layout *layout; + struct panvk_descriptor *descs; + struct mali_uniform_buffer_packed *ubos; + struct mali_midgard_sampler_packed *samplers; + union { + struct mali_bifrost_texture_packed *bifrost; + mali_ptr *midgard; + } textures; +}; + +#define MAX_SETS 4 + +struct panvk_descriptor_set_binding_layout { + VkDescriptorType type; + + /* Number of array elements in this binding */ + unsigned array_size; + + /* Indices in the desc arrays */ + unsigned desc_idx; + union { + struct { + unsigned sampler_idx; + unsigned tex_idx; + }; + struct { + union { + unsigned ssbo_idx; + unsigned ubo_idx; + }; + unsigned dynoffset_idx; + }; + }; + + /* Shader stages affected by this set+binding */ + uint16_t shader_stages; + + struct panvk_sampler **immutable_samplers; +}; + +struct panvk_descriptor_set_layout { + struct vk_object_base base; + + /* The create flags for this descriptor set layout */ + VkDescriptorSetLayoutCreateFlags flags; + + /* Shader stages affected by this descriptor set */ + uint16_t shader_stages; + + unsigned num_descs; + unsigned num_samplers; + unsigned num_textures; + unsigned num_ubos; + unsigned num_ssbos; + unsigned num_dynoffsets; + + /* Number of bindings in this descriptor set */ + uint32_t binding_count; + + /* Bindings in this descriptor set */ + struct panvk_descriptor_set_binding_layout bindings[0]; +}; + +struct panvk_pipeline_layout { + struct vk_object_base base; + unsigned char sha1[20]; + + unsigned num_samplers; + unsigned num_textures; + unsigned num_ubos; + unsigned num_ssbos; + unsigned num_dynoffsets; + uint32_t num_sets; + + struct { + struct panvk_descriptor_set_layout *layout; + unsigned sampler_offset; + unsigned tex_offset; + unsigned ubo_offset; + unsigned ssbo_offset; + unsigned dynoffset_offset; + } 
sets[MAX_SETS]; +}; + +struct panvk_desc_pool_counters { + unsigned samplers; + unsigned combined_image_samplers; + unsigned sampled_images; + unsigned storage_images; + unsigned uniform_texel_bufs; + unsigned storage_texel_bufs; + unsigned input_attachments; + unsigned uniform_bufs; + unsigned storage_bufs; + unsigned uniform_dyn_bufs; + unsigned storage_dyn_bufs; + unsigned sets; +}; + +struct panvk_descriptor_pool { + struct vk_object_base base; + struct panvk_desc_pool_counters max; + struct panvk_desc_pool_counters cur; + struct panvk_descriptor_set *sets; +}; + +struct panvk_buffer { + struct vk_object_base base; + VkDeviceSize size; + + VkBufferUsageFlags usage; + VkBufferCreateFlags flags; + + struct panfrost_bo *bo; + VkDeviceSize bo_offset; +}; + +enum panvk_dynamic_state_bits { + PANVK_DYNAMIC_VIEWPORT = 1 << 0, + PANVK_DYNAMIC_SCISSOR = 1 << 1, + PANVK_DYNAMIC_LINE_WIDTH = 1 << 2, + PANVK_DYNAMIC_DEPTH_BIAS = 1 << 3, + PANVK_DYNAMIC_BLEND_CONSTANTS = 1 << 4, + PANVK_DYNAMIC_DEPTH_BOUNDS = 1 << 5, + PANVK_DYNAMIC_STENCIL_COMPARE_MASK = 1 << 6, + PANVK_DYNAMIC_STENCIL_WRITE_MASK = 1 << 7, + PANVK_DYNAMIC_STENCIL_REFERENCE = 1 << 8, + PANVK_DYNAMIC_DISCARD_RECTANGLE = 1 << 9, + PANVK_DYNAMIC_ALL = (1 << 10) - 1, +}; + +struct panvk_descriptor_state { + struct { + const struct panvk_descriptor_set *set; + struct panfrost_ptr dynoffsets; + } sets[MAX_SETS]; + mali_ptr sysvals[MESA_SHADER_STAGES]; + mali_ptr ubos; + mali_ptr textures; + mali_ptr samplers; +}; + +struct panvk_draw_info { + unsigned first_index; + unsigned index_count; + unsigned first_vertex; + unsigned vertex_count; + unsigned padded_vertex_count; + unsigned first_instance; + unsigned instance_count; + int vertex_offset; + unsigned offset_start; + struct mali_invocation_packed invocation; + struct { + mali_ptr varyings; + mali_ptr attributes; + mali_ptr push_constants; + } stages[MESA_SHADER_STAGES]; + mali_ptr varying_bufs; + mali_ptr attribute_bufs; + mali_ptr textures; + mali_ptr samplers; + mali_ptr ubos; + mali_ptr position; + union { + mali_ptr psiz; + float line_width; + }; + mali_ptr tls; + mali_ptr fb; + const struct pan_tiler_context *tiler_ctx; + mali_ptr fs_rsd; + mali_ptr viewport; + struct { + struct panfrost_ptr vertex; + struct panfrost_ptr tiler; + } jobs; +}; + +struct panvk_attrib_info { + unsigned buf; + unsigned offset; + enum pipe_format format; +}; + +struct panvk_attrib_buf_info { + bool special; + union { + struct { + unsigned stride; + bool per_instance; + }; + unsigned special_id; + }; +}; + +struct panvk_attribs_info { + struct panvk_attrib_info attrib[PAN_MAX_ATTRIBUTE]; + unsigned attrib_count; + struct panvk_attrib_buf_info buf[PAN_MAX_ATTRIBUTE]; + unsigned buf_count; +}; + +struct panvk_attrib_buf { + mali_ptr address; + unsigned size; +}; + +struct panvk_cmd_state { + VkPipelineBindPoint bind_point; + + struct panvk_pipeline *pipeline; + + uint32_t dirty; + + struct panvk_varyings_info varyings; + mali_ptr fs_rsd; + + struct { + float constants[8][4]; + } blend; + + struct { + struct pan_compute_dim wg_count; + } compute; + + struct { + struct { + float constant_factor; + float clamp; + float slope_factor; + } depth_bias; + float line_width; + } rast; + + struct { + struct panvk_attrib_buf bufs[MAX_VBS]; + unsigned count; + mali_ptr attribs; + mali_ptr attrib_bufs; + } vb; + + /* Index buffer */ + struct { + struct panvk_buffer *buffer; + uint64_t offset; + uint32_t type; + uint32_t max_index_count; + uint8_t index_size; + uint64_t index_va; + } ib; + + struct { + struct { + uint8_t 
compare_mask; + uint8_t write_mask; + uint8_t ref; + } s_front, s_back; + } zs; + + const struct panvk_render_pass *pass; + const struct panvk_subpass *subpass; + const struct panvk_framebuffer *framebuffer; + VkRect2D render_area; + + struct panvk_clear_value *clear; + + mali_ptr vpd; + VkViewport viewport; + VkRect2D scissor; + + struct panvk_batch *batch; +}; + +struct panvk_cmd_pool { + struct vk_object_base base; + VkAllocationCallbacks alloc; + uint32_t queue_family_index; +}; + +enum panvk_cmd_buffer_status { + PANVK_CMD_BUFFER_STATUS_INVALID, + PANVK_CMD_BUFFER_STATUS_INITIAL, + PANVK_CMD_BUFFER_STATUS_RECORDING, + PANVK_CMD_BUFFER_STATUS_EXECUTABLE, + PANVK_CMD_BUFFER_STATUS_PENDING, +}; + +struct panvk_cmd_buffer { + struct vk_object_base base; + + struct panvk_device *device; + + struct panvk_cmd_pool *pool; + struct pan_pool desc_pool; + struct pan_pool varying_pool; + struct pan_pool tls_pool; + struct list_head batches; + + VkCommandBufferUsageFlags usage_flags; + VkCommandBufferLevel level; + enum panvk_cmd_buffer_status status; + + struct panvk_cmd_state state; + uint32_t queue_family_index; + + uint8_t push_constants[MAX_PUSH_CONSTANTS_SIZE]; + VkShaderStageFlags push_constant_stages; + struct panvk_descriptor_set meta_push_descriptors; + + struct panvk_descriptor_state descriptors[MAX_BIND_POINTS]; + + VkResult record_result; +}; + +void +panvk_cmd_open_batch(struct panvk_cmd_buffer *cmdbuf); + +void +panvk_cmd_close_batch(struct panvk_cmd_buffer *cmdbuf); + +void +panvk_cmd_get_midgard_polygon_list(struct panvk_cmd_buffer *cmdbuf, + unsigned width, unsigned height, + bool has_draws); + +void +panvk_cmd_get_bifrost_tiler_context(struct panvk_cmd_buffer *cmdbuf, + unsigned width, unsigned height); + +void +panvk_pack_color(struct panvk_clear_value *out, + const VkClearColorValue *in, + enum pipe_format format); + +struct panvk_event { + struct vk_object_base base; +}; + +struct panvk_shader_module { + struct vk_object_base base; + unsigned char sha1[20]; + + uint32_t code_size; + const uint32_t *code[0]; +}; + +struct panvk_shader { + struct pan_shader_info info; + struct util_dynarray binary; + unsigned sysval_ubo; +}; + +struct panvk_shader * +panvk_shader_create(struct panvk_device *dev, + gl_shader_stage stage, + const VkPipelineShaderStageCreateInfo *stage_info, + const struct panvk_pipeline_layout *layout, + unsigned sysval_ubo, + struct pan_blend_state *blend_state, + bool static_blend_constants, + const VkAllocationCallbacks *alloc); + +void +panvk_shader_destroy(struct panvk_device *dev, + struct panvk_shader *shader, + const VkAllocationCallbacks *alloc); + +union panvk_sysval_data { + float f32[4]; + double f64[2]; + uint32_t u32[4]; + uint64_t u64[2]; +}; + +struct panvk_pipeline { + struct vk_object_base base; + + struct panvk_varyings_info varyings; + struct panvk_attribs_info attribs; + + const struct panvk_pipeline_layout *layout; + + unsigned active_stages; + + uint32_t dynamic_state_mask; + + struct panfrost_bo *binary_bo; + struct panfrost_bo *state_bo; + + mali_ptr vpd; + mali_ptr rsds[MESA_SHADER_STAGES]; + + unsigned num_ubos; + unsigned num_sysvals; + + struct { + unsigned ubo_idx; + mali_ptr ubo; + struct panfrost_sysvals ids; + uint32_t dirty_mask; + } sysvals[MESA_SHADER_STAGES]; + + unsigned tls_size; + unsigned wls_size; + + struct { + mali_ptr address; + struct pan_shader_info info; + struct mali_renderer_state_packed rsd_template; + bool required; + bool dynamic_rsd; + } fs; + + struct { + enum mali_draw_mode topology; + bool 
writes_point_size; + bool primitive_restart; + } ia; + + struct { + bool clamp_depth; + float line_width; + struct { + bool enable; + float constant_factor; + float clamp; + float slope_factor; + } depth_bias; + bool front_ccw; + bool cull_front_face; + bool cull_back_face; + } rast; + + struct { + bool z_test; + bool z_write; + enum mali_func z_compare_func; + bool s_test; + struct { + enum mali_stencil_op fail_op; + enum mali_stencil_op pass_op; + enum mali_stencil_op z_fail_op; + enum mali_func compare_func; + uint8_t compare_mask; + uint8_t write_mask; + uint8_t ref; + } s_front, s_back; + } zs; + + struct { + uint8_t rast_samples; + uint8_t min_samples; + uint16_t sample_mask; + bool alpha_to_coverage; + bool alpha_to_one; + } ms; + + struct { + struct pan_blend_state state; + struct mali_blend_packed bd_template[8]; + struct { + uint8_t index; + uint16_t bifrost_factor; + } constant[8]; + } blend; + + VkViewport viewport; + VkRect2D scissor; +}; + +bool +panvk_blend_needs_lowering(const struct panfrost_device *dev, + const struct pan_blend_state *state, + unsigned rt); + +struct panvk_image_level { + VkDeviceSize offset; + VkDeviceSize size; + uint32_t pitch; +}; + +struct panvk_slice_layout { + unsigned width; + unsigned height; + unsigned depth; + unsigned offset; + unsigned line_stride; + unsigned size; + + /* If there is a header preceding each slice, how big is + * that header? Used for AFBC. + */ + unsigned afbc_header_size; + + /* If checksumming is enabled following the slice, what + * is its offset/stride? + */ + struct { + unsigned offset; + unsigned stride; + unsigned size; + } checksum; +}; + +#define PANVK_MAX_MIP_LEVELS 13 + +struct panvk_plane_layout { + struct panvk_slice_layout slices[PANVK_MAX_MIP_LEVELS]; + unsigned offset; + unsigned array_stride; + unsigned size; +}; + +struct panvk_plane_memory { + const struct panfrost_bo *bo; + unsigned offset; +}; + +#define PANVK_MAX_PLANES 1 + +struct panvk_image { + struct vk_object_base base; + struct pan_image pimage; + VkImageType type; + + /* The original VkFormat provided by the client. This may not match any + * of the actual surface formats. + */ + VkFormat vk_format; + VkImageAspectFlags aspects; + VkImageUsageFlags usage; /**< Superset of VkImageCreateInfo::usage. 
*/ + VkImageTiling tiling; /** VkImageCreateInfo::tiling */ + VkImageCreateFlags flags; /** VkImageCreateInfo::flags */ + VkExtent3D extent; + + unsigned queue_family_mask; + bool exclusive; + bool shareable; +}; + +unsigned +panvk_image_get_plane_size(const struct panvk_image *image, unsigned plane); + +unsigned +panvk_image_get_total_size(const struct panvk_image *image); + +struct panvk_image_view { + struct vk_object_base base; + struct pan_image_view pview; + + VkFormat vk_format; + struct panfrost_bo *bo; + struct { + struct mali_bifrost_texture_packed tex_desc; + } bifrost; +}; + +struct panvk_sampler { + struct vk_object_base base; + struct mali_midgard_sampler_packed desc; +}; + +struct panvk_buffer_view { + struct vk_object_base base; +}; + +struct panvk_attachment_info { + struct panvk_image_view *iview; +}; + +struct panvk_framebuffer { + struct vk_object_base base; + + uint32_t width; + uint32_t height; + uint32_t layers; + + uint32_t attachment_count; + struct panvk_attachment_info attachments[0]; +}; + +struct panvk_clear_value { + union { + uint32_t color[4]; + struct { + float depth; + uint8_t stencil; + }; + }; +}; + +struct panvk_subpass_attachment { + uint32_t idx; + VkImageLayout layout; + bool clear; +}; + +struct panvk_subpass { + uint32_t input_count; + uint32_t color_count; + struct panvk_subpass_attachment *input_attachments; + uint8_t active_color_attachments; + struct panvk_subpass_attachment *color_attachments; + struct panvk_subpass_attachment *resolve_attachments; + struct panvk_subpass_attachment zs_attachment; + + uint32_t view_mask; +}; + +struct panvk_render_pass_attachment { + VkAttachmentDescriptionFlags flags; + enum pipe_format format; + unsigned samples; + VkAttachmentLoadOp load_op; + VkAttachmentStoreOp store_op; + VkAttachmentLoadOp stencil_load_op; + VkAttachmentStoreOp stencil_store_op; + VkImageLayout initial_layout; + VkImageLayout final_layout; + unsigned view_mask; + unsigned clear_subpass; +}; + +struct panvk_render_pass { + struct vk_object_base base; + + uint32_t attachment_count; + uint32_t subpass_count; + struct panvk_subpass_attachment *subpass_attachments; + struct panvk_render_pass_attachment *attachments; + struct panvk_subpass subpasses[0]; +}; + +static inline enum mali_func +panvk_translate_compare_func(VkCompareOp comp) +{ + STATIC_ASSERT(VK_COMPARE_OP_NEVER == (VkCompareOp)MALI_FUNC_NEVER); + STATIC_ASSERT(VK_COMPARE_OP_LESS == (VkCompareOp)MALI_FUNC_LESS); + STATIC_ASSERT(VK_COMPARE_OP_EQUAL == (VkCompareOp)MALI_FUNC_EQUAL); + STATIC_ASSERT(VK_COMPARE_OP_LESS_OR_EQUAL == (VkCompareOp)MALI_FUNC_LEQUAL); + STATIC_ASSERT(VK_COMPARE_OP_GREATER == (VkCompareOp)MALI_FUNC_GREATER); + STATIC_ASSERT(VK_COMPARE_OP_NOT_EQUAL == (VkCompareOp)MALI_FUNC_NOT_EQUAL); + STATIC_ASSERT(VK_COMPARE_OP_GREATER_OR_EQUAL == (VkCompareOp)MALI_FUNC_GEQUAL); + STATIC_ASSERT(VK_COMPARE_OP_ALWAYS == (VkCompareOp)MALI_FUNC_ALWAYS); + + return (enum mali_func)comp; +} + +VK_DEFINE_HANDLE_CASTS(panvk_cmd_buffer, base, VkCommandBuffer, VK_OBJECT_TYPE_COMMAND_BUFFER) +VK_DEFINE_HANDLE_CASTS(panvk_device, vk.base, VkDevice, VK_OBJECT_TYPE_DEVICE) +VK_DEFINE_HANDLE_CASTS(panvk_instance, vk.base, VkInstance, VK_OBJECT_TYPE_INSTANCE) +VK_DEFINE_HANDLE_CASTS(panvk_physical_device, vk.base, VkPhysicalDevice, VK_OBJECT_TYPE_PHYSICAL_DEVICE) +VK_DEFINE_HANDLE_CASTS(panvk_queue, base, VkQueue, VK_OBJECT_TYPE_QUEUE) + +VK_DEFINE_NONDISP_HANDLE_CASTS(panvk_cmd_pool, base, VkCommandPool, VK_OBJECT_TYPE_COMMAND_POOL) +VK_DEFINE_NONDISP_HANDLE_CASTS(panvk_buffer, base, 
VkBuffer, VK_OBJECT_TYPE_BUFFER) +VK_DEFINE_NONDISP_HANDLE_CASTS(panvk_buffer_view, base, VkBufferView, VK_OBJECT_TYPE_BUFFER_VIEW) +VK_DEFINE_NONDISP_HANDLE_CASTS(panvk_descriptor_pool, base, VkDescriptorPool, VK_OBJECT_TYPE_DESCRIPTOR_POOL) +VK_DEFINE_NONDISP_HANDLE_CASTS(panvk_descriptor_set, base, VkDescriptorSet, VK_OBJECT_TYPE_DESCRIPTOR_SET) +VK_DEFINE_NONDISP_HANDLE_CASTS(panvk_descriptor_set_layout, base, + VkDescriptorSetLayout, VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT) +VK_DEFINE_NONDISP_HANDLE_CASTS(panvk_device_memory, base, VkDeviceMemory, VK_OBJECT_TYPE_DEVICE_MEMORY) +VK_DEFINE_NONDISP_HANDLE_CASTS(panvk_fence, base, VkFence, VK_OBJECT_TYPE_FENCE) +VK_DEFINE_NONDISP_HANDLE_CASTS(panvk_event, base, VkEvent, VK_OBJECT_TYPE_EVENT) +VK_DEFINE_NONDISP_HANDLE_CASTS(panvk_framebuffer, base, VkFramebuffer, VK_OBJECT_TYPE_FRAMEBUFFER) +VK_DEFINE_NONDISP_HANDLE_CASTS(panvk_image, base, VkImage, VK_OBJECT_TYPE_IMAGE) +VK_DEFINE_NONDISP_HANDLE_CASTS(panvk_image_view, base, VkImageView, VK_OBJECT_TYPE_IMAGE_VIEW); +VK_DEFINE_NONDISP_HANDLE_CASTS(panvk_pipeline_cache, base, VkPipelineCache, VK_OBJECT_TYPE_PIPELINE_CACHE) +VK_DEFINE_NONDISP_HANDLE_CASTS(panvk_pipeline, base, VkPipeline, VK_OBJECT_TYPE_PIPELINE) +VK_DEFINE_NONDISP_HANDLE_CASTS(panvk_pipeline_layout, base, VkPipelineLayout, VK_OBJECT_TYPE_PIPELINE_LAYOUT) +VK_DEFINE_NONDISP_HANDLE_CASTS(panvk_render_pass, base, VkRenderPass, VK_OBJECT_TYPE_RENDER_PASS) +VK_DEFINE_NONDISP_HANDLE_CASTS(panvk_sampler, base, VkSampler, VK_OBJECT_TYPE_SAMPLER) +VK_DEFINE_NONDISP_HANDLE_CASTS(panvk_shader_module, base, VkShaderModule, VK_OBJECT_TYPE_SHADER_MODULE) +VK_DEFINE_NONDISP_HANDLE_CASTS(panvk_semaphore, base, VkSemaphore, VK_OBJECT_TYPE_SEMAPHORE) + +#endif /* PANVK_PRIVATE_H */ diff --git a/src/panfrost/vulkan/panvk_query.c b/src/panfrost/vulkan/panvk_query.c new file mode 100644 index 00000000000..6f9ca04ea46 --- /dev/null +++ b/src/panfrost/vulkan/panvk_query.c @@ -0,0 +1,104 @@ +/* + * Copyright © 2021 Collabora Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include "panvk_private.h" + +VkResult +panvk_CreateQueryPool(VkDevice _device, + const VkQueryPoolCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkQueryPool *pQueryPool) +{ + panvk_stub(); + return VK_SUCCESS; +} + +void +panvk_DestroyQueryPool(VkDevice _device, + VkQueryPool _pool, + const VkAllocationCallbacks *pAllocator) +{ + panvk_stub(); +} + +VkResult +panvk_GetQueryPoolResults(VkDevice _device, + VkQueryPool queryPool, + uint32_t firstQuery, + uint32_t queryCount, + size_t dataSize, + void *pData, + VkDeviceSize stride, + VkQueryResultFlags flags) +{ + panvk_stub(); + return VK_SUCCESS; +} + +void +panvk_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer, + VkQueryPool queryPool, + uint32_t firstQuery, + uint32_t queryCount, + VkBuffer dstBuffer, + VkDeviceSize dstOffset, + VkDeviceSize stride, + VkQueryResultFlags flags) +{ + panvk_stub(); +} + +void +panvk_CmdResetQueryPool(VkCommandBuffer commandBuffer, + VkQueryPool queryPool, + uint32_t firstQuery, + uint32_t queryCount) +{ + panvk_stub(); +} + +void +panvk_CmdBeginQuery(VkCommandBuffer commandBuffer, + VkQueryPool queryPool, + uint32_t query, + VkQueryControlFlags flags) +{ + panvk_stub(); +} + +void +panvk_CmdEndQuery(VkCommandBuffer commandBuffer, + VkQueryPool queryPool, + uint32_t query) +{ + panvk_stub(); +} + +void +panvk_CmdWriteTimestamp(VkCommandBuffer commandBuffer, + VkPipelineStageFlagBits pipelineStage, + VkQueryPool queryPool, + uint32_t query) +{ + panvk_stub(); +} diff --git a/src/panfrost/vulkan/panvk_shader.c b/src/panfrost/vulkan/panvk_shader.c new file mode 100644 index 00000000000..756ad24e011 --- /dev/null +++ b/src/panfrost/vulkan/panvk_shader.c @@ -0,0 +1,466 @@ +/* + * Copyright © 2021 Collabora Ltd. + * + * Derived from tu_shader.c which is: + * Copyright © 2019 Google LLC + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include "panvk_private.h" + +#include "nir_builder.h" +#include "nir_lower_blend.h" +#include "spirv/nir_spirv.h" +#include "util/mesa-sha1.h" + +#include "panfrost-quirks.h" +#include "pan_shader.h" + +static nir_shader * +panvk_spirv_to_nir(const void *code, + size_t codesize, + gl_shader_stage stage, + const char *entry_point_name, + const VkSpecializationInfo *spec_info, + const nir_shader_compiler_options *nir_options) +{ + /* TODO these are made-up */ + const struct spirv_to_nir_options spirv_options = { + .caps = { false }, + .ubo_addr_format = nir_address_format_32bit_index_offset, + .ssbo_addr_format = nir_address_format_32bit_index_offset, + }; + + /* convert VkSpecializationInfo */ + struct nir_spirv_specialization *spec = NULL; + uint32_t num_spec = 0; + if (spec_info && spec_info->mapEntryCount) { + spec = malloc(sizeof(*spec) * spec_info->mapEntryCount); + if (!spec) + return NULL; + + for (uint32_t i = 0; i < spec_info->mapEntryCount; i++) { + const VkSpecializationMapEntry *entry = &spec_info->pMapEntries[i]; + const void *data = spec_info->pData + entry->offset; + assert(data + entry->size <= spec_info->pData + spec_info->dataSize); + spec[i].id = entry->constantID; + switch (entry->size) { + case 8: + spec[i].value.u64 = *(const uint64_t *)data; + break; + case 4: + spec[i].value.u32 = *(const uint32_t *)data; + break; + case 2: + spec[i].value.u16 = *(const uint16_t *)data; + break; + case 1: + spec[i].value.u8 = *(const uint8_t *)data; + break; + default: + assert(!"Invalid spec constant size"); + break; + } + + spec[i].defined_on_module = false; + } + + num_spec = spec_info->mapEntryCount; + } + + nir_shader *nir = spirv_to_nir(code, codesize / sizeof(uint32_t), spec, + num_spec, stage, entry_point_name, + &spirv_options, nir_options); + + free(spec); + + assert(nir->info.stage == stage); + nir_validate_shader(nir, "after spirv_to_nir"); + + return nir; +} + +struct panvk_lower_misc_ctx { + struct panvk_shader *shader; + const struct panvk_pipeline_layout *layout; +}; + +static unsigned +get_fixed_sampler_index(nir_deref_instr *deref, + const struct panvk_lower_misc_ctx *ctx) +{ + nir_variable *var = nir_deref_instr_get_variable(deref); + unsigned set = var->data.descriptor_set; + unsigned binding = var->data.binding; + const struct panvk_descriptor_set_binding_layout *bind_layout = + &ctx->layout->sets[set].layout->bindings[binding]; + + return bind_layout->sampler_idx + ctx->layout->sets[set].sampler_offset; +} + +static unsigned +get_fixed_texture_index(nir_deref_instr *deref, + const struct panvk_lower_misc_ctx *ctx) +{ + nir_variable *var = nir_deref_instr_get_variable(deref); + unsigned set = var->data.descriptor_set; + unsigned binding = var->data.binding; + const struct panvk_descriptor_set_binding_layout *bind_layout = + &ctx->layout->sets[set].layout->bindings[binding]; + + return bind_layout->tex_idx + ctx->layout->sets[set].tex_offset; +} + +static bool +lower_tex(nir_builder *b, nir_tex_instr *tex, + const struct panvk_lower_misc_ctx *ctx) +{ + bool progress = false; + int sampler_src_idx = nir_tex_instr_src_index(tex, nir_tex_src_sampler_deref); + + b->cursor = nir_before_instr(&tex->instr); + + if (sampler_src_idx >= 0) { + nir_deref_instr *deref = nir_src_as_deref(tex->src[sampler_src_idx].src); + tex->sampler_index = get_fixed_sampler_index(deref, ctx); + nir_tex_instr_remove_src(tex, sampler_src_idx); + progress = true; + } + + int tex_src_idx = nir_tex_instr_src_index(tex, nir_tex_src_texture_deref); + if (tex_src_idx >= 0) { + 
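+      /* Same lowering as for samplers above: rewrite the deref into a
+       * flattened index in the per-pipeline texture table. */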
nir_deref_instr *deref = nir_src_as_deref(tex->src[tex_src_idx].src); + tex->texture_index = get_fixed_texture_index(deref, ctx); + nir_tex_instr_remove_src(tex, tex_src_idx); + progress = true; + } + + return progress; +} + +static void +lower_vulkan_resource_index(nir_builder *b, nir_intrinsic_instr *intr, + const struct panvk_lower_misc_ctx *ctx) +{ + nir_ssa_def *vulkan_idx = intr->src[0].ssa; + + unsigned set = nir_intrinsic_desc_set(intr); + unsigned binding = nir_intrinsic_binding(intr); + struct panvk_descriptor_set_layout *set_layout = ctx->layout->sets[set].layout; + struct panvk_descriptor_set_binding_layout *binding_layout = + &set_layout->bindings[binding]; + unsigned base; + + switch (binding_layout->type) { + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: + base = binding_layout->ubo_idx + ctx->layout->sets[set].ubo_offset; + break; + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: + base = binding_layout->ssbo_idx + ctx->layout->sets[set].ssbo_offset; + break; + default: + unreachable("Invalid descriptor type"); + break; + } + + b->cursor = nir_before_instr(&intr->instr); + nir_ssa_def *idx = nir_iadd(b, nir_imm_int(b, base), vulkan_idx); + nir_ssa_def_rewrite_uses(&intr->dest.ssa, idx); + nir_instr_remove(&intr->instr); +} + +static void +lower_load_vulkan_descriptor(nir_builder *b, nir_intrinsic_instr *intrin) +{ + /* Loading the descriptor happens as part of the load/store instruction so + * this is a no-op. + */ + b->cursor = nir_before_instr(&intrin->instr); + nir_ssa_def *val = nir_vec2(b, intrin->src[0].ssa, nir_imm_int(b, 0)); + nir_ssa_def_rewrite_uses(&intrin->dest.ssa, val); + nir_instr_remove(&intrin->instr); +} + +static bool +lower_intrinsic(nir_builder *b, nir_intrinsic_instr *intr, + const struct panvk_lower_misc_ctx *ctx) +{ + switch (intr->intrinsic) { + case nir_intrinsic_vulkan_resource_index: + lower_vulkan_resource_index(b, intr, ctx); + return true; + case nir_intrinsic_load_vulkan_descriptor: + lower_load_vulkan_descriptor(b, intr); + return true; + default: + return false; + } + +} + +static bool +panvk_lower_misc_instr(nir_builder *b, + nir_instr *instr, + void *data) +{ + const struct panvk_lower_misc_ctx *ctx = data; + + switch (instr->type) { + case nir_instr_type_tex: + return lower_tex(b, nir_instr_as_tex(instr), ctx); + case nir_instr_type_intrinsic: + return lower_intrinsic(b, nir_instr_as_intrinsic(instr), ctx); + default: + return false; + } +} + +static bool +panvk_lower_misc(nir_shader *nir, const struct panvk_lower_misc_ctx *ctx) +{ + return nir_shader_instructions_pass(nir, panvk_lower_misc_instr, + nir_metadata_block_index | + nir_metadata_dominance, + (void *)ctx); +} + +static void +panvk_lower_blend(struct panfrost_device *pdev, + nir_shader *nir, + struct pan_blend_state *blend_state, + bool static_blend_constants) +{ + nir_lower_blend_options options = { + .logicop_enable = blend_state->logicop_enable, + .logicop_func = blend_state->logicop_func, + }; + + bool lower_blend = false; + for (unsigned rt = 0; rt < blend_state->rt_count; rt++) { + if (!panvk_blend_needs_lowering(pdev, blend_state, rt)) + continue; + + const struct pan_blend_rt_state *rt_state = &blend_state->rts[rt]; + options.rt[rt].colormask = rt_state->equation.color_mask; + options.format[rt] = rt_state->format; + if (!rt_state->equation.blend_enable) { + static const nir_lower_blend_channel replace = { + .func = BLEND_FUNC_ADD, + .src_factor = BLEND_FACTOR_ZERO, + 
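+            /* An inverted ZERO factor means ONE (see
+             * inverted_blend_factor()), so this passes the source
+             * colour through unmodified. */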
.invert_src_factor = true, + .dst_factor = BLEND_FACTOR_ZERO, + .invert_dst_factor = false, + }; + + options.rt[rt].rgb = replace; + options.rt[rt].alpha = replace; + } else { + options.rt[rt].rgb.func = rt_state->equation.rgb_func; + options.rt[rt].rgb.src_factor = rt_state->equation.rgb_src_factor; + options.rt[rt].rgb.invert_src_factor = rt_state->equation.rgb_invert_src_factor; + options.rt[rt].rgb.dst_factor = rt_state->equation.rgb_dst_factor; + options.rt[rt].rgb.invert_dst_factor = rt_state->equation.rgb_invert_dst_factor; + options.rt[rt].alpha.func = rt_state->equation.alpha_func; + options.rt[rt].alpha.src_factor = rt_state->equation.alpha_src_factor; + options.rt[rt].alpha.invert_src_factor = rt_state->equation.alpha_invert_src_factor; + options.rt[rt].alpha.dst_factor = rt_state->equation.alpha_dst_factor; + options.rt[rt].alpha.invert_dst_factor = rt_state->equation.alpha_invert_dst_factor; + } + + lower_blend = true; + } + + /* FIXME: currently untested */ + assert(!lower_blend); + + if (lower_blend) + NIR_PASS_V(nir, nir_lower_blend, options); +} + +struct panvk_shader * +panvk_shader_create(struct panvk_device *dev, + gl_shader_stage stage, + const VkPipelineShaderStageCreateInfo *stage_info, + const struct panvk_pipeline_layout *layout, + unsigned sysval_ubo, + struct pan_blend_state *blend_state, + bool static_blend_constants, + const VkAllocationCallbacks *alloc) +{ + const struct panvk_shader_module *module = panvk_shader_module_from_handle(stage_info->module); + struct panfrost_device *pdev = &dev->physical_device->pdev; + struct panvk_shader *shader; + + shader = vk_zalloc2(&dev->vk.alloc, alloc, sizeof(*shader), 8, + VK_SYSTEM_ALLOCATION_SCOPE_COMMAND); + if (!shader) + return NULL; + + util_dynarray_init(&shader->binary, NULL); + + /* translate SPIR-V to NIR */ + assert(module->code_size % 4 == 0); + nir_shader *nir = panvk_spirv_to_nir(module->code, + module->code_size, + stage, stage_info->pName, + stage_info->pSpecializationInfo, + pan_shader_get_compiler_options(pdev)); + if (!nir) { + vk_free2(&dev->vk.alloc, alloc, shader); + return NULL; + } + + if (stage == MESA_SHADER_FRAGMENT) + panvk_lower_blend(pdev, nir, blend_state, static_blend_constants); + + /* multi step inlining procedure */ + NIR_PASS_V(nir, nir_lower_variable_initializers, nir_var_function_temp); + NIR_PASS_V(nir, nir_lower_returns); + NIR_PASS_V(nir, nir_inline_functions); + NIR_PASS_V(nir, nir_copy_prop); + NIR_PASS_V(nir, nir_opt_deref); + foreach_list_typed_safe(nir_function, func, node, &nir->functions) { + if (!func->is_entrypoint) + exec_node_remove(&func->node); + } + assert(exec_list_length(&nir->functions) == 1); + NIR_PASS_V(nir, nir_lower_variable_initializers, ~nir_var_function_temp); + + /* Split member structs. We do this before lower_io_to_temporaries so that + * it doesn't lower system values to temporaries by accident. 
+ */ + NIR_PASS_V(nir, nir_split_var_copies); + NIR_PASS_V(nir, nir_split_per_member_structs); + + NIR_PASS_V(nir, nir_remove_dead_variables, + nir_var_shader_in | nir_var_shader_out | + nir_var_system_value | nir_var_mem_shared, + NULL); + + NIR_PASS_V(nir, nir_lower_io_to_temporaries, + nir_shader_get_entrypoint(nir), true, true); + + NIR_PASS_V(nir, nir_lower_indirect_derefs, + nir_var_shader_in | nir_var_shader_out, + UINT32_MAX); + + NIR_PASS_V(nir, nir_opt_copy_prop_vars); + NIR_PASS_V(nir, nir_opt_combine_stores, nir_var_all); + + NIR_PASS_V(nir, nir_lower_uniforms_to_ubo, true, false); + NIR_PASS_V(nir, nir_lower_explicit_io, + nir_var_mem_ubo | nir_var_mem_ssbo, + nir_address_format_32bit_index_offset); + + nir_assign_io_var_locations(nir, nir_var_shader_in, &nir->num_inputs, stage); + nir_assign_io_var_locations(nir, nir_var_shader_out, &nir->num_outputs, stage); + + NIR_PASS_V(nir, nir_lower_system_values); + NIR_PASS_V(nir, nir_lower_compute_system_values, NULL); + + NIR_PASS_V(nir, nir_lower_var_copies); + + struct panvk_lower_misc_ctx ctx = { + .shader = shader, + .layout = layout, + }; + NIR_PASS_V(nir, panvk_lower_misc, &ctx); + + nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir)); + if (unlikely(dev->physical_device->instance->debug_flags & PANVK_DEBUG_NIR)) { + fprintf(stderr, "translated nir:\n"); + nir_print_shader(nir, stderr); + } + + struct panfrost_compile_inputs inputs = { + .gpu_id = pdev->gpu_id, + .no_ubo_to_push = true, + .sysval_ubo = sysval_ubo, + }; + + pan_shader_compile(pdev, nir, &inputs, &shader->binary, &shader->info); + + /* Patch the descriptor count */ + shader->info.ubo_count = + shader->info.sysvals.sysval_count ? sysval_ubo + 1 : layout->num_ubos; + shader->info.sampler_count = layout->num_samplers; + shader->info.texture_count = layout->num_textures; + + shader->sysval_ubo = sysval_ubo; + + ralloc_free(nir); + + return shader; +} + +void +panvk_shader_destroy(struct panvk_device *dev, + struct panvk_shader *shader, + const VkAllocationCallbacks *alloc) +{ + util_dynarray_fini(&shader->binary); + vk_free2(&dev->vk.alloc, alloc, shader); +} + +VkResult +panvk_CreateShaderModule(VkDevice _device, + const VkShaderModuleCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkShaderModule *pShaderModule) +{ + VK_FROM_HANDLE(panvk_device, device, _device); + struct panvk_shader_module *module; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO); + assert(pCreateInfo->flags == 0); + assert(pCreateInfo->codeSize % 4 == 0); + + module = vk_object_zalloc(&device->vk, pAllocator, + sizeof(*module) + pCreateInfo->codeSize, + VK_OBJECT_TYPE_SHADER_MODULE); + if (module == NULL) + return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); + + module->code_size = pCreateInfo->codeSize; + memcpy(module->code, pCreateInfo->pCode, pCreateInfo->codeSize); + + _mesa_sha1_compute(module->code, module->code_size, module->sha1); + + *pShaderModule = panvk_shader_module_to_handle(module); + + return VK_SUCCESS; +} + +void +panvk_DestroyShaderModule(VkDevice _device, + VkShaderModule _module, + const VkAllocationCallbacks *pAllocator) +{ + VK_FROM_HANDLE(panvk_device, device, _device); + VK_FROM_HANDLE(panvk_shader_module, module, _module); + + if (!module) + return; + + vk_object_free(&device->vk, pAllocator, module); +} diff --git a/src/panfrost/vulkan/panvk_sync.c b/src/panfrost/vulkan/panvk_sync.c new file mode 100644 index 00000000000..7cef829bf95 --- /dev/null +++ b/src/panfrost/vulkan/panvk_sync.c @@ -0,0 
+1,417 @@
+/*
+ * Copyright (C) 2021 Collabora Ltd.
+ *
+ * Derived from tu_drm.c which is:
+ * Copyright © 2018 Google, Inc.
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include <xf86drm.h>
+
+#include "panvk_private.h"
+
+static VkResult
+sync_create(struct panvk_device *device,
+            struct panvk_syncobj *sync,
+            bool signaled)
+{
+   const struct panfrost_device *pdev = &device->physical_device->pdev;
+
+   struct drm_syncobj_create create = {
+      .flags = signaled ? DRM_SYNCOBJ_CREATE_SIGNALED : 0,
+   };
+
+   int ret = drmIoctl(pdev->fd, DRM_IOCTL_SYNCOBJ_CREATE, &create);
+   if (ret)
+      return VK_ERROR_OUT_OF_HOST_MEMORY;
+
+   sync->permanent = create.handle;
+
+   return VK_SUCCESS;
+}
+
+static void
+sync_set_temporary(struct panvk_device *device, struct panvk_syncobj *sync,
+                   uint32_t syncobj)
+{
+   const struct panfrost_device *pdev = &device->physical_device->pdev;
+
+   if (sync->temporary) {
+      struct drm_syncobj_destroy destroy = { .handle = sync->temporary };
+      drmIoctl(pdev->fd, DRM_IOCTL_SYNCOBJ_DESTROY, &destroy);
+   }
+
+   sync->temporary = syncobj;
+}
+
+static void
+sync_destroy(struct panvk_device *device, struct panvk_syncobj *sync)
+{
+   const struct panfrost_device *pdev = &device->physical_device->pdev;
+
+   if (!sync)
+      return;
+
+   sync_set_temporary(device, sync, 0);
+   struct drm_syncobj_destroy destroy = { .handle = sync->permanent };
+   drmIoctl(pdev->fd, DRM_IOCTL_SYNCOBJ_DESTROY, &destroy);
+}
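+
+/* Vulkan lets imports target either the temporary or the permanent payload
+ * of a syncobj. Sync-file imports always go through a temporary syncobj
+ * (hence the assert below), which is dropped again when the object is
+ * reset or exported (see sync_export()).
+ */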
+static VkResult
+sync_import(struct panvk_device *device, struct panvk_syncobj *sync,
+            bool temporary, bool sync_fd, int fd)
+{
+   const struct panfrost_device *pdev = &device->physical_device->pdev;
+   int ret;
+
+   if (!sync_fd) {
+      uint32_t *dst = temporary ? &sync->temporary : &sync->permanent;
+
+      struct drm_syncobj_handle handle = { .fd = fd };
+      ret = drmIoctl(pdev->fd, DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE, &handle);
+      if (ret)
+         return VK_ERROR_INVALID_EXTERNAL_HANDLE;
+
+      if (*dst) {
+         struct drm_syncobj_destroy destroy = { .handle = *dst };
+         drmIoctl(pdev->fd, DRM_IOCTL_SYNCOBJ_DESTROY, &destroy);
+      }
+      *dst = handle.handle;
+      close(fd);
+   } else {
+      assert(temporary);
+
+      struct drm_syncobj_create create = {};
+
+      if (fd == -1)
+         create.flags |= DRM_SYNCOBJ_CREATE_SIGNALED;
+
+      ret = drmIoctl(pdev->fd, DRM_IOCTL_SYNCOBJ_CREATE, &create);
+      if (ret)
+         return VK_ERROR_INVALID_EXTERNAL_HANDLE;
+
+      if (fd != -1) {
+         struct drm_syncobj_handle handle = {
+            .fd = fd,
+            .handle = create.handle,
+            .flags = DRM_SYNCOBJ_FD_TO_HANDLE_FLAGS_IMPORT_SYNC_FILE,
+         };
+
+         ret = drmIoctl(pdev->fd, DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE, &handle);
+         if (ret) {
+            struct drm_syncobj_destroy destroy = { .handle = create.handle };
+            drmIoctl(pdev->fd, DRM_IOCTL_SYNCOBJ_DESTROY, &destroy);
+            return VK_ERROR_INVALID_EXTERNAL_HANDLE;
+         }
+         close(fd);
+      }
+
+      sync_set_temporary(device, sync, create.handle);
+   }
+
+   return VK_SUCCESS;
+}
+
+static VkResult
+sync_export(struct panvk_device *device, struct panvk_syncobj *sync,
+            bool sync_fd, int *p_fd)
+{
+   const struct panfrost_device *pdev = &device->physical_device->pdev;
+
+   struct drm_syncobj_handle handle = {
+      .handle = sync->temporary ? : sync->permanent,
+      .flags = sync_fd ? DRM_SYNCOBJ_HANDLE_TO_FD_FLAGS_EXPORT_SYNC_FILE : 0,
+      .fd = -1,
+   };
+   int ret = drmIoctl(pdev->fd, DRM_IOCTL_SYNCOBJ_HANDLE_TO_FD, &handle);
+   if (ret)
+      return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE);
+
+   /* restore permanent payload on export */
+   sync_set_temporary(device, sync, 0);
+
+   *p_fd = handle.fd;
+   return VK_SUCCESS;
+}
+
+VkResult
+panvk_CreateSemaphore(VkDevice _device,
+                      const VkSemaphoreCreateInfo *pCreateInfo,
+                      const VkAllocationCallbacks *pAllocator,
+                      VkSemaphore *pSemaphore)
+{
+   VK_FROM_HANDLE(panvk_device, device, _device);
+   struct panvk_semaphore *sem =
+      vk_object_zalloc(&device->vk, pAllocator, sizeof(*sem),
+                       VK_OBJECT_TYPE_SEMAPHORE);
+   if (!sem)
+      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   VkResult ret = sync_create(device, &sem->syncobj, false);
+   if (ret != VK_SUCCESS) {
+      vk_object_free(&device->vk, pAllocator, sem);
+      return ret;
+   }
+
+   *pSemaphore = panvk_semaphore_to_handle(sem);
+   return VK_SUCCESS;
+}
+
+void
+panvk_DestroySemaphore(VkDevice _device, VkSemaphore _sem, const VkAllocationCallbacks *pAllocator)
+{
+   VK_FROM_HANDLE(panvk_device, device, _device);
+   VK_FROM_HANDLE(panvk_semaphore, sem, _sem);
+
+   sync_destroy(device, &sem->syncobj);
+   vk_object_free(&device->vk, pAllocator, sem);
+}
+
+VkResult
+panvk_ImportSemaphoreFdKHR(VkDevice _device, const VkImportSemaphoreFdInfoKHR *info)
+{
+   VK_FROM_HANDLE(panvk_device, device, _device);
+   VK_FROM_HANDLE(panvk_semaphore, sem, info->semaphore);
+   bool temp = info->flags & VK_SEMAPHORE_IMPORT_TEMPORARY_BIT;
+   bool sync_fd = info->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT;
+
+   return sync_import(device, &sem->syncobj, temp, sync_fd, info->fd);
+}
+
+VkResult
+panvk_GetSemaphoreFdKHR(VkDevice _device, const VkSemaphoreGetFdInfoKHR *info, int *pFd)
+{
+   VK_FROM_HANDLE(panvk_device, device, _device);
+   VK_FROM_HANDLE(panvk_semaphore, sem, info->semaphore);
+   bool sync_fd = info->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT;
+
+   return sync_export(device, &sem->syncobj, sync_fd, pFd);
+}
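+
+/* Fences are built on the same DRM syncobj primitive as semaphores; only
+ * the create-signaled flag and the external handle type differ.
+ */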
+VkResult
+panvk_CreateFence(VkDevice _device,
+                  const VkFenceCreateInfo *info,
+                  const VkAllocationCallbacks *pAllocator,
+                  VkFence *pFence)
+{
+   VK_FROM_HANDLE(panvk_device, device, _device);
+   struct panvk_fence *fence =
+      vk_object_zalloc(&device->vk, pAllocator, sizeof(*fence),
+                       VK_OBJECT_TYPE_FENCE);
+   if (!fence)
+      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   VkResult ret = sync_create(device, &fence->syncobj,
+                              info->flags & VK_FENCE_CREATE_SIGNALED_BIT);
+   if (ret != VK_SUCCESS) {
+      vk_object_free(&device->vk, pAllocator, fence);
+      return ret;
+   }
+
+   *pFence = panvk_fence_to_handle(fence);
+   return VK_SUCCESS;
+}
+
+void
+panvk_DestroyFence(VkDevice _device, VkFence _fence,
+                   const VkAllocationCallbacks *pAllocator)
+{
+   VK_FROM_HANDLE(panvk_device, device, _device);
+   VK_FROM_HANDLE(panvk_fence, fence, _fence);
+
+   sync_destroy(device, &fence->syncobj);
+   vk_object_free(&device->vk, pAllocator, fence);
+}
+
+VkResult
+panvk_ImportFenceFdKHR(VkDevice _device, const VkImportFenceFdInfoKHR *info)
+{
+   VK_FROM_HANDLE(panvk_device, device, _device);
+   VK_FROM_HANDLE(panvk_fence, fence, info->fence);
+   bool sync_fd = info->handleType == VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT;
+   bool temp = info->flags & VK_FENCE_IMPORT_TEMPORARY_BIT;
+
+   return sync_import(device, &fence->syncobj, temp, sync_fd, info->fd);
+}
+
+VkResult
+panvk_GetFenceFdKHR(VkDevice _device, const VkFenceGetFdInfoKHR *info, int *pFd)
+{
+   VK_FROM_HANDLE(panvk_device, device, _device);
+   VK_FROM_HANDLE(panvk_fence, fence, info->fence);
+   bool sync_fd = info->handleType == VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT;
+
+   return sync_export(device, &fence->syncobj, sync_fd, pFd);
+}
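+
+/* Wraps DRM_IOCTL_SYNCOBJ_WAIT. WAIT_FOR_SUBMIT is required because a
+ * fence may legally be waited on before the submission that signals it
+ * has attached a payload to the syncobj.
+ */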
+static VkResult
+drm_syncobj_wait(struct panvk_device *device,
+                 const uint32_t *handles, uint32_t count_handles,
+                 int64_t timeout_nsec, bool wait_all)
+{
+   const struct panfrost_device *pdev = &device->physical_device->pdev;
+   struct drm_syncobj_wait wait = {
+      .handles = (uint64_t) (uintptr_t) handles,
+      .count_handles = count_handles,
+      .timeout_nsec = timeout_nsec,
+      .flags = DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT |
+               (wait_all ? DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL : 0)
+   };
+
+   int ret = drmIoctl(pdev->fd, DRM_IOCTL_SYNCOBJ_WAIT, &wait);
+   if (ret) {
+      if (errno == ETIME)
+         return VK_TIMEOUT;
+
+      assert(0);
+      return VK_ERROR_DEVICE_LOST; /* TODO */
+   }
+   return VK_SUCCESS;
+}
+
+static uint64_t
+gettime_ns(void)
+{
+   struct timespec current;
+   clock_gettime(CLOCK_MONOTONIC, &current);
+   return (uint64_t)current.tv_sec * 1000000000 + current.tv_nsec;
+}
+
+/* The syncobj wait ioctl takes an absolute CLOCK_MONOTONIC timeout, so
+ * convert the relative Vulkan timeout here (and the kernel converts it
+ * right back to a relative timeout - very smart UAPI).
+ */
+static uint64_t
+absolute_timeout(uint64_t timeout)
+{
+   if (timeout == 0)
+      return 0;
+   uint64_t current_time = gettime_ns();
+   uint64_t max_timeout = (uint64_t) INT64_MAX - current_time;
+
+   timeout = MIN2(max_timeout, timeout);
+
+   return (current_time + timeout);
+}
+
+VkResult
+panvk_WaitForFences(VkDevice _device,
+                    uint32_t fenceCount,
+                    const VkFence *pFences,
+                    VkBool32 waitAll,
+                    uint64_t timeout)
+{
+   VK_FROM_HANDLE(panvk_device, device, _device);
+
+   if (panvk_device_is_lost(device))
+      return VK_ERROR_DEVICE_LOST;
+
+   uint32_t handles[fenceCount];
+   for (unsigned i = 0; i < fenceCount; ++i) {
+      VK_FROM_HANDLE(panvk_fence, fence, pFences[i]);
+
+      if (fence->syncobj.temporary) {
+         handles[i] = fence->syncobj.temporary;
+      } else {
+         handles[i] = fence->syncobj.permanent;
+      }
+   }
+
+   return drm_syncobj_wait(device, handles, fenceCount, absolute_timeout(timeout), waitAll);
+}
+
+VkResult
+panvk_ResetFences(VkDevice _device, uint32_t fenceCount, const VkFence *pFences)
+{
+   VK_FROM_HANDLE(panvk_device, device, _device);
+   const struct panfrost_device *pdev = &device->physical_device->pdev;
+   int ret;
+
+   uint32_t handles[fenceCount];
+   for (unsigned i = 0; i < fenceCount; ++i) {
+      VK_FROM_HANDLE(panvk_fence, fence, pFences[i]);
+
+      sync_set_temporary(device, &fence->syncobj, 0);
+      handles[i] = fence->syncobj.permanent;
+   }
+
+   struct drm_syncobj_array objs = {
+      .handles = (uint64_t) (uintptr_t) handles,
+      .count_handles = fenceCount,
+   };
+
+   ret = drmIoctl(pdev->fd, DRM_IOCTL_SYNCOBJ_RESET, &objs);
+   if (ret) {
+      panvk_device_set_lost(device, "DRM_IOCTL_SYNCOBJ_RESET failure: %s",
+                            strerror(errno));
+   }
+
+   return VK_SUCCESS;
+}
+
+VkResult
+panvk_GetFenceStatus(VkDevice _device, VkFence _fence)
+{
+   VK_FROM_HANDLE(panvk_device, device, _device);
+   VK_FROM_HANDLE(panvk_fence, fence, _fence);
+   uint32_t handle = fence->syncobj.temporary ? : fence->syncobj.permanent;
+   VkResult result;
+
+   result = drm_syncobj_wait(device, &handle, 1, 0, false);
+   if (result == VK_TIMEOUT)
+      result = VK_NOT_READY;
+   return result;
+}
+
+int
+panvk_signal_syncobjs(struct panvk_device *device,
+                      struct panvk_syncobj *syncobj1,
+                      struct panvk_syncobj *syncobj2)
+{
+   const struct panfrost_device *pdev = &device->physical_device->pdev;
+   uint32_t handles[2], count = 0;
+
+   if (syncobj1)
+      handles[count++] = syncobj1->temporary ?: syncobj1->permanent;
+
+   if (syncobj2)
+      handles[count++] = syncobj2->temporary ?: syncobj2->permanent;
+
+   if (!count)
+      return 0;
+
+   struct drm_syncobj_array objs = {
+      .handles = (uint64_t) (uintptr_t) handles,
+      .count_handles = count
+   };
+
+   return drmIoctl(pdev->fd, DRM_IOCTL_SYNCOBJ_SIGNAL, &objs);
+}
+
+int
+panvk_syncobj_to_fd(struct panvk_device *device, struct panvk_syncobj *sync)
+{
+   const struct panfrost_device *pdev = &device->physical_device->pdev;
+   struct drm_syncobj_handle handle = { .handle = sync->permanent };
+   int ret;
+
+   ret = drmIoctl(pdev->fd, DRM_IOCTL_SYNCOBJ_HANDLE_TO_FD, &handle);
+
+   return ret ? -1 : handle.fd;
+}
diff --git a/src/panfrost/vulkan/panvk_util.c b/src/panfrost/vulkan/panvk_util.c
new file mode 100644
index 00000000000..bf29b0bee89
--- /dev/null
+++ b/src/panfrost/vulkan/panvk_util.c
@@ -0,0 +1,87 @@
+/*
+ * Copyright © 2021 Collabora Ltd.
+ *
+ * Derived from tu_util.c which is:
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include "panvk_private.h"
+
+#include <assert.h>
+#include <errno.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "util/u_math.h"
+#include "vk_enum_to_str.h"
+
+/** Log an informational message. */
+void panvk_printflike(1, 2)
+panvk_logi(const char *format, ...)
+{
+   va_list va;
+
+   va_start(va, format);
+   panvk_logi_v(format, va);
+   va_end(va);
+}
+
+/** \see panvk_logi() */
+void
+panvk_logi_v(const char *format, va_list va)
+{
+   fprintf(stderr, "panvk: info: ");
+   vfprintf(stderr, format, va);
+   fprintf(stderr, "\n");
+}
+
+VkResult
+__vk_errorf(struct panvk_instance *instance,
+            VkResult error,
+            const char *file,
+            int line,
+            const char *format,
+            ...)
+{
+   va_list ap;
+   char buffer[256];
+
+   const char *error_str = vk_Result_to_str(error);
+
+#ifndef DEBUG
+   return error;
+#endif
+
+   if (format) {
+      va_start(ap, format);
+      vsnprintf(buffer, sizeof(buffer), format, ap);
+      va_end(ap);
+
+      fprintf(stderr, "%s:%d: %s (%s)\n", file, line, buffer, error_str);
+   } else {
+      fprintf(stderr, "%s:%d: %s\n", file, line, error_str);
+   }
+
+   return error;
+}
diff --git a/src/panfrost/vulkan/panvk_varyings.c b/src/panfrost/vulkan/panvk_varyings.c
new file mode 100644
index 00000000000..1f33bfcb92e
--- /dev/null
+++ b/src/panfrost/vulkan/panvk_varyings.c
@@ -0,0 +1,58 @@
+/*
+ * Copyright (C) 2021 Collabora Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include "panvk_private.h" +#include "panvk_varyings.h" + +#include "pan_pool.h" + +unsigned +panvk_varyings_buf_count(const struct panvk_device *dev, + struct panvk_varyings_info *varyings) +{ + const struct panfrost_device *pdev = &dev->physical_device->pdev; + + return util_bitcount(varyings->buf_mask) + (pan_is_bifrost(pdev) ? 1 : 0); +} + +void +panvk_varyings_alloc(struct panvk_varyings_info *varyings, + struct pan_pool *varying_mem_pool, + unsigned vertex_count) +{ + for (unsigned i = 0; i < PANVK_VARY_BUF_MAX; i++) { + if (!(varyings->buf_mask & (1 << i))) continue; + + unsigned buf_idx = panvk_varying_buf_index(varyings, i); + unsigned size = varyings->buf[buf_idx].stride * vertex_count; + if (!size) + continue; + + struct panfrost_ptr ptr = + panfrost_pool_alloc_aligned(varying_mem_pool, size, 64); + + varyings->buf[buf_idx].size = size; + varyings->buf[buf_idx].address = ptr.gpu; + varyings->buf[buf_idx].cpu = ptr.cpu; + } +} diff --git a/src/panfrost/vulkan/panvk_varyings.h b/src/panfrost/vulkan/panvk_varyings.h new file mode 100644 index 00000000000..c1e9a0851f7 --- /dev/null +++ b/src/panfrost/vulkan/panvk_varyings.h @@ -0,0 +1,144 @@ +/* + * Copyright (C) 2021 Collabora Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef PANVK_VARYINGS_H +#define PANVK_VARYINGS_H + +#include "util/bitset.h" +#include "util/format/u_format.h" + +#include "compiler/shader_enums.h" +#include "midgard_pack.h" +#include "panfrost-job.h" + +struct pan_pool; +struct panvk_device; + +enum panvk_varying_buf_id { + PANVK_VARY_BUF_GENERAL, + PANVK_VARY_BUF_POSITION, + PANVK_VARY_BUF_PSIZ, + PANVK_VARY_BUF_PNTCOORD, + PANVK_VARY_BUF_FRAGCOORD, + + /* Keep last */ + PANVK_VARY_BUF_MAX, +}; + +struct panvk_varying { + unsigned buf; + unsigned offset; + enum pipe_format format; +}; + +struct panvk_varying_buf { + mali_ptr address; + void *cpu; + unsigned stride; + unsigned size; +}; + +struct panvk_varyings_info { + struct panvk_varying varying[VARYING_SLOT_MAX]; + BITSET_DECLARE(active, VARYING_SLOT_MAX); + struct panvk_varying_buf buf[VARYING_SLOT_MAX]; + struct { + unsigned count; + gl_varying_slot loc[VARYING_SLOT_MAX]; + } stage[MESA_SHADER_STAGES]; + unsigned buf_mask; +}; + +void +panvk_varyings_alloc(struct panvk_varyings_info *varyings, + struct pan_pool *varying_mem_pool, + unsigned vertex_count); + +unsigned +panvk_varyings_buf_count(const struct panvk_device *dev, + struct panvk_varyings_info *varyings); + +static inline unsigned +panvk_varying_buf_index(const struct panvk_varyings_info *varyings, + enum panvk_varying_buf_id b) +{ + return util_bitcount(varyings->buf_mask & BITFIELD_MASK(b)); +} + +static inline enum panvk_varying_buf_id +panvk_varying_buf_id(bool fs, gl_varying_slot loc) +{ + switch (loc) { + case VARYING_SLOT_POS: + return fs ? PANVK_VARY_BUF_FRAGCOORD : PANVK_VARY_BUF_POSITION; + case VARYING_SLOT_PSIZ: + return PANVK_VARY_BUF_PSIZ; + case VARYING_SLOT_PNTC: + return PANVK_VARY_BUF_PNTCOORD; + default: + return PANVK_VARY_BUF_GENERAL; + } +} + +static inline bool +panvk_varying_is_builtin(gl_shader_stage stage, gl_varying_slot loc) +{ + bool fs = stage == MESA_SHADER_FRAGMENT; + + switch (loc) { + case VARYING_SLOT_POS: + case VARYING_SLOT_PNTC: + return fs; + default: + return false; + } +} + +static inline enum mali_attribute_special +panvk_varying_special_buf_id(enum panvk_varying_buf_id buf_id) +{ + switch (buf_id) { + case PANVK_VARY_BUF_PNTCOORD: + return MALI_ATTRIBUTE_SPECIAL_POINT_COORD; + case PANVK_VARY_BUF_FRAGCOORD: + return MALI_ATTRIBUTE_SPECIAL_FRAG_COORD; + default: + return 0; + } +} + +static inline unsigned +panvk_varying_size(const struct panvk_varyings_info *varyings, + gl_varying_slot loc) +{ + switch (loc) { + case VARYING_SLOT_POS: + return sizeof(float) * 4; + case VARYING_SLOT_PSIZ: + return sizeof(uint16_t); + default: + return util_format_get_blocksize(varyings->varying[loc].format); + } +} + +#endif diff --git a/src/panfrost/vulkan/panvk_wsi.c b/src/panfrost/vulkan/panvk_wsi.c new file mode 100644 index 00000000000..d4420aee89a --- /dev/null +++ b/src/panfrost/vulkan/panvk_wsi.c @@ -0,0 +1,293 @@ +/* + * Copyright © 2021 Collabora Ltd. 
+ * + * Derived from tu_wsi.c: + * Copyright © 2016 Red Hat + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include "panvk_private.h" + +#include "vk_util.h" +#include "wsi_common.h" + +static VKAPI_PTR PFN_vkVoidFunction +panvk_wsi_proc_addr(VkPhysicalDevice physicalDevice, const char *pName) +{ + VK_FROM_HANDLE(panvk_physical_device, pdevice, physicalDevice); + return vk_instance_get_proc_addr_unchecked(&pdevice->instance->vk, pName); +} + +VkResult +panvk_wsi_init(struct panvk_physical_device *physical_device) +{ + VkResult result; + + result = wsi_device_init(&physical_device->wsi_device, + panvk_physical_device_to_handle(physical_device), + panvk_wsi_proc_addr, + &physical_device->instance->vk.alloc, + physical_device->master_fd, NULL, + false); + if (result != VK_SUCCESS) + return result; + + physical_device->wsi_device.supports_modifiers = false; + + return VK_SUCCESS; +} + +void +panvk_wsi_finish(struct panvk_physical_device *physical_device) +{ + wsi_device_finish(&physical_device->wsi_device, + &physical_device->instance->vk.alloc); +} + +void +panvk_DestroySurfaceKHR(VkInstance _instance, + VkSurfaceKHR _surface, + const VkAllocationCallbacks *pAllocator) +{ + VK_FROM_HANDLE(panvk_instance, instance, _instance); + ICD_FROM_HANDLE(VkIcdSurfaceBase, surface, _surface); + + vk_free2(&instance->vk.alloc, pAllocator, surface); +} + +VkResult +panvk_GetPhysicalDeviceSurfaceSupportKHR(VkPhysicalDevice physicalDevice, + uint32_t queueFamilyIndex, + VkSurfaceKHR surface, + VkBool32 *pSupported) +{ + VK_FROM_HANDLE(panvk_physical_device, device, physicalDevice); + + return wsi_common_get_surface_support( + &device->wsi_device, queueFamilyIndex, surface, pSupported); +} + +VkResult +panvk_GetPhysicalDeviceSurfaceCapabilitiesKHR( + VkPhysicalDevice physicalDevice, + VkSurfaceKHR surface, + VkSurfaceCapabilitiesKHR *pSurfaceCapabilities) +{ + VK_FROM_HANDLE(panvk_physical_device, device, physicalDevice); + + return wsi_common_get_surface_capabilities(&device->wsi_device, surface, + pSurfaceCapabilities); +} + +VkResult +panvk_GetPhysicalDeviceSurfaceCapabilities2KHR( + VkPhysicalDevice physicalDevice, + const VkPhysicalDeviceSurfaceInfo2KHR *pSurfaceInfo, + VkSurfaceCapabilities2KHR *pSurfaceCapabilities) +{ + VK_FROM_HANDLE(panvk_physical_device, device, physicalDevice); + + return wsi_common_get_surface_capabilities2( + &device->wsi_device, pSurfaceInfo, pSurfaceCapabilities); +} + 
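+/* As above, the remaining surface, swapchain and present entrypoints are
+ * thin wrappers around Mesa's shared WSI layer (wsi_common), which
+ * implements the actual window-system integration; panvk only provides
+ * the wsi_device set up in panvk_wsi_init().
+ */
+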
+VkResult +panvk_GetPhysicalDeviceSurfaceCapabilities2EXT( + VkPhysicalDevice physicalDevice, + VkSurfaceKHR surface, + VkSurfaceCapabilities2EXT *pSurfaceCapabilities) +{ + VK_FROM_HANDLE(panvk_physical_device, device, physicalDevice); + + return wsi_common_get_surface_capabilities2ext( + &device->wsi_device, surface, pSurfaceCapabilities); +} + +VkResult +panvk_GetPhysicalDeviceSurfaceFormatsKHR(VkPhysicalDevice physicalDevice, + VkSurfaceKHR surface, + uint32_t *pSurfaceFormatCount, + VkSurfaceFormatKHR *pSurfaceFormats) +{ + VK_FROM_HANDLE(panvk_physical_device, device, physicalDevice); + + return wsi_common_get_surface_formats( + &device->wsi_device, surface, pSurfaceFormatCount, pSurfaceFormats); +} + +VkResult +panvk_GetPhysicalDeviceSurfaceFormats2KHR( + VkPhysicalDevice physicalDevice, + const VkPhysicalDeviceSurfaceInfo2KHR *pSurfaceInfo, + uint32_t *pSurfaceFormatCount, + VkSurfaceFormat2KHR *pSurfaceFormats) +{ + VK_FROM_HANDLE(panvk_physical_device, device, physicalDevice); + + return wsi_common_get_surface_formats2(&device->wsi_device, pSurfaceInfo, + pSurfaceFormatCount, + pSurfaceFormats); +} + +VkResult +panvk_GetPhysicalDeviceSurfacePresentModesKHR(VkPhysicalDevice physicalDevice, + VkSurfaceKHR surface, + uint32_t *pPresentModeCount, + VkPresentModeKHR *pPresentModes) +{ + VK_FROM_HANDLE(panvk_physical_device, device, physicalDevice); + + return wsi_common_get_surface_present_modes( + &device->wsi_device, surface, pPresentModeCount, pPresentModes); +} + +VkResult +panvk_CreateSwapchainKHR(VkDevice _device, + const VkSwapchainCreateInfoKHR *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkSwapchainKHR *pSwapchain) +{ + VK_FROM_HANDLE(panvk_device, device, _device); + + const VkAllocationCallbacks *alloc; + if (pAllocator) + alloc = pAllocator; + else + alloc = &device->vk.alloc; + + return wsi_common_create_swapchain(&device->physical_device->wsi_device, + panvk_device_to_handle(device), + pCreateInfo, alloc, pSwapchain); +} + +void +panvk_DestroySwapchainKHR(VkDevice _device, + VkSwapchainKHR swapchain, + const VkAllocationCallbacks *pAllocator) +{ + VK_FROM_HANDLE(panvk_device, device, _device); + const VkAllocationCallbacks *alloc; + + if (pAllocator) + alloc = pAllocator; + else + alloc = &device->vk.alloc; + + wsi_common_destroy_swapchain(_device, swapchain, alloc); +} + +VkResult +panvk_GetSwapchainImagesKHR(VkDevice device, + VkSwapchainKHR swapchain, + uint32_t *pSwapchainImageCount, + VkImage *pSwapchainImages) +{ + return wsi_common_get_images(swapchain, pSwapchainImageCount, + pSwapchainImages); +} + +VkResult +panvk_AcquireNextImageKHR(VkDevice device, + VkSwapchainKHR swapchain, + uint64_t timeout, + VkSemaphore semaphore, + VkFence fence, + uint32_t *pImageIndex) +{ + VkAcquireNextImageInfoKHR acquire_info = { + .sType = VK_STRUCTURE_TYPE_ACQUIRE_NEXT_IMAGE_INFO_KHR, + .swapchain = swapchain, + .timeout = timeout, + .semaphore = semaphore, + .fence = fence, + .deviceMask = 0, + }; + + return panvk_AcquireNextImage2KHR(device, &acquire_info, pImageIndex); +} + +VkResult +panvk_AcquireNextImage2KHR(VkDevice _device, + const VkAcquireNextImageInfoKHR *pAcquireInfo, + uint32_t *pImageIndex) +{ + VK_FROM_HANDLE(panvk_device, device, _device); + VK_FROM_HANDLE(panvk_fence, fence, pAcquireInfo->fence); + VK_FROM_HANDLE(panvk_semaphore, sem, pAcquireInfo->semaphore); + struct panvk_physical_device *pdevice = device->physical_device; + + VkResult result = + wsi_common_acquire_next_image2(&pdevice->wsi_device, _device, + pAcquireInfo, pImageIndex); + + /* 
signal fence/semaphore - image is available immediately */ + if (result == VK_SUCCESS || result == VK_SUBOPTIMAL_KHR) { + panvk_signal_syncobjs(device, fence ? &fence->syncobj : NULL, + sem ? &sem->syncobj : NULL); + } + + return result; +} + +VkResult +panvk_QueuePresentKHR(VkQueue _queue, const VkPresentInfoKHR *pPresentInfo) +{ + VK_FROM_HANDLE(panvk_queue, queue, _queue); + return wsi_common_queue_present( + &queue->device->physical_device->wsi_device, + panvk_device_to_handle(queue->device), _queue, queue->queue_family_index, + pPresentInfo); +} + +VkResult +panvk_GetDeviceGroupPresentCapabilitiesKHR( + VkDevice device, VkDeviceGroupPresentCapabilitiesKHR *pCapabilities) +{ + memset(pCapabilities->presentMask, 0, sizeof(pCapabilities->presentMask)); + pCapabilities->presentMask[0] = 0x1; + pCapabilities->modes = VK_DEVICE_GROUP_PRESENT_MODE_LOCAL_BIT_KHR; + + return VK_SUCCESS; +} + +VkResult +panvk_GetDeviceGroupSurfacePresentModesKHR( + VkDevice device, + VkSurfaceKHR surface, + VkDeviceGroupPresentModeFlagsKHR *pModes) +{ + *pModes = VK_DEVICE_GROUP_PRESENT_MODE_LOCAL_BIT_KHR; + + return VK_SUCCESS; +} + +VkResult +panvk_GetPhysicalDevicePresentRectanglesKHR(VkPhysicalDevice physicalDevice, + VkSurfaceKHR surface, + uint32_t *pRectCount, + VkRect2D *pRects) +{ + VK_FROM_HANDLE(panvk_physical_device, device, physicalDevice); + + return wsi_common_get_present_rectangles(&device->wsi_device, surface, + pRectCount, pRects); +} diff --git a/src/panfrost/vulkan/panvk_wsi_display.c b/src/panfrost/vulkan/panvk_wsi_display.c new file mode 100644 index 00000000000..28666e91cb4 --- /dev/null +++ b/src/panfrost/vulkan/panvk_wsi_display.c @@ -0,0 +1,136 @@ +/* + * Copyright © 2021 Collabora Ltd. + * + * Derived from v3dv driver: + * Copyright © 2020 Raspberry Pi + * Copyright © 2017 Keith Packard + * + * Permission to use, copy, modify, distribute, and sell this software and its + * documentation for any purpose is hereby granted without fee, provided that + * the above copyright notice appear in all copies and that both that copyright + * notice and this permission notice appear in supporting documentation, and + * that the name of the copyright holders not be used in advertising or + * publicity pertaining to distribution of the software without specific, + * written prior permission. The copyright holders make no representations + * about the suitability of this software for any purpose. It is provided "as + * is" without express or implied warranty. + * + * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, + * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO + * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT OR + * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, + * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER + * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE + * OF THIS SOFTWARE. 
+ */ +#include "panvk_private.h" +#include "wsi_common_display.h" + +VkResult +panvk_GetPhysicalDeviceDisplayPropertiesKHR(VkPhysicalDevice physical_device, + uint32_t *property_count, + VkDisplayPropertiesKHR *properties) +{ + VK_FROM_HANDLE(panvk_physical_device, pdevice, physical_device); + + return wsi_display_get_physical_device_display_properties( + physical_device, + &pdevice->wsi_device, + property_count, + properties); +} + +VkResult +panvk_GetPhysicalDeviceDisplayPlanePropertiesKHR(VkPhysicalDevice physical_device, + uint32_t *property_count, + VkDisplayPlanePropertiesKHR *properties) +{ + VK_FROM_HANDLE(panvk_physical_device, pdevice, physical_device); + + return wsi_display_get_physical_device_display_plane_properties( + physical_device, + &pdevice->wsi_device, + property_count, + properties); +} + +VkResult +panvk_GetDisplayPlaneSupportedDisplaysKHR(VkPhysicalDevice physical_device, + uint32_t plane_index, + uint32_t *display_count, + VkDisplayKHR *displays) +{ + VK_FROM_HANDLE(panvk_physical_device, pdevice, physical_device); + + return wsi_display_get_display_plane_supported_displays( + physical_device, + &pdevice->wsi_device, + plane_index, + display_count, + displays); +} + +VkResult +panvk_GetDisplayModePropertiesKHR(VkPhysicalDevice physical_device, + VkDisplayKHR display, + uint32_t *property_count, + VkDisplayModePropertiesKHR *properties) +{ + VK_FROM_HANDLE(panvk_physical_device, pdevice, physical_device); + + return wsi_display_get_display_mode_properties(physical_device, + &pdevice->wsi_device, + display, + property_count, + properties); +} + +VkResult +panvk_CreateDisplayModeKHR(VkPhysicalDevice physical_device, + VkDisplayKHR display, + const VkDisplayModeCreateInfoKHR *create_info, + const VkAllocationCallbacks *allocator, + VkDisplayModeKHR *mode) +{ + VK_FROM_HANDLE(panvk_physical_device, pdevice, physical_device); + + return wsi_display_create_display_mode(physical_device, + &pdevice->wsi_device, + display, + create_info, + allocator, + mode); +} + +VkResult +panvk_GetDisplayPlaneCapabilitiesKHR(VkPhysicalDevice physical_device, + VkDisplayModeKHR mode_khr, + uint32_t plane_index, + VkDisplayPlaneCapabilitiesKHR *capabilities) +{ + VK_FROM_HANDLE(panvk_physical_device, pdevice, physical_device); + + return wsi_get_display_plane_capabilities(physical_device, + &pdevice->wsi_device, + mode_khr, + plane_index, + capabilities); +} + +VkResult +panvk_CreateDisplayPlaneSurfaceKHR(VkInstance _instance, + const VkDisplaySurfaceCreateInfoKHR *create_info, + const VkAllocationCallbacks *allocator, + VkSurfaceKHR *surface) +{ + VK_FROM_HANDLE(panvk_instance, instance, _instance); + const VkAllocationCallbacks *alloc; + + if (allocator) + alloc = allocator; + else + alloc = &instance->vk.alloc; + + return wsi_create_display_surface(_instance, alloc, + create_info, surface); +} diff --git a/src/panfrost/vulkan/panvk_wsi_wayland.c b/src/panfrost/vulkan/panvk_wsi_wayland.c new file mode 100644 index 00000000000..f3b041992d3 --- /dev/null +++ b/src/panfrost/vulkan/panvk_wsi_wayland.c @@ -0,0 +1,61 @@ +/* + * Copyright © 2021 Collabora Ltd. 
+ * + * Derived from turnip driver: + * Copyright © 2016 Red Hat + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include "panvk_private.h" + +#include "wsi_common_wayland.h" + +VkBool32 +panvk_GetPhysicalDeviceWaylandPresentationSupportKHR( + VkPhysicalDevice physicalDevice, + uint32_t queueFamilyIndex, + struct wl_display *display) +{ + VK_FROM_HANDLE(panvk_physical_device, physical_device, physicalDevice); + + return wsi_wl_get_presentation_support(&physical_device->wsi_device, + display); +} + +VkResult +panvk_CreateWaylandSurfaceKHR(VkInstance _instance, + const VkWaylandSurfaceCreateInfoKHR *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkSurfaceKHR *pSurface) +{ + VK_FROM_HANDLE(panvk_instance, instance, _instance); + const VkAllocationCallbacks *alloc; + assert(pCreateInfo->sType == + VK_STRUCTURE_TYPE_WAYLAND_SURFACE_CREATE_INFO_KHR); + + if (pAllocator) + alloc = pAllocator; + else + alloc = &instance->vk.alloc; + + return wsi_create_wl_surface(alloc, pCreateInfo, pSurface); +}