/*
 * Copyright © 2019 Raspberry Pi
 *
 * based in part on anv driver which is:
 * Copyright © 2015 Intel Corporation
 *
 * based in part on radv driver which is:
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#ifndef V3DV_PRIVATE_H
#define V3DV_PRIVATE_H

#include <stdio.h>
#include <string.h>
#include <vulkan/vulkan.h>
#include <vulkan/vk_icd.h>
#include <vk_enum_to_str.h>

#include "vk_device.h"
#include "vk_format.h"
#include "vk_instance.h"
#include "vk_image.h"
#include "vk_log.h"
#include "vk_physical_device.h"
#include "vk_shader_module.h"
#include "vk_util.h"

#include "vk_command_buffer.h"
#include "vk_queue.h"

#include <xf86drm.h>

#ifdef HAVE_VALGRIND
#include <valgrind.h>
#include <memcheck.h>
#define VG(x) x
#else
#define VG(x) ((void)0)
#endif

#include "v3dv_limits.h"

#include "common/v3d_device_info.h"
#include "common/v3d_limits.h"
#include "common/v3d_tiling.h"
#include "common/v3d_util.h"

#include "compiler/shader_enums.h"
#include "compiler/spirv/nir_spirv.h"

#include "compiler/v3d_compiler.h"

#include "vk_debug_report.h"
#include "util/set.h"
#include "util/hash_table.h"
#include "util/xmlconfig.h"
#include "u_atomic.h"

#include "v3dv_entrypoints.h"
#include "v3dv_bo.h"

#include "drm-uapi/v3d_drm.h"

#include "vk_alloc.h"
#include "simulator/v3d_simulator.h"

#include "v3dv_cl.h"

#include "wsi_common.h"

/* A non-fatal assert. Useful for debugging. */
#ifdef DEBUG
#define v3dv_assert(x) ({ \
   if (unlikely(!(x))) \
      fprintf(stderr, "%s:%d ASSERT: %s", __FILE__, __LINE__, #x); \
})
#else
#define v3dv_assert(x)
#endif

#define perf_debug(...) do { \
   if (unlikely(V3D_DEBUG & V3D_DEBUG_PERF)) \
      fprintf(stderr, __VA_ARGS__); \
} while (0)

struct v3dv_instance;

#ifdef USE_V3D_SIMULATOR
#define using_v3d_simulator true
#else
#define using_v3d_simulator false
#endif

struct v3d_simulator_file;

/* Minimum required by the Vulkan 1.1 spec */
#define MAX_MEMORY_ALLOCATION_SIZE (1ull << 30)

struct v3dv_physical_device {
   struct vk_physical_device vk;

   char *name;
   int32_t render_fd;
   int32_t display_fd;
   int32_t master_fd;

   /* We need these because it is not clear how to detect
    * valid devids in a portable way
    */
   bool has_primary;
   bool has_render;

   dev_t primary_devid;
   dev_t render_devid;

   uint8_t driver_build_sha1[20];
   uint8_t pipeline_cache_uuid[VK_UUID_SIZE];
   uint8_t device_uuid[VK_UUID_SIZE];
   uint8_t driver_uuid[VK_UUID_SIZE];

   struct disk_cache *disk_cache;

   mtx_t mutex;

   struct wsi_device wsi_device;

   VkPhysicalDeviceMemoryProperties memory;

   struct v3d_device_info devinfo;

   struct v3d_simulator_file *sim_file;

   const struct v3d_compiler *compiler;
   uint32_t next_program_id;

   struct {
      bool merge_jobs;
   } options;
};

VkResult v3dv_physical_device_acquire_display(struct v3dv_instance *instance,
                                              struct v3dv_physical_device *pdevice,
                                              VkIcdSurfaceBase *surface);

VkResult v3dv_wsi_init(struct v3dv_physical_device *physical_device);
void v3dv_wsi_finish(struct v3dv_physical_device *physical_device);
struct v3dv_image *v3dv_wsi_get_image_from_swapchain(VkSwapchainKHR swapchain,
                                                     uint32_t index);

void v3dv_meta_clear_init(struct v3dv_device *device);
void v3dv_meta_clear_finish(struct v3dv_device *device);

void v3dv_meta_blit_init(struct v3dv_device *device);
void v3dv_meta_blit_finish(struct v3dv_device *device);

void v3dv_meta_texel_buffer_copy_init(struct v3dv_device *device);
void v3dv_meta_texel_buffer_copy_finish(struct v3dv_device *device);

bool v3dv_meta_can_use_tlb(struct v3dv_image *image,
                           const VkOffset3D *offset,
                           VkFormat *compat_format);

struct v3dv_instance {
   struct vk_instance vk;

   int physicalDeviceCount;
   struct v3dv_physical_device physicalDevice;

   bool pipeline_cache_enabled;
   bool default_pipeline_cache_enabled;
};

/* Tracks wait threads spawned from a single vkQueueSubmit call */
struct v3dv_queue_submit_wait_info {
   /* struct vk_object_base base; ?*/
   struct list_head list_link;

   struct v3dv_device *device;

   /* List of wait threads spawned for any command buffers in a particular
    * call to vkQueueSubmit.
    */
   uint32_t wait_thread_count;
   struct {
      pthread_t thread;
      bool finished;
   } wait_threads[16];

   /* The master wait thread for the entire submit. This will wait for all
    * other threads in this submit to complete before processing signal
    * semaphores and fences.
    */
   pthread_t master_wait_thread;

   /* List of semaphores (and fence) to signal after all wait threads completed
    * and all command buffer jobs in the submission have been sent to the GPU.
    */
   uint32_t signal_semaphore_count;
   VkSemaphore *signal_semaphores;
   VkFence fence;
};

struct v3dv_queue {
   struct vk_queue vk;

   struct v3dv_device *device;

   /* A list of active v3dv_queue_submit_wait_info */
   struct list_head submit_wait_list;

   /* A mutex to prevent concurrent access to the list of wait threads */
   mtx_t mutex;

   struct v3dv_job *noop_job;
};

#define V3DV_META_BLIT_CACHE_KEY_SIZE (4 * sizeof(uint32_t))
#define V3DV_META_TEXEL_BUFFER_COPY_CACHE_KEY_SIZE (3 * sizeof(uint32_t) + \
                                                    sizeof(VkComponentMapping))

struct v3dv_meta_color_clear_pipeline {
   VkPipeline pipeline;
   VkRenderPass pass;
   bool cached;
   uint64_t key;
};

struct v3dv_meta_depth_clear_pipeline {
   VkPipeline pipeline;
   uint64_t key;
};

struct v3dv_meta_blit_pipeline {
   VkPipeline pipeline;
   VkRenderPass pass;
   VkRenderPass pass_no_load;
   uint8_t key[V3DV_META_BLIT_CACHE_KEY_SIZE];
};

struct v3dv_meta_texel_buffer_copy_pipeline {
   VkPipeline pipeline;
   VkRenderPass pass;
   VkRenderPass pass_no_load;
   uint8_t key[V3DV_META_TEXEL_BUFFER_COPY_CACHE_KEY_SIZE];
};

struct v3dv_pipeline_key {
   bool robust_buffer_access;
   uint8_t topology;
   uint8_t logicop_func;
   bool msaa;
   bool sample_coverage;
   bool sample_alpha_to_coverage;
   bool sample_alpha_to_one;
   uint8_t cbufs;
   struct {
      enum pipe_format format;
      uint8_t swizzle[4];
   } color_fmt[V3D_MAX_DRAW_BUFFERS];
   uint8_t f32_color_rb;
   uint32_t va_swap_rb_mask;
   bool has_multiview;
};

struct v3dv_pipeline_cache_stats {
   uint32_t miss;
   uint32_t hit;
   uint32_t count;
};

/* Equivalent to gl_shader_stage, but including the coordinate shaders
 *
 * FIXME: perhaps move to common
 */
enum broadcom_shader_stage {
   BROADCOM_SHADER_VERTEX,
   BROADCOM_SHADER_VERTEX_BIN,
   BROADCOM_SHADER_GEOMETRY,
   BROADCOM_SHADER_GEOMETRY_BIN,
   BROADCOM_SHADER_FRAGMENT,
   BROADCOM_SHADER_COMPUTE,
};

#define BROADCOM_SHADER_STAGES (BROADCOM_SHADER_COMPUTE + 1)

/* Assumes that coordinate shaders will be custom-handled by the caller */
static inline enum broadcom_shader_stage
gl_shader_stage_to_broadcom(gl_shader_stage stage)
{
   switch (stage) {
   case MESA_SHADER_VERTEX:
      return BROADCOM_SHADER_VERTEX;
   case MESA_SHADER_GEOMETRY:
      return BROADCOM_SHADER_GEOMETRY;
   case MESA_SHADER_FRAGMENT:
      return BROADCOM_SHADER_FRAGMENT;
   case MESA_SHADER_COMPUTE:
      return BROADCOM_SHADER_COMPUTE;
   default:
      unreachable("Unknown gl shader stage");
   }
}

static inline gl_shader_stage
broadcom_shader_stage_to_gl(enum broadcom_shader_stage stage)
{
   switch (stage) {
   case BROADCOM_SHADER_VERTEX:
   case BROADCOM_SHADER_VERTEX_BIN:
      return MESA_SHADER_VERTEX;
   case BROADCOM_SHADER_GEOMETRY:
   case BROADCOM_SHADER_GEOMETRY_BIN:
      return MESA_SHADER_GEOMETRY;
   case BROADCOM_SHADER_FRAGMENT:
      return MESA_SHADER_FRAGMENT;
   case BROADCOM_SHADER_COMPUTE:
      return MESA_SHADER_COMPUTE;
   default:
      unreachable("Unknown broadcom shader stage");
   }
}

static inline bool
broadcom_shader_stage_is_binning(enum broadcom_shader_stage stage)
{
   switch (stage) {
   case BROADCOM_SHADER_VERTEX_BIN:
   case BROADCOM_SHADER_GEOMETRY_BIN:
      return true;
   default:
      return false;
   }
}

static inline bool
broadcom_shader_stage_is_render_with_binning(enum broadcom_shader_stage stage)
{
   switch (stage) {
   case BROADCOM_SHADER_VERTEX:
   case BROADCOM_SHADER_GEOMETRY:
      return true;
   default:
      return false;
   }
}

static inline enum broadcom_shader_stage
broadcom_binning_shader_stage_for_render_stage(enum broadcom_shader_stage stage)
{
   switch (stage) {
   case BROADCOM_SHADER_VERTEX:
      return BROADCOM_SHADER_VERTEX_BIN;
   case BROADCOM_SHADER_GEOMETRY:
      return BROADCOM_SHADER_GEOMETRY_BIN;
   default:
      unreachable("Invalid shader stage");
   }
}

static inline const char *
broadcom_shader_stage_name(enum broadcom_shader_stage stage)
{
   switch (stage) {
   case BROADCOM_SHADER_VERTEX_BIN:
      return "MESA_SHADER_VERTEX_BIN";
   case BROADCOM_SHADER_GEOMETRY_BIN:
      return "MESA_SHADER_GEOMETRY_BIN";
   default:
      return gl_shader_stage_name(broadcom_shader_stage_to_gl(stage));
   }
}

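/* Illustrative example (not part of the driver API): a hypothetical caller
 * that needs the binning counterpart of a render stage could combine the
 * helpers above like this:
 *
 *    enum broadcom_shader_stage s = gl_shader_stage_to_broadcom(MESA_SHADER_VERTEX);
 *    if (broadcom_shader_stage_is_render_with_binning(s))
 *       s = broadcom_binning_shader_stage_for_render_stage(s);
 *    // s == BROADCOM_SHADER_VERTEX_BIN
 */
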
struct v3dv_pipeline_cache {
   struct vk_object_base base;

   struct v3dv_device *device;
   mtx_t mutex;

   struct hash_table *nir_cache;
   struct v3dv_pipeline_cache_stats nir_stats;

   struct hash_table *cache;
   struct v3dv_pipeline_cache_stats stats;

   /* For VK_EXT_pipeline_creation_cache_control. */
   bool externally_synchronized;
};

struct v3dv_device {
   struct vk_device vk;

   struct v3dv_instance *instance;
   struct v3dv_physical_device *pdevice;

   struct v3d_device_info devinfo;
   struct v3dv_queue queue;

   /* A sync object to track the last job submitted to the GPU. */
   uint32_t last_job_sync;

   /* A mutex to prevent concurrent access to last_job_sync from the queue */
   mtx_t mutex;

   /* Resources used for meta operations */
   struct {
      mtx_t mtx;
      struct {
         VkPipelineLayout p_layout;
         struct hash_table *cache; /* v3dv_meta_color_clear_pipeline */
      } color_clear;
      struct {
         VkPipelineLayout p_layout;
         struct hash_table *cache; /* v3dv_meta_depth_clear_pipeline */
      } depth_clear;
      struct {
         VkDescriptorSetLayout ds_layout;
         VkPipelineLayout p_layout;
         struct hash_table *cache[3]; /* v3dv_meta_blit_pipeline for 1d, 2d, 3d */
      } blit;
      struct {
         VkDescriptorSetLayout ds_layout;
         VkPipelineLayout p_layout;
         struct hash_table *cache[3]; /* v3dv_meta_texel_buffer_copy_pipeline for 1d, 2d, 3d */
      } texel_buffer_copy;
   } meta;

   struct v3dv_bo_cache {
      /** List of struct v3d_bo freed, by age. */
      struct list_head time_list;
      /** List of struct v3d_bo freed, per size, by age. */
      struct list_head *size_list;
      uint32_t size_list_size;

      mtx_t lock;

      uint32_t cache_size;
      uint32_t cache_count;
      uint32_t max_cache_size;
   } bo_cache;

   uint32_t bo_size;
   uint32_t bo_count;

   struct v3dv_pipeline_cache default_pipeline_cache;

   /* GL_SHADER_STATE_RECORD needs to specify default attribute values. The
    * following covers the most common case, that is all attribute formats
    * being float, allowing us to reuse the same BO for all pipelines matching
    * this requirement. Pipelines that need integer attributes will create
    * their own BO.
    */
   struct v3dv_bo *default_attribute_float;
   VkPhysicalDeviceFeatures features;

#ifdef ANDROID
   const void *gralloc;
   enum {
      V3DV_GRALLOC_UNKNOWN,
      V3DV_GRALLOC_CROS,
      V3DV_GRALLOC_OTHER,
   } gralloc_type;
#endif
};

struct v3dv_device_memory {
   struct vk_object_base base;

   struct v3dv_bo *bo;
   const VkMemoryType *type;
   bool has_bo_ownership;
   bool is_for_wsi;
};

#define V3D_OUTPUT_IMAGE_FORMAT_NO 255
#define TEXTURE_DATA_FORMAT_NO 255

struct v3dv_format {
   bool supported;

   /* One of V3D33_OUTPUT_IMAGE_FORMAT_*, or OUTPUT_IMAGE_FORMAT_NO */
   uint8_t rt_type;

   /* One of V3D33_TEXTURE_DATA_FORMAT_*. */
   uint8_t tex_type;

   /* Swizzle to apply to the RGBA shader output for storing to the tile
    * buffer, to the RGBA tile buffer to produce shader input (for
    * blending), and for turning the rgba8888 texture sampler return
    * value into shader rgba values.
    */
   uint8_t swizzle[4];

   /* Whether the return value is 16F/I/UI or 32F/I/UI. */
   uint8_t return_size;

   /* If the format supports (linear) filtering when texturing. */
   bool supports_filtering;
};

struct v3d_resource_slice {
   uint32_t offset;
   uint32_t stride;
   uint32_t padded_height;
   /* Size of a single pane of the slice. For 3D textures, there will be
    * a number of panes equal to the minified, power-of-two-aligned
    * depth.
    */
   uint32_t size;
   uint8_t ub_pad;
   enum v3d_tiling_mode tiling;
   uint32_t padded_height_of_output_image_in_uif_blocks;
};

struct v3dv_image {
   struct vk_image vk;

   const struct v3dv_format *format;
   uint32_t cpp;
   bool tiled;

   struct v3d_resource_slice slices[V3D_MAX_MIP_LEVELS];
   uint64_t size; /* Total size in bytes */
   uint32_t cube_map_stride;

   struct v3dv_device_memory *mem;
   VkDeviceSize mem_offset;
   uint32_t alignment;

#ifdef ANDROID
   /* Image is backed by VK_ANDROID_native_buffer. */
   bool is_native_buffer_memory;
#endif
};

VkImageViewType v3dv_image_type_to_view_type(VkImageType type);

/* Pre-generating packets needs to consider changes in packet sizes across hw
 * versions. Keep things simple and allocate enough space for any supported
 * version. We ensure the size is large enough through static asserts.
 */
#define V3DV_TEXTURE_SHADER_STATE_LENGTH 32
#define V3DV_SAMPLER_STATE_LENGTH 24
#define V3DV_BLEND_CFG_LENGTH 5
#define V3DV_CFG_BITS_LENGTH 4
#define V3DV_GL_SHADER_STATE_RECORD_LENGTH 36
#define V3DV_VCM_CACHE_SIZE_LENGTH 2
#define V3DV_GL_SHADER_STATE_ATTRIBUTE_RECORD_LENGTH 16
#define V3DV_STENCIL_CFG_LENGTH 6

struct v3dv_image_view {
   struct vk_image_view vk;

   const struct v3dv_format *format;
   bool swap_rb;
   uint32_t internal_bpp;
   uint32_t internal_type;
   uint32_t offset;

   /* Precomputed (composed from createinfo->components and format swizzle)
    * swizzles to pass in to the shader key.
    *
    * This could also be included in the descriptor bo, but the shader state
    * packet doesn't need it on a bo, so we can just avoid a memory copy.
    */
   uint8_t swizzle[4];

   /* Prepacked TEXTURE_SHADER_STATE. It will be copied to the descriptor info
    * during UpdateDescriptorSets.
    *
    * Empirical tests show that cube arrays need a different shader state
    * depending on whether they are used with a sampler or not, so for these
    * we generate two states and select the one to use based on the descriptor
    * type.
    */
   uint8_t texture_shader_state[2][V3DV_TEXTURE_SHADER_STATE_LENGTH];
};

uint32_t v3dv_layer_offset(const struct v3dv_image *image, uint32_t level, uint32_t layer);

struct v3dv_buffer {
   struct vk_object_base base;

   VkDeviceSize size;
   VkBufferUsageFlags usage;
   uint32_t alignment;

   struct v3dv_device_memory *mem;
   VkDeviceSize mem_offset;
};

struct v3dv_buffer_view {
   struct vk_object_base base;

   struct v3dv_buffer *buffer;

   VkFormat vk_format;
   const struct v3dv_format *format;
   uint32_t internal_bpp;
   uint32_t internal_type;

   uint32_t offset;
   uint32_t size;
   uint32_t num_elements;

   /* Prepacked TEXTURE_SHADER_STATE. */
   uint8_t texture_shader_state[V3DV_TEXTURE_SHADER_STATE_LENGTH];
};

struct v3dv_subpass_attachment {
   uint32_t attachment;
   VkImageLayout layout;
};

struct v3dv_subpass {
   uint32_t input_count;
   struct v3dv_subpass_attachment *input_attachments;

   uint32_t color_count;
   struct v3dv_subpass_attachment *color_attachments;
   struct v3dv_subpass_attachment *resolve_attachments;

   struct v3dv_subpass_attachment ds_attachment;

   /* If we need to emit the clear of the depth/stencil attachment using a
    * draw call instead of using the TLB (GFXH-1461).
    */
   bool do_depth_clear_with_draw;
   bool do_stencil_clear_with_draw;

   /* Multiview */
   uint32_t view_mask;
};

struct v3dv_render_pass_attachment {
   VkAttachmentDescription2 desc;

   uint32_t first_subpass;
   uint32_t last_subpass;

   /* When multiview is enabled, we no longer care about when a particular
    * attachment is first or last used in a render pass, since not all views
    * in the attachment will meet that criteria. Instead, we need to track
    * each individual view (layer) in each attachment and emit our stores,
    * loads and clears accordingly.
    */
   struct {
      uint32_t first_subpass;
      uint32_t last_subpass;
   } views[MAX_MULTIVIEW_VIEW_COUNT];

   /* If this is a multisampled attachment that is going to be resolved,
    * whether we can use the TLB resolve on store.
    */
   bool use_tlb_resolve;
};

struct v3dv_render_pass {
   struct vk_object_base base;

   bool multiview_enabled;

   uint32_t attachment_count;
   struct v3dv_render_pass_attachment *attachments;

   uint32_t subpass_count;
   struct v3dv_subpass *subpasses;

   struct v3dv_subpass_attachment *subpass_attachments;
};

struct v3dv_framebuffer {
   struct vk_object_base base;

   uint32_t width;
   uint32_t height;
   uint32_t layers;

   /* Typically, edge tiles in the framebuffer have padding depending on the
    * underlying tiling layout. One consequence of this is that when the
    * framebuffer dimensions are not aligned to tile boundaries, tile stores
    * would still write full tiles on the edges and write to the padded area.
    * If the framebuffer is aliasing a smaller region of a larger image, then
    * we need to be careful with this though, as we won't have padding on the
    * edge tiles (which typically means that we need to load the tile buffer
    * before we store).
    */
   bool has_edge_padding;

   uint32_t attachment_count;
   uint32_t color_attachment_count;
   struct v3dv_image_view *attachments[0];
};

struct v3dv_frame_tiling {
   uint32_t width;
   uint32_t height;
   uint32_t layers;
   uint32_t render_target_count;
   uint32_t internal_bpp;
   bool msaa;
   uint32_t tile_width;
   uint32_t tile_height;
   uint32_t draw_tiles_x;
   uint32_t draw_tiles_y;
   uint32_t supertile_width;
   uint32_t supertile_height;
   uint32_t frame_width_in_supertiles;
   uint32_t frame_height_in_supertiles;
};

void v3dv_framebuffer_compute_internal_bpp_msaa(const struct v3dv_framebuffer *framebuffer,
                                                const struct v3dv_subpass *subpass,
                                                uint8_t *max_bpp, bool *msaa);

bool v3dv_subpass_area_is_tile_aligned(struct v3dv_device *device,
                                       const VkRect2D *area,
                                       struct v3dv_framebuffer *fb,
                                       struct v3dv_render_pass *pass,
                                       uint32_t subpass_idx);

struct v3dv_cmd_pool {
   struct vk_object_base base;

   VkAllocationCallbacks alloc;
   struct list_head cmd_buffers;
};

enum v3dv_cmd_buffer_status {
   V3DV_CMD_BUFFER_STATUS_NEW = 0,
   V3DV_CMD_BUFFER_STATUS_INITIALIZED = 1,
   V3DV_CMD_BUFFER_STATUS_RECORDING = 2,
   V3DV_CMD_BUFFER_STATUS_EXECUTABLE = 3
};

union v3dv_clear_value {
   uint32_t color[4];
   struct {
      float z;
      uint8_t s;
   };
};

struct v3dv_cmd_buffer_attachment_state {
   /* The original clear value as provided by the Vulkan API */
   VkClearValue vk_clear_value;

   /* The hardware clear value */
   union v3dv_clear_value clear_value;
};

struct v3dv_viewport_state {
   uint32_t count;
   VkViewport viewports[MAX_VIEWPORTS];
   float translate[MAX_VIEWPORTS][3];
   float scale[MAX_VIEWPORTS][3];
};

struct v3dv_scissor_state {
   uint32_t count;
   VkRect2D scissors[MAX_SCISSORS];
};

/* Mostly a v3dv mapping of VkDynamicState, used to track which state is
 * defined as dynamic.
 */
enum v3dv_dynamic_state_bits {
   V3DV_DYNAMIC_VIEWPORT = 1 << 0,
   V3DV_DYNAMIC_SCISSOR = 1 << 1,
   V3DV_DYNAMIC_STENCIL_COMPARE_MASK = 1 << 2,
   V3DV_DYNAMIC_STENCIL_WRITE_MASK = 1 << 3,
   V3DV_DYNAMIC_STENCIL_REFERENCE = 1 << 4,
   V3DV_DYNAMIC_BLEND_CONSTANTS = 1 << 5,
   V3DV_DYNAMIC_DEPTH_BIAS = 1 << 6,
   V3DV_DYNAMIC_LINE_WIDTH = 1 << 7,
   V3DV_DYNAMIC_COLOR_WRITE_ENABLE = 1 << 8,
   V3DV_DYNAMIC_ALL = (1 << 9) - 1,
};

/* Flags for dirty pipeline state.
 */
enum v3dv_cmd_dirty_bits {
   V3DV_CMD_DIRTY_VIEWPORT = 1 << 0,
   V3DV_CMD_DIRTY_SCISSOR = 1 << 1,
   V3DV_CMD_DIRTY_STENCIL_COMPARE_MASK = 1 << 2,
   V3DV_CMD_DIRTY_STENCIL_WRITE_MASK = 1 << 3,
   V3DV_CMD_DIRTY_STENCIL_REFERENCE = 1 << 4,
   V3DV_CMD_DIRTY_PIPELINE = 1 << 5,
   V3DV_CMD_DIRTY_COMPUTE_PIPELINE = 1 << 6,
   V3DV_CMD_DIRTY_VERTEX_BUFFER = 1 << 7,
   V3DV_CMD_DIRTY_INDEX_BUFFER = 1 << 8,
   V3DV_CMD_DIRTY_DESCRIPTOR_SETS = 1 << 9,
   V3DV_CMD_DIRTY_COMPUTE_DESCRIPTOR_SETS = 1 << 10,
   V3DV_CMD_DIRTY_PUSH_CONSTANTS = 1 << 11,
   V3DV_CMD_DIRTY_BLEND_CONSTANTS = 1 << 12,
   V3DV_CMD_DIRTY_OCCLUSION_QUERY = 1 << 13,
   V3DV_CMD_DIRTY_DEPTH_BIAS = 1 << 14,
   V3DV_CMD_DIRTY_LINE_WIDTH = 1 << 15,
   V3DV_CMD_DIRTY_VIEW_INDEX = 1 << 16,
   V3DV_CMD_DIRTY_COLOR_WRITE_ENABLE = 1 << 17,
};

struct v3dv_dynamic_state {
   /**
    * Bitmask of (1 << VK_DYNAMIC_STATE_*).
    * Defines the set of saved dynamic state.
    */
   uint32_t mask;

   struct v3dv_viewport_state viewport;

   struct v3dv_scissor_state scissor;

   struct {
      uint32_t front;
      uint32_t back;
   } stencil_compare_mask;

   struct {
      uint32_t front;
      uint32_t back;
   } stencil_write_mask;

   struct {
      uint32_t front;
      uint32_t back;
   } stencil_reference;

   float blend_constants[4];

   struct {
      float constant_factor;
      float depth_bias_clamp;
      float slope_factor;
   } depth_bias;

   float line_width;

   uint32_t color_write_enable;
};

void v3dv_viewport_compute_xform(const VkViewport *viewport,
                                 float scale[3],
                                 float translate[3]);

enum v3dv_ez_state {
   V3D_EZ_UNDECIDED = 0,
   V3D_EZ_GT_GE,
   V3D_EZ_LT_LE,
   V3D_EZ_DISABLED,
};

enum v3dv_job_type {
   V3DV_JOB_TYPE_GPU_CL = 0,
   V3DV_JOB_TYPE_GPU_CL_SECONDARY,
   V3DV_JOB_TYPE_GPU_TFU,
   V3DV_JOB_TYPE_GPU_CSD,
   V3DV_JOB_TYPE_CPU_RESET_QUERIES,
   V3DV_JOB_TYPE_CPU_END_QUERY,
   V3DV_JOB_TYPE_CPU_COPY_QUERY_RESULTS,
   V3DV_JOB_TYPE_CPU_SET_EVENT,
   V3DV_JOB_TYPE_CPU_WAIT_EVENTS,
   V3DV_JOB_TYPE_CPU_COPY_BUFFER_TO_IMAGE,
   V3DV_JOB_TYPE_CPU_CSD_INDIRECT,
   V3DV_JOB_TYPE_CPU_TIMESTAMP_QUERY,
};

struct v3dv_reset_query_cpu_job_info {
   struct v3dv_query_pool *pool;
   uint32_t first;
   uint32_t count;
};

struct v3dv_end_query_cpu_job_info {
   struct v3dv_query_pool *pool;
   uint32_t query;

   /* This is one unless multiview is used */
   uint32_t count;
};

struct v3dv_copy_query_results_cpu_job_info {
   struct v3dv_query_pool *pool;
   uint32_t first;
   uint32_t count;
   struct v3dv_buffer *dst;
   uint32_t offset;
   uint32_t stride;
   VkQueryResultFlags flags;
};

struct v3dv_event_set_cpu_job_info {
   struct v3dv_event *event;
   int state;
};

struct v3dv_event_wait_cpu_job_info {
   /* List of events to wait on */
   uint32_t event_count;
   struct v3dv_event **events;

   /* Whether any postponed jobs after the wait should wait on semaphores */
   bool sem_wait;
};

struct v3dv_copy_buffer_to_image_cpu_job_info {
   struct v3dv_image *image;
   struct v3dv_buffer *buffer;
   uint32_t buffer_offset;
   uint32_t buffer_stride;
   uint32_t buffer_layer_stride;
   VkOffset3D image_offset;
   VkExtent3D image_extent;
   uint32_t mip_level;
   uint32_t base_layer;
   uint32_t layer_count;
};

struct v3dv_csd_indirect_cpu_job_info {
   struct v3dv_buffer *buffer;
   uint32_t offset;
   struct v3dv_job *csd_job;
   uint32_t wg_size;
   uint32_t *wg_uniform_offsets[3];
   bool needs_wg_uniform_rewrite;
};

struct v3dv_timestamp_query_cpu_job_info {
   struct v3dv_query_pool *pool;
   uint32_t query;

   /* This is one unless multiview is used */
   uint32_t count;
};

struct v3dv_job {
   struct list_head list_link;

   /* We only create job clones when executing secondary command buffers into
    * primaries. These clones don't make deep copies of the original object
    * so we want to flag them to avoid freeing resources they don't own.
    */
   bool is_clone;

   enum v3dv_job_type type;

   struct v3dv_device *device;

   struct v3dv_cmd_buffer *cmd_buffer;

   struct v3dv_cl bcl;
   struct v3dv_cl rcl;
   struct v3dv_cl indirect;

   /* Set of all BOs referenced by the job. This will be used for making
    * the list of BOs that the kernel will need to have paged in to
    * execute our job.
    */
   struct set *bos;
   uint32_t bo_count;
   uint64_t bo_handle_mask;

   struct v3dv_bo *tile_alloc;
   struct v3dv_bo *tile_state;

   bool tmu_dirty_rcl;

   uint32_t first_subpass;

   /* When the current subpass is split into multiple jobs, this flag is set
    * to true for any jobs after the first in the same subpass.
    */
   bool is_subpass_continue;

   /* If this job is the last job emitted for a subpass. */
   bool is_subpass_finish;

   struct v3dv_frame_tiling frame_tiling;

   enum v3dv_ez_state ez_state;
   enum v3dv_ez_state first_ez_state;

   /* If we have already decided if we need to disable Early Z/S completely
    * for this job.
    */
   bool decided_global_ez_enable;

   /* If this job has been configured to use early Z/S clear */
   bool early_zs_clear;

   /* Number of draw calls recorded into the job */
   uint32_t draw_count;

   /* A flag indicating whether we want to flush every draw separately. This
    * can be used for debugging, or for cases where special circumstances
    * require this behavior.
    */
   bool always_flush;

   /* Whether we need to serialize this job in our command stream */
   bool serialize;

   /* If this is a CL job, whether we should sync before binning */
   bool needs_bcl_sync;

   /* Job specs for CPU jobs */
   union {
      struct v3dv_reset_query_cpu_job_info query_reset;
      struct v3dv_end_query_cpu_job_info query_end;
      struct v3dv_copy_query_results_cpu_job_info query_copy_results;
      struct v3dv_event_set_cpu_job_info event_set;
      struct v3dv_event_wait_cpu_job_info event_wait;
      struct v3dv_copy_buffer_to_image_cpu_job_info copy_buffer_to_image;
      struct v3dv_csd_indirect_cpu_job_info csd_indirect;
      struct v3dv_timestamp_query_cpu_job_info query_timestamp;
   } cpu;

   /* Job specs for TFU jobs */
   struct drm_v3d_submit_tfu tfu;

   /* Job specs for CSD jobs */
   struct {
      struct v3dv_bo *shared_memory;
      uint32_t wg_count[3];
      uint32_t wg_base[3];
      struct drm_v3d_submit_csd submit;
   } csd;
};

void v3dv_job_init(struct v3dv_job *job,
                   enum v3dv_job_type type,
                   struct v3dv_device *device,
                   struct v3dv_cmd_buffer *cmd_buffer,
                   int32_t subpass_idx);
void v3dv_job_destroy(struct v3dv_job *job);

void v3dv_job_add_bo(struct v3dv_job *job, struct v3dv_bo *bo);
void v3dv_job_add_bo_unchecked(struct v3dv_job *job, struct v3dv_bo *bo);

void v3dv_job_start_frame(struct v3dv_job *job,
                          uint32_t width,
                          uint32_t height,
                          uint32_t layers,
                          bool allocate_tile_state_for_all_layers,
                          uint32_t render_target_count,
                          uint8_t max_internal_bpp,
                          bool msaa);

struct v3dv_job *
v3dv_job_clone_in_cmd_buffer(struct v3dv_job *job,
                             struct v3dv_cmd_buffer *cmd_buffer);

struct v3dv_job *v3dv_cmd_buffer_create_cpu_job(struct v3dv_device *device,
                                                enum v3dv_job_type type,
                                                struct v3dv_cmd_buffer *cmd_buffer,
                                                uint32_t subpass_idx);

void
v3dv_cmd_buffer_ensure_array_state(struct v3dv_cmd_buffer *cmd_buffer,
                                   uint32_t slot_size,
                                   uint32_t used_count,
                                   uint32_t *alloc_count,
                                   void **ptr);

void v3dv_cmd_buffer_emit_pre_draw(struct v3dv_cmd_buffer *cmd_buffer);

/* FIXME: only used on v3dv_cmd_buffer and v3dvx_cmd_buffer, perhaps move to a
 * cmd_buffer specific header?
 */
struct v3dv_draw_info {
   uint32_t vertex_count;
   uint32_t instance_count;
   uint32_t first_vertex;
   uint32_t first_instance;
};

struct v3dv_vertex_binding {
   struct v3dv_buffer *buffer;
   VkDeviceSize offset;
};

struct v3dv_descriptor_state {
   struct v3dv_descriptor_set *descriptor_sets[MAX_SETS];
   uint32_t valid;
   uint32_t dynamic_offsets[MAX_DYNAMIC_BUFFERS];
};

struct v3dv_cmd_pipeline_state {
   struct v3dv_pipeline *pipeline;

   struct v3dv_descriptor_state descriptor_state;
};

struct v3dv_cmd_buffer_state {
   struct v3dv_render_pass *pass;
   struct v3dv_framebuffer *framebuffer;
   VkRect2D render_area;

   /* Current job being recorded */
   struct v3dv_job *job;

   uint32_t subpass_idx;

   struct v3dv_cmd_pipeline_state gfx;
   struct v3dv_cmd_pipeline_state compute;

   struct v3dv_dynamic_state dynamic;

   uint32_t dirty;
   VkShaderStageFlagBits dirty_descriptor_stages;
   VkShaderStageFlagBits dirty_push_constants_stages;

   /* Current clip window. We use this to check whether we have an active
    * scissor, since in that case we can't use TLB clears and need to fall
    * back to drawing rects.
    */
   VkRect2D clip_window;

   /* Whether our render area is aligned to tile boundaries. If this is false
    * then we have tiles that are only partially covered by the render area,
    * and therefore, we need to be careful with our loads and stores so we don't
    * modify pixels for the tile area that is not covered by the render area.
    * This means, for example, that we can't use the TLB to clear, since that
    * always clears full tiles.
    */
   bool tile_aligned_render_area;

   uint32_t attachment_alloc_count;
   struct v3dv_cmd_buffer_attachment_state *attachments;

   struct v3dv_vertex_binding vertex_bindings[MAX_VBS];

   struct {
      VkBuffer buffer;
      VkDeviceSize offset;
      uint8_t index_size;
   } index_buffer;

   /* Current uniforms */
   struct {
      struct v3dv_cl_reloc vs_bin;
      struct v3dv_cl_reloc vs;
      struct v3dv_cl_reloc gs_bin;
      struct v3dv_cl_reloc gs;
      struct v3dv_cl_reloc fs;
   } uniforms;

   /* Current view index for multiview rendering */
   uint32_t view_index;

   /* Used to flag OOM conditions during command buffer recording */
   bool oom;

   /* Whether we have recorded a pipeline barrier that we still need to
    * process.
    */
   bool has_barrier;
   bool has_bcl_barrier;

   /* Secondary command buffer state */
   struct {
      bool occlusion_query_enable;
   } inheritance;

   /* Command buffer state saved during a meta operation */
   struct {
      uint32_t subpass_idx;
      VkRenderPass pass;
      VkFramebuffer framebuffer;

      uint32_t attachment_alloc_count;
      uint32_t attachment_count;
      struct v3dv_cmd_buffer_attachment_state *attachments;

      bool tile_aligned_render_area;
      VkRect2D render_area;

      struct v3dv_dynamic_state dynamic;

      struct v3dv_cmd_pipeline_state gfx;
      bool has_descriptor_state;

      uint32_t push_constants[MAX_PUSH_CONSTANTS_SIZE / 4];
   } meta;

   /* Command buffer state for queries */
   struct {
      /* A list of vkCmdQueryEnd commands recorded in the command buffer during
       * a render pass. We queue these here and then schedule the corresponding
       * CPU jobs for them at the time we finish the GPU job in which they have
       * been recorded.
       */
      struct {
         uint32_t used_count;
         uint32_t alloc_count;
         struct v3dv_end_query_cpu_job_info *states;
      } end;

      /* This BO is not NULL if we have an active query, that is, we have
       * called vkCmdBeginQuery but not vkCmdEndQuery.
       */
      struct {
         struct v3dv_bo *bo;
         uint32_t offset;
      } active_query;
   } query;
};

/* The following struct represents the info from a descriptor that we store on
 * the host memory. They are mostly links to other existing vulkan objects,
 * like the image_view in order to access swizzle info, or the buffer used
 * for a UBO/SSBO, for example.
 *
 * FIXME: revisit if it makes sense to just move everything that would be
 * needed from a descriptor to the bo.
 */
struct v3dv_descriptor {
   VkDescriptorType type;

   union {
      struct {
         struct v3dv_image_view *image_view;
         struct v3dv_sampler *sampler;
      };

      struct {
         struct v3dv_buffer *buffer;
         uint32_t offset;
         uint32_t range;
      };

      struct v3dv_buffer_view *buffer_view;
   };
};

struct v3dv_query {
   bool maybe_available;
   union {
      /* Used by GPU queries (occlusion) */
      struct {
         struct v3dv_bo *bo;
         uint32_t offset;
      };
      /* Used by CPU queries (timestamp) */
      uint64_t value;
   };
};

struct v3dv_query_pool {
   struct vk_object_base base;

   struct v3dv_bo *bo; /* Only used with GPU queries (occlusion) */

   VkQueryType query_type;
   uint32_t query_count;
   struct v3dv_query *queries;
};

VkResult v3dv_get_query_pool_results_cpu(struct v3dv_device *device,
                                         struct v3dv_query_pool *pool,
                                         uint32_t first,
                                         uint32_t count,
                                         void *data,
                                         VkDeviceSize stride,
                                         VkQueryResultFlags flags);

void v3dv_reset_query_pools(struct v3dv_device *device,
                            struct v3dv_query_pool *query_pool,
                            uint32_t first,
                            uint32_t last);

typedef void (*v3dv_cmd_buffer_private_obj_destroy_cb)(VkDevice device,
                                                       uint64_t pobj,
                                                       VkAllocationCallbacks *alloc);
struct v3dv_cmd_buffer_private_obj {
   struct list_head list_link;
   uint64_t obj;
   v3dv_cmd_buffer_private_obj_destroy_cb destroy_cb;
};

struct v3dv_cmd_buffer {
   struct vk_command_buffer vk;

   struct v3dv_device *device;

   struct v3dv_cmd_pool *pool;
   struct list_head pool_link;

   /* Used at submit time to link command buffers in the submission that have
    * spawned wait threads, so we can then wait on all of them to complete
    * before we process any signal semaphores or fences.
    */
   struct list_head list_link;

   VkCommandBufferUsageFlags usage_flags;
   VkCommandBufferLevel level;

   enum v3dv_cmd_buffer_status status;

   struct v3dv_cmd_buffer_state state;

   /* FIXME: we have just one client-side copy and one BO for the push
    * constants, independently of the stageFlags in vkCmdPushConstants, and
    * the pipelineBindPoint in vkCmdBindPipeline. We could probably do more
    * per-stage tuning in the future if it makes sense.
    */
   uint32_t push_constants_data[MAX_PUSH_CONSTANTS_SIZE / 4];
   struct v3dv_cl_reloc push_constants_resource;

   /* Collection of Vulkan objects created internally by the driver (typically
    * during recording of meta operations) that are part of the command buffer
    * and should be destroyed with it.
    */
   struct list_head private_objs; /* v3dv_cmd_buffer_private_obj */

   /* Per-command buffer resources for meta operations. */
   struct {
      struct {
         /* The current descriptor pool for blit sources */
         VkDescriptorPool dspool;
      } blit;
      struct {
         /* The current descriptor pool for texel buffer copy sources */
         VkDescriptorPool dspool;
      } texel_buffer_copy;
   } meta;

   /* List of jobs in the command buffer. For primary command buffers it
    * represents the jobs we want to submit to the GPU. For secondary command
    * buffers it represents jobs that will be merged into a primary command
    * buffer via vkCmdExecuteCommands.
    */
   struct list_head jobs;
};

struct v3dv_job *v3dv_cmd_buffer_start_job(struct v3dv_cmd_buffer *cmd_buffer,
                                           int32_t subpass_idx,
                                           enum v3dv_job_type type);
void v3dv_cmd_buffer_finish_job(struct v3dv_cmd_buffer *cmd_buffer);

struct v3dv_job *v3dv_cmd_buffer_subpass_start(struct v3dv_cmd_buffer *cmd_buffer,
                                               uint32_t subpass_idx);
struct v3dv_job *v3dv_cmd_buffer_subpass_resume(struct v3dv_cmd_buffer *cmd_buffer,
                                                uint32_t subpass_idx);

void v3dv_cmd_buffer_subpass_finish(struct v3dv_cmd_buffer *cmd_buffer);

void v3dv_cmd_buffer_meta_state_push(struct v3dv_cmd_buffer *cmd_buffer,
                                     bool push_descriptor_state);
void v3dv_cmd_buffer_meta_state_pop(struct v3dv_cmd_buffer *cmd_buffer,
                                    uint32_t dirty_dynamic_state,
                                    bool needs_subpass_resume);

void v3dv_cmd_buffer_reset_queries(struct v3dv_cmd_buffer *cmd_buffer,
                                   struct v3dv_query_pool *pool,
                                   uint32_t first,
                                   uint32_t count);

void v3dv_cmd_buffer_begin_query(struct v3dv_cmd_buffer *cmd_buffer,
                                 struct v3dv_query_pool *pool,
                                 uint32_t query,
                                 VkQueryControlFlags flags);

void v3dv_cmd_buffer_end_query(struct v3dv_cmd_buffer *cmd_buffer,
                               struct v3dv_query_pool *pool,
                               uint32_t query);

void v3dv_cmd_buffer_copy_query_results(struct v3dv_cmd_buffer *cmd_buffer,
                                        struct v3dv_query_pool *pool,
                                        uint32_t first,
                                        uint32_t count,
                                        struct v3dv_buffer *dst,
                                        uint32_t offset,
                                        uint32_t stride,
                                        VkQueryResultFlags flags);

void v3dv_cmd_buffer_add_tfu_job(struct v3dv_cmd_buffer *cmd_buffer,
                                 struct drm_v3d_submit_tfu *tfu);

void v3dv_cmd_buffer_rewrite_indirect_csd_job(struct v3dv_csd_indirect_cpu_job_info *info,
                                              const uint32_t *wg_counts);

void v3dv_cmd_buffer_add_private_obj(struct v3dv_cmd_buffer *cmd_buffer,
                                     uint64_t obj,
                                     v3dv_cmd_buffer_private_obj_destroy_cb destroy_cb);

struct v3dv_semaphore {
   struct vk_object_base base;

   /* A syncobject handle associated with this semaphore */
   uint32_t sync;

   /* A temporary syncobject handle produced from a vkImportSemaphoreFd. */
   uint32_t temp_sync;
};

struct v3dv_fence {
   struct vk_object_base base;

   /* A syncobject handle associated with this fence */
   uint32_t sync;

   /* A temporary syncobject handle produced from a vkImportFenceFd. */
   uint32_t temp_sync;
};

struct v3dv_event {
   struct vk_object_base base;
   int state;
};

struct v3dv_shader_variant {
   enum broadcom_shader_stage stage;

   union {
      struct v3d_prog_data *base;
      struct v3d_vs_prog_data *vs;
      struct v3d_gs_prog_data *gs;
      struct v3d_fs_prog_data *fs;
      struct v3d_compute_prog_data *cs;
   } prog_data;

   /* We explicitly save the prog_data_size as it makes it easier to
    * serialize.
    */
   uint32_t prog_data_size;

   /* The assembly for this variant will be uploaded to a BO shared with all
    * other shader stages in that pipeline. This is the offset in that BO.
    */
   uint32_t assembly_offset;

   /* Note: it is really likely that qpu_insts would be NULL, as it will be
    * used only temporarily, to upload it to the shared bo, as we compile the
    * different stages individually.
    */
   uint64_t *qpu_insts;
   uint32_t qpu_insts_size;
};

/*
 * Per-stage info for each stage, useful so shader_module_compile_to_nir and
 * other methods don't have so many parameters.
 *
 * FIXME: for the case of the coordinate shader and the vertex shader, module,
 * entrypoint, spec_info and nir are the same. There is also info that is only
 * relevant to some stages. But it seemed too much of a hassle to create a new
 * struct only to handle that. Revisit if this kind of info starts to grow.
 */
struct v3dv_pipeline_stage {
   struct v3dv_pipeline *pipeline;

   enum broadcom_shader_stage stage;

   const struct vk_shader_module *module;
   const char *entrypoint;
   const VkSpecializationInfo *spec_info;

   nir_shader *nir;

   /* The following is the combined hash of module+entrypoint+spec_info+nir */
   unsigned char shader_sha1[20];

   /** A name for this program, so you can track it in shader-db output. */
   uint32_t program_id;

   VkPipelineCreationFeedbackEXT feedback;
};

/* We are using the descriptor pool entry for two things:
 * * Track the allocated sets, so we can properly free them if needed
 * * Track the suballocated pool bo regions, so if some descriptor set is
 *   freed, the gap could be reallocated later.
 *
 * Those only make sense if the pool was not created with the flag
 * VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT
 */
struct v3dv_descriptor_pool_entry
{
   struct v3dv_descriptor_set *set;
   /* Offset and size of the subregion allocated for this entry from the
    * pool->bo
    */
   uint32_t offset;
   uint32_t size;
};

struct v3dv_descriptor_pool {
   struct vk_object_base base;

   /* If this descriptor pool has been allocated by the driver for internal
    * use, typically to implement meta operations.
    */
   bool is_driver_internal;

   struct v3dv_bo *bo;
   /* Current offset at the descriptor bo. 0 means that we didn't use it for
    * any descriptor. If the descriptor bo is NULL, current offset is
    * meaningless.
    */
   uint32_t current_offset;

   /* If VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT is not set the
    * descriptor sets are handled as a whole as pool memory and handled by the
    * following pointers. If set, they are not used, and descriptor sets are
    * allocated/freed individually.
    */
   uint8_t *host_memory_base;
   uint8_t *host_memory_ptr;
   uint8_t *host_memory_end;

   uint32_t entry_count;
   uint32_t max_entry_count;
   struct v3dv_descriptor_pool_entry entries[0];
};

struct v3dv_descriptor_set {
   struct vk_object_base base;

   struct v3dv_descriptor_pool *pool;

   const struct v3dv_descriptor_set_layout *layout;

   /* Offset relative to the descriptor pool bo for this set */
   uint32_t base_offset;

   /* The descriptors below can be indexed (set/binding) using the set_layout */
   struct v3dv_descriptor descriptors[0];
};

struct v3dv_descriptor_set_binding_layout {
   VkDescriptorType type;

   /* Number of array elements in this binding */
   uint32_t array_size;

   /* Index into the flattened descriptor set */
   uint32_t descriptor_index;

   uint32_t dynamic_offset_count;
   uint32_t dynamic_offset_index;

   /* Offset into the descriptor set where this descriptor lives (the final
    * offset on the descriptor bo needs to take into account set->base_offset).
    */
   uint32_t descriptor_offset;

   /* Offset in the v3dv_descriptor_set_layout of the immutable samplers, or 0
    * if there are no immutable samplers.
    */
   uint32_t immutable_samplers_offset;
};

struct v3dv_descriptor_set_layout {
   struct vk_object_base base;

   VkDescriptorSetLayoutCreateFlags flags;

   /* Number of bindings in this descriptor set */
   uint32_t binding_count;

   /* Total bo size needed for this descriptor set */
   uint32_t bo_size;

   /* Shader stages affected by this descriptor set */
   uint16_t shader_stages;

   /* Number of descriptors in this descriptor set */
   uint32_t descriptor_count;

   /* Number of dynamic offsets used by this descriptor set */
   uint16_t dynamic_offset_count;

   /* Bindings in this descriptor set */
   struct v3dv_descriptor_set_binding_layout binding[0];
};

struct v3dv_pipeline_layout {
   struct vk_object_base base;

   struct {
      struct v3dv_descriptor_set_layout *layout;
      uint32_t dynamic_offset_start;
   } set[MAX_SETS];

   uint32_t num_sets;

   /* Shader stages that are declared to use descriptors from this layout */
   uint32_t shader_stages;

   uint32_t dynamic_offset_count;
   uint32_t push_constant_size;
};

/*
 * We are using descriptor maps for ubo/ssbo and texture/samplers, so we need
 * it to be big enough to include the max value for all of them.
 *
 * FIXME: one alternative would be to allocate the map as big as you need for
 * each descriptor type. That would mean more individual allocations.
 */
#define DESCRIPTOR_MAP_SIZE MAX3(V3D_MAX_TEXTURE_SAMPLERS, \
                                 MAX_UNIFORM_BUFFERS,      \
                                 MAX_STORAGE_BUFFERS)


struct v3dv_descriptor_map {
   /* TODO: avoid fixed size array/justify the size */
   unsigned num_desc; /* Number of descriptors */
   int set[DESCRIPTOR_MAP_SIZE];
   int binding[DESCRIPTOR_MAP_SIZE];
   int array_index[DESCRIPTOR_MAP_SIZE];
   int array_size[DESCRIPTOR_MAP_SIZE];

   /* NOTE: the following is only for samplers, but this is the easiest place
    * to put it.
    */
   uint8_t return_size[DESCRIPTOR_MAP_SIZE];
};

struct v3dv_sampler {
   struct vk_object_base base;

   bool compare_enable;
   bool unnormalized_coordinates;
   bool clamp_to_transparent_black_border;

   /* Prepacked SAMPLER_STATE, that is referenced as part of the tmu
    * configuration. If needed it will be copied to the descriptor info during
    * UpdateDescriptorSets.
    */
   uint8_t sampler_state[V3DV_SAMPLER_STATE_LENGTH];
};

struct v3dv_descriptor_template_entry {
   /* The type of descriptor in this entry */
   VkDescriptorType type;

   /* Binding in the descriptor set */
   uint32_t binding;

   /* Offset at which to write into the descriptor set binding */
   uint32_t array_element;

   /* Number of elements to write into the descriptor set binding */
   uint32_t array_count;

   /* Offset into the user provided data */
   size_t offset;

   /* Stride between elements into the user provided data */
   size_t stride;
};

struct v3dv_descriptor_update_template {
   struct vk_object_base base;

   VkPipelineBindPoint bind_point;

   /* The descriptor set this template corresponds to. This value is only
    * valid if the template was created with the templateType
    * VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET.
    */
   uint8_t set;

   /* Number of entries in this template */
   uint32_t entry_count;

   /* Entries of the template */
   struct v3dv_descriptor_template_entry entries[0];
};


/* We keep two special values for the sampler idx that represent exactly when
 * a sampler is not needed/provided. The main use is that even if we don't
 * have a sampler, we still need to do the output unpacking (through
 * nir_lower_tex). The easiest way to do this is to add those special "no
 * sampler" entries in the sampler_map, and then use the proper unpacking for
 * that case.
 *
 * We have one for when we want a 16bit output size, and another for when we
 * want a 32bit output size. We use the info coming from the RelaxedPrecision
 * decoration to decide between one and the other.
 */
#define V3DV_NO_SAMPLER_16BIT_IDX 0
#define V3DV_NO_SAMPLER_32BIT_IDX 1

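/* Illustrative sketch (assumption, not actual driver code): a shader key
 * builder that has no VkSampler would pick one of the special indices based
 * on the desired return size, e.g.:
 *
 *    uint32_t sampler_idx = (return_size == 16) ?
 *       V3DV_NO_SAMPLER_16BIT_IDX : V3DV_NO_SAMPLER_32BIT_IDX;
 */
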
/*
 * The following two methods are used on the combined texture/sampler index
 * maps at v3dv_pipeline.
 */
static inline uint32_t
v3dv_pipeline_combined_index_key_create(uint32_t texture_index,
                                        uint32_t sampler_index)
{
   return texture_index << 24 | sampler_index;
}

static inline void
v3dv_pipeline_combined_index_key_unpack(uint32_t combined_index_key,
                                        uint32_t *texture_index,
                                        uint32_t *sampler_index)
{
   uint32_t texture = combined_index_key >> 24;
   uint32_t sampler = combined_index_key & 0xffffff;

   if (texture_index)
      *texture_index = texture;

   if (sampler_index)
      *sampler_index = sampler;
}

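/* Example (illustrative only): packing texture index 5 and sampler index 2
 * and unpacking them again recovers the original values, since the texture
 * index lives in the top 8 bits of the key:
 *
 *    uint32_t key = v3dv_pipeline_combined_index_key_create(5, 2);
 *    uint32_t tex, samp;
 *    v3dv_pipeline_combined_index_key_unpack(key, &tex, &samp);
 *    // tex == 5, samp == 2
 */
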
struct v3dv_descriptor_maps {
   struct v3dv_descriptor_map ubo_map;
   struct v3dv_descriptor_map ssbo_map;
   struct v3dv_descriptor_map sampler_map;
   struct v3dv_descriptor_map texture_map;
};

/* This structure represents data shared between different objects, like the
 * pipeline and the pipeline cache, so we ref count it to know when it should
 * be freed.
 */
struct v3dv_pipeline_shared_data {
   uint32_t ref_cnt;

   unsigned char sha1_key[20];

   struct v3dv_descriptor_maps *maps[BROADCOM_SHADER_STAGES];
   struct v3dv_shader_variant *variants[BROADCOM_SHADER_STAGES];

   struct v3dv_bo *assembly_bo;
};

struct v3dv_pipeline {
   struct vk_object_base base;

   struct v3dv_device *device;

   VkShaderStageFlags active_stages;

   struct v3dv_render_pass *pass;
   struct v3dv_subpass *subpass;

   /* Note: We can't use just a MESA_SHADER_STAGES array because we also need
    * to track binning shaders. Note these will be freed once the pipeline
    * has been compiled.
    */
   struct v3dv_pipeline_stage *vs;
   struct v3dv_pipeline_stage *vs_bin;
   struct v3dv_pipeline_stage *gs;
   struct v3dv_pipeline_stage *gs_bin;
   struct v3dv_pipeline_stage *fs;
   struct v3dv_pipeline_stage *cs;

   /* Flags for whether optional pipeline stages are present, for convenience */
   bool has_gs;

   /* Spilling memory requirements */
   struct {
      struct v3dv_bo *bo;
      uint32_t size_per_thread;
   } spill;

   struct v3dv_dynamic_state dynamic_state;

   struct v3dv_pipeline_layout *layout;

   /* Whether this pipeline enables depth writes */
   bool z_updates_enable;

   enum v3dv_ez_state ez_state;

   bool msaa;
   bool sample_rate_shading;
   uint32_t sample_mask;

   bool primitive_restart;

   /* Accessed by binding. So vb[binding].stride is the stride of the vertex
    * array with such binding.
    */
   struct v3dv_pipeline_vertex_binding {
      uint32_t stride;
      uint32_t instance_divisor;
   } vb[MAX_VBS];
   uint32_t vb_count;

   /* Note that a lot of info from VkVertexInputAttributeDescription is
    * already prepacked, so here we are only storing those that need to be
    * rechecked later. The array must be indexed by driver location, since
    * that is the order in which we need to emit the attributes.
    */
   struct v3dv_pipeline_vertex_attrib {
      uint32_t binding;
      uint32_t offset;
      VkFormat vk_format;
   } va[MAX_VERTEX_ATTRIBS];
   uint32_t va_count;

   enum pipe_prim_type topology;

   struct v3dv_pipeline_shared_data *shared_data;

   /* It is the combined stages sha1, plus the pipeline key sha1. */
   unsigned char sha1[20];

   /* In general we can reuse v3dv_device->default_attribute_float, so note
    * that the following can be NULL.
    *
    * FIXME: the content of this BO will be small, so it could be improved to
    * be uploaded to a common BO. But as in most cases it will be NULL, it is
    * not a priority.
    */
   struct v3dv_bo *default_attribute_values;

   struct vpm_config vpm_cfg;
   struct vpm_config vpm_cfg_bin;

   /* If the pipeline should emit any of the stencil configuration packets */
   bool emit_stencil_cfg[2];

   /* Blend state */
   struct {
      /* Per-RT bit mask with blend enables */
      uint8_t enables;
      /* Per-RT prepacked blend config packets */
      uint8_t cfg[V3D_MAX_DRAW_BUFFERS][V3DV_BLEND_CFG_LENGTH];
      /* Flag indicating whether the blend factors in use require
       * color constants.
       */
      bool needs_color_constants;
      /* Mask with enabled color channels for each RT (4 bits per RT) */
      uint32_t color_write_masks;
   } blend;

   /* Depth bias */
   struct {
      bool enabled;
      bool is_z16;
   } depth_bias;

   /* Packets prepacked during pipeline creation */
   uint8_t cfg_bits[V3DV_CFG_BITS_LENGTH];
   uint8_t shader_state_record[V3DV_GL_SHADER_STATE_RECORD_LENGTH];
   uint8_t vcm_cache_size[V3DV_VCM_CACHE_SIZE_LENGTH];
   uint8_t vertex_attrs[V3DV_GL_SHADER_STATE_ATTRIBUTE_RECORD_LENGTH *
                        MAX_VERTEX_ATTRIBS];
   uint8_t stencil_cfg[2][V3DV_STENCIL_CFG_LENGTH];
};

static inline VkPipelineBindPoint
v3dv_pipeline_get_binding_point(struct v3dv_pipeline *pipeline)
{
   assert(pipeline->active_stages == VK_SHADER_STAGE_COMPUTE_BIT ||
          !(pipeline->active_stages & VK_SHADER_STAGE_COMPUTE_BIT));
   return pipeline->active_stages == VK_SHADER_STAGE_COMPUTE_BIT ?
      VK_PIPELINE_BIND_POINT_COMPUTE : VK_PIPELINE_BIND_POINT_GRAPHICS;
}

static inline struct v3dv_descriptor_state*
v3dv_cmd_buffer_get_descriptor_state(struct v3dv_cmd_buffer *cmd_buffer,
                                     struct v3dv_pipeline *pipeline)
{
   if (v3dv_pipeline_get_binding_point(pipeline) == VK_PIPELINE_BIND_POINT_COMPUTE)
      return &cmd_buffer->state.compute.descriptor_state;
   else
      return &cmd_buffer->state.gfx.descriptor_state;
}

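/* Illustrative usage (assumption, not actual driver code): state-emission
 * code can fetch the descriptor state matching the pipeline's bind point
 * without caring whether it is a graphics or a compute pipeline:
 *
 *    struct v3dv_descriptor_state *ds =
 *       v3dv_cmd_buffer_get_descriptor_state(cmd_buffer, pipeline);
 */
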
const nir_shader_compiler_options *v3dv_pipeline_get_nir_options(void);

uint32_t v3dv_physical_device_vendor_id(struct v3dv_physical_device *dev);
uint32_t v3dv_physical_device_device_id(struct v3dv_physical_device *dev);

#ifdef DEBUG
#define v3dv_debug_ignored_stype(sType) \
   fprintf(stderr, "%s: ignored VkStructureType %u:%s\n\n", __func__, (sType), vk_StructureType_to_str(sType))
#else
#define v3dv_debug_ignored_stype(sType)
#endif

const uint8_t *v3dv_get_format_swizzle(struct v3dv_device *device, VkFormat f);
uint8_t v3dv_get_tex_return_size(const struct v3dv_format *vf, bool compare_enable);
const struct v3dv_format *
v3dv_get_compatible_tfu_format(struct v3dv_device *device,
                               uint32_t bpp, VkFormat *out_vk_format);
bool v3dv_buffer_format_supports_features(struct v3dv_device *device,
                                          VkFormat vk_format,
                                          VkFormatFeatureFlags features);

struct v3dv_cl_reloc v3dv_write_uniforms(struct v3dv_cmd_buffer *cmd_buffer,
                                         struct v3dv_pipeline *pipeline,
                                         struct v3dv_shader_variant *variant);

struct v3dv_cl_reloc v3dv_write_uniforms_wg_offsets(struct v3dv_cmd_buffer *cmd_buffer,
                                                    struct v3dv_pipeline *pipeline,
                                                    struct v3dv_shader_variant *variant,
                                                    uint32_t **wg_count_offsets);

struct v3dv_shader_variant *
v3dv_get_shader_variant(struct v3dv_pipeline_stage *p_stage,
                        struct v3dv_pipeline_cache *cache,
                        struct v3d_key *key,
                        size_t key_size,
                        const VkAllocationCallbacks *pAllocator,
                        VkResult *out_vk_result);

struct v3dv_shader_variant *
v3dv_shader_variant_create(struct v3dv_device *device,
                           enum broadcom_shader_stage stage,
                           struct v3d_prog_data *prog_data,
                           uint32_t prog_data_size,
                           uint32_t assembly_offset,
                           uint64_t *qpu_insts,
                           uint32_t qpu_insts_size,
                           VkResult *out_vk_result);

void
|
|
v3dv_shader_variant_destroy(struct v3dv_device *device,
|
|
struct v3dv_shader_variant *variant);
|
|
|
|
static inline void
|
|
v3dv_pipeline_shared_data_ref(struct v3dv_pipeline_shared_data *shared_data)
|
|
{
|
|
assert(shared_data && shared_data->ref_cnt >= 1);
|
|
p_atomic_inc(&shared_data->ref_cnt);
|
|
}
|
|
|
|
void
|
|
v3dv_pipeline_shared_data_destroy(struct v3dv_device *device,
|
|
struct v3dv_pipeline_shared_data *shared_data);
|
|
|
|
static inline void
|
|
v3dv_pipeline_shared_data_unref(struct v3dv_device *device,
|
|
struct v3dv_pipeline_shared_data *shared_data)
|
|
{
|
|
assert(shared_data && shared_data->ref_cnt >= 1);
|
|
if (p_atomic_dec_zero(&shared_data->ref_cnt))
|
|
v3dv_pipeline_shared_data_destroy(device, shared_data);
|
|
}
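
/* Usage sketch for the reference-counting helpers above: every ref must be
 * balanced by an unref, and the shared data is destroyed when the last
 * reference is dropped (the surrounding object names are illustrative):
 *
 *    v3dv_pipeline_shared_data_ref(shared_data);
 *    ...
 *    v3dv_pipeline_shared_data_unref(device, shared_data);
 */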

struct v3dv_descriptor *
v3dv_descriptor_map_get_descriptor(struct v3dv_descriptor_state *descriptor_state,
                                   struct v3dv_descriptor_map *map,
                                   struct v3dv_pipeline_layout *pipeline_layout,
                                   uint32_t index,
                                   uint32_t *dynamic_offset);

const struct v3dv_sampler *
v3dv_descriptor_map_get_sampler(struct v3dv_descriptor_state *descriptor_state,
                                struct v3dv_descriptor_map *map,
                                struct v3dv_pipeline_layout *pipeline_layout,
                                uint32_t index);

struct v3dv_cl_reloc
v3dv_descriptor_map_get_sampler_state(struct v3dv_device *device,
                                      struct v3dv_descriptor_state *descriptor_state,
                                      struct v3dv_descriptor_map *map,
                                      struct v3dv_pipeline_layout *pipeline_layout,
                                      uint32_t index);

struct v3dv_cl_reloc
v3dv_descriptor_map_get_texture_shader_state(struct v3dv_device *device,
                                             struct v3dv_descriptor_state *descriptor_state,
                                             struct v3dv_descriptor_map *map,
                                             struct v3dv_pipeline_layout *pipeline_layout,
                                             uint32_t index);

const struct v3dv_format*
v3dv_descriptor_map_get_texture_format(struct v3dv_descriptor_state *descriptor_state,
                                       struct v3dv_descriptor_map *map,
                                       struct v3dv_pipeline_layout *pipeline_layout,
                                       uint32_t index,
                                       VkFormat *out_vk_format);

struct v3dv_bo*
v3dv_descriptor_map_get_texture_bo(struct v3dv_descriptor_state *descriptor_state,
                                   struct v3dv_descriptor_map *map,
                                   struct v3dv_pipeline_layout *pipeline_layout,
                                   uint32_t index);

static inline const struct v3dv_sampler *
v3dv_immutable_samplers(const struct v3dv_descriptor_set_layout *set,
                        const struct v3dv_descriptor_set_binding_layout *binding)
{
   assert(binding->immutable_samplers_offset);
   return (const struct v3dv_sampler *) ((const char *) set + binding->immutable_samplers_offset);
}
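
/* Sketch of how a binding's immutable samplers are typically indexed (the
 * variable names are illustrative):
 *
 *    const struct v3dv_sampler *samplers =
 *       v3dv_immutable_samplers(set_layout, binding_layout);
 *    const struct v3dv_sampler *sampler = &samplers[array_index];
 */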

void v3dv_pipeline_cache_init(struct v3dv_pipeline_cache *cache,
                              struct v3dv_device *device,
                              VkPipelineCacheCreateFlags,
                              bool cache_enabled);

void v3dv_pipeline_cache_finish(struct v3dv_pipeline_cache *cache);

void v3dv_pipeline_cache_upload_nir(struct v3dv_pipeline *pipeline,
                                    struct v3dv_pipeline_cache *cache,
                                    nir_shader *nir,
                                    unsigned char sha1_key[20]);

nir_shader* v3dv_pipeline_cache_search_for_nir(struct v3dv_pipeline *pipeline,
                                               struct v3dv_pipeline_cache *cache,
                                               const nir_shader_compiler_options *nir_options,
                                               unsigned char sha1_key[20]);

struct v3dv_pipeline_shared_data *
v3dv_pipeline_cache_search_for_pipeline(struct v3dv_pipeline_cache *cache,
                                        unsigned char sha1_key[20],
                                        bool *cache_hit);

void
v3dv_pipeline_cache_upload_pipeline(struct v3dv_pipeline *pipeline,
                                    struct v3dv_pipeline_cache *cache);

struct v3dv_bo *
v3dv_pipeline_create_default_attribute_values(struct v3dv_device *device,
                                              struct v3dv_pipeline *pipeline);

void v3dv_shader_module_internal_init(struct v3dv_device *device,
                                      struct vk_shader_module *module,
                                      nir_shader *nir);

#define V3DV_FROM_HANDLE(__v3dv_type, __name, __handle) \
   VK_FROM_HANDLE(__v3dv_type, __name, __handle)
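
/* Illustrative use at a Vulkan entrypoint (the entrypoint shown is a sketch,
 * not a declaration from this header):
 *
 *    VKAPI_ATTR void VKAPI_CALL
 *    v3dv_DestroyFoo(VkDevice _device, ...)
 *    {
 *       V3DV_FROM_HANDLE(v3dv_device, device, _device);
 *       ...
 *    }
 */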

VK_DEFINE_HANDLE_CASTS(v3dv_cmd_buffer, vk.base, VkCommandBuffer,
                       VK_OBJECT_TYPE_COMMAND_BUFFER)
VK_DEFINE_HANDLE_CASTS(v3dv_device, vk.base, VkDevice, VK_OBJECT_TYPE_DEVICE)
VK_DEFINE_HANDLE_CASTS(v3dv_instance, vk.base, VkInstance,
                       VK_OBJECT_TYPE_INSTANCE)
VK_DEFINE_HANDLE_CASTS(v3dv_physical_device, vk.base, VkPhysicalDevice,
                       VK_OBJECT_TYPE_PHYSICAL_DEVICE)
VK_DEFINE_HANDLE_CASTS(v3dv_queue, vk.base, VkQueue, VK_OBJECT_TYPE_QUEUE)

VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_cmd_pool, base, VkCommandPool,
                               VK_OBJECT_TYPE_COMMAND_POOL)
VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_buffer, base, VkBuffer,
                               VK_OBJECT_TYPE_BUFFER)
VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_buffer_view, base, VkBufferView,
                               VK_OBJECT_TYPE_BUFFER_VIEW)
VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_device_memory, base, VkDeviceMemory,
                               VK_OBJECT_TYPE_DEVICE_MEMORY)
VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_descriptor_pool, base, VkDescriptorPool,
                               VK_OBJECT_TYPE_DESCRIPTOR_POOL)
VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_descriptor_set, base, VkDescriptorSet,
                               VK_OBJECT_TYPE_DESCRIPTOR_SET)
VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_descriptor_set_layout, base,
                               VkDescriptorSetLayout,
                               VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT)
VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_descriptor_update_template, base,
                               VkDescriptorUpdateTemplate,
                               VK_OBJECT_TYPE_DESCRIPTOR_UPDATE_TEMPLATE)
VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_event, base, VkEvent, VK_OBJECT_TYPE_EVENT)
VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_fence, base, VkFence, VK_OBJECT_TYPE_FENCE)
VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_framebuffer, base, VkFramebuffer,
                               VK_OBJECT_TYPE_FRAMEBUFFER)
VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_image, vk.base, VkImage,
                               VK_OBJECT_TYPE_IMAGE)
VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_image_view, vk.base, VkImageView,
                               VK_OBJECT_TYPE_IMAGE_VIEW)
VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_pipeline, base, VkPipeline,
                               VK_OBJECT_TYPE_PIPELINE)
VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_pipeline_cache, base, VkPipelineCache,
                               VK_OBJECT_TYPE_PIPELINE_CACHE)
VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_pipeline_layout, base, VkPipelineLayout,
                               VK_OBJECT_TYPE_PIPELINE_LAYOUT)
VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_query_pool, base, VkQueryPool,
                               VK_OBJECT_TYPE_QUERY_POOL)
VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_render_pass, base, VkRenderPass,
                               VK_OBJECT_TYPE_RENDER_PASS)
VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_sampler, base, VkSampler,
                               VK_OBJECT_TYPE_SAMPLER)
VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_semaphore, base, VkSemaphore,
                               VK_OBJECT_TYPE_SEMAPHORE)

static inline int
v3dv_ioctl(int fd, unsigned long request, void *arg)
{
   if (using_v3d_simulator)
      return v3d_simulator_ioctl(fd, request, arg);
   else
      return drmIoctl(fd, request, arg);
}
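
/* Example call through the wrapper above, so the same code path works on
 * real hardware and on the simulator (the ioctl and variables are shown as
 * an assumed example based on the v3d uapi):
 *
 *    struct drm_v3d_wait_bo wait = { .handle = bo_handle, .timeout_ns = ns };
 *    int ret = v3dv_ioctl(fd, DRM_IOCTL_V3D_WAIT_BO, &wait);
 */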

/* Flags OOM conditions in command buffer state.
 *
 * Note that no-op jobs don't have a command buffer reference.
 */
static inline void
v3dv_flag_oom(struct v3dv_cmd_buffer *cmd_buffer, struct v3dv_job *job)
{
   if (cmd_buffer) {
      cmd_buffer->state.oom = true;
   } else {
      assert(job);
      if (job->cmd_buffer)
         job->cmd_buffer->state.oom = true;
   }
}

#define v3dv_return_if_oom(_cmd_buffer, _job) do {                  \
   const struct v3dv_cmd_buffer *__cmd_buffer = _cmd_buffer;        \
   if (__cmd_buffer && __cmd_buffer->state.oom)                     \
      return;                                                       \
   const struct v3dv_job *__job = _job;                             \
   if (__job && __job->cmd_buffer && __job->cmd_buffer->state.oom)  \
      return;                                                       \
} while(0)
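
/* Typical pattern in command buffer recording code (sketch): flag OOM when
 * an allocation fails and bail out early in later entrypoints:
 *
 *    if (!bo)
 *       v3dv_flag_oom(cmd_buffer, NULL);
 *    ...
 *    v3dv_return_if_oom(cmd_buffer, NULL);
 */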

static inline uint32_t
u64_hash(const void *key)
{
   return _mesa_hash_data(key, sizeof(uint64_t));
}

static inline bool
u64_compare(const void *key1, const void *key2)
{
   return memcmp(key1, key2, sizeof(uint64_t)) == 0;
}
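
/* These are intended as key callbacks for a util/hash_table keyed by 64-bit
 * values, e.g. (the table pointer name is illustrative):
 *
 *    struct hash_table *ht =
 *       _mesa_hash_table_create(NULL, u64_hash, u64_compare);
 */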

/* Helper to call hw ver specific functions */
#define v3dv_X(device, thing) ({                      \
   __typeof(&v3d42_##thing) v3d_X_thing;              \
   switch (device->devinfo.ver) {                     \
   case 42:                                           \
      v3d_X_thing = &v3d42_##thing;                   \
      break;                                          \
   default:                                           \
      unreachable("Unsupported hardware generation"); \
   }                                                  \
   v3d_X_thing;                                       \
})
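
/* Example of dispatching to a per-version implementation through v3dv_X
 * ('emit_foo' stands for any function with a v3d42_-prefixed definition in
 * the version-specific code; it is only an illustrative name):
 *
 *    v3dv_X(device, emit_foo)(job);
 */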

/* v3d_macros from common requires v3dX and V3DX definitions. Below we need to
 * define v3dX for each version supported, because when we compile code that
 * is not version-specific, all version-specific macros need to be already
 * defined.
 */
#ifdef v3dX
#  include "v3dvx_private.h"
#else
#  define v3dX(x) v3d42_##x
#  include "v3dvx_private.h"
#  undef v3dX
#endif

#ifdef ANDROID
VkResult
v3dv_gralloc_info(struct v3dv_device *device,
                  const VkNativeBufferANDROID *gralloc_info,
                  int *out_dmabuf,
                  int *out_stride,
                  int *out_size,
                  uint64_t *out_modifier);

VkResult
v3dv_import_native_buffer_fd(VkDevice device_h,
                             int dma_buf,
                             const VkAllocationCallbacks *alloc,
                             VkImage image_h);
#endif /* ANDROID */

#endif /* V3DV_PRIVATE_H */