701 lines
26 KiB
C
701 lines
26 KiB
C
/*
|
|
* Copyright © 2019 Raspberry Pi Ltd
|
|
*
|
|
* Based in part on v3d driver which is:
|
|
*
|
|
* Copyright © 2014-2017 Broadcom
|
|
*
|
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
|
* copy of this software and associated documentation files (the "Software"),
|
|
* to deal in the Software without restriction, including without limitation
|
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
* and/or sell copies of the Software, and to permit persons to whom the
|
|
* Software is furnished to do so, subject to the following conditions:
|
|
*
|
|
* The above copyright notice and this permission notice (including the next
|
|
* paragraph) shall be included in all copies or substantial portions of the
|
|
* Software.
|
|
*
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
|
* IN THE SOFTWARE.
|
|
*/
|
|
|
|
#include "v3dv_private.h"
|
|
|
|
/* The only version specific structure that we need is
|
|
* TMU_CONFIG_PARAMETER_1. This didn't seem to change significantly from
|
|
* previous V3D versions and we don't expect that to change, so for now let's
|
|
* just hardcode the V3D version here.
|
|
*/
|
|
#define V3D_VERSION 41
|
|
#include "broadcom/common/v3d_macros.h"
|
|
#include "broadcom/cle/v3dx_pack.h"
|
|
|
|
/* Our Vulkan resource indices represent indices in descriptor maps which
|
|
* include all shader stages, so we need to size the arrays below
|
|
* accordingly. For now we only support a maximum of 3 stages: VS, GS, FS.
|
|
*/
|
|
#define MAX_STAGES 3
|
|
|
|
#define MAX_TOTAL_TEXTURE_SAMPLERS (V3D_MAX_TEXTURE_SAMPLERS * MAX_STAGES)
|
|
struct texture_bo_list {
|
|
struct v3dv_bo *tex[MAX_TOTAL_TEXTURE_SAMPLERS];
|
|
};
|
|
|
|
/* This tracks state BOs for both textures and samplers, so we
|
|
* multiply by 2.
|
|
*/
|
|
#define MAX_TOTAL_STATES (2 * V3D_MAX_TEXTURE_SAMPLERS * MAX_STAGES)
|
|
struct state_bo_list {
|
|
uint32_t count;
|
|
struct v3dv_bo *states[MAX_TOTAL_STATES];
|
|
};
|
|
|
|
#define MAX_TOTAL_UNIFORM_BUFFERS ((MAX_UNIFORM_BUFFERS + \
|
|
MAX_INLINE_UNIFORM_BUFFERS) * MAX_STAGES)
|
|
#define MAX_TOTAL_STORAGE_BUFFERS (MAX_STORAGE_BUFFERS * MAX_STAGES)
|
|
struct buffer_bo_list {
|
|
struct v3dv_bo *ubo[MAX_TOTAL_UNIFORM_BUFFERS];
|
|
struct v3dv_bo *ssbo[MAX_TOTAL_STORAGE_BUFFERS];
|
|
};
|
|
|
|
static bool
|
|
state_bo_in_list(struct state_bo_list *list, struct v3dv_bo *bo)
|
|
{
|
|
for (int i = 0; i < list->count; i++) {
|
|
if (list->states[i] == bo)
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
static void
|
|
push_constants_bo_free(VkDevice _device,
|
|
uint64_t bo_ptr,
|
|
VkAllocationCallbacks *alloc)
|
|
{
|
|
V3DV_FROM_HANDLE(v3dv_device, device, _device);
|
|
v3dv_bo_free(device, (struct v3dv_bo *)(uintptr_t) bo_ptr);
|
|
}
|
|
|
|
/*
|
|
* This method checks if the ubo used for push constants is needed to be
|
|
* updated or not.
|
|
*
|
|
* push contants ubo is only used for push constants accessed by a non-const
|
|
* index.
|
|
*/
|
|
static void
|
|
check_push_constants_ubo(struct v3dv_cmd_buffer *cmd_buffer,
|
|
struct v3dv_pipeline *pipeline)
|
|
{
|
|
if (!(cmd_buffer->state.dirty & V3DV_CMD_DIRTY_PUSH_CONSTANTS_UBO) ||
|
|
pipeline->layout->push_constant_size == 0)
|
|
return;
|
|
|
|
if (cmd_buffer->push_constants_resource.bo == NULL) {
|
|
cmd_buffer->push_constants_resource.bo =
|
|
v3dv_bo_alloc(cmd_buffer->device, 4096, "push constants", true);
|
|
|
|
v3dv_job_add_bo(cmd_buffer->state.job,
|
|
cmd_buffer->push_constants_resource.bo);
|
|
|
|
if (!cmd_buffer->push_constants_resource.bo) {
|
|
fprintf(stderr, "Failed to allocate memory for push constants\n");
|
|
abort();
|
|
}
|
|
|
|
bool ok = v3dv_bo_map(cmd_buffer->device,
|
|
cmd_buffer->push_constants_resource.bo,
|
|
cmd_buffer->push_constants_resource.bo->size);
|
|
if (!ok) {
|
|
fprintf(stderr, "failed to map push constants buffer\n");
|
|
abort();
|
|
}
|
|
} else {
|
|
if (cmd_buffer->push_constants_resource.offset +
|
|
cmd_buffer->state.push_constants_size <=
|
|
cmd_buffer->push_constants_resource.bo->size) {
|
|
cmd_buffer->push_constants_resource.offset +=
|
|
cmd_buffer->state.push_constants_size;
|
|
} else {
|
|
/* We ran out of space so we'll have to allocate a new buffer but we
|
|
* need to ensure the old one is preserved until the end of the command
|
|
* buffer life and make sure it is eventually freed. We use the
|
|
* private object machinery in the command buffer for this.
|
|
*/
|
|
v3dv_cmd_buffer_add_private_obj(
|
|
cmd_buffer, (uintptr_t) cmd_buffer->push_constants_resource.bo,
|
|
(v3dv_cmd_buffer_private_obj_destroy_cb) push_constants_bo_free);
|
|
|
|
/* Now call back so we create a new BO */
|
|
cmd_buffer->push_constants_resource.bo = NULL;
|
|
check_push_constants_ubo(cmd_buffer, pipeline);
|
|
return;
|
|
}
|
|
}
|
|
|
|
assert(cmd_buffer->state.push_constants_size <= MAX_PUSH_CONSTANTS_SIZE);
|
|
memcpy(cmd_buffer->push_constants_resource.bo->map +
|
|
cmd_buffer->push_constants_resource.offset,
|
|
cmd_buffer->state.push_constants_data,
|
|
cmd_buffer->state.push_constants_size);
|
|
|
|
cmd_buffer->state.dirty &= ~V3DV_CMD_DIRTY_PUSH_CONSTANTS_UBO;
|
|
}
|
|
|
|
/** V3D 4.x TMU configuration parameter 0 (texture) */
|
|
static void
|
|
write_tmu_p0(struct v3dv_cmd_buffer *cmd_buffer,
|
|
struct v3dv_pipeline *pipeline,
|
|
enum broadcom_shader_stage stage,
|
|
struct v3dv_cl_out **uniforms,
|
|
uint32_t data,
|
|
struct texture_bo_list *tex_bos,
|
|
struct state_bo_list *state_bos)
|
|
{
|
|
uint32_t texture_idx = v3d_unit_data_get_unit(data);
|
|
|
|
struct v3dv_descriptor_state *descriptor_state =
|
|
v3dv_cmd_buffer_get_descriptor_state(cmd_buffer, pipeline);
|
|
|
|
/* We need to ensure that the texture bo is added to the job */
|
|
struct v3dv_bo *texture_bo =
|
|
v3dv_descriptor_map_get_texture_bo(descriptor_state,
|
|
&pipeline->shared_data->maps[stage]->texture_map,
|
|
pipeline->layout, texture_idx);
|
|
assert(texture_bo);
|
|
assert(texture_idx < V3D_MAX_TEXTURE_SAMPLERS);
|
|
tex_bos->tex[texture_idx] = texture_bo;
|
|
|
|
struct v3dv_cl_reloc state_reloc =
|
|
v3dv_descriptor_map_get_texture_shader_state(cmd_buffer->device, descriptor_state,
|
|
&pipeline->shared_data->maps[stage]->texture_map,
|
|
pipeline->layout,
|
|
texture_idx);
|
|
|
|
cl_aligned_u32(uniforms, state_reloc.bo->offset +
|
|
state_reloc.offset +
|
|
v3d_unit_data_get_offset(data));
|
|
|
|
/* Texture and Sampler states are typically suballocated, so they are
|
|
* usually the same BO: only flag them once to avoid trying to add them
|
|
* multiple times to the job later.
|
|
*/
|
|
if (!state_bo_in_list(state_bos, state_reloc.bo)) {
|
|
assert(state_bos->count < 2 * V3D_MAX_TEXTURE_SAMPLERS);
|
|
state_bos->states[state_bos->count++] = state_reloc.bo;
|
|
}
|
|
}
|
|
|
|
/** V3D 4.x TMU configuration parameter 1 (sampler) */
|
|
static void
|
|
write_tmu_p1(struct v3dv_cmd_buffer *cmd_buffer,
|
|
struct v3dv_pipeline *pipeline,
|
|
enum broadcom_shader_stage stage,
|
|
struct v3dv_cl_out **uniforms,
|
|
uint32_t data,
|
|
struct state_bo_list *state_bos)
|
|
{
|
|
uint32_t sampler_idx = v3d_unit_data_get_unit(data);
|
|
struct v3dv_descriptor_state *descriptor_state =
|
|
v3dv_cmd_buffer_get_descriptor_state(cmd_buffer, pipeline);
|
|
|
|
assert(sampler_idx != V3DV_NO_SAMPLER_16BIT_IDX &&
|
|
sampler_idx != V3DV_NO_SAMPLER_32BIT_IDX);
|
|
|
|
struct v3dv_cl_reloc sampler_state_reloc =
|
|
v3dv_descriptor_map_get_sampler_state(cmd_buffer->device, descriptor_state,
|
|
&pipeline->shared_data->maps[stage]->sampler_map,
|
|
pipeline->layout, sampler_idx);
|
|
|
|
const struct v3dv_sampler *sampler =
|
|
v3dv_descriptor_map_get_sampler(descriptor_state,
|
|
&pipeline->shared_data->maps[stage]->sampler_map,
|
|
pipeline->layout, sampler_idx);
|
|
assert(sampler);
|
|
|
|
/* Set unnormalized coordinates flag from sampler object */
|
|
uint32_t p1_packed = v3d_unit_data_get_offset(data);
|
|
if (sampler->unnormalized_coordinates) {
|
|
struct V3DX(TMU_CONFIG_PARAMETER_1) p1_unpacked;
|
|
V3DX(TMU_CONFIG_PARAMETER_1_unpack)((uint8_t *)&p1_packed, &p1_unpacked);
|
|
p1_unpacked.unnormalized_coordinates = true;
|
|
V3DX(TMU_CONFIG_PARAMETER_1_pack)(NULL, (uint8_t *)&p1_packed,
|
|
&p1_unpacked);
|
|
}
|
|
|
|
cl_aligned_u32(uniforms, sampler_state_reloc.bo->offset +
|
|
sampler_state_reloc.offset +
|
|
p1_packed);
|
|
|
|
/* Texture and Sampler states are typically suballocated, so they are
|
|
* usually the same BO: only flag them once to avoid trying to add them
|
|
* multiple times to the job later.
|
|
*/
|
|
if (!state_bo_in_list(state_bos, sampler_state_reloc.bo)) {
|
|
assert(state_bos->count < 2 * V3D_MAX_TEXTURE_SAMPLERS);
|
|
state_bos->states[state_bos->count++] = sampler_state_reloc.bo;
|
|
}
|
|
}
|
|
|
|
static void
|
|
write_ubo_ssbo_uniforms(struct v3dv_cmd_buffer *cmd_buffer,
|
|
struct v3dv_pipeline *pipeline,
|
|
enum broadcom_shader_stage stage,
|
|
struct v3dv_cl_out **uniforms,
|
|
enum quniform_contents content,
|
|
uint32_t data,
|
|
struct buffer_bo_list *buffer_bos)
|
|
{
|
|
struct v3dv_descriptor_state *descriptor_state =
|
|
v3dv_cmd_buffer_get_descriptor_state(cmd_buffer, pipeline);
|
|
|
|
struct v3dv_descriptor_map *map =
|
|
content == QUNIFORM_UBO_ADDR || content == QUNIFORM_GET_UBO_SIZE ?
|
|
&pipeline->shared_data->maps[stage]->ubo_map :
|
|
&pipeline->shared_data->maps[stage]->ssbo_map;
|
|
|
|
uint32_t offset =
|
|
content == QUNIFORM_UBO_ADDR ?
|
|
v3d_unit_data_get_offset(data) :
|
|
0;
|
|
|
|
uint32_t dynamic_offset = 0;
|
|
|
|
/* For ubos, index is shifted, as 0 is reserved for push constants
|
|
* and 1..MAX_INLINE_UNIFORM_BUFFERS are reserved for inline uniform
|
|
* buffers.
|
|
*/
|
|
uint32_t index = v3d_unit_data_get_unit(data);
|
|
if (content == QUNIFORM_UBO_ADDR && index == 0) {
|
|
/* Ensure the push constants UBO is created and updated. This also
|
|
* adds the BO to the job so we don't need to track it in buffer_bos.
|
|
*/
|
|
check_push_constants_ubo(cmd_buffer, pipeline);
|
|
|
|
struct v3dv_cl_reloc *resource =
|
|
&cmd_buffer->push_constants_resource;
|
|
assert(resource->bo);
|
|
|
|
cl_aligned_u32(uniforms, resource->bo->offset +
|
|
resource->offset +
|
|
offset + dynamic_offset);
|
|
} else {
|
|
if (content == QUNIFORM_UBO_ADDR) {
|
|
/* We reserve index 0 for push constants and artificially increase our
|
|
* indices by one for that reason, fix that now before accessing the
|
|
* descriptor map.
|
|
*/
|
|
assert(index > 0);
|
|
index--;
|
|
} else {
|
|
index = data;
|
|
}
|
|
|
|
struct v3dv_descriptor *descriptor =
|
|
v3dv_descriptor_map_get_descriptor(descriptor_state, map,
|
|
pipeline->layout,
|
|
index, &dynamic_offset);
|
|
|
|
/* Inline UBO descriptors store UBO data in descriptor pool memory,
|
|
* instead of an external buffer.
|
|
*/
|
|
assert(descriptor);
|
|
|
|
if (content == QUNIFORM_GET_SSBO_SIZE ||
|
|
content == QUNIFORM_GET_UBO_SIZE) {
|
|
cl_aligned_u32(uniforms, descriptor->range);
|
|
} else {
|
|
/* Inline uniform buffers store their contents in pool memory instead
|
|
* of an external buffer.
|
|
*/
|
|
struct v3dv_bo *bo;
|
|
uint32_t addr;
|
|
if (descriptor->type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK) {
|
|
assert(dynamic_offset == 0);
|
|
struct v3dv_cl_reloc reloc =
|
|
v3dv_descriptor_map_get_descriptor_bo(cmd_buffer->device,
|
|
descriptor_state, map,
|
|
pipeline->layout, index,
|
|
NULL);
|
|
bo = reloc.bo;
|
|
addr = reloc.bo->offset + reloc.offset + offset;
|
|
} else {
|
|
assert(descriptor->buffer);
|
|
assert(descriptor->buffer->mem);
|
|
assert(descriptor->buffer->mem->bo);
|
|
|
|
bo = descriptor->buffer->mem->bo;
|
|
addr = bo->offset +
|
|
descriptor->buffer->mem_offset +
|
|
descriptor->offset +
|
|
offset + dynamic_offset;
|
|
}
|
|
|
|
cl_aligned_u32(uniforms, addr);
|
|
|
|
if (content == QUNIFORM_UBO_ADDR) {
|
|
assert(index < MAX_TOTAL_UNIFORM_BUFFERS);
|
|
buffer_bos->ubo[index] = bo;
|
|
} else {
|
|
assert(index < MAX_TOTAL_STORAGE_BUFFERS);
|
|
buffer_bos->ssbo[index] = bo;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
static void
|
|
write_inline_uniform(struct v3dv_cl_out **uniforms,
|
|
uint32_t index,
|
|
uint32_t offset,
|
|
struct v3dv_cmd_buffer *cmd_buffer,
|
|
struct v3dv_pipeline *pipeline,
|
|
enum broadcom_shader_stage stage)
|
|
{
|
|
assert(index < MAX_INLINE_UNIFORM_BUFFERS);
|
|
|
|
struct v3dv_descriptor_state *descriptor_state =
|
|
v3dv_cmd_buffer_get_descriptor_state(cmd_buffer, pipeline);
|
|
|
|
struct v3dv_descriptor_map *map =
|
|
&pipeline->shared_data->maps[stage]->ubo_map;
|
|
|
|
struct v3dv_cl_reloc reloc =
|
|
v3dv_descriptor_map_get_descriptor_bo(cmd_buffer->device,
|
|
descriptor_state, map,
|
|
pipeline->layout, index,
|
|
NULL);
|
|
|
|
/* Offset comes in 32-bit units */
|
|
uint32_t *addr = reloc.bo->map + reloc.offset + 4 * offset;
|
|
cl_aligned_u32(uniforms, *addr);
|
|
}
|
|
|
|
static uint32_t
|
|
get_texture_size_from_image_view(struct v3dv_image_view *image_view,
|
|
enum quniform_contents contents,
|
|
uint32_t data)
|
|
{
|
|
switch(contents) {
|
|
case QUNIFORM_IMAGE_WIDTH:
|
|
case QUNIFORM_TEXTURE_WIDTH:
|
|
/* We don't u_minify the values, as we are using the image_view
|
|
* extents
|
|
*/
|
|
return image_view->vk.extent.width;
|
|
case QUNIFORM_IMAGE_HEIGHT:
|
|
case QUNIFORM_TEXTURE_HEIGHT:
|
|
return image_view->vk.extent.height;
|
|
case QUNIFORM_IMAGE_DEPTH:
|
|
case QUNIFORM_TEXTURE_DEPTH:
|
|
return image_view->vk.extent.depth;
|
|
case QUNIFORM_IMAGE_ARRAY_SIZE:
|
|
case QUNIFORM_TEXTURE_ARRAY_SIZE:
|
|
if (image_view->vk.view_type != VK_IMAGE_VIEW_TYPE_CUBE_ARRAY) {
|
|
return image_view->vk.layer_count;
|
|
} else {
|
|
assert(image_view->vk.layer_count % 6 == 0);
|
|
return image_view->vk.layer_count / 6;
|
|
}
|
|
case QUNIFORM_TEXTURE_LEVELS:
|
|
return image_view->vk.level_count;
|
|
case QUNIFORM_TEXTURE_SAMPLES:
|
|
assert(image_view->vk.image);
|
|
return image_view->vk.image->samples;
|
|
default:
|
|
unreachable("Bad texture size field");
|
|
}
|
|
}
|
|
|
|
|
|
static uint32_t
|
|
get_texture_size_from_buffer_view(struct v3dv_buffer_view *buffer_view,
|
|
enum quniform_contents contents,
|
|
uint32_t data)
|
|
{
|
|
switch(contents) {
|
|
case QUNIFORM_IMAGE_WIDTH:
|
|
case QUNIFORM_TEXTURE_WIDTH:
|
|
return buffer_view->num_elements;
|
|
/* Only size can be queried for texel buffers */
|
|
default:
|
|
unreachable("Bad texture size field for texel buffers");
|
|
}
|
|
}
|
|
|
|
static uint32_t
|
|
get_texture_size(struct v3dv_cmd_buffer *cmd_buffer,
|
|
struct v3dv_pipeline *pipeline,
|
|
enum broadcom_shader_stage stage,
|
|
enum quniform_contents contents,
|
|
uint32_t data)
|
|
{
|
|
uint32_t texture_idx = data;
|
|
|
|
struct v3dv_descriptor_state *descriptor_state =
|
|
v3dv_cmd_buffer_get_descriptor_state(cmd_buffer, pipeline);
|
|
|
|
struct v3dv_descriptor *descriptor =
|
|
v3dv_descriptor_map_get_descriptor(descriptor_state,
|
|
&pipeline->shared_data->maps[stage]->texture_map,
|
|
pipeline->layout,
|
|
texture_idx, NULL);
|
|
|
|
assert(descriptor);
|
|
|
|
switch (descriptor->type) {
|
|
case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
|
|
case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
|
|
case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
|
|
case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
|
|
return get_texture_size_from_image_view(descriptor->image_view,
|
|
contents, data);
|
|
case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
|
|
case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
|
|
return get_texture_size_from_buffer_view(descriptor->buffer_view,
|
|
contents, data);
|
|
default:
|
|
unreachable("Wrong descriptor for getting texture size");
|
|
}
|
|
}
|
|
|
|
struct v3dv_cl_reloc
|
|
v3dv_write_uniforms_wg_offsets(struct v3dv_cmd_buffer *cmd_buffer,
|
|
struct v3dv_pipeline *pipeline,
|
|
struct v3dv_shader_variant *variant,
|
|
uint32_t **wg_count_offsets)
|
|
{
|
|
struct v3d_uniform_list *uinfo =
|
|
&variant->prog_data.base->uniforms;
|
|
struct v3dv_dynamic_state *dynamic = &cmd_buffer->state.dynamic;
|
|
|
|
struct v3dv_job *job = cmd_buffer->state.job;
|
|
assert(job);
|
|
assert(job->cmd_buffer == cmd_buffer);
|
|
|
|
struct texture_bo_list tex_bos = { 0 };
|
|
struct state_bo_list state_bos = { 0 };
|
|
struct buffer_bo_list buffer_bos = { 0 };
|
|
|
|
/* The hardware always pre-fetches the next uniform (also when there
|
|
* aren't any), so we always allocate space for an extra slot. This
|
|
* fixes MMU exceptions reported since Linux kernel 5.4 when the
|
|
* uniforms fill up the tail bytes of a page in the indirect
|
|
* BO. In that scenario, when the hardware pre-fetches after reading
|
|
* the last uniform it will read beyond the end of the page and trigger
|
|
* the MMU exception.
|
|
*/
|
|
v3dv_cl_ensure_space(&job->indirect, (uinfo->count + 1) * 4, 4);
|
|
|
|
struct v3dv_cl_reloc uniform_stream = v3dv_cl_get_address(&job->indirect);
|
|
|
|
struct v3dv_cl_out *uniforms = cl_start(&job->indirect);
|
|
|
|
for (int i = 0; i < uinfo->count; i++) {
|
|
uint32_t data = uinfo->data[i];
|
|
|
|
switch (uinfo->contents[i]) {
|
|
case QUNIFORM_CONSTANT:
|
|
cl_aligned_u32(&uniforms, data);
|
|
break;
|
|
|
|
case QUNIFORM_UNIFORM:
|
|
cl_aligned_u32(&uniforms, cmd_buffer->state.push_constants_data[data]);
|
|
break;
|
|
|
|
case QUNIFORM_INLINE_UBO_0:
|
|
case QUNIFORM_INLINE_UBO_1:
|
|
case QUNIFORM_INLINE_UBO_2:
|
|
case QUNIFORM_INLINE_UBO_3:
|
|
write_inline_uniform(&uniforms,
|
|
uinfo->contents[i] - QUNIFORM_INLINE_UBO_0, data,
|
|
cmd_buffer, pipeline, variant->stage);
|
|
break;
|
|
|
|
case QUNIFORM_VIEWPORT_X_SCALE:
|
|
cl_aligned_f(&uniforms, dynamic->viewport.scale[0][0] * 256.0f);
|
|
break;
|
|
|
|
case QUNIFORM_VIEWPORT_Y_SCALE:
|
|
cl_aligned_f(&uniforms, dynamic->viewport.scale[0][1] * 256.0f);
|
|
break;
|
|
|
|
case QUNIFORM_VIEWPORT_Z_OFFSET:
|
|
cl_aligned_f(&uniforms, dynamic->viewport.translate[0][2]);
|
|
break;
|
|
|
|
case QUNIFORM_VIEWPORT_Z_SCALE:
|
|
cl_aligned_f(&uniforms, dynamic->viewport.scale[0][2]);
|
|
break;
|
|
|
|
case QUNIFORM_SSBO_OFFSET:
|
|
case QUNIFORM_UBO_ADDR:
|
|
case QUNIFORM_GET_SSBO_SIZE:
|
|
case QUNIFORM_GET_UBO_SIZE:
|
|
write_ubo_ssbo_uniforms(cmd_buffer, pipeline, variant->stage, &uniforms,
|
|
uinfo->contents[i], data, &buffer_bos);
|
|
|
|
break;
|
|
|
|
case QUNIFORM_IMAGE_TMU_CONFIG_P0:
|
|
case QUNIFORM_TMU_CONFIG_P0:
|
|
write_tmu_p0(cmd_buffer, pipeline, variant->stage,
|
|
&uniforms, data, &tex_bos, &state_bos);
|
|
break;
|
|
|
|
case QUNIFORM_TMU_CONFIG_P1:
|
|
write_tmu_p1(cmd_buffer, pipeline, variant->stage,
|
|
&uniforms, data, &state_bos);
|
|
break;
|
|
|
|
case QUNIFORM_IMAGE_WIDTH:
|
|
case QUNIFORM_IMAGE_HEIGHT:
|
|
case QUNIFORM_IMAGE_DEPTH:
|
|
case QUNIFORM_IMAGE_ARRAY_SIZE:
|
|
case QUNIFORM_TEXTURE_WIDTH:
|
|
case QUNIFORM_TEXTURE_HEIGHT:
|
|
case QUNIFORM_TEXTURE_DEPTH:
|
|
case QUNIFORM_TEXTURE_ARRAY_SIZE:
|
|
case QUNIFORM_TEXTURE_LEVELS:
|
|
case QUNIFORM_TEXTURE_SAMPLES:
|
|
cl_aligned_u32(&uniforms,
|
|
get_texture_size(cmd_buffer,
|
|
pipeline,
|
|
variant->stage,
|
|
uinfo->contents[i],
|
|
data));
|
|
break;
|
|
|
|
/* We generate this from geometry shaders to cap the generated gl_Layer
|
|
* to be within the number of layers of the framebuffer so we prevent the
|
|
* binner from trying to access tile state memory out of bounds (for
|
|
* layers that don't exist).
|
|
*
|
|
* Unfortunately, for secondary command buffers we may not know the
|
|
* number of layers in the framebuffer at this stage. Since we are
|
|
* only using this to sanitize the shader and it should not have any
|
|
* impact on correct shaders that emit valid values for gl_Layer,
|
|
* we just work around it by using the largest number of layers we
|
|
* support.
|
|
*
|
|
* FIXME: we could do better than this by recording in the job that
|
|
* the value at this uniform offset is not correct, and patch it when
|
|
* we execute the secondary command buffer into a primary, since we do
|
|
* have the correct number of layers at that point, but again, since this
|
|
* is only for sanityzing the shader and it only affects the specific case
|
|
* of secondary command buffers without framebuffer info available it
|
|
* might not be worth the trouble.
|
|
*
|
|
* With multiview the number of layers is dictated by the view mask
|
|
* and not by the framebuffer layers. We do set the job's frame tiling
|
|
* information correctly from the view mask in that case, however,
|
|
* secondary command buffers may not have valid frame tiling data,
|
|
* so when multiview is enabled, we always set the number of layers
|
|
* from the subpass view mask.
|
|
*/
|
|
case QUNIFORM_FB_LAYERS: {
|
|
const struct v3dv_cmd_buffer_state *state = &job->cmd_buffer->state;
|
|
const uint32_t view_mask =
|
|
state->pass->subpasses[state->subpass_idx].view_mask;
|
|
|
|
uint32_t num_layers;
|
|
if (view_mask != 0) {
|
|
num_layers = util_last_bit(view_mask);
|
|
} else if (job->frame_tiling.layers != 0) {
|
|
num_layers = job->frame_tiling.layers;
|
|
} else if (cmd_buffer->state.framebuffer) {
|
|
num_layers = cmd_buffer->state.framebuffer->layers;
|
|
} else {
|
|
assert(cmd_buffer->vk.level == VK_COMMAND_BUFFER_LEVEL_SECONDARY);
|
|
num_layers = 2048;
|
|
#if DEBUG
|
|
fprintf(stderr, "Skipping gl_LayerID shader sanity check for "
|
|
"secondary command buffer\n");
|
|
#endif
|
|
}
|
|
cl_aligned_u32(&uniforms, num_layers);
|
|
break;
|
|
}
|
|
|
|
case QUNIFORM_VIEW_INDEX:
|
|
cl_aligned_u32(&uniforms, job->cmd_buffer->state.view_index);
|
|
break;
|
|
|
|
case QUNIFORM_NUM_WORK_GROUPS:
|
|
assert(job->type == V3DV_JOB_TYPE_GPU_CSD);
|
|
assert(job->csd.wg_count[data] > 0);
|
|
if (wg_count_offsets)
|
|
wg_count_offsets[data] = (uint32_t *) uniforms;
|
|
cl_aligned_u32(&uniforms, job->csd.wg_count[data]);
|
|
break;
|
|
|
|
case QUNIFORM_WORK_GROUP_BASE:
|
|
assert(job->type == V3DV_JOB_TYPE_GPU_CSD);
|
|
cl_aligned_u32(&uniforms, job->csd.wg_base[data]);
|
|
break;
|
|
|
|
case QUNIFORM_SHARED_OFFSET:
|
|
assert(job->type == V3DV_JOB_TYPE_GPU_CSD);
|
|
assert(job->csd.shared_memory);
|
|
cl_aligned_u32(&uniforms, job->csd.shared_memory->offset);
|
|
break;
|
|
|
|
case QUNIFORM_SPILL_OFFSET:
|
|
assert(pipeline->spill.bo);
|
|
cl_aligned_u32(&uniforms, pipeline->spill.bo->offset);
|
|
break;
|
|
|
|
case QUNIFORM_SPILL_SIZE_PER_THREAD:
|
|
assert(pipeline->spill.size_per_thread > 0);
|
|
cl_aligned_u32(&uniforms, pipeline->spill.size_per_thread);
|
|
break;
|
|
|
|
default:
|
|
unreachable("unsupported quniform_contents uniform type\n");
|
|
}
|
|
}
|
|
|
|
cl_end(&job->indirect, uniforms);
|
|
|
|
for (int i = 0; i < MAX_TOTAL_TEXTURE_SAMPLERS; i++) {
|
|
if (tex_bos.tex[i])
|
|
v3dv_job_add_bo(job, tex_bos.tex[i]);
|
|
}
|
|
|
|
for (int i = 0; i < state_bos.count; i++)
|
|
v3dv_job_add_bo(job, state_bos.states[i]);
|
|
|
|
for (int i = 0; i < MAX_TOTAL_UNIFORM_BUFFERS; i++) {
|
|
if (buffer_bos.ubo[i])
|
|
v3dv_job_add_bo(job, buffer_bos.ubo[i]);
|
|
}
|
|
|
|
for (int i = 0; i < MAX_TOTAL_STORAGE_BUFFERS; i++) {
|
|
if (buffer_bos.ssbo[i])
|
|
v3dv_job_add_bo(job, buffer_bos.ssbo[i]);
|
|
}
|
|
|
|
if (job->csd.shared_memory)
|
|
v3dv_job_add_bo(job, job->csd.shared_memory);
|
|
|
|
if (pipeline->spill.bo)
|
|
v3dv_job_add_bo(job, pipeline->spill.bo);
|
|
|
|
return uniform_stream;
|
|
}
|
|
|
|
struct v3dv_cl_reloc
|
|
v3dv_write_uniforms(struct v3dv_cmd_buffer *cmd_buffer,
|
|
struct v3dv_pipeline *pipeline,
|
|
struct v3dv_shader_variant *variant)
|
|
{
|
|
return v3dv_write_uniforms_wg_offsets(cmd_buffer, pipeline, variant, NULL);
|
|
}
|