/*
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * based in part on anv driver which is:
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include "common/freedreno_guardband.h"
#include "tu_private.h"

#include "ir3/ir3_nir.h"
#include "main/menums.h"
#include "nir/nir.h"
#include "nir/nir_builder.h"
#include "spirv/nir_spirv.h"
#include "util/debug.h"
#include "util/mesa-sha1.h"
#include "util/u_atomic.h"
#include "vk_format.h"
#include "vk_pipeline.h"
#include "vk_util.h"

#include "tu_cs.h"

/* Emit IB that preloads the descriptors that the shader uses */

static void
emit_load_state(struct tu_cs *cs, unsigned opcode, enum a6xx_state_type st,
                enum a6xx_state_block sb, unsigned base, unsigned offset,
                unsigned count)
{
   /* Note: just emit one packet, even if count overflows NUM_UNIT. It's not
    * clear if emitting more packets will even help anything. Presumably the
    * descriptor cache is relatively small, and these packets stop doing
    * anything when there are too many descriptors.
    */
   tu_cs_emit_pkt7(cs, opcode, 3);
   tu_cs_emit(cs,
              CP_LOAD_STATE6_0_STATE_TYPE(st) |
              CP_LOAD_STATE6_0_STATE_SRC(SS6_BINDLESS) |
              CP_LOAD_STATE6_0_STATE_BLOCK(sb) |
              CP_LOAD_STATE6_0_NUM_UNIT(MIN2(count, 1024-1)));
   tu_cs_emit_qw(cs, offset | (base << 28));
}

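/* Compute an upper bound, in dwords, on the CP_LOAD_STATE6 packets that
 * tu6_emit_load_state() below will emit for this pipeline layout, so a
 * sufficiently large sub-stream can be allocated up front.
 */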
static unsigned
tu6_load_state_size(struct tu_pipeline *pipeline,
                    struct tu_pipeline_layout *layout, bool compute)
{
   const unsigned load_state_size = 4;
   unsigned size = 0;
   for (unsigned i = 0; i < layout->num_sets; i++) {
      if (!(pipeline->active_desc_sets & (1u << i)))
         continue;

      struct tu_descriptor_set_layout *set_layout = layout->set[i].layout;
      for (unsigned j = 0; j < set_layout->binding_count; j++) {
         struct tu_descriptor_set_binding_layout *binding = &set_layout->binding[j];
         unsigned count = 0;
         /* Note: some users, like amber for example, pass in
          * VK_SHADER_STAGE_ALL which includes a bunch of extra bits, so
          * filter these out by using VK_SHADER_STAGE_ALL_GRAPHICS explicitly.
          */
         VkShaderStageFlags stages = compute ?
            binding->shader_stages & VK_SHADER_STAGE_COMPUTE_BIT :
            binding->shader_stages & VK_SHADER_STAGE_ALL_GRAPHICS;
         unsigned stage_count = util_bitcount(stages);

         if (!binding->array_size)
            continue;

         switch (binding->type) {
         case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
         case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
         case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
         case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
            /* IBO-backed resources only need one packet for all graphics stages */
            if (stages & ~VK_SHADER_STAGE_COMPUTE_BIT)
               count += 1;
            if (stages & VK_SHADER_STAGE_COMPUTE_BIT)
               count += 1;
            break;
         case VK_DESCRIPTOR_TYPE_SAMPLER:
         case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
         case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
         case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
         case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
            /* Textures and UBOs need a packet for each stage */
            count = stage_count;
            break;
         case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
            /* Because of how we pack combined images and samplers, we
             * currently can't use one packet for the whole array.
             */
            count = stage_count * binding->array_size * 2;
            break;
         case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
         case VK_DESCRIPTOR_TYPE_MUTABLE_VALVE:
            break;
         default:
            unreachable("bad descriptor type");
         }
         size += count * load_state_size;
      }
   }
   return size;
}

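/* Build the CP_LOAD_STATE6 prefetch commands for every descriptor the
 * pipeline statically uses and store them as a draw state group in
 * pipeline->load_state.
 */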
static void
tu6_emit_load_state(struct tu_pipeline *pipeline,
                    struct tu_pipeline_layout *layout, bool compute)
{
   unsigned size = tu6_load_state_size(pipeline, layout, compute);
   if (size == 0)
      return;

   struct tu_cs cs;
   tu_cs_begin_sub_stream(&pipeline->cs, size, &cs);

   for (unsigned i = 0; i < layout->num_sets; i++) {
      /* From 13.2.7. Descriptor Set Binding:
       *
       * A compatible descriptor set must be bound for all set numbers that
       * any shaders in a pipeline access, at the time that a draw or
       * dispatch command is recorded to execute using that pipeline.
       * However, if none of the shaders in a pipeline statically use any
       * bindings with a particular set number, then no descriptor set need
       * be bound for that set number, even if the pipeline layout includes
       * a non-trivial descriptor set layout for that set number.
       *
       * This means that descriptor sets unused by the pipeline may have a
       * garbage or 0 BINDLESS_BASE register, which will cause context faults
       * when prefetching descriptors from these sets. Skip prefetching for
       * descriptors from them to avoid this. This is also an optimization,
       * since these prefetches would be useless.
       */
      if (!(pipeline->active_desc_sets & (1u << i)))
         continue;

      struct tu_descriptor_set_layout *set_layout = layout->set[i].layout;
      for (unsigned j = 0; j < set_layout->binding_count; j++) {
         struct tu_descriptor_set_binding_layout *binding = &set_layout->binding[j];
         unsigned base = i;
         unsigned offset = binding->offset / 4;
         /* Note: some users, like amber for example, pass in
          * VK_SHADER_STAGE_ALL which includes a bunch of extra bits, so
          * filter these out by using VK_SHADER_STAGE_ALL_GRAPHICS explicitly.
          */
         VkShaderStageFlags stages = compute ?
            binding->shader_stages & VK_SHADER_STAGE_COMPUTE_BIT :
            binding->shader_stages & VK_SHADER_STAGE_ALL_GRAPHICS;
         unsigned count = binding->array_size;
         if (count == 0 || stages == 0)
            continue;
         switch (binding->type) {
         case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
            base = MAX_SETS;
            offset = (layout->set[i].dynamic_offset_start +
                      binding->dynamic_offset_offset) / 4;
            FALLTHROUGH;
         case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
         case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
         case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: {
            unsigned mul = binding->size / (A6XX_TEX_CONST_DWORDS * 4);
            /* IBO-backed resources only need one packet for all graphics stages */
            if (stages & ~VK_SHADER_STAGE_COMPUTE_BIT) {
               emit_load_state(&cs, CP_LOAD_STATE6, ST6_SHADER, SB6_IBO,
                               base, offset, count * mul);
            }
            if (stages & VK_SHADER_STAGE_COMPUTE_BIT) {
               emit_load_state(&cs, CP_LOAD_STATE6_FRAG, ST6_IBO, SB6_CS_SHADER,
                               base, offset, count * mul);
            }
            break;
         }
         case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
         case VK_DESCRIPTOR_TYPE_MUTABLE_VALVE:
            /* nothing - input attachment doesn't use bindless */
            break;
         case VK_DESCRIPTOR_TYPE_SAMPLER:
         case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
         case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: {
            tu_foreach_stage(stage, stages) {
               emit_load_state(&cs, tu6_stage2opcode(stage),
                               binding->type == VK_DESCRIPTOR_TYPE_SAMPLER ?
                               ST6_SHADER : ST6_CONSTANTS,
                               tu6_stage2texsb(stage), base, offset, count);
            }
            break;
         }
         case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
            base = MAX_SETS;
            offset = (layout->set[i].dynamic_offset_start +
                      binding->dynamic_offset_offset) / 4;
            FALLTHROUGH;
         case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: {
            tu_foreach_stage(stage, stages) {
               emit_load_state(&cs, tu6_stage2opcode(stage), ST6_UBO,
                               tu6_stage2shadersb(stage), base, offset, count);
            }
            break;
         }
         case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: {
            tu_foreach_stage(stage, stages) {
               /* TODO: We could emit less CP_LOAD_STATE6 if we used
                * struct-of-arrays instead of array-of-structs.
                */
               for (unsigned i = 0; i < count; i++) {
                  unsigned tex_offset = offset + 2 * i * A6XX_TEX_CONST_DWORDS;
                  unsigned sam_offset = offset + (2 * i + 1) * A6XX_TEX_CONST_DWORDS;
                  emit_load_state(&cs, tu6_stage2opcode(stage),
                                  ST6_CONSTANTS, tu6_stage2texsb(stage),
                                  base, tex_offset, 1);
                  emit_load_state(&cs, tu6_stage2opcode(stage),
                                  ST6_SHADER, tu6_stage2texsb(stage),
                                  base, sam_offset, 1);
               }
            }
            break;
         }
         default:
            unreachable("bad descriptor type");
         }
      }
   }

   pipeline->load_state = tu_cs_end_draw_state(&pipeline->cs, &cs);
}

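/* Scratch state collected from the VkGraphicsPipelineCreateInfo (and the
 * associated render pass/subpass) that the pipeline building code below
 * consumes while filling in a tu_pipeline.
 */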
struct tu_pipeline_builder
{
   struct tu_device *device;
   void *mem_ctx;
   struct vk_pipeline_cache *cache;
   struct tu_pipeline_layout *layout;
   const VkAllocationCallbacks *alloc;
   const VkGraphicsPipelineCreateInfo *create_info;

   struct tu_compiled_shaders *shaders;
   struct ir3_shader_variant *binning_variant;
   uint64_t shader_iova[MESA_SHADER_FRAGMENT + 1];
   uint64_t binning_vs_iova;

   uint32_t additional_cs_reserve_size;

   struct tu_pvtmem_config pvtmem;

   bool rasterizer_discard;
   /* these states are affected by rasterizer_discard */
   bool emit_msaa_state;
   bool depth_clip_disable;
   VkSampleCountFlagBits samples;
   bool use_color_attachments;
   bool use_dual_src_blend;
   bool alpha_to_coverage;
   uint32_t color_attachment_count;
   VkFormat color_attachment_formats[MAX_RTS];
   VkFormat depth_attachment_format;
   uint32_t render_components;
   uint32_t multiview_mask;

   bool subpass_raster_order_attachment_access;
   bool subpass_feedback_loop_color;
   bool subpass_feedback_loop_ds;
};

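/* Returns whether the given logic op needs the existing destination value,
 * i.e. whether it is anything other than a constant (CLEAR/SET) or a
 * function of the source alone (COPY/COPY_INVERTED).
 */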
static bool
tu_logic_op_reads_dst(VkLogicOp op)
{
   switch (op) {
   case VK_LOGIC_OP_CLEAR:
   case VK_LOGIC_OP_COPY:
   case VK_LOGIC_OP_COPY_INVERTED:
   case VK_LOGIC_OP_SET:
      return false;
   default:
      return true;
   }
}

static VkBlendFactor
tu_blend_factor_no_dst_alpha(VkBlendFactor factor)
{
   /* treat dst alpha as 1.0 and avoid reading it */
   switch (factor) {
   case VK_BLEND_FACTOR_DST_ALPHA:
      return VK_BLEND_FACTOR_ONE;
   case VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA:
      return VK_BLEND_FACTOR_ZERO;
   default:
      return factor;
   }
}

static bool tu_blend_factor_is_dual_src(VkBlendFactor factor)
{
   switch (factor) {
   case VK_BLEND_FACTOR_SRC1_COLOR:
   case VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR:
   case VK_BLEND_FACTOR_SRC1_ALPHA:
   case VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA:
      return true;
   default:
      return false;
   }
}

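/* Returns whether any attachment's blend equation references the second
 * color output (SRC1), which requires dual-source blending.
 */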
static bool
tu_blend_state_is_dual_src(const VkPipelineColorBlendStateCreateInfo *info)
{
   if (!info)
      return false;

   for (unsigned i = 0; i < info->attachmentCount; i++) {
      const VkPipelineColorBlendAttachmentState *blend = &info->pAttachments[i];
      if (tu_blend_factor_is_dual_src(blend->srcColorBlendFactor) ||
          tu_blend_factor_is_dual_src(blend->dstColorBlendFactor) ||
          tu_blend_factor_is_dual_src(blend->srcAlphaBlendFactor) ||
          tu_blend_factor_is_dual_src(blend->dstAlphaBlendFactor))
         return true;
   }

   return false;
}

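/* Per-stage register offsets used by tu6_emit_xs_config()/tu6_emit_xs();
 * the corresponding registers share a common field layout across stages,
 * so the same emit code can be reused for every shader stage.
 */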
static const struct xs_config {
   uint16_t reg_sp_xs_ctrl;
   uint16_t reg_sp_xs_config;
   uint16_t reg_sp_xs_instrlen;
   uint16_t reg_hlsq_xs_ctrl;
   uint16_t reg_sp_xs_first_exec_offset;
   uint16_t reg_sp_xs_pvt_mem_hw_stack_offset;
} xs_config[] = {
   [MESA_SHADER_VERTEX] = {
      REG_A6XX_SP_VS_CTRL_REG0,
      REG_A6XX_SP_VS_CONFIG,
      REG_A6XX_SP_VS_INSTRLEN,
      REG_A6XX_HLSQ_VS_CNTL,
      REG_A6XX_SP_VS_OBJ_FIRST_EXEC_OFFSET,
      REG_A6XX_SP_VS_PVT_MEM_HW_STACK_OFFSET,
   },
   [MESA_SHADER_TESS_CTRL] = {
      REG_A6XX_SP_HS_CTRL_REG0,
      REG_A6XX_SP_HS_CONFIG,
      REG_A6XX_SP_HS_INSTRLEN,
      REG_A6XX_HLSQ_HS_CNTL,
      REG_A6XX_SP_HS_OBJ_FIRST_EXEC_OFFSET,
      REG_A6XX_SP_HS_PVT_MEM_HW_STACK_OFFSET,
   },
   [MESA_SHADER_TESS_EVAL] = {
      REG_A6XX_SP_DS_CTRL_REG0,
      REG_A6XX_SP_DS_CONFIG,
      REG_A6XX_SP_DS_INSTRLEN,
      REG_A6XX_HLSQ_DS_CNTL,
      REG_A6XX_SP_DS_OBJ_FIRST_EXEC_OFFSET,
      REG_A6XX_SP_DS_PVT_MEM_HW_STACK_OFFSET,
   },
   [MESA_SHADER_GEOMETRY] = {
      REG_A6XX_SP_GS_CTRL_REG0,
      REG_A6XX_SP_GS_CONFIG,
      REG_A6XX_SP_GS_INSTRLEN,
      REG_A6XX_HLSQ_GS_CNTL,
      REG_A6XX_SP_GS_OBJ_FIRST_EXEC_OFFSET,
      REG_A6XX_SP_GS_PVT_MEM_HW_STACK_OFFSET,
   },
   [MESA_SHADER_FRAGMENT] = {
      REG_A6XX_SP_FS_CTRL_REG0,
      REG_A6XX_SP_FS_CONFIG,
      REG_A6XX_SP_FS_INSTRLEN,
      REG_A6XX_HLSQ_FS_CNTL,
      REG_A6XX_SP_FS_OBJ_FIRST_EXEC_OFFSET,
      REG_A6XX_SP_FS_PVT_MEM_HW_STACK_OFFSET,
   },
   [MESA_SHADER_COMPUTE] = {
      REG_A6XX_SP_CS_CTRL_REG0,
      REG_A6XX_SP_CS_CONFIG,
      REG_A6XX_SP_CS_INSTRLEN,
      REG_A6XX_HLSQ_CS_CNTL,
      REG_A6XX_SP_CS_OBJ_FIRST_EXEC_OFFSET,
      REG_A6XX_SP_CS_PVT_MEM_HW_STACK_OFFSET,
   },
};

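/* Size, in dwords, of the CP_LOAD_STATE6 payload needed to upload the
 * shader's immediate constants, clamped to what fits in its constlen.
 */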
static uint32_t
tu_xs_get_immediates_packet_size_dwords(const struct ir3_shader_variant *xs)
{
   const struct ir3_const_state *const_state = ir3_const_state(xs);
   uint32_t base = const_state->offsets.immediate;
   int32_t size = DIV_ROUND_UP(const_state->immediates_count, 4);

   /* truncate size to avoid writing constants that shader
    * does not use:
    */
   size = MIN2(size + base, xs->constlen) - base;

   return MAX2(size, 0) * 4;
}

/* We allocate fixed-length substreams for shader state, however some
 * parts of the state may have unbounded length. Their additional space
 * requirements should be calculated here.
 */
static uint32_t
tu_xs_get_additional_cs_size_dwords(const struct ir3_shader_variant *xs)
{
   const struct ir3_const_state *const_state = ir3_const_state(xs);

   uint32_t size = tu_xs_get_immediates_packet_size_dwords(xs);

   /* Variable number of UBO upload ranges. */
   size += 4 * const_state->ubo_state.num_enabled;

   /* Variable number of dwords for the primitive map */
   size += xs->input_size;

   size += xs->constant_data_size / 4;

   return size;
}

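/* Emit the SP_xS_CONFIG and HLSQ_xS_CNTL registers for a stage; a NULL
 * variant marks the stage as disabled.
 */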
void
tu6_emit_xs_config(struct tu_cs *cs,
                   gl_shader_stage stage, /* xs->type, but xs may be NULL */
                   const struct ir3_shader_variant *xs)
{
   const struct xs_config *cfg = &xs_config[stage];

   if (!xs) {
      /* shader stage disabled */
      tu_cs_emit_pkt4(cs, cfg->reg_sp_xs_config, 1);
      tu_cs_emit(cs, 0);

      tu_cs_emit_pkt4(cs, cfg->reg_hlsq_xs_ctrl, 1);
      tu_cs_emit(cs, 0);
      return;
   }

   tu_cs_emit_pkt4(cs, cfg->reg_sp_xs_config, 1);
   tu_cs_emit(cs, A6XX_SP_VS_CONFIG_ENABLED |
                  COND(xs->bindless_tex, A6XX_SP_VS_CONFIG_BINDLESS_TEX) |
                  COND(xs->bindless_samp, A6XX_SP_VS_CONFIG_BINDLESS_SAMP) |
                  COND(xs->bindless_ibo, A6XX_SP_VS_CONFIG_BINDLESS_IBO) |
                  COND(xs->bindless_ubo, A6XX_SP_VS_CONFIG_BINDLESS_UBO) |
                  A6XX_SP_VS_CONFIG_NTEX(xs->num_samp) |
                  A6XX_SP_VS_CONFIG_NSAMP(xs->num_samp));

   tu_cs_emit_pkt4(cs, cfg->reg_hlsq_xs_ctrl, 1);
   tu_cs_emit(cs, A6XX_HLSQ_VS_CNTL_CONSTLEN(xs->constlen) |
                  A6XX_HLSQ_VS_CNTL_ENABLED);
}

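/* Emit the per-stage program state: CTRL_REG0, instruction length, private
 * memory layout, the instruction preload from binary_iova, immediates, the
 * constant-data UBO, and (for the fragment stage) driver params.
 */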
void
tu6_emit_xs(struct tu_cs *cs,
            gl_shader_stage stage, /* xs->type, but xs may be NULL */
            const struct ir3_shader_variant *xs,
            const struct tu_pvtmem_config *pvtmem,
            uint64_t binary_iova)
{
   const struct xs_config *cfg = &xs_config[stage];

   if (!xs) {
      /* shader stage disabled */
      return;
   }

   enum a6xx_threadsize thrsz =
      xs->info.double_threadsize ? THREAD128 : THREAD64;
   switch (stage) {
   case MESA_SHADER_VERTEX:
      tu_cs_emit_regs(cs, A6XX_SP_VS_CTRL_REG0(
               .fullregfootprint = xs->info.max_reg + 1,
               .halfregfootprint = xs->info.max_half_reg + 1,
               .branchstack = ir3_shader_branchstack_hw(xs),
               .mergedregs = xs->mergedregs,
      ));
      break;
   case MESA_SHADER_TESS_CTRL:
      tu_cs_emit_regs(cs, A6XX_SP_HS_CTRL_REG0(
               .fullregfootprint = xs->info.max_reg + 1,
               .halfregfootprint = xs->info.max_half_reg + 1,
               .branchstack = ir3_shader_branchstack_hw(xs),
      ));
      break;
   case MESA_SHADER_TESS_EVAL:
      tu_cs_emit_regs(cs, A6XX_SP_DS_CTRL_REG0(
               .fullregfootprint = xs->info.max_reg + 1,
               .halfregfootprint = xs->info.max_half_reg + 1,
               .branchstack = ir3_shader_branchstack_hw(xs),
      ));
      break;
   case MESA_SHADER_GEOMETRY:
      tu_cs_emit_regs(cs, A6XX_SP_GS_CTRL_REG0(
               .fullregfootprint = xs->info.max_reg + 1,
               .halfregfootprint = xs->info.max_half_reg + 1,
               .branchstack = ir3_shader_branchstack_hw(xs),
      ));
      break;
   case MESA_SHADER_FRAGMENT:
      tu_cs_emit_regs(cs, A6XX_SP_FS_CTRL_REG0(
               .fullregfootprint = xs->info.max_reg + 1,
               .halfregfootprint = xs->info.max_half_reg + 1,
               .branchstack = ir3_shader_branchstack_hw(xs),
               .mergedregs = xs->mergedregs,
               .threadsize = thrsz,
               .pixlodenable = xs->need_pixlod,
               .diff_fine = xs->need_fine_derivatives,
               .varying = xs->total_in != 0,
               /* unknown bit, seems unnecessary */
               .unk24 = true,
      ));
      break;
   case MESA_SHADER_COMPUTE:
      tu_cs_emit_regs(cs, A6XX_SP_CS_CTRL_REG0(
               .fullregfootprint = xs->info.max_reg + 1,
               .halfregfootprint = xs->info.max_half_reg + 1,
               .branchstack = ir3_shader_branchstack_hw(xs),
               .mergedregs = xs->mergedregs,
               .threadsize = thrsz,
      ));
      break;
   default:
      unreachable("bad shader stage");
   }

   tu_cs_emit_pkt4(cs, cfg->reg_sp_xs_instrlen, 1);
   tu_cs_emit(cs, xs->instrlen);

   /* emit program binary & private memory layout
    * binary_iova should be aligned to 1 instrlen unit (128 bytes)
    */

   assert((binary_iova & 0x7f) == 0);
   assert((pvtmem->iova & 0x1f) == 0);

   tu_cs_emit_pkt4(cs, cfg->reg_sp_xs_first_exec_offset, 7);
   tu_cs_emit(cs, 0);
   tu_cs_emit_qw(cs, binary_iova);
   tu_cs_emit(cs,
              A6XX_SP_VS_PVT_MEM_PARAM_MEMSIZEPERITEM(pvtmem->per_fiber_size));
   tu_cs_emit_qw(cs, pvtmem->iova);
   tu_cs_emit(cs, A6XX_SP_VS_PVT_MEM_SIZE_TOTALPVTMEMSIZE(pvtmem->per_sp_size) |
                  COND(pvtmem->per_wave, A6XX_SP_VS_PVT_MEM_SIZE_PERWAVEMEMLAYOUT));

   tu_cs_emit_pkt4(cs, cfg->reg_sp_xs_pvt_mem_hw_stack_offset, 1);
   tu_cs_emit(cs, A6XX_SP_VS_PVT_MEM_HW_STACK_OFFSET_OFFSET(pvtmem->per_sp_size));

   uint32_t shader_preload_size =
      MIN2(xs->instrlen, cs->device->physical_device->info->a6xx.instr_cache_size);

   tu_cs_emit_pkt7(cs, tu6_stage2opcode(stage), 3);
   tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(0) |
                  CP_LOAD_STATE6_0_STATE_TYPE(ST6_SHADER) |
                  CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) |
                  CP_LOAD_STATE6_0_STATE_BLOCK(tu6_stage2shadersb(stage)) |
                  CP_LOAD_STATE6_0_NUM_UNIT(shader_preload_size));
   tu_cs_emit_qw(cs, binary_iova);

   /* emit immediates */

   const struct ir3_const_state *const_state = ir3_const_state(xs);
   uint32_t base = const_state->offsets.immediate;
   unsigned immediate_size = tu_xs_get_immediates_packet_size_dwords(xs);

   if (immediate_size > 0) {
      tu_cs_emit_pkt7(cs, tu6_stage2opcode(stage), 3 + immediate_size);
      tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(base) |
                 CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) |
                 CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) |
                 CP_LOAD_STATE6_0_STATE_BLOCK(tu6_stage2shadersb(stage)) |
                 CP_LOAD_STATE6_0_NUM_UNIT(immediate_size / 4));
      tu_cs_emit(cs, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0));
      tu_cs_emit(cs, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0));

      tu_cs_emit_array(cs, const_state->immediates, immediate_size);
   }

   if (const_state->constant_data_ubo != -1) {
      uint64_t iova = binary_iova + xs->info.constant_data_offset;

      /* Upload UBO state for the constant data. */
      tu_cs_emit_pkt7(cs, tu6_stage2opcode(stage), 5);
      tu_cs_emit(cs,
                 CP_LOAD_STATE6_0_DST_OFF(const_state->constant_data_ubo) |
                 CP_LOAD_STATE6_0_STATE_TYPE(ST6_UBO)|
                 CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) |
                 CP_LOAD_STATE6_0_STATE_BLOCK(tu6_stage2shadersb(stage)) |
                 CP_LOAD_STATE6_0_NUM_UNIT(1));
      tu_cs_emit(cs, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0));
      tu_cs_emit(cs, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0));
      int size_vec4s = DIV_ROUND_UP(xs->constant_data_size, 16);
      tu_cs_emit_qw(cs,
                    iova |
                    (uint64_t)A6XX_UBO_1_SIZE(size_vec4s) << 32);

      /* Upload the constant data to the const file if needed. */
      const struct ir3_ubo_analysis_state *ubo_state = &const_state->ubo_state;

      for (int i = 0; i < ubo_state->num_enabled; i++) {
         if (ubo_state->range[i].ubo.block != const_state->constant_data_ubo ||
             ubo_state->range[i].ubo.bindless) {
            continue;
         }

         uint32_t start = ubo_state->range[i].start;
         uint32_t end = ubo_state->range[i].end;
         uint32_t size = MIN2(end - start,
                              (16 * xs->constlen) - ubo_state->range[i].offset);

         tu_cs_emit_pkt7(cs, tu6_stage2opcode(stage), 3);
         tu_cs_emit(cs,
                    CP_LOAD_STATE6_0_DST_OFF(ubo_state->range[i].offset / 16) |
                    CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) |
                    CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) |
                    CP_LOAD_STATE6_0_STATE_BLOCK(tu6_stage2shadersb(stage)) |
                    CP_LOAD_STATE6_0_NUM_UNIT(size / 16));
         tu_cs_emit_qw(cs, iova + start);
      }
   }

   /* emit FS driver param */
   if (stage == MESA_SHADER_FRAGMENT && const_state->num_driver_params > 0) {
      uint32_t base = const_state->offsets.driver_param;
      int32_t size = DIV_ROUND_UP(const_state->num_driver_params, 4);
      size = MAX2(MIN2(size + base, xs->constlen) - base, 0);

      if (size > 0) {
         tu_cs_emit_pkt7(cs, tu6_stage2opcode(stage), 3 + size * 4);
         tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(base) |
                    CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) |
                    CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) |
                    CP_LOAD_STATE6_0_STATE_BLOCK(tu6_stage2shadersb(stage)) |
                    CP_LOAD_STATE6_0_NUM_UNIT(size));
         tu_cs_emit(cs, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0));
         tu_cs_emit(cs, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0));

         assert(size == 1);
         tu_cs_emit(cs, xs->info.double_threadsize ? 128 : 64);
         tu_cs_emit(cs, 0);
         tu_cs_emit(cs, 0);
         tu_cs_emit(cs, 0);
      }
   }
}

static void
tu6_emit_shared_consts_enable(struct tu_cs *cs, bool enable)
{
   /* Enable/disable shared constants */
   tu_cs_emit_regs(cs, A6XX_HLSQ_SHARED_CONSTS(.enable = enable));
   tu_cs_emit_regs(cs, A6XX_SP_MODE_CONTROL(.constant_demotion_enable = true,
                                            .isammode = ISAMMODE_GL,
                                            .shared_consts_enable = enable));
}

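/* Emit the static state for a compute shader: config registers,
 * program/private-memory state via tu6_emit_xs(), shared-memory size, and
 * the HLSQ/SP workgroup-id and local-id register assignments.
 */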
static void
tu6_emit_cs_config(struct tu_cs *cs,
                   const struct ir3_shader_variant *v,
                   const struct tu_pvtmem_config *pvtmem,
                   uint64_t binary_iova)
{
   bool shared_consts_enable = ir3_const_state(v)->shared_consts_enable;
   tu6_emit_shared_consts_enable(cs, shared_consts_enable);

   tu_cs_emit_regs(cs, A6XX_HLSQ_INVALIDATE_CMD(
         .cs_state = true,
         .cs_ibo = true,
         .cs_shared_const = shared_consts_enable));

   tu6_emit_xs_config(cs, MESA_SHADER_COMPUTE, v);
   tu6_emit_xs(cs, MESA_SHADER_COMPUTE, v, pvtmem, binary_iova);

   uint32_t shared_size = MAX2(((int)v->shared_size - 1) / 1024, 1);
   tu_cs_emit_pkt4(cs, REG_A6XX_SP_CS_UNKNOWN_A9B1, 1);
   tu_cs_emit(cs, A6XX_SP_CS_UNKNOWN_A9B1_SHARED_SIZE(shared_size) |
                  A6XX_SP_CS_UNKNOWN_A9B1_UNK6);

   if (cs->device->physical_device->info->a6xx.has_lpac) {
      tu_cs_emit_pkt4(cs, REG_A6XX_HLSQ_CS_UNKNOWN_B9D0, 1);
      tu_cs_emit(cs, A6XX_HLSQ_CS_UNKNOWN_B9D0_SHARED_SIZE(shared_size) |
                     A6XX_HLSQ_CS_UNKNOWN_B9D0_UNK6);
   }

   uint32_t local_invocation_id =
      ir3_find_sysval_regid(v, SYSTEM_VALUE_LOCAL_INVOCATION_ID);
   uint32_t work_group_id =
      ir3_find_sysval_regid(v, SYSTEM_VALUE_WORKGROUP_ID);

   enum a6xx_threadsize thrsz = v->info.double_threadsize ? THREAD128 : THREAD64;
   tu_cs_emit_pkt4(cs, REG_A6XX_HLSQ_CS_CNTL_0, 2);
   tu_cs_emit(cs,
              A6XX_HLSQ_CS_CNTL_0_WGIDCONSTID(work_group_id) |
              A6XX_HLSQ_CS_CNTL_0_WGSIZECONSTID(regid(63, 0)) |
              A6XX_HLSQ_CS_CNTL_0_WGOFFSETCONSTID(regid(63, 0)) |
              A6XX_HLSQ_CS_CNTL_0_LOCALIDREGID(local_invocation_id));
   tu_cs_emit(cs, A6XX_HLSQ_CS_CNTL_1_LINEARLOCALIDREGID(regid(63, 0)) |
                  A6XX_HLSQ_CS_CNTL_1_THREADSIZE(thrsz));

   if (cs->device->physical_device->info->a6xx.has_lpac) {
      tu_cs_emit_pkt4(cs, REG_A6XX_SP_CS_CNTL_0, 2);
      tu_cs_emit(cs,
                 A6XX_SP_CS_CNTL_0_WGIDCONSTID(work_group_id) |
                 A6XX_SP_CS_CNTL_0_WGSIZECONSTID(regid(63, 0)) |
                 A6XX_SP_CS_CNTL_0_WGOFFSETCONSTID(regid(63, 0)) |
                 A6XX_SP_CS_CNTL_0_LOCALIDREGID(local_invocation_id));
      tu_cs_emit(cs, A6XX_SP_CS_CNTL_1_LINEARLOCALIDREGID(regid(63, 0)) |
                     A6XX_SP_CS_CNTL_1_THREADSIZE(thrsz));
   }
}

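/* Program VFD_CONTROL_1..6 with the registers that receive vertex/instance
 * IDs, tessellation coordinates and patch IDs, the GS header, and the view
 * index, for whichever of VS/HS/DS/GS are present.
 */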
static void
tu6_emit_vs_system_values(struct tu_cs *cs,
                          const struct ir3_shader_variant *vs,
                          const struct ir3_shader_variant *hs,
                          const struct ir3_shader_variant *ds,
                          const struct ir3_shader_variant *gs,
                          bool primid_passthru)
{
   const uint32_t vertexid_regid =
      ir3_find_sysval_regid(vs, SYSTEM_VALUE_VERTEX_ID);
   const uint32_t instanceid_regid =
      ir3_find_sysval_regid(vs, SYSTEM_VALUE_INSTANCE_ID);
   const uint32_t tess_coord_x_regid = hs ?
      ir3_find_sysval_regid(ds, SYSTEM_VALUE_TESS_COORD) :
      regid(63, 0);
   const uint32_t tess_coord_y_regid = VALIDREG(tess_coord_x_regid) ?
      tess_coord_x_regid + 1 :
      regid(63, 0);
   const uint32_t hs_rel_patch_regid = hs ?
      ir3_find_sysval_regid(hs, SYSTEM_VALUE_REL_PATCH_ID_IR3) :
      regid(63, 0);
   const uint32_t ds_rel_patch_regid = hs ?
      ir3_find_sysval_regid(ds, SYSTEM_VALUE_REL_PATCH_ID_IR3) :
      regid(63, 0);
   const uint32_t hs_invocation_regid = hs ?
      ir3_find_sysval_regid(hs, SYSTEM_VALUE_TCS_HEADER_IR3) :
      regid(63, 0);
   const uint32_t gs_primitiveid_regid = gs ?
      ir3_find_sysval_regid(gs, SYSTEM_VALUE_PRIMITIVE_ID) :
      regid(63, 0);
   const uint32_t vs_primitiveid_regid = hs ?
      ir3_find_sysval_regid(hs, SYSTEM_VALUE_PRIMITIVE_ID) :
      gs_primitiveid_regid;
   const uint32_t ds_primitiveid_regid = ds ?
      ir3_find_sysval_regid(ds, SYSTEM_VALUE_PRIMITIVE_ID) :
      regid(63, 0);
   const uint32_t gsheader_regid = gs ?
      ir3_find_sysval_regid(gs, SYSTEM_VALUE_GS_HEADER_IR3) :
      regid(63, 0);

   /* Note: we currently don't support multiview with tess or GS. If we did,
    * and the HW actually works, then we'd have to somehow share this across
    * stages. Note that the blob doesn't support this either.
    */
   const uint32_t viewid_regid =
      ir3_find_sysval_regid(vs, SYSTEM_VALUE_VIEW_INDEX);

   tu_cs_emit_pkt4(cs, REG_A6XX_VFD_CONTROL_1, 6);
   tu_cs_emit(cs, A6XX_VFD_CONTROL_1_REGID4VTX(vertexid_regid) |
                  A6XX_VFD_CONTROL_1_REGID4INST(instanceid_regid) |
                  A6XX_VFD_CONTROL_1_REGID4PRIMID(vs_primitiveid_regid) |
                  A6XX_VFD_CONTROL_1_REGID4VIEWID(viewid_regid));
   tu_cs_emit(cs, A6XX_VFD_CONTROL_2_REGID_HSRELPATCHID(hs_rel_patch_regid) |
                  A6XX_VFD_CONTROL_2_REGID_INVOCATIONID(hs_invocation_regid));
   tu_cs_emit(cs, A6XX_VFD_CONTROL_3_REGID_DSRELPATCHID(ds_rel_patch_regid) |
                  A6XX_VFD_CONTROL_3_REGID_TESSX(tess_coord_x_regid) |
                  A6XX_VFD_CONTROL_3_REGID_TESSY(tess_coord_y_regid) |
                  A6XX_VFD_CONTROL_3_REGID_DSPRIMID(ds_primitiveid_regid));
   tu_cs_emit(cs, 0x000000fc); /* VFD_CONTROL_4 */
   tu_cs_emit(cs, A6XX_VFD_CONTROL_5_REGID_GSHEADER(gsheader_regid) |
                  0xfc00); /* VFD_CONTROL_5 */
   tu_cs_emit(cs, COND(primid_passthru, A6XX_VFD_CONTROL_6_PRIMID_PASSTHRU)); /* VFD_CONTROL_6 */
}

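/* Emit the VPC_SO_* state that maps shader output components to transform
 * feedback buffers, or disable streamout entirely when the variant has no
 * stream outputs.
 */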
static void
tu6_setup_streamout(struct tu_cs *cs,
                    const struct ir3_shader_variant *v,
                    struct ir3_shader_linkage *l)
{
   const struct ir3_stream_output_info *info = &v->stream_output;
   /* Note: 64 here comes from the HW layout of the program RAM. The program
    * for stream N is at DWORD 64 * N.
    */
#define A6XX_SO_PROG_DWORDS 64
   uint32_t prog[A6XX_SO_PROG_DWORDS * IR3_MAX_SO_STREAMS] = {};
   BITSET_DECLARE(valid_dwords, A6XX_SO_PROG_DWORDS * IR3_MAX_SO_STREAMS) = {0};

   /* TODO: streamout state should be in a non-GMEM draw state */

   /* no streamout: */
   if (info->num_outputs == 0) {
      tu_cs_emit_pkt7(cs, CP_CONTEXT_REG_BUNCH, 4);
      tu_cs_emit(cs, REG_A6XX_VPC_SO_CNTL);
      tu_cs_emit(cs, 0);
      tu_cs_emit(cs, REG_A6XX_VPC_SO_STREAM_CNTL);
      tu_cs_emit(cs, 0);
      return;
   }

   for (unsigned i = 0; i < info->num_outputs; i++) {
      const struct ir3_stream_output *out = &info->output[i];
      unsigned k = out->register_index;
      unsigned idx;

      /* Skip it, if it's an output that was never assigned a register. */
      if (k >= v->outputs_count || v->outputs[k].regid == INVALID_REG)
         continue;

      /* linkage map sorted by order frag shader wants things, so
       * a bit less ideal here..
       */
      for (idx = 0; idx < l->cnt; idx++)
         if (l->var[idx].slot == v->outputs[k].slot)
            break;

      assert(idx < l->cnt);

      for (unsigned j = 0; j < out->num_components; j++) {
         unsigned c = j + out->start_component;
         unsigned loc = l->var[idx].loc + c;
         unsigned off = j + out->dst_offset; /* in dwords */

         assert(loc < A6XX_SO_PROG_DWORDS * 2);
         unsigned dword = out->stream * A6XX_SO_PROG_DWORDS + loc/2;
         if (loc & 1) {
            prog[dword] |= A6XX_VPC_SO_PROG_B_EN |
                           A6XX_VPC_SO_PROG_B_BUF(out->output_buffer) |
                           A6XX_VPC_SO_PROG_B_OFF(off * 4);
         } else {
            prog[dword] |= A6XX_VPC_SO_PROG_A_EN |
                           A6XX_VPC_SO_PROG_A_BUF(out->output_buffer) |
                           A6XX_VPC_SO_PROG_A_OFF(off * 4);
         }
         BITSET_SET(valid_dwords, dword);
      }
   }

   unsigned prog_count = 0;
   unsigned start, end;
   BITSET_FOREACH_RANGE(start, end, valid_dwords,
                        A6XX_SO_PROG_DWORDS * IR3_MAX_SO_STREAMS) {
      prog_count += end - start + 1;
   }

   tu_cs_emit_pkt7(cs, CP_CONTEXT_REG_BUNCH, 10 + 2 * prog_count);
   tu_cs_emit(cs, REG_A6XX_VPC_SO_STREAM_CNTL);
   tu_cs_emit(cs, A6XX_VPC_SO_STREAM_CNTL_STREAM_ENABLE(info->streams_written) |
                  COND(info->stride[0] > 0,
                       A6XX_VPC_SO_STREAM_CNTL_BUF0_STREAM(1 + info->buffer_to_stream[0])) |
                  COND(info->stride[1] > 0,
                       A6XX_VPC_SO_STREAM_CNTL_BUF1_STREAM(1 + info->buffer_to_stream[1])) |
                  COND(info->stride[2] > 0,
                       A6XX_VPC_SO_STREAM_CNTL_BUF2_STREAM(1 + info->buffer_to_stream[2])) |
                  COND(info->stride[3] > 0,
                       A6XX_VPC_SO_STREAM_CNTL_BUF3_STREAM(1 + info->buffer_to_stream[3])));
   for (uint32_t i = 0; i < 4; i++) {
      tu_cs_emit(cs, REG_A6XX_VPC_SO_BUFFER_STRIDE(i));
      tu_cs_emit(cs, info->stride[i]);
   }
   bool first = true;
   BITSET_FOREACH_RANGE(start, end, valid_dwords,
                        A6XX_SO_PROG_DWORDS * IR3_MAX_SO_STREAMS) {
      tu_cs_emit(cs, REG_A6XX_VPC_SO_CNTL);
      tu_cs_emit(cs, COND(first, A6XX_VPC_SO_CNTL_RESET) |
                     A6XX_VPC_SO_CNTL_ADDR(start));
      for (unsigned i = start; i < end; i++) {
         tu_cs_emit(cs, REG_A6XX_VPC_SO_PROG);
         tu_cs_emit(cs, prog[i]);
      }
      first = false;
   }
}

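/* Upload a range of immediate dwords into a stage's constant file with a
 * single CP_LOAD_STATE6 packet; offset is a byte offset into dwords and
 * size is in dwords (a multiple of 4).
 */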
static void
tu6_emit_const(struct tu_cs *cs, uint32_t opcode, uint32_t base,
               enum a6xx_state_block block, uint32_t offset,
               uint32_t size, const uint32_t *dwords) {
   assert(size % 4 == 0);

   tu_cs_emit_pkt7(cs, opcode, 3 + size);
   tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(base) |
              CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) |
              CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) |
              CP_LOAD_STATE6_0_STATE_BLOCK(block) |
              CP_LOAD_STATE6_0_NUM_UNIT(size / 4));

   tu_cs_emit(cs, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0));
   tu_cs_emit(cs, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0));
   dwords = (uint32_t *)&((uint8_t *)dwords)[offset];

   tu_cs_emit_array(cs, dwords, size);
}

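/* Upload the producer stage's output location map into the consumer's
 * primitive-map constants, clamped to the consumer's constlen.
 */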
static void
|
|
|
|
tu6_emit_link_map(struct tu_cs *cs,
|
|
|
|
const struct ir3_shader_variant *producer,
|
2020-04-24 20:20:10 +01:00
|
|
|
const struct ir3_shader_variant *consumer,
|
|
|
|
enum a6xx_state_block sb)
|
|
|
|
{
|
2020-06-14 19:36:05 +01:00
|
|
|
const struct ir3_const_state *const_state = ir3_const_state(consumer);
|
2020-04-01 19:50:55 +01:00
|
|
|
uint32_t base = const_state->offsets.primitive_map;
|
   int size = DIV_ROUND_UP(consumer->input_size, 4);

   size = (MIN2(size + base, consumer->constlen) - base) * 4;
   if (size <= 0)
      return;

   tu6_emit_const(cs, CP_LOAD_STATE6_GEOM, base, sb, 0, size,
                  producer->output_loc);
}
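
/* Translate the GS output primitive (enum shader_prim) into the a6xx tess
 * output encoding used by PC_PRIMITIVE_CNTL_5.
 */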
static uint16_t
primitive_to_tess(enum shader_prim primitive) {
   switch (primitive) {
   case SHADER_PRIM_POINTS:
      return TESS_POINTS;
   case SHADER_PRIM_LINE_STRIP:
      return TESS_LINES;
   case SHADER_PRIM_TRIANGLE_STRIP:
      return TESS_CW_TRIS;
   default:
      unreachable("");
   }
}
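
/* Program the VPC (varying/position cache) and related SP/GRAS/PC state for
 * the whole geometry pipeline: link the last geometry stage's outputs to the
 * FS inputs, place position/pointsize/layer/viewport/clip distances, and
 * emit the tessellation- and GS-specific output controls.
 */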
void
tu6_emit_vpc(struct tu_cs *cs,
             const struct ir3_shader_variant *vs,
             const struct ir3_shader_variant *hs,
             const struct ir3_shader_variant *ds,
             const struct ir3_shader_variant *gs,
             const struct ir3_shader_variant *fs,
             uint32_t patch_control_points)
{
   /* note: doesn't compile as static because of the array regs.. */
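   /* Per-stage register addresses, indexed by whichever geometry stage (VS,
    * DS or GS) is last before rasterization.
    */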
   const struct reg_config {
      uint16_t reg_sp_xs_out_reg;
      uint16_t reg_sp_xs_vpc_dst_reg;
      uint16_t reg_vpc_xs_pack;
      uint16_t reg_vpc_xs_clip_cntl;
      uint16_t reg_gras_xs_cl_cntl;
      uint16_t reg_pc_xs_out_cntl;
      uint16_t reg_sp_xs_primitive_cntl;
      uint16_t reg_vpc_xs_layer_cntl;
      uint16_t reg_gras_xs_layer_cntl;
   } reg_config[] = {
      [MESA_SHADER_VERTEX] = {
         REG_A6XX_SP_VS_OUT_REG(0),
         REG_A6XX_SP_VS_VPC_DST_REG(0),
         REG_A6XX_VPC_VS_PACK,
         REG_A6XX_VPC_VS_CLIP_CNTL,
         REG_A6XX_GRAS_VS_CL_CNTL,
         REG_A6XX_PC_VS_OUT_CNTL,
         REG_A6XX_SP_VS_PRIMITIVE_CNTL,
         REG_A6XX_VPC_VS_LAYER_CNTL,
         REG_A6XX_GRAS_VS_LAYER_CNTL
      },
      [MESA_SHADER_TESS_CTRL] = {
         0,
         0,
         0,
         0,
         0,
         REG_A6XX_PC_HS_OUT_CNTL,
         0,
         0,
         0
      },
      [MESA_SHADER_TESS_EVAL] = {
         REG_A6XX_SP_DS_OUT_REG(0),
         REG_A6XX_SP_DS_VPC_DST_REG(0),
         REG_A6XX_VPC_DS_PACK,
         REG_A6XX_VPC_DS_CLIP_CNTL,
         REG_A6XX_GRAS_DS_CL_CNTL,
         REG_A6XX_PC_DS_OUT_CNTL,
         REG_A6XX_SP_DS_PRIMITIVE_CNTL,
         REG_A6XX_VPC_DS_LAYER_CNTL,
         REG_A6XX_GRAS_DS_LAYER_CNTL
      },
      [MESA_SHADER_GEOMETRY] = {
         REG_A6XX_SP_GS_OUT_REG(0),
         REG_A6XX_SP_GS_VPC_DST_REG(0),
         REG_A6XX_VPC_GS_PACK,
         REG_A6XX_VPC_GS_CLIP_CNTL,
         REG_A6XX_GRAS_GS_CL_CNTL,
         REG_A6XX_PC_GS_OUT_CNTL,
         REG_A6XX_SP_GS_PRIMITIVE_CNTL,
         REG_A6XX_VPC_GS_LAYER_CNTL,
         REG_A6XX_GRAS_GS_LAYER_CNTL
      },
   };
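
   /* The last geometry stage determines which of the register sets above we
    * program; its outputs are what the VPC and the FS actually consume.
    */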
   const struct ir3_shader_variant *last_shader;
   if (gs) {
      last_shader = gs;
   } else if (hs) {
      last_shader = ds;
   } else {
      last_shader = vs;
   }

   const struct reg_config *cfg = &reg_config[last_shader->type];

   struct ir3_shader_linkage linkage = {
      .primid_loc = 0xff,
      .clip0_loc = 0xff,
      .clip1_loc = 0xff,
   };
   if (fs)
      ir3_link_shaders(&linkage, last_shader, fs, true);

   if (last_shader->stream_output.num_outputs)
      ir3_link_stream_out(&linkage, last_shader);

   /* We do this after linking shaders in order to know whether PrimID
    * passthrough needs to be enabled.
    */
   bool primid_passthru = linkage.primid_loc != 0xff;
   tu6_emit_vs_system_values(cs, vs, hs, ds, gs, primid_passthru);

   tu_cs_emit_pkt4(cs, REG_A6XX_VPC_VAR_DISABLE(0), 4);
   tu_cs_emit(cs, ~linkage.varmask[0]);
   tu_cs_emit(cs, ~linkage.varmask[1]);
   tu_cs_emit(cs, ~linkage.varmask[2]);
   tu_cs_emit(cs, ~linkage.varmask[3]);

   /* a6xx finds position/pointsize at the end */
   const uint32_t pointsize_regid =
      ir3_find_output_regid(last_shader, VARYING_SLOT_PSIZ);
   const uint32_t layer_regid =
      ir3_find_output_regid(last_shader, VARYING_SLOT_LAYER);
   const uint32_t view_regid =
      ir3_find_output_regid(last_shader, VARYING_SLOT_VIEWPORT);
   const uint32_t clip0_regid =
      ir3_find_output_regid(last_shader, VARYING_SLOT_CLIP_DIST0);
   const uint32_t clip1_regid =
      ir3_find_output_regid(last_shader, VARYING_SLOT_CLIP_DIST1);
   uint32_t flags_regid = gs ?
      ir3_find_output_regid(gs, VARYING_SLOT_GS_VERTEX_FLAGS_IR3) : 0;

   uint32_t pointsize_loc = 0xff, position_loc = 0xff, layer_loc = 0xff, view_loc = 0xff;

   if (layer_regid != regid(63, 0)) {
      layer_loc = linkage.max_loc;
      ir3_link_add(&linkage, VARYING_SLOT_LAYER, layer_regid, 0x1, linkage.max_loc);
   }

   if (view_regid != regid(63, 0)) {
      view_loc = linkage.max_loc;
      ir3_link_add(&linkage, VARYING_SLOT_VIEWPORT, view_regid, 0x1, linkage.max_loc);
   }

   unsigned extra_pos = 0;

   for (unsigned i = 0; i < last_shader->outputs_count; i++) {
      if (last_shader->outputs[i].slot != VARYING_SLOT_POS)
         continue;

      if (position_loc == 0xff)
         position_loc = linkage.max_loc;

      ir3_link_add(&linkage, last_shader->outputs[i].slot,
                   last_shader->outputs[i].regid,
                   0xf, position_loc + 4 * last_shader->outputs[i].view);
      extra_pos = MAX2(extra_pos, last_shader->outputs[i].view);
   }

   if (pointsize_regid != regid(63, 0)) {
      pointsize_loc = linkage.max_loc;
      ir3_link_add(&linkage, VARYING_SLOT_PSIZ, pointsize_regid, 0x1, linkage.max_loc);
   }

   uint8_t clip_cull_mask = last_shader->clip_mask | last_shader->cull_mask;

   /* Handle the case where clip/cull distances aren't read by the FS */
   uint32_t clip0_loc = linkage.clip0_loc, clip1_loc = linkage.clip1_loc;
   if (clip0_loc == 0xff && clip0_regid != regid(63, 0)) {
      clip0_loc = linkage.max_loc;
      ir3_link_add(&linkage, VARYING_SLOT_CLIP_DIST0, clip0_regid,
                   clip_cull_mask & 0xf, linkage.max_loc);
   }
   if (clip1_loc == 0xff && clip1_regid != regid(63, 0)) {
      clip1_loc = linkage.max_loc;
      ir3_link_add(&linkage, VARYING_SLOT_CLIP_DIST1, clip1_regid,
                   clip_cull_mask >> 4, linkage.max_loc);
   }

   tu6_setup_streamout(cs, last_shader, &linkage);

   /* The GPU hangs on some models when there are no outputs (xs_pack::CNT),
    * at least when a DS is the last stage, so add a dummy output to keep it
    * happy if there aren't any. We do this late in order to avoid emitting
    * any unused code and make sure that optimizations don't remove it.
    */
   if (linkage.cnt == 0)
      ir3_link_add(&linkage, 0, 0, 0x1, linkage.max_loc);

   /* map outputs of the last shader to VPC */
   assert(linkage.cnt <= 32);
   const uint32_t sp_out_count = DIV_ROUND_UP(linkage.cnt, 2);
   const uint32_t sp_vpc_dst_count = DIV_ROUND_UP(linkage.cnt, 4);
   uint32_t sp_out[16] = {0};
   uint32_t sp_vpc_dst[8] = {0};
   for (uint32_t i = 0; i < linkage.cnt; i++) {
      ((uint16_t *) sp_out)[i] =
         A6XX_SP_VS_OUT_REG_A_REGID(linkage.var[i].regid) |
         A6XX_SP_VS_OUT_REG_A_COMPMASK(linkage.var[i].compmask);
      ((uint8_t *) sp_vpc_dst)[i] =
         A6XX_SP_VS_VPC_DST_REG_OUTLOC0(linkage.var[i].loc);
   }

   tu_cs_emit_pkt4(cs, cfg->reg_sp_xs_out_reg, sp_out_count);
   tu_cs_emit_array(cs, sp_out, sp_out_count);

   tu_cs_emit_pkt4(cs, cfg->reg_sp_xs_vpc_dst_reg, sp_vpc_dst_count);
   tu_cs_emit_array(cs, sp_vpc_dst, sp_vpc_dst_count);

   tu_cs_emit_pkt4(cs, cfg->reg_vpc_xs_pack, 1);
   tu_cs_emit(cs, A6XX_VPC_VS_PACK_POSITIONLOC(position_loc) |
                  A6XX_VPC_VS_PACK_PSIZELOC(pointsize_loc) |
                  A6XX_VPC_VS_PACK_STRIDE_IN_VPC(linkage.max_loc) |
                  A6XX_VPC_VS_PACK_EXTRAPOS(extra_pos));

   tu_cs_emit_pkt4(cs, cfg->reg_vpc_xs_clip_cntl, 1);
   tu_cs_emit(cs, A6XX_VPC_VS_CLIP_CNTL_CLIP_MASK(clip_cull_mask) |
                  A6XX_VPC_VS_CLIP_CNTL_CLIP_DIST_03_LOC(clip0_loc) |
                  A6XX_VPC_VS_CLIP_CNTL_CLIP_DIST_47_LOC(clip1_loc));

   tu_cs_emit_pkt4(cs, cfg->reg_gras_xs_cl_cntl, 1);
   tu_cs_emit(cs, A6XX_GRAS_VS_CL_CNTL_CLIP_MASK(last_shader->clip_mask) |
                  A6XX_GRAS_VS_CL_CNTL_CULL_MASK(last_shader->cull_mask));
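
   /* PC_xS_OUT_CNTL is emitted for every active geometry stage; only the
    * last stage gets the full stride/pointsize/layer/view/clip information,
    * the earlier stages only need the PrimID bit.
    */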
   const struct ir3_shader_variant *geom_shaders[] = { vs, hs, ds, gs };

   for (unsigned i = 0; i < ARRAY_SIZE(geom_shaders); i++) {
      const struct ir3_shader_variant *shader = geom_shaders[i];
      if (!shader)
         continue;

      bool primid = shader->type != MESA_SHADER_VERTEX &&
         VALIDREG(ir3_find_sysval_regid(shader, SYSTEM_VALUE_PRIMITIVE_ID));

      tu_cs_emit_pkt4(cs, reg_config[shader->type].reg_pc_xs_out_cntl, 1);
      if (shader == last_shader) {
         tu_cs_emit(cs, A6XX_PC_VS_OUT_CNTL_STRIDE_IN_VPC(linkage.max_loc) |
                        CONDREG(pointsize_regid, A6XX_PC_VS_OUT_CNTL_PSIZE) |
                        CONDREG(layer_regid, A6XX_PC_VS_OUT_CNTL_LAYER) |
                        CONDREG(view_regid, A6XX_PC_VS_OUT_CNTL_VIEW) |
                        COND(primid, A6XX_PC_VS_OUT_CNTL_PRIMITIVE_ID) |
                        A6XX_PC_VS_OUT_CNTL_CLIP_MASK(clip_cull_mask));
      } else {
         tu_cs_emit(cs, COND(primid, A6XX_PC_VS_OUT_CNTL_PRIMITIVE_ID));
      }
   }

   /* if vertex_flags somehow gets optimized out, you're gonna have a bad time: */
   if (gs)
      assert(flags_regid != INVALID_REG);

   tu_cs_emit_pkt4(cs, cfg->reg_sp_xs_primitive_cntl, 1);
   tu_cs_emit(cs, A6XX_SP_VS_PRIMITIVE_CNTL_OUT(linkage.cnt) |
                  A6XX_SP_GS_PRIMITIVE_CNTL_FLAGS_REGID(flags_regid));

   tu_cs_emit_pkt4(cs, cfg->reg_vpc_xs_layer_cntl, 1);
   tu_cs_emit(cs, A6XX_VPC_VS_LAYER_CNTL_LAYERLOC(layer_loc) |
                  A6XX_VPC_VS_LAYER_CNTL_VIEWLOC(view_loc));

   tu_cs_emit_pkt4(cs, cfg->reg_gras_xs_layer_cntl, 1);
   tu_cs_emit(cs, CONDREG(layer_regid, A6XX_GRAS_GS_LAYER_CNTL_WRITES_LAYER) |
                  CONDREG(view_regid, A6XX_GRAS_GS_LAYER_CNTL_WRITES_VIEW));

   tu_cs_emit_regs(cs, A6XX_PC_PRIMID_PASSTHRU(primid_passthru));

   tu_cs_emit_pkt4(cs, REG_A6XX_VPC_CNTL_0, 1);
   tu_cs_emit(cs, A6XX_VPC_CNTL_0_NUMNONPOSVAR(fs ? fs->total_in : 0) |
                  COND(fs && fs->total_in, A6XX_VPC_CNTL_0_VARYING) |
                  A6XX_VPC_CNTL_0_PRIMIDLOC(linkage.primid_loc) |
                  A6XX_VPC_CNTL_0_VIEWIDLOC(linkage.viewid_loc));
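
   /* Tessellation-only state: number of output patch vertices, the size of
    * the incoming patch per HS wave, and the output primitive/spacing taken
    * from whichever of HS/DS actually specifies it.
    */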
   if (hs) {
      tu_cs_emit_pkt4(cs, REG_A6XX_PC_TESS_NUM_VERTEX, 1);
      tu_cs_emit(cs, hs->tess.tcs_vertices_out);

      /* Total attribute slots in HS incoming patch. */
      tu_cs_emit_pkt4(cs, REG_A6XX_PC_HS_INPUT_SIZE, 1);
      tu_cs_emit(cs, patch_control_points * vs->output_size / 4);

      const uint32_t wavesize = 64;
      const uint32_t max_wave_input_size = 64;

      /* note: if HS is really just the VS extended, then this
       * should be by MAX2(patch_control_points, hs->tess.tcs_vertices_out)
       * however that doesn't match the blob, and fails some dEQP tests.
       */
      uint32_t prims_per_wave = wavesize / hs->tess.tcs_vertices_out;
      uint32_t max_prims_per_wave =
         max_wave_input_size * wavesize / (vs->output_size * patch_control_points);
      prims_per_wave = MIN2(prims_per_wave, max_prims_per_wave);

      uint32_t total_size = vs->output_size * patch_control_points * prims_per_wave;
      uint32_t wave_input_size = DIV_ROUND_UP(total_size, wavesize);

      tu_cs_emit_pkt4(cs, REG_A6XX_SP_HS_WAVE_INPUT_SIZE, 1);
      tu_cs_emit(cs, wave_input_size);

      /* In SPIR-V generated from GLSL, the tessellation primitive params are
       * specified in the tess eval shader, but in SPIR-V generated from
       * HLSL, they are specified in the tess control shader. */
      const struct ir3_shader_variant *tess =
         ds->tess.spacing == TESS_SPACING_UNSPECIFIED ? hs : ds;
      tu_cs_emit_pkt4(cs, REG_A6XX_PC_TESS_CNTL, 1);
      uint32_t output;
      if (tess->tess.point_mode)
         output = TESS_POINTS;
      else if (tess->tess.primitive_mode == TESS_PRIMITIVE_ISOLINES)
         output = TESS_LINES;
      else if (tess->tess.ccw)
         output = TESS_CCW_TRIS;
      else
         output = TESS_CW_TRIS;

      enum a6xx_tess_spacing spacing;
      switch (tess->tess.spacing) {
      case TESS_SPACING_EQUAL:
         spacing = TESS_EQUAL;
         break;
      case TESS_SPACING_FRACTIONAL_ODD:
         spacing = TESS_FRACTIONAL_ODD;
         break;
      case TESS_SPACING_FRACTIONAL_EVEN:
         spacing = TESS_FRACTIONAL_EVEN;
         break;
      case TESS_SPACING_UNSPECIFIED:
      default:
         unreachable("invalid tess spacing");
      }
      tu_cs_emit(cs, A6XX_PC_TESS_CNTL_SPACING(spacing) |
                     A6XX_PC_TESS_CNTL_OUTPUT(output));

      tu6_emit_link_map(cs, vs, hs, SB6_HS_SHADER);
      tu6_emit_link_map(cs, hs, ds, SB6_DS_SHADER);
   }
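
   /* GS-only state: link the previous stage's outputs into the GS and
    * program the GS vertex/invocation counts and per-primitive strides.
    */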
   if (gs) {
      uint32_t vertices_out, invocations, output, vec4_size;
      uint32_t prev_stage_output_size = ds ? ds->output_size : vs->output_size;

      if (hs) {
         tu6_emit_link_map(cs, ds, gs, SB6_GS_SHADER);
      } else {
         tu6_emit_link_map(cs, vs, gs, SB6_GS_SHADER);
      }
      vertices_out = gs->gs.vertices_out - 1;
      output = primitive_to_tess(gs->gs.output_primitive);
      invocations = gs->gs.invocations - 1;
      /* Size of per-primitive allocation in ldlw memory in vec4s. */
      vec4_size = gs->gs.vertices_in *
                  DIV_ROUND_UP(prev_stage_output_size, 4);

      tu_cs_emit_pkt4(cs, REG_A6XX_PC_PRIMITIVE_CNTL_5, 1);
      tu_cs_emit(cs,
            A6XX_PC_PRIMITIVE_CNTL_5_GS_VERTICES_OUT(vertices_out) |
            A6XX_PC_PRIMITIVE_CNTL_5_GS_OUTPUT(output) |
            A6XX_PC_PRIMITIVE_CNTL_5_GS_INVOCATIONS(invocations));

      tu_cs_emit_pkt4(cs, REG_A6XX_VPC_GS_PARAM, 1);
      tu_cs_emit(cs, 0xff);

      tu_cs_emit_pkt4(cs, REG_A6XX_PC_PRIMITIVE_CNTL_6, 1);
      tu_cs_emit(cs, A6XX_PC_PRIMITIVE_CNTL_6_STRIDE_IN_VPC(vec4_size));

      uint32_t prim_size = prev_stage_output_size;
      if (prim_size > 64)
         prim_size = 64;
      else if (prim_size == 64)
         prim_size = 63;
      tu_cs_emit_pkt4(cs, REG_A6XX_SP_GS_PRIM_SIZE, 1);
      tu_cs_emit(cs, prim_size);
   }
}
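
/* Compute the packed interpolation-mode and point-coord replacement bits for
 * FS input `index`, returning the number of mode bits produced (two per
 * component for flat or point-coord inputs, zero for plain smooth inputs).
 */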
static int
tu6_vpc_varying_mode(const struct ir3_shader_variant *fs,
                     uint32_t index,
                     uint8_t *interp_mode,
                     uint8_t *ps_repl_mode)
{
   enum
   {
      INTERP_SMOOTH = 0,
      INTERP_FLAT = 1,
      INTERP_ZERO = 2,
      INTERP_ONE = 3,
   };
   enum
   {
      PS_REPL_NONE = 0,
      PS_REPL_S = 1,
      PS_REPL_T = 2,
      PS_REPL_ONE_MINUS_T = 3,
   };

   const uint32_t compmask = fs->inputs[index].compmask;

   /* NOTE: varyings are packed, so if compmask is 0xb then first, second, and
    * fourth component occupy three consecutive varying slots
    */
   int shift = 0;
   *interp_mode = 0;
   *ps_repl_mode = 0;
   if (fs->inputs[index].slot == VARYING_SLOT_PNTC) {
      if (compmask & 0x1) {
         *ps_repl_mode |= PS_REPL_S << shift;
         shift += 2;
      }
      if (compmask & 0x2) {
         *ps_repl_mode |= PS_REPL_T << shift;
         shift += 2;
      }
      if (compmask & 0x4) {
         *interp_mode |= INTERP_ZERO << shift;
         shift += 2;
      }
      if (compmask & 0x8) {
         *interp_mode |= INTERP_ONE << 6;
         shift += 2;
      }
   } else if (fs->inputs[index].flat) {
      for (int i = 0; i < 4; i++) {
         if (compmask & (1 << i)) {
            *interp_mode |= INTERP_FLAT << shift;
            shift += 2;
         }
      }
   }

   return shift;
}
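
/* Gather the interpolation/replacement mode bits for every FS input and
 * write the packed arrays to VPC_VARYING_INTERP_MODE and
 * VPC_VARYING_PS_REPL_MODE.
 */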
static void
tu6_emit_vpc_varying_modes(struct tu_cs *cs,
                           const struct ir3_shader_variant *fs)
{
   uint32_t interp_modes[8] = { 0 };
   uint32_t ps_repl_modes[8] = { 0 };

   if (fs) {
      for (int i = -1;
           (i = ir3_next_varying(fs, i)) < (int) fs->inputs_count;) {

         /* get the mode for input i */
         uint8_t interp_mode;
         uint8_t ps_repl_mode;
         const int bits =
            tu6_vpc_varying_mode(fs, i, &interp_mode, &ps_repl_mode);

         /* OR the mode into the array */
         const uint32_t inloc = fs->inputs[i].inloc * 2;
         uint32_t n = inloc / 32;
         uint32_t shift = inloc % 32;
         interp_modes[n] |= interp_mode << shift;
         ps_repl_modes[n] |= ps_repl_mode << shift;
         if (shift + bits > 32) {
            n++;
            shift = 32 - shift;

            interp_modes[n] |= interp_mode >> shift;
            ps_repl_modes[n] |= ps_repl_mode >> shift;
         }
      }
   }

   tu_cs_emit_pkt4(cs, REG_A6XX_VPC_VARYING_INTERP_MODE(0), 8);
   tu_cs_emit_array(cs, interp_modes, 8);

   tu_cs_emit_pkt4(cs, REG_A6XX_VPC_VARYING_PS_REPL_MODE(0), 8);
   tu_cs_emit_array(cs, ps_repl_modes, 8);
}
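
/* Program the FS input state: sysval and barycentric register assignments,
 * sampler prefetch commands, and the HLSQ/GRAS/RB controls that depend on
 * which inputs the shader actually reads.
 */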
void
tu6_emit_fs_inputs(struct tu_cs *cs, const struct ir3_shader_variant *fs)
{
   uint32_t face_regid, coord_regid, zwcoord_regid, samp_id_regid;
   uint32_t ij_regid[IJ_COUNT];
   uint32_t smask_in_regid;

   bool sample_shading = fs->per_samp | fs->key.sample_shading;
   bool enable_varyings = fs->total_in > 0;

   samp_id_regid = ir3_find_sysval_regid(fs, SYSTEM_VALUE_SAMPLE_ID);
   smask_in_regid = ir3_find_sysval_regid(fs, SYSTEM_VALUE_SAMPLE_MASK_IN);
   face_regid = ir3_find_sysval_regid(fs, SYSTEM_VALUE_FRONT_FACE);
   coord_regid = ir3_find_sysval_regid(fs, SYSTEM_VALUE_FRAG_COORD);
   zwcoord_regid = VALIDREG(coord_regid) ? coord_regid + 2 : regid(63, 0);
   for (unsigned i = 0; i < ARRAY_SIZE(ij_regid); i++)
      ij_regid[i] = ir3_find_sysval_regid(fs, SYSTEM_VALUE_BARYCENTRIC_PERSP_PIXEL + i);

   if (fs->num_sampler_prefetch > 0) {
      assert(VALIDREG(ij_regid[IJ_PERSP_PIXEL]));
      /* also, it seems like ij_pix is *required* to be r0.x */
      assert(ij_regid[IJ_PERSP_PIXEL] == regid(0, 0));
   }

   tu_cs_emit_pkt4(cs, REG_A6XX_SP_FS_PREFETCH_CNTL, 1 + fs->num_sampler_prefetch);
   tu_cs_emit(cs, A6XX_SP_FS_PREFETCH_CNTL_COUNT(fs->num_sampler_prefetch) |
                  A6XX_SP_FS_PREFETCH_CNTL_UNK4(regid(63, 0)) |
                  0x7000); // XXX);
   for (int i = 0; i < fs->num_sampler_prefetch; i++) {
      const struct ir3_sampler_prefetch *prefetch = &fs->sampler_prefetch[i];
      tu_cs_emit(cs, A6XX_SP_FS_PREFETCH_CMD_SRC(prefetch->src) |
                     A6XX_SP_FS_PREFETCH_CMD_SAMP_ID(prefetch->samp_id) |
                     A6XX_SP_FS_PREFETCH_CMD_TEX_ID(prefetch->tex_id) |
                     A6XX_SP_FS_PREFETCH_CMD_DST(prefetch->dst) |
                     A6XX_SP_FS_PREFETCH_CMD_WRMASK(prefetch->wrmask) |
                     COND(prefetch->half_precision, A6XX_SP_FS_PREFETCH_CMD_HALF) |
                     A6XX_SP_FS_PREFETCH_CMD_CMD(prefetch->cmd));
   }

   if (fs->num_sampler_prefetch > 0) {
      tu_cs_emit_pkt4(cs, REG_A6XX_SP_FS_BINDLESS_PREFETCH_CMD(0), fs->num_sampler_prefetch);
      for (int i = 0; i < fs->num_sampler_prefetch; i++) {
         const struct ir3_sampler_prefetch *prefetch = &fs->sampler_prefetch[i];
         tu_cs_emit(cs,
                    A6XX_SP_FS_BINDLESS_PREFETCH_CMD_SAMP_ID(prefetch->samp_bindless_id) |
                    A6XX_SP_FS_BINDLESS_PREFETCH_CMD_TEX_ID(prefetch->tex_bindless_id));
      }
   }

   tu_cs_emit_pkt4(cs, REG_A6XX_HLSQ_CONTROL_1_REG, 5);
   tu_cs_emit(cs, 0x7);
   tu_cs_emit(cs, A6XX_HLSQ_CONTROL_2_REG_FACEREGID(face_regid) |
                  A6XX_HLSQ_CONTROL_2_REG_SAMPLEID(samp_id_regid) |
                  A6XX_HLSQ_CONTROL_2_REG_SAMPLEMASK(smask_in_regid) |
                  A6XX_HLSQ_CONTROL_2_REG_CENTERRHW(ij_regid[IJ_PERSP_CENTER_RHW]));
   tu_cs_emit(cs, A6XX_HLSQ_CONTROL_3_REG_IJ_PERSP_PIXEL(ij_regid[IJ_PERSP_PIXEL]) |
                  A6XX_HLSQ_CONTROL_3_REG_IJ_LINEAR_PIXEL(ij_regid[IJ_LINEAR_PIXEL]) |
                  A6XX_HLSQ_CONTROL_3_REG_IJ_PERSP_CENTROID(ij_regid[IJ_PERSP_CENTROID]) |
                  A6XX_HLSQ_CONTROL_3_REG_IJ_LINEAR_CENTROID(ij_regid[IJ_LINEAR_CENTROID]));
   tu_cs_emit(cs, A6XX_HLSQ_CONTROL_4_REG_XYCOORDREGID(coord_regid) |
                  A6XX_HLSQ_CONTROL_4_REG_ZWCOORDREGID(zwcoord_regid) |
                  A6XX_HLSQ_CONTROL_4_REG_IJ_PERSP_SAMPLE(ij_regid[IJ_PERSP_SAMPLE]) |
                  A6XX_HLSQ_CONTROL_4_REG_IJ_LINEAR_SAMPLE(ij_regid[IJ_LINEAR_SAMPLE]));
   tu_cs_emit(cs, 0xfcfc);

   enum a6xx_threadsize thrsz = fs->info.double_threadsize ? THREAD128 : THREAD64;
   tu_cs_emit_pkt4(cs, REG_A6XX_HLSQ_FS_CNTL_0, 1);
   tu_cs_emit(cs, A6XX_HLSQ_FS_CNTL_0_THREADSIZE(thrsz) |
                  COND(enable_varyings, A6XX_HLSQ_FS_CNTL_0_VARYINGS));

   bool need_size = fs->frag_face || fs->fragcoord_compmask != 0;
   bool need_size_persamp = false;
   if (VALIDREG(ij_regid[IJ_PERSP_CENTER_RHW])) {
      if (sample_shading)
         need_size_persamp = true;
      else
         need_size = true;
   }

   tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_CNTL, 1);
   tu_cs_emit(cs,
         CONDREG(ij_regid[IJ_PERSP_PIXEL], A6XX_GRAS_CNTL_IJ_PERSP_PIXEL) |
         CONDREG(ij_regid[IJ_PERSP_CENTROID], A6XX_GRAS_CNTL_IJ_PERSP_CENTROID) |
         CONDREG(ij_regid[IJ_PERSP_SAMPLE], A6XX_GRAS_CNTL_IJ_PERSP_SAMPLE) |
         CONDREG(ij_regid[IJ_LINEAR_PIXEL], A6XX_GRAS_CNTL_IJ_LINEAR_PIXEL) |
         CONDREG(ij_regid[IJ_LINEAR_CENTROID], A6XX_GRAS_CNTL_IJ_LINEAR_CENTROID) |
         CONDREG(ij_regid[IJ_LINEAR_SAMPLE], A6XX_GRAS_CNTL_IJ_LINEAR_SAMPLE) |
         COND(need_size, A6XX_GRAS_CNTL_IJ_LINEAR_PIXEL) |
         COND(need_size_persamp, A6XX_GRAS_CNTL_IJ_LINEAR_SAMPLE) |
         COND(fs->fragcoord_compmask != 0, A6XX_GRAS_CNTL_COORD_MASK(fs->fragcoord_compmask)));

   tu_cs_emit_pkt4(cs, REG_A6XX_RB_RENDER_CONTROL0, 2);
   tu_cs_emit(cs,
         CONDREG(ij_regid[IJ_PERSP_PIXEL], A6XX_RB_RENDER_CONTROL0_IJ_PERSP_PIXEL) |
         CONDREG(ij_regid[IJ_PERSP_CENTROID], A6XX_RB_RENDER_CONTROL0_IJ_PERSP_CENTROID) |
         CONDREG(ij_regid[IJ_PERSP_SAMPLE], A6XX_RB_RENDER_CONTROL0_IJ_PERSP_SAMPLE) |
         CONDREG(ij_regid[IJ_LINEAR_PIXEL], A6XX_RB_RENDER_CONTROL0_IJ_LINEAR_PIXEL) |
         CONDREG(ij_regid[IJ_LINEAR_CENTROID], A6XX_RB_RENDER_CONTROL0_IJ_LINEAR_CENTROID) |
         CONDREG(ij_regid[IJ_LINEAR_SAMPLE], A6XX_RB_RENDER_CONTROL0_IJ_LINEAR_SAMPLE) |
         COND(need_size, A6XX_RB_RENDER_CONTROL0_IJ_LINEAR_PIXEL) |
         COND(enable_varyings, A6XX_RB_RENDER_CONTROL0_UNK10) |
         COND(need_size_persamp, A6XX_RB_RENDER_CONTROL0_IJ_LINEAR_SAMPLE) |
         COND(fs->fragcoord_compmask != 0,
              A6XX_RB_RENDER_CONTROL0_COORD_MASK(fs->fragcoord_compmask)));
   tu_cs_emit(cs,
         A6XX_RB_RENDER_CONTROL1_FRAGCOORDSAMPLEMODE(
            sample_shading ? FRAGCOORD_SAMPLE : FRAGCOORD_CENTER) |
         CONDREG(smask_in_regid, A6XX_RB_RENDER_CONTROL1_SAMPLEMASK) |
         CONDREG(samp_id_regid, A6XX_RB_RENDER_CONTROL1_SAMPLEID) |
         CONDREG(ij_regid[IJ_PERSP_CENTER_RHW], A6XX_RB_RENDER_CONTROL1_CENTERRHW) |
         COND(fs->frag_face, A6XX_RB_RENDER_CONTROL1_FACENESS));

   tu_cs_emit_pkt4(cs, REG_A6XX_RB_SAMPLE_CNTL, 1);
   tu_cs_emit(cs, COND(sample_shading, A6XX_RB_SAMPLE_CNTL_PER_SAMP_MODE));

   tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_LRZ_PS_INPUT_CNTL, 1);
   tu_cs_emit(cs, CONDREG(samp_id_regid, A6XX_GRAS_LRZ_PS_INPUT_CNTL_SAMPLEID) |
                  A6XX_GRAS_LRZ_PS_INPUT_CNTL_FRAGCOORDSAMPLEMODE(
                     sample_shading ? FRAGCOORD_SAMPLE : FRAGCOORD_CENTER));

   tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_SAMPLE_CNTL, 1);
   tu_cs_emit(cs, COND(sample_shading, A6XX_GRAS_SAMPLE_CNTL_PER_SAMP_MODE));
}
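
/* Program the FS output state: depth/sample-mask/stencil-ref output
 * registers, the per-MRT color output registers and render-component masks,
 * and record the LRZ-relevant facts (kill, early fragment tests) on the
 * pipeline.
 */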
static void
tu6_emit_fs_outputs(struct tu_cs *cs,
                    const struct ir3_shader_variant *fs,
                    uint32_t mrt_count, bool dual_src_blend,
                    uint32_t render_components,
                    bool no_earlyz,
                    struct tu_pipeline *pipeline)
{
   uint32_t smask_regid, posz_regid, stencilref_regid;

   posz_regid = ir3_find_output_regid(fs, FRAG_RESULT_DEPTH);
   smask_regid = ir3_find_output_regid(fs, FRAG_RESULT_SAMPLE_MASK);
   stencilref_regid = ir3_find_output_regid(fs, FRAG_RESULT_STENCIL);

   uint32_t fragdata_regid[8];
   if (fs->color0_mrt) {
      fragdata_regid[0] = ir3_find_output_regid(fs, FRAG_RESULT_COLOR);
      for (uint32_t i = 1; i < ARRAY_SIZE(fragdata_regid); i++)
         fragdata_regid[i] = fragdata_regid[0];
   } else {
      for (uint32_t i = 0; i < ARRAY_SIZE(fragdata_regid); i++)
         fragdata_regid[i] = ir3_find_output_regid(fs, FRAG_RESULT_DATA0 + i);
   }

   tu_cs_emit_pkt4(cs, REG_A6XX_SP_FS_OUTPUT_CNTL0, 2);
   tu_cs_emit(cs, A6XX_SP_FS_OUTPUT_CNTL0_DEPTH_REGID(posz_regid) |
                  A6XX_SP_FS_OUTPUT_CNTL0_SAMPMASK_REGID(smask_regid) |
                  A6XX_SP_FS_OUTPUT_CNTL0_STENCILREF_REGID(stencilref_regid) |
                  COND(dual_src_blend, A6XX_SP_FS_OUTPUT_CNTL0_DUAL_COLOR_IN_ENABLE));
   tu_cs_emit(cs, A6XX_SP_FS_OUTPUT_CNTL1_MRT(mrt_count));

   uint32_t fs_render_components = 0;

   tu_cs_emit_pkt4(cs, REG_A6XX_SP_FS_OUTPUT_REG(0), 8);
   for (uint32_t i = 0; i < ARRAY_SIZE(fragdata_regid); i++) {
      tu_cs_emit(cs, A6XX_SP_FS_OUTPUT_REG_REGID(fragdata_regid[i]) |
                     (COND(fragdata_regid[i] & HALF_REG_ID,
                           A6XX_SP_FS_OUTPUT_REG_HALF_PRECISION)));

      if (VALIDREG(fragdata_regid[i])) {
         fs_render_components |= 0xf << (i * 4);
      }
   }

   /* dual source blending has an extra fs output in the 2nd slot */
   if (dual_src_blend) {
      fs_render_components |= 0xf << 4;
   }

   /* There is no point in having a component enabled which is not written
    * by the shader. Per the VK spec that is UB, however a few apps depend on
    * the attachment not being changed if the FS doesn't have a corresponding
    * output.
    */
   fs_render_components &= render_components;

   tu_cs_emit_regs(cs,
                   A6XX_SP_FS_RENDER_COMPONENTS(.dword = fs_render_components));

   tu_cs_emit_pkt4(cs, REG_A6XX_RB_FS_OUTPUT_CNTL0, 2);
   tu_cs_emit(cs, COND(fs->writes_pos, A6XX_RB_FS_OUTPUT_CNTL0_FRAG_WRITES_Z) |
                  COND(fs->writes_smask, A6XX_RB_FS_OUTPUT_CNTL0_FRAG_WRITES_SAMPMASK) |
                  COND(fs->writes_stencilref, A6XX_RB_FS_OUTPUT_CNTL0_FRAG_WRITES_STENCILREF) |
                  COND(dual_src_blend, A6XX_RB_FS_OUTPUT_CNTL0_DUAL_COLOR_IN_ENABLE));
   tu_cs_emit(cs, A6XX_RB_FS_OUTPUT_CNTL1_MRT(mrt_count));

   tu_cs_emit_regs(cs,
                   A6XX_RB_RENDER_COMPONENTS(.dword = fs_render_components));

   if (pipeline) {
      pipeline->lrz.fs_has_kill = fs->has_kill;
      pipeline->lrz.early_fragment_tests = fs->fs.early_fragment_tests;

      if (!fs->fs.early_fragment_tests &&
          (fs->no_earlyz || fs->has_kill || fs->writes_pos || fs->writes_stencilref || no_earlyz || fs->writes_smask)) {
         pipeline->lrz.force_late_z = true;
      }
   }
}
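
/* Upload the driver params that the geometry/tessellation stages read as
 * constants: primitive and vertex strides, patch size, and the iovas of the
 * shared tess factor/param BO.
 */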
static void
tu6_emit_geom_tess_consts(struct tu_cs *cs,
                          const struct ir3_shader_variant *vs,
                          const struct ir3_shader_variant *hs,
                          const struct ir3_shader_variant *ds,
                          const struct ir3_shader_variant *gs,
                          uint32_t cps_per_patch)
{
   struct tu_device *dev = cs->device;

   uint32_t num_vertices =
      hs ? cps_per_patch : gs->gs.vertices_in;

   uint32_t vs_params[4] = {
      vs->output_size * num_vertices * 4,  /* vs primitive stride */
      vs->output_size * 4,                 /* vs vertex stride */
      0,
      0,
   };
   uint32_t vs_base = ir3_const_state(vs)->offsets.primitive_param;
   tu6_emit_const(cs, CP_LOAD_STATE6_GEOM, vs_base, SB6_VS_SHADER, 0,
                  ARRAY_SIZE(vs_params), vs_params);

   if (hs) {
      assert(ds->type != MESA_SHADER_NONE);

      /* Create the shared tess factor BO the first time tess is used on the device. */
      mtx_lock(&dev->mutex);
      if (!dev->tess_bo)
         tu_bo_init_new(dev, &dev->tess_bo, TU_TESS_BO_SIZE, TU_BO_ALLOC_NO_FLAGS);
      mtx_unlock(&dev->mutex);

      uint64_t tess_factor_iova = dev->tess_bo->iova;
      uint64_t tess_param_iova = tess_factor_iova + TU_TESS_FACTOR_SIZE;

      uint32_t hs_params[8] = {
         vs->output_size * num_vertices * 4,  /* hs primitive stride */
         vs->output_size * 4,                 /* hs vertex stride */
         hs->output_size,
         cps_per_patch,
         tess_param_iova,
         tess_param_iova >> 32,
         tess_factor_iova,
         tess_factor_iova >> 32,
      };

      uint32_t hs_base = hs->const_state->offsets.primitive_param;
      uint32_t hs_param_dwords = MIN2((hs->constlen - hs_base) * 4, ARRAY_SIZE(hs_params));
      tu6_emit_const(cs, CP_LOAD_STATE6_GEOM, hs_base, SB6_HS_SHADER, 0,
                     hs_param_dwords, hs_params);
      if (gs)
         num_vertices = gs->gs.vertices_in;

      uint32_t ds_params[8] = {
         ds->output_size * num_vertices * 4,  /* ds primitive stride */
         ds->output_size * 4,                 /* ds vertex stride */
         hs->output_size,                     /* hs vertex stride (dwords) */
         hs->tess.tcs_vertices_out,
         tess_param_iova,
         tess_param_iova >> 32,
         tess_factor_iova,
         tess_factor_iova >> 32,
      };

      uint32_t ds_base = ds->const_state->offsets.primitive_param;
      uint32_t ds_param_dwords = MIN2((ds->constlen - ds_base) * 4, ARRAY_SIZE(ds_params));
      tu6_emit_const(cs, CP_LOAD_STATE6_GEOM, ds_base, SB6_DS_SHADER, 0,
                     ds_param_dwords, ds_params);
   }

   if (gs) {
      const struct ir3_shader_variant *prev = ds ? ds : vs;
      uint32_t gs_params[4] = {
         prev->output_size * num_vertices * 4,  /* gs primitive stride */
         prev->output_size * 4,                 /* gs vertex stride */
         0,
         0,
      };
      uint32_t gs_base = gs->const_state->offsets.primitive_param;
      tu6_emit_const(cs, CP_LOAD_STATE6_GEOM, gs_base, SB6_GS_SHADER, 0,
                     ARRAY_SIZE(gs_params), gs_params);
   }
}
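
/* Emit the program config state: shared-constant enable, invalidation of
 * stale shader state, and the per-stage shader config (xs_config) for every
 * stage.
 */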
static void
tu6_emit_program_config(struct tu_cs *cs,
                        struct tu_pipeline_builder *builder)
{
   gl_shader_stage stage = MESA_SHADER_VERTEX;

   STATIC_ASSERT(MESA_SHADER_VERTEX == 0);

   bool shared_consts_enable = tu6_shared_constants_enable(builder->layout,
         builder->device->compiler);
   tu6_emit_shared_consts_enable(cs, shared_consts_enable);

   tu_cs_emit_regs(cs, A6XX_HLSQ_INVALIDATE_CMD(
         .vs_state = true,
         .hs_state = true,
         .ds_state = true,
         .gs_state = true,
         .fs_state = true,
         .gfx_ibo = true,
         .gfx_shared_const = shared_consts_enable));
   for (; stage < ARRAY_SIZE(builder->shader_iova); stage++) {
      tu6_emit_xs_config(cs, stage, builder->shaders->variants[stage]);
   }
}
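
/* Emit the full shader program state for either the binning or the rendering
 * pass: per-stage shader code pointers, multiview controls, VPC linkage, and
 * FS input/output state.
 */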
static void
tu6_emit_program(struct tu_cs *cs,
                 struct tu_pipeline_builder *builder,
                 bool binning_pass,
                 struct tu_pipeline *pipeline)
{
   const struct ir3_shader_variant *vs = builder->shaders->variants[MESA_SHADER_VERTEX];
   const struct ir3_shader_variant *bs = builder->binning_variant;
   const struct ir3_shader_variant *hs = builder->shaders->variants[MESA_SHADER_TESS_CTRL];
   const struct ir3_shader_variant *ds = builder->shaders->variants[MESA_SHADER_TESS_EVAL];
   const struct ir3_shader_variant *gs = builder->shaders->variants[MESA_SHADER_GEOMETRY];
   const struct ir3_shader_variant *fs = builder->shaders->variants[MESA_SHADER_FRAGMENT];
   gl_shader_stage stage = MESA_SHADER_VERTEX;
   uint32_t cps_per_patch = builder->create_info->pTessellationState ?
      builder->create_info->pTessellationState->patchControlPoints : 0;
   bool multi_pos_output = builder->shaders->multi_pos_output;

   /* Don't use the binning pass variant when GS is present because we don't
    * support compiling correct binning pass variants with GS.
    */
   if (binning_pass && !gs) {
      vs = bs;
      tu6_emit_xs(cs, stage, bs, &builder->pvtmem, builder->binning_vs_iova);
      stage++;
   }

   for (; stage < ARRAY_SIZE(builder->shader_iova); stage++) {
      const struct ir3_shader_variant *xs = builder->shaders->variants[stage];

      if (stage == MESA_SHADER_FRAGMENT && binning_pass)
         fs = xs = NULL;

      tu6_emit_xs(cs, stage, xs, &builder->pvtmem, builder->shader_iova[stage]);
   }

   uint32_t multiview_views = util_logbase2(builder->multiview_mask) + 1;
   uint32_t multiview_cntl = builder->multiview_mask ?
      A6XX_PC_MULTIVIEW_CNTL_ENABLE |
      A6XX_PC_MULTIVIEW_CNTL_VIEWS(multiview_views) |
      COND(!multi_pos_output, A6XX_PC_MULTIVIEW_CNTL_DISABLEMULTIPOS)
      : 0;

   /* Copy what the blob does here. This will emit an extra 0x3f
    * CP_EVENT_WRITE when multiview is disabled. I'm not exactly sure what
    * this is working around yet.
    */
   if (builder->device->physical_device->info->a6xx.has_cp_reg_write) {
      tu_cs_emit_pkt7(cs, CP_REG_WRITE, 3);
      tu_cs_emit(cs, CP_REG_WRITE_0_TRACKER(UNK_EVENT_WRITE));
      tu_cs_emit(cs, REG_A6XX_PC_MULTIVIEW_CNTL);
   } else {
      tu_cs_emit_pkt4(cs, REG_A6XX_PC_MULTIVIEW_CNTL, 1);
   }
   tu_cs_emit(cs, multiview_cntl);

   tu_cs_emit_pkt4(cs, REG_A6XX_VFD_MULTIVIEW_CNTL, 1);
   tu_cs_emit(cs, multiview_cntl);

   if (multiview_cntl &&
       builder->device->physical_device->info->a6xx.supports_multiview_mask) {
      tu_cs_emit_pkt4(cs, REG_A6XX_PC_MULTIVIEW_MASK, 1);
      tu_cs_emit(cs, builder->multiview_mask);
   }

   tu_cs_emit_pkt4(cs, REG_A6XX_SP_HS_WAVE_INPUT_SIZE, 1);
   tu_cs_emit(cs, 0);

   tu6_emit_vpc(cs, vs, hs, ds, gs, fs, cps_per_patch);
   tu6_emit_vpc_varying_modes(cs, fs);

   bool no_earlyz = builder->depth_attachment_format == VK_FORMAT_S8_UINT;
   uint32_t mrt_count = builder->color_attachment_count;
   uint32_t render_components = builder->render_components;

   if (builder->alpha_to_coverage) {
      /* alpha to coverage can behave like a discard */
      no_earlyz = true;
      /* alpha value comes from first mrt */
      render_components |= 0xf;
      if (!mrt_count) {
         mrt_count = 1;
         /* Disable memory write for dummy mrt because it doesn't get set otherwise */
         tu_cs_emit_regs(cs, A6XX_RB_MRT_CONTROL(0, .component_enable = 0));
      }
   }

   if (fs) {
      tu6_emit_fs_inputs(cs, fs);
      tu6_emit_fs_outputs(cs, fs, mrt_count,
                          builder->use_dual_src_blend,
                          render_components,
                          no_earlyz,
                          pipeline);
   } else {
      /* TODO: check if these can be skipped if fs is disabled */
      struct ir3_shader_variant dummy_variant = {};
      tu6_emit_fs_inputs(cs, &dummy_variant);
      tu6_emit_fs_outputs(cs, &dummy_variant, mrt_count,
                          builder->use_dual_src_blend,
                          render_components,
                          no_earlyz,
                          NULL);
   }

   if (gs || hs) {
      tu6_emit_geom_tess_consts(cs, vs, hs, ds, gs, cps_per_patch);
   }
}
#define TU6_EMIT_VERTEX_INPUT_MAX_DWORDS (MAX_VERTEX_ATTRIBS * 5 + 4)
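
/* Build the vertex input draw state: VFD_FETCH strides (unless the stride is
 * dynamic), one VFD_DECODE entry per attribute that the VS actually reads,
 * and the VFD_DEST mapping of attributes to VS input registers.
 */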
static void
tu6_emit_vertex_input(struct tu_pipeline *pipeline,
                      struct tu_draw_state *vi_state,
                      const struct ir3_shader_variant *vs,
                      const VkPipelineVertexInputStateCreateInfo *info)
{
   uint32_t binding_instanced = 0; /* bitmask of instanced bindings */
   uint32_t step_rate[MAX_VBS];

   struct tu_cs cs;
   tu_cs_begin_sub_stream(&pipeline->cs,
                          TU6_EMIT_VERTEX_INPUT_MAX_DWORDS, &cs);

   for (uint32_t i = 0; i < info->vertexBindingDescriptionCount; i++) {
      const VkVertexInputBindingDescription *binding =
         &info->pVertexBindingDescriptions[i];

      if (!(pipeline->dynamic_state_mask & BIT(TU_DYNAMIC_STATE_VB_STRIDE))) {
         tu_cs_emit_regs(&cs,
                         A6XX_VFD_FETCH_STRIDE(binding->binding, binding->stride));
      }

      if (binding->inputRate == VK_VERTEX_INPUT_RATE_INSTANCE)
         binding_instanced |= 1 << binding->binding;

      step_rate[binding->binding] = 1;
   }

   const VkPipelineVertexInputDivisorStateCreateInfoEXT *div_state =
      vk_find_struct_const(info->pNext, PIPELINE_VERTEX_INPUT_DIVISOR_STATE_CREATE_INFO_EXT);
   if (div_state) {
      for (uint32_t i = 0; i < div_state->vertexBindingDivisorCount; i++) {
         const VkVertexInputBindingDivisorDescriptionEXT *desc =
            &div_state->pVertexBindingDivisors[i];
         step_rate[desc->binding] = desc->divisor;
      }
   }

   int32_t input_for_attr[MAX_VERTEX_ATTRIBS];
   uint32_t used_attrs_count = 0;

   for (uint32_t attr_idx = 0; attr_idx < info->vertexAttributeDescriptionCount; attr_idx++) {
      input_for_attr[attr_idx] = -1;
      for (uint32_t input_idx = 0; input_idx < vs->inputs_count; input_idx++) {
         if ((vs->inputs[input_idx].slot - VERT_ATTRIB_GENERIC0) ==
             info->pVertexAttributeDescriptions[attr_idx].location) {
            input_for_attr[attr_idx] = input_idx;
            used_attrs_count++;
            break;
         }
      }
   }

   if (used_attrs_count)
      tu_cs_emit_pkt4(&cs, REG_A6XX_VFD_DECODE_INSTR(0), used_attrs_count * 2);

   for (uint32_t attr_idx = 0; attr_idx < info->vertexAttributeDescriptionCount; attr_idx++) {
      const VkVertexInputAttributeDescription *attr =
         &info->pVertexAttributeDescriptions[attr_idx];

      if (input_for_attr[attr_idx] == -1)
         continue;

      const struct tu_native_format format = tu6_format_vtx(attr->format);
      tu_cs_emit(&cs, A6XX_VFD_DECODE_INSTR(0,
                        .idx = attr->binding,
                        .offset = attr->offset,
                        .instanced = binding_instanced & (1 << attr->binding),
                        .format = format.fmt,
                        .swap = format.swap,
                        .unk30 = 1,
                        ._float = !vk_format_is_int(attr->format)).value);
      tu_cs_emit(&cs, A6XX_VFD_DECODE_STEP_RATE(0, step_rate[attr->binding]).value);
   }

   if (used_attrs_count)
      tu_cs_emit_pkt4(&cs, REG_A6XX_VFD_DEST_CNTL_INSTR(0), used_attrs_count);

   for (uint32_t attr_idx = 0; attr_idx < info->vertexAttributeDescriptionCount; attr_idx++) {
      int32_t input_idx = input_for_attr[attr_idx];
      if (input_idx == -1)
         continue;

      tu_cs_emit(&cs, A6XX_VFD_DEST_CNTL_INSTR(0,
                        .writemask = vs->inputs[input_idx].compmask,
                        .regid = vs->inputs[input_idx].regid).value);
   }

   tu_cs_emit_regs(&cs,
                   A6XX_VFD_CONTROL_0(
                     .fetch_cnt = used_attrs_count, /* decode_cnt for binning pass ? */
                     .decode_cnt = used_attrs_count));

   *vi_state = tu_cs_end_draw_state(&pipeline->cs, &cs);
}
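
/* Emit viewport state: per-viewport offset/scale, the scissor derived from
 * each viewport, the z clamp range, and a guardband sized for the smallest
 * viewport.
 */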
2019-02-19 21:49:01 +00:00
|
|
|
void
|
2021-12-30 17:59:46 +00:00
|
|
|
tu6_emit_viewport(struct tu_cs *cs, const VkViewport *viewports, uint32_t num_viewport,
|
|
|
|
bool z_negative_one_to_one)
|
2019-02-19 21:49:01 +00:00
|
|
|
{
|
2020-07-14 15:38:09 +01:00
|
|
|
VkExtent2D guardband = {511, 511};
|
|
|
|
|
|
|
|
tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_CL_VPORT_XOFFSET(0), num_viewport * 6);
|
|
|
|
for (uint32_t i = 0; i < num_viewport; i++) {
|
|
|
|
const VkViewport *viewport = &viewports[i];
|
|
|
|
float offsets[3];
|
|
|
|
float scales[3];
|
|
|
|
scales[0] = viewport->width / 2.0f;
|
|
|
|
scales[1] = viewport->height / 2.0f;
|
2021-12-30 17:59:46 +00:00
|
|
|
if (z_negative_one_to_one) {
|
|
|
|
scales[2] = 0.5 * (viewport->maxDepth - viewport->minDepth);
|
|
|
|
} else {
|
|
|
|
scales[2] = viewport->maxDepth - viewport->minDepth;
|
|
|
|
}
|
|
|
|
|
2020-07-14 15:38:09 +01:00
|
|
|
offsets[0] = viewport->x + scales[0];
|
|
|
|
offsets[1] = viewport->y + scales[1];
|
2021-12-30 17:59:46 +00:00
|
|
|
if (z_negative_one_to_one) {
|
|
|
|
offsets[2] = 0.5 * (viewport->minDepth + viewport->maxDepth);
|
|
|
|
} else {
|
|
|
|
offsets[2] = viewport->minDepth;
|
|
|
|
}
|
|
|
|
|
2020-07-14 15:38:09 +01:00
|
|
|
for (uint32_t j = 0; j < 3; j++) {
|
|
|
|
tu_cs_emit(cs, fui(offsets[j]));
|
|
|
|
tu_cs_emit(cs, fui(scales[j]));
|
|
|
|
}
|
2019-02-19 21:49:01 +00:00
|
|
|
|
2020-07-14 15:38:09 +01:00
|
|
|
guardband.width =
|
|
|
|
MIN2(guardband.width, fd_calc_guardband(offsets[0], scales[0], false));
|
|
|
|
guardband.height =
|
|
|
|
MIN2(guardband.height, fd_calc_guardband(offsets[1], scales[1], false));
|
|
|
|
}
|
2019-02-19 21:49:01 +00:00
|
|
|
|
2020-07-14 15:38:09 +01:00
|
|
|
tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL(0), num_viewport * 2);
|
|
|
|
for (uint32_t i = 0; i < num_viewport; i++) {
|
|
|
|
const VkViewport *viewport = &viewports[i];
|
|
|
|
VkOffset2D min;
|
|
|
|
VkOffset2D max;
|
|
|
|
min.x = (int32_t) viewport->x;
|
|
|
|
max.x = (int32_t) ceilf(viewport->x + viewport->width);
|
|
|
|
if (viewport->height >= 0.0f) {
|
|
|
|
min.y = (int32_t) viewport->y;
|
|
|
|
max.y = (int32_t) ceilf(viewport->y + viewport->height);
|
|
|
|
} else {
|
|
|
|
min.y = (int32_t)(viewport->y + viewport->height);
|
|
|
|
max.y = (int32_t) ceilf(viewport->y);
|
|
|
|
}
|
|
|
|
/* the spec allows viewport->height to be 0.0f */
|
|
|
|
if (min.y == max.y)
|
|
|
|
max.y++;
|
|
|
|
/* allow viewport->width = 0.0f for un-initialized viewports: */
|
|
|
|
if (min.x == max.x)
|
|
|
|
max.x++;
|
2021-03-16 17:48:30 +00:00
|
|
|
|
|
|
|
min.x = MAX2(min.x, 0);
|
|
|
|
min.y = MAX2(min.y, 0);
|
2022-06-01 02:37:16 +01:00
|
|
|
max.x = MAX2(max.x, 1);
|
|
|
|
max.y = MAX2(max.y, 1);
|
2021-03-16 17:48:30 +00:00
|
|
|
|
|
|
|
assert(min.x < max.x);
|
|
|
|
assert(min.y < max.y);
|
2022-06-01 02:37:16 +01:00
|
|
|
|
2020-07-14 15:38:09 +01:00
|
|
|
tu_cs_emit(cs, A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_X(min.x) |
|
2020-07-09 19:59:04 +01:00
|
|
|
A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_Y(min.y));
|
2022-06-01 02:37:16 +01:00
|
|
|
tu_cs_emit(cs, A6XX_GRAS_SC_VIEWPORT_SCISSOR_BR_X(max.x - 1) |
|
|
|
|
A6XX_GRAS_SC_VIEWPORT_SCISSOR_BR_Y(max.y - 1));
|
2020-07-14 15:38:09 +01:00
|
|
|
}
|
2019-02-19 21:49:01 +00:00
|
|
|
|
2020-07-14 15:38:09 +01:00
|
|
|
tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_CL_Z_CLAMP(0), num_viewport * 2);
|
|
|
|
for (uint32_t i = 0; i < num_viewport; i++) {
|
|
|
|
const VkViewport *viewport = &viewports[i];
|
|
|
|
tu_cs_emit(cs, fui(MIN2(viewport->minDepth, viewport->maxDepth)));
|
|
|
|
tu_cs_emit(cs, fui(MAX2(viewport->minDepth, viewport->maxDepth)));
|
|
|
|
}
|
2019-02-19 21:49:01 +00:00
|
|
|
tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_CL_GUARDBAND_CLIP_ADJ, 1);
|
2020-07-14 15:38:09 +01:00
|
|
|
tu_cs_emit(cs, A6XX_GRAS_CL_GUARDBAND_CLIP_ADJ_HORZ(guardband.width) |
|
|
|
|
A6XX_GRAS_CL_GUARDBAND_CLIP_ADJ_VERT(guardband.height));
|
2020-03-24 01:37:25 +00:00
|
|
|
|
2020-07-14 15:38:09 +01:00
|
|
|
/* TODO: what to do about this and multi viewport ? */
|
|
|
|
float z_clamp_min = num_viewport ? MIN2(viewports[0].minDepth, viewports[0].maxDepth) : 0;
|
|
|
|
float z_clamp_max = num_viewport ? MAX2(viewports[0].minDepth, viewports[0].maxDepth) : 0;
|
2020-03-24 01:37:25 +00:00
|
|
|
|
|
|
|
tu_cs_emit_regs(cs,
|
|
|
|
A6XX_RB_Z_CLAMP_MIN(z_clamp_min),
|
|
|
|
A6XX_RB_Z_CLAMP_MAX(z_clamp_max));
|
2019-02-19 21:49:01 +00:00
|
|
|
}
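/* Note on the scissor encoding used below: a scissor with zero width or
 * height cannot be expressed with the inclusive TL/BR register encoding
 * (BR = TL + extent - 1), so an empty scissor is programmed as the inverted
 * rectangle TL = (1, 1), BR = (0, 0), which rejects every pixel. Non-empty
 * scissors are clamped to the 15-bit range the registers can hold.
 */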
void
tu6_emit_scissor(struct tu_cs *cs, const VkRect2D *scissors, uint32_t scissor_count)
{
   tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_SC_SCREEN_SCISSOR_TL(0), scissor_count * 2);

   for (uint32_t i = 0; i < scissor_count; i++) {
      const VkRect2D *scissor = &scissors[i];

      uint32_t min_x = scissor->offset.x;
      uint32_t min_y = scissor->offset.y;
      uint32_t max_x = min_x + scissor->extent.width - 1;
      uint32_t max_y = min_y + scissor->extent.height - 1;

      if (!scissor->extent.width || !scissor->extent.height) {
         min_x = min_y = 1;
         max_x = max_y = 0;
      } else {
         /* avoid overflow */
         uint32_t scissor_max = BITFIELD_MASK(15);
         min_x = MIN2(scissor_max, min_x);
         min_y = MIN2(scissor_max, min_y);
         max_x = MIN2(scissor_max, max_x);
         max_y = MIN2(scissor_max, max_y);
      }

      tu_cs_emit(cs, A6XX_GRAS_SC_SCREEN_SCISSOR_TL_X(min_x) |
                     A6XX_GRAS_SC_SCREEN_SCISSOR_TL_Y(min_y));
      tu_cs_emit(cs, A6XX_GRAS_SC_SCREEN_SCISSOR_BR_X(max_x) |
                     A6XX_GRAS_SC_SCREEN_SCISSOR_BR_Y(max_y));
   }
}

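/* Programmable sample locations (below) are packed one byte per sample into
 * the SAMPLE_LOCATION registers (hence the << i*8), with a small fixed-point
 * X and Y position packed side by side; the LOCATION_ENABLE bit in the
 * SAMPLE_CONFIG registers switches the hardware from the standard pattern to
 * these values, and clearing it restores the defaults.
 */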
void
tu6_emit_sample_locations(struct tu_cs *cs, const VkSampleLocationsInfoEXT *samp_loc)
{
   if (!samp_loc) {
      tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_SAMPLE_CONFIG, 1);
      tu_cs_emit(cs, 0);

      tu_cs_emit_pkt4(cs, REG_A6XX_RB_SAMPLE_CONFIG, 1);
      tu_cs_emit(cs, 0);

      tu_cs_emit_pkt4(cs, REG_A6XX_SP_TP_SAMPLE_CONFIG, 1);
      tu_cs_emit(cs, 0);
      return;
   }

   assert(samp_loc->sampleLocationsPerPixel == samp_loc->sampleLocationsCount);
   assert(samp_loc->sampleLocationGridSize.width == 1);
   assert(samp_loc->sampleLocationGridSize.height == 1);

   uint32_t sample_config =
      A6XX_RB_SAMPLE_CONFIG_LOCATION_ENABLE;
   uint32_t sample_locations = 0;
   for (uint32_t i = 0; i < samp_loc->sampleLocationsCount; i++) {
      sample_locations |=
         (A6XX_RB_SAMPLE_LOCATION_0_SAMPLE_0_X(samp_loc->pSampleLocations[i].x) |
          A6XX_RB_SAMPLE_LOCATION_0_SAMPLE_0_Y(samp_loc->pSampleLocations[i].y)) << i*8;
   }

   tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_SAMPLE_CONFIG, 2);
   tu_cs_emit(cs, sample_config);
   tu_cs_emit(cs, sample_locations);

   tu_cs_emit_pkt4(cs, REG_A6XX_RB_SAMPLE_CONFIG, 2);
   tu_cs_emit(cs, sample_config);
   tu_cs_emit(cs, sample_locations);

   tu_cs_emit_pkt4(cs, REG_A6XX_SP_TP_SAMPLE_CONFIG, 2);
   tu_cs_emit(cs, sample_config);
   tu_cs_emit(cs, sample_locations);
}

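/* GRAS_SU_CNTL (built below) packs most of the fixed rasterizer state into a
 * single register value: cull mode, winding, depth-bias enable, the line
 * mode, the multiview bits, and the line width stored as a half-width, which
 * is why lineWidth is divided by 2.0f.
 */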
static uint32_t
tu6_gras_su_cntl(const VkPipelineRasterizationStateCreateInfo *rast_info,
                 enum a5xx_line_mode line_mode,
                 bool multiview)
{
   uint32_t gras_su_cntl = 0;

   if (rast_info->cullMode & VK_CULL_MODE_FRONT_BIT)
      gras_su_cntl |= A6XX_GRAS_SU_CNTL_CULL_FRONT;
   if (rast_info->cullMode & VK_CULL_MODE_BACK_BIT)
      gras_su_cntl |= A6XX_GRAS_SU_CNTL_CULL_BACK;

   if (rast_info->frontFace == VK_FRONT_FACE_CLOCKWISE)
      gras_su_cntl |= A6XX_GRAS_SU_CNTL_FRONT_CW;

   gras_su_cntl |=
      A6XX_GRAS_SU_CNTL_LINEHALFWIDTH(rast_info->lineWidth / 2.0f);

   if (rast_info->depthBiasEnable)
      gras_su_cntl |= A6XX_GRAS_SU_CNTL_POLY_OFFSET;

   gras_su_cntl |= A6XX_GRAS_SU_CNTL_LINE_MODE(line_mode);

   if (multiview) {
      gras_su_cntl |=
         A6XX_GRAS_SU_CNTL_UNK17 |
         A6XX_GRAS_SU_CNTL_MULTIVIEW_ENABLE;
   }

   return gras_su_cntl;
}

void
tu6_emit_depth_bias(struct tu_cs *cs,
                    float constant_factor,
                    float clamp,
                    float slope_factor)
{
   tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_SU_POLY_OFFSET_SCALE, 3);
   tu_cs_emit(cs, A6XX_GRAS_SU_POLY_OFFSET_SCALE(slope_factor).value);
   tu_cs_emit(cs, A6XX_GRAS_SU_POLY_OFFSET_OFFSET(constant_factor).value);
   tu_cs_emit(cs, A6XX_GRAS_SU_POLY_OFFSET_OFFSET_CLAMP(clamp).value);
}

static uint32_t
tu6_rb_mrt_blend_control(const VkPipelineColorBlendAttachmentState *att,
                         bool has_alpha)
{
   const enum a3xx_rb_blend_opcode color_op = tu6_blend_op(att->colorBlendOp);
   const enum adreno_rb_blend_factor src_color_factor = tu6_blend_factor(
      has_alpha ? att->srcColorBlendFactor
                : tu_blend_factor_no_dst_alpha(att->srcColorBlendFactor));
   const enum adreno_rb_blend_factor dst_color_factor = tu6_blend_factor(
      has_alpha ? att->dstColorBlendFactor
                : tu_blend_factor_no_dst_alpha(att->dstColorBlendFactor));
   const enum a3xx_rb_blend_opcode alpha_op = tu6_blend_op(att->alphaBlendOp);
   const enum adreno_rb_blend_factor src_alpha_factor =
      tu6_blend_factor(att->srcAlphaBlendFactor);
   const enum adreno_rb_blend_factor dst_alpha_factor =
      tu6_blend_factor(att->dstAlphaBlendFactor);

   return A6XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(src_color_factor) |
          A6XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE(color_op) |
          A6XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR(dst_color_factor) |
          A6XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR(src_alpha_factor) |
          A6XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE(alpha_op) |
          A6XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR(dst_alpha_factor);
}

static uint32_t
tu6_rb_mrt_control(const VkPipelineColorBlendAttachmentState *att,
                   uint32_t rb_mrt_control_rop,
                   bool has_alpha)
{
   uint32_t rb_mrt_control =
      A6XX_RB_MRT_CONTROL_COMPONENT_ENABLE(att->colorWriteMask);

   rb_mrt_control |= rb_mrt_control_rop;

   if (att->blendEnable) {
      rb_mrt_control |= A6XX_RB_MRT_CONTROL_BLEND;

      if (has_alpha)
         rb_mrt_control |= A6XX_RB_MRT_CONTROL_BLEND2;
   }

   return rb_mrt_control;
}

uint32_t
tu6_rb_mrt_control_rop(VkLogicOp op, bool *rop_reads_dst)
{
   *rop_reads_dst = tu_logic_op_reads_dst(op);
   return A6XX_RB_MRT_CONTROL_ROP_ENABLE |
          A6XX_RB_MRT_CONTROL_ROP_CODE(tu6_rop(op));
}

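/* The function below also estimates color bandwidth per sample for later
 * GMEM/sysmem heuristics: each enabled attachment contributes the bits it
 * actually writes (the full block size when all four channels are enabled,
 * otherwise the sum of the written components), counted twice when blending
 * or a dst-reading logic op forces a read-modify-write. For example, one
 * B8G8R8A8 attachment with blending enabled contributes (32 + 32) / 8 = 8
 * bytes per sample.
 */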
static void
tu6_emit_rb_mrt_controls(struct tu_pipeline *pipeline,
                         const VkPipelineColorBlendStateCreateInfo *blend_info,
                         const VkFormat attachment_formats[MAX_RTS],
                         bool *rop_reads_dst,
                         uint32_t *color_bandwidth_per_sample)
{
   const VkPipelineColorWriteCreateInfoEXT *color_info =
      vk_find_struct_const(blend_info->pNext,
                           PIPELINE_COLOR_WRITE_CREATE_INFO_EXT);

   /* The static state is ignored if it's dynamic. In that case assume
    * everything is enabled and then the appropriate registers will be zero'd
    * dynamically.
    */
   if (pipeline->dynamic_state_mask & BIT(TU_DYNAMIC_STATE_COLOR_WRITE_ENABLE))
      color_info = NULL;

   *rop_reads_dst = false;
   *color_bandwidth_per_sample = 0;

   uint32_t rb_mrt_control_rop = 0;
   if (blend_info->logicOpEnable) {
      pipeline->logic_op_enabled = true;
      rb_mrt_control_rop = tu6_rb_mrt_control_rop(blend_info->logicOp,
                                                  rop_reads_dst);
   }

   uint32_t total_bpp = 0;
   pipeline->num_rts = blend_info->attachmentCount;
   for (uint32_t i = 0; i < blend_info->attachmentCount; i++) {
      const VkPipelineColorBlendAttachmentState *att =
         &blend_info->pAttachments[i];
      const VkFormat format = attachment_formats[i];

      uint32_t rb_mrt_control = 0;
      uint32_t rb_mrt_blend_control = 0;
      if (format != VK_FORMAT_UNDEFINED &&
          (!color_info || color_info->pColorWriteEnables[i])) {
         const bool has_alpha = vk_format_has_alpha(format);

         rb_mrt_control =
            tu6_rb_mrt_control(att, rb_mrt_control_rop, has_alpha);
         rb_mrt_blend_control = tu6_rb_mrt_blend_control(att, has_alpha);

         /* calculate bpp based on format and write mask */
         uint32_t write_bpp = 0;
         if (att->colorWriteMask == 0xf) {
            write_bpp = vk_format_get_blocksizebits(format);
         } else {
            const enum pipe_format pipe_format = vk_format_to_pipe_format(format);
            for (uint32_t i = 0; i < 4; i++) {
               if (att->colorWriteMask & (1 << i)) {
                  write_bpp += util_format_get_component_bits(pipe_format,
                        UTIL_FORMAT_COLORSPACE_RGB, i);
               }
            }
         }
         total_bpp += write_bpp;

         pipeline->color_write_enable |= BIT(i);
         if (att->blendEnable)
            pipeline->blend_enable |= BIT(i);

         if (att->blendEnable || *rop_reads_dst) {
            total_bpp += write_bpp;
         }
      }

      pipeline->rb_mrt_control[i] = rb_mrt_control & pipeline->rb_mrt_control_mask;
      pipeline->rb_mrt_blend_control[i] = rb_mrt_blend_control;
   }

   *color_bandwidth_per_sample = total_bpp / 8;
}

static void
tu6_emit_blend_control(struct tu_pipeline *pipeline,
                       uint32_t blend_enable_mask,
                       bool dual_src_blend,
                       const VkPipelineMultisampleStateCreateInfo *msaa_info)
{
   const uint32_t sample_mask =
      msaa_info->pSampleMask ? (*msaa_info->pSampleMask & 0xffff)
                             : ((1 << msaa_info->rasterizationSamples) - 1);

   pipeline->sp_blend_cntl =
      A6XX_SP_BLEND_CNTL(.enable_blend = blend_enable_mask,
                         .dual_color_in_enable = dual_src_blend,
                         .alpha_to_coverage = msaa_info->alphaToCoverageEnable,
                         .unk8 = true).value & pipeline->sp_blend_cntl_mask;

   /* set A6XX_RB_BLEND_CNTL_INDEPENDENT_BLEND only when enabled? */
   pipeline->rb_blend_cntl =
      A6XX_RB_BLEND_CNTL(.enable_blend = blend_enable_mask,
                         .independent_blend = true,
                         .sample_mask = sample_mask,
                         .dual_color_in_enable = dual_src_blend,
                         .alpha_to_coverage = msaa_info->alphaToCoverageEnable,
                         .alpha_to_one = msaa_info->alphaToOneEnable).value &
      pipeline->rb_blend_cntl_mask;
}

static void
tu6_emit_blend(struct tu_cs *cs,
               struct tu_pipeline *pipeline)
{
   tu_cs_emit_regs(cs, A6XX_SP_BLEND_CNTL(.dword = pipeline->sp_blend_cntl));
   tu_cs_emit_regs(cs, A6XX_RB_BLEND_CNTL(.dword = pipeline->rb_blend_cntl));

   for (unsigned i = 0; i < pipeline->num_rts; i++) {
      tu_cs_emit_regs(cs,
                      A6XX_RB_MRT_CONTROL(i, .dword = pipeline->rb_mrt_control[i]),
                      A6XX_RB_MRT_BLEND_CONTROL(i, .dword = pipeline->rb_mrt_blend_control[i]));
   }
}

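/* Private memory (spill/scratch space) is sized per fiber below, aligned to
 * 512 bytes, then scaled by the number of fibers per SP and rounded up to
 * 4 KiB; the BO holds one such slice per SP core. For example, 300 bytes of
 * pvtmem rounds to 512 bytes per fiber, and with, say, 128 fibers per SP
 * that is 64 KiB per SP.
 */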
static uint32_t
calc_pvtmem_size(struct tu_device *dev, struct tu_pvtmem_config *config,
                 uint32_t pvtmem_bytes)
{
   uint32_t per_fiber_size = ALIGN(pvtmem_bytes, 512);
   uint32_t per_sp_size =
      ALIGN(per_fiber_size * dev->physical_device->info->a6xx.fibers_per_sp, 1 << 12);

   if (config) {
      config->per_fiber_size = per_fiber_size;
      config->per_sp_size = per_sp_size;
   }

   return dev->physical_device->info->num_sp_cores * per_sp_size;
}

static VkResult
tu_setup_pvtmem(struct tu_device *dev,
                struct tu_pipeline *pipeline,
                struct tu_pvtmem_config *config,
                uint32_t pvtmem_bytes, bool per_wave)
{
   if (!pvtmem_bytes) {
      memset(config, 0, sizeof(*config));
      return VK_SUCCESS;
   }

   uint32_t total_size = calc_pvtmem_size(dev, config, pvtmem_bytes);
   config->per_wave = per_wave;

   VkResult result =
      tu_bo_init_new(dev, &pipeline->pvtmem_bo, total_size,
                     TU_BO_ALLOC_NO_FLAGS);
   if (result != VK_SUCCESS)
      return result;

   config->iova = pipeline->pvtmem_bo->iova;

   return result;
}

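/* The allocation below computes a worst-case dword count up front: a fixed
 * 1024-dword base plus the preloaded descriptor state, the vertex-input draw
 * states, every shader variant's binary size, and the per-variant reserve
 * used by tu6_emit_program(), so the suballocated BO should not need to grow
 * while the pipeline state is being recorded.
 */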
static VkResult
tu_pipeline_allocate_cs(struct tu_device *dev,
                        struct tu_pipeline *pipeline,
                        struct tu_pipeline_layout *layout,
                        struct tu_pipeline_builder *builder,
                        struct ir3_shader_variant *compute)
{
   uint32_t size = 1024 + tu6_load_state_size(pipeline, layout, compute);

   /* graphics case: */
   if (builder) {
      size += 2 * TU6_EMIT_VERTEX_INPUT_MAX_DWORDS;

      for (uint32_t i = 0; i < ARRAY_SIZE(builder->shaders->variants); i++) {
         if (builder->shaders->variants[i]) {
            size += builder->shaders->variants[i]->info.size / 4;
         }
      }

      size += builder->binning_variant->info.size / 4;

      builder->additional_cs_reserve_size = 0;
      for (unsigned i = 0; i < ARRAY_SIZE(builder->shaders->variants); i++) {
         struct ir3_shader_variant *variant = builder->shaders->variants[i];
         if (variant) {
            builder->additional_cs_reserve_size +=
               tu_xs_get_additional_cs_size_dwords(variant);

            if (variant->binning) {
               builder->additional_cs_reserve_size +=
                  tu_xs_get_additional_cs_size_dwords(variant->binning);
            }
         }
      }

      /* The additional size is used twice, once per tu6_emit_program() call. */
      size += builder->additional_cs_reserve_size * 2;
   } else {
      size += compute->info.size / 4;

      size += tu_xs_get_additional_cs_size_dwords(compute);
   }

   /* Allocate the space for the pipeline out of the device's RO suballocator.
    *
    * Sub-allocating BOs saves memory and also kernel overhead in refcounting of
    * BOs at exec time.
    *
    * The pipeline cache would seem like a natural place to stick the
    * suballocator, except that it is not guaranteed to outlive the pipelines
    * created from it, so you can't store any long-lived state there, and you
    * can't use its EXTERNALLY_SYNCHRONIZED flag to avoid atomics because
    * pipeline destroy isn't synchronized by the cache.
    */
   pthread_mutex_lock(&dev->pipeline_mutex);
   VkResult result = tu_suballoc_bo_alloc(&pipeline->bo, &dev->pipeline_suballoc,
                                          size * 4, 128);
   pthread_mutex_unlock(&dev->pipeline_mutex);
   if (result != VK_SUCCESS)
      return result;

   tu_cs_init_suballoc(&pipeline->cs, dev, &pipeline->bo);

   return VK_SUCCESS;
}

|
|
|
|
|
2020-06-16 16:00:31 +01:00
|
|
|
static void
|
|
|
|
tu_pipeline_shader_key_init(struct ir3_shader_key *key,
|
2021-08-10 04:32:15 +01:00
|
|
|
const struct tu_pipeline *pipeline,
|
2020-06-16 16:00:31 +01:00
|
|
|
const VkGraphicsPipelineCreateInfo *pipeline_info)
|
|
|
|
{
|
2020-06-17 19:23:37 +01:00
|
|
|
for (uint32_t i = 0; i < pipeline_info->stageCount; i++) {
|
|
|
|
if (pipeline_info->pStages[i].stage == VK_SHADER_STAGE_GEOMETRY_BIT) {
|
|
|
|
key->has_gs = true;
|
|
|
|
break;
|
2020-06-16 16:00:31 +01:00
|
|
|
}
|
2020-06-17 19:23:37 +01:00
|
|
|
}
|
2020-06-16 16:00:31 +01:00
|
|
|
|
2021-08-10 04:32:15 +01:00
|
|
|
if (pipeline_info->pRasterizationState->rasterizerDiscardEnable &&
|
|
|
|
!(pipeline->dynamic_state_mask & BIT(TU_DYNAMIC_STATE_RASTERIZER_DISCARD)))
|
2020-06-17 19:23:37 +01:00
|
|
|
return;
|
|
|
|
|
|
|
|
const VkPipelineMultisampleStateCreateInfo *msaa_info = pipeline_info->pMultisampleState;
|
|
|
|
const struct VkPipelineSampleLocationsStateCreateInfoEXT *sample_locations =
|
|
|
|
vk_find_struct_const(msaa_info->pNext, PIPELINE_SAMPLE_LOCATIONS_STATE_CREATE_INFO_EXT);
|
|
|
|
if (msaa_info->rasterizationSamples > 1 ||
|
|
|
|
/* also set msaa key when sample location is not the default
|
|
|
|
* since this affects varying interpolation */
|
|
|
|
(sample_locations && sample_locations->sampleLocationsEnable)) {
|
|
|
|
key->msaa = true;
|
2020-06-16 16:00:31 +01:00
|
|
|
}
|
|
|
|
|
2022-05-24 23:26:42 +01:00
|
|
|
/* The 1.3.215 spec says:
|
|
|
|
*
|
|
|
|
* Sample shading can be used to specify a minimum number of unique
|
|
|
|
* samples to process for each fragment. If sample shading is enabled,
|
|
|
|
* an implementation must provide a minimum of
|
|
|
|
*
|
|
|
|
* max(ceil(minSampleShadingFactor * totalSamples), 1)
|
|
|
|
*
|
|
|
|
* unique associated data for each fragment, where
|
|
|
|
* minSampleShadingFactor is the minimum fraction of sample shading.
|
|
|
|
*
|
|
|
|
* The definition is pretty much the same as OpenGL's GL_SAMPLE_SHADING.
|
|
|
|
* They both require unique associated data.
|
|
|
|
*
|
|
|
|
* There are discussions to change the definition, such that
|
|
|
|
* sampleShadingEnable does not imply unique associated data. Before the
|
|
|
|
* discussions are settled and before apps (i.e., ANGLE) are fixed to
|
|
|
|
* follow the new and incompatible definition, we should stick to the
|
|
|
|
* current definition.
|
|
|
|
*
|
|
|
|
* Note that ir3_shader_key::sample_shading is not actually used by ir3,
|
|
|
|
* just checked in tu6_emit_fs_inputs. We will also copy the value to
|
|
|
|
* tu_shader_key::force_sample_interp in a bit.
|
|
|
|
*/
|
|
|
|
if (msaa_info->sampleShadingEnable &&
|
|
|
|
(msaa_info->minSampleShading * msaa_info->rasterizationSamples) > 1.0f)
|
2020-06-16 14:11:02 +01:00
|
|
|
key->sample_shading = true;
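   /* For example, minSampleShading = 0.5 with 4x MSAA requires
    * max(ceil(0.5 * 4), 1) = 2 unique samples, so sample shading is enabled;
    * with a single sample the product never exceeds 1.0.
    */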
|
|
|
|
|
2020-04-24 16:05:48 +01:00
|
|
|
/* We set this after we compile to NIR because we need the prim mode */
|
|
|
|
key->tessellation = IR3_TESS_NONE;
|
|
|
|
}
|
|
|
|
|
|
|
|
static uint32_t
|
|
|
|
tu6_get_tessmode(struct tu_shader* shader)
|
|
|
|
{
|
2022-01-19 01:43:15 +00:00
|
|
|
enum tess_primitive_mode primitive_mode = shader->ir3_shader->nir->info.tess._primitive_mode;
|
2020-04-24 16:05:48 +01:00
|
|
|
switch (primitive_mode) {
|
2022-01-19 01:43:15 +00:00
|
|
|
case TESS_PRIMITIVE_ISOLINES:
|
2020-04-24 16:05:48 +01:00
|
|
|
return IR3_TESS_ISOLINES;
|
2022-01-19 01:43:15 +00:00
|
|
|
case TESS_PRIMITIVE_TRIANGLES:
|
2020-04-24 16:05:48 +01:00
|
|
|
return IR3_TESS_TRIANGLES;
|
2022-01-19 01:43:15 +00:00
|
|
|
case TESS_PRIMITIVE_QUADS:
|
2020-04-24 16:05:48 +01:00
|
|
|
return IR3_TESS_QUADS;
|
2022-01-19 01:43:15 +00:00
|
|
|
case TESS_PRIMITIVE_UNSPECIFIED:
|
2020-04-24 16:05:48 +01:00
|
|
|
return IR3_TESS_NONE;
|
|
|
|
default:
|
|
|
|
unreachable("bad tessmode");
|
|
|
|
}
|
2020-06-16 16:00:31 +01:00
|
|
|
}
|
|
|
|
|
2020-06-23 15:37:56 +01:00
|
|
|
static uint64_t
|
|
|
|
tu_upload_variant(struct tu_pipeline *pipeline,
|
|
|
|
const struct ir3_shader_variant *variant)
|
|
|
|
{
|
|
|
|
struct tu_cs_memory memory;
|
|
|
|
|
|
|
|
if (!variant)
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
/* this expects to get enough alignment because shaders are allocated first
|
2020-07-07 19:56:35 +01:00
|
|
|
* and total size is always aligned correctly
|
2020-06-23 15:37:56 +01:00
|
|
|
* note: an assert in tu6_emit_xs_config validates the alignment
|
|
|
|
*/
|
2020-07-07 19:56:35 +01:00
|
|
|
tu_cs_alloc(&pipeline->cs, variant->info.size / 4, 1, &memory);
|
2020-06-23 15:37:56 +01:00
|
|
|
|
2020-07-07 19:56:35 +01:00
|
|
|
memcpy(memory.map, variant->bin, variant->info.size);
|
2020-06-23 15:37:56 +01:00
|
|
|
return memory.iova;
|
|
|
|
}
|
|
|
|
|
2021-02-04 13:12:35 +00:00
|
|
|
static void
|
|
|
|
tu_append_executable(struct tu_pipeline *pipeline, struct ir3_shader_variant *variant,
|
|
|
|
char *nir_from_spirv)
|
|
|
|
{
|
|
|
|
struct tu_pipeline_executable exe = {
|
2022-02-16 17:19:58 +00:00
|
|
|
.stage = variant->type,
|
2021-02-04 13:12:35 +00:00
|
|
|
.nir_from_spirv = nir_from_spirv,
|
2022-06-15 15:35:39 +01:00
|
|
|
.nir_final = ralloc_strdup(pipeline->executables_mem_ctx, variant->disasm_info.nir),
|
|
|
|
.disasm = ralloc_strdup(pipeline->executables_mem_ctx, variant->disasm_info.disasm),
|
2021-02-04 13:12:35 +00:00
|
|
|
.stats = variant->info,
|
|
|
|
.is_binning = variant->binning_pass,
|
|
|
|
};
|
|
|
|
|
|
|
|
util_dynarray_append(&pipeline->executables, struct tu_pipeline_executable, exe);
|
|
|
|
}
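/* tu_link_shaders below walks the stages from last to first, pairing each
 * producer with its consumer: it folds the constants exposed by
 * nir_link_opt_varyings, removes dead and unused varyings on both sides, and
 * compacts the remaining varying slots so the backend sees the smallest
 * possible interface between stages.
 */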
|
|
|
|
|
|
|
|
static void
|
|
|
|
tu_link_shaders(struct tu_pipeline_builder *builder,
|
|
|
|
nir_shader **shaders, unsigned shaders_count)
|
|
|
|
{
|
|
|
|
nir_shader *consumer = NULL;
|
|
|
|
for (gl_shader_stage stage = shaders_count - 1;
|
|
|
|
stage >= MESA_SHADER_VERTEX; stage--) {
|
|
|
|
if (!shaders[stage])
|
|
|
|
continue;
|
|
|
|
|
|
|
|
nir_shader *producer = shaders[stage];
|
|
|
|
if (!consumer) {
|
|
|
|
consumer = producer;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (nir_link_opt_varyings(producer, consumer)) {
|
|
|
|
NIR_PASS_V(consumer, nir_opt_constant_folding);
|
|
|
|
NIR_PASS_V(consumer, nir_opt_algebraic);
|
|
|
|
NIR_PASS_V(consumer, nir_opt_dce);
|
|
|
|
}
|
|
|
|
|
|
|
|
NIR_PASS_V(producer, nir_remove_dead_variables, nir_var_shader_out, NULL);
|
|
|
|
NIR_PASS_V(consumer, nir_remove_dead_variables, nir_var_shader_in, NULL);
|
|
|
|
|
|
|
|
bool progress = nir_remove_unused_varyings(producer, consumer);
|
|
|
|
|
|
|
|
nir_compact_varyings(producer, consumer, true);
|
|
|
|
if (progress) {
|
|
|
|
if (nir_lower_global_vars_to_local(producer)) {
|
|
|
|
/* Remove dead writes, which can remove input loads */
|
|
|
|
NIR_PASS_V(producer, nir_remove_dead_variables, nir_var_shader_temp, NULL);
|
|
|
|
NIR_PASS_V(producer, nir_opt_dce);
|
|
|
|
}
|
|
|
|
nir_lower_global_vars_to_local(consumer);
|
|
|
|
}
|
|
|
|
|
|
|
|
consumer = producer;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-02-17 19:48:36 +00:00
|
|
|
static void
|
|
|
|
tu_shader_key_init(struct tu_shader_key *key,
|
|
|
|
const VkPipelineShaderStageCreateInfo *stage_info,
|
|
|
|
struct tu_device *dev)
|
|
|
|
{
|
|
|
|
enum ir3_wavesize_option api_wavesize, real_wavesize;
|
|
|
|
|
|
|
|
if (stage_info) {
|
|
|
|
if (stage_info->flags &
|
2022-07-01 13:04:48 +01:00
|
|
|
VK_PIPELINE_SHADER_STAGE_CREATE_ALLOW_VARYING_SUBGROUP_SIZE_BIT) {
|
2022-02-17 19:48:36 +00:00
|
|
|
api_wavesize = real_wavesize = IR3_SINGLE_OR_DOUBLE;
|
|
|
|
} else {
|
2022-07-01 13:04:48 +01:00
|
|
|
const VkPipelineShaderStageRequiredSubgroupSizeCreateInfo *size_info =
|
2022-02-17 19:48:36 +00:00
|
|
|
vk_find_struct_const(stage_info->pNext,
|
2022-07-01 13:04:48 +01:00
|
|
|
PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO);
|
2022-02-17 19:48:36 +00:00
|
|
|
|
|
|
|
if (size_info) {
|
|
|
|
if (size_info->requiredSubgroupSize == dev->compiler->threadsize_base) {
|
|
|
|
api_wavesize = IR3_SINGLE_ONLY;
|
|
|
|
} else {
|
|
|
|
assert(size_info->requiredSubgroupSize == dev->compiler->threadsize_base * 2);
|
|
|
|
api_wavesize = IR3_DOUBLE_ONLY;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
/* Match the exposed subgroupSize. */
|
|
|
|
api_wavesize = IR3_DOUBLE_ONLY;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (stage_info->flags &
|
2022-07-01 13:04:48 +01:00
|
|
|
VK_PIPELINE_SHADER_STAGE_CREATE_REQUIRE_FULL_SUBGROUPS_BIT)
|
2022-02-17 19:48:36 +00:00
|
|
|
real_wavesize = api_wavesize;
|
|
|
|
else if (api_wavesize == IR3_SINGLE_ONLY)
|
|
|
|
real_wavesize = IR3_SINGLE_ONLY;
|
|
|
|
else
|
|
|
|
real_wavesize = IR3_SINGLE_OR_DOUBLE;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
api_wavesize = real_wavesize = IR3_SINGLE_OR_DOUBLE;
|
|
|
|
}
|
|
|
|
|
|
|
|
key->api_wavesize = api_wavesize;
|
|
|
|
key->real_wavesize = real_wavesize;
|
|
|
|
}
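/* api_wavesize is the wave size subgroup operations must appear to use
 * (either the exposed default or the size requested through
 * VkPipelineShaderStageRequiredSubgroupSizeCreateInfo), while real_wavesize
 * is what the hardware may actually run; unless full subgroups are required,
 * a double-size API wavesize still leaves the compiler free to pick either
 * wave size.
 */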
|
|
|
|
|
|
|
|
static void
|
|
|
|
tu_hash_stage(struct mesa_sha1 *ctx,
|
|
|
|
const VkPipelineShaderStageCreateInfo *stage,
|
|
|
|
const struct tu_shader_key *key)
|
|
|
|
{
|
2022-06-22 17:10:12 +01:00
|
|
|
unsigned char stage_hash[SHA1_DIGEST_LENGTH];
|
2022-02-17 19:48:36 +00:00
|
|
|
|
2022-06-22 17:10:12 +01:00
|
|
|
vk_pipeline_hash_shader_stage(stage, stage_hash);
|
|
|
|
_mesa_sha1_update(ctx, stage_hash, sizeof(stage_hash));
|
2022-02-17 19:48:36 +00:00
|
|
|
_mesa_sha1_update(ctx, key, sizeof(*key));
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Hash flags which can affect ir3 shader compilation which aren't known until
|
|
|
|
* logical device creation.
|
|
|
|
*/
|
|
|
|
static void
|
|
|
|
tu_hash_compiler(struct mesa_sha1 *ctx, const struct ir3_compiler *compiler)
|
|
|
|
{
|
2022-05-19 20:40:18 +01:00
|
|
|
_mesa_sha1_update(ctx, &compiler->robust_buffer_access2,
|
|
|
|
sizeof(compiler->robust_buffer_access2));
|
2022-02-17 19:48:36 +00:00
|
|
|
_mesa_sha1_update(ctx, &ir3_shader_debug, sizeof(ir3_shader_debug));
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
tu_hash_shaders(unsigned char *hash,
|
|
|
|
const VkPipelineShaderStageCreateInfo **stages,
|
|
|
|
const struct tu_pipeline_layout *layout,
|
|
|
|
const struct tu_shader_key *keys,
|
|
|
|
const struct ir3_shader_key *ir3_key,
|
|
|
|
const struct ir3_compiler *compiler)
|
|
|
|
{
|
|
|
|
struct mesa_sha1 ctx;
|
|
|
|
|
|
|
|
_mesa_sha1_init(&ctx);
|
|
|
|
|
|
|
|
if (layout)
|
|
|
|
_mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1));
|
|
|
|
|
|
|
|
   _mesa_sha1_update(&ctx, ir3_key, sizeof(*ir3_key));
|
|
|
|
|
|
|
|
for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
|
|
|
|
if (stages[i]) {
|
|
|
|
tu_hash_stage(&ctx, stages[i], &keys[i]);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
tu_hash_compiler(&ctx, compiler);
|
|
|
|
_mesa_sha1_final(&ctx, hash);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
tu_hash_compute(unsigned char *hash,
|
|
|
|
const VkPipelineShaderStageCreateInfo *stage,
|
|
|
|
const struct tu_pipeline_layout *layout,
|
|
|
|
const struct tu_shader_key *key,
|
|
|
|
const struct ir3_compiler *compiler)
|
|
|
|
{
|
|
|
|
struct mesa_sha1 ctx;
|
|
|
|
|
|
|
|
_mesa_sha1_init(&ctx);
|
|
|
|
|
|
|
|
if (layout)
|
|
|
|
_mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1));
|
|
|
|
|
|
|
|
tu_hash_stage(&ctx, stage, key);
|
|
|
|
|
|
|
|
tu_hash_compiler(&ctx, compiler);
|
|
|
|
_mesa_sha1_final(&ctx, hash);
|
|
|
|
}
|
|
|
|
|
|
|
|
static bool
|
|
|
|
tu_shaders_serialize(struct vk_pipeline_cache_object *object,
|
|
|
|
struct blob *blob);
|
|
|
|
|
|
|
|
static struct vk_pipeline_cache_object *
|
|
|
|
tu_shaders_deserialize(struct vk_device *device,
|
|
|
|
const void *key_data, size_t key_size,
|
|
|
|
struct blob_reader *blob);
|
|
|
|
|
|
|
|
static void
|
|
|
|
tu_shaders_destroy(struct vk_pipeline_cache_object *object)
|
|
|
|
{
|
|
|
|
struct tu_compiled_shaders *shaders =
|
|
|
|
container_of(object, struct tu_compiled_shaders, base);
|
|
|
|
|
|
|
|
for (unsigned i = 0; i < ARRAY_SIZE(shaders->variants); i++)
|
|
|
|
ralloc_free(shaders->variants[i]);
|
|
|
|
|
|
|
|
vk_pipeline_cache_object_finish(&shaders->base);
|
|
|
|
vk_free(&object->device->alloc, shaders);
|
|
|
|
}
|
|
|
|
|
|
|
|
const struct vk_pipeline_cache_object_ops tu_shaders_ops = {
|
|
|
|
.serialize = tu_shaders_serialize,
|
|
|
|
.deserialize = tu_shaders_deserialize,
|
|
|
|
.destroy = tu_shaders_destroy,
|
|
|
|
};
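/* The serialized cache object is a flat blob: the per-stage push-constant
 * state, one byte each for active_desc_sets and multi_pos_output, then a
 * presence byte per stage followed by the ir3 variant payload when the stage
 * exists. Deserialization reads the fields back in the same order.
 */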
|
|
|
|
|
|
|
|
static struct tu_compiled_shaders *
|
|
|
|
tu_shaders_init(struct tu_device *dev, const void *key_data, size_t key_size)
|
|
|
|
{
|
|
|
|
VK_MULTIALLOC(ma);
|
|
|
|
VK_MULTIALLOC_DECL(&ma, struct tu_compiled_shaders, shaders, 1);
|
|
|
|
VK_MULTIALLOC_DECL_SIZE(&ma, void, obj_key_data, key_size);
|
|
|
|
|
|
|
|
if (!vk_multialloc_zalloc(&ma, &dev->vk.alloc,
|
|
|
|
VK_SYSTEM_ALLOCATION_SCOPE_DEVICE))
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
memcpy(obj_key_data, key_data, key_size);
|
|
|
|
vk_pipeline_cache_object_init(&dev->vk, &shaders->base,
|
|
|
|
&tu_shaders_ops, obj_key_data, key_size);
|
|
|
|
|
|
|
|
return shaders;
|
|
|
|
}
|
|
|
|
|
|
|
|
static bool
|
|
|
|
tu_shaders_serialize(struct vk_pipeline_cache_object *object,
|
|
|
|
struct blob *blob)
|
|
|
|
{
|
|
|
|
struct tu_compiled_shaders *shaders =
|
|
|
|
container_of(object, struct tu_compiled_shaders, base);
|
|
|
|
|
|
|
|
blob_write_bytes(blob, shaders->push_consts, sizeof(shaders->push_consts));
|
|
|
|
blob_write_uint8(blob, shaders->active_desc_sets);
|
|
|
|
blob_write_uint8(blob, shaders->multi_pos_output);
|
|
|
|
|
|
|
|
for (unsigned i = 0; i < ARRAY_SIZE(shaders->variants); i++) {
|
|
|
|
if (shaders->variants[i]) {
|
|
|
|
blob_write_uint8(blob, 1);
|
|
|
|
ir3_store_variant(blob, shaders->variants[i]);
|
|
|
|
} else {
|
|
|
|
blob_write_uint8(blob, 0);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
static struct vk_pipeline_cache_object *
|
|
|
|
tu_shaders_deserialize(struct vk_device *_device,
|
|
|
|
const void *key_data, size_t key_size,
|
|
|
|
struct blob_reader *blob)
|
|
|
|
{
|
|
|
|
struct tu_device *dev = container_of(_device, struct tu_device, vk);
|
|
|
|
struct tu_compiled_shaders *shaders =
|
|
|
|
tu_shaders_init(dev, key_data, key_size);
|
|
|
|
|
|
|
|
if (!shaders)
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
blob_copy_bytes(blob, shaders->push_consts, sizeof(shaders->push_consts));
|
|
|
|
shaders->active_desc_sets = blob_read_uint8(blob);
|
|
|
|
shaders->multi_pos_output = blob_read_uint8(blob);
|
|
|
|
|
|
|
|
for (unsigned i = 0; i < ARRAY_SIZE(shaders->variants); i++) {
|
|
|
|
bool has_shader = blob_read_uint8(blob);
|
|
|
|
if (has_shader) {
|
|
|
|
shaders->variants[i] = ir3_retrieve_variant(blob, dev->compiler, NULL);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return &shaders->base;
|
|
|
|
}
|
|
|
|
|
|
|
|
static struct tu_compiled_shaders *
|
|
|
|
tu_pipeline_cache_lookup(struct vk_pipeline_cache *cache,
|
2022-05-16 18:11:42 +01:00
|
|
|
const void *key_data, size_t key_size,
|
|
|
|
bool *application_cache_hit)
|
2022-02-17 19:48:36 +00:00
|
|
|
{
|
|
|
|
struct vk_pipeline_cache_object *object =
|
|
|
|
vk_pipeline_cache_lookup_object(cache, key_data, key_size,
|
2022-05-16 18:11:42 +01:00
|
|
|
&tu_shaders_ops, application_cache_hit);
|
2022-02-17 19:48:36 +00:00
|
|
|
if (object)
|
|
|
|
return container_of(object, struct tu_compiled_shaders, base);
|
|
|
|
else
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
static struct tu_compiled_shaders *
|
|
|
|
tu_pipeline_cache_insert(struct vk_pipeline_cache *cache,
|
|
|
|
struct tu_compiled_shaders *shaders)
|
|
|
|
{
|
|
|
|
struct vk_pipeline_cache_object *object =
|
|
|
|
vk_pipeline_cache_add_object(cache, &shaders->base);
|
|
|
|
return container_of(object, struct tu_compiled_shaders, base);
|
|
|
|
}
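/* Graphics shader compilation below follows one path: hash the stages,
 * layout and keys, look the result up in the pipeline cache (honouring
 * FAIL_ON_PIPELINE_COMPILE_REQUIRED), otherwise translate SPIR-V to NIR, add
 * a no-op fragment shader if none was supplied, run the NIR linking
 * optimizations, compile each ir3 variant, recompile any stage whose
 * constlen exceeds the safe limit, and finally add the bundle to the cache
 * while recording creation-feedback timings.
 */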
|
|
|
|
|
2019-02-27 06:09:37 +00:00
|
|
|
static VkResult
|
2020-05-15 18:52:43 +01:00
|
|
|
tu_pipeline_builder_compile_shaders(struct tu_pipeline_builder *builder,
|
|
|
|
struct tu_pipeline *pipeline)
|
2019-02-27 06:09:37 +00:00
|
|
|
{
|
2022-02-17 19:48:36 +00:00
|
|
|
VkResult result = VK_SUCCESS;
|
2020-06-24 11:56:09 +01:00
|
|
|
const struct ir3_compiler *compiler = builder->device->compiler;
|
2019-02-27 06:09:37 +00:00
|
|
|
const VkPipelineShaderStageCreateInfo *stage_infos[MESA_SHADER_STAGES] = {
|
|
|
|
NULL
|
|
|
|
};
|
2022-07-01 13:04:48 +01:00
|
|
|
VkPipelineCreationFeedback pipeline_feedback = {
|
2022-05-16 18:11:42 +01:00
|
|
|
.flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT,
|
|
|
|
};
|
2022-07-01 13:04:48 +01:00
|
|
|
VkPipelineCreationFeedback stage_feedbacks[MESA_SHADER_STAGES] = { 0 };
|
2022-05-16 18:11:42 +01:00
|
|
|
|
|
|
|
int64_t pipeline_start = os_time_get_nano();
|
|
|
|
|
|
|
|
const VkPipelineCreationFeedbackCreateInfo *creation_feedback =
|
|
|
|
vk_find_struct_const(builder->create_info->pNext, PIPELINE_CREATION_FEEDBACK_CREATE_INFO);
|
|
|
|
|
2019-02-27 06:09:37 +00:00
|
|
|
for (uint32_t i = 0; i < builder->create_info->stageCount; i++) {
|
|
|
|
gl_shader_stage stage =
|
2020-04-16 20:44:06 +01:00
|
|
|
vk_to_mesa_shader_stage(builder->create_info->pStages[i].stage);
|
2019-02-27 06:09:37 +00:00
|
|
|
stage_infos[stage] = &builder->create_info->pStages[i];
|
|
|
|
}
|
|
|
|
|
2022-06-15 01:08:31 +01:00
|
|
|
if (tu6_shared_constants_enable(builder->layout, builder->device->compiler)) {
|
2022-06-15 01:07:28 +01:00
|
|
|
pipeline->shared_consts = (struct tu_push_constant_range) {
|
|
|
|
.lo = 0,
|
|
|
|
.dwords = builder->layout->push_constant_size / 4,
|
|
|
|
};
|
|
|
|
}
|
|
|
|
|
2022-02-17 19:48:36 +00:00
|
|
|
struct tu_shader_key keys[ARRAY_SIZE(stage_infos)] = { };
|
|
|
|
for (gl_shader_stage stage = MESA_SHADER_VERTEX;
|
|
|
|
stage < ARRAY_SIZE(keys); stage++) {
|
|
|
|
tu_shader_key_init(&keys[stage], stage_infos[stage], builder->device);
|
|
|
|
}
|
|
|
|
|
|
|
|
struct ir3_shader_key ir3_key = {};
|
|
|
|
tu_pipeline_shader_key_init(&ir3_key, pipeline, builder->create_info);
|
|
|
|
|
|
|
|
keys[MESA_SHADER_VERTEX].multiview_mask = builder->multiview_mask;
|
|
|
|
keys[MESA_SHADER_FRAGMENT].multiview_mask = builder->multiview_mask;
|
2022-05-24 23:26:42 +01:00
|
|
|
keys[MESA_SHADER_FRAGMENT].force_sample_interp = ir3_key.sample_shading;
|
2022-02-17 19:48:36 +00:00
|
|
|
|
|
|
|
unsigned char pipeline_sha1[20];
|
|
|
|
tu_hash_shaders(pipeline_sha1, stage_infos, builder->layout, keys, &ir3_key, compiler);
|
|
|
|
|
|
|
|
const bool executable_info = builder->create_info->flags &
|
|
|
|
VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR;
|
|
|
|
|
|
|
|
char *nir_initial_disasm[ARRAY_SIZE(stage_infos)] = { NULL };
|
|
|
|
|
|
|
|
struct tu_compiled_shaders *compiled_shaders;
|
|
|
|
|
|
|
|
if (!executable_info) {
|
2022-05-16 18:11:42 +01:00
|
|
|
bool application_cache_hit = false;
|
|
|
|
|
2022-02-17 19:48:36 +00:00
|
|
|
compiled_shaders =
|
|
|
|
tu_pipeline_cache_lookup(builder->cache, &pipeline_sha1,
|
2022-05-16 18:11:42 +01:00
|
|
|
sizeof(pipeline_sha1),
|
|
|
|
&application_cache_hit);
|
|
|
|
|
|
|
|
if (application_cache_hit && builder->cache != builder->device->mem_cache) {
|
|
|
|
pipeline_feedback.flags |=
|
|
|
|
VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT;
|
|
|
|
}
|
2019-02-27 06:09:37 +00:00
|
|
|
|
2022-02-17 19:48:36 +00:00
|
|
|
if (compiled_shaders)
|
|
|
|
goto done;
|
|
|
|
}
|
|
|
|
|
2022-05-17 15:28:30 +01:00
|
|
|
if (builder->create_info->flags &
|
|
|
|
VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT) {
|
|
|
|
return VK_PIPELINE_COMPILE_REQUIRED;
|
|
|
|
}
|
|
|
|
|
2022-02-17 19:48:36 +00:00
|
|
|
nir_shader *nir[ARRAY_SIZE(stage_infos)] = { NULL };
|
|
|
|
|
|
|
|
struct tu_shader *shaders[ARRAY_SIZE(nir)] = { NULL };
|
2020-07-06 17:16:39 +01:00
|
|
|
|
2020-06-16 10:44:23 +01:00
|
|
|
for (gl_shader_stage stage = MESA_SHADER_VERTEX;
|
2020-12-03 04:28:38 +00:00
|
|
|
stage < ARRAY_SIZE(nir); stage++) {
|
2019-02-27 06:09:37 +00:00
|
|
|
const VkPipelineShaderStageCreateInfo *stage_info = stage_infos[stage];
|
2020-07-06 17:16:39 +01:00
|
|
|
if (!stage_info)
|
|
|
|
continue;
|
|
|
|
|
2022-05-16 18:11:42 +01:00
|
|
|
int64_t stage_start = os_time_get_nano();
|
|
|
|
|
2022-03-10 19:15:16 +00:00
|
|
|
nir[stage] = tu_spirv_to_nir(builder->device, builder->mem_ctx, stage_info, stage);
|
2022-02-17 19:48:36 +00:00
|
|
|
if (!nir[stage]) {
|
|
|
|
result = VK_ERROR_OUT_OF_HOST_MEMORY;
|
|
|
|
goto fail;
|
|
|
|
}
|
2022-05-16 18:11:42 +01:00
|
|
|
|
|
|
|
stage_feedbacks[stage].flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT;
|
|
|
|
stage_feedbacks[stage].duration += os_time_get_nano() - stage_start;
|
2020-07-06 17:16:39 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
if (!nir[MESA_SHADER_FRAGMENT]) {
|
|
|
|
const nir_shader_compiler_options *nir_options =
|
|
|
|
ir3_get_compiler_options(builder->device->compiler);
|
2020-10-26 18:28:33 +00:00
|
|
|
nir_builder fs_b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT,
|
2020-10-26 18:37:25 +00:00
|
|
|
nir_options,
|
|
|
|
"noop_fs");
|
2020-07-06 17:16:39 +01:00
|
|
|
nir[MESA_SHADER_FRAGMENT] = fs_b.shader;
|
|
|
|
}
|
|
|
|
|
2021-02-04 13:12:35 +00:00
|
|
|
if (executable_info) {
|
|
|
|
for (gl_shader_stage stage = MESA_SHADER_VERTEX;
|
|
|
|
stage < ARRAY_SIZE(nir); stage++) {
|
|
|
|
if (!nir[stage])
|
|
|
|
continue;
|
|
|
|
|
|
|
|
nir_initial_disasm[stage] =
|
|
|
|
nir_shader_as_str(nir[stage], pipeline->executables_mem_ctx);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
tu_link_shaders(builder, nir, ARRAY_SIZE(nir));
|
2020-07-06 17:16:39 +01:00
|
|
|
|
2021-02-16 07:44:43 +00:00
|
|
|
uint32_t desc_sets = 0;
|
2020-07-06 17:16:39 +01:00
|
|
|
for (gl_shader_stage stage = MESA_SHADER_VERTEX;
|
2020-12-03 04:28:38 +00:00
|
|
|
stage < ARRAY_SIZE(nir); stage++) {
|
2020-07-06 17:16:39 +01:00
|
|
|
if (!nir[stage])
|
2019-02-27 06:09:37 +00:00
|
|
|
continue;
|
|
|
|
|
2022-05-16 18:11:42 +01:00
|
|
|
int64_t stage_start = os_time_get_nano();
|
|
|
|
|
2019-02-27 06:09:37 +00:00
|
|
|
struct tu_shader *shader =
|
2022-02-17 19:48:36 +00:00
|
|
|
tu_shader_create(builder->device, nir[stage], &keys[stage],
|
|
|
|
builder->layout, builder->alloc);
|
|
|
|
if (!shader) {
|
|
|
|
result = VK_ERROR_OUT_OF_HOST_MEMORY;
|
|
|
|
goto fail;
|
|
|
|
}
|
2019-02-27 06:09:37 +00:00
|
|
|
|
2020-04-24 16:05:48 +01:00
|
|
|
/* In SPIR-V generated from GLSL, the primitive mode is specified in the
|
|
|
|
* tessellation evaluation shader, but in SPIR-V generated from HLSL,
|
|
|
|
* the mode is specified in the tessellation control shader. */
|
|
|
|
if ((stage == MESA_SHADER_TESS_EVAL || stage == MESA_SHADER_TESS_CTRL) &&
|
2022-02-17 19:48:36 +00:00
|
|
|
ir3_key.tessellation == IR3_TESS_NONE) {
|
|
|
|
ir3_key.tessellation = tu6_get_tessmode(shader);
|
2020-04-24 16:05:48 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
if (stage > MESA_SHADER_TESS_CTRL) {
|
|
|
|
if (stage == MESA_SHADER_FRAGMENT) {
|
2022-02-17 19:48:36 +00:00
|
|
|
ir3_key.tcs_store_primid = ir3_key.tcs_store_primid ||
|
|
|
|
(nir[stage]->info.inputs_read & (1ull << VARYING_SLOT_PRIMITIVE_ID));
|
|
|
|
} else {
|
2022-02-17 19:48:36 +00:00
|
|
|
ir3_key.tcs_store_primid = ir3_key.tcs_store_primid ||
|
|
|
|
BITSET_TEST(nir[stage]->info.system_values_read, SYSTEM_VALUE_PRIMITIVE_ID);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-02-16 07:44:43 +00:00
|
|
|
/* Keep track of the status of each shader's active descriptor sets,
|
|
|
|
* which is set in tu_lower_io. */
|
|
|
|
desc_sets |= shader->active_desc_sets;
|
|
|
|
|
2022-02-17 19:48:36 +00:00
|
|
|
shaders[stage] = shader;
|
2022-05-16 18:11:42 +01:00
|
|
|
|
|
|
|
stage_feedbacks[stage].duration += os_time_get_nano() - stage_start;
|
2020-06-16 10:44:23 +01:00
|
|
|
}
|
|
|
|
|
2022-02-17 19:48:36 +00:00
|
|
|
struct tu_shader *last_shader = shaders[MESA_SHADER_GEOMETRY];
|
2020-07-14 15:38:09 +01:00
|
|
|
if (!last_shader)
|
2022-02-17 19:48:36 +00:00
|
|
|
last_shader = shaders[MESA_SHADER_TESS_EVAL];
|
2020-07-14 15:38:09 +01:00
|
|
|
if (!last_shader)
|
2022-02-17 19:48:36 +00:00
|
|
|
last_shader = shaders[MESA_SHADER_VERTEX];
|
2020-07-14 15:38:09 +01:00
|
|
|
|
|
|
|
uint64_t outputs_written = last_shader->ir3_shader->nir->info.outputs_written;
|
|
|
|
|
2022-02-17 19:48:36 +00:00
|
|
|
ir3_key.layer_zero = !(outputs_written & VARYING_BIT_LAYER);
|
|
|
|
ir3_key.view_zero = !(outputs_written & VARYING_BIT_VIEWPORT);
|
|
|
|
|
|
|
|
compiled_shaders =
|
|
|
|
tu_shaders_init(builder->device, &pipeline_sha1, sizeof(pipeline_sha1));
|
2020-07-03 11:03:00 +01:00
|
|
|
|
2022-02-17 19:48:36 +00:00
|
|
|
if (!compiled_shaders) {
|
|
|
|
result = VK_ERROR_OUT_OF_HOST_MEMORY;
|
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
|
|
|
|
compiled_shaders->active_desc_sets = desc_sets;
|
|
|
|
compiled_shaders->multi_pos_output =
|
|
|
|
shaders[MESA_SHADER_VERTEX]->multi_pos_output;
|
2020-05-15 18:52:43 +01:00
|
|
|
|
2020-06-24 11:56:09 +01:00
|
|
|
for (gl_shader_stage stage = MESA_SHADER_VERTEX;
|
2022-02-17 19:48:36 +00:00
|
|
|
stage < ARRAY_SIZE(shaders); stage++) {
|
|
|
|
if (!shaders[stage])
|
2020-06-16 10:44:23 +01:00
|
|
|
continue;
|
2022-06-15 01:07:28 +01:00
|
|
|
|
2022-05-16 18:11:42 +01:00
|
|
|
int64_t stage_start = os_time_get_nano();
|
|
|
|
|
2022-02-17 19:48:36 +00:00
|
|
|
compiled_shaders->variants[stage] =
|
|
|
|
ir3_shader_create_variant(shaders[stage]->ir3_shader, &ir3_key,
|
|
|
|
executable_info);
|
|
|
|
      if (!compiled_shaders->variants[stage]) {
         result = VK_ERROR_OUT_OF_HOST_MEMORY;
         goto fail;
      }
|
2022-02-17 19:48:36 +00:00
|
|
|
|
|
|
|
compiled_shaders->push_consts[stage] = shaders[stage]->push_consts;
|
2022-05-16 18:11:42 +01:00
|
|
|
|
|
|
|
stage_feedbacks[stage].duration += os_time_get_nano() - stage_start;
|
2019-02-27 06:09:37 +00:00
|
|
|
}
|
2022-06-15 01:07:28 +01:00
|
|
|
compiled_shaders->shared_consts = pipeline->shared_consts;
|
2019-02-27 06:09:37 +00:00
|
|
|
|
2022-02-17 19:48:36 +00:00
|
|
|
uint32_t safe_constlens = ir3_trim_constlen(compiled_shaders->variants, compiler);
|
2020-06-24 11:56:09 +01:00
|
|
|
|
2022-02-17 19:48:36 +00:00
|
|
|
ir3_key.safe_constlen = true;
|
2020-06-24 11:56:09 +01:00
|
|
|
|
|
|
|
for (gl_shader_stage stage = MESA_SHADER_VERTEX;
|
2022-02-17 19:48:36 +00:00
|
|
|
stage < ARRAY_SIZE(shaders); stage++) {
|
|
|
|
if (!shaders[stage])
|
2020-06-24 11:56:09 +01:00
|
|
|
continue;
|
|
|
|
|
|
|
|
if (safe_constlens & (1 << stage)) {
|
2022-05-16 18:11:42 +01:00
|
|
|
int64_t stage_start = os_time_get_nano();
|
|
|
|
|
2022-02-17 19:48:36 +00:00
|
|
|
ralloc_free(compiled_shaders->variants[stage]);
|
|
|
|
compiled_shaders->variants[stage] =
|
|
|
|
ir3_shader_create_variant(shaders[stage]->ir3_shader, &ir3_key,
|
|
|
|
executable_info);
|
|
|
|
if (!compiled_shaders->variants[stage]) {
|
|
|
|
result = VK_ERROR_OUT_OF_HOST_MEMORY;
|
|
|
|
goto fail;
|
|
|
|
}
|
2022-05-16 18:11:42 +01:00
|
|
|
|
|
|
|
stage_feedbacks[stage].duration += os_time_get_nano() - stage_start;
|
2020-06-24 11:56:09 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-02-17 19:48:36 +00:00
|
|
|
for (gl_shader_stage stage = MESA_SHADER_VERTEX;
|
|
|
|
stage < ARRAY_SIZE(nir); stage++) {
|
|
|
|
if (shaders[stage]) {
|
|
|
|
tu_shader_destroy(builder->device, shaders[stage], builder->alloc);
|
|
|
|
}
|
2019-02-27 06:09:37 +00:00
|
|
|
}
|
|
|
|
|
2022-02-17 19:48:36 +00:00
|
|
|
compiled_shaders =
|
|
|
|
tu_pipeline_cache_insert(builder->cache, compiled_shaders);
|
2020-06-16 16:00:31 +01:00
|
|
|
|
2022-02-17 19:48:36 +00:00
|
|
|
done:
|
2021-02-04 13:12:35 +00:00
|
|
|
for (gl_shader_stage stage = MESA_SHADER_VERTEX;
|
|
|
|
stage < ARRAY_SIZE(nir); stage++) {
|
2022-02-17 19:48:36 +00:00
|
|
|
if (compiled_shaders->variants[stage]) {
|
|
|
|
tu_append_executable(pipeline, compiled_shaders->variants[stage],
|
2021-02-04 13:12:35 +00:00
|
|
|
nir_initial_disasm[stage]);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-02-17 19:48:36 +00:00
|
|
|
struct ir3_shader_variant *vs =
|
|
|
|
compiled_shaders->variants[MESA_SHADER_VERTEX];
|
|
|
|
|
|
|
|
struct ir3_shader_variant *variant;
|
|
|
|
if (!vs->stream_output.num_outputs && ir3_has_binning_vs(&vs->key)) {
|
|
|
|
tu_append_executable(pipeline, vs->binning, NULL);
|
|
|
|
variant = vs->binning;
|
|
|
|
} else {
|
|
|
|
variant = vs;
|
|
|
|
}
|
|
|
|
|
|
|
|
builder->binning_variant = variant;
|
|
|
|
|
|
|
|
builder->shaders = compiled_shaders;
|
|
|
|
|
|
|
|
pipeline->active_desc_sets = compiled_shaders->active_desc_sets;
|
|
|
|
if (compiled_shaders->variants[MESA_SHADER_TESS_CTRL]) {
|
|
|
|
pipeline->tess.patch_type =
|
|
|
|
compiled_shaders->variants[MESA_SHADER_TESS_CTRL]->key.tessellation;
|
2021-02-04 13:12:35 +00:00
|
|
|
}
|
|
|
|
|
2022-05-16 18:11:42 +01:00
|
|
|
pipeline_feedback.duration = os_time_get_nano() - pipeline_start;
|
|
|
|
if (creation_feedback) {
|
|
|
|
*creation_feedback->pPipelineCreationFeedback = pipeline_feedback;
|
|
|
|
|
|
|
|
assert(builder->create_info->stageCount ==
|
|
|
|
creation_feedback->pipelineStageCreationFeedbackCount);
|
|
|
|
for (uint32_t i = 0; i < builder->create_info->stageCount; i++) {
|
|
|
|
gl_shader_stage s =
|
|
|
|
vk_to_mesa_shader_stage(builder->create_info->pStages[i].stage);
|
|
|
|
creation_feedback->pPipelineStageCreationFeedbacks[i] = stage_feedbacks[s];
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-02-27 06:09:37 +00:00
|
|
|
return VK_SUCCESS;
|
2022-02-17 19:48:36 +00:00
|
|
|
|
|
|
|
fail:
|
|
|
|
for (gl_shader_stage stage = MESA_SHADER_VERTEX;
|
|
|
|
stage < ARRAY_SIZE(nir); stage++) {
|
|
|
|
if (shaders[stage]) {
|
|
|
|
tu_shader_destroy(builder->device, shaders[stage], builder->alloc);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (compiled_shaders)
|
|
|
|
vk_pipeline_cache_object_unref(&compiled_shaders->base);
|
|
|
|
|
|
|
|
return result;
|
2019-02-27 06:09:37 +00:00
|
|
|
}
|
|
|
|
|
2019-02-21 17:41:49 +00:00
|
|
|
static void
|
|
|
|
tu_pipeline_builder_parse_dynamic(struct tu_pipeline_builder *builder,
|
|
|
|
struct tu_pipeline *pipeline)
|
|
|
|
{
|
|
|
|
const VkPipelineDynamicStateCreateInfo *dynamic_info =
|
|
|
|
builder->create_info->pDynamicState;
|
|
|
|
|
2020-09-17 15:16:42 +01:00
|
|
|
pipeline->gras_su_cntl_mask = ~0u;
|
|
|
|
pipeline->rb_depth_cntl_mask = ~0u;
|
|
|
|
pipeline->rb_stencil_cntl_mask = ~0u;
|
2021-08-10 04:32:15 +01:00
|
|
|
pipeline->pc_raster_cntl_mask = ~0u;
|
|
|
|
pipeline->vpc_unknown_9107_mask = ~0u;
|
2022-06-07 00:48:44 +01:00
|
|
|
pipeline->sp_blend_cntl_mask = ~0u;
|
|
|
|
pipeline->rb_blend_cntl_mask = ~0u;
|
|
|
|
pipeline->rb_mrt_control_mask = ~0u;
|
2020-09-17 15:16:42 +01:00
|
|
|
|
2021-02-03 12:01:51 +00:00
|
|
|
if (!dynamic_info)
|
|
|
|
return;
|
|
|
|
|
2019-02-21 17:41:49 +00:00
|
|
|
for (uint32_t i = 0; i < dynamic_info->dynamicStateCount; i++) {
|
2020-06-14 15:52:37 +01:00
|
|
|
VkDynamicState state = dynamic_info->pDynamicStates[i];
|
|
|
|
switch (state) {
|
|
|
|
case VK_DYNAMIC_STATE_VIEWPORT ... VK_DYNAMIC_STATE_STENCIL_REFERENCE:
|
2020-09-17 15:16:42 +01:00
|
|
|
if (state == VK_DYNAMIC_STATE_LINE_WIDTH)
|
|
|
|
pipeline->gras_su_cntl_mask &= ~A6XX_GRAS_SU_CNTL_LINEHALFWIDTH__MASK;
|
2020-06-14 15:52:37 +01:00
|
|
|
pipeline->dynamic_state_mask |= BIT(state);
|
|
|
|
break;
|
|
|
|
case VK_DYNAMIC_STATE_SAMPLE_LOCATIONS_EXT:
|
|
|
|
pipeline->dynamic_state_mask |= BIT(TU_DYNAMIC_STATE_SAMPLE_LOCATIONS);
|
|
|
|
break;
|
2022-07-01 13:04:48 +01:00
|
|
|
case VK_DYNAMIC_STATE_CULL_MODE:
|
2020-09-17 15:16:42 +01:00
|
|
|
pipeline->gras_su_cntl_mask &=
|
|
|
|
~(A6XX_GRAS_SU_CNTL_CULL_BACK | A6XX_GRAS_SU_CNTL_CULL_FRONT);
|
|
|
|
pipeline->dynamic_state_mask |= BIT(TU_DYNAMIC_STATE_GRAS_SU_CNTL);
|
|
|
|
break;
|
2022-07-01 13:04:48 +01:00
|
|
|
case VK_DYNAMIC_STATE_FRONT_FACE:
|
2020-09-17 15:16:42 +01:00
|
|
|
pipeline->gras_su_cntl_mask &= ~A6XX_GRAS_SU_CNTL_FRONT_CW;
|
|
|
|
pipeline->dynamic_state_mask |= BIT(TU_DYNAMIC_STATE_GRAS_SU_CNTL);
|
|
|
|
break;
|
2022-07-01 13:04:48 +01:00
|
|
|
case VK_DYNAMIC_STATE_PRIMITIVE_TOPOLOGY:
|
2020-09-17 15:16:42 +01:00
|
|
|
pipeline->dynamic_state_mask |= BIT(TU_DYNAMIC_STATE_PRIMITIVE_TOPOLOGY);
|
|
|
|
break;
|
2022-07-01 13:04:48 +01:00
|
|
|
case VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE:
|
2020-09-17 15:16:42 +01:00
|
|
|
pipeline->dynamic_state_mask |= BIT(TU_DYNAMIC_STATE_VB_STRIDE);
|
|
|
|
break;
|
2022-07-01 13:04:48 +01:00
|
|
|
case VK_DYNAMIC_STATE_VIEWPORT_WITH_COUNT:
|
2020-09-17 15:16:42 +01:00
|
|
|
pipeline->dynamic_state_mask |= BIT(VK_DYNAMIC_STATE_VIEWPORT);
|
|
|
|
break;
|
2022-07-01 13:04:48 +01:00
|
|
|
case VK_DYNAMIC_STATE_SCISSOR_WITH_COUNT:
|
2020-09-17 15:16:42 +01:00
|
|
|
pipeline->dynamic_state_mask |= BIT(VK_DYNAMIC_STATE_SCISSOR);
|
|
|
|
break;
|
2022-07-01 13:04:48 +01:00
|
|
|
case VK_DYNAMIC_STATE_DEPTH_TEST_ENABLE:
|
2020-09-17 15:16:42 +01:00
|
|
|
pipeline->rb_depth_cntl_mask &=
|
2021-08-17 16:19:06 +01:00
|
|
|
~(A6XX_RB_DEPTH_CNTL_Z_TEST_ENABLE | A6XX_RB_DEPTH_CNTL_Z_READ_ENABLE);
|
2020-09-17 15:16:42 +01:00
|
|
|
pipeline->dynamic_state_mask |= BIT(TU_DYNAMIC_STATE_RB_DEPTH_CNTL);
|
|
|
|
break;
|
2022-07-01 13:04:48 +01:00
|
|
|
case VK_DYNAMIC_STATE_DEPTH_WRITE_ENABLE:
|
2020-09-17 15:16:42 +01:00
|
|
|
pipeline->rb_depth_cntl_mask &= ~A6XX_RB_DEPTH_CNTL_Z_WRITE_ENABLE;
|
|
|
|
pipeline->dynamic_state_mask |= BIT(TU_DYNAMIC_STATE_RB_DEPTH_CNTL);
|
|
|
|
break;
|
2022-07-01 13:04:48 +01:00
|
|
|
case VK_DYNAMIC_STATE_DEPTH_COMPARE_OP:
|
2020-09-17 15:16:42 +01:00
|
|
|
pipeline->rb_depth_cntl_mask &= ~A6XX_RB_DEPTH_CNTL_ZFUNC__MASK;
|
|
|
|
pipeline->dynamic_state_mask |= BIT(TU_DYNAMIC_STATE_RB_DEPTH_CNTL);
|
|
|
|
break;
|
2022-07-01 13:04:48 +01:00
|
|
|
case VK_DYNAMIC_STATE_DEPTH_BOUNDS_TEST_ENABLE:
|
2020-09-17 15:16:42 +01:00
|
|
|
pipeline->rb_depth_cntl_mask &=
|
2021-08-17 16:19:06 +01:00
|
|
|
~(A6XX_RB_DEPTH_CNTL_Z_BOUNDS_ENABLE | A6XX_RB_DEPTH_CNTL_Z_READ_ENABLE);
|
2020-09-17 15:16:42 +01:00
|
|
|
pipeline->dynamic_state_mask |= BIT(TU_DYNAMIC_STATE_RB_DEPTH_CNTL);
|
|
|
|
break;
|
2022-07-01 13:04:48 +01:00
|
|
|
case VK_DYNAMIC_STATE_STENCIL_TEST_ENABLE:
|
2020-09-17 15:16:42 +01:00
|
|
|
pipeline->rb_stencil_cntl_mask &= ~(A6XX_RB_STENCIL_CONTROL_STENCIL_ENABLE |
|
|
|
|
A6XX_RB_STENCIL_CONTROL_STENCIL_ENABLE_BF |
|
|
|
|
A6XX_RB_STENCIL_CONTROL_STENCIL_READ);
|
|
|
|
pipeline->dynamic_state_mask |= BIT(TU_DYNAMIC_STATE_RB_STENCIL_CNTL);
|
|
|
|
break;
|
2022-07-01 13:04:48 +01:00
|
|
|
case VK_DYNAMIC_STATE_STENCIL_OP:
|
2021-02-05 08:52:55 +00:00
|
|
|
pipeline->rb_stencil_cntl_mask &= ~(A6XX_RB_STENCIL_CONTROL_FUNC__MASK |
|
|
|
|
A6XX_RB_STENCIL_CONTROL_FAIL__MASK |
|
|
|
|
A6XX_RB_STENCIL_CONTROL_ZPASS__MASK |
|
|
|
|
A6XX_RB_STENCIL_CONTROL_ZFAIL__MASK |
|
|
|
|
A6XX_RB_STENCIL_CONTROL_FUNC_BF__MASK |
|
|
|
|
A6XX_RB_STENCIL_CONTROL_FAIL_BF__MASK |
|
|
|
|
A6XX_RB_STENCIL_CONTROL_ZPASS_BF__MASK |
|
|
|
|
A6XX_RB_STENCIL_CONTROL_ZFAIL_BF__MASK);
|
2020-09-17 15:16:42 +01:00
|
|
|
pipeline->dynamic_state_mask |= BIT(TU_DYNAMIC_STATE_RB_STENCIL_CNTL);
|
|
|
|
break;
|
2022-07-01 13:04:48 +01:00
|
|
|
case VK_DYNAMIC_STATE_DEPTH_BIAS_ENABLE:
|
2021-04-22 17:10:42 +01:00
|
|
|
pipeline->gras_su_cntl_mask &= ~A6XX_GRAS_SU_CNTL_POLY_OFFSET;
|
|
|
|
pipeline->dynamic_state_mask |= BIT(TU_DYNAMIC_STATE_GRAS_SU_CNTL);
|
|
|
|
break;
|
2022-07-01 13:04:48 +01:00
|
|
|
case VK_DYNAMIC_STATE_PRIMITIVE_RESTART_ENABLE:
|
2021-08-10 04:30:07 +01:00
|
|
|
pipeline->dynamic_state_mask |= BIT(TU_DYNAMIC_STATE_PRIMITIVE_RESTART_ENABLE);
|
|
|
|
break;
|
2022-07-01 13:04:48 +01:00
|
|
|
case VK_DYNAMIC_STATE_RASTERIZER_DISCARD_ENABLE:
|
2021-08-10 04:32:15 +01:00
|
|
|
pipeline->pc_raster_cntl_mask &= ~A6XX_PC_RASTER_CNTL_DISCARD;
|
|
|
|
pipeline->vpc_unknown_9107_mask &= ~A6XX_VPC_UNKNOWN_9107_RASTER_DISCARD;
|
|
|
|
pipeline->dynamic_state_mask |= BIT(TU_DYNAMIC_STATE_RASTERIZER_DISCARD);
|
|
|
|
break;
|
2022-06-07 00:48:44 +01:00
|
|
|
case VK_DYNAMIC_STATE_LOGIC_OP_EXT:
|
|
|
|
pipeline->sp_blend_cntl_mask &= ~A6XX_SP_BLEND_CNTL_ENABLE_BLEND__MASK;
|
|
|
|
pipeline->rb_blend_cntl_mask &= ~A6XX_RB_BLEND_CNTL_ENABLE_BLEND__MASK;
|
|
|
|
pipeline->rb_mrt_control_mask &= ~A6XX_RB_MRT_CONTROL_ROP_CODE__MASK;
|
|
|
|
pipeline->dynamic_state_mask |= BIT(TU_DYNAMIC_STATE_BLEND);
|
|
|
|
pipeline->dynamic_state_mask |= BIT(TU_DYNAMIC_STATE_LOGIC_OP);
|
|
|
|
break;
|
2022-06-08 16:11:09 +01:00
|
|
|
case VK_DYNAMIC_STATE_COLOR_WRITE_ENABLE_EXT:
|
|
|
|
pipeline->sp_blend_cntl_mask &= ~A6XX_SP_BLEND_CNTL_ENABLE_BLEND__MASK;
|
|
|
|
pipeline->rb_blend_cntl_mask &= ~A6XX_RB_BLEND_CNTL_ENABLE_BLEND__MASK;
|
|
|
|
pipeline->dynamic_state_mask |= BIT(TU_DYNAMIC_STATE_BLEND);
|
|
|
|
|
|
|
|
/* Dynamic color write enable doesn't directly change any of the
|
|
|
|
* registers, but it causes us to make some of the registers 0, so we
|
|
|
|
* set this dynamic state instead of making the register dynamic.
|
|
|
|
*/
|
|
|
|
pipeline->dynamic_state_mask |= BIT(TU_DYNAMIC_STATE_COLOR_WRITE_ENABLE);
|
|
|
|
break;
|
2020-06-14 15:52:37 +01:00
|
|
|
default:
|
|
|
|
assert(!"unsupported dynamic state");
|
|
|
|
break;
|
|
|
|
}
|
2019-02-21 17:41:49 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-12-20 22:26:44 +00:00
|
|
|
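/* Record the per-stage constant-file layout and push constant range so the
 * command buffer can later emit user consts for this stage without needing
 * the full shader variant.
 */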
static void
|
|
|
|
tu_pipeline_set_linkage(struct tu_program_descriptor_linkage *link,
|
2022-02-17 19:48:36 +00:00
|
|
|
struct tu_push_constant_range *push_consts,
|
2019-12-20 22:26:44 +00:00
|
|
|
struct ir3_shader_variant *v)
|
|
|
|
{
|
2020-06-14 19:36:05 +01:00
|
|
|
link->const_state = *ir3_const_state(v);
|
2019-12-20 22:26:44 +00:00
|
|
|
link->constlen = v->constlen;
|
2022-02-17 19:48:36 +00:00
|
|
|
link->push_consts = *push_consts;
|
2019-12-20 22:26:44 +00:00
|
|
|
}
|
|
|
|
|
2019-02-27 06:10:34 +00:00
|
|
|
static void
|
|
|
|
tu_pipeline_builder_parse_shader_stages(struct tu_pipeline_builder *builder,
|
|
|
|
struct tu_pipeline *pipeline)
|
|
|
|
{
|
|
|
|
struct tu_cs prog_cs;
|
2021-07-12 20:00:38 +01:00
|
|
|
|
|
|
|
/* Emit HLSQ_xS_CNTL/HLSQ_SP_xS_CONFIG *first*, before emitting anything
|
|
|
|
* else that could depend on that state (like push constants)
|
|
|
|
*
|
|
|
|
* Note also that this always uses the full VS even in binning pass. The
|
|
|
|
* binning pass variant has the same const layout as the full VS, and
|
|
|
|
* the constlen for the VS will be the same or greater than the constlen
|
|
|
|
* for the binning pass variant. It is required that the constlen state
|
|
|
|
* matches between binning and draw passes, as some parts of the push
|
|
|
|
* consts are emitted in state groups that are shared between the binning
|
|
|
|
* and draw passes.
|
|
|
|
*/
|
|
|
|
tu_cs_begin_sub_stream(&pipeline->cs, 512, &prog_cs);
|
|
|
|
tu6_emit_program_config(&prog_cs, builder);
|
|
|
|
pipeline->program.config_state = tu_cs_end_draw_state(&pipeline->cs, &prog_cs);
|
|
|
|
|
2021-02-02 16:00:08 +00:00
|
|
|
tu_cs_begin_sub_stream(&pipeline->cs, 512 + builder->additional_cs_reserve_size, &prog_cs);
|
2021-02-04 15:50:12 +00:00
|
|
|
tu6_emit_program(&prog_cs, builder, false, pipeline);
|
2020-06-18 21:24:26 +01:00
|
|
|
pipeline->program.state = tu_cs_end_draw_state(&pipeline->cs, &prog_cs);
|
2019-02-27 06:10:34 +00:00
|
|
|
|
2021-02-02 16:00:08 +00:00
|
|
|
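/* Emit the program state again for the binning pass; this version uses the
 * stripped-down binning VS variant when one is available.
 */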
tu_cs_begin_sub_stream(&pipeline->cs, 512 + builder->additional_cs_reserve_size, &prog_cs);
|
2021-02-04 15:50:12 +00:00
|
|
|
tu6_emit_program(&prog_cs, builder, true, pipeline);
|
2020-06-18 21:24:26 +01:00
|
|
|
pipeline->program.binning_state = tu_cs_end_draw_state(&pipeline->cs, &prog_cs);
|
2019-09-26 05:31:56 +01:00
|
|
|
|
2020-04-06 21:38:04 +01:00
|
|
|
VkShaderStageFlags stages = 0;
|
|
|
|
for (unsigned i = 0; i < builder->create_info->stageCount; i++) {
|
|
|
|
stages |= builder->create_info->pStages[i].stage;
|
|
|
|
}
|
|
|
|
pipeline->active_stages = stages;
|
|
|
|
|
2022-02-17 19:48:36 +00:00
|
|
|
for (unsigned i = 0; i < ARRAY_SIZE(builder->shaders->variants); i++) {
|
|
|
|
if (!builder->shaders->variants[i])
|
2019-09-26 05:31:56 +01:00
|
|
|
continue;
|
|
|
|
|
2019-12-20 22:26:44 +00:00
|
|
|
tu_pipeline_set_linkage(&pipeline->program.link[i],
|
2022-02-17 19:48:36 +00:00
|
|
|
&builder->shaders->push_consts[i],
|
|
|
|
builder->shaders->variants[i]);
|
2019-09-26 05:31:56 +01:00
|
|
|
}
|
2019-02-27 06:10:34 +00:00
|
|
|
}
|
|
|
|
|
2019-02-22 06:31:36 +00:00
|
|
|
static void
|
|
|
|
tu_pipeline_builder_parse_vertex_input(struct tu_pipeline_builder *builder,
|
|
|
|
struct tu_pipeline *pipeline)
|
|
|
|
{
|
|
|
|
const VkPipelineVertexInputStateCreateInfo *vi_info =
|
|
|
|
builder->create_info->pVertexInputState;
|
2022-02-17 19:48:36 +00:00
|
|
|
const struct ir3_shader_variant *vs = builder->shaders->variants[MESA_SHADER_VERTEX];
|
2020-06-16 10:44:23 +01:00
|
|
|
const struct ir3_shader_variant *bs = builder->binning_variant;
|
2019-02-22 06:31:36 +00:00
|
|
|
|
2021-10-05 12:25:52 +01:00
|
|
|
/* Bindings may contain holes */
|
|
|
|
for (unsigned i = 0; i < vi_info->vertexBindingDescriptionCount; i++) {
|
|
|
|
pipeline->num_vbs =
|
|
|
|
MAX2(pipeline->num_vbs, vi_info->pVertexBindingDescriptions[i].binding + 1);
|
|
|
|
}
|
2020-09-09 14:26:59 +01:00
|
|
|
|
2022-06-17 00:37:07 +01:00
|
|
|
tu6_emit_vertex_input(pipeline, &pipeline->vi.state, vs, vi_info);
|
|
|
|
if (bs)
|
|
|
|
tu6_emit_vertex_input(pipeline, &pipeline->vi.binning_state, bs, vi_info);
|
2019-02-22 06:31:36 +00:00
|
|
|
}
|
|
|
|
|
2019-02-21 19:07:38 +00:00
|
|
|
static void
|
|
|
|
tu_pipeline_builder_parse_input_assembly(struct tu_pipeline_builder *builder,
|
|
|
|
struct tu_pipeline *pipeline)
|
|
|
|
{
|
|
|
|
const VkPipelineInputAssemblyStateCreateInfo *ia_info =
|
|
|
|
builder->create_info->pInputAssemblyState;
|
|
|
|
|
|
|
|
pipeline->ia.primtype = tu6_primtype(ia_info->topology);
|
|
|
|
pipeline->ia.primitive_restart = ia_info->primitiveRestartEnable;
|
|
|
|
}
|
|
|
|
|
2020-06-14 15:52:37 +01:00
|
|
|
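/* If dynamic state "id" is static for this pipeline, begin a draw state of
 * "size" dwords in the pipeline CS for the caller to fill and return true.
 * If the state was declared dynamic, return false; it will be emitted from
 * the command buffer at draw time instead.
 */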
static bool
|
|
|
|
tu_pipeline_static_state(struct tu_pipeline *pipeline, struct tu_cs *cs,
|
|
|
|
uint32_t id, uint32_t size)
|
|
|
|
{
|
2020-06-18 21:24:26 +01:00
|
|
|
assert(id < ARRAY_SIZE(pipeline->dynamic_state));
|
2020-06-14 15:52:37 +01:00
|
|
|
|
|
|
|
if (pipeline->dynamic_state_mask & BIT(id))
|
|
|
|
return false;
|
|
|
|
|
2020-06-18 21:24:26 +01:00
|
|
|
pipeline->dynamic_state[id] = tu_cs_draw_state(&pipeline->cs, cs, size);
|
2020-06-14 15:52:37 +01:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2020-04-24 16:51:04 +01:00
|
|
|
static void
|
|
|
|
tu_pipeline_builder_parse_tessellation(struct tu_pipeline_builder *builder,
|
|
|
|
struct tu_pipeline *pipeline)
|
|
|
|
{
|
2020-12-17 11:50:04 +00:00
|
|
|
if (!(pipeline->active_stages & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) ||
|
|
|
|
!(pipeline->active_stages & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT))
|
|
|
|
return;
|
|
|
|
|
2020-04-24 16:51:04 +01:00
|
|
|
const VkPipelineTessellationStateCreateInfo *tess_info =
|
|
|
|
builder->create_info->pTessellationState;
|
|
|
|
|
|
|
|
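/* The patch primtypes are consecutive enum values, so the primitive type for
 * N control points is simply DI_PT_PATCHES0 + N.
 */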
assert(pipeline->ia.primtype == DI_PT_PATCHES0);
|
|
|
|
assert(tess_info->patchControlPoints <= 32);
|
|
|
|
pipeline->ia.primtype += tess_info->patchControlPoints;
|
2020-04-24 20:52:05 +01:00
|
|
|
const VkPipelineTessellationDomainOriginStateCreateInfo *domain_info =
|
|
|
|
vk_find_struct_const(tess_info->pNext, PIPELINE_TESSELLATION_DOMAIN_ORIGIN_STATE_CREATE_INFO);
|
|
|
|
pipeline->tess.upper_left_domain_origin = !domain_info ||
|
|
|
|
domain_info->domainOrigin == VK_TESSELLATION_DOMAIN_ORIGIN_UPPER_LEFT;
|
2022-02-17 19:48:36 +00:00
|
|
|
const struct ir3_shader_variant *hs = builder->shaders->variants[MESA_SHADER_TESS_CTRL];
|
2020-07-02 16:53:33 +01:00
|
|
|
pipeline->tess.param_stride = hs->output_size * 4;
|
2020-04-24 16:51:04 +01:00
|
|
|
}
|
|
|
|
|
2019-02-19 21:49:01 +00:00
|
|
|
static void
|
|
|
|
tu_pipeline_builder_parse_viewport(struct tu_pipeline_builder *builder,
|
|
|
|
struct tu_pipeline *pipeline)
|
|
|
|
{
|
|
|
|
/* The spec says:
|
|
|
|
*
|
|
|
|
* "pViewportState is a pointer to an instance of the
|
|
|
|
* VkPipelineViewportStateCreateInfo structure, and is ignored if the
|
|
|
|
* pipeline has rasterization disabled."
|
|
|
|
*
|
|
|
|
* We leave the relevant registers stale in that case.
|
|
|
|
*/
|
|
|
|
if (builder->rasterizer_discard)
|
|
|
|
return;
|
|
|
|
|
|
|
|
const VkPipelineViewportStateCreateInfo *vp_info =
|
|
|
|
builder->create_info->pViewportState;
|
2021-12-30 17:59:46 +00:00
|
|
|
const VkPipelineViewportDepthClipControlCreateInfoEXT *depth_clip_info =
|
|
|
|
vk_find_struct_const(vp_info->pNext, PIPELINE_VIEWPORT_DEPTH_CLIP_CONTROL_CREATE_INFO_EXT);
|
|
|
|
pipeline->z_negative_one_to_one = depth_clip_info ? depth_clip_info->negativeOneToOne : false;
|
2019-02-19 21:49:01 +00:00
|
|
|
|
2020-06-14 15:52:37 +01:00
|
|
|
struct tu_cs cs;
|
2019-02-19 21:49:01 +00:00
|
|
|
|
2020-07-14 15:38:09 +01:00
|
|
|
if (tu_pipeline_static_state(pipeline, &cs, VK_DYNAMIC_STATE_VIEWPORT, 8 + 10 * vp_info->viewportCount))
|
2021-12-30 17:59:46 +00:00
|
|
|
tu6_emit_viewport(&cs, vp_info->pViewports, vp_info->viewportCount, pipeline->z_negative_one_to_one);
|
2019-02-19 21:49:01 +00:00
|
|
|
|
2020-07-14 15:38:09 +01:00
|
|
|
if (tu_pipeline_static_state(pipeline, &cs, VK_DYNAMIC_STATE_SCISSOR, 1 + 2 * vp_info->scissorCount))
|
|
|
|
tu6_emit_scissor(&cs, vp_info->pScissors, vp_info->scissorCount);
|
2019-02-19 21:49:01 +00:00
|
|
|
}
|
|
|
|
|
2019-02-27 07:29:51 +00:00
|
|
|
static void
|
|
|
|
tu_pipeline_builder_parse_rasterization(struct tu_pipeline_builder *builder,
|
|
|
|
struct tu_pipeline *pipeline)
|
|
|
|
{
|
|
|
|
const VkPipelineRasterizationStateCreateInfo *rast_info =
|
|
|
|
builder->create_info->pRasterizationState;
|
|
|
|
|
2020-06-10 21:05:53 +01:00
|
|
|
enum a6xx_polygon_mode mode = tu6_polygon_mode(rast_info->polygonMode);
|
2019-02-27 07:29:51 +00:00
|
|
|
|
2022-06-30 01:56:29 +01:00
|
|
|
builder->depth_clip_disable = rast_info->depthClampEnable;
|
2020-07-23 10:44:40 +01:00
|
|
|
|
|
|
|
const VkPipelineRasterizationDepthClipStateCreateInfoEXT *depth_clip_state =
|
|
|
|
vk_find_struct_const(rast_info, PIPELINE_RASTERIZATION_DEPTH_CLIP_STATE_CREATE_INFO_EXT);
|
|
|
|
if (depth_clip_state)
|
2022-06-30 01:56:29 +01:00
|
|
|
builder->depth_clip_disable = !depth_clip_state->depthClipEnable;
|
2020-07-23 10:44:40 +01:00
|
|
|
|
2021-10-04 02:24:58 +01:00
|
|
|
pipeline->line_mode = RECTANGULAR;
|
|
|
|
|
2022-06-22 16:42:46 +01:00
|
|
|
if (tu6_primtype_line(pipeline->ia.primtype) ||
|
|
|
|
(tu6_primtype_patches(pipeline->ia.primtype) &&
|
|
|
|
pipeline->tess.patch_type == IR3_TESS_ISOLINES)) {
|
2021-10-04 02:24:58 +01:00
|
|
|
const VkPipelineRasterizationLineStateCreateInfoEXT *rast_line_state =
|
|
|
|
vk_find_struct_const(rast_info->pNext,
|
|
|
|
PIPELINE_RASTERIZATION_LINE_STATE_CREATE_INFO_EXT);
|
|
|
|
|
|
|
|
if (rast_line_state && rast_line_state->lineRasterizationMode ==
|
|
|
|
VK_LINE_RASTERIZATION_MODE_BRESENHAM_EXT) {
|
|
|
|
pipeline->line_mode = BRESENHAM;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-06-14 15:52:37 +01:00
|
|
|
struct tu_cs cs;
|
2021-11-03 12:05:23 +00:00
|
|
|
uint32_t cs_size = 9 +
|
|
|
|
(builder->device->physical_device->info->a6xx.has_shading_rate ? 8 : 0) +
|
|
|
|
(builder->emit_msaa_state ? 11 : 0);
|
2021-03-11 14:22:38 +00:00
|
|
|
pipeline->rast_state = tu_cs_draw_state(&pipeline->cs, &cs, cs_size);
|
2020-03-24 01:37:25 +00:00
|
|
|
|
2020-06-14 15:52:37 +01:00
|
|
|
tu_cs_emit_regs(&cs,
|
2020-03-24 01:37:25 +00:00
|
|
|
A6XX_GRAS_CL_CNTL(
|
2022-06-30 01:56:29 +01:00
|
|
|
.znear_clip_disable = builder->depth_clip_disable,
|
|
|
|
.zfar_clip_disable = builder->depth_clip_disable,
|
2020-07-23 10:44:40 +01:00
|
|
|
/* TODO should this be depth_clip_disable instead? */
|
2020-03-24 01:37:25 +00:00
|
|
|
.unk5 = rast_info->depthClampEnable,
|
2021-12-30 17:59:46 +00:00
|
|
|
.zero_gb_scale_z = pipeline->z_negative_one_to_one ? 0 : 1,
|
2020-03-24 01:37:25 +00:00
|
|
|
.vp_clip_code_ignore = 1));
|
2020-06-10 21:05:53 +01:00
|
|
|
|
|
|
|
tu_cs_emit_regs(&cs,
|
2020-07-11 18:03:41 +01:00
|
|
|
A6XX_VPC_POLYGON_MODE(mode));
|
2020-06-10 21:05:53 +01:00
|
|
|
|
|
|
|
tu_cs_emit_regs(&cs,
|
2020-07-11 20:55:12 +01:00
|
|
|
A6XX_PC_POLYGON_MODE(mode));
|
2020-06-10 21:05:53 +01:00
|
|
|
|
2019-02-27 07:29:51 +00:00
|
|
|
/* move to hw ctx init? */
|
2020-06-14 15:52:37 +01:00
|
|
|
tu_cs_emit_regs(&cs,
|
|
|
|
A6XX_GRAS_SU_POINT_MINMAX(.min = 1.0f / 16.0f, .max = 4092.0f),
|
|
|
|
A6XX_GRAS_SU_POINT_SIZE(1.0f));
|
|
|
|
|
2021-09-14 17:11:38 +01:00
|
|
|
if (builder->device->physical_device->info->a6xx.has_shading_rate) {
|
|
|
|
tu_cs_emit_regs(&cs, A6XX_RB_UNKNOWN_8A00());
|
|
|
|
tu_cs_emit_regs(&cs, A6XX_RB_UNKNOWN_8A10());
|
|
|
|
tu_cs_emit_regs(&cs, A6XX_RB_UNKNOWN_8A20());
|
|
|
|
tu_cs_emit_regs(&cs, A6XX_RB_UNKNOWN_8A30());
|
|
|
|
}
|
|
|
|
|
2021-03-11 14:22:38 +00:00
|
|
|
/* If the sample count couldn't be determined from the subpass, we should emit it here.
|
|
|
|
* This happens when the subpass doesn't use any color/depth attachment.
|
|
|
|
*/
|
|
|
|
if (builder->emit_msaa_state)
|
2021-10-04 02:24:58 +01:00
|
|
|
tu6_emit_msaa(&cs, builder->samples, pipeline->line_mode);
|
2021-03-11 14:22:38 +00:00
|
|
|
|
2021-08-10 04:32:15 +01:00
|
|
|
const VkPipelineRasterizationStateStreamCreateInfoEXT *stream_info =
|
|
|
|
vk_find_struct_const(rast_info->pNext,
|
|
|
|
PIPELINE_RASTERIZATION_STATE_STREAM_CREATE_INFO_EXT);
|
|
|
|
unsigned stream = stream_info ? stream_info->rasterizationStream : 0;
|
|
|
|
|
|
|
|
pipeline->pc_raster_cntl = A6XX_PC_RASTER_CNTL_STREAM(stream);
|
|
|
|
pipeline->vpc_unknown_9107 = 0;
|
|
|
|
if (rast_info->rasterizerDiscardEnable) {
|
|
|
|
pipeline->pc_raster_cntl |= A6XX_PC_RASTER_CNTL_DISCARD;
|
|
|
|
pipeline->vpc_unknown_9107 |= A6XX_VPC_UNKNOWN_9107_RASTER_DISCARD;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (tu_pipeline_static_state(pipeline, &cs, TU_DYNAMIC_STATE_RASTERIZER_DISCARD, 4)) {
|
|
|
|
tu_cs_emit_regs(&cs, A6XX_PC_RASTER_CNTL(.dword = pipeline->pc_raster_cntl));
|
|
|
|
tu_cs_emit_regs(&cs, A6XX_VPC_UNKNOWN_9107(.dword = pipeline->vpc_unknown_9107));
|
|
|
|
}
|
|
|
|
|
2020-06-14 15:52:37 +01:00
|
|
|
pipeline->gras_su_cntl =
|
2021-10-04 02:24:58 +01:00
|
|
|
tu6_gras_su_cntl(rast_info, pipeline->line_mode, builder->multiview_mask != 0);
|
2019-02-27 07:29:51 +00:00
|
|
|
|
2020-09-17 15:16:42 +01:00
|
|
|
if (tu_pipeline_static_state(pipeline, &cs, TU_DYNAMIC_STATE_GRAS_SU_CNTL, 2))
|
2020-06-14 15:52:37 +01:00
|
|
|
tu_cs_emit_regs(&cs, A6XX_GRAS_SU_CNTL(.dword = pipeline->gras_su_cntl));
|
2019-02-27 07:29:51 +00:00
|
|
|
|
2020-06-14 15:52:37 +01:00
|
|
|
if (tu_pipeline_static_state(pipeline, &cs, VK_DYNAMIC_STATE_DEPTH_BIAS, 4)) {
|
|
|
|
tu6_emit_depth_bias(&cs, rast_info->depthBiasConstantFactor,
|
2019-02-27 07:29:51 +00:00
|
|
|
rast_info->depthBiasClamp,
|
|
|
|
rast_info->depthBiasSlopeFactor);
|
|
|
|
}
|
|
|
|
|
2021-06-01 10:49:31 +01:00
|
|
|
const struct VkPipelineRasterizationProvokingVertexStateCreateInfoEXT *provoking_vtx_state =
|
|
|
|
vk_find_struct_const(rast_info->pNext, PIPELINE_RASTERIZATION_PROVOKING_VERTEX_STATE_CREATE_INFO_EXT);
|
|
|
|
pipeline->provoking_vertex_last = provoking_vtx_state &&
|
|
|
|
provoking_vtx_state->provokingVertexMode == VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT;
|
2019-02-27 07:29:51 +00:00
|
|
|
}
|
|
|
|
|
2019-02-21 19:46:59 +00:00
|
|
|
static void
|
|
|
|
tu_pipeline_builder_parse_depth_stencil(struct tu_pipeline_builder *builder,
|
|
|
|
struct tu_pipeline *pipeline)
|
|
|
|
{
|
|
|
|
/* The spec says:
|
|
|
|
*
|
|
|
|
* pDepthStencilState is a pointer to an instance of the
|
|
|
|
* VkPipelineDepthStencilStateCreateInfo structure, and is ignored if
|
|
|
|
* the pipeline has rasterization disabled or if the subpass of the
|
|
|
|
* render pass the pipeline is created against does not use a
|
|
|
|
* depth/stencil attachment.
|
|
|
|
*/
|
|
|
|
const VkPipelineDepthStencilStateCreateInfo *ds_info =
|
2020-09-17 15:16:42 +01:00
|
|
|
builder->create_info->pDepthStencilState;
|
2022-05-26 22:40:49 +01:00
|
|
|
const enum pipe_format pipe_format =
|
|
|
|
vk_format_to_pipe_format(builder->depth_attachment_format);
|
2020-09-17 15:16:42 +01:00
|
|
|
uint32_t rb_depth_cntl = 0, rb_stencil_cntl = 0;
|
2020-06-14 15:52:37 +01:00
|
|
|
struct tu_cs cs;
|
2019-02-21 19:46:59 +00:00
|
|
|
|
2020-09-17 15:16:42 +01:00
|
|
|
if (builder->depth_attachment_format != VK_FORMAT_UNDEFINED &&
|
|
|
|
builder->depth_attachment_format != VK_FORMAT_S8_UINT) {
|
|
|
|
if (ds_info->depthTestEnable) {
|
|
|
|
rb_depth_cntl |=
|
2021-08-17 16:19:06 +01:00
|
|
|
A6XX_RB_DEPTH_CNTL_Z_TEST_ENABLE |
|
2020-09-17 15:16:42 +01:00
|
|
|
A6XX_RB_DEPTH_CNTL_ZFUNC(tu6_compare_func(ds_info->depthCompareOp)) |
|
2021-08-17 16:19:06 +01:00
|
|
|
A6XX_RB_DEPTH_CNTL_Z_READ_ENABLE; /* TODO: don't set for ALWAYS/NEVER */
|
2020-09-17 15:16:42 +01:00
|
|
|
|
2022-06-30 01:56:29 +01:00
|
|
|
if (builder->depth_clip_disable)
|
2022-06-28 06:11:15 +01:00
|
|
|
rb_depth_cntl |= A6XX_RB_DEPTH_CNTL_Z_CLIP_DISABLE;
|
2020-09-17 15:16:42 +01:00
|
|
|
|
|
|
|
if (ds_info->depthWriteEnable)
|
|
|
|
rb_depth_cntl |= A6XX_RB_DEPTH_CNTL_Z_WRITE_ENABLE;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (ds_info->depthBoundsTestEnable)
|
2021-08-17 16:19:06 +01:00
|
|
|
rb_depth_cntl |= A6XX_RB_DEPTH_CNTL_Z_BOUNDS_ENABLE | A6XX_RB_DEPTH_CNTL_Z_READ_ENABLE;
|
2021-08-17 12:59:56 +01:00
|
|
|
|
|
|
|
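/* The depth bounds test only takes effect when the Z test block is enabled,
 * so when the app enables depth bounds without depth testing, the workaround
 * below enables the Z test with a compare func that always passes (see
 * tu6_apply_depth_bounds_workaround()).
 */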
if (ds_info->depthBoundsTestEnable && !ds_info->depthTestEnable)
|
|
|
|
tu6_apply_depth_bounds_workaround(builder->device, &rb_depth_cntl);
|
2022-05-26 22:40:49 +01:00
|
|
|
|
|
|
|
pipeline->depth_cpp_per_sample = util_format_get_component_bits(
|
|
|
|
pipe_format, UTIL_FORMAT_COLORSPACE_ZS, 0) / 8;
|
2020-09-17 15:16:42 +01:00
|
|
|
} else {
|
|
|
|
/* if RB_DEPTH_CNTL is set dynamically, we need to make sure it is set
|
|
|
|
* to 0 when this pipeline is used, as enabling depth test when there
|
|
|
|
* is no depth attachment is a problem (at least for the S8_UINT case)
|
|
|
|
*/
|
|
|
|
if (pipeline->dynamic_state_mask & BIT(TU_DYNAMIC_STATE_RB_DEPTH_CNTL))
|
|
|
|
pipeline->rb_depth_cntl_disable = true;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (builder->depth_attachment_format != VK_FORMAT_UNDEFINED) {
|
|
|
|
const VkStencilOpState *front = &ds_info->front;
|
|
|
|
const VkStencilOpState *back = &ds_info->back;
|
|
|
|
|
|
|
|
rb_stencil_cntl |=
|
|
|
|
A6XX_RB_STENCIL_CONTROL_FUNC(tu6_compare_func(front->compareOp)) |
|
|
|
|
A6XX_RB_STENCIL_CONTROL_FAIL(tu6_stencil_op(front->failOp)) |
|
|
|
|
A6XX_RB_STENCIL_CONTROL_ZPASS(tu6_stencil_op(front->passOp)) |
|
|
|
|
A6XX_RB_STENCIL_CONTROL_ZFAIL(tu6_stencil_op(front->depthFailOp)) |
|
|
|
|
A6XX_RB_STENCIL_CONTROL_FUNC_BF(tu6_compare_func(back->compareOp)) |
|
|
|
|
A6XX_RB_STENCIL_CONTROL_FAIL_BF(tu6_stencil_op(back->failOp)) |
|
|
|
|
A6XX_RB_STENCIL_CONTROL_ZPASS_BF(tu6_stencil_op(back->passOp)) |
|
|
|
|
A6XX_RB_STENCIL_CONTROL_ZFAIL_BF(tu6_stencil_op(back->depthFailOp));
|
|
|
|
|
|
|
|
if (ds_info->stencilTestEnable) {
|
|
|
|
rb_stencil_cntl |=
|
|
|
|
A6XX_RB_STENCIL_CONTROL_STENCIL_ENABLE |
|
|
|
|
A6XX_RB_STENCIL_CONTROL_STENCIL_ENABLE_BF |
|
|
|
|
A6XX_RB_STENCIL_CONTROL_STENCIL_READ;
|
|
|
|
}
|
2022-05-26 22:40:49 +01:00
|
|
|
|
|
|
|
pipeline->stencil_cpp_per_sample = util_format_get_component_bits(
|
|
|
|
pipe_format, UTIL_FORMAT_COLORSPACE_ZS, 1) / 8;
|
2020-09-17 15:16:42 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
if (tu_pipeline_static_state(pipeline, &cs, TU_DYNAMIC_STATE_RB_DEPTH_CNTL, 2)) {
|
|
|
|
tu_cs_emit_pkt4(&cs, REG_A6XX_RB_DEPTH_CNTL, 1);
|
|
|
|
tu_cs_emit(&cs, rb_depth_cntl);
|
|
|
|
}
|
2021-02-11 06:53:06 +00:00
|
|
|
pipeline->rb_depth_cntl = rb_depth_cntl;
|
2020-09-17 15:16:42 +01:00
|
|
|
|
|
|
|
if (tu_pipeline_static_state(pipeline, &cs, TU_DYNAMIC_STATE_RB_STENCIL_CNTL, 2)) {
|
|
|
|
tu_cs_emit_pkt4(&cs, REG_A6XX_RB_STENCIL_CONTROL, 1);
|
|
|
|
tu_cs_emit(&cs, rb_stencil_cntl);
|
|
|
|
}
|
2021-02-11 06:53:06 +00:00
|
|
|
pipeline->rb_stencil_cntl = rb_stencil_cntl;
|
2020-09-17 15:16:42 +01:00
|
|
|
|
|
|
|
/* the remaining draw states aren't used if there is no d/s, leave them empty */
|
|
|
|
if (builder->depth_attachment_format == VK_FORMAT_UNDEFINED)
|
|
|
|
return;
|
2019-02-21 19:46:59 +00:00
|
|
|
|
2020-06-23 23:45:32 +01:00
|
|
|
if (tu_pipeline_static_state(pipeline, &cs, VK_DYNAMIC_STATE_DEPTH_BOUNDS, 3)) {
|
|
|
|
tu_cs_emit_regs(&cs,
|
|
|
|
A6XX_RB_Z_BOUNDS_MIN(ds_info->minDepthBounds),
|
|
|
|
A6XX_RB_Z_BOUNDS_MAX(ds_info->maxDepthBounds));
|
|
|
|
}
|
|
|
|
|
2020-06-14 15:52:37 +01:00
|
|
|
if (tu_pipeline_static_state(pipeline, &cs, VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK, 2)) {
|
|
|
|
tu_cs_emit_regs(&cs, A6XX_RB_STENCILMASK(.mask = ds_info->front.compareMask & 0xff,
|
|
|
|
.bfmask = ds_info->back.compareMask & 0xff));
|
2019-02-21 19:46:59 +00:00
|
|
|
}
|
2020-06-14 15:52:37 +01:00
|
|
|
|
|
|
|
if (tu_pipeline_static_state(pipeline, &cs, VK_DYNAMIC_STATE_STENCIL_WRITE_MASK, 2)) {
|
2021-01-21 14:40:19 +00:00
|
|
|
update_stencil_mask(&pipeline->stencil_wrmask, VK_STENCIL_FACE_FRONT_BIT, ds_info->front.writeMask);
|
|
|
|
update_stencil_mask(&pipeline->stencil_wrmask, VK_STENCIL_FACE_BACK_BIT, ds_info->back.writeMask);
|
|
|
|
tu_cs_emit_regs(&cs, A6XX_RB_STENCILWRMASK(.dword = pipeline->stencil_wrmask));
|
2019-02-21 19:46:59 +00:00
|
|
|
}
|
|
|
|
|
2020-06-14 15:52:37 +01:00
|
|
|
if (tu_pipeline_static_state(pipeline, &cs, VK_DYNAMIC_STATE_STENCIL_REFERENCE, 2)) {
|
|
|
|
tu_cs_emit_regs(&cs, A6XX_RB_STENCILREF(.ref = ds_info->front.reference & 0xff,
|
|
|
|
.bfref = ds_info->back.reference & 0xff));
|
|
|
|
}
|
2020-05-19 16:19:29 +01:00
|
|
|
|
2022-02-17 19:48:36 +00:00
|
|
|
if (builder->shaders->variants[MESA_SHADER_FRAGMENT]) {
|
|
|
|
const struct ir3_shader_variant *fs = builder->shaders->variants[MESA_SHADER_FRAGMENT];
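/* A fragment shader that can discard, needs late Z, or writes gl_FragDepth
 * makes the LRZ buffer contents unreliable, so LRZ writes (or LRZ entirely)
 * have to be disabled for draws using this pipeline.
 */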
|
2020-06-10 08:35:59 +01:00
|
|
|
if (fs->has_kill || fs->no_earlyz || fs->writes_pos) {
|
2021-01-19 09:07:28 +00:00
|
|
|
pipeline->lrz.force_disable_mask |= TU_LRZ_FORCE_DISABLE_WRITE;
|
2020-06-10 08:35:59 +01:00
|
|
|
}
|
|
|
|
if (fs->no_earlyz || fs->writes_pos) {
|
2021-01-19 09:07:28 +00:00
|
|
|
pipeline->lrz.force_disable_mask = TU_LRZ_FORCE_DISABLE_LRZ;
|
2020-06-10 08:35:59 +01:00
|
|
|
}
|
|
|
|
}
|
2019-02-21 19:46:59 +00:00
|
|
|
}
|
|
|
|
|
2019-02-21 22:58:52 +00:00
|
|
|
static void
|
|
|
|
tu_pipeline_builder_parse_multisample_and_color_blend(
|
|
|
|
struct tu_pipeline_builder *builder, struct tu_pipeline *pipeline)
|
|
|
|
{
|
|
|
|
/* The spec says:
|
|
|
|
*
|
|
|
|
* pMultisampleState is a pointer to an instance of the
|
|
|
|
* VkPipelineMultisampleStateCreateInfo, and is ignored if the pipeline
|
|
|
|
* has rasterization disabled.
|
|
|
|
*
|
|
|
|
* Also,
|
|
|
|
*
|
|
|
|
* pColorBlendState is a pointer to an instance of the
|
|
|
|
* VkPipelineColorBlendStateCreateInfo structure, and is ignored if the
|
|
|
|
* pipeline has rasterization disabled or if the subpass of the render
|
|
|
|
* pass the pipeline is created against does not use any color
|
|
|
|
* attachments.
|
|
|
|
*
|
|
|
|
* We leave the relevant registers stale when rasterization is disabled.
|
|
|
|
*/
|
|
|
|
if (builder->rasterizer_discard)
|
|
|
|
return;
|
|
|
|
|
|
|
|
static const VkPipelineColorBlendStateCreateInfo dummy_blend_info;
|
|
|
|
const VkPipelineMultisampleStateCreateInfo *msaa_info =
|
|
|
|
builder->create_info->pMultisampleState;
|
|
|
|
const VkPipelineColorBlendStateCreateInfo *blend_info =
|
|
|
|
builder->use_color_attachments ? builder->create_info->pColorBlendState
|
|
|
|
: &dummy_blend_info;
|
|
|
|
|
2020-06-14 15:52:37 +01:00
|
|
|
struct tu_cs cs;
|
2022-06-07 00:48:44 +01:00
|
|
|
tu6_emit_rb_mrt_controls(pipeline, blend_info,
|
2019-02-21 22:58:52 +00:00
|
|
|
builder->color_attachment_formats,
|
2022-06-07 00:48:44 +01:00
|
|
|
&pipeline->rop_reads_dst,
|
2022-05-26 22:40:49 +01:00
|
|
|
&pipeline->color_bandwidth_per_sample);
|
2019-02-21 22:58:52 +00:00
|
|
|
|
2022-06-07 00:48:44 +01:00
|
|
|
uint32_t blend_enable_mask =
|
|
|
|
pipeline->rop_reads_dst ? pipeline->color_write_enable : pipeline->blend_enable;
|
|
|
|
tu6_emit_blend_control(pipeline, blend_enable_mask,
|
2020-06-14 15:52:37 +01:00
|
|
|
builder->use_dual_src_blend, msaa_info);
|
2020-04-21 17:14:23 +01:00
|
|
|
|
2022-06-07 00:48:44 +01:00
|
|
|
if (tu_pipeline_static_state(pipeline, &cs, TU_DYNAMIC_STATE_BLEND,
|
|
|
|
blend_info->attachmentCount * 3 + 4)) {
|
|
|
|
tu6_emit_blend(&cs, pipeline);
|
|
|
|
assert(cs.cur == cs.end); /* validate draw state size */
|
|
|
|
}
|
2020-04-21 17:14:23 +01:00
|
|
|
|
2022-06-07 21:29:57 +01:00
|
|
|
/* Disable LRZ writes when blending or a logic op that reads the destination is
|
|
|
|
* enabled, since the resulting pixel value from the blend-draw depends on
|
|
|
|
* an earlier draw, which LRZ in the draw pass could early-reject if the
|
|
|
|
* previous blend-enabled draw wrote LRZ.
|
|
|
|
*
|
|
|
|
* TODO: We need to disable LRZ writes only for the binning pass.
|
|
|
|
* Therefore, we need to emit it in a separate draw state. We keep
|
|
|
|
* it disabled for sysmem path as well for the moment.
|
|
|
|
*/
|
|
|
|
if (blend_enable_mask)
|
|
|
|
pipeline->lrz.force_disable_mask |= TU_LRZ_FORCE_DISABLE_WRITE;
|
|
|
|
|
|
|
|
for (int i = 0; i < blend_info->attachmentCount; i++) {
|
|
|
|
VkPipelineColorBlendAttachmentState blendAttachment = blend_info->pAttachments[i];
|
|
|
|
/* From the PoV of LRZ, having masked color channels is
|
|
|
|
* the same as having blend enabled, in that the draw will
|
|
|
|
* care about the fragments from an earlier draw.
|
|
|
|
*/
|
|
|
|
VkFormat format = builder->color_attachment_formats[i];
|
|
|
|
unsigned mask = MASK(vk_format_get_nr_components(format));
|
|
|
|
if (format != VK_FORMAT_UNDEFINED &&
|
2022-06-08 16:11:09 +01:00
|
|
|
((blendAttachment.colorWriteMask & mask) != mask ||
|
|
|
|
!(pipeline->color_write_enable & BIT(i)))) {
|
2022-06-07 21:29:57 +01:00
|
|
|
pipeline->lrz.force_disable_mask |= TU_LRZ_FORCE_DISABLE_WRITE;
|
2020-09-11 14:16:39 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-06-14 15:52:37 +01:00
|
|
|
if (tu_pipeline_static_state(pipeline, &cs, VK_DYNAMIC_STATE_BLEND_CONSTANTS, 5)) {
|
|
|
|
tu_cs_emit_pkt4(&cs, REG_A6XX_RB_BLEND_RED_F32, 4);
|
|
|
|
tu_cs_emit_array(&cs, (const uint32_t *) blend_info->blendConstants, 4);
|
2020-04-21 17:14:23 +01:00
|
|
|
}
|
|
|
|
|
2020-06-14 15:52:37 +01:00
|
|
|
const struct VkPipelineSampleLocationsStateCreateInfoEXT *sample_locations =
|
|
|
|
vk_find_struct_const(msaa_info->pNext, PIPELINE_SAMPLE_LOCATIONS_STATE_CREATE_INFO_EXT);
|
|
|
|
const VkSampleLocationsInfoEXT *samp_loc = NULL;
|
|
|
|
|
|
|
|
if (sample_locations && sample_locations->sampleLocationsEnable)
|
|
|
|
samp_loc = &sample_locations->sampleLocationsInfo;
|
2019-02-21 22:58:52 +00:00
|
|
|
|
2020-06-14 15:52:37 +01:00
|
|
|
if (tu_pipeline_static_state(pipeline, &cs, TU_DYNAMIC_STATE_SAMPLE_LOCATIONS,
|
|
|
|
samp_loc ? 9 : 6)) {
|
|
|
|
tu6_emit_sample_locations(&cs, samp_loc);
|
|
|
|
}
|
2019-02-21 22:58:52 +00:00
|
|
|
}
|
|
|
|
|
2022-02-18 17:15:03 +00:00
|
|
|
static void
|
|
|
|
tu_pipeline_builder_parse_rasterization_order(
|
|
|
|
struct tu_pipeline_builder *builder, struct tu_pipeline *pipeline)
|
|
|
|
{
|
|
|
|
if (builder->rasterizer_discard)
|
|
|
|
return;
|
|
|
|
|
|
|
|
pipeline->subpass_feedback_loop_ds = builder->subpass_feedback_loop_ds;
|
|
|
|
|
|
|
|
const VkPipelineColorBlendStateCreateInfo *blend_info =
|
|
|
|
builder->create_info->pColorBlendState;
|
|
|
|
|
|
|
|
const VkPipelineDepthStencilStateCreateInfo *ds_info =
|
|
|
|
builder->create_info->pDepthStencilState;
|
|
|
|
|
|
|
|
if (builder->use_color_attachments) {
|
|
|
|
pipeline->raster_order_attachment_access =
|
|
|
|
blend_info->flags &
|
|
|
|
VK_PIPELINE_COLOR_BLEND_STATE_CREATE_RASTERIZATION_ORDER_ATTACHMENT_ACCESS_BIT_ARM;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (builder->depth_attachment_format != VK_FORMAT_UNDEFINED) {
|
|
|
|
pipeline->raster_order_attachment_access |=
|
|
|
|
ds_info->flags &
|
|
|
|
(VK_PIPELINE_DEPTH_STENCIL_STATE_CREATE_RASTERIZATION_ORDER_ATTACHMENT_DEPTH_ACCESS_BIT_ARM |
|
|
|
|
VK_PIPELINE_DEPTH_STENCIL_STATE_CREATE_RASTERIZATION_ORDER_ATTACHMENT_STENCIL_ACCESS_BIT_ARM);
|
|
|
|
}
|
|
|
|
|
2022-03-07 10:13:45 +00:00
|
|
|
if (unlikely(builder->device->physical_device->instance->debug_flags & TU_DEBUG_RAST_ORDER))
|
|
|
|
pipeline->raster_order_attachment_access = true;
|
|
|
|
|
2022-02-18 17:15:03 +00:00
|
|
|
/* VK_EXT_blend_operation_advanced would also require ordered access
|
|
|
|
* when implemented in the future.
|
|
|
|
*/
|
|
|
|
|
|
|
|
uint32_t sysmem_prim_mode = NO_FLUSH;
|
|
|
|
uint32_t gmem_prim_mode = NO_FLUSH;
|
|
|
|
|
|
|
|
if (pipeline->raster_order_attachment_access) {
|
|
|
|
/* VK_ARM_rasterization_order_attachment_access:
|
|
|
|
*
|
|
|
|
* This extension allows access to framebuffer attachments when used as
|
|
|
|
* both input and color attachments from one fragment to the next,
|
|
|
|
* in rasterization order, without explicit synchronization.
|
|
|
|
*/
|
|
|
|
sysmem_prim_mode = FLUSH_PER_OVERLAP_AND_OVERWRITE;
|
|
|
|
gmem_prim_mode = FLUSH_PER_OVERLAP;
|
|
|
|
} else {
|
|
|
|
/* If there is a feedback loop, then the shader can read the previous value
|
|
|
|
* of a pixel being written out. It can also write some components and then
|
|
|
|
* read different components without a barrier in between. This is a
|
|
|
|
* problem in sysmem mode with UBWC, because the main buffer and flags
|
|
|
|
* buffer can get out-of-sync if only one is flushed. We fix this by
|
|
|
|
* setting the SINGLE_PRIM_MODE field to the same value that the blob does
|
|
|
|
* for advanced_blend in sysmem mode if a feedback loop is detected.
|
|
|
|
*/
|
|
|
|
if (builder->subpass_feedback_loop_color ||
|
|
|
|
builder->subpass_feedback_loop_ds) {
|
|
|
|
sysmem_prim_mode = FLUSH_PER_OVERLAP_AND_OVERWRITE;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
struct tu_cs cs;
|
|
|
|
|
|
|
|
pipeline->prim_order_state_gmem = tu_cs_draw_state(&pipeline->cs, &cs, 2);
|
|
|
|
tu_cs_emit_write_reg(&cs, REG_A6XX_GRAS_SC_CNTL,
|
|
|
|
A6XX_GRAS_SC_CNTL_CCUSINGLECACHELINESIZE(2) |
|
|
|
|
A6XX_GRAS_SC_CNTL_SINGLE_PRIM_MODE(gmem_prim_mode));
|
|
|
|
|
|
|
|
pipeline->prim_order_state_sysmem = tu_cs_draw_state(&pipeline->cs, &cs, 2);
|
|
|
|
tu_cs_emit_write_reg(&cs, REG_A6XX_GRAS_SC_CNTL,
|
|
|
|
A6XX_GRAS_SC_CNTL_CCUSINGLECACHELINESIZE(2) |
|
|
|
|
A6XX_GRAS_SC_CNTL_SINGLE_PRIM_MODE(sysmem_prim_mode));
|
|
|
|
}
|
|
|
|
|
2019-02-21 17:22:17 +00:00
|
|
|
static void
|
|
|
|
tu_pipeline_finish(struct tu_pipeline *pipeline,
|
|
|
|
struct tu_device *dev,
|
|
|
|
const VkAllocationCallbacks *alloc)
|
|
|
|
{
|
2020-02-24 13:57:00 +00:00
|
|
|
tu_cs_finish(&pipeline->cs);
|
2022-02-14 22:45:01 +00:00
|
|
|
pthread_mutex_lock(&dev->pipeline_mutex);
|
|
|
|
tu_suballoc_bo_free(&dev->pipeline_suballoc, &pipeline->bo);
|
|
|
|
pthread_mutex_unlock(&dev->pipeline_mutex);
|
2021-02-04 13:12:35 +00:00
|
|
|
|
2022-02-02 17:29:34 +00:00
|
|
|
if (pipeline->pvtmem_bo)
|
|
|
|
tu_bo_finish(dev, pipeline->pvtmem_bo);
|
2021-05-17 10:40:05 +01:00
|
|
|
|
2021-02-04 13:12:35 +00:00
|
|
|
ralloc_free(pipeline->executables_mem_ctx);
|
2019-02-21 17:22:17 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
static VkResult
|
|
|
|
tu_pipeline_builder_build(struct tu_pipeline_builder *builder,
|
|
|
|
struct tu_pipeline **pipeline)
|
|
|
|
{
|
2020-06-23 15:37:56 +01:00
|
|
|
VkResult result;
|
|
|
|
|
2020-07-13 04:08:15 +01:00
|
|
|
*pipeline = vk_object_zalloc(&builder->device->vk, builder->alloc,
|
|
|
|
sizeof(**pipeline), VK_OBJECT_TYPE_PIPELINE);
|
2020-06-23 15:37:56 +01:00
|
|
|
if (!*pipeline)
|
|
|
|
return VK_ERROR_OUT_OF_HOST_MEMORY;
|
2019-02-21 17:22:17 +00:00
|
|
|
|
2021-02-04 13:12:35 +00:00
|
|
|
(*pipeline)->executables_mem_ctx = ralloc_context(NULL);
|
|
|
|
util_dynarray_init(&(*pipeline)->executables, (*pipeline)->executables_mem_ctx);
|
tu: Switch to the bindless descriptor model
Under the bindless model, there are 5 "base" registers programmed with a
64-bit address, and sam/ldib/ldc and so on each specify a base register
and an offset, in units of 16 dwords. The base registers correspond to
descriptor sets in Vulkan. We allocate a buffer at descriptor set
creation time, hopefully outside the main rendering loop, and then
switching descriptor sets is just a matter of programming the base
registers differently. Note, however, that some kinds of descriptors
need to be patched at command recording time, in particular dynamic
UBO's and SSBO's, which need to be patched at CmdBindDescriptorSets
time, and input attachments which need to be patched at draw time based
on the pipeline that's bound. We reserve the fifth base register
(which seems to be unused by the blob driver) for these, creating a
descriptor set on-the-fly and combining all the dynamic descriptors from
all the different descriptor sets. This way, we never have to copy the
rest of the descriptor set at draw time like the blob seems to do. I
mostly chose to do this because the infrastructure was already there in
the form of dynamic_descriptors, and other drivers (at least radv) don't
cheat either when implementing this.
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4358>
2020-03-16 10:49:19 +00:00
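/* Illustrative sketch (not driver code, names are made up): a bindless
 * descriptor reference is a base register index plus an offset in units of
 * 16 dwords (64 bytes), so resolving one on the CPU side would look roughly
 * like:
 *
 *    uint64_t base_iova = set_va;   // what the BINDLESS_BASE register points at
 *    uint64_t desc_iova = base_iova + (uint64_t) offset_in_16dw_units * 64;
 *
 * The real descriptor layout and patching live in the descriptor set code.
 */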
|
|
|
|
2019-02-27 06:09:37 +00:00
|
|
|
/* compile and upload shaders */
|
2020-05-15 18:52:43 +01:00
|
|
|
result = tu_pipeline_builder_compile_shaders(builder, *pipeline);
|
2019-02-27 06:09:37 +00:00
|
|
|
if (result != VK_SUCCESS) {
|
2020-07-13 04:08:15 +01:00
|
|
|
vk_object_free(&builder->device->vk, builder->alloc, *pipeline);
|
2022-05-17 15:28:30 +01:00
|
|
|
return result;
|
2020-06-23 15:37:56 +01:00
|
|
|
}
|
2019-02-27 06:09:37 +00:00
|
|
|
|
2022-03-18 13:46:43 +00:00
|
|
|
result = tu_pipeline_allocate_cs(builder->device, *pipeline,
|
2022-02-17 19:48:36 +00:00
|
|
|
builder->layout, builder, NULL);
|
2020-06-23 15:37:56 +01:00
|
|
|
if (result != VK_SUCCESS) {
|
2020-07-13 04:08:15 +01:00
|
|
|
vk_object_free(&builder->device->vk, builder->alloc, *pipeline);
|
2019-02-27 06:09:37 +00:00
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
2022-02-17 19:48:36 +00:00
|
|
|
for (uint32_t i = 0; i < ARRAY_SIZE(builder->shader_iova); i++)
|
|
|
|
builder->shader_iova[i] =
|
|
|
|
tu_upload_variant(*pipeline, builder->shaders->variants[i]);
|
2020-06-23 15:37:56 +01:00
|
|
|
|
|
|
|
builder->binning_vs_iova =
|
|
|
|
tu_upload_variant(*pipeline, builder->binning_variant);
|
|
|
|
|
2020-10-29 14:13:00 +00:00
|
|
|
/* Setup private memory. Note that because we're sharing the same private
|
|
|
|
* memory for all stages, all stages must use the same config, or else
|
|
|
|
* fibers from one stage might overwrite fibers in another.
|
|
|
|
*/
|
|
|
|
|
|
|
|
uint32_t pvtmem_size = 0;
|
|
|
|
bool per_wave = true;
|
2022-02-17 19:48:36 +00:00
|
|
|
for (uint32_t i = 0; i < ARRAY_SIZE(builder->shaders->variants); i++) {
|
|
|
|
if (builder->shaders->variants[i]) {
|
|
|
|
pvtmem_size = MAX2(pvtmem_size, builder->shaders->variants[i]->pvtmem_size);
|
|
|
|
if (!builder->shaders->variants[i]->pvtmem_per_wave)
|
2020-10-29 14:13:00 +00:00
|
|
|
per_wave = false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (builder->binning_variant) {
|
|
|
|
pvtmem_size = MAX2(pvtmem_size, builder->binning_variant->pvtmem_size);
|
|
|
|
if (!builder->binning_variant->pvtmem_per_wave)
|
|
|
|
per_wave = false;
|
|
|
|
}
|
|
|
|
|
2021-05-17 10:40:05 +01:00
|
|
|
result = tu_setup_pvtmem(builder->device, *pipeline, &builder->pvtmem,
|
|
|
|
pvtmem_size, per_wave);
|
|
|
|
if (result != VK_SUCCESS) {
|
|
|
|
vk_object_free(&builder->device->vk, builder->alloc, *pipeline);
|
|
|
|
return result;
|
|
|
|
}
|
2020-10-29 14:13:00 +00:00
|
|
|
|
2019-02-21 17:41:49 +00:00
|
|
|
tu_pipeline_builder_parse_dynamic(builder, *pipeline);
|
2019-02-27 06:10:34 +00:00
|
|
|
tu_pipeline_builder_parse_shader_stages(builder, *pipeline);
|
2019-02-22 06:31:36 +00:00
|
|
|
tu_pipeline_builder_parse_vertex_input(builder, *pipeline);
|
2019-02-21 19:07:38 +00:00
|
|
|
tu_pipeline_builder_parse_input_assembly(builder, *pipeline);
|
2020-04-24 16:51:04 +01:00
|
|
|
tu_pipeline_builder_parse_tessellation(builder, *pipeline);
|
2019-02-19 21:49:01 +00:00
|
|
|
tu_pipeline_builder_parse_viewport(builder, *pipeline);
|
2019-02-27 07:29:51 +00:00
|
|
|
tu_pipeline_builder_parse_rasterization(builder, *pipeline);
|
2019-02-21 19:46:59 +00:00
|
|
|
tu_pipeline_builder_parse_depth_stencil(builder, *pipeline);
|
2019-02-21 22:58:52 +00:00
|
|
|
tu_pipeline_builder_parse_multisample_and_color_blend(builder, *pipeline);
|
2022-02-18 17:15:03 +00:00
|
|
|
tu_pipeline_builder_parse_rasterization_order(builder, *pipeline);
|
2022-03-18 13:46:43 +00:00
|
|
|
tu6_emit_load_state(*pipeline, builder->layout, false);
|
2019-02-21 17:41:49 +00:00
|
|
|
|
2019-02-21 17:22:17 +00:00
|
|
|
return VK_SUCCESS;
|
|
|
|
}
|
|
|
|
|
2019-02-27 06:09:37 +00:00
|
|
|
static void
|
|
|
|
tu_pipeline_builder_finish(struct tu_pipeline_builder *builder)
|
|
|
|
{
|
2022-02-17 19:48:36 +00:00
|
|
|
if (builder->shaders)
|
|
|
|
vk_pipeline_cache_object_unref(&builder->shaders->base);
|
2022-03-10 19:15:16 +00:00
|
|
|
ralloc_free(builder->mem_ctx);
|
2019-02-27 06:09:37 +00:00
|
|
|
}
|
|
|
|
|
2019-02-21 17:22:17 +00:00
|
|
|
static void
|
|
|
|
tu_pipeline_builder_init_graphics(
|
|
|
|
struct tu_pipeline_builder *builder,
|
|
|
|
struct tu_device *dev,
|
2022-02-17 19:48:36 +00:00
|
|
|
struct vk_pipeline_cache *cache,
|
2019-02-21 17:22:17 +00:00
|
|
|
const VkGraphicsPipelineCreateInfo *create_info,
|
|
|
|
const VkAllocationCallbacks *alloc)
|
|
|
|
{
|
2019-12-14 06:05:11 +00:00
|
|
|
TU_FROM_HANDLE(tu_pipeline_layout, layout, create_info->layout);
|
|
|
|
|
2019-02-21 17:22:17 +00:00
|
|
|
*builder = (struct tu_pipeline_builder) {
|
|
|
|
.device = dev,
|
2022-03-10 19:15:16 +00:00
|
|
|
.mem_ctx = ralloc_context(NULL),
|
2019-02-21 17:22:17 +00:00
|
|
|
.cache = cache,
|
|
|
|
.create_info = create_info,
|
|
|
|
.alloc = alloc,
|
2019-12-14 06:05:11 +00:00
|
|
|
.layout = layout,
|
2019-02-21 17:22:17 +00:00
|
|
|
};
|
2019-02-19 21:49:01 +00:00
|
|
|
|
2021-08-10 04:32:15 +01:00
|
|
|
bool rasterizer_discard_dynamic = false;
|
|
|
|
if (create_info->pDynamicState) {
|
|
|
|
for (uint32_t i = 0; i < create_info->pDynamicState->dynamicStateCount; i++) {
|
|
|
|
if (create_info->pDynamicState->pDynamicStates[i] ==
|
2022-07-01 13:04:48 +01:00
|
|
|
VK_DYNAMIC_STATE_RASTERIZER_DISCARD_ENABLE) {
|
2021-08-10 04:32:15 +01:00
|
|
|
rasterizer_discard_dynamic = true;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-07-02 10:33:42 +01:00
|
|
|
const struct tu_render_pass *pass =
|
|
|
|
tu_render_pass_from_handle(create_info->renderPass);
|
|
|
|
const struct tu_subpass *subpass =
|
|
|
|
&pass->subpasses[create_info->subpass];
|
|
|
|
|
2022-02-18 17:15:03 +00:00
|
|
|
builder->subpass_raster_order_attachment_access =
|
|
|
|
subpass->raster_order_attachment_access;
|
|
|
|
builder->subpass_feedback_loop_color = subpass->feedback_loop_color;
|
2022-02-23 09:44:23 +00:00
|
|
|
builder->subpass_feedback_loop_ds = subpass->feedback_loop_ds;
|
|
|
|
|
2020-07-02 10:33:42 +01:00
|
|
|
builder->multiview_mask = subpass->multiview_mask;
|
|
|
|
|
2019-02-19 21:49:01 +00:00
|
|
|
builder->rasterizer_discard =
|
2021-08-10 04:32:15 +01:00
|
|
|
builder->create_info->pRasterizationState->rasterizerDiscardEnable &&
|
|
|
|
!rasterizer_discard_dynamic;
|
2019-02-27 07:29:51 +00:00
|
|
|
|
2021-03-11 14:22:38 +00:00
|
|
|
/* variableMultisampleRate support */
|
|
|
|
builder->emit_msaa_state = (subpass->samples == 0) && !builder->rasterizer_discard;
|
|
|
|
|
2019-02-21 19:46:59 +00:00
|
|
|
if (builder->rasterizer_discard) {
|
2019-02-27 07:29:51 +00:00
|
|
|
builder->samples = VK_SAMPLE_COUNT_1_BIT;
|
2019-02-21 19:46:59 +00:00
|
|
|
} else {
|
2019-02-27 07:29:51 +00:00
|
|
|
builder->samples = create_info->pMultisampleState->rasterizationSamples;
|
2021-02-08 20:11:37 +00:00
|
|
|
builder->alpha_to_coverage = create_info->pMultisampleState->alphaToCoverageEnable;
|
2019-02-21 19:46:59 +00:00
|
|
|
|
2020-04-10 00:13:44 +01:00
|
|
|
const uint32_t a = subpass->depth_stencil_attachment.attachment;
|
|
|
|
builder->depth_attachment_format = (a != VK_ATTACHMENT_UNUSED) ?
|
|
|
|
pass->attachments[a].format : VK_FORMAT_UNDEFINED;
|
2019-02-21 22:58:52 +00:00
|
|
|
|
2019-10-14 16:41:08 +01:00
|
|
|
assert(subpass->color_count == 0 ||
|
2019-12-06 01:58:58 +00:00
|
|
|
!create_info->pColorBlendState ||
|
2019-10-14 16:41:08 +01:00
|
|
|
subpass->color_count == create_info->pColorBlendState->attachmentCount);
|
2019-02-27 06:10:34 +00:00
|
|
|
builder->color_attachment_count = subpass->color_count;
|
2019-02-21 22:58:52 +00:00
|
|
|
for (uint32_t i = 0; i < subpass->color_count; i++) {
|
|
|
|
const uint32_t a = subpass->color_attachments[i].attachment;
|
|
|
|
if (a == VK_ATTACHMENT_UNUSED)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
builder->color_attachment_formats[i] = pass->attachments[a].format;
|
|
|
|
builder->use_color_attachments = true;
|
2020-06-02 03:02:19 +01:00
|
|
|
builder->render_components |= 0xf << (i * 4);
|
2019-02-21 22:58:52 +00:00
|
|
|
}
|
2020-05-14 16:17:46 +01:00
|
|
|
|
|
|
|
if (tu_blend_state_is_dual_src(create_info->pColorBlendState)) {
|
|
|
|
builder->color_attachment_count++;
|
|
|
|
builder->use_dual_src_blend = true;
|
2020-06-02 03:02:19 +01:00
|
|
|
/* dual source blending has an extra fs output in the 2nd slot */
|
|
|
|
if (subpass->color_attachments[0].attachment != VK_ATTACHMENT_UNUSED)
|
|
|
|
builder->render_components |= 0xf << 4;
|
2020-05-14 16:17:46 +01:00
|
|
|
}
|
2019-02-21 19:46:59 +00:00
|
|
|
}
|
2019-02-21 17:22:17 +00:00
|
|
|
}
|
|
|
|
|
2019-11-27 00:42:24 +00:00
|
|
|
static VkResult
|
|
|
|
tu_graphics_pipeline_create(VkDevice device,
|
|
|
|
VkPipelineCache pipelineCache,
|
|
|
|
const VkGraphicsPipelineCreateInfo *pCreateInfo,
|
|
|
|
const VkAllocationCallbacks *pAllocator,
|
|
|
|
VkPipeline *pPipeline)
|
|
|
|
{
|
|
|
|
TU_FROM_HANDLE(tu_device, dev, device);
|
2022-02-17 19:48:36 +00:00
|
|
|
TU_FROM_HANDLE(vk_pipeline_cache, cache, pipelineCache);
|
|
|
|
|
|
|
|
cache = cache ? cache : dev->mem_cache;
|
2019-11-27 00:42:24 +00:00
|
|
|
|
|
|
|
struct tu_pipeline_builder builder;
|
|
|
|
tu_pipeline_builder_init_graphics(&builder, dev, cache,
|
|
|
|
pCreateInfo, pAllocator);
|
|
|
|
|
|
|
|
struct tu_pipeline *pipeline = NULL;
|
|
|
|
VkResult result = tu_pipeline_builder_build(&builder, &pipeline);
|
|
|
|
tu_pipeline_builder_finish(&builder);
|
|
|
|
|
|
|
|
if (result == VK_SUCCESS)
|
|
|
|
*pPipeline = tu_pipeline_to_handle(pipeline);
|
|
|
|
else
|
2020-01-02 11:29:29 +00:00
|
|
|
*pPipeline = VK_NULL_HANDLE;
|
2019-11-27 00:42:24 +00:00
|
|
|
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
2021-06-07 05:59:32 +01:00
|
|
|
VKAPI_ATTR VkResult VKAPI_CALL
|
2019-02-21 17:22:17 +00:00
|
|
|
tu_CreateGraphicsPipelines(VkDevice device,
|
2018-11-05 06:42:55 +00:00
|
|
|
VkPipelineCache pipelineCache,
|
|
|
|
uint32_t count,
|
|
|
|
const VkGraphicsPipelineCreateInfo *pCreateInfos,
|
|
|
|
const VkAllocationCallbacks *pAllocator,
|
|
|
|
VkPipeline *pPipelines)
|
2018-08-08 23:23:57 +01:00
|
|
|
{
|
2019-09-19 19:09:46 +01:00
|
|
|
VkResult final_result = VK_SUCCESS;
|
2022-05-17 15:28:30 +01:00
|
|
|
uint32_t i = 0;
|
2018-08-08 23:23:57 +01:00
|
|
|
|
2022-05-17 15:28:30 +01:00
|
|
|
for (; i < count; i++) {
|
2019-11-27 00:42:24 +00:00
|
|
|
VkResult result = tu_graphics_pipeline_create(device, pipelineCache,
|
|
|
|
&pCreateInfos[i], pAllocator,
|
|
|
|
&pPipelines[i]);
|
|
|
|
|
2022-05-17 15:28:30 +01:00
|
|
|
if (result != VK_SUCCESS) {
|
2019-09-19 19:09:46 +01:00
|
|
|
final_result = result;
|
2022-05-17 15:28:30 +01:00
|
|
|
pPipelines[i] = VK_NULL_HANDLE;
|
|
|
|
|
|
|
|
if (pCreateInfos[i].flags &
|
|
|
|
VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT)
|
|
|
|
break;
|
|
|
|
}
|
2018-08-08 23:23:57 +01:00
|
|
|
}
|
|
|
|
|
2022-05-17 15:28:30 +01:00
|
|
|
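/* Per the Vulkan spec, when creation fails (in particular when we stop early
 * due to VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT) all remaining
 * entries of pPipelines must be set to VK_NULL_HANDLE.
 */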
for (; i < count; i++)
|
|
|
|
pPipelines[i] = VK_NULL_HANDLE;
|
|
|
|
|
2019-09-19 19:09:46 +01:00
|
|
|
return final_result;
|
2018-08-08 23:23:57 +01:00
|
|
|
}
|
|
|
|
|
2019-11-27 04:37:19 +00:00
|
|
|
static VkResult
|
|
|
|
tu_compute_pipeline_create(VkDevice device,
|
2022-02-14 22:45:01 +00:00
|
|
|
VkPipelineCache pipelineCache,
|
2018-11-05 06:42:55 +00:00
|
|
|
const VkComputePipelineCreateInfo *pCreateInfo,
|
|
|
|
const VkAllocationCallbacks *pAllocator,
|
|
|
|
VkPipeline *pPipeline)
|
2018-08-08 23:23:57 +01:00
|
|
|
{
|
2019-11-27 04:37:19 +00:00
|
|
|
TU_FROM_HANDLE(tu_device, dev, device);
|
2022-02-17 19:48:36 +00:00
|
|
|
TU_FROM_HANDLE(vk_pipeline_cache, cache, pipelineCache);
|
2019-12-14 06:05:11 +00:00
|
|
|
TU_FROM_HANDLE(tu_pipeline_layout, layout, pCreateInfo->layout);
|
2019-11-27 04:37:19 +00:00
|
|
|
const VkPipelineShaderStageCreateInfo *stage_info = &pCreateInfo->stage;
|
|
|
|
VkResult result;
|
|
|
|
|
2022-02-17 19:48:36 +00:00
|
|
|
cache = cache ? cache : dev->mem_cache;
|
|
|
|
|
2019-11-27 04:37:19 +00:00
|
|
|
struct tu_pipeline *pipeline;
|
|
|
|
|
2020-01-26 22:25:17 +00:00
|
|
|
*pPipeline = VK_NULL_HANDLE;
|
|
|
|
|
2022-07-01 13:04:48 +01:00
|
|
|
VkPipelineCreationFeedback pipeline_feedback = {
|
2022-05-16 18:11:42 +01:00
|
|
|
.flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT,
|
|
|
|
};
|
|
|
|
|
|
|
|
const VkPipelineCreationFeedbackCreateInfo *creation_feedback =
|
|
|
|
vk_find_struct_const(pCreateInfo->pNext, PIPELINE_CREATION_FEEDBACK_CREATE_INFO);
|
|
|
|
|
|
|
|
int64_t pipeline_start = os_time_get_nano();
|
|
|
|
|
2020-07-13 04:08:15 +01:00
|
|
|
pipeline = vk_object_zalloc(&dev->vk, pAllocator, sizeof(*pipeline),
|
|
|
|
VK_OBJECT_TYPE_PIPELINE);
|
2020-06-23 15:37:56 +01:00
|
|
|
if (!pipeline)
|
|
|
|
return VK_ERROR_OUT_OF_HOST_MEMORY;
|
2019-11-27 04:37:19 +00:00
|
|
|
|
2021-02-04 13:12:35 +00:00
|
|
|
pipeline->executables_mem_ctx = ralloc_context(NULL);
|
|
|
|
util_dynarray_init(&pipeline->executables, pipeline->executables_mem_ctx);
|
|
|
|
|
2022-05-17 15:06:38 +01:00
|
|
|
struct tu_shader_key key = { };
|
2022-02-17 19:48:36 +00:00
|
|
|
tu_shader_key_init(&key, stage_info, dev);
|
2019-11-27 04:37:19 +00:00
|
|
|
|
2022-03-10 19:15:16 +00:00
|
|
|
void *pipeline_mem_ctx = ralloc_context(NULL);
|
2022-02-17 19:48:36 +00:00
|
|
|
|
|
|
|
unsigned char pipeline_sha1[20];
|
|
|
|
tu_hash_compute(pipeline_sha1, stage_info, layout, &key, dev->compiler);
|
|
|
|
|
|
|
|
struct tu_compiled_shaders *compiled = NULL;
|
2020-07-06 17:16:39 +01:00
|
|
|
|
2021-02-04 13:12:35 +00:00
|
|
|
const bool executable_info = pCreateInfo->flags &
|
|
|
|
VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR;
|
|
|
|
|
2022-05-16 18:11:42 +01:00
|
|
|
bool application_cache_hit = false;
|
|
|
|
|
|
|
|
if (!executable_info) {
|
|
|
|
compiled =
|
|
|
|
tu_pipeline_cache_lookup(cache, pipeline_sha1, sizeof(pipeline_sha1),
|
|
|
|
&application_cache_hit);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (application_cache_hit && cache != dev->mem_cache) {
|
|
|
|
pipeline_feedback.flags |=
|
|
|
|
VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT;
|
|
|
|
}
|
2021-02-04 13:12:35 +00:00
|
|
|
|
2022-06-15 01:08:31 +01:00
|
|
|
if (tu6_shared_constants_enable(layout, dev->compiler)) {
|
2022-06-15 01:07:28 +01:00
|
|
|
pipeline->shared_consts = (struct tu_push_constant_range) {
|
|
|
|
.lo = 0,
|
|
|
|
.dwords = layout->push_constant_size / 4,
|
|
|
|
};
|
|
|
|
}
|
|
|
|
|
2022-02-17 19:48:36 +00:00
|
|
|
char *nir_initial_disasm = NULL;
|
2019-11-27 04:37:19 +00:00
|
|
|
|
2022-02-17 19:48:36 +00:00
|
|
|
if (!compiled) {
|
2022-05-17 15:28:30 +01:00
|
|
|
if (pCreateInfo->flags &
|
|
|
|
VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT) {
|
|
|
|
result = VK_PIPELINE_COMPILE_REQUIRED;
|
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
|
2022-02-17 19:48:36 +00:00
|
|
|
struct ir3_shader_key ir3_key = {};
|
2020-07-09 01:53:46 +01:00
|
|
|
|
2022-02-17 19:48:36 +00:00
|
|
|
nir_shader *nir = tu_spirv_to_nir(dev, pipeline_mem_ctx, stage_info,
|
|
|
|
MESA_SHADER_COMPUTE);
|
|
|
|
|
|
|
|
nir_initial_disasm = executable_info ?
|
|
|
|
nir_shader_as_str(nir, pipeline->executables_mem_ctx) : NULL;
|
|
|
|
|
|
|
|
struct tu_shader *shader =
|
|
|
|
tu_shader_create(dev, nir, &key, layout, pAllocator);
|
|
|
|
if (!shader) {
|
|
|
|
result = VK_ERROR_OUT_OF_HOST_MEMORY;
|
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
|
|
|
|
compiled = tu_shaders_init(dev, &pipeline_sha1, sizeof(pipeline_sha1));
|
|
|
|
if (!compiled) {
|
|
|
|
tu_shader_destroy(dev, shader, pAllocator);
|
|
|
|
result = VK_ERROR_OUT_OF_HOST_MEMORY;
|
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
|
|
|
|
compiled->active_desc_sets = shader->active_desc_sets;
|
|
|
|
compiled->push_consts[MESA_SHADER_COMPUTE] = shader->push_consts;
|
2022-06-15 01:07:28 +01:00
|
|
|
compiled->shared_consts = pipeline->shared_consts;
|
2022-02-17 19:48:36 +00:00
|
|
|
|
|
|
|
      struct ir3_shader_variant *v =
         ir3_shader_create_variant(shader->ir3_shader, &ir3_key, executable_info);

      tu_shader_destroy(dev, shader, pAllocator);

      if (!v) {
         result = VK_ERROR_OUT_OF_HOST_MEMORY;
         goto fail;
      }

      compiled->variants[MESA_SHADER_COMPUTE] = v;

      compiled = tu_pipeline_cache_insert(cache, compiled);
   }

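   /* Fill in creation feedback: overall duration plus the single compute
    * stage entry expected by the caller.
    */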
   pipeline_feedback.duration = os_time_get_nano() - pipeline_start;

   if (creation_feedback) {
      *creation_feedback->pPipelineCreationFeedback = pipeline_feedback;
      assert(creation_feedback->pipelineStageCreationFeedbackCount == 1);
      creation_feedback->pPipelineStageCreationFeedbacks[0] = pipeline_feedback;
   }

   pipeline->active_desc_sets = compiled->active_desc_sets;

   struct ir3_shader_variant *v = compiled->variants[MESA_SHADER_COMPUTE];

   tu_pipeline_set_linkage(&pipeline->program.link[MESA_SHADER_COMPUTE],
                           &compiled->push_consts[MESA_SHADER_COMPUTE], v);

   result = tu_pipeline_allocate_cs(dev, pipeline, layout, NULL, v);
   if (result != VK_SUCCESS)
      goto fail;

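   /* Upload the shader binary into the pipeline BO and configure private
    * (scratch) memory sized for this variant.
    */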
   uint64_t shader_iova = tu_upload_variant(pipeline, v);

   struct tu_pvtmem_config pvtmem;
   tu_setup_pvtmem(dev, pipeline, &pvtmem, v->pvtmem_size, v->pvtmem_per_wave);

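   /* Record the workgroup size baked into the shader, and the subgroup size:
    * 64 invocations per wave, or 128 when the variant runs in
    * double-threadsize mode.
    */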
   for (int i = 0; i < 3; i++)
      pipeline->compute.local_size[i] = v->local_size[i];

   pipeline->compute.subgroup_size = v->info.double_threadsize ? 128 : 64;

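   /* Emit the compute program state (shader config, private memory and
    * shader iova) into a sub-stream and keep it as the program draw state.
    */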
   struct tu_cs prog_cs;
   uint32_t additional_reserve_size = tu_xs_get_additional_cs_size_dwords(v);
   tu_cs_begin_sub_stream(&pipeline->cs, 64 + additional_reserve_size, &prog_cs);
   tu6_emit_cs_config(&prog_cs, v, &pvtmem, shader_iova);
   pipeline->program.state = tu_cs_end_draw_state(&pipeline->cs, &prog_cs);

   tu6_emit_load_state(pipeline, layout, true);

   tu_append_executable(pipeline, v, nir_initial_disasm);

   vk_pipeline_cache_object_unref(&compiled->base);
   ralloc_free(pipeline_mem_ctx);

   *pPipeline = tu_pipeline_to_handle(pipeline);

   return VK_SUCCESS;

fail:
   if (compiled)
      vk_pipeline_cache_object_unref(&compiled->base);

   ralloc_free(pipeline_mem_ctx);

   vk_object_free(&dev->vk, pAllocator, pipeline);

   return result;
}

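/* If creation of one pipeline fails, its entry is set to VK_NULL_HANDLE; with
 * VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT the loop stops early and the
 * remaining entries are set to VK_NULL_HANDLE as well.
 */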
VKAPI_ATTR VkResult VKAPI_CALL
tu_CreateComputePipelines(VkDevice device,
                          VkPipelineCache pipelineCache,
                          uint32_t count,
                          const VkComputePipelineCreateInfo *pCreateInfos,
                          const VkAllocationCallbacks *pAllocator,
                          VkPipeline *pPipelines)
{
   VkResult final_result = VK_SUCCESS;
   uint32_t i = 0;

   for (; i < count; i++) {
      VkResult result = tu_compute_pipeline_create(device, pipelineCache,
                                                   &pCreateInfos[i],
                                                   pAllocator, &pPipelines[i]);
      if (result != VK_SUCCESS) {
         final_result = result;
         pPipelines[i] = VK_NULL_HANDLE;

         if (pCreateInfos[i].flags &
             VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT)
            break;
      }
   }

   for (; i < count; i++)
      pPipelines[i] = VK_NULL_HANDLE;

   return final_result;
}

VKAPI_ATTR void VKAPI_CALL
tu_DestroyPipeline(VkDevice _device,
                   VkPipeline _pipeline,
                   const VkAllocationCallbacks *pAllocator)
{
   TU_FROM_HANDLE(tu_device, dev, _device);
   TU_FROM_HANDLE(tu_pipeline, pipeline, _pipeline);

   if (!_pipeline)
      return;

   tu_pipeline_finish(pipeline, dev, pAllocator);
   vk_object_free(&dev->vk, pAllocator, pipeline);
}

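/* Fill a fixed-size string field in the executable property/statistic
 * structs, asserting that the formatted text was not truncated.
 */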
#define WRITE_STR(field, ...) ({ \
   memset(field, 0, sizeof(field)); \
   UNUSED int _i = snprintf(field, sizeof(field), __VA_ARGS__); \
   assert(_i > 0 && _i < sizeof(field)); \
})

static const struct tu_pipeline_executable *
tu_pipeline_get_executable(struct tu_pipeline *pipeline, uint32_t index)
{
   assert(index < util_dynarray_num_elements(&pipeline->executables,
                                             struct tu_pipeline_executable));
   return util_dynarray_element(
      &pipeline->executables, struct tu_pipeline_executable, index);
}

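/* One executable is reported per compiled shader variant attached to the
 * pipeline, including the separate binning variant of the vertex shader on
 * graphics pipelines.
 */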
VKAPI_ATTR VkResult VKAPI_CALL
tu_GetPipelineExecutablePropertiesKHR(
    VkDevice _device,
    const VkPipelineInfoKHR* pPipelineInfo,
    uint32_t* pExecutableCount,
    VkPipelineExecutablePropertiesKHR* pProperties)
{
   TU_FROM_HANDLE(tu_device, dev, _device);
   TU_FROM_HANDLE(tu_pipeline, pipeline, pPipelineInfo->pipeline);
   VK_OUTARRAY_MAKE_TYPED(VkPipelineExecutablePropertiesKHR, out,
                          pProperties, pExecutableCount);

   util_dynarray_foreach (&pipeline->executables, struct tu_pipeline_executable, exe) {
      vk_outarray_append_typed(VkPipelineExecutablePropertiesKHR, &out, props) {
         gl_shader_stage stage = exe->stage;
         props->stages = mesa_to_vk_shader_stage(stage);

         if (!exe->is_binning)
            WRITE_STR(props->name, "%s", _mesa_shader_stage_to_abbrev(stage));
         else
            WRITE_STR(props->name, "Binning VS");

         WRITE_STR(props->description, "%s", _mesa_shader_stage_to_string(stage));

         props->subgroupSize =
            dev->compiler->threadsize_base * (exe->stats.double_threadsize ? 2 : 1);
      }
   }

   return vk_outarray_status(&out);
}

VKAPI_ATTR VkResult VKAPI_CALL
tu_GetPipelineExecutableStatisticsKHR(
    VkDevice _device,
    const VkPipelineExecutableInfoKHR* pExecutableInfo,
    uint32_t* pStatisticCount,
    VkPipelineExecutableStatisticKHR* pStatistics)
{
   TU_FROM_HANDLE(tu_pipeline, pipeline, pExecutableInfo->pipeline);
   VK_OUTARRAY_MAKE_TYPED(VkPipelineExecutableStatisticKHR, out,
                          pStatistics, pStatisticCount);

   const struct tu_pipeline_executable *exe =
      tu_pipeline_get_executable(pipeline, pExecutableInfo->executableIndex);

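   /* Every statistic follows the same pattern: a fixed name/description pair
    * and a 64-bit value taken from the ir3 shader statistics gathered at
    * compile time.
    */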
   vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
      WRITE_STR(stat->name, "Max Waves Per Core");
      WRITE_STR(stat->description,
                "Maximum number of simultaneous waves per core.");
      stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
      stat->value.u64 = exe->stats.max_waves;
   }

   vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
      WRITE_STR(stat->name, "Instruction Count");
      WRITE_STR(stat->description,
                "Total number of IR3 instructions in the final generated "
                "shader executable.");
      stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
      stat->value.u64 = exe->stats.instrs_count;
   }

   vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
      WRITE_STR(stat->name, "Code size");
      WRITE_STR(stat->description,
                "Total number of dwords in the final generated "
                "shader executable.");
      stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
      stat->value.u64 = exe->stats.sizedwords;
   }

   vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
      WRITE_STR(stat->name, "NOPs Count");
      WRITE_STR(stat->description,
                "Number of NOP instructions in the final generated "
                "shader executable.");
      stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
      stat->value.u64 = exe->stats.nops_count;
   }

   vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
      WRITE_STR(stat->name, "MOV Count");
      WRITE_STR(stat->description,
                "Number of MOV instructions in the final generated "
                "shader executable.");
      stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
      stat->value.u64 = exe->stats.mov_count;
   }

   vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
      WRITE_STR(stat->name, "COV Count");
      WRITE_STR(stat->description,
                "Number of COV instructions in the final generated "
                "shader executable.");
      stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
      stat->value.u64 = exe->stats.cov_count;
   }

   vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
      WRITE_STR(stat->name, "Registers used");
      WRITE_STR(stat->description,
                "Number of registers used in the final generated "
                "shader executable.");
      stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
      stat->value.u64 = exe->stats.max_reg + 1;
   }

   vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
      WRITE_STR(stat->name, "Half-registers used");
      WRITE_STR(stat->description,
                "Number of half-registers used in the final generated "
                "shader executable.");
      stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
      stat->value.u64 = exe->stats.max_half_reg + 1;
   }

   vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
      WRITE_STR(stat->name, "Instructions with SS sync bit");
      WRITE_STR(stat->description,
                "SS bit is set for instructions which depend on a result "
                "of \"long\" instructions to prevent RAW hazard.");
      stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
      stat->value.u64 = exe->stats.ss;
   }

   vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
      WRITE_STR(stat->name, "Instructions with SY sync bit");
      WRITE_STR(stat->description,
                "SY bit is set for instructions which depend on a result "
                "of loads from global memory to prevent RAW hazard.");
      stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
      stat->value.u64 = exe->stats.sy;
   }

   vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
      WRITE_STR(stat->name, "Estimated cycles stalled on SS");
      WRITE_STR(stat->description,
                "A better metric to estimate the impact of SS syncs.");
      stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
      stat->value.u64 = exe->stats.sstall;
   }

   vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
      WRITE_STR(stat->name, "Estimated cycles stalled on SY");
      WRITE_STR(stat->description,
                "A better metric to estimate the impact of SY syncs.");
      stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
      stat->value.u64 = exe->stats.systall;
   }

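   /* One statistic per IR3 instruction category. */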
   for (int i = 0; i < ARRAY_SIZE(exe->stats.instrs_per_cat); i++) {
      vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
         WRITE_STR(stat->name, "cat%d instructions", i);
         WRITE_STR(stat->description,
                   "Number of cat%d instructions.", i);
         stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
         stat->value.u64 = exe->stats.instrs_per_cat[i];
      }
   }

   vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
      WRITE_STR(stat->name, "STP Count");
      WRITE_STR(stat->description,
                "Number of STore Private instructions in the final generated "
                "shader executable.");
      stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
      stat->value.u64 = exe->stats.stp_count;
   }

   vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
      WRITE_STR(stat->name, "LDP Count");
      WRITE_STR(stat->description,
                "Number of LoaD Private instructions in the final generated "
                "shader executable.");
      stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
      stat->value.u64 = exe->stats.ldp_count;
   }

   return vk_outarray_status(&out);
}

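/* Copy IR text into the caller's buffer using the usual Vulkan two-call
 * idiom: with pData == NULL only the required size (including the NUL
 * terminator) is reported; otherwise the text is truncated to dataSize and
 * false is returned so the caller can report VK_INCOMPLETE.
 */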
static bool
write_ir_text(VkPipelineExecutableInternalRepresentationKHR* ir,
              const char *data)
{
   ir->isText = VK_TRUE;

   size_t data_len = strlen(data) + 1;

   if (ir->pData == NULL) {
      ir->dataSize = data_len;
      return true;
   }

   strncpy(ir->pData, data, ir->dataSize);
   if (ir->dataSize < data_len)
      return false;

   ir->dataSize = data_len;
   return true;
}

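/* Expose the captured internal representations: the initial NIR translated
 * from SPIR-V, the final NIR handed to the backend, and the IR3 disassembly.
 * These are only present when the pipeline was created with
 * VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR.
 */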
VKAPI_ATTR VkResult VKAPI_CALL
tu_GetPipelineExecutableInternalRepresentationsKHR(
    VkDevice _device,
    const VkPipelineExecutableInfoKHR* pExecutableInfo,
    uint32_t* pInternalRepresentationCount,
    VkPipelineExecutableInternalRepresentationKHR* pInternalRepresentations)
{
   TU_FROM_HANDLE(tu_pipeline, pipeline, pExecutableInfo->pipeline);
   VK_OUTARRAY_MAKE_TYPED(VkPipelineExecutableInternalRepresentationKHR, out,
                          pInternalRepresentations, pInternalRepresentationCount);
   bool incomplete_text = false;

   const struct tu_pipeline_executable *exe =
      tu_pipeline_get_executable(pipeline, pExecutableInfo->executableIndex);

   if (exe->nir_from_spirv) {
      vk_outarray_append_typed(VkPipelineExecutableInternalRepresentationKHR, &out, ir) {
         WRITE_STR(ir->name, "NIR from SPIRV");
         WRITE_STR(ir->description,
                   "Initial NIR before any optimizations");

         if (!write_ir_text(ir, exe->nir_from_spirv))
            incomplete_text = true;
      }
   }

   if (exe->nir_final) {
      vk_outarray_append_typed(VkPipelineExecutableInternalRepresentationKHR, &out, ir) {
         WRITE_STR(ir->name, "Final NIR");
         WRITE_STR(ir->description,
                   "Final NIR before going into the back-end compiler");

         if (!write_ir_text(ir, exe->nir_final))
            incomplete_text = true;
      }
   }

   if (exe->disasm) {
      vk_outarray_append_typed(VkPipelineExecutableInternalRepresentationKHR, &out, ir) {
         WRITE_STR(ir->name, "IR3 Assembly");
         WRITE_STR(ir->description,
                   "Final IR3 assembly for the generated shader binary");

         if (!write_ir_text(ir, exe->disasm))
            incomplete_text = true;
      }
   }

   return incomplete_text ? VK_INCOMPLETE : vk_outarray_status(&out);
}