1158 lines
46 KiB
C
1158 lines
46 KiB
C
/*
|
|
* Copyright 2020 Advanced Micro Devices, Inc.
|
|
* All Rights Reserved.
|
|
*
|
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
|
* copy of this software and associated documentation files (the "Software"),
|
|
* to deal in the Software without restriction, including without limitation
|
|
* on the rights to use, copy, modify, merge, publish, distribute, sub
|
|
* license, and/or sell copies of the Software, and to permit persons to whom
|
|
* the Software is furnished to do so, subject to the following conditions:
|
|
*
|
|
* The above copyright notice and this permission notice (including the next
|
|
* paragraph) shall be included in all copies or substantial portions of the
|
|
* Software.
|
|
*
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
|
|
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
|
|
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
|
|
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
|
|
* USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
*/
|
|
|
|
#include "si_pipe.h"
|
|
#include "si_shader_internal.h"
|
|
#include "sid.h"
|
|
|
|
static LLVMValueRef get_rel_patch_id(struct si_shader_context *ctx)
|
|
{
|
|
switch (ctx->type) {
|
|
case PIPE_SHADER_TESS_CTRL:
|
|
return si_unpack_param(ctx, ctx->args.tcs_rel_ids, 0, 8);
|
|
|
|
case PIPE_SHADER_TESS_EVAL:
|
|
return ac_get_arg(&ctx->ac, ctx->tes_rel_patch_id);
|
|
|
|
default:
|
|
assert(0);
|
|
return NULL;
|
|
}
|
|
}
|
|
|
|
/* Tessellation shaders pass outputs to the next shader using LDS.
|
|
*
|
|
* LS outputs = TCS inputs
|
|
* TCS outputs = TES inputs
|
|
*
|
|
* The LDS layout is:
|
|
* - TCS inputs for patch 0
|
|
* - TCS inputs for patch 1
|
|
* - TCS inputs for patch 2 = get_tcs_in_current_patch_offset (if RelPatchID==2)
|
|
* - ...
|
|
* - TCS outputs for patch 0 = get_tcs_out_patch0_offset
|
|
* - Per-patch TCS outputs for patch 0 = get_tcs_out_patch0_patch_data_offset
|
|
* - TCS outputs for patch 1
|
|
* - Per-patch TCS outputs for patch 1
|
|
* - TCS outputs for patch 2 = get_tcs_out_current_patch_offset (if RelPatchID==2)
|
|
* - Per-patch TCS outputs for patch 2 = get_tcs_out_current_patch_data_offset (if RelPatchID==2)
|
|
* - ...
|
|
*
|
|
* All three shaders VS(LS), TCS, TES share the same LDS space.
|
|
*/
|
|
|
|
static LLVMValueRef get_tcs_in_patch_stride(struct si_shader_context *ctx)
|
|
{
|
|
return si_unpack_param(ctx, ctx->vs_state_bits, 11, 13);
|
|
}
|
|
|
|
static unsigned get_tcs_out_vertex_dw_stride_constant(struct si_shader_context *ctx)
|
|
{
|
|
assert(ctx->type == PIPE_SHADER_TESS_CTRL);
|
|
|
|
if (ctx->shader->key.mono.u.ff_tcs_inputs_to_copy)
|
|
return util_last_bit64(ctx->shader->key.mono.u.ff_tcs_inputs_to_copy) * 4;
|
|
|
|
return util_last_bit64(ctx->shader->selector->outputs_written) * 4;
|
|
}
|
|
|
|
static LLVMValueRef get_tcs_out_vertex_dw_stride(struct si_shader_context *ctx)
|
|
{
|
|
unsigned stride = get_tcs_out_vertex_dw_stride_constant(ctx);
|
|
|
|
return LLVMConstInt(ctx->ac.i32, stride, 0);
|
|
}
|
|
|
|
static LLVMValueRef get_tcs_out_patch_stride(struct si_shader_context *ctx)
|
|
{
|
|
if (ctx->shader->key.mono.u.ff_tcs_inputs_to_copy)
|
|
return si_unpack_param(ctx, ctx->tcs_out_lds_layout, 0, 13);
|
|
|
|
const struct si_shader_info *info = &ctx->shader->selector->info;
|
|
unsigned tcs_out_vertices = info->properties[TGSI_PROPERTY_TCS_VERTICES_OUT];
|
|
unsigned vertex_dw_stride = get_tcs_out_vertex_dw_stride_constant(ctx);
|
|
unsigned num_patch_outputs = util_last_bit64(ctx->shader->selector->patch_outputs_written);
|
|
unsigned patch_dw_stride = tcs_out_vertices * vertex_dw_stride + num_patch_outputs * 4;
|
|
return LLVMConstInt(ctx->ac.i32, patch_dw_stride, 0);
|
|
}
|
|
|
|
static LLVMValueRef get_tcs_out_patch0_offset(struct si_shader_context *ctx)
|
|
{
|
|
return LLVMBuildMul(ctx->ac.builder, si_unpack_param(ctx, ctx->tcs_out_lds_offsets, 0, 16),
|
|
LLVMConstInt(ctx->ac.i32, 4, 0), "");
|
|
}
|
|
|
|
static LLVMValueRef get_tcs_out_patch0_patch_data_offset(struct si_shader_context *ctx)
|
|
{
|
|
return LLVMBuildMul(ctx->ac.builder, si_unpack_param(ctx, ctx->tcs_out_lds_offsets, 16, 16),
|
|
LLVMConstInt(ctx->ac.i32, 4, 0), "");
|
|
}
|
|
|
|
static LLVMValueRef get_tcs_in_current_patch_offset(struct si_shader_context *ctx)
|
|
{
|
|
LLVMValueRef patch_stride = get_tcs_in_patch_stride(ctx);
|
|
LLVMValueRef rel_patch_id = get_rel_patch_id(ctx);
|
|
|
|
return LLVMBuildMul(ctx->ac.builder, patch_stride, rel_patch_id, "");
|
|
}
|
|
|
|
static LLVMValueRef get_tcs_out_current_patch_offset(struct si_shader_context *ctx)
|
|
{
|
|
LLVMValueRef patch0_offset = get_tcs_out_patch0_offset(ctx);
|
|
LLVMValueRef patch_stride = get_tcs_out_patch_stride(ctx);
|
|
LLVMValueRef rel_patch_id = get_rel_patch_id(ctx);
|
|
|
|
return ac_build_imad(&ctx->ac, patch_stride, rel_patch_id, patch0_offset);
|
|
}
|
|
|
|
static LLVMValueRef get_tcs_out_current_patch_data_offset(struct si_shader_context *ctx)
|
|
{
|
|
LLVMValueRef patch0_patch_data_offset = get_tcs_out_patch0_patch_data_offset(ctx);
|
|
LLVMValueRef patch_stride = get_tcs_out_patch_stride(ctx);
|
|
LLVMValueRef rel_patch_id = get_rel_patch_id(ctx);
|
|
|
|
return ac_build_imad(&ctx->ac, patch_stride, rel_patch_id, patch0_patch_data_offset);
|
|
}
|
|
|
|
static LLVMValueRef get_num_tcs_out_vertices(struct si_shader_context *ctx)
|
|
{
|
|
unsigned tcs_out_vertices =
|
|
ctx->shader->selector ? ctx->shader->selector->info.properties[TGSI_PROPERTY_TCS_VERTICES_OUT]
|
|
: 0;
|
|
|
|
/* If !tcs_out_vertices, it's either the fixed-func TCS or the TCS epilog. */
|
|
if (ctx->type == PIPE_SHADER_TESS_CTRL && tcs_out_vertices)
|
|
return LLVMConstInt(ctx->ac.i32, tcs_out_vertices, 0);
|
|
|
|
return si_unpack_param(ctx, ctx->tcs_offchip_layout, 6, 6);
|
|
}
|
|
|
|
static LLVMValueRef get_tcs_in_vertex_dw_stride(struct si_shader_context *ctx)
|
|
{
|
|
unsigned stride;
|
|
|
|
switch (ctx->type) {
|
|
case PIPE_SHADER_VERTEX:
|
|
stride = ctx->shader->selector->lshs_vertex_stride / 4;
|
|
return LLVMConstInt(ctx->ac.i32, stride, 0);
|
|
|
|
case PIPE_SHADER_TESS_CTRL:
|
|
if (ctx->screen->info.chip_class >= GFX9 && ctx->shader->is_monolithic) {
|
|
stride = ctx->shader->key.part.tcs.ls->lshs_vertex_stride / 4;
|
|
return LLVMConstInt(ctx->ac.i32, stride, 0);
|
|
}
|
|
return si_unpack_param(ctx, ctx->vs_state_bits, 24, 8);
|
|
|
|
default:
|
|
assert(0);
|
|
return NULL;
|
|
}
|
|
}
|
|
|
|
static LLVMValueRef
|
|
get_dw_address_from_generic_indices(struct si_shader_context *ctx, LLVMValueRef vertex_dw_stride,
|
|
LLVMValueRef base_addr, LLVMValueRef vertex_index,
|
|
LLVMValueRef param_index, ubyte name, ubyte index)
|
|
{
|
|
if (vertex_dw_stride) {
|
|
base_addr = ac_build_imad(&ctx->ac, vertex_index, vertex_dw_stride, base_addr);
|
|
}
|
|
|
|
if (param_index) {
|
|
base_addr = ac_build_imad(&ctx->ac, param_index, LLVMConstInt(ctx->ac.i32, 4, 0), base_addr);
|
|
}
|
|
|
|
int param = name == TGSI_SEMANTIC_PATCH || name == TGSI_SEMANTIC_TESSINNER ||
|
|
name == TGSI_SEMANTIC_TESSOUTER
|
|
? si_shader_io_get_unique_index_patch(name, index)
|
|
: si_shader_io_get_unique_index(name, index, false);
|
|
|
|
/* Add the base address of the element. */
|
|
return LLVMBuildAdd(ctx->ac.builder, base_addr, LLVMConstInt(ctx->ac.i32, param * 4, 0), "");
|
|
}
|
|
|
|
/* The offchip buffer layout for TCS->TES is
|
|
*
|
|
* - attribute 0 of patch 0 vertex 0
|
|
* - attribute 0 of patch 0 vertex 1
|
|
* - attribute 0 of patch 0 vertex 2
|
|
* ...
|
|
* - attribute 0 of patch 1 vertex 0
|
|
* - attribute 0 of patch 1 vertex 1
|
|
* ...
|
|
* - attribute 1 of patch 0 vertex 0
|
|
* - attribute 1 of patch 0 vertex 1
|
|
* ...
|
|
* - per patch attribute 0 of patch 0
|
|
* - per patch attribute 0 of patch 1
|
|
* ...
|
|
*
|
|
* Note that every attribute has 4 components.
|
|
*/
|
|
/* Compute the offchip-buffer address of one attribute.
 *
 * With vertex_index set (per-vertex attribute):
 *    16 * (param_index * total_vertices + rel_patch_id * vertices_per_patch + vertex_index)
 * With vertex_index == NULL (per-patch attribute):
 *    16 * (param_index * num_patches + rel_patch_id) + patch_data_offset
 *
 * num_patches and patch_data_offset come from the tcs_offchip_layout argument.
 */
static LLVMValueRef get_tcs_tes_buffer_address(struct si_shader_context *ctx,
                                               LLVMValueRef rel_patch_id, LLVMValueRef vertex_index,
                                               LLVMValueRef param_index)
{
   LLVMBuilderRef builder = ctx->ac.builder;
   const bool per_vertex = vertex_index != NULL;

   LLVMValueRef verts_per_patch = get_num_tcs_out_vertices(ctx);
   LLVMValueRef patch_count = si_unpack_param(ctx, ctx->tcs_offchip_layout, 0, 6);
   LLVMValueRef all_verts = LLVMBuildMul(builder, verts_per_patch, patch_count, "");
   LLVMValueRef sixteen = LLVMConstInt(ctx->ac.i32, 16, 0);

   LLVMValueRef elem_index;
   LLVMValueRef attr_stride;

   if (per_vertex) {
      elem_index = ac_build_imad(&ctx->ac, rel_patch_id, verts_per_patch, vertex_index);
      attr_stride = all_verts;
   } else {
      elem_index = rel_patch_id;
      attr_stride = patch_count;
   }

   elem_index = ac_build_imad(&ctx->ac, param_index, attr_stride, elem_index);

   LLVMValueRef address = LLVMBuildMul(builder, elem_index, sixteen, "");

   if (per_vertex)
      return address;

   /* Per-patch attributes live after the per-vertex ones. */
   LLVMValueRef patch_data_offset = si_unpack_param(ctx, ctx->tcs_offchip_layout, 12, 20);

   return LLVMBuildAdd(builder, address, patch_data_offset, "");
}
|
|
|
|
/* Translate (name, index) into a unique slot number, add the optional
 * dynamic param_index to it, and return the offchip-buffer address of that
 * slot for the current patch (see get_tcs_tes_buffer_address()).
 */
static LLVMValueRef get_tcs_tes_buffer_address_from_generic_indices(struct si_shader_context *ctx,
                                                                    LLVMValueRef vertex_index,
                                                                    LLVMValueRef param_index,
                                                                    ubyte name, ubyte index)
{
   const bool patch_semantic = name == TGSI_SEMANTIC_PATCH || name == TGSI_SEMANTIC_TESSINNER ||
                               name == TGSI_SEMANTIC_TESSOUTER;
   unsigned slot;

   if (patch_semantic)
      slot = si_shader_io_get_unique_index_patch(name, index);
   else
      slot = si_shader_io_get_unique_index(name, index, false);

   LLVMValueRef slot_const = LLVMConstInt(ctx->ac.i32, slot, 0);
   LLVMValueRef full_index = slot_const;

   if (param_index)
      full_index = LLVMBuildAdd(ctx->ac.builder, param_index, slot_const, "");

   LLVMValueRef patch_id = get_rel_patch_id(ctx);

   return get_tcs_tes_buffer_address(ctx, patch_id, vertex_index, full_index);
}
|
|
|
|
/* Load from a buffer with ac_build_buffer_load (ac_glc cache policy).
 *
 * \param type     element type of the result
 * \param swizzle  channel to return; ~0 returns the whole 4-element vector
 * \param buffer   buffer descriptor
 * \param offset, base  forwarded unchanged to ac_build_buffer_load
 *
 * Types whose size is not 8 are loaded as 4 dwords and, unless the whole
 * vector was requested, one element is extracted. 8-byte types are assembled
 * from two single-dword loads at immediate offsets swizzle*4 and swizzle*4+4.
 */
static LLVMValueRef buffer_load(struct si_shader_context *ctx, LLVMTypeRef type, unsigned swizzle,
                                LLVMValueRef buffer, LLVMValueRef offset, LLVMValueRef base,
                                bool can_speculate)
{
   LLVMBuilderRef builder = ctx->ac.builder;
   const bool whole_vector = swizzle == ~0;

   if (whole_vector || ac_get_type_size(type) != 8) {
      LLVMTypeRef vec4_type = LLVMVectorType(type, 4);
      LLVMValueRef vec = ac_build_buffer_load(&ctx->ac, buffer, 4, NULL, base, offset, 0, ac_glc,
                                              can_speculate, false);

      vec = LLVMBuildBitCast(builder, vec, vec4_type, "");
      if (whole_vector)
         return vec;

      return LLVMBuildExtractElement(builder, vec, LLVMConstInt(ctx->ac.i32, swizzle, 0), "");
   }

   /* 8-byte element: fetch the two halves separately and merge them. */
   LLVMValueRef first_dw = ac_build_buffer_load(&ctx->ac, buffer, 1, NULL, base, offset,
                                                swizzle * 4, ac_glc, can_speculate, false);
   LLVMValueRef second_dw = ac_build_buffer_load(&ctx->ac, buffer, 1, NULL, base, offset,
                                                 swizzle * 4 + 4, ac_glc, can_speculate, false);

   return si_build_gather_64bit(ctx, type, first_dw, second_dw);
}
|
|
|
|
/**
|
|
* Load from LSHS LDS storage.
|
|
*
|
|
* \param type output value type
|
|
* \param swizzle offset (typically 0..3); it can be ~0, which loads a vec4
|
|
* \param dw_addr address in dwords
|
|
*/
|
|
static LLVMValueRef lshs_lds_load(struct si_shader_context *ctx, LLVMTypeRef type, unsigned swizzle,
|
|
LLVMValueRef dw_addr)
|
|
{
|
|
LLVMValueRef value;
|
|
|
|
if (swizzle == ~0) {
|
|
LLVMValueRef values[4];
|
|
|
|
for (unsigned chan = 0; chan < 4; chan++)
|
|
values[chan] = lshs_lds_load(ctx, type, chan, dw_addr);
|
|
|
|
return ac_build_gather_values(&ctx->ac, values, 4);
|
|
}
|
|
|
|
/* Split 64-bit loads. */
|
|
if (ac_get_type_size(type) == 8) {
|
|
LLVMValueRef lo, hi;
|
|
|
|
lo = lshs_lds_load(ctx, ctx->ac.i32, swizzle, dw_addr);
|
|
hi = lshs_lds_load(ctx, ctx->ac.i32, swizzle + 1, dw_addr);
|
|
return si_build_gather_64bit(ctx, type, lo, hi);
|
|
}
|
|
|
|
dw_addr = LLVMBuildAdd(ctx->ac.builder, dw_addr, LLVMConstInt(ctx->ac.i32, swizzle, 0), "");
|
|
|
|
value = ac_lds_load(&ctx->ac, dw_addr);
|
|
|
|
return LLVMBuildBitCast(ctx->ac.builder, value, type, "");
|
|
}
|
|
|
|
/**
|
|
* Store to LSHS LDS storage.
|
|
*
|
|
* \param swizzle offset (typically 0..3)
|
|
* \param dw_addr address in dwords
|
|
* \param value value to store
|
|
*/
|
|
static void lshs_lds_store(struct si_shader_context *ctx, unsigned dw_offset_imm,
|
|
LLVMValueRef dw_addr, LLVMValueRef value)
|
|
{
|
|
dw_addr =
|
|
LLVMBuildAdd(ctx->ac.builder, dw_addr, LLVMConstInt(ctx->ac.i32, dw_offset_imm, 0), "");
|
|
|
|
ac_lds_store(&ctx->ac, dw_addr, value);
|
|
}
|
|
|
|
/* Selects which ring get_tess_ring_descriptor() builds a descriptor for. */
enum si_tess_ring {
   TCS_FACTOR_RING,       /* ring the TCS writes tess factors to */
   TESS_OFFCHIP_RING_TCS, /* offchip ring as addressed from the TCS */
   TESS_OFFCHIP_RING_TES, /* offchip ring as addressed from the TES */
};
|
|
|
|
static LLVMValueRef get_tess_ring_descriptor(struct si_shader_context *ctx, enum si_tess_ring ring)
|
|
{
|
|
LLVMBuilderRef builder = ctx->ac.builder;
|
|
LLVMValueRef addr = ac_get_arg(
|
|
&ctx->ac, ring == TESS_OFFCHIP_RING_TES ? ctx->tes_offchip_addr : ctx->tcs_out_lds_layout);
|
|
|
|
/* TCS only receives high 13 bits of the address. */
|
|
if (ring == TESS_OFFCHIP_RING_TCS || ring == TCS_FACTOR_RING) {
|
|
addr = LLVMBuildAnd(builder, addr, LLVMConstInt(ctx->ac.i32, 0xfff80000, 0), "");
|
|
}
|
|
|
|
if (ring == TCS_FACTOR_RING) {
|
|
unsigned tf_offset = ctx->screen->tess_offchip_ring_size;
|
|
addr = LLVMBuildAdd(builder, addr, LLVMConstInt(ctx->ac.i32, tf_offset, 0), "");
|
|
}
|
|
|
|
uint32_t rsrc3 = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
|
|
S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
|
|
|
|
if (ctx->screen->info.chip_class >= GFX10)
|
|
rsrc3 |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) |
|
|
S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) | S_008F0C_RESOURCE_LEVEL(1);
|
|
else
|
|
rsrc3 |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
|
|
S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
|
|
|
|
LLVMValueRef desc[4];
|
|
desc[0] = addr;
|
|
desc[1] = LLVMConstInt(ctx->ac.i32, S_008F04_BASE_ADDRESS_HI(ctx->screen->info.address32_hi), 0);
|
|
desc[2] = LLVMConstInt(ctx->ac.i32, 0xffffffff, 0);
|
|
desc[3] = LLVMConstInt(ctx->ac.i32, rsrc3, false);
|
|
|
|
return ac_build_gather_values(&ctx->ac, desc, 4);
|
|
}
|
|
|
|
void si_llvm_preload_tes_rings(struct si_shader_context *ctx)
|
|
{
|
|
ctx->tess_offchip_ring = get_tess_ring_descriptor(ctx, TESS_OFFCHIP_RING_TES);
|
|
}
|
|
|
|
/* Load a TCS varying from LDS.
 *
 * load_input selects TCS inputs (input semantics, input stride and patch
 * offset); otherwise TCS outputs are read, using the per-patch data area when
 * is_patch is set. driver_location is divided by 4 to get the slot used to
 * look up the semantic. When param_index is NULL, const_index is used instead.
 * For 8-byte types each component advances the channel offset by 2.
 *
 * location and is_compact are not used here.
 */
static LLVMValueRef si_nir_load_tcs_varyings(struct ac_shader_abi *abi, LLVMTypeRef type,
                                             LLVMValueRef vertex_index, LLVMValueRef param_index,
                                             unsigned const_index, unsigned location,
                                             unsigned driver_location, unsigned component,
                                             unsigned num_components, bool is_patch,
                                             bool is_compact, bool load_input)
{
   struct si_shader_context *ctx = si_shader_context_from_abi(abi);
   struct si_shader_info *info = &ctx->shader->selector->info;
   const unsigned slot = driver_location / 4;

   ubyte name = load_input ? info->input_semantic_name[slot] : info->output_semantic_name[slot];
   ubyte index = load_input ? info->input_semantic_index[slot] : info->output_semantic_index[slot];

   assert((name == TGSI_SEMANTIC_PATCH || name == TGSI_SEMANTIC_TESSINNER ||
           name == TGSI_SEMANTIC_TESSOUTER) == is_patch);

   LLVMValueRef vertex_stride;
   LLVMValueRef lds_addr;

   if (load_input) {
      vertex_stride = get_tcs_in_vertex_dw_stride(ctx);
      lds_addr = get_tcs_in_current_patch_offset(ctx);
   } else if (is_patch) {
      /* Per-patch outputs have no per-vertex stride. */
      vertex_stride = NULL;
      lds_addr = get_tcs_out_current_patch_data_offset(ctx);
   } else {
      vertex_stride = get_tcs_out_vertex_dw_stride(ctx);
      lds_addr = get_tcs_out_current_patch_offset(ctx);
   }

   if (!param_index)
      param_index = LLVMConstInt(ctx->ac.i32, const_index, 0);

   lds_addr = get_dw_address_from_generic_indices(ctx, vertex_stride, lds_addr, vertex_index,
                                                  param_index, name, index);

   LLVMValueRef loaded[4];

   for (unsigned i = 0; i < num_components; i++) {
      /* An 8-byte element occupies two dword channels. */
      unsigned chan = ac_get_type_size(type) == 8 ? i * 2 : i;

      chan += component;
      loaded[i + component] = lshs_lds_load(ctx, type, chan, lds_addr);
   }

   return ac_build_varying_gather_values(&ctx->ac, loaded, num_components, component);
}
|
|
|
|
/* Load a TES input from the offchip buffer.
 *
 * driver_location is divided by 4 to get the slot used for the semantic
 * lookup. When param_index is NULL, const_index is used instead. For 8-byte
 * types each component takes two dword channels; once the channel offset
 * reaches 4 the address is recomputed for the following slot.
 *
 * location, is_compact and load_input are not used here.
 */
static LLVMValueRef si_nir_load_input_tes(struct ac_shader_abi *abi, LLVMTypeRef type,
                                          LLVMValueRef vertex_index, LLVMValueRef param_index,
                                          unsigned const_index, unsigned location,
                                          unsigned driver_location, unsigned component,
                                          unsigned num_components, bool is_patch, bool is_compact,
                                          bool load_input)
{
   struct si_shader_context *ctx = si_shader_context_from_abi(abi);
   struct si_shader_info *info = &ctx->shader->selector->info;
   const unsigned slot = driver_location / 4;
   ubyte name = info->input_semantic_name[slot];
   ubyte index = info->input_semantic_index[slot];

   assert((name == TGSI_SEMANTIC_PATCH || name == TGSI_SEMANTIC_TESSINNER ||
           name == TGSI_SEMANTIC_TESSOUTER) == is_patch);

   LLVMValueRef ring_offset = ac_get_arg(&ctx->ac, ctx->tcs_offchip_offset);

   if (!param_index)
      param_index = LLVMConstInt(ctx->ac.i32, const_index, 0);

   LLVMValueRef attr_addr =
      get_tcs_tes_buffer_address_from_generic_indices(ctx, vertex_index, param_index, name, index);

   /* TODO: This will generate rather ordinary llvm code, although it
    * should be easy for the optimiser to fix up. In future we might want
    * to refactor buffer_load().
    */
   LLVMValueRef loaded[4];

   for (unsigned i = 0; i < num_components; i++) {
      unsigned chan = i;

      if (ac_get_type_size(type) == 8) {
         chan = i * 2;

         if (chan == 4) {
            /* The second half of a wide vector lives in the next slot. */
            ubyte next_name = info->input_semantic_name[slot + 1];
            ubyte next_index = info->input_semantic_index[slot + 1];

            attr_addr = get_tcs_tes_buffer_address_from_generic_indices(
               ctx, vertex_index, param_index, next_name, next_index);
         }

         chan %= 4;
      }

      chan += component;
      loaded[i + component] =
         buffer_load(ctx, type, chan, ctx->tess_offchip_ring, ring_offset, attr_addr, true);
   }

   return ac_build_varying_gather_values(&ctx->ac, loaded, num_components, component);
}
|
|
|
|
/* Store a TCS output.
 *
 * Each written channel is:
 *   - stored to LDS, unless the shader info says no invocation reads that
 *     class of output back (per-vertex, per-patch or tess factor);
 *   - stored to the offchip buffer, unless it is a tess factor. A writemask
 *     of exactly 0xF is written as a single 4-dword store after the loop;
 *     any other mask is written one dword at a time;
 *   - for tess factors that are defined in all invocations, also stored to
 *     the invoc0_tess_factors temporaries (slots 0..3 outer, 4..5 inner).
 *
 * \param var          NIR output variable; supplies location_frac,
 *                     driver_location and the patch flag
 * \param vertex_index dynamic vertex index, forwarded to the address helpers
 * \param param_index  dynamic slot index, or NULL to use const_index
 * \param src          value vector; element (chan - location_frac) is written
 *                     to channel chan
 * \param writemask    channel mask, indexed by absolute channel (0..7)
 */
static void si_nir_store_output_tcs(struct ac_shader_abi *abi, const struct nir_variable *var,
                                    LLVMValueRef vertex_index, LLVMValueRef param_index,
                                    unsigned const_index, LLVMValueRef src, unsigned writemask)
{
   struct si_shader_context *ctx = si_shader_context_from_abi(abi);
   struct si_shader_info *info = &ctx->shader->selector->info;
   const unsigned component = var->data.location_frac;
   unsigned driver_location = var->data.driver_location;
   LLVMValueRef dw_addr, stride;
   LLVMValueRef buffer, base, addr;
   LLVMValueRef values[8];
   bool skip_lds_store;
   bool is_tess_factor = false, is_tess_inner = false;

   driver_location = driver_location / 4;
   ubyte name = info->output_semantic_name[driver_location];
   ubyte index = info->output_semantic_index[driver_location];

   bool is_const = !param_index;
   if (!param_index)
      param_index = LLVMConstInt(ctx->ac.i32, const_index, 0);

   const bool is_patch = var->data.patch || var->data.location == VARYING_SLOT_TESS_LEVEL_INNER ||
                         var->data.location == VARYING_SLOT_TESS_LEVEL_OUTER;

   /* Invalid SPIR-V can cause this. */
   if ((name == TGSI_SEMANTIC_PATCH || name == TGSI_SEMANTIC_TESSINNER ||
        name == TGSI_SEMANTIC_TESSOUTER) != is_patch)
      return;

   if (!is_patch) {
      stride = get_tcs_out_vertex_dw_stride(ctx);
      dw_addr = get_tcs_out_current_patch_offset(ctx);
      dw_addr = get_dw_address_from_generic_indices(ctx, stride, dw_addr, vertex_index, param_index,
                                                    name, index);

      skip_lds_store = !info->reads_pervertex_outputs;
   } else {
      dw_addr = get_tcs_out_current_patch_data_offset(ctx);
      dw_addr = get_dw_address_from_generic_indices(ctx, NULL, dw_addr, vertex_index, param_index,
                                                    name, index);

      skip_lds_store = !info->reads_perpatch_outputs;

      /* Only a constant-index store to element 0 is treated as a tess factor store. */
      if (is_const && const_index == 0 &&
          (name == TGSI_SEMANTIC_TESSINNER || name == TGSI_SEMANTIC_TESSOUTER)) {
         /* Always write tess factors into LDS for the TCS epilog.
          * The epilog doesn't read LDS if invocation 0 defines tess factors.
          */
         skip_lds_store =
            !info->reads_tessfactor_outputs && info->tessfactors_are_def_in_all_invocs;
         is_tess_factor = true;
         is_tess_inner = name == TGSI_SEMANTIC_TESSINNER;
      }
   }

   buffer = get_tess_ring_descriptor(ctx, TESS_OFFCHIP_RING_TCS);

   base = ac_get_arg(&ctx->ac, ctx->tcs_offchip_offset);

   addr =
      get_tcs_tes_buffer_address_from_generic_indices(ctx, vertex_index, param_index, name, index);

   for (unsigned chan = component; chan < 8; chan++) {
      if (!(writemask & (1 << chan)))
         continue;
      LLVMValueRef value = ac_llvm_extract_elem(&ctx->ac, src, chan - component);

      unsigned buffer_store_offset = chan % 4;
      if (chan == 4) {
         /* Channels 4..7 belong to the following slot. */
         ubyte next_name = info->output_semantic_name[driver_location + 1];
         ubyte next_index = info->output_semantic_index[driver_location + 1];
         addr = get_tcs_tes_buffer_address_from_generic_indices(ctx, vertex_index, param_index,
                                                                next_name, next_index);
      }

      /* Skip LDS stores if there is no LDS read of this output. */
      if (!skip_lds_store)
         lshs_lds_store(ctx, chan, dw_addr, value);

      value = ac_to_integer(&ctx->ac, value);
      values[chan] = value;

      if (writemask != 0xF && !is_tess_factor) {
         ac_build_buffer_store_dword(&ctx->ac, buffer, value, 1, addr, base,
                                     4 * buffer_store_offset, ac_glc);
      }

      /* Write tess factors into VGPRs for the epilog. */
      if (is_tess_factor && info->tessfactors_are_def_in_all_invocs) {
         if (!is_tess_inner) {
            /* Outer factors own slots 0..3. Reject higher channels so a
             * malformed store can't overwrite the inner slots (4..5) or run
             * past them.
             */
            if (chan < 4) {
               LLVMBuildStore(ctx->ac.builder, value, /* outer */
                              ctx->invoc0_tess_factors[chan]);
            }
         } else if (chan < 2) {
            LLVMBuildStore(ctx->ac.builder, value, /* inner */
                           ctx->invoc0_tess_factors[4 + chan]);
         }
      }
   }

   if (writemask == 0xF && !is_tess_factor) {
      LLVMValueRef value = ac_build_gather_values(&ctx->ac, values, 4);
      ac_build_buffer_store_dword(&ctx->ac, buffer, value, 4, addr, base, 0, ac_glc);
   }
}
|
|
|
|
static LLVMValueRef si_load_tess_coord(struct ac_shader_abi *abi)
|
|
{
|
|
struct si_shader_context *ctx = si_shader_context_from_abi(abi);
|
|
LLVMValueRef coord[4] = {ac_get_arg(&ctx->ac, ctx->tes_u), ac_get_arg(&ctx->ac, ctx->tes_v),
|
|
ctx->ac.f32_0, ctx->ac.f32_0};
|
|
|
|
/* For triangles, the vector should be (u, v, 1-u-v). */
|
|
if (ctx->shader->selector->info.properties[TGSI_PROPERTY_TES_PRIM_MODE] == PIPE_PRIM_TRIANGLES) {
|
|
coord[2] = LLVMBuildFSub(ctx->ac.builder, ctx->ac.f32_1,
|
|
LLVMBuildFAdd(ctx->ac.builder, coord[0], coord[1], ""), "");
|
|
}
|
|
return ac_build_gather_values(&ctx->ac, coord, 4);
|
|
}
|
|
|
|
static LLVMValueRef load_tess_level(struct si_shader_context *ctx, unsigned semantic_name)
|
|
{
|
|
LLVMValueRef base, addr;
|
|
|
|
int param = si_shader_io_get_unique_index_patch(semantic_name, 0);
|
|
|
|
base = ac_get_arg(&ctx->ac, ctx->tcs_offchip_offset);
|
|
addr = get_tcs_tes_buffer_address(ctx, get_rel_patch_id(ctx), NULL,
|
|
LLVMConstInt(ctx->ac.i32, param, 0));
|
|
|
|
return buffer_load(ctx, ctx->ac.f32, ~0, ctx->tess_offchip_ring, base, addr, true);
|
|
}
|
|
|
|
static LLVMValueRef load_tess_level_default(struct si_shader_context *ctx, unsigned semantic_name)
|
|
{
|
|
LLVMValueRef buf, slot, val[4];
|
|
int i, offset;
|
|
|
|
slot = LLVMConstInt(ctx->ac.i32, SI_HS_CONST_DEFAULT_TESS_LEVELS, 0);
|
|
buf = ac_get_arg(&ctx->ac, ctx->rw_buffers);
|
|
buf = ac_build_load_to_sgpr(&ctx->ac, buf, slot);
|
|
offset = semantic_name == TGSI_SEMANTIC_TESS_DEFAULT_INNER_LEVEL ? 4 : 0;
|
|
|
|
for (i = 0; i < 4; i++)
|
|
val[i] = si_buffer_load_const(ctx, buf, LLVMConstInt(ctx->ac.i32, (offset + i) * 4, 0));
|
|
return ac_build_gather_values(&ctx->ac, val, 4);
|
|
}
|
|
|
|
static LLVMValueRef si_load_tess_level(struct ac_shader_abi *abi, unsigned varying_id,
|
|
bool load_default_state)
|
|
{
|
|
struct si_shader_context *ctx = si_shader_context_from_abi(abi);
|
|
unsigned semantic_name;
|
|
|
|
if (load_default_state) {
|
|
switch (varying_id) {
|
|
case VARYING_SLOT_TESS_LEVEL_INNER:
|
|
semantic_name = TGSI_SEMANTIC_TESS_DEFAULT_INNER_LEVEL;
|
|
break;
|
|
case VARYING_SLOT_TESS_LEVEL_OUTER:
|
|
semantic_name = TGSI_SEMANTIC_TESS_DEFAULT_OUTER_LEVEL;
|
|
break;
|
|
default:
|
|
unreachable("unknown tess level");
|
|
}
|
|
return load_tess_level_default(ctx, semantic_name);
|
|
}
|
|
|
|
switch (varying_id) {
|
|
case VARYING_SLOT_TESS_LEVEL_INNER:
|
|
semantic_name = TGSI_SEMANTIC_TESSINNER;
|
|
break;
|
|
case VARYING_SLOT_TESS_LEVEL_OUTER:
|
|
semantic_name = TGSI_SEMANTIC_TESSOUTER;
|
|
break;
|
|
default:
|
|
unreachable("unknown tess level");
|
|
}
|
|
|
|
return load_tess_level(ctx, semantic_name);
|
|
}
|
|
|
|
static LLVMValueRef si_load_patch_vertices_in(struct ac_shader_abi *abi)
|
|
{
|
|
struct si_shader_context *ctx = si_shader_context_from_abi(abi);
|
|
if (ctx->type == PIPE_SHADER_TESS_CTRL)
|
|
return si_unpack_param(ctx, ctx->tcs_out_lds_layout, 13, 6);
|
|
else if (ctx->type == PIPE_SHADER_TESS_EVAL)
|
|
return get_num_tcs_out_vertices(ctx);
|
|
else
|
|
unreachable("invalid shader stage for TGSI_SEMANTIC_VERTICESIN");
|
|
}
|
|
|
|
/**
|
|
* Forward all outputs from the vertex shader to the TES. This is only used
|
|
* for the fixed function TCS.
|
|
*/
|
|
static void si_copy_tcs_inputs(struct si_shader_context *ctx)
|
|
{
|
|
LLVMValueRef invocation_id, buffer, buffer_offset;
|
|
LLVMValueRef lds_vertex_stride, lds_base;
|
|
uint64_t inputs;
|
|
|
|
invocation_id = si_unpack_param(ctx, ctx->args.tcs_rel_ids, 8, 5);
|
|
buffer = get_tess_ring_descriptor(ctx, TESS_OFFCHIP_RING_TCS);
|
|
buffer_offset = ac_get_arg(&ctx->ac, ctx->tcs_offchip_offset);
|
|
|
|
lds_vertex_stride = get_tcs_in_vertex_dw_stride(ctx);
|
|
lds_base = get_tcs_in_current_patch_offset(ctx);
|
|
lds_base = ac_build_imad(&ctx->ac, invocation_id, lds_vertex_stride, lds_base);
|
|
|
|
inputs = ctx->shader->key.mono.u.ff_tcs_inputs_to_copy;
|
|
while (inputs) {
|
|
unsigned i = u_bit_scan64(&inputs);
|
|
|
|
LLVMValueRef lds_ptr =
|
|
LLVMBuildAdd(ctx->ac.builder, lds_base, LLVMConstInt(ctx->ac.i32, 4 * i, 0), "");
|
|
|
|
LLVMValueRef buffer_addr = get_tcs_tes_buffer_address(
|
|
ctx, get_rel_patch_id(ctx), invocation_id, LLVMConstInt(ctx->ac.i32, i, 0));
|
|
|
|
LLVMValueRef value = lshs_lds_load(ctx, ctx->ac.i32, ~0, lds_ptr);
|
|
|
|
ac_build_buffer_store_dword(&ctx->ac, buffer, value, 4, buffer_addr, buffer_offset, 0,
|
|
ac_glc);
|
|
}
|
|
}
|
|
|
|
/* Emit the code that writes the tessellation factors of the current patch.
 *
 * Only invocation 0 performs the stores (guarded by if-block 6503). The
 * factors come from the invoc0_tf_outer/inner values when the epilog key
 * says invocation 0 defines them, otherwise they are loaded from LDS after
 * a barrier.
 *
 * Output:
 *   - the tess factor ring: on GFX8 and older the patch with rel_patch_id 0
 *     first writes a control word, and the factors follow it;
 *   - the offchip ring as well, when the epilog key says the TES reads them.
 *
 * \param rel_patch_id   relative patch ID
 * \param invocation_id  TCS invocation ID
 * \param tcs_out_current_patch_data_offset  LDS address of the per-patch
 *                       outputs of the current patch
 * \param invoc0_tf_outer, invoc0_tf_inner   tess factors defined by invocation 0
 */
static void si_write_tess_factors(struct si_shader_context *ctx, LLVMValueRef rel_patch_id,
                                  LLVMValueRef invocation_id,
                                  LLVMValueRef tcs_out_current_patch_data_offset,
                                  LLVMValueRef invoc0_tf_outer[4], LLVMValueRef invoc0_tf_inner[2])
{
   struct si_shader *shader = ctx->shader;
   unsigned tess_inner_index, tess_outer_index;
   LLVMValueRef lds_base, lds_inner, lds_outer, byteoffset, buffer;
   LLVMValueRef out[6], vec0, vec1, tf_base, inner[4], outer[4];
   unsigned stride, outer_comps, inner_comps, i, offset;

   /* Determine the layout of one tess factor element in the buffer.
    *
    * This is done before any IR is emitted so that the bail-out for an
    * unknown primitive mode can't leave an if-block open or a stray
    * barrier behind.
    */
   switch (shader->key.part.tcs.epilog.prim_mode) {
   case PIPE_PRIM_LINES:
      stride = 2; /* 2 dwords, 1 vec2 store */
      outer_comps = 2;
      inner_comps = 0;
      break;
   case PIPE_PRIM_TRIANGLES:
      stride = 4; /* 4 dwords, 1 vec4 store */
      outer_comps = 3;
      inner_comps = 1;
      break;
   case PIPE_PRIM_QUADS:
      stride = 6; /* 6 dwords, 2 stores (vec4 + vec2) */
      outer_comps = 4;
      inner_comps = 2;
      break;
   default:
      assert(0);
      return;
   }

   /* Add a barrier before loading tess factors from LDS. */
   if (!shader->key.part.tcs.epilog.invoc0_tess_factors_are_def)
      si_llvm_emit_barrier(ctx);

   /* Do this only for invocation 0, because the tess levels are per-patch,
    * not per-vertex.
    *
    * This can't jump, because invocation 0 executes this. It should
    * at least mask out the loads and stores for other invocations.
    */
   ac_build_ifcc(&ctx->ac,
                 LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, invocation_id, ctx->ac.i32_0, ""), 6503);

   for (i = 0; i < 4; i++) {
      inner[i] = LLVMGetUndef(ctx->ac.i32);
      outer[i] = LLVMGetUndef(ctx->ac.i32);
   }

   if (shader->key.part.tcs.epilog.invoc0_tess_factors_are_def) {
      /* Tess factors are in VGPRs. */
      for (i = 0; i < outer_comps; i++)
         outer[i] = out[i] = invoc0_tf_outer[i];
      for (i = 0; i < inner_comps; i++)
         inner[i] = out[outer_comps + i] = invoc0_tf_inner[i];
   } else {
      /* Load tess_inner and tess_outer from LDS.
       * Any invocation can write them, so we can't get them from a temporary.
       */
      tess_inner_index = si_shader_io_get_unique_index_patch(TGSI_SEMANTIC_TESSINNER, 0);
      tess_outer_index = si_shader_io_get_unique_index_patch(TGSI_SEMANTIC_TESSOUTER, 0);

      lds_base = tcs_out_current_patch_data_offset;
      lds_inner = LLVMBuildAdd(ctx->ac.builder, lds_base,
                               LLVMConstInt(ctx->ac.i32, tess_inner_index * 4, 0), "");
      lds_outer = LLVMBuildAdd(ctx->ac.builder, lds_base,
                               LLVMConstInt(ctx->ac.i32, tess_outer_index * 4, 0), "");

      for (i = 0; i < outer_comps; i++) {
         outer[i] = out[i] = lshs_lds_load(ctx, ctx->ac.i32, i, lds_outer);
      }
      for (i = 0; i < inner_comps; i++) {
         inner[i] = out[outer_comps + i] = lshs_lds_load(ctx, ctx->ac.i32, i, lds_inner);
      }
   }

   if (shader->key.part.tcs.epilog.prim_mode == PIPE_PRIM_LINES) {
      /* For isolines, the hardware expects tess factors in the
       * reverse order from what NIR specifies.
       */
      LLVMValueRef tmp = out[0];
      out[0] = out[1];
      out[1] = tmp;
   }

   /* Convert the outputs to vectors for stores. */
   vec0 = ac_build_gather_values(&ctx->ac, out, MIN2(stride, 4));
   vec1 = NULL;

   if (stride > 4)
      vec1 = ac_build_gather_values(&ctx->ac, out + 4, stride - 4);

   /* Get the buffer. */
   buffer = get_tess_ring_descriptor(ctx, TCS_FACTOR_RING);

   /* Get the offset. */
   tf_base = ac_get_arg(&ctx->ac, ctx->tcs_factor_offset);
   byteoffset =
      LLVMBuildMul(ctx->ac.builder, rel_patch_id, LLVMConstInt(ctx->ac.i32, 4 * stride, 0), "");

   /* Only the patch with rel_patch_id 0 writes the control word. */
   ac_build_ifcc(&ctx->ac,
                 LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, rel_patch_id, ctx->ac.i32_0, ""), 6504);

   /* Store the dynamic HS control word. */
   offset = 0;
   if (ctx->screen->info.chip_class <= GFX8) {
      ac_build_buffer_store_dword(&ctx->ac, buffer, LLVMConstInt(ctx->ac.i32, 0x80000000, 0), 1,
                                  ctx->ac.i32_0, tf_base, offset, ac_glc);
      /* The factors of every patch follow the control word. */
      offset += 4;
   }

   ac_build_endif(&ctx->ac, 6504);

   /* Store the tessellation factors. */
   ac_build_buffer_store_dword(&ctx->ac, buffer, vec0, MIN2(stride, 4), byteoffset, tf_base, offset,
                               ac_glc);
   offset += 16;
   if (vec1)
      ac_build_buffer_store_dword(&ctx->ac, buffer, vec1, stride - 4, byteoffset, tf_base, offset,
                                  ac_glc);

   /* Store the tess factors into the offchip buffer if TES reads them. */
   if (shader->key.part.tcs.epilog.tes_reads_tess_factors) {
      LLVMValueRef buf, base, inner_vec, outer_vec, tf_outer_offset;
      LLVMValueRef tf_inner_offset;
      unsigned param_outer, param_inner;

      buf = get_tess_ring_descriptor(ctx, TESS_OFFCHIP_RING_TCS);
      base = ac_get_arg(&ctx->ac, ctx->tcs_offchip_offset);

      param_outer = si_shader_io_get_unique_index_patch(TGSI_SEMANTIC_TESSOUTER, 0);
      tf_outer_offset = get_tcs_tes_buffer_address(ctx, rel_patch_id, NULL,
                                                   LLVMConstInt(ctx->ac.i32, param_outer, 0));

      /* Without vec3 support the vector is padded to the next power of two. */
      unsigned outer_vec_size = ac_has_vec3_support(ctx->screen->info.chip_class, false)
                                   ? outer_comps
                                   : util_next_power_of_two(outer_comps);
      outer_vec = ac_build_gather_values(&ctx->ac, outer, outer_vec_size);

      ac_build_buffer_store_dword(&ctx->ac, buf, outer_vec, outer_comps, tf_outer_offset, base, 0,
                                  ac_glc);
      if (inner_comps) {
         param_inner = si_shader_io_get_unique_index_patch(TGSI_SEMANTIC_TESSINNER, 0);
         tf_inner_offset = get_tcs_tes_buffer_address(ctx, rel_patch_id, NULL,
                                                      LLVMConstInt(ctx->ac.i32, param_inner, 0));

         inner_vec =
            inner_comps == 1 ? inner[0] : ac_build_gather_values(&ctx->ac, inner, inner_comps);
         ac_build_buffer_store_dword(&ctx->ac, buf, inner_vec, inner_comps, tf_inner_offset, base,
                                     0, ac_glc);
      }
   }

   ac_build_endif(&ctx->ac, 6503);
}
|
|
|
|
/* This only writes the tessellation factor levels. */
|
|
static void si_llvm_emit_tcs_epilogue(struct ac_shader_abi *abi, unsigned max_outputs,
|
|
LLVMValueRef *addrs)
|
|
{
|
|
struct si_shader_context *ctx = si_shader_context_from_abi(abi);
|
|
LLVMBuilderRef builder = ctx->ac.builder;
|
|
LLVMValueRef rel_patch_id, invocation_id, tf_lds_offset;
|
|
|
|
si_copy_tcs_inputs(ctx);
|
|
|
|
rel_patch_id = get_rel_patch_id(ctx);
|
|
invocation_id = si_unpack_param(ctx, ctx->args.tcs_rel_ids, 8, 5);
|
|
tf_lds_offset = get_tcs_out_current_patch_data_offset(ctx);
|
|
|
|
if (ctx->screen->info.chip_class >= GFX9) {
|
|
LLVMBasicBlockRef blocks[2] = {LLVMGetInsertBlock(builder), ctx->merged_wrap_if_entry_block};
|
|
LLVMValueRef values[2];
|
|
|
|
ac_build_endif(&ctx->ac, ctx->merged_wrap_if_label);
|
|
|
|
values[0] = rel_patch_id;
|
|
values[1] = LLVMGetUndef(ctx->ac.i32);
|
|
rel_patch_id = ac_build_phi(&ctx->ac, ctx->ac.i32, 2, values, blocks);
|
|
|
|
values[0] = tf_lds_offset;
|
|
values[1] = LLVMGetUndef(ctx->ac.i32);
|
|
tf_lds_offset = ac_build_phi(&ctx->ac, ctx->ac.i32, 2, values, blocks);
|
|
|
|
values[0] = invocation_id;
|
|
values[1] = ctx->ac.i32_1; /* cause the epilog to skip threads */
|
|
invocation_id = ac_build_phi(&ctx->ac, ctx->ac.i32, 2, values, blocks);
|
|
}
|
|
|
|
/* Return epilog parameters from this function. */
|
|
LLVMValueRef ret = ctx->return_value;
|
|
unsigned vgpr;
|
|
|
|
if (ctx->screen->info.chip_class >= GFX9) {
|
|
ret =
|
|
si_insert_input_ret(ctx, ret, ctx->tcs_offchip_layout, 8 + GFX9_SGPR_TCS_OFFCHIP_LAYOUT);
|
|
ret = si_insert_input_ret(ctx, ret, ctx->tcs_out_lds_layout, 8 + GFX9_SGPR_TCS_OUT_LAYOUT);
|
|
/* Tess offchip and tess factor offsets are at the beginning. */
|
|
ret = si_insert_input_ret(ctx, ret, ctx->tcs_offchip_offset, 2);
|
|
ret = si_insert_input_ret(ctx, ret, ctx->tcs_factor_offset, 4);
|
|
vgpr = 8 + GFX9_SGPR_TCS_OUT_LAYOUT + 1;
|
|
} else {
|
|
ret = si_insert_input_ret(ctx, ret, ctx->tcs_offchip_layout, GFX6_SGPR_TCS_OFFCHIP_LAYOUT);
|
|
ret = si_insert_input_ret(ctx, ret, ctx->tcs_out_lds_layout, GFX6_SGPR_TCS_OUT_LAYOUT);
|
|
/* Tess offchip and tess factor offsets are after user SGPRs. */
|
|
ret = si_insert_input_ret(ctx, ret, ctx->tcs_offchip_offset, GFX6_TCS_NUM_USER_SGPR);
|
|
ret = si_insert_input_ret(ctx, ret, ctx->tcs_factor_offset, GFX6_TCS_NUM_USER_SGPR + 1);
|
|
vgpr = GFX6_TCS_NUM_USER_SGPR + 2;
|
|
}
|
|
|
|
/* VGPRs */
|
|
rel_patch_id = ac_to_float(&ctx->ac, rel_patch_id);
|
|
invocation_id = ac_to_float(&ctx->ac, invocation_id);
|
|
tf_lds_offset = ac_to_float(&ctx->ac, tf_lds_offset);
|
|
|
|
/* Leave a hole corresponding to the two input VGPRs. This ensures that
|
|
* the invocation_id output does not alias the tcs_rel_ids input,
|
|
* which saves a V_MOV on gfx9.
|
|
*/
|
|
vgpr += 2;
|
|
|
|
ret = LLVMBuildInsertValue(builder, ret, rel_patch_id, vgpr++, "");
|
|
ret = LLVMBuildInsertValue(builder, ret, invocation_id, vgpr++, "");
|
|
|
|
if (ctx->shader->selector->info.tessfactors_are_def_in_all_invocs) {
|
|
vgpr++; /* skip the tess factor LDS offset */
|
|
for (unsigned i = 0; i < 6; i++) {
|
|
LLVMValueRef value = LLVMBuildLoad(builder, ctx->invoc0_tess_factors[i], "");
|
|
value = ac_to_float(&ctx->ac, value);
|
|
ret = LLVMBuildInsertValue(builder, ret, value, vgpr++, "");
|
|
}
|
|
} else {
|
|
ret = LLVMBuildInsertValue(builder, ret, tf_lds_offset, vgpr++, "");
|
|
}
|
|
ctx->return_value = ret;
|
|
}
|
|
|
|
/* Pass TCS inputs from LS to TCS on GFX9. */
|
|
static void si_set_ls_return_value_for_tcs(struct si_shader_context *ctx)
|
|
{
|
|
LLVMValueRef ret = ctx->return_value;
|
|
|
|
ret = si_insert_input_ptr(ctx, ret, ctx->other_const_and_shader_buffers, 0);
|
|
ret = si_insert_input_ptr(ctx, ret, ctx->other_samplers_and_images, 1);
|
|
ret = si_insert_input_ret(ctx, ret, ctx->tcs_offchip_offset, 2);
|
|
ret = si_insert_input_ret(ctx, ret, ctx->merged_wave_info, 3);
|
|
ret = si_insert_input_ret(ctx, ret, ctx->tcs_factor_offset, 4);
|
|
ret = si_insert_input_ret(ctx, ret, ctx->merged_scratch_offset, 5);
|
|
|
|
ret = si_insert_input_ptr(ctx, ret, ctx->rw_buffers, 8 + SI_SGPR_RW_BUFFERS);
|
|
ret = si_insert_input_ptr(ctx, ret, ctx->bindless_samplers_and_images,
|
|
8 + SI_SGPR_BINDLESS_SAMPLERS_AND_IMAGES);
|
|
|
|
ret = si_insert_input_ret(ctx, ret, ctx->vs_state_bits, 8 + SI_SGPR_VS_STATE_BITS);
|
|
|
|
ret = si_insert_input_ret(ctx, ret, ctx->tcs_offchip_layout, 8 + GFX9_SGPR_TCS_OFFCHIP_LAYOUT);
|
|
ret = si_insert_input_ret(ctx, ret, ctx->tcs_out_lds_offsets, 8 + GFX9_SGPR_TCS_OUT_OFFSETS);
|
|
ret = si_insert_input_ret(ctx, ret, ctx->tcs_out_lds_layout, 8 + GFX9_SGPR_TCS_OUT_LAYOUT);
|
|
|
|
unsigned vgpr = 8 + GFX9_TCS_NUM_USER_SGPR;
|
|
ret = LLVMBuildInsertValue(ctx->ac.builder, ret,
|
|
ac_to_float(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args.tcs_patch_id)),
|
|
vgpr++, "");
|
|
ret = LLVMBuildInsertValue(ctx->ac.builder, ret,
|
|
ac_to_float(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args.tcs_rel_ids)),
|
|
vgpr++, "");
|
|
ctx->return_value = ret;
|
|
}
|
|
|
|
/* ABI emit_outputs hook for a vertex shader compiled as LS.
 *
 * Each enabled output channel is loaded from addrs[4 * output + channel] and
 * stored to LDS at (rel_auto_id * vertex dword stride) + 4 * unique output
 * index. On GFX9 and later the return value for the TCS part is filled in
 * afterwards. max_outputs is not used here.
 */
void si_llvm_emit_ls_epilogue(struct ac_shader_abi *abi, unsigned max_outputs, LLVMValueRef *addrs)
{
   struct si_shader_context *ctx = si_shader_context_from_abi(abi);
   struct si_shader_info *info = &ctx->shader->selector->info;
   LLVMBuilderRef builder = ctx->ac.builder;

   LLVMValueRef vertex_id = ac_get_arg(&ctx->ac, ctx->rel_auto_id);
   LLVMValueRef vertex_dw_stride = get_tcs_in_vertex_dw_stride(ctx);
   LLVMValueRef vertex_base = LLVMBuildMul(builder, vertex_id, vertex_dw_stride, "");

   /* Write outputs to LDS. The next shader (TCS aka HS) will read
    * its inputs from it. */
   for (unsigned out = 0; out < info->num_outputs; out++) {
      const unsigned sem_name = info->output_semantic_name[out];
      const unsigned sem_index = info->output_semantic_index[out];

      /* The ARB_shader_viewport_layer_array spec contains the
       * following issue:
       *
       *    2) What happens if gl_ViewportIndex or gl_Layer is
       *    written in the vertex shader and a geometry shader is
       *    present?
       *
       *    RESOLVED: The value written by the last vertex processing
       *    stage is used. If the last vertex processing stage
       *    (vertex, tessellation evaluation or geometry) does not
       *    statically assign to gl_ViewportIndex or gl_Layer, index
       *    or layer zero is assumed.
       *
       * So writes to those outputs in VS-as-LS are simply ignored.
       */
      const bool ignored =
         sem_name == TGSI_SEMANTIC_LAYER || sem_name == TGSI_SEMANTIC_VIEWPORT_INDEX;
      if (ignored)
         continue;

      int slot = si_shader_io_get_unique_index(sem_name, sem_index, false);
      LLVMValueRef out_addr =
         LLVMBuildAdd(builder, vertex_base, LLVMConstInt(ctx->ac.i32, slot * 4, 0), "");

      /* Only channels present in the usage mask are stored. */
      for (unsigned c = 0; c < 4; c++) {
         if (info->output_usagemask[out] & (1 << c)) {
            LLVMValueRef value = LLVMBuildLoad(builder, addrs[4 * out + c], "");

            lshs_lds_store(ctx, c, out_addr, value);
         }
      }
   }

   if (ctx->screen->info.chip_class >= GFX9)
      si_set_ls_return_value_for_tcs(ctx);
}
|
|
|
|
/**
|
|
* Compile the TCS epilog function. This writes tesselation factors to memory
|
|
* based on the output primitive type of the tesselator (determined by TES).
|
|
*/
|
|
void si_llvm_build_tcs_epilog(struct si_shader_context *ctx, union si_shader_part_key *key)
|
|
{
|
|
memset(&ctx->args, 0, sizeof(ctx->args));
|
|
|
|
if (ctx->screen->info.chip_class >= GFX9) {
|
|
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
|
|
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
|
|
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->tcs_offchip_offset);
|
|
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); /* wave info */
|
|
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->tcs_factor_offset);
|
|
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
|
|
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
|
|
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
|
|
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
|
|
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
|
|
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
|
|
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
|
|
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
|
|
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
|
|
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
|
|
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
|
|
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->tcs_offchip_layout);
|
|
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
|
|
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->tcs_out_lds_layout);
|
|
} else {
|
|
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
|
|
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
|
|
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
|
|
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
|
|
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->tcs_offchip_layout);
|
|
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
|
|
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->tcs_out_lds_layout);
|
|
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
|
|
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->tcs_offchip_offset);
|
|
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->tcs_factor_offset);
|
|
}
|
|
|
|
ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* VGPR gap */
|
|
ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* VGPR gap */
|
|
struct ac_arg rel_patch_id; /* patch index within the wave (REL_PATCH_ID) */
|
|
ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &rel_patch_id);
|
|
struct ac_arg invocation_id; /* invocation ID within the patch */
|
|
ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &invocation_id);
|
|
struct ac_arg
|
|
tcs_out_current_patch_data_offset; /* LDS offset where tess factors should be loaded from */
|
|
ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &tcs_out_current_patch_data_offset);
|
|
|
|
struct ac_arg tess_factors[6];
|
|
for (unsigned i = 0; i < 6; i++)
|
|
ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &tess_factors[i]);
|
|
|
|
/* Create the function. */
|
|
si_llvm_create_func(ctx, "tcs_epilog", NULL, 0, ctx->screen->info.chip_class >= GFX7 ? 128 : 0);
|
|
ac_declare_lds_as_pointer(&ctx->ac);
|
|
|
|
LLVMValueRef invoc0_tess_factors[6];
|
|
for (unsigned i = 0; i < 6; i++)
|
|
invoc0_tess_factors[i] = ac_get_arg(&ctx->ac, tess_factors[i]);
|
|
|
|
si_write_tess_factors(ctx, ac_get_arg(&ctx->ac, rel_patch_id),
|
|
ac_get_arg(&ctx->ac, invocation_id),
|
|
ac_get_arg(&ctx->ac, tcs_out_current_patch_data_offset),
|
|
invoc0_tess_factors, invoc0_tess_factors + 4);
|
|
|
|
LLVMBuildRetVoid(ctx->ac.builder);
|
|
}
|
|
|
|
void si_llvm_init_tcs_callbacks(struct si_shader_context *ctx)
|
|
{
|
|
ctx->abi.load_tess_varyings = si_nir_load_tcs_varyings;
|
|
ctx->abi.load_tess_level = si_load_tess_level;
|
|
ctx->abi.store_tcs_outputs = si_nir_store_output_tcs;
|
|
ctx->abi.emit_outputs = si_llvm_emit_tcs_epilogue;
|
|
ctx->abi.load_patch_vertices_in = si_load_patch_vertices_in;
|
|
}
|
|
|
|
/* Install the TES implementations of the ABI callbacks on ctx->abi.
 *
 * The emit_outputs hook is chosen by the first matching condition:
 * key.as_es, then ngg_cull_shader, then key.as_ngg, otherwise the VS epilogue.
 */
void si_llvm_init_tes_callbacks(struct si_shader_context *ctx, bool ngg_cull_shader)
{
   struct ac_shader_abi *abi = &ctx->abi;

   abi->load_tess_varyings = si_nir_load_input_tes;
   abi->load_tess_coord = si_load_tess_coord;
   abi->load_tess_level = si_load_tess_level;
   abi->load_patch_vertices_in = si_load_patch_vertices_in;

   if (ctx->shader->key.as_es) {
      abi->emit_outputs = si_llvm_emit_es_epilogue;
      return;
   }

   if (ngg_cull_shader) {
      abi->emit_outputs = gfx10_emit_ngg_culling_epilogue_4x_wave32;
      return;
   }

   if (ctx->shader->key.as_ngg) {
      abi->emit_outputs = gfx10_emit_ngg_epilogue;
      return;
   }

   abi->emit_outputs = si_llvm_emit_vs_epilogue;
}
|