/*
 * Copyright 2018 Collabora Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include "zink_context.h"
#include "zink_compiler.h"
#include "zink_program.h"
#include "zink_screen.h"
#include "nir_to_spirv/nir_to_spirv.h"

#include "pipe/p_state.h"

#include "nir.h"
#include "compiler/nir/nir_builder.h"

#include "nir/tgsi_to_nir.h"
#include "tgsi/tgsi_dump.h"
#include "tgsi/tgsi_from_mesa.h"

#include "util/u_memory.h"

#include "compiler/spirv/nir_spirv.h"
#include "vulkan/util/vk_util.h"

bool
zink_lower_cubemap_to_array(nir_shader *s, uint32_t nonseamless_cube_mask);

static void
create_vs_pushconst(nir_shader *nir)
{
   nir_variable *vs_pushconst;
   /* create compatible layout for the ntv push constant loader */
   struct glsl_struct_field *fields = rzalloc_array(nir, struct glsl_struct_field, 2);
   fields[0].type = glsl_array_type(glsl_uint_type(), 1, 0);
   fields[0].name = ralloc_asprintf(nir, "draw_mode_is_indexed");
   fields[0].offset = offsetof(struct zink_gfx_push_constant, draw_mode_is_indexed);
   fields[1].type = glsl_array_type(glsl_uint_type(), 1, 0);
   fields[1].name = ralloc_asprintf(nir, "draw_id");
   fields[1].offset = offsetof(struct zink_gfx_push_constant, draw_id);
   vs_pushconst = nir_variable_create(nir, nir_var_mem_push_const,
                                      glsl_struct_type(fields, 2, "struct", false), "vs_pushconst");
   vs_pushconst->data.location = INT_MAX; //doesn't really matter
}

static void
create_cs_pushconst(nir_shader *nir)
{
   nir_variable *cs_pushconst;
   /* create compatible layout for the ntv push constant loader */
   struct glsl_struct_field *fields = rzalloc_size(nir, 1 * sizeof(struct glsl_struct_field));
   fields[0].type = glsl_array_type(glsl_uint_type(), 1, 0);
   fields[0].name = ralloc_asprintf(nir, "work_dim");
   fields[0].offset = 0;
   cs_pushconst = nir_variable_create(nir, nir_var_mem_push_const,
                                      glsl_struct_type(fields, 1, "struct", false), "cs_pushconst");
   cs_pushconst->data.location = INT_MAX; //doesn't really matter
}

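/* For reference, a minimal sketch of the push constant block these loaders
 * are laid out against (field order assumed from the offsetof() uses above;
 * the real definition lives in the zink headers and may carry more fields):
 *
 *    struct zink_gfx_push_constant {
 *       unsigned draw_mode_is_indexed;
 *       unsigned draw_id;
 *    };
 *
 * Each field is exposed as a uint[1] array member so the ntv push constant
 * loader can address it with a constant offset.
 */
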
static bool
reads_work_dim(nir_shader *shader)
{
   return BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_WORK_DIM);
}

static bool
lower_work_dim_instr(nir_builder *b, nir_instr *in, void *data)
{
   if (in->type != nir_instr_type_intrinsic)
      return false;
   nir_intrinsic_instr *instr = nir_instr_as_intrinsic(in);
   if (instr->intrinsic != nir_intrinsic_load_work_dim)
      return false;

   b->cursor = nir_after_instr(&instr->instr);
   nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_push_constant);
   load->src[0] = nir_src_for_ssa(nir_imm_int(b, 0));
   nir_intrinsic_set_range(load, 3 * sizeof(uint32_t));
   load->num_components = 1;
   nir_ssa_dest_init(&load->instr, &load->dest, 1, 32, "work_dim");
   nir_builder_instr_insert(b, &load->instr);

   nir_ssa_def_rewrite_uses(&instr->dest.ssa, &load->dest.ssa);

   return true;
}

static bool
lower_work_dim(nir_shader *shader)
{
   if (shader->info.stage != MESA_SHADER_KERNEL)
      return false;

   if (!reads_work_dim(shader))
      return false;

   return nir_shader_instructions_pass(shader, lower_work_dim_instr, nir_metadata_dominance, NULL);
}

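/* Illustrative before/after for the pass above (NIR-ish pseudocode, not
 * verbatim compiler output):
 *
 *    before:  %x = load_work_dim
 *    after:   %x = load_push_constant (offset = 0)
 *
 * i.e. the work dimension is sourced from the cs push constant block
 * created by create_cs_pushconst().
 */
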
static bool
lower_64bit_vertex_attribs_instr(nir_builder *b, nir_instr *instr, void *data)
{
   if (instr->type != nir_instr_type_intrinsic)
      return false;
   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
   if (intr->intrinsic != nir_intrinsic_load_deref)
      return false;
   nir_variable *var = nir_deref_instr_get_variable(nir_instr_as_deref(intr->src[0].ssa->parent_instr));
   if (var->data.mode != nir_var_shader_in)
      return false;
   if (!glsl_type_is_64bit(var->type) || !glsl_type_is_vector(var->type) || glsl_get_vector_elements(var->type) < 3)
      return false;

   /* create second variable for the split */
   nir_variable *var2 = nir_variable_clone(var, b->shader);
   /* split new variable into second slot */
   var2->data.driver_location++;
   nir_shader_add_variable(b->shader, var2);

   unsigned total_num_components = glsl_get_vector_elements(var->type);
   /* new variable is the second half of the dvec */
   var2->type = glsl_vector_type(glsl_get_base_type(var->type), glsl_get_vector_elements(var->type) - 2);
   /* clamp original variable to a dvec2 */
   var->type = glsl_vector_type(glsl_get_base_type(var->type), 2);

   b->cursor = nir_after_instr(instr);

   /* this is the first load instruction for the first half of the dvec3/4 components */
   nir_ssa_def *load = nir_load_var(b, var);
   /* this is the second load instruction for the second half of the dvec3/4 components */
   nir_ssa_def *load2 = nir_load_var(b, var2);

   nir_ssa_def *def[4];
   /* create a new dvec3/4 comprised of all the loaded components from both variables */
   def[0] = nir_vector_extract(b, load, nir_imm_int(b, 0));
   def[1] = nir_vector_extract(b, load, nir_imm_int(b, 1));
   def[2] = nir_vector_extract(b, load2, nir_imm_int(b, 0));
   if (total_num_components == 4)
      def[3] = nir_vector_extract(b, load2, nir_imm_int(b, 1));
   nir_ssa_def *new_vec = nir_vec(b, def, total_num_components);
   /* use the assembled dvec3/4 for all other uses of the load */
   nir_ssa_def_rewrite_uses_after(&intr->dest.ssa, new_vec,
                                  new_vec->parent_instr);

   /* remove the original instr and its deref chain */
   nir_instr *parent = intr->src[0].ssa->parent_instr;
   nir_instr_remove(instr);
   nir_deref_instr_remove_if_unused(nir_instr_as_deref(parent));

   return true;
}

/* mesa/gallium always provides UINT versions of 64bit formats:
 * - rewrite loads as 32bit vec loads
 * - cast back to 64bit
 */
static bool
lower_64bit_uint_attribs_instr(nir_builder *b, nir_instr *instr, void *data)
{
   if (instr->type != nir_instr_type_intrinsic)
      return false;
   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
   if (intr->intrinsic != nir_intrinsic_load_deref)
      return false;
   nir_variable *var = nir_deref_instr_get_variable(nir_instr_as_deref(intr->src[0].ssa->parent_instr));
   if (var->data.mode != nir_var_shader_in)
      return false;
   if (glsl_get_bit_size(var->type) != 64 || glsl_get_base_type(var->type) >= GLSL_TYPE_SAMPLER)
      return false;

   unsigned num_components = glsl_get_vector_elements(var->type);
   enum glsl_base_type base_type;
   switch (glsl_get_base_type(var->type)) {
   case GLSL_TYPE_UINT64:
      base_type = GLSL_TYPE_UINT;
      break;
   case GLSL_TYPE_INT64:
      base_type = GLSL_TYPE_INT;
      break;
   case GLSL_TYPE_DOUBLE:
      base_type = GLSL_TYPE_FLOAT;
      break;
   default:
      unreachable("unknown 64-bit vertex attribute format!");
   }
   var->type = glsl_vector_type(base_type, num_components * 2);

   b->cursor = nir_after_instr(instr);

   nir_ssa_def *load = nir_load_var(b, var);
   nir_ssa_def *casted[2];
   for (unsigned i = 0; i < num_components; i++)
      casted[i] = nir_pack_64_2x32(b, nir_channels(b, load, BITFIELD_RANGE(i * 2, 2)));
   nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_vec(b, casted, num_components));

   /* remove the original instr and its deref chain */
   nir_instr *parent = intr->src[0].ssa->parent_instr;
   nir_instr_remove(instr);
   nir_deref_instr_remove_if_unused(nir_instr_as_deref(parent));

   return true;
}

/* "64-bit three- and four-component vectors consume two consecutive locations."
 *  - 14.1.4. Location Assignment
 *
 * this pass splits dvec3 and dvec4 vertex inputs into a dvec2 and a double/dvec2 which
 * are assigned to consecutive locations, loaded separately, and then assembled back into a
 * composite value that's used in place of the original loaded ssa src
 */
static bool
lower_64bit_vertex_attribs(nir_shader *shader)
{
   if (shader->info.stage != MESA_SHADER_VERTEX)
      return false;

   bool progress = nir_shader_instructions_pass(shader, lower_64bit_vertex_attribs_instr, nir_metadata_dominance, NULL);
   progress |= nir_shader_instructions_pass(shader, lower_64bit_uint_attribs_instr, nir_metadata_dominance, NULL);
   return progress;
}

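/* Worked example for the split (hedged sketch, GLSL-level view; names are
 * illustrative — the clone keeps the original name and bumps
 * driver_location):
 *
 *    in dvec4 attr;       // consumes locations N and N+1
 *
 * becomes, after lower_64bit_vertex_attribs_instr:
 *
 *    in dvec2 attr;       // location N, holds x/y
 *    in dvec2 attr2;      // location N+1, holds z/w
 *    ... dvec4(attr.x, attr.y, attr2.x, attr2.y) used in place of the
 *    original load
 */
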
static bool
lower_basevertex_instr(nir_builder *b, nir_instr *in, void *data)
{
   if (in->type != nir_instr_type_intrinsic)
      return false;
   nir_intrinsic_instr *instr = nir_instr_as_intrinsic(in);
   if (instr->intrinsic != nir_intrinsic_load_base_vertex)
      return false;

   b->cursor = nir_after_instr(&instr->instr);
   nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_push_constant);
   load->src[0] = nir_src_for_ssa(nir_imm_int(b, 0));
   nir_intrinsic_set_range(load, 4);
   load->num_components = 1;
   nir_ssa_dest_init(&load->instr, &load->dest, 1, 32, "draw_mode_is_indexed");
   nir_builder_instr_insert(b, &load->instr);

   nir_ssa_def *composite = nir_build_alu(b, nir_op_bcsel,
                                          nir_build_alu(b, nir_op_ieq, &load->dest.ssa, nir_imm_int(b, 1), NULL, NULL),
                                          &instr->dest.ssa,
                                          nir_imm_int(b, 0),
                                          NULL);

   nir_ssa_def_rewrite_uses_after(&instr->dest.ssa, composite,
                                  composite->parent_instr);
   return true;
}

static bool
lower_basevertex(nir_shader *shader)
{
   if (shader->info.stage != MESA_SHADER_VERTEX)
      return false;

   if (!BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_BASE_VERTEX))
      return false;

   return nir_shader_instructions_pass(shader, lower_basevertex_instr, nir_metadata_dominance, NULL);
}

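/* In effect (illustrative pseudocode):
 *
 *    gl_BaseVertex = draw_mode_is_indexed == 1 ? base_vertex : 0;
 *
 * GL requires gl_BaseVertex to read as zero for non-indexed draws, so the
 * push constant flag written at draw time selects between the hardware
 * base vertex and that zero.
 */
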
static bool
lower_drawid_instr(nir_builder *b, nir_instr *in, void *data)
{
   if (in->type != nir_instr_type_intrinsic)
      return false;
   nir_intrinsic_instr *instr = nir_instr_as_intrinsic(in);
   if (instr->intrinsic != nir_intrinsic_load_draw_id)
      return false;

   b->cursor = nir_before_instr(&instr->instr);
   nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_push_constant);
   load->src[0] = nir_src_for_ssa(nir_imm_int(b, 1));
   nir_intrinsic_set_range(load, 4);
   load->num_components = 1;
   nir_ssa_dest_init(&load->instr, &load->dest, 1, 32, "draw_id");
   nir_builder_instr_insert(b, &load->instr);

   nir_ssa_def_rewrite_uses(&instr->dest.ssa, &load->dest.ssa);

   return true;
}

static bool
lower_drawid(nir_shader *shader)
{
   if (shader->info.stage != MESA_SHADER_VERTEX)
      return false;

   if (!BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_DRAW_ID))
      return false;

   return nir_shader_instructions_pass(shader, lower_drawid_instr, nir_metadata_dominance, NULL);
}

static bool
lower_dual_blend(nir_shader *shader)
{
   bool progress = false;
   nir_variable *var = nir_find_variable_with_location(shader, nir_var_shader_out, FRAG_RESULT_DATA1);
   if (var) {
      var->data.location = FRAG_RESULT_DATA0;
      var->data.index = 1;
      progress = true;
   }
   nir_shader_preserve_all_metadata(shader);
   return progress;
}

void
zink_screen_init_compiler(struct zink_screen *screen)
{
   static const struct nir_shader_compiler_options
   default_options = {
      .lower_ffma16 = true,
      .lower_ffma32 = true,
      .lower_ffma64 = true,
      .lower_scmp = true,
      .lower_fdph = true,
      .lower_flrp32 = true,
      .lower_fpow = true,
      .lower_fsat = true,
      .lower_extract_byte = true,
      .lower_extract_word = true,
      .lower_insert_byte = true,
      .lower_insert_word = true,
      .lower_mul_high = true,
      .lower_rotate = true,
      .lower_uadd_carry = true,
      .lower_uadd_sat = true,
      .lower_usub_sat = true,
      .lower_vector_cmp = true,
      .lower_int64_options = 0,
      .lower_doubles_options = 0,
      .lower_uniforms_to_ubo = true,
      .has_fsub = true,
      .has_isub = true,
      .has_txs = true,
      .lower_mul_2x32_64 = true,
      .support_16bit_alu = true, /* not quite what it sounds like */
   };

   screen->nir_options = default_options;

   if (!screen->info.feats.features.shaderInt64)
      screen->nir_options.lower_int64_options = ~0;

   if (!screen->info.feats.features.shaderFloat64) {
      screen->nir_options.lower_doubles_options = ~0;
      screen->nir_options.lower_flrp64 = true;
      screen->nir_options.lower_ffma64 = true;
   }

   /* The OpFRem and OpFMod instructions use cheap approximations of remainder,
    * and the error can be large due to the discontinuity in trunc() and floor().
    * This can produce mathematically unexpected results in some cases, such as
    * FMod(x,x) computing x rather than 0, and can also cause the result to have
    * a different sign than the infinitely precise result.
    *
    *    - Table 84. Precision of core SPIR-V Instructions
    *
    * for drivers that are known to have imprecise fmod for doubles, lower dmod
    */
   if (screen->info.driver_props.driverID == VK_DRIVER_ID_MESA_RADV ||
       screen->info.driver_props.driverID == VK_DRIVER_ID_AMD_OPEN_SOURCE ||
       screen->info.driver_props.driverID == VK_DRIVER_ID_AMD_PROPRIETARY)
      screen->nir_options.lower_doubles_options = nir_lower_dmod;
}

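/* nir_lower_dmod rewrites the double-precision mod in terms of ops these
 * drivers handle precisely; conceptually (sketch, not the exact lowering
 * sequence the pass emits):
 *
 *    dmod(x, y) = x - y * floor(x / y)
 *
 * which keeps the result's sign matching y, as GLSL's mod() requires.
 */
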
const void *
zink_get_compiler_options(struct pipe_screen *pscreen,
                          enum pipe_shader_ir ir,
                          enum pipe_shader_type shader)
{
   assert(ir == PIPE_SHADER_IR_NIR);
   return &zink_screen(pscreen)->nir_options;
}

struct nir_shader *
zink_tgsi_to_nir(struct pipe_screen *screen, const struct tgsi_token *tokens)
{
   if (zink_debug & ZINK_DEBUG_TGSI) {
      fprintf(stderr, "TGSI shader:\n---8<---\n");
      tgsi_dump_to_file(tokens, 0, stderr);
      fprintf(stderr, "---8<---\n\n");
   }

   return tgsi_to_nir(tokens, screen, false);
}

static bool
dest_is_64bit(nir_dest *dest, void *state)
{
   bool *lower = (bool *)state;
   if (dest && (nir_dest_bit_size(*dest) == 64)) {
      *lower = true;
      return false;
   }
   return true;
}

static bool
src_is_64bit(nir_src *src, void *state)
{
   bool *lower = (bool *)state;
   if (src && (nir_src_bit_size(*src) == 64)) {
      *lower = true;
      return false;
   }
   return true;
}

static bool
filter_64_bit_instr(const nir_instr *const_instr, UNUSED const void *data)
{
   bool lower = false;
   /* lower_alu_to_scalar hands us a const nir_instr, but nir_foreach_*
    * doesn't have const variants, so do the ugly const_cast here. */
   nir_instr *instr = (nir_instr *)const_instr;

   nir_foreach_dest(instr, dest_is_64bit, &lower);
   if (lower)
      return true;
   nir_foreach_src(instr, src_is_64bit, &lower);
   return lower;
}

static bool
filter_pack_instr(const nir_instr *const_instr, UNUSED const void *data)
{
   nir_instr *instr = (nir_instr *)const_instr;
   nir_alu_instr *alu = nir_instr_as_alu(instr);
   switch (alu->op) {
   case nir_op_pack_64_2x32_split:
   case nir_op_pack_32_2x16_split:
   case nir_op_unpack_32_2x16_split_x:
   case nir_op_unpack_32_2x16_split_y:
   case nir_op_unpack_64_2x32_split_x:
   case nir_op_unpack_64_2x32_split_y:
      return true;
   default:
      break;
   }
   return false;
}

struct bo_vars {
   nir_variable *uniforms[5];
   nir_variable *ubo[5];
   nir_variable *ssbo[5];
   uint32_t first_ubo;
   uint32_t first_ssbo;
};

static struct bo_vars
get_bo_vars(struct zink_shader *zs, nir_shader *shader)
{
   struct bo_vars bo;
   memset(&bo, 0, sizeof(bo));
   if (zs->ubos_used)
      bo.first_ubo = ffs(zs->ubos_used & ~BITFIELD_BIT(0)) - 2;
   assert(bo.first_ubo < PIPE_MAX_CONSTANT_BUFFERS);
   if (zs->ssbos_used)
      bo.first_ssbo = ffs(zs->ssbos_used) - 1;
   assert(bo.first_ssbo < PIPE_MAX_SHADER_BUFFERS);
   nir_foreach_variable_with_modes(var, shader, nir_var_mem_ssbo | nir_var_mem_ubo) {
      unsigned idx = glsl_get_explicit_stride(glsl_get_struct_field(glsl_without_array(var->type), 0)) >> 1;
      if (var->data.mode == nir_var_mem_ssbo) {
         assert(!bo.ssbo[idx]);
         bo.ssbo[idx] = var;
      } else {
         if (var->data.driver_location) {
            assert(!bo.ubo[idx]);
            bo.ubo[idx] = var;
         } else {
            assert(!bo.uniforms[idx]);
            bo.uniforms[idx] = var;
         }
      }
   }
   return bo;
}

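/* The uniforms/ubo/ssbo arrays above are indexed by a compressed bit-size
 * key; elsewhere in this file it is computed as `bit_size >> 4` (sketch):
 *
 *    bit size 16 -> idx 1,  32 -> idx 2,  64 -> idx 4
 *
 * get_bo_vars recovers the same index from the variable's explicit array
 * stride in bytes (`stride >> 1`: 2 -> 1, 4 -> 2, 8 -> 4), so both sides
 * agree on which variable services a given access width.
 */
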
static bool
bound_bo_access_instr(nir_builder *b, nir_instr *instr, void *data)
{
   struct bo_vars *bo = data;
   if (instr->type != nir_instr_type_intrinsic)
      return false;
   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
   nir_variable *var = NULL;
   nir_ssa_def *offset = NULL;
   bool is_load = true;
   b->cursor = nir_before_instr(instr);

   switch (intr->intrinsic) {
   case nir_intrinsic_store_ssbo:
      var = bo->ssbo[nir_dest_bit_size(intr->dest) >> 4];
      offset = intr->src[2].ssa;
      is_load = false;
      break;
   case nir_intrinsic_load_ssbo:
      var = bo->ssbo[nir_dest_bit_size(intr->dest) >> 4];
      offset = intr->src[1].ssa;
      break;
   case nir_intrinsic_load_ubo:
      if (nir_src_is_const(intr->src[0]) && nir_src_as_const_value(intr->src[0])->u32 == 0)
         var = bo->uniforms[nir_dest_bit_size(intr->dest) >> 4];
      else
         var = bo->ubo[nir_dest_bit_size(intr->dest) >> 4];
      offset = intr->src[1].ssa;
      break;
   default:
      return false;
   }
   nir_src offset_src = nir_src_for_ssa(offset);
   if (!nir_src_is_const(offset_src))
      return false;

   unsigned offset_bytes = nir_src_as_const_value(offset_src)->u32;
   const struct glsl_type *strct_type = glsl_get_array_element(var->type);
   unsigned size = glsl_array_size(glsl_get_struct_field(strct_type, 0));
   bool has_unsized = glsl_array_size(glsl_get_struct_field(strct_type, glsl_get_length(strct_type) - 1)) == 0;
   if (has_unsized || offset_bytes + intr->num_components - 1 < size)
      return false;

   unsigned rewrites = 0;
   nir_ssa_def *result[2];
   for (unsigned i = 0; i < intr->num_components; i++) {
      if (offset_bytes + i >= size) {
         rewrites++;
         if (is_load)
            result[i] = nir_imm_zero(b, 1, nir_dest_bit_size(intr->dest));
      }
   }
   assert(rewrites == intr->num_components);
   if (is_load) {
      nir_ssa_def *load = nir_vec(b, result, intr->num_components);
      nir_ssa_def_rewrite_uses(&intr->dest.ssa, load);
   }
   nir_instr_remove(instr);
   return true;
}

static bool
bound_bo_access(nir_shader *shader, struct zink_shader *zs)
{
   struct bo_vars bo = get_bo_vars(zs, shader);
   return nir_shader_instructions_pass(shader, bound_bo_access_instr, nir_metadata_dominance, &bo);
}

static void
optimize_nir(struct nir_shader *s, struct zink_shader *zs)
{
   bool progress;
   do {
      progress = false;
      if (s->options->lower_int64_options)
         NIR_PASS_V(s, nir_lower_int64);
      NIR_PASS_V(s, nir_lower_vars_to_ssa);
      NIR_PASS(progress, s, nir_lower_alu_to_scalar, filter_pack_instr, NULL);
      NIR_PASS(progress, s, nir_opt_copy_prop_vars);
      NIR_PASS(progress, s, nir_copy_prop);
      NIR_PASS(progress, s, nir_opt_remove_phis);
      if (s->options->lower_int64_options) {
         NIR_PASS(progress, s, nir_lower_64bit_phis);
         NIR_PASS(progress, s, nir_lower_alu_to_scalar, filter_64_bit_instr, NULL);
      }
      NIR_PASS(progress, s, nir_opt_dce);
      NIR_PASS(progress, s, nir_opt_dead_cf);
      NIR_PASS(progress, s, nir_lower_phis_to_scalar, false);
      NIR_PASS(progress, s, nir_opt_cse);
      NIR_PASS(progress, s, nir_opt_peephole_select, 8, true, true);
      NIR_PASS(progress, s, nir_opt_algebraic);
      NIR_PASS(progress, s, nir_opt_constant_folding);
      NIR_PASS(progress, s, nir_opt_undef);
      NIR_PASS(progress, s, zink_nir_lower_b2b);
      if (zs)
         NIR_PASS(progress, s, bound_bo_access, zs);
   } while (progress);

   do {
      progress = false;
      NIR_PASS(progress, s, nir_opt_algebraic_late);
      if (progress) {
         NIR_PASS_V(s, nir_copy_prop);
         NIR_PASS_V(s, nir_opt_dce);
         NIR_PASS_V(s, nir_opt_cse);
      }
   } while (progress);
}

/* - copy the lowered fbfetch variable
 * - set the new one up as an input attachment for descriptor 0.6
 * - load it as an image
 * - overwrite the previous load
 */
static bool
lower_fbfetch_instr(nir_builder *b, nir_instr *instr, void *data)
{
   bool ms = data != NULL;
   if (instr->type != nir_instr_type_intrinsic)
      return false;
   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
   if (intr->intrinsic != nir_intrinsic_load_deref)
      return false;
   nir_variable *var = nir_deref_instr_get_variable(nir_src_as_deref(intr->src[0]));
   if (!var->data.fb_fetch_output)
      return false;
   b->cursor = nir_after_instr(instr);
   nir_variable *fbfetch = nir_variable_clone(var, b->shader);
   /* If Dim is SubpassData, ... Image Format must be Unknown
    * - SPIRV OpTypeImage specification
    */
   fbfetch->data.image.format = 0;
   fbfetch->data.index = 0; /* fix this if more than 1 fbfetch target is supported */
   fbfetch->data.mode = nir_var_uniform;
   fbfetch->data.binding = ZINK_FBFETCH_BINDING;
   fbfetch->data.sample = ms;
   enum glsl_sampler_dim dim = ms ? GLSL_SAMPLER_DIM_SUBPASS_MS : GLSL_SAMPLER_DIM_SUBPASS;
   fbfetch->type = glsl_image_type(dim, false, GLSL_TYPE_FLOAT);
   nir_shader_add_variable(b->shader, fbfetch);
   nir_ssa_def *deref = &nir_build_deref_var(b, fbfetch)->dest.ssa;
   nir_ssa_def *sample = ms ? nir_load_sample_id(b) : nir_ssa_undef(b, 1, 32);
   nir_ssa_def *load = nir_image_deref_load(b, 4, 32, deref, nir_imm_vec4(b, 0, 0, 0, 1), sample, nir_imm_int(b, 0));
   nir_ssa_def_rewrite_uses(&intr->dest.ssa, load);
   return true;
}

static bool
lower_fbfetch(nir_shader *shader, nir_variable **fbfetch, bool ms)
{
   nir_foreach_shader_out_variable(var, shader) {
      if (var->data.fb_fetch_output) {
         *fbfetch = var;
         break;
      }
   }
   assert(*fbfetch);
   if (!*fbfetch)
      return false;
   return nir_shader_instructions_pass(shader, lower_fbfetch_instr, nir_metadata_dominance, (void*)ms);
}

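/* GLSL-level sketch of what the pass accomplishes (names illustrative):
 *
 *    // before: framebuffer fetch reads the output variable directly
 *    color = someOp(color);
 *
 *    // after: the current framebuffer value comes from a subpass input
 *    //        bound at ZINK_FBFETCH_BINDING
 *    vec4 prev = subpassLoad(u_fbfetch);
 *    color = someOp(prev);
 */
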
/* check for a genuine gl_PointSize output vs one from nir_lower_point_size_mov */
static bool
check_psiz(struct nir_shader *s)
{
   bool have_psiz = false;
   nir_foreach_shader_out_variable(var, s) {
      if (var->data.location == VARYING_SLOT_PSIZ) {
         /* genuine PSIZ outputs will have this set */
         have_psiz |= !!var->data.explicit_location;
      }
   }
   return have_psiz;
}

static nir_variable *
find_var_with_location_frac(nir_shader *nir, unsigned location, unsigned location_frac, bool have_psiz)
{
   unsigned found = 0;
   if (!location_frac && location != VARYING_SLOT_PSIZ) {
      nir_foreach_shader_out_variable(var, nir) {
         if (var->data.location == location)
            found++;
      }
   }
   if (found) {
      /* multiple variables found for this location: find the biggest one */
      nir_variable *out = NULL;
      unsigned slots = 0;
      nir_foreach_shader_out_variable(var, nir) {
         if (var->data.location == location) {
            unsigned count_slots = glsl_count_vec4_slots(var->type, false, false);
            if (count_slots > slots) {
               slots = count_slots;
               out = var;
            }
         }
      }
      return out;
   } else {
      /* only one variable found or this is location_frac */
      nir_foreach_shader_out_variable(var, nir) {
         if (var->data.location == location &&
             (var->data.location_frac == location_frac ||
              (glsl_type_is_array(var->type) ? glsl_array_size(var->type) : glsl_get_vector_elements(var->type)) >= location_frac + 1)) {
            if (location != VARYING_SLOT_PSIZ || !have_psiz || var->data.explicit_location)
               return var;
         }
      }
   }
   return NULL;
}

static bool
is_inlined(const bool *inlined, const struct pipe_stream_output *output)
{
   for (unsigned i = 0; i < output->num_components; i++)
      if (!inlined[output->start_component + i])
         return false;
   return true;
}

static void
update_psiz_location(nir_shader *nir, nir_variable *psiz)
{
   uint32_t last_output = util_last_bit64(nir->info.outputs_written);
   if (last_output < VARYING_SLOT_VAR0)
      last_output = VARYING_SLOT_VAR0;
   else
      last_output++;
   /* this should get fixed up by slot remapping */
   psiz->data.location = last_output;
}

static const struct glsl_type *
clamp_slot_type(const struct glsl_type *type, unsigned slot)
{
   /* could be dvec/dmat/mat: each member is the same */
   const struct glsl_type *plain = glsl_without_array_or_matrix(type);
   /* determine size of each member type */
   unsigned slot_count = glsl_count_vec4_slots(plain, false, false);
   /* normalize slot idx to current type's size */
   slot %= slot_count;
   unsigned slot_components = glsl_get_components(plain);
   if (glsl_base_type_is_64bit(glsl_get_base_type(plain)))
      slot_components *= 2;
   /* create a vec4 mask of the selected slot's components out of all the components */
   uint32_t mask = BITFIELD_MASK(slot_components) & BITFIELD_RANGE(slot * 4, 4);
   /* return a vecN of the selected components */
   slot_components = util_bitcount(mask);
   return glsl_vec_type(slot_components);
}

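/* Worked example for clamp_slot_type (sketch): a dvec3 has 6 32-bit
 * component slots spanning two vec4 locations.
 *
 *    slot 0: 0b111111 & 0b00001111 -> 4 components -> vec4
 *    slot 1: 0b111111 & 0b11110000 -> 2 components -> vec2
 *
 * i.e. the xfb view of the first location is a full vec4 and the spillover
 * into the second location is a vec2.
 */
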
static const struct glsl_type *
unroll_struct_type(const struct glsl_type *slot_type, unsigned *slot_idx)
{
   const struct glsl_type *type = slot_type;
   unsigned slot_count = 0;
   unsigned cur_slot = 0;
   /* iterate over all the members in the struct, stopping once the slot idx is reached */
   for (unsigned i = 0; i < glsl_get_length(slot_type) && cur_slot <= *slot_idx; i++, cur_slot += slot_count) {
      /* use array type for slot counting but return array member type for unroll */
      const struct glsl_type *arraytype = glsl_get_struct_field(slot_type, i);
      type = glsl_without_array(arraytype);
      slot_count = glsl_count_vec4_slots(arraytype, false, false);
   }
   *slot_idx -= (cur_slot - slot_count);
   if (!glsl_type_is_struct_or_ifc(type))
      /* this is a fully unrolled struct: find the number of vec components to output */
      type = clamp_slot_type(type, *slot_idx);
   return type;
}

static unsigned
get_slot_components(nir_variable *var, unsigned slot, unsigned so_slot)
{
   assert(var && slot < var->data.location + glsl_count_vec4_slots(var->type, false, false));
   const struct glsl_type *orig_type = var->type;
   const struct glsl_type *type = glsl_without_array(var->type);
   unsigned slot_idx = slot - so_slot;
   if (type != orig_type)
      slot_idx %= glsl_count_vec4_slots(type, false, false);
   /* need to find the vec4 that's being exported by this slot */
   while (glsl_type_is_struct_or_ifc(type))
      type = unroll_struct_type(type, &slot_idx);

   /* arrays here are already fully unrolled from their structs, so slot handling is implicit */
   unsigned num_components = glsl_get_components(glsl_without_array(type));
   const struct glsl_type *arraytype = orig_type;
   while (glsl_type_is_array(arraytype) && !glsl_type_is_struct_or_ifc(glsl_without_array(arraytype))) {
      num_components *= glsl_array_size(arraytype);
      arraytype = glsl_get_array_element(arraytype);
   }
   assert(num_components);
   /* gallium handles xfb in terms of 32bit units */
   if (glsl_base_type_is_64bit(glsl_get_base_type(glsl_without_array(type))))
      num_components *= 2;
   return num_components;
}

static const struct pipe_stream_output *
find_packed_output(const struct pipe_stream_output_info *so_info, uint8_t *reverse_map, unsigned slot)
{
   for (unsigned i = 0; i < so_info->num_outputs; i++) {
      const struct pipe_stream_output *packed_output = &so_info->output[i];
      if (reverse_map[packed_output->register_index] == slot)
         return packed_output;
   }
   return NULL;
}

static void
update_so_info(struct zink_shader *zs, const struct pipe_stream_output_info *so_info,
               uint64_t outputs_written, bool have_psiz)
{
   uint8_t reverse_map[VARYING_SLOT_MAX] = {0};
   unsigned slot = 0;
   /* semi-copied from iris */
   while (outputs_written) {
      int bit = u_bit_scan64(&outputs_written);
      /* PSIZ from nir_lower_point_size_mov breaks stream output, so always skip it */
      if (bit == VARYING_SLOT_PSIZ && !have_psiz)
         continue;
      reverse_map[slot++] = bit;
   }

   bool have_fake_psiz = false;
   nir_foreach_shader_out_variable(var, zs->nir) {
      var->data.explicit_xfb_buffer = 0;
      if (var->data.location == VARYING_SLOT_PSIZ && !var->data.explicit_location)
         have_fake_psiz = true;
   }

   bool inlined[VARYING_SLOT_MAX][4] = {0};
   uint64_t packed = 0;
   uint8_t packed_components[VARYING_SLOT_MAX] = {0};
   uint8_t packed_streams[VARYING_SLOT_MAX] = {0};
   uint8_t packed_buffers[VARYING_SLOT_MAX] = {0};
   uint16_t packed_offsets[VARYING_SLOT_MAX][4] = {0};
   nir_variable *psiz = NULL;
   for (unsigned i = 0; i < so_info->num_outputs; i++) {
      const struct pipe_stream_output *output = &so_info->output[i];
      unsigned slot = reverse_map[output->register_index];
      /* always set stride to be used during draw */
      zs->sinfo.so_info.stride[output->output_buffer] = so_info->stride[output->output_buffer];
      if (zs->nir->info.stage != MESA_SHADER_GEOMETRY || util_bitcount(zs->nir->info.gs.active_stream_mask) == 1) {
         nir_variable *var = NULL;
         unsigned so_slot;
         while (!var)
            var = find_var_with_location_frac(zs->nir, slot--, output->start_component, have_psiz);
         if (var->data.location == VARYING_SLOT_PSIZ)
            psiz = var;
         so_slot = slot + 1;
         slot = reverse_map[output->register_index];
         if (var->data.explicit_xfb_buffer) {
            /* handle dvec3 where gallium splits streamout over 2 registers */
            for (unsigned j = 0; j < output->num_components; j++)
               inlined[slot][output->start_component + j] = true;
         }
         if (is_inlined(inlined[slot], output))
            continue;
         bool is_struct = glsl_type_is_struct_or_ifc(glsl_without_array(var->type));
         unsigned num_components = get_slot_components(var, slot, so_slot);
         /* if this is the entire variable, try to blast it out during the initial declaration;
          * structs must be handled later to ensure accurate analysis
          */
         if (!is_struct && (num_components == output->num_components || (num_components > output->num_components && output->num_components == 4))) {
            var->data.explicit_xfb_buffer = 1;
            var->data.xfb.buffer = output->output_buffer;
            var->data.xfb.stride = so_info->stride[output->output_buffer] * 4;
            var->data.offset = output->dst_offset * 4;
            var->data.stream = output->stream;
            for (unsigned j = 0; j < output->num_components; j++)
               inlined[slot][output->start_component + j] = true;
         } else {
            /* otherwise store some metadata for later */
            packed |= BITFIELD64_BIT(slot);
            packed_components[slot] += output->num_components;
            packed_streams[slot] |= BITFIELD_BIT(output->stream);
            packed_buffers[slot] |= BITFIELD_BIT(output->output_buffer);
            for (unsigned j = 0; j < output->num_components; j++)
               packed_offsets[output->register_index][j + output->start_component] = output->dst_offset + j;
         }
      }
   }

   /* if this was flagged as a packed output before, and if all the components are
    * being output with the same stream on the same buffer with increasing offsets, this entire variable
    * can be consolidated into a single output to conserve locations
    */
   for (unsigned i = 0; i < so_info->num_outputs; i++) {
      const struct pipe_stream_output *output = &so_info->output[i];
      unsigned slot = reverse_map[output->register_index];
      if (is_inlined(inlined[slot], output))
         continue;
      if (zs->nir->info.stage != MESA_SHADER_GEOMETRY || util_bitcount(zs->nir->info.gs.active_stream_mask) == 1) {
         nir_variable *var = NULL;
         while (!var)
            var = find_var_with_location_frac(zs->nir, slot--, output->start_component, have_psiz);
         /* this is a lowered 64bit variable that can't be exported due to packing */
         if (var->data.is_xfb)
            goto out;

         unsigned num_slots = glsl_count_vec4_slots(var->type, false, false);
         /* for each variable, iterate over all the variable's slots and inline the outputs */
         for (unsigned j = 0; j < num_slots; j++) {
            slot = var->data.location + j;
            const struct pipe_stream_output *packed_output = find_packed_output(so_info, reverse_map, slot);
            if (!packed_output)
               goto out;

            /* if this slot wasn't packed or isn't in the same stream/buffer, skip consolidation */
            if (!(packed & BITFIELD64_BIT(slot)) ||
                util_bitcount(packed_streams[slot]) != 1 ||
                util_bitcount(packed_buffers[slot]) != 1)
               goto out;

            /* if all the components the variable exports to this slot aren't captured, skip consolidation */
            unsigned num_components = get_slot_components(var, slot, var->data.location);
            if (glsl_type_is_array(var->type) && !glsl_type_is_struct_or_ifc(glsl_without_array(var->type)))
               num_components /= glsl_array_size(var->type);
            if (num_components != packed_components[slot])
               goto out;

            /* in order to pack the xfb output, all the offsets must be sequentially incrementing */
            uint32_t prev_offset = packed_offsets[packed_output->register_index][0];
            for (unsigned k = 1; k < num_components; k++) {
               /* if the offsets are not incrementing as expected, skip consolidation */
               if (packed_offsets[packed_output->register_index][k] != prev_offset + 1)
                  goto out;
               prev_offset = packed_offsets[packed_output->register_index][k];
            }
         }
         /* this output can be consolidated: blast out all the data inlined */
         var->data.explicit_xfb_buffer = 1;
         var->data.xfb.buffer = output->output_buffer;
         var->data.xfb.stride = so_info->stride[output->output_buffer] * 4;
         var->data.offset = output->dst_offset * 4;
         var->data.stream = output->stream;
         /* GLSL specifies that interface blocks are split per-buffer in XFB */
         if (glsl_type_is_array(var->type) && glsl_array_size(var->type) > 1 && glsl_type_is_interface(glsl_without_array(var->type)))
            zs->sinfo.so_propagate |= BITFIELD_BIT(var->data.location - VARYING_SLOT_VAR0);
         /* mark all slot components inlined to skip subsequent loop iterations */
         for (unsigned j = 0; j < num_slots; j++) {
            slot = var->data.location + j;
            for (unsigned k = 0; k < packed_components[slot]; k++)
               inlined[slot][k] = true;
            packed &= ~BITFIELD64_BIT(slot);
         }
         continue;
      }
out:
      /* these are packed/explicit varyings which can't be exported with normal output */
      zs->sinfo.so_info.output[zs->sinfo.so_info.num_outputs] = *output;
      /* Map Gallium's condensed "slots" back to real VARYING_SLOT_* enums */
      zs->sinfo.so_info_slots[zs->sinfo.so_info.num_outputs++] = reverse_map[output->register_index];
   }
   zs->sinfo.have_xfb = zs->sinfo.so_info.num_outputs || zs->sinfo.so_propagate;
   /* ensure this doesn't get output in the shader by unsetting location */
   if (have_fake_psiz && psiz)
      update_psiz_location(zs->nir, psiz);
}

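/* Units note for the xfb fields written above (sketch of one output):
 * gallium expresses stride and dst_offset in 32-bit dwords, while
 * var->data.xfb.stride and var->data.offset are in bytes, hence the "* 4".
 * e.g. a buffer stride of 6 dwords with dst_offset 4 becomes
 * xfb.stride = 24 bytes, offset = 16 bytes.
 */
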
struct decompose_state {
   nir_variable **split;
   bool needs_w;
};

static bool
lower_attrib(nir_builder *b, nir_instr *instr, void *data)
{
   struct decompose_state *state = data;
   nir_variable **split = state->split;
   if (instr->type != nir_instr_type_intrinsic)
      return false;
   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
   if (intr->intrinsic != nir_intrinsic_load_deref)
      return false;
   nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
   nir_variable *var = nir_deref_instr_get_variable(deref);
   if (var != split[0])
      return false;
   unsigned num_components = glsl_get_vector_elements(split[0]->type);
   b->cursor = nir_after_instr(instr);
   nir_ssa_def *loads[4];
   for (unsigned i = 0; i < (state->needs_w ? num_components - 1 : num_components); i++)
      loads[i] = nir_load_deref(b, nir_build_deref_var(b, split[i+1]));
   if (state->needs_w) {
      /* oob load w component to get correct value for int/float */
      loads[3] = nir_channel(b, loads[0], 3);
      loads[0] = nir_channel(b, loads[0], 0);
   }
   nir_ssa_def *new_load = nir_vec(b, loads, num_components);
   nir_ssa_def_rewrite_uses(&intr->dest.ssa, new_load);
   nir_instr_remove_v(instr);
   return true;
}

static bool
decompose_attribs(nir_shader *nir, uint32_t decomposed_attrs, uint32_t decomposed_attrs_without_w)
{
   uint32_t bits = 0;
   nir_foreach_variable_with_modes(var, nir, nir_var_shader_in)
      bits |= BITFIELD_BIT(var->data.driver_location);
   bits = ~bits;
   u_foreach_bit(location, decomposed_attrs | decomposed_attrs_without_w) {
      nir_variable *split[5];
      struct decompose_state state;
      state.split = split;
      nir_variable *var = nir_find_variable_with_driver_location(nir, nir_var_shader_in, location);
      assert(var);
      split[0] = var;
      bits |= BITFIELD_BIT(var->data.driver_location);
      const struct glsl_type *new_type = glsl_type_is_scalar(var->type) ? var->type : glsl_get_array_element(var->type);
      unsigned num_components = glsl_get_vector_elements(var->type);
      state.needs_w = (decomposed_attrs_without_w & BITFIELD_BIT(location)) != 0 && num_components == 4;
      for (unsigned i = 0; i < (state.needs_w ? num_components - 1 : num_components); i++) {
         split[i+1] = nir_variable_clone(var, nir);
         split[i+1]->name = ralloc_asprintf(nir, "%s_split%u", var->name, i);
         if (decomposed_attrs_without_w & BITFIELD_BIT(location))
            split[i+1]->type = !i && num_components == 4 ? var->type : new_type;
         else
            split[i+1]->type = new_type;
         split[i+1]->data.driver_location = ffs(bits) - 1;
         bits &= ~BITFIELD_BIT(split[i+1]->data.driver_location);
         nir_shader_add_variable(nir, split[i+1]);
      }
      var->data.mode = nir_var_shader_temp;
      nir_shader_instructions_pass(nir, lower_attrib, nir_metadata_dominance, &state);
   }
   nir_fixup_deref_modes(nir);
   NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
   optimize_nir(nir, NULL);
   return true;
}

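/* Example of the decomposition (sketch): a vec4 attribute whose format has
 * no usable multi-component Vulkan equivalent is split into per-component
 * inputs; in the needs_w case the first split keeps the full vector type:
 *
 *    in vec4 attr;   ->   in vec4  attr_split0;   // only .x/.w are read
 *                         in float attr_split1;
 *                         in float attr_split2;
 *
 * and the original load becomes
 *
 *    vec4(attr_split0.x, attr_split1, attr_split2, attr_split0.w)
 *
 * where the w component is deliberately loaded "out of bounds" from the
 * first split so int vs. float formats get the correct default value.
 */
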
static bool
rewrite_bo_access_instr(nir_builder *b, nir_instr *instr, void *data)
{
   struct zink_screen *screen = data;
   const bool has_int64 = screen->info.feats.features.shaderInt64;
   if (instr->type != nir_instr_type_intrinsic)
      return false;
   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
   b->cursor = nir_before_instr(instr);
   switch (intr->intrinsic) {
   case nir_intrinsic_ssbo_atomic_add:
   case nir_intrinsic_ssbo_atomic_umin:
   case nir_intrinsic_ssbo_atomic_imin:
   case nir_intrinsic_ssbo_atomic_umax:
   case nir_intrinsic_ssbo_atomic_imax:
   case nir_intrinsic_ssbo_atomic_and:
   case nir_intrinsic_ssbo_atomic_or:
   case nir_intrinsic_ssbo_atomic_xor:
   case nir_intrinsic_ssbo_atomic_exchange:
   case nir_intrinsic_ssbo_atomic_comp_swap: {
      /* convert offset to uint[idx] */
      nir_ssa_def *offset = nir_udiv_imm(b, intr->src[1].ssa, 4);
      nir_instr_rewrite_src_ssa(instr, &intr->src[1], offset);
      return true;
   }
   case nir_intrinsic_load_ssbo:
   case nir_intrinsic_load_ubo: {
      /* ubo0 can have unaligned 64bit loads, particularly for bindless texture ids */
      bool force_2x32 = intr->intrinsic == nir_intrinsic_load_ubo &&
                        nir_src_is_const(intr->src[0]) &&
                        nir_src_as_uint(intr->src[0]) == 0 &&
                        nir_dest_bit_size(intr->dest) == 64 &&
                        nir_intrinsic_align_offset(intr) % 8 != 0;
      force_2x32 |= nir_dest_bit_size(intr->dest) == 64 && !has_int64;
      nir_ssa_def *offset = nir_udiv_imm(b, intr->src[1].ssa, (force_2x32 ? 32 : nir_dest_bit_size(intr->dest)) / 8);
      nir_instr_rewrite_src_ssa(instr, &intr->src[1], offset);
      /* if 64bit isn't supported, 64bit loads definitely aren't supported, so rewrite as 2x32 with cast and pray */
      if (force_2x32) {
         /* this is always scalarized */
         assert(intr->dest.ssa.num_components == 1);
         /* rewrite as 2x32 */
         nir_ssa_def *load[2];
         for (unsigned i = 0; i < 2; i++) {
            if (intr->intrinsic == nir_intrinsic_load_ssbo)
               load[i] = nir_load_ssbo(b, 1, 32, intr->src[0].ssa, nir_iadd_imm(b, intr->src[1].ssa, i), .align_mul = 4, .align_offset = 0);
            else
               load[i] = nir_load_ubo(b, 1, 32, intr->src[0].ssa, nir_iadd_imm(b, intr->src[1].ssa, i), .align_mul = 4, .align_offset = 0, .range = 4);
            nir_intrinsic_set_access(nir_instr_as_intrinsic(load[i]->parent_instr), nir_intrinsic_access(intr));
         }
         /* cast back to 64bit */
         nir_ssa_def *casted = nir_pack_64_2x32_split(b, load[0], load[1]);
         nir_ssa_def_rewrite_uses(&intr->dest.ssa, casted);
         nir_instr_remove(instr);
      }
      return true;
   }
   case nir_intrinsic_load_shared:
      b->cursor = nir_before_instr(instr);
      bool force_2x32 = nir_dest_bit_size(intr->dest) == 64 && !has_int64;
      nir_ssa_def *offset = nir_udiv_imm(b, intr->src[0].ssa, (force_2x32 ? 32 : nir_dest_bit_size(intr->dest)) / 8);
      nir_instr_rewrite_src_ssa(instr, &intr->src[0], offset);
      /* if 64bit isn't supported, 64bit loads definitely aren't supported, so rewrite as 2x32 with cast and pray */
      if (force_2x32) {
         /* this is always scalarized */
         assert(intr->dest.ssa.num_components == 1);
         /* rewrite as 2x32 */
         nir_ssa_def *load[2];
         for (unsigned i = 0; i < 2; i++)
            load[i] = nir_load_shared(b, 1, 32, nir_iadd_imm(b, intr->src[0].ssa, i), .align_mul = 4, .align_offset = 0);
         /* cast back to 64bit */
         nir_ssa_def *casted = nir_pack_64_2x32_split(b, load[0], load[1]);
         nir_ssa_def_rewrite_uses(&intr->dest.ssa, casted);
         nir_instr_remove(instr);
         return true;
      }
      break;
   case nir_intrinsic_store_ssbo: {
      b->cursor = nir_before_instr(instr);
      bool force_2x32 = nir_src_bit_size(intr->src[0]) == 64 && !has_int64;
      nir_ssa_def *offset = nir_udiv_imm(b, intr->src[2].ssa, (force_2x32 ? 32 : nir_src_bit_size(intr->src[0])) / 8);
      nir_instr_rewrite_src_ssa(instr, &intr->src[2], offset);
      /* if 64bit isn't supported, 64bit stores definitely aren't supported, so rewrite as 2x32 with cast and pray */
      if (force_2x32) {
         /* this is always scalarized */
         assert(intr->src[0].ssa->num_components == 1);
         nir_ssa_def *vals[2] = {nir_unpack_64_2x32_split_x(b, intr->src[0].ssa), nir_unpack_64_2x32_split_y(b, intr->src[0].ssa)};
         for (unsigned i = 0; i < 2; i++)
            nir_store_ssbo(b, vals[i], intr->src[1].ssa, nir_iadd_imm(b, intr->src[2].ssa, i), .align_mul = 4, .align_offset = 0);
         nir_instr_remove(instr);
      }
      return true;
   }
   case nir_intrinsic_store_shared: {
      b->cursor = nir_before_instr(instr);
      bool force_2x32 = nir_src_bit_size(intr->src[0]) == 64 && !has_int64;
      nir_ssa_def *offset = nir_udiv_imm(b, intr->src[1].ssa, (force_2x32 ? 32 : nir_src_bit_size(intr->src[0])) / 8);
      nir_instr_rewrite_src_ssa(instr, &intr->src[1], offset);
      /* if 64bit isn't supported, 64bit stores definitely aren't supported, so rewrite as 2x32 with cast and pray */
      if (nir_src_bit_size(intr->src[0]) == 64 && !has_int64) {
         /* this is always scalarized */
         assert(intr->src[0].ssa->num_components == 1);
         nir_ssa_def *vals[2] = {nir_unpack_64_2x32_split_x(b, intr->src[0].ssa), nir_unpack_64_2x32_split_y(b, intr->src[0].ssa)};
         for (unsigned i = 0; i < 2; i++)
            nir_store_shared(b, vals[i], nir_iadd_imm(b, intr->src[1].ssa, i), .align_mul = 4, .align_offset = 0);
         nir_instr_remove(instr);
      }
      return true;
   }
   default:
      break;
   }
   return false;
}

static bool
rewrite_bo_access(nir_shader *shader, struct zink_screen *screen)
{
   return nir_shader_instructions_pass(shader, rewrite_bo_access_instr, nir_metadata_dominance, screen);
}

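/* Before/after sketch of the 2x32 fallback above (NIR-ish pseudocode, not
 * verbatim compiler output):
 *
 *    before:  %v  = load_ubo(0, byte_offset)      [64-bit]
 *    after:   %lo = load_ubo(0, dword_offset + 0) [32-bit]
 *             %hi = load_ubo(0, dword_offset + 1) [32-bit]
 *             %v  = pack_64_2x32_split(%lo, %hi)
 *
 * Offsets have already been rescaled from bytes to array-element units by
 * the nir_udiv_imm above, so "+ 1" advances one 32-bit element.
 */
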
static nir_variable *
get_bo_var(nir_shader *shader, struct bo_vars *bo, bool ssbo, nir_src *src, unsigned bit_size)
{
   nir_variable *var, **ptr;
   unsigned idx = ssbo || (nir_src_is_const(*src) && !nir_src_as_uint(*src)) ? 0 : 1;

   if (ssbo)
      ptr = &bo->ssbo[bit_size >> 4];
   else {
      if (!idx) {
         ptr = &bo->uniforms[bit_size >> 4];
      } else
         ptr = &bo->ubo[bit_size >> 4];
   }
   var = *ptr;
   if (!var) {
      if (ssbo)
         var = bo->ssbo[32 >> 4];
      else {
         if (!idx)
            var = bo->uniforms[32 >> 4];
         else
            var = bo->ubo[32 >> 4];
      }
      var = nir_variable_clone(var, shader);
      *ptr = var;
      nir_shader_add_variable(shader, var);

      struct glsl_struct_field *fields = rzalloc_array(shader, struct glsl_struct_field, 2);
      fields[0].name = ralloc_strdup(shader, "base");
      fields[1].name = ralloc_strdup(shader, "unsized");
      unsigned array_size = glsl_get_length(var->type);
      const struct glsl_type *bare_type = glsl_without_array(var->type);
      const struct glsl_type *array_type = glsl_get_struct_field(bare_type, 0);
      unsigned length = glsl_get_length(array_type);
      const struct glsl_type *type;
      const struct glsl_type *unsized = glsl_array_type(glsl_uintN_t_type(bit_size), 0, bit_size / 8);
      if (bit_size > 32) {
         assert(bit_size == 64);
         type = glsl_array_type(glsl_uintN_t_type(bit_size), length / 2, bit_size / 8);
      } else {
         type = glsl_array_type(glsl_uintN_t_type(bit_size), length * (32 / bit_size), bit_size / 8);
      }
      fields[0].type = type;
      fields[1].type = unsized;
      var->type = glsl_array_type(glsl_struct_type(fields, glsl_get_length(bare_type), "struct", false), array_size, 0);
      var->data.driver_location = idx;
   }
   return var;
}

static void
rewrite_atomic_ssbo_instr(nir_builder *b, nir_instr *instr, struct bo_vars *bo)
{
   nir_intrinsic_op op;
   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
   switch (intr->intrinsic) {
   case nir_intrinsic_ssbo_atomic_add:
      op = nir_intrinsic_deref_atomic_add;
      break;
   case nir_intrinsic_ssbo_atomic_umin:
      op = nir_intrinsic_deref_atomic_umin;
      break;
   case nir_intrinsic_ssbo_atomic_imin:
      op = nir_intrinsic_deref_atomic_imin;
      break;
   case nir_intrinsic_ssbo_atomic_umax:
      op = nir_intrinsic_deref_atomic_umax;
      break;
   case nir_intrinsic_ssbo_atomic_imax:
      op = nir_intrinsic_deref_atomic_imax;
      break;
   case nir_intrinsic_ssbo_atomic_and:
      op = nir_intrinsic_deref_atomic_and;
      break;
   case nir_intrinsic_ssbo_atomic_or:
      op = nir_intrinsic_deref_atomic_or;
      break;
   case nir_intrinsic_ssbo_atomic_xor:
      op = nir_intrinsic_deref_atomic_xor;
      break;
   case nir_intrinsic_ssbo_atomic_exchange:
      op = nir_intrinsic_deref_atomic_exchange;
      break;
   case nir_intrinsic_ssbo_atomic_comp_swap:
      op = nir_intrinsic_deref_atomic_comp_swap;
      break;
   default:
      unreachable("unknown intrinsic");
   }
   /* atomic ops are always 32bit */
   assert(nir_dest_bit_size(intr->dest) == 32);
   nir_ssa_def *offset = intr->src[1].ssa;
   nir_src *src = &intr->src[0];
   nir_variable *var = get_bo_var(b->shader, bo, true, src, 32);
   nir_deref_instr *deref_var = nir_build_deref_var(b, var);
   nir_ssa_def *idx = src->ssa;
   if (bo->first_ssbo)
      idx = nir_iadd_imm(b, idx, -bo->first_ssbo);
   nir_deref_instr *deref_array = nir_build_deref_array(b, deref_var, idx);
   nir_deref_instr *deref_struct = nir_build_deref_struct(b, deref_array, 0);

   /* generate new atomic deref ops for every component */
   nir_ssa_def *result[4];
   unsigned num_components = nir_dest_num_components(intr->dest);
   for (unsigned i = 0; i < num_components; i++) {
      nir_deref_instr *deref_arr = nir_build_deref_array(b, deref_struct, offset);
      nir_intrinsic_instr *new_instr = nir_intrinsic_instr_create(b->shader, op);
      nir_ssa_dest_init(&new_instr->instr, &new_instr->dest, 1, 32, "");
      new_instr->src[0] = nir_src_for_ssa(&deref_arr->dest.ssa);
      /* deref ops have no offset src, so copy the srcs after it */
      for (unsigned j = 2; j < nir_intrinsic_infos[intr->intrinsic].num_srcs; j++)
         nir_src_copy(&new_instr->src[j - 1], &intr->src[j]);
      nir_builder_instr_insert(b, &new_instr->instr);

      result[i] = &new_instr->dest.ssa;
      offset = nir_iadd_imm(b, offset, 1);
   }

   nir_ssa_def *load = nir_vec(b, result, num_components);
   nir_ssa_def_rewrite_uses(&intr->dest.ssa, load);
   nir_instr_remove(instr);
}

static bool
remove_bo_access_instr(nir_builder *b, nir_instr *instr, void *data)
{
   struct bo_vars *bo = data;
   if (instr->type != nir_instr_type_intrinsic)
      return false;
   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
   nir_variable *var = NULL;
   nir_ssa_def *offset = NULL;
   bool is_load = true;
   b->cursor = nir_before_instr(instr);
   nir_src *src;
   bool ssbo = true;
   switch (intr->intrinsic) {
   case nir_intrinsic_ssbo_atomic_add:
   case nir_intrinsic_ssbo_atomic_umin:
   case nir_intrinsic_ssbo_atomic_imin:
   case nir_intrinsic_ssbo_atomic_umax:
   case nir_intrinsic_ssbo_atomic_imax:
   case nir_intrinsic_ssbo_atomic_and:
   case nir_intrinsic_ssbo_atomic_or:
   case nir_intrinsic_ssbo_atomic_xor:
   case nir_intrinsic_ssbo_atomic_exchange:
   case nir_intrinsic_ssbo_atomic_comp_swap:
      rewrite_atomic_ssbo_instr(b, instr, bo);
      return true;
   case nir_intrinsic_store_ssbo:
      src = &intr->src[1];
      var = get_bo_var(b->shader, bo, true, src, nir_src_bit_size(intr->src[0]));
      offset = intr->src[2].ssa;
      is_load = false;
      break;
   case nir_intrinsic_load_ssbo:
      src = &intr->src[0];
      var = get_bo_var(b->shader, bo, true, src, nir_dest_bit_size(intr->dest));
      offset = intr->src[1].ssa;
      break;
   case nir_intrinsic_load_ubo:
      src = &intr->src[0];
      var = get_bo_var(b->shader, bo, false, src, nir_dest_bit_size(intr->dest));
      offset = intr->src[1].ssa;
      ssbo = false;
      break;
   default:
      return false;
   }
   assert(var);
   assert(offset);
   nir_deref_instr *deref_var = nir_build_deref_var(b, var);
   nir_ssa_def *idx = !ssbo && var->data.driver_location ? nir_iadd_imm(b, src->ssa, -1) : src->ssa;
   if (!ssbo && bo->first_ubo && var->data.driver_location)
      idx = nir_iadd_imm(b, idx, -bo->first_ubo);
   else if (ssbo && bo->first_ssbo)
      idx = nir_iadd_imm(b, idx, -bo->first_ssbo);
   nir_deref_instr *deref_array = nir_build_deref_array(b, deref_var, idx);
   nir_deref_instr *deref_struct = nir_build_deref_struct(b, deref_array, 0);
   assert(intr->num_components <= 2);
   if (is_load) {
      nir_ssa_def *result[2];
      for (unsigned i = 0; i < intr->num_components; i++) {
         nir_deref_instr *deref_arr = nir_build_deref_array(b, deref_struct, offset);
         result[i] = nir_load_deref(b, deref_arr);
         if (intr->intrinsic == nir_intrinsic_load_ssbo)
            nir_intrinsic_set_access(nir_instr_as_intrinsic(result[i]->parent_instr), nir_intrinsic_access(intr));
         offset = nir_iadd_imm(b, offset, 1);
      }
      nir_ssa_def *load = nir_vec(b, result, intr->num_components);
      nir_ssa_def_rewrite_uses(&intr->dest.ssa, load);
   } else {
      nir_deref_instr *deref_arr = nir_build_deref_array(b, deref_struct, offset);
      nir_build_store_deref(b, &deref_arr->dest.ssa, intr->src[0].ssa, BITFIELD_MASK(intr->num_components), nir_intrinsic_access(intr));
   }
   nir_instr_remove(instr);
   return true;
}

static bool
remove_bo_access(nir_shader *shader, struct zink_shader *zs)
{
   struct bo_vars bo = get_bo_vars(zs, shader);
   return nir_shader_instructions_pass(shader, remove_bo_access_instr, nir_metadata_dominance, &bo);
}

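/* Deref-form sketch of what remove_bo_access produces (GLSL-level view,
 * names illustrative): after lowering, every buffer is retyped as
 *
 *    struct { uintN_t base[len]; uintN_t unsized[]; } buffers[array_size];
 *
 * and e.g. a 32-bit SSBO load becomes
 *
 *    result = buffers[block_index - first_ssbo].base[dword_offset];
 *
 * which ntv can translate directly without untyped buffer access.
 */
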
static void
|
|
assign_producer_var_io(gl_shader_stage stage, nir_variable *var, unsigned *reserved, unsigned char *slot_map)
|
|
{
|
|
unsigned slot = var->data.location;
|
|
switch (slot) {
|
|
case -1:
|
|
case VARYING_SLOT_POS:
|
|
case VARYING_SLOT_PNTC:
|
|
case VARYING_SLOT_PSIZ:
|
|
case VARYING_SLOT_LAYER:
|
|
case VARYING_SLOT_PRIMITIVE_ID:
|
|
case VARYING_SLOT_CLIP_DIST0:
|
|
case VARYING_SLOT_CULL_DIST0:
|
|
case VARYING_SLOT_VIEWPORT:
|
|
case VARYING_SLOT_FACE:
|
|
case VARYING_SLOT_TESS_LEVEL_OUTER:
|
|
case VARYING_SLOT_TESS_LEVEL_INNER:
|
|
/* use a sentinel value to avoid counting later */
|
|
var->data.driver_location = UINT_MAX;
|
|
break;
|
|
|
|
default:
|
|
if (var->data.patch) {
|
|
assert(slot >= VARYING_SLOT_PATCH0);
|
|
slot -= VARYING_SLOT_PATCH0;
|
|
}
|
|
if (slot_map[slot] == 0xff) {
|
|
assert(*reserved < MAX_VARYING);
|
|
slot_map[slot] = *reserved;
|
|
if (stage == MESA_SHADER_TESS_EVAL && var->data.mode == nir_var_shader_in && !var->data.patch)
|
|
*reserved += glsl_count_vec4_slots(glsl_get_array_element(var->type), false, false);
|
|
else
|
|
*reserved += glsl_count_vec4_slots(var->type, false, false);
|
|
}
|
|
slot = slot_map[slot];
|
|
assert(slot < MAX_VARYING);
|
|
var->data.driver_location = slot;
|
|
}
|
|
}
|
|
|
|
ALWAYS_INLINE static bool
|
|
is_texcoord(gl_shader_stage stage, const nir_variable *var)
|
|
{
|
|
if (stage != MESA_SHADER_FRAGMENT)
|
|
return false;
|
|
return var->data.location >= VARYING_SLOT_TEX0 &&
|
|
var->data.location <= VARYING_SLOT_TEX7;
|
|
}
|
|
|
|
static bool
|
|
assign_consumer_var_io(gl_shader_stage stage, nir_variable *var, unsigned *reserved, unsigned char *slot_map)
|
|
{
|
|
unsigned slot = var->data.location;
|
|
switch (slot) {
|
|
case VARYING_SLOT_POS:
|
|
case VARYING_SLOT_PNTC:
|
|
case VARYING_SLOT_PSIZ:
|
|
case VARYING_SLOT_LAYER:
|
|
case VARYING_SLOT_PRIMITIVE_ID:
|
|
case VARYING_SLOT_CLIP_DIST0:
|
|
case VARYING_SLOT_CULL_DIST0:
|
|
case VARYING_SLOT_VIEWPORT:
|
|
case VARYING_SLOT_FACE:
|
|
case VARYING_SLOT_TESS_LEVEL_OUTER:
|
|
case VARYING_SLOT_TESS_LEVEL_INNER:
|
|
/* use a sentinel value to avoid counting later */
|
|
var->data.driver_location = UINT_MAX;
|
|
break;
|
|
default:
|
|
if (var->data.patch) {
|
|
assert(slot >= VARYING_SLOT_PATCH0);
|
|
slot -= VARYING_SLOT_PATCH0;
|
|
}
|
|
if (slot_map[slot] == (unsigned char)-1) {
|
|
if (stage != MESA_SHADER_TESS_CTRL && !is_texcoord(stage, var))
|
|
/* dead io */
|
|
return false;
|
|
/* - texcoords can't be eliminated in fs due to GL_COORD_REPLACE
|
|
* - patch variables may be read in the workgroup
|
|
*/
|
|
slot_map[slot] = (*reserved)++;
|
|
}
|
|
var->data.driver_location = slot_map[slot];
|
|
}
|
|
return true;
|
|
}
|
|
|
|
|
|
static bool
|
|
rewrite_and_discard_read(nir_builder *b, nir_instr *instr, void *data)
|
|
{
|
|
nir_variable *var = data;
|
|
if (instr->type != nir_instr_type_intrinsic)
|
|
return false;
|
|
|
|
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
|
|
if (intr->intrinsic != nir_intrinsic_load_deref)
|
|
return false;
|
|
nir_variable *deref_var = nir_intrinsic_get_var(intr, 0);
|
|
if (deref_var != var)
|
|
return false;
|
|
nir_ssa_def *undef = nir_ssa_undef(b, nir_dest_num_components(intr->dest), nir_dest_bit_size(intr->dest));
|
|
nir_ssa_def_rewrite_uses(&intr->dest.ssa, undef);
|
|
return true;
|
|
}
|
|
|
|
void
zink_compiler_assign_io(nir_shader *producer, nir_shader *consumer)
{
   unsigned reserved = 0;
   unsigned char slot_map[VARYING_SLOT_MAX];
   memset(slot_map, -1, sizeof(slot_map));
   bool do_fixup = false;
   nir_shader *nir = producer->info.stage == MESA_SHADER_TESS_CTRL ? producer : consumer;
   if (consumer->info.stage != MESA_SHADER_FRAGMENT) {
      /* remove injected pointsize from all but the last vertex stage */
      nir_variable *var = nir_find_variable_with_location(producer, nir_var_shader_out, VARYING_SLOT_PSIZ);
      if (var && !var->data.explicit_location) {
         var->data.mode = nir_var_shader_temp;
         nir_fixup_deref_modes(producer);
         NIR_PASS_V(producer, nir_remove_dead_variables, nir_var_shader_temp, NULL);
         optimize_nir(producer, NULL);
      }
   }
   if (producer->info.stage == MESA_SHADER_TESS_CTRL) {
      /* never assign from tcs -> tes, always invert */
      nir_foreach_variable_with_modes(var, consumer, nir_var_shader_in)
         assign_producer_var_io(consumer->info.stage, var, &reserved, slot_map);
      nir_foreach_variable_with_modes_safe(var, producer, nir_var_shader_out) {
         if (!assign_consumer_var_io(producer->info.stage, var, &reserved, slot_map))
            /* this is an output, nothing more needs to be done for it to be dropped */
            do_fixup = true;
      }
   } else {
      nir_foreach_variable_with_modes(var, producer, nir_var_shader_out)
         assign_producer_var_io(producer->info.stage, var, &reserved, slot_map);
      nir_foreach_variable_with_modes_safe(var, consumer, nir_var_shader_in) {
         if (!assign_consumer_var_io(consumer->info.stage, var, &reserved, slot_map)) {
            do_fixup = true;
            /* input needs to be rewritten as an undef to ensure the entire deref chain is deleted */
            nir_shader_instructions_pass(consumer, rewrite_and_discard_read, nir_metadata_dominance, var);
         }
      }
   }
   if (!do_fixup)
      return;
   nir_fixup_deref_modes(nir);
   NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
   optimize_nir(nir, NULL);
}

/* all types that hit this function contain something that is 64bit */
static const struct glsl_type *
rewrite_64bit_type(nir_shader *nir, const struct glsl_type *type, nir_variable *var)
{
   if (glsl_type_is_array(type)) {
      const struct glsl_type *child = glsl_get_array_element(type);
      unsigned elements = glsl_get_aoa_size(type);
      unsigned stride = glsl_get_explicit_stride(type);
      return glsl_array_type(rewrite_64bit_type(nir, child, var), elements, stride);
   }
   /* rewrite structs recursively */
   if (glsl_type_is_struct_or_ifc(type)) {
      unsigned nmembers = glsl_get_length(type);
      struct glsl_struct_field *fields = rzalloc_array(nir, struct glsl_struct_field, nmembers * 2);
      unsigned xfb_offset = 0;
      for (unsigned i = 0; i < nmembers; i++) {
         const struct glsl_struct_field *f = glsl_get_struct_field_data(type, i);
         fields[i] = *f;
         xfb_offset += glsl_get_component_slots(fields[i].type) * 4;
         if (i < nmembers - 1 && xfb_offset % 8 &&
             glsl_type_contains_64bit(glsl_get_struct_field(type, i + 1))) {
            var->data.is_xfb = true;
         }
         fields[i].type = rewrite_64bit_type(nir, f->type, var);
      }
      return glsl_struct_type(fields, nmembers, glsl_get_type_name(type), glsl_struct_type_is_packed(type));
   }
   if (!glsl_type_is_64bit(type))
      return type;
   enum glsl_base_type base_type;
   switch (glsl_get_base_type(type)) {
   case GLSL_TYPE_UINT64:
      base_type = GLSL_TYPE_UINT;
      break;
   case GLSL_TYPE_INT64:
      base_type = GLSL_TYPE_INT;
      break;
   case GLSL_TYPE_DOUBLE:
      base_type = GLSL_TYPE_FLOAT;
      break;
   default:
      unreachable("unknown 64-bit vertex attribute format!");
   }
   if (glsl_type_is_scalar(type))
      return glsl_vector_type(base_type, 2);
   unsigned num_components;
   if (glsl_type_is_matrix(type)) {
      /* align to vec4 size: dvec3-composed arrays are arrays of dvec3s */
      unsigned vec_components = glsl_get_vector_elements(type);
      if (vec_components == 3)
         vec_components = 4;
      num_components = vec_components * 2 * glsl_get_matrix_columns(type);
   } else {
      num_components = glsl_get_vector_elements(type) * 2;
      if (num_components <= 4)
         return glsl_vector_type(base_type, num_components);
   }
   /* dvec3/dvec4/dmatX: rewrite as struct { vec4, vec4, vec4, ... [vec2] } */
   struct glsl_struct_field fields[8] = {0};
   unsigned remaining = num_components;
   unsigned nfields = 0;
   for (unsigned i = 0; remaining; i++, remaining -= MIN2(4, remaining), nfields++) {
      assert(i < ARRAY_SIZE(fields));
      fields[i].name = "";
      fields[i].offset = i * 16;
      fields[i].type = glsl_vector_type(base_type, MIN2(4, remaining));
   }
   char buf[64];
   snprintf(buf, sizeof(buf), "struct(%s)", glsl_get_type_name(type));
   return glsl_struct_type(fields, nfields, buf, true);
}

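/* walk up the deref chain and return the first matrix type found, if any */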
static const struct glsl_type *
deref_is_matrix(nir_deref_instr *deref)
{
   if (glsl_type_is_matrix(deref->type))
      return deref->type;
   nir_deref_instr *parent = nir_deref_instr_parent(deref);
   if (parent)
      return deref_is_matrix(parent);
   return NULL;
}

/* rewrite all input/output variables using 32bit types and load/stores */
static bool
lower_64bit_vars(nir_shader *shader)
{
   bool progress = false;
   struct hash_table *derefs = _mesa_hash_table_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal);
   struct set *deletes = _mesa_set_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal);
   nir_foreach_variable_with_modes(var, shader, nir_var_shader_in | nir_var_shader_out) {
      if (!glsl_type_contains_64bit(var->type))
         continue;
      var->type = rewrite_64bit_type(shader, var->type, var);
      /* once type is rewritten, rewrite all loads and stores */
      nir_foreach_function(function, shader) {
         bool func_progress = false;
         if (!function->impl)
            continue;
         nir_builder b;
         nir_builder_init(&b, function->impl);
         nir_foreach_block(block, function->impl) {
            nir_foreach_instr_safe(instr, block) {
               switch (instr->type) {
               case nir_instr_type_deref: {
                  nir_deref_instr *deref = nir_instr_as_deref(instr);
                  if (!(deref->modes & (nir_var_shader_in | nir_var_shader_out)))
                     continue;
                  if (nir_deref_instr_get_variable(deref) != var)
                     continue;

                  /* matrix types are special: store the original deref type for later use */
                  const struct glsl_type *matrix = deref_is_matrix(deref);
                  nir_deref_instr *parent = nir_deref_instr_parent(deref);
                  if (!matrix) {
                     /* if this isn't a direct matrix deref, it's maybe a matrix row deref */
                     hash_table_foreach(derefs, he) {
                        /* propagate parent matrix type to row deref */
                        if (he->key == parent)
                           matrix = he->data;
                     }
                  }
                  if (matrix)
                     _mesa_hash_table_insert(derefs, deref, (void*)matrix);
                  if (deref->deref_type == nir_deref_type_var)
                     deref->type = var->type;
                  else
                     deref->type = rewrite_64bit_type(shader, deref->type, var);
               }
               break;
               case nir_instr_type_intrinsic: {
                  nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
                  if (intr->intrinsic != nir_intrinsic_store_deref &&
                      intr->intrinsic != nir_intrinsic_load_deref)
                     break;
                  if (nir_intrinsic_get_var(intr, 0) != var)
                     break;
                  if ((intr->intrinsic == nir_intrinsic_store_deref && intr->src[1].ssa->bit_size != 64) ||
                      (intr->intrinsic == nir_intrinsic_load_deref && intr->dest.ssa.bit_size != 64))
                     break;
                  b.cursor = nir_before_instr(instr);
                  nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
                  unsigned num_components = intr->num_components * 2;
                  nir_ssa_def *comp[NIR_MAX_VEC_COMPONENTS];
                  /* this is the stored matrix type from the deref */
                  struct hash_entry *he = _mesa_hash_table_search(derefs, deref);
                  const struct glsl_type *matrix = he ? he->data : NULL;
                  func_progress = true;
                  if (intr->intrinsic == nir_intrinsic_store_deref) {
                     /* first, unpack the src data to 32bit vec2 components */
                     for (unsigned i = 0; i < intr->num_components; i++) {
                        nir_ssa_def *ssa = nir_unpack_64_2x32(&b, nir_channel(&b, intr->src[1].ssa, i));
                        comp[i * 2] = nir_channel(&b, ssa, 0);
                        comp[i * 2 + 1] = nir_channel(&b, ssa, 1);
                     }
                     unsigned wrmask = nir_intrinsic_write_mask(intr);
                     unsigned mask = 0;
                     /* expand writemask for doubled components */
                     for (unsigned i = 0; i < intr->num_components; i++) {
                        if (wrmask & BITFIELD_BIT(i))
                           mask |= BITFIELD_BIT(i * 2) | BITFIELD_BIT(i * 2 + 1);
                     }
                     if (matrix) {
                        /* matrix types always come from array (row) derefs */
                        assert(deref->deref_type == nir_deref_type_array);
                        nir_deref_instr *var_deref = nir_deref_instr_parent(deref);
                        /* let optimization clean up consts later */
                        nir_ssa_def *index = deref->arr.index.ssa;
                        /* this might be an indirect array index:
                         * - iterate over matrix columns
                         * - add if blocks for each column
                         * - perform the store in the block
                         */
                        for (unsigned idx = 0; idx < glsl_get_matrix_columns(matrix); idx++) {
                           nir_push_if(&b, nir_ieq_imm(&b, index, idx));
                           unsigned vec_components = glsl_get_vector_elements(matrix);
                           /* always clamp dvec3 to 4 components */
                           if (vec_components == 3)
                              vec_components = 4;
                           unsigned start_component = idx * vec_components * 2;
                           /* struct member */
                           unsigned member = start_component / 4;
                           /* number of components remaining */
                           unsigned remaining = num_components;
                           for (unsigned i = 0; i < num_components; member++) {
                              if (!(mask & BITFIELD_BIT(i)))
                                 continue;
                              assert(member < glsl_get_length(var_deref->type));
                              /* deref the rewritten struct to the appropriate vec4/vec2 */
                              nir_deref_instr *strct = nir_build_deref_struct(&b, var_deref, member);
                              unsigned incr = MIN2(remaining, 4);
                              /* assemble the write component vec */
                              nir_ssa_def *val = nir_vec(&b, &comp[i], incr);
                              /* use the number of components being written as the writemask */
                              if (glsl_get_vector_elements(strct->type) > val->num_components)
                                 val = nir_pad_vector(&b, val, glsl_get_vector_elements(strct->type));
                              nir_store_deref(&b, strct, val, BITFIELD_MASK(incr));
                              remaining -= incr;
                              i += incr;
                           }
                           nir_pop_if(&b, NULL);
                        }
                        _mesa_set_add(deletes, &deref->instr);
                     } else if (num_components <= 4) {
                        /* simple store case: just write out the components */
                        nir_ssa_def *dest = nir_vec(&b, comp, num_components);
                        nir_store_deref(&b, deref, dest, mask);
                     } else {
                        /* writing > 4 components: access the struct and write to the appropriate vec4 members */
                        for (unsigned i = 0; num_components; i++, num_components -= MIN2(num_components, 4)) {
                           if (!(mask & BITFIELD_MASK(4)))
                              continue;
                           nir_deref_instr *strct = nir_build_deref_struct(&b, deref, i);
                           nir_ssa_def *dest = nir_vec(&b, &comp[i * 4], MIN2(num_components, 4));
                           if (glsl_get_vector_elements(strct->type) > dest->num_components)
                              dest = nir_pad_vector(&b, dest, glsl_get_vector_elements(strct->type));
                           nir_store_deref(&b, strct, dest, mask & BITFIELD_MASK(4));
                           mask >>= 4;
                        }
                     }
                  } else {
                     nir_ssa_def *dest = NULL;
                     if (matrix) {
                        /* matrix types always come from array (row) derefs */
                        assert(deref->deref_type == nir_deref_type_array);
                        nir_deref_instr *var_deref = nir_deref_instr_parent(deref);
                        /* let optimization clean up consts later */
                        nir_ssa_def *index = deref->arr.index.ssa;
                        /* this might be an indirect array index:
                         * - iterate over matrix columns
                         * - add if blocks for each column
                         * - phi the loads using the array index
                         */
                        unsigned cols = glsl_get_matrix_columns(matrix);
                        nir_ssa_def *dests[4];
                        for (unsigned idx = 0; idx < cols; idx++) {
                           /* don't add an if for the final row: this will be handled in the else */
                           if (idx < cols - 1)
                              nir_push_if(&b, nir_ieq_imm(&b, index, idx));
                           unsigned vec_components = glsl_get_vector_elements(matrix);
                           /* always clamp dvec3 to 4 components */
                           if (vec_components == 3)
                              vec_components = 4;
                           unsigned start_component = idx * vec_components * 2;
                           /* struct member */
                           unsigned member = start_component / 4;
                           /* number of components remaining */
                           unsigned remaining = num_components;
                           /* component index */
                           unsigned comp_idx = 0;
                           for (unsigned i = 0; i < num_components; member++) {
                              assert(member < glsl_get_length(var_deref->type));
                              nir_deref_instr *strct = nir_build_deref_struct(&b, var_deref, member);
                              nir_ssa_def *load = nir_load_deref(&b, strct);
                              unsigned incr = MIN2(remaining, 4);
                              /* repack the loads to 64bit */
                              for (unsigned c = 0; c < incr / 2; c++, comp_idx++)
                                 comp[comp_idx] = nir_pack_64_2x32(&b, nir_channels(&b, load, BITFIELD_RANGE(c * 2, 2)));
                              remaining -= incr;
                              i += incr;
                           }
                           dest = dests[idx] = nir_vec(&b, comp, intr->num_components);
                           if (idx < cols - 1)
                              nir_push_else(&b, NULL);
                        }
                        /* loop over all the if blocks that were made, pop them, and phi the loaded+packed results */
                        for (unsigned idx = cols - 1; idx >= 1; idx--) {
                           nir_pop_if(&b, NULL);
                           dest = nir_if_phi(&b, dests[idx - 1], dest);
                        }
                        _mesa_set_add(deletes, &deref->instr);
                     } else if (num_components <= 4) {
                        /* simple load case */
                        nir_ssa_def *load = nir_load_deref(&b, deref);
                        /* pack 32bit loads into 64bit: this will automagically get optimized out later */
                        for (unsigned i = 0; i < intr->num_components; i++) {
                           comp[i] = nir_pack_64_2x32(&b, nir_channels(&b, load, BITFIELD_RANGE(i * 2, 2)));
                        }
                        dest = nir_vec(&b, comp, intr->num_components);
                     } else {
                        /* reading > 4 components: access the struct and load the appropriate vec4 members */
                        for (unsigned i = 0; i < 2; i++, num_components -= 4) {
                           nir_deref_instr *strct = nir_build_deref_struct(&b, deref, i);
                           nir_ssa_def *load = nir_load_deref(&b, strct);
                           comp[i * 2] = nir_pack_64_2x32(&b, nir_channels(&b, load, BITFIELD_MASK(2)));
                           if (num_components > 2)
                              comp[i * 2 + 1] = nir_pack_64_2x32(&b, nir_channels(&b, load, BITFIELD_RANGE(2, 2)));
                        }
                        dest = nir_vec(&b, comp, intr->num_components);
                     }
                     nir_ssa_def_rewrite_uses_after(&intr->dest.ssa, dest, instr);
                  }
                  _mesa_set_add(deletes, instr);
                  break;
               }
               break;
               default: break;
               }
            }
         }
         if (func_progress)
            nir_metadata_preserve(function->impl, nir_metadata_none);
         /* derefs must be queued for deletion to avoid deleting the same deref repeatedly */
         set_foreach_remove(deletes, he)
            nir_instr_remove((void*)he->key);
      }
      progress = true;
   }
   ralloc_free(deletes);
   ralloc_free(derefs);
   if (progress) {
      nir_lower_alu_to_scalar(shader, filter_64_bit_instr, NULL);
      nir_lower_phis_to_scalar(shader, false);
   }
   return progress;
}

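/* split struct outputs into per-member variables; only run when xfb is active,
 * since xfb needs to address individual members
 */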
static bool
split_blocks(nir_shader *nir)
{
   bool progress = false;
   bool changed = true;
   do {
      progress = false;
      nir_foreach_shader_out_variable(var, nir) {
         const struct glsl_type *base_type = glsl_without_array(var->type);
         nir_variable *members[32]; //can't have more than this without breaking NIR
         if (!glsl_type_is_struct(base_type))
            continue;
         /* TODO: arrays? */
         if (!glsl_type_is_struct(var->type) || glsl_get_length(var->type) == 1)
            continue;
         if (glsl_count_attribute_slots(var->type, false) == 1)
            continue;
         unsigned offset = 0;
         for (unsigned i = 0; i < glsl_get_length(var->type); i++) {
            members[i] = nir_variable_clone(var, nir);
            members[i]->type = glsl_get_struct_field(var->type, i);
            members[i]->name = (void*)glsl_get_struct_elem_name(var->type, i);
            members[i]->data.location += offset;
            offset += glsl_count_attribute_slots(members[i]->type, false);
            nir_shader_add_variable(nir, members[i]);
         }
         nir_foreach_function(function, nir) {
            bool func_progress = false;
            if (!function->impl)
               continue;
            nir_builder b;
            nir_builder_init(&b, function->impl);
            nir_foreach_block(block, function->impl) {
               nir_foreach_instr_safe(instr, block) {
                  switch (instr->type) {
                  case nir_instr_type_deref: {
                     nir_deref_instr *deref = nir_instr_as_deref(instr);
                     if (!(deref->modes & nir_var_shader_out))
                        continue;
                     if (nir_deref_instr_get_variable(deref) != var)
                        continue;
                     if (deref->deref_type != nir_deref_type_struct)
                        continue;
                     nir_deref_instr *parent = nir_deref_instr_parent(deref);
                     if (parent->deref_type != nir_deref_type_var)
                        continue;
                     deref->modes = nir_var_shader_temp;
                     parent->modes = nir_var_shader_temp;
                     b.cursor = nir_before_instr(instr);
                     nir_ssa_def *dest = &nir_build_deref_var(&b, members[deref->strct.index])->dest.ssa;
                     nir_ssa_def_rewrite_uses_after(&deref->dest.ssa, dest, &deref->instr);
                     nir_instr_remove(&deref->instr);
                     func_progress = true;
                     break;
                  }
                  default: break;
                  }
               }
            }
            if (func_progress)
               nir_metadata_preserve(function->impl, nir_metadata_none);
         }
         var->data.mode = nir_var_shader_temp;
         changed = true;
         progress = true;
      }
   } while (progress);
   return changed;
}

static void
zink_shader_dump(void *words, size_t size, const char *file)
{
   FILE *fp = fopen(file, "wb");
   if (fp) {
      fwrite(words, 1, size, fp);
      fclose(fp);
      fprintf(stderr, "wrote '%s'...\n", file);
   }
}

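/* create a VkShaderModule from assembled spirv; when ZINK_DEBUG_VALIDATION is set,
 * also round-trip the words through spirv_to_nir as a sanity check
 */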
VkShaderModule
zink_shader_spirv_compile(struct zink_screen *screen, struct zink_shader *zs, struct spirv_shader *spirv)
{
   VkShaderModule mod;
   VkShaderModuleCreateInfo smci = {0};

   if (!spirv)
      spirv = zs->spirv;

   if (zink_debug & ZINK_DEBUG_SPIRV) {
      char buf[256];
      static int i;
      snprintf(buf, sizeof(buf), "dump%02d.spv", i++);
      zink_shader_dump(spirv->words, spirv->num_words * sizeof(uint32_t), buf);
   }

   smci.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
   smci.codeSize = spirv->num_words * sizeof(uint32_t);
   smci.pCode = spirv->words;

#ifndef NDEBUG
   if (zink_debug & ZINK_DEBUG_VALIDATION) {
      static const struct spirv_to_nir_options spirv_options = {
         .environment = NIR_SPIRV_VULKAN,
         .caps = {
            .float64 = true,
            .int16 = true,
            .int64 = true,
            .tessellation = true,
            .float_controls = true,
            .image_ms_array = true,
            .image_read_without_format = true,
            .image_write_without_format = true,
            .storage_image_ms = true,
            .geometry_streams = true,
            .storage_8bit = true,
            .storage_16bit = true,
            .variable_pointers = true,
            .stencil_export = true,
            .post_depth_coverage = true,
            .transform_feedback = true,
            .device_group = true,
            .draw_parameters = true,
            .shader_viewport_index_layer = true,
            .multiview = true,
            .physical_storage_buffer_address = true,
            .int64_atomics = true,
            .subgroup_arithmetic = true,
            .subgroup_basic = true,
            .subgroup_ballot = true,
            .subgroup_quad = true,
            .subgroup_shuffle = true,
            .subgroup_vote = true,
            .vk_memory_model = true,
            .vk_memory_model_device_scope = true,
            .int8 = true,
            .float16 = true,
            .demote_to_helper_invocation = true,
            .sparse_residency = true,
            .min_lod = true,
         },
         .ubo_addr_format = nir_address_format_32bit_index_offset,
         .ssbo_addr_format = nir_address_format_32bit_index_offset,
         .phys_ssbo_addr_format = nir_address_format_64bit_global,
         .push_const_addr_format = nir_address_format_logical,
         .shared_addr_format = nir_address_format_32bit_offset,
      };
      uint32_t num_spec_entries = 0;
      struct nir_spirv_specialization *spec_entries = NULL;
      VkSpecializationInfo sinfo = {0};
      VkSpecializationMapEntry me[3];
      uint32_t size[3] = {1,1,1};
      if (!zs->nir->info.workgroup_size[0]) {
         sinfo.mapEntryCount = 3;
         sinfo.pMapEntries = &me[0];
         sinfo.dataSize = sizeof(uint32_t) * 3;
         sinfo.pData = size;
         uint32_t ids[] = {ZINK_WORKGROUP_SIZE_X, ZINK_WORKGROUP_SIZE_Y, ZINK_WORKGROUP_SIZE_Z};
         for (int i = 0; i < 3; i++) {
            me[i].size = sizeof(uint32_t);
            me[i].constantID = ids[i];
            me[i].offset = i * sizeof(uint32_t);
         }
         spec_entries = vk_spec_info_to_nir_spirv(&sinfo, &num_spec_entries);
      }
      nir_shader *nir = spirv_to_nir(spirv->words, spirv->num_words,
                                     spec_entries, num_spec_entries,
                                     zs->nir->info.stage, "main", &spirv_options, &screen->nir_options);
      assert(nir);
      ralloc_free(nir);
      free(spec_entries);
   }
#endif

   VkResult ret = VKSCR(CreateShaderModule)(screen->dev, &smci, NULL, &mod);
   bool success = zink_screen_handle_vkresult(screen, ret);
   assert(success);
   return success ? mod : VK_NULL_HANDLE;
}

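/* check whether a variable is actually referenced by any deref in the shader */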
static bool
find_var_deref(nir_shader *nir, nir_variable *var)
{
   nir_foreach_function(function, nir) {
      if (!function->impl)
         continue;

      nir_foreach_block(block, function->impl) {
         nir_foreach_instr(instr, block) {
            if (instr->type != nir_instr_type_deref)
               continue;
            nir_deref_instr *deref = nir_instr_as_deref(instr);
            if (deref->deref_type == nir_deref_type_var && deref->var == var)
               return true;
         }
      }
   }
   return false;
}

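/* demote unreferenced shader io variables so dead-variable removal can delete them */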
static void
prune_io(nir_shader *nir)
{
   nir_foreach_shader_in_variable_safe(var, nir) {
      if (!find_var_deref(nir, var))
         var->data.mode = nir_var_shader_temp;
   }
   nir_foreach_shader_out_variable_safe(var, nir) {
      if (!find_var_deref(nir, var))
         var->data.mode = nir_var_shader_temp;
   }
}

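/* apply shader-key-based lowering to a clone of the base nir, then translate to
 * spirv and create the VkShaderModule
 */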
VkShaderModule
zink_shader_compile(struct zink_screen *screen, struct zink_shader *zs, nir_shader *base_nir, const struct zink_shader_key *key)
{
   VkShaderModule mod = VK_NULL_HANDLE;
   struct zink_shader_info *sinfo = &zs->sinfo;
   nir_shader *nir = nir_shader_clone(NULL, base_nir);
   bool need_optimize = false;
   bool inlined_uniforms = false;

   if (key) {
      if (key->inline_uniforms) {
         NIR_PASS_V(nir, nir_inline_uniforms,
                    nir->info.num_inlinable_uniforms,
                    key->base.inlined_uniform_values,
                    nir->info.inlinable_uniform_dw_offsets);

         inlined_uniforms = true;
      }

      /* TODO: use a separate mem ctx here for ralloc */
      switch (zs->nir->info.stage) {
      case MESA_SHADER_VERTEX: {
         uint32_t decomposed_attrs = 0, decomposed_attrs_without_w = 0;
         const struct zink_vs_key *vs_key = zink_vs_key(key);
         switch (vs_key->size) {
         case 4:
            decomposed_attrs = vs_key->u32.decomposed_attrs;
            decomposed_attrs_without_w = vs_key->u32.decomposed_attrs_without_w;
            break;
         case 2:
            decomposed_attrs = vs_key->u16.decomposed_attrs;
            decomposed_attrs_without_w = vs_key->u16.decomposed_attrs_without_w;
            break;
         case 1:
            decomposed_attrs = vs_key->u8.decomposed_attrs;
            decomposed_attrs_without_w = vs_key->u8.decomposed_attrs_without_w;
            break;
         default: break;
         }
         if (decomposed_attrs || decomposed_attrs_without_w)
            NIR_PASS_V(nir, decompose_attribs, decomposed_attrs, decomposed_attrs_without_w);
         FALLTHROUGH;
      }
      case MESA_SHADER_TESS_EVAL:
      case MESA_SHADER_GEOMETRY:
         if (zink_vs_key_base(key)->last_vertex_stage) {
            if (zs->sinfo.have_xfb)
               sinfo->last_vertex = true;

            if (!zink_vs_key_base(key)->clip_halfz && screen->driver_workarounds.depth_clip_control_missing) {
               NIR_PASS_V(nir, nir_lower_clip_halfz);
            }
            if (zink_vs_key_base(key)->push_drawid) {
               NIR_PASS_V(nir, lower_drawid);
            }
         }
         break;
      case MESA_SHADER_FRAGMENT:
         if (!zink_fs_key(key)->samples &&
             nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK)) {
            /* VK will always use gl_SampleMask[] values even if sample count is 0,
             * so we need to skip this write here to mimic GL's behavior of ignoring it
             */
            nir_foreach_shader_out_variable(var, nir) {
               if (var->data.location == FRAG_RESULT_SAMPLE_MASK)
                  var->data.mode = nir_var_shader_temp;
            }
            nir_fixup_deref_modes(nir);
            NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
            need_optimize = true;
         }
         if (zink_fs_key(key)->force_dual_color_blend && nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_DATA1)) {
            NIR_PASS_V(nir, lower_dual_blend);
         }
         if (zink_fs_key(key)->coord_replace_bits) {
            NIR_PASS_V(nir, nir_lower_texcoord_replace, zink_fs_key(key)->coord_replace_bits,
                       false, zink_fs_key(key)->coord_replace_yinvert);
         }
         if (zink_fs_key(key)->force_persample_interp || zink_fs_key(key)->fbfetch_ms) {
            nir_foreach_shader_in_variable(var, nir)
               var->data.sample = true;
            nir->info.fs.uses_sample_qualifier = true;
            nir->info.fs.uses_sample_shading = true;
         }
         if (nir->info.fs.uses_fbfetch_output) {
            nir_variable *fbfetch = NULL;
            NIR_PASS_V(nir, lower_fbfetch, &fbfetch, zink_fs_key(key)->fbfetch_ms);
            /* old variable must be deleted to avoid spirv errors */
            fbfetch->data.mode = nir_var_shader_temp;
            nir_fixup_deref_modes(nir);
            NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
            need_optimize = true;
         }
         break;
      default: break;
      }
      if (key->base.nonseamless_cube_mask) {
         NIR_PASS_V(nir, zink_lower_cubemap_to_array, key->base.nonseamless_cube_mask);
         need_optimize = true;
      }
   }
   if (screen->driconf.inline_uniforms) {
      NIR_PASS_V(nir, nir_lower_io_to_scalar, nir_var_mem_ubo | nir_var_mem_ssbo | nir_var_mem_shared);
      NIR_PASS_V(nir, rewrite_bo_access, screen);
      NIR_PASS_V(nir, remove_bo_access, zs);
      need_optimize = true;
   }
   if (inlined_uniforms) {
      optimize_nir(nir, zs);

      /* This must be done again. */
      NIR_PASS_V(nir, nir_io_add_const_offset_to_base, nir_var_shader_in |
                                                       nir_var_shader_out);

      nir_function_impl *impl = nir_shader_get_entrypoint(nir);
      if (impl->ssa_alloc > ZINK_ALWAYS_INLINE_LIMIT)
         zs->can_inline = false;
   } else if (need_optimize)
      optimize_nir(nir, zs);
   prune_io(nir);

   NIR_PASS_V(nir, nir_convert_from_ssa, true);

   struct spirv_shader *spirv = nir_to_spirv(nir, sinfo, screen->spirv_version);
   if (spirv)
      mod = zink_shader_spirv_compile(screen, zs, spirv);

   ralloc_free(nir);

   /* TODO: determine if there's any reason to cache spirv output? */
   if (zs->is_generated)
      zs->spirv = spirv;
   else
      ralloc_free(spirv);
   return mod;
}

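/* gl_InstanceID excludes the base instance in GL, but Vulkan's InstanceIndex
 * includes it, so subtract gl_BaseInstance from every load_instance_id
 */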
static bool
lower_baseinstance_instr(nir_builder *b, nir_instr *instr, void *data)
{
   if (instr->type != nir_instr_type_intrinsic)
      return false;
   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
   if (intr->intrinsic != nir_intrinsic_load_instance_id)
      return false;
   b->cursor = nir_after_instr(instr);
   nir_ssa_def *def = nir_isub(b, &intr->dest.ssa, nir_load_base_instance(b));
   nir_ssa_def_rewrite_uses_after(&intr->dest.ssa, def, def->parent_instr);
   return true;
}

static bool
lower_baseinstance(nir_shader *shader)
{
   if (shader->info.stage != MESA_SHADER_VERTEX)
      return false;
   return nir_shader_instructions_pass(shader, lower_baseinstance_instr, nir_metadata_dominance, NULL);
}

/* gl_nir_lower_buffers makes variables unusable for all UBO/SSBO access
 * so instead we delete all those broken variables and just make new ones
 */
static bool
unbreak_bos(nir_shader *shader, struct zink_shader *zs, bool needs_size)
{
   uint64_t max_ssbo_size = 0;
   uint64_t max_ubo_size = 0;
   uint64_t max_uniform_size = 0;

   if (!shader->info.num_ssbos && !shader->info.num_ubos)
      return false;

   nir_foreach_variable_with_modes(var, shader, nir_var_mem_ssbo | nir_var_mem_ubo) {
      const struct glsl_type *type = glsl_without_array(var->type);
      if (type_is_counter(type))
         continue;
      /* be conservative: use the bigger of the interface and variable types to ensure in-bounds access */
      unsigned size = glsl_count_attribute_slots(glsl_type_is_array(var->type) ? var->type : type, false);
      const struct glsl_type *interface_type = var->interface_type ? glsl_without_array(var->interface_type) : NULL;
      if (interface_type) {
         unsigned block_size = glsl_get_explicit_size(interface_type, true);
         block_size = DIV_ROUND_UP(block_size, sizeof(float) * 4);
         size = MAX2(size, block_size);
      }
      if (var->data.mode == nir_var_mem_ubo) {
         if (var->data.driver_location)
            max_ubo_size = MAX2(max_ubo_size, size);
         else
            max_uniform_size = MAX2(max_uniform_size, size);
      } else {
         max_ssbo_size = MAX2(max_ssbo_size, size);
         if (interface_type) {
            if (glsl_type_is_unsized_array(glsl_get_struct_field(interface_type, glsl_get_length(interface_type) - 1)))
               needs_size = true;
         }
      }
      var->data.mode = nir_var_shader_temp;
   }
   nir_fixup_deref_modes(shader);
   NIR_PASS_V(shader, nir_remove_dead_variables, nir_var_shader_temp, NULL);
   optimize_nir(shader, NULL);

   struct glsl_struct_field *fields = rzalloc_array(shader, struct glsl_struct_field, 2);
   fields[0].name = ralloc_strdup(shader, "base");
   fields[1].name = ralloc_strdup(shader, "unsized");
   if (shader->info.num_ubos) {
      if (shader->num_uniforms && zs->ubos_used & BITFIELD_BIT(0)) {
         fields[0].type = glsl_array_type(glsl_uint_type(), max_uniform_size * 4, 4);
         nir_variable *var = nir_variable_create(shader, nir_var_mem_ubo,
                                                 glsl_array_type(glsl_interface_type(fields, 1, GLSL_INTERFACE_PACKING_STD430, false, "struct"), 1, 0),
                                                 "uniform_0");
         var->interface_type = var->type;
         var->data.mode = nir_var_mem_ubo;
         var->data.driver_location = 0;
      }

      unsigned num_ubos = shader->info.num_ubos - !!shader->info.first_ubo_is_default_ubo;
      uint32_t ubos_used = zs->ubos_used & ~BITFIELD_BIT(0);
      if (num_ubos && ubos_used) {
         fields[0].type = glsl_array_type(glsl_uint_type(), max_ubo_size * 4, 4);
         /* shrink array as much as possible */
         unsigned first_ubo = ffs(ubos_used) - 2;
         assert(first_ubo < PIPE_MAX_CONSTANT_BUFFERS);
         num_ubos -= first_ubo;
         assert(num_ubos);
         nir_variable *var = nir_variable_create(shader, nir_var_mem_ubo,
                                                 glsl_array_type(glsl_struct_type(fields, 1, "struct", false), num_ubos, 0),
                                                 "ubos");
         var->interface_type = var->type;
         var->data.mode = nir_var_mem_ubo;
         var->data.driver_location = first_ubo + !!shader->info.first_ubo_is_default_ubo;
      }
   }
   if (shader->info.num_ssbos && zs->ssbos_used) {
      /* shrink array as much as possible */
      unsigned first_ssbo = ffs(zs->ssbos_used) - 1;
      assert(first_ssbo < PIPE_MAX_SHADER_BUFFERS);
      unsigned num_ssbos = shader->info.num_ssbos - first_ssbo;
      assert(num_ssbos);
      const struct glsl_type *ssbo_type = glsl_array_type(glsl_uint_type(), max_ssbo_size * 4, 4);
      const struct glsl_type *unsized = glsl_array_type(glsl_uint_type(), 0, 4);
      fields[0].type = ssbo_type;
      fields[1].type = max_ssbo_size ? unsized : NULL;
      unsigned field_count = max_ssbo_size && needs_size ? 2 : 1;
      nir_variable *var = nir_variable_create(shader, nir_var_mem_ssbo,
                                              glsl_array_type(glsl_struct_type(fields, field_count, "struct", false), num_ssbos, 0),
                                              "ssbos");
      var->interface_type = var->type;
      var->data.mode = nir_var_mem_ssbo;
      var->data.driver_location = first_ssbo;
   }
   return true;
}

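/* scan buffer access intrinsics to compute which ubo/ssbo slots are actually used;
 * a constant index maps to a single bit, an indirect index marks the whole range
 */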
static uint32_t
get_src_mask(unsigned total, nir_src src)
{
   if (nir_src_is_const(src))
      return BITFIELD_BIT(nir_src_as_uint(src));
   return BITFIELD_MASK(total);
}

static bool
analyze_io(struct zink_shader *zs, nir_shader *shader)
{
   bool ret = false;
   nir_function_impl *impl = nir_shader_get_entrypoint(shader);
   nir_foreach_block(block, impl) {
      nir_foreach_instr(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
         switch (intrin->intrinsic) {
         case nir_intrinsic_store_ssbo:
            zs->ssbos_used |= get_src_mask(shader->info.num_ssbos, intrin->src[1]);
            break;

         case nir_intrinsic_get_ssbo_size: {
            zs->ssbos_used |= get_src_mask(shader->info.num_ssbos, intrin->src[0]);
            ret = true;
            break;
         }
         case nir_intrinsic_ssbo_atomic_add:
         case nir_intrinsic_ssbo_atomic_imin:
         case nir_intrinsic_ssbo_atomic_umin:
         case nir_intrinsic_ssbo_atomic_imax:
         case nir_intrinsic_ssbo_atomic_umax:
         case nir_intrinsic_ssbo_atomic_and:
         case nir_intrinsic_ssbo_atomic_or:
         case nir_intrinsic_ssbo_atomic_xor:
         case nir_intrinsic_ssbo_atomic_exchange:
         case nir_intrinsic_ssbo_atomic_comp_swap:
         case nir_intrinsic_ssbo_atomic_fmin:
         case nir_intrinsic_ssbo_atomic_fmax:
         case nir_intrinsic_ssbo_atomic_fcomp_swap:
         case nir_intrinsic_load_ssbo:
            zs->ssbos_used |= get_src_mask(shader->info.num_ssbos, intrin->src[0]);
            break;
         case nir_intrinsic_load_ubo:
         case nir_intrinsic_load_ubo_vec4:
            zs->ubos_used |= get_src_mask(shader->info.num_ubos, intrin->src[0]);
            break;
         default:
            break;
         }
      }
   }
   return ret;
}

struct zink_bindless_info {
   nir_variable *bindless[4];
   unsigned bindless_set;
};

/* this is a "default" bindless texture used if the shader has no texture variables */
static nir_variable *
create_bindless_texture(nir_shader *nir, nir_tex_instr *tex, unsigned descriptor_set)
{
   unsigned binding = tex->sampler_dim == GLSL_SAMPLER_DIM_BUF ? 1 : 0;
   nir_variable *var;

   const struct glsl_type *sampler_type = glsl_sampler_type(tex->sampler_dim, tex->is_shadow, tex->is_array, GLSL_TYPE_FLOAT);
   var = nir_variable_create(nir, nir_var_uniform, glsl_array_type(sampler_type, ZINK_MAX_BINDLESS_HANDLES, 0), "bindless_texture");
   var->data.descriptor_set = descriptor_set;
   var->data.driver_location = var->data.binding = binding;
   return var;
}

/* this is a "default" bindless image used if the shader has no image variables */
static nir_variable *
create_bindless_image(nir_shader *nir, enum glsl_sampler_dim dim, unsigned descriptor_set)
{
   unsigned binding = dim == GLSL_SAMPLER_DIM_BUF ? 3 : 2;
   nir_variable *var;

   const struct glsl_type *image_type = glsl_image_type(dim, false, GLSL_TYPE_FLOAT);
   var = nir_variable_create(nir, nir_var_image, glsl_array_type(image_type, ZINK_MAX_BINDLESS_HANDLES, 0), "bindless_image");
   var->data.descriptor_set = descriptor_set;
   var->data.driver_location = var->data.binding = binding;
   var->data.image.format = PIPE_FORMAT_R8G8B8A8_UNORM;
   return var;
}

/* rewrite bindless instructions as array deref instructions */
static bool
lower_bindless_instr(nir_builder *b, nir_instr *in, void *data)
{
   struct zink_bindless_info *bindless = data;

   if (in->type == nir_instr_type_tex) {
      nir_tex_instr *tex = nir_instr_as_tex(in);
      int idx = nir_tex_instr_src_index(tex, nir_tex_src_texture_handle);
      if (idx == -1)
         return false;

      nir_variable *var = tex->sampler_dim == GLSL_SAMPLER_DIM_BUF ? bindless->bindless[1] : bindless->bindless[0];
      if (!var)
         var = create_bindless_texture(b->shader, tex, bindless->bindless_set);
      b->cursor = nir_before_instr(in);
      nir_deref_instr *deref = nir_build_deref_var(b, var);
      if (glsl_type_is_array(var->type))
         deref = nir_build_deref_array(b, deref, nir_u2uN(b, tex->src[idx].src.ssa, 32));
      nir_instr_rewrite_src_ssa(in, &tex->src[idx].src, &deref->dest.ssa);

      /* bindless sampling uses the variable type directly, which means the tex instr has to exactly
       * match up with it in contrast to normal sampler ops where things are a bit more flexible;
       * this results in cases where a shader is passed with sampler2DArray but the tex instr only has
       * 2 components, which explodes spirv compilation even though it doesn't trigger validation errors
       *
       * to fix this, pad the coord src here and fix the tex instr so that ntv will do the "right" thing
       * - Warhammer 40k: Dawn of War III
       */
      unsigned needed_components = glsl_get_sampler_coordinate_components(glsl_without_array(var->type));
      unsigned c = nir_tex_instr_src_index(tex, nir_tex_src_coord);
      unsigned coord_components = nir_src_num_components(tex->src[c].src);
      if (coord_components < needed_components) {
         nir_ssa_def *def = nir_pad_vector(b, tex->src[c].src.ssa, needed_components);
         nir_instr_rewrite_src_ssa(in, &tex->src[c].src, def);
         tex->coord_components = needed_components;
      }
      return true;
   }
   if (in->type != nir_instr_type_intrinsic)
      return false;
   nir_intrinsic_instr *instr = nir_instr_as_intrinsic(in);

   nir_intrinsic_op op;
#define OP_SWAP(OP) \
   case nir_intrinsic_bindless_image_##OP: \
      op = nir_intrinsic_image_deref_##OP; \
      break;

   /* convert bindless intrinsics to deref intrinsics */
   switch (instr->intrinsic) {
   OP_SWAP(atomic_add)
   OP_SWAP(atomic_and)
   OP_SWAP(atomic_comp_swap)
   OP_SWAP(atomic_dec_wrap)
   OP_SWAP(atomic_exchange)
   OP_SWAP(atomic_fadd)
   OP_SWAP(atomic_fmax)
   OP_SWAP(atomic_fmin)
   OP_SWAP(atomic_imax)
   OP_SWAP(atomic_imin)
   OP_SWAP(atomic_inc_wrap)
   OP_SWAP(atomic_or)
   OP_SWAP(atomic_umax)
   OP_SWAP(atomic_umin)
   OP_SWAP(atomic_xor)
   OP_SWAP(format)
   OP_SWAP(load)
   OP_SWAP(order)
   OP_SWAP(samples)
   OP_SWAP(size)
   OP_SWAP(store)
   default:
      return false;
   }

   enum glsl_sampler_dim dim = nir_intrinsic_image_dim(instr);
   nir_variable *var = dim == GLSL_SAMPLER_DIM_BUF ? bindless->bindless[3] : bindless->bindless[2];
   if (!var)
      var = create_bindless_image(b->shader, dim, bindless->bindless_set);
   instr->intrinsic = op;
   b->cursor = nir_before_instr(in);
   nir_deref_instr *deref = nir_build_deref_var(b, var);
   if (glsl_type_is_array(var->type))
      deref = nir_build_deref_array(b, deref, nir_u2uN(b, instr->src[0].ssa, 32));
   nir_instr_rewrite_src_ssa(in, &instr->src[0], &deref->dest.ssa);
   return true;
}

static bool
lower_bindless(nir_shader *shader, struct zink_bindless_info *bindless)
{
   if (!nir_shader_instructions_pass(shader, lower_bindless_instr, nir_metadata_dominance, bindless))
      return false;
   nir_fixup_deref_modes(shader);
   NIR_PASS_V(shader, nir_remove_dead_variables, nir_var_shader_temp, NULL);
   optimize_nir(shader, NULL);
   return true;
}

/* convert shader image/texture io variables to int64 handles for bindless indexing */
static bool
lower_bindless_io_instr(nir_builder *b, nir_instr *in, void *data)
{
   if (in->type != nir_instr_type_intrinsic)
      return false;
   nir_intrinsic_instr *instr = nir_instr_as_intrinsic(in);
   if (instr->intrinsic != nir_intrinsic_load_deref &&
       instr->intrinsic != nir_intrinsic_store_deref)
      return false;

   nir_deref_instr *src_deref = nir_src_as_deref(instr->src[0]);
   nir_variable *var = nir_deref_instr_get_variable(src_deref);
   if (var->data.bindless)
      return false;
   if (var->data.mode != nir_var_shader_in && var->data.mode != nir_var_shader_out)
      return false;
   if (!glsl_type_is_image(var->type) && !glsl_type_is_sampler(var->type))
      return false;

   var->type = glsl_int64_t_type();
   var->data.bindless = 1;
   b->cursor = nir_before_instr(in);
   nir_deref_instr *deref = nir_build_deref_var(b, var);
   if (instr->intrinsic == nir_intrinsic_load_deref) {
      nir_ssa_def *def = nir_load_deref(b, deref);
      nir_instr_rewrite_src_ssa(in, &instr->src[0], def);
      nir_ssa_def_rewrite_uses(&instr->dest.ssa, def);
   } else {
      nir_store_deref(b, deref, instr->src[1].ssa, nir_intrinsic_write_mask(instr));
   }
   nir_instr_remove(in);
   nir_instr_remove(&src_deref->instr);
   return true;
}

static bool
lower_bindless_io(nir_shader *shader)
{
   return nir_shader_instructions_pass(shader, lower_bindless_io_instr, nir_metadata_dominance, NULL);
}

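/* map a descriptor type + index to a unique per-stage binding number within its set */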
static uint32_t
zink_binding(gl_shader_stage stage, VkDescriptorType type, int index, bool compact_descriptors)
{
   if (stage == MESA_SHADER_NONE) {
      unreachable("not supported");
   } else {
      switch (type) {
      case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
      case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
         return stage * 2 + !!index;

      case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
      case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
         assert(index < PIPE_MAX_SAMPLERS);
         return (stage * PIPE_MAX_SAMPLERS) + index;

      case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
         return stage + (compact_descriptors * (ZINK_SHADER_COUNT * 2));

      case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
      case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
         assert(index < ZINK_MAX_SHADER_IMAGES);
         return (stage * ZINK_MAX_SHADER_IMAGES) + index + (compact_descriptors * (ZINK_SHADER_COUNT * PIPE_MAX_SAMPLERS));

      default:
         unreachable("unexpected type");
      }
   }
}

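/* clone a bindless variable into the shared bindless descriptor array for its
 * descriptor type, then demote the original so it can be removed
 */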
static void
handle_bindless_var(nir_shader *nir, nir_variable *var, const struct glsl_type *type, struct zink_bindless_info *bindless)
{
   if (glsl_type_is_struct(type)) {
      for (unsigned i = 0; i < glsl_get_length(type); i++)
         handle_bindless_var(nir, var, glsl_get_struct_field(type, i), bindless);
      return;
   }

   /* just a random scalar in a struct */
   if (!glsl_type_is_image(type) && !glsl_type_is_sampler(type))
      return;

   VkDescriptorType vktype = glsl_type_is_image(type) ? zink_image_type(type) : zink_sampler_type(type);
   unsigned binding;
   switch (vktype) {
   case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
      binding = 0;
      break;
   case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
      binding = 1;
      break;
   case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
      binding = 2;
      break;
   case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
      binding = 3;
      break;
   default:
      unreachable("unknown");
   }
   if (!bindless->bindless[binding]) {
      bindless->bindless[binding] = nir_variable_clone(var, nir);
      bindless->bindless[binding]->data.bindless = 0;
      bindless->bindless[binding]->data.descriptor_set = bindless->bindless_set;
      bindless->bindless[binding]->type = glsl_array_type(type, ZINK_MAX_BINDLESS_HANDLES, 0);
      bindless->bindless[binding]->data.driver_location = bindless->bindless[binding]->data.binding = binding;
      if (!bindless->bindless[binding]->data.image.format)
         bindless->bindless[binding]->data.image.format = PIPE_FORMAT_R8G8B8A8_UNORM;
      nir_shader_add_variable(nir, bindless->bindless[binding]);
   } else {
      assert(glsl_get_sampler_dim(glsl_without_array(bindless->bindless[binding]->type)) == glsl_get_sampler_dim(glsl_without_array(var->type)));
   }
   var->data.mode = nir_var_shader_temp;
}

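/* collapse gs/tes output primitive enums to the reduced point/line/triangle classes */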
static enum pipe_prim_type
prim_to_pipe(enum shader_prim primitive_type)
{
   switch (primitive_type) {
   case SHADER_PRIM_POINTS:
      return PIPE_PRIM_POINTS;
   case SHADER_PRIM_LINES:
   case SHADER_PRIM_LINE_LOOP:
   case SHADER_PRIM_LINE_STRIP:
   case SHADER_PRIM_LINES_ADJACENCY:
   case SHADER_PRIM_LINE_STRIP_ADJACENCY:
      return PIPE_PRIM_LINES;
   default:
      return PIPE_PRIM_TRIANGLES;
   }
}

static enum pipe_prim_type
tess_prim_to_pipe(enum tess_primitive_mode prim_mode)
{
   switch (prim_mode) {
   case TESS_PRIMITIVE_ISOLINES:
      return PIPE_PRIM_LINES;
   default:
      return PIPE_PRIM_TRIANGLES;
   }
}

static enum pipe_prim_type
get_shader_base_prim_type(struct nir_shader *nir)
{
   switch (nir->info.stage) {
   case MESA_SHADER_GEOMETRY:
      return prim_to_pipe(nir->info.gs.output_primitive);
   case MESA_SHADER_TESS_EVAL:
      return nir->info.tess.point_mode ? PIPE_PRIM_POINTS : tess_prim_to_pipe(nir->info.tess._primitive_mode);
   default:
      break;
   }
   return PIPE_PRIM_MAX;
}

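/* 1D shadow samplers are promoted to 2D: pad tex coords/derivs with a zero y
 * component and trim the extra component from the result
 */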
static bool
convert_1d_shadow_tex(nir_builder *b, nir_instr *instr, void *data)
{
   struct zink_screen *screen = data;
   if (instr->type != nir_instr_type_tex)
      return false;
   nir_tex_instr *tex = nir_instr_as_tex(instr);
   if (tex->sampler_dim != GLSL_SAMPLER_DIM_1D || !tex->is_shadow)
      return false;
   if (tex->is_sparse && screen->need_2D_sparse) {
      /* no known case of this exists: only nvidia can hit it, and nothing uses it */
      mesa_loge("unhandled/unsupported 1D sparse texture!");
      abort();
   }
   tex->sampler_dim = GLSL_SAMPLER_DIM_2D;
   b->cursor = nir_before_instr(instr);
   tex->coord_components++;
   unsigned srcs[] = {
      nir_tex_src_coord,
      nir_tex_src_offset,
      nir_tex_src_ddx,
      nir_tex_src_ddy,
   };
   for (unsigned i = 0; i < ARRAY_SIZE(srcs); i++) {
      /* nir_tex_instr_src_index() returns a signed index: keep it signed so the
       * missing-src check works as intended
       */
      int c = nir_tex_instr_src_index(tex, srcs[i]);
      if (c == -1)
         continue;
      if (tex->src[c].src.ssa->num_components == tex->coord_components)
         continue;
      nir_ssa_def *def;
      nir_ssa_def *zero = nir_imm_zero(b, 1, tex->src[c].src.ssa->bit_size);
      if (tex->src[c].src.ssa->num_components == 1)
         def = nir_vec2(b, tex->src[c].src.ssa, zero);
      else
         def = nir_vec3(b, nir_channel(b, tex->src[c].src.ssa, 0), zero, nir_channel(b, tex->src[c].src.ssa, 1));
      nir_instr_rewrite_src_ssa(instr, &tex->src[c].src, def);
   }
   b->cursor = nir_after_instr(instr);
   unsigned needed_components = nir_tex_instr_dest_size(tex);
   unsigned num_components = tex->dest.ssa.num_components;
   if (needed_components > num_components) {
      tex->dest.ssa.num_components = needed_components;
      assert(num_components < 3);
      /* take either xz or just x since this is promoted to 2D from 1D */
      uint32_t mask = num_components == 2 ? (1|4) : 1;
      nir_ssa_def *dst = nir_channels(b, &tex->dest.ssa, mask);
      nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, dst, dst->parent_instr);
   }
   return true;
}

static bool
lower_1d_shadow(nir_shader *shader, struct zink_screen *screen)
{
   bool found = false;
   nir_foreach_variable_with_modes(var, shader, nir_var_uniform | nir_var_image) {
      const struct glsl_type *type = glsl_without_array(var->type);
      unsigned length = glsl_get_length(var->type);
      if (!glsl_type_is_sampler(type) || !glsl_sampler_type_is_shadow(type) || glsl_get_sampler_dim(type) != GLSL_SAMPLER_DIM_1D)
         continue;
      const struct glsl_type *sampler = glsl_sampler_type(GLSL_SAMPLER_DIM_2D, true, glsl_sampler_type_is_array(type), glsl_get_sampler_result_type(type));
      var->type = type != var->type ? glsl_array_type(sampler, length, glsl_get_explicit_stride(var->type)) : sampler;

      found = true;
   }
   if (found)
      nir_shader_instructions_pass(shader, convert_1d_shadow_tex, nir_metadata_dominance, screen);
   return found;
}

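/* walk the shader to collect per-shader info that later passes need:
 * sparse residency usage and the set of image bindings accessed
 */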
static void
scan_nir(struct zink_screen *screen, nir_shader *shader, struct zink_shader *zs)
{
   nir_foreach_function(function, shader) {
      if (!function->impl)
         continue;
      nir_foreach_block_safe(block, function->impl) {
         nir_foreach_instr_safe(instr, block) {
            if (instr->type == nir_instr_type_tex) {
               nir_tex_instr *tex = nir_instr_as_tex(instr);
               zs->sinfo.have_sparse |= tex->is_sparse;
            }
            if (instr->type != nir_instr_type_intrinsic)
               continue;
            nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
            if (intr->intrinsic == nir_intrinsic_image_deref_load ||
                intr->intrinsic == nir_intrinsic_image_deref_sparse_load ||
                intr->intrinsic == nir_intrinsic_image_deref_store ||
                intr->intrinsic == nir_intrinsic_image_deref_atomic_add ||
                intr->intrinsic == nir_intrinsic_image_deref_atomic_imin ||
                intr->intrinsic == nir_intrinsic_image_deref_atomic_umin ||
                intr->intrinsic == nir_intrinsic_image_deref_atomic_imax ||
                intr->intrinsic == nir_intrinsic_image_deref_atomic_umax ||
                intr->intrinsic == nir_intrinsic_image_deref_atomic_and ||
                intr->intrinsic == nir_intrinsic_image_deref_atomic_or ||
                intr->intrinsic == nir_intrinsic_image_deref_atomic_xor ||
                intr->intrinsic == nir_intrinsic_image_deref_atomic_exchange ||
                intr->intrinsic == nir_intrinsic_image_deref_atomic_comp_swap ||
                intr->intrinsic == nir_intrinsic_image_deref_atomic_fadd ||
                intr->intrinsic == nir_intrinsic_image_deref_size ||
                intr->intrinsic == nir_intrinsic_image_deref_samples ||
                intr->intrinsic == nir_intrinsic_image_deref_format ||
                intr->intrinsic == nir_intrinsic_image_deref_order) {

               nir_variable *var =
                  nir_deref_instr_get_variable(nir_src_as_deref(intr->src[0]));

               /* Structs have been lowered already, so get_aoa_size is sufficient. */
               const unsigned size =
                  glsl_type_is_array(var->type) ? glsl_get_aoa_size(var->type) : 1;
               BITSET_SET_RANGE(shader->info.images_used, var->data.binding,
                                var->data.binding + (MAX2(size, 1) - 1));
            }
            if (intr->intrinsic == nir_intrinsic_is_sparse_texels_resident ||
                intr->intrinsic == nir_intrinsic_image_deref_sparse_load)
               zs->sinfo.have_sparse = true;

            static bool warned = false;
            if (!screen->info.have_EXT_shader_atomic_float && !screen->is_cpu && !warned) {
               switch (intr->intrinsic) {
               case nir_intrinsic_image_deref_atomic_add: {
                  nir_variable *var = nir_intrinsic_get_var(intr, 0);
                  if (util_format_is_float(var->data.image.format)) {
                     fprintf(stderr, "zink: Vulkan driver missing VK_EXT_shader_atomic_float but attempting to do atomic ops!\n");
                     /* the guard was previously checked but never set: only warn once */
                     warned = true;
                  }
                  break;
               }
               default:
                  break;
               }
            }
         }
      }
   }
}

static bool
lower_sparse_instr(nir_builder *b, nir_instr *in, void *data)
{
   if (in->type != nir_instr_type_intrinsic)
      return false;
   nir_intrinsic_instr *instr = nir_instr_as_intrinsic(in);
   if (instr->intrinsic != nir_intrinsic_is_sparse_texels_resident)
      return false;

   /* vulkan vec can only be a vec4, but this is (maybe) vec5,
    * so just rewrite as the first component since ntv is going to use a different
    * method for storing the residency value anyway
    */
   b->cursor = nir_before_instr(&instr->instr);
   nir_instr *parent = instr->src[0].ssa->parent_instr;
   assert(parent->type == nir_instr_type_alu);
   nir_alu_instr *alu = nir_instr_as_alu(parent);
   nir_ssa_def_rewrite_uses_after(instr->src[0].ssa, nir_channel(b, alu->src[0].src.ssa, 0), parent);
   nir_instr_remove(parent);
   return true;
}

static bool
lower_sparse(nir_shader *shader)
{
   return nir_shader_instructions_pass(shader, lower_sparse_instr, nir_metadata_dominance, NULL);
}

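/* make tex dests match their sampler's return type and bit size: sample at the
 * sampler precision, convert back to the expected size, and re-splat shadow results
 */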
static bool
match_tex_dests_instr(nir_builder *b, nir_instr *in, void *data)
{
   if (in->type != nir_instr_type_tex)
      return false;
   nir_tex_instr *tex = nir_instr_as_tex(in);
   if (tex->op == nir_texop_txs)
      return false;
   int handle = nir_tex_instr_src_index(tex, nir_tex_src_texture_handle);
   nir_variable *var = NULL;
   if (handle != -1) {
      var = nir_deref_instr_get_variable(nir_src_as_deref(tex->src[handle].src));
   } else {
      nir_foreach_variable_with_modes(img, b->shader, nir_var_uniform) {
         if (glsl_type_is_sampler(glsl_without_array(img->type))) {
            unsigned size = glsl_type_is_array(img->type) ? glsl_get_aoa_size(img->type) : 1;
            if (tex->texture_index >= img->data.driver_location &&
                tex->texture_index < img->data.driver_location + size) {
               var = img;
               break;
            }
         }
      }
   }
   assert(var);
   const struct glsl_type *type = glsl_without_array(var->type);
   enum glsl_base_type ret_type = glsl_get_sampler_result_type(type);
   bool is_int = glsl_base_type_is_integer(ret_type);
   unsigned bit_size = glsl_base_type_get_bit_size(ret_type);
   unsigned dest_size = nir_dest_bit_size(tex->dest);
   b->cursor = nir_after_instr(in);
   unsigned num_components = nir_dest_num_components(tex->dest);
   bool rewrite_depth = tex->is_shadow && num_components > 1 && tex->op != nir_texop_tg4;
   if (bit_size == dest_size && !rewrite_depth)
      return false;
   nir_ssa_def *dest = &tex->dest.ssa;
   if (bit_size != dest_size) {
      tex->dest.ssa.bit_size = bit_size;
      tex->dest_type = nir_get_nir_type_for_glsl_base_type(ret_type);
      if (rewrite_depth) {
         assert(!tex->is_new_style_shadow);
         tex->dest.ssa.num_components = 1;
         tex->is_new_style_shadow = true;
      }

      if (is_int) {
         if (glsl_unsigned_base_type_of(ret_type) == ret_type)
            dest = nir_u2uN(b, &tex->dest.ssa, dest_size);
         else
            dest = nir_i2iN(b, &tex->dest.ssa, dest_size);
      } else {
         dest = nir_f2fN(b, &tex->dest.ssa, dest_size);
      }
      if (rewrite_depth) {
         nir_ssa_def *vec[4] = {dest, dest, dest, dest};
         dest = nir_vec(b, vec, num_components);
      }
      nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, dest, dest->parent_instr);
   } else if (rewrite_depth) {
      assert(!tex->is_new_style_shadow);
      tex->dest.ssa.num_components = 1;
      tex->is_new_style_shadow = true;
      nir_ssa_def *vec[4] = {dest, dest, dest, dest};
      nir_ssa_def *splat = nir_vec(b, vec, num_components);
      nir_ssa_def_rewrite_uses_after(dest, splat, splat->parent_instr);
   }
   return true;
}

static bool
match_tex_dests(nir_shader *shader)
{
   return nir_shader_instructions_pass(shader, match_tex_dests_instr, nir_metadata_dominance, NULL);
}

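/* create the zink_shader wrapper for a gallium-supplied nir shader: run all the
 * one-time lowering passes and build the descriptor binding tables
 */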
struct zink_shader *
|
|
zink_shader_create(struct zink_screen *screen, struct nir_shader *nir,
|
|
const struct pipe_stream_output_info *so_info)
|
|
{
|
|
struct zink_shader *ret = CALLOC_STRUCT(zink_shader);
|
|
bool have_psiz = false;
|
|
|
|
ret->sinfo.have_vulkan_memory_model = screen->info.have_KHR_vulkan_memory_model;
|
|
|
|
ret->hash = _mesa_hash_pointer(ret);
|
|
ret->reduced_prim = get_shader_base_prim_type(nir);
|
|
|
|
ret->programs = _mesa_pointer_set_create(NULL);
|
|
simple_mtx_init(&ret->lock, mtx_plain);
|
|
|
|
nir_variable_mode indirect_derefs_modes = nir_var_function_temp;
|
|
if (nir->info.stage == MESA_SHADER_TESS_CTRL ||
|
|
nir->info.stage == MESA_SHADER_TESS_EVAL)
|
|
indirect_derefs_modes |= nir_var_shader_in | nir_var_shader_out;
|
|
|
|
NIR_PASS_V(nir, nir_lower_indirect_derefs, indirect_derefs_modes,
|
|
UINT32_MAX);
|
|
|
|
if (nir->info.stage == MESA_SHADER_VERTEX)
|
|
create_vs_pushconst(nir);
|
|
else if (nir->info.stage == MESA_SHADER_TESS_CTRL ||
|
|
nir->info.stage == MESA_SHADER_TESS_EVAL)
|
|
NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false);
|
|
else if (nir->info.stage == MESA_SHADER_KERNEL)
|
|
create_cs_pushconst(nir);
|
|
|
|
if (nir->info.stage < MESA_SHADER_FRAGMENT)
|
|
have_psiz = check_psiz(nir);
|
|
NIR_PASS_V(nir, lower_basevertex);
|
|
NIR_PASS_V(nir, lower_work_dim);
|
|
NIR_PASS_V(nir, nir_lower_regs_to_ssa);
|
|
NIR_PASS_V(nir, lower_baseinstance);
|
|
NIR_PASS_V(nir, lower_sparse);
|
|
|
|
if (screen->need_2D_zs)
|
|
NIR_PASS_V(nir, lower_1d_shadow, screen);
|
|
|
|
{
|
|
nir_lower_subgroups_options subgroup_options = {0};
|
|
subgroup_options.lower_to_scalar = true;
|
|
subgroup_options.subgroup_size = screen->info.props11.subgroupSize;
|
|
subgroup_options.ballot_bit_size = 32;
|
|
subgroup_options.ballot_components = 4;
|
|
subgroup_options.lower_subgroup_masks = true;
|
|
if (!(screen->info.subgroup.supportedStages & mesa_to_vk_shader_stage(nir->info.stage))) {
|
|
subgroup_options.subgroup_size = 1;
|
|
subgroup_options.lower_vote_trivial = true;
|
|
}
|
|
NIR_PASS_V(nir, nir_lower_subgroups, &subgroup_options);
|
|
}
|
|
|
|
if (so_info && so_info->num_outputs)
|
|
NIR_PASS_V(nir, split_blocks);
|
|
|
|
optimize_nir(nir, NULL);
|
|
NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp, NULL);
|
|
NIR_PASS_V(nir, nir_lower_discard_if);
|
|
NIR_PASS_V(nir, nir_lower_fragcolor,
|
|
nir->info.fs.color_is_dual_source ? 1 : 8);
|
|
NIR_PASS_V(nir, lower_64bit_vertex_attribs);
|
|
bool needs_size = analyze_io(ret, nir);
|
|
NIR_PASS_V(nir, unbreak_bos, ret, needs_size);
|
|
/* run in compile if there could be inlined uniforms */
|
|
if (!screen->driconf.inline_uniforms) {
|
|
NIR_PASS_V(nir, nir_lower_io_to_scalar, nir_var_mem_ubo | nir_var_mem_ssbo | nir_var_mem_shared);
|
|
NIR_PASS_V(nir, rewrite_bo_access, screen);
|
|
NIR_PASS_V(nir, remove_bo_access, ret);
|
|
}
|
|
|
|
if (zink_debug & ZINK_DEBUG_NIR) {
|
|
fprintf(stderr, "NIR shader:\n---8<---\n");
|
|
nir_print_shader(nir, stderr);
|
|
fprintf(stderr, "---8<---\n");
|
|
}
|
|
|
|
   struct zink_bindless_info bindless = {0};
   bindless.bindless_set = screen->desc_set_id[ZINK_DESCRIPTOR_BINDLESS];
   bool has_bindless_io = false;
   nir_foreach_variable_with_modes(var, nir, nir_var_shader_in | nir_var_shader_out) {
      var->data.is_xfb = false;
      if (glsl_type_is_image(var->type) || glsl_type_is_sampler(var->type)) {
         has_bindless_io = true;
         break;
      }
   }
   if (has_bindless_io)
      NIR_PASS_V(nir, lower_bindless_io);

   optimize_nir(nir, NULL);
   prune_io(nir);

   scan_nir(screen, nir, ret);

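   /* walk the variable list in reverse to assign each UBO/SSBO/sampler/image
    * variable a Vulkan descriptor set and binding, mirroring the result into
    * ret->bindings[] for pipeline-layout creation
    */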
   foreach_list_typed_reverse_safe(nir_variable, var, node, &nir->variables) {
      if (_nir_shader_variable_has_mode(var, nir_var_uniform |
                                             nir_var_image |
                                             nir_var_mem_ubo |
                                             nir_var_mem_ssbo)) {
         enum zink_descriptor_type ztype;
         const struct glsl_type *type = glsl_without_array(var->type);
         if (var->data.mode == nir_var_mem_ubo) {
            ztype = ZINK_DESCRIPTOR_TYPE_UBO;
            /* buffer 0 is a push descriptor */
            var->data.descriptor_set = !!var->data.driver_location;
            var->data.binding = !var->data.driver_location ? nir->info.stage :
                                zink_binding(nir->info.stage,
                                             VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
                                             var->data.driver_location,
                                             screen->compact_descriptors);
            assert(var->data.driver_location || var->data.binding < 10);
            VkDescriptorType vktype = !var->data.driver_location ? VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC : VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
            int binding = var->data.binding;

            ret->bindings[ztype][ret->num_bindings[ztype]].index = var->data.driver_location;
            ret->bindings[ztype][ret->num_bindings[ztype]].binding = binding;
            ret->bindings[ztype][ret->num_bindings[ztype]].type = vktype;
            ret->bindings[ztype][ret->num_bindings[ztype]].size = glsl_get_length(var->type);
            assert(ret->bindings[ztype][ret->num_bindings[ztype]].size);
            ret->num_bindings[ztype]++;
         } else if (var->data.mode == nir_var_mem_ssbo) {
            ztype = ZINK_DESCRIPTOR_TYPE_SSBO;
            var->data.descriptor_set = screen->desc_set_id[ztype];
            var->data.binding = zink_binding(nir->info.stage,
                                             VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
                                             var->data.driver_location,
                                             screen->compact_descriptors);
            ret->bindings[ztype][ret->num_bindings[ztype]].index = var->data.driver_location;
            ret->bindings[ztype][ret->num_bindings[ztype]].binding = var->data.binding;
            ret->bindings[ztype][ret->num_bindings[ztype]].type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
            ret->bindings[ztype][ret->num_bindings[ztype]].size = glsl_get_length(var->type);
            assert(ret->bindings[ztype][ret->num_bindings[ztype]].size);
            ret->num_bindings[ztype]++;
         } else {
            assert(var->data.mode == nir_var_uniform ||
                   var->data.mode == nir_var_image);
            if (var->data.bindless) {
               ret->bindless = true;
               handle_bindless_var(nir, var, type, &bindless);
            } else if (glsl_type_is_sampler(type) || glsl_type_is_image(type)) {
               VkDescriptorType vktype = glsl_type_is_image(type) ? zink_image_type(type) : zink_sampler_type(type);
               ztype = zink_desc_type_from_vktype(vktype);
               if (vktype == VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER)
                  ret->num_texel_buffers++;
               var->data.driver_location = var->data.binding;
               var->data.descriptor_set = screen->desc_set_id[ztype];
               var->data.binding = zink_binding(nir->info.stage, vktype, var->data.driver_location, screen->compact_descriptors);
               ret->bindings[ztype][ret->num_bindings[ztype]].index = var->data.driver_location;
               ret->bindings[ztype][ret->num_bindings[ztype]].binding = var->data.binding;
               ret->bindings[ztype][ret->num_bindings[ztype]].type = vktype;
               if (glsl_type_is_array(var->type))
                  ret->bindings[ztype][ret->num_bindings[ztype]].size = glsl_get_aoa_size(var->type);
               else
                  ret->bindings[ztype][ret->num_bindings[ztype]].size = 1;
               ret->num_bindings[ztype]++;
            }
         }
      }
   }
   bool bindless_lowered = false;
   NIR_PASS(bindless_lowered, nir, lower_bindless, &bindless);
   ret->bindless |= bindless_lowered;

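   /* lower 64-bit variables when the device lacks native int64 support, then
    * fix up texture destinations whose types don't match their instructions
    */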
   if (!screen->info.feats.features.shaderInt64)
      NIR_PASS_V(nir, lower_64bit_vars);
   NIR_PASS_V(nir, match_tex_dests);

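   /* with streamout, record the xfb info on the shader; otherwise, if both a
    * driver-injected ("fake") gl_PointSize and an explicitly-located one
    * exist, demote the explicit one to a temp so only a single PSIZ export
    * survives dead-variable removal
    */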
   ret->nir = nir;
   if (so_info && so_info->num_outputs)
      update_so_info(ret, so_info, nir->info.outputs_written, have_psiz);
   else if (have_psiz) {
      bool have_fake_psiz = false;
      nir_variable *psiz = NULL;
      nir_foreach_shader_out_variable(var, nir) {
         if (var->data.location == VARYING_SLOT_PSIZ) {
            if (!var->data.explicit_location)
               have_fake_psiz = true;
            else
               psiz = var;
         }
      }
      if (have_fake_psiz && psiz) {
         psiz->data.mode = nir_var_shader_temp;
         nir_fixup_deref_modes(nir);
         NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
      }
   }

   ret->can_inline = true;

   return ret;
}

char *
zink_shader_finalize(struct pipe_screen *pscreen, void *nirptr)
{
   struct zink_screen *screen = zink_screen(pscreen);
   nir_shader *nir = nirptr;

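   /* apply device-dependent texture lowerings: projectors and invalid
    * implicit-LOD ops are rewritten into forms that can be expressed in
    * SPIR-V (see the spec quote below)
    */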
   nir_lower_tex_options tex_opts = {
      .lower_invalid_implicit_lod = true,
   };
   /*
      Sampled Image must be an object whose type is OpTypeSampledImage.
      The Dim operand of the underlying OpTypeImage must be 1D, 2D, 3D,
      or Rect, and the Arrayed and MS operands must be 0.
      - SPIR-V, OpImageSampleProj* opcodes
    */
   tex_opts.lower_txp = BITFIELD_BIT(GLSL_SAMPLER_DIM_CUBE) |
                        BITFIELD_BIT(GLSL_SAMPLER_DIM_MS);
   tex_opts.lower_txp_array = true;
   if (!screen->info.feats.features.shaderImageGatherExtended)
      tex_opts.lower_tg4_offsets = true;
   NIR_PASS_V(nir, nir_lower_tex, &tex_opts);
   if (nir->info.stage == MESA_SHADER_GEOMETRY)
      NIR_PASS_V(nir, nir_lower_gs_intrinsics, nir_lower_gs_intrinsics_per_stream);
   optimize_nir(nir, NULL);
   nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
   if (screen->driconf.inline_uniforms)
      nir_find_inlinable_uniforms(nir);

   return NULL;
}

void
zink_shader_free(struct zink_context *ctx, struct zink_shader *shader)
{
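   /* unlink this shader from every program that uses it, evicting cache
    * entries and dropping program references as they become unreachable
    */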
   set_foreach(shader->programs, entry) {
      if (shader->nir->info.stage == MESA_SHADER_COMPUTE) {
         struct zink_compute_program *comp = (void*)entry->key;
         if (!comp->base.removed) {
            _mesa_hash_table_remove_key(&ctx->compute_program_cache, comp->shader);
            comp->base.removed = true;
         }
         comp->shader = NULL;
         zink_compute_program_reference(ctx, &comp, NULL);
      } else {
         struct zink_gfx_program *prog = (void*)entry->key;
         enum pipe_shader_type pstage = pipe_shader_type_from_mesa(shader->nir->info.stage);
         assert(pstage < ZINK_SHADER_COUNT);
         if (!prog->base.removed && (shader->nir->info.stage != MESA_SHADER_TESS_CTRL || !shader->is_generated)) {
            _mesa_hash_table_remove_key(&ctx->program_cache[prog->stages_present >> 2], prog->shaders);
            prog->base.removed = true;
         }
         if (shader->nir->info.stage != MESA_SHADER_TESS_CTRL || !shader->is_generated)
            prog->shaders[pstage] = NULL;
         /* only remove generated tcs during parent tes destruction */
         if (shader->nir->info.stage == MESA_SHADER_TESS_EVAL && shader->generated)
            prog->shaders[PIPE_SHADER_TESS_CTRL] = NULL;
         zink_gfx_program_reference(ctx, &prog, NULL);
      }
   }
   if (shader->nir->info.stage == MESA_SHADER_TESS_EVAL && shader->generated) {
      /* automatically destroy generated tcs shaders when tes is destroyed */
      zink_shader_free(ctx, shader->generated);
      shader->generated = NULL;
   }
   _mesa_set_destroy(shader->programs, NULL);
   ralloc_free(shader->nir);
   ralloc_free(shader->spirv);
   FREE(shader);
}


VkShaderModule
zink_shader_tcs_compile(struct zink_screen *screen, struct zink_shader *zs, unsigned patch_vertices)
{
   assert(zs->nir->info.stage == MESA_SHADER_TESS_CTRL);
   /* shortcut all the nir passes since we just have to change this one word */
   zs->spirv->words[zs->spirv->tcs_vertices_out_word] = patch_vertices;
   return zink_shader_spirv_compile(screen, zs, NULL);
}

/* creating a passthrough tcs shader that's roughly:

#version 150
#extension GL_ARB_tessellation_shader : require

in vec4 some_var[gl_MaxPatchVertices];
out vec4 some_var_out;

layout(push_constant) uniform tcsPushConstants {
    layout(offset = 0) float TessLevelInner[2];
    layout(offset = 8) float TessLevelOuter[4];
} u_tcsPushConstants;
layout(vertices = $vertices_per_patch) out;
void main()
{
   gl_TessLevelInner = u_tcsPushConstants.TessLevelInner;
   gl_TessLevelOuter = u_tcsPushConstants.TessLevelOuter;
   some_var_out = some_var[gl_InvocationID];
}

*/
struct zink_shader *
zink_shader_tcs_create(struct zink_screen *screen, struct zink_shader *vs, unsigned vertices_per_patch)
{
   struct zink_shader *ret = CALLOC_STRUCT(zink_shader);
   ret->hash = _mesa_hash_pointer(ret);
   ret->programs = _mesa_pointer_set_create(NULL);
   simple_mtx_init(&ret->lock, mtx_plain);

   nir_shader *nir = nir_shader_create(NULL, MESA_SHADER_TESS_CTRL, &screen->nir_options, NULL);
   nir_function *fn = nir_function_create(nir, "main");
   fn->is_entrypoint = true;
   nir_function_impl *impl = nir_function_impl_create(fn);

   nir_builder b;
   nir_builder_init(&b, impl);
   b.cursor = nir_before_block(nir_start_block(impl));

   nir_ssa_def *invocation_id = nir_load_invocation_id(&b);

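   /* mirror every VS output as a per-vertex TCS input/output pair and copy
    * the invocation's input vertex straight through to its output vertex
    */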
   nir_foreach_shader_out_variable(var, vs->nir) {
      const struct glsl_type *type = var->type;
      const struct glsl_type *in_type = var->type;
      const struct glsl_type *out_type = var->type;
      char buf[1024];
      snprintf(buf, sizeof(buf), "%s_out", var->name);
      in_type = glsl_array_type(type, 32 /* MAX_PATCH_VERTICES */, 0);
      out_type = glsl_array_type(type, vertices_per_patch, 0);

      nir_variable *in = nir_variable_create(nir, nir_var_shader_in, in_type, var->name);
      nir_variable *out = nir_variable_create(nir, nir_var_shader_out, out_type, buf);
      out->data.location = in->data.location = var->data.location;
      out->data.location_frac = in->data.location_frac = var->data.location_frac;

      /* gl_in[] receives values from equivalent built-in output
         variables written by the vertex shader (section 2.14.7). Each array
         element of gl_in[] is a structure holding values for a specific vertex of
         the input patch. The length of gl_in[] is equal to the
         implementation-dependent maximum patch size (gl_MaxPatchVertices).
         - ARB_tessellation_shader
       */
      /* we need to load the invocation-specific value of the vertex output and then store it to the per-patch output */
      nir_deref_instr *in_array_var = nir_build_deref_array(&b, nir_build_deref_var(&b, in), invocation_id);
      nir_ssa_def *load = nir_load_deref(&b, in_array_var);
      nir_deref_instr *out_array_var = nir_build_deref_array(&b, nir_build_deref_var(&b, out), invocation_id);
      nir_store_deref(&b, out_array_var, load, 0xff);
   }
   nir_variable *gl_TessLevelInner = nir_variable_create(nir, nir_var_shader_out, glsl_array_type(glsl_float_type(), 2, 0), "gl_TessLevelInner");
   gl_TessLevelInner->data.location = VARYING_SLOT_TESS_LEVEL_INNER;
   gl_TessLevelInner->data.patch = 1;
   nir_variable *gl_TessLevelOuter = nir_variable_create(nir, nir_var_shader_out, glsl_array_type(glsl_float_type(), 4, 0), "gl_TessLevelOuter");
   gl_TessLevelOuter->data.location = VARYING_SLOT_TESS_LEVEL_OUTER;
   gl_TessLevelOuter->data.patch = 1;

   /* hacks so we can size these right for now */
   struct glsl_struct_field *fields = rzalloc_array(nir, struct glsl_struct_field, 3);
   /* just use a single blob for padding here because it's easier */
   fields[0].type = glsl_array_type(glsl_uint_type(), offsetof(struct zink_gfx_push_constant, default_inner_level) / 4, 0);
   fields[0].name = ralloc_asprintf(nir, "padding");
   fields[0].offset = 0;
   fields[1].type = glsl_array_type(glsl_uint_type(), 2, 0);
   fields[1].name = ralloc_asprintf(nir, "gl_TessLevelInner");
   fields[1].offset = offsetof(struct zink_gfx_push_constant, default_inner_level);
   fields[2].type = glsl_array_type(glsl_uint_type(), 4, 0);
   fields[2].name = ralloc_asprintf(nir, "gl_TessLevelOuter");
   fields[2].offset = offsetof(struct zink_gfx_push_constant, default_outer_level);
   nir_variable *pushconst = nir_variable_create(nir, nir_var_mem_push_const,
                                                 glsl_struct_type(fields, 3, "struct", false), "pushconst");
   pushconst->data.location = VARYING_SLOT_VAR0;

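   /* load the default tess levels from the push constants; the base and
    * constant offset here appear to address struct members of the layout
    * above (fields 1 and 2) for the ntv push-constant loader, not byte
    * offsets
    */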
   nir_ssa_def *load_inner = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 1), .base = 1, .range = 8);
   nir_ssa_def *load_outer = nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 2), .base = 2, .range = 16);

   for (unsigned i = 0; i < 2; i++) {
      nir_deref_instr *store_idx = nir_build_deref_array_imm(&b, nir_build_deref_var(&b, gl_TessLevelInner), i);
      nir_store_deref(&b, store_idx, nir_channel(&b, load_inner, i), 0xff);
   }
   for (unsigned i = 0; i < 4; i++) {
      nir_deref_instr *store_idx = nir_build_deref_array_imm(&b, nir_build_deref_var(&b, gl_TessLevelOuter), i);
      nir_store_deref(&b, store_idx, nir_channel(&b, load_outer, i), 0xff);
   }

   nir->info.tess.tcs_vertices_out = vertices_per_patch;
   nir_validate_shader(nir, "created");

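   /* minimal pass list to make the generated shader consumable downstream */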
   NIR_PASS_V(nir, nir_lower_regs_to_ssa);
   optimize_nir(nir, NULL);
   NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp, NULL);
   NIR_PASS_V(nir, nir_convert_from_ssa, true);

   ret->nir = nir;
   ret->is_generated = true;
   return ret;
}