2015-10-24 19:30:31 +01:00
|
|
|
/*
|
|
|
|
* Copyright (C) 2015 Rob Clark <robclark@freedesktop.org>
|
|
|
|
*
|
|
|
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
|
|
|
* copy of this software and associated documentation files (the "Software"),
|
|
|
|
* to deal in the Software without restriction, including without limitation
|
|
|
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
|
|
* and/or sell copies of the Software, and to permit persons to whom the
|
|
|
|
* Software is furnished to do so, subject to the following conditions:
|
|
|
|
*
|
|
|
|
* The above copyright notice and this permission notice (including the next
|
|
|
|
* paragraph) shall be included in all copies or substantial portions of the
|
|
|
|
* Software.
|
|
|
|
*
|
|
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
|
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
|
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
|
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
|
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
|
|
* SOFTWARE.
|
|
|
|
*
|
|
|
|
* Authors:
|
|
|
|
* Rob Clark <robclark@freedesktop.org>
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
2018-11-09 18:49:55 +00:00
|
|
|
#include "util/debug.h"
|
2019-05-07 14:05:58 +01:00
|
|
|
#include "util/u_math.h"
|
2015-10-24 19:30:31 +01:00
|
|
|
|
|
|
|
#include "ir3_nir.h"
|
|
|
|
#include "ir3_compiler.h"
|
|
|
|
#include "ir3_shader.h"
|
|
|
|
|
2016-03-28 15:28:29 +01:00
|
|
|
/* NIR compiler options for pre-a6xx GPUs.  These flags tell common NIR
 * lowering passes which operations the ir3 backend cannot (or prefers
 * not to) consume directly, so they get lowered before ir3 sees them.
 * Keep in sync with options_a6xx below where the hardware behaves the
 * same across generations.
 */
static const nir_shader_compiler_options options = {
	/* ALU ops the backend wants lowered to simpler sequences: */
	.lower_fpow = true,
	.lower_scmp = true,
	.lower_flrp16 = true,
	.lower_flrp32 = true,
	.lower_flrp64 = true,
	.lower_ffract = true,
	.lower_fmod = true,
	.lower_fdiv = true,
	.lower_isign = true,
	.lower_ldexp = true,
	.lower_uadd_carry = true,
	.lower_usub_borrow = true,
	.lower_mul_high = true,
	.lower_mul_2x32_64 = true,
	/* fuse mul+add into ffma at all bit sizes: */
	.fuse_ffma16 = true,
	.fuse_ffma32 = true,
	.fuse_ffma64 = true,
	/* pre-a6xx needs vertex_id rebased to zero (cf. options_a6xx): */
	.vertex_id_zero_based = true,
	.lower_extract_byte = true,
	.lower_extract_word = true,
	.lower_all_io_to_elements = true,
	.lower_helper_invocation = true,
	.lower_bitfield_insert_to_shifts = true,
	.lower_bitfield_extract_to_shifts = true,
	/* pack/unpack conversions are lowered to ALU sequences: */
	.lower_pack_half_2x16 = true,
	.lower_pack_snorm_4x8 = true,
	.lower_pack_snorm_2x16 = true,
	.lower_pack_unorm_4x8 = true,
	.lower_pack_unorm_2x16 = true,
	.lower_unpack_half_2x16 = true,
	.lower_unpack_snorm_4x8 = true,
	.lower_unpack_snorm_2x16 = true,
	.lower_unpack_unorm_4x8 = true,
	.lower_unpack_unorm_2x16 = true,
	.lower_pack_split = true,
	.use_interpolated_input_intrinsics = true,
	.lower_rotate = true,
	.lower_to_scalar = true,
	/* hardware has a 24-bit integer multiply: */
	.has_imul24 = true,
	.has_fsub = true,
	.has_isub = true,
	.lower_wpos_pntc = true,
	.lower_cs_local_index_from_id = true,

	/* Only needed for the spirv_to_nir() pass done in ir3_cmdline.c
	 * but that should be harmless for GL since 64b is not
	 * supported there.
	 */
	.lower_int64_options = (nir_lower_int64_options)~0,
	.lower_uniforms_to_ubo = true,
};
|
|
|
|
|
2019-02-26 13:28:09 +00:00
|
|
|
/* we don't want to lower vertex_id to _zero_based on newer gpus: */
/* NIR compiler options for a6xx+.  Mostly identical to `options` above;
 * the differences are: vertex_id_zero_based=false, vectorize_io,
 * max_unroll_iterations, and lower_device_index_to_zero.
 */
static const nir_shader_compiler_options options_a6xx = {
	/* ALU ops the backend wants lowered to simpler sequences: */
	.lower_fpow = true,
	.lower_scmp = true,
	.lower_flrp16 = true,
	.lower_flrp32 = true,
	.lower_flrp64 = true,
	.lower_ffract = true,
	.lower_fmod = true,
	.lower_fdiv = true,
	.lower_isign = true,
	.lower_ldexp = true,
	.lower_uadd_carry = true,
	.lower_usub_borrow = true,
	.lower_mul_high = true,
	.lower_mul_2x32_64 = true,
	/* fuse mul+add into ffma at all bit sizes: */
	.fuse_ffma16 = true,
	.fuse_ffma32 = true,
	.fuse_ffma64 = true,
	/* a6xx+ does not need vertex_id rebased (see comment above): */
	.vertex_id_zero_based = false,
	.lower_extract_byte = true,
	.lower_extract_word = true,
	.lower_all_io_to_elements = true,
	.lower_helper_invocation = true,
	.lower_bitfield_insert_to_shifts = true,
	.lower_bitfield_extract_to_shifts = true,
	/* pack/unpack conversions are lowered to ALU sequences: */
	.lower_pack_half_2x16 = true,
	.lower_pack_snorm_4x8 = true,
	.lower_pack_snorm_2x16 = true,
	.lower_pack_unorm_4x8 = true,
	.lower_pack_unorm_2x16 = true,
	.lower_unpack_half_2x16 = true,
	.lower_unpack_snorm_4x8 = true,
	.lower_unpack_snorm_2x16 = true,
	.lower_unpack_unorm_4x8 = true,
	.lower_unpack_unorm_2x16 = true,
	.lower_pack_split = true,
	.use_interpolated_input_intrinsics = true,
	.lower_rotate = true,
	/* a6xx-only: let NIR re-vectorize i/o: */
	.vectorize_io = true,
	.lower_to_scalar = true,
	/* hardware has a 24-bit integer multiply: */
	.has_imul24 = true,
	.has_fsub = true,
	.has_isub = true,
	.max_unroll_iterations = 32,
	.lower_wpos_pntc = true,
	.lower_cs_local_index_from_id = true,

	/* Only needed for the spirv_to_nir() pass done in ir3_cmdline.c
	 * but that should be harmless for GL since 64b is not
	 * supported there.
	 */
	.lower_int64_options = (nir_lower_int64_options)~0,
	.lower_uniforms_to_ubo = true,
	.lower_device_index_to_zero = true,
};
|
|
|
|
|
2016-03-28 15:28:29 +01:00
|
|
|
const nir_shader_compiler_options *
|
2017-05-23 14:09:41 +01:00
|
|
|
ir3_get_compiler_options(struct ir3_compiler *compiler)
|
2016-03-28 15:28:29 +01:00
|
|
|
{
|
2019-02-26 13:28:09 +00:00
|
|
|
if (compiler->gpu_id >= 600)
|
|
|
|
return &options_a6xx;
|
2016-03-28 15:28:29 +01:00
|
|
|
return &options;
|
|
|
|
}
|
|
|
|
|
2020-08-20 21:29:58 +01:00
|
|
|
static bool
|
|
|
|
ir3_nir_should_vectorize_mem(unsigned align_mul, unsigned align_offset,
|
|
|
|
unsigned bit_size,
|
|
|
|
unsigned num_components,
|
|
|
|
nir_intrinsic_instr *low,
|
2020-03-13 15:43:16 +00:00
|
|
|
nir_intrinsic_instr *high,
|
|
|
|
void *data)
|
2020-08-20 21:29:58 +01:00
|
|
|
{
|
|
|
|
assert(bit_size >= 8);
|
|
|
|
if (bit_size != 32)
|
|
|
|
return false;
|
|
|
|
unsigned byte_size = bit_size / 8;
|
|
|
|
|
|
|
|
int size = num_components * byte_size;
|
|
|
|
|
|
|
|
/* Don't care about alignment past vec4. */
|
|
|
|
assert(util_is_power_of_two_nonzero(align_mul));
|
|
|
|
align_mul = MIN2(align_mul, 16);
|
|
|
|
align_offset &= 15;
|
|
|
|
|
|
|
|
/* Our offset alignment should aways be at least 4 bytes */
|
|
|
|
if (align_mul < 4)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
unsigned worst_start_offset = 16 - align_mul + align_offset;
|
|
|
|
if (worst_start_offset + size > 16)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2015-10-24 19:54:56 +01:00
|
|
|
/* Run a NIR pass and evaluate to true iff the pass reported progress.
 * (Statement-expression so it can be used inside `progress |= ...`.)
 */
#define OPT(nir, pass, ...) ({ \
	bool this_progress = false; \
	NIR_PASS(this_progress, nir, pass, ##__VA_ARGS__); \
	this_progress; \
})

/* Run a NIR pass, discarding any progress result. */
#define OPT_V(nir, pass, ...) NIR_PASS_V(nir, pass, ##__VA_ARGS__)
|
|
|
|
|
2020-07-06 17:26:14 +01:00
|
|
|
/* Core NIR optimization loop: runs the standard cleanup/opt passes
 * repeatedly until no pass reports progress.  Called multiple times
 * during compilation (after finalize, after idiv lowering, after
 * variant lowering, ...).
 */
void
ir3_optimize_loop(nir_shader *s)
{
	bool progress;
	/* Bitmask of flrp bit-sizes to lower, taken from the shader's
	 * compiler options; cleared after the first lowering (see below).
	 */
	unsigned lower_flrp =
		(s->options->lower_flrp16 ? 16 : 0) |
		(s->options->lower_flrp32 ? 32 : 0) |
		(s->options->lower_flrp64 ? 64 : 0);

	do {
		progress = false;

		OPT_V(s, nir_lower_vars_to_ssa);
		progress |= OPT(s, nir_opt_copy_prop_vars);
		progress |= OPT(s, nir_opt_dead_write_vars);
		progress |= OPT(s, nir_lower_alu_to_scalar, NULL, NULL);
		progress |= OPT(s, nir_lower_phis_to_scalar);

		progress |= OPT(s, nir_copy_prop);
		progress |= OPT(s, nir_opt_dce);
		progress |= OPT(s, nir_opt_cse);
		/* GCM (global code motion) is opt-in via the GCM env var:
		 * 1 = value-number first, 2 = don't.  Cached after first read.
		 */
		static int gcm = -1;
		if (gcm == -1)
			gcm = env_var_as_unsigned("GCM", 0);
		if (gcm == 1)
			progress |= OPT(s, nir_opt_gcm, true);
		else if (gcm == 2)
			progress |= OPT(s, nir_opt_gcm, false);
		progress |= OPT(s, nir_opt_peephole_select, 16, true, true);
		progress |= OPT(s, nir_opt_intrinsics);
		progress |= OPT(s, nir_opt_algebraic);
		progress |= OPT(s, nir_lower_alu);
		progress |= OPT(s, nir_lower_pack);
		progress |= OPT(s, nir_opt_constant_folding);

		/* Try to merge adjacent UBO loads into wider (vec4-bounded)
		 * loads; the callback enforces the alignment constraints.
		 */
		nir_load_store_vectorize_options vectorize_opts = {
			.modes = nir_var_mem_ubo,
			.callback = ir3_nir_should_vectorize_mem,
			.robust_modes = 0,
		};
		progress |= OPT(s, nir_opt_load_store_vectorize, &vectorize_opts);

		if (lower_flrp != 0) {
			if (OPT(s, nir_lower_flrp,
					lower_flrp,
					false /* always_precise */)) {
				OPT(s, nir_opt_constant_folding);
				progress = true;
			}

			/* Nothing should rematerialize any flrps, so we only
			 * need to do this lowering once.
			 */
			lower_flrp = 0;
		}

		progress |= OPT(s, nir_opt_dead_cf);
		if (OPT(s, nir_opt_trivial_continues)) {
			progress |= true;
			/* If nir_opt_trivial_continues makes progress, then we need to clean
			 * things up if we want any hope of nir_opt_if or nir_opt_loop_unroll
			 * to make progress.
			 */
			OPT(s, nir_copy_prop);
			OPT(s, nir_opt_dce);
		}
		progress |= OPT(s, nir_opt_if, false);
		progress |= OPT(s, nir_opt_loop_unroll, nir_var_all);
		progress |= OPT(s, nir_opt_remove_phis);
		progress |= OPT(s, nir_opt_undef);
	} while (progress);
}
|
|
|
|
|
2020-05-06 22:58:28 +01:00
|
|
|
static bool
|
|
|
|
should_split_wrmask(const nir_instr *instr, const void *data)
|
|
|
|
{
|
|
|
|
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
|
|
|
|
|
|
|
|
switch (intr->intrinsic) {
|
|
|
|
case nir_intrinsic_store_ssbo:
|
|
|
|
case nir_intrinsic_store_shared:
|
|
|
|
case nir_intrinsic_store_global:
|
2020-10-29 14:05:24 +00:00
|
|
|
case nir_intrinsic_store_scratch:
|
2020-05-06 22:58:28 +01:00
|
|
|
return true;
|
|
|
|
default:
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-06-05 19:43:13 +01:00
|
|
|
/* First-stage (variant-independent) NIR finalization: texture lowering,
 * i/o-to-elements, wrmask splitting, and the first optimization loop.
 * Runs once per shader; variant-dependent lowering happens later in
 * ir3_nir_lower_variant().
 */
void
ir3_finalize_nir(struct ir3_compiler *compiler, nir_shader *s)
{
	struct nir_lower_tex_options tex_options = {
		.lower_rect = 0,
		.lower_tg4_offsets = true,
	};

	if (compiler->gpu_id >= 400) {
		/* a4xx seems to have *no* sam.p */
		tex_options.lower_txp = ~0;  /* lower all txp */
	} else {
		/* a3xx just needs to avoid sam.p for 3d tex */
		tex_options.lower_txp = (1 << GLSL_SAMPLER_DIM_3D);
	}

	if (ir3_shader_debug & IR3_DBG_DISASM) {
		debug_printf("----------------------\n");
		nir_print_shader(s, stdout);
		debug_printf("----------------------\n");
	}

	if (s->info.stage == MESA_SHADER_GEOMETRY)
		NIR_PASS_V(s, ir3_nir_lower_gs);

	NIR_PASS_V(s, nir_lower_io_arrays_to_elements_no_indirects, false);

	NIR_PASS_V(s, nir_lower_amul, ir3_glsl_type_size);

	OPT_V(s, nir_lower_regs_to_ssa);
	OPT_V(s, nir_lower_wrmasks, should_split_wrmask, s);

	OPT_V(s, nir_lower_tex, &tex_options);
	OPT_V(s, nir_lower_load_const_to_scalar);
	/* pre-a5xx doesn't have native tg4 support, apparently -- TODO confirm: */
	if (compiler->gpu_id < 500)
		OPT_V(s, ir3_nir_lower_tg4_to_tex);

	ir3_optimize_loop(s);

	/* do idiv lowering after first opt loop to get a chance to propagate
	 * constants for divide by immed power-of-two:
	 */
	nir_lower_idiv_options idiv_options = {
		.imprecise_32bit_lowering = true,
		.allow_fp16 = true,
	};
	const bool idiv_progress = OPT(s, nir_lower_idiv, &idiv_options);

	/* idiv lowering generates new ALU code worth re-optimizing: */
	if (idiv_progress)
		ir3_optimize_loop(s);

	OPT_V(s, nir_remove_dead_variables, nir_var_function_temp, NULL);

	if (ir3_shader_debug & IR3_DBG_DISASM) {
		debug_printf("----------------------\n");
		nir_print_shader(s, stdout);
		debug_printf("----------------------\n");
	}

	/* Drop the uniform variables; by this point the passes above have
	 * consumed them (safe iterator since we remove while walking):
	 */
	nir_foreach_uniform_variable_safe(var, s) {
		exec_node_remove(&var->node);
	}
	nir_validate_shader(s, "after uniform var removal");

	nir_sweep(s);
}
|
|
|
|
|
2020-06-15 22:24:00 +01:00
|
|
|
/**
 * Late passes that need to be done after pscreen->finalize_nir():
 * i/o lowering, FS-specific barycentric/fb-read lowering, mediump
 * output demotion on a6xx, trig workarounds, and another opt loop.
 */
void
ir3_nir_post_finalize(struct ir3_compiler *compiler, nir_shader *s)
{
	NIR_PASS_V(s, nir_lower_io, nir_var_shader_in | nir_var_shader_out,
			ir3_glsl_type_size, (nir_lower_io_options)0);

	if (s->info.stage == MESA_SHADER_FRAGMENT) {
		/* NOTE: lower load_barycentric_at_sample first, since it
		 * produces load_barycentric_at_offset:
		 */
		NIR_PASS_V(s, ir3_nir_lower_load_barycentric_at_sample);
		NIR_PASS_V(s, ir3_nir_lower_load_barycentric_at_offset);
		NIR_PASS_V(s, ir3_nir_move_varying_inputs);
		NIR_PASS_V(s, nir_lower_fb_read);
	}

	/* Demote FS outputs to mediump on a6xx+ unless fp16 is disabled
	 * via the IR3_DBG_NOFP16 debug flag:
	 */
	if (compiler->gpu_id >= 600 &&
			s->info.stage == MESA_SHADER_FRAGMENT &&
			!(ir3_shader_debug & IR3_DBG_NOFP16)) {
		NIR_PASS_V(s, nir_lower_mediump_io, nir_var_shader_out, 0, false);
	}

	/* we cannot ensure that ir3_finalize_nir() is only called once, so
	 * we also need to do trig workarounds here:
	 */
	OPT_V(s, ir3_nir_apply_trig_workarounds);

	ir3_optimize_loop(s);
}
|
|
|
|
|
2020-07-03 11:01:17 +01:00
|
|
|
/* Replace FS load_input of gl_Layer and/or gl_ViewportIndex with a
 * constant zero (used when the shader key says the producer stage never
 * writes them).  Returns true if any instruction was rewritten.
 */
static bool
ir3_nir_lower_view_layer_id(nir_shader *nir, bool layer_zero, bool view_zero)
{
	/* Find the driver_locations of the layer/viewport inputs (~0 if
	 * absent):
	 */
	unsigned layer_id_loc = ~0, view_id_loc = ~0;
	nir_foreach_shader_in_variable(var, nir) {
		if (var->data.location == VARYING_SLOT_LAYER)
			layer_id_loc = var->data.driver_location;
		if (var->data.location == VARYING_SLOT_VIEWPORT)
			view_id_loc = var->data.driver_location;
	}

	/* If the caller asked to zero an input, it must actually exist: */
	assert(!layer_zero || layer_id_loc != ~0);
	assert(!view_zero || view_id_loc != ~0);

	bool progress = false;
	nir_builder b;

	nir_foreach_function(func, nir) {
		nir_builder_init(&b, func->impl);

		nir_foreach_block(block, func->impl) {
			nir_foreach_instr_safe(instr, block) {
				if (instr->type != nir_instr_type_intrinsic)
					continue;

				nir_intrinsic_instr *intrin =
					nir_instr_as_intrinsic(instr);

				if (intrin->intrinsic != nir_intrinsic_load_input)
					continue;

				unsigned base = nir_intrinsic_base(intrin);
				if (base != layer_id_loc && base != view_id_loc)
					continue;

				/* Rewrite all uses of the load to constant 0
				 * and drop the load itself:
				 */
				b.cursor = nir_before_instr(&intrin->instr);
				nir_ssa_def *zero = nir_imm_int(&b, 0);
				nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
						zero);
				nir_instr_remove(&intrin->instr);
				progress = true;
			}
		}

		if (progress) {
			nir_metadata_preserve(func->impl,
					nir_metadata_block_index |
					nir_metadata_dominance);
		} else {
			nir_metadata_preserve(func->impl, nir_metadata_all);
		}
	}

	return progress;
}
|
|
|
|
|
2020-06-10 10:11:27 +01:00
|
|
|
/* Variant-dependent NIR lowering: tess/GS explicit i/o, clip-plane and
 * layer/viewport lowering keyed off the shader key, constant/UBO
 * handling, and final cleanup.  Runs once per ir3_shader_variant.
 */
void
ir3_nir_lower_variant(struct ir3_shader_variant *so, nir_shader *s)
{
	if (ir3_shader_debug & IR3_DBG_DISASM) {
		debug_printf("----------------------\n");
		nir_print_shader(s, stdout);
		debug_printf("----------------------\n");
	}

	bool progress = false;

	/* When this variant feeds (or is part of) a tess/GS pipeline, i/o
	 * between stages is lowered to explicit loads/stores:
	 */
	if (so->key.has_gs || so->key.tessellation) {
		switch (so->shader->type) {
		case MESA_SHADER_VERTEX:
			NIR_PASS_V(s, ir3_nir_lower_to_explicit_output, so, so->key.tessellation);
			progress = true;
			break;
		case MESA_SHADER_TESS_CTRL:
			NIR_PASS_V(s, ir3_nir_lower_tess_ctrl, so, so->key.tessellation);
			NIR_PASS_V(s, ir3_nir_lower_to_explicit_input, so);
			progress = true;
			break;
		case MESA_SHADER_TESS_EVAL:
			NIR_PASS_V(s, ir3_nir_lower_tess_eval, so, so->key.tessellation);
			if (so->key.has_gs)
				NIR_PASS_V(s, ir3_nir_lower_to_explicit_output, so, so->key.tessellation);
			progress = true;
			break;
		case MESA_SHADER_GEOMETRY:
			NIR_PASS_V(s, ir3_nir_lower_to_explicit_input, so);
			progress = true;
			break;
		default:
			break;
		}
	}

	if (s->info.stage == MESA_SHADER_VERTEX) {
		if (so->key.ucp_enables)
			progress |= OPT(s, nir_lower_clip_vs, so->key.ucp_enables, false, false, NULL);
	} else if (s->info.stage == MESA_SHADER_FRAGMENT) {
		bool layer_zero = so->key.layer_zero && (s->info.inputs_read & VARYING_BIT_LAYER);
		bool view_zero = so->key.view_zero && (s->info.inputs_read & VARYING_BIT_VIEWPORT);

		if (so->key.ucp_enables && !so->shader->compiler->has_clip_cull)
			progress |= OPT(s, nir_lower_clip_fs, so->key.ucp_enables, false);
		if (layer_zero || view_zero)
			progress |= OPT(s, ir3_nir_lower_view_layer_id, layer_zero, view_zero);
	}

	/* Move large constant variables to the constants attached to the NIR
	 * shader, which we will upload in the immediates range. This generates
	 * amuls, so we need to clean those up after.
	 *
	 * Passing no size_align, we would get packed values, which if we end up
	 * having to load with LDC would result in extra reads to unpack from
	 * straddling loads. Align everything to vec4 to avoid that, though we
	 * could theoretically do better.
	 */
	OPT_V(s, nir_opt_large_constants, glsl_get_vec4_size_align_bytes, 32 /* bytes */);
	OPT_V(s, ir3_nir_lower_load_constant, so);

	/* Binning variants share const state with the draw variant, so only
	 * analyze UBO ranges for the draw pass (see comment at bottom):
	 */
	if (!so->binning_pass)
		OPT_V(s, ir3_nir_analyze_ubo_ranges, so);

	progress |= OPT(s, ir3_nir_lower_ubo_loads, so);

	/* Lower large temporaries to scratch, which in Qualcomm terms is private
	 * memory, to avoid excess register pressure. This should happen after
	 * nir_opt_large_constants, because loading from a UBO is much, much less
	 * expensive.
	 */
	if (so->shader->compiler->has_pvtmem) {
		NIR_PASS_V(s, nir_lower_vars_to_scratch, nir_var_function_temp,
				16 * 16 /* bytes */, glsl_get_natural_size_align_bytes);
	}

	OPT_V(s, nir_lower_amul, ir3_glsl_type_size);

	/* UBO offset lowering has to come after we've decided what will
	 * be left as load_ubo
	 */
	if (so->shader->compiler->gpu_id >= 600)
		progress |= OPT(s, nir_lower_ubo_vec4);

	OPT_V(s, ir3_nir_lower_io_offsets, so->shader->compiler->gpu_id);

	if (progress)
		ir3_optimize_loop(s);

	/* Fixup indirect load_uniform's which end up with a const base offset
	 * which is too large to encode. Do this late(ish) so we actually
	 * can differentiate indirect vs non-indirect.
	 */
	if (OPT(s, ir3_nir_fixup_load_uniform))
		ir3_optimize_loop(s);

	/* Do late algebraic optimization to turn add(a, neg(b)) back into
	 * subs, then the mandatory cleanup after algebraic. Note that it may
	 * produce fnegs, and if so then we need to keep running to squash
	 * fneg(fneg(a)).
	 */
	bool more_late_algebraic = true;
	while (more_late_algebraic) {
		more_late_algebraic = OPT(s, nir_opt_algebraic_late);
		OPT_V(s, nir_opt_constant_folding);
		OPT_V(s, nir_copy_prop);
		OPT_V(s, nir_opt_dce);
		OPT_V(s, nir_opt_cse);
	}

	OPT_V(s, nir_opt_sink, nir_move_const_undef);

	if (ir3_shader_debug & IR3_DBG_DISASM) {
		debug_printf("----------------------\n");
		nir_print_shader(s, stdout);
		debug_printf("----------------------\n");
	}

	nir_sweep(s);

	/* Binning pass variants re-use the const_state of the corresponding
	 * draw pass shader, so that same const emit can be re-used for both
	 * passes:
	 */
	if (!so->binning_pass)
		ir3_setup_const_state(s, so, ir3_const_state(so));
}
|
2017-10-30 17:23:37 +00:00
|
|
|
|
2019-05-07 14:05:58 +01:00
|
|
|
/* Walk every instruction in the shader and record which driver-supplied
 * constants it will need (SSBO sizes, image dimensions, and the various
 * driver-params like base vertex/instance, UCPs, workgroup info) into
 * `layout`.  Only fills in counts/offsets/masks; the actual const
 * register layout is assigned later (ir3_setup_const_state).
 */
static void
ir3_nir_scan_driver_consts(nir_shader *shader,
		struct ir3_const_state *layout)
{
	nir_foreach_function (function, shader) {
		if (!function->impl)
			continue;

		nir_foreach_block (block, function->impl) {
			nir_foreach_instr (instr, block) {
				if (instr->type != nir_instr_type_intrinsic)
					continue;

				nir_intrinsic_instr *intr =
					nir_instr_as_intrinsic(instr);
				unsigned idx;

				switch (intr->intrinsic) {
				case nir_intrinsic_get_ssbo_size:
					/* bindless SSBOs don't consume driver consts: */
					if (ir3_bindless_resource(intr->src[0]))
						break;
					idx = nir_src_as_uint(intr->src[0]);
					/* mask dedups multiple queries of the same SSBO: */
					if (layout->ssbo_size.mask & (1 << idx))
						break;
					layout->ssbo_size.mask |= (1 << idx);
					layout->ssbo_size.off[idx] =
						layout->ssbo_size.count;
					layout->ssbo_size.count += 1; /* one const per */
					break;
				case nir_intrinsic_image_atomic_add:
				case nir_intrinsic_image_atomic_imin:
				case nir_intrinsic_image_atomic_umin:
				case nir_intrinsic_image_atomic_imax:
				case nir_intrinsic_image_atomic_umax:
				case nir_intrinsic_image_atomic_and:
				case nir_intrinsic_image_atomic_or:
				case nir_intrinsic_image_atomic_xor:
				case nir_intrinsic_image_atomic_exchange:
				case nir_intrinsic_image_atomic_comp_swap:
				case nir_intrinsic_image_store:
				case nir_intrinsic_image_size:
					idx = nir_src_as_uint(intr->src[0]);
					if (layout->image_dims.mask & (1 << idx))
						break;
					layout->image_dims.mask |= (1 << idx);
					layout->image_dims.off[idx] =
						layout->image_dims.count;
					layout->image_dims.count += 3; /* three const per */
					break;
				/* The remaining cases grow num_driver_params to
				 * cover the highest driver-param slot referenced:
				 */
				case nir_intrinsic_load_base_vertex:
				case nir_intrinsic_load_first_vertex:
					layout->num_driver_params =
						MAX2(layout->num_driver_params, IR3_DP_VTXID_BASE + 1);
					break;
				case nir_intrinsic_load_base_instance:
					layout->num_driver_params =
						MAX2(layout->num_driver_params, IR3_DP_INSTID_BASE + 1);
					break;
				case nir_intrinsic_load_user_clip_plane:
					/* each UCP is a vec4, hence the *4: */
					idx = nir_intrinsic_ucp_id(intr);
					layout->num_driver_params =
						MAX2(layout->num_driver_params, IR3_DP_UCP0_X + (idx + 1) * 4);
					break;
				case nir_intrinsic_load_num_work_groups:
					layout->num_driver_params =
						MAX2(layout->num_driver_params, IR3_DP_NUM_WORK_GROUPS_Z + 1);
					break;
				case nir_intrinsic_load_local_group_size:
					layout->num_driver_params =
						MAX2(layout->num_driver_params, IR3_DP_LOCAL_GROUP_SIZE_Z + 1);
					break;
				case nir_intrinsic_load_base_work_group_id:
					layout->num_driver_params =
						MAX2(layout->num_driver_params, IR3_DP_BASE_GROUP_Z + 1);
					break;
				default:
					break;
				}
			}
		}
	}
}
|
2019-05-07 14:05:58 +01:00
|
|
|
|
2020-06-15 20:14:04 +01:00
|
|
|
/* Sets up the variant-dependent constant state for the ir3_shader. Note
|
2020-05-30 00:31:43 +01:00
|
|
|
* that it is also used from ir3_nir_analyze_ubo_ranges() to figure out the
|
|
|
|
* maximum number of driver params that would eventually be used, to leave
|
|
|
|
* space for this function to allocate the driver params.
|
|
|
|
*/
|
|
|
|
void
|
2020-06-15 20:14:04 +01:00
|
|
|
ir3_setup_const_state(nir_shader *nir, struct ir3_shader_variant *v,
|
|
|
|
struct ir3_const_state *const_state)
|
2019-05-07 14:05:58 +01:00
|
|
|
{
|
2020-06-15 20:14:04 +01:00
|
|
|
struct ir3_compiler *compiler = v->shader->compiler;
|
2019-05-07 14:05:58 +01:00
|
|
|
|
|
|
|
memset(&const_state->offsets, ~0, sizeof(const_state->offsets));
|
|
|
|
|
|
|
|
ir3_nir_scan_driver_consts(nir, const_state);
|
|
|
|
|
2019-08-01 22:22:46 +01:00
|
|
|
if ((compiler->gpu_id < 500) &&
|
2020-06-15 20:14:04 +01:00
|
|
|
(v->shader->stream_output.num_outputs > 0)) {
|
2019-08-01 22:22:46 +01:00
|
|
|
const_state->num_driver_params =
|
|
|
|
MAX2(const_state->num_driver_params, IR3_DP_VTXCNT_MAX + 1);
|
|
|
|
}
|
|
|
|
|
2020-06-14 20:54:05 +01:00
|
|
|
const_state->num_ubos = nir->info.num_ubos;
|
2020-05-13 00:07:50 +01:00
|
|
|
|
2019-08-01 22:22:46 +01:00
|
|
|
/* num_driver_params is scalar, align to vec4: */
|
|
|
|
const_state->num_driver_params = align(const_state->num_driver_params, 4);
|
|
|
|
|
2020-06-14 20:44:17 +01:00
|
|
|
debug_assert((const_state->ubo_state.size % 16) == 0);
|
|
|
|
unsigned constoff = const_state->ubo_state.size / 16;
|
2019-05-07 14:05:58 +01:00
|
|
|
unsigned ptrsz = ir3_pointer_size(compiler);
|
|
|
|
|
|
|
|
if (const_state->num_ubos > 0) {
|
|
|
|
const_state->offsets.ubo = constoff;
|
2020-05-13 00:07:50 +01:00
|
|
|
constoff += align(const_state->num_ubos * ptrsz, 4) / 4;
|
2019-05-07 14:05:58 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
if (const_state->ssbo_size.count > 0) {
|
|
|
|
unsigned cnt = const_state->ssbo_size.count;
|
|
|
|
const_state->offsets.ssbo_sizes = constoff;
|
|
|
|
constoff += align(cnt, 4) / 4;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (const_state->image_dims.count > 0) {
|
|
|
|
unsigned cnt = const_state->image_dims.count;
|
|
|
|
const_state->offsets.image_dims = constoff;
|
|
|
|
constoff += align(cnt, 4) / 4;
|
|
|
|
}
|
|
|
|
|
2020-06-24 20:58:44 +01:00
|
|
|
if (const_state->num_driver_params > 0) {
|
|
|
|
/* offset cannot be 0 for vs params loaded by CP_DRAW_INDIRECT_MULTI */
|
|
|
|
if (v->type == MESA_SHADER_VERTEX && compiler->gpu_id >= 600)
|
|
|
|
constoff = MAX2(constoff, 1);
|
2019-08-01 22:22:46 +01:00
|
|
|
const_state->offsets.driver_param = constoff;
|
2020-06-24 20:58:44 +01:00
|
|
|
}
|
2019-08-01 22:22:46 +01:00
|
|
|
constoff += const_state->num_driver_params / 4;
|
2019-05-07 14:05:58 +01:00
|
|
|
|
2020-06-15 20:14:04 +01:00
|
|
|
if ((v->type == MESA_SHADER_VERTEX) &&
|
2019-05-07 14:05:58 +01:00
|
|
|
(compiler->gpu_id < 500) &&
|
2020-06-15 20:14:04 +01:00
|
|
|
v->shader->stream_output.num_outputs > 0) {
|
2019-05-07 14:05:58 +01:00
|
|
|
const_state->offsets.tfbo = constoff;
|
|
|
|
constoff += align(IR3_MAX_SO_BUFFERS * ptrsz, 4) / 4;
|
|
|
|
}
|
|
|
|
|
2020-06-15 20:14:04 +01:00
|
|
|
switch (v->type) {
|
2019-10-11 05:02:45 +01:00
|
|
|
case MESA_SHADER_VERTEX:
|
|
|
|
const_state->offsets.primitive_param = constoff;
|
|
|
|
constoff += 1;
|
|
|
|
break;
|
2019-10-23 03:44:42 +01:00
|
|
|
case MESA_SHADER_TESS_CTRL:
|
|
|
|
case MESA_SHADER_TESS_EVAL:
|
|
|
|
constoff = align(constoff - 1, 4) + 3;
|
|
|
|
const_state->offsets.primitive_param = constoff;
|
|
|
|
const_state->offsets.primitive_map = constoff + 5;
|
ir3: Switch tess lowering to use location
Clip & cull distances, which are compact arrays, exposed a lot of holes
because they can take up multiple slots and partially overlap.
I wanted to eliminate our dependence on knowing the layout of the
variables, as this can get complicated with things like partially
overlapping arrays, which can happen with ARB_enhanced_layouts or with
clip/cull distance arrays. This means no longer changing the layout
based on whether the i/o is part of an array or not, and no longer
matching producer <-> consumer based on the variables. At the end of the
day we have to match things based on the user-specified location, so for
simplicity this switches the entire i/o handling to be based off the
user location rather than the driver location. This means that the
primitive map may be a little bigger, but it reduces the complexity
because we never have to build a table mapping user location to driver
location, and it reduces the amount of work done at link time in the SSO
case. It also brings us closer to what the other drivers do.
While here, I also fixed the handling of component qualifiers, which was
another thing broken with clip/cull distances.
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6959>
2020-09-24 15:24:55 +01:00
|
|
|
constoff += 5 + DIV_ROUND_UP(v->input_size, 4);
|
2019-10-23 03:44:42 +01:00
|
|
|
break;
|
2019-10-11 05:02:45 +01:00
|
|
|
case MESA_SHADER_GEOMETRY:
|
|
|
|
const_state->offsets.primitive_param = constoff;
|
|
|
|
const_state->offsets.primitive_map = constoff + 1;
|
ir3: Switch tess lowering to use location
Clip & cull distances, which are compact arrays, exposed a lot of holes
because they can take up multiple slots and partially overlap.
I wanted to eliminate our dependence on knowing the layout of the
variables, as this can get complicated with things like partially
overlapping arrays, which can happen with ARB_enhanced_layouts or with
clip/cull distance arrays. This means no longer changing the layout
based on whether the i/o is part of an array or not, and no longer
matching producer <-> consumer based on the variables. At the end of the
day we have to match things based on the user-specified location, so for
simplicity this switches the entire i/o handling to be based off the
user location rather than the driver location. This means that the
primitive map may be a little bigger, but it reduces the complexity
because we never have to build a table mapping user location to driver
location, and it reduces the amount of work done at link time in the SSO
case. It also brings us closer to what the other drivers do.
While here, I also fixed the handling of component qualifiers, which was
another thing broken with clip/cull distances.
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6959>
2020-09-24 15:24:55 +01:00
|
|
|
constoff += 1 + DIV_ROUND_UP(v->input_size, 4);
|
2019-10-11 05:02:45 +01:00
|
|
|
break;
|
|
|
|
default:
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
2019-05-07 14:05:58 +01:00
|
|
|
const_state->offsets.immediate = constoff;
|
2020-05-30 00:31:43 +01:00
|
|
|
|
2020-06-24 11:03:59 +01:00
|
|
|
assert(constoff <= ir3_max_const(v));
|
2019-05-07 14:05:58 +01:00
|
|
|
}
|