ir3: Make nir compiler options a part of ir3_compiler

This allows GPU sub-generations to have different NIR compiler options.

Signed-off-by: Danylo Piliaiev <dpiliaiev@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13986>
This commit is contained in:
Danylo Piliaiev 2021-11-30 18:06:53 +02:00
parent b8d486f298
commit e1f89a1da2
4 changed files with 133 additions and 125 deletions

View File

@ -69,6 +69,121 @@ ir3_compiler_destroy(struct ir3_compiler *compiler)
ralloc_free(compiler);
}
/*
 * NIR compiler options for GPUs older than gen 6.  ir3_compiler_create()
 * copies this table into compiler->nir_options when compiler->gen < 6.
 *
 * Compared with options_a6xx, this table sets vertex_id_zero_based to true
 * and leaves vectorize_io, max_unroll_iterations, force_indirect_unrolling
 * and lower_device_index_to_zero at their zero defaults.
 */
static const nir_shader_compiler_options options = {
   /* ALU lowering flags */
   .lower_fpow = true,
   .lower_scmp = true,
   .lower_flrp16 = true,
   .lower_flrp32 = true,
   .lower_flrp64 = true,
   .lower_ffract = true,
   .lower_fmod = true,
   .lower_fdiv = true,
   .lower_isign = true,
   .lower_ldexp = true,
   .lower_uadd_carry = true,
   .lower_usub_borrow = true,
   .lower_mul_high = true,
   .lower_mul_2x32_64 = true,

   /* ffma fusing, per bit size */
   .fuse_ffma16 = true,
   .fuse_ffma32 = true,
   .fuse_ffma64 = true,

   .vertex_id_zero_based = true,

   /* extract/insert and bitfield flags */
   .lower_extract_byte = true,
   .lower_extract_word = true,
   .lower_insert_byte = true,
   .lower_insert_word = true,
   .lower_helper_invocation = true,
   .lower_bitfield_insert_to_shifts = true,
   .lower_bitfield_extract_to_shifts = true,

   /* pack/unpack flags */
   .lower_pack_half_2x16 = true,
   .lower_pack_snorm_4x8 = true,
   .lower_pack_snorm_2x16 = true,
   .lower_pack_unorm_4x8 = true,
   .lower_pack_unorm_2x16 = true,
   .lower_unpack_half_2x16 = true,
   .lower_unpack_snorm_4x8 = true,
   .lower_unpack_snorm_2x16 = true,
   .lower_unpack_unorm_4x8 = true,
   .lower_unpack_unorm_2x16 = true,
   .lower_pack_split = true,

   .use_interpolated_input_intrinsics = true,
   .lower_rotate = true,
   .lower_to_scalar = true,

   /* has_* flags */
   .has_imul24 = true,
   .has_fsub = true,
   .has_isub = true,

   .lower_wpos_pntc = true,
   .lower_cs_local_index_from_id = true,

   /* Every bit of the int64 lowering mask is set.  This is only needed
    * for the spirv_to_nir() pass done in ir3_cmdline.c, but it should be
    * harmless for GL since 64b is not supported there.
    */
   .lower_int64_options = (nir_lower_int64_options)~0,
   .lower_uniforms_to_ubo = true,
   .use_scoped_barrier = true,
};
/*
 * NIR compiler options for gen 6 and newer GPUs.  ir3_compiler_create()
 * copies this table into compiler->nir_options when compiler->gen >= 6.
 *
 * On these newer GPUs we do not want vertex_id lowered to its _zero_based
 * form, so vertex_id_zero_based is false here.  Compared with the plain
 * `options` table this one additionally sets vectorize_io,
 * max_unroll_iterations, force_indirect_unrolling and
 * lower_device_index_to_zero.
 */
static const nir_shader_compiler_options options_a6xx = {
   /* ALU lowering flags */
   .lower_fpow = true,
   .lower_scmp = true,
   .lower_flrp16 = true,
   .lower_flrp32 = true,
   .lower_flrp64 = true,
   .lower_ffract = true,
   .lower_fmod = true,
   .lower_fdiv = true,
   .lower_isign = true,
   .lower_ldexp = true,
   .lower_uadd_carry = true,
   .lower_usub_borrow = true,
   .lower_mul_high = true,
   .lower_mul_2x32_64 = true,

   /* ffma fusing, per bit size */
   .fuse_ffma16 = true,
   .fuse_ffma32 = true,
   .fuse_ffma64 = true,

   .vertex_id_zero_based = false,

   /* extract/insert and bitfield flags */
   .lower_extract_byte = true,
   .lower_extract_word = true,
   .lower_insert_byte = true,
   .lower_insert_word = true,
   .lower_helper_invocation = true,
   .lower_bitfield_insert_to_shifts = true,
   .lower_bitfield_extract_to_shifts = true,

   /* pack/unpack flags */
   .lower_pack_half_2x16 = true,
   .lower_pack_snorm_4x8 = true,
   .lower_pack_snorm_2x16 = true,
   .lower_pack_unorm_4x8 = true,
   .lower_pack_unorm_2x16 = true,
   .lower_unpack_half_2x16 = true,
   .lower_unpack_snorm_4x8 = true,
   .lower_unpack_snorm_2x16 = true,
   .lower_unpack_unorm_4x8 = true,
   .lower_unpack_unorm_2x16 = true,
   .lower_pack_split = true,

   .use_interpolated_input_intrinsics = true,
   .lower_rotate = true,
   .vectorize_io = true,
   .lower_to_scalar = true,

   /* has_* flags */
   .has_imul24 = true,
   .has_fsub = true,
   .has_isub = true,

   /* loop unrolling settings */
   .max_unroll_iterations = 32,
   .force_indirect_unrolling = nir_var_all,

   .lower_wpos_pntc = true,
   .lower_cs_local_index_from_id = true,

   /* Every bit of the int64 lowering mask is set.  This is only needed
    * for the spirv_to_nir() pass done in ir3_cmdline.c, but it should be
    * harmless for GL since 64b is not supported there.
    */
   .lower_int64_options = (nir_lower_int64_options)~0,
   .lower_uniforms_to_ubo = true,
   .lower_device_index_to_zero = true,
   .use_scoped_barrier = true,
};
struct ir3_compiler *
ir3_compiler_create(struct fd_device *dev, const struct fd_dev_id *dev_id,
bool robust_ubo_access)
@ -192,7 +307,19 @@ ir3_compiler_create(struct fd_device *dev, const struct fd_dev_id *dev_id,
compiler->bool_type = (compiler->gen >= 5) ? TYPE_U16 : TYPE_U32;
if (compiler->gen >= 6) {
compiler->nir_options = options_a6xx;
} else {
compiler->nir_options = options;
}
ir3_disk_cache_init(compiler);
return compiler;
}
/**
 * Return the NIR compiler options stored in the given compiler.
 *
 * The result points at the nir_options member embedded in *compiler, so it
 * stays valid only for as long as the compiler object does.
 */
const nir_shader_compiler_options *
ir3_get_compiler_options(struct ir3_compiler *compiler)
{
   const nir_shader_compiler_options *opts = &compiler->nir_options;

   return opts;
}

View File

@ -27,6 +27,7 @@
#ifndef IR3_COMPILER_H_
#define IR3_COMPILER_H_
#include "compiler/nir/nir.h"
#include "util/disk_cache.h"
#include "util/log.h"
@ -45,6 +46,8 @@ struct ir3_compiler {
struct disk_cache *disk_cache;
struct nir_shader_compiler_options nir_options;
/* If true, UBO accesses are assumed to be bounds-checked as defined by
* VK_EXT_robustness2 and optimizations may have to be more conservative.
*/
@ -186,6 +189,9 @@ bool ir3_disk_cache_retrieve(struct ir3_compiler *compiler,
void ir3_disk_cache_store(struct ir3_compiler *compiler,
struct ir3_shader_variant *v);
/* Returns a pointer to the NIR compiler options held in the compiler's
 * nir_options member.
 */
const nir_shader_compiler_options *
ir3_get_compiler_options(struct ir3_compiler *compiler);
int ir3_compile_shader_nir(struct ir3_compiler *compiler,
struct ir3_shader_variant *so);

View File

@ -31,129 +31,6 @@
#include "ir3_nir.h"
#include "ir3_shader.h"
/*
 * NIR compiler options for GPUs older than gen 6.
 * ir3_get_compiler_options() returns this table when compiler->gen < 6.
 *
 * Compared with options_a6xx, this table sets vertex_id_zero_based to true
 * and leaves vectorize_io, max_unroll_iterations, force_indirect_unrolling
 * and lower_device_index_to_zero at their zero defaults.
 */
static const nir_shader_compiler_options options = {
   /* ALU lowering flags */
   .lower_fpow = true,
   .lower_scmp = true,
   .lower_flrp16 = true,
   .lower_flrp32 = true,
   .lower_flrp64 = true,
   .lower_ffract = true,
   .lower_fmod = true,
   .lower_fdiv = true,
   .lower_isign = true,
   .lower_ldexp = true,
   .lower_uadd_carry = true,
   .lower_usub_borrow = true,
   .lower_mul_high = true,
   .lower_mul_2x32_64 = true,

   /* ffma fusing, per bit size */
   .fuse_ffma16 = true,
   .fuse_ffma32 = true,
   .fuse_ffma64 = true,

   .vertex_id_zero_based = true,

   /* extract/insert and bitfield flags */
   .lower_extract_byte = true,
   .lower_extract_word = true,
   .lower_insert_byte = true,
   .lower_insert_word = true,
   .lower_helper_invocation = true,
   .lower_bitfield_insert_to_shifts = true,
   .lower_bitfield_extract_to_shifts = true,

   /* pack/unpack flags */
   .lower_pack_half_2x16 = true,
   .lower_pack_snorm_4x8 = true,
   .lower_pack_snorm_2x16 = true,
   .lower_pack_unorm_4x8 = true,
   .lower_pack_unorm_2x16 = true,
   .lower_unpack_half_2x16 = true,
   .lower_unpack_snorm_4x8 = true,
   .lower_unpack_snorm_2x16 = true,
   .lower_unpack_unorm_4x8 = true,
   .lower_unpack_unorm_2x16 = true,
   .lower_pack_split = true,

   .use_interpolated_input_intrinsics = true,
   .lower_rotate = true,
   .lower_to_scalar = true,

   /* has_* flags */
   .has_imul24 = true,
   .has_fsub = true,
   .has_isub = true,

   .lower_wpos_pntc = true,
   .lower_cs_local_index_from_id = true,

   /* Every bit of the int64 lowering mask is set.  This is only needed
    * for the spirv_to_nir() pass done in ir3_cmdline.c, but it should be
    * harmless for GL since 64b is not supported there.
    */
   .lower_int64_options = (nir_lower_int64_options)~0,
   .lower_uniforms_to_ubo = true,
   .use_scoped_barrier = true,
};
/*
 * NIR compiler options for gen 6 and newer GPUs.
 * ir3_get_compiler_options() returns this table when compiler->gen >= 6.
 *
 * On these newer GPUs we do not want vertex_id lowered to its _zero_based
 * form, so vertex_id_zero_based is false here.  Compared with the plain
 * `options` table this one additionally sets vectorize_io,
 * max_unroll_iterations, force_indirect_unrolling and
 * lower_device_index_to_zero.
 */
static const nir_shader_compiler_options options_a6xx = {
   /* ALU lowering flags */
   .lower_fpow = true,
   .lower_scmp = true,
   .lower_flrp16 = true,
   .lower_flrp32 = true,
   .lower_flrp64 = true,
   .lower_ffract = true,
   .lower_fmod = true,
   .lower_fdiv = true,
   .lower_isign = true,
   .lower_ldexp = true,
   .lower_uadd_carry = true,
   .lower_usub_borrow = true,
   .lower_mul_high = true,
   .lower_mul_2x32_64 = true,

   /* ffma fusing, per bit size */
   .fuse_ffma16 = true,
   .fuse_ffma32 = true,
   .fuse_ffma64 = true,

   .vertex_id_zero_based = false,

   /* extract/insert and bitfield flags */
   .lower_extract_byte = true,
   .lower_extract_word = true,
   .lower_insert_byte = true,
   .lower_insert_word = true,
   .lower_helper_invocation = true,
   .lower_bitfield_insert_to_shifts = true,
   .lower_bitfield_extract_to_shifts = true,

   /* pack/unpack flags */
   .lower_pack_half_2x16 = true,
   .lower_pack_snorm_4x8 = true,
   .lower_pack_snorm_2x16 = true,
   .lower_pack_unorm_4x8 = true,
   .lower_pack_unorm_2x16 = true,
   .lower_unpack_half_2x16 = true,
   .lower_unpack_snorm_4x8 = true,
   .lower_unpack_snorm_2x16 = true,
   .lower_unpack_unorm_4x8 = true,
   .lower_unpack_unorm_2x16 = true,
   .lower_pack_split = true,

   .use_interpolated_input_intrinsics = true,
   .lower_rotate = true,
   .vectorize_io = true,
   .lower_to_scalar = true,

   /* has_* flags */
   .has_imul24 = true,
   .has_fsub = true,
   .has_isub = true,

   /* loop unrolling settings */
   .max_unroll_iterations = 32,
   .force_indirect_unrolling = nir_var_all,

   .lower_wpos_pntc = true,
   .lower_cs_local_index_from_id = true,

   /* Every bit of the int64 lowering mask is set.  This is only needed
    * for the spirv_to_nir() pass done in ir3_cmdline.c, but it should be
    * harmless for GL since 64b is not supported there.
    */
   .lower_int64_options = (nir_lower_int64_options)~0,
   .lower_uniforms_to_ubo = true,
   .lower_device_index_to_zero = true,
   .use_scoped_barrier = true,
};
/**
 * Pick the NIR compiler options table for the compiler's GPU generation:
 * options_a6xx for gen 6 and newer, the plain options table otherwise.
 */
const nir_shader_compiler_options *
ir3_get_compiler_options(struct ir3_compiler *compiler)
{
   return (compiler->gen >= 6) ? &options_a6xx : &options;
}
static bool
ir3_nir_should_vectorize_mem(unsigned align_mul, unsigned align_offset,
unsigned bit_size, unsigned num_components,

View File

@ -62,8 +62,6 @@ bool ir3_nir_lower_64b_intrinsics(nir_shader *shader);
bool ir3_nir_lower_64b_undef(nir_shader *shader);
bool ir3_nir_lower_64b_global(nir_shader *shader);
/* Returns the NIR compiler options table matching the compiler's GPU
 * generation (a separate table is used for gen >= 6).
 */
const nir_shader_compiler_options *
ir3_get_compiler_options(struct ir3_compiler *compiler);
void ir3_optimize_loop(struct ir3_compiler *compiler, nir_shader *s);
void ir3_nir_lower_io_to_temporaries(nir_shader *s);
void ir3_finalize_nir(struct ir3_compiler *compiler, nir_shader *s);