From 55c71217ec7a184753d64560323c18acd50b0fcf Mon Sep 17 00:00:00 2001 From: Vadym Shovkoplias Date: Fri, 6 May 2022 18:52:47 +0300 Subject: [PATCH] driconf: Add a limit_trig_input_range option With this option enabled, the range of input values for fsin and fcos is limited to [-2*pi : 2*pi] by calculating the remainder after 2*pi modulo division. This helps to improve calculation precision for large input arguments on Intel. -v2: Add limit_trig_input_range option to prog_key to update shader cache (Lionel) Signed-off-by: Vadym Shovkoplias Reviewed-by: Lionel Landwerlin Part-of: --- src/gallium/drivers/crocus/crocus_program.c | 7 +- src/gallium/drivers/crocus/crocus_screen.c | 2 + src/gallium/drivers/crocus/crocus_screen.h | 1 + src/gallium/drivers/crocus/driinfo_crocus.h | 1 + src/gallium/drivers/iris/driinfo_iris.h | 1 + src/gallium/drivers/iris/iris_context.h | 1 + src/gallium/drivers/iris/iris_program.c | 85 ++++++++++--------- src/gallium/drivers/iris/iris_screen.c | 2 + src/gallium/drivers/iris/iris_screen.h | 1 + src/intel/compiler/brw_compiler.h | 7 ++ src/intel/compiler/brw_nir.c | 3 + src/intel/compiler/brw_nir.h | 2 + .../compiler/brw_nir_trig_workarounds.py | 7 ++ src/intel/vulkan/anv_device.c | 3 + src/intel/vulkan/anv_pipeline.c | 59 ++++++------- src/intel/vulkan/anv_private.h | 1 + src/util/driconf.h | 4 + 17 files changed, 117 insertions(+), 70 deletions(-) diff --git a/src/gallium/drivers/crocus/crocus_program.c b/src/gallium/drivers/crocus/crocus_program.c index 08e408cb334..f1ee9b9c816 100644 --- a/src/gallium/drivers/crocus/crocus_program.c +++ b/src/gallium/drivers/crocus/crocus_program.c @@ -51,7 +51,10 @@ .base.subgroup_size_type = BRW_SUBGROUP_SIZE_UNIFORM, \ .base.tex.swizzles[0 ... 
MAX_SAMPLERS - 1] = 0x688, \ .base.tex.compressed_multisample_layout_mask = ~0 -#define KEY_INIT() .base.program_string_id = ish->program_id, KEY_INIT_NO_ID() +#define KEY_INIT() \ + .base.program_string_id = ish->program_id, \ + .base.limit_trig_input_range = screen->driconf.limit_trig_input_range, \ + KEY_INIT_NO_ID() static void crocus_sanitize_tex_key(struct brw_sampler_prog_key_data *key) @@ -1660,8 +1663,8 @@ crocus_update_compiled_tes(struct crocus_context *ice) struct crocus_shader_state *shs = &ice->state.shaders[MESA_SHADER_TESS_EVAL]; struct crocus_uncompiled_shader *ish = ice->shaders.uncompiled[MESA_SHADER_TESS_EVAL]; - struct brw_tes_prog_key key = { KEY_INIT() }; struct crocus_screen *screen = (struct crocus_screen *)ice->ctx.screen; + struct brw_tes_prog_key key = { KEY_INIT() }; const struct intel_device_info *devinfo = &screen->devinfo; if (ish->nos & (1ull << CROCUS_NOS_TEXTURES)) diff --git a/src/gallium/drivers/crocus/crocus_screen.c b/src/gallium/drivers/crocus/crocus_screen.c index 52a54a1e248..2c0e82d3ec8 100644 --- a/src/gallium/drivers/crocus/crocus_screen.c +++ b/src/gallium/drivers/crocus/crocus_screen.c @@ -762,6 +762,8 @@ crocus_screen_create(int fd, const struct pipe_screen_config *config) driQueryOptionb(config->options, "disable_throttling"); screen->driconf.always_flush_cache = driQueryOptionb(config->options, "always_flush_cache"); + screen->driconf.limit_trig_input_range = + driQueryOptionb(config->options, "limit_trig_input_range"); screen->precompile = env_var_as_boolean("shader_precompile", true); diff --git a/src/gallium/drivers/crocus/crocus_screen.h b/src/gallium/drivers/crocus/crocus_screen.h index c5b5a2c8df2..31d4fad158c 100644 --- a/src/gallium/drivers/crocus/crocus_screen.h +++ b/src/gallium/drivers/crocus/crocus_screen.h @@ -199,6 +199,7 @@ struct crocus_screen { bool dual_color_blend_by_location; bool disable_throttling; bool always_flush_cache; + bool limit_trig_input_range; } driconf; uint64_t aperture_bytes; diff 
--git a/src/gallium/drivers/crocus/driinfo_crocus.h b/src/gallium/drivers/crocus/driinfo_crocus.h index 829bf7f818c..71fc5a3dc4e 100644 --- a/src/gallium/drivers/crocus/driinfo_crocus.h +++ b/src/gallium/drivers/crocus/driinfo_crocus.h @@ -4,6 +4,7 @@ DRI_CONF_SECTION_DEBUG DRI_CONF_DUAL_COLOR_BLEND_BY_LOCATION(false) DRI_CONF_DISABLE_THROTTLING(false) DRI_CONF_ALWAYS_FLUSH_CACHE(false) + DRI_CONF_LIMIT_TRIG_INPUT_RANGE(false) DRI_CONF_SECTION_END DRI_CONF_SECTION_PERFORMANCE diff --git a/src/gallium/drivers/iris/driinfo_iris.h b/src/gallium/drivers/iris/driinfo_iris.h index 15ede27e4e3..ff19011b954 100644 --- a/src/gallium/drivers/iris/driinfo_iris.h +++ b/src/gallium/drivers/iris/driinfo_iris.h @@ -5,6 +5,7 @@ DRI_CONF_SECTION_DEBUG DRI_CONF_DISABLE_THROTTLING(false) DRI_CONF_ALWAYS_FLUSH_CACHE(false) DRI_CONF_OPT_B(sync_compile, false, "Always compile synchronously (will cause stalls)") + DRI_CONF_LIMIT_TRIG_INPUT_RANGE(false) DRI_CONF_SECTION_END DRI_CONF_SECTION_PERFORMANCE diff --git a/src/gallium/drivers/iris/iris_context.h b/src/gallium/drivers/iris/iris_context.h index 7b73c7be06b..648c27411dc 100644 --- a/src/gallium/drivers/iris/iris_context.h +++ b/src/gallium/drivers/iris/iris_context.h @@ -208,6 +208,7 @@ enum iris_nos_dep { struct iris_base_prog_key { unsigned program_string_id; + bool limit_trig_input_range; }; /** diff --git a/src/gallium/drivers/iris/iris_program.c b/src/gallium/drivers/iris/iris_program.c index 1ea9be793e0..02423c0d308 100644 --- a/src/gallium/drivers/iris/iris_program.c +++ b/src/gallium/drivers/iris/iris_program.c @@ -47,9 +47,12 @@ #include "iris_context.h" #include "nir/tgsi_to_nir.h" -#define KEY_ID(prefix) .prefix.program_string_id = ish->program_id -#define BRW_KEY_INIT(gen, prog_id) \ +#define KEY_INIT(prefix) \ + .prefix.program_string_id = ish->program_id, \ + .prefix.limit_trig_input_range = screen->driconf.limit_trig_input_range +#define BRW_KEY_INIT(gen, prog_id, limit_trig_input) \ .base.program_string_id = prog_id, 
\ + .base.limit_trig_input_range = limit_trig_input, \ .base.subgroup_size_type = BRW_SUBGROUP_SIZE_UNIFORM, \ .base.tex.swizzles[0 ... MAX_SAMPLERS - 1] = 0x688, \ .base.tex.compressed_multisample_layout_mask = ~0, \ @@ -95,11 +98,12 @@ iris_finalize_program(struct iris_compiled_shader *shader, } static struct brw_vs_prog_key -iris_to_brw_vs_key(const struct intel_device_info *devinfo, +iris_to_brw_vs_key(const struct iris_screen *screen, const struct iris_vs_prog_key *key) { return (struct brw_vs_prog_key) { - BRW_KEY_INIT(devinfo->ver, key->vue.base.program_string_id), + BRW_KEY_INIT(screen->devinfo.ver, key->vue.base.program_string_id, + key->vue.base.limit_trig_input_range), /* Don't tell the backend about our clip plane constants, we've * already lowered them in NIR and don't want it doing it again. @@ -109,11 +113,12 @@ iris_to_brw_vs_key(const struct intel_device_info *devinfo, } static struct brw_tcs_prog_key -iris_to_brw_tcs_key(const struct intel_device_info *devinfo, +iris_to_brw_tcs_key(const struct iris_screen *screen, const struct iris_tcs_prog_key *key) { return (struct brw_tcs_prog_key) { - BRW_KEY_INIT(devinfo->ver, key->vue.base.program_string_id), + BRW_KEY_INIT(screen->devinfo.ver, key->vue.base.program_string_id, + key->vue.base.limit_trig_input_range), ._tes_primitive_mode = key->_tes_primitive_mode, .input_vertices = key->input_vertices, .patch_outputs_written = key->patch_outputs_written, @@ -123,31 +128,34 @@ iris_to_brw_tcs_key(const struct intel_device_info *devinfo, } static struct brw_tes_prog_key -iris_to_brw_tes_key(const struct intel_device_info *devinfo, +iris_to_brw_tes_key(const struct iris_screen *screen, const struct iris_tes_prog_key *key) { return (struct brw_tes_prog_key) { - BRW_KEY_INIT(devinfo->ver, key->vue.base.program_string_id), + BRW_KEY_INIT(screen->devinfo.ver, key->vue.base.program_string_id, + key->vue.base.limit_trig_input_range), .patch_inputs_read = key->patch_inputs_read, .inputs_read = key->inputs_read, }; } 
static struct brw_gs_prog_key -iris_to_brw_gs_key(const struct intel_device_info *devinfo, +iris_to_brw_gs_key(const struct iris_screen *screen, const struct iris_gs_prog_key *key) { return (struct brw_gs_prog_key) { - BRW_KEY_INIT(devinfo->ver, key->vue.base.program_string_id), + BRW_KEY_INIT(screen->devinfo.ver, key->vue.base.program_string_id, + key->vue.base.limit_trig_input_range), }; } static struct brw_wm_prog_key -iris_to_brw_fs_key(const struct intel_device_info *devinfo, +iris_to_brw_fs_key(const struct iris_screen *screen, const struct iris_fs_prog_key *key) { return (struct brw_wm_prog_key) { - BRW_KEY_INIT(devinfo->ver, key->base.program_string_id), + BRW_KEY_INIT(screen->devinfo.ver, key->base.program_string_id, + key->base.limit_trig_input_range), .nr_color_regions = key->nr_color_regions, .flat_shade = key->flat_shade, .alpha_test_replicate_alpha = key->alpha_test_replicate_alpha, @@ -164,11 +172,12 @@ iris_to_brw_fs_key(const struct intel_device_info *devinfo, } static struct brw_cs_prog_key -iris_to_brw_cs_key(const struct intel_device_info *devinfo, +iris_to_brw_cs_key(const struct iris_screen *screen, const struct iris_cs_prog_key *key) { return (struct brw_cs_prog_key) { - BRW_KEY_INIT(devinfo->ver, key->base.program_string_id), + BRW_KEY_INIT(screen->devinfo.ver, key->base.program_string_id, + key->base.limit_trig_input_range), }; } @@ -1113,7 +1122,6 @@ iris_debug_recompile(struct iris_screen *screen, || list_is_singular(&ish->variants)) return; - const struct intel_device_info *devinfo = &screen->devinfo; const struct brw_compiler *c = screen->compiler; const struct shader_info *info = &ish->nir->info; @@ -1130,22 +1138,22 @@ iris_debug_recompile(struct iris_screen *screen, switch (info->stage) { case MESA_SHADER_VERTEX: - old_key.vs = iris_to_brw_vs_key(devinfo, old_iris_key); + old_key.vs = iris_to_brw_vs_key(screen, old_iris_key); break; case MESA_SHADER_TESS_CTRL: - old_key.tcs = iris_to_brw_tcs_key(devinfo, old_iris_key); + old_key.tcs 
= iris_to_brw_tcs_key(screen, old_iris_key); break; case MESA_SHADER_TESS_EVAL: - old_key.tes = iris_to_brw_tes_key(devinfo, old_iris_key); + old_key.tes = iris_to_brw_tes_key(screen, old_iris_key); break; case MESA_SHADER_GEOMETRY: - old_key.gs = iris_to_brw_gs_key(devinfo, old_iris_key); + old_key.gs = iris_to_brw_gs_key(screen, old_iris_key); break; case MESA_SHADER_FRAGMENT: - old_key.wm = iris_to_brw_fs_key(devinfo, old_iris_key); + old_key.wm = iris_to_brw_fs_key(screen, old_iris_key); break; case MESA_SHADER_COMPUTE: - old_key.cs = iris_to_brw_cs_key(devinfo, old_iris_key); + old_key.cs = iris_to_brw_cs_key(screen, old_iris_key); break; default: unreachable("invalid shader stage"); @@ -1342,7 +1350,7 @@ iris_compile_vs(struct iris_screen *screen, &vue_prog_data->vue_map, nir->info.outputs_written, nir->info.separate_shader, /* pos_slots */ 1); - struct brw_vs_prog_key brw_key = iris_to_brw_vs_key(devinfo, key); + struct brw_vs_prog_key brw_key = iris_to_brw_vs_key(screen, key); struct brw_compile_vs_params params = { .nir = nir, @@ -1395,7 +1403,7 @@ iris_update_compiled_vs(struct iris_context *ice) struct iris_uncompiled_shader *ish = ice->shaders.uncompiled[MESA_SHADER_VERTEX]; - struct iris_vs_prog_key key = { KEY_ID(vue.base) }; + struct iris_vs_prog_key key = { KEY_INIT(vue.base) }; screen->vtbl.populate_vs_key(ice, &ish->nir->info, last_vue_stage(ice), &key); struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_VS]; @@ -1501,7 +1509,7 @@ iris_compile_tcs(struct iris_screen *screen, struct iris_binding_table bt; const struct iris_tcs_prog_key *const key = &shader->key.tcs; - struct brw_tcs_prog_key brw_key = iris_to_brw_tcs_key(devinfo, key); + struct brw_tcs_prog_key brw_key = iris_to_brw_tcs_key(screen, key); if (ish) { nir = nir_shader_clone(mem_ctx, ish->nir); @@ -1709,7 +1717,7 @@ iris_compile_tes(struct iris_screen *screen, brw_compute_tess_vue_map(&input_vue_map, key->inputs_read, key->patch_inputs_read); - struct brw_tes_prog_key 
brw_key = iris_to_brw_tes_key(devinfo, key); + struct brw_tes_prog_key brw_key = iris_to_brw_tes_key(screen, key); struct brw_compile_tes_params params = { .nir = nir, @@ -1763,7 +1771,7 @@ iris_update_compiled_tes(struct iris_context *ice) struct iris_uncompiled_shader *ish = ice->shaders.uncompiled[MESA_SHADER_TESS_EVAL]; - struct iris_tes_prog_key key = { KEY_ID(vue.base) }; + struct iris_tes_prog_key key = { KEY_INIT(vue.base) }; get_unified_tess_slots(ice, &key.inputs_read, &key.patch_inputs_read); screen->vtbl.populate_tes_key(ice, &ish->nir->info, last_vue_stage(ice), &key); @@ -1848,7 +1856,7 @@ iris_compile_gs(struct iris_screen *screen, &vue_prog_data->vue_map, nir->info.outputs_written, nir->info.separate_shader, /* pos_slots */ 1); - struct brw_gs_prog_key brw_key = iris_to_brw_gs_key(devinfo, key); + struct brw_gs_prog_key brw_key = iris_to_brw_gs_key(screen, key); struct brw_compile_gs_params params = { .nir = nir, @@ -1904,7 +1912,7 @@ iris_update_compiled_gs(struct iris_context *ice) struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen; if (ish) { - struct iris_gs_prog_key key = { KEY_ID(vue.base) }; + struct iris_gs_prog_key key = { KEY_INIT(vue.base) }; screen->vtbl.populate_gs_key(ice, &ish->nir->info, last_vue_stage(ice), &key); bool added; @@ -1984,7 +1992,7 @@ iris_compile_fs(struct iris_screen *screen, brw_nir_analyze_ubo_ranges(compiler, nir, NULL, prog_data->ubo_ranges); - struct brw_wm_prog_key brw_key = iris_to_brw_fs_key(devinfo, key); + struct brw_wm_prog_key brw_key = iris_to_brw_fs_key(screen, key); struct brw_compile_fs_params params = { .nir = nir, @@ -2035,8 +2043,8 @@ iris_update_compiled_fs(struct iris_context *ice) struct u_upload_mgr *uploader = ice->shaders.uploader_driver; struct iris_uncompiled_shader *ish = ice->shaders.uncompiled[MESA_SHADER_FRAGMENT]; - struct iris_fs_prog_key key = { KEY_ID(base) }; struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen; + struct iris_fs_prog_key key = { 
KEY_INIT(base) }; screen->vtbl.populate_fs_key(ice, &ish->nir->info, &key); struct brw_vue_map *last_vue_map = @@ -2261,7 +2269,7 @@ iris_compile_cs(struct iris_screen *screen, iris_setup_binding_table(devinfo, nir, &bt, /* num_render_targets */ 0, num_system_values, num_cbufs); - struct brw_cs_prog_key brw_key = iris_to_brw_cs_key(devinfo, key); + struct brw_cs_prog_key brw_key = iris_to_brw_cs_key(screen, key); struct brw_compile_cs_params params = { .nir = nir, @@ -2303,9 +2311,8 @@ iris_update_compiled_cs(struct iris_context *ice) struct u_upload_mgr *uploader = ice->shaders.uploader_driver; struct iris_uncompiled_shader *ish = ice->shaders.uncompiled[MESA_SHADER_COMPUTE]; - - struct iris_cs_prog_key key = { KEY_ID(base) }; struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen; + struct iris_cs_prog_key key = { KEY_INIT(base) }; screen->vtbl.populate_cs_key(ice, &key); struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_CS]; @@ -2519,7 +2526,7 @@ iris_create_compute_state(struct pipe_context *ctx, // XXX: disallow more than 64KB of shared variables if (screen->precompile) { - struct iris_cs_prog_key key = { KEY_ID(base) }; + struct iris_cs_prog_key key = { KEY_INIT(base) }; struct iris_compiled_shader *shader = iris_create_shader_variant(screen, NULL, IRIS_CACHE_CS, @@ -2599,13 +2606,13 @@ iris_create_shader_state(struct pipe_context *ctx, if (info->clip_distance_array_size == 0) ish->nos |= (1ull << IRIS_NOS_RASTERIZER); - key.vs = (struct iris_vs_prog_key) { KEY_ID(vue.base) }; + key.vs = (struct iris_vs_prog_key) { KEY_INIT(vue.base) }; key_size = sizeof(key.vs); break; case MESA_SHADER_TESS_CTRL: { key.tcs = (struct iris_tcs_prog_key) { - KEY_ID(vue.base), + KEY_INIT(vue.base), // XXX: make sure the linker fills this out from the TES... ._tes_primitive_mode = info->tess._primitive_mode ? 
info->tess._primitive_mode @@ -2632,7 +2639,7 @@ iris_create_shader_state(struct pipe_context *ctx, ish->nos |= (1ull << IRIS_NOS_RASTERIZER); key.tes = (struct iris_tes_prog_key) { - KEY_ID(vue.base), + KEY_INIT(vue.base), // XXX: not ideal, need TCS output/TES input unification .inputs_read = info->inputs_read, .patch_inputs_read = info->patch_inputs_read, @@ -2646,7 +2653,7 @@ iris_create_shader_state(struct pipe_context *ctx, if (info->clip_distance_array_size == 0) ish->nos |= (1ull << IRIS_NOS_RASTERIZER); - key.gs = (struct iris_gs_prog_key) { KEY_ID(vue.base) }; + key.gs = (struct iris_gs_prog_key) { KEY_INIT(vue.base) }; key_size = sizeof(key.gs); break; @@ -2672,7 +2679,7 @@ iris_create_shader_state(struct pipe_context *ctx, const struct intel_device_info *devinfo = &screen->devinfo; key.fs = (struct iris_fs_prog_key) { - KEY_ID(base), + KEY_INIT(base), .nr_color_regions = util_bitcount(color_outputs), .coherent_fb_fetch = devinfo->ver >= 9, .input_slots_valid = diff --git a/src/gallium/drivers/iris/iris_screen.c b/src/gallium/drivers/iris/iris_screen.c index 180de3b9250..df0c9012991 100644 --- a/src/gallium/drivers/iris/iris_screen.c +++ b/src/gallium/drivers/iris/iris_screen.c @@ -843,6 +843,8 @@ iris_screen_create(int fd, const struct pipe_screen_config *config) driQueryOptionb(config->options, "always_flush_cache"); screen->driconf.sync_compile = driQueryOptionb(config->options, "sync_compile"); + screen->driconf.limit_trig_input_range = + driQueryOptionb(config->options, "limit_trig_input_range"); screen->precompile = env_var_as_boolean("shader_precompile", true); diff --git a/src/gallium/drivers/iris/iris_screen.h b/src/gallium/drivers/iris/iris_screen.h index 92b77e9cb86..c62322cd142 100644 --- a/src/gallium/drivers/iris/iris_screen.h +++ b/src/gallium/drivers/iris/iris_screen.h @@ -181,6 +181,7 @@ struct iris_screen { bool disable_throttling; bool always_flush_cache; bool sync_compile; + bool limit_trig_input_range; } driconf; /** Does the kernel 
support various features (KERNEL_HAS_* bitfield)? */ diff --git a/src/intel/compiler/brw_compiler.h b/src/intel/compiler/brw_compiler.h index 3691c4325d6..0caeafa8a3f 100644 --- a/src/intel/compiler/brw_compiler.h +++ b/src/intel/compiler/brw_compiler.h @@ -247,6 +247,13 @@ struct brw_base_prog_key { enum brw_subgroup_size_type subgroup_size_type; bool robust_buffer_access; + + /** + * Apply workarounds for SIN and COS input range problems. + * This limits input range for SIN and COS to [-2p : 2p] to + * avoid precision issues. + */ + bool limit_trig_input_range; struct brw_sampler_prog_key_data tex; }; diff --git a/src/intel/compiler/brw_nir.c b/src/intel/compiler/brw_nir.c index 81930286604..0381c67c6e5 100644 --- a/src/intel/compiler/brw_nir.c +++ b/src/intel/compiler/brw_nir.c @@ -1409,6 +1409,9 @@ brw_nir_apply_key(nir_shader *nir, }; OPT(nir_lower_subgroups, &subgroups_options); + if (key->limit_trig_input_range) + OPT(brw_nir_limit_trig_input_range_workaround); + if (progress) brw_nir_optimize(nir, compiler, is_scalar, false); } diff --git a/src/intel/compiler/brw_nir.h b/src/intel/compiler/brw_nir.h index 59b65301fb2..98f6c946c11 100644 --- a/src/intel/compiler/brw_nir.h +++ b/src/intel/compiler/brw_nir.h @@ -142,6 +142,8 @@ bool brw_nir_apply_attribute_workarounds(nir_shader *nir, bool brw_nir_apply_trig_workarounds(nir_shader *nir); +bool brw_nir_limit_trig_input_range_workaround(nir_shader *nir); + void brw_nir_apply_tcs_quads_workaround(nir_shader *nir); void brw_nir_apply_key(nir_shader *nir, diff --git a/src/intel/compiler/brw_nir_trig_workarounds.py b/src/intel/compiler/brw_nir_trig_workarounds.py index 7425ff4c6ec..5d6a7601d31 100644 --- a/src/intel/compiler/brw_nir_trig_workarounds.py +++ b/src/intel/compiler/brw_nir_trig_workarounds.py @@ -33,12 +33,17 @@ import argparse import sys +from math import pi TRIG_WORKAROUNDS = [ (('fsin', 'x(is_not_const)'), ('fmul', ('fsin', 'x'), 0.99997)), (('fcos', 'x(is_not_const)'), ('fmul', ('fcos', 'x'), 
0.99997)), ] +LIMIT_TRIG_INPUT_RANGE_WORKAROUND = [ + (('fsin', 'x(is_not_const)'), ('fsin', ('fmod', 'x', 2.0 * pi))), + (('fcos', 'x(is_not_const)'), ('fcos', ('fmod', 'x', 2.0 * pi))), +] def main(): parser = argparse.ArgumentParser() @@ -54,6 +59,8 @@ def run(): print('#include "brw_nir.h"') print(nir_algebraic.AlgebraicPass("brw_nir_apply_trig_workarounds", TRIG_WORKAROUNDS).render()) + print(nir_algebraic.AlgebraicPass("brw_nir_limit_trig_input_range_workaround", + LIMIT_TRIG_INPUT_RANGE_WORKAROUND).render()) if __name__ == '__main__': diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c index 41f7486feeb..45c2e2b7a34 100644 --- a/src/intel/vulkan/anv_device.c +++ b/src/intel/vulkan/anv_device.c @@ -74,6 +74,7 @@ static const driOptionDescription anv_dri_options[] = { DRI_CONF_SECTION_DEBUG DRI_CONF_ALWAYS_FLUSH_CACHE(false) DRI_CONF_VK_WSI_FORCE_BGRA8_UNORM_FIRST(false) + DRI_CONF_LIMIT_TRIG_INPUT_RANGE(false) DRI_CONF_SECTION_END }; @@ -1104,6 +1105,8 @@ anv_init_dri_options(struct anv_instance *instance) instance->assume_full_subgroups = driQueryOptionb(&instance->dri_options, "anv_assume_full_subgroups"); + instance->limit_trig_input_range = + driQueryOptionb(&instance->dri_options, "limit_trig_input_range"); } VkResult anv_CreateInstance( diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c index 06e3c872ab3..8d4238f8bec 100644 --- a/src/intel/vulkan/anv_pipeline.c +++ b/src/intel/vulkan/anv_pipeline.c @@ -314,26 +314,28 @@ populate_sampler_prog_key(const struct intel_device_info *devinfo, } static void -populate_base_prog_key(const struct intel_device_info *devinfo, +populate_base_prog_key(const struct anv_device *device, enum brw_subgroup_size_type subgroup_size_type, bool robust_buffer_acccess, struct brw_base_prog_key *key) { key->subgroup_size_type = subgroup_size_type; key->robust_buffer_access = robust_buffer_acccess; + key->limit_trig_input_range = + 
device->physical->instance->limit_trig_input_range; - populate_sampler_prog_key(devinfo, &key->tex); + populate_sampler_prog_key(&device->info, &key->tex); } static void -populate_vs_prog_key(const struct intel_device_info *devinfo, +populate_vs_prog_key(const struct anv_device *device, enum brw_subgroup_size_type subgroup_size_type, bool robust_buffer_acccess, struct brw_vs_prog_key *key) { memset(key, 0, sizeof(*key)); - populate_base_prog_key(devinfo, subgroup_size_type, + populate_base_prog_key(device, subgroup_size_type, robust_buffer_acccess, &key->base); /* XXX: Handle vertex input work-arounds */ @@ -342,7 +344,7 @@ populate_vs_prog_key(const struct intel_device_info *devinfo, } static void -populate_tcs_prog_key(const struct intel_device_info *devinfo, +populate_tcs_prog_key(const struct anv_device *device, enum brw_subgroup_size_type subgroup_size_type, bool robust_buffer_acccess, unsigned input_vertices, @@ -350,33 +352,33 @@ populate_tcs_prog_key(const struct intel_device_info *devinfo, { memset(key, 0, sizeof(*key)); - populate_base_prog_key(devinfo, subgroup_size_type, + populate_base_prog_key(device, subgroup_size_type, robust_buffer_acccess, &key->base); key->input_vertices = input_vertices; } static void -populate_tes_prog_key(const struct intel_device_info *devinfo, +populate_tes_prog_key(const struct anv_device *device, enum brw_subgroup_size_type subgroup_size_type, bool robust_buffer_acccess, struct brw_tes_prog_key *key) { memset(key, 0, sizeof(*key)); - populate_base_prog_key(devinfo, subgroup_size_type, + populate_base_prog_key(device, subgroup_size_type, robust_buffer_acccess, &key->base); } static void -populate_gs_prog_key(const struct intel_device_info *devinfo, +populate_gs_prog_key(const struct anv_device *device, enum brw_subgroup_size_type subgroup_size_type, bool robust_buffer_acccess, struct brw_gs_prog_key *key) { memset(key, 0, sizeof(*key)); - populate_base_prog_key(devinfo, subgroup_size_type, + populate_base_prog_key(device, 
subgroup_size_type, robust_buffer_acccess, &key->base); } @@ -436,25 +438,25 @@ pipeline_has_coarse_pixel(const struct anv_graphics_pipeline *pipeline, } static void -populate_task_prog_key(const struct intel_device_info *devinfo, +populate_task_prog_key(const struct anv_device *device, enum brw_subgroup_size_type subgroup_size_type, bool robust_buffer_access, struct brw_task_prog_key *key) { memset(key, 0, sizeof(*key)); - populate_base_prog_key(devinfo, subgroup_size_type, robust_buffer_access, &key->base); + populate_base_prog_key(device, subgroup_size_type, robust_buffer_access, &key->base); } static void -populate_mesh_prog_key(const struct intel_device_info *devinfo, +populate_mesh_prog_key(const struct anv_device *device, enum brw_subgroup_size_type subgroup_size_type, bool robust_buffer_access, struct brw_mesh_prog_key *key) { memset(key, 0, sizeof(*key)); - populate_base_prog_key(devinfo, subgroup_size_type, robust_buffer_access, &key->base); + populate_base_prog_key(device, subgroup_size_type, robust_buffer_access, &key->base); } static void @@ -467,11 +469,10 @@ populate_wm_prog_key(const struct anv_graphics_pipeline *pipeline, struct brw_wm_prog_key *key) { const struct anv_device *device = pipeline->base.device; - const struct intel_device_info *devinfo = &device->info; memset(key, 0, sizeof(*key)); - populate_base_prog_key(devinfo, flags, robust_buffer_acccess, &key->base); + populate_base_prog_key(device, flags, robust_buffer_acccess, &key->base); /* We set this to 0 here and set to the actual value before we call * brw_compile_fs. 
@@ -515,26 +516,26 @@ populate_wm_prog_key(const struct anv_graphics_pipeline *pipeline, } static void -populate_cs_prog_key(const struct intel_device_info *devinfo, +populate_cs_prog_key(const struct anv_device *device, enum brw_subgroup_size_type subgroup_size_type, bool robust_buffer_acccess, struct brw_cs_prog_key *key) { memset(key, 0, sizeof(*key)); - populate_base_prog_key(devinfo, subgroup_size_type, + populate_base_prog_key(device, subgroup_size_type, robust_buffer_acccess, &key->base); } static void -populate_bs_prog_key(const struct intel_device_info *devinfo, +populate_bs_prog_key(const struct anv_device *device, VkPipelineShaderStageCreateFlags flags, bool robust_buffer_access, struct brw_bs_prog_key *key) { memset(key, 0, sizeof(*key)); - populate_base_prog_key(devinfo, flags, robust_buffer_access, &key->base); + populate_base_prog_key(device, flags, robust_buffer_access, &key->base); } struct anv_pipeline_stage { @@ -1466,26 +1467,26 @@ anv_pipeline_compile_graphics(struct anv_graphics_pipeline *pipeline, enum brw_subgroup_size_type subgroup_size_type = anv_subgroup_size_type(stage, stages[stage].module, sinfo->flags, rss_info); - const struct intel_device_info *devinfo = &pipeline->base.device->info; + const struct anv_device *device = pipeline->base.device; switch (stage) { case MESA_SHADER_VERTEX: - populate_vs_prog_key(devinfo, subgroup_size_type, + populate_vs_prog_key(device, subgroup_size_type, pipeline->base.device->robust_buffer_access, &stages[stage].key.vs); break; case MESA_SHADER_TESS_CTRL: - populate_tcs_prog_key(devinfo, subgroup_size_type, + populate_tcs_prog_key(device, subgroup_size_type, pipeline->base.device->robust_buffer_access, info->pTessellationState->patchControlPoints, &stages[stage].key.tcs); break; case MESA_SHADER_TESS_EVAL: - populate_tes_prog_key(devinfo, subgroup_size_type, + populate_tes_prog_key(device, subgroup_size_type, pipeline->base.device->robust_buffer_access, &stages[stage].key.tes); break; case 
MESA_SHADER_GEOMETRY: - populate_gs_prog_key(devinfo, subgroup_size_type, + populate_gs_prog_key(device, subgroup_size_type, pipeline->base.device->robust_buffer_access, &stages[stage].key.gs); break; @@ -1503,12 +1504,12 @@ anv_pipeline_compile_graphics(struct anv_graphics_pipeline *pipeline, break; } case MESA_SHADER_TASK: - populate_task_prog_key(devinfo, subgroup_size_type, + populate_task_prog_key(device, subgroup_size_type, pipeline->base.device->robust_buffer_access, &stages[stage].key.task); break; case MESA_SHADER_MESH: - populate_mesh_prog_key(devinfo, subgroup_size_type, + populate_mesh_prog_key(device, subgroup_size_type, pipeline->base.device->robust_buffer_access, &stages[stage].key.mesh); break; @@ -1947,7 +1948,7 @@ anv_pipeline_compile_cs(struct anv_compute_pipeline *pipeline, const enum brw_subgroup_size_type subgroup_size_type = anv_subgroup_size_type(MESA_SHADER_COMPUTE, stage.module, info->stage.flags, rss_info); - populate_cs_prog_key(&device->info, subgroup_size_type, + populate_cs_prog_key(device, subgroup_size_type, device->robust_buffer_access, &stage.key.cs); @@ -2749,7 +2750,7 @@ anv_pipeline_init_ray_tracing_stages(struct anv_ray_tracing_pipeline *pipeline, }, }; - populate_bs_prog_key(&pipeline->base.device->info, sinfo->flags, + populate_bs_prog_key(pipeline->base.device, sinfo->flags, pipeline->base.device->robust_buffer_access, &stages[i].key.bs); diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index 3113cbdcd1e..5bb91ea8701 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -1085,6 +1085,7 @@ struct anv_instance { * Workarounds for game bugs. 
*/ bool assume_full_subgroups; + bool limit_trig_input_range; }; VkResult anv_init_wsi(struct anv_physical_device *physical_device); diff --git a/src/util/driconf.h b/src/util/driconf.h index a5ca3f1d6c7..068ed227af0 100644 --- a/src/util/driconf.h +++ b/src/util/driconf.h @@ -291,6 +291,10 @@ DRI_CONF_OPT_B(vk_dont_care_as_load, def, \ "Treat VK_ATTACHMENT_LOAD_OP_DONT_CARE as LOAD_OP_LOAD, workaround on tiler GPUs for games that confuse these two load ops") +#define DRI_CONF_LIMIT_TRIG_INPUT_RANGE(def) \ + DRI_CONF_OPT_B(limit_trig_input_range, def, \ + "Limit trig input range to [-2p : 2p] to improve sin/cos calculation precision on Intel") + /** * \brief Image quality-related options */