driconf: Add a limit_trig_input_range option
With this option enabled, the range of input values for fsin and fcos is limited to [-2*pi : 2*pi] by calculating the remainder after 2*pi modulo division. This helps to improve calculation precision for large input arguments on Intel. -v2: Add limit_trig_input_range option to prog_key to update shader cache (Lionel) Signed-off-by: Vadym Shovkoplias <vadym.shovkoplias@globallogic.com> Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16388>
This commit is contained in:
parent
0ff3517fb7
commit
55c71217ec
|
@ -51,7 +51,10 @@
|
|||
.base.subgroup_size_type = BRW_SUBGROUP_SIZE_UNIFORM, \
|
||||
.base.tex.swizzles[0 ... MAX_SAMPLERS - 1] = 0x688, \
|
||||
.base.tex.compressed_multisample_layout_mask = ~0
|
||||
#define KEY_INIT() .base.program_string_id = ish->program_id, KEY_INIT_NO_ID()
|
||||
#define KEY_INIT() \
|
||||
.base.program_string_id = ish->program_id, \
|
||||
.base.limit_trig_input_range = screen->driconf.limit_trig_input_range, \
|
||||
KEY_INIT_NO_ID()
|
||||
|
||||
static void
|
||||
crocus_sanitize_tex_key(struct brw_sampler_prog_key_data *key)
|
||||
|
@ -1660,8 +1663,8 @@ crocus_update_compiled_tes(struct crocus_context *ice)
|
|||
struct crocus_shader_state *shs = &ice->state.shaders[MESA_SHADER_TESS_EVAL];
|
||||
struct crocus_uncompiled_shader *ish =
|
||||
ice->shaders.uncompiled[MESA_SHADER_TESS_EVAL];
|
||||
struct brw_tes_prog_key key = { KEY_INIT() };
|
||||
struct crocus_screen *screen = (struct crocus_screen *)ice->ctx.screen;
|
||||
struct brw_tes_prog_key key = { KEY_INIT() };
|
||||
const struct intel_device_info *devinfo = &screen->devinfo;
|
||||
|
||||
if (ish->nos & (1ull << CROCUS_NOS_TEXTURES))
|
||||
|
|
|
@ -762,6 +762,8 @@ crocus_screen_create(int fd, const struct pipe_screen_config *config)
|
|||
driQueryOptionb(config->options, "disable_throttling");
|
||||
screen->driconf.always_flush_cache =
|
||||
driQueryOptionb(config->options, "always_flush_cache");
|
||||
screen->driconf.limit_trig_input_range =
|
||||
driQueryOptionb(config->options, "limit_trig_input_range");
|
||||
|
||||
screen->precompile = env_var_as_boolean("shader_precompile", true);
|
||||
|
||||
|
|
|
@ -199,6 +199,7 @@ struct crocus_screen {
|
|||
bool dual_color_blend_by_location;
|
||||
bool disable_throttling;
|
||||
bool always_flush_cache;
|
||||
bool limit_trig_input_range;
|
||||
} driconf;
|
||||
|
||||
uint64_t aperture_bytes;
|
||||
|
|
|
@ -4,6 +4,7 @@ DRI_CONF_SECTION_DEBUG
|
|||
DRI_CONF_DUAL_COLOR_BLEND_BY_LOCATION(false)
|
||||
DRI_CONF_DISABLE_THROTTLING(false)
|
||||
DRI_CONF_ALWAYS_FLUSH_CACHE(false)
|
||||
DRI_CONF_LIMIT_TRIG_INPUT_RANGE(false)
|
||||
DRI_CONF_SECTION_END
|
||||
|
||||
DRI_CONF_SECTION_PERFORMANCE
|
||||
|
|
|
@ -5,6 +5,7 @@ DRI_CONF_SECTION_DEBUG
|
|||
DRI_CONF_DISABLE_THROTTLING(false)
|
||||
DRI_CONF_ALWAYS_FLUSH_CACHE(false)
|
||||
DRI_CONF_OPT_B(sync_compile, false, "Always compile synchronously (will cause stalls)")
|
||||
DRI_CONF_LIMIT_TRIG_INPUT_RANGE(false)
|
||||
DRI_CONF_SECTION_END
|
||||
|
||||
DRI_CONF_SECTION_PERFORMANCE
|
||||
|
|
|
@ -208,6 +208,7 @@ enum iris_nos_dep {
|
|||
|
||||
struct iris_base_prog_key {
|
||||
unsigned program_string_id;
|
||||
bool limit_trig_input_range;
|
||||
};
|
||||
|
||||
/**
|
||||
|
|
|
@ -47,9 +47,12 @@
|
|||
#include "iris_context.h"
|
||||
#include "nir/tgsi_to_nir.h"
|
||||
|
||||
#define KEY_ID(prefix) .prefix.program_string_id = ish->program_id
|
||||
#define BRW_KEY_INIT(gen, prog_id) \
|
||||
#define KEY_INIT(prefix) \
|
||||
.prefix.program_string_id = ish->program_id, \
|
||||
.prefix.limit_trig_input_range = screen->driconf.limit_trig_input_range
|
||||
#define BRW_KEY_INIT(gen, prog_id, limit_trig_input) \
|
||||
.base.program_string_id = prog_id, \
|
||||
.base.limit_trig_input_range = limit_trig_input, \
|
||||
.base.subgroup_size_type = BRW_SUBGROUP_SIZE_UNIFORM, \
|
||||
.base.tex.swizzles[0 ... MAX_SAMPLERS - 1] = 0x688, \
|
||||
.base.tex.compressed_multisample_layout_mask = ~0, \
|
||||
|
@ -95,11 +98,12 @@ iris_finalize_program(struct iris_compiled_shader *shader,
|
|||
}
|
||||
|
||||
static struct brw_vs_prog_key
|
||||
iris_to_brw_vs_key(const struct intel_device_info *devinfo,
|
||||
iris_to_brw_vs_key(const struct iris_screen *screen,
|
||||
const struct iris_vs_prog_key *key)
|
||||
{
|
||||
return (struct brw_vs_prog_key) {
|
||||
BRW_KEY_INIT(devinfo->ver, key->vue.base.program_string_id),
|
||||
BRW_KEY_INIT(screen->devinfo.ver, key->vue.base.program_string_id,
|
||||
key->vue.base.limit_trig_input_range),
|
||||
|
||||
/* Don't tell the backend about our clip plane constants, we've
|
||||
* already lowered them in NIR and don't want it doing it again.
|
||||
|
@ -109,11 +113,12 @@ iris_to_brw_vs_key(const struct intel_device_info *devinfo,
|
|||
}
|
||||
|
||||
static struct brw_tcs_prog_key
|
||||
iris_to_brw_tcs_key(const struct intel_device_info *devinfo,
|
||||
iris_to_brw_tcs_key(const struct iris_screen *screen,
|
||||
const struct iris_tcs_prog_key *key)
|
||||
{
|
||||
return (struct brw_tcs_prog_key) {
|
||||
BRW_KEY_INIT(devinfo->ver, key->vue.base.program_string_id),
|
||||
BRW_KEY_INIT(screen->devinfo.ver, key->vue.base.program_string_id,
|
||||
key->vue.base.limit_trig_input_range),
|
||||
._tes_primitive_mode = key->_tes_primitive_mode,
|
||||
.input_vertices = key->input_vertices,
|
||||
.patch_outputs_written = key->patch_outputs_written,
|
||||
|
@ -123,31 +128,34 @@ iris_to_brw_tcs_key(const struct intel_device_info *devinfo,
|
|||
}
|
||||
|
||||
static struct brw_tes_prog_key
|
||||
iris_to_brw_tes_key(const struct intel_device_info *devinfo,
|
||||
iris_to_brw_tes_key(const struct iris_screen *screen,
|
||||
const struct iris_tes_prog_key *key)
|
||||
{
|
||||
return (struct brw_tes_prog_key) {
|
||||
BRW_KEY_INIT(devinfo->ver, key->vue.base.program_string_id),
|
||||
BRW_KEY_INIT(screen->devinfo.ver, key->vue.base.program_string_id,
|
||||
key->vue.base.limit_trig_input_range),
|
||||
.patch_inputs_read = key->patch_inputs_read,
|
||||
.inputs_read = key->inputs_read,
|
||||
};
|
||||
}
|
||||
|
||||
static struct brw_gs_prog_key
|
||||
iris_to_brw_gs_key(const struct intel_device_info *devinfo,
|
||||
iris_to_brw_gs_key(const struct iris_screen *screen,
|
||||
const struct iris_gs_prog_key *key)
|
||||
{
|
||||
return (struct brw_gs_prog_key) {
|
||||
BRW_KEY_INIT(devinfo->ver, key->vue.base.program_string_id),
|
||||
BRW_KEY_INIT(screen->devinfo.ver, key->vue.base.program_string_id,
|
||||
key->vue.base.limit_trig_input_range),
|
||||
};
|
||||
}
|
||||
|
||||
static struct brw_wm_prog_key
|
||||
iris_to_brw_fs_key(const struct intel_device_info *devinfo,
|
||||
iris_to_brw_fs_key(const struct iris_screen *screen,
|
||||
const struct iris_fs_prog_key *key)
|
||||
{
|
||||
return (struct brw_wm_prog_key) {
|
||||
BRW_KEY_INIT(devinfo->ver, key->base.program_string_id),
|
||||
BRW_KEY_INIT(screen->devinfo.ver, key->base.program_string_id,
|
||||
key->base.limit_trig_input_range),
|
||||
.nr_color_regions = key->nr_color_regions,
|
||||
.flat_shade = key->flat_shade,
|
||||
.alpha_test_replicate_alpha = key->alpha_test_replicate_alpha,
|
||||
|
@ -164,11 +172,12 @@ iris_to_brw_fs_key(const struct intel_device_info *devinfo,
|
|||
}
|
||||
|
||||
static struct brw_cs_prog_key
|
||||
iris_to_brw_cs_key(const struct intel_device_info *devinfo,
|
||||
iris_to_brw_cs_key(const struct iris_screen *screen,
|
||||
const struct iris_cs_prog_key *key)
|
||||
{
|
||||
return (struct brw_cs_prog_key) {
|
||||
BRW_KEY_INIT(devinfo->ver, key->base.program_string_id),
|
||||
BRW_KEY_INIT(screen->devinfo.ver, key->base.program_string_id,
|
||||
key->base.limit_trig_input_range),
|
||||
};
|
||||
}
|
||||
|
||||
|
@ -1113,7 +1122,6 @@ iris_debug_recompile(struct iris_screen *screen,
|
|||
|| list_is_singular(&ish->variants))
|
||||
return;
|
||||
|
||||
const struct intel_device_info *devinfo = &screen->devinfo;
|
||||
const struct brw_compiler *c = screen->compiler;
|
||||
const struct shader_info *info = &ish->nir->info;
|
||||
|
||||
|
@ -1130,22 +1138,22 @@ iris_debug_recompile(struct iris_screen *screen,
|
|||
|
||||
switch (info->stage) {
|
||||
case MESA_SHADER_VERTEX:
|
||||
old_key.vs = iris_to_brw_vs_key(devinfo, old_iris_key);
|
||||
old_key.vs = iris_to_brw_vs_key(screen, old_iris_key);
|
||||
break;
|
||||
case MESA_SHADER_TESS_CTRL:
|
||||
old_key.tcs = iris_to_brw_tcs_key(devinfo, old_iris_key);
|
||||
old_key.tcs = iris_to_brw_tcs_key(screen, old_iris_key);
|
||||
break;
|
||||
case MESA_SHADER_TESS_EVAL:
|
||||
old_key.tes = iris_to_brw_tes_key(devinfo, old_iris_key);
|
||||
old_key.tes = iris_to_brw_tes_key(screen, old_iris_key);
|
||||
break;
|
||||
case MESA_SHADER_GEOMETRY:
|
||||
old_key.gs = iris_to_brw_gs_key(devinfo, old_iris_key);
|
||||
old_key.gs = iris_to_brw_gs_key(screen, old_iris_key);
|
||||
break;
|
||||
case MESA_SHADER_FRAGMENT:
|
||||
old_key.wm = iris_to_brw_fs_key(devinfo, old_iris_key);
|
||||
old_key.wm = iris_to_brw_fs_key(screen, old_iris_key);
|
||||
break;
|
||||
case MESA_SHADER_COMPUTE:
|
||||
old_key.cs = iris_to_brw_cs_key(devinfo, old_iris_key);
|
||||
old_key.cs = iris_to_brw_cs_key(screen, old_iris_key);
|
||||
break;
|
||||
default:
|
||||
unreachable("invalid shader stage");
|
||||
|
@ -1342,7 +1350,7 @@ iris_compile_vs(struct iris_screen *screen,
|
|||
&vue_prog_data->vue_map, nir->info.outputs_written,
|
||||
nir->info.separate_shader, /* pos_slots */ 1);
|
||||
|
||||
struct brw_vs_prog_key brw_key = iris_to_brw_vs_key(devinfo, key);
|
||||
struct brw_vs_prog_key brw_key = iris_to_brw_vs_key(screen, key);
|
||||
|
||||
struct brw_compile_vs_params params = {
|
||||
.nir = nir,
|
||||
|
@ -1395,7 +1403,7 @@ iris_update_compiled_vs(struct iris_context *ice)
|
|||
struct iris_uncompiled_shader *ish =
|
||||
ice->shaders.uncompiled[MESA_SHADER_VERTEX];
|
||||
|
||||
struct iris_vs_prog_key key = { KEY_ID(vue.base) };
|
||||
struct iris_vs_prog_key key = { KEY_INIT(vue.base) };
|
||||
screen->vtbl.populate_vs_key(ice, &ish->nir->info, last_vue_stage(ice), &key);
|
||||
|
||||
struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_VS];
|
||||
|
@ -1501,7 +1509,7 @@ iris_compile_tcs(struct iris_screen *screen,
|
|||
struct iris_binding_table bt;
|
||||
|
||||
const struct iris_tcs_prog_key *const key = &shader->key.tcs;
|
||||
struct brw_tcs_prog_key brw_key = iris_to_brw_tcs_key(devinfo, key);
|
||||
struct brw_tcs_prog_key brw_key = iris_to_brw_tcs_key(screen, key);
|
||||
|
||||
if (ish) {
|
||||
nir = nir_shader_clone(mem_ctx, ish->nir);
|
||||
|
@ -1709,7 +1717,7 @@ iris_compile_tes(struct iris_screen *screen,
|
|||
brw_compute_tess_vue_map(&input_vue_map, key->inputs_read,
|
||||
key->patch_inputs_read);
|
||||
|
||||
struct brw_tes_prog_key brw_key = iris_to_brw_tes_key(devinfo, key);
|
||||
struct brw_tes_prog_key brw_key = iris_to_brw_tes_key(screen, key);
|
||||
|
||||
struct brw_compile_tes_params params = {
|
||||
.nir = nir,
|
||||
|
@ -1763,7 +1771,7 @@ iris_update_compiled_tes(struct iris_context *ice)
|
|||
struct iris_uncompiled_shader *ish =
|
||||
ice->shaders.uncompiled[MESA_SHADER_TESS_EVAL];
|
||||
|
||||
struct iris_tes_prog_key key = { KEY_ID(vue.base) };
|
||||
struct iris_tes_prog_key key = { KEY_INIT(vue.base) };
|
||||
get_unified_tess_slots(ice, &key.inputs_read, &key.patch_inputs_read);
|
||||
screen->vtbl.populate_tes_key(ice, &ish->nir->info, last_vue_stage(ice), &key);
|
||||
|
||||
|
@ -1848,7 +1856,7 @@ iris_compile_gs(struct iris_screen *screen,
|
|||
&vue_prog_data->vue_map, nir->info.outputs_written,
|
||||
nir->info.separate_shader, /* pos_slots */ 1);
|
||||
|
||||
struct brw_gs_prog_key brw_key = iris_to_brw_gs_key(devinfo, key);
|
||||
struct brw_gs_prog_key brw_key = iris_to_brw_gs_key(screen, key);
|
||||
|
||||
struct brw_compile_gs_params params = {
|
||||
.nir = nir,
|
||||
|
@ -1904,7 +1912,7 @@ iris_update_compiled_gs(struct iris_context *ice)
|
|||
struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
|
||||
|
||||
if (ish) {
|
||||
struct iris_gs_prog_key key = { KEY_ID(vue.base) };
|
||||
struct iris_gs_prog_key key = { KEY_INIT(vue.base) };
|
||||
screen->vtbl.populate_gs_key(ice, &ish->nir->info, last_vue_stage(ice), &key);
|
||||
|
||||
bool added;
|
||||
|
@ -1984,7 +1992,7 @@ iris_compile_fs(struct iris_screen *screen,
|
|||
|
||||
brw_nir_analyze_ubo_ranges(compiler, nir, NULL, prog_data->ubo_ranges);
|
||||
|
||||
struct brw_wm_prog_key brw_key = iris_to_brw_fs_key(devinfo, key);
|
||||
struct brw_wm_prog_key brw_key = iris_to_brw_fs_key(screen, key);
|
||||
|
||||
struct brw_compile_fs_params params = {
|
||||
.nir = nir,
|
||||
|
@ -2035,8 +2043,8 @@ iris_update_compiled_fs(struct iris_context *ice)
|
|||
struct u_upload_mgr *uploader = ice->shaders.uploader_driver;
|
||||
struct iris_uncompiled_shader *ish =
|
||||
ice->shaders.uncompiled[MESA_SHADER_FRAGMENT];
|
||||
struct iris_fs_prog_key key = { KEY_ID(base) };
|
||||
struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
|
||||
struct iris_fs_prog_key key = { KEY_INIT(base) };
|
||||
screen->vtbl.populate_fs_key(ice, &ish->nir->info, &key);
|
||||
|
||||
struct brw_vue_map *last_vue_map =
|
||||
|
@ -2261,7 +2269,7 @@ iris_compile_cs(struct iris_screen *screen,
|
|||
iris_setup_binding_table(devinfo, nir, &bt, /* num_render_targets */ 0,
|
||||
num_system_values, num_cbufs);
|
||||
|
||||
struct brw_cs_prog_key brw_key = iris_to_brw_cs_key(devinfo, key);
|
||||
struct brw_cs_prog_key brw_key = iris_to_brw_cs_key(screen, key);
|
||||
|
||||
struct brw_compile_cs_params params = {
|
||||
.nir = nir,
|
||||
|
@ -2303,9 +2311,8 @@ iris_update_compiled_cs(struct iris_context *ice)
|
|||
struct u_upload_mgr *uploader = ice->shaders.uploader_driver;
|
||||
struct iris_uncompiled_shader *ish =
|
||||
ice->shaders.uncompiled[MESA_SHADER_COMPUTE];
|
||||
|
||||
struct iris_cs_prog_key key = { KEY_ID(base) };
|
||||
struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
|
||||
struct iris_cs_prog_key key = { KEY_INIT(base) };
|
||||
screen->vtbl.populate_cs_key(ice, &key);
|
||||
|
||||
struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_CS];
|
||||
|
@ -2519,7 +2526,7 @@ iris_create_compute_state(struct pipe_context *ctx,
|
|||
// XXX: disallow more than 64KB of shared variables
|
||||
|
||||
if (screen->precompile) {
|
||||
struct iris_cs_prog_key key = { KEY_ID(base) };
|
||||
struct iris_cs_prog_key key = { KEY_INIT(base) };
|
||||
|
||||
struct iris_compiled_shader *shader =
|
||||
iris_create_shader_variant(screen, NULL, IRIS_CACHE_CS,
|
||||
|
@ -2599,13 +2606,13 @@ iris_create_shader_state(struct pipe_context *ctx,
|
|||
if (info->clip_distance_array_size == 0)
|
||||
ish->nos |= (1ull << IRIS_NOS_RASTERIZER);
|
||||
|
||||
key.vs = (struct iris_vs_prog_key) { KEY_ID(vue.base) };
|
||||
key.vs = (struct iris_vs_prog_key) { KEY_INIT(vue.base) };
|
||||
key_size = sizeof(key.vs);
|
||||
break;
|
||||
|
||||
case MESA_SHADER_TESS_CTRL: {
|
||||
key.tcs = (struct iris_tcs_prog_key) {
|
||||
KEY_ID(vue.base),
|
||||
KEY_INIT(vue.base),
|
||||
// XXX: make sure the linker fills this out from the TES...
|
||||
._tes_primitive_mode =
|
||||
info->tess._primitive_mode ? info->tess._primitive_mode
|
||||
|
@ -2632,7 +2639,7 @@ iris_create_shader_state(struct pipe_context *ctx,
|
|||
ish->nos |= (1ull << IRIS_NOS_RASTERIZER);
|
||||
|
||||
key.tes = (struct iris_tes_prog_key) {
|
||||
KEY_ID(vue.base),
|
||||
KEY_INIT(vue.base),
|
||||
// XXX: not ideal, need TCS output/TES input unification
|
||||
.inputs_read = info->inputs_read,
|
||||
.patch_inputs_read = info->patch_inputs_read,
|
||||
|
@ -2646,7 +2653,7 @@ iris_create_shader_state(struct pipe_context *ctx,
|
|||
if (info->clip_distance_array_size == 0)
|
||||
ish->nos |= (1ull << IRIS_NOS_RASTERIZER);
|
||||
|
||||
key.gs = (struct iris_gs_prog_key) { KEY_ID(vue.base) };
|
||||
key.gs = (struct iris_gs_prog_key) { KEY_INIT(vue.base) };
|
||||
key_size = sizeof(key.gs);
|
||||
break;
|
||||
|
||||
|
@ -2672,7 +2679,7 @@ iris_create_shader_state(struct pipe_context *ctx,
|
|||
const struct intel_device_info *devinfo = &screen->devinfo;
|
||||
|
||||
key.fs = (struct iris_fs_prog_key) {
|
||||
KEY_ID(base),
|
||||
KEY_INIT(base),
|
||||
.nr_color_regions = util_bitcount(color_outputs),
|
||||
.coherent_fb_fetch = devinfo->ver >= 9,
|
||||
.input_slots_valid =
|
||||
|
|
|
@ -843,6 +843,8 @@ iris_screen_create(int fd, const struct pipe_screen_config *config)
|
|||
driQueryOptionb(config->options, "always_flush_cache");
|
||||
screen->driconf.sync_compile =
|
||||
driQueryOptionb(config->options, "sync_compile");
|
||||
screen->driconf.limit_trig_input_range =
|
||||
driQueryOptionb(config->options, "limit_trig_input_range");
|
||||
|
||||
screen->precompile = env_var_as_boolean("shader_precompile", true);
|
||||
|
||||
|
|
|
@ -181,6 +181,7 @@ struct iris_screen {
|
|||
bool disable_throttling;
|
||||
bool always_flush_cache;
|
||||
bool sync_compile;
|
||||
bool limit_trig_input_range;
|
||||
} driconf;
|
||||
|
||||
/** Does the kernel support various features (KERNEL_HAS_* bitfield)? */
|
||||
|
|
|
@ -247,6 +247,13 @@ struct brw_base_prog_key {
|
|||
|
||||
enum brw_subgroup_size_type subgroup_size_type;
|
||||
bool robust_buffer_access;
|
||||
|
||||
/**
|
||||
* Apply workarounds for SIN and COS input range problems.
|
||||
* This limits input range for SIN and COS to [-2*pi : 2*pi] to
|
||||
* avoid precision issues.
|
||||
*/
|
||||
bool limit_trig_input_range;
|
||||
struct brw_sampler_prog_key_data tex;
|
||||
};
|
||||
|
||||
|
|
|
@ -1409,6 +1409,9 @@ brw_nir_apply_key(nir_shader *nir,
|
|||
};
|
||||
OPT(nir_lower_subgroups, &subgroups_options);
|
||||
|
||||
if (key->limit_trig_input_range)
|
||||
OPT(brw_nir_limit_trig_input_range_workaround);
|
||||
|
||||
if (progress)
|
||||
brw_nir_optimize(nir, compiler, is_scalar, false);
|
||||
}
|
||||
|
|
|
@ -142,6 +142,8 @@ bool brw_nir_apply_attribute_workarounds(nir_shader *nir,
|
|||
|
||||
bool brw_nir_apply_trig_workarounds(nir_shader *nir);
|
||||
|
||||
bool brw_nir_limit_trig_input_range_workaround(nir_shader *nir);
|
||||
|
||||
void brw_nir_apply_tcs_quads_workaround(nir_shader *nir);
|
||||
|
||||
void brw_nir_apply_key(nir_shader *nir,
|
||||
|
|
|
@ -33,12 +33,17 @@
|
|||
|
||||
import argparse
|
||||
import sys
|
||||
from math import pi
|
||||
|
||||
TRIG_WORKAROUNDS = [
|
||||
(('fsin', 'x(is_not_const)'), ('fmul', ('fsin', 'x'), 0.99997)),
|
||||
(('fcos', 'x(is_not_const)'), ('fmul', ('fcos', 'x'), 0.99997)),
|
||||
]
|
||||
|
||||
LIMIT_TRIG_INPUT_RANGE_WORKAROUND = [
|
||||
(('fsin', 'x(is_not_const)'), ('fsin', ('fmod', 'x', 2.0 * pi))),
|
||||
(('fcos', 'x(is_not_const)'), ('fcos', ('fmod', 'x', 2.0 * pi))),
|
||||
]
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser()
|
||||
|
@ -54,6 +59,8 @@ def run():
|
|||
print('#include "brw_nir.h"')
|
||||
print(nir_algebraic.AlgebraicPass("brw_nir_apply_trig_workarounds",
|
||||
TRIG_WORKAROUNDS).render())
|
||||
print(nir_algebraic.AlgebraicPass("brw_nir_limit_trig_input_range_workaround",
|
||||
LIMIT_TRIG_INPUT_RANGE_WORKAROUND).render())
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
|
|
@ -74,6 +74,7 @@ static const driOptionDescription anv_dri_options[] = {
|
|||
DRI_CONF_SECTION_DEBUG
|
||||
DRI_CONF_ALWAYS_FLUSH_CACHE(false)
|
||||
DRI_CONF_VK_WSI_FORCE_BGRA8_UNORM_FIRST(false)
|
||||
DRI_CONF_LIMIT_TRIG_INPUT_RANGE(false)
|
||||
DRI_CONF_SECTION_END
|
||||
};
|
||||
|
||||
|
@ -1104,6 +1105,8 @@ anv_init_dri_options(struct anv_instance *instance)
|
|||
|
||||
instance->assume_full_subgroups =
|
||||
driQueryOptionb(&instance->dri_options, "anv_assume_full_subgroups");
|
||||
instance->limit_trig_input_range =
|
||||
driQueryOptionb(&instance->dri_options, "limit_trig_input_range");
|
||||
}
|
||||
|
||||
VkResult anv_CreateInstance(
|
||||
|
|
|
@ -314,26 +314,28 @@ populate_sampler_prog_key(const struct intel_device_info *devinfo,
|
|||
}
|
||||
|
||||
static void
|
||||
populate_base_prog_key(const struct intel_device_info *devinfo,
|
||||
populate_base_prog_key(const struct anv_device *device,
|
||||
enum brw_subgroup_size_type subgroup_size_type,
|
||||
bool robust_buffer_acccess,
|
||||
struct brw_base_prog_key *key)
|
||||
{
|
||||
key->subgroup_size_type = subgroup_size_type;
|
||||
key->robust_buffer_access = robust_buffer_acccess;
|
||||
key->limit_trig_input_range =
|
||||
device->physical->instance->limit_trig_input_range;
|
||||
|
||||
populate_sampler_prog_key(devinfo, &key->tex);
|
||||
populate_sampler_prog_key(&device->info, &key->tex);
|
||||
}
|
||||
|
||||
static void
|
||||
populate_vs_prog_key(const struct intel_device_info *devinfo,
|
||||
populate_vs_prog_key(const struct anv_device *device,
|
||||
enum brw_subgroup_size_type subgroup_size_type,
|
||||
bool robust_buffer_acccess,
|
||||
struct brw_vs_prog_key *key)
|
||||
{
|
||||
memset(key, 0, sizeof(*key));
|
||||
|
||||
populate_base_prog_key(devinfo, subgroup_size_type,
|
||||
populate_base_prog_key(device, subgroup_size_type,
|
||||
robust_buffer_acccess, &key->base);
|
||||
|
||||
/* XXX: Handle vertex input work-arounds */
|
||||
|
@ -342,7 +344,7 @@ populate_vs_prog_key(const struct intel_device_info *devinfo,
|
|||
}
|
||||
|
||||
static void
|
||||
populate_tcs_prog_key(const struct intel_device_info *devinfo,
|
||||
populate_tcs_prog_key(const struct anv_device *device,
|
||||
enum brw_subgroup_size_type subgroup_size_type,
|
||||
bool robust_buffer_acccess,
|
||||
unsigned input_vertices,
|
||||
|
@ -350,33 +352,33 @@ populate_tcs_prog_key(const struct intel_device_info *devinfo,
|
|||
{
|
||||
memset(key, 0, sizeof(*key));
|
||||
|
||||
populate_base_prog_key(devinfo, subgroup_size_type,
|
||||
populate_base_prog_key(device, subgroup_size_type,
|
||||
robust_buffer_acccess, &key->base);
|
||||
|
||||
key->input_vertices = input_vertices;
|
||||
}
|
||||
|
||||
static void
|
||||
populate_tes_prog_key(const struct intel_device_info *devinfo,
|
||||
populate_tes_prog_key(const struct anv_device *device,
|
||||
enum brw_subgroup_size_type subgroup_size_type,
|
||||
bool robust_buffer_acccess,
|
||||
struct brw_tes_prog_key *key)
|
||||
{
|
||||
memset(key, 0, sizeof(*key));
|
||||
|
||||
populate_base_prog_key(devinfo, subgroup_size_type,
|
||||
populate_base_prog_key(device, subgroup_size_type,
|
||||
robust_buffer_acccess, &key->base);
|
||||
}
|
||||
|
||||
static void
|
||||
populate_gs_prog_key(const struct intel_device_info *devinfo,
|
||||
populate_gs_prog_key(const struct anv_device *device,
|
||||
enum brw_subgroup_size_type subgroup_size_type,
|
||||
bool robust_buffer_acccess,
|
||||
struct brw_gs_prog_key *key)
|
||||
{
|
||||
memset(key, 0, sizeof(*key));
|
||||
|
||||
populate_base_prog_key(devinfo, subgroup_size_type,
|
||||
populate_base_prog_key(device, subgroup_size_type,
|
||||
robust_buffer_acccess, &key->base);
|
||||
}
|
||||
|
||||
|
@ -436,25 +438,25 @@ pipeline_has_coarse_pixel(const struct anv_graphics_pipeline *pipeline,
|
|||
}
|
||||
|
||||
static void
|
||||
populate_task_prog_key(const struct intel_device_info *devinfo,
|
||||
populate_task_prog_key(const struct anv_device *device,
|
||||
enum brw_subgroup_size_type subgroup_size_type,
|
||||
bool robust_buffer_access,
|
||||
struct brw_task_prog_key *key)
|
||||
{
|
||||
memset(key, 0, sizeof(*key));
|
||||
|
||||
populate_base_prog_key(devinfo, subgroup_size_type, robust_buffer_access, &key->base);
|
||||
populate_base_prog_key(device, subgroup_size_type, robust_buffer_access, &key->base);
|
||||
}
|
||||
|
||||
static void
|
||||
populate_mesh_prog_key(const struct intel_device_info *devinfo,
|
||||
populate_mesh_prog_key(const struct anv_device *device,
|
||||
enum brw_subgroup_size_type subgroup_size_type,
|
||||
bool robust_buffer_access,
|
||||
struct brw_mesh_prog_key *key)
|
||||
{
|
||||
memset(key, 0, sizeof(*key));
|
||||
|
||||
populate_base_prog_key(devinfo, subgroup_size_type, robust_buffer_access, &key->base);
|
||||
populate_base_prog_key(device, subgroup_size_type, robust_buffer_access, &key->base);
|
||||
}
|
||||
|
||||
static void
|
||||
|
@ -467,11 +469,10 @@ populate_wm_prog_key(const struct anv_graphics_pipeline *pipeline,
|
|||
struct brw_wm_prog_key *key)
|
||||
{
|
||||
const struct anv_device *device = pipeline->base.device;
|
||||
const struct intel_device_info *devinfo = &device->info;
|
||||
|
||||
memset(key, 0, sizeof(*key));
|
||||
|
||||
populate_base_prog_key(devinfo, flags, robust_buffer_acccess, &key->base);
|
||||
populate_base_prog_key(device, flags, robust_buffer_acccess, &key->base);
|
||||
|
||||
/* We set this to 0 here and set to the actual value before we call
|
||||
* brw_compile_fs.
|
||||
|
@ -515,26 +516,26 @@ populate_wm_prog_key(const struct anv_graphics_pipeline *pipeline,
|
|||
}
|
||||
|
||||
static void
|
||||
populate_cs_prog_key(const struct intel_device_info *devinfo,
|
||||
populate_cs_prog_key(const struct anv_device *device,
|
||||
enum brw_subgroup_size_type subgroup_size_type,
|
||||
bool robust_buffer_acccess,
|
||||
struct brw_cs_prog_key *key)
|
||||
{
|
||||
memset(key, 0, sizeof(*key));
|
||||
|
||||
populate_base_prog_key(devinfo, subgroup_size_type,
|
||||
populate_base_prog_key(device, subgroup_size_type,
|
||||
robust_buffer_acccess, &key->base);
|
||||
}
|
||||
|
||||
static void
|
||||
populate_bs_prog_key(const struct intel_device_info *devinfo,
|
||||
populate_bs_prog_key(const struct anv_device *device,
|
||||
VkPipelineShaderStageCreateFlags flags,
|
||||
bool robust_buffer_access,
|
||||
struct brw_bs_prog_key *key)
|
||||
{
|
||||
memset(key, 0, sizeof(*key));
|
||||
|
||||
populate_base_prog_key(devinfo, flags, robust_buffer_access, &key->base);
|
||||
populate_base_prog_key(device, flags, robust_buffer_access, &key->base);
|
||||
}
|
||||
|
||||
struct anv_pipeline_stage {
|
||||
|
@ -1466,26 +1467,26 @@ anv_pipeline_compile_graphics(struct anv_graphics_pipeline *pipeline,
|
|||
enum brw_subgroup_size_type subgroup_size_type =
|
||||
anv_subgroup_size_type(stage, stages[stage].module, sinfo->flags, rss_info);
|
||||
|
||||
const struct intel_device_info *devinfo = &pipeline->base.device->info;
|
||||
const struct anv_device *device = pipeline->base.device;
|
||||
switch (stage) {
|
||||
case MESA_SHADER_VERTEX:
|
||||
populate_vs_prog_key(devinfo, subgroup_size_type,
|
||||
populate_vs_prog_key(device, subgroup_size_type,
|
||||
pipeline->base.device->robust_buffer_access,
|
||||
&stages[stage].key.vs);
|
||||
break;
|
||||
case MESA_SHADER_TESS_CTRL:
|
||||
populate_tcs_prog_key(devinfo, subgroup_size_type,
|
||||
populate_tcs_prog_key(device, subgroup_size_type,
|
||||
pipeline->base.device->robust_buffer_access,
|
||||
info->pTessellationState->patchControlPoints,
|
||||
&stages[stage].key.tcs);
|
||||
break;
|
||||
case MESA_SHADER_TESS_EVAL:
|
||||
populate_tes_prog_key(devinfo, subgroup_size_type,
|
||||
populate_tes_prog_key(device, subgroup_size_type,
|
||||
pipeline->base.device->robust_buffer_access,
|
||||
&stages[stage].key.tes);
|
||||
break;
|
||||
case MESA_SHADER_GEOMETRY:
|
||||
populate_gs_prog_key(devinfo, subgroup_size_type,
|
||||
populate_gs_prog_key(device, subgroup_size_type,
|
||||
pipeline->base.device->robust_buffer_access,
|
||||
&stages[stage].key.gs);
|
||||
break;
|
||||
|
@ -1503,12 +1504,12 @@ anv_pipeline_compile_graphics(struct anv_graphics_pipeline *pipeline,
|
|||
break;
|
||||
}
|
||||
case MESA_SHADER_TASK:
|
||||
populate_task_prog_key(devinfo, subgroup_size_type,
|
||||
populate_task_prog_key(device, subgroup_size_type,
|
||||
pipeline->base.device->robust_buffer_access,
|
||||
&stages[stage].key.task);
|
||||
break;
|
||||
case MESA_SHADER_MESH:
|
||||
populate_mesh_prog_key(devinfo, subgroup_size_type,
|
||||
populate_mesh_prog_key(device, subgroup_size_type,
|
||||
pipeline->base.device->robust_buffer_access,
|
||||
&stages[stage].key.mesh);
|
||||
break;
|
||||
|
@ -1947,7 +1948,7 @@ anv_pipeline_compile_cs(struct anv_compute_pipeline *pipeline,
|
|||
const enum brw_subgroup_size_type subgroup_size_type =
|
||||
anv_subgroup_size_type(MESA_SHADER_COMPUTE, stage.module, info->stage.flags, rss_info);
|
||||
|
||||
populate_cs_prog_key(&device->info, subgroup_size_type,
|
||||
populate_cs_prog_key(device, subgroup_size_type,
|
||||
device->robust_buffer_access,
|
||||
&stage.key.cs);
|
||||
|
||||
|
@ -2749,7 +2750,7 @@ anv_pipeline_init_ray_tracing_stages(struct anv_ray_tracing_pipeline *pipeline,
|
|||
},
|
||||
};
|
||||
|
||||
populate_bs_prog_key(&pipeline->base.device->info, sinfo->flags,
|
||||
populate_bs_prog_key(pipeline->base.device, sinfo->flags,
|
||||
pipeline->base.device->robust_buffer_access,
|
||||
&stages[i].key.bs);
|
||||
|
||||
|
|
|
@ -1085,6 +1085,7 @@ struct anv_instance {
|
|||
* Workarounds for game bugs.
|
||||
*/
|
||||
bool assume_full_subgroups;
|
||||
bool limit_trig_input_range;
|
||||
};
|
||||
|
||||
VkResult anv_init_wsi(struct anv_physical_device *physical_device);
|
||||
|
|
|
@ -291,6 +291,10 @@
|
|||
DRI_CONF_OPT_B(vk_dont_care_as_load, def, \
|
||||
"Treat VK_ATTACHMENT_LOAD_OP_DONT_CARE as LOAD_OP_LOAD, workaround on tiler GPUs for games that confuse these two load ops")
|
||||
|
||||
#define DRI_CONF_LIMIT_TRIG_INPUT_RANGE(def) \
|
||||
DRI_CONF_OPT_B(limit_trig_input_range, def, \
|
||||
"Limit trig input range to [-2p : 2p] to improve sin/cos calculation precision on Intel")
|
||||
|
||||
/**
|
||||
* \brief Image quality-related options
|
||||
*/
|
||||
|
|
Loading…
Reference in New Issue