/*
 * Copyright © 2014 Connor Abbott
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Connor Abbott (cwabbott0@gmail.com)
 *
 */
#ifndef NIR_H
#define NIR_H

#include "util/hash_table.h"
#include "compiler/glsl/list.h"
#include "GL/gl.h" /* GLenum */
#include "util/list.h"
#include "util/ralloc.h"
#include "util/set.h"
#include "util/bitscan.h"
#include "util/bitset.h"
#include "util/enum_operators.h"
#include "util/macros.h"
#include "util/format/u_format.h"
#include "compiler/nir_types.h"
#include "compiler/shader_enums.h"
#include "compiler/shader_info.h"
#define XXH_INLINE_ALL
#include "util/xxhash.h"
#include <stdio.h>

#ifndef NDEBUG
#include "util/debug.h"
#endif /* NDEBUG */

#include "nir_opcodes.h"

#if defined(_WIN32) && !defined(snprintf)
#define snprintf _snprintf
#endif

#ifdef __cplusplus
extern "C" {
#endif

#define NIR_FALSE 0u
#define NIR_TRUE (~0u)
#define NIR_MAX_VEC_COMPONENTS 16
#define NIR_MAX_MATRIX_COLUMNS 4
#define NIR_STREAM_PACKED (1 << 8)
typedef uint16_t nir_component_mask_t;

static inline bool
nir_num_components_valid(unsigned num_components)
{
   return (num_components >= 1  &&
           num_components <= 4) ||
          num_components == 8   ||
          num_components == 16;
}
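
/*
 * Sketch (editor's illustration, not part of the header): NIR vectors are
 * vec1..vec4 plus vec8 and vec16, so:
 *
 *    assert(nir_num_components_valid(4));
 *    assert(!nir_num_components_valid(5));
 */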

/** Defines a cast function
 *
 * This macro defines a cast function from in_type to out_type where
 * out_type is some structure type that contains a field of type in_type.
 *
 * Note that you have to be a bit careful as the generated cast function
 * destroys constness.
 */
#define NIR_DEFINE_CAST(name, in_type, out_type, field, \
                        type_field, type_value)         \
static inline out_type *                                \
name(const in_type *parent)                             \
{                                                       \
   assert(parent && parent->type_field == type_value);  \
   return exec_node_data(out_type, parent, field);      \
}
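
/*
 * Usage sketch (editor's illustration, not part of the header): given a
 * struct that embeds a nir_instr (declared later in this header),
 * NIR_DEFINE_CAST generates a checked downcast; "my_alu_instr" and
 * "my_instr_as_alu" are hypothetical names:
 *
 *    typedef struct {
 *       nir_instr instr;
 *       ...
 *    } my_alu_instr;
 *
 *    NIR_DEFINE_CAST(my_instr_as_alu, nir_instr, my_alu_instr, instr,
 *                    type, nir_instr_type_alu)
 *
 * The generated function asserts parent->type == nir_instr_type_alu and
 * then recovers the containing my_alu_instr from the embedded nir_instr
 * field.
 */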

struct nir_function;
struct nir_shader;
struct nir_instr;
struct nir_builder;

/**
 * Description of built-in state associated with a uniform
 *
 * \sa nir_variable::state_slots
 */
typedef struct {
   gl_state_index16 tokens[STATE_LENGTH];
   uint16_t swizzle;
} nir_state_slot;

typedef enum {
   nir_var_shader_in       = (1 << 0),
   nir_var_shader_out      = (1 << 1),
   nir_var_shader_temp     = (1 << 2),
   nir_var_function_temp   = (1 << 3),
   nir_var_uniform         = (1 << 4),
   nir_var_mem_ubo         = (1 << 5),
   nir_var_system_value    = (1 << 6),
   nir_var_mem_ssbo        = (1 << 7),
   nir_var_mem_shared      = (1 << 8),
   nir_var_mem_global      = (1 << 9),
   nir_var_mem_push_const  = (1 << 10), /* not actually used for variables */
   nir_var_mem_constant    = (1 << 11),
   nir_num_variable_modes  = 12,
   nir_var_all             = (1 << nir_num_variable_modes) - 1,
} nir_variable_mode;
MESA_DEFINE_CPP_ENUM_BITFIELD_OPERATORS(nir_variable_mode)
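
/*
 * Variable modes form a bitmask, so code that handles several variable
 * classes at once combines them; the MESA_DEFINE_CPP_ENUM_BITFIELD_OPERATORS
 * invocation above is what lets such expressions compile in C++ as well as
 * C (usage sketch, editor's illustration):
 *
 *    nir_variable_mode io_modes = nir_var_shader_in | nir_var_shader_out;
 */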

/**
 * Rounding modes.
 */
typedef enum {
   nir_rounding_mode_undef = 0,
   nir_rounding_mode_rtne  = 1, /* round to nearest even */
   nir_rounding_mode_ru    = 2, /* round up */
   nir_rounding_mode_rd    = 3, /* round down */
   nir_rounding_mode_rtz   = 4, /* round towards zero */
} nir_rounding_mode;

typedef union {
   bool b;
   float f32;
   double f64;
   int8_t i8;
   uint8_t u8;
   int16_t i16;
   uint16_t u16;
   int32_t i32;
   uint32_t u32;
   int64_t i64;
   uint64_t u64;
} nir_const_value;

#define nir_const_value_to_array(arr, c, components, m) \
do { \
   for (unsigned i = 0; i < components; ++i) \
      arr[i] = c[i].m; \
} while (false)
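
/*
 * Usage sketch (editor's illustration, not part of the header): copying
 * the channels of a constant into a plain array through the .u32 member;
 * "load" stands for a nir_load_const_instr, which is declared later in
 * the full header:
 *
 *    uint32_t vals[NIR_MAX_VEC_COMPONENTS];
 *    nir_const_value_to_array(vals, load->value, load->def.num_components, u32);
 */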

static inline nir_const_value
nir_const_value_for_raw_uint(uint64_t x, unsigned bit_size)
{
   nir_const_value v;
   memset(&v, 0, sizeof(v));

   switch (bit_size) {
   case 1:  v.b   = x;  break;
   case 8:  v.u8  = x;  break;
   case 16: v.u16 = x;  break;
   case 32: v.u32 = x;  break;
   case 64: v.u64 = x;  break;
   default:
      unreachable("Invalid bit size");
   }

   return v;
}

static inline nir_const_value
nir_const_value_for_int(int64_t i, unsigned bit_size)
{
   nir_const_value v;
   memset(&v, 0, sizeof(v));

   assert(bit_size <= 64);
   if (bit_size < 64) {
      assert(i >= (-(1ll << (bit_size - 1))));
      assert(i < (1ll << (bit_size - 1)));
   }

   return nir_const_value_for_raw_uint(i, bit_size);
}

static inline nir_const_value
nir_const_value_for_uint(uint64_t u, unsigned bit_size)
{
   nir_const_value v;
   memset(&v, 0, sizeof(v));

   assert(bit_size <= 64);
   if (bit_size < 64)
      assert(u < (1ull << bit_size));

   return nir_const_value_for_raw_uint(u, bit_size);
}

static inline nir_const_value
nir_const_value_for_bool(bool b, unsigned bit_size)
{
   /* Booleans use a 0/-1 convention */
   return nir_const_value_for_int(-(int)b, bit_size);
}
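
/*
 * Sketch (editor's illustration) of the 0/-1 convention: a 32-bit "true"
 * is stored as all ones, so
 *
 *    assert(nir_const_value_for_bool(true, 32).u32 == 0xffffffffu);
 *    assert(nir_const_value_for_bool(false, 32).u32 == 0);
 */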

/* This one isn't inline because it requires half-float conversion */
nir_const_value nir_const_value_for_float(double b, unsigned bit_size);

static inline int64_t
nir_const_value_as_int(nir_const_value value, unsigned bit_size)
{
   switch (bit_size) {
   /* int1_t uses 0/-1 convention */
   case 1:  return -(int)value.b;
   case 8:  return value.i8;
   case 16: return value.i16;
   case 32: return value.i32;
   case 64: return value.i64;
   default:
      unreachable("Invalid bit size");
   }
}
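
/*
 * Round-trip sketch (editor's illustration): the as_* helpers invert the
 * for_* constructors at the same bit size, e.g.:
 *
 *    assert(nir_const_value_as_int(nir_const_value_for_int(-5, 16), 16) == -5);
 */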

static inline uint64_t
nir_const_value_as_uint(nir_const_value value, unsigned bit_size)
{
   switch (bit_size) {
   case 1:  return value.b;
   case 8:  return value.u8;
   case 16: return value.u16;
   case 32: return value.u32;
   case 64: return value.u64;
   default:
      unreachable("Invalid bit size");
   }
}

static inline bool
nir_const_value_as_bool(nir_const_value value, unsigned bit_size)
{
   int64_t i = nir_const_value_as_int(value, bit_size);

   /* Booleans of any size use 0/-1 convention */
   assert(i == 0 || i == -1);

   return i;
}

/* This one isn't inline because it requires half-float conversion */
double nir_const_value_as_float(nir_const_value value, unsigned bit_size);

typedef struct nir_constant {
   /**
    * Value of the constant.
    *
    * The field used to back the values supplied by the constant is determined
    * by the type associated with the \c nir_variable. Constants may be
    * scalars, vectors, or matrices.
    */
   nir_const_value values[NIR_MAX_VEC_COMPONENTS];

   /* We could get this from the var->type, but it makes clone *much* easier
    * to not have to care about the type.
    */
   unsigned num_elements;

   /* Array elements / Structure Fields */
   struct nir_constant **elements;
} nir_constant;

/**
 * \brief Layout qualifiers for gl_FragDepth.
 *
 * The AMD/ARB_conservative_depth extensions allow gl_FragDepth to be redeclared
 * with a layout qualifier.
 */
typedef enum {
   nir_depth_layout_none, /**< No depth layout is specified. */
   nir_depth_layout_any,
   nir_depth_layout_greater,
   nir_depth_layout_less,
   nir_depth_layout_unchanged
} nir_depth_layout;

/**
 * Enum keeping track of how a variable was declared.
 */
typedef enum {
   /**
    * Normal declaration.
    */
   nir_var_declared_normally = 0,

   /**
    * Variable is implicitly generated by the compiler and should not be
    * visible via the API.
    */
   nir_var_hidden,
} nir_var_declaration_type;

/**
 * Either a uniform, global variable, shader input, or shader output. Based on
 * ir_variable - it should be easy to translate between the two.
 */
typedef struct nir_variable {
   struct exec_node node;

   /**
    * Declared type of the variable
    */
   const struct glsl_type *type;

   /**
    * Declared name of the variable
    */
   char *name;

   struct nir_variable_data {
      /**
       * Storage class of the variable.
       *
       * \sa nir_variable_mode
       */
      unsigned mode:12;

      /**
       * Is the variable read-only?
       *
       * This is set for variables declared as \c const, shader inputs,
       * and uniforms.
       */
      unsigned read_only:1;
      unsigned centroid:1;
      unsigned sample:1;
      unsigned patch:1;
      unsigned invariant:1;

      /**
       * Precision qualifier.
       *
       * In desktop GLSL we do not care about precision qualifiers at all, in
       * fact, the spec says that precision qualifiers are ignored.
       *
       * To make things easy, we make it so that this field is always
       * GLSL_PRECISION_NONE on desktop shaders. This way all the variables
       * have the same precision value and the checks we add in the compiler
       * for this field will never break a desktop shader compile.
       */
      unsigned precision:2;

      /**
       * Can this variable be coalesced with another?
       *
       * This is set by nir_lower_io_to_temporaries to say that any
       * copies involving this variable should stay put. Propagating it can
       * duplicate the resulting load/store, which is not wanted, and may
       * result in a load/store of the variable with an indirect offset which
       * the backend may not be able to handle.
       */
      unsigned cannot_coalesce:1;

      /**
       * When separate shader programs are enabled, only inputs/outputs between
       * the stages of a multi-stage separate program can be safely removed
       * from the shader interface. Other inputs/outputs must remain active.
       *
       * This is also used to make sure xfb varyings that are unused by the
       * fragment shader are not removed.
       */
      unsigned always_active_io:1;

      /**
       * Interpolation mode for shader inputs / outputs
       *
       * \sa glsl_interp_mode
       */
      unsigned interpolation:3;

      /**
       * If non-zero, then this variable may be packed along with other variables
       * into a single varying slot, so this offset should be applied when
       * accessing components. For example, an offset of 1 means that the x
       * component of this variable is actually stored in component y of the
       * location specified by \c location.
       */
      unsigned location_frac:2;

      /**
       * If true, this variable represents an array of scalars that should
       * be tightly packed. In other words, consecutive array elements
       * should be stored one component apart, rather than one slot apart.
       */
      unsigned compact:1;

      /**
       * Whether this is a fragment shader output implicitly initialized with
       * the previous contents of the specified render target at the
       * framebuffer location corresponding to this shader invocation.
       */
      unsigned fb_fetch_output:1;

      /**
       * Non-zero if this variable is considered bindless as defined by
       * ARB_bindless_texture.
       */
      unsigned bindless:1;

      /**
       * Was an explicit binding set in the shader?
       */
      unsigned explicit_binding:1;

      /**
       * Was the location explicitly set in the shader?
       *
       * If the location is explicitly set in the shader, it \b cannot be changed
       * by the linker or by the API (e.g., calls to \c glBindAttribLocation have
       * no effect).
       */
      unsigned explicit_location:1;

      /**
       * Was a transform feedback buffer set in the shader?
       */
      unsigned explicit_xfb_buffer:1;

      /**
       * Was a transform feedback stride set in the shader?
       */
      unsigned explicit_xfb_stride:1;

      /**
       * Was an explicit offset set in the shader?
       */
      unsigned explicit_offset:1;

      /**
       * Layout of the matrix. Uses glsl_matrix_layout values.
       */
      unsigned matrix_layout:2;

      /**
       * Non-zero if this variable was created by lowering a named interface
       * block.
       */
      unsigned from_named_ifc_block:1;

      /**
       * How the variable was declared. See nir_var_declaration_type.
       *
       * This is used to detect variables generated by the compiler, so should
       * not be visible via the API.
       */
      unsigned how_declared:2;

      /**
       * Is this variable per-view? If so, we know it must be an array with
       * size corresponding to the number of views.
       */
      unsigned per_view:1;

      /**
       * \brief Layout qualifier for gl_FragDepth. See nir_depth_layout.
       *
       * This is not equal to \c nir_depth_layout_none if and only if this
       * variable is \c gl_FragDepth and a layout qualifier is specified.
       */
      unsigned depth_layout:3;

      /**
       * Vertex stream output identifier.
       *
       * For packed outputs, NIR_STREAM_PACKED is set and bits [2*i+1,2*i]
       * indicate the stream of the i-th component.
       */
      unsigned stream:9;
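
      /*
       * Decoding sketch (editor's illustration): when NIR_STREAM_PACKED is
       * set, the stream of component i can be recovered as
       *
       *    unsigned stream_i = (var->data.stream >> (2 * i)) & 0x3;
       *
       * where "var" stands for any nir_variable.
       */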

      /**
       * See gl_access_qualifier.
       *
       * Access flags for memory variables (SSBO/global), image uniforms, and
       * bindless images in uniforms/inputs/outputs.
       */
      unsigned access:8;

      /**
       * Descriptor set binding for sampler or UBO.
       */
      unsigned descriptor_set:5;

      /**
       * Output index for dual source blending.
       */
      unsigned index;

      /**
       * Initial binding point for a sampler or UBO.
       *
       * For array types, this represents the binding point for the first element.
       */
      unsigned binding;

      /**
       * Storage location of the base of this variable
       *
       * The precise meaning of this field depends on the nature of the variable.
       *
       *   - Vertex shader input: one of the values from \c gl_vert_attrib.
       *   - Vertex shader output: one of the values from \c gl_varying_slot.
       *   - Geometry shader input: one of the values from \c gl_varying_slot.
       *   - Geometry shader output: one of the values from \c gl_varying_slot.
       *   - Fragment shader input: one of the values from \c gl_varying_slot.
       *   - Fragment shader output: one of the values from \c gl_frag_result.
       *   - Uniforms: Per-stage uniform slot number for default uniform block.
       *   - Uniforms: Index within the uniform block definition for UBO members.
       *   - Non-UBO Uniforms: uniform slot number.
       *   - Other: This field is not currently used.
       *
       * If the variable is a uniform, shader input, or shader output, and the
       * slot has not been assigned, the value will be -1.
       */
      int location;

      /**
       * The actual location of the variable in the IR. Only valid for inputs,
       * outputs, and uniforms (including samplers and images).
       */
      unsigned driver_location;

      /**
       * Location an atomic counter or transform feedback is stored at.
       */
      unsigned offset;

      union {
         struct {
            /** Image internal format if specified explicitly, otherwise PIPE_FORMAT_NONE. */
            enum pipe_format format;
         } image;

         struct {
            /**
             * For OpenCL inline samplers. See cl_sampler_addressing_mode and cl_sampler_filter_mode
             */
            unsigned is_inline_sampler : 1;
            unsigned addressing_mode : 3;
            unsigned normalized_coordinates : 1;
            unsigned filter_mode : 1;
         } sampler;

         struct {
            /**
             * Transform feedback buffer.
             */
            uint16_t buffer:2;

            /**
             * Transform feedback stride.
             */
            uint16_t stride;
         } xfb;
      };
   } data;

   /**
    * Identifier for this variable generated by nir_index_vars() that is unique
    * among other variables in the same exec_list.
    */
   unsigned index;

   /* Number of nir_variable_data members */
   uint16_t num_members;

   /**
    * Built-in state that backs this uniform
    *
    * Once set at variable creation, \c state_slots must remain invariant.
    * This is because, ideally, this array would be shared by all clones of
    * this variable in the IR tree. In other words, we'd really like for it
    * to be a fly-weight.
    *
    * If the variable is not a uniform, \c num_state_slots will be zero and
    * \c state_slots will be \c NULL.
    */
   /*@{*/
   uint16_t num_state_slots;    /**< Number of state slots used */
   nir_state_slot *state_slots; /**< State descriptors. */
   /*@}*/

   /**
    * Constant expression assigned in the initializer of the variable
    *
    * This field should only be used temporarily by creators of NIR shaders
    * and then lower_constant_initializers can be used to get rid of them.
    * Most of the rest of NIR ignores this field or asserts that it's NULL.
    */
   nir_constant *constant_initializer;

   /**
    * Global variable assigned in the initializer of the variable
    * This field should only be used temporarily by creators of NIR shaders
    * and then lower_constant_initializers can be used to get rid of them.
    * Most of the rest of NIR ignores this field or asserts that it's NULL.
    */
   struct nir_variable *pointer_initializer;

   /**
    * For variables that are in an interface block or are an instance of an
    * interface block, this is the \c GLSL_TYPE_INTERFACE type for that block.
    *
    * \sa ir_variable::location
    */
   const struct glsl_type *interface_type;

   /**
    * Description of per-member data for per-member struct variables
    *
    * This is used for variables which are actually an amalgamation of
    * multiple entities such as a struct of built-in values or a struct of
    * inputs each with their own layout specifier. This is only allowed on
    * variables with a struct or array of array of struct type.
    */
   struct nir_variable_data *members;
} nir_variable;

static inline bool
_nir_shader_variable_has_mode(nir_variable *var, unsigned modes)
{
   /* This isn't a shader variable */
   assert(!(modes & nir_var_function_temp));
   return var->data.mode & modes;
}

#define nir_foreach_variable_in_list(var, var_list) \
   foreach_list_typed(nir_variable, var, node, var_list)

#define nir_foreach_variable_in_list_safe(var, var_list) \
   foreach_list_typed_safe(nir_variable, var, node, var_list)

#define nir_foreach_variable_in_shader(var, shader) \
   nir_foreach_variable_in_list(var, &(shader)->variables)

#define nir_foreach_variable_in_shader_safe(var, shader) \
   nir_foreach_variable_in_list_safe(var, &(shader)->variables)

#define nir_foreach_variable_with_modes(var, shader, modes) \
   nir_foreach_variable_in_shader(var, shader) \
      if (_nir_shader_variable_has_mode(var, modes))

#define nir_foreach_variable_with_modes_safe(var, shader, modes) \
   nir_foreach_variable_in_shader_safe(var, shader) \
      if (_nir_shader_variable_has_mode(var, modes))

#define nir_foreach_shader_in_variable(var, shader) \
   nir_foreach_variable_with_modes(var, shader, nir_var_shader_in)

#define nir_foreach_shader_in_variable_safe(var, shader) \
   nir_foreach_variable_with_modes_safe(var, shader, nir_var_shader_in)

#define nir_foreach_shader_out_variable(var, shader) \
   nir_foreach_variable_with_modes(var, shader, nir_var_shader_out)

#define nir_foreach_shader_out_variable_safe(var, shader) \
   nir_foreach_variable_with_modes_safe(var, shader, nir_var_shader_out)

#define nir_foreach_uniform_variable(var, shader) \
   nir_foreach_variable_with_modes(var, shader, nir_var_uniform)

#define nir_foreach_uniform_variable_safe(var, shader) \
   nir_foreach_variable_with_modes_safe(var, shader, nir_var_uniform)
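
/*
 * Usage sketch (editor's illustration, not part of the header): counting a
 * shader's input variables with the iterators above; "shader" is assumed
 * to be a valid nir_shader pointer (the struct is declared later in the
 * full header):
 *
 *    unsigned num_inputs = 0;
 *    nir_foreach_shader_in_variable(var, shader)
 *       num_inputs++;
 *
 * The _safe variants should be used instead when the loop body may remove
 * the current variable from the list.
 */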

static inline bool
nir_variable_is_global(const nir_variable *var)
{
   return var->data.mode != nir_var_function_temp;
}

typedef struct nir_register {
   struct exec_node node;

   unsigned num_components; /** < number of vector components */
   unsigned num_array_elems; /** < size of array (0 for no array) */

   /* The bit-size of each channel; must be one of 8, 16, 32, or 64 */
   uint8_t bit_size;

   /** generic register index. */
   unsigned index;

   /** only for debug purposes, can be NULL */
   const char *name;

   /** set of nir_srcs where this register is used (read from) */
   struct list_head uses;

   /** set of nir_dests where this register is defined (written to) */
   struct list_head defs;

   /** set of nir_ifs where this register is used as a condition */
   struct list_head if_uses;
} nir_register;

#define nir_foreach_register(reg, reg_list) \
   foreach_list_typed(nir_register, reg, node, reg_list)
#define nir_foreach_register_safe(reg, reg_list) \
   foreach_list_typed_safe(nir_register, reg, node, reg_list)

typedef enum PACKED {
   nir_instr_type_alu,
   nir_instr_type_deref,
   nir_instr_type_call,
   nir_instr_type_tex,
   nir_instr_type_intrinsic,
   nir_instr_type_load_const,
   nir_instr_type_jump,
   nir_instr_type_ssa_undef,
   nir_instr_type_phi,
   nir_instr_type_parallel_copy,
} nir_instr_type;

typedef struct nir_instr {
   struct exec_node node;
   struct nir_block *block;
   nir_instr_type type;

   /* A temporary for optimization and analysis passes to use for storing
    * flags. For instance, DCE uses this to store the "dead/live" info.
    */
   uint8_t pass_flags;

   /** generic instruction index. */
   unsigned index;
} nir_instr;
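
/*
 * Usage sketch (editor's illustration): an analysis pass may stash
 * per-instruction state in pass_flags, for example "instr->pass_flags = 1"
 * to mark an instruction live during DCE, provided it does not expect the
 * value to survive into a later pass.
 */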

static inline nir_instr *
nir_instr_next(nir_instr *instr)
{
   struct exec_node *next = exec_node_get_next(&instr->node);
   if (exec_node_is_tail_sentinel(next))
      return NULL;
   else
      return exec_node_data(nir_instr, next, node);
}

static inline nir_instr *
nir_instr_prev(nir_instr *instr)
{
   struct exec_node *prev = exec_node_get_prev(&instr->node);
   if (exec_node_is_head_sentinel(prev))
      return NULL;
   else
      return exec_node_data(nir_instr, prev, node);
}
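
/*
 * Usage sketch (editor's illustration, not part of the header): walking
 * forward from a given instruction to the end of its block with the
 * helper above:
 *
 *    for (nir_instr *it = instr; it != NULL; it = nir_instr_next(it)) {
 *       ...
 *    }
 *
 * nir_instr_prev() walks in the other direction in the same way.
 */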

static inline bool
nir_instr_is_first(const nir_instr *instr)
{
   return exec_node_is_head_sentinel(exec_node_get_prev_const(&instr->node));
}

static inline bool
nir_instr_is_last(const nir_instr *instr)
{
   return exec_node_is_tail_sentinel(exec_node_get_next_const(&instr->node));
}

typedef struct nir_ssa_def {
   /** for debugging only, can be NULL */
   const char* name;

   /** generic SSA definition index. */
   unsigned index;

   /** Ordered SSA definition index used by nir_liveness. */
   unsigned live_index;

   /** Instruction which produces this SSA value. */
   nir_instr *parent_instr;

   /** set of nir_instrs where this register is used (read from) */
   struct list_head uses;

   /** set of nir_ifs where this register is used as a condition */
   struct list_head if_uses;

   uint8_t num_components;

   /* The bit-size of each channel; must be one of 8, 16, 32, or 64 */
   uint8_t bit_size;

   /**
    * True if this SSA value may have different values in different SIMD
    * invocations of the shader. This is set by nir_divergence_analysis.
    */
   bool divergent;
} nir_ssa_def;
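
/*
 * Usage sketch (editor's illustration, not part of the header): the uses
 * and if_uses lists make it cheap to visit every consumer of an SSA value;
 * nir_foreach_use and nir_foreach_if_use are defined later in the full
 * header:
 *
 *    nir_foreach_use(src, def) {
 *       nir_instr *consumer = src->parent_instr;
 *       ...
 *    }
 */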

struct nir_src;

typedef struct {
   nir_register *reg;
   struct nir_src *indirect; /** < NULL for no indirect offset */
   unsigned base_offset;

   /* TODO use-def chain goes here */
} nir_reg_src;

typedef struct {
   nir_instr *parent_instr;
   struct list_head def_link;

   nir_register *reg;
   struct nir_src *indirect; /** < NULL for no indirect offset */
   unsigned base_offset;

   /* TODO def-use chain goes here */
} nir_reg_dest;
|
|
|
|
|
nir/nir: Use a linked list instead of a hash set for use/def sets
This commit switches us from the current setup of using hash sets for
use/def sets to using linked lists. Doing so should save us quite a bit of
memory because we aren't carrying around 3 hash sets per register and 2 per
SSA value. It should also save us CPU time because adding/removing things
from use/def sets is 4 pointer manipulations instead of a hash lookup.
Running shader-db 50 times with USE_NIR=0, NIR, and NIR + use/def lists:
GLSL IR Only: 586.4 +/- 1.653833
NIR with hash sets: 675.4 +/- 2.502108
NIR + use/def lists: 641.2 +/- 1.557043
I also ran a memory usage experiment with Ken's patch to delete GLSL IR and
keep NIR. This patch cuts an aditional 42.9 MiB of ralloc'd memory over
and above what we gained by deleting the GLSL IR on the same dota trace.
On the code complexity side of things, some things are now much easier and
others are a bit harder. One of the operations we perform constantly in
optimization passes is to replace one source with another. Due to the fact
that an instruction can use the same SSA value multiple times, we had to
iterate through the sources of the instruction and determine if the use we
were replacing was the only one before removing it from the set of uses.
With this patch, uses are per-source not per-instruction so we can just
remove it safely. On the other hand, trying to iterate over all of the
instructions that use a given value is more difficult. Fortunately, the
two places we do that are the ffma peephole where it doesn't matter and GCM
where we already gracefully handle duplicates visits to an instruction.
Another aspect here is that using linked lists in this way can be tricky to
get right. With sets, things were quite forgiving and the worst that
happened if you didn't properly remove a use was that it would get caught
in the validator. With linked lists, it can lead to linked list corruption
which can be harder to track. However, we do just as much validation of
the linked lists as we did of the sets so the validator should still catch
these problems. While working on this series, the vast majority of the
bugs I had to fix were caught by assertions. I don't think the lists are
going to be that much worse than the sets.
Reviewed-by: Connor Abbott <cwabbott0@gmail.com>
2015-04-24 18:16:27 +01:00
|
|
|
struct nir_if;
|
|
|
|
|
2014-08-01 00:14:51 +01:00
|
|
|
typedef struct nir_src {
   union {
      /** Instruction that consumes this value as a source. */
      nir_instr *parent_instr;
      struct nir_if *parent_if;
   };

   struct list_head use_link;

   union {
      nir_reg_src reg;
      nir_ssa_def *ssa;
   };

   bool is_ssa;
} nir_src;

static inline nir_src
nir_src_init(void)
{
   nir_src src = { { NULL } };
   return src;
}

#define NIR_SRC_INIT nir_src_init()

#define nir_foreach_use(src, reg_or_ssa_def) \
   list_for_each_entry(nir_src, src, &(reg_or_ssa_def)->uses, use_link)

#define nir_foreach_use_safe(src, reg_or_ssa_def) \
   list_for_each_entry_safe(nir_src, src, &(reg_or_ssa_def)->uses, use_link)

#define nir_foreach_if_use(src, reg_or_ssa_def) \
   list_for_each_entry(nir_src, src, &(reg_or_ssa_def)->if_uses, use_link)

#define nir_foreach_if_use_safe(src, reg_or_ssa_def) \
   list_for_each_entry_safe(nir_src, src, &(reg_or_ssa_def)->if_uses, use_link)
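
/* Illustrative sketch (added comment, not in the original header): the
 * typical way these iterators are used is to walk and rewrite every use of
 * one SSA def, e.g. assuming nir_instr_rewrite_src() and
 * nir_if_rewrite_condition() declared elsewhere in this header:
 *
 *    nir_foreach_use_safe(use_src, old_def)
 *       nir_instr_rewrite_src(use_src->parent_instr, use_src,
 *                             nir_src_for_ssa(new_def));
 *    nir_foreach_if_use_safe(use_src, old_def)
 *       nir_if_rewrite_condition(use_src->parent_if, nir_src_for_ssa(new_def));
 *
 * The _safe variants are needed here because rewriting unlinks the use.
 */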
typedef struct {
   union {
      nir_reg_dest reg;
      nir_ssa_def ssa;
   };

   bool is_ssa;
} nir_dest;

static inline nir_dest
nir_dest_init(void)
{
   nir_dest dest = { { { NULL } } };
   return dest;
}

#define NIR_DEST_INIT nir_dest_init()

#define nir_foreach_def(dest, reg) \
   list_for_each_entry(nir_dest, dest, &(reg)->defs, reg.def_link)

#define nir_foreach_def_safe(dest, reg) \
   list_for_each_entry_safe(nir_dest, dest, &(reg)->defs, reg.def_link)

static inline nir_src
nir_src_for_ssa(nir_ssa_def *def)
{
   nir_src src = NIR_SRC_INIT;

   src.is_ssa = true;
   src.ssa = def;

   return src;
}

static inline nir_src
nir_src_for_reg(nir_register *reg)
{
   nir_src src = NIR_SRC_INIT;

   src.is_ssa = false;
   src.reg.reg = reg;
   src.reg.indirect = NULL;
   src.reg.base_offset = 0;

   return src;
}

static inline nir_dest
nir_dest_for_reg(nir_register *reg)
{
   nir_dest dest = NIR_DEST_INIT;

   dest.reg.reg = reg;

   return dest;
}

static inline unsigned
nir_src_bit_size(nir_src src)
{
   return src.is_ssa ? src.ssa->bit_size : src.reg.reg->bit_size;
}

static inline unsigned
nir_src_num_components(nir_src src)
{
   return src.is_ssa ? src.ssa->num_components : src.reg.reg->num_components;
}

static inline bool
nir_src_is_const(nir_src src)
{
   return src.is_ssa &&
          src.ssa->parent_instr->type == nir_instr_type_load_const;
}

static inline bool
nir_src_is_divergent(nir_src src)
{
   assert(src.is_ssa);
   return src.ssa->divergent;
}

static inline unsigned
nir_dest_bit_size(nir_dest dest)
{
   return dest.is_ssa ? dest.ssa.bit_size : dest.reg.reg->bit_size;
}

static inline unsigned
nir_dest_num_components(nir_dest dest)
{
   return dest.is_ssa ? dest.ssa.num_components : dest.reg.reg->num_components;
}

static inline bool
nir_dest_is_divergent(nir_dest dest)
{
   assert(dest.is_ssa);
   return dest.ssa.divergent;
}

/* Are all components the same, ie. .xxxx */
static inline bool
nir_is_same_comp_swizzle(uint8_t *swiz, unsigned nr_comp)
{
   for (unsigned i = 1; i < nr_comp; i++)
      if (swiz[i] != swiz[0])
         return false;
   return true;
}

/* Are all components sequential, ie. .yzw */
static inline bool
nir_is_sequential_comp_swizzle(uint8_t *swiz, unsigned nr_comp)
{
   for (unsigned i = 1; i < nr_comp; i++)
      if (swiz[i] != (swiz[0] + i))
         return false;
   return true;
}
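
/* Worked examples (added comment): over 3 components, swiz = {1, 2, 3} is
 * sequential (.yzw) and swiz = {2, 2, 2} is same-component (.zzz), while
 * swiz = {0, 2, 1} is neither.
 */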
void nir_src_copy(nir_src *dest, const nir_src *src, void *instr_or_if);
void nir_dest_copy(nir_dest *dest, const nir_dest *src, nir_instr *instr);

typedef struct {
   nir_src src;

   /**
    * \name input modifiers
    */
   /*@{*/
   /**
    * For inputs interpreted as floating point, flips the sign bit. For
    * inputs interpreted as integers, performs the two's complement negation.
    */
   bool negate;

   /**
    * Clears the sign bit for floating point values, and computes the integer
    * absolute value for integers. Note that the negate modifier acts after
    * the absolute value modifier, therefore if both are set then all inputs
    * will become negative.
    */
   bool abs;
   /*@}*/

   /**
    * For each input component, says which component of the register it is
    * chosen from. Note that which elements of the swizzle are used and which
    * are ignored are based on the write mask for most opcodes - for example,
    * a statement like "foo.xzw = bar.zyx" would have a writemask of 1101b and
    * a swizzle of {2, x, 1, 0} where x means "don't care."
    */
   uint8_t swizzle[NIR_MAX_VEC_COMPONENTS];
} nir_alu_src;

typedef struct {
   nir_dest dest;

   /**
    * \name saturate output modifier
    *
    * Only valid for opcodes that output floating-point numbers. Clamps the
    * output to between 0.0 and 1.0 inclusive.
    */
   bool saturate;

   unsigned write_mask : NIR_MAX_VEC_COMPONENTS; /* ignored if dest.is_ssa is true */
} nir_alu_dest;

/** NIR sized and unsized types
 *
 * The values in this enum are carefully chosen so that the sized type is
 * just the unsized type OR the number of bits.
 */
typedef enum PACKED {
   nir_type_invalid = 0, /* Not a valid type */
   nir_type_int = 2,
   nir_type_uint = 4,
   nir_type_bool = 6,
   nir_type_float = 128,
   nir_type_bool1 = 1 | nir_type_bool,
   nir_type_bool8 = 8 | nir_type_bool,
   nir_type_bool16 = 16 | nir_type_bool,
   nir_type_bool32 = 32 | nir_type_bool,
   nir_type_int1 = 1 | nir_type_int,
   nir_type_int8 = 8 | nir_type_int,
   nir_type_int16 = 16 | nir_type_int,
   nir_type_int32 = 32 | nir_type_int,
   nir_type_int64 = 64 | nir_type_int,
   nir_type_uint1 = 1 | nir_type_uint,
   nir_type_uint8 = 8 | nir_type_uint,
   nir_type_uint16 = 16 | nir_type_uint,
   nir_type_uint32 = 32 | nir_type_uint,
   nir_type_uint64 = 64 | nir_type_uint,
   nir_type_float16 = 16 | nir_type_float,
   nir_type_float32 = 32 | nir_type_float,
   nir_type_float64 = 64 | nir_type_float,
} nir_alu_type;

#define NIR_ALU_TYPE_SIZE_MASK 0x79
#define NIR_ALU_TYPE_BASE_TYPE_MASK 0x86

static inline unsigned
nir_alu_type_get_type_size(nir_alu_type type)
{
   return type & NIR_ALU_TYPE_SIZE_MASK;
}

static inline nir_alu_type
nir_alu_type_get_base_type(nir_alu_type type)
{
   return (nir_alu_type)(type & NIR_ALU_TYPE_BASE_TYPE_MASK);
}
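
/* Worked example (added comment): nir_type_uint32 is 4 | 32 == 0x24, so
 * nir_alu_type_get_type_size() masks it with 0x79 (1|8|16|32|64) and
 * returns 32, while nir_alu_type_get_base_type() masks it with 0x86 and
 * returns nir_type_uint (4).
 */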
static inline nir_alu_type
nir_get_nir_type_for_glsl_base_type(enum glsl_base_type base_type)
{
   switch (base_type) {
   case GLSL_TYPE_BOOL:
      return nir_type_bool1;
      break;
   case GLSL_TYPE_UINT:
      return nir_type_uint32;
      break;
   case GLSL_TYPE_INT:
      return nir_type_int32;
      break;
   case GLSL_TYPE_UINT16:
      return nir_type_uint16;
      break;
   case GLSL_TYPE_INT16:
      return nir_type_int16;
      break;
   case GLSL_TYPE_UINT8:
      return nir_type_uint8;
   case GLSL_TYPE_INT8:
      return nir_type_int8;
   case GLSL_TYPE_UINT64:
      return nir_type_uint64;
      break;
   case GLSL_TYPE_INT64:
      return nir_type_int64;
      break;
   case GLSL_TYPE_FLOAT:
      return nir_type_float32;
      break;
   case GLSL_TYPE_FLOAT16:
      return nir_type_float16;
      break;
   case GLSL_TYPE_DOUBLE:
      return nir_type_float64;
      break;

   case GLSL_TYPE_SAMPLER:
   case GLSL_TYPE_IMAGE:
   case GLSL_TYPE_ATOMIC_UINT:
   case GLSL_TYPE_STRUCT:
   case GLSL_TYPE_INTERFACE:
   case GLSL_TYPE_ARRAY:
   case GLSL_TYPE_VOID:
   case GLSL_TYPE_SUBROUTINE:
   case GLSL_TYPE_FUNCTION:
   case GLSL_TYPE_ERROR:
      return nir_type_invalid;
   }

   unreachable("unknown type");
}

static inline nir_alu_type
nir_get_nir_type_for_glsl_type(const struct glsl_type *type)
{
   return nir_get_nir_type_for_glsl_base_type(glsl_get_base_type(type));
}

nir_op nir_type_conversion_op(nir_alu_type src, nir_alu_type dst,
                              nir_rounding_mode rnd);

static inline nir_op
nir_op_vec(unsigned components)
{
   switch (components) {
   case 1: return nir_op_mov;
   case 2: return nir_op_vec2;
   case 3: return nir_op_vec3;
   case 4: return nir_op_vec4;
   case 8: return nir_op_vec8;
   case 16: return nir_op_vec16;
   default: unreachable("bad component count");
   }
}
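
/* Illustration (added comment): nir_op_vec(3) == nir_op_vec3, so a pass
 * that rebuilds an n-component vector can pick the opcode with, e.g.,
 * nir_op_vec(nir_dest_num_components(alu->dest.dest)).
 */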
static inline bool
nir_op_is_vec(nir_op op)
{
   switch (op) {
   case nir_op_mov:
   case nir_op_vec2:
   case nir_op_vec3:
   case nir_op_vec4:
   case nir_op_vec8:
   case nir_op_vec16:
      return true;
   default:
      return false;
   }
}

static inline bool
nir_is_float_control_signed_zero_inf_nan_preserve(unsigned execution_mode, unsigned bit_size)
{
   return (16 == bit_size && execution_mode & FLOAT_CONTROLS_SIGNED_ZERO_INF_NAN_PRESERVE_FP16) ||
          (32 == bit_size && execution_mode & FLOAT_CONTROLS_SIGNED_ZERO_INF_NAN_PRESERVE_FP32) ||
          (64 == bit_size && execution_mode & FLOAT_CONTROLS_SIGNED_ZERO_INF_NAN_PRESERVE_FP64);
}

static inline bool
nir_is_denorm_flush_to_zero(unsigned execution_mode, unsigned bit_size)
{
   return (16 == bit_size && execution_mode & FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP16) ||
          (32 == bit_size && execution_mode & FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP32) ||
          (64 == bit_size && execution_mode & FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP64);
}

static inline bool
nir_is_denorm_preserve(unsigned execution_mode, unsigned bit_size)
{
   return (16 == bit_size && execution_mode & FLOAT_CONTROLS_DENORM_PRESERVE_FP16) ||
          (32 == bit_size && execution_mode & FLOAT_CONTROLS_DENORM_PRESERVE_FP32) ||
          (64 == bit_size && execution_mode & FLOAT_CONTROLS_DENORM_PRESERVE_FP64);
}

static inline bool
nir_is_rounding_mode_rtne(unsigned execution_mode, unsigned bit_size)
{
   return (16 == bit_size && execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP16) ||
          (32 == bit_size && execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP32) ||
          (64 == bit_size && execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP64);
}

static inline bool
nir_is_rounding_mode_rtz(unsigned execution_mode, unsigned bit_size)
{
   return (16 == bit_size && execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP16) ||
          (32 == bit_size && execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP32) ||
          (64 == bit_size && execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP64);
}

static inline bool
nir_has_any_rounding_mode_rtz(unsigned execution_mode)
{
   return (execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP16) ||
          (execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP32) ||
          (execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP64);
}

static inline bool
nir_has_any_rounding_mode_rtne(unsigned execution_mode)
{
   return (execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP16) ||
          (execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP32) ||
          (execution_mode & FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP64);
}

static inline nir_rounding_mode
nir_get_rounding_mode_from_float_controls(unsigned execution_mode,
                                          nir_alu_type type)
{
   if (nir_alu_type_get_base_type(type) != nir_type_float)
      return nir_rounding_mode_undef;

   unsigned bit_size = nir_alu_type_get_type_size(type);

   if (nir_is_rounding_mode_rtz(execution_mode, bit_size))
      return nir_rounding_mode_rtz;
   if (nir_is_rounding_mode_rtne(execution_mode, bit_size))
      return nir_rounding_mode_rtne;
   return nir_rounding_mode_undef;
}

static inline bool
nir_has_any_rounding_mode_enabled(unsigned execution_mode)
{
   bool result =
      nir_has_any_rounding_mode_rtne(execution_mode) ||
      nir_has_any_rounding_mode_rtz(execution_mode);
   return result;
}
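
/* Illustration (added comment): if execution_mode contains only
 * FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP16, then
 * nir_get_rounding_mode_from_float_controls() returns
 * nir_rounding_mode_rtz for nir_type_float16 but nir_rounding_mode_undef
 * for nir_type_float32 or for any non-float type.
 */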
typedef enum {
   /**
    * Operation where the first two sources are commutative.
    *
    * For 2-source operations, this is just mathematical commutativity. Some
    * 3-source operations, like ffma, are only commutative in the first two
    * sources.
    */
   NIR_OP_IS_2SRC_COMMUTATIVE = (1 << 0),
   NIR_OP_IS_ASSOCIATIVE = (1 << 1),
} nir_op_algebraic_property;
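
/* For example (added comment): because ffma is 2src-commutative,
 * ffma(a, b, c) == ffma(b, a, c), but the third source cannot be swapped
 * with either of the first two.
 */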
typedef struct {
   const char *name;

   uint8_t num_inputs;

   /**
    * The number of components in the output
    *
    * If non-zero, this is the size of the output and input sizes are
    * explicitly given; swizzle and writemask are still in effect, but if
    * the output component is masked out, then the input component may
    * still be in use.
    *
    * If zero, the opcode acts in the standard, per-component manner; the
    * operation is performed on each component (except the ones that are
    * masked out) with the input being taken from the input swizzle for
    * that component.
    *
    * The size of some of the inputs may be given (i.e. non-zero) even
    * though output_size is zero; in that case, the inputs with a zero
    * size act per-component, while the inputs with non-zero size don't.
    */
   uint8_t output_size;

   /**
    * The type of vector that the instruction outputs. Note that the
    * saturate modifier is only allowed on outputs with the float type.
    */
   nir_alu_type output_type;

   /**
    * The number of components in each input
    */
   uint8_t input_sizes[NIR_MAX_VEC_COMPONENTS];

   /**
    * The type of vector that each input takes. Note that negate and
    * absolute value are only allowed on inputs with int or float type and
    * behave differently on the two.
    */
   nir_alu_type input_types[NIR_MAX_VEC_COMPONENTS];

   nir_op_algebraic_property algebraic_properties;

   /* Whether this represents a numeric conversion opcode */
   bool is_conversion;
} nir_op_info;

extern const nir_op_info nir_op_infos[nir_num_opcodes];
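
/* Illustrative sketch (added comment): a reduction opcode such as fdot4
 * has output_size 1 with input_sizes {4, 4}, while a per-component opcode
 * such as fadd leaves output_size and input_sizes at 0, so passes size it
 * from the destination:
 *
 *    unsigned n = nir_op_infos[instr->op].output_size;
 *    if (n == 0)
 *       n = nir_dest_num_components(instr->dest.dest);
 */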
typedef struct nir_alu_instr {
   nir_instr instr;
   nir_op op;

   /** Indicates that this ALU instruction generates an exact value
    *
    * This is kind of a mixture of GLSL "precise" and "invariant" and not
    * really equivalent to either. This indicates that the value generated by
    * this operation is high-precision and any code transformations that touch
    * it must ensure that the resulting value is bit-for-bit identical to the
    * original.
    */
   bool exact:1;

   /**
    * Indicates that this instruction does not cause wrapping to occur, in
    * the form of overflow or underflow.
    */
   bool no_signed_wrap:1;
   bool no_unsigned_wrap:1;

   nir_alu_dest dest;
   nir_alu_src src[];
} nir_alu_instr;

void nir_alu_src_copy(nir_alu_src *dest, const nir_alu_src *src,
                      nir_alu_instr *instr);
void nir_alu_dest_copy(nir_alu_dest *dest, const nir_alu_dest *src,
                       nir_alu_instr *instr);

/* is this source channel used? */
static inline bool
nir_alu_instr_channel_used(const nir_alu_instr *instr, unsigned src,
                           unsigned channel)
{
   if (nir_op_infos[instr->op].input_sizes[src] > 0)
      return channel < nir_op_infos[instr->op].input_sizes[src];

   return (instr->dest.write_mask >> channel) & 1;
}

static inline nir_component_mask_t
nir_alu_instr_src_read_mask(const nir_alu_instr *instr, unsigned src)
{
   nir_component_mask_t read_mask = 0;
   for (unsigned c = 0; c < NIR_MAX_VEC_COMPONENTS; c++) {
      if (!nir_alu_instr_channel_used(instr, src, c))
         continue;

      read_mask |= (1 << instr->src[src].swizzle[c]);
   }
   return read_mask;
}
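
/* Worked example (added comment): for a per-component ALU instruction with
 * dest.write_mask == 0b0101 and src swizzle {3, 0, 1, 0}, channels 0 and 2
 * are used, so the result is (1 << 3) | (1 << 1) == 0b1010.
 */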
/**
 * Get the number of channels used for a source
 */
static inline unsigned
nir_ssa_alu_instr_src_components(const nir_alu_instr *instr, unsigned src)
{
   if (nir_op_infos[instr->op].input_sizes[src] > 0)
      return nir_op_infos[instr->op].input_sizes[src];

   return nir_dest_num_components(instr->dest.dest);
}

static inline bool
nir_alu_instr_is_comparison(const nir_alu_instr *instr)
{
   switch (instr->op) {
   case nir_op_flt:
   case nir_op_fge:
   case nir_op_feq:
   case nir_op_fneu:
   case nir_op_ilt:
   case nir_op_ult:
   case nir_op_ige:
   case nir_op_uge:
   case nir_op_ieq:
   case nir_op_ine:
   case nir_op_i2b1:
   case nir_op_f2b1:
   case nir_op_inot:
      return true;
   default:
      return false;
   }
}

bool nir_const_value_negative_equal(nir_const_value c1, nir_const_value c2,
                                    nir_alu_type full_type);

bool nir_alu_srcs_equal(const nir_alu_instr *alu1, const nir_alu_instr *alu2,
                        unsigned src1, unsigned src2);

bool nir_alu_srcs_negative_equal(const nir_alu_instr *alu1,
                                 const nir_alu_instr *alu2,
                                 unsigned src1, unsigned src2);

typedef enum {
   nir_deref_type_var,
   nir_deref_type_array,
   nir_deref_type_array_wildcard,
   nir_deref_type_ptr_as_array,
   nir_deref_type_struct,
   nir_deref_type_cast,
} nir_deref_type;

typedef struct {
   nir_instr instr;

   /** The type of this deref instruction */
   nir_deref_type deref_type;

   /** The mode of the underlying variable */
   nir_variable_mode mode;

   /** The dereferenced type of the resulting pointer value */
   const struct glsl_type *type;

   union {
      /** Variable being dereferenced if deref_type is a deref_var */
      nir_variable *var;

      /** Parent deref if deref_type is not deref_var */
      nir_src parent;
   };

   /** Additional deref parameters */
   union {
      struct {
         nir_src index;
      } arr;

      struct {
         unsigned index;
      } strct;

      struct {
         unsigned ptr_stride;
         unsigned align_mul;
         unsigned align_offset;
      } cast;
   };

   /** Destination to store the resulting "pointer" */
   nir_dest dest;
} nir_deref_instr;

static inline nir_deref_instr *nir_src_as_deref(nir_src src);

static inline nir_deref_instr *
nir_deref_instr_parent(const nir_deref_instr *instr)
{
   if (instr->deref_type == nir_deref_type_var)
      return NULL;
   else
      return nir_src_as_deref(instr->parent);
}

static inline nir_variable *
nir_deref_instr_get_variable(const nir_deref_instr *instr)
{
   while (instr->deref_type != nir_deref_type_var) {
      if (instr->deref_type == nir_deref_type_cast)
         return NULL;

      instr = nir_deref_instr_parent(instr);
   }

   return instr->var;
}
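
/* Illustration (added comment): for a chain like
 * deref_var(v) -> deref_array -> deref_struct, walking the parents
 * recovers the nir_variable v; if a deref_cast appears anywhere along the
 * chain, nir_deref_instr_get_variable() returns NULL instead.
 */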
bool nir_deref_instr_has_indirect(nir_deref_instr *instr);
bool nir_deref_instr_is_known_out_of_bounds(nir_deref_instr *instr);
bool nir_deref_instr_has_complex_use(nir_deref_instr *instr);

bool nir_deref_instr_remove_if_unused(nir_deref_instr *instr);

unsigned nir_deref_instr_array_stride(nir_deref_instr *instr);

typedef struct {
   nir_instr instr;

   struct nir_function *callee;

   unsigned num_params;
   nir_src params[];
} nir_call_instr;

#include "nir_intrinsics.h"

#define NIR_INTRINSIC_MAX_CONST_INDEX 5

/** Represents an intrinsic
 *
 * An intrinsic is an instruction type for handling things that are
 * more-or-less regular operations but don't just consume and produce SSA
 * values like ALU operations do. Intrinsics are not for things that have
 * special semantic meaning such as phi nodes and parallel copies.
 * Examples of intrinsics include variable load/store operations, system
 * value loads, and the like. Even though texturing more-or-less falls
 * under this category, texturing is its own instruction type because
 * trying to represent texturing with intrinsics would lead to a
 * combinatorial explosion of intrinsic opcodes.
 *
 * By having a single instruction type for handling a lot of different
 * cases, optimization passes can look for intrinsics and, for the most
 * part, completely ignore them. Each intrinsic type also has a few
 * possible flags that govern whether or not they can be reordered or
 * eliminated. That way passes like dead code elimination can still work
 * on intrinsics without understanding the meaning of each.
 *
 * Each intrinsic has some number of constant indices, some number of
 * variables, and some number of sources. What these sources, variables,
 * and indices mean depends on the intrinsic and is documented with the
 * intrinsic declaration in nir_intrinsics.h. Intrinsics and texture
 * instructions are the only types of instruction that can operate on
 * variables.
 */
typedef struct {
   nir_instr instr;

   nir_intrinsic_op intrinsic;

   nir_dest dest;

   /** number of components if this is a vectorized intrinsic
    *
    * Similarly to ALU operations, some intrinsics are vectorized.
    * An intrinsic is vectorized if nir_intrinsic_infos.dest_components == 0.
    * For vectorized intrinsics, the num_components field specifies the
    * number of destination components and the number of source components
    * for all sources with nir_intrinsic_infos.src_components[i] == 0.
    */
   uint8_t num_components;

   int const_index[NIR_INTRINSIC_MAX_CONST_INDEX];

   nir_src src[];
} nir_intrinsic_instr;
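
/* Illustration (added comment, assuming load_ubo as the example): a
 * vectorized intrinsic such as load_ubo returning a vec3 has
 * num_components == 3, matching nir_dest_num_components(intrin->dest);
 * non-vectorized intrinsics take their sizes from nir_intrinsic_infos.
 */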
static inline nir_variable *
nir_intrinsic_get_var(nir_intrinsic_instr *intrin, unsigned i)
{
   return nir_deref_instr_get_variable(nir_src_as_deref(intrin->src[i]));
}

typedef enum {
   /* Memory ordering. */
   NIR_MEMORY_ACQUIRE        = 1 << 0,
   NIR_MEMORY_RELEASE        = 1 << 1,
   NIR_MEMORY_ACQ_REL        = NIR_MEMORY_ACQUIRE | NIR_MEMORY_RELEASE,

   /* Memory visibility operations. */
   NIR_MEMORY_MAKE_AVAILABLE = 1 << 2,
   NIR_MEMORY_MAKE_VISIBLE   = 1 << 3,
} nir_memory_semantics;

typedef enum {
   NIR_SCOPE_NONE,
   NIR_SCOPE_INVOCATION,
   NIR_SCOPE_SUBGROUP,
   NIR_SCOPE_WORKGROUP,
   NIR_SCOPE_QUEUE_FAMILY,
   NIR_SCOPE_DEVICE,
} nir_scope;

/**
 * \name NIR intrinsics semantic flags
 *
 * Information about what the compiler can do with the intrinsics.
 *
 * \sa nir_intrinsic_info::flags
 */
typedef enum {
   /**
    * Whether the intrinsic can be safely eliminated if none of its output
    * values are used.
    */
   NIR_INTRINSIC_CAN_ELIMINATE = (1 << 0),

   /**
    * Whether the intrinsic can be reordered with respect to any other
    * intrinsic, i.e. whether the only reordering dependencies of the
    * intrinsic are due to the register reads/writes.
    */
   NIR_INTRINSIC_CAN_REORDER = (1 << 1),
} nir_intrinsic_semantic_flag;

/**
 * \name NIR intrinsics const-index flag
 *
 * Indicates the usage of a const_index slot.
 *
 * \sa nir_intrinsic_info::index_map
 */
typedef enum {
   /**
    * Generally, instructions that take an offset src argument can encode
    * a constant 'base' value which is added to the offset.
    */
   NIR_INTRINSIC_BASE = 1,

   /**
    * For store instructions, a writemask for the store.
    */
   NIR_INTRINSIC_WRMASK,

   /**
    * The stream-id for GS emit_vertex/end_primitive intrinsics.
    */
   NIR_INTRINSIC_STREAM_ID,

   /**
    * The clip-plane id for load_user_clip_plane intrinsic.
    */
   NIR_INTRINSIC_UCP_ID,

   /**
    * The start of NIR_INTRINSIC_RANGE. Only present on instructions that
    * don't have NIR_INTRINSIC_BASE.
    *
    * If the [range_base, range] is [0, ~0], then we don't know the possible
    * range of the access.
    */
   NIR_INTRINSIC_RANGE_BASE,

   /**
    * The amount of data, starting from BASE or RANGE_BASE, that this
    * instruction may access. This is used to provide bounds if the offset
    * is not constant. For example, an access with range_base 16 and
    * range 32 may touch offsets [16, 48).
    */
   NIR_INTRINSIC_RANGE,

   /**
    * The Vulkan descriptor set for vulkan_resource_index intrinsic.
    */
   NIR_INTRINSIC_DESC_SET,

   /**
    * The Vulkan descriptor set binding for vulkan_resource_index intrinsic.
    */
   NIR_INTRINSIC_BINDING,

   /**
    * Component offset.
    */
   NIR_INTRINSIC_COMPONENT,

/**
|
|
|
|
* Interpolation mode (only meaningful for FS inputs).
|
|
|
|
*/
|
2019-06-13 15:48:41 +01:00
|
|
|
NIR_INTRINSIC_INTERP_MODE,
|
2016-07-12 09:46:43 +01:00
|
|
|
|
2017-08-30 04:09:58 +01:00
|
|
|
/**
|
|
|
|
* A binary nir_op to use when performing a reduction or scan operation
|
|
|
|
*/
|
2019-06-13 15:48:41 +01:00
|
|
|
NIR_INTRINSIC_REDUCTION_OP,
|
2017-08-30 04:09:58 +01:00
|
|
|
|
|
|
|
/**
|
|
|
|
* Cluster size for reduction operations
|
|
|
|
*/
|
2019-06-13 15:48:41 +01:00
|
|
|
NIR_INTRINSIC_CLUSTER_SIZE,
|
2017-08-30 04:09:58 +01:00
|
|
|
|
2018-03-22 23:41:18 +00:00
|
|
|
/**
|
|
|
|
* Parameter index for a load_param intrinsic
|
|
|
|
*/
|
2019-06-13 15:48:41 +01:00
|
|
|
NIR_INTRINSIC_PARAM_IDX,
|
2018-03-22 23:41:18 +00:00
|
|
|
|
2018-08-16 21:11:44 +01:00
|
|
|
/**
|
|
|
|
* Image dimensionality for image intrinsics
|
|
|
|
*
|
|
|
|
* One of GLSL_SAMPLER_DIM_*
|
|
|
|
*/
|
2019-06-13 15:48:41 +01:00
|
|
|
NIR_INTRINSIC_IMAGE_DIM,
|
2018-08-16 21:11:44 +01:00
|
|
|
|
|
|
|
/**
|
|
|
|
* Non-zero if we are accessing an array image
|
|
|
|
*/
|
2019-06-13 15:48:41 +01:00
|
|
|
NIR_INTRINSIC_IMAGE_ARRAY,
|
2018-08-16 21:11:44 +01:00
|
|
|
|
|
|
|
/**
|
|
|
|
* Image format for image intrinsics
|
|
|
|
*/
|
2019-06-13 15:48:41 +01:00
|
|
|
NIR_INTRINSIC_FORMAT,
|
2018-08-16 21:11:44 +01:00
|
|
|
|
|
|
|
/**
|
2018-11-28 19:44:56 +00:00
|
|
|
* Access qualifiers for image and memory access intrinsics
|
2018-08-16 21:11:44 +01:00
|
|
|
*/
|
2019-06-13 15:48:41 +01:00
|
|
|
NIR_INTRINSIC_ACCESS,
|
2018-08-16 21:11:44 +01:00
|
|
|
|
2018-11-13 15:45:03 +00:00
|
|
|
/**
|
|
|
|
* Alignment for offsets and addresses
|
|
|
|
*
|
|
|
|
* These two parameters specify an alignment in terms of a multiplier and
|
2020-09-08 21:07:56 +01:00
|
|
|
* an offset. The multiplier is always a power of two. The offset or
|
|
|
|
* address parameter X of the intrinsic is guaranteed to satisfy the
|
|
|
|
* following:
|
2018-11-13 15:45:03 +00:00
|
|
|
*
|
|
|
|
* (X - align_offset) % align_mul == 0
|
2020-09-08 21:07:56 +01:00
|
|
|
*
|
|
|
|
* For constant offset values, align_mul will be NIR_ALIGN_MUL_MAX and the
|
|
|
|
* align_offset will be that constant modulo NIR_ALIGN_MUL_MAX.
|
2018-11-13 15:45:03 +00:00
|
|
|
*/
|
2019-06-13 15:48:41 +01:00
|
|
|
NIR_INTRINSIC_ALIGN_MUL,
|
|
|
|
NIR_INTRINSIC_ALIGN_OFFSET,
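/* A worked example (illustrative, not part of the original header): with
 * align_mul = 16 and align_offset = 4, every offset or address X the
 * instruction can access satisfies (X - 4) % 16 == 0, i.e. X is one of
 * 4, 20, 36, ...
 */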
|
2018-11-13 15:45:03 +00:00
|
|
|
|
2018-12-13 22:50:19 +00:00
|
|
|
/**
|
|
|
|
* The Vulkan descriptor type for a vulkan_resource_[re]index intrinsic.
|
|
|
|
*/
|
2019-06-13 15:48:41 +01:00
|
|
|
NIR_INTRINSIC_DESC_TYPE,
|
2018-12-13 22:50:19 +00:00
|
|
|
|
2019-05-31 18:44:40 +01:00
|
|
|
/**
|
2020-09-30 21:19:45 +01:00
|
|
|
* The nir_alu_type of input data to a store or conversion
|
2019-05-31 18:44:40 +01:00
|
|
|
*/
|
2020-10-01 03:20:53 +01:00
|
|
|
NIR_INTRINSIC_SRC_TYPE,
|
|
|
|
|
|
|
|
/**
|
2020-09-30 21:19:45 +01:00
|
|
|
* The nir_alu_type of the data output from a load or conversion
|
2020-10-01 03:20:53 +01:00
|
|
|
*/
|
|
|
|
NIR_INTRINSIC_DEST_TYPE,
|
2019-05-31 18:44:40 +01:00
|
|
|
|
2018-05-09 19:37:24 +01:00
|
|
|
/**
|
|
|
|
* The swizzle mask for the instructions
|
|
|
|
* SwizzleInvocationsAMD and SwizzleInvocationsMaskedAMD
|
|
|
|
*/
|
2019-06-13 15:48:41 +01:00
|
|
|
NIR_INTRINSIC_SWIZZLE_MASK,
|
2018-05-09 19:37:24 +01:00
|
|
|
|
2019-06-04 10:40:14 +01:00
|
|
|
/* Separate source/dest access flags for copies */
|
2019-07-31 22:50:56 +01:00
|
|
|
NIR_INTRINSIC_SRC_ACCESS,
|
|
|
|
NIR_INTRINSIC_DST_ACCESS,
|
2019-06-04 10:40:14 +01:00
|
|
|
|
2019-10-11 01:17:10 +01:00
|
|
|
/* Driver location for nir_load_patch_location_ir3 */
|
|
|
|
NIR_INTRINSIC_DRIVER_LOCATION,
|
|
|
|
|
2019-07-19 00:14:03 +01:00
|
|
|
/**
|
|
|
|
* Mask of nir_memory_semantics, includes ordering and visibility.
|
|
|
|
*/
|
|
|
|
NIR_INTRINSIC_MEMORY_SEMANTICS,
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Mask of nir_variable_modes affected by the memory operation.
|
|
|
|
*/
|
|
|
|
NIR_INTRINSIC_MEMORY_MODES,
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Value of nir_scope.
|
|
|
|
*/
|
|
|
|
NIR_INTRINSIC_MEMORY_SCOPE,
|
|
|
|
|
2020-05-05 08:13:20 +01:00
|
|
|
/**
|
|
|
|
* Value of nir_scope.
|
|
|
|
*/
|
|
|
|
NIR_INTRINSIC_EXECUTION_SCOPE,
|
|
|
|
|
2020-08-12 04:48:12 +01:00
|
|
|
/**
|
|
|
|
* Value of nir_io_semantics.
|
|
|
|
*/
|
|
|
|
NIR_INTRINSIC_IO_SEMANTICS,
|
|
|
|
|
2020-09-30 21:19:45 +01:00
|
|
|
/**
|
|
|
|
* The rounding mode of a conversion
|
|
|
|
*/
|
|
|
|
NIR_INTRINSIC_ROUNDING_MODE,
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Whether or not to saturate in conversions
|
|
|
|
*/
|
|
|
|
NIR_INTRINSIC_SATURATE,
|
|
|
|
|
2016-01-13 23:43:14 +00:00
|
|
|
NIR_INTRINSIC_NUM_INDEX_FLAGS,
|
|
|
|
|
|
|
|
} nir_intrinsic_index_flag;
|
|
|
|
|
2020-09-08 21:07:56 +01:00
|
|
|
/**
|
|
|
|
* Maximum valid value for a nir align_mul value (in intrinsics or derefs).
|
|
|
|
*
|
|
|
|
* Offsets can be signed, so this is the largest power of two in int32_t.
|
|
|
|
*/
|
|
|
|
#define NIR_ALIGN_MUL_MAX 0x40000000
|
|
|
|
|
2020-08-12 04:48:12 +01:00
|
|
|
typedef struct {
|
|
|
|
unsigned location:7; /* gl_vert_attrib, gl_varying_slot, or gl_frag_result */
|
|
|
|
unsigned num_slots:6; /* max 32, may be pessimistic with const indexing */
|
|
|
|
unsigned dual_source_blend_index:1;
|
|
|
|
unsigned fb_fetch_output:1; /* for GL_KHR_blend_equation_advanced */
|
|
|
|
unsigned gs_streams:8; /* xxyyzzww: 2-bit stream index for each component */
|
2020-09-06 05:24:31 +01:00
|
|
|
unsigned medium_precision:1; /* GLSL mediump qualifier */
|
2020-09-02 10:20:06 +01:00
|
|
|
unsigned per_view:1;
|
|
|
|
unsigned _pad:7;
|
2020-08-12 04:48:12 +01:00
|
|
|
} nir_io_semantics;
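/* A minimal sketch (not part of the original header; the helper name is
 * hypothetical) of decoding the packed gs_streams field, assuming the
 * xxyyzzww layout documented above places component 0 in the low bits.
 */
static inline unsigned
nir_io_semantics_stream_sketch(nir_io_semantics sem, unsigned component)
{
   /* 2 bits of stream index per component */
   return (sem.gs_streams >> (2 * component)) & 0x3;
}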
|
|
|
|
|
2018-03-15 21:56:43 +00:00
|
|
|
#define NIR_INTRINSIC_MAX_INPUTS 5
|
2014-08-01 00:14:51 +01:00
|
|
|
|
|
|
|
typedef struct {
|
|
|
|
const char *name;
|
|
|
|
|
2020-05-14 20:50:52 +01:00
|
|
|
uint8_t num_srcs; /** < number of register/SSA inputs */
|
2014-08-01 00:14:51 +01:00
|
|
|
|
2014-12-04 01:03:19 +00:00
|
|
|
/** number of components of each input register
|
|
|
|
*
|
|
|
|
* If this value is 0, the number of components is given by the
|
2018-11-28 03:30:22 +00:00
|
|
|
* num_components field of nir_intrinsic_instr. If this value is -1, the
|
|
|
|
* intrinsic consumes however many components are provided and it is not
|
|
|
|
* validated at all.
|
2014-12-04 01:03:19 +00:00
|
|
|
*/
|
2020-05-14 20:50:52 +01:00
|
|
|
int8_t src_components[NIR_INTRINSIC_MAX_INPUTS];
|
2014-08-01 00:14:51 +01:00
|
|
|
|
|
|
|
bool has_dest;
|
|
|
|
|
2014-12-04 01:03:19 +00:00
|
|
|
/** number of components of the output register
|
|
|
|
*
|
|
|
|
* If this value is 0, the number of components is given by the
|
|
|
|
* num_components field of nir_intrinsic_instr.
|
|
|
|
*/
|
2020-05-14 20:50:52 +01:00
|
|
|
uint8_t dest_components;
|
2014-08-01 00:14:51 +01:00
|
|
|
|
2018-07-19 12:04:43 +01:00
|
|
|
/** bitfield of legal bit sizes */
|
2020-05-14 20:50:52 +01:00
|
|
|
uint8_t dest_bit_sizes;
|
2018-07-19 12:04:43 +01:00
|
|
|
|
2014-08-01 00:14:51 +01:00
|
|
|
/** the number of constant indices used by the intrinsic */
|
2020-05-14 20:50:52 +01:00
|
|
|
uint8_t num_indices;
|
2014-08-01 00:14:51 +01:00
|
|
|
|
2016-01-13 23:43:14 +00:00
|
|
|
/** indicates the usage of intr->const_index[n] */
|
2020-05-14 20:50:52 +01:00
|
|
|
uint8_t index_map[NIR_INTRINSIC_NUM_INDEX_FLAGS];
|
2016-01-13 23:43:14 +00:00
|
|
|
|
2014-08-01 00:14:51 +01:00
|
|
|
/** semantic flags for calls to this intrinsic */
|
2014-12-19 23:56:55 +00:00
|
|
|
nir_intrinsic_semantic_flag flags;
|
2014-08-01 00:14:51 +01:00
|
|
|
} nir_intrinsic_info;
|
|
|
|
|
|
|
|
extern const nir_intrinsic_info nir_intrinsic_infos[nir_num_intrinsics];
|
|
|
|
|
2018-03-28 13:32:10 +01:00
|
|
|
static inline unsigned
|
2019-08-21 05:43:56 +01:00
|
|
|
nir_intrinsic_src_components(const nir_intrinsic_instr *intr, unsigned srcn)
|
2018-03-28 13:32:10 +01:00
|
|
|
{
|
|
|
|
const nir_intrinsic_info *info = &nir_intrinsic_infos[intr->intrinsic];
|
|
|
|
assert(srcn < info->num_srcs);
|
2018-11-28 03:30:22 +00:00
|
|
|
if (info->src_components[srcn] > 0)
|
2018-03-28 13:32:10 +01:00
|
|
|
return info->src_components[srcn];
|
2018-11-28 03:30:22 +00:00
|
|
|
else if (info->src_components[srcn] == 0)
|
2018-03-28 13:32:10 +01:00
|
|
|
return intr->num_components;
|
2018-11-28 03:30:22 +00:00
|
|
|
else
|
|
|
|
return nir_src_num_components(intr->src[srcn]);
|
2018-03-28 13:32:10 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
static inline unsigned
|
|
|
|
nir_intrinsic_dest_components(nir_intrinsic_instr *intr)
|
|
|
|
{
|
|
|
|
const nir_intrinsic_info *info = &nir_intrinsic_infos[intr->intrinsic];
|
|
|
|
if (!info->has_dest)
|
|
|
|
return 0;
|
|
|
|
else if (info->dest_components)
|
|
|
|
return info->dest_components;
|
|
|
|
else
|
|
|
|
return intr->num_components;
|
|
|
|
}
|
2016-01-13 23:43:14 +00:00
|
|
|
|
2020-05-06 21:35:51 +01:00
|
|
|
/**
|
|
|
|
* Helper to copy const_index[] from src to dst, without assuming they
|
|
|
|
* match in order.
|
|
|
|
*/
|
|
|
|
static inline void
|
|
|
|
nir_intrinsic_copy_const_indices(nir_intrinsic_instr *dst, nir_intrinsic_instr *src)
|
|
|
|
{
|
|
|
|
if (src->intrinsic == dst->intrinsic) {
|
|
|
|
memcpy(dst->const_index, src->const_index, sizeof(dst->const_index));
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
const nir_intrinsic_info *src_info = &nir_intrinsic_infos[src->intrinsic];
|
|
|
|
const nir_intrinsic_info *dst_info = &nir_intrinsic_infos[dst->intrinsic];
|
|
|
|
|
|
|
|
for (unsigned i = 0; i < NIR_INTRINSIC_NUM_INDEX_FLAGS; i++) {
|
|
|
|
if (src_info->index_map[i] == 0)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
/* require that dst instruction also uses the same const_index[]: */
|
|
|
|
assert(dst_info->index_map[i] > 0);
|
|
|
|
|
|
|
|
dst->const_index[dst_info->index_map[i] - 1] =
|
|
|
|
src->const_index[src_info->index_map[i] - 1];
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-01-13 23:43:14 +00:00
|
|
|
#define INTRINSIC_IDX_ACCESSORS(name, flag, type) \
|
|
|
|
static inline type \
|
2017-06-07 00:19:15 +01:00
|
|
|
nir_intrinsic_##name(const nir_intrinsic_instr *instr) \
|
2016-01-13 23:43:14 +00:00
|
|
|
{ \
|
|
|
|
const nir_intrinsic_info *info = &nir_intrinsic_infos[instr->intrinsic]; \
|
|
|
|
assert(info->index_map[NIR_INTRINSIC_##flag] > 0); \
|
2018-08-16 21:11:44 +01:00
|
|
|
return (type)instr->const_index[info->index_map[NIR_INTRINSIC_##flag] - 1]; \
|
2016-01-13 23:43:14 +00:00
|
|
|
} \
|
|
|
|
static inline void \
|
|
|
|
nir_intrinsic_set_##name(nir_intrinsic_instr *instr, type val) \
|
|
|
|
{ \
|
|
|
|
const nir_intrinsic_info *info = &nir_intrinsic_infos[instr->intrinsic]; \
|
|
|
|
assert(info->index_map[NIR_INTRINSIC_##flag] > 0); \
|
|
|
|
instr->const_index[info->index_map[NIR_INTRINSIC_##flag] - 1] = val; \
|
2020-08-20 14:01:23 +01:00
|
|
|
} \
|
|
|
|
static inline bool \
|
2020-05-27 23:08:28 +01:00
|
|
|
nir_intrinsic_has_##name(const nir_intrinsic_instr *instr) \
|
2020-08-20 14:01:23 +01:00
|
|
|
{ \
|
|
|
|
const nir_intrinsic_info *info = &nir_intrinsic_infos[instr->intrinsic]; \
|
|
|
|
return info->index_map[NIR_INTRINSIC_##flag] > 0; \
|
2016-01-13 23:43:14 +00:00
|
|
|
}
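/* For example, INTRINSIC_IDX_ACCESSORS(base, BASE, int) below expands to
 * nir_intrinsic_base(), nir_intrinsic_set_base() and nir_intrinsic_has_base(),
 * all keyed off index_map[NIR_INTRINSIC_BASE].
 */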
|
|
|
|
|
|
|
|
INTRINSIC_IDX_ACCESSORS(write_mask, WRMASK, unsigned)
|
|
|
|
INTRINSIC_IDX_ACCESSORS(base, BASE, int)
|
|
|
|
INTRINSIC_IDX_ACCESSORS(stream_id, STREAM_ID, unsigned)
|
|
|
|
INTRINSIC_IDX_ACCESSORS(ucp_id, UCP_ID, unsigned)
|
2015-11-24 21:52:49 +00:00
|
|
|
INTRINSIC_IDX_ACCESSORS(range, RANGE, unsigned)
|
2020-08-14 21:10:02 +01:00
|
|
|
INTRINSIC_IDX_ACCESSORS(range_base, RANGE_BASE, unsigned)
|
2016-03-25 17:17:28 +00:00
|
|
|
INTRINSIC_IDX_ACCESSORS(desc_set, DESC_SET, unsigned)
|
|
|
|
INTRINSIC_IDX_ACCESSORS(binding, BINDING, unsigned)
|
2016-05-23 07:46:46 +01:00
|
|
|
INTRINSIC_IDX_ACCESSORS(component, COMPONENT, unsigned)
|
2016-07-12 09:46:43 +01:00
|
|
|
INTRINSIC_IDX_ACCESSORS(interp_mode, INTERP_MODE, unsigned)
|
2017-08-30 04:09:58 +01:00
|
|
|
INTRINSIC_IDX_ACCESSORS(reduction_op, REDUCTION_OP, unsigned)
|
|
|
|
INTRINSIC_IDX_ACCESSORS(cluster_size, CLUSTER_SIZE, unsigned)
|
2018-03-22 23:41:18 +00:00
|
|
|
INTRINSIC_IDX_ACCESSORS(param_idx, PARAM_IDX, unsigned)
|
2018-08-16 21:11:44 +01:00
|
|
|
INTRINSIC_IDX_ACCESSORS(image_dim, IMAGE_DIM, enum glsl_sampler_dim)
|
|
|
|
INTRINSIC_IDX_ACCESSORS(image_array, IMAGE_ARRAY, bool)
|
|
|
|
INTRINSIC_IDX_ACCESSORS(access, ACCESS, enum gl_access_qualifier)
|
2019-06-04 10:40:14 +01:00
|
|
|
INTRINSIC_IDX_ACCESSORS(src_access, SRC_ACCESS, enum gl_access_qualifier)
|
|
|
|
INTRINSIC_IDX_ACCESSORS(dst_access, DST_ACCESS, enum gl_access_qualifier)
|
2020-01-10 22:09:43 +00:00
|
|
|
INTRINSIC_IDX_ACCESSORS(format, FORMAT, enum pipe_format)
|
2018-11-13 15:45:03 +00:00
|
|
|
INTRINSIC_IDX_ACCESSORS(align_mul, ALIGN_MUL, unsigned)
|
|
|
|
INTRINSIC_IDX_ACCESSORS(align_offset, ALIGN_OFFSET, unsigned)
|
2018-12-13 22:50:19 +00:00
|
|
|
INTRINSIC_IDX_ACCESSORS(desc_type, DESC_TYPE, unsigned)
|
2020-10-01 03:20:53 +01:00
|
|
|
INTRINSIC_IDX_ACCESSORS(src_type, SRC_TYPE, nir_alu_type)
|
|
|
|
INTRINSIC_IDX_ACCESSORS(dest_type, DEST_TYPE, nir_alu_type)
|
2018-05-09 19:37:24 +01:00
|
|
|
INTRINSIC_IDX_ACCESSORS(swizzle_mask, SWIZZLE_MASK, unsigned)
|
2019-10-11 01:17:10 +01:00
|
|
|
INTRINSIC_IDX_ACCESSORS(driver_location, DRIVER_LOCATION, unsigned)
|
2019-07-19 00:14:03 +01:00
|
|
|
INTRINSIC_IDX_ACCESSORS(memory_semantics, MEMORY_SEMANTICS, nir_memory_semantics)
|
|
|
|
INTRINSIC_IDX_ACCESSORS(memory_modes, MEMORY_MODES, nir_variable_mode)
|
|
|
|
INTRINSIC_IDX_ACCESSORS(memory_scope, MEMORY_SCOPE, nir_scope)
|
2020-05-05 08:13:20 +01:00
|
|
|
INTRINSIC_IDX_ACCESSORS(execution_scope, EXECUTION_SCOPE, nir_scope)
|
2020-09-30 21:19:45 +01:00
|
|
|
INTRINSIC_IDX_ACCESSORS(rounding_mode, ROUNDING_MODE, nir_rounding_mode)
|
|
|
|
INTRINSIC_IDX_ACCESSORS(saturate, SATURATE, bool)
|
2018-11-13 15:45:03 +00:00
|
|
|
|
|
|
|
static inline void
|
|
|
|
nir_intrinsic_set_align(nir_intrinsic_instr *intrin,
|
|
|
|
unsigned align_mul, unsigned align_offset)
|
|
|
|
{
|
|
|
|
assert(util_is_power_of_two_nonzero(align_mul));
|
|
|
|
assert(align_offset < align_mul);
|
|
|
|
nir_intrinsic_set_align_mul(intrin, align_mul);
|
|
|
|
nir_intrinsic_set_align_offset(intrin, align_offset);
|
|
|
|
}
|
|
|
|
|
|
|
|
/** Returns a simple alignment for a load/store intrinsic offset
|
|
|
|
*
|
|
|
|
* Instead of the full mul+offset alignment scheme provided by the ALIGN_MUL
|
|
|
|
* and ALIGN_OFFSET parameters, this helper takes both into account and
|
|
|
|
* provides a single simple alignment parameter. The offset X is guaranteed
|
|
|
|
* to satisfy X % align == 0.
|
|
|
|
*/
|
|
|
|
static inline unsigned
|
2018-12-14 11:08:51 +00:00
|
|
|
nir_intrinsic_align(const nir_intrinsic_instr *intrin)
|
2018-11-13 15:45:03 +00:00
|
|
|
{
|
|
|
|
const unsigned align_mul = nir_intrinsic_align_mul(intrin);
|
|
|
|
const unsigned align_offset = nir_intrinsic_align_offset(intrin);
|
|
|
|
assert(align_offset < align_mul);
|
|
|
|
return align_offset ? 1 << (ffs(align_offset) - 1) : align_mul;
|
|
|
|
}
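/* Worked examples (illustrative): align_mul = 16 with align_offset = 0 yields
 * a simple alignment of 16, while align_mul = 16 with align_offset = 4 yields
 * 4, the largest power of two dividing every reachable offset (4, 20, 36, ...).
 */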
|
2016-01-13 23:43:14 +00:00
|
|
|
|
2020-05-27 23:08:28 +01:00
|
|
|
static inline bool
|
|
|
|
nir_intrinsic_has_align(const nir_intrinsic_instr *intrin)
|
|
|
|
{
|
|
|
|
return nir_intrinsic_has_align_mul(intrin) &&
|
|
|
|
nir_intrinsic_has_align_offset(intrin);
|
|
|
|
}
|
|
|
|
|
2020-08-12 04:48:12 +01:00
|
|
|
static inline void
|
|
|
|
nir_intrinsic_set_io_semantics(nir_intrinsic_instr *intrin,
|
|
|
|
nir_io_semantics semantics)
|
|
|
|
{
|
|
|
|
const nir_intrinsic_info *info = &nir_intrinsic_infos[intrin->intrinsic];
|
|
|
|
assert(info->index_map[NIR_INTRINSIC_IO_SEMANTICS] > 0);
|
|
|
|
STATIC_ASSERT(sizeof(nir_io_semantics) == sizeof(intrin->const_index[0]));
|
|
|
|
semantics._pad = 0; /* clear padding bits */
|
|
|
|
memcpy(&intrin->const_index[info->index_map[NIR_INTRINSIC_IO_SEMANTICS] - 1],
|
|
|
|
&semantics, sizeof(semantics));
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline nir_io_semantics
|
|
|
|
nir_intrinsic_io_semantics(const nir_intrinsic_instr *intrin)
|
|
|
|
{
|
|
|
|
const nir_intrinsic_info *info = &nir_intrinsic_infos[intrin->intrinsic];
|
|
|
|
assert(info->index_map[NIR_INTRINSIC_IO_SEMANTICS] > 0);
|
|
|
|
nir_io_semantics semantics;
|
|
|
|
memcpy(&semantics,
|
|
|
|
&intrin->const_index[info->index_map[NIR_INTRINSIC_IO_SEMANTICS] - 1],
|
|
|
|
sizeof(semantics));
|
|
|
|
return semantics;
|
|
|
|
}
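/* A usage sketch (illustrative; the helper name is hypothetical): tag an
 * output intrinsic as writing a single slot at `location`, using the setter
 * above.
 */
static inline void
nir_set_single_slot_io_sketch(nir_intrinsic_instr *intrin, unsigned location)
{
   nir_io_semantics sem = {0};
   sem.location = location;
   sem.num_slots = 1;
   nir_intrinsic_set_io_semantics(intrin, sem);
}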
|
|
|
|
|
2020-02-05 23:46:40 +00:00
|
|
|
unsigned
|
|
|
|
nir_image_intrinsic_coord_components(const nir_intrinsic_instr *instr);
|
|
|
|
|
2019-03-28 21:21:46 +00:00
|
|
|
/* Converts an image_deref_* intrinsic into an image_* one */
|
|
|
|
void nir_rewrite_image_intrinsic(nir_intrinsic_instr *instr,
|
2019-03-24 19:43:55 +00:00
|
|
|
nir_ssa_def *handle, bool bindless);
|
2019-03-28 21:21:46 +00:00
|
|
|
|
2019-06-04 12:02:31 +01:00
|
|
|
/* Determine if an intrinsic can be arbitrarily reordered and eliminated. */
|
|
|
|
static inline bool
|
|
|
|
nir_intrinsic_can_reorder(nir_intrinsic_instr *instr)
|
|
|
|
{
|
2019-06-04 13:12:34 +01:00
|
|
|
if (instr->intrinsic == nir_intrinsic_load_deref ||
|
|
|
|
instr->intrinsic == nir_intrinsic_load_ssbo ||
|
|
|
|
instr->intrinsic == nir_intrinsic_bindless_image_load ||
|
|
|
|
instr->intrinsic == nir_intrinsic_image_deref_load ||
|
|
|
|
instr->intrinsic == nir_intrinsic_image_load) {
|
|
|
|
return nir_intrinsic_access(instr) & ACCESS_CAN_REORDER;
|
|
|
|
} else {
|
|
|
|
const nir_intrinsic_info *info =
|
|
|
|
&nir_intrinsic_infos[instr->intrinsic];
|
|
|
|
return (info->flags & NIR_INTRINSIC_CAN_ELIMINATE) &&
|
|
|
|
(info->flags & NIR_INTRINSIC_CAN_REORDER);
|
|
|
|
}
|
2019-06-04 12:02:31 +01:00
|
|
|
}
|
|
|
|
|
2014-08-01 00:14:51 +01:00
|
|
|
/**
|
|
|
|
* \group texture information
|
|
|
|
*
|
|
|
|
* This gives semantic information about textures which is useful to the
|
|
|
|
* frontend, the backend, and lowering passes, but not the optimizer.
|
|
|
|
*/
|
|
|
|
|
|
|
|
typedef enum {
|
|
|
|
nir_tex_src_coord,
|
|
|
|
nir_tex_src_projector,
|
2016-12-12 13:32:38 +00:00
|
|
|
nir_tex_src_comparator, /* shadow comparator */
|
2014-08-01 00:14:51 +01:00
|
|
|
nir_tex_src_offset,
|
|
|
|
nir_tex_src_bias,
|
|
|
|
nir_tex_src_lod,
|
2018-10-03 03:15:47 +01:00
|
|
|
nir_tex_src_min_lod,
|
2014-08-01 00:14:51 +01:00
|
|
|
nir_tex_src_ms_index, /* MSAA sample index */
|
2016-05-03 01:28:38 +01:00
|
|
|
nir_tex_src_ms_mcs, /* MSAA compression value */
|
2014-08-01 00:14:51 +01:00
|
|
|
nir_tex_src_ddx,
|
|
|
|
nir_tex_src_ddy,
|
2018-03-19 17:24:59 +00:00
|
|
|
nir_tex_src_texture_deref, /* < deref pointing to the texture */
|
|
|
|
nir_tex_src_sampler_deref, /* < deref pointing to the sampler */
|
2016-02-06 17:05:10 +00:00
|
|
|
nir_tex_src_texture_offset, /* < dynamically uniform indirect offset */
|
2015-11-03 01:58:29 +00:00
|
|
|
nir_tex_src_sampler_offset, /* < dynamically uniform indirect offset */
|
2018-10-28 12:52:44 +00:00
|
|
|
nir_tex_src_texture_handle, /* < bindless texture handle */
|
|
|
|
nir_tex_src_sampler_handle, /* < bindless sampler handle */
|
2016-05-02 05:12:48 +01:00
|
|
|
nir_tex_src_plane, /* < selects plane for planar textures */
|
2015-01-10 04:01:13 +00:00
|
|
|
nir_num_tex_src_types
|
|
|
|
} nir_tex_src_type;
|
|
|
|
|
|
|
|
typedef struct {
|
|
|
|
nir_src src;
|
|
|
|
nir_tex_src_type src_type;
|
|
|
|
} nir_tex_src;
|
2014-08-01 00:14:51 +01:00
|
|
|
|
|
|
|
typedef enum {
|
|
|
|
nir_texop_tex, /**< Regular texture look-up */
|
|
|
|
nir_texop_txb, /**< Texture look-up with LOD bias */
|
|
|
|
nir_texop_txl, /**< Texture look-up with explicit LOD */
|
2017-11-06 23:28:25 +00:00
|
|
|
nir_texop_txd, /**< Texture look-up with partial derivatives */
|
2014-08-01 00:14:51 +01:00
|
|
|
nir_texop_txf, /**< Texel fetch with explicit LOD */
|
2019-04-26 18:05:08 +01:00
|
|
|
nir_texop_txf_ms, /**< Multisample texture fetch */
|
|
|
|
nir_texop_txf_ms_fb, /**< Multisample texture fetch from framebuffer */
|
2016-05-03 01:28:38 +01:00
|
|
|
nir_texop_txf_ms_mcs, /**< Multisample compression value fetch */
|
2014-08-01 00:14:51 +01:00
|
|
|
nir_texop_txs, /**< Texture size */
|
|
|
|
nir_texop_lod, /**< Texture lod query */
|
|
|
|
nir_texop_tg4, /**< Texture gather */
|
2015-08-28 04:05:03 +01:00
|
|
|
nir_texop_query_levels, /**< Texture levels query */
|
|
|
|
nir_texop_texture_samples, /**< Texture samples query */
|
2015-11-18 01:09:09 +00:00
|
|
|
nir_texop_samples_identical, /**< Query whether all samples are definitely
|
|
|
|
* identical.
|
|
|
|
*/
|
2019-07-10 08:48:21 +01:00
|
|
|
nir_texop_tex_prefetch, /**< Regular texture look-up, eligible for pre-dispatch */
|
2020-01-07 07:39:39 +00:00
|
|
|
nir_texop_fragment_fetch, /**< Multisample fragment color texture fetch */
|
|
|
|
nir_texop_fragment_mask_fetch,/**< Multisample fragment mask texture fetch */
|
2014-08-01 00:14:51 +01:00
|
|
|
} nir_texop;
|
|
|
|
|
|
|
|
typedef struct {
|
|
|
|
nir_instr instr;
|
|
|
|
|
|
|
|
enum glsl_sampler_dim sampler_dim;
|
|
|
|
nir_alu_type dest_type;
|
|
|
|
|
|
|
|
nir_texop op;
|
|
|
|
nir_dest dest;
|
2015-01-10 04:01:13 +00:00
|
|
|
nir_tex_src *src;
|
2014-08-01 00:14:51 +01:00
|
|
|
unsigned num_srcs, coord_components;
|
|
|
|
bool is_array, is_shadow;
|
|
|
|
|
|
|
|
/**
|
|
|
|
* If is_shadow is true, whether this is the old-style shadow that outputs 4
|
|
|
|
* components or the new-style shadow that outputs 1 component.
|
|
|
|
*/
|
|
|
|
bool is_new_style_shadow;
|
|
|
|
|
|
|
|
/* gather component selector */
|
|
|
|
unsigned component : 2;
|
|
|
|
|
2019-03-18 20:23:59 +00:00
|
|
|
/* gather offsets */
|
|
|
|
int8_t tg4_offsets[4][2];
|
|
|
|
|
2019-02-27 20:36:44 +00:00
|
|
|
/* True if the texture index or handle is not dynamically uniform */
|
|
|
|
bool texture_non_uniform;
|
|
|
|
|
|
|
|
/* True if the sampler index or handle is not dynamically uniform */
|
|
|
|
bool sampler_non_uniform;
|
|
|
|
|
2016-02-06 17:05:10 +00:00
|
|
|
/** The texture index
|
2014-12-05 22:46:24 +00:00
|
|
|
*
|
2016-02-06 17:05:10 +00:00
|
|
|
* If this texture instruction has a nir_tex_src_texture_offset source,
|
|
|
|
* then the texture index is given by texture_index + texture_offset.
|
2014-12-05 22:46:24 +00:00
|
|
|
*/
|
2016-02-06 17:05:10 +00:00
|
|
|
unsigned texture_index;
|
2014-12-05 22:46:24 +00:00
|
|
|
|
2015-11-03 01:58:29 +00:00
|
|
|
/** The sampler index
|
|
|
|
*
|
|
|
|
* The following operations do not require a sampler and, as such, this
|
|
|
|
* field should be ignored:
|
|
|
|
* - nir_texop_txf
|
|
|
|
* - nir_texop_txf_ms
|
|
|
|
* - nir_texop_txs
|
|
|
|
* - nir_texop_lod
|
|
|
|
* - nir_texop_query_levels
|
|
|
|
* - nir_texop_texture_samples
|
|
|
|
* - nir_texop_samples_identical
|
|
|
|
*
|
|
|
|
* If this texture instruction has a nir_tex_src_sampler_offset source,
|
|
|
|
* then the sampler index is given by sampler_index + sampler_offset.
|
|
|
|
*/
|
|
|
|
unsigned sampler_index;
|
2014-08-01 00:14:51 +01:00
|
|
|
} nir_tex_instr;
|
|
|
|
|
2020-04-22 22:32:47 +01:00
|
|
|
/*
|
|
|
|
* Returns true if the texture operation requires a sampler as a general rule;
|
|
|
|
* see the documentation of sampler_index.
|
|
|
|
*
|
|
|
|
* Note that a specific hw/driver backend could still require a sampler
|
|
|
|
* object/configuration packet in any case, for some other reason.
|
|
|
|
*/
|
|
|
|
static inline bool
|
|
|
|
nir_tex_instr_need_sampler(const nir_tex_instr *instr)
|
|
|
|
{
|
|
|
|
switch (instr->op) {
|
|
|
|
case nir_texop_txf:
|
|
|
|
case nir_texop_txf_ms:
|
|
|
|
case nir_texop_txs:
|
|
|
|
case nir_texop_lod:
|
|
|
|
case nir_texop_query_levels:
|
|
|
|
case nir_texop_texture_samples:
|
|
|
|
case nir_texop_samples_identical:
|
|
|
|
return false;
|
|
|
|
default:
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-08-01 00:14:51 +01:00
|
|
|
static inline unsigned
|
2017-06-07 00:19:15 +01:00
|
|
|
nir_tex_instr_dest_size(const nir_tex_instr *instr)
|
2014-08-01 00:14:51 +01:00
|
|
|
{
|
2015-04-10 05:03:02 +01:00
|
|
|
switch (instr->op) {
|
|
|
|
case nir_texop_txs: {
|
2014-08-01 00:14:51 +01:00
|
|
|
unsigned ret;
|
|
|
|
switch (instr->sampler_dim) {
|
|
|
|
case GLSL_SAMPLER_DIM_1D:
|
|
|
|
case GLSL_SAMPLER_DIM_BUF:
|
|
|
|
ret = 1;
|
|
|
|
break;
|
|
|
|
case GLSL_SAMPLER_DIM_2D:
|
|
|
|
case GLSL_SAMPLER_DIM_CUBE:
|
|
|
|
case GLSL_SAMPLER_DIM_MS:
|
|
|
|
case GLSL_SAMPLER_DIM_RECT:
|
|
|
|
case GLSL_SAMPLER_DIM_EXTERNAL:
|
2016-09-15 02:08:12 +01:00
|
|
|
case GLSL_SAMPLER_DIM_SUBPASS:
|
2014-08-01 00:14:51 +01:00
|
|
|
ret = 2;
|
|
|
|
break;
|
|
|
|
case GLSL_SAMPLER_DIM_3D:
|
|
|
|
ret = 3;
|
|
|
|
break;
|
|
|
|
default:
|
2015-01-22 04:22:18 +00:00
|
|
|
unreachable("not reached");
|
2014-08-01 00:14:51 +01:00
|
|
|
}
|
|
|
|
if (instr->is_array)
|
|
|
|
ret++;
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2015-04-10 05:04:21 +01:00
|
|
|
case nir_texop_lod:
|
2014-08-01 00:14:51 +01:00
|
|
|
return 2;
|
|
|
|
|
2015-08-28 04:05:03 +01:00
|
|
|
case nir_texop_texture_samples:
|
2015-04-10 05:04:21 +01:00
|
|
|
case nir_texop_query_levels:
|
2015-11-18 01:09:09 +00:00
|
|
|
case nir_texop_samples_identical:
|
2020-01-07 07:39:39 +00:00
|
|
|
case nir_texop_fragment_mask_fetch:
|
2015-04-10 05:04:21 +01:00
|
|
|
return 1;
|
|
|
|
|
2015-04-10 05:03:02 +01:00
|
|
|
default:
|
|
|
|
if (instr->is_shadow && instr->is_new_style_shadow)
|
|
|
|
return 1;
|
2014-08-01 00:14:51 +01:00
|
|
|
|
2015-04-10 05:03:02 +01:00
|
|
|
return 4;
|
|
|
|
}
|
2014-08-01 00:14:51 +01:00
|
|
|
}
|
|
|
|
|
2015-11-12 02:30:09 +00:00
|
|
|
/* Returns true if this texture operation queries something about the texture
|
|
|
|
* rather than actually sampling it.
|
|
|
|
*/
|
|
|
|
static inline bool
|
2017-06-07 00:19:15 +01:00
|
|
|
nir_tex_instr_is_query(const nir_tex_instr *instr)
|
2015-11-12 02:30:09 +00:00
|
|
|
{
|
|
|
|
switch (instr->op) {
|
|
|
|
case nir_texop_txs:
|
|
|
|
case nir_texop_lod:
|
|
|
|
case nir_texop_texture_samples:
|
|
|
|
case nir_texop_query_levels:
|
2016-05-03 01:28:38 +01:00
|
|
|
case nir_texop_txf_ms_mcs:
|
2015-11-12 02:30:09 +00:00
|
|
|
return true;
|
|
|
|
case nir_texop_tex:
|
|
|
|
case nir_texop_txb:
|
|
|
|
case nir_texop_txl:
|
|
|
|
case nir_texop_txd:
|
|
|
|
case nir_texop_txf:
|
|
|
|
case nir_texop_txf_ms:
|
2019-04-26 18:05:08 +01:00
|
|
|
case nir_texop_txf_ms_fb:
|
2015-11-12 02:30:09 +00:00
|
|
|
case nir_texop_tg4:
|
|
|
|
return false;
|
|
|
|
default:
|
|
|
|
unreachable("Invalid texture opcode");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-06-07 23:58:15 +01:00
|
|
|
static inline bool
|
|
|
|
nir_tex_instr_has_implicit_derivative(const nir_tex_instr *instr)
|
|
|
|
{
|
|
|
|
switch (instr->op) {
|
|
|
|
case nir_texop_tex:
|
|
|
|
case nir_texop_txb:
|
|
|
|
case nir_texop_lod:
|
|
|
|
return true;
|
|
|
|
default:
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-05-04 04:18:50 +01:00
|
|
|
static inline nir_alu_type
|
2017-06-07 00:19:15 +01:00
|
|
|
nir_tex_instr_src_type(const nir_tex_instr *instr, unsigned src)
|
2016-05-04 04:18:50 +01:00
|
|
|
{
|
|
|
|
switch (instr->src[src].src_type) {
|
|
|
|
case nir_tex_src_coord:
|
|
|
|
switch (instr->op) {
|
|
|
|
case nir_texop_txf:
|
|
|
|
case nir_texop_txf_ms:
|
2019-04-26 18:05:08 +01:00
|
|
|
case nir_texop_txf_ms_fb:
|
2016-05-04 04:18:50 +01:00
|
|
|
case nir_texop_txf_ms_mcs:
|
|
|
|
case nir_texop_samples_identical:
|
|
|
|
return nir_type_int;
|
|
|
|
|
|
|
|
default:
|
|
|
|
return nir_type_float;
|
|
|
|
}
|
|
|
|
|
|
|
|
case nir_tex_src_lod:
|
|
|
|
switch (instr->op) {
|
|
|
|
case nir_texop_txs:
|
|
|
|
case nir_texop_txf:
|
|
|
|
return nir_type_int;
|
|
|
|
|
|
|
|
default:
|
|
|
|
return nir_type_float;
|
|
|
|
}
|
|
|
|
|
|
|
|
case nir_tex_src_projector:
|
2016-12-12 13:32:38 +00:00
|
|
|
case nir_tex_src_comparator:
|
2016-05-04 04:18:50 +01:00
|
|
|
case nir_tex_src_bias:
|
2019-08-19 02:53:24 +01:00
|
|
|
case nir_tex_src_min_lod:
|
2016-05-04 04:18:50 +01:00
|
|
|
case nir_tex_src_ddx:
|
|
|
|
case nir_tex_src_ddy:
|
|
|
|
return nir_type_float;
|
|
|
|
|
|
|
|
case nir_tex_src_offset:
|
|
|
|
case nir_tex_src_ms_index:
|
2019-08-19 02:53:24 +01:00
|
|
|
case nir_tex_src_plane:
|
|
|
|
return nir_type_int;
|
|
|
|
|
|
|
|
case nir_tex_src_ms_mcs:
|
|
|
|
case nir_tex_src_texture_deref:
|
|
|
|
case nir_tex_src_sampler_deref:
|
2016-05-04 04:18:50 +01:00
|
|
|
case nir_tex_src_texture_offset:
|
|
|
|
case nir_tex_src_sampler_offset:
|
2019-08-19 02:53:24 +01:00
|
|
|
case nir_tex_src_texture_handle:
|
|
|
|
case nir_tex_src_sampler_handle:
|
|
|
|
return nir_type_uint;
|
2016-05-04 04:18:50 +01:00
|
|
|
|
2019-08-19 02:53:24 +01:00
|
|
|
case nir_num_tex_src_types:
|
|
|
|
unreachable("nir_num_tex_src_types is not a valid source type");
|
2016-05-04 04:18:50 +01:00
|
|
|
}
|
2019-08-19 02:53:24 +01:00
|
|
|
|
|
|
|
unreachable("Invalid texture source type");
|
2016-05-04 04:18:50 +01:00
|
|
|
}
|
|
|
|
|
2014-08-01 00:14:51 +01:00
|
|
|
static inline unsigned
|
2017-06-07 00:19:15 +01:00
|
|
|
nir_tex_instr_src_size(const nir_tex_instr *instr, unsigned src)
|
2014-08-01 00:14:51 +01:00
|
|
|
{
|
2015-01-10 04:01:13 +00:00
|
|
|
if (instr->src[src].src_type == nir_tex_src_coord)
|
2014-08-01 00:14:51 +01:00
|
|
|
return instr->coord_components;
|
|
|
|
|
2016-05-03 01:28:38 +01:00
|
|
|
/* The MCS value is expected to be a vec4 returned by a txf_ms_mcs */
|
|
|
|
if (instr->src[src].src_type == nir_tex_src_ms_mcs)
|
|
|
|
return 4;
|
2014-08-01 00:14:51 +01:00
|
|
|
|
2017-11-19 17:27:12 +00:00
|
|
|
if (instr->src[src].src_type == nir_tex_src_ddx ||
|
2015-01-10 04:01:13 +00:00
|
|
|
instr->src[src].src_type == nir_tex_src_ddy) {
|
2014-08-01 00:14:51 +01:00
|
|
|
if (instr->is_array)
|
|
|
|
return instr->coord_components - 1;
|
|
|
|
else
|
|
|
|
return instr->coord_components;
|
|
|
|
}
|
|
|
|
|
2017-11-19 17:27:12 +00:00
|
|
|
/* Usual APIs don't allow cube + offset, but we allow it, with 2 coords for
|
|
|
|
* the offset, since a cube maps to a single face.
|
|
|
|
*/
|
|
|
|
if (instr->src[src].src_type == nir_tex_src_offset) {
|
|
|
|
if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE)
|
|
|
|
return 2;
|
|
|
|
else if (instr->is_array)
|
|
|
|
return instr->coord_components - 1;
|
|
|
|
else
|
|
|
|
return instr->coord_components;
|
|
|
|
}
|
|
|
|
|
2014-08-01 00:14:51 +01:00
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline int
|
2017-06-07 00:19:15 +01:00
|
|
|
nir_tex_instr_src_index(const nir_tex_instr *instr, nir_tex_src_type type)
|
2014-08-01 00:14:51 +01:00
|
|
|
{
|
|
|
|
for (unsigned i = 0; i < instr->num_srcs; i++)
|
2015-01-10 04:01:13 +00:00
|
|
|
if (instr->src[i].src_type == type)
|
2014-08-01 00:14:51 +01:00
|
|
|
return (int) i;
|
|
|
|
|
|
|
|
return -1;
|
|
|
|
}
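/* A usage sketch (illustrative, not part of the original header): fetch the
 * explicit LOD source of a texture instruction, or NULL if it has none.
 */
static inline nir_src *
nir_tex_instr_get_lod_src_sketch(nir_tex_instr *tex)
{
   int idx = nir_tex_instr_src_index(tex, nir_tex_src_lod);
   return idx >= 0 ? &tex->src[idx].src : NULL;
}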
|
|
|
|
|
2017-10-16 16:50:23 +01:00
|
|
|
void nir_tex_instr_add_src(nir_tex_instr *tex,
|
|
|
|
nir_tex_src_type src_type,
|
|
|
|
nir_src src);
|
|
|
|
|
2016-09-08 19:07:06 +01:00
|
|
|
void nir_tex_instr_remove_src(nir_tex_instr *tex, unsigned src_idx);
|
|
|
|
|
2019-03-18 20:23:59 +00:00
|
|
|
bool nir_tex_instr_has_explicit_tg4_offsets(nir_tex_instr *tex);
|
|
|
|
|
2014-08-01 00:14:51 +01:00
|
|
|
typedef struct {
|
|
|
|
nir_instr instr;
|
|
|
|
|
2014-12-16 01:32:56 +00:00
|
|
|
nir_ssa_def def;
|
2019-03-26 23:59:03 +00:00
|
|
|
|
|
|
|
nir_const_value value[];
|
2014-08-01 00:14:51 +01:00
|
|
|
} nir_load_const_instr;
|
|
|
|
|
|
|
|
typedef enum {
|
2020-05-18 20:26:30 +01:00
|
|
|
/** Return from a function
|
|
|
|
*
|
|
|
|
* This instruction is a classic function return. It jumps to
|
|
|
|
* nir_function_impl::end_block. No return value is provided in this
|
|
|
|
* instruction. Instead, the function is expected to write any return
|
|
|
|
* data to a deref passed in from the caller.
|
|
|
|
*/
|
2014-08-01 00:14:51 +01:00
|
|
|
nir_jump_return,
|
2020-05-18 20:26:30 +01:00
|
|
|
|
|
|
|
/** Break out of the inner-most loop
|
|
|
|
*
|
|
|
|
* This has the same semantics as C's "break" statement.
|
|
|
|
*/
|
2014-08-01 00:14:51 +01:00
|
|
|
nir_jump_break,
|
2020-05-18 20:26:30 +01:00
|
|
|
|
|
|
|
/** Jump back to the top of the inner-most loop
|
|
|
|
*
|
|
|
|
* This has the same semantics as C's "continue" statement assuming that a
|
|
|
|
* NIR loop is implemented as "while (1) { body }".
|
|
|
|
*/
|
2014-08-01 00:14:51 +01:00
|
|
|
nir_jump_continue,
|
2020-07-02 13:32:04 +01:00
|
|
|
|
|
|
|
/** Jumps for unstructured CFG.
|
|
|
|
*
|
|
|
|
* As we can't rely on block ordering within an unstructured CFG, we need to
|
|
|
|
* place explicit jumps at the end of every block.
|
|
|
|
*/
|
|
|
|
nir_jump_goto,
|
|
|
|
nir_jump_goto_if,
|
2014-08-01 00:14:51 +01:00
|
|
|
} nir_jump_type;
|
|
|
|
|
|
|
|
typedef struct {
|
|
|
|
nir_instr instr;
|
|
|
|
nir_jump_type type;
|
2020-07-02 13:32:04 +01:00
|
|
|
nir_src condition;
|
|
|
|
struct nir_block *target;
|
|
|
|
struct nir_block *else_target;
|
2014-08-01 00:14:51 +01:00
|
|
|
} nir_jump_instr;
|
|
|
|
|
|
|
|
/* creates a new SSA variable in an undefined state */
|
|
|
|
|
|
|
|
typedef struct {
|
|
|
|
nir_instr instr;
|
|
|
|
nir_ssa_def def;
|
|
|
|
} nir_ssa_undef_instr;
|
|
|
|
|
|
|
|
typedef struct {
|
|
|
|
struct exec_node node;
|
2014-12-19 01:13:22 +00:00
|
|
|
|
|
|
|
/* The predecessor block corresponding to this source */
|
2014-08-01 00:14:51 +01:00
|
|
|
struct nir_block *pred;
|
2014-12-19 01:13:22 +00:00
|
|
|
|
2014-08-01 00:14:51 +01:00
|
|
|
nir_src src;
|
|
|
|
} nir_phi_src;
|
|
|
|
|
2016-04-27 04:16:21 +01:00
|
|
|
#define nir_foreach_phi_src(phi_src, phi) \
|
|
|
|
foreach_list_typed(nir_phi_src, phi_src, node, &(phi)->srcs)
|
|
|
|
#define nir_foreach_phi_src_safe(phi_src, phi) \
|
|
|
|
foreach_list_typed_safe(nir_phi_src, phi_src, node, &(phi)->srcs)
|
2015-01-21 00:30:14 +00:00
|
|
|
|
2014-08-01 00:14:51 +01:00
|
|
|
typedef struct {
|
|
|
|
nir_instr instr;
|
|
|
|
|
2014-12-19 01:13:22 +00:00
|
|
|
struct exec_list srcs; /** < list of nir_phi_src */
|
|
|
|
|
2014-08-01 00:14:51 +01:00
|
|
|
nir_dest dest;
|
|
|
|
} nir_phi_instr;
|
|
|
|
|
2014-10-31 04:04:15 +00:00
|
|
|
typedef struct {
|
|
|
|
struct exec_node node;
|
|
|
|
nir_src src;
|
|
|
|
nir_dest dest;
|
2014-12-18 00:53:04 +00:00
|
|
|
} nir_parallel_copy_entry;
|
|
|
|
|
2016-04-27 04:21:27 +01:00
|
|
|
#define nir_foreach_parallel_copy_entry(entry, pcopy) \
|
2014-12-18 00:53:04 +00:00
|
|
|
foreach_list_typed(nir_parallel_copy_entry, entry, node, &(pcopy)->entries)
|
2014-10-31 04:04:15 +00:00
|
|
|
|
|
|
|
typedef struct {
|
|
|
|
nir_instr instr;
|
2014-12-18 00:53:04 +00:00
|
|
|
|
2017-02-28 01:21:42 +00:00
|
|
|
/* A list of nir_parallel_copy_entry structs. The sources of all of the
|
2014-12-18 00:53:04 +00:00
|
|
|
* entries are copied to the corresponding destinations "in parallel".
|
|
|
|
* In other words, if we have two entries: a -> b and b -> a, the values
|
|
|
|
* get swapped.
|
|
|
|
*/
|
|
|
|
struct exec_list entries;
|
2014-10-31 04:04:15 +00:00
|
|
|
} nir_parallel_copy_instr;
|
|
|
|
|
2016-10-06 02:09:25 +01:00
|
|
|
NIR_DEFINE_CAST(nir_instr_as_alu, nir_instr, nir_alu_instr, instr,
|
|
|
|
type, nir_instr_type_alu)
|
2019-04-19 21:09:04 +01:00
|
|
|
NIR_DEFINE_CAST(nir_instr_as_deref, nir_instr, nir_deref_instr, instr,
|
|
|
|
type, nir_instr_type_deref)
|
2016-10-06 02:09:25 +01:00
|
|
|
NIR_DEFINE_CAST(nir_instr_as_call, nir_instr, nir_call_instr, instr,
|
|
|
|
type, nir_instr_type_call)
|
|
|
|
NIR_DEFINE_CAST(nir_instr_as_jump, nir_instr, nir_jump_instr, instr,
|
|
|
|
type, nir_instr_type_jump)
|
|
|
|
NIR_DEFINE_CAST(nir_instr_as_tex, nir_instr, nir_tex_instr, instr,
|
|
|
|
type, nir_instr_type_tex)
|
|
|
|
NIR_DEFINE_CAST(nir_instr_as_intrinsic, nir_instr, nir_intrinsic_instr, instr,
|
|
|
|
type, nir_instr_type_intrinsic)
|
|
|
|
NIR_DEFINE_CAST(nir_instr_as_load_const, nir_instr, nir_load_const_instr, instr,
|
|
|
|
type, nir_instr_type_load_const)
|
|
|
|
NIR_DEFINE_CAST(nir_instr_as_ssa_undef, nir_instr, nir_ssa_undef_instr, instr,
|
|
|
|
type, nir_instr_type_ssa_undef)
|
|
|
|
NIR_DEFINE_CAST(nir_instr_as_phi, nir_instr, nir_phi_instr, instr,
|
|
|
|
type, nir_instr_type_phi)
|
2014-12-05 19:00:05 +00:00
|
|
|
NIR_DEFINE_CAST(nir_instr_as_parallel_copy, nir_instr,
|
2016-10-06 02:09:25 +01:00
|
|
|
nir_parallel_copy_instr, instr,
|
|
|
|
type, nir_instr_type_parallel_copy)
|
2014-08-01 00:14:51 +01:00
|
|
|
|
2019-06-26 02:33:46 +01:00
|
|
|
|
|
|
|
#define NIR_DEFINE_SRC_AS_CONST(type, suffix) \
|
|
|
|
static inline type \
|
|
|
|
nir_src_comp_as_##suffix(nir_src src, unsigned comp) \
|
|
|
|
{ \
|
|
|
|
assert(nir_src_is_const(src)); \
|
|
|
|
nir_load_const_instr *load = \
|
|
|
|
nir_instr_as_load_const(src.ssa->parent_instr); \
|
|
|
|
assert(comp < load->def.num_components); \
|
|
|
|
return nir_const_value_as_##suffix(load->value[comp], \
|
|
|
|
load->def.bit_size); \
|
|
|
|
} \
|
|
|
|
\
|
|
|
|
static inline type \
|
|
|
|
nir_src_as_##suffix(nir_src src) \
|
|
|
|
{ \
|
|
|
|
assert(nir_src_num_components(src) == 1); \
|
|
|
|
return nir_src_comp_as_##suffix(src, 0); \
|
|
|
|
}
|
|
|
|
|
|
|
|
NIR_DEFINE_SRC_AS_CONST(int64_t, int)
|
|
|
|
NIR_DEFINE_SRC_AS_CONST(uint64_t, uint)
|
|
|
|
NIR_DEFINE_SRC_AS_CONST(bool, bool)
|
|
|
|
NIR_DEFINE_SRC_AS_CONST(double, float)
|
|
|
|
|
|
|
|
#undef NIR_DEFINE_SRC_AS_CONST
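/* A usage sketch (illustrative; the helper name is hypothetical): read a
 * constant source value with the accessors generated above, falling back to
 * ~0 when the source is not constant.
 */
static inline uint64_t
nir_const_value_or_all_ones_sketch(nir_src src)
{
   return nir_src_is_const(src) ? nir_src_as_uint(src) : ~0ull;
}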
|
|
|
|
|
|
|
|
|
2019-06-20 17:12:54 +01:00
|
|
|
typedef struct {
|
|
|
|
nir_ssa_def *def;
|
|
|
|
unsigned comp;
|
|
|
|
} nir_ssa_scalar;
|
|
|
|
|
|
|
|
static inline bool
|
|
|
|
nir_ssa_scalar_is_const(nir_ssa_scalar s)
|
|
|
|
{
|
|
|
|
return s.def->parent_instr->type == nir_instr_type_load_const;
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline nir_const_value
|
|
|
|
nir_ssa_scalar_as_const_value(nir_ssa_scalar s)
|
|
|
|
{
|
|
|
|
assert(s.comp < s.def->num_components);
|
|
|
|
nir_load_const_instr *load = nir_instr_as_load_const(s.def->parent_instr);
|
|
|
|
return load->value[s.comp];
|
|
|
|
}
|
|
|
|
|
|
|
|
#define NIR_DEFINE_SCALAR_AS_CONST(type, suffix) \
|
|
|
|
static inline type \
|
|
|
|
nir_ssa_scalar_as_##suffix(nir_ssa_scalar s) \
|
|
|
|
{ \
|
|
|
|
return nir_const_value_as_##suffix( \
|
|
|
|
nir_ssa_scalar_as_const_value(s), s.def->bit_size); \
|
|
|
|
}
|
|
|
|
|
|
|
|
NIR_DEFINE_SCALAR_AS_CONST(int64_t, int)
|
|
|
|
NIR_DEFINE_SCALAR_AS_CONST(uint64_t, uint)
|
|
|
|
NIR_DEFINE_SCALAR_AS_CONST(bool, bool)
|
|
|
|
NIR_DEFINE_SCALAR_AS_CONST(double, float)
|
|
|
|
|
|
|
|
#undef NIR_DEFINE_SCALAR_AS_CONST
|
|
|
|
|
|
|
|
static inline bool
|
|
|
|
nir_ssa_scalar_is_alu(nir_ssa_scalar s)
|
|
|
|
{
|
|
|
|
return s.def->parent_instr->type == nir_instr_type_alu;
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline nir_op
|
|
|
|
nir_ssa_scalar_alu_op(nir_ssa_scalar s)
|
|
|
|
{
|
|
|
|
return nir_instr_as_alu(s.def->parent_instr)->op;
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline nir_ssa_scalar
|
|
|
|
nir_ssa_scalar_chase_alu_src(nir_ssa_scalar s, unsigned alu_src_idx)
|
|
|
|
{
|
|
|
|
nir_ssa_scalar out = { NULL, 0 };
|
|
|
|
|
|
|
|
nir_alu_instr *alu = nir_instr_as_alu(s.def->parent_instr);
|
|
|
|
assert(alu_src_idx < nir_op_infos[alu->op].num_inputs);
|
|
|
|
|
|
|
|
/* Our component must be written */
|
|
|
|
assert(s.comp < s.def->num_components);
|
|
|
|
assert(alu->dest.write_mask & (1u << s.comp));
|
|
|
|
|
|
|
|
assert(alu->src[alu_src_idx].src.is_ssa);
|
|
|
|
out.def = alu->src[alu_src_idx].src.ssa;
|
|
|
|
|
|
|
|
if (nir_op_infos[alu->op].input_sizes[alu_src_idx] == 0) {
|
|
|
|
/* The ALU src is unsized so the source component follows the
|
|
|
|
* destination component.
|
|
|
|
*/
|
|
|
|
out.comp = alu->src[alu_src_idx].swizzle[s.comp];
|
|
|
|
} else {
|
|
|
|
/* This is a sized source so all source components work together to
|
|
|
|
* produce all the destination components. Since we need to return a
|
|
|
|
* scalar, this only works if the source is a scalar.
|
|
|
|
*/
|
|
|
|
assert(nir_op_infos[alu->op].input_sizes[alu_src_idx] == 1);
|
|
|
|
out.comp = alu->src[alu_src_idx].swizzle[0];
|
|
|
|
}
|
|
|
|
assert(out.comp < out.def->num_components);
|
|
|
|
|
|
|
|
return out;
|
|
|
|
}
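/* A minimal sketch (not part of the original header; the helper name is
 * hypothetical): chase a scalar through any chain of single-source movs,
 * built on the accessors above.
 */
static inline nir_ssa_scalar
nir_ssa_scalar_chase_movs_sketch(nir_ssa_scalar s)
{
   while (nir_ssa_scalar_is_alu(s) && nir_ssa_scalar_alu_op(s) == nir_op_mov)
      s = nir_ssa_scalar_chase_alu_src(s, 0);
   return s;
}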
|
|
|
|
|
|
|
|
|
2014-08-01 00:14:51 +01:00
|
|
|
/*
|
|
|
|
* Control flow
|
|
|
|
*
|
|
|
|
* Control flow consists of a tree of control flow nodes, which include
|
|
|
|
* if-statements and loops. The leaves of the tree are basic blocks, lists of
|
|
|
|
* instructions that always run start-to-finish. Each basic block also keeps
|
|
|
|
* track of its successors (blocks which may run immediately after the current
|
|
|
|
* block) and predecessors (blocks which could have run immediately before the
|
|
|
|
* current block). Each function also has a start block and an end block which
|
|
|
|
* all return statements point to (which is always empty). Together, all the
|
|
|
|
* blocks with their predecessors and successors make up the control flow
|
|
|
|
* graph (CFG) of the function. There are helpers that modify the tree of
|
|
|
|
* control flow nodes while modifying the CFG appropriately; these should be
|
|
|
|
* used instead of modifying the tree directly.
|
|
|
|
*/
|
|
|
|
|
|
|
|
typedef enum {
|
|
|
|
nir_cf_node_block,
|
|
|
|
nir_cf_node_if,
|
|
|
|
nir_cf_node_loop,
|
|
|
|
nir_cf_node_function
|
|
|
|
} nir_cf_node_type;
|
|
|
|
|
|
|
|
typedef struct nir_cf_node {
|
|
|
|
struct exec_node node;
|
|
|
|
nir_cf_node_type type;
|
|
|
|
struct nir_cf_node *parent;
|
|
|
|
} nir_cf_node;
|
|
|
|
|
|
|
|
typedef struct nir_block {
|
|
|
|
nir_cf_node cf_node;
|
|
|
|
|
2014-12-19 01:13:22 +00:00
|
|
|
struct exec_list instr_list; /** < list of nir_instr */
|
|
|
|
|
|
|
|
/** generic block index; generated by nir_index_blocks */
|
2014-08-01 00:14:51 +01:00
|
|
|
unsigned index;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Each block can only have up to 2 successors, so we put them in a simple
|
|
|
|
* array - no need for anything more complicated.
|
|
|
|
*/
|
|
|
|
struct nir_block *successors[2];
|
|
|
|
|
2014-12-19 01:13:22 +00:00
|
|
|
/* Set of nir_block predecessors in the CFG */
|
2014-08-01 00:14:51 +01:00
|
|
|
struct set *predecessors;
|
2014-07-19 00:13:11 +01:00
|
|
|
|
|
|
|
/*
|
|
|
|
* this node's immediate dominator in the dominance tree - set to NULL for
|
|
|
|
* the start block.
|
|
|
|
*/
|
|
|
|
struct nir_block *imm_dom;
|
|
|
|
|
|
|
|
/* This node's children in the dominance tree */
|
|
|
|
unsigned num_dom_children;
|
|
|
|
struct nir_block **dom_children;
|
|
|
|
|
2017-02-28 01:21:42 +00:00
|
|
|
/* Set of nir_blocks on the dominance frontier of this block */
|
2014-07-19 00:13:11 +01:00
|
|
|
struct set *dom_frontier;
|
2014-10-29 21:17:17 +00:00
|
|
|
|
2015-02-06 20:45:43 +00:00
|
|
|
/*
|
|
|
|
* These two indices have the property that dom_{pre,post}_index for each
|
|
|
|
* child of this block in the dominance tree will always be between
|
|
|
|
* dom_pre_index and dom_post_index for this block, which makes testing if
|
|
|
|
* a given block is dominated by another block an O(1) operation.
|
|
|
|
*/
|
2020-09-09 01:50:23 +01:00
|
|
|
uint32_t dom_pre_index, dom_post_index;
|
2015-02-06 20:45:43 +00:00
|
|
|
|
2020-07-23 20:29:02 +01:00
|
|
|
/* SSA def live in and out for this block; used for liveness analysis.
|
|
|
|
* Indexed by ssa_def->index
|
|
|
|
*/
|
2014-10-29 21:17:17 +00:00
|
|
|
BITSET_WORD *live_in;
|
|
|
|
BITSET_WORD *live_out;
|
2014-08-01 00:14:51 +01:00
|
|
|
} nir_block;
|
|
|
|
|
2017-01-18 02:38:35 +00:00
|
|
|
static inline bool
|
|
|
|
nir_block_is_reachable(nir_block *b)
|
|
|
|
{
|
|
|
|
/* See also nir_block_dominates */
|
2020-09-09 01:50:23 +01:00
|
|
|
return b->dom_post_index != 0;
|
2017-01-18 02:38:35 +00:00
|
|
|
}
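/* A sketch (illustrative; NIR's real helper is nir_block_dominates) of the
 * O(1) dominance test the dom_pre_index/dom_post_index pair enables: in a
 * DFS numbering of the dominance tree, a parent's interval encloses every
 * child's interval. Assumes dominance metadata is valid and both blocks are
 * reachable.
 */
static inline bool
nir_block_dominates_sketch(const nir_block *parent, const nir_block *child)
{
   return parent->dom_pre_index <= child->dom_pre_index &&
          child->dom_post_index <= parent->dom_post_index;
}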
|
|
|
|
|
2014-12-19 23:30:15 +00:00
|
|
|
static inline nir_instr *
|
|
|
|
nir_block_first_instr(nir_block *block)
|
|
|
|
{
|
|
|
|
struct exec_node *head = exec_list_get_head(&block->instr_list);
|
|
|
|
return exec_node_data(nir_instr, head, node);
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline nir_instr *
|
|
|
|
nir_block_last_instr(nir_block *block)
|
|
|
|
{
|
|
|
|
struct exec_node *tail = exec_list_get_tail(&block->instr_list);
|
|
|
|
return exec_node_data(nir_instr, tail, node);
|
|
|
|
}
|
2014-08-01 00:14:51 +01:00
|
|
|
|
2018-08-24 15:34:05 +01:00
|
|
|
static inline bool
|
|
|
|
nir_block_ends_in_jump(nir_block *block)
|
|
|
|
{
|
|
|
|
return !exec_list_is_empty(&block->instr_list) &&
|
|
|
|
nir_block_last_instr(block)->type == nir_instr_type_jump;
|
|
|
|
}
|
|
|
|
|
2016-04-27 02:34:19 +01:00
|
|
|
#define nir_foreach_instr(instr, block) \
|
2014-08-01 00:14:51 +01:00
|
|
|
foreach_list_typed(nir_instr, instr, node, &(block)->instr_list)
|
2016-04-27 02:34:19 +01:00
|
|
|
#define nir_foreach_instr_reverse(instr, block) \
|
2014-08-01 00:14:51 +01:00
|
|
|
foreach_list_typed_reverse(nir_instr, instr, node, &(block)->instr_list)
|
2016-04-27 02:34:19 +01:00
|
|
|
#define nir_foreach_instr_safe(instr, block) \
|
2014-08-01 00:14:51 +01:00
|
|
|
foreach_list_typed_safe(nir_instr, instr, node, &(block)->instr_list)
|
2016-04-27 02:34:19 +01:00
|
|
|
#define nir_foreach_instr_reverse_safe(instr, block) \
|
2015-11-30 17:24:23 +00:00
|
|
|
foreach_list_typed_reverse_safe(nir_instr, instr, node, &(block)->instr_list)
|
2014-08-01 00:14:51 +01:00
|
|
|
|
2019-03-20 04:42:56 +00:00
|
|
|
typedef enum {
|
|
|
|
nir_selection_control_none = 0x0,
|
|
|
|
nir_selection_control_flatten = 0x1,
|
|
|
|
nir_selection_control_dont_flatten = 0x2,
|
|
|
|
} nir_selection_control;
|
|
|
|
|
2015-04-24 18:16:27 +01:00
|
|
|
typedef struct nir_if {
|
2014-08-01 00:14:51 +01:00
|
|
|
nir_cf_node cf_node;
|
|
|
|
nir_src condition;
|
2019-03-20 04:42:56 +00:00
|
|
|
nir_selection_control control;
|
2014-12-19 01:13:22 +00:00
|
|
|
|
|
|
|
struct exec_list then_list; /** < list of nir_cf_node */
|
|
|
|
struct exec_list else_list; /** < list of nir_cf_node */
|
2014-08-01 00:14:51 +01:00
|
|
|
} nir_if;
|
|
|
|
|
2016-12-13 03:39:51 +00:00
|
|
|
typedef struct {
|
|
|
|
nir_if *nif;
|
|
|
|
|
2019-01-16 19:34:35 +00:00
|
|
|
/** Instruction that generates nif::condition. */
|
2016-12-13 03:39:51 +00:00
|
|
|
nir_instr *conditional_instr;
|
|
|
|
|
2019-01-16 19:34:35 +00:00
|
|
|
/** Block within ::nif that has the break instruction. */
|
2016-12-13 03:39:51 +00:00
|
|
|
nir_block *break_block;
|
2019-01-16 19:34:35 +00:00
|
|
|
|
|
|
|
/** Last block for the then- or else-path that does not contain the break. */
|
2016-12-13 03:39:51 +00:00
|
|
|
nir_block *continue_from_block;
|
|
|
|
|
2019-01-16 19:34:35 +00:00
|
|
|
/** True when ::break_block is in the else-path of ::nif. */
|
2016-12-13 03:39:51 +00:00
|
|
|
bool continue_from_then;
|
2018-11-15 12:23:09 +00:00
|
|
|
bool induction_rhs;
|
2016-12-13 03:39:51 +00:00
|
|
|
|
2018-11-20 02:45:58 +00:00
|
|
|
/* This is true if the terminator's exact trip count is unknown. For
|
|
|
|
* example:
|
|
|
|
*
|
|
|
|
* for (int i = 0; i < imin(x, 4); i++)
|
|
|
|
* ...
|
|
|
|
*
|
|
|
|
* Here loop analysis would have set a max_trip_count of 4; however, we don't
|
|
|
|
* know for sure that this is the exact trip count.
|
|
|
|
*/
|
|
|
|
bool exact_trip_count_unknown;
|
|
|
|
|
2016-12-13 03:39:51 +00:00
|
|
|
struct list_head loop_terminator_link;
|
|
|
|
} nir_loop_terminator;
|
|
|
|
|
|
|
|
typedef struct {
|
2019-03-03 15:24:12 +00:00
|
|
|
/* Estimated cost (in number of instructions) of the loop */
|
|
|
|
unsigned instr_cost;
|
2016-12-13 03:39:51 +00:00
|
|
|
|
2018-11-15 12:23:09 +00:00
|
|
|
/* Guessed trip count based on array indexing */
|
|
|
|
unsigned guessed_trip_count;
|
|
|
|
|
2018-11-20 00:35:37 +00:00
|
|
|
/* Maximum number of times the loop is run (if known) */
|
|
|
|
unsigned max_trip_count;
|
|
|
|
|
|
|
|
/* Do we know the exact number of times the loop will be run? */
|
|
|
|
bool exact_trip_count_known;
|
2016-12-13 03:39:51 +00:00
|
|
|
|
|
|
|
/* Unroll the loop regardless of its size */
|
|
|
|
bool force_unroll;
|
|
|
|
|
2018-07-07 03:09:26 +01:00
|
|
|
/* Does the loop contain complex loop terminators, continues or other
|
|
|
|
* complex behaviours? If this is true, we can't rely on
|
|
|
|
* loop_terminator_list to be complete or accurate.
|
|
|
|
*/
|
|
|
|
bool complex_loop;
|
|
|
|
|
2016-12-13 03:39:51 +00:00
|
|
|
nir_loop_terminator *limiting_terminator;
|
|
|
|
|
|
|
|
/* A list of loop_terminators terminating this loop. */
|
|
|
|
struct list_head loop_terminator_list;
|
|
|
|
} nir_loop_info;
|
|
|
|
|
2019-03-20 02:39:36 +00:00
|
|
|
typedef enum {
|
|
|
|
nir_loop_control_none = 0x0,
|
|
|
|
nir_loop_control_unroll = 0x1,
|
|
|
|
nir_loop_control_dont_unroll = 0x2,
|
|
|
|
} nir_loop_control;
|
|
|
|
|
2014-08-01 00:14:51 +01:00
|
|
|
typedef struct {
|
|
|
|
nir_cf_node cf_node;
|
2014-12-19 01:13:22 +00:00
|
|
|
|
|
|
|
struct exec_list body; /**< list of nir_cf_node */
|
2016-12-13 03:39:51 +00:00
|
|
|
|
|
|
|
nir_loop_info *info;
|
2019-03-20 02:39:36 +00:00
|
|
|
nir_loop_control control;
|
2018-11-19 06:01:52 +00:00
|
|
|
bool partially_unrolled;
|
2014-08-01 00:14:51 +01:00
|
|
|
} nir_loop;
|
|
|
|
|
2014-10-29 19:42:54 +00:00
|
|
|
/**
|
|
|
|
* Various bits of metadata that may be created or required by
|
|
|
|
* optimization and analysis passes
|
|
|
|
*/
|
|
|
|
typedef enum {
|
|
|
|
nir_metadata_none = 0x0,
|
2020-05-13 19:29:43 +01:00
|
|
|
|
|
|
|
/** Indicates that nir_block::index values are valid.
|
|
|
|
*
|
|
|
|
* The start block has index 0 and they increase through a natural walk of
|
|
|
|
* the CFG. nir_function_impl::num_blocks is the number of blocks and
|
|
|
|
* every block index is in the range [0, nir_function_impl::num_blocks].
|
|
|
|
*
|
|
|
|
* A pass can preserve this metadata type if it doesn't touch the CFG.
|
|
|
|
*/
|
2014-10-29 19:42:54 +00:00
|
|
|
nir_metadata_block_index = 0x1,
|
2020-05-13 19:29:43 +01:00
|
|
|
|
|
|
|
/** Indicates that block dominance information is valid
|
|
|
|
*
|
|
|
|
* This includes:
|
|
|
|
*
|
|
|
|
* - nir_block::num_dom_children
|
|
|
|
* - nir_block::dom_children
|
|
|
|
* - nir_block::dom_frontier
|
|
|
|
* - nir_block::dom_pre_index
|
|
|
|
* - nir_block::dom_post_index
|
|
|
|
*
|
|
|
|
* A pass can preserve this metadata type if it doesn't touch the CFG.
|
|
|
|
*/
|
2014-10-29 19:42:54 +00:00
|
|
|
nir_metadata_dominance = 0x2,
|
2020-05-13 19:29:43 +01:00
|
|
|
|
|
|
|
/** Indicates that SSA def data-flow liveness information is valid
|
|
|
|
*
|
|
|
|
* This includes:
|
|
|
|
*
|
|
|
|
* - nir_ssa_def::live_index
|
|
|
|
* - nir_block::live_in
|
|
|
|
* - nir_block::live_out
|
|
|
|
*
|
|
|
|
* A pass can preserve this metadata type if it never adds or removes any
|
|
|
|
* SSA defs (most passes shouldn't preserve this metadata type).
|
|
|
|
*/
|
2015-11-04 01:15:24 +00:00
|
|
|
nir_metadata_live_ssa_defs = 0x4,
|
2020-05-13 19:29:43 +01:00
|
|
|
|
|
|
|
/** A dummy metadata value to track when a pass forgot to call
|
|
|
|
* nir_metadata_preserve.
|
|
|
|
*
|
|
|
|
* A pass should always clear this value even if it doesn't make any
|
|
|
|
* progress, to indicate that it thought about preserving metadata.
|
|
|
|
*/
|
2015-11-03 08:31:22 +00:00
|
|
|
nir_metadata_not_properly_reset = 0x8,
|
2020-05-13 19:29:43 +01:00
|
|
|
|
|
|
|
/** Indicates that loop analysis information is valid.
|
|
|
|
*
|
|
|
|
* This includes everything pointed to by nir_loop::info.
|
|
|
|
*
|
|
|
|
* A pass can preserve this metadata type if it is guaranteed to not affect
|
|
|
|
* any loop metadata. However, since loop metadata includes things like
|
|
|
|
* loop counts which depend on arithmetic in the loop, this is very hard to
|
|
|
|
* determine. Most passes shouldn't preserve this metadata type.
|
|
|
|
*/
|
2016-12-13 03:39:51 +00:00
|
|
|
nir_metadata_loop_analysis = 0x10,
|
2020-05-22 02:39:30 +01:00
|
|
|
|
|
|
|
/** All metadata
|
|
|
|
*
|
|
|
|
* This includes all nir_metadata flags except not_properly_reset. Passes
|
|
|
|
* which do not change the shader in any way should call
|
|
|
|
*
|
|
|
|
* nir_metadata_preserve(impl, nir_metadata_all);
|
|
|
|
*/
|
|
|
|
nir_metadata_all = ~nir_metadata_not_properly_reset,
|
2014-10-29 19:42:54 +00:00
|
|
|
} nir_metadata;
|
2020-08-31 17:08:49 +01:00
|
|
|
MESA_DEFINE_CPP_ENUM_BITFIELD_OPERATORS(nir_metadata)
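/*
 * A minimal sketch of how a pass interacts with the metadata flags
 * above (my_pass_impl and its body are illustrative, not part of this
 * header, and assume nir.h has been included).  A pass that only
 * rewrites instructions can keep the CFG-derived metadata; a pass that
 * made no progress should report everything preserved.
 */
static inline bool
my_pass_impl(nir_function_impl *impl)
{
   bool progress = false;

   /* ... rewrite instructions here without touching the CFG ... */

   if (progress) {
      /* Block indices and dominance stay valid when the CFG is untouched. */
      nir_metadata_preserve(impl, nir_metadata_block_index |
                                  nir_metadata_dominance);
   } else {
      /* No change at all: everything, including loop info, survives. */
      nir_metadata_preserve(impl, nir_metadata_all);
   }
   return progress;
}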
|
2014-10-29 19:42:54 +00:00
|
|
|
|
2014-08-01 00:14:51 +01:00
|
|
|
typedef struct {
|
|
|
|
nir_cf_node cf_node;
|
|
|
|
|
2015-12-26 18:00:47 +00:00
|
|
|
/** pointer to the function of which this is an implementation */
|
|
|
|
struct nir_function *function;
|
2014-08-01 00:14:51 +01:00
|
|
|
|
|
|
|
struct exec_list body; /**< list of nir_cf_node */
|
|
|
|
|
2015-08-07 02:18:40 +01:00
|
|
|
nir_block *end_block;
|
2014-08-01 00:14:51 +01:00
|
|
|
|
|
|
|
/** list for all local variables in the function */
|
|
|
|
struct exec_list locals;
|
|
|
|
|
|
|
|
/** list of local registers in the function */
|
|
|
|
struct exec_list registers;
|
|
|
|
|
|
|
|
/** next available local register index */
|
|
|
|
unsigned reg_alloc;
|
|
|
|
|
|
|
|
/** next available SSA value index */
|
|
|
|
unsigned ssa_alloc;
|
|
|
|
|
|
|
|
/* total number of basic blocks; only valid when nir_metadata_block_index is set */
|
|
|
|
unsigned num_blocks;
|
|
|
|
|
2019-10-23 19:42:40 +01:00
|
|
|
/** True if this nir_function_impl uses structured control-flow
|
|
|
|
*
|
|
|
|
* Structured nir_function_impls have different validation rules.
|
|
|
|
*/
|
|
|
|
bool structured;
|
|
|
|
|
2014-10-29 19:42:54 +00:00
|
|
|
nir_metadata valid_metadata;
|
2014-08-01 00:14:51 +01:00
|
|
|
} nir_function_impl;
|
|
|
|
|
2020-05-20 16:18:14 +01:00
|
|
|
#define nir_foreach_function_temp_variable(var, impl) \
|
|
|
|
foreach_list_typed(nir_variable, var, node, &(impl)->locals)
|
|
|
|
|
|
|
|
#define nir_foreach_function_temp_variable_safe(var, impl) \
|
|
|
|
foreach_list_typed_safe(nir_variable, var, node, &(impl)->locals)
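/*
 * Example use of the iterators above (print_locals is an illustrative
 * helper, not part of nir.h).  The _safe variant would additionally
 * allow removing variables while iterating.
 */
static inline void
print_locals(nir_function_impl *impl, FILE *fp)
{
   nir_foreach_function_temp_variable(var, impl) {
      /* Variable names may be NULL for anonymous temporaries. */
      fprintf(fp, "local: %s\n", var->name ? var->name : "(unnamed)");
   }
}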
|
|
|
|
|
2016-05-13 21:17:34 +01:00
|
|
|
ATTRIBUTE_RETURNS_NONNULL static inline nir_block *
|
2015-08-07 02:18:40 +01:00
|
|
|
nir_start_block(nir_function_impl *impl)
|
|
|
|
{
|
2016-06-27 22:42:57 +01:00
|
|
|
return (nir_block *) impl->body.head_sentinel.next;
|
2015-08-07 02:18:40 +01:00
|
|
|
}
|
|
|
|
|
2016-05-13 21:17:34 +01:00
|
|
|
ATTRIBUTE_RETURNS_NONNULL static inline nir_block *
|
nir: rewrite nir_foreach_block and friends
Previously, these were functions which took a callback. This meant that
the per-block code had to be in a separate function, and all the data
that you wanted to pass in had to be a single void *. They walked the
control flow tree recursively, doing a depth-first search, and called
the callback in a preorder, matching the order of the original source
code. But since each node in the control flow tree has a pointer to its
parent, we can implement a "get-next" and "get-previous" method that
does the same thing that the recursive function did with no state at
all. This lets us rewrite nir_foreach_block() as a simple for loop,
which lets us greatly simplify its users in some cases. This does
require us to rewrite every user, although the transformation from the
old nir_foreach_block() to the new nir_foreach_block() is mostly
trivial.
One subtlety, though, is that the new nir_foreach_block() won't handle
the case where the current block is deleted, which the old one could.
There's a new nir_foreach_block_safe() which implements the standard
trick for solving this. Most users don't modify control flow, though, so
they won't need it. Right now, only opt_select_peephole needs it.
The old functions are reimplemented in terms of the new macros, although
they'll go away after everything is converted.
v2: keep an implementation of the old functions around
v3 (Jason Ekstrand): A small cosmetic change and a bugfix in the loop
handling of nir_cf_node_cf_tree_last().
v4 (Jason Ekstrand): Use the _safe macro in foreach_block_reverse_call
Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
2016-04-08 07:11:44 +01:00
|
|
|
nir_impl_last_block(nir_function_impl *impl)
|
|
|
|
{
|
2016-06-27 22:42:57 +01:00
|
|
|
return (nir_block *) impl->body.tail_sentinel.prev;
|
2016-04-08 07:11:44 +01:00
|
|
|
}
|
|
|
|
|
2014-12-19 23:30:15 +00:00
|
|
|
static inline nir_cf_node *
|
|
|
|
nir_cf_node_next(nir_cf_node *node)
|
|
|
|
{
|
2015-02-05 05:22:45 +00:00
|
|
|
struct exec_node *next = exec_node_get_next(&node->node);
|
|
|
|
if (exec_node_is_tail_sentinel(next))
|
|
|
|
return NULL;
|
|
|
|
else
|
|
|
|
return exec_node_data(nir_cf_node, next, node);
|
2014-12-19 23:30:15 +00:00
|
|
|
}
|
2014-08-01 00:14:51 +01:00
|
|
|
|
2014-12-19 23:30:15 +00:00
|
|
|
static inline nir_cf_node *
|
|
|
|
nir_cf_node_prev(nir_cf_node *node)
|
|
|
|
{
|
2015-02-05 05:22:45 +00:00
|
|
|
struct exec_node *prev = exec_node_get_prev(&node->node);
|
|
|
|
if (exec_node_is_head_sentinel(prev))
|
|
|
|
return NULL;
|
|
|
|
else
|
|
|
|
return exec_node_data(nir_cf_node, prev, node);
|
2014-12-19 23:30:15 +00:00
|
|
|
}
|
2014-08-01 00:14:51 +01:00
|
|
|
|
2014-12-19 23:30:15 +00:00
|
|
|
static inline bool
|
|
|
|
nir_cf_node_is_first(const nir_cf_node *node)
|
|
|
|
{
|
|
|
|
return exec_node_is_head_sentinel(node->node.prev);
|
|
|
|
}
|
2014-08-01 00:14:51 +01:00
|
|
|
|
2014-12-19 23:30:15 +00:00
|
|
|
static inline bool
|
|
|
|
nir_cf_node_is_last(const nir_cf_node *node)
|
|
|
|
{
|
|
|
|
return exec_node_is_tail_sentinel(node->node.next);
|
|
|
|
}
|
2014-08-01 00:14:51 +01:00
|
|
|
|
2016-10-06 02:09:25 +01:00
|
|
|
NIR_DEFINE_CAST(nir_cf_node_as_block, nir_cf_node, nir_block, cf_node,
|
|
|
|
type, nir_cf_node_block)
|
|
|
|
NIR_DEFINE_CAST(nir_cf_node_as_if, nir_cf_node, nir_if, cf_node,
|
|
|
|
type, nir_cf_node_if)
|
|
|
|
NIR_DEFINE_CAST(nir_cf_node_as_loop, nir_cf_node, nir_loop, cf_node,
|
|
|
|
type, nir_cf_node_loop)
|
|
|
|
NIR_DEFINE_CAST(nir_cf_node_as_function, nir_cf_node,
|
|
|
|
nir_function_impl, cf_node, type, nir_cf_node_function)
|
2014-08-01 00:14:51 +01:00
|
|
|
|
2016-10-06 03:08:57 +01:00
|
|
|
static inline nir_block *
|
|
|
|
nir_if_first_then_block(nir_if *if_stmt)
|
|
|
|
{
|
|
|
|
struct exec_node *head = exec_list_get_head(&if_stmt->then_list);
|
|
|
|
return nir_cf_node_as_block(exec_node_data(nir_cf_node, head, node));
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline nir_block *
|
|
|
|
nir_if_last_then_block(nir_if *if_stmt)
|
|
|
|
{
|
|
|
|
struct exec_node *tail = exec_list_get_tail(&if_stmt->then_list);
|
|
|
|
return nir_cf_node_as_block(exec_node_data(nir_cf_node, tail, node));
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline nir_block *
|
|
|
|
nir_if_first_else_block(nir_if *if_stmt)
|
|
|
|
{
|
|
|
|
struct exec_node *head = exec_list_get_head(&if_stmt->else_list);
|
|
|
|
return nir_cf_node_as_block(exec_node_data(nir_cf_node, head, node));
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline nir_block *
|
|
|
|
nir_if_last_else_block(nir_if *if_stmt)
|
|
|
|
{
|
|
|
|
struct exec_node *tail = exec_list_get_tail(&if_stmt->else_list);
|
|
|
|
return nir_cf_node_as_block(exec_node_data(nir_cf_node, tail, node));
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline nir_block *
|
|
|
|
nir_loop_first_block(nir_loop *loop)
|
|
|
|
{
|
|
|
|
struct exec_node *head = exec_list_get_head(&loop->body);
|
|
|
|
return nir_cf_node_as_block(exec_node_data(nir_cf_node, head, node));
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline nir_block *
|
|
|
|
nir_loop_last_block(nir_loop *loop)
|
|
|
|
{
|
|
|
|
struct exec_node *tail = exec_list_get_tail(&loop->body);
|
|
|
|
return nir_cf_node_as_block(exec_node_data(nir_cf_node, tail, node));
|
|
|
|
}
|
|
|
|
|
2019-04-08 19:22:20 +01:00
|
|
|
/**
|
|
|
|
* Return true if this list of cf_nodes contains a single empty block.
|
|
|
|
*/
|
|
|
|
static inline bool
|
|
|
|
nir_cf_list_is_empty_block(struct exec_list *cf_list)
|
|
|
|
{
|
|
|
|
if (exec_list_is_singular(cf_list)) {
|
|
|
|
struct exec_node *head = exec_list_get_head(cf_list);
|
|
|
|
nir_block *block =
|
|
|
|
nir_cf_node_as_block(exec_node_data(nir_cf_node, head, node));
|
|
|
|
return exec_list_is_empty(&block->instr_list);
|
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2014-08-01 00:14:51 +01:00
|
|
|
typedef struct {
|
2018-03-22 23:41:18 +00:00
|
|
|
uint8_t num_components;
|
|
|
|
uint8_t bit_size;
|
2014-08-01 00:14:51 +01:00
|
|
|
} nir_parameter;
|
|
|
|
|
2015-12-26 18:00:47 +00:00
|
|
|
typedef struct nir_function {
|
2014-08-01 00:14:51 +01:00
|
|
|
struct exec_node node;
|
|
|
|
|
2015-12-26 18:00:47 +00:00
|
|
|
const char *name;
|
|
|
|
struct nir_shader *shader;
|
|
|
|
|
2014-08-01 00:14:51 +01:00
|
|
|
unsigned num_params;
|
|
|
|
nir_parameter *params;
|
|
|
|
|
2015-12-26 18:00:47 +00:00
|
|
|
/** The implementation of this function.
|
|
|
|
*
|
|
|
|
* If the function is only declared and not implemented, this is NULL.
|
|
|
|
*/
|
|
|
|
nir_function_impl *impl;
|
2018-09-06 19:12:24 +01:00
|
|
|
|
|
|
|
bool is_entrypoint;
|
2014-08-01 00:14:51 +01:00
|
|
|
} nir_function;
|
|
|
|
|
2019-02-26 01:13:48 +00:00
|
|
|
typedef enum {
|
|
|
|
nir_lower_imul64 = (1 << 0),
|
|
|
|
nir_lower_isign64 = (1 << 1),
|
|
|
|
/** Lower all int64 modulus and division opcodes */
|
|
|
|
nir_lower_divmod64 = (1 << 2),
|
|
|
|
/** Lower all 64-bit umul_high and imul_high opcodes */
|
|
|
|
nir_lower_imul_high64 = (1 << 3),
|
|
|
|
nir_lower_mov64 = (1 << 4),
|
|
|
|
nir_lower_icmp64 = (1 << 5),
|
|
|
|
nir_lower_iadd64 = (1 << 6),
|
|
|
|
nir_lower_iabs64 = (1 << 7),
|
|
|
|
nir_lower_ineg64 = (1 << 8),
|
|
|
|
nir_lower_logic64 = (1 << 9),
|
|
|
|
nir_lower_minmax64 = (1 << 10),
|
|
|
|
nir_lower_shift64 = (1 << 11),
|
2019-02-15 07:08:39 +00:00
|
|
|
nir_lower_imul_2x32_64 = (1 << 12),
|
2019-07-15 16:31:49 +01:00
|
|
|
nir_lower_extract64 = (1 << 13),
|
2019-11-19 23:23:14 +00:00
|
|
|
nir_lower_ufind_msb64 = (1 << 14),
|
2020-06-23 13:47:20 +01:00
|
|
|
nir_lower_bit_count64 = (1 << 15),
|
2019-02-26 01:13:48 +00:00
|
|
|
} nir_lower_int64_options;
|
|
|
|
|
|
|
|
typedef enum {
|
|
|
|
nir_lower_drcp = (1 << 0),
|
|
|
|
nir_lower_dsqrt = (1 << 1),
|
|
|
|
nir_lower_drsq = (1 << 2),
|
|
|
|
nir_lower_dtrunc = (1 << 3),
|
|
|
|
nir_lower_dfloor = (1 << 4),
|
|
|
|
nir_lower_dceil = (1 << 5),
|
|
|
|
nir_lower_dfract = (1 << 6),
|
|
|
|
nir_lower_dround_even = (1 << 7),
|
|
|
|
nir_lower_dmod = (1 << 8),
|
2019-07-11 23:06:31 +01:00
|
|
|
nir_lower_dsub = (1 << 9),
|
|
|
|
nir_lower_ddiv = (1 << 10),
|
|
|
|
nir_lower_fp64_full_software = (1 << 11),
|
2019-02-26 01:13:48 +00:00
|
|
|
} nir_lower_doubles_options;
|
|
|
|
|
2019-05-20 13:58:23 +01:00
|
|
|
typedef enum {
|
|
|
|
nir_divergence_single_prim_per_subgroup = (1 << 0),
|
|
|
|
nir_divergence_single_patch_per_tcs_subgroup = (1 << 1),
|
|
|
|
nir_divergence_single_patch_per_tes_subgroup = (1 << 2),
|
|
|
|
nir_divergence_view_index_uniform = (1 << 3),
|
|
|
|
} nir_divergence_options;
|
|
|
|
|
2015-02-03 00:13:49 +00:00
|
|
|
typedef struct nir_shader_compiler_options {
|
nir: Add a lower_fdiv option, turn fdiv into fmul/frcp.
The nir_opt_algebraic rule
(('fadd', ('flog2', a), ('fneg', ('flog2', b))), ('flog2', ('fdiv', a, b))),
can produce new fdiv operations, which need to be lowered on i965,
as we don't actually implement fdiv. (Normally, we handle this in
GLSL IR's lower_instructions pass, but in the above case we introduce
an fdiv after that point. So, make NIR do it for us.)
Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
Reviewed-by: Matt Turner <mattst88@gmail.com>
Cc: mesa-stable@lists.freedesktop.org
2016-01-05 13:09:46 +00:00
|
|
|
bool lower_fdiv;
|
2020-09-24 16:46:31 +01:00
|
|
|
bool lower_ffma16;
|
|
|
|
bool lower_ffma32;
|
|
|
|
bool lower_ffma64;
|
|
|
|
bool fuse_ffma16;
|
|
|
|
bool fuse_ffma32;
|
|
|
|
bool fuse_ffma64;
|
2018-04-18 10:02:51 +01:00
|
|
|
bool lower_flrp16;
|
2016-04-28 06:13:10 +01:00
|
|
|
bool lower_flrp32;
|
2016-04-26 08:35:30 +01:00
|
|
|
/** Lowers flrp when the hardware does not support doubles */
|
|
|
|
bool lower_flrp64;
|
2015-01-28 00:22:54 +00:00
|
|
|
bool lower_fpow;
|
2015-01-30 21:53:39 +00:00
|
|
|
bool lower_fsat;
|
2015-01-28 18:39:29 +00:00
|
|
|
bool lower_fsqrt;
|
2019-05-08 15:26:49 +01:00
|
|
|
bool lower_sincos;
|
2019-06-03 21:18:55 +01:00
|
|
|
bool lower_fmod;
|
2018-05-02 21:02:21 +01:00
|
|
|
/** Lowers ibitfield_extract/ubitfield_extract to ibfe/ubfe. */
|
2016-01-13 19:09:11 +00:00
|
|
|
bool lower_bitfield_extract;
|
2019-01-26 08:12:46 +00:00
|
|
|
/** Lowers ibitfield_extract/ubitfield_extract to compares and shifts. */
|
2018-05-02 21:02:21 +01:00
|
|
|
bool lower_bitfield_extract_to_shifts;
|
2018-05-02 22:13:23 +01:00
|
|
|
/** Lowers bitfield_insert to bfi/bfm */
|
2016-01-06 23:30:38 +00:00
|
|
|
bool lower_bitfield_insert;
|
2019-01-26 08:12:46 +00:00
|
|
|
/** Lowers bitfield_insert to compares and shifts. */
|
2018-05-02 22:13:23 +01:00
|
|
|
bool lower_bitfield_insert_to_shifts;
|
2019-06-13 10:34:01 +01:00
|
|
|
/** Lowers bitfield_insert to bfm/bitfield_select. */
|
|
|
|
bool lower_bitfield_insert_to_bitfield_select;
|
2018-05-08 20:47:48 +01:00
|
|
|
/** Lowers bitfield_reverse to shifts. */
|
|
|
|
bool lower_bitfield_reverse;
|
2018-05-08 21:04:37 +01:00
|
|
|
/** Lowers bit_count to shifts. */
|
|
|
|
bool lower_bit_count;
|
2018-05-04 21:33:47 +01:00
|
|
|
/** Lowers ifind_msb to compare and ufind_msb */
|
|
|
|
bool lower_ifind_msb;
|
2018-05-04 22:02:55 +01:00
|
|
|
/** Lowers find_lsb to ufind_msb and logic ops */
|
|
|
|
bool lower_find_lsb;
|
2016-01-06 23:30:38 +00:00
|
|
|
bool lower_uadd_carry;
|
|
|
|
bool lower_usub_borrow;
|
2018-05-08 19:24:40 +01:00
|
|
|
/** Lowers imul_high/umul_high to 16-bit multiplies and carry operations. */
|
|
|
|
bool lower_mul_high;
|
2015-01-28 18:52:53 +00:00
|
|
|
/** lowers fneg and ineg to fsub and isub. */
|
|
|
|
bool lower_negate;
|
2015-04-04 13:13:44 +01:00
|
|
|
/** lowers fsub and isub to fadd+fneg and iadd+ineg. */
|
|
|
|
bool lower_sub;
|
2015-03-06 09:17:22 +00:00
|
|
|
|
2020-08-18 18:51:57 +01:00
|
|
|
/* lower {slt,sge,seq,sne} to {flt,fge,feq,fneu} + b2f: */
|
2015-03-31 16:25:19 +01:00
|
|
|
bool lower_scmp;
|
|
|
|
|
2020-01-04 00:33:54 +00:00
|
|
|
/* lower [bf]all_equalN/[bf]any_nequalN (e.g. fany_nequal4 to sne+fdot4+fsat) */
|
2019-06-02 23:44:49 +01:00
|
|
|
bool lower_vector_cmp;
|
|
|
|
|
2016-05-07 18:01:24 +01:00
|
|
|
/** enables rules to lower idiv by power-of-two: */
|
|
|
|
bool lower_idiv;
|
|
|
|
|
2019-07-18 19:56:27 +01:00
|
|
|
/** enable rules to avoid bit ops */
|
|
|
|
bool lower_bitops;
|
2019-05-31 18:54:12 +01:00
|
|
|
|
2019-02-06 21:12:25 +00:00
|
|
|
/** enables rules to lower isign to imin+imax */
|
|
|
|
bool lower_isign;
|
|
|
|
|
2019-04-16 21:49:41 +01:00
|
|
|
/** enables rules to lower fsign to fsub and flt */
|
|
|
|
bool lower_fsign;
|
|
|
|
|
2020-01-10 21:59:54 +00:00
|
|
|
/** enables rules to lower iabs to ineg+imax */
|
|
|
|
bool lower_iabs;
|
|
|
|
|
2019-06-21 02:47:16 +01:00
|
|
|
/* lower fdph to fdot4 */
|
|
|
|
bool lower_fdph;
|
|
|
|
|
2019-07-27 16:58:53 +01:00
|
|
|
/** lower fdot to fmul and fsum/fadd. */
|
|
|
|
bool lower_fdot;
|
|
|
|
|
2015-09-10 18:51:46 +01:00
|
|
|
/* Does the native fdot instruction replicate its result for four
|
|
|
|
* components? If so, then opt_algebraic_late will turn all fdotN
|
|
|
|
* instructions into fdot_replicatedN instructions.
|
|
|
|
*/
|
|
|
|
bool fdot_replicates;
|
|
|
|
|
2018-09-01 20:15:27 +01:00
|
|
|
/** lowers ffloor to fsub+ffract: */
|
|
|
|
bool lower_ffloor;
|
|
|
|
|
2015-09-14 16:13:19 +01:00
|
|
|
/** lowers ffract to fsub+ffloor: */
|
|
|
|
bool lower_ffract;
|
|
|
|
|
2018-11-12 17:49:32 +00:00
|
|
|
/** lowers fceil to fneg+ffloor+fneg: */
|
|
|
|
bool lower_fceil;
|
|
|
|
|
2019-04-12 09:12:27 +01:00
|
|
|
bool lower_ftrunc;
|
|
|
|
|
2018-02-27 08:19:21 +00:00
|
|
|
bool lower_ldexp;
|
|
|
|
|
2016-01-21 23:46:47 +00:00
|
|
|
bool lower_pack_half_2x16;
|
2016-01-25 19:05:52 +00:00
|
|
|
bool lower_pack_unorm_2x16;
|
|
|
|
bool lower_pack_snorm_2x16;
|
|
|
|
bool lower_pack_unorm_4x8;
|
|
|
|
bool lower_pack_snorm_4x8;
|
2020-04-21 12:41:41 +01:00
|
|
|
bool lower_pack_64_2x32_split;
|
|
|
|
bool lower_pack_32_2x16_split;
|
2016-01-21 23:46:47 +00:00
|
|
|
bool lower_unpack_half_2x16;
|
2016-01-25 19:07:02 +00:00
|
|
|
bool lower_unpack_unorm_2x16;
|
|
|
|
bool lower_unpack_snorm_2x16;
|
|
|
|
bool lower_unpack_unorm_4x8;
|
|
|
|
bool lower_unpack_snorm_4x8;
|
2020-04-21 12:41:41 +01:00
|
|
|
bool lower_unpack_64_2x32_split;
|
|
|
|
bool lower_unpack_32_2x16_split;
|
2016-01-21 23:46:47 +00:00
|
|
|
|
2020-04-24 19:27:33 +01:00
|
|
|
bool lower_pack_split;
|
|
|
|
|
2016-01-21 17:09:29 +00:00
|
|
|
bool lower_extract_byte;
|
|
|
|
bool lower_extract_word;
|
|
|
|
|
2018-01-29 23:55:19 +00:00
|
|
|
bool lower_all_io_to_temps;
|
2019-03-28 14:57:31 +00:00
|
|
|
bool lower_all_io_to_elements;
|
2018-01-29 23:55:19 +00:00
|
|
|
|
2016-03-25 17:54:27 +00:00
|
|
|
/* Indicates that the driver only has zero-based vertex id */
|
|
|
|
bool vertex_id_zero_based;
|
2016-05-22 23:54:48 +01:00
|
|
|
|
2018-04-28 13:09:21 +01:00
|
|
|
/**
|
|
|
|
* If enabled, gl_BaseVertex will be lowered as:
|
|
|
|
* is_indexed_draw (~0/0) & firstvertex
|
|
|
|
*/
|
|
|
|
bool lower_base_vertex;
|
|
|
|
|
2018-06-01 19:07:15 +01:00
|
|
|
/**
|
|
|
|
* If enabled, gl_HelperInvocation will be lowered as:
|
|
|
|
*
|
|
|
|
* !((1 << sample_id) & sample_mask_in))
|
|
|
|
*
|
|
|
|
* This depends on some possibly hw-specific implementation details, which may
|
|
|
|
* not be true for all hw. In particular, it assumes that the FS is only executed
|
|
|
|
* for covered samples or for helper invocations. So, do not blindly
|
|
|
|
* enable this option.
|
|
|
|
*
|
|
|
|
* Note: See also issue #22 in ARB_shader_image_load_store
|
|
|
|
*/
|
|
|
|
bool lower_helper_invocation;
|
|
|
|
|
2019-04-10 02:40:33 +01:00
|
|
|
/**
|
|
|
|
* Convert gl_SampleMaskIn to gl_HelperInvocation as follows:
|
|
|
|
*
|
|
|
|
* gl_SampleMaskIn == 0 ---> gl_HelperInvocation
|
|
|
|
* gl_SampleMaskIn != 0 ---> !gl_HelperInvocation
|
|
|
|
*/
|
|
|
|
bool optimize_sample_mask_in;
|
|
|
|
|
2016-05-22 23:54:48 +01:00
|
|
|
bool lower_cs_local_index_from_id;
|
2018-11-15 16:25:46 +00:00
|
|
|
bool lower_cs_local_id_from_index;
|
2016-07-12 09:46:53 +01:00
|
|
|
|
2020-01-13 09:35:40 +00:00
|
|
|
/* Prevents global_invocation_id from being lowered in terms of work_group_id */
|
|
|
|
bool has_cs_global_id;
|
|
|
|
|
2017-09-21 23:51:55 +01:00
|
|
|
bool lower_device_index_to_zero;
|
|
|
|
|
2018-07-06 21:43:06 +01:00
|
|
|
/* Set if nir_lower_wpos_ytransform() should also invert gl_PointCoord. */
|
|
|
|
bool lower_wpos_pntc;
|
|
|
|
|
2018-09-19 09:17:09 +01:00
|
|
|
/**
|
|
|
|
* Set if nir_op_[iu]hadd and nir_op_[iu]rhadd instructions should be
|
|
|
|
* lowered to simple arithmetic.
|
|
|
|
*
|
|
|
|
* If this flag is set, the lowering will be applied to all bit-sizes of
|
|
|
|
* these instructions.
|
|
|
|
*
|
|
|
|
* \sa ::lower_hadd64
|
|
|
|
*/
|
2018-07-12 14:02:27 +01:00
|
|
|
bool lower_hadd;
|
2018-09-19 09:17:09 +01:00
|
|
|
|
|
|
|
/**
|
|
|
|
* Set if only 64-bit nir_op_[iu]hadd and nir_op_[iu]rhadd instructions
|
|
|
|
* should be lowered to simple arithmetic.
|
|
|
|
*
|
|
|
|
* If this flag is set, the lowering will be applied to only 64-bit
|
|
|
|
* versions of these instructions.
|
|
|
|
*
|
|
|
|
* \sa ::lower_hadd
|
|
|
|
*/
|
|
|
|
bool lower_hadd64;
|
|
|
|
|
2018-09-19 09:17:31 +01:00
|
|
|
/**
|
|
|
|
* Set if nir_op_add_sat and nir_op_usub_sat should be lowered to simple
|
|
|
|
* arithmetic.
|
|
|
|
*
|
|
|
|
* If this flag is set, the lowering will be applied to all bit-sizes of
|
|
|
|
* these instructions.
|
|
|
|
*
|
|
|
|
* \sa ::lower_usub_sat64
|
|
|
|
*/
|
2018-07-12 14:02:27 +01:00
|
|
|
bool lower_add_sat;
|
|
|
|
|
2018-09-19 09:17:31 +01:00
|
|
|
/**
|
|
|
|
* Set if only 64-bit nir_op_usub_sat should be lowered to simple
|
|
|
|
* arithmetic.
|
|
|
|
*
|
|
|
|
* \sa ::lower_add_sat
|
|
|
|
*/
|
|
|
|
bool lower_usub_sat64;
|
|
|
|
|
st/nir: Re-vectorize shader IO
We scalarize IO to enable further optimizations, such as propagating
constant components across shaders, eliminating dead components, and
so on. This patch attempts to re-vectorize those operations after
the varying optimizations are done.
Intel GPUs are a scalar architecture, but IO operations work on whole
vec4's at a time, so we'd prefer to have a single IO load per vector
rather than 4 scalar IO loads. This re-vectorization can help a lot.
Broadcom GPUs, however, really do want scalar IO. radeonsi may want
this, or may want to leave it to LLVM. So, we make a new flag in the
NIR compiler options struct, and key it off of that, allowing drivers
to pick. (It's a bit awkward because we have per-stage settings, but
this is about IO between two stages...but I expect drivers to globally
prefer one way or the other. We can adjust later if needed.)
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
2019-04-11 20:28:48 +01:00
|
|
|
/**
|
|
|
|
* Should IO be re-vectorized? Some scalar ISAs still operate on vec4's
|
|
|
|
* for IO purposes and would prefer loads/stores be vectorized.
|
|
|
|
*/
|
|
|
|
bool vectorize_io;
|
2019-10-08 03:46:00 +01:00
|
|
|
bool lower_to_scalar;
|
2019-04-11 20:28:48 +01:00
|
|
|
|
2020-05-04 01:10:57 +01:00
|
|
|
/**
|
|
|
|
* Whether nir_opt_vectorize should only create 16-bit 2D vectors.
|
|
|
|
*/
|
|
|
|
bool vectorize_vec2_16bit;
|
|
|
|
|
2019-06-17 23:10:06 +01:00
|
|
|
/**
|
|
|
|
* Should the linker unify inputs_read/outputs_written between adjacent
|
|
|
|
* shader stages which are linked into a single program?
|
|
|
|
*/
|
|
|
|
bool unify_interfaces;
|
|
|
|
|
2016-07-12 09:46:53 +01:00
|
|
|
/**
|
|
|
|
* Should nir_lower_io() create load_interpolated_input intrinsics?
|
|
|
|
*
|
|
|
|
* If not, it generates regular load_input intrinsics and interpolation
|
|
|
|
* information must be inferred from the list of input nir_variables.
|
|
|
|
*/
|
|
|
|
bool use_interpolated_input_intrinsics;
|
2016-12-13 03:39:51 +00:00
|
|
|
|
2019-02-15 07:08:39 +00:00
|
|
|
/* Lowers when 32x32->64 bit multiplication is not supported */
|
|
|
|
bool lower_mul_2x32_64;
|
|
|
|
|
2019-05-30 22:15:51 +01:00
|
|
|
/* Lowers when the rotate instruction is not supported */
|
|
|
|
bool lower_rotate;
|
|
|
|
|
2019-09-27 18:15:02 +01:00
|
|
|
/**
|
|
|
|
* Backend supports imul24, and would like to use it (when possible)
|
|
|
|
* for address/offset calculation. If true, driver should call
|
|
|
|
* nir_lower_amul(). (If not set, amul will automatically be lowered
|
|
|
|
* to imul.)
|
|
|
|
*/
|
|
|
|
bool has_imul24;
|
|
|
|
|
2020-04-12 15:36:20 +01:00
|
|
|
/** Backend supports umul24; if not set, umul24 will automatically be lowered
|
|
|
|
* to imul with masked inputs */
|
|
|
|
bool has_umul24;
|
|
|
|
|
|
|
|
/** Backend supports umad24; if not set, umad24 will automatically be lowered
|
|
|
|
* to imul with masked inputs and iadd */
|
|
|
|
bool has_umad24;
|
|
|
|
|
2020-05-05 08:13:20 +01:00
|
|
|
/* Whether to generate only scoped_barrier intrinsics instead of the set of
|
|
|
|
* memory and control barrier intrinsics based on GLSL.
|
2020-01-11 00:25:02 +00:00
|
|
|
*/
|
2020-05-05 08:13:20 +01:00
|
|
|
bool use_scoped_barrier;
|
2020-01-11 00:25:02 +00:00
|
|
|
|
2019-06-03 23:22:15 +01:00
|
|
|
/**
|
|
|
|
* Is this the Intel vec4 backend?
|
|
|
|
*
|
|
|
|
* Used to inhibit algebraic optimizations that are known to be harmful on
|
|
|
|
* the Intel vec4 backend. This is generally applicable to any
|
|
|
|
* optimization that might cause more immediate values to be used in
|
|
|
|
* 3-source (e.g., ffma and flrp) instructions.
|
|
|
|
*/
|
|
|
|
bool intel_vec4;
|
|
|
|
|
2020-03-06 21:22:45 +00:00
|
|
|
/** Lower nir_op_ibfe and nir_op_ubfe that have two constant sources. */
|
|
|
|
bool lower_bfe_with_two_constants;
|
|
|
|
|
2020-03-31 13:57:42 +01:00
|
|
|
/** Whether 8-bit ALU is supported. */
|
|
|
|
bool support_8bit_alu;
|
|
|
|
|
|
|
|
/** Whether 16-bit ALU is supported. */
|
|
|
|
bool support_16bit_alu;
|
|
|
|
|
2016-12-13 03:39:51 +00:00
|
|
|
unsigned max_unroll_iterations;
|
2019-02-26 01:13:48 +00:00
|
|
|
|
2020-09-13 20:01:55 +01:00
|
|
|
/* Set if default-block uniforms should be lowered to a UBO by
|
|
|
|
* calling lower_uniforms_to_ubo */
|
|
|
|
bool lower_uniforms_to_ubo;
|
|
|
|
|
2019-02-26 01:13:48 +00:00
|
|
|
nir_lower_int64_options lower_int64_options;
|
|
|
|
nir_lower_doubles_options lower_doubles_options;
|
2015-02-03 00:13:49 +00:00
|
|
|
} nir_shader_compiler_options;
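/*
 * Sketch of how a driver typically provides these options.  As the
 * nir_shader::options comment below requires, the memory is a single
 * static copy owned by the driver; the particular flags set here are
 * illustrative only, not a real driver's configuration.
 */
static const nir_shader_compiler_options example_compiler_options = {
   .lower_fdiv = true,
   .lower_scmp = true,
   .lower_flrp64 = true,
   .max_unroll_iterations = 32,
};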
|
|
|
|
|
2014-08-01 00:14:51 +01:00
|
|
|
typedef struct nir_shader {
|
2015-09-17 23:18:19 +01:00
|
|
|
/** list of uniforms (nir_variable) */
|
2020-07-20 22:30:37 +01:00
|
|
|
struct exec_list variables;
|
2016-01-09 01:16:29 +00:00
|
|
|
|
2015-02-03 00:13:49 +00:00
|
|
|
/** Set of driver-specific options for the shader.
|
|
|
|
*
|
|
|
|
* The memory for the options is expected to be kept in a single static
|
|
|
|
* copy by the driver.
|
|
|
|
*/
|
|
|
|
const struct nir_shader_compiler_options *options;
|
|
|
|
|
2015-08-06 01:14:59 +01:00
|
|
|
/** Various bits of compile-time information about a given shader */
|
2017-05-08 17:20:21 +01:00
|
|
|
struct shader_info info;
|
2015-08-06 01:14:59 +01:00
|
|
|
|
2014-12-19 01:13:22 +00:00
|
|
|
struct exec_list functions; /**< list of nir_function */
|
2014-08-01 00:14:51 +01:00
|
|
|
|
2014-08-05 18:54:27 +01:00
|
|
|
/**
|
2020-08-31 19:06:04 +01:00
|
|
|
* The size of the variable space for load_input_*, load_uniform_*, etc.
|
|
|
|
* intrinsics. This is in back-end-specific units, which are likely one of
|
|
|
|
* bytes, dwords, or vec4s depending on context and back-end.
|
2014-08-05 18:54:27 +01:00
|
|
|
*/
|
2020-08-31 19:04:50 +01:00
|
|
|
unsigned num_inputs, num_uniforms, num_outputs;
|
|
|
|
|
|
|
|
/** Size in bytes of required shared memory */
|
|
|
|
unsigned shared_size;
|
2018-06-29 03:16:19 +01:00
|
|
|
|
2016-12-02 19:36:42 +00:00
|
|
|
/** Size in bytes of required scratch space */
|
|
|
|
unsigned scratch_size;
|
|
|
|
|
2018-06-29 03:16:19 +01:00
|
|
|
/** Constant data associated with this shader.
|
|
|
|
*
|
2020-07-07 20:25:13 +01:00
|
|
|
* Constant data is loaded through load_constant intrinsics (as compared to
|
|
|
|
* the NIR load_const instructions which have the constant value inlined
|
|
|
|
* into them). This is usually generated by nir_opt_large_constants (so
|
|
|
|
* shaders don't have to load_const into a temporary array when they want
|
|
|
|
* to indirect on a const array).
|
2018-06-29 03:16:19 +01:00
|
|
|
*/
|
|
|
|
void *constant_data;
|
2020-07-07 20:25:13 +01:00
|
|
|
/** Size of the constant data associated with the shader, in bytes */
|
2018-06-29 03:16:19 +01:00
|
|
|
unsigned constant_data_size;
|
2014-08-01 00:14:51 +01:00
|
|
|
} nir_shader;
|
|
|
|
|
2018-09-06 19:12:24 +01:00
|
|
|
#define nir_foreach_function(func, shader) \
|
|
|
|
foreach_list_typed(nir_function, func, node, &(shader)->functions)
|
|
|
|
|
2016-08-25 03:09:57 +01:00
|
|
|
static inline nir_function_impl *
|
2016-03-25 21:07:41 +00:00
|
|
|
nir_shader_get_entrypoint(nir_shader *shader)
|
|
|
|
{
|
2018-09-06 19:12:24 +01:00
|
|
|
nir_function *func = NULL;
|
|
|
|
|
|
|
|
nir_foreach_function(function, shader) {
|
|
|
|
assert(func == NULL);
|
|
|
|
if (function->is_entrypoint) {
|
|
|
|
func = function;
|
|
|
|
#ifndef NDEBUG
|
|
|
|
break;
|
|
|
|
#endif
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!func)
|
|
|
|
return NULL;
|
|
|
|
|
2016-03-25 21:07:41 +00:00
|
|
|
assert(func->num_params == 0);
|
2016-08-25 03:09:57 +01:00
|
|
|
assert(func->impl);
|
|
|
|
return func->impl;
|
2016-03-25 21:07:41 +00:00
|
|
|
}
|
|
|
|
|
2015-02-03 00:13:49 +00:00
|
|
|
nir_shader *nir_shader_create(void *mem_ctx,
|
2015-08-18 09:48:34 +01:00
|
|
|
gl_shader_stage stage,
|
2016-10-13 01:41:23 +01:00
|
|
|
const nir_shader_compiler_options *options,
|
|
|
|
shader_info *si);
|
2014-08-01 00:16:23 +01:00
|
|
|
|
|
|
|
nir_register *nir_local_reg_create(nir_function_impl *impl);
|
|
|
|
|
|
|
|
void nir_reg_remove(nir_register *reg);
|
|
|
|
|
2017-02-28 00:28:53 +00:00
|
|
|
/** Adds a variable to the appropriate list in nir_shader */
|
2015-10-09 15:05:11 +01:00
|
|
|
void nir_shader_add_variable(nir_shader *shader, nir_variable *var);
|
|
|
|
|
|
|
|
static inline void
|
|
|
|
nir_function_impl_add_variable(nir_function_impl *impl, nir_variable *var)
|
|
|
|
{
|
2019-01-15 23:05:04 +00:00
|
|
|
assert(var->data.mode == nir_var_function_temp);
|
2015-10-09 15:05:11 +01:00
|
|
|
exec_list_push_tail(&impl->locals, &var->node);
|
|
|
|
}
|
|
|
|
|
|
|
|
/** creates a variable, sets a few defaults, and adds it to the list */
|
|
|
|
nir_variable *nir_variable_create(nir_shader *shader,
|
|
|
|
nir_variable_mode mode,
|
|
|
|
const struct glsl_type *type,
|
|
|
|
const char *name);
|
|
|
|
/** creates a local variable and adds it to the list */
|
|
|
|
nir_variable *nir_local_variable_create(nir_function_impl *impl,
|
|
|
|
const struct glsl_type *type,
|
|
|
|
const char *name);
|
|
|
|
|
2020-07-23 05:37:27 +01:00
|
|
|
nir_variable *nir_find_variable_with_location(nir_shader *shader,
|
|
|
|
nir_variable_mode mode,
|
|
|
|
unsigned location);
|
|
|
|
|
|
|
|
nir_variable *nir_find_variable_with_driver_location(nir_shader *shader,
|
|
|
|
nir_variable_mode mode,
|
|
|
|
unsigned location);
|
|
|
|
|
2014-08-01 00:16:23 +01:00
|
|
|
/** creates a function and adds it to the shader's list of functions */
|
|
|
|
nir_function *nir_function_create(nir_shader *shader, const char *name);
|
|
|
|
|
2015-12-26 18:00:47 +00:00
|
|
|
nir_function_impl *nir_function_impl_create(nir_function *func);
|
2015-10-28 04:34:56 +00:00
|
|
|
/** creates a function_impl that isn't tied to any particular function */
|
|
|
|
nir_function_impl *nir_function_impl_create_bare(nir_shader *shader);
|
2014-08-01 00:16:23 +01:00
|
|
|
|
2015-10-21 15:57:15 +01:00
|
|
|
nir_block *nir_block_create(nir_shader *shader);
|
|
|
|
nir_if *nir_if_create(nir_shader *shader);
|
|
|
|
nir_loop *nir_loop_create(nir_shader *shader);
|
2014-08-01 00:16:23 +01:00
|
|
|
|
|
|
|
nir_function_impl *nir_cf_node_get_function(nir_cf_node *node);
|
|
|
|
|
2014-10-29 19:42:54 +00:00
|
|
|
/** requests that the given pieces of metadata be generated */
|
2016-12-13 03:39:51 +00:00
|
|
|
void nir_metadata_require(nir_function_impl *impl, nir_metadata required, ...);
|
2014-10-29 19:42:54 +00:00
|
|
|
/** dirties all but the preserved metadata */
|
2014-12-13 00:22:46 +00:00
|
|
|
void nir_metadata_preserve(nir_function_impl *impl, nir_metadata preserved);
|
2020-05-22 03:37:33 +01:00
|
|
|
/** Preserves all metadata for the given shader */
|
|
|
|
void nir_shader_preserve_all_metadata(nir_shader *shader);
|
2014-10-29 19:42:54 +00:00
|
|
|
|
2014-08-01 00:16:23 +01:00
|
|
|
/** creates an instruction with default swizzle/writemask/etc. with NULL registers */
|
2015-04-07 20:33:17 +01:00
|
|
|
nir_alu_instr *nir_alu_instr_create(nir_shader *shader, nir_op op);
|
2014-08-01 00:16:23 +01:00
|
|
|
|
2018-03-15 04:45:38 +00:00
|
|
|
nir_deref_instr *nir_deref_instr_create(nir_shader *shader,
|
|
|
|
nir_deref_type deref_type);
|
|
|
|
|
2015-04-07 20:33:17 +01:00
|
|
|
nir_jump_instr *nir_jump_instr_create(nir_shader *shader, nir_jump_type type);
|
2014-08-01 00:16:23 +01:00
|
|
|
|
2015-04-07 20:33:17 +01:00
|
|
|
nir_load_const_instr *nir_load_const_instr_create(nir_shader *shader,
|
2016-03-23 07:04:18 +00:00
|
|
|
unsigned num_components,
|
|
|
|
unsigned bit_size);
|
2014-08-01 00:16:23 +01:00
|
|
|
|
2015-04-07 20:33:17 +01:00
|
|
|
nir_intrinsic_instr *nir_intrinsic_instr_create(nir_shader *shader,
|
2014-08-01 00:16:23 +01:00
|
|
|
nir_intrinsic_op op);
|
|
|
|
|
2015-04-07 20:33:17 +01:00
|
|
|
nir_call_instr *nir_call_instr_create(nir_shader *shader,
|
2019-03-19 15:18:49 +00:00
|
|
|
nir_function *callee);
|
2014-08-01 00:16:23 +01:00
|
|
|
|
2015-04-07 20:33:17 +01:00
|
|
|
nir_tex_instr *nir_tex_instr_create(nir_shader *shader, unsigned num_srcs);
|
2014-08-01 00:16:23 +01:00
|
|
|
|
2015-04-07 20:33:17 +01:00
|
|
|
nir_phi_instr *nir_phi_instr_create(nir_shader *shader);
|
2014-08-01 00:16:23 +01:00
|
|
|
|
2015-04-07 20:33:17 +01:00
|
|
|
nir_parallel_copy_instr *nir_parallel_copy_instr_create(nir_shader *shader);
|
2014-10-31 04:04:15 +00:00
|
|
|
|
2015-04-07 20:33:17 +01:00
|
|
|
nir_ssa_undef_instr *nir_ssa_undef_instr_create(nir_shader *shader,
|
2016-03-23 07:04:09 +00:00
|
|
|
unsigned num_components,
|
|
|
|
unsigned bit_size);
|
2014-08-01 00:16:23 +01:00
|
|
|
|
2017-08-30 04:36:55 +01:00
|
|
|
nir_const_value nir_alu_binop_identity(nir_op binop, unsigned bit_size);
|
|
|
|
|
2015-08-25 18:01:31 +01:00
|
|
|
/**
|
|
|
|
* NIR Cursors and Instruction Insertion API
|
|
|
|
* @{
|
|
|
|
*
|
|
|
|
* A tiny struct representing a point to insert/extract instructions or
|
|
|
|
* control flow nodes. Helps reduce the combinatorial explosion of possible
|
|
|
|
* points to insert/extract.
|
|
|
|
*
|
|
|
|
* \sa nir_control_flow.h
|
|
|
|
*/
|
|
|
|
typedef enum {
|
|
|
|
nir_cursor_before_block,
|
|
|
|
nir_cursor_after_block,
|
|
|
|
nir_cursor_before_instr,
|
|
|
|
nir_cursor_after_instr,
|
|
|
|
} nir_cursor_option;
|
|
|
|
|
|
|
|
typedef struct {
|
|
|
|
nir_cursor_option option;
|
|
|
|
union {
|
|
|
|
nir_block *block;
|
|
|
|
nir_instr *instr;
|
|
|
|
};
|
|
|
|
} nir_cursor;
|
|
|
|
|
2016-03-25 21:16:47 +00:00
|
|
|
static inline nir_block *
|
|
|
|
nir_cursor_current_block(nir_cursor cursor)
|
|
|
|
{
|
|
|
|
if (cursor.option == nir_cursor_before_instr ||
|
|
|
|
cursor.option == nir_cursor_after_instr) {
|
|
|
|
return cursor.instr->block;
|
|
|
|
} else {
|
|
|
|
return cursor.block;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-12-26 18:32:10 +00:00
|
|
|
bool nir_cursors_equal(nir_cursor a, nir_cursor b);
|
|
|
|
|
2015-08-25 18:01:31 +01:00
|
|
|
static inline nir_cursor
|
|
|
|
nir_before_block(nir_block *block)
|
|
|
|
{
|
|
|
|
nir_cursor cursor;
|
|
|
|
cursor.option = nir_cursor_before_block;
|
|
|
|
cursor.block = block;
|
|
|
|
return cursor;
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline nir_cursor
|
|
|
|
nir_after_block(nir_block *block)
|
|
|
|
{
|
|
|
|
nir_cursor cursor;
|
|
|
|
cursor.option = nir_cursor_after_block;
|
|
|
|
cursor.block = block;
|
|
|
|
return cursor;
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline nir_cursor
|
|
|
|
nir_before_instr(nir_instr *instr)
|
|
|
|
{
|
|
|
|
nir_cursor cursor;
|
|
|
|
cursor.option = nir_cursor_before_instr;
|
|
|
|
cursor.instr = instr;
|
|
|
|
return cursor;
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline nir_cursor
|
|
|
|
nir_after_instr(nir_instr *instr)
|
|
|
|
{
|
|
|
|
nir_cursor cursor;
|
|
|
|
cursor.option = nir_cursor_after_instr;
|
|
|
|
cursor.instr = instr;
|
|
|
|
return cursor;
|
|
|
|
}
|
|
|
|
|
2015-08-29 01:17:39 +01:00
|
|
|
static inline nir_cursor
|
|
|
|
nir_after_block_before_jump(nir_block *block)
|
|
|
|
{
|
|
|
|
nir_instr *last_instr = nir_block_last_instr(block);
|
|
|
|
if (last_instr && last_instr->type == nir_instr_type_jump) {
|
|
|
|
return nir_before_instr(last_instr);
|
|
|
|
} else {
|
|
|
|
return nir_after_block(block);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-08-29 16:24:43 +01:00
|
|
|
static inline nir_cursor
|
|
|
|
nir_before_src(nir_src *src, bool is_if_condition)
|
|
|
|
{
|
|
|
|
if (is_if_condition) {
|
|
|
|
nir_block *prev_block =
|
|
|
|
nir_cf_node_as_block(nir_cf_node_prev(&src->parent_if->cf_node));
|
|
|
|
assert(!nir_block_ends_in_jump(prev_block));
|
|
|
|
return nir_after_block(prev_block);
|
|
|
|
} else if (src->parent_instr->type == nir_instr_type_phi) {
|
|
|
|
#ifndef NDEBUG
|
|
|
|
nir_phi_instr *cond_phi = nir_instr_as_phi(src->parent_instr);
|
|
|
|
bool found = false;
|
|
|
|
nir_foreach_phi_src(phi_src, cond_phi) {
|
|
|
|
if (phi_src->src.ssa == src->ssa) {
|
|
|
|
found = true;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
assert(found);
|
|
|
|
#endif
|
|
|
|
/* The LIST_ENTRY macro is a generic container-of macro; it just happens
|
|
|
|
* to have a more specific name.
|
|
|
|
*/
|
|
|
|
nir_phi_src *phi_src = LIST_ENTRY(nir_phi_src, src, src);
|
|
|
|
return nir_after_block_before_jump(phi_src->pred);
|
|
|
|
} else {
|
|
|
|
return nir_before_instr(src->parent_instr);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-08-25 18:01:31 +01:00
|
|
|
static inline nir_cursor
|
|
|
|
nir_before_cf_node(nir_cf_node *node)
|
|
|
|
{
|
|
|
|
if (node->type == nir_cf_node_block)
|
|
|
|
return nir_before_block(nir_cf_node_as_block(node));
|
|
|
|
|
|
|
|
return nir_after_block(nir_cf_node_as_block(nir_cf_node_prev(node)));
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline nir_cursor
|
|
|
|
nir_after_cf_node(nir_cf_node *node)
|
|
|
|
{
|
|
|
|
if (node->type == nir_cf_node_block)
|
|
|
|
return nir_after_block(nir_cf_node_as_block(node));
|
|
|
|
|
|
|
|
return nir_before_block(nir_cf_node_as_block(nir_cf_node_next(node)));
|
|
|
|
}
|
|
|
|
|
2016-09-03 00:06:52 +01:00
|
|
|
static inline nir_cursor
|
|
|
|
nir_after_phis(nir_block *block)
|
|
|
|
{
|
|
|
|
nir_foreach_instr(instr, block) {
|
|
|
|
if (instr->type != nir_instr_type_phi)
|
|
|
|
return nir_before_instr(instr);
|
|
|
|
}
|
|
|
|
return nir_after_block(block);
|
|
|
|
}
|
|
|
|
|
2015-12-28 06:50:14 +00:00
|
|
|
static inline nir_cursor
|
|
|
|
nir_after_cf_node_and_phis(nir_cf_node *node)
|
|
|
|
{
|
|
|
|
if (node->type == nir_cf_node_block)
|
|
|
|
return nir_after_block(nir_cf_node_as_block(node));
|
|
|
|
|
|
|
|
nir_block *block = nir_cf_node_as_block(nir_cf_node_next(node));
|
|
|
|
|
2016-09-03 00:06:52 +01:00
|
|
|
return nir_after_phis(block);
|
2015-12-28 06:50:14 +00:00
|
|
|
}
|
|
|
|
|
2015-08-25 18:01:31 +01:00
|
|
|
static inline nir_cursor
|
|
|
|
nir_before_cf_list(struct exec_list *cf_list)
|
|
|
|
{
|
|
|
|
nir_cf_node *first_node = exec_node_data(nir_cf_node,
|
|
|
|
exec_list_get_head(cf_list), node);
|
|
|
|
return nir_before_cf_node(first_node);
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline nir_cursor
|
|
|
|
nir_after_cf_list(struct exec_list *cf_list)
|
|
|
|
{
|
|
|
|
nir_cf_node *last_node = exec_node_data(nir_cf_node,
|
|
|
|
exec_list_get_tail(cf_list), node);
|
|
|
|
return nir_after_cf_node(last_node);
|
|
|
|
}
|
|
|
|
|
2015-08-10 02:30:33 +01:00
|
|
|
/**
|
|
|
|
* Insert a NIR instruction at the given cursor.
|
|
|
|
*
|
|
|
|
* Note: This does not update the cursor.
|
|
|
|
*/
|
|
|
|
void nir_instr_insert(nir_cursor cursor, nir_instr *instr);
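/*
 * Sketch of the cursor API in action (insert_zero_at_start is an
 * illustrative helper, not part of nir.h): build a one-component
 * 32-bit load_const and insert it at the very top of an impl.  Since
 * nir_instr_insert() does not advance the cursor, repeated insertions
 * at the same nir_before_block() cursor end up in reverse order.
 */
static inline void
insert_zero_at_start(nir_shader *shader, nir_function_impl *impl)
{
   nir_load_const_instr *zero = nir_load_const_instr_create(shader, 1, 32);
   zero->value[0].u32 = 0;
   nir_instr_insert(nir_before_block(nir_start_block(impl)), &zero->instr);
}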
|
|
|
|
|
|
|
|
static inline void
|
|
|
|
nir_instr_insert_before(nir_instr *instr, nir_instr *before)
|
|
|
|
{
|
|
|
|
nir_instr_insert(nir_before_instr(instr), before);
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline void
|
|
|
|
nir_instr_insert_after(nir_instr *instr, nir_instr *after)
|
|
|
|
{
|
|
|
|
nir_instr_insert(nir_after_instr(instr), after);
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline void
|
|
|
|
nir_instr_insert_before_block(nir_block *block, nir_instr *before)
|
|
|
|
{
|
|
|
|
nir_instr_insert(nir_before_block(block), before);
|
|
|
|
}
|
2014-08-01 00:16:23 +01:00
|
|
|
|
2015-08-10 02:30:33 +01:00
|
|
|
static inline void
|
|
|
|
nir_instr_insert_after_block(nir_block *block, nir_instr *after)
|
|
|
|
{
|
|
|
|
nir_instr_insert(nir_after_block(block), after);
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline void
|
|
|
|
nir_instr_insert_before_cf(nir_cf_node *node, nir_instr *before)
|
|
|
|
{
|
|
|
|
nir_instr_insert(nir_before_cf_node(node), before);
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline void
|
|
|
|
nir_instr_insert_after_cf(nir_cf_node *node, nir_instr *after)
|
|
|
|
{
|
|
|
|
nir_instr_insert(nir_after_cf_node(node), after);
|
|
|
|
}
|
2014-08-01 00:16:23 +01:00
|
|
|
|
2015-08-10 02:30:33 +01:00
|
|
|
static inline void
|
|
|
|
nir_instr_insert_before_cf_list(struct exec_list *list, nir_instr *before)
|
|
|
|
{
|
|
|
|
nir_instr_insert(nir_before_cf_list(list), before);
|
|
|
|
}
|
2014-08-01 00:16:23 +01:00
|
|
|
|
2015-08-10 02:30:33 +01:00
|
|
|
static inline void
|
|
|
|
nir_instr_insert_after_cf_list(struct exec_list *list, nir_instr *after)
|
|
|
|
{
|
|
|
|
nir_instr_insert(nir_after_cf_list(list), after);
|
|
|
|
}
|
2014-08-01 00:16:23 +01:00
|
|
|
|
2018-03-16 16:52:04 +00:00
|
|
|
void nir_instr_remove_v(nir_instr *instr);
|
|
|
|
|
|
|
|
static inline nir_cursor
|
|
|
|
nir_instr_remove(nir_instr *instr)
|
|
|
|
{
|
|
|
|
nir_cursor cursor;
|
|
|
|
nir_instr *prev = nir_instr_prev(instr);
|
|
|
|
if (prev) {
|
|
|
|
cursor = nir_after_instr(prev);
|
|
|
|
} else {
|
|
|
|
cursor = nir_before_block(instr->block);
|
|
|
|
}
|
|
|
|
nir_instr_remove_v(instr);
|
|
|
|
return cursor;
|
|
|
|
}
|
2014-08-01 00:16:23 +01:00
|
|
|
|
2015-08-25 18:01:31 +01:00
|
|
|
/** @} */
|
|
|
|
|
2019-07-11 21:05:27 +01:00
|
|
|
nir_ssa_def *nir_instr_ssa_def(nir_instr *instr);
|
|
|
|
|
2014-12-15 23:12:59 +00:00
|
|
|
typedef bool (*nir_foreach_ssa_def_cb)(nir_ssa_def *def, void *state);
|
2014-08-01 00:16:23 +01:00
|
|
|
typedef bool (*nir_foreach_dest_cb)(nir_dest *dest, void *state);
|
|
|
|
typedef bool (*nir_foreach_src_cb)(nir_src *src, void *state);
|
2014-12-15 23:12:59 +00:00
|
|
|
bool nir_foreach_ssa_def(nir_instr *instr, nir_foreach_ssa_def_cb cb,
|
|
|
|
void *state);
|
2014-08-01 00:16:23 +01:00
|
|
|
bool nir_foreach_dest(nir_instr *instr, nir_foreach_dest_cb cb, void *state);
|
|
|
|
bool nir_foreach_src(nir_instr *instr, nir_foreach_src_cb cb, void *state);
|
2020-05-18 22:49:29 +01:00
|
|
|
bool nir_foreach_phi_src_leaving_block(nir_block *instr,
|
|
|
|
nir_foreach_src_cb cb,
|
|
|
|
void *state);
|
2014-08-01 00:16:23 +01:00
|
|
|
|
2014-12-09 01:34:23 +00:00
|
|
|
nir_const_value *nir_src_as_const_value(nir_src src);
|
2018-06-06 03:19:39 +01:00
|
|
|
|
|
|
|
#define NIR_SRC_AS_(name, c_type, type_enum, cast_macro) \
|
|
|
|
static inline c_type * \
|
2019-04-17 23:10:18 +01:00
|
|
|
nir_src_as_ ## name (nir_src src) \
|
2018-06-06 03:19:39 +01:00
|
|
|
{ \
|
2019-04-17 23:10:18 +01:00
|
|
|
return src.is_ssa && src.ssa->parent_instr->type == type_enum \
|
|
|
|
? cast_macro(src.ssa->parent_instr) : NULL; \
|
2018-06-06 03:19:39 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
NIR_SRC_AS_(alu_instr, nir_alu_instr, nir_instr_type_alu, nir_instr_as_alu)
|
2019-04-17 23:18:19 +01:00
|
|
|
NIR_SRC_AS_(intrinsic, nir_intrinsic_instr,
|
|
|
|
nir_instr_type_intrinsic, nir_instr_as_intrinsic)
|
2019-04-19 21:09:04 +01:00
|
|
|
NIR_SRC_AS_(deref, nir_deref_instr, nir_instr_type_deref, nir_instr_as_deref)
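/*
 * Example use of the casting helpers generated above (src_is_fneg is
 * an illustrative helper, not part of nir.h).  The helpers return
 * NULL when the source is not SSA or its parent instruction has the
 * wrong type, so the result must always be NULL-checked.
 */
static inline bool
src_is_fneg(nir_src src)
{
   nir_alu_instr *alu = nir_src_as_alu_instr(src);
   return alu != NULL && alu->op == nir_op_fneg;
}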
|
2018-06-06 03:19:39 +01:00
|
|
|
|
2015-07-30 12:10:08 +01:00
|
|
|
bool nir_src_is_dynamically_uniform(nir_src src);
|
2014-12-12 20:52:11 +00:00
|
|
|
bool nir_srcs_equal(nir_src src1, nir_src src2);
|
2019-06-20 19:47:30 +01:00
|
|
|
bool nir_instrs_equal(const nir_instr *instr1, const nir_instr *instr2);
|
2014-11-14 03:07:22 +00:00
|
|
|
void nir_instr_rewrite_src(nir_instr *instr, nir_src *src, nir_src new_src);
|
2015-04-24 18:16:27 +01:00
|
|
|
void nir_instr_move_src(nir_instr *dest_instr, nir_src *dest, nir_src *src);
|
2015-04-24 18:34:30 +01:00
|
|
|
void nir_if_rewrite_condition(nir_if *if_stmt, nir_src new_src);
|
2015-09-09 23:58:25 +01:00
|
|
|
void nir_instr_rewrite_dest(nir_instr *instr, nir_dest *dest,
|
|
|
|
nir_dest new_dest);
|
2014-12-12 20:52:11 +00:00
|
|
|
|
2015-01-21 00:23:51 +00:00
|
|
|
void nir_ssa_dest_init(nir_instr *instr, nir_dest *dest,
|
2015-11-17 12:57:54 +00:00
|
|
|
unsigned num_components, unsigned bit_size,
|
|
|
|
const char *name);
|
2014-11-20 00:06:32 +00:00
|
|
|
void nir_ssa_def_init(nir_instr *instr, nir_ssa_def *def,
|
2015-11-17 12:57:54 +00:00
|
|
|
unsigned num_components, unsigned bit_size,
|
|
|
|
const char *name);
|
2017-08-22 22:08:32 +01:00
|
|
|
static inline void
|
|
|
|
nir_ssa_dest_init_for_type(nir_instr *instr, nir_dest *dest,
|
|
|
|
const struct glsl_type *type,
|
|
|
|
const char *name)
|
|
|
|
{
|
|
|
|
assert(glsl_type_is_vector_or_scalar(type));
|
|
|
|
nir_ssa_dest_init(instr, dest, glsl_get_components(type),
|
|
|
|
glsl_get_bit_size(type), name);
|
|
|
|
}
|
2015-09-09 21:24:35 +01:00
|
|
|
void nir_ssa_def_rewrite_uses(nir_ssa_def *def, nir_src new_src);
|
2015-11-12 16:40:17 +00:00
|
|
|
void nir_ssa_def_rewrite_uses_after(nir_ssa_def *def, nir_src new_src,
|
|
|
|
nir_instr *after_me);
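/*
 * Sketch of the most common rewrite pattern enabled by the per-source
 * use lists (replace_def is an illustrative helper, not part of
 * nir.h): point every use of `def` at `replacement`, then delete the
 * now-dead defining instruction.
 */
static inline void
replace_def(nir_ssa_def *def, nir_ssa_def *replacement)
{
   nir_ssa_def_rewrite_uses(def, nir_src_for_ssa(replacement));
   nir_instr_remove(def->parent_instr);
}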
|
2014-11-04 18:40:48 +00:00
|
|
|
|
2018-07-12 02:40:23 +01:00
|
|
|
nir_component_mask_t nir_ssa_def_components_read(const nir_ssa_def *def);
|
2015-10-09 16:13:43 +01:00
|
|
|
|
2020-08-11 20:13:36 +01:00
|
|
|
|
|
|
|
/** Returns the next block, disregarding structure
|
|
|
|
*
|
|
|
|
* The ordering is deterministic but has no guarantees beyond that. In
|
|
|
|
* particular, it is not guaranteed to be dominance-preserving.
|
|
|
|
*/
|
|
|
|
nir_block *nir_block_unstructured_next(nir_block *block);
|
|
|
|
nir_block *nir_unstructured_start_block(nir_function_impl *impl);
|
|
|
|
|
|
|
|
#define nir_foreach_block_unstructured(block, impl) \
|
|
|
|
for (nir_block *block = nir_unstructured_start_block(impl); block != NULL; \
|
|
|
|
block = nir_block_unstructured_next(block))
|
|
|
|
|
|
|
|
#define nir_foreach_block_unstructured_safe(block, impl) \
|
|
|
|
for (nir_block *block = nir_unstructured_start_block(impl), \
|
|
|
|
*next = nir_block_unstructured_next(block); \
|
|
|
|
block != NULL; \
|
|
|
|
block = next, next = nir_block_unstructured_next(block))
|
|
|
|
|
2016-04-08 07:11:44 +01:00
|
|
|
/*
|
|
|
|
* Finds the next basic block in source-code order; returns NULL if there is
|
|
|
|
* none
|
|
|
|
*/
|
|
|
|
|
|
|
|
nir_block *nir_block_cf_tree_next(nir_block *block);
|
|
|
|
|
|
|
|
/* Performs the opposite of nir_block_cf_tree_next() */
|
|
|
|
|
|
|
|
nir_block *nir_block_cf_tree_prev(nir_block *block);
|
|
|
|
|
|
|
|
/* Gets the first block in a CF node in source-code order */
|
|
|
|
|
|
|
|
nir_block *nir_cf_node_cf_tree_first(nir_cf_node *node);
|
|
|
|
|
|
|
|
/* Gets the last block in a CF node in source-code order */
|
|
|
|
|
|
|
|
nir_block *nir_cf_node_cf_tree_last(nir_cf_node *node);
|
|
|
|
|
|
|
|
/* Gets the next block after a CF node in source-code order */
|
|
|
|
|
|
|
|
nir_block *nir_cf_node_cf_tree_next(nir_cf_node *node);
|
|
|
|
|
|
|
|
/* Macros for loops that visit blocks in source-code order */
|
|
|
|
|
|
|
|
#define nir_foreach_block(block, impl) \
|
|
|
|
for (nir_block *block = nir_start_block(impl); block != NULL; \
|
|
|
|
block = nir_block_cf_tree_next(block))
|
|
|
|
|
|
|
|
#define nir_foreach_block_safe(block, impl) \
|
|
|
|
for (nir_block *block = nir_start_block(impl), \
|
|
|
|
*next = nir_block_cf_tree_next(block); \
|
|
|
|
block != NULL; \
|
|
|
|
block = next, next = nir_block_cf_tree_next(block))
|
|
|
|
|
|
|
|
#define nir_foreach_block_reverse(block, impl) \
|
|
|
|
for (nir_block *block = nir_impl_last_block(impl); block != NULL; \
|
|
|
|
block = nir_block_cf_tree_prev(block))
|
|
|
|
|
|
|
|
#define nir_foreach_block_reverse_safe(block, impl) \
|
|
|
|
for (nir_block *block = nir_impl_last_block(impl), \
|
|
|
|
*prev = nir_block_cf_tree_prev(block); \
|
|
|
|
block != NULL; \
|
|
|
|
block = prev, prev = nir_block_cf_tree_prev(block))
|
|
|
|
|
|
|
|
#define nir_foreach_block_in_cf_node(block, node) \
|
|
|
|
for (nir_block *block = nir_cf_node_cf_tree_first(node); \
|
|
|
|
block != nir_cf_node_cf_tree_next(node); \
|
|
|
|
block = nir_block_cf_tree_next(block))
|
|
|
|
|
2014-10-29 23:25:51 +00:00
|
|
|
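/* A minimal sketch of how these macros are used (assuming a valid
 * nir_function_impl); they compose like an ordinary C for loop, so a pass
 * that only inspects blocks can be written inline:
 *
 *    static unsigned
 *    count_blocks(nir_function_impl *impl)
 *    {
 *       unsigned count = 0;
 *       nir_foreach_block(block, impl)
 *          count++;
 *       return count;
 *    }
 *
 * Use nir_foreach_block_safe() instead if the loop body may remove the
 * current block.
 */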
/* If the following CF node is an if, this function returns that if.
 * Otherwise, it returns NULL.
 */
nir_if *nir_block_get_following_if(nir_block *block);

nir_loop *nir_block_get_following_loop(nir_block *block);

void nir_index_local_regs(nir_function_impl *impl);
void nir_index_ssa_defs(nir_function_impl *impl);
unsigned nir_index_instrs(nir_function_impl *impl);

void nir_index_blocks(nir_function_impl *impl);

unsigned nir_shader_index_vars(nir_shader *shader, nir_variable_mode modes);
unsigned nir_function_impl_index_vars(nir_function_impl *impl);

void nir_print_shader(nir_shader *shader, FILE *fp);
void nir_print_shader_annotated(nir_shader *shader, FILE *fp, struct hash_table *errors);
void nir_print_instr(const nir_instr *instr, FILE *fp);
void nir_print_deref(const nir_deref_instr *deref, FILE *fp);

/** Shallow clone of a single instruction. */
nir_instr *nir_instr_clone(nir_shader *s, const nir_instr *orig);

/** Shallow clone of a single ALU instruction. */
nir_alu_instr *nir_alu_instr_clone(nir_shader *s, const nir_alu_instr *orig);

nir_shader *nir_shader_clone(void *mem_ctx, const nir_shader *s);
nir_function_impl *nir_function_impl_clone(nir_shader *shader,
                                           const nir_function_impl *fi);
nir_constant *nir_constant_clone(const nir_constant *c, nir_variable *var);
nir_variable *nir_variable_clone(const nir_variable *c, nir_shader *shader);

void nir_shader_replace(nir_shader *dest, nir_shader *src);

void nir_shader_serialize_deserialize(nir_shader *s);
#ifndef NDEBUG
void nir_validate_shader(nir_shader *shader, const char *when);
void nir_validate_ssa_dominance(nir_shader *shader, const char *when);
void nir_metadata_set_validation_flag(nir_shader *shader);
void nir_metadata_check_validation_flag(nir_shader *shader);

static inline bool
should_skip_nir(const char *name)
{
   static const char *list = NULL;
   if (!list) {
      /* Comma separated list of names to skip. */
      list = getenv("NIR_SKIP");
      if (!list)
         list = "";
   }

   if (!list[0])
      return false;

   return comma_separated_list_contains(list, name);
}

static inline bool
should_clone_nir(void)
{
   static int should_clone = -1;
   if (should_clone < 0)
      should_clone = env_var_as_boolean("NIR_TEST_CLONE", false);

   return should_clone;
}

static inline bool
should_serialize_deserialize_nir(void)
{
   static int test_serialize = -1;
   if (test_serialize < 0)
      test_serialize = env_var_as_boolean("NIR_TEST_SERIALIZE", false);

   return test_serialize;
}

static inline bool
should_print_nir(nir_shader *shader)
{
   static int should_print = -1;
   if (should_print < 0)
      should_print = env_var_as_unsigned("NIR_PRINT", 0);

   if (should_print == 1)
      return !shader->info.internal;

   return should_print;
}
#else
static inline void nir_validate_shader(nir_shader *shader, const char *when) { (void) shader; (void)when; }
static inline void nir_validate_ssa_dominance(nir_shader *shader, const char *when) { (void) shader; (void)when; }
static inline void nir_metadata_set_validation_flag(nir_shader *shader) { (void) shader; }
static inline void nir_metadata_check_validation_flag(nir_shader *shader) { (void) shader; }
static inline bool should_skip_nir(UNUSED const char *pass_name) { return false; }
static inline bool should_clone_nir(void) { return false; }
static inline bool should_serialize_deserialize_nir(void) { return false; }
static inline bool should_print_nir(nir_shader *shader) { return false; }
#endif /* NDEBUG */
#define _PASS(pass, nir, do_pass) do {                               \
   if (should_skip_nir(#pass)) {                                     \
      printf("skipping %s\n", #pass);                                \
      break;                                                         \
   }                                                                 \
   do_pass                                                           \
   nir_validate_shader(nir, "after " #pass);                         \
   if (should_clone_nir()) {                                         \
      nir_shader *clone = nir_shader_clone(ralloc_parent(nir), nir); \
      nir_shader_replace(nir, clone);                                \
   }                                                                 \
   if (should_serialize_deserialize_nir()) {                         \
      nir_shader_serialize_deserialize(nir);                         \
   }                                                                 \
} while (0)

#define NIR_PASS(progress, nir, pass, ...) _PASS(pass, nir,          \
   nir_metadata_set_validation_flag(nir);                            \
   if (should_print_nir(nir))                                        \
      printf("%s\n", #pass);                                         \
   if (pass(nir, ##__VA_ARGS__)) {                                   \
      progress = true;                                               \
      if (should_print_nir(nir))                                     \
         nir_print_shader(nir, stdout);                              \
      nir_metadata_check_validation_flag(nir);                       \
   }                                                                 \
)

#define NIR_PASS_V(nir, pass, ...) _PASS(pass, nir,                  \
   if (should_print_nir(nir))                                        \
      printf("%s\n", #pass);                                         \
   pass(nir, ##__VA_ARGS__);                                         \
   if (should_print_nir(nir))                                        \
      nir_print_shader(nir, stdout);                                 \
)

#define NIR_SKIP(name) should_skip_nir(#name)
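/* Taken together, these macros give the usual shape of a driver's
 * optimization loop.  A hedged sketch (the particular passes are
 * illustrative choices; all of them are declared later in this header):
 *
 *    static void
 *    optimize(nir_shader *nir)
 *    {
 *       bool progress;
 *       NIR_PASS_V(nir, nir_lower_vars_to_ssa);
 *       do {
 *          progress = false;
 *          NIR_PASS(progress, nir, nir_opt_algebraic);
 *          NIR_PASS(progress, nir, nir_opt_comparison_pre);
 *       } while (progress);
 *    }
 *
 * NIR_PASS accumulates progress from passes that return bool, while
 * NIR_PASS_V runs a pass purely for its side effects.
 */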
/** An instruction filtering callback
 *
 * Returns true if the instruction should be processed and false otherwise.
 */
typedef bool (*nir_instr_filter_cb)(const nir_instr *, const void *);

/** A simple instruction lowering callback
 *
 * Many instruction lowering passes can be written as a simple function which
 * takes an instruction as its input and returns a sequence of instructions
 * that implement the consumed instruction.  This function type represents
 * such a lowering function.  When called, a function with this prototype
 * should either return NULL indicating that no lowering needs to be done or
 * emit a sequence of instructions using the provided builder (whose cursor
 * will already be placed after the instruction to be lowered) and return the
 * resulting nir_ssa_def.
 */
typedef nir_ssa_def *(*nir_lower_instr_cb)(struct nir_builder *,
                                           nir_instr *, void *);

/**
 * Special return value for nir_lower_instr_cb when some progress occurred
 * (like changing an input to the instr) that didn't result in a replacement
 * SSA def being generated.
 */
#define NIR_LOWER_INSTR_PROGRESS ((nir_ssa_def *)(uintptr_t)1)
/** Iterate over all the instructions in a nir_function_impl and lower them
 * using the provided callbacks
 *
 * This function implements the guts of a standard lowering pass for you.  It
 * iterates over all of the instructions in a nir_function_impl and calls the
 * filter callback on each one.  If the filter callback returns true, it then
 * calls the lowering callback on the instruction.  (Splitting it this way
 * allows us to avoid some save/restore work for instructions we know won't
 * be lowered.)  If the instruction is dead after the lowering is complete,
 * it will be removed.  If new instructions are added, the lowering callback
 * will also be called on them in case multiple lowerings are required.
 *
 * The metadata for the nir_function_impl will also be updated.  If any
 * blocks are added (they cannot be removed), dominance and block indices
 * will be invalidated.
 */
bool nir_function_impl_lower_instructions(nir_function_impl *impl,
                                          nir_instr_filter_cb filter,
                                          nir_lower_instr_cb lower,
                                          void *cb_data);
bool nir_shader_lower_instructions(nir_shader *shader,
                                   nir_instr_filter_cb filter,
                                   nir_lower_instr_cb lower,
                                   void *cb_data);
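/* A skeleton of a pass built on this helper (a sketch only: the ALU-only
 * filter and the empty lowering body are placeholders, and any builder
 * calls made inside lower_cb are up to the pass author):
 *
 *    static bool
 *    filter_cb(const nir_instr *instr, const void *data)
 *    {
 *       return instr->type == nir_instr_type_alu;
 *    }
 *
 *    static nir_ssa_def *
 *    lower_cb(struct nir_builder *b, nir_instr *instr, void *data)
 *    {
 *       return NULL;
 *    }
 *
 *    static bool
 *    run_pass(nir_shader *shader)
 *    {
 *       return nir_shader_lower_instructions(shader, filter_cb, lower_cb, NULL);
 *    }
 *
 * lower_cb returns NULL to leave the instruction alone, a replacement def,
 * or NIR_LOWER_INSTR_PROGRESS when it changed the instruction in place.
 */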
void nir_calc_dominance_impl(nir_function_impl *impl);
void nir_calc_dominance(nir_shader *shader);

nir_block *nir_dominance_lca(nir_block *b1, nir_block *b2);
bool nir_block_dominates(nir_block *parent, nir_block *child);
bool nir_block_is_unreachable(nir_block *block);

void nir_dump_dom_tree_impl(nir_function_impl *impl, FILE *fp);
void nir_dump_dom_tree(nir_shader *shader, FILE *fp);

void nir_dump_dom_frontier_impl(nir_function_impl *impl, FILE *fp);
void nir_dump_dom_frontier(nir_shader *shader, FILE *fp);

void nir_dump_cfg_impl(nir_function_impl *impl, FILE *fp);
void nir_dump_cfg(nir_shader *shader, FILE *fp);

int nir_gs_count_vertices(const nir_shader *shader);

bool nir_shrink_vec_array_vars(nir_shader *shader, nir_variable_mode modes);
bool nir_split_array_vars(nir_shader *shader, nir_variable_mode modes);
bool nir_split_var_copies(nir_shader *shader);
bool nir_split_per_member_structs(nir_shader *shader);
bool nir_split_struct_vars(nir_shader *shader, nir_variable_mode modes);

bool nir_lower_returns_impl(nir_function_impl *impl);
bool nir_lower_returns(nir_shader *shader);

void nir_inline_function_impl(struct nir_builder *b,
                              const nir_function_impl *impl,
                              nir_ssa_def **params,
                              struct hash_table *shader_var_remap);
bool nir_inline_functions(nir_shader *shader);

bool nir_propagate_invariant(nir_shader *shader);

void nir_lower_var_copy_instr(nir_intrinsic_instr *copy, nir_shader *shader);
void nir_lower_deref_copy_instr(struct nir_builder *b,
                                nir_intrinsic_instr *copy);
bool nir_lower_var_copies(nir_shader *shader);

bool nir_lower_memcpy(nir_shader *shader);

void nir_fixup_deref_modes(nir_shader *shader);
bool nir_lower_global_vars_to_local(nir_shader *shader);

typedef enum {
   nir_lower_direct_array_deref_of_vec_load     = (1 << 0),
   nir_lower_indirect_array_deref_of_vec_load   = (1 << 1),
   nir_lower_direct_array_deref_of_vec_store    = (1 << 2),
   nir_lower_indirect_array_deref_of_vec_store  = (1 << 3),
} nir_lower_array_deref_of_vec_options;

bool nir_lower_array_deref_of_vec(nir_shader *shader, nir_variable_mode modes,
                                  nir_lower_array_deref_of_vec_options options);

bool nir_lower_indirect_derefs(nir_shader *shader, nir_variable_mode modes,
                               uint32_t max_lower_array_len);

bool nir_lower_locals_to_regs(nir_shader *shader);

void nir_lower_io_to_temporaries(nir_shader *shader,
                                 nir_function_impl *entrypoint,
                                 bool outputs, bool inputs);

bool nir_lower_vars_to_scratch(nir_shader *shader,
                               nir_variable_mode modes,
                               int size_threshold,
                               glsl_type_size_align_func size_align);

void nir_lower_clip_halfz(nir_shader *shader);

void nir_shader_gather_info(nir_shader *shader, nir_function_impl *entrypoint);

void nir_gather_ssa_types(nir_function_impl *impl,
                          BITSET_WORD *float_types,
                          BITSET_WORD *int_types);

void nir_assign_var_locations(nir_shader *shader, nir_variable_mode mode,
                              unsigned *size,
                              int (*type_size)(const struct glsl_type *, bool));

/* Some helpers to do very simple linking */
bool nir_remove_unused_varyings(nir_shader *producer, nir_shader *consumer);
bool nir_remove_unused_io_vars(nir_shader *shader, nir_variable_mode mode,
                               uint64_t *used_by_other_stage,
                               uint64_t *used_by_other_stage_patches);
void nir_compact_varyings(nir_shader *producer, nir_shader *consumer,
                          bool default_to_smooth_interp);
void nir_link_xfb_varyings(nir_shader *producer, nir_shader *consumer);
bool nir_link_opt_varyings(nir_shader *producer, nir_shader *consumer);

bool nir_lower_amul(nir_shader *shader,
                    int (*type_size)(const struct glsl_type *, bool));

bool nir_lower_ubo_vec4(nir_shader *shader);

void nir_assign_io_var_locations(nir_shader *shader,
                                 nir_variable_mode mode,
                                 unsigned *size,
                                 gl_shader_stage stage);

typedef struct {
   uint8_t num_linked_io_vars;
   uint8_t num_linked_patch_io_vars;
} nir_linked_io_var_info;

nir_linked_io_var_info
nir_assign_linked_io_var_locations(nir_shader *producer,
                                   nir_shader *consumer);
typedef enum {
   /* If set, this causes all 64-bit IO operations to be lowered on-the-fly
    * to 32-bit operations.  This is only valid for nir_var_shader_in/out
    * modes.
    */
   nir_lower_io_lower_64bit_to_32 = (1 << 0),

   /* If set, this forces all non-flat fragment shader inputs to be
    * interpolated as if with the "sample" qualifier.  This requires
    * nir_shader_compiler_options::use_interpolated_input_intrinsics.
    */
   nir_lower_io_force_sample_interpolation = (1 << 1),
} nir_lower_io_options;
bool nir_lower_io(nir_shader *shader,
                  nir_variable_mode modes,
                  int (*type_size)(const struct glsl_type *, bool),
                  nir_lower_io_options);
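/* A sketch of a typical invocation.  The type_size callback here is a
 * deliberately trivial placeholder (one slot per type); real drivers
 * compute per-type slot counts, and the bool parameter selects bindless
 * handling:
 *
 *    static int
 *    trivial_type_size(const struct glsl_type *type, bool bindless)
 *    {
 *       return 1;
 *    }
 *
 *    static void
 *    lower_shader_io(nir_shader *shader)
 *    {
 *       nir_lower_io(shader, nir_var_shader_in | nir_var_shader_out,
 *                    trivial_type_size, nir_lower_io_lower_64bit_to_32);
 *    }
 */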
bool nir_io_add_const_offset_to_base(nir_shader *nir, nir_variable_mode modes);

bool
nir_lower_vars_to_explicit_types(nir_shader *shader,
                                 nir_variable_mode modes,
                                 glsl_type_size_align_func type_info);

bool nir_lower_mem_constant_vars(nir_shader *shader,
                                 glsl_type_size_align_func type_info);

typedef enum {
   /**
    * An address format which is a simple 32-bit global GPU address.
    */
   nir_address_format_32bit_global,

   /**
    * An address format which is a simple 64-bit global GPU address.
    */
   nir_address_format_64bit_global,

   /**
    * An address format which is a bounds-checked 64-bit global GPU address.
    *
    * The address is a 32-bit vec4 where .xy are a uint64_t base address
    * stored with the low bits in .x and high bits in .y, .z is a size, and
    * .w is an offset.  When the final I/O operation is lowered, .w is
    * checked against .z and the operation is predicated on the result.
    */
   nir_address_format_64bit_bounded_global,

   /**
    * An address format which is comprised of a vec2 where the first
    * component is a buffer index and the second is an offset.
    */
   nir_address_format_32bit_index_offset,

   /**
    * An address format which is a 64-bit value, where the high 32 bits
    * are a buffer index, and the low 32 bits are an offset.
    */
   nir_address_format_32bit_index_offset_pack64,

   /**
    * An address format which is comprised of a vec3 where the first two
    * components specify the buffer and the third is an offset.  On hardware
    * with a bindless model (e.g. a6xx), the two index components select a
    * bindless base and a descriptor index.
    */
   nir_address_format_vec2_index_32bit_offset,

   /**
    * An address format which is a simple 32-bit offset.
    */
   nir_address_format_32bit_offset,

   /**
    * An address format which is a simple 32-bit offset cast to 64-bit.
    */
   nir_address_format_32bit_offset_as_64bit,

   /**
    * An address format representing a purely logical addressing model.  In
    * this model, all deref chains must be complete from the dereference
    * operation to the variable.  Cast derefs are not allowed.  These
    * addresses will be 32-bit scalars but the format is immaterial because
    * you can always chase the chain.
    */
   nir_address_format_logical,
} nir_address_format;
static inline unsigned
nir_address_format_bit_size(nir_address_format addr_format)
{
   switch (addr_format) {
   case nir_address_format_32bit_global:              return 32;
   case nir_address_format_64bit_global:              return 64;
   case nir_address_format_64bit_bounded_global:      return 32;
   case nir_address_format_32bit_index_offset:        return 32;
   case nir_address_format_32bit_index_offset_pack64: return 64;
   case nir_address_format_vec2_index_32bit_offset:   return 32;
   case nir_address_format_32bit_offset:              return 32;
   case nir_address_format_32bit_offset_as_64bit:     return 64;
   case nir_address_format_logical:                   return 32;
   }
   unreachable("Invalid address format");
}

static inline unsigned
nir_address_format_num_components(nir_address_format addr_format)
{
   switch (addr_format) {
   case nir_address_format_32bit_global:              return 1;
   case nir_address_format_64bit_global:              return 1;
   case nir_address_format_64bit_bounded_global:      return 4;
   case nir_address_format_32bit_index_offset:        return 2;
   case nir_address_format_32bit_index_offset_pack64: return 1;
   case nir_address_format_vec2_index_32bit_offset:   return 3;
   case nir_address_format_32bit_offset:              return 1;
   case nir_address_format_32bit_offset_as_64bit:     return 1;
   case nir_address_format_logical:                   return 1;
   }
   unreachable("Invalid address format");
}

static inline const struct glsl_type *
nir_address_format_to_glsl_type(nir_address_format addr_format)
{
   unsigned bit_size = nir_address_format_bit_size(addr_format);
   assert(bit_size == 32 || bit_size == 64);
   return glsl_vector_type(bit_size == 32 ? GLSL_TYPE_UINT : GLSL_TYPE_UINT64,
                           nir_address_format_num_components(addr_format));
}
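/* As a worked check of the helpers above: the bounds-checked global format
 * is four 32-bit components (base lo/hi, size, offset), so
 *
 *    nir_address_format_bit_size(nir_address_format_64bit_bounded_global)
 *
 * returns 32,
 *
 *    nir_address_format_num_components(nir_address_format_64bit_bounded_global)
 *
 * returns 4, and nir_address_format_to_glsl_type() maps it to a uvec4.
 */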
const nir_const_value *nir_address_format_null_value(nir_address_format addr_format);

nir_ssa_def *nir_build_addr_ieq(struct nir_builder *b, nir_ssa_def *addr0, nir_ssa_def *addr1,
                                nir_address_format addr_format);

nir_ssa_def *nir_build_addr_isub(struct nir_builder *b, nir_ssa_def *addr0, nir_ssa_def *addr1,
                                 nir_address_format addr_format);

nir_ssa_def *nir_explicit_io_address_from_deref(struct nir_builder *b,
                                                nir_deref_instr *deref,
                                                nir_ssa_def *base_addr,
                                                nir_address_format addr_format);

bool nir_get_explicit_deref_align(nir_deref_instr *deref,
                                  bool default_to_type_align,
                                  uint32_t *align_mul,
                                  uint32_t *align_offset);

void nir_lower_explicit_io_instr(struct nir_builder *b,
                                 nir_intrinsic_instr *io_instr,
                                 nir_ssa_def *addr,
                                 nir_address_format addr_format);

bool nir_lower_explicit_io(nir_shader *shader,
                           nir_variable_mode modes,
                           nir_address_format);

nir_src *nir_get_io_offset_src(nir_intrinsic_instr *instr);
nir_src *nir_get_io_vertex_index_src(nir_intrinsic_instr *instr);

bool nir_is_per_vertex_io(const nir_variable *var, gl_shader_stage stage);

bool nir_lower_regs_to_ssa_impl(nir_function_impl *impl);
bool nir_lower_regs_to_ssa(nir_shader *shader);
bool nir_lower_vars_to_ssa(nir_shader *shader);

bool nir_remove_dead_derefs(nir_shader *shader);
bool nir_remove_dead_derefs_impl(nir_function_impl *impl);
bool nir_remove_dead_variables(nir_shader *shader, nir_variable_mode modes,
                               bool (*can_remove_var)(nir_variable *var));
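/* The can_remove_var callback lets the caller veto individual removals.  A
 * trivial sketch that permits every removal the pass finds:
 *
 *    static bool
 *    can_remove_any_var(nir_variable *var)
 *    {
 *       return true;
 *    }
 */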
bool nir_lower_variable_initializers(nir_shader *shader,
                                     nir_variable_mode modes);

bool nir_move_vec_src_uses_to_dest(nir_shader *shader);
bool nir_lower_vec_to_movs(nir_shader *shader);
void nir_lower_alpha_test(nir_shader *shader, enum compare_func func,
                          bool alpha_to_one,
                          const gl_state_index16 *alpha_ref_state_tokens);
bool nir_lower_alu(nir_shader *shader);

bool nir_lower_flrp(nir_shader *shader, unsigned lowering_mask,
                    bool always_precise);

bool nir_lower_alu_to_scalar(nir_shader *shader, nir_instr_filter_cb cb, const void *data);
bool nir_lower_bool_to_bitsize(nir_shader *shader);
bool nir_lower_bool_to_float(nir_shader *shader);
bool nir_lower_bool_to_int32(nir_shader *shader);
bool nir_lower_int_to_float(nir_shader *shader);
bool nir_lower_load_const_to_scalar(nir_shader *shader);
bool nir_lower_read_invocation_to_scalar(nir_shader *shader);
bool nir_lower_phis_to_scalar(nir_shader *shader);
void nir_lower_io_arrays_to_elements(nir_shader *producer, nir_shader *consumer);
void nir_lower_io_arrays_to_elements_no_indirects(nir_shader *shader,
                                                  bool outputs_only);
void nir_lower_io_to_scalar(nir_shader *shader, nir_variable_mode mask);
void nir_lower_io_to_scalar_early(nir_shader *shader, nir_variable_mode mask);
bool nir_lower_io_to_vector(nir_shader *shader, nir_variable_mode mask);

bool nir_lower_fragcolor(nir_shader *shader);
bool nir_lower_fragcoord_wtrans(nir_shader *shader);
void nir_lower_viewport_transform(nir_shader *shader);
bool nir_lower_uniforms_to_ubo(nir_shader *shader, int multiplier);
typedef struct nir_lower_subgroups_options {
   uint8_t subgroup_size;
   uint8_t ballot_bit_size;
   bool lower_to_scalar:1;
   bool lower_vote_trivial:1;
   bool lower_vote_eq_to_ballot:1;
   bool lower_subgroup_masks:1;
   bool lower_shuffle:1;
   bool lower_shuffle_to_32bit:1;
   bool lower_shuffle_to_swizzle_amd:1;
   bool lower_quad:1;
   bool lower_quad_broadcast_dynamic:1;
   bool lower_quad_broadcast_dynamic_to_const:1;
} nir_lower_subgroups_options;

bool nir_lower_subgroups(nir_shader *shader,
                         const nir_lower_subgroups_options *options);
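/* A hedged example of driving this pass; the sizes and flags below are
 * illustrative, not a recommendation for any particular hardware:
 *
 *    static bool
 *    lower_subgroup_ops(nir_shader *shader)
 *    {
 *       const nir_lower_subgroups_options opts = {
 *          .subgroup_size = 32,
 *          .ballot_bit_size = 32,
 *          .lower_to_scalar = true,
 *          .lower_subgroup_masks = true,
 *       };
 *       return nir_lower_subgroups(shader, &opts);
 *    }
 */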
bool nir_lower_system_values(nir_shader *shader);

typedef struct nir_lower_compute_system_values_options {
   bool has_base_global_invocation_id:1;
   bool has_base_work_group_id:1;
} nir_lower_compute_system_values_options;

bool nir_lower_compute_system_values(nir_shader *shader,
                                     const nir_lower_compute_system_values_options *options);

enum PACKED nir_lower_tex_packing {
   nir_lower_tex_packing_none = 0,
   /* The sampler returns up to 2 32-bit words of half floats or 16-bit signed
    * or unsigned ints based on the sampler type
    */
   nir_lower_tex_packing_16,
   /* The sampler returns 1 32-bit word of 4x8 unorm */
   nir_lower_tex_packing_8,
};
typedef struct nir_lower_tex_options {
   /**
    * bitmask of (1 << GLSL_SAMPLER_DIM_x) to control for which
    * sampler types a texture projector is lowered.
    */
   unsigned lower_txp;

   /**
    * If true, lower away nir_tex_src_offset for all texelfetch instructions.
    */
   bool lower_txf_offset;

   /**
    * If true, lower away nir_tex_src_offset for all rect textures.
    */
   bool lower_rect_offset;

   /**
    * If true, lower rect textures to 2D, using txs to fetch the
    * texture dimensions and dividing the texture coords by the
    * texture dims to normalize.
    */
   bool lower_rect;

   /**
    * If true, convert yuv to rgb.
    */
   unsigned lower_y_uv_external;
   unsigned lower_y_u_v_external;
   unsigned lower_yx_xuxv_external;
   unsigned lower_xy_uxvx_external;
   unsigned lower_ayuv_external;
   unsigned lower_xyuv_external;
   unsigned bt709_external;
   unsigned bt2020_external;

   /**
    * To emulate certain texture wrap modes, this can be used
    * to saturate the specified tex coord to [0.0, 1.0].  The
    * bits are according to sampler #, i.e. if, for example:
    *
    *   (conf->saturate_s & (1 << n))
    *
    * is true, then the s coord for sampler n is saturated.
    *
    * Note that clamping must happen *after* projector lowering
    * so any projected texture sample instruction with a clamped
    * coordinate gets automatically lowered, regardless of the
    * 'lower_txp' setting.
    */
   unsigned saturate_s;
   unsigned saturate_t;
   unsigned saturate_r;

   /* Bitmask of textures that need swizzling.
    *
    * If (swizzle_result & (1 << texture_index)), then the swizzle in
    * swizzles[texture_index] is applied to the result of the texturing
    * operation.
    */
   unsigned swizzle_result;

   /* A swizzle for each texture.  Values 0-3 represent x, y, z, or w swizzles
    * while 4 and 5 represent 0 and 1 respectively.
    */
   uint8_t swizzles[32][4];

   /* Can be used to scale sampled values in range required by the format. */
   float scale_factors[32];

   /**
    * Bitmap of textures that need srgb to linear conversion.  If
    * (lower_srgb & (1 << texture_index)) then the rgb (xyz) components
    * of the texture are lowered to linear.
    */
   unsigned lower_srgb;

   /**
    * If true, lower nir_texop_tex on shaders that don't support implicit
    * LODs to nir_texop_txl.
    */
   bool lower_tex_without_implicit_lod;

   /**
    * If true, lower nir_texop_txd on cube maps with nir_texop_txl.
    */
   bool lower_txd_cube_map;

   /**
    * If true, lower nir_texop_txd on 3D surfaces with nir_texop_txl.
    */
   bool lower_txd_3d;

   /**
    * If true, lower nir_texop_txd on shadow samplers (except cube maps)
    * with nir_texop_txl.  Notice that cube map shadow samplers are lowered
    * with lower_txd_cube_map.
    */
   bool lower_txd_shadow;

   /**
    * If true, lower nir_texop_txd on all samplers to a nir_texop_txl.
    * Implies lower_txd_cube_map and lower_txd_shadow.
    */
   bool lower_txd;

   /**
    * If true, lower nir_texop_txb that tries to use shadow compare and
    * min_lod at the same time to a nir_texop_lod, some math, and
    * nir_texop_tex.
    */
   bool lower_txb_shadow_clamp;

   /**
    * If true, lower nir_texop_txd on shadow samplers when it uses min_lod
    * with nir_texop_txl.  This includes cube maps.
    */
   bool lower_txd_shadow_clamp;

   /**
    * If true, lower nir_texop_txd when it uses both offset and min_lod
    * with nir_texop_txl.  This includes cube maps.
    */
   bool lower_txd_offset_clamp;

   /**
    * If true, lower nir_texop_txd with min_lod to a nir_texop_txl if the
    * sampler is bindless.
    */
   bool lower_txd_clamp_bindless_sampler;

   /**
    * If true, lower nir_texop_txd with min_lod to a nir_texop_txl if the
    * sampler index is not statically determinable to be less than 16.
    */
   bool lower_txd_clamp_if_sampler_index_not_lt_16;

   /**
    * If true, lower nir_texop_txs with a non-0-lod into nir_texop_txs with
    * 0-lod followed by a nir_ishr.
    */
   bool lower_txs_lod;

   /**
    * If true, apply a .bagr swizzle on tg4 results to handle Broadcom's
    * mixed-up tg4 locations.
    */
   bool lower_tg4_broadcom_swizzle;

   /**
    * If true, lowers tg4 with 4 constant offsets to 4 tg4 calls
    */
   bool lower_tg4_offsets;

   enum nir_lower_tex_packing lower_tex_packing[32];
} nir_lower_tex_options;

bool nir_lower_tex(nir_shader *shader,
                   const nir_lower_tex_options *options);
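/* For instance, a driver without native rectangle-texture support might
 * configure the pass as in this sketch.  The swizzle forces .a = 1 on
 * texture 0, using the 0-5 encoding described above (0-3 pick x/y/z/w,
 * 4 and 5 pick the constants 0 and 1):
 *
 *    static bool
 *    lower_tex_for_hw(nir_shader *shader)
 *    {
 *       nir_lower_tex_options opts = {
 *          .lower_rect = true,
 *          .swizzle_result = 1 << 0,
 *          .swizzles = { [0] = { 0, 1, 2, 5 } },
 *       };
 *       return nir_lower_tex(shader, &opts);
 *    }
 */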
bool nir_lower_cl_images_to_tex(nir_shader *shader);

enum nir_lower_non_uniform_access_type {
   nir_lower_non_uniform_ubo_access     = (1 << 0),
   nir_lower_non_uniform_ssbo_access    = (1 << 1),
   nir_lower_non_uniform_texture_access = (1 << 2),
   nir_lower_non_uniform_image_access   = (1 << 3),
};

bool nir_lower_non_uniform_access(nir_shader *shader,
                                  enum nir_lower_non_uniform_access_type);

enum nir_lower_idiv_path {
   /* This path is based on NV50LegalizeSSA::handleDIV().  It is the faster
    * of the two but it is not exact in some cases (for example, 1091317713u /
    * 1034u gives 5209173 instead of 1055432). */
   nir_lower_idiv_fast,
   /* This path is based on AMDGPUTargetLowering::LowerUDIVREM() and
    * AMDGPUTargetLowering::LowerSDIVREM().  It requires more instructions
    * than the nv50 path and many of them are integer multiplications, so it
    * is probably slower.  It should always return the correct result,
    * though. */
   nir_lower_idiv_precise,
};

bool nir_lower_idiv(nir_shader *shader, enum nir_lower_idiv_path path);
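/* Given the precision caveat above, a conservative caller picks the exact
 * path; a minimal sketch:
 *
 *    static bool
 *    lower_integer_division(nir_shader *shader)
 *    {
 *       return nir_lower_idiv(shader, nir_lower_idiv_precise);
 *    }
 */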
typedef struct nir_input_attachment_options {
   bool use_fragcoord_sysval;
   bool use_layer_id_sysval;
   bool use_view_id_for_layer;
} nir_input_attachment_options;

bool nir_lower_input_attachments(nir_shader *shader,
                                 const nir_input_attachment_options *options);

bool nir_lower_clip_vs(nir_shader *shader, unsigned ucp_enables,
                       bool use_vars,
                       bool use_clipdist_array,
                       const gl_state_index16 clipplane_state_tokens[][STATE_LENGTH]);
bool nir_lower_clip_gs(nir_shader *shader, unsigned ucp_enables,
                       bool use_clipdist_array,
                       const gl_state_index16 clipplane_state_tokens[][STATE_LENGTH]);
bool nir_lower_clip_fs(nir_shader *shader, unsigned ucp_enables,
                       bool use_clipdist_array);
bool nir_lower_clip_cull_distance_arrays(nir_shader *nir);
bool nir_lower_clip_disable(nir_shader *shader, unsigned clip_plane_enable);

void nir_lower_point_size_mov(nir_shader *shader,
                              const gl_state_index16 *pointsize_state_tokens);

bool nir_lower_frexp(nir_shader *nir);

void nir_lower_two_sided_color(nir_shader *shader, bool face_sysval);

bool nir_lower_clamp_color_outputs(nir_shader *shader);

bool nir_lower_flatshade(nir_shader *shader);

void nir_lower_passthrough_edgeflags(nir_shader *shader);
bool nir_lower_patch_vertices(nir_shader *nir, unsigned static_count,
                              const gl_state_index16 *uniform_state_tokens);

typedef struct nir_lower_wpos_ytransform_options {
   gl_state_index16 state_tokens[STATE_LENGTH];
   bool fs_coord_origin_upper_left :1;
   bool fs_coord_origin_lower_left :1;
   bool fs_coord_pixel_center_integer :1;
   bool fs_coord_pixel_center_half_integer :1;
} nir_lower_wpos_ytransform_options;

bool nir_lower_wpos_ytransform(nir_shader *shader,
                               const nir_lower_wpos_ytransform_options *options);
bool nir_lower_wpos_center(nir_shader *shader, const bool for_sample_shading);

bool nir_lower_wrmasks(nir_shader *shader, nir_instr_filter_cb cb, const void *data);

bool nir_lower_fb_read(nir_shader *shader);
typedef struct nir_lower_drawpixels_options {
   gl_state_index16 texcoord_state_tokens[STATE_LENGTH];
   gl_state_index16 scale_state_tokens[STATE_LENGTH];
   gl_state_index16 bias_state_tokens[STATE_LENGTH];
   unsigned drawpix_sampler;
   unsigned pixelmap_sampler;
   bool pixel_maps :1;
   bool scale_and_bias :1;
} nir_lower_drawpixels_options;

void nir_lower_drawpixels(nir_shader *shader,
                          const nir_lower_drawpixels_options *options);

typedef struct nir_lower_bitmap_options {
   unsigned sampler;
   bool swizzle_xxxx;
} nir_lower_bitmap_options;

void nir_lower_bitmap(nir_shader *shader, const nir_lower_bitmap_options *options);

bool nir_lower_atomics_to_ssbo(nir_shader *shader);

typedef enum {
   nir_lower_int_source_mods = 1 << 0,
   nir_lower_float_source_mods = 1 << 1,
   nir_lower_triop_abs = 1 << 2,
   nir_lower_all_source_mods = (1 << 3) - 1
} nir_lower_to_source_mods_flags;

bool nir_lower_to_source_mods(nir_shader *shader, nir_lower_to_source_mods_flags options);

bool nir_lower_gs_intrinsics(nir_shader *shader, bool per_stream);
typedef unsigned (*nir_lower_bit_size_callback)(const nir_alu_instr *, void *);

bool nir_lower_bit_size(nir_shader *shader,
                        nir_lower_bit_size_callback callback,
                        void *callback_data);
bool nir_lower_64bit_phis(nir_shader *shader);

nir_lower_int64_options nir_lower_int64_op_to_options_mask(nir_op opcode);
bool nir_lower_int64(nir_shader *shader);

nir_lower_doubles_options nir_lower_doubles_op_to_options_mask(nir_op opcode);
bool nir_lower_doubles(nir_shader *shader, const nir_shader *softfp64,
                       nir_lower_doubles_options options);
bool nir_lower_pack(nir_shader *shader);

void nir_lower_mediump_outputs(nir_shader *nir);

bool nir_lower_point_size(nir_shader *shader, float min, float max);

typedef enum {
   nir_lower_interpolation_at_sample = (1 << 1),
   nir_lower_interpolation_at_offset = (1 << 2),
   nir_lower_interpolation_centroid  = (1 << 3),
   nir_lower_interpolation_pixel     = (1 << 4),
   nir_lower_interpolation_sample    = (1 << 5),
} nir_lower_interpolation_options;

bool nir_lower_interpolation(nir_shader *shader,
                             nir_lower_interpolation_options options);
bool nir_lower_discard_to_demote(nir_shader *shader);

bool nir_lower_memory_model(nir_shader *shader);

bool nir_lower_goto_ifs(nir_shader *shader);

bool nir_shader_uses_view_index(nir_shader *shader);
bool nir_can_lower_multiview(nir_shader *shader);
bool nir_lower_multiview(nir_shader *shader, uint32_t view_mask);

bool nir_normalize_cubemap_coords(nir_shader *shader);

void nir_live_ssa_defs_impl(nir_function_impl *impl);

void nir_loop_analyze_impl(nir_function_impl *impl,
                           nir_variable_mode indirect_mask);

bool nir_ssa_defs_interfere(nir_ssa_def *a, nir_ssa_def *b);

bool nir_repair_ssa_impl(nir_function_impl *impl);
bool nir_repair_ssa(nir_shader *shader);

void nir_convert_loop_to_lcssa(nir_loop *loop);
bool nir_convert_to_lcssa(nir_shader *shader, bool skip_invariants, bool skip_bool_invariants);
void nir_divergence_analysis(nir_shader *shader, nir_divergence_options options);

/* If phi_webs_only is true, only convert SSA values involved in phi nodes to
 * registers.  If false, convert all values (even those not involved in a phi
 * node) to registers.
 */
bool nir_convert_from_ssa(nir_shader *shader, bool phi_webs_only);

bool nir_lower_phis_to_regs_block(nir_block *block);
bool nir_lower_ssa_defs_to_regs_block(nir_block *block);
bool nir_rematerialize_derefs_in_use_blocks_impl(nir_function_impl *impl);

bool nir_lower_samplers(nir_shader *shader);
bool nir_lower_ssbo(nir_shader *shader);

/* This is here for unit tests. */
bool nir_opt_comparison_pre_impl(nir_function_impl *impl);

bool nir_opt_comparison_pre(nir_shader *shader);

bool nir_opt_access(nir_shader *shader);
bool nir_opt_algebraic(nir_shader *shader);
bool nir_opt_algebraic_before_ffma(nir_shader *shader);
bool nir_opt_algebraic_late(nir_shader *shader);
/* Late algebraic optimization can also distribute source modifiers into
 * instructions, e.g. rewriting -(a*b) as (-a)*b.  This mainly helps when a
 * negation is applied to a value being type converted, to a phi-node source,
 * or to the source of an instruction that cannot take source modifiers
 * (texture instructions, and math-box instructions on pre-Gen7 Intel
 * hardware).  Redundant negations and absolute values are removed outright
 * rather than pushed down the tree.  On Intel platforms, shader-db showed
 * roughly a 0.14% reduction in total instructions from this transformation,
 * with small cycle-count wins and only isolated spill/fill regressions.
 */
Inconclusive result (value mean confidence interval includes 0).
Broadwell
total instructions in shared programs: 14971203 -> 14949957 (-0.14%)
instructions in affected programs: 2635699 -> 2614453 (-0.81%)
helped: 10982
HURT: 2
helped stats (abs) min: 1 max: 12 x̄: 1.93 x̃: 1
helped stats (rel) min: 0.07% max: 18.75% x̄: 1.39% x̃: 0.94%
HURT stats (abs) min: 1 max: 1 x̄: 1.00 x̃: 1
HURT stats (rel) min: 4.76% max: 4.76% x̄: 4.76% x̃: 4.76%
95% mean confidence interval for instructions value: -1.97 -1.90
95% mean confidence interval for instructions %-change: -1.42% -1.37%
Instructions are helped.
total cycles in shared programs: 336215033 -> 336086458 (-0.04%)
cycles in affected programs: 127383198 -> 127254623 (-0.10%)
helped: 4884
HURT: 1963
helped stats (abs) min: 1 max: 25696 x̄: 51.78 x̃: 12
helped stats (rel) min: <.01% max: 58.28% x̄: 2.00% x̃: 1.05%
HURT stats (abs) min: 1 max: 3401 x̄: 63.33 x̃: 16
HURT stats (rel) min: <.01% max: 39.95% x̄: 2.20% x̃: 0.70%
95% mean confidence interval for cycles value: -29.99 -7.57
95% mean confidence interval for cycles %-change: -0.89% -0.71%
Cycles are helped.
total fills in shared programs: 24905 -> 24901 (-0.02%)
fills in affected programs: 117 -> 113 (-3.42%)
helped: 4
HURT: 0
LOST: 0
GAINED: 16
Haswell
total instructions in shared programs: 13148927 -> 13131528 (-0.13%)
instructions in affected programs: 2220941 -> 2203542 (-0.78%)
helped: 8017
HURT: 4
helped stats (abs) min: 1 max: 12 x̄: 2.17 x̃: 1
helped stats (rel) min: 0.07% max: 15.25% x̄: 1.40% x̃: 0.93%
HURT stats (abs) min: 1 max: 7 x̄: 2.50 x̃: 1
HURT stats (rel) min: 0.33% max: 4.76% x̄: 2.73% x̃: 2.91%
95% mean confidence interval for instructions value: -2.21 -2.13
95% mean confidence interval for instructions %-change: -1.43% -1.37%
Instructions are helped.
total cycles in shared programs: 321221791 -> 321079870 (-0.04%)
cycles in affected programs: 126886055 -> 126744134 (-0.11%)
helped: 4674
HURT: 1729
helped stats (abs) min: 1 max: 23654 x̄: 56.47 x̃: 16
helped stats (rel) min: <.01% max: 53.22% x̄: 2.13% x̃: 1.05%
HURT stats (abs) min: 1 max: 3694 x̄: 70.58 x̃: 18
HURT stats (rel) min: <.01% max: 63.06% x̄: 2.48% x̃: 0.90%
95% mean confidence interval for cycles value: -33.31 -11.02
95% mean confidence interval for cycles %-change: -0.99% -0.78%
Cycles are helped.
total spills in shared programs: 19872 -> 19874 (0.01%)
spills in affected programs: 21 -> 23 (9.52%)
helped: 0
HURT: 1
total fills in shared programs: 20941 -> 20941 (0.00%)
fills in affected programs: 62 -> 62 (0.00%)
helped: 1
HURT: 1
LOST: 0
GAINED: 8
Ivy Bridge
total instructions in shared programs: 11875553 -> 11853839 (-0.18%)
instructions in affected programs: 1553112 -> 1531398 (-1.40%)
helped: 7304
HURT: 3
helped stats (abs) min: 1 max: 16 x̄: 2.97 x̃: 2
helped stats (rel) min: 0.07% max: 15.25% x̄: 1.62% x̃: 1.15%
HURT stats (abs) min: 1 max: 1 x̄: 1.00 x̃: 1
HURT stats (rel) min: 1.05% max: 3.33% x̄: 2.44% x̃: 2.94%
95% mean confidence interval for instructions value: -3.04 -2.90
95% mean confidence interval for instructions %-change: -1.65% -1.59%
Instructions are helped.
total cycles in shared programs: 178246425 -> 178184484 (-0.03%)
cycles in affected programs: 13702146 -> 13640205 (-0.45%)
helped: 4409
HURT: 1566
helped stats (abs) min: 1 max: 531 x̄: 24.52 x̃: 13
helped stats (rel) min: <.01% max: 38.67% x̄: 2.14% x̃: 1.02%
HURT stats (abs) min: 1 max: 356 x̄: 29.48 x̃: 10
HURT stats (rel) min: <.01% max: 64.73% x̄: 1.87% x̃: 0.70%
95% mean confidence interval for cycles value: -11.60 -9.14
95% mean confidence interval for cycles %-change: -1.19% -0.99%
Cycles are helped.
LOST: 0
GAINED: 10
Sandy Bridge
total instructions in shared programs: 10695740 -> 10667483 (-0.26%)
instructions in affected programs: 2337607 -> 2309350 (-1.21%)
helped: 10720
HURT: 1
helped stats (abs) min: 1 max: 49 x̄: 2.64 x̃: 2
helped stats (rel) min: 0.07% max: 20.00% x̄: 1.54% x̃: 1.13%
HURT stats (abs) min: 1 max: 1 x̄: 1.00 x̃: 1
HURT stats (rel) min: 1.04% max: 1.04% x̄: 1.04% x̃: 1.04%
95% mean confidence interval for instructions value: -2.69 -2.58
95% mean confidence interval for instructions %-change: -1.57% -1.51%
Instructions are helped.
total cycles in shared programs: 153478839 -> 153416223 (-0.04%)
cycles in affected programs: 22050900 -> 21988284 (-0.28%)
helped: 5342
HURT: 2200
helped stats (abs) min: 1 max: 1020 x̄: 20.34 x̃: 16
helped stats (rel) min: <.01% max: 24.05% x̄: 1.51% x̃: 0.86%
HURT stats (abs) min: 1 max: 335 x̄: 20.93 x̃: 6
HURT stats (rel) min: <.01% max: 20.18% x̄: 1.03% x̃: 0.30%
95% mean confidence interval for cycles value: -9.18 -7.42
95% mean confidence interval for cycles %-change: -0.82% -0.71%
Cycles are helped.
Iron Lake
total instructions in shared programs: 8114882 -> 8105574 (-0.11%)
instructions in affected programs: 1232504 -> 1223196 (-0.76%)
helped: 4109
HURT: 2
helped stats (abs) min: 1 max: 6 x̄: 2.27 x̃: 1
helped stats (rel) min: 0.05% max: 8.33% x̄: 0.99% x̃: 0.66%
HURT stats (abs) min: 1 max: 1 x̄: 1.00 x̃: 1
HURT stats (rel) min: 0.94% max: 4.35% x̄: 2.65% x̃: 2.65%
95% mean confidence interval for instructions value: -2.31 -2.21
95% mean confidence interval for instructions %-change: -1.01% -0.96%
Instructions are helped.
total cycles in shared programs: 188504036 -> 188466296 (-0.02%)
cycles in affected programs: 31203798 -> 31166058 (-0.12%)
helped: 3447
HURT: 36
helped stats (abs) min: 2 max: 92 x̄: 11.03 x̃: 8
helped stats (rel) min: <.01% max: 5.41% x̄: 0.21% x̃: 0.13%
HURT stats (abs) min: 2 max: 30 x̄: 7.33 x̃: 6
HURT stats (rel) min: 0.01% max: 1.65% x̄: 0.18% x̃: 0.10%
95% mean confidence interval for cycles value: -11.16 -10.51
95% mean confidence interval for cycles %-change: -0.22% -0.20%
Cycles are helped.
LOST: 0
GAINED: 1
GM45
total instructions in shared programs: 4989697 -> 4984531 (-0.10%)
instructions in affected programs: 703952 -> 698786 (-0.73%)
helped: 2493
HURT: 2
helped stats (abs) min: 1 max: 6 x̄: 2.07 x̃: 1
helped stats (rel) min: 0.05% max: 8.33% x̄: 1.03% x̃: 0.66%
HURT stats (abs) min: 1 max: 1 x̄: 1.00 x̃: 1
HURT stats (rel) min: 0.95% max: 4.35% x̄: 2.65% x̃: 2.65%
95% mean confidence interval for instructions value: -2.13 -2.01
95% mean confidence interval for instructions %-change: -1.07% -0.99%
Instructions are helped.
total cycles in shared programs: 128929136 -> 128903886 (-0.02%)
cycles in affected programs: 21583096 -> 21557846 (-0.12%)
helped: 2214
HURT: 17
helped stats (abs) min: 2 max: 92 x̄: 11.44 x̃: 8
helped stats (rel) min: <.01% max: 5.41% x̄: 0.24% x̃: 0.13%
HURT stats (abs) min: 2 max: 8 x̄: 4.24 x̃: 4
HURT stats (rel) min: 0.01% max: 1.65% x̄: 0.20% x̃: 0.09%
95% mean confidence interval for cycles value: -11.75 -10.88
95% mean confidence interval for cycles %-change: -0.25% -0.22%
Cycles are helped.
LOST: 1
GAINED: 1
Reviewed-by: Matt Turner <mattst88@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/1359>
2019-07-15 23:55:00 +01:00
|
|
|
bool nir_opt_algebraic_distribute_src_mods(nir_shader *shader);
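/* A schematic example of the rewrite described in the commit message
 * above (pseudo-IR, not real NIR syntax):
 *
 *    before:  t = fmul a, b        after:  t = fmul -a, b
 *             r = fneg t                   r = t
 *
 * Pushing the negation into fmul's source lets it become a free source
 * modifier on hardware that supports them.
 */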
|
2014-11-15 05:35:25 +00:00
|
|
|
bool nir_opt_constant_folding(nir_shader *shader);
|
2014-12-12 19:13:10 +00:00
|
|
|
|
2020-02-21 18:53:05 +00:00
|
|
|
/* Try to combine a and b into a. Return true if combination was possible,
|
|
|
|
* which will result in b being removed by the pass. Return false if
|
|
|
|
* combination wasn't possible.
|
|
|
|
*/
|
|
|
|
typedef bool (*nir_combine_memory_barrier_cb)(
|
|
|
|
nir_intrinsic_instr *a, nir_intrinsic_instr *b, void *data);
|
|
|
|
|
|
|
|
bool nir_opt_combine_memory_barriers(nir_shader *shader,
|
|
|
|
nir_combine_memory_barrier_cb combine_cb,
|
|
|
|
void *data);
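/* A hypothetical combine callback sketch: merge b into a by unioning the
 * memory modes and semantics and widening the scope, then return true so
 * the pass removes b. The nir_intrinsic_*memory_* accessors and MAX2 are
 * assumed to exist with these names in this tree:
 *
 *    static bool
 *    combine_all_barriers(nir_intrinsic_instr *a, nir_intrinsic_instr *b,
 *                         void *data)
 *    {
 *       nir_intrinsic_set_memory_modes(
 *          a, nir_intrinsic_memory_modes(a) | nir_intrinsic_memory_modes(b));
 *       nir_intrinsic_set_memory_semantics(
 *          a, nir_intrinsic_memory_semantics(a) |
 *             nir_intrinsic_memory_semantics(b));
 *       nir_intrinsic_set_memory_scope(
 *          a, MAX2(nir_intrinsic_memory_scope(a),
 *                  nir_intrinsic_memory_scope(b)));
 *       return true;
 *    }
 */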
|
|
|
|
|
2019-03-08 18:08:20 +00:00
|
|
|
bool nir_opt_combine_stores(nir_shader *shader, nir_variable_mode modes);
|
|
|
|
|
2014-07-23 19:19:50 +01:00
|
|
|
bool nir_copy_prop(nir_shader *shader);
|
|
|
|
|
2016-12-10 06:31:26 +00:00
|
|
|
bool nir_opt_copy_prop_vars(nir_shader *shader);
|
|
|
|
|
2014-11-12 00:11:34 +00:00
|
|
|
bool nir_opt_cse(nir_shader *shader);
|
|
|
|
|
2014-07-24 23:51:58 +01:00
|
|
|
bool nir_opt_dce(nir_shader *shader);
|
|
|
|
|
2015-05-01 07:38:17 +01:00
|
|
|
bool nir_opt_dead_cf(nir_shader *shader);
|
|
|
|
|
2018-07-27 21:56:35 +01:00
|
|
|
bool nir_opt_dead_write_vars(nir_shader *shader);
|
|
|
|
|
2019-03-04 22:17:02 +00:00
|
|
|
bool nir_opt_deref_impl(nir_function_impl *impl);
|
2018-12-13 17:08:13 +00:00
|
|
|
bool nir_opt_deref(nir_shader *shader);
|
|
|
|
|
2018-07-24 03:16:56 +01:00
|
|
|
bool nir_opt_find_array_copies(nir_shader *shader);
|
|
|
|
|
2016-08-10 22:34:49 +01:00
|
|
|
bool nir_opt_gcm(nir_shader *shader, bool value_number);
|
2015-02-03 18:11:23 +00:00
|
|
|
|
2017-12-28 21:06:28 +00:00
|
|
|
bool nir_opt_idiv_const(nir_shader *shader, unsigned min_bit_size);
|
|
|
|
|
2019-04-08 11:13:49 +01:00
|
|
|
bool nir_opt_if(nir_shader *shader, bool aggressive_last_continue);
|
2016-12-19 21:11:43 +00:00
|
|
|
|
2017-06-22 20:13:25 +01:00
|
|
|
bool nir_opt_intrinsics(nir_shader *shader);
|
|
|
|
|
2018-06-29 03:16:58 +01:00
|
|
|
bool nir_opt_large_constants(nir_shader *shader,
|
|
|
|
glsl_type_size_align_func size_align,
|
|
|
|
unsigned threshold);
|
|
|
|
|
2016-09-15 06:49:57 +01:00
|
|
|
bool nir_opt_loop_unroll(nir_shader *shader, nir_variable_mode indirect_mask);
|
|
|
|
|
2019-05-22 20:23:03 +01:00
|
|
|
typedef enum {
|
|
|
|
nir_move_const_undef = (1 << 0),
|
|
|
|
nir_move_load_ubo = (1 << 1),
|
|
|
|
nir_move_load_input = (1 << 2),
|
|
|
|
nir_move_comparisons = (1 << 3),
|
2019-10-14 17:15:04 +01:00
|
|
|
nir_move_copies = (1 << 4),
|
2019-05-22 20:23:03 +01:00
|
|
|
} nir_move_options;
|
|
|
|
|
|
|
|
bool nir_can_move_instr(nir_instr *instr, nir_move_options options);
|
|
|
|
|
|
|
|
bool nir_opt_sink(nir_shader *shader, nir_move_options options);
|
|
|
|
|
2019-07-24 19:23:21 +01:00
|
|
|
bool nir_opt_move(nir_shader *shader, nir_move_options options);
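/* A minimal usage sketch (illustrative): the same option bits drive both
 * passes, first sinking movable instructions toward their uses and then
 * reordering them within blocks:
 *
 *    nir_move_options opts = nir_move_const_undef | nir_move_copies;
 *    bool progress = nir_opt_sink(shader, opts);
 *    progress |= nir_opt_move(shader, opts);
 */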
|
2018-01-26 11:38:57 +00:00
|
|
|
|
2018-06-27 19:41:19 +01:00
|
|
|
bool nir_opt_peephole_select(nir_shader *shader, unsigned limit,
|
2018-06-19 00:11:55 +01:00
|
|
|
bool indirect_load_ok, bool expensive_alu_ok);
|
2014-11-04 18:12:14 +00:00
|
|
|
|
nir: Rematerialize compare instructions
On some architectures, Boolean values used to control conditional
branches or conditional selection must be propagated into a flag. This
generally means that a stored Boolean value must be compared with zero.
Rather than force the generation of extra compares with zero, re-emit
the original comparison instruction. This can save register pressure by
not needing to store the Boolean value.
There are several possible areas for future improvement to this pass:
1. Be more conservative. If both sources to the comparison instruction
are non-constants, it may be better for register pressure to emit the
extra compare. The current shader-db results on Intel GPUs (next
commit) lead me to believe that this is not currently a problem.
2. Be less conservative. Currently the pass requires that all users of
the comparison match the pattern. The idea is that after the pass is
complete, no instruction will use the resulting Boolean value. The only
uses will be of the flag value. It may be beneficial to relax this
requirement in some cases.
3. Be less conservative. Also try to rematerialize comparisons used for
discard_if intrinsics. After changing the way the Intel compiler
generates code for discard_if (see MR!935), I tried implementing this
already. The changes were pretty small. Instructions were helped in 19
shaders, but, overall, cycles were hurt. A commit "nir: Rematerialize
comparisons for nir_intrinsic_discard_if too" is on my fd.o cgit.
4. Copy the preceding ALU instruction. If the comparison is a
comparison with zero, and it is the only user of a particular ALU
instruction (e.g., (a+b) != 0.0), it may be a further improvement to also
copy the preceding ALU instruction. On Intel GPUs, this may enable
cmod propagation to make additional progress.
v2: Use a much simpler method to get the prev_block for an if-statement.
Suggested by Tim.
Reviewed-by: Matt Turner <mattst88@gmail.com>
2019-05-20 19:22:12 +01:00
|
|
|
bool nir_opt_rematerialize_compares(nir_shader *shader);
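/* A schematic before/after for the rematerialization described above
 * (pseudo-IR, not real NIR syntax):
 *
 *    before:  x = flt a, b   ... many instructions ...   if x { ... }
 *    after:   x = flt a, b   ...                         if flt(a, b) { ... }
 *
 * The re-emitted comparison feeds the flag directly, so the Boolean x no
 * longer has to survive in a register across the gap.
 */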
|
|
|
|
|
2015-02-03 06:49:44 +00:00
|
|
|
bool nir_opt_remove_phis(nir_shader *shader);
|
2019-07-10 21:14:42 +01:00
|
|
|
bool nir_opt_remove_phis_block(nir_block *block);
|
2015-02-03 06:49:44 +00:00
|
|
|
|
2020-07-23 00:57:22 +01:00
|
|
|
bool nir_opt_shrink_vectors(nir_shader *shader);
|
2018-01-29 16:19:00 +00:00
|
|
|
|
2016-12-17 00:54:17 +00:00
|
|
|
bool nir_opt_trivial_continues(nir_shader *shader);
|
|
|
|
|
2015-08-05 00:25:24 +01:00
|
|
|
bool nir_opt_undef(nir_shader *shader);
|
|
|
|
|
2020-08-27 20:49:13 +01:00
|
|
|
typedef bool (*nir_opt_vectorize_cb)(const nir_instr *a, const nir_instr *b,
|
|
|
|
void *data);
|
|
|
|
bool nir_opt_vectorize(nir_shader *shader, nir_opt_vectorize_cb filter,
|
|
|
|
void *data);
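/* A hypothetical filter sketch that accepts every candidate pair; a real
 * filter would inspect opcodes, bit sizes, etc. before agreeing:
 *
 *    static bool
 *    vectorize_all(const nir_instr *a, const nir_instr *b, void *data)
 *    {
 *       return true;
 *    }
 *
 *    ... nir_opt_vectorize(shader, vectorize_all, NULL); ...
 */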
|
2015-11-15 01:26:47 +00:00
|
|
|
|
2016-11-02 01:22:07 +00:00
|
|
|
bool nir_opt_conditional_discard(nir_shader *shader);
|
|
|
|
|
2020-09-08 18:58:49 +01:00
|
|
|
typedef bool (*nir_should_vectorize_mem_func)(unsigned align_mul,
|
|
|
|
unsigned align_offset,
|
|
|
|
unsigned bit_size,
|
2020-09-08 19:12:56 +01:00
|
|
|
unsigned num_components,
|
2019-03-19 20:55:30 +00:00
|
|
|
nir_intrinsic_instr *low, nir_intrinsic_instr *high);
|
|
|
|
|
|
|
|
bool nir_opt_load_store_vectorize(nir_shader *shader, nir_variable_mode modes,
|
2020-05-04 15:02:38 +01:00
|
|
|
nir_should_vectorize_mem_func callback,
|
|
|
|
nir_variable_mode robust_modes);
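/* A hypothetical callback sketch: merge two accesses when the combined
 * value still fits in 128 bits and the known alignment covers the element
 * size (the 128-bit limit is an assumption, not a rule from this header):
 *
 *    static bool
 *    mem_vectorize_cb(unsigned align_mul, unsigned align_offset,
 *                     unsigned bit_size, unsigned num_components,
 *                     nir_intrinsic_instr *low, nir_intrinsic_instr *high)
 *    {
 *       return bit_size * num_components <= 128 &&
 *              align_mul >= bit_size / 8;
 *    }
 */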
|
2019-03-19 20:55:30 +00:00
|
|
|
|
2015-03-28 02:50:29 +00:00
|
|
|
void nir_sweep(nir_shader *shader);
|
|
|
|
|
2018-08-30 21:02:25 +01:00
|
|
|
void nir_remap_dual_slot_attributes(nir_shader *shader,
|
2018-08-31 13:35:17 +01:00
|
|
|
uint64_t *dual_slot_inputs);
|
2018-08-30 21:02:25 +01:00
|
|
|
uint64_t nir_get_single_slot_attribs_mask(uint64_t attribs, uint64_t dual_slot);
|
2018-03-21 08:39:32 +00:00
|
|
|
|
2015-09-11 00:53:08 +01:00
|
|
|
nir_intrinsic_op nir_intrinsic_from_system_value(gl_system_value val);
|
2015-08-04 00:02:16 +01:00
|
|
|
gl_system_value nir_system_value_from_intrinsic(nir_intrinsic_op intrin);
|
|
|
|
|
2018-09-11 11:40:08 +01:00
|
|
|
static inline bool
|
|
|
|
nir_variable_is_in_ubo(const nir_variable *var)
|
|
|
|
{
|
|
|
|
return (var->data.mode == nir_var_mem_ubo &&
|
|
|
|
var->interface_type != NULL);
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline bool
|
|
|
|
nir_variable_is_in_ssbo(const nir_variable *var)
|
|
|
|
{
|
|
|
|
return (var->data.mode == nir_var_mem_ssbo &&
|
|
|
|
var->interface_type != NULL);
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline bool
|
|
|
|
nir_variable_is_in_block(const nir_variable *var)
|
|
|
|
{
|
|
|
|
return nir_variable_is_in_ubo(var) || nir_variable_is_in_ssbo(var);
|
|
|
|
}
|
|
|
|
|
2019-11-12 17:51:19 +00:00
|
|
|
typedef struct nir_unsigned_upper_bound_config {
|
|
|
|
unsigned min_subgroup_size;
|
|
|
|
unsigned max_subgroup_size;
|
|
|
|
unsigned max_work_group_invocations;
|
|
|
|
unsigned max_work_group_count[3];
|
|
|
|
unsigned max_work_group_size[3];
|
|
|
|
|
|
|
|
uint32_t vertex_attrib_max[32];
|
|
|
|
} nir_unsigned_upper_bound_config;
|
|
|
|
|
|
|
|
uint32_t
|
|
|
|
nir_unsigned_upper_bound(nir_shader *shader, struct hash_table *range_ht,
|
|
|
|
nir_ssa_scalar scalar,
|
|
|
|
const nir_unsigned_upper_bound_config *config);
|
|
|
|
|
|
|
|
bool
|
|
|
|
nir_addition_might_overflow(nir_shader *shader, struct hash_table *range_ht,
|
|
|
|
nir_ssa_scalar ssa, unsigned const_val,
|
|
|
|
const nir_unsigned_upper_bound_config *config);
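/* A minimal usage sketch (illustrative; the limits are made-up device
 * caps and `def` is assumed to be an existing nir_ssa_def):
 *
 *    const nir_unsigned_upper_bound_config config = {
 *       .min_subgroup_size = 32,
 *       .max_subgroup_size = 64,
 *       .max_work_group_invocations = 1024,
 *       .max_work_group_count = {65535, 65535, 65535},
 *       .max_work_group_size = {1024, 1024, 64},
 *    };
 *    struct hash_table *range_ht = _mesa_pointer_hash_table_create(NULL);
 *    nir_ssa_scalar s = { .def = def, .comp = 0 };
 *    uint32_t bound =
 *       nir_unsigned_upper_bound(shader, range_ht, s, &config);
 */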
|
|
|
|
|
2014-08-01 00:14:51 +01:00
|
|
|
#ifdef __cplusplus
|
|
|
|
} /* extern "C" */
|
|
|
|
#endif
|
2017-03-20 16:04:16 +00:00
|
|
|
|
|
|
|
#endif /* NIR_H */
|