mesa/src/freedreno/ir3/instr-a3xx.h

797 lines
23 KiB
C

/*
* Copyright (c) 2013 Rob Clark <robdclark@gmail.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef INSTR_A3XX_H_
#define INSTR_A3XX_H_
#define PACKED __attribute__((__packed__))
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
/* clang-format off */
void ir3_assert_handler(const char *expr, const char *file, int line,
const char *func) __attribute__((weak)) __attribute__((__noreturn__));
/* clang-format on */
/* A wrapper for assert() that allows overriding handling of a failed
* assert. This is needed for tools like crashdec which can want to
* attempt to disassemble memory that might not actually be valid
* instructions.
*/
#define ir3_assert(expr) \
do { \
if (!(expr)) { \
if (ir3_assert_handler) { \
ir3_assert_handler(#expr, __FILE__, __LINE__, __func__); \
} \
assert(expr); \
} \
} while (0)
/* size of largest OPC field of all the instruction categories: */
#define NOPC_BITS 7
#define _OPC(cat, opc) (((cat) << NOPC_BITS) | opc)
/* clang-format off */
typedef enum {
/* category 0: */
OPC_NOP = _OPC(0, 0),
OPC_B = _OPC(0, 1),
OPC_JUMP = _OPC(0, 2),
OPC_CALL = _OPC(0, 3),
OPC_RET = _OPC(0, 4),
OPC_KILL = _OPC(0, 5),
OPC_END = _OPC(0, 6),
OPC_EMIT = _OPC(0, 7),
OPC_CUT = _OPC(0, 8),
OPC_CHMASK = _OPC(0, 9),
OPC_CHSH = _OPC(0, 10),
OPC_FLOW_REV = _OPC(0, 11),
OPC_BKT = _OPC(0, 16),
OPC_STKS = _OPC(0, 17),
OPC_STKR = _OPC(0, 18),
OPC_XSET = _OPC(0, 19),
OPC_XCLR = _OPC(0, 20),
OPC_GETONE = _OPC(0, 21),
OPC_DBG = _OPC(0, 22),
OPC_SHPS = _OPC(0, 23), /* shader prologue start */
OPC_SHPE = _OPC(0, 24), /* shader prologue end */
OPC_GETLAST = _OPC(0, 25),
OPC_PREDT = _OPC(0, 29), /* predicated true */
OPC_PREDF = _OPC(0, 30), /* predicated false */
OPC_PREDE = _OPC(0, 31), /* predicated end */
/* Logical opcodes for different branch instruction variations: */
OPC_BR = _OPC(0, 40),
OPC_BRAO = _OPC(0, 41),
OPC_BRAA = _OPC(0, 42),
OPC_BRAC = _OPC(0, 43),
OPC_BANY = _OPC(0, 44),
OPC_BALL = _OPC(0, 45),
OPC_BRAX = _OPC(0, 46),
/* Logical opcode to distinguish kill and demote */
OPC_DEMOTE = _OPC(0, 47),
/* category 1: */
OPC_MOV = _OPC(1, 0),
OPC_MOVP = _OPC(1, 1),
/* swz, gat, sct */
OPC_MOVMSK = _OPC(1, 3),
/* Virtual opcodes for instructions differentiated via a "sub-opcode" that
* replaces the repeat field:
*/
OPC_SWZ = _OPC(1, 4),
OPC_GAT = _OPC(1, 5),
OPC_SCT = _OPC(1, 6),
/* Logical opcodes for different variants of mov: */
OPC_MOV_IMMED = _OPC(1, 40),
OPC_MOV_CONST = _OPC(1, 41),
OPC_MOV_GPR = _OPC(1, 42),
OPC_MOV_RELGPR = _OPC(1, 43),
OPC_MOV_RELCONST = _OPC(1, 44),
/* Macros that expand to an if statement + move */
OPC_BALLOT_MACRO = _OPC(1, 50),
OPC_ANY_MACRO = _OPC(1, 51),
OPC_ALL_MACRO = _OPC(1, 52),
OPC_ELECT_MACRO = _OPC(1, 53),
OPC_READ_COND_MACRO = _OPC(1, 54),
OPC_READ_FIRST_MACRO = _OPC(1, 55),
OPC_SWZ_SHARED_MACRO = _OPC(1, 56),
OPC_SHPS_MACRO = _OPC(1, 57),
/* Macros that expand to a loop */
OPC_SCAN_MACRO = _OPC(1, 58),
/* category 2: */
OPC_ADD_F = _OPC(2, 0),
OPC_MIN_F = _OPC(2, 1),
OPC_MAX_F = _OPC(2, 2),
OPC_MUL_F = _OPC(2, 3),
OPC_SIGN_F = _OPC(2, 4),
OPC_CMPS_F = _OPC(2, 5),
OPC_ABSNEG_F = _OPC(2, 6),
OPC_CMPV_F = _OPC(2, 7),
/* 8 - invalid */
OPC_FLOOR_F = _OPC(2, 9),
OPC_CEIL_F = _OPC(2, 10),
OPC_RNDNE_F = _OPC(2, 11),
OPC_RNDAZ_F = _OPC(2, 12),
OPC_TRUNC_F = _OPC(2, 13),
/* 14-15 - invalid */
OPC_ADD_U = _OPC(2, 16),
OPC_ADD_S = _OPC(2, 17),
OPC_SUB_U = _OPC(2, 18),
OPC_SUB_S = _OPC(2, 19),
OPC_CMPS_U = _OPC(2, 20),
OPC_CMPS_S = _OPC(2, 21),
OPC_MIN_U = _OPC(2, 22),
OPC_MIN_S = _OPC(2, 23),
OPC_MAX_U = _OPC(2, 24),
OPC_MAX_S = _OPC(2, 25),
OPC_ABSNEG_S = _OPC(2, 26),
/* 27 - invalid */
OPC_AND_B = _OPC(2, 28),
OPC_OR_B = _OPC(2, 29),
OPC_NOT_B = _OPC(2, 30),
OPC_XOR_B = _OPC(2, 31),
/* 32 - invalid */
OPC_CMPV_U = _OPC(2, 33),
OPC_CMPV_S = _OPC(2, 34),
/* 35-47 - invalid */
OPC_MUL_U24 = _OPC(2, 48), /* 24b mul into 32b result */
OPC_MUL_S24 = _OPC(2, 49), /* 24b mul into 32b result with sign extension */
OPC_MULL_U = _OPC(2, 50),
OPC_BFREV_B = _OPC(2, 51),
OPC_CLZ_S = _OPC(2, 52),
OPC_CLZ_B = _OPC(2, 53),
OPC_SHL_B = _OPC(2, 54),
OPC_SHR_B = _OPC(2, 55),
OPC_ASHR_B = _OPC(2, 56),
OPC_BARY_F = _OPC(2, 57),
OPC_MGEN_B = _OPC(2, 58),
OPC_GETBIT_B = _OPC(2, 59),
OPC_SETRM = _OPC(2, 60),
OPC_CBITS_B = _OPC(2, 61),
OPC_SHB = _OPC(2, 62),
OPC_MSAD = _OPC(2, 63),
OPC_FLAT_B = _OPC(2, 64),
/* category 3: */
OPC_MAD_U16 = _OPC(3, 0),
OPC_MADSH_U16 = _OPC(3, 1),
OPC_MAD_S16 = _OPC(3, 2),
OPC_MADSH_M16 = _OPC(3, 3), /* should this be .s16? */
OPC_MAD_U24 = _OPC(3, 4),
OPC_MAD_S24 = _OPC(3, 5),
OPC_MAD_F16 = _OPC(3, 6),
OPC_MAD_F32 = _OPC(3, 7),
OPC_SEL_B16 = _OPC(3, 8),
OPC_SEL_B32 = _OPC(3, 9),
OPC_SEL_S16 = _OPC(3, 10),
OPC_SEL_S32 = _OPC(3, 11),
OPC_SEL_F16 = _OPC(3, 12),
OPC_SEL_F32 = _OPC(3, 13),
OPC_SAD_S16 = _OPC(3, 14),
OPC_SAD_S32 = _OPC(3, 15),
OPC_SHRM = _OPC(3, 16),
OPC_SHLM = _OPC(3, 17),
OPC_SHRG = _OPC(3, 18),
OPC_SHLG = _OPC(3, 19),
OPC_ANDG = _OPC(3, 20),
OPC_DP2ACC = _OPC(3, 21),
OPC_DP4ACC = _OPC(3, 22),
OPC_WMM = _OPC(3, 23),
OPC_WMM_ACCU = _OPC(3, 24),
/* category 4: */
OPC_RCP = _OPC(4, 0),
OPC_RSQ = _OPC(4, 1),
OPC_LOG2 = _OPC(4, 2),
OPC_EXP2 = _OPC(4, 3),
OPC_SIN = _OPC(4, 4),
OPC_COS = _OPC(4, 5),
OPC_SQRT = _OPC(4, 6),
/* NOTE that these are 8+opc from their highp equivs, so it's possible
* that the high order bit in the opc field has been repurposed for
* half-precision use? But note that other ops (rcp/lsin/cos/sqrt)
* still use the same opc as highp
*/
OPC_HRSQ = _OPC(4, 9),
OPC_HLOG2 = _OPC(4, 10),
OPC_HEXP2 = _OPC(4, 11),
/* category 5: */
OPC_ISAM = _OPC(5, 0),
OPC_ISAML = _OPC(5, 1),
OPC_ISAMM = _OPC(5, 2),
OPC_SAM = _OPC(5, 3),
OPC_SAMB = _OPC(5, 4),
OPC_SAML = _OPC(5, 5),
OPC_SAMGQ = _OPC(5, 6),
OPC_GETLOD = _OPC(5, 7),
OPC_CONV = _OPC(5, 8),
OPC_CONVM = _OPC(5, 9),
OPC_GETSIZE = _OPC(5, 10),
OPC_GETBUF = _OPC(5, 11),
OPC_GETPOS = _OPC(5, 12),
OPC_GETINFO = _OPC(5, 13),
OPC_DSX = _OPC(5, 14),
OPC_DSY = _OPC(5, 15),
OPC_GATHER4R = _OPC(5, 16),
OPC_GATHER4G = _OPC(5, 17),
OPC_GATHER4B = _OPC(5, 18),
OPC_GATHER4A = _OPC(5, 19),
OPC_SAMGP0 = _OPC(5, 20),
OPC_SAMGP1 = _OPC(5, 21),
OPC_SAMGP2 = _OPC(5, 22),
OPC_SAMGP3 = _OPC(5, 23),
OPC_DSXPP_1 = _OPC(5, 24),
OPC_DSYPP_1 = _OPC(5, 25),
OPC_RGETPOS = _OPC(5, 26),
OPC_RGETINFO = _OPC(5, 27),
OPC_BRCST_ACTIVE = _OPC(5, 28),
OPC_QUAD_SHUFFLE_BRCST = _OPC(5, 29),
OPC_QUAD_SHUFFLE_HORIZ = _OPC(5, 30),
OPC_QUAD_SHUFFLE_VERT = _OPC(5, 31),
OPC_QUAD_SHUFFLE_DIAG = _OPC(5, 32),
/* cat5 meta instructions, placed above the cat5 opc field's size */
OPC_DSXPP_MACRO = _OPC(5, 35),
OPC_DSYPP_MACRO = _OPC(5, 36),
/* category 6: */
OPC_LDG = _OPC(6, 0), /* load-global */
OPC_LDL = _OPC(6, 1),
OPC_LDP = _OPC(6, 2),
OPC_STG = _OPC(6, 3), /* store-global */
OPC_STL = _OPC(6, 4),
OPC_STP = _OPC(6, 5),
OPC_LDIB = _OPC(6, 6),
OPC_G2L = _OPC(6, 7),
OPC_L2G = _OPC(6, 8),
OPC_PREFETCH = _OPC(6, 9),
OPC_LDLW = _OPC(6, 10),
OPC_STLW = _OPC(6, 11),
OPC_RESFMT = _OPC(6, 14),
OPC_RESINFO = _OPC(6, 15),
OPC_ATOMIC_ADD = _OPC(6, 16),
OPC_ATOMIC_SUB = _OPC(6, 17),
OPC_ATOMIC_XCHG = _OPC(6, 18),
OPC_ATOMIC_INC = _OPC(6, 19),
OPC_ATOMIC_DEC = _OPC(6, 20),
OPC_ATOMIC_CMPXCHG = _OPC(6, 21),
OPC_ATOMIC_MIN = _OPC(6, 22),
OPC_ATOMIC_MAX = _OPC(6, 23),
OPC_ATOMIC_AND = _OPC(6, 24),
OPC_ATOMIC_OR = _OPC(6, 25),
OPC_ATOMIC_XOR = _OPC(6, 26),
OPC_LDGB = _OPC(6, 27),
OPC_STGB = _OPC(6, 28),
OPC_STIB = _OPC(6, 29),
OPC_LDC = _OPC(6, 30),
OPC_LDLV = _OPC(6, 31),
OPC_PIPR = _OPC(6, 32), /* ??? */
OPC_PIPC = _OPC(6, 33), /* ??? */
OPC_EMIT2 = _OPC(6, 34), /* ??? */
OPC_ENDLS = _OPC(6, 35), /* ??? */
OPC_GETSPID = _OPC(6, 36), /* SP ID */
OPC_GETWID = _OPC(6, 37), /* wavefront ID */
OPC_GETFIBERID = _OPC(6, 38), /* fiber ID */
/* Logical opcodes for things that differ in a6xx+ */
OPC_STC = _OPC(6, 40),
OPC_RESINFO_B = _OPC(6, 41),
OPC_LDIB_B = _OPC(6, 42),
OPC_STIB_B = _OPC(6, 43),
/* Logical opcodes for different atomic instruction variations: */
OPC_ATOMIC_B_ADD = _OPC(6, 44),
OPC_ATOMIC_B_SUB = _OPC(6, 45),
OPC_ATOMIC_B_XCHG = _OPC(6, 46),
OPC_ATOMIC_B_INC = _OPC(6, 47),
OPC_ATOMIC_B_DEC = _OPC(6, 48),
OPC_ATOMIC_B_CMPXCHG = _OPC(6, 49),
OPC_ATOMIC_B_MIN = _OPC(6, 50),
OPC_ATOMIC_B_MAX = _OPC(6, 51),
OPC_ATOMIC_B_AND = _OPC(6, 52),
OPC_ATOMIC_B_OR = _OPC(6, 53),
OPC_ATOMIC_B_XOR = _OPC(6, 54),
OPC_ATOMIC_S_ADD = _OPC(6, 55),
OPC_ATOMIC_S_SUB = _OPC(6, 56),
OPC_ATOMIC_S_XCHG = _OPC(6, 57),
OPC_ATOMIC_S_INC = _OPC(6, 58),
OPC_ATOMIC_S_DEC = _OPC(6, 59),
OPC_ATOMIC_S_CMPXCHG = _OPC(6, 60),
OPC_ATOMIC_S_MIN = _OPC(6, 61),
OPC_ATOMIC_S_MAX = _OPC(6, 62),
OPC_ATOMIC_S_AND = _OPC(6, 63),
OPC_ATOMIC_S_OR = _OPC(6, 64),
OPC_ATOMIC_S_XOR = _OPC(6, 65),
OPC_ATOMIC_G_ADD = _OPC(6, 66),
OPC_ATOMIC_G_SUB = _OPC(6, 67),
OPC_ATOMIC_G_XCHG = _OPC(6, 68),
OPC_ATOMIC_G_INC = _OPC(6, 69),
OPC_ATOMIC_G_DEC = _OPC(6, 70),
OPC_ATOMIC_G_CMPXCHG = _OPC(6, 71),
OPC_ATOMIC_G_MIN = _OPC(6, 72),
OPC_ATOMIC_G_MAX = _OPC(6, 73),
OPC_ATOMIC_G_AND = _OPC(6, 74),
OPC_ATOMIC_G_OR = _OPC(6, 75),
OPC_ATOMIC_G_XOR = _OPC(6, 76),
OPC_LDG_A = _OPC(6, 77),
OPC_STG_A = _OPC(6, 78),
OPC_SPILL_MACRO = _OPC(6, 79),
OPC_RELOAD_MACRO = _OPC(6, 80),
OPC_LDC_K = _OPC(6, 81),
/* category 7: */
OPC_BAR = _OPC(7, 0),
OPC_FENCE = _OPC(7, 1),
/* meta instructions (category -1): */
/* placeholder instr to mark shader inputs: */
OPC_META_INPUT = _OPC(-1, 0),
/* The "collect" and "split" instructions are used for keeping
* track of instructions that write to multiple dst registers
* (split) like texture sample instructions, or read multiple
* consecutive scalar registers (collect) (bary.f, texture samp)
*
* A "split" extracts a scalar component from a vecN, and a
* "collect" gathers multiple scalar components into a vecN
*/
OPC_META_SPLIT = _OPC(-1, 2),
OPC_META_COLLECT = _OPC(-1, 3),
/* placeholder for texture fetches that run before FS invocation
* starts:
*/
OPC_META_TEX_PREFETCH = _OPC(-1, 4),
/* Parallel copies have multiple destinations, and copy each destination
* to its corresponding source. This happens "in parallel," meaning that
* it happens as-if every source is read first and then every destination
* is stored. These are produced in RA when register shuffling is
* required, and then lowered away immediately afterwards.
*/
OPC_META_PARALLEL_COPY = _OPC(-1, 5),
OPC_META_PHI = _OPC(-1, 6),
} opc_t;
/* clang-format on */
#define opc_cat(opc) ((int)((opc) >> NOPC_BITS))
#define opc_op(opc) ((unsigned)((opc) & ((1 << NOPC_BITS) - 1)))
const char *disasm_a3xx_instr_name(opc_t opc);
typedef enum {
TYPE_F16 = 0,
TYPE_F32 = 1,
TYPE_U16 = 2,
TYPE_U32 = 3,
TYPE_S16 = 4,
TYPE_S32 = 5,
TYPE_U8 = 6,
TYPE_S8 = 7, // XXX I assume?
} type_t;
static inline uint32_t
type_size(type_t type)
{
switch (type) {
case TYPE_F32:
case TYPE_U32:
case TYPE_S32:
return 32;
case TYPE_F16:
case TYPE_U16:
case TYPE_S16:
return 16;
case TYPE_U8:
case TYPE_S8:
return 8;
default:
ir3_assert(0); /* invalid type */
return 0;
}
}
static inline type_t
type_uint_size(unsigned bit_size)
{
switch (bit_size) {
case 8: return TYPE_U8;
case 1: /* 1b bools are treated as normal half-regs */
case 16: return TYPE_U16;
case 32: return TYPE_U32;
default:
ir3_assert(0); /* invalid size */
return 0;
}
}
static inline type_t
type_float_size(unsigned bit_size)
{
switch (bit_size) {
case 16: return TYPE_F16;
case 32: return TYPE_F32;
default:
ir3_assert(0); /* invalid size */
return 0;
}
}
static inline int
type_float(type_t type)
{
return (type == TYPE_F32) || (type == TYPE_F16);
}
static inline int
type_uint(type_t type)
{
return (type == TYPE_U32) || (type == TYPE_U16) || (type == TYPE_U8);
}
static inline int
type_sint(type_t type)
{
return (type == TYPE_S32) || (type == TYPE_S16) || (type == TYPE_S8);
}
typedef enum {
ROUND_ZERO = 0,
ROUND_EVEN = 1,
ROUND_POS_INF = 2,
ROUND_NEG_INF = 3,
} round_t;
/* comp:
* 0 - x
* 1 - y
* 2 - z
* 3 - w
*/
static inline uint32_t
regid(int num, int comp)
{
return (num << 2) | (comp & 0x3);
}
#define INVALID_REG regid(63, 0)
#define VALIDREG(r) ((r) != INVALID_REG)
#define CONDREG(r, val) COND(VALIDREG(r), (val))
/* special registers: */
#define REG_A0 61 /* address register */
#define REG_P0 62 /* predicate register */
typedef enum {
BRANCH_PLAIN = 0, /* br */
BRANCH_OR = 1, /* brao */
BRANCH_AND = 2, /* braa */
BRANCH_CONST = 3, /* brac */
BRANCH_ANY = 4, /* bany */
BRANCH_ALL = 5, /* ball */
BRANCH_X = 6, /* brax ??? */
} brtype_t;
/* With is_bindless_s2en = 1, this determines whether bindless is enabled and
* if so, how to get the (base, index) pair for both sampler and texture.
* There is a single base embedded in the instruction, which is always used
* for the texture.
*/
typedef enum {
/* Use traditional GL binding model, get texture and sampler index from src3
* which is presumed to be uniform on a4xx+ (a3xx doesn't have the other
* modes, but does handle non-uniform indexing).
*/
CAT5_UNIFORM = 0,
/* The sampler base comes from the low 3 bits of a1.x, and the sampler
* and texture index come from src3 which is presumed to be uniform.
*/
CAT5_BINDLESS_A1_UNIFORM = 1,
/* The texture and sampler share the same base, and the sampler and
* texture index come from src3 which is *not* presumed to be uniform.
*/
CAT5_BINDLESS_NONUNIFORM = 2,
/* The sampler base comes from the low 3 bits of a1.x, and the sampler
* and texture index come from src3 which is *not* presumed to be
* uniform.
*/
CAT5_BINDLESS_A1_NONUNIFORM = 3,
/* Use traditional GL binding model, get texture and sampler index
* from src3 which is *not* presumed to be uniform.
*/
CAT5_NONUNIFORM = 4,
/* The texture and sampler share the same base, and the sampler and
* texture index come from src3 which is presumed to be uniform.
*/
CAT5_BINDLESS_UNIFORM = 5,
/* The texture and sampler share the same base, get sampler index from low
* 4 bits of src3 and texture index from high 4 bits.
*/
CAT5_BINDLESS_IMM = 6,
/* The sampler base comes from the low 3 bits of a1.x, and the texture
* index comes from the next 8 bits of a1.x. The sampler index is an
* immediate in src3.
*/
CAT5_BINDLESS_A1_IMM = 7,
} cat5_desc_mode_t;
/* Similar to cat5_desc_mode_t, describes how the descriptor is loaded.
*/
typedef enum {
/* Use old GL binding model with an immediate index. */
CAT6_IMM = 0,
CAT6_UNIFORM = 1,
CAT6_NONUNIFORM = 2,
/* Use the bindless model, with an immediate index.
*/
CAT6_BINDLESS_IMM = 4,
/* Use the bindless model, with a uniform register index.
*/
CAT6_BINDLESS_UNIFORM = 5,
/* Use the bindless model, with a register index that isn't guaranteed
* to be uniform. This presumably checks if the indices are equal and
* splits up the load/store, because it works the way you would
* expect.
*/
CAT6_BINDLESS_NONUNIFORM = 6,
} cat6_desc_mode_t;
static inline bool
is_sat_compatible(opc_t opc)
{
/* On a6xx saturation doesn't work on cat4 */
if (opc_cat(opc) != 2 && opc_cat(opc) != 3)
return false;
switch (opc) {
/* On a3xx and a6xx saturation doesn't work on bary.f */
case OPC_BARY_F:
/* On a6xx saturation doesn't work on sel.* */
case OPC_SEL_B16:
case OPC_SEL_B32:
case OPC_SEL_S16:
case OPC_SEL_S32:
case OPC_SEL_F16:
case OPC_SEL_F32:
return false;
default:
return true;
}
}
static inline bool
is_mad(opc_t opc)
{
switch (opc) {
case OPC_MAD_U16:
case OPC_MAD_S16:
case OPC_MAD_U24:
case OPC_MAD_S24:
case OPC_MAD_F16:
case OPC_MAD_F32:
return true;
default:
return false;
}
}
static inline bool
is_madsh(opc_t opc)
{
switch (opc) {
case OPC_MADSH_U16:
case OPC_MADSH_M16:
return true;
default:
return false;
}
}
static inline bool
is_local_atomic(opc_t opc)
{
switch (opc) {
case OPC_ATOMIC_ADD:
case OPC_ATOMIC_SUB:
case OPC_ATOMIC_XCHG:
case OPC_ATOMIC_INC:
case OPC_ATOMIC_DEC:
case OPC_ATOMIC_CMPXCHG:
case OPC_ATOMIC_MIN:
case OPC_ATOMIC_MAX:
case OPC_ATOMIC_AND:
case OPC_ATOMIC_OR:
case OPC_ATOMIC_XOR:
return true;
default:
return false;
}
}
static inline bool
is_global_a3xx_atomic(opc_t opc)
{
switch (opc) {
case OPC_ATOMIC_S_ADD:
case OPC_ATOMIC_S_SUB:
case OPC_ATOMIC_S_XCHG:
case OPC_ATOMIC_S_INC:
case OPC_ATOMIC_S_DEC:
case OPC_ATOMIC_S_CMPXCHG:
case OPC_ATOMIC_S_MIN:
case OPC_ATOMIC_S_MAX:
case OPC_ATOMIC_S_AND:
case OPC_ATOMIC_S_OR:
case OPC_ATOMIC_S_XOR:
return true;
default:
return false;
}
}
static inline bool
is_global_a6xx_atomic(opc_t opc)
{
switch (opc) {
case OPC_ATOMIC_G_ADD:
case OPC_ATOMIC_G_SUB:
case OPC_ATOMIC_G_XCHG:
case OPC_ATOMIC_G_INC:
case OPC_ATOMIC_G_DEC:
case OPC_ATOMIC_G_CMPXCHG:
case OPC_ATOMIC_G_MIN:
case OPC_ATOMIC_G_MAX:
case OPC_ATOMIC_G_AND:
case OPC_ATOMIC_G_OR:
case OPC_ATOMIC_G_XOR:
return true;
default:
return false;
}
}
static inline bool
is_bindless_atomic(opc_t opc)
{
switch (opc) {
case OPC_ATOMIC_B_ADD:
case OPC_ATOMIC_B_SUB:
case OPC_ATOMIC_B_XCHG:
case OPC_ATOMIC_B_INC:
case OPC_ATOMIC_B_DEC:
case OPC_ATOMIC_B_CMPXCHG:
case OPC_ATOMIC_B_MIN:
case OPC_ATOMIC_B_MAX:
case OPC_ATOMIC_B_AND:
case OPC_ATOMIC_B_OR:
case OPC_ATOMIC_B_XOR:
return true;
default:
return false;
}
}
static inline bool
is_atomic(opc_t opc)
{
return is_local_atomic(opc) || is_global_a3xx_atomic(opc) ||
is_global_a6xx_atomic(opc) || is_bindless_atomic(opc);
}
static inline bool
is_ssbo(opc_t opc)
{
switch (opc) {
case OPC_RESFMT:
case OPC_RESINFO:
case OPC_LDGB:
case OPC_STGB:
case OPC_STIB:
return true;
default:
return false;
}
}
static inline bool
is_isam(opc_t opc)
{
switch (opc) {
case OPC_ISAM:
case OPC_ISAML:
case OPC_ISAMM:
return true;
default:
return false;
}
}
static inline bool
is_cat2_float(opc_t opc)
{
switch (opc) {
case OPC_ADD_F:
case OPC_MIN_F:
case OPC_MAX_F:
case OPC_MUL_F:
case OPC_SIGN_F:
case OPC_CMPS_F:
case OPC_ABSNEG_F:
case OPC_CMPV_F:
case OPC_FLOOR_F:
case OPC_CEIL_F:
case OPC_RNDNE_F:
case OPC_RNDAZ_F:
case OPC_TRUNC_F:
return true;
default:
return false;
}
}
static inline bool
is_cat3_float(opc_t opc)
{
switch (opc) {
case OPC_MAD_F16:
case OPC_MAD_F32:
case OPC_SEL_F16:
case OPC_SEL_F32:
return true;
default:
return false;
}
}
#endif /* INSTR_A3XX_H_ */