ir3: Add new gen4 subgroup instructions

* getlast.w8 #4 - Perform a jump for the first (CLUSTER_SIZE-1)
   fibers in a subgroup
* brcst.active.w8 - necessary to implement arithmetic subgroup
   operations with prefix sum.
* quad_shuffle.brcst - subgroupQuadBroadcast
* quad_shuffle.horiz - subgroupQuadSwapHorizontal
* quad_shuffle.vert - subgroupQuadSwapVertical
* quad_shuffle.diag - subgroupQuadSwapDiagonal
* getfiberid - gl_SubgroupID

Signed-off-by: Danylo Piliaiev <dpiliaiev@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13817>
This commit is contained in:
Danylo Piliaiev 2021-11-15 16:57:38 +02:00 committed by Marge Bot
parent 943ef0edbd
commit d1c49901df
10 changed files with 319 additions and 111 deletions

View File

@ -173,6 +173,7 @@ static const struct opc_info {
OPC(0, OPC_STKR, stkr),
OPC(0, OPC_XSET, xset),
OPC(0, OPC_XCLR, xclr),
OPC(0, OPC_GETLAST, getlast),
OPC(0, OPC_GETONE, getone),
OPC(0, OPC_DBG, dbg),
OPC(0, OPC_SHPS, shps),
@ -300,6 +301,11 @@ static const struct opc_info {
OPC(5, OPC_DSYPP_1, dsypp.1),
OPC(5, OPC_RGETPOS, rgetpos),
OPC(5, OPC_RGETINFO, rgetinfo),
OPC(5, OPC_BRCST_ACTIVE, brcst.active),
OPC(5, OPC_QUAD_SHUFFLE_BRCST, quad_shuffle.brcst),
OPC(5, OPC_QUAD_SHUFFLE_HORIZ, quad_shuffle.horiz),
OPC(5, OPC_QUAD_SHUFFLE_VERT, quad_shuffle.vert),
OPC(5, OPC_QUAD_SHUFFLE_DIAG, quad_shuffle.diag),
/* macros are needed here for ir3_print */
OPC(5, OPC_DSXPP_MACRO, dsxpp.macro),
OPC(5, OPC_DSYPP_MACRO, dsypp.macro),
@ -377,6 +383,7 @@ static const struct opc_info {
OPC(6, OPC_ENDLS, endls),
OPC(6, OPC_GETSPID, getspid),
OPC(6, OPC_GETWID, getwid),
OPC(6, OPC_GETFIBERID, getfiberid),
OPC(6, OPC_SPILL_MACRO, spill.macro),
OPC(6, OPC_RELOAD_MACRO, reload.macro),

View File

@ -80,6 +80,7 @@ typedef enum {
OPC_DBG = _OPC(0, 22),
OPC_SHPS = _OPC(0, 23), /* shader prologue start */
OPC_SHPE = _OPC(0, 24), /* shader prologue end */
OPC_GETLAST = _OPC(0, 25),
OPC_PREDT = _OPC(0, 29), /* predicated true */
OPC_PREDF = _OPC(0, 30), /* predicated false */
@ -245,9 +246,14 @@ typedef enum {
OPC_DSYPP_1 = _OPC(5, 25),
OPC_RGETPOS = _OPC(5, 26),
OPC_RGETINFO = _OPC(5, 27),
OPC_BRCST_ACTIVE = _OPC(5, 28),
OPC_QUAD_SHUFFLE_BRCST = _OPC(5, 29),
OPC_QUAD_SHUFFLE_HORIZ = _OPC(5, 30),
OPC_QUAD_SHUFFLE_VERT = _OPC(5, 31),
OPC_QUAD_SHUFFLE_DIAG = _OPC(5, 32),
/* cat5 meta instructions, placed above the cat5 opc field's size */
OPC_DSXPP_MACRO = _OPC(5, 32),
OPC_DSYPP_MACRO = _OPC(5, 33),
OPC_DSXPP_MACRO = _OPC(5, 35),
OPC_DSYPP_MACRO = _OPC(5, 36),
/* category 6: */
OPC_LDG = _OPC(6, 0), /* load-global */
@ -286,6 +292,7 @@ typedef enum {
OPC_ENDLS = _OPC(6, 35), /* ??? */
OPC_GETSPID = _OPC(6, 36), /* SP ID */
OPC_GETWID = _OPC(6, 37), /* wavefront ID */
OPC_GETFIBERID = _OPC(6, 38), /* fiber ID */
/* Logical opcodes for things that differ in a6xx+ */
OPC_STC = _OPC(6, 40),

View File

@ -330,6 +330,7 @@ struct ir3_instruction {
struct {
unsigned samp, tex;
unsigned tex_base : 3;
unsigned cluster_size : 4;
type_t type;
} cat5;
struct {
@ -2171,6 +2172,7 @@ ir3_SAM(struct ir3_block *block, opc_t opc, type_t type, unsigned wrmask,
}
/* cat6 instructions: */
INSTR0(GETFIBERID)
INSTR2(LDLV)
INSTR3(LDG)
INSTR3(LDL)

View File

@ -72,16 +72,6 @@ static int parse_reg(const char *str)
return num;
}
/*
 * Parse a "w<N>" token (the text matched by the lexer rule "w"[0-9]+)
 * and return N / 32.
 *
 * N has to be a non-zero multiple of 32; any other value is reported
 * through yy_fatal_error().
 */
static int parse_w(const char *str)
{
   /* Skip the leading 'w'; the decimal digits follow immediately. */
   unsigned width = strtol(str + 1, NULL, 10);

   if (width & 31)
      yy_fatal_error("w# must be multiple of 32");
   if (width <= 31)
      yy_fatal_error("w# must be at least 32");

   return width >> 5;
}
%}
%option noyywrap
@ -139,7 +129,7 @@ static int parse_w(const char *str)
"a0.x" return T_A0;
"a1.x" return T_A1;
"p0."[xyzw] ir3_yylval.num = parse_reg(yytext); return T_P0;
"w"[0-9]+ ir3_yylval.num = parse_w(yytext); return T_W;
"w"[0-9]+ ir3_yylval.num = strtol(yytext+1, NULL, 10); return T_W;
"s#"[0-9]+ ir3_yylval.num = strtol(yytext+2, NULL, 10); return T_SAMP;
"t#"[0-9]+ ir3_yylval.num = strtol(yytext+2, NULL, 10); return T_TEX;
@ -167,6 +157,7 @@ static int parse_w(const char *str)
"stkr" return TOKEN(T_OP_STKR);
"xset" return TOKEN(T_OP_XSET);
"xclr" return TOKEN(T_OP_XCLR);
"getlast" return TOKEN(T_OP_GETLAST);
"getone" return TOKEN(T_OP_GETONE);
"dbg" return TOKEN(T_OP_DBG);
"shps" return TOKEN(T_OP_SHPS);
@ -296,6 +287,11 @@ static int parse_w(const char *str)
"dsypp.1" return TOKEN(T_OP_DSYPP_1);
"rgetpos" return TOKEN(T_OP_RGETPOS);
"rgetinfo" return TOKEN(T_OP_RGETINFO);
"brcst.active" return TOKEN(T_OP_BRCST_A);
"quad_shuffle.brcst" return TOKEN(T_OP_QSHUFFLE_BRCST);
"quad_shuffle.horiz" return TOKEN(T_OP_QSHUFFLE_H);
"quad_shuffle.vert" return TOKEN(T_OP_QSHUFFLE_V);
"quad_shuffle.diag" return TOKEN(T_OP_QSHUFFLE_DIAG);
/* category 6: */
"ldg" return TOKEN(T_OP_LDG);
@ -369,6 +365,7 @@ static int parse_w(const char *str)
"ldlv" return TOKEN(T_OP_LDLV);
"getspid" return TOKEN(T_OP_GETSPID);
"getwid" return TOKEN(T_OP_GETWID);
"getfiberid" return TOKEN(T_OP_GETFIBERID);
/* category 7: */
"bar" return TOKEN(T_OP_BAR);

View File

@ -399,6 +399,7 @@ static void print_token(FILE *file, int type, YYSTYPE value)
%token <tok> T_OP_STKR
%token <tok> T_OP_XSET
%token <tok> T_OP_XCLR
%token <tok> T_OP_GETLAST
%token <tok> T_OP_GETONE
%token <tok> T_OP_DBG
%token <tok> T_OP_SHPS
@ -526,6 +527,11 @@ static void print_token(FILE *file, int type, YYSTYPE value)
%token <tok> T_OP_DSYPP_1
%token <tok> T_OP_RGETPOS
%token <tok> T_OP_RGETINFO
%token <tok> T_OP_BRCST_A
%token <tok> T_OP_QSHUFFLE_BRCST
%token <tok> T_OP_QSHUFFLE_H
%token <tok> T_OP_QSHUFFLE_V
%token <tok> T_OP_QSHUFFLE_DIAG
/* category 6: */
%token <tok> T_OP_LDG
@ -598,6 +604,7 @@ static void print_token(FILE *file, int type, YYSTYPE value)
%token <tok> T_OP_LDLV
%token <tok> T_OP_GETSPID
%token <tok> T_OP_GETWID
%token <tok> T_OP_GETFIBERID
/* category 7: */
%token <tok> T_OP_BAR
@ -822,6 +829,7 @@ cat0_instr: T_OP_NOP { new_instr(OPC_NOP); }
| T_OP_PREDT { new_instr(OPC_PREDT); } cat0_src1
| T_OP_PREDF { new_instr(OPC_PREDF); } cat0_src1
| T_OP_PREDE { new_instr(OPC_PREDE); }
| T_OP_GETLAST '.' T_W { new_instr(OPC_GETLAST); } cat0_immed
cat1_opc: T_OP_MOV '.' T_CAT1_TYPE_TYPE {
parse_type_type(new_instr(OPC_MOV), $3);
@ -837,9 +845,16 @@ cat1_movmsk: T_OP_MOVMSK '.' T_W {
new_instr(OPC_MOVMSK);
instr->cat1.src_type = TYPE_U32;
instr->cat1.dst_type = TYPE_U32;
instr->repeat = $3 - 1;
} dst_reg {
instr->dsts[0]->wrmask = (1 << $3) - 1;
if (($3 % 32) != 0)
yyerror("w# must be multiple of 32");
if ($3 < 32)
yyerror("w# must be at least 32");
int num = $3 / 32;
instr->repeat = num - 1;
instr->dsts[0]->wrmask = (1 << num) - 1;
}
cat1_mova1: T_OP_MOVA1 T_A1 ',' {
@ -995,6 +1010,11 @@ cat5_opc: T_OP_ISAM { new_instr(OPC_ISAM); }
| T_OP_SAMGP3 { new_instr(OPC_SAMGP3); }
| T_OP_RGETPOS { new_instr(OPC_RGETPOS); }
| T_OP_RGETINFO { new_instr(OPC_RGETINFO); }
| T_OP_BRCST_A { new_instr(OPC_BRCST_ACTIVE); }
| T_OP_QSHUFFLE_BRCST { new_instr(OPC_QUAD_SHUFFLE_BRCST); }
| T_OP_QSHUFFLE_H { new_instr(OPC_QUAD_SHUFFLE_HORIZ); }
| T_OP_QSHUFFLE_V { new_instr(OPC_QUAD_SHUFFLE_VERT); }
| T_OP_QSHUFFLE_DIAG { new_instr(OPC_QUAD_SHUFFLE_DIAG); }
cat5_flag: '.' T_3D { instr->flags |= IR3_INSTR_3D; }
| '.' 'a' { instr->flags |= IR3_INSTR_A; }
@ -1005,6 +1025,7 @@ cat5_flag: '.' T_3D { instr->flags |= IR3_INSTR_3D; }
| '.' T_UNIFORM { }
| '.' T_NONUNIFORM { instr->flags |= IR3_INSTR_NONUNIF; }
| '.' T_BASE { instr->flags |= IR3_INSTR_B; instr->cat5.tex_base = $2; }
| '.' T_W { instr->cat5.cluster_size = $2; }
cat5_flags:
| cat5_flag cat5_flags
@ -1136,6 +1157,7 @@ cat6_ibo: cat6_ibo_opc_1src cat6_type cat6_dim dst_reg ',' 'g' '[' cat6
cat6_id_opc:
T_OP_GETSPID { new_instr(OPC_GETSPID); }
| T_OP_GETWID { new_instr(OPC_GETWID); }
| T_OP_GETFIBERID { new_instr(OPC_GETFIBERID); }
cat6_id: cat6_id_opc cat6_type dst_reg

View File

@ -69,6 +69,7 @@ static const struct test {
INSTR_6XX(00900000_00000003, "br !p0.x, #3"),
INSTR_6XX(03820000_00000015, "shps #21"), /* emit */
INSTR_6XX(04021000_00000000, "(ss)shpe"), /* cut */
INSTR_6XX(02220000_00000004, "getlast.w8 #4"),
INSTR_6XX(02820000_00000014, "getone #20"), /* kill p0.x */
INSTR_6XX(00906020_00000007, "brao !p0.x, !p0.y, #7"),
INSTR_6XX(00804040_00000003, "braa p0.x, p0.y, #3"),
@ -157,6 +158,13 @@ static const struct test {
INSTR_6XX(a048d107_cc080a07, "isaml.base3 (s32)(x)r1.w, r0.w, r1.y, s#0, t#6"),
/* dEQP-VK.subgroups.arithmetic.compute.subgroupadd_float */
INSTR_6XX(a7c03102_00100003, "brcst.active.w8 (u32)(x)r0.z, r0.y"), /* brcst.active.w8 (u32)(xOOO)r0.z, r0.y */
/* dEQP-VK.subgroups.quad.graphics.subgroupquadbroadcast_int */
INSTR_6XX(b7e03107_00000401, "(sy)quad_shuffle.brcst (u32)(x)r1.w, r0.x, r0.z"), /* (sy)quad_shuffle.brcst (u32)(xOOO)r1.w, r0.x, r0.z */
/* dEQP-VK.subgroups.quad.graphics.subgroupquadswapdiagonal_int */
INSTR_6XX(b7e03104_00180001, "(sy)quad_shuffle.diag (u32)(x)r1.x, r0.x"), /* (sy)quad_shuffle.diag (u32)(xOOO)r1.x, r0.x */
/* cat6 */
INSTR_5XX(c6e60000_00010600, "ldgb.untyped.4d.u32.1 r0.x, g[0], r1.x, r0.x"), /* ldgb.a.untyped.1dtype.u32.1 r0.x, g[r1.x], r0.x, 0 */
@ -374,6 +382,9 @@ static const struct test {
/* dEQP-VK.descriptor_indexing.sampler */
INSTR_6XX(a0c81f00_40000005, "sam.s2en.nonuniform.base0 (f32)(xyzw)r0.x, r0.z, r0.x"),
/* dEQP-VK.subgroups.quad.graphics.subgroupquadbroadcast_int */
INSTR_6XX(c0260001_00c98000, "getfiberid.u32 r0.y"),
/* Custom test since we've never seen the blob emit these. */
INSTR_6XX(c0260004_00490000, "getspid.u32 r1.x"),
INSTR_6XX(c0260005_00494000, "getwid.u32 r1.y"),

View File

@ -22,6 +22,7 @@
*/
#include "util/log.h"
#include "util/u_math.h"
#include "ir3/ir3.h"
#include "ir3/ir3_shader.h"

View File

@ -171,6 +171,38 @@ SOFTWARE.
<pattern low="55" high="58">0000</pattern> <!-- OPC -->
</bitset>
<!--
cat0 "getlast": a jump instruction that carries a cluster-size suffix
and an immediate, displayed as e.g. "getlast.w8 #4".
-->
<bitset name="getlast" extends="#instruction-cat0">
<doc>
Perform a jump for all fibers in the first cluster with any active
fibers, except for the last fiber in the cluster.
While there is a separate field for CLUSTER_SIZE its value does
not change the behaviour in any observable way, it behaves as if
CLUSTER_SIZE is always 8.
</doc>
<gen min="600"/>
<display>
{SY}{SS}{JP}{NAME}.w{CLUSTER_SIZE} #{IMMED}
</display>
<!-- Displayed cluster size is derived from the raw 3-bit W field. -->
<derived name="CLUSTER_SIZE" type="uint">
<expr>
2 &lt;&lt; {W}
</expr>
</derived>
<pattern low="32" high="36">xxxxx</pattern> <!-- INDEX -->
<pattern low="37" high="39">xxx</pattern> <!-- BRTYPE -->
<pattern low="45" high="47">xxx</pattern> <!-- src1 -->
<pattern low="49" high="51">xx1</pattern> <!-- OPC_HI -->
<pattern low="55" high="58">0100</pattern> <!-- OPC -->
<field name="W" low="52" high="54" type="uint"/>
<!--
The encoder does not read a cluster size from the instruction: W is
hard-coded for a cluster size of 8 (util_logbase2(8) - 1 = 2, and
2 << 2 = 8), in line with the doc note above that the hardware
behaves as if CLUSTER_SIZE is always 8.
-->
<encode>
<map name="W">util_logbase2(8) - 1</map>
</encode>
</bitset>
<bitset name="getone" extends="#instruction-cat0-immed">
<pattern low="49" high="51">xx1</pattern> <!-- OPC_HI -->
<pattern low="55" high="58">0101</pattern> <!-- OPC -->

View File

@ -52,6 +52,80 @@ SOFTWARE.
</bitset>
<!--
Common base for cat5 instructions: declares the shared source,
sampler/texture, destination, write-mask, type and flag fields, plus
the encode mappings used to fill them from an ir3 instruction.

No field or pattern is declared here for bits 19-20 or bits 53-58;
extending bitsets supply those (e.g. an opcode pattern).
-->
<bitset name="#instruction-cat5" extends="#instruction">
<doc>
The "normal" case, ie. not s2en (indirect) and/or bindless
</doc>
<display>
{SY}{JP}{NAME}{3D}{A}{O}{P}{S} {TYPE}({WRMASK}){DST_HALF}{DST}{SRC1}{SRC2}{SAMP}{TEX}
</display>
<derived name="DST_HALF" expr="#type-half" type="bool" display="h"/>
<field name="FULL" pos="0" type="bool"/>
<derived name="HALF" expr="#multisrc-half" type="bool" display="h"/>
<!-- NUM_SRC, HAS_SAMP, HAS_TEX and HAS_TYPE are passed as params below but are not defined in this bitset. -->
<field name="SRC1" low="1" high="8" type="#cat5-src1">
<param name="NUM_SRC"/>
<param name="HALF"/>
</field>
<field name="SRC2" low="9" high="16" type="#cat5-src2">
<param name="NUM_SRC"/>
<param name="HALF"/>
<param name="O"/>
</field>
<!--
TODO remainder of first 32b differ depending on s2en/bindless..
possibly use overrides? Need to sort-out how to display..
Note b17 seems to show up in some blob traces (samgpN), need
to figure out what this bit does
-->
<pattern low="17" high="18">0x</pattern>
<field name="SAMP" low="21" high="24" type="#cat5-samp">
<param name="HAS_SAMP"/>
</field>
<field name="TEX" low="25" high="31" type="#cat5-tex">
<param name="HAS_TEX"/>
</field>
<field name="DST" low="32" high="39" type="#reg-gpr"/>
<field name="WRMASK" low="40" high="43" type="#wrmask"/>
<field name="TYPE" low="44" high="46" type="#cat5-type">
<param name="HAS_TYPE"/>
</field>
<assert pos="47">0</assert> <!-- BASE_LO -->
<field name="3D" pos="48" type="bool" display=".3d"/>
<field name="A" pos="49" type="bool" display=".a"/>
<field name="S" pos="50" type="bool" display=".s"/>
<field name="S2EN_BINDLESS" pos="51" type="bool"/>
<field name="O" pos="52" type="bool" display=".o"/>
<!-- OPC -->
<field name="JP" pos="59" type="bool" display="(jp)"/>
<field name="SY" pos="60" type="bool" display="(sy)"/>
<pattern low="61" high="63">101</pattern> <!-- cat5 -->
<!--
NOTE(review): BASE, BASE_HI, DESC_MODE and SRC3 are mapped below but
are not declared as fields in this bitset, and {P} appears in the
display template without a P field here; presumably these are
declared by extending bitsets or their overrides. Confirm.
-->
<encode>
<map name="FULL">extract_cat5_FULL(src)</map>
<map name="TEX">src</map>
<map name="SAMP">src</map>
<map name="WRMASK">src->dsts[0]->wrmask</map>
<map name="BASE">src</map>
<map name="TYPE">src</map>
<map name="BASE_HI">src->cat5.tex_base >> 1</map>
<map name="3D">!!(src->flags &amp; IR3_INSTR_3D)</map>
<map name="A">!!(src->flags &amp; IR3_INSTR_A)</map>
<map name="S">!!(src->flags &amp; IR3_INSTR_S)</map>
<map name="S2EN_BINDLESS">!!(src->flags &amp; (IR3_INSTR_S2EN | IR3_INSTR_B))</map>
<map name="O">!!(src->flags &amp; IR3_INSTR_O)</map>
<map name="DESC_MODE">extract_cat5_DESC_MODE(src)</map>
<!--
TODO the src order is currently a bit messy due to ir3 using srcs[0]
for s2en src in the s2en case
-->
<map name="SRC1">extract_cat5_SRC(src, 0)</map>
<map name="SRC2">extract_cat5_SRC(src, 1)</map>
<map name="SRC3">(src->srcs_count > 0) ? src->srcs[0] : NULL</map>
</encode>
</bitset>
<bitset name="#instruction-cat5-tex" extends="#instruction-cat5">
<override>
<expr>{S2EN_BINDLESS}</expr>
<doc>
@ -79,82 +153,15 @@ SOFTWARE.
<derived name="A1" expr="#cat5-s2enb-uses_a1" type="bool" display=", a1.x"/>
</override>
<doc>
The "normal" case, ie. not s2en (indirect) and/or bindless
</doc>
<display>
{SY}{JP}{NAME}{3D}{A}{O}{P}{S} {TYPE}({WRMASK}){DST_HALF}{DST}{SRC1}{SRC2}{SAMP}{TEX}
</display>
<derived name="DST_HALF" expr="#type-half" type="bool" display="h"/>
<field name="FULL" pos="0" type="bool"/>
<derived name="HALF" expr="#multisrc-half" type="bool" display="h"/>
<field name="SRC1" low="1" high="8" type="#cat5-src1">
<param name="NUM_SRC"/>
<param name="HALF"/>
</field>
<field name="SRC2" low="9" high="16" type="#cat5-src2">
<param name="NUM_SRC"/>
<param name="HALF"/>
<param name="O"/>
</field>
<!--
TODO remainder of first 32b differ depending on s2en/bindless..
possibly use overrides? Need to sort-out how to display..
Note b17 seems to show up in some blob traces (samgpN), need
to figure out what this bit does
-->
<pattern low="17" high="18">0x</pattern>
<assert low="19" high="20">00</assert> <!-- BASE_HI -->
<field name="SAMP" low="21" high="24" type="#cat5-samp">
<param name="HAS_SAMP"/>
</field>
<field name="TEX" low="25" high="31" type="#cat5-tex">
<param name="HAS_TEX"/>
</field>
<field name="DST" low="32" high="39" type="#reg-gpr"/>
<field name="WRMASK" low="40" high="43" type="#wrmask"/>
<field name="TYPE" low="44" high="46" type="#cat5-type">
<param name="HAS_TYPE"/>
</field>
<assert pos="47">0</assert> <!-- BASE_LO -->
<field name="3D" pos="48" type="bool" display=".3d"/>
<field name="A" pos="49" type="bool" display=".a"/>
<field name="S" pos="50" type="bool" display=".s"/>
<field name="S2EN_BINDLESS" pos="51" type="bool"/>
<field name="O" pos="52" type="bool" display=".o"/>
<field name="P" pos="53" type="bool" display=".p"/>
<!-- OPC -->
<field name="JP" pos="59" type="bool" display="(jp)"/>
<field name="SY" pos="60" type="bool" display="(sy)"/>
<pattern low="61" high="63">101</pattern> <!-- cat5 -->
<encode>
<map name="FULL">extract_cat5_FULL(src)</map>
<map name="TEX">src</map>
<map name="SAMP">src</map>
<map name="WRMASK">src->dsts[0]->wrmask</map>
<map name="BASE">src</map>
<map name="TYPE">src</map>
<map name="BASE_HI">src->cat5.tex_base >> 1</map>
<map name="3D">!!(src->flags &amp; IR3_INSTR_3D)</map>
<map name="A">!!(src->flags &amp; IR3_INSTR_A)</map>
<map name="S">!!(src->flags &amp; IR3_INSTR_S)</map>
<map name="S2EN_BINDLESS">!!(src->flags &amp; (IR3_INSTR_S2EN | IR3_INSTR_B))</map>
<map name="O">!!(src->flags &amp; IR3_INSTR_O)</map>
<map name="P">!!(src->flags &amp; IR3_INSTR_P)</map>
<map name="DESC_MODE">extract_cat5_DESC_MODE(src)</map>
<!--
TODO the src order is currently a bit messy due to ir3 using srcs[0]
for s2en src in the s2en case
-->
<map name="SRC1">extract_cat5_SRC(src, 0)</map>
<map name="SRC2">extract_cat5_SRC(src, 1)</map>
<map name="SRC3">(src->srcs_count > 0) ? src->srcs[0] : NULL</map>
</encode>
</bitset>
<bitset name="isam" extends="#instruction-cat5">
<bitset name="isam" extends="#instruction-cat5-tex">
<pattern low="54" high="58">00000</pattern>
<derived name="NUM_SRC" expr="#one" type="uint"/>
<derived name="HAS_SAMP" expr="#true" type="bool"/>
@ -162,7 +169,7 @@ SOFTWARE.
<derived name="HAS_TYPE" expr="#true" type="bool"/>
</bitset>
<bitset name="isaml" extends="#instruction-cat5">
<bitset name="isaml" extends="#instruction-cat5-tex">
<pattern low="54" high="58">00001</pattern>
<derived name="NUM_SRC" expr="#two" type="uint"/>
<derived name="HAS_SAMP" expr="#true" type="bool"/>
@ -170,7 +177,7 @@ SOFTWARE.
<derived name="HAS_TYPE" expr="#true" type="bool"/>
</bitset>
<bitset name="isamm" extends="#instruction-cat5">
<bitset name="isamm" extends="#instruction-cat5-tex">
<pattern low="54" high="58">00010</pattern>
<derived name="NUM_SRC" expr="#one" type="uint"/>
<derived name="HAS_SAMP" expr="#true" type="bool"/>
@ -178,7 +185,7 @@ SOFTWARE.
<derived name="HAS_TYPE" expr="#true" type="bool"/>
</bitset>
<bitset name="sam" extends="#instruction-cat5">
<bitset name="sam" extends="#instruction-cat5-tex">
<pattern low="54" high="58">00011</pattern>
<derived name="NUM_SRC" expr="#one" type="uint"/>
<derived name="HAS_SAMP" expr="#true" type="bool"/>
@ -186,7 +193,7 @@ SOFTWARE.
<derived name="HAS_TYPE" expr="#true" type="bool"/>
</bitset>
<bitset name="samb" extends="#instruction-cat5">
<bitset name="samb" extends="#instruction-cat5-tex">
<pattern low="54" high="58">00100</pattern>
<derived name="NUM_SRC" expr="#two" type="uint"/>
<derived name="HAS_SAMP" expr="#true" type="bool"/>
@ -194,7 +201,7 @@ SOFTWARE.
<derived name="HAS_TYPE" expr="#true" type="bool"/>
</bitset>
<bitset name="saml" extends="#instruction-cat5">
<bitset name="saml" extends="#instruction-cat5-tex">
<pattern low="54" high="58">00101</pattern>
<derived name="NUM_SRC" expr="#two" type="uint"/>
<derived name="HAS_SAMP" expr="#true" type="bool"/>
@ -202,7 +209,7 @@ SOFTWARE.
<derived name="HAS_TYPE" expr="#true" type="bool"/>
</bitset>
<bitset name="samgq" extends="#instruction-cat5">
<bitset name="samgq" extends="#instruction-cat5-tex">
<pattern low="54" high="58">00110</pattern>
<derived name="NUM_SRC" expr="#one" type="uint"/>
<derived name="HAS_SAMP" expr="#true" type="bool"/>
@ -210,7 +217,7 @@ SOFTWARE.
<derived name="HAS_TYPE" expr="#true" type="bool"/>
</bitset>
<bitset name="getlod" extends="#instruction-cat5">
<bitset name="getlod" extends="#instruction-cat5-tex">
<pattern low="54" high="58">00111</pattern>
<derived name="NUM_SRC" expr="#one" type="uint"/>
<derived name="HAS_SAMP" expr="#true" type="bool"/>
@ -218,7 +225,7 @@ SOFTWARE.
<derived name="HAS_TYPE" expr="#true" type="bool"/>
</bitset>
<bitset name="conv" extends="#instruction-cat5">
<bitset name="conv" extends="#instruction-cat5-tex">
<pattern low="54" high="58">01000</pattern>
<derived name="NUM_SRC" expr="#two" type="uint"/>
<derived name="HAS_SAMP" expr="#true" type="bool"/>
@ -226,7 +233,7 @@ SOFTWARE.
<derived name="HAS_TYPE" expr="#true" type="bool"/>
</bitset>
<bitset name="convm" extends="#instruction-cat5">
<bitset name="convm" extends="#instruction-cat5-tex">
<pattern low="54" high="58">01001</pattern>
<derived name="NUM_SRC" expr="#two" type="uint"/>
<derived name="HAS_SAMP" expr="#true" type="bool"/>
@ -234,7 +241,7 @@ SOFTWARE.
<derived name="HAS_TYPE" expr="#true" type="bool"/>
</bitset>
<bitset name="getsize" extends="#instruction-cat5">
<bitset name="getsize" extends="#instruction-cat5-tex">
<pattern low="54" high="58">01010</pattern>
<derived name="NUM_SRC" expr="#one" type="uint"/>
<derived name="HAS_SAMP" expr="#false" type="bool"/>
@ -242,7 +249,7 @@ SOFTWARE.
<derived name="HAS_TYPE" expr="#true" type="bool"/>
</bitset>
<bitset name="getbuf" extends="#instruction-cat5">
<bitset name="getbuf" extends="#instruction-cat5-tex">
<pattern low="54" high="58">01011</pattern>
<derived name="NUM_SRC" expr="#zero" type="uint"/>
<derived name="HAS_SAMP" expr="#false" type="bool"/>
@ -250,7 +257,7 @@ SOFTWARE.
<derived name="HAS_TYPE" expr="#true" type="bool"/>
</bitset>
<bitset name="getpos" extends="#instruction-cat5">
<bitset name="getpos" extends="#instruction-cat5-tex">
<pattern low="54" high="58">01100</pattern>
<derived name="NUM_SRC" expr="#one" type="uint"/>
<derived name="HAS_SAMP" expr="#false" type="bool"/>
@ -258,7 +265,7 @@ SOFTWARE.
<derived name="HAS_TYPE" expr="#true" type="bool"/>
</bitset>
<bitset name="getinfo" extends="#instruction-cat5">
<bitset name="getinfo" extends="#instruction-cat5-tex">
<pattern low="54" high="58">01101</pattern>
<derived name="NUM_SRC" expr="#zero" type="uint"/>
<derived name="HAS_SAMP" expr="#false" type="bool"/>
@ -266,7 +273,7 @@ SOFTWARE.
<derived name="HAS_TYPE" expr="#true" type="bool"/>
</bitset>
<bitset name="dsx" extends="#instruction-cat5">
<bitset name="dsx" extends="#instruction-cat5-tex">
<pattern low="54" high="58">01110</pattern>
<derived name="NUM_SRC" expr="#one" type="uint"/>
<derived name="HAS_SAMP" expr="#false" type="bool"/>
@ -274,7 +281,7 @@ SOFTWARE.
<derived name="HAS_TYPE" expr="#true" type="bool"/>
</bitset>
<bitset name="dsy" extends="#instruction-cat5">
<bitset name="dsy" extends="#instruction-cat5-tex">
<pattern low="54" high="58">01111</pattern>
<derived name="NUM_SRC" expr="#one" type="uint"/>
<derived name="HAS_SAMP" expr="#false" type="bool"/>
@ -282,7 +289,7 @@ SOFTWARE.
<derived name="HAS_TYPE" expr="#true" type="bool"/>
</bitset>
<bitset name="gather4r" extends="#instruction-cat5">
<bitset name="gather4r" extends="#instruction-cat5-tex">
<pattern low="54" high="58">10000</pattern>
<derived name="NUM_SRC" expr="#one" type="uint"/>
<derived name="HAS_SAMP" expr="#true" type="bool"/>
@ -290,7 +297,7 @@ SOFTWARE.
<derived name="HAS_TYPE" expr="#true" type="bool"/>
</bitset>
<bitset name="gather4g" extends="#instruction-cat5">
<bitset name="gather4g" extends="#instruction-cat5-tex">
<pattern low="54" high="58">10001</pattern>
<derived name="NUM_SRC" expr="#one" type="uint"/>
<derived name="HAS_SAMP" expr="#true" type="bool"/>
@ -298,7 +305,7 @@ SOFTWARE.
<derived name="HAS_TYPE" expr="#true" type="bool"/>
</bitset>
<bitset name="gather4b" extends="#instruction-cat5">
<bitset name="gather4b" extends="#instruction-cat5-tex">
<pattern low="54" high="58">10010</pattern>
<derived name="NUM_SRC" expr="#one" type="uint"/>
<derived name="HAS_SAMP" expr="#true" type="bool"/>
@ -306,7 +313,7 @@ SOFTWARE.
<derived name="HAS_TYPE" expr="#true" type="bool"/>
</bitset>
<bitset name="gather4a" extends="#instruction-cat5">
<bitset name="gather4a" extends="#instruction-cat5-tex">
<pattern low="54" high="58">10011</pattern>
<derived name="NUM_SRC" expr="#one" type="uint"/>
<derived name="HAS_SAMP" expr="#true" type="bool"/>
@ -314,7 +321,7 @@ SOFTWARE.
<derived name="HAS_TYPE" expr="#true" type="bool"/>
</bitset>
<bitset name="samgp0" extends="#instruction-cat5">
<bitset name="samgp0" extends="#instruction-cat5-tex">
<pattern low="54" high="58">10100</pattern>
<derived name="NUM_SRC" expr="#one" type="uint"/>
<derived name="HAS_SAMP" expr="#true" type="bool"/>
@ -322,7 +329,7 @@ SOFTWARE.
<derived name="HAS_TYPE" expr="#true" type="bool"/>
</bitset>
<bitset name="samgp1" extends="#instruction-cat5">
<bitset name="samgp1" extends="#instruction-cat5-tex">
<pattern low="54" high="58">10101</pattern>
<derived name="NUM_SRC" expr="#one" type="uint"/>
<derived name="HAS_SAMP" expr="#true" type="bool"/>
@ -330,7 +337,7 @@ SOFTWARE.
<derived name="HAS_TYPE" expr="#true" type="bool"/>
</bitset>
<bitset name="samgp2" extends="#instruction-cat5">
<bitset name="samgp2" extends="#instruction-cat5-tex">
<pattern low="54" high="58">10110</pattern>
<derived name="NUM_SRC" expr="#one" type="uint"/>
<derived name="HAS_SAMP" expr="#true" type="bool"/>
@ -338,7 +345,7 @@ SOFTWARE.
<derived name="HAS_TYPE" expr="#true" type="bool"/>
</bitset>
<bitset name="samgp3" extends="#instruction-cat5">
<bitset name="samgp3" extends="#instruction-cat5-tex">
<pattern low="54" high="58">10111</pattern>
<derived name="NUM_SRC" expr="#one" type="uint"/>
<derived name="HAS_SAMP" expr="#true" type="bool"/>
@ -346,7 +353,7 @@ SOFTWARE.
<derived name="HAS_TYPE" expr="#true" type="bool"/>
</bitset>
<bitset name="dsxpp.1" extends="#instruction-cat5">
<bitset name="dsxpp.1" extends="#instruction-cat5-tex">
<pattern low="54" high="58">11000</pattern>
<derived name="NUM_SRC" expr="#one" type="uint"/>
<derived name="HAS_SAMP" expr="#false" type="bool"/>
@ -354,7 +361,7 @@ SOFTWARE.
<derived name="HAS_TYPE" expr="#false" type="bool"/>
</bitset>
<bitset name="dsypp.1" extends="#instruction-cat5">
<bitset name="dsypp.1" extends="#instruction-cat5-tex">
<pattern low="54" high="58">11001</pattern>
<derived name="NUM_SRC" expr="#one" type="uint"/>
<derived name="HAS_SAMP" expr="#false" type="bool"/>
@ -362,7 +369,7 @@ SOFTWARE.
<derived name="HAS_TYPE" expr="#false" type="bool"/>
</bitset>
<bitset name="rgetpos" extends="#instruction-cat5">
<bitset name="rgetpos" extends="#instruction-cat5-tex">
<pattern low="54" high="58">11010</pattern>
<derived name="NUM_SRC" expr="#one" type="uint"/>
<derived name="HAS_SAMP" expr="#false" type="bool"/>
@ -370,7 +377,7 @@ SOFTWARE.
<derived name="HAS_TYPE" expr="#true" type="bool"/>
</bitset>
<bitset name="rgetinfo" extends="#instruction-cat5">
<bitset name="rgetinfo" extends="#instruction-cat5-tex">
<pattern low="54" high="58">11011</pattern>
<derived name="NUM_SRC" expr="#zero" type="uint"/>
<derived name="HAS_SAMP" expr="#false" type="bool"/>
@ -378,6 +385,105 @@ SOFTWARE.
<derived name="HAS_TYPE" expr="#true" type="bool"/>
</bitset>
<!--
cat5 "brcst.active": single-source instruction with a cluster-size
suffix, displayed as e.g. "brcst.active.w8 (u32)(x)r0.z, r0.y".
It takes no sampler or texture operand.
-->
<bitset name="brcst.active" extends="#instruction-cat5">
<doc>
The subgroup is divided into (subgroup_size / CLUSTER_SIZE)
clusters. For each cluster brcst.active.w does:
Given a cluster of fibers f_0, f_1, ..., f_{CLUSTER_SIZE-1} brcst
broadcasts the SRC value from the fiber f_{CLUSTER_SIZE/2-1}
to fibers f_{CLUSTER_SIZE/2}, ..., f_{CLUSTER_SIZE-1}. The DST reg
in other fibers is unaffected. If fiber f_{CLUSTER_SIZE/2-1} is
inactive the value to broadcast is taken from lower fibers
f_{CLUSTER_SIZE/2-2}, f_{CLUSTER_SIZE/2-3}, ...
If all fibers f_0, f_1, ..., f_{CLUSTER_SIZE/2-1} are inactive
the DST reg remains unchanged for all fibers.
It is necessary in order to implement arithmetic subgroup
operations with prefix sum (https://en.wikipedia.org/wiki/Prefix_sum).
For brcst.active.w8 without inactive fibers:
Fiber | 0 1 2 3 4 5 6 7 | 8 9 10 11 12 13 14 15
SRC | s0 s1 s2 s3 ... s7 | s8 ... s11 ... s15
DST_before | d0 d1 ... d7 | d8 ... d15
DST_after | d0 d1 d2 d3 s3 s3 s3 s3 | d8 ... d11 s11 s11 s11 s11
If fibers 2 and 3 are inactive:
Fiber | 0 1 X X 4 5 6 7 | ...
SRC | s0 s1 X X ... s7 | ...
DST_before | d0 d1 ... d7 | ...
DST_after | d0 d1 X X s1 s1 s1 s1 | ...
</doc>
<gen min="600"/>
<display>
{SY}{JP}{NAME}.w{CLUSTER_SIZE} {TYPE}({WRMASK}){DST_HALF}{DST}{SRC1}
</display>
<!-- W is a 2-bit field, so with CLUSTER_SIZE = 2 << W it can express cluster sizes 2, 4, 8 and 16. -->
<field name="W" low="19" high="20" type="uint"/>
<pattern low="53" high="58">111110</pattern> <!-- OPC -->
<derived name="CLUSTER_SIZE" type="uint">
<expr>
2 &lt;&lt; {W}
</expr>
</derived>
<derived name="NUM_SRC" expr="#one" type="uint"/>
<derived name="HAS_SAMP" expr="#false" type="bool"/>
<derived name="HAS_TEX" expr="#false" type="bool"/>
<derived name="HAS_TYPE" expr="#true" type="bool"/>
<!--
Inverse of the CLUSTER_SIZE expression: W = log2(cluster_size) - 1.
NOTE(review): cat5.cluster_size is declared as a 4-bit bitfield in
struct ir3_instruction, which cannot hold the value 16 that W = 3
would correspond to. Confirm whether .w16 needs to be supported.
-->
<encode>
<map name="W">util_logbase2(src->cat5.cluster_size) - 1</map>
</encode>
</bitset>
<!--
Shared base for the quad_shuffle.* instructions. All variants use the
same opcode pattern in bits 53-58 and take no sampler or texture
operand; each variant bitset adds its own pattern for bits 19-20 and
its own NUM_SRC.
-->
<bitset name="#instruction-cat5-quad-shuffle" extends="#instruction-cat5">
<gen min="600"/>
<display>
{SY}{JP}{NAME} {TYPE}({WRMASK}){DST_HALF}{DST}{SRC1}{SRC2}
</display>
<pattern low="53" high="58">111111</pattern> <!-- OPC -->
<derived name="HAS_SAMP" expr="#false" type="bool"/>
<derived name="HAS_TEX" expr="#false" type="bool"/>
<derived name="HAS_TYPE" expr="#true" type="bool"/>
</bitset>
<!--
Variant 00 of quad_shuffle, and the only one declared with two
sources, e.g. "quad_shuffle.brcst (u32)(x)r1.w, r0.x, r0.z".
NOTE(review): the second source presumably selects which fiber of the
quad to broadcast from. Confirm.
-->
<bitset name="quad_shuffle.brcst" extends="#instruction-cat5-quad-shuffle">
<doc>subgroupQuadBroadcast</doc>
<pattern low="19" high="20">00</pattern> <!-- Quad-shuffle variant -->
<derived name="NUM_SRC" expr="#two" type="uint"/>
</bitset>
<!-- Variant 01 of quad_shuffle; single source. -->
<bitset name="quad_shuffle.horiz" extends="#instruction-cat5-quad-shuffle">
<doc>subgroupQuadSwapHorizontal</doc>
<pattern low="19" high="20">01</pattern> <!-- Quad-shuffle variant -->
<derived name="NUM_SRC" expr="#one" type="uint"/>
</bitset>
<!-- Variant 10 of quad_shuffle; single source. -->
<bitset name="quad_shuffle.vert" extends="#instruction-cat5-quad-shuffle">
<doc>subgroupQuadSwapVertical</doc>
<pattern low="19" high="20">10</pattern> <!-- Quad-shuffle variant -->
<derived name="NUM_SRC" expr="#one" type="uint"/>
</bitset>
<!-- Variant 11 of quad_shuffle; single source, e.g. "quad_shuffle.diag (u32)(x)r1.x, r0.x". -->
<bitset name="quad_shuffle.diag" extends="#instruction-cat5-quad-shuffle">
<doc>subgroupQuadSwapDiagonal</doc>
<pattern low="19" high="20">11</pattern> <!-- Quad-shuffle variant -->
<derived name="NUM_SRC" expr="#one" type="uint"/>
</bitset>
<!--
All the magic for conditionally displaying various srcs, etc

View File

@ -827,6 +827,29 @@ SOFTWARE.
<pattern low="52" high="53">1x</pattern>
</bitset>
<!--
cat6 "getfiberid": only DST and TYPE are declared as fields; the
other bit ranges listed are fixed or don't-care patterns. Displayed
as e.g. "getfiberid.u32 r0.y". The OPC pattern 100110 is 38, matching
OPC_GETFIBERID = _OPC(6, 38).
NOTE(review): the doc text names gl_SubgroupID, but a per-fiber ID
sounds like the invocation index within a subgroup
(gl_SubgroupInvocationID). Confirm which one is meant.
-->
<bitset name="getfiberid" extends="#instruction-cat6-a6xx">
<doc>
GET Fiber ID (gl_SubgroupID)
</doc>
<gen min="600"/>
<display>
{SY}{JP}{NAME}.{TYPE} {DST}
</display>
<pattern pos="0" >0</pattern>
<pattern low="9" high="10">xx</pattern> <!-- D_MINUS_ONE -->
<pattern pos="11" >x</pattern> <!-- TYPED -->
<pattern low="14" high="19">100110</pattern> <!-- OPC -->
<pattern low="20" high="23">11xx</pattern>
<pattern low="24" high="31">xxxxxxxx</pattern> <!-- SRC2 -->
<field low="32" high="39" name="DST" type="#reg-gpr"/>
<pattern low="41" high="48">xxxxxxxx</pattern> <!-- SSBO/image binding point -->
<field low="49" high="51" name="TYPE" type="#type"/>
<pattern low="52" high="53">1x</pattern>
</bitset>
<bitset name="resinfo.b" extends="#instruction-cat6-a6xx">
<doc>
RESourceINFO - returns image/ssbo dimensions (3 components)