ir3: New cat3 instructions

* shrm - (src2 >> src1) & src3
* shlm - (src2 << src1) & src3
* shrg - (src2 >> src1) | src3
* shlg - (src2 << src1) | src3
* andg - (src2 & src1) | src3
* dp2acc - dot product of two {i,u}8vec2 packed into
  SRC1 and SRC2, added to 32b SRC3
* dp4acc - dot product of two {i,u}8vec4 packed into
  SRC1 and SRC2, added to 32b SRC3
* wmm - vec4(x_1, x_2, x_3, x_4) * (y_1 + y_2 + y_3 + y_4), which is
  duplicated (1 << (SRC3 / 32)) times starting from DST register
* wmm.accu - same as wmm but result is added to DST registers, however
  the first reg in each vec4 result is overwritten instead of
  accumulating.

Signed-off-by: Danylo Piliaiev <dpiliaiev@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13986>
This commit is contained in:
Danylo Piliaiev 2021-11-24 14:57:03 +02:00
parent c45c6e36eb
commit c1d5c318bc
8 changed files with 352 additions and 53 deletions

View File

@ -258,7 +258,15 @@ static const struct opc_info {
OPC(3, OPC_SEL_F32, sel.f32),
OPC(3, OPC_SAD_S16, sad.s16),
OPC(3, OPC_SAD_S32, sad.s32),
OPC(3, OPC_SHLG_B16, shlg.b16),
OPC(3, OPC_SHRM, shrm),
OPC(3, OPC_SHLM, shlm),
OPC(3, OPC_SHRG, shrg),
OPC(3, OPC_SHLG, shlg),
OPC(3, OPC_ANDG, andg),
OPC(3, OPC_DP2ACC, dp2acc),
OPC(3, OPC_DP4ACC, dp4acc),
OPC(3, OPC_WMM, wmm),
OPC(3, OPC_WMM_ACCU, wmm.accu),
/* category 4: */
OPC(4, OPC_RCP, rcp),

View File

@ -198,7 +198,15 @@ typedef enum {
OPC_SEL_F32 = _OPC(3, 13),
OPC_SAD_S16 = _OPC(3, 14),
OPC_SAD_S32 = _OPC(3, 15),
OPC_SHLG_B16 = _OPC(3, 16),
OPC_SHRM = _OPC(3, 16),
OPC_SHLM = _OPC(3, 17),
OPC_SHRG = _OPC(3, 18),
OPC_SHLG = _OPC(3, 19),
OPC_ANDG = _OPC(3, 20),
OPC_DP2ACC = _OPC(3, 21),
OPC_DP4ACC = _OPC(3, 22),
OPC_WMM = _OPC(3, 23),
OPC_WMM_ACCU = _OPC(3, 24),
/* category 4: */
OPC_RCP = _OPC(4, 0),

View File

@ -922,12 +922,29 @@ ir3_valid_flags(struct ir3_instruction *instr, unsigned n, unsigned flags)
valid_flags =
ir3_cat3_absneg(instr->opc) | IR3_REG_RELATIV | IR3_REG_SHARED;
if (instr->opc == OPC_SHLG_B16) {
switch (instr->opc) {
case OPC_SHRM:
case OPC_SHLM:
case OPC_SHRG:
case OPC_SHLG:
case OPC_ANDG: {
valid_flags |= IR3_REG_IMMED;
/* shlg.b16 can be RELATIV+CONST but not CONST: */
/* Can be RELATIV+CONST but not CONST: */
if (flags & IR3_REG_RELATIV)
valid_flags |= IR3_REG_CONST;
} else {
break;
}
case OPC_WMM:
case OPC_WMM_ACCU: {
   /* Reset (rather than extend) the mask built above: the register
    * sources of wmm/wmm.accu only accept the SHARED flag.
    */
   valid_flags = IR3_REG_SHARED;
   /* The third source (n == 2) is the immediate that selects how many
    * times the result is duplicated, so it must be allowed to carry
    * IR3_REG_IMMED.  Allowing IR3_REG_CONST here would reject the only
    * operand form the assembler produces for this source.
    */
   if (n == 2)
      valid_flags = IR3_REG_IMMED;
   break;
}
case OPC_DP2ACC:
case OPC_DP4ACC:
break;
default:
valid_flags |= IR3_REG_CONST;
}

View File

@ -329,6 +329,16 @@ struct ir3_instruction {
IR3_COND_NE = 5,
} condition;
} cat2;
struct {
enum {
IR3_SRC_UNSIGNED = 0,
IR3_SRC_MIXED = 1,
} signedness;
enum {
IR3_SRC_PACKED_LOW = 0,
IR3_SRC_PACKED_HIGH = 1,
} packed;
} cat3;
struct {
unsigned samp, tex;
unsigned tex_base : 3;
@ -1383,7 +1393,13 @@ ir3_cat3_absneg(opc_t opc)
case OPC_SEL_B16:
case OPC_SEL_B32:
case OPC_SHLG_B16:
case OPC_SHRM:
case OPC_SHLM:
case OPC_SHRG:
case OPC_SHLG:
case OPC_ANDG:
case OPC_WMM:
case OPC_WMM_ACCU:
default:
return 0;
@ -1407,6 +1423,8 @@ ir3_output_conv_type(struct ir3_instruction *instr, bool *can_fold)
case OPC_BARY_F:
case OPC_MAD_F32:
case OPC_MAD_F16:
case OPC_WMM:
case OPC_WMM_ACCU:
return TYPE_F32;
case OPC_ADD_U:
@ -1423,6 +1441,11 @@ ir3_output_conv_type(struct ir3_instruction *instr, bool *can_fold)
case OPC_SHR_B:
case OPC_ASHR_B:
case OPC_MAD_U24:
case OPC_SHRM:
case OPC_SHLM:
case OPC_SHRG:
case OPC_SHLG:
case OPC_ANDG:
/* Comparison ops zero-extend/truncate their results, so consider them as
* unsigned here.
*/
@ -2213,6 +2236,8 @@ INSTR3(MAD_U24)
INSTR3(MAD_S24)
INSTR3(MAD_F16)
INSTR3(MAD_F32)
INSTR3(DP2ACC)
INSTR3(DP4ACC)
/* NOTE: SEL_B32 checks for zero vs nonzero */
INSTR3(SEL_B16)
INSTR3(SEL_B32)

View File

@ -244,7 +244,15 @@ static int parse_reg(const char *str)
"sel.f32" return TOKEN(T_OP_SEL_F32);
"sad.s16" return TOKEN(T_OP_SAD_S16);
"sad.s32" return TOKEN(T_OP_SAD_S32);
"shlg.b16" return TOKEN(T_OP_SHLG_B16);
"shrm" return TOKEN(T_OP_SHRM);
"shlm" return TOKEN(T_OP_SHLM);
"shrg" return TOKEN(T_OP_SHRG);
"shlg" return TOKEN(T_OP_SHLG);
"andg" return TOKEN(T_OP_ANDG);
"dp2acc" return TOKEN(T_OP_DP2ACC);
"dp4acc" return TOKEN(T_OP_DP4ACC);
"wmm" return TOKEN(T_OP_WMM);
"wmm.accu" return TOKEN(T_OP_WMM_ACCU);
/* category 4: */
"rcp" return TOKEN(T_OP_RCP);
@ -383,6 +391,11 @@ static int parse_reg(const char *str)
"untyped" return TOKEN(T_UNTYPED);
"typed" return TOKEN(T_TYPED);
"unsigned" return TOKEN(T_UNSIGNED);
"mixed" return TOKEN(T_MIXED);
"low" return TOKEN(T_LOW);
"high" return TOKEN(T_HIGH);
"1d" return TOKEN(T_1D);
"2d" return TOKEN(T_2D);
"3d" return TOKEN(T_3D);

View File

@ -484,7 +484,15 @@ static void print_token(FILE *file, int type, YYSTYPE value)
%token <tok> T_OP_SEL_F32
%token <tok> T_OP_SAD_S16
%token <tok> T_OP_SAD_S32
%token <tok> T_OP_SHLG_B16
%token <tok> T_OP_SHRM
%token <tok> T_OP_SHLM
%token <tok> T_OP_SHRG
%token <tok> T_OP_SHLG
%token <tok> T_OP_ANDG
%token <tok> T_OP_DP2ACC
%token <tok> T_OP_DP4ACC
%token <tok> T_OP_WMM
%token <tok> T_OP_WMM_ACCU
/* category 4: */
%token <tok> T_OP_RCP
@ -623,6 +631,11 @@ static void print_token(FILE *file, int type, YYSTYPE value)
%token <tok> T_UNTYPED
%token <tok> T_TYPED
%token <tok> T_MIXED
%token <tok> T_UNSIGNED
%token <tok> T_LOW
%token <tok> T_HIGH
%token <tok> T_1D
%token <tok> T_2D
%token <tok> T_3D
@ -949,6 +962,12 @@ cat2_instr: cat2_opc_1src dst_reg ',' src_reg_or_const_or_rel_or_imm
| cat2_opc_2src_cnd '.' cond dst_reg ',' src_reg_or_const_or_rel_or_imm ',' src_reg_or_const_or_rel_or_imm
| cat2_opc_2src dst_reg ',' src_reg_or_const_or_rel_or_imm ',' src_reg_or_const_or_rel_or_imm
cat3_dp_signedness:'.' T_MIXED { instr->cat3.signedness = IR3_SRC_MIXED; }
| '.' T_UNSIGNED{ instr->cat3.signedness = IR3_SRC_UNSIGNED; }
cat3_dp_pack: '.' T_LOW { instr->cat3.packed = IR3_SRC_PACKED_LOW; }
| '.' T_HIGH { instr->cat3.packed = IR3_SRC_PACKED_HIGH; }
cat3_opc: T_OP_MAD_U16 { new_instr(OPC_MAD_U16); }
| T_OP_MADSH_U16 { new_instr(OPC_MADSH_U16); }
| T_OP_MAD_S16 { new_instr(OPC_MAD_S16); }
@ -966,8 +985,22 @@ cat3_opc: T_OP_MAD_U16 { new_instr(OPC_MAD_U16); }
| T_OP_SAD_S16 { new_instr(OPC_SAD_S16); }
| T_OP_SAD_S32 { new_instr(OPC_SAD_S32); }
cat3_imm_reg_opc: T_OP_SHRM { new_instr(OPC_SHRM); }
| T_OP_SHLM { new_instr(OPC_SHLM); }
| T_OP_SHRG { new_instr(OPC_SHRG); }
| T_OP_SHLG { new_instr(OPC_SHLG); }
| T_OP_ANDG { new_instr(OPC_ANDG); }
cat3_wmm: T_OP_WMM { new_instr(OPC_WMM); }
| T_OP_WMM_ACCU { new_instr(OPC_WMM_ACCU); }
cat3_dp: T_OP_DP2ACC { new_instr(OPC_DP2ACC); }
| T_OP_DP4ACC { new_instr(OPC_DP4ACC); }
cat3_instr: cat3_opc dst_reg ',' src_reg_or_const_or_rel ',' src_reg_or_const ',' src_reg_or_const_or_rel
| T_OP_SHLG_B16 { new_instr(OPC_SHLG_B16); } dst_reg ',' src_reg_or_rel_or_imm ',' src_reg_or_const ',' src_reg_or_rel_or_imm
| cat3_imm_reg_opc dst_reg ',' src_reg_or_rel_or_imm ',' src_reg_or_const ',' src_reg_or_rel_or_imm
| cat3_wmm dst_reg ',' src_reg_gpr ',' src_reg ',' immediate
| cat3_dp cat3_dp_signedness cat3_dp_pack dst_reg ',' src_reg_or_rel_or_imm ',' src_reg_or_const ',' src_reg_or_rel_or_imm
cat4_opc: T_OP_RCP { new_instr(OPC_RCP); }
| T_OP_RSQ { new_instr(OPC_RSQ); }
@ -1267,6 +1300,9 @@ src_reg_flags: src_reg_flag
src_reg: src
| src_reg_flags src
src_reg_gpr: src_reg
| relative_gpr_src
src_const: const
| src_reg_flags const

View File

@ -127,9 +127,20 @@ static const struct test {
INSTR_6XX(63820005_10315030, "mad.f32 r1.y, (neg)c12.x, r1.x, c12.y"),
INSTR_6XX(62050009_00091000, "mad.u24 r2.y, c0.x, r2.z, r2.y"),
INSTR_6XX(61828008_00081033, "madsh.m16 r2.x, c12.w, r1.y, r2.x"),
INSTR_6XX(65900820_100cb008, "(nop3) shlg.b16 hr8.x, 8, hr8.x, 12"), /* (nop3) shlg.b16 hr8.x, (r)8, (r)hr8.x, 12; */
INSTR_6XX(65ae085c_0002a001, "(nop3) shlg.b16 hr23.x, hr0.y, hr23.x, hr0.z"), /* not seen in blob */
INSTR_6XX(65900820_0c0aac05, "(nop3) shlg.b16 hr8.x, hc<a0.x + 5>, hr8.x, hc<a0.x + 10>"), /* not seen in blob */
INSTR_6XX(65900820_100cb008, "(nop3) shlg hr8.x, 8, hr8.x, 12"), /* (nop3) shlg.b16 hr8.x, (r)8, (r)hr8.x, 12; */
INSTR_6XX(65ae085c_0002a001, "(nop3) shlg hr23.x, hr0.y, hr23.x, hr0.z"), /* not seen in blob */
INSTR_6XX(65900820_0c0aac05, "(nop3) shlg hr8.x, hc<a0.x + 5>, hr8.x, hc<a0.x + 10>"), /* not seen in blob */
INSTR_6XX(65ae0c5c_0002a001, "(nop3) shlg r23.x, r0.y, r23.x, r0.z"), /* (nop3) shlg.b32 r23.x, (r)r0.y, (r)r23.x, r0.z */
INSTR_6XX(64018802_0002e003, "(nop3) shrm hr0.z, (neg)hr0.w, hr0.w, hr0.z"),
INSTR_6XX(64818802_0002e003, "(nop3) shlm hr0.z, (neg)hr0.w, hr0.w, hr0.z"),
INSTR_6XX(65018802_0002e003, "(nop3) shrg hr0.z, (neg)hr0.w, hr0.w, hr0.z"),
INSTR_6XX(66018802_0002e003, "(nop3) andg hr0.z, (neg)hr0.w, hr0.w, hr0.z"),
INSTR_6XX(67018802_1002e003, "(nop3) wmm hr0.z, (neg)hr0.w, hr0.w, 2"), /* (nop3) wmm.f16f16 hr0.z, (abs)(r)hr0.w, (r)hr0.w, 2 */
INSTR_6XX(67018c02_1002e003, "(nop3) wmm.accu hr0.z, (neg)hr0.w, hr0.w, 2"),
INSTR_6XX(6701c802_9002a003, "(nop3) wmm r0.z, r0.w, r0.w, 2"), /* (nop3) wmm.f32f32 r0.z, (r)r0.w, (r)r0.w, 2 */
/* custom test with qcom_dot8 function from cl_qcom_dot_product8 */
INSTR_6XX(66818c02_0002e003, "(sat)(nop3) dp2acc.mixed.low r0.z, r0.w, r0.w, r0.z"), /* (nop3) dp2acc (sat)r0.z, (signed)(low)(r)r0.w, (low)(r)r0.w, r0.z */
INSTR_6XX(6681c802_8002a003, "(nop3) dp4acc.unsigned.low r0.z, r0.w, r0.w, (neg)r0.z"), /* (nop3) dp4acc r0.z, (unsigned)(r)r0.w, (r)r0.w, (neg)r0.z */
/* cat4 */
INSTR_6XX(8010000a_00000003, "rcp r2.z, r0.w"),

View File

@ -106,24 +106,13 @@ SOFTWARE.
<display>
{SY}{SS}{JP}{SAT}{REPEAT}{UL}{NAME} {DST_HALF}{DST}, {SRC1_NEG}{SRC1_R}{SRC1}, {SRC2_NEG}{SRC2_R}{HALF}{SRC2}, {SRC3_NEG}{SRC3_R}{SRC3}
</display>
<field name="SRC1_NEG" pos="14" type="bool" display="(neg)"/>
<field name="SRC2_R" pos="15" type="bool" display="(r)"/>
<field name="SRC3_R" pos="29" type="bool" display="(r)"/>
<field name="SRC2_NEG" pos="30" type="bool" display="(neg)"/>
<field name="SRC3_NEG" pos="31" type="bool" display="(neg)"/>
<field name="DST" low="32" high="39" type="#reg-gpr"/>
<field name="REPEAT" low="40" high="41" type="#rptN"/>
<field name="SAT" pos="42" type="bool" display="(sat)"/>
<field name="SRC1_R" pos="43" type="bool" display="(r)"/>
<field name="SS" pos="44" type="bool" display="(ss)"/>
<field name="UL" pos="45" type="bool" display="(ul)"/>
<field name="DST_CONV" pos="46" type="bool">
<doc>
The source precision is determined by the instruction
opcode. If {DST_CONV} the result is widened/narrowed
to the opposite precision.
</doc>
</field>
<field name="SRC2" low="47" high="54" type="#reg-gpr"/>
<!-- opcode, 4 bits -->
<field name="JP" pos="59" type="bool" display="(jp)"/>
@ -139,6 +128,38 @@ SOFTWARE.
<map name="SRC2_NEG">!!(src->srcs[1]->flags &amp; (IR3_REG_FNEG | IR3_REG_SNEG | IR3_REG_BNOT))</map>
<map name="SRC3_NEG">!!(src->srcs[2]->flags &amp; (IR3_REG_FNEG | IR3_REG_SNEG | IR3_REG_BNOT))</map>
<map name="SRC1">src->srcs[0]</map>
</encode>
</bitset>
<bitset name="#instruction-cat3" extends="#instruction-cat3-base">
<derived name="IMMED_ENCODING" expr="#false" type="bool" display="h"/>
<field name="SRC1" low="0" high="12" type="#cat3-src">
<param name="HALF"/>
<param name="IMMED_ENCODING"/>
</field>
<field name="SRC1_NEG" pos="14" type="bool" display="(neg)"/>
<pattern pos="13">0</pattern>
<field name="SRC3" low="16" high="28" type="#cat3-src">
<param name="HALF"/>
<param name="IMMED_ENCODING"/>
</field>
<field name="SRC2_NEG" pos="30" type="bool" display="(neg)"/>
<field name="SRC3_NEG" pos="31" type="bool" display="(neg)"/>
<field name="SAT" pos="42" type="bool" display="(sat)"/>
<field name="DST_CONV" pos="46" type="bool">
<doc>
The source precision is determined by the instruction
opcode. If {DST_CONV} the result is widened/narrowed
to the opposite precision.
</doc>
</field>
<encode>
<map name="DST_CONV">
((src->dsts[0]->num >> 2) == 62) ? 0 :
!!((src->srcs[0]->flags ^ src->dsts[0]->flags) &amp; IR3_REG_HALF)
@ -146,21 +167,7 @@ SOFTWARE.
</encode>
</bitset>
<bitset name="#instruction-cat3" extends="#instruction-cat3-base">
<pattern pos="13">0</pattern>
<derived name="IMMED_ENCODING" expr="#false" type="bool" display="h"/>
<field name="SRC1" low="0" high="12" type="#cat3-src">
<param name="HALF"/>
<param name="IMMED_ENCODING"/>
</field>
<field name="SRC3" low="16" high="28" type="#cat3-src">
<param name="HALF"/>
<param name="IMMED_ENCODING"/>
</field>
</bitset>
<!-- TODO check on pre a6xx gens -->
<bitset name="#instruction-cat3-alt" extends="#instruction-cat3-base">
<doc>
The difference is that this cat3 version does not support plain
@ -168,38 +175,39 @@ SOFTWARE.
On the other hand it still supports relative gpr and consts.
</doc>
<pattern pos="13">1</pattern>
<gen min="600"/>
<derived name="IMMED_ENCODING" expr="#true" type="bool" display="h"/>
<derived name="SAT" expr="#false" type="bool" display=""/>
<field name="SRC1" low="0" high="12" type="#cat3-src">
<param name="HALF"/>
<param name="IMMED_ENCODING"/>
</field>
<field name="SRC1_NEG" pos="14" type="bool" display="(neg)"/>
<pattern pos="13">1</pattern>
<field name="SRC3" low="16" high="28" type="#cat3-src">
<param name="HALF"/>
<param name="IMMED_ENCODING"/>
</field>
<field name="SRC2_NEG" pos="30" type="bool" display="(neg)"/>
<field name="SRC3_NEG" pos="31" type="bool" display="(neg)"/>
<field name="FULL" pos="42" type="bool"/>
<field name="DST_CONV" pos="46" type="bool"/>
<encode>
<map name="SRC3">src->srcs[2]</map>
<map name="DST_CONV">false</map>
<map name="FULL">!(src->srcs[1]->flags &amp; IR3_REG_HALF)</map>
<map name="DST_CONV">
((src->dsts[0]->num >> 2) == 62) ? 0 :
!!((src->srcs[1]->flags ^ src->dsts[0]->flags) &amp; IR3_REG_HALF)
</map>
</encode>
</bitset>
<!-- TODO find shlg.b32 -->
<bitset name="shlg.b16" extends="#instruction-cat3-alt">
<doc>
(src2 &lt;&lt; src1) | src3
</doc>
<!-- TODO check older gens -->
<gen min="600"/>
<pattern low="55" high="58">1011</pattern> <!-- OPC -->
<derived name="FULL" expr="#false" type="bool"/>
</bitset>
<bitset name="mad.u16" extends="#instruction-cat3">
<pattern low="55" high="58">0000</pattern> <!-- OPC -->
<derived name="FULL" expr="#false" type="bool"/>
@ -280,4 +288,177 @@ SOFTWARE.
<derived name="FULL" expr="#false" type="bool"/> <!-- We think? -->
</bitset>
<bitset name="shrm" extends="#instruction-cat3-alt">
<doc>
(src2 &gt;&gt; src1) &amp; src3
</doc>
<pattern low="55" high="58">1000</pattern> <!-- OPC -->
</bitset>
<bitset name="shlm" extends="#instruction-cat3-alt">
<doc>
(src2 &lt;&lt; src1) &amp; src3
</doc>
<pattern low="55" high="58">1001</pattern> <!-- OPC -->
</bitset>
<bitset name="shrg" extends="#instruction-cat3-alt">
<doc>
(src2 &gt;&gt; src1) | src3
</doc>
<pattern low="55" high="58">1010</pattern> <!-- OPC -->
</bitset>
<bitset name="shlg" extends="#instruction-cat3-alt">
<doc>
(src2 &lt;&lt; src1) | src3
</doc>
<pattern low="55" high="58">1011</pattern> <!-- OPC -->
</bitset>
<bitset name="andg" extends="#instruction-cat3-alt">
<doc>
(src2 &amp; src1) | src3
</doc>
<pattern low="55" high="58">1100</pattern> <!-- OPC -->
</bitset>
<!--
Opcode suffix printed for a one-bit source-signedness selector
(used as the type of the SRC_SIGN field of the dp2acc/dp4acc encoding).
NOTE(review): ".mixed" presumably means SRC1 is signed while SRC2 stays
unsigned, matching the dp2acc description - confirm against hardware docs.
-->
<enum name="#signedness">
<value val="0" display=".unsigned"/>
<value val="1" display=".mixed"/>
</enum>
<!--
Opcode suffix printed for a one-bit packing selector (used as the type of
the SRC_PACK field of the dp2acc/dp4acc encoding): tells whether the packed
8-bit vector sits in the low or the high half of the source registers.
-->
<enum name="#8bitvec2pack">
<value val="0" display=".low"/>
<value val="1" display=".high"/>
</enum>
<bitset name="#instruction-cat3-dp" extends="#instruction-cat3-base">
<gen min="600"/>
<display>
{SY}{SS}{JP}{SAT}(nop{NOP}) {UL}{NAME}{SRC_SIGN}{SRC_PACK} {DST}, {SRC1}, {SRC2}, {SRC3_NEG}{SRC3}
</display>
<derived name="FULL" expr="#true" type="bool"/>
<field name="SRC1" low="0" high="12" type="#cat3-src">
<param name="HALF"/>
</field>
<field name="SRC_SIGN" pos="14" type="#signedness"/>
<pattern pos="13">1</pattern>
<field name="SRC3" low="16" high="28" type="#cat3-src">
<param name="HALF"/>
</field>
<field name="SRC_PACK" pos="30" type="#8bitvec2pack"/>
<field name="SRC3_NEG" pos="31" type="bool" display="(neg)"/>
<field name="SAT" pos="42" type="bool" display="(sat)"/>
<encode>
<map name="SRC3">src->srcs[2]</map>
<map name="SRC_SIGN">src->cat3.signedness</map>
<map name="SRC_PACK">src->cat3.packed</map>
</encode>
</bitset>
<bitset name="dp2acc" extends="#instruction-cat3-dp">
<doc>
Given:
SRC1 is an i8vec2 or u8vec2
SRC2 is a u8vec2
SRC1 and SRC2 are packed into low or high halves of the registers.
SRC3 is an int32_t or uint32_t
Do:
DST = dot(SRC1, SRC2) + SRC3
</doc>
<pattern pos="46">0</pattern>
<pattern low="55" high="58">1101</pattern> <!-- OPC -->
</bitset>
<bitset name="dp4acc" extends="#instruction-cat3-dp">
<doc>
Same as dp2acc but for vec4 instead of vec2.
Corresponds to packed variants of OpUDotKHR and OpSUDotKHR.
</doc>
<pattern pos="46">1</pattern>
<pattern low="55" high="58">1101</pattern> <!-- OPC -->
</bitset>
<expr name="#wmm-dest-half">
(!{DST_FULL})
</expr>
<bitset name="#instruction-cat3-wmm" extends="#instruction-cat3-base">
<gen min="600"/>
<derived name="IMMED_ENCODING" expr="#true" type="bool" display="h"/>
<derived name="SAT" expr="#false" type="bool" display=""/>
<derived name="SRC3_NEG" expr="#false" type="bool" display=""/>
<derived name="DST_HALF" expr="#wmm-dest-half" type="bool" display="h"/>
<field name="SRC1" low="0" high="12" type="#cat3-src">
<param name="HALF"/>
</field>
<pattern pos="13">1</pattern>
<field name="SRC1_NEG" pos="14" type="bool" display="(neg)"/>
<field name="SRC3" low="16" high="28" type="#cat3-src">
<param name="HALF"/>
<param name="IMMED_ENCODING"/>
</field>
<field name="SRC2_NEG" pos="30" type="bool" display="(neg)"/>
<field name="FULL" pos="31" type="bool" display=""/>
<field name="DST_FULL" pos="46" type="bool"/>
<encode>
<map name="SRC3">src->srcs[2]</map>
<map name="FULL">!(src->srcs[0]->flags &amp; IR3_REG_HALF)</map>
<map name="DST_FULL">
((src->dsts[0]->num >> 2) == 62) ? 1 :
!(src->dsts[0]->flags &amp; IR3_REG_HALF)
</map>
</encode>
</bitset>
<bitset name="wmm" extends="#instruction-cat3-wmm">
<doc>
Given:
SRC1 = (x_1, x_2, x_3, x_4) - 4 consecutive registers
SRC2 = (y_1, y_2, y_3, y_4) - 4 consecutive registers
SRC3 is an immediate in range of [0, 160]
Do:
float y_sum = y_1 + y_2 + y_3 + y_4
vec4 result = (x_1 * y_sum, x_2 * y_sum, x_3 * y_sum, x_4 * y_sum)
Starting from DST reg duplicate *result* into consecutive registers
(1 &lt;&lt; (SRC3 / 32)) times.
</doc>
<pattern pos="42">0</pattern>
<pattern low="55" high="58">1110</pattern> <!-- OPC -->
</bitset>
<bitset name="wmm.accu" extends="#instruction-cat3-wmm">
<doc>
Same as wmm but instead of overwriting DST - the result is
added to DST registers, however the first reg of the result
is always overwritten.
</doc>
<pattern pos="42">1</pattern>
<pattern low="55" high="58">1110</pattern> <!-- OPC -->
</bitset>
</isa>