ir3: New cat3 instructions
* shrm - (src2 >> src1) & src3
* shlm - (src2 << src1) & src3
* shrg - (src2 >> src1) | src3
* shlg - (src2 << src1) | src3
* andg - (src2 & src1) | src3
* dp2acc - dot product of two {i,u}8vec2 packed into SRC1 and SRC2, added to 32b SRC3
* dp4acc - dot product of two {i,u}8vec4 packed into SRC1 and SRC2, added to 32b SRC3
* wmm - vec4(x_1, x_2, x_3, x_4) * (y_1 + y_2 + y_3 + y_4), which is duplicated (1 << (SRC3 / 32)) times starting from DST register
* wmm.accu - same as wmm but result is added to DST registers, however the first reg in each vec4 result is overwritten instead of accumulating.

Signed-off-by: Danylo Piliaiev <dpiliaiev@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13986>
This commit is contained in:
parent
c45c6e36eb
commit
c1d5c318bc
|
@ -258,7 +258,15 @@ static const struct opc_info {
|
|||
OPC(3, OPC_SEL_F32, sel.f32),
|
||||
OPC(3, OPC_SAD_S16, sad.s16),
|
||||
OPC(3, OPC_SAD_S32, sad.s32),
|
||||
OPC(3, OPC_SHLG_B16, shlg.b16),
|
||||
OPC(3, OPC_SHRM, shrm),
|
||||
OPC(3, OPC_SHLM, shlm),
|
||||
OPC(3, OPC_SHRG, shrg),
|
||||
OPC(3, OPC_SHLG, shlg),
|
||||
OPC(3, OPC_ANDG, andg),
|
||||
OPC(3, OPC_DP2ACC, dp2acc),
|
||||
OPC(3, OPC_DP4ACC, dp4acc),
|
||||
OPC(3, OPC_WMM, wmm),
|
||||
OPC(3, OPC_WMM_ACCU, wmm.accu),
|
||||
|
||||
/* category 4: */
|
||||
OPC(4, OPC_RCP, rcp),
|
||||
|
|
|
@ -198,7 +198,15 @@ typedef enum {
|
|||
OPC_SEL_F32 = _OPC(3, 13),
|
||||
OPC_SAD_S16 = _OPC(3, 14),
|
||||
OPC_SAD_S32 = _OPC(3, 15),
|
||||
OPC_SHLG_B16 = _OPC(3, 16),
|
||||
OPC_SHRM = _OPC(3, 16),
|
||||
OPC_SHLM = _OPC(3, 17),
|
||||
OPC_SHRG = _OPC(3, 18),
|
||||
OPC_SHLG = _OPC(3, 19),
|
||||
OPC_ANDG = _OPC(3, 20),
|
||||
OPC_DP2ACC = _OPC(3, 21),
|
||||
OPC_DP4ACC = _OPC(3, 22),
|
||||
OPC_WMM = _OPC(3, 23),
|
||||
OPC_WMM_ACCU = _OPC(3, 24),
|
||||
|
||||
/* category 4: */
|
||||
OPC_RCP = _OPC(4, 0),
|
||||
|
|
|
@ -922,12 +922,29 @@ ir3_valid_flags(struct ir3_instruction *instr, unsigned n, unsigned flags)
|
|||
valid_flags =
|
||||
ir3_cat3_absneg(instr->opc) | IR3_REG_RELATIV | IR3_REG_SHARED;
|
||||
|
||||
if (instr->opc == OPC_SHLG_B16) {
|
||||
switch (instr->opc) {
|
||||
case OPC_SHRM:
|
||||
case OPC_SHLM:
|
||||
case OPC_SHRG:
|
||||
case OPC_SHLG:
|
||||
case OPC_ANDG: {
|
||||
valid_flags |= IR3_REG_IMMED;
|
||||
/* shlg.b16 can be RELATIV+CONST but not CONST: */
|
||||
/* Can be RELATIV+CONST but not CONST: */
|
||||
if (flags & IR3_REG_RELATIV)
|
||||
valid_flags |= IR3_REG_CONST;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
case OPC_WMM:
|
||||
case OPC_WMM_ACCU: {
|
||||
valid_flags = IR3_REG_SHARED;
|
||||
if (n == 2)
|
||||
valid_flags = IR3_REG_CONST;
|
||||
break;
|
||||
}
|
||||
case OPC_DP2ACC:
|
||||
case OPC_DP4ACC:
|
||||
break;
|
||||
default:
|
||||
valid_flags |= IR3_REG_CONST;
|
||||
}
|
||||
|
||||
|
|
|
@ -329,6 +329,16 @@ struct ir3_instruction {
|
|||
IR3_COND_NE = 5,
|
||||
} condition;
|
||||
} cat2;
|
||||
struct {
|
||||
enum {
|
||||
IR3_SRC_UNSIGNED = 0,
|
||||
IR3_SRC_MIXED = 1,
|
||||
} signedness;
|
||||
enum {
|
||||
IR3_SRC_PACKED_LOW = 0,
|
||||
IR3_SRC_PACKED_HIGH = 1,
|
||||
} packed;
|
||||
} cat3;
|
||||
struct {
|
||||
unsigned samp, tex;
|
||||
unsigned tex_base : 3;
|
||||
|
@ -1383,7 +1393,13 @@ ir3_cat3_absneg(opc_t opc)
|
|||
case OPC_SEL_B16:
|
||||
case OPC_SEL_B32:
|
||||
|
||||
case OPC_SHLG_B16:
|
||||
case OPC_SHRM:
|
||||
case OPC_SHLM:
|
||||
case OPC_SHRG:
|
||||
case OPC_SHLG:
|
||||
case OPC_ANDG:
|
||||
case OPC_WMM:
|
||||
case OPC_WMM_ACCU:
|
||||
|
||||
default:
|
||||
return 0;
|
||||
|
@ -1407,6 +1423,8 @@ ir3_output_conv_type(struct ir3_instruction *instr, bool *can_fold)
|
|||
case OPC_BARY_F:
|
||||
case OPC_MAD_F32:
|
||||
case OPC_MAD_F16:
|
||||
case OPC_WMM:
|
||||
case OPC_WMM_ACCU:
|
||||
return TYPE_F32;
|
||||
|
||||
case OPC_ADD_U:
|
||||
|
@ -1423,6 +1441,11 @@ ir3_output_conv_type(struct ir3_instruction *instr, bool *can_fold)
|
|||
case OPC_SHR_B:
|
||||
case OPC_ASHR_B:
|
||||
case OPC_MAD_U24:
|
||||
case OPC_SHRM:
|
||||
case OPC_SHLM:
|
||||
case OPC_SHRG:
|
||||
case OPC_SHLG:
|
||||
case OPC_ANDG:
|
||||
/* Comparison ops zero-extend/truncate their results, so consider them as
|
||||
* unsigned here.
|
||||
*/
|
||||
|
@ -2213,6 +2236,8 @@ INSTR3(MAD_U24)
|
|||
INSTR3(MAD_S24)
|
||||
INSTR3(MAD_F16)
|
||||
INSTR3(MAD_F32)
|
||||
INSTR3(DP2ACC)
|
||||
INSTR3(DP4ACC)
|
||||
/* NOTE: SEL_B32 checks for zero vs nonzero */
|
||||
INSTR3(SEL_B16)
|
||||
INSTR3(SEL_B32)
|
||||
|
|
|
@ -244,7 +244,15 @@ static int parse_reg(const char *str)
|
|||
"sel.f32" return TOKEN(T_OP_SEL_F32);
|
||||
"sad.s16" return TOKEN(T_OP_SAD_S16);
|
||||
"sad.s32" return TOKEN(T_OP_SAD_S32);
|
||||
"shlg.b16" return TOKEN(T_OP_SHLG_B16);
|
||||
"shrm" return TOKEN(T_OP_SHRM);
|
||||
"shlm" return TOKEN(T_OP_SHLM);
|
||||
"shrg" return TOKEN(T_OP_SHRG);
|
||||
"shlg" return TOKEN(T_OP_SHLG);
|
||||
"andg" return TOKEN(T_OP_ANDG);
|
||||
"dp2acc" return TOKEN(T_OP_DP2ACC);
|
||||
"dp4acc" return TOKEN(T_OP_DP4ACC);
|
||||
"wmm" return TOKEN(T_OP_WMM);
|
||||
"wmm.accu" return TOKEN(T_OP_WMM_ACCU);
|
||||
|
||||
/* category 4: */
|
||||
"rcp" return TOKEN(T_OP_RCP);
|
||||
|
@ -383,6 +391,11 @@ static int parse_reg(const char *str)
|
|||
"untyped" return TOKEN(T_UNTYPED);
|
||||
"typed" return TOKEN(T_TYPED);
|
||||
|
||||
"unsigned" return TOKEN(T_UNSIGNED);
|
||||
"mixed" return TOKEN(T_MIXED);
|
||||
"low" return TOKEN(T_LOW);
|
||||
"high" return TOKEN(T_HIGH);
|
||||
|
||||
"1d" return TOKEN(T_1D);
|
||||
"2d" return TOKEN(T_2D);
|
||||
"3d" return TOKEN(T_3D);
|
||||
|
|
|
@ -484,7 +484,15 @@ static void print_token(FILE *file, int type, YYSTYPE value)
|
|||
%token <tok> T_OP_SEL_F32
|
||||
%token <tok> T_OP_SAD_S16
|
||||
%token <tok> T_OP_SAD_S32
|
||||
%token <tok> T_OP_SHLG_B16
|
||||
%token <tok> T_OP_SHRM
|
||||
%token <tok> T_OP_SHLM
|
||||
%token <tok> T_OP_SHRG
|
||||
%token <tok> T_OP_SHLG
|
||||
%token <tok> T_OP_ANDG
|
||||
%token <tok> T_OP_DP2ACC
|
||||
%token <tok> T_OP_DP4ACC
|
||||
%token <tok> T_OP_WMM
|
||||
%token <tok> T_OP_WMM_ACCU
|
||||
|
||||
/* category 4: */
|
||||
%token <tok> T_OP_RCP
|
||||
|
@ -623,6 +631,11 @@ static void print_token(FILE *file, int type, YYSTYPE value)
|
|||
%token <tok> T_UNTYPED
|
||||
%token <tok> T_TYPED
|
||||
|
||||
%token <tok> T_MIXED
|
||||
%token <tok> T_UNSIGNED
|
||||
%token <tok> T_LOW
|
||||
%token <tok> T_HIGH
|
||||
|
||||
%token <tok> T_1D
|
||||
%token <tok> T_2D
|
||||
%token <tok> T_3D
|
||||
|
@ -949,6 +962,12 @@ cat2_instr: cat2_opc_1src dst_reg ',' src_reg_or_const_or_rel_or_imm
|
|||
| cat2_opc_2src_cnd '.' cond dst_reg ',' src_reg_or_const_or_rel_or_imm ',' src_reg_or_const_or_rel_or_imm
|
||||
| cat2_opc_2src dst_reg ',' src_reg_or_const_or_rel_or_imm ',' src_reg_or_const_or_rel_or_imm
|
||||
|
||||
cat3_dp_signedness:'.' T_MIXED { instr->cat3.signedness = IR3_SRC_MIXED; }
|
||||
| '.' T_UNSIGNED{ instr->cat3.signedness = IR3_SRC_UNSIGNED; }
|
||||
|
||||
cat3_dp_pack: '.' T_LOW { instr->cat3.packed = IR3_SRC_PACKED_LOW; }
|
||||
| '.' T_HIGH { instr->cat3.packed = IR3_SRC_PACKED_HIGH; }
|
||||
|
||||
cat3_opc: T_OP_MAD_U16 { new_instr(OPC_MAD_U16); }
|
||||
| T_OP_MADSH_U16 { new_instr(OPC_MADSH_U16); }
|
||||
| T_OP_MAD_S16 { new_instr(OPC_MAD_S16); }
|
||||
|
@ -966,8 +985,22 @@ cat3_opc: T_OP_MAD_U16 { new_instr(OPC_MAD_U16); }
|
|||
| T_OP_SAD_S16 { new_instr(OPC_SAD_S16); }
|
||||
| T_OP_SAD_S32 { new_instr(OPC_SAD_S32); }
|
||||
|
||||
cat3_imm_reg_opc: T_OP_SHRM { new_instr(OPC_SHRM); }
|
||||
| T_OP_SHLM { new_instr(OPC_SHLM); }
|
||||
| T_OP_SHRG { new_instr(OPC_SHRG); }
|
||||
| T_OP_SHLG { new_instr(OPC_SHLG); }
|
||||
| T_OP_ANDG { new_instr(OPC_ANDG); }
|
||||
|
||||
cat3_wmm: T_OP_WMM { new_instr(OPC_WMM); }
|
||||
| T_OP_WMM_ACCU { new_instr(OPC_WMM_ACCU); }
|
||||
|
||||
cat3_dp: T_OP_DP2ACC { new_instr(OPC_DP2ACC); }
|
||||
| T_OP_DP4ACC { new_instr(OPC_DP4ACC); }
|
||||
|
||||
cat3_instr: cat3_opc dst_reg ',' src_reg_or_const_or_rel ',' src_reg_or_const ',' src_reg_or_const_or_rel
|
||||
| T_OP_SHLG_B16 { new_instr(OPC_SHLG_B16); } dst_reg ',' src_reg_or_rel_or_imm ',' src_reg_or_const ',' src_reg_or_rel_or_imm
|
||||
| cat3_imm_reg_opc dst_reg ',' src_reg_or_rel_or_imm ',' src_reg_or_const ',' src_reg_or_rel_or_imm
|
||||
| cat3_wmm dst_reg ',' src_reg_gpr ',' src_reg ',' immediate
|
||||
| cat3_dp cat3_dp_signedness cat3_dp_pack dst_reg ',' src_reg_or_rel_or_imm ',' src_reg_or_const ',' src_reg_or_rel_or_imm
|
||||
|
||||
cat4_opc: T_OP_RCP { new_instr(OPC_RCP); }
|
||||
| T_OP_RSQ { new_instr(OPC_RSQ); }
|
||||
|
@ -1267,6 +1300,9 @@ src_reg_flags: src_reg_flag
|
|||
src_reg: src
|
||||
| src_reg_flags src
|
||||
|
||||
src_reg_gpr: src_reg
|
||||
| relative_gpr_src
|
||||
|
||||
src_const: const
|
||||
| src_reg_flags const
|
||||
|
||||
|
|
|
@ -127,9 +127,20 @@ static const struct test {
|
|||
INSTR_6XX(63820005_10315030, "mad.f32 r1.y, (neg)c12.x, r1.x, c12.y"),
|
||||
INSTR_6XX(62050009_00091000, "mad.u24 r2.y, c0.x, r2.z, r2.y"),
|
||||
INSTR_6XX(61828008_00081033, "madsh.m16 r2.x, c12.w, r1.y, r2.x"),
|
||||
INSTR_6XX(65900820_100cb008, "(nop3) shlg.b16 hr8.x, 8, hr8.x, 12"), /* (nop3) shlg.b16 hr8.x, (r)8, (r)hr8.x, 12; */
|
||||
INSTR_6XX(65ae085c_0002a001, "(nop3) shlg.b16 hr23.x, hr0.y, hr23.x, hr0.z"), /* not seen in blob */
|
||||
INSTR_6XX(65900820_0c0aac05, "(nop3) shlg.b16 hr8.x, hc<a0.x + 5>, hr8.x, hc<a0.x + 10>"), /* not seen in blob */
|
||||
INSTR_6XX(65900820_100cb008, "(nop3) shlg hr8.x, 8, hr8.x, 12"), /* (nop3) shlg.b16 hr8.x, (r)8, (r)hr8.x, 12; */
|
||||
INSTR_6XX(65ae085c_0002a001, "(nop3) shlg hr23.x, hr0.y, hr23.x, hr0.z"), /* not seen in blob */
|
||||
INSTR_6XX(65900820_0c0aac05, "(nop3) shlg hr8.x, hc<a0.x + 5>, hr8.x, hc<a0.x + 10>"), /* not seen in blob */
|
||||
INSTR_6XX(65ae0c5c_0002a001, "(nop3) shlg r23.x, r0.y, r23.x, r0.z"), /* (nop3) shlg.b32 r23.x, (r)r0.y, (r)r23.x, r0.z */
|
||||
INSTR_6XX(64018802_0002e003, "(nop3) shrm hr0.z, (neg)hr0.w, hr0.w, hr0.z"),
|
||||
INSTR_6XX(64818802_0002e003, "(nop3) shlm hr0.z, (neg)hr0.w, hr0.w, hr0.z"),
|
||||
INSTR_6XX(65018802_0002e003, "(nop3) shrg hr0.z, (neg)hr0.w, hr0.w, hr0.z"),
|
||||
INSTR_6XX(66018802_0002e003, "(nop3) andg hr0.z, (neg)hr0.w, hr0.w, hr0.z"),
|
||||
INSTR_6XX(67018802_1002e003, "(nop3) wmm hr0.z, (neg)hr0.w, hr0.w, 2"), /* (nop3) wmm.f16f16 hr0.z, (abs)(r)hr0.w, (r)hr0.w, 2 */
|
||||
INSTR_6XX(67018c02_1002e003, "(nop3) wmm.accu hr0.z, (neg)hr0.w, hr0.w, 2"),
|
||||
INSTR_6XX(6701c802_9002a003, "(nop3) wmm r0.z, r0.w, r0.w, 2"), /* (nop3) wmm.f32f32 r0.z, (r)r0.w, (r)r0.w, 2 */
|
||||
/* custom test with qcom_dot8 function from cl_qcom_dot_product8 */
|
||||
INSTR_6XX(66818c02_0002e003, "(sat)(nop3) dp2acc.mixed.low r0.z, r0.w, r0.w, r0.z"), /* (nop3) dp2acc (sat)r0.z, (signed)(low)(r)r0.w, (low)(r)r0.w, r0.z */
|
||||
INSTR_6XX(6681c802_8002a003, "(nop3) dp4acc.unsigned.low r0.z, r0.w, r0.w, (neg)r0.z"), /* (nop3) dp4acc r0.z, (unsigned)(r)r0.w, (r)r0.w, (neg)r0.z */
|
||||
|
||||
/* cat4 */
|
||||
INSTR_6XX(8010000a_00000003, "rcp r2.z, r0.w"),
|
||||
|
|
|
@ -106,24 +106,13 @@ SOFTWARE.
|
|||
<display>
|
||||
{SY}{SS}{JP}{SAT}{REPEAT}{UL}{NAME} {DST_HALF}{DST}, {SRC1_NEG}{SRC1_R}{SRC1}, {SRC2_NEG}{SRC2_R}{HALF}{SRC2}, {SRC3_NEG}{SRC3_R}{SRC3}
|
||||
</display>
|
||||
<field name="SRC1_NEG" pos="14" type="bool" display="(neg)"/>
|
||||
<field name="SRC2_R" pos="15" type="bool" display="(r)"/>
|
||||
<field name="SRC3_R" pos="29" type="bool" display="(r)"/>
|
||||
<field name="SRC2_NEG" pos="30" type="bool" display="(neg)"/>
|
||||
<field name="SRC3_NEG" pos="31" type="bool" display="(neg)"/>
|
||||
<field name="DST" low="32" high="39" type="#reg-gpr"/>
|
||||
<field name="REPEAT" low="40" high="41" type="#rptN"/>
|
||||
<field name="SAT" pos="42" type="bool" display="(sat)"/>
|
||||
<field name="SRC1_R" pos="43" type="bool" display="(r)"/>
|
||||
<field name="SS" pos="44" type="bool" display="(ss)"/>
|
||||
<field name="UL" pos="45" type="bool" display="(ul)"/>
|
||||
<field name="DST_CONV" pos="46" type="bool">
|
||||
<doc>
|
||||
The source precision is determined by the instruction
|
||||
opcode. If {DST_CONV} the result is widened/narrowed
|
||||
to the opposite precision.
|
||||
</doc>
|
||||
</field>
|
||||
<field name="SRC2" low="47" high="54" type="#reg-gpr"/>
|
||||
<!-- opcode, 4 bits -->
|
||||
<field name="JP" pos="59" type="bool" display="(jp)"/>
|
||||
|
@ -139,6 +128,38 @@ SOFTWARE.
|
|||
<map name="SRC2_NEG">!!(src->srcs[1]->flags & (IR3_REG_FNEG | IR3_REG_SNEG | IR3_REG_BNOT))</map>
|
||||
<map name="SRC3_NEG">!!(src->srcs[2]->flags & (IR3_REG_FNEG | IR3_REG_SNEG | IR3_REG_BNOT))</map>
|
||||
<map name="SRC1">src->srcs[0]</map>
|
||||
</encode>
|
||||
</bitset>
|
||||
|
||||
<bitset name="#instruction-cat3" extends="#instruction-cat3-base">
|
||||
<derived name="IMMED_ENCODING" expr="#false" type="bool" display="h"/>
|
||||
|
||||
<field name="SRC1" low="0" high="12" type="#cat3-src">
|
||||
<param name="HALF"/>
|
||||
<param name="IMMED_ENCODING"/>
|
||||
</field>
|
||||
<field name="SRC1_NEG" pos="14" type="bool" display="(neg)"/>
|
||||
|
||||
<pattern pos="13">0</pattern>
|
||||
|
||||
<field name="SRC3" low="16" high="28" type="#cat3-src">
|
||||
<param name="HALF"/>
|
||||
<param name="IMMED_ENCODING"/>
|
||||
</field>
|
||||
|
||||
<field name="SRC2_NEG" pos="30" type="bool" display="(neg)"/>
|
||||
<field name="SRC3_NEG" pos="31" type="bool" display="(neg)"/>
|
||||
<field name="SAT" pos="42" type="bool" display="(sat)"/>
|
||||
|
||||
<field name="DST_CONV" pos="46" type="bool">
|
||||
<doc>
|
||||
The source precision is determined by the instruction
|
||||
opcode. If {DST_CONV} the result is widened/narrowed
|
||||
to the opposite precision.
|
||||
</doc>
|
||||
</field>
|
||||
|
||||
<encode>
|
||||
<map name="DST_CONV">
|
||||
((src->dsts[0]->num >> 2) == 62) ? 0 :
|
||||
!!((src->srcs[0]->flags ^ src->dsts[0]->flags) & IR3_REG_HALF)
|
||||
|
@ -146,21 +167,7 @@ SOFTWARE.
|
|||
</encode>
|
||||
</bitset>
|
||||
|
||||
<bitset name="#instruction-cat3" extends="#instruction-cat3-base">
|
||||
<pattern pos="13">0</pattern>
|
||||
|
||||
<derived name="IMMED_ENCODING" expr="#false" type="bool" display="h"/>
|
||||
|
||||
<field name="SRC1" low="0" high="12" type="#cat3-src">
|
||||
<param name="HALF"/>
|
||||
<param name="IMMED_ENCODING"/>
|
||||
</field>
|
||||
<field name="SRC3" low="16" high="28" type="#cat3-src">
|
||||
<param name="HALF"/>
|
||||
<param name="IMMED_ENCODING"/>
|
||||
</field>
|
||||
</bitset>
|
||||
|
||||
<!-- TODO check on pre a6xx gens -->
|
||||
<bitset name="#instruction-cat3-alt" extends="#instruction-cat3-base">
|
||||
<doc>
|
||||
The difference is that this cat3 version does not support plain
|
||||
|
@ -168,38 +175,39 @@ SOFTWARE.
|
|||
On the other hand it still supports relative gpr and consts.
|
||||
</doc>
|
||||
|
||||
<pattern pos="13">1</pattern>
|
||||
<gen min="600"/>
|
||||
|
||||
<derived name="IMMED_ENCODING" expr="#true" type="bool" display="h"/>
|
||||
<derived name="SAT" expr="#false" type="bool" display=""/>
|
||||
|
||||
<field name="SRC1" low="0" high="12" type="#cat3-src">
|
||||
<param name="HALF"/>
|
||||
<param name="IMMED_ENCODING"/>
|
||||
</field>
|
||||
<field name="SRC1_NEG" pos="14" type="bool" display="(neg)"/>
|
||||
|
||||
<pattern pos="13">1</pattern>
|
||||
|
||||
<field name="SRC3" low="16" high="28" type="#cat3-src">
|
||||
<param name="HALF"/>
|
||||
<param name="IMMED_ENCODING"/>
|
||||
</field>
|
||||
|
||||
<field name="SRC2_NEG" pos="30" type="bool" display="(neg)"/>
|
||||
<field name="SRC3_NEG" pos="31" type="bool" display="(neg)"/>
|
||||
<field name="FULL" pos="42" type="bool"/>
|
||||
<field name="DST_CONV" pos="46" type="bool"/>
|
||||
|
||||
<encode>
|
||||
<map name="SRC3">src->srcs[2]</map>
|
||||
<map name="DST_CONV">false</map>
|
||||
<map name="FULL">!(src->srcs[1]->flags & IR3_REG_HALF)</map>
|
||||
<map name="DST_CONV">
|
||||
((src->dsts[0]->num >> 2) == 62) ? 0 :
|
||||
!!((src->srcs[1]->flags ^ src->dsts[0]->flags) & IR3_REG_HALF)
|
||||
</map>
|
||||
</encode>
|
||||
</bitset>
|
||||
|
||||
<!-- TODO find shlg.b32 -->
|
||||
<bitset name="shlg.b16" extends="#instruction-cat3-alt">
|
||||
<doc>
|
||||
(src2 << src1) | src3
|
||||
</doc>
|
||||
|
||||
<!-- TODO check older gens -->
|
||||
<gen min="600"/>
|
||||
|
||||
<pattern low="55" high="58">1011</pattern> <!-- OPC -->
|
||||
<derived name="FULL" expr="#false" type="bool"/>
|
||||
</bitset>
|
||||
|
||||
<bitset name="mad.u16" extends="#instruction-cat3">
|
||||
<pattern low="55" high="58">0000</pattern> <!-- OPC -->
|
||||
<derived name="FULL" expr="#false" type="bool"/>
|
||||
|
@ -280,4 +288,177 @@ SOFTWARE.
|
|||
<derived name="FULL" expr="#false" type="bool"/> <!-- We think? -->
|
||||
</bitset>
|
||||
|
||||
<bitset name="shrm" extends="#instruction-cat3-alt">
|
||||
<doc>
|
||||
(src2 >> src1) & src3
|
||||
</doc>
|
||||
|
||||
<pattern low="55" high="58">1000</pattern> <!-- OPC -->
|
||||
</bitset>
|
||||
|
||||
<bitset name="shlm" extends="#instruction-cat3-alt">
|
||||
<doc>
|
||||
(src2 << src1) & src3
|
||||
</doc>
|
||||
|
||||
<pattern low="55" high="58">1001</pattern> <!-- OPC -->
|
||||
</bitset>
|
||||
|
||||
<bitset name="shrg" extends="#instruction-cat3-alt">
|
||||
<doc>
|
||||
(src2 >> src1) | src3
|
||||
</doc>
|
||||
|
||||
<pattern low="55" high="58">1010</pattern> <!-- OPC -->
|
||||
</bitset>
|
||||
|
||||
<bitset name="shlg" extends="#instruction-cat3-alt">
|
||||
<doc>
|
||||
(src2 << src1) | src3
|
||||
</doc>
|
||||
|
||||
<pattern low="55" high="58">1011</pattern> <!-- OPC -->
|
||||
</bitset>
|
||||
|
||||
<bitset name="andg" extends="#instruction-cat3-alt">
|
||||
<doc>
|
||||
(src2 & src1) | src3
|
||||
</doc>
|
||||
|
||||
<pattern low="55" high="58">1100</pattern> <!-- OPC -->
|
||||
</bitset>
|
||||
|
||||
<enum name="#signedness">
|
||||
<value val="0" display=".unsigned"/>
|
||||
<value val="1" display=".mixed"/>
|
||||
</enum>
|
||||
|
||||
<enum name="#8bitvec2pack">
|
||||
<value val="0" display=".low"/>
|
||||
<value val="1" display=".high"/>
|
||||
</enum>
|
||||
|
||||
<bitset name="#instruction-cat3-dp" extends="#instruction-cat3-base">
|
||||
<gen min="600"/>
|
||||
|
||||
<display>
|
||||
{SY}{SS}{JP}{SAT}(nop{NOP}) {UL}{NAME}{SRC_SIGN}{SRC_PACK} {DST}, {SRC1}, {SRC2}, {SRC3_NEG}{SRC3}
|
||||
</display>
|
||||
|
||||
<derived name="FULL" expr="#true" type="bool"/>
|
||||
|
||||
<field name="SRC1" low="0" high="12" type="#cat3-src">
|
||||
<param name="HALF"/>
|
||||
</field>
|
||||
<field name="SRC_SIGN" pos="14" type="#signedness"/>
|
||||
|
||||
<pattern pos="13">1</pattern>
|
||||
|
||||
<field name="SRC3" low="16" high="28" type="#cat3-src">
|
||||
<param name="HALF"/>
|
||||
</field>
|
||||
<field name="SRC_PACK" pos="30" type="#8bitvec2pack"/>
|
||||
<field name="SRC3_NEG" pos="31" type="bool" display="(neg)"/>
|
||||
<field name="SAT" pos="42" type="bool" display="(sat)"/>
|
||||
|
||||
<encode>
|
||||
<map name="SRC3">src->srcs[2]</map>
|
||||
<map name="SRC_SIGN">src->cat3.signedness</map>
|
||||
<map name="SRC_PACK">src->cat3.packed</map>
|
||||
</encode>
|
||||
</bitset>
|
||||
|
||||
<bitset name="dp2acc" extends="#instruction-cat3-dp">
|
||||
<doc>
|
||||
Given:
|
||||
SRC1 is an i8vec2 or u8vec2
|
||||
SRC2 is a u8vec2
|
||||
SRC1 and SRC2 are packed into low or high halves of the registers.
|
||||
SRC3 is an int32_t or uint32_t
|
||||
Do:
|
||||
DST = dot(SRC1, SRC2) + SRC3
|
||||
</doc>
|
||||
|
||||
<pattern pos="46">0</pattern>
|
||||
<pattern low="55" high="58">1101</pattern> <!-- OPC -->
|
||||
</bitset>
|
||||
|
||||
<bitset name="dp4acc" extends="#instruction-cat3-dp">
|
||||
<doc>
|
||||
Same as dp2acc but for vec4 instead of vec2.
|
||||
Corresponds to packed variants of OpUDotKHR and OpSUDotKHR.
|
||||
</doc>
|
||||
|
||||
<pattern pos="46">1</pattern>
|
||||
<pattern low="55" high="58">1101</pattern> <!-- OPC -->
|
||||
</bitset>
|
||||
|
||||
<expr name="#wmm-dest-half">
|
||||
(!{DST_FULL})
|
||||
</expr>
|
||||
|
||||
<bitset name="#instruction-cat3-wmm" extends="#instruction-cat3-base">
|
||||
<gen min="600"/>
|
||||
|
||||
<derived name="IMMED_ENCODING" expr="#true" type="bool" display="h"/>
|
||||
<derived name="SAT" expr="#false" type="bool" display=""/>
|
||||
<derived name="SRC3_NEG" expr="#false" type="bool" display=""/>
|
||||
<derived name="DST_HALF" expr="#wmm-dest-half" type="bool" display="h"/>
|
||||
|
||||
<field name="SRC1" low="0" high="12" type="#cat3-src">
|
||||
<param name="HALF"/>
|
||||
</field>
|
||||
|
||||
<pattern pos="13">1</pattern>
|
||||
<field name="SRC1_NEG" pos="14" type="bool" display="(neg)"/>
|
||||
|
||||
<field name="SRC3" low="16" high="28" type="#cat3-src">
|
||||
<param name="HALF"/>
|
||||
<param name="IMMED_ENCODING"/>
|
||||
</field>
|
||||
|
||||
<field name="SRC2_NEG" pos="30" type="bool" display="(neg)"/>
|
||||
<field name="FULL" pos="31" type="bool" display=""/>
|
||||
<field name="DST_FULL" pos="46" type="bool"/>
|
||||
|
||||
<encode>
|
||||
<map name="SRC3">src->srcs[2]</map>
|
||||
<map name="FULL">!(src->srcs[0]->flags & IR3_REG_HALF)</map>
|
||||
<map name="DST_FULL">
|
||||
((src->dsts[0]->num >> 2) == 62) ? 1 :
|
||||
!(src->dsts[0]->flags & IR3_REG_HALF)
|
||||
</map>
|
||||
</encode>
|
||||
</bitset>
|
||||
|
||||
<bitset name="wmm" extends="#instruction-cat3-wmm">
|
||||
<doc>
|
||||
Given:
|
||||
SRC1 = (x_1, x_2, x_3, x_4) - 4 consecutive registers
|
||||
SRC2 = (y_1, y_2, y_3, y_4) - 4 consecutive registers
|
||||
SRC3 is an immediate in range of [0, 160]
|
||||
|
||||
Do:
|
||||
float y_sum = y_1 + y_2 + y_3 + y_4
|
||||
vec4 result = (x_1 * y_sum, x_2 * y_sum, x_3 * y_sum, x_4 * y_sum)
|
||||
|
||||
Starting from DST reg duplicate *result* into consecutive registers
|
||||
(1 << (SRC3 / 32)) times.
|
||||
</doc>
|
||||
|
||||
<pattern pos="42">0</pattern>
|
||||
<pattern low="55" high="58">1110</pattern> <!-- OPC -->
|
||||
</bitset>
|
||||
|
||||
<bitset name="wmm.accu" extends="#instruction-cat3-wmm">
|
||||
<doc>
|
||||
Same as wmm but instead of overwriting DST - the result is
|
||||
added to DST registers, however the first reg of the result
|
||||
is always overwritten.
|
||||
</doc>
|
||||
|
||||
<pattern pos="42">1</pattern>
|
||||
<pattern low="55" high="58">1110</pattern> <!-- OPC -->
|
||||
</bitset>
|
||||
|
||||
</isa>
|
||||
|
|
Loading…
Reference in New Issue