intel/compiler: Enable the emission of ROR/ROL instructions

v2: 1) Drop changes for vec4 backend as on Gen11+ we don't support
       align16 mode (Matt Turner)

Signed-off-by: Sagar Ghuge <sagar.ghuge@intel.com>
Reviewed-by: Matt Turner <mattst88@gmail.com>
This commit is contained in:
Sagar Ghuge 2019-05-29 11:43:30 -07:00
parent 8d74749f81
commit 83fdec0f0d
6 changed files with 26 additions and 2 deletions

View File

@ -488,7 +488,13 @@ static const struct opcode_desc opcode_descs[128] = {
[BRW_OPCODE_ASR] = {
.name = "asr", .nsrc = 2, .ndst = 1, .gens = GEN_ALL,
},
/* Reserved - 13-15 */
/* Reserved - 13 */
[BRW_OPCODE_ROR] = {
.name = "ror", .nsrc = 2, .ndst = 1, .gens = GEN_GE(GEN11),
},
[BRW_OPCODE_ROL] = {
.name = "rol", .nsrc = 2, .ndst = 1, .gens = GEN_GE(GEN11),
},
[BRW_OPCODE_CMP] = {
.name = "cmp", .nsrc = 2, .ndst = 1, .gens = GEN_ALL,
},

View File

@ -204,6 +204,8 @@ ALU2(SHR)
ALU2(SHL)
ALU1(DIM)
ALU2(ASR)
ALU2(ROL)
ALU2(ROR)
ALU3(CSEL)
ALU1(F32TO16)
ALU1(F16TO32)

View File

@ -210,7 +210,9 @@ enum opcode {
BRW_OPCODE_SMOV = 10, /**< Gen8+ */ /* Reused */
/* Reserved - 11 */
BRW_OPCODE_ASR = 12,
/* Reserved - 13-15 */
/* Reserved - 13 */
BRW_OPCODE_ROR = 14, /**< Gen11+ */
BRW_OPCODE_ROL = 15, /**< Gen11+ */
BRW_OPCODE_CMP = 16,
BRW_OPCODE_CMPN = 17,
BRW_OPCODE_CSEL = 18, /**< Gen8+ */

View File

@ -981,6 +981,8 @@ ALU2(SHR)
ALU2(SHL)
ALU1(DIM)
ALU2(ASR)
ALU2(ROL)
ALU2(ROR)
ALU3(CSEL)
ALU1(FRC)
ALU1(RNDD)

View File

@ -594,6 +594,8 @@ namespace brw {
ALU1(RNDE)
ALU1(RNDU)
ALU1(RNDZ)
ALU2(ROL)
ALU2(ROR)
ALU2(SAD2)
ALU2_ACC(SADA2)
ALU2(SEL)

View File

@ -1796,6 +1796,16 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
case BRW_OPCODE_SHL:
brw_SHL(p, dst, src[0], src[1]);
break;
case BRW_OPCODE_ROL:
assert(devinfo->gen >= 11);
assert(src[0].type == dst.type);
brw_ROL(p, dst, src[0], src[1]);
break;
case BRW_OPCODE_ROR:
assert(devinfo->gen >= 11);
assert(src[0].type == dst.type);
brw_ROR(p, dst, src[0], src[1]);
break;
case BRW_OPCODE_F32TO16:
assert(devinfo->gen >= 7);
brw_F32TO16(p, dst, src[0]);