gallium: Add support for 32x32 muls with 64 bit results
The code introduces two new 32-bit integer multiplication opcodes which can be used to produce correct 64-bit results. GLSL, OpenCL and D3D10+ require them. We use two separate opcodes because they match the behavior of GLSL and OpenCL, are a lot easier to add than a single opcode with multiple destinations, and because there's not much (if any) difference with respect to code generation. Signed-off-by: Zack Rusin <zackr@vmware.com> Reviewed-by: José Fonseca <jfonseca@vmware.com> Reviewed-by: Roland Scheidegger <sroland@vmware.com> Reviewed-by: Brian Paul <brianp@vmware.com>
This commit is contained in:
parent
c01c6a95b4
commit
6905698fc2
|
@ -3477,6 +3477,32 @@ micro_umul(union tgsi_exec_channel *dst,
|
||||||
dst->u[3] = src0->u[3] * src1->u[3];
|
dst->u[3] = src0->u[3] * src1->u[3];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
micro_imul_hi(union tgsi_exec_channel *dst,
|
||||||
|
const union tgsi_exec_channel *src0,
|
||||||
|
const union tgsi_exec_channel *src1)
|
||||||
|
{
|
||||||
|
#define I64M(x, y) ((((int64_t)x) * ((int64_t)y)) >> 32)
|
||||||
|
dst->i[0] = I64M(src0->i[0], src1->i[0]);
|
||||||
|
dst->i[1] = I64M(src0->i[1], src1->i[1]);
|
||||||
|
dst->i[2] = I64M(src0->i[2], src1->i[2]);
|
||||||
|
dst->i[3] = I64M(src0->i[3], src1->i[3]);
|
||||||
|
#undef I64M
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
micro_umul_hi(union tgsi_exec_channel *dst,
|
||||||
|
const union tgsi_exec_channel *src0,
|
||||||
|
const union tgsi_exec_channel *src1)
|
||||||
|
{
|
||||||
|
#define U64M(x, y) ((((uint64_t)x) * ((uint64_t)y)) >> 32)
|
||||||
|
dst->u[0] = U64M(src0->u[0], src1->u[0]);
|
||||||
|
dst->u[1] = U64M(src0->u[1], src1->u[1]);
|
||||||
|
dst->u[2] = U64M(src0->u[2], src1->u[2]);
|
||||||
|
dst->u[3] = U64M(src0->u[3], src1->u[3]);
|
||||||
|
#undef U64M
|
||||||
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
micro_useq(union tgsi_exec_channel *dst,
|
micro_useq(union tgsi_exec_channel *dst,
|
||||||
const union tgsi_exec_channel *src0,
|
const union tgsi_exec_channel *src0,
|
||||||
|
@ -4277,6 +4303,14 @@ exec_instruction(
|
||||||
exec_vector_binary(mach, inst, micro_umul, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
|
exec_vector_binary(mach, inst, micro_umul, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case TGSI_OPCODE_IMUL_HI:
|
||||||
|
exec_vector_binary(mach, inst, micro_imul_hi, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
|
||||||
|
break;
|
||||||
|
|
||||||
|
case TGSI_OPCODE_UMUL_HI:
|
||||||
|
exec_vector_binary(mach, inst, micro_umul_hi, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
|
||||||
|
break;
|
||||||
|
|
||||||
case TGSI_OPCODE_USEQ:
|
case TGSI_OPCODE_USEQ:
|
||||||
exec_vector_binary(mach, inst, micro_useq, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
|
exec_vector_binary(mach, inst, micro_useq, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
|
||||||
break;
|
break;
|
||||||
|
|
|
@ -219,6 +219,8 @@ static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] =
|
||||||
{ 1, 3, 1, 0, 0, 0, OTHR, "TEX2", TGSI_OPCODE_TEX2 },
|
{ 1, 3, 1, 0, 0, 0, OTHR, "TEX2", TGSI_OPCODE_TEX2 },
|
||||||
{ 1, 3, 1, 0, 0, 0, OTHR, "TXB2", TGSI_OPCODE_TXB2 },
|
{ 1, 3, 1, 0, 0, 0, OTHR, "TXB2", TGSI_OPCODE_TXB2 },
|
||||||
{ 1, 3, 1, 0, 0, 0, OTHR, "TXL2", TGSI_OPCODE_TXL2 },
|
{ 1, 3, 1, 0, 0, 0, OTHR, "TXL2", TGSI_OPCODE_TXL2 },
|
||||||
|
{ 1, 2, 0, 0, 0, 0, COMP, "IMUL_HI", TGSI_OPCODE_IMUL_HI },
|
||||||
|
{ 1, 2, 0, 0, 0, 0, COMP, "UMUL_HI", TGSI_OPCODE_UMUL_HI },
|
||||||
};
|
};
|
||||||
|
|
||||||
const struct tgsi_opcode_info *
|
const struct tgsi_opcode_info *
|
||||||
|
@ -297,6 +299,7 @@ tgsi_opcode_infer_type( uint opcode )
|
||||||
case TGSI_OPCODE_USLT:
|
case TGSI_OPCODE_USLT:
|
||||||
case TGSI_OPCODE_USNE:
|
case TGSI_OPCODE_USNE:
|
||||||
case TGSI_OPCODE_SVIEWINFO:
|
case TGSI_OPCODE_SVIEWINFO:
|
||||||
|
case TGSI_OPCODE_UMUL_HI:
|
||||||
return TGSI_TYPE_UNSIGNED;
|
return TGSI_TYPE_UNSIGNED;
|
||||||
case TGSI_OPCODE_ARL:
|
case TGSI_OPCODE_ARL:
|
||||||
case TGSI_OPCODE_ARR:
|
case TGSI_OPCODE_ARR:
|
||||||
|
@ -317,6 +320,7 @@ tgsi_opcode_infer_type( uint opcode )
|
||||||
case TGSI_OPCODE_UARL:
|
case TGSI_OPCODE_UARL:
|
||||||
case TGSI_OPCODE_IABS:
|
case TGSI_OPCODE_IABS:
|
||||||
case TGSI_OPCODE_ISSG:
|
case TGSI_OPCODE_ISSG:
|
||||||
|
case TGSI_OPCODE_IMUL_HI:
|
||||||
return TGSI_TYPE_SIGNED;
|
return TGSI_TYPE_SIGNED;
|
||||||
default:
|
default:
|
||||||
return TGSI_TYPE_FLOAT;
|
return TGSI_TYPE_FLOAT;
|
||||||
|
@ -339,7 +343,9 @@ tgsi_opcode_infer_src_type( uint opcode )
|
||||||
case TGSI_OPCODE_CASE:
|
case TGSI_OPCODE_CASE:
|
||||||
case TGSI_OPCODE_SAMPLE_I:
|
case TGSI_OPCODE_SAMPLE_I:
|
||||||
case TGSI_OPCODE_SAMPLE_I_MS:
|
case TGSI_OPCODE_SAMPLE_I_MS:
|
||||||
|
case TGSI_OPCODE_UMUL_HI:
|
||||||
return TGSI_TYPE_UNSIGNED;
|
return TGSI_TYPE_UNSIGNED;
|
||||||
|
case TGSI_OPCODE_IMUL_HI:
|
||||||
case TGSI_OPCODE_I2F:
|
case TGSI_OPCODE_I2F:
|
||||||
return TGSI_TYPE_SIGNED;
|
return TGSI_TYPE_SIGNED;
|
||||||
case TGSI_OPCODE_ARL:
|
case TGSI_OPCODE_ARL:
|
||||||
|
|
|
@ -204,6 +204,9 @@ OP12(SAMPLE_INFO)
|
||||||
|
|
||||||
OP13(UCMP)
|
OP13(UCMP)
|
||||||
|
|
||||||
|
OP12(IMUL_HI)
|
||||||
|
OP12(UMUL_HI)
|
||||||
|
|
||||||
#undef OP00
|
#undef OP00
|
||||||
#undef OP01
|
#undef OP01
|
||||||
#undef OP10
|
#undef OP10
|
||||||
|
|
|
@ -243,6 +243,8 @@ tgsi_util_get_inst_usage_mask(const struct tgsi_full_instruction *inst,
|
||||||
case TGSI_OPCODE_USHR:
|
case TGSI_OPCODE_USHR:
|
||||||
case TGSI_OPCODE_USLT:
|
case TGSI_OPCODE_USLT:
|
||||||
case TGSI_OPCODE_USNE:
|
case TGSI_OPCODE_USNE:
|
||||||
|
case TGSI_OPCODE_IMUL_HI:
|
||||||
|
case TGSI_OPCODE_UMUL_HI:
|
||||||
/* Channel-wise operations */
|
/* Channel-wise operations */
|
||||||
read_mask = write_mask;
|
read_mask = write_mask;
|
||||||
break;
|
break;
|
||||||
|
|
|
@ -1103,6 +1103,36 @@ Support for these opcodes indicated by PIPE_SHADER_CAP_INTEGERS (all of them?)
|
||||||
dst.w = src0.w \times src1.w
|
dst.w = src0.w \times src1.w
|
||||||
|
|
||||||
|
|
||||||
|
.. opcode:: IMUL_HI - Signed Integer Multiply High Bits
|
||||||
|
|
||||||
|
The high 32 bits of the 64-bit product of two signed integers are returned.
|
||||||
|
|
||||||
|
.. math::
|
||||||
|
|
||||||
|
dst.x = (src0.x \times src1.x) >> 32
|
||||||
|
|
||||||
|
dst.y = (src0.y \times src1.y) >> 32
|
||||||
|
|
||||||
|
dst.z = (src0.z \times src1.z) >> 32
|
||||||
|
|
||||||
|
dst.w = (src0.w \times src1.w) >> 32
|
||||||
|
|
||||||
|
|
||||||
|
.. opcode:: UMUL_HI - Unsigned Integer Multiply High Bits
|
||||||
|
|
||||||
|
The high 32 bits of the 64-bit product of two unsigned integers are returned.
|
||||||
|
|
||||||
|
.. math::
|
||||||
|
|
||||||
|
dst.x = (src0.x \times src1.x) >> 32
|
||||||
|
|
||||||
|
dst.y = (src0.y \times src1.y) >> 32
|
||||||
|
|
||||||
|
dst.z = (src0.z \times src1.z) >> 32
|
||||||
|
|
||||||
|
dst.w = (src0.w \times src1.w) >> 32
|
||||||
|
|
||||||
|
|
||||||
.. opcode:: IDIV - Signed Integer Division
|
.. opcode:: IDIV - Signed Integer Division
|
||||||
|
|
||||||
TBD: behavior for division by zero.
|
TBD: behavior for division by zero.
|
||||||
|
|
|
@ -450,7 +450,10 @@ struct tgsi_property_data {
|
||||||
#define TGSI_OPCODE_TXB2 178
|
#define TGSI_OPCODE_TXB2 178
|
||||||
#define TGSI_OPCODE_TXL2 179
|
#define TGSI_OPCODE_TXL2 179
|
||||||
|
|
||||||
#define TGSI_OPCODE_LAST 180
|
#define TGSI_OPCODE_IMUL_HI 180
|
||||||
|
#define TGSI_OPCODE_UMUL_HI 181
|
||||||
|
|
||||||
|
#define TGSI_OPCODE_LAST 182
|
||||||
|
|
||||||
#define TGSI_SAT_NONE 0 /* do not saturate */
|
#define TGSI_SAT_NONE 0 /* do not saturate */
|
||||||
#define TGSI_SAT_ZERO_ONE 1 /* clamp to [0,1] */
|
#define TGSI_SAT_ZERO_ONE 1 /* clamp to [0,1] */
|
||||||
|
|
|
@ -0,0 +1,13 @@
|
||||||
|
VERT
|
||||||
|
DCL IN[0]
|
||||||
|
DCL IN[1]
|
||||||
|
DCL OUT[0], POSITION
|
||||||
|
DCL OUT[1], COLOR
|
||||||
|
DCL TEMP[0]
|
||||||
|
DCL TEMP[1]
|
||||||
|
IMM[0] INT32 {-2147483648, 2, 0, -1}
|
||||||
|
MOV OUT[0], IN[0]
|
||||||
|
IMUL_HI TEMP[0], IMM[0].xzzx, IMM[0].yzzy
|
||||||
|
UMUL TEMP[0], TEMP[0], IMM[0].wwww
|
||||||
|
I2F OUT[1], TEMP[0]
|
||||||
|
END
|
|
@ -0,0 +1,11 @@
|
||||||
|
VERT
|
||||||
|
DCL IN[0]
|
||||||
|
DCL IN[1]
|
||||||
|
DCL OUT[0], POSITION
|
||||||
|
DCL OUT[1], COLOR
|
||||||
|
DCL TEMP[0]
|
||||||
|
IMM[0] INT32 {4, 1073741824, 0, 1}
|
||||||
|
MOV OUT[0], IN[0]
|
||||||
|
UMUL_HI TEMP[0], IMM[0].xzzx, IMM[0].yzzy
|
||||||
|
I2F OUT[1], TEMP[0]
|
||||||
|
END
|
Loading…
Reference in New Issue