tgsi: clarify the semantics of DFRACEXP

The status quo is quite the mess:

1. tgsi_exec will do a per-channel computation, and store the dst[0]
   result (significand) correctly for each channel. The dst[1] result
   (exponent), however, is always written to the single channel selected
   by the lowest set bit of dst[1]'s writemask, so a later channel's
   exponent overwrites an earlier one's. Per-component calculation
   therefore only works partially.

2. r600 will only do a single computation. It will replicate the
   exponent but not the significand.

3. The docs pretend that there's per-component calculation, but they even
   get dst[0] and dst[1] confused.

4. Luckily, st_glsl_to_tgsi only ever emits single-component instructions,
   and kind of assumes that everything is replicated, generating this for
   the dvec4 case:

     DFRACEXP TEMP[0].xy, TEMP[1].x, CONST[0][0].xyxy
     DFRACEXP TEMP[0].zw, TEMP[1].y, CONST[0][0].zwzw
     DFRACEXP TEMP[2].xy, TEMP[1].z, CONST[0][1].xyxy
     DFRACEXP TEMP[2].zw, TEMP[1].w, CONST[0][1].zwzw

Settle on the simplest behavior, which is single-component calculation
with replication, document it, and adjust tgsi_exec and r600.

Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Tested-by: Dieter Nützel <Dieter@nuetzel-hh.de>
This commit is contained in:
Nicolai Hähnle 2017-09-15 18:34:48 +02:00
parent dbe7fc00d5
commit 3c78215a1c
4 changed files with 20 additions and 22 deletions

View File

@ -3688,17 +3688,15 @@ exec_dfracexp(struct tgsi_exec_machine *mach,
union tgsi_double_channel dst;
union tgsi_exec_channel dst_exp;
if (((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) == TGSI_WRITEMASK_XY)) {
fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y);
micro_dfracexp(&dst, &dst_exp, &src);
fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y);
micro_dfracexp(&dst, &dst_exp, &src);
if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) == TGSI_WRITEMASK_XY)
store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y);
store_dest(mach, &dst_exp, &inst->Dst[1], inst, ffs(inst->Dst[1].Register.WriteMask) - 1, TGSI_EXEC_DATA_INT);
}
if (((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_ZW) == TGSI_WRITEMASK_ZW)) {
fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W);
micro_dfracexp(&dst, &dst_exp, &src);
if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_ZW) == TGSI_WRITEMASK_ZW)
store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W);
store_dest(mach, &dst_exp, &inst->Dst[1], inst, ffs(inst->Dst[1].Register.WriteMask) - 1, TGSI_EXEC_DATA_INT);
for (unsigned chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
if (inst->Dst[1].Register.WriteMask & (1 << chan))
store_dest(mach, &dst_exp, &inst->Dst[1], inst, chan, TGSI_EXEC_DATA_INT);
}
}

View File

@ -212,7 +212,7 @@ OPCODE(1, 1, COMP, DSQRT)
OPCODE(1, 3, COMP, DMAD)
OPCODE(1, 1, COMP, DFRAC)
OPCODE(1, 2, COMP, DLDEXP)
OPCODE(2, 1, COMP, DFRACEXP)
OPCODE(2, 1, REPL, DFRACEXP)
OPCODE(1, 1, COMP, D2I)
OPCODE(1, 1, COMP, I2D)
OPCODE(1, 1, COMP, D2U)

View File

@ -1838,17 +1838,15 @@ two-component vectors with doubled precision in each component.
Like the ``frexp()`` routine in many math libraries, this opcode stores the
significand of its source to ``dst0``, and the exponent to ``dst1``, such that
:math:`dst0 \times 2^{dst1} = src` .
:math:`dst0 \times 2^{dst1} = src` . The results are replicated across
channels.
.. math::
dst0.xy = exp(src.xy)
dst0.xy = dst0.zw = frac(src.xy)
dst1.xy = frac(src.xy)
dst1 = exp(src.xy)
dst0.zw = exp(src.zw)
dst1.zw = frac(src.zw)
.. opcode:: DLDEXP - Multiply Number by Integral Power of 2

View File

@ -4045,7 +4045,6 @@ static int tgsi_dfracexp(struct r600_shader_ctx *ctx)
struct r600_bytecode_alu alu;
unsigned write_mask = inst->Dst[0].Register.WriteMask;
int i, j, r;
int firsti = write_mask == 0xc ? 2 : 0;
for (i = 0; i <= 3; i++) {
memset(&alu, 0, sizeof(struct r600_bytecode_alu));
@ -4066,15 +4065,18 @@ static int tgsi_dfracexp(struct r600_shader_ctx *ctx)
return r;
}
/* MOV first two channels to writemask dst0 */
for (i = 0; i <= 1; i++) {
/* Replicate significand result across channels. */
for (i = 0; i <= 3; i++) {
if (!(write_mask & (1 << i)))
continue;
memset(&alu, 0, sizeof(struct r600_bytecode_alu));
alu.op = ALU_OP1_MOV;
alu.src[0].chan = i + 2;
alu.src[0].chan = (i & 1) + 2;
alu.src[0].sel = ctx->temp_reg;
tgsi_dst(ctx, &inst->Dst[0], firsti + i, &alu.dst);
alu.dst.write = (inst->Dst[0].Register.WriteMask >> (firsti + i)) & 1;
tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
alu.dst.write = 1;
alu.last = 1;
r = r600_bytecode_add_alu(ctx->bc, &alu);
if (r)