From e6469ec43b25898e99766a30aa8f54cc64c3bc04 Mon Sep 17 00:00:00 2001
From: Francisco Jerez <currojerez@riseup.net>
Date: Mon, 13 Mar 2017 17:31:39 -0700
Subject: [PATCH] gallium/tgsi: Treat UCMP sources as floats to match the
 GLSL-to-TGSI pass expectations.

Currently the GLSL-to-TGSI translation pass assumes it can use
floating point source modifiers on the UCMP instruction.  See the bug
report linked below for an example where an unrelated change in the
GLSL built-in lowering code for atan2 (e9ffd12827ac11a2d2002a42fa8eb1)
caused the generation of floating-point ir_unop_neg instructions
followed by ir_triop_csel, which is translated into UCMP with a negate
modifier on back-ends with native integer support.

Allowing floating-point source modifiers on an integer instruction
seems like rather dubious design for a transport IR, since the same
semantics could be represented as a sequence of MOV+UCMP instructions
instead, but supposedly this matches the expectations of TGSI
back-ends other than tgsi_exec, and the expectations of the DX10 API.
I take no responsibility for future headaches caused by this
inconsistency.

Fixes a regression of piglit glsl-fs-tan-1 on softpipe introduced by
the above-mentioned glsl front-end commit.  Even though the commit
that triggered the regression doesn't seem to have made it to any
stable branches yet, this might be worth back-porting since I don't
see any reason why the bug couldn't have been reproduced before that
point.

Suggested-by: Roland Scheidegger <sroland@vmware.com>
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=99817
Reviewed-by: Roland Scheidegger <sroland@vmware.com>
---
 src/gallium/auxiliary/tgsi/tgsi_exec.c | 54 +++++++++++++++++++-------
 src/gallium/docs/source/tgsi.rst       |  8 ++--
 2 files changed, 46 insertions(+), 16 deletions(-)

diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index 3c1530672ff..48d91af3e16 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -3358,6 +3358,46 @@ exec_up2h(struct tgsi_exec_machine *mach,
    }
 }
 
+static void
+micro_ucmp(union tgsi_exec_channel *dst,
+           const union tgsi_exec_channel *src0,
+           const union tgsi_exec_channel *src1,
+           const union tgsi_exec_channel *src2)
+{
+   dst->f[0] = src0->u[0] ? src1->f[0] : src2->f[0];
+   dst->f[1] = src0->u[1] ? src1->f[1] : src2->f[1];
+   dst->f[2] = src0->u[2] ? src1->f[2] : src2->f[2];
+   dst->f[3] = src0->u[3] ? src1->f[3] : src2->f[3];
+}
+
+static void
+exec_ucmp(struct tgsi_exec_machine *mach,
+          const struct tgsi_full_instruction *inst)
+{
+   unsigned int chan;
+   struct tgsi_exec_vector dst;
+
+   for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
+      if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
+         union tgsi_exec_channel src[3];
+
+         fetch_source(mach, &src[0], &inst->Src[0], chan,
+                      TGSI_EXEC_DATA_UINT);
+         fetch_source(mach, &src[1], &inst->Src[1], chan,
+                      TGSI_EXEC_DATA_FLOAT);
+         fetch_source(mach, &src[2], &inst->Src[2], chan,
+                      TGSI_EXEC_DATA_FLOAT);
+         micro_ucmp(&dst.xyzw[chan], &src[0], &src[1], &src[2]);
+      }
+   }
+   for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
+      if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
+         store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan,
+                    TGSI_EXEC_DATA_FLOAT);
+      }
+   }
+}
+
 static void
 exec_scs(struct tgsi_exec_machine *mach,
          const struct tgsi_full_instruction *inst)
@@ -4997,18 +5037,6 @@ micro_uarl(union tgsi_exec_channel *dst,
    dst->i[3] = src->u[3];
 }
 
-static void
-micro_ucmp(union tgsi_exec_channel *dst,
-           const union tgsi_exec_channel *src0,
-           const union tgsi_exec_channel *src1,
-           const union tgsi_exec_channel *src2)
-{
-   dst->u[0] = src0->u[0] ? src1->u[0] : src2->u[0];
-   dst->u[1] = src0->u[1] ? src1->u[1] : src2->u[1];
-   dst->u[2] = src0->u[2] ? src1->u[2] : src2->u[2];
-   dst->u[3] = src0->u[3] ? src1->u[3] : src2->u[3];
-}
-
 /**
  * Signed bitfield extract (i.e. sign-extend the extracted bits)
  */
@@ -5911,7 +5939,7 @@ exec_instruction(
       break;
 
    case TGSI_OPCODE_UCMP:
-      exec_vector_trinary(mach, inst, micro_ucmp, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
+      exec_ucmp(mach, inst);
       break;
 
    case TGSI_OPCODE_IABS:
diff --git a/src/gallium/docs/source/tgsi.rst b/src/gallium/docs/source/tgsi.rst
index 18b42fba747..9976875c7bd 100644
--- a/src/gallium/docs/source/tgsi.rst
+++ b/src/gallium/docs/source/tgsi.rst
@@ -28,9 +28,11 @@ Modifiers
 
 TGSI supports modifiers on inputs (as well as saturate modifier on instructions).
 
-For inputs which have a floating point type, both absolute value and negation
-modifiers are supported (with absolute value being applied first).
-TGSI_OPCODE_MOV is considered to have float input type for applying modifiers.
+For inputs which have a floating point type, both absolute value and
+negation modifiers are supported (with absolute value being applied
+first).  The only source of TGSI_OPCODE_MOV and the second and third
+sources of TGSI_OPCODE_UCMP are considered to have float type for
+applying modifiers.
 
 For inputs which have signed or unsigned type only the negate modifier is
 supported.