i965/compiler: handle conversion to smaller type in the lowering pass for that

This rolls back the revert of this same patch introduced in commit 7b9c15628a. It also squashes the following patch to prevent a piglit regression caused by this change: intel/compiler: Fix lower_conversions for 8-bit types. Author: Jose Maria Casanova Crespo <jmcasanova@igalia.com> For 8-bit types the execution type is word. A byte raw MOV has 16-bit execution type and 8-bit destination and it shouldn't be considered a conversion case. So there is no need to change alignment and enter lower_conversions for these instructions. Fixes a regression in the piglit test "glsl-fs-shader-stencil-export" that is introduced with this patch from the Vulkan shaderInt16 series: 'i965/compiler: handle conversion to smaller type in the lowering pass for that'. The problem is caused because there is already a case in the driver that injects Byte instructions like this: mov(8) g127<1>UB g2<32,8,4>UB And the aforementioned pass was not accounting for the special handling of the execution size of Byte instructions. This patch fixes this. v2: (Jason Ekstrand) - Simplify is_byte_raw_mov, include reference to PRM and not consider B <-> UB conversions as raw movs. v3: (Matt Turner) - Indentation style fixes. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=106393 Tested-by: Mark Janes <mark.a.janes@intel.com> Reviewed-by: Matt Turner <mattst88@gmail.com> Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
2018-05-04 11:33:07 +02:00 · 2018-05-04 11:33:07 +02:00 · 5a12bdac09
parent a75f967388
commit 5a12bdac09
2 changed files with 26 additions and 12 deletions
--- a/src/intel/compiler/brw_fs_lower_conversions.cpp
+++ b/src/intel/compiler/brw_fs_lower_conversions.cpp
@ -43,6 +43,24 @@ supports_type_conversion(const fs_inst *inst) {
   }
 }

+/* From the SKL PRM Vol 2a, "Move":
+ *
+ *    "A mov with the same source and destination type, no source modifier,
+ *     and no saturation is a raw move. A packed byte destination region (B
+ *     or UB type with HorzStride == 1 and ExecSize > 1) can only be written
+ *     using raw move."
+ *
+ * Returns true when inst is a raw move, in the sense quoted above, whose
+ * destination type is one byte wide. All of the following must hold:
+ *
+ *    - the destination type has a size of 1 byte,
+ *    - the opcode is BRW_OPCODE_MOV,
+ *    - the first source has exactly the same type as the destination (the
+ *      comparison is on the type itself, not its size, so a B <-> UB move
+ *      does not count as raw),
+ *    - the instruction does not saturate,
+ *    - the first source carries neither a negate nor an abs modifier.
+ *
+ * lower_conversions() uses this predicate to leave such instructions alone
+ * even though their destination type is smaller than their execution type.
+ */
+static bool
+is_byte_raw_mov (const fs_inst *inst)
+{
+   return type_sz(inst->dst.type) == 1 &&
+          inst->opcode == BRW_OPCODE_MOV &&
+          inst->src[0].type == inst->dst.type &&
+          !inst->saturate &&
+          !inst->src[0].negate &&
+          !inst->src[0].abs;
+}
+
 bool
 fs_visitor::lower_conversions()
 {
@ -54,7 +72,8 @@ fs_visitor::lower_conversions()
      bool saturate = inst->saturate;

      if (supports_type_conversion(inst)) {
-         if (get_exec_type_size(inst) == 8 && type_sz(inst->dst.type) < 8) {
+         if (type_sz(inst->dst.type) < get_exec_type_size(inst) &&
+             !is_byte_raw_mov(inst)) {
            /* From the Broadwell PRM, 3D Media GPGPU, "Double Precision Float to
             * Single Precision Float":
             *
@ -64,6 +83,9 @@ fs_visitor::lower_conversions()
             * So we need to allocate a temporary that's two registers, and then do
             * a strided MOV to get the lower DWord of every Qword that has the
             * result.
+             *
+             * This restriction applies, in general, whenever we convert to
+             * a type with a smaller bit-size.
             */
            fs_reg temp = ibld.vgrf(get_exec_type(inst));
            fs_reg strided_temp = subscript(temp, dst.type, 0);
--- a/src/intel/compiler/brw_fs_nir.cpp
+++ b/src/intel/compiler/brw_fs_nir.cpp
@ -755,19 +755,9 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr)
       */

   case nir_op_f2f16_undef:
-   case nir_op_i2i16:
-   case nir_op_u2u16: {
-      /* TODO: Fixing aligment rules for conversions from 32-bits to
-       * 16-bit types should be moved to lower_conversions
-       */
-      fs_reg tmp = bld.vgrf(op[0].type, 1);
-      tmp = subscript(tmp, result.type, 0);
-      inst = bld.MOV(tmp, op[0]);
-      inst->saturate = instr->dest.saturate;
-      inst = bld.MOV(result, tmp);
+      inst = bld.MOV(result, op[0]);
      inst->saturate = instr->dest.saturate;
      break;
-   }

   case nir_op_f2f64:
   case nir_op_f2i64:
@ -807,6 +797,8 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr)
   case nir_op_f2u16:
   case nir_op_i2i32:
   case nir_op_u2u32:
+   case nir_op_i2i16:
+   case nir_op_u2u16:
   case nir_op_i2f16:
   case nir_op_u2f16:
      inst = bld.MOV(result, op[0]);