intel/brw: Replace FS_OPCODE_LINTERP with BRW_OPCODE_PLN

We no longer support the old LINE+MAC lowering, and we already lower this to MAD in NIR on Gfx11+, so the LINTERP virtual opcode always corresponds to PLN. The only catch is that LINTERP's operands are reversed from PLN, so we have to switch them. Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28705>
2024-04-11 01:10:51 -07:00 · 2024-04-11 01:10:51 -07:00 · d5b8cec7a2
parent 12b0e03bd2
commit d5b8cec7a2
10 changed files with 21 additions and 51 deletions
--- a/src/intel/compiler/brw_eu_defines.h
+++ b/src/intel/compiler/brw_eu_defines.h
@ -495,7 +495,6 @@ enum opcode {
    */
   FS_OPCODE_DDY_COARSE,
   FS_OPCODE_DDY_FINE,
-   FS_OPCODE_LINTERP,
   FS_OPCODE_PIXEL_X,
   FS_OPCODE_PIXEL_Y,
   FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD,
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@ -510,7 +510,6 @@ fs_inst::can_do_cmod() const
   case BRW_OPCODE_SHR:
   case BRW_OPCODE_SUBB:
   case BRW_OPCODE_XOR:
-   case FS_OPCODE_LINTERP:
      break;
   default:
      return false;
@ -721,11 +720,8 @@ fs_inst::components_read(unsigned i) const
      return 0;

   switch (opcode) {
-   case FS_OPCODE_LINTERP:
-      if (i == 0)
-         return 2;
-      else
-         return 1;
+   case BRW_OPCODE_PLN:
+      return i == 0 ? 1 : 2;

   case FS_OPCODE_PIXEL_X:
   case FS_OPCODE_PIXEL_Y:
@ -920,8 +916,8 @@ fs_inst::size_read(int arg) const
         return mlen * REG_SIZE;
      break;

-   case FS_OPCODE_LINTERP:
-      if (arg == 1)
+   case BRW_OPCODE_PLN:
+      if (arg == 0)
         return 16;
      break;

@ -2422,9 +2418,6 @@ brw_instruction_name(const struct brw_isa_info *isa, enum opcode op)
   case FS_OPCODE_DDY_FINE:
      return "ddy_fine";

-   case FS_OPCODE_LINTERP:
-      return "linterp";
-
   case FS_OPCODE_PIXEL_X:
      return "pixel_x";
   case FS_OPCODE_PIXEL_Y:
--- a/src/intel/compiler/brw_fs.h
+++ b/src/intel/compiler/brw_fs.h
@ -500,8 +500,6 @@ private:
                      struct brw_reg payload,
                      struct brw_reg payload2);
   void generate_barrier(fs_inst *inst, struct brw_reg src);
-   bool generate_linterp(fs_inst *inst, struct brw_reg dst,
-			 struct brw_reg *src);
   void generate_ddx(const fs_inst *inst,
                     struct brw_reg dst, struct brw_reg src);
   void generate_ddy(const fs_inst *inst,
--- a/src/intel/compiler/brw_fs_cse.cpp
+++ b/src/intel/compiler/brw_fs_cse.cpp
@ -75,7 +75,6 @@ is_expression(const fs_visitor *v, const fs_inst *const inst)
   case FS_OPCODE_FB_READ_LOGICAL:
   case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD:
   case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_LOGICAL:
-   case FS_OPCODE_LINTERP:
   case SHADER_OPCODE_FIND_LIVE_CHANNEL:
   case SHADER_OPCODE_FIND_LAST_LIVE_CHANNEL:
   case SHADER_OPCODE_LOAD_LIVE_CHANNELS:
--- a/src/intel/compiler/brw_fs_generator.cpp
+++ b/src/intel/compiler/brw_fs_generator.cpp
@ -603,31 +603,6 @@ fs_generator::generate_barrier(fs_inst *, struct brw_reg src)
   }
 }

-bool
-fs_generator::generate_linterp(fs_inst *inst,
-                               struct brw_reg dst, struct brw_reg *src)
-{
-   /* PLN reads:
-    *                      /   in SIMD16   \
-    *    -----------------------------------
-    *   | src1+0 | src1+1 | src1+2 | src1+3 |
-    *   |-----------------------------------|
-    *   |(x0, x1)|(y0, y1)|(x2, x3)|(y2, y3)|
-    *    -----------------------------------
-    */
-   struct brw_reg delta_x = src[0];
-   struct brw_reg interp = src[1];
-
-   /* nir_lower_interpolation() will do the lowering to MAD instructions for
-    * us on gfx11+
-    */
-   assert(devinfo->ver < 11);
-   assert(devinfo->has_pln);
-
-   brw_PLN(p, dst, interp, delta_x);
-   return false;
-}
-
 /* For OPCODE_DDX and OPCODE_DDY, per channel of output we've got input
 * looking like:
 *
@ -1216,8 +1191,16 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width,
         assert(inst->opcode == SHADER_OPCODE_POW || inst->exec_size == 8);
         gfx6_math(p, dst, brw_math_function(inst->opcode), src[0], src[1]);
 	 break;
-      case FS_OPCODE_LINTERP:
-	 multiple_instructions_emitted = generate_linterp(inst, dst, src);
+      case BRW_OPCODE_PLN:
+         /* PLN reads:
+          *                      /   in SIMD16   \
+          *    -----------------------------------
+          *   | src1+0 | src1+1 | src1+2 | src1+3 |
+          *   |-----------------------------------|
+          *   |(x0, x1)|(y0, y1)|(x2, x3)|(y2, y3)|
+          *    -----------------------------------
+          */
+         brw_PLN(p, dst, src[0], src[1]);
 	 break;
      case FS_OPCODE_PIXEL_X:
         assert(src[0].type == BRW_REGISTER_TYPE_UW);
--- a/src/intel/compiler/brw_fs_lower.cpp
+++ b/src/intel/compiler/brw_fs_lower.cpp
@ -254,18 +254,18 @@ brw_fs_lower_barycentrics(fs_visitor &s)
      const fs_builder ubld = ibld.exec_all().group(8, 0);

      switch (inst->opcode) {
-      case FS_OPCODE_LINTERP : {
+      case BRW_OPCODE_PLN: {
         assert(inst->exec_size == 16);
-         const fs_reg tmp = ibld.vgrf(inst->src[0].type, 2);
+         const fs_reg tmp = ibld.vgrf(inst->src[1].type, 2);
         fs_reg srcs[4];

         for (unsigned i = 0; i < ARRAY_SIZE(srcs); i++)
-            srcs[i] = horiz_offset(offset(inst->src[0], ibld, i % 2),
+            srcs[i] = horiz_offset(offset(inst->src[1], ibld, i % 2),
                                   8 * (i / 2));

         ubld.LOAD_PAYLOAD(tmp, srcs, ARRAY_SIZE(srcs), ARRAY_SIZE(srcs));

-         inst->src[0] = tmp;
+         inst->src[1] = tmp;
         progress = true;
         break;
      }
--- a/src/intel/compiler/brw_fs_lower_simd_width.cpp
+++ b/src/intel/compiler/brw_fs_lower_simd_width.cpp
@ -306,7 +306,7 @@ brw_fs_get_lowered_simd_width(const fs_visitor *shader, const fs_inst *inst)
      /* Integer division is limited to SIMD8 on all generations. */
      return MIN2(8, inst->exec_size);

-   case FS_OPCODE_LINTERP:
+   case BRW_OPCODE_PLN:
   case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD:
   case FS_OPCODE_PACK_HALF_2x16_SPLIT:
   case FS_OPCODE_INTERPOLATE_AT_SAMPLE:
--- a/src/intel/compiler/brw_fs_nir.cpp
+++ b/src/intel/compiler/brw_fs_nir.cpp
@ -4310,7 +4310,7 @@ fs_nir_emit_fs_intrinsic(nir_to_brw_state &ntb,
         interp.type = BRW_REGISTER_TYPE_F;
         dest.type = BRW_REGISTER_TYPE_F;

-         bld.emit(FS_OPCODE_LINTERP, offset(dest, bld, i), dst_xy, interp);
+         bld.PLN(offset(dest, bld, i), interp, dst_xy);
      }
      break;
   }
--- a/src/intel/compiler/brw_ir_performance.cpp
+++ b/src/intel/compiler/brw_ir_performance.cpp
@ -439,7 +439,7 @@ namespace {
         return calculate_desc(info, EU_UNIT_NULL, 8, 0, 0, 0, 0,
                               0, 0, 0, 0, 0, 0);

-      case FS_OPCODE_LINTERP:
+      case BRW_OPCODE_PLN:
         return calculate_desc(info, EU_UNIT_FPU, 0, 4, 0, 0, 4,
                               0, 12, 8 /* XXX */, 16 /* XXX */, 0, 0);

--- a/src/intel/compiler/brw_shader.cpp
+++ b/src/intel/compiler/brw_shader.cpp
@ -423,7 +423,6 @@ fs_inst::can_do_saturate() const
   case BRW_OPCODE_SEL:
   case BRW_OPCODE_SHL:
   case BRW_OPCODE_SHR:
-   case FS_OPCODE_LINTERP:
   case SHADER_OPCODE_COS:
   case SHADER_OPCODE_EXP2:
   case SHADER_OPCODE_LOG2:
@ -455,7 +454,6 @@ bool
 fs_inst::writes_accumulator_implicitly(const struct intel_device_info *devinfo) const
 {
   return writes_accumulator ||
-          (opcode == FS_OPCODE_LINTERP && !devinfo->has_pln) ||
          (eot && intel_needs_workaround(devinfo, 14010017096));
 }