intel/compiler: extract subfunctions of lower_integer_multiplication()

The lower_integer_multiplication() function is already a little too big. I want to add more to it, so let's reorganize the existing code first. Let's start by just extracting the current code into subfunctions. Later we'll change them a little more. v2: Make private functions private (Caio). v3: Fix typo (Caio). Reviewed-by: Matt Turner <mattst88@gmail.com> Reviewed-by: Caio Marcelo de Oliveira Filho <caio.oliveira@intel.com> Signed-off-by: Paulo Zanoni <paulo.r.zanoni@intel.com>
2019-07-11 16:56:05 -07:00 · 2019-07-11 16:56:05 -07:00 · 75b3868dcc
parent 7740149852
commit 75b3868dcc
2 changed files with 197 additions and 186 deletions
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@ -3862,25 +3862,11 @@ fs_visitor::lower_load_payload()
   return progress;
 }

-bool
-fs_visitor::lower_integer_multiplication()
+void
+fs_visitor::lower_mul_dword_inst(fs_inst *inst, bblock_t *block,
+                                 const fs_builder &ibld)
 {
-   bool progress = false;
-
-   foreach_block_and_inst_safe(block, fs_inst, inst, cfg) {
-      const fs_builder ibld(this, block, inst);
-
-      if (inst->opcode == BRW_OPCODE_MUL) {
-         if (inst->dst.is_accumulator() ||
-             (inst->dst.type != BRW_REGISTER_TYPE_D &&
-              inst->dst.type != BRW_REGISTER_TYPE_UD))
-            continue;
-
-         if (devinfo->has_integer_dword_mul)
-            continue;
-
-         if (inst->src[1].file == IMM &&
-             inst->src[1].ud < (1 << 16)) {
+   if (inst->src[1].file == IMM && inst->src[1].ud < (1 << 16)) {
      /* The MUL instruction isn't commutative. On Gen <= 6, only the low
       * 16-bits of src0 are read, and on Gen >= 7 only the low 16-bits of
       * src1 are used.
@ -3889,8 +3875,7 @@ fs_visitor::lower_integer_multiplication()
       * single MUL instruction with that value in the proper location.
       */
      if (devinfo->gen < 7) {
-               fs_reg imm(VGRF, alloc.allocate(dispatch_width / 8),
-                          inst->dst.type);
+         fs_reg imm(VGRF, alloc.allocate(dispatch_width / 8), inst->dst.type);
         ibld.MOV(imm, inst->src[1]);
         ibld.MUL(inst->dst, imm, inst->src[0]);
      } else {
@ -3966,8 +3951,7 @@ fs_visitor::lower_integer_multiplication()
      }

      /* Get a new VGRF but keep the same stride as inst->dst */
-            fs_reg high(VGRF, alloc.allocate(regs_written(inst)),
-                        inst->dst.type);
+      fs_reg high(VGRF, alloc.allocate(regs_written(inst)), inst->dst.type);
      high.stride = inst->dst.stride;
      high.offset = inst->dst.offset % REG_SIZE;

@ -4000,13 +3984,15 @@ fs_visitor::lower_integer_multiplication()
               subscript(low, BRW_REGISTER_TYPE_UW, 1),
               subscript(high, BRW_REGISTER_TYPE_UW, 0));

-            if (needs_mov || inst->conditional_mod) {
-               set_condmod(inst->conditional_mod,
-                           ibld.MOV(orig_dst, low));
+      if (needs_mov || inst->conditional_mod)
+         set_condmod(inst->conditional_mod, ibld.MOV(orig_dst, low));
   }
 }

-      } else if (inst->opcode == SHADER_OPCODE_MULH) {
+void
+fs_visitor::lower_mulh_inst(fs_inst *inst, bblock_t *block,
+                            const fs_builder &ibld)
+{
   /* According to the BDW+ BSpec page for the "Multiply Accumulate
    * High" instruction:
    *
@ -4021,8 +4007,7 @@ fs_visitor::lower_integer_multiplication()

   /* Should have been lowered to 8-wide. */
   assert(inst->exec_size <= get_lowered_simd_width(devinfo, inst));
-         const fs_reg acc = retype(brw_acc_reg(inst->exec_size),
-                                   inst->dst.type);
+   const fs_reg acc = retype(brw_acc_reg(inst->exec_size), inst->dst.type);
   fs_inst *mul = ibld.MUL(acc, inst->src[0], inst->src[1]);
   fs_inst *mach = ibld.MACH(inst->dst, inst->src[0], inst->src[1]);

@ -4066,6 +4051,28 @@ fs_visitor::lower_integer_multiplication()
      mach->dst = ibld.vgrf(inst->dst.type);
      ibld.MOV(inst->dst, mach->dst);
   }
+}
+
+bool
+fs_visitor::lower_integer_multiplication()
+{
+   bool progress = false;
+
+   foreach_block_and_inst_safe(block, fs_inst, inst, cfg) {
+      const fs_builder ibld(this, block, inst);
+
+      if (inst->opcode == BRW_OPCODE_MUL) {
+         if (inst->dst.is_accumulator() ||
+             (inst->dst.type != BRW_REGISTER_TYPE_D &&
+              inst->dst.type != BRW_REGISTER_TYPE_UD))
+            continue;
+
+         if (devinfo->has_integer_dword_mul)
+            continue;
+
+         lower_mul_dword_inst(inst, block, ibld);
+      } else if (inst->opcode == SHADER_OPCODE_MULH) {
+         lower_mulh_inst(inst, block, ibld);
      } else {
         continue;
      }
--- a/src/intel/compiler/brw_fs.h
+++ b/src/intel/compiler/brw_fs.h
@ -406,6 +406,10 @@ private:

   void resolve_inot_sources(const brw::fs_builder &bld, nir_alu_instr *instr,
                             fs_reg *op);
+   void lower_mul_dword_inst(fs_inst *inst, bblock_t *block,
+                             const brw::fs_builder &ibld);
+   void lower_mulh_inst(fs_inst *inst, bblock_t *block,
+                        const brw::fs_builder &ibld);
 };

 /**