i965/nir/vec4: Implement multiplication

Implementation based on the vec4_visitor IR implementation for the operations ir_binop_mul and ir_binop_imul_high. Adds NIR ALU operations: * nir_op_fmul * nir_op_imul * nir_op_imul_high * nir_op_umul_high Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
2015-06-16 23:50:46 +02:00 · 2015-06-16 23:50:46 +02:00 · 9acebf1461
parent 0675842b56
commit 9acebf1461
1 changed files with 44 additions and 0 deletions
--- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
@ -700,6 +700,50 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
      inst->saturate = instr->dest.saturate;
      break;

+   case nir_op_fmul:
+      inst = emit(MUL(dst, op[0], op[1]));
+      inst->saturate = instr->dest.saturate;
+      break;
+
+   case nir_op_imul: {
+      nir_const_value *value0 = nir_src_as_const_value(instr->src[0].src);
+      nir_const_value *value1 = nir_src_as_const_value(instr->src[1].src);
+
+      /* For integer multiplication, the MUL uses the low 16 bits of one of
+       * the operands (src0 through SNB, src1 on IVB and later). The MACH
+       * accumulates in the contribution of the upper 16 bits of that
+       * operand. If we can determine that one of the args is in the low
+       * 16 bits, though, we can just emit a single MUL.
+       */
+      if (value0 && value0->u[0] < (1 << 16)) {
+         if (devinfo->gen < 7)
+            emit(MUL(dst, op[0], op[1]));
+         else
+            emit(MUL(dst, op[1], op[0]));
+      } else if (value1 && value1->u[0] < (1 << 16)) {
+         if (devinfo->gen < 7)
+            emit(MUL(dst, op[1], op[0]));
+         else
+            emit(MUL(dst, op[0], op[1]));
+      } else {
+         struct brw_reg acc = retype(brw_acc_reg(8), dst.type);
+
+         emit(MUL(acc, op[0], op[1]));
+         emit(MACH(dst_null_d(), op[0], op[1]));
+         emit(MOV(dst, src_reg(acc)));
+      }
+      break;
+   }
+
+   case nir_op_imul_high:
+   case nir_op_umul_high: {
+      struct brw_reg acc = retype(brw_acc_reg(8), dst.type);
+
+      emit(MUL(acc, op[0], op[1]));
+      emit(MACH(dst, op[0], op[1]));
+      break;
+   }
+
   default:
      unreachable("Unimplemented ALU operation");
   }