i965: Add support for POW in gen6 FS.

Fixes glsl-algebraic-pow-2 in brw_wm_glsl.c mode.
2010-08-22 01:33:57 -07:00 · 2010-08-22 01:33:57 -07:00 · fe2d4a5ea0
parent 2f914053bc
commit fe2d4a5ea0
3 changed files with 121 additions and 26 deletions
--- a/src/mesa/drivers/dri/i965/brw_eu.h
+++ b/src/mesa/drivers/dri/i965/brw_eu.h
@ -885,6 +885,12 @@ void brw_math( struct brw_compile *p,
 	       GLuint data_type,
 	       GLuint precision );
 /* Two-source extended math instruction.  The definition in brw_eu_emit.c
  * asserts gen >= 6 and emits a single BRW_OPCODE_MATH instruction with
  * `function` as the math function selector.
  */
 void brw_math2(struct brw_compile *p,
 	       struct brw_reg dest,
 	       GLuint function,
 	       struct brw_reg src0,
 	       struct brw_reg src1);
 void brw_dp_READ_16( struct brw_compile *p,
 		     struct brw_reg dest,
 		     GLuint scratch_offset );
--- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
@ -1131,6 +1131,29 @@ void brw_math( struct brw_compile *p,
   }
 }
 /**
  * Emit a two-operand extended math instruction, float[8].
  *
  * Appends one BRW_OPCODE_MATH instruction to the program in \p p, with
  * \p dest as the destination and \p src0 / \p src1 as the sources.
  * \p function selects the math operation.  Asserts that the hardware
  * generation is 6 or later.
  */
 void brw_math2(struct brw_compile *p,
                struct brw_reg dest,
                GLuint function,
                struct brw_reg src0,
                struct brw_reg src1)
 {
    struct brw_instruction *math;

    math = next_insn(p, BRW_OPCODE_MATH);

    /* This encoding is only used on gen6+. */
    assert(p->brw->intel.gen >= 6);

    /* The math instruction shares the ISA layout of the other opcodes, but
     * the CondModifier field is reinterpreted as FC[3:0] and ThreadCtrl as
     * FC[5:4], so the function selector goes in the cond-modifier slot.
     */
    math->header.destreg__conditionalmod = function;

    /* Operands are programmed in the same order as before: dest, src0, src1. */
    brw_set_dest(math, dest);
    brw_set_src0(math, src0);
    brw_set_src1(math, src1);
 }
 /**
 * Extended math function, float[16].
 * Use 2 send instructions.
--- a/src/mesa/drivers/dri/i965/brw_wm_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_emit.c
@ -918,10 +918,8 @@ void emit_math2(struct brw_wm_compile *c,
 		const struct brw_reg *arg1)
 {
   struct brw_compile *p = &c->func;
   struct intel_context *intel = &p->brw->intel;
   int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
   GLuint saturate = ((mask & SATURATE) ?
 		      BRW_MATH_SATURATE_SATURATE :
 		      BRW_MATH_SATURATE_NONE);
   if (!(mask & WRITEMASK_XYZW))
      return; /* Do not emit dead code */
@ -930,35 +928,103 @@ void emit_math2(struct brw_wm_compile *c,
   brw_push_insn_state(p);
-   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+   /* math can only operate on up to a vec8 at a time, so in
-   brw_MOV(p, brw_message_reg(3), arg1[0]);
+    * dispatch_width==16 we have to do the second half manually.
   if (c->dispatch_width == 16) {
      brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
      brw_MOV(p, brw_message_reg(5), sechalf(arg1[0]));
   }
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
   brw_math(p, 
 	    dst[dst_chan],
 	    function,
 	    saturate,
 	    2,
 	    arg0[0],
 	    BRW_MATH_DATA_VECTOR,
 	    BRW_MATH_PRECISION_FULL);
   /* Send two messages to perform all 16 operations:
    */
-   if (c->dispatch_width == 16) {
+   if (intel->gen >= 6) {
-      brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
+      struct brw_reg src0 = arg0[0];
      struct brw_reg src1 = arg1[0];
      struct brw_reg temp_dst = dst[dst_chan];
      if (arg0[0].hstride == BRW_HORIZONTAL_STRIDE_0) {
 	 if (arg1[0].hstride == BRW_HORIZONTAL_STRIDE_0) {
 	    /* Both scalar arguments.  Do scalar calc. */
 	    src0.hstride = BRW_HORIZONTAL_STRIDE_1;
 	    src1.hstride = BRW_HORIZONTAL_STRIDE_1;
 	    temp_dst.hstride = BRW_HORIZONTAL_STRIDE_1;
 	    temp_dst.width = BRW_WIDTH_1;
 	    if (arg0[0].subnr != 0) {
 	       brw_MOV(p, temp_dst, src0);
 	       src0 = temp_dst;
 	       /* Ouch.  We've used the temp as a dst, and we still
 		* need a temp to store arg1 in, because src and dst
 		* offsets have to be equal.  Leaving this up to
 		* glsl2-965 to handle correctly.
 		*/
 	       assert(arg1[0].subnr == 0);
 	    } else if (arg1[0].subnr != 0) {
 	       brw_MOV(p, temp_dst, src1);
 	       src1 = temp_dst;
 	    }
 	 } else {
 	    brw_MOV(p, temp_dst, src0);
 	    src0 = temp_dst;
 	 }
      } else if (arg1[0].hstride == BRW_HORIZONTAL_STRIDE_0) {
 	 brw_MOV(p, temp_dst, src1);
 	 src1 = temp_dst;
      }
      brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
      brw_math2(p,
 		temp_dst,
 		function,
 		src0,
 		src1);
      if (c->dispatch_width == 16) {
 	 brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
 	 brw_math2(p,
 		   sechalf(temp_dst),
 		   function,
 		   sechalf(src0),
 		   sechalf(src1));
      }
      /* Splat a scalar result into all the channels. */
      if (arg0[0].hstride == BRW_HORIZONTAL_STRIDE_0 &&
 	  arg1[0].hstride == BRW_HORIZONTAL_STRIDE_0) {
 	 temp_dst.hstride = BRW_HORIZONTAL_STRIDE_0;
 	 temp_dst.vstride = BRW_VERTICAL_STRIDE_0;
 	 brw_MOV(p, dst[dst_chan], temp_dst);
      }
   } else {
      GLuint saturate = ((mask & SATURATE) ?
 			 BRW_MATH_SATURATE_SATURATE :
 			 BRW_MATH_SATURATE_NONE);
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
      brw_MOV(p, brw_message_reg(3), arg1[0]);
      if (c->dispatch_width == 16) {
 	 brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
 	 brw_MOV(p, brw_message_reg(5), sechalf(arg1[0]));
      }
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
      brw_math(p,
-	       offset(dst[dst_chan],1),
+	       dst[dst_chan],
 	       function,
 	       saturate,
-	       4,
+	       2,
-	       sechalf(arg0[0]),
+	       arg0[0],
 	       BRW_MATH_DATA_VECTOR,
 	       BRW_MATH_PRECISION_FULL);
      /* Send two messages to perform all 16 operations:
       */
      if (c->dispatch_width == 16) {
 	 brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
 	 brw_math(p,
 		  offset(dst[dst_chan],1),
 		  function,
 		  saturate,
 		  4,
 		  sechalf(arg0[0]),
 		  BRW_MATH_DATA_VECTOR,
 		  BRW_MATH_PRECISION_FULL);
      }
   }
   brw_pop_insn_state(p);
 }