i965: first attempt at handling URB overflow when there's too many vs outputs

If we can't fit all the VS outputs into the MRF, we need to overflow into temporary GRF registers, then use some MOVs and a second brw_urb_WRITE() instruction to place the overflow vertex results into the URB. This is hit when a vertex/fragment shader pair has a large number of varying variables (12 or more). There's still something broken here, but it seems close...
2009-06-30 17:12:34 -06:00 · 2009-06-30 17:12:34 -06:00 · 119eb40942
parent 1b6ae2e004
commit 119eb40942
2 changed files with 49 additions and 4 deletions
--- a/src/mesa/drivers/dri/i965/brw_vs.h
+++ b/src/mesa/drivers/dri/i965/brw_vs.h
@ -58,6 +58,7 @@ struct brw_vs_compile {

   GLuint first_output;
   GLuint nr_outputs;
+   GLuint first_overflow_output; /**< VERT_ATTRIB_x */

   GLuint first_tmp;
   GLuint last_tmp;
--- a/src/mesa/drivers/dri/i965/brw_vs_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c
@ -133,6 +133,7 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
    */
   c->nr_outputs = 0;
   c->first_output = reg;
+   c->first_overflow_output = 0;
   mrf = 4;
   for (i = 0; i < VERT_RESULT_MAX; i++) {
      if (c->prog_data.outputs_written & (1 << i)) {
@ -148,8 +149,17 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
 	    mrf++;		/* just a placeholder?  XXX fix later stages & remove this */
 	 }
 	 else {
-	    c->regs[PROGRAM_OUTPUT][i] = brw_message_reg(mrf);
-	    mrf++;
+            if (mrf < 16) {
+               c->regs[PROGRAM_OUTPUT][i] = brw_message_reg(mrf);
+               mrf++;
+            }
+            else {
+               /* too many vertex results to fit in MRF, use GRF for overflow */
+               if (!c->first_overflow_output)
+                  c->first_overflow_output = i;
+               c->regs[PROGRAM_OUTPUT][i] = brw_vec8_grf(reg, 0);
+               reg++;
+            }
 	 }
      }
   }     
@ -1067,6 +1077,7 @@ static void emit_vertex_write( struct brw_vs_compile *c)
   struct brw_reg m0 = brw_message_reg(0);
   struct brw_reg pos = c->regs[PROGRAM_OUTPUT][VERT_RESULT_HPOS];
   struct brw_reg ndc;
+   int eot;

   if (c->key.copy_edgeflag) {
      brw_MOV(p, 
@ -1145,18 +1156,51 @@ static void emit_vertex_write( struct brw_vs_compile *c)
   brw_MOV(p, offset(m0, 2), ndc);
   brw_MOV(p, offset(m0, 3), pos);

+   eot = (c->first_overflow_output == 0);
+
   brw_urb_WRITE(p, 
 		 brw_null_reg(), /* dest */
 		 0,		/* starting mrf reg nr */
 		 c->r0,		/* src */
 		 0,		/* allocate */
 		 1,		/* used */
-		 c->nr_outputs + 3, /* msg len */
+		 MIN2(c->nr_outputs + 3, (BRW_MAX_MRF-1)), /* msg len */
 		 0,		/* response len */
-		 1, 		/* eot */
+		 eot, 		/* eot */
 		 1, 		/* writes complete */
 		 0, 		/* urb destination offset */
 		 BRW_URB_SWIZZLE_INTERLEAVE);
+
+   if (c->first_overflow_output > 0) {
+      /* Not all of the vertex outputs/results fit into the MRF.
+       * Move the overflowed attributes from the GRF to the MRF and
+       * issue another brw_urb_WRITE().
+       */
+      /* XXX I'm not 100% sure about which MRF regs to use here.  Starting
+       * at mrf[4] atm...
+       */
+      GLuint i, mrf = 0;
+      for (i = c->first_overflow_output; i < VERT_RESULT_MAX; i++) {
+         if (c->prog_data.outputs_written & (1 << i)) {
+            /* move from GRF to MRF */
+            brw_MOV(p, brw_message_reg(4+mrf), c->regs[PROGRAM_OUTPUT][i]);
+            mrf++;
+         }
+      }
+
+      brw_urb_WRITE(p,
+                    brw_null_reg(), /* dest */
+                    4,              /* starting mrf reg nr */
+                    c->r0,          /* src */
+                    0,              /* allocate */
+                    1,              /* used */
+                    mrf+1,          /* msg len */
+                    0,              /* response len */
+                    1,              /* eot */
+                    1,              /* writes complete */
+                    BRW_MAX_MRF-1,  /* urb destination offset */
+                    BRW_URB_SWIZZLE_INTERLEAVE);
+   }
 }