i965: Move repeat-instruction-suppression to batchbuffer core

Move the tracking of the last emitted instructions into the core
batchbuffer routines and take advantage of the shadow batch copy to
avoid extra memory allocations and copies.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Author: Chris Wilson
Date:   2011-02-20 13:23:47 +00:00
commit aac120977d
parent 8d68a90e22

9 changed files with 119 additions and 151 deletions
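As background for the diff below, here is a minimal standalone sketch (not part of the commit) of the suppression scheme it introduces: each emitted packet is keyed by the opcode in its first dword, and a cache entry stores only an offset and size into the shadow batch map, so a repeated packet is detected with a single memcmp against its previous copy and suppressed by rewinding the write pointer. The cached_batch_item fields and the list walk mirror the diff; the reduced batch struct, the begin_batch()/out_batch() helpers, the opcode value, and the main() driver are invented for illustration.

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct cached_batch_item {
   struct cached_batch_item *next;
   uint16_t header;   /* dword offset of the cached packet in map[] */
   uint16_t size;     /* packet size in bytes */
};

/* Stand-in for struct intel_batchbuffer: just the fields the cache needs. */
struct batch {
   struct cached_batch_item *cached_items;
   uint16_t emit, used;   /* dword offsets: current packet start, write ptr */
   uint32_t map[256];     /* shadow copy of the batch contents */
};

static void begin_batch(struct batch *b) { b->emit = b->used; }
static void out_batch(struct batch *b, uint32_t dw) { b->map[b->used++] = dw; }

/* Same walk as intel_batchbuffer_cached_advance() in the diff below. */
static void cached_advance(struct batch *b)
{
   struct cached_batch_item **prev = &b->cached_items, *item;
   uint32_t sz = (b->used - b->emit) * sizeof(uint32_t);
   uint32_t *start = b->map + b->emit;
   uint16_t op = *start >> 16;

   while (*prev) {
      uint32_t *old;

      item = *prev;
      old = b->map + item->header;
      if (op == *old >> 16) {
         if (item->size == sz && memcmp(old, start, sz) == 0) {
            if (prev != &b->cached_items) {
               /* move-to-front so frequently repeated packets hit early */
               *prev = item->next;
               item->next = b->cached_items;
               b->cached_items = item;
            }
            b->used = b->emit;   /* identical packet: rewind, emit nothing */
            return;
         }
         goto emit;              /* same opcode, new payload: refresh entry */
      }
      prev = &item->next;
   }

   item = malloc(sizeof(*item));
   if (item == NULL)
      return;
   item->next = b->cached_items;
   b->cached_items = item;

emit:
   item->size = sz;
   item->header = b->emit;      /* the shadow map itself holds the bytes */
}

int main(void)
{
   struct batch b = { 0 };
   int i;

   for (i = 0; i < 3; i++) {
      begin_batch(&b);
      out_batch(&b, 0x7900u << 16 | (2 - 2));  /* arbitrary opcode dword */
      out_batch(&b, 0xffffu);                  /* payload */
      cached_advance(&b);
   }
   /* Only the first copy of the packet survives: prints 2, not 6. */
   printf("dwords in batch: %u\n", b.used);
   return 0;
}

The saving over the old brw_cached_batch_struct() is that no separately malloc'd copy of each packet is kept: the shadow batch map already holds the bytes, so a cache entry shrinks to an (offset, size) pair and the per-emit memcpy disappears, which is exactly what the commit message means by taking advantage of the shadow batch copy.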

src/mesa/drivers/dri/i965/brw_cc.c

@@ -233,18 +233,16 @@ const struct brw_tracked_state brw_cc_unit = {
 static void upload_blend_constant_color(struct brw_context *brw)
 {
-   struct gl_context *ctx = &brw->intel.ctx;
-   struct brw_blend_constant_color bcc;
+   struct intel_context *intel = &brw->intel;
+   struct gl_context *ctx = &intel->ctx;
 
-   memset(&bcc, 0, sizeof(bcc));
-   bcc.header.opcode = _3DSTATE_BLEND_CONSTANT_COLOR;
-   bcc.header.length = sizeof(bcc)/4-2;
-   bcc.blend_constant_color[0] = ctx->Color.BlendColor[0];
-   bcc.blend_constant_color[1] = ctx->Color.BlendColor[1];
-   bcc.blend_constant_color[2] = ctx->Color.BlendColor[2];
-   bcc.blend_constant_color[3] = ctx->Color.BlendColor[3];
-
-   BRW_CACHED_BATCH_STRUCT(brw, &bcc);
+   BEGIN_BATCH(5);
+   OUT_BATCH(_3DSTATE_BLEND_CONSTANT_COLOR << 16 | (5-2));
+   OUT_BATCH(ctx->Color.BlendColor[0]);
+   OUT_BATCH(ctx->Color.BlendColor[1]);
+   OUT_BATCH(ctx->Color.BlendColor[2]);
+   OUT_BATCH(ctx->Color.BlendColor[3]);
+   CACHED_BATCH();
 }
 
 const struct brw_tracked_state brw_blend_constant_color = {

src/mesa/drivers/dri/i965/brw_curbe.c

@@ -146,22 +146,24 @@ const struct brw_tracked_state brw_curbe_offsets = {
  */
 void brw_upload_cs_urb_state(struct brw_context *brw)
 {
-   struct brw_cs_urb_state cs_urb;
-   memset(&cs_urb, 0, sizeof(cs_urb));
+   struct intel_context *intel = &brw->intel;
 
+   BEGIN_BATCH(2);
    /* It appears that this is the state packet for the CS unit, ie. the
     * urb entries detailed here are housed in the CS range from the
     * URB_FENCE command.
     */
-   cs_urb.header.opcode = CMD_CS_URB_STATE;
-   cs_urb.header.length = sizeof(cs_urb)/4 - 2;
+   OUT_BATCH(CMD_CS_URB_STATE << 16 | (2-2));
 
-   /* BRW_NEW_URB_FENCE */
-   cs_urb.bits0.nr_urb_entries = brw->urb.nr_cs_entries;
-   cs_urb.bits0.urb_entry_size = brw->urb.csize - 1;
-
-   assert(brw->urb.nr_cs_entries);
-   BRW_CACHED_BATCH_STRUCT(brw, &cs_urb);
+   if (brw->urb.csize == 0) {
+      OUT_BATCH(0);
+   } else {
+      /* BRW_NEW_URB_FENCE */
+      assert(brw->urb.nr_cs_entries);
+      OUT_BATCH((brw->urb.csize - 1) << 4 | brw->urb.nr_cs_entries);
+   }
+   CACHED_BATCH();
 }
 
 static GLfloat fixed_plane[6][4] = {

src/mesa/drivers/dri/i965/brw_misc_state.c

@@ -301,16 +301,15 @@ const struct brw_tracked_state brw_depthbuffer = {
 static void upload_polygon_stipple(struct brw_context *brw)
 {
+   struct intel_context *intel = &brw->intel;
    struct gl_context *ctx = &brw->intel.ctx;
-   struct brw_polygon_stipple bps;
    GLuint i;
 
    if (!ctx->Polygon.StippleFlag)
       return;
 
-   memset(&bps, 0, sizeof(bps));
-   bps.header.opcode = _3DSTATE_POLY_STIPPLE_PATTERN;
-   bps.header.length = sizeof(bps)/4-2;
+   BEGIN_BATCH(33);
+   OUT_BATCH(_3DSTATE_POLY_STIPPLE_PATTERN << 16 | (33 - 2));
 
    /* Polygon stipple is provided in OpenGL order, i.e. bottom
     * row first. If we're rendering to a window (i.e. the
@@ -321,14 +320,13 @@ static void upload_polygon_stipple(struct brw_context *brw)
     */
    if (ctx->DrawBuffer->Name == 0) {
       for (i = 0; i < 32; i++)
-         bps.stipple[i] = ctx->PolygonStipple[31 - i]; /* invert */
+         OUT_BATCH(ctx->PolygonStipple[31 - i]); /* invert */
    }
    else {
       for (i = 0; i < 32; i++)
-         bps.stipple[i] = ctx->PolygonStipple[i]; /* don't invert */
+         OUT_BATCH(ctx->PolygonStipple[i]);
    }
-
-   BRW_CACHED_BATCH_STRUCT(brw, &bps);
+   CACHED_BATCH();
 }
 
 const struct brw_tracked_state brw_polygon_stipple = {
@@ -347,15 +345,14 @@ const struct brw_tracked_state brw_polygon_stipple = {
 static void upload_polygon_stipple_offset(struct brw_context *brw)
 {
+   struct intel_context *intel = &brw->intel;
    struct gl_context *ctx = &brw->intel.ctx;
-   struct brw_polygon_stipple_offset bpso;
 
    if (!ctx->Polygon.StippleFlag)
       return;
 
-   memset(&bpso, 0, sizeof(bpso));
-   bpso.header.opcode = _3DSTATE_POLY_STIPPLE_OFFSET;
-   bpso.header.length = sizeof(bpso)/4-2;
+   BEGIN_BATCH(2);
+   OUT_BATCH(_3DSTATE_POLY_STIPPLE_OFFSET << 16 | (2-2));
 
    /* If we're drawing to a system window (ctx->DrawBuffer->Name == 0),
     * we have to invert the Y axis in order to match the OpenGL
@@ -365,16 +362,11 @@ static void upload_polygon_stipple_offset(struct brw_context *brw)
     * system works just fine, and there's no window system to
     * worry about.
     */
-   if (brw->intel.ctx.DrawBuffer->Name == 0) {
-      bpso.bits0.x_offset = 0;
-      bpso.bits0.y_offset = (32 - (ctx->DrawBuffer->Height & 31)) & 31;
-   }
-   else {
-      bpso.bits0.y_offset = 0;
-      bpso.bits0.x_offset = 0;
-   }
-
-   BRW_CACHED_BATCH_STRUCT(brw, &bpso);
+   if (brw->intel.ctx.DrawBuffer->Name == 0)
+      OUT_BATCH((32 - (ctx->DrawBuffer->Height & 31)) & 31);
+   else
+      OUT_BATCH(0);
+   CACHED_BATCH();
 }
 
 #define _NEW_WINDOW_POS 0x40000000
@@ -393,18 +385,17 @@ const struct brw_tracked_state brw_polygon_stipple_offset = {
  */
 static void upload_aa_line_parameters(struct brw_context *brw)
 {
+   struct intel_context *intel = &brw->intel;
    struct gl_context *ctx = &brw->intel.ctx;
-   struct brw_aa_line_parameters balp;
 
    if (!ctx->Line.SmoothFlag || !brw->has_aa_line_parameters)
       return;
 
+   OUT_BATCH(_3DSTATE_AA_LINE_PARAMETERS << 16 | (3 - 2));
    /* use legacy aa line coverage computation */
-   memset(&balp, 0, sizeof(balp));
-   balp.header.opcode = _3DSTATE_AA_LINE_PARAMETERS;
-   balp.header.length = sizeof(balp) / 4 - 2;
-
-   BRW_CACHED_BATCH_STRUCT(brw, &balp);
+   OUT_BATCH(0);
+   OUT_BATCH(0);
+   CACHED_BATCH();
 }
 
 const struct brw_tracked_state brw_aa_line_parameters = {
@@ -422,28 +413,21 @@ const struct brw_tracked_state brw_aa_line_parameters = {
 static void upload_line_stipple(struct brw_context *brw)
 {
+   struct intel_context *intel = &brw->intel;
    struct gl_context *ctx = &brw->intel.ctx;
-   struct brw_line_stipple bls;
    GLfloat tmp;
    GLint tmpi;
 
    if (!ctx->Line.StippleFlag)
      return;
 
-   memset(&bls, 0, sizeof(bls));
-   bls.header.opcode = _3DSTATE_LINE_STIPPLE_PATTERN;
-   bls.header.length = sizeof(bls)/4 - 2;
-
-   bls.bits0.pattern = ctx->Line.StipplePattern;
-   bls.bits1.repeat_count = ctx->Line.StippleFactor;
+   BEGIN_BATCH(3);
+   OUT_BATCH(_3DSTATE_LINE_STIPPLE_PATTERN << 16 | (3 - 2));
+   OUT_BATCH(ctx->Line.StipplePattern);
 
    tmp = 1.0 / (GLfloat) ctx->Line.StippleFactor;
    tmpi = tmp * (1<<13);
-
-   bls.bits1.inverse_repeat_count = tmpi;
-
-   BRW_CACHED_BATCH_STRUCT(brw, &bls);
+   OUT_BATCH(tmpi << 16 | ctx->Line.StippleFactor);
+   CACHED_BATCH();
 }
 
 const struct brw_tracked_state brw_line_stipple = {

src/mesa/drivers/dri/i965/brw_state.h

@@ -166,13 +166,7 @@ void brw_destroy_caches( struct brw_context *brw );
  */
 #define BRW_BATCH_STRUCT(brw, s) intel_batchbuffer_data(&brw->intel, (s), \
                                                         sizeof(*(s)), false)
-#define BRW_CACHED_BATCH_STRUCT(brw, s) brw_cached_batch_struct( brw, (s), sizeof(*(s)) )
-
-GLboolean brw_cached_batch_struct( struct brw_context *brw,
-                                   const void *data,
-                                   GLuint sz );
-void brw_destroy_batch_cache( struct brw_context *brw );
-void brw_clear_batch_cache( struct brw_context *brw );
 
 void *brw_state_batch(struct brw_context *brw,
                       int size,
                       int alignment,

src/mesa/drivers/dri/i965/brw_state_batch.c

@@ -29,75 +29,10 @@
  * Keith Whitwell <keith@tungstengraphics.com>
  */
 
 #include "brw_state.h"
 #include "intel_batchbuffer.h"
 #include "main/imports.h"
 
-/* A facility similar to the data caching code above, which aims to
- * prevent identical commands being issued repeatedly.
- */
-GLboolean brw_cached_batch_struct( struct brw_context *brw,
-                                   const void *data,
-                                   GLuint sz )
-{
-   struct brw_cached_batch_item *item = brw->cached_batch_items;
-   struct header *newheader = (struct header *)data;
-
-   if (brw->emit_state_always) {
-      intel_batchbuffer_data(&brw->intel, data, sz, false);
-      return GL_TRUE;
-   }
-
-   while (item) {
-      if (item->header->opcode == newheader->opcode) {
-         if (item->sz == sz && memcmp(item->header, newheader, sz) == 0)
-            return GL_FALSE;
-         if (item->sz != sz) {
-            free(item->header);
-            item->header = malloc(sz);
-            item->sz = sz;
-         }
-         goto emit;
-      }
-      item = item->next;
-   }
-
-   assert(!item);
-   item = CALLOC_STRUCT(brw_cached_batch_item);
-   item->header = malloc(sz);
-   item->sz = sz;
-   item->next = brw->cached_batch_items;
-   brw->cached_batch_items = item;
-
- emit:
-   memcpy(item->header, newheader, sz);
-   intel_batchbuffer_data(&brw->intel, data, sz, false);
-   return GL_TRUE;
-}
-
-void brw_clear_batch_cache( struct brw_context *brw )
-{
-   struct brw_cached_batch_item *item = brw->cached_batch_items;
-
-   while (item) {
-      struct brw_cached_batch_item *next = item->next;
-      free((void *)item->header);
-      free(item);
-      item = next;
-   }
-
-   brw->cached_batch_items = NULL;
-}
-
-void brw_destroy_batch_cache( struct brw_context *brw )
-{
-   brw_clear_batch_cache(brw);
-}
-
 /**
  * Allocates a block of space in the batchbuffer for indirect state.
  *

src/mesa/drivers/dri/i965/brw_state_upload.c

@@ -176,7 +176,6 @@ void brw_init_state( struct brw_context *brw )
 void brw_destroy_state( struct brw_context *brw )
 {
    brw_destroy_caches(brw);
-   brw_destroy_batch_cache(brw);
 }
 
 /***********************************************************************
@@ -383,9 +382,6 @@ void brw_validate_state( struct brw_context *brw )
        state->brw == 0)
       return;
 
-   if (brw->state.dirty.brw & BRW_NEW_CONTEXT)
-      brw_clear_batch_cache(brw);
-
    brw->intel.Fallback = GL_FALSE; /* boolean, not bitfield */
 
    /* do prepare stage for all atoms */

src/mesa/drivers/dri/intel/intel_batchbuffer.c

@@ -33,6 +33,25 @@
 #include "intel_bufmgr.h"
 #include "intel_buffers.h"
 
+struct cached_batch_item {
+   struct cached_batch_item *next;
+   uint16_t header;
+   uint16_t size;
+};
+
+static void clear_cache( struct intel_context *intel )
+{
+   struct cached_batch_item *item = intel->batch.cached_items;
+
+   while (item) {
+      struct cached_batch_item *next = item->next;
+      free(item);
+      item = next;
+   }
+
+   intel->batch.cached_items = NULL;
+}
+
 void
 intel_batchbuffer_reset(struct intel_context *intel)
 {
@@ -40,6 +59,7 @@ intel_batchbuffer_reset(struct intel_context *intel)
       drm_intel_bo_unreference(intel->batch.bo);
       intel->batch.bo = NULL;
    }
+   clear_cache(intel);
 
    intel->batch.bo = drm_intel_bo_alloc(intel->bufmgr, "batchbuffer",
                                         intel->maxBatchSize, 4096);
@@ -53,6 +73,7 @@ void
 intel_batchbuffer_free(struct intel_context *intel)
 {
    drm_intel_bo_unreference(intel->batch.bo);
+   clear_cache(intel);
 }
 
@@ -165,7 +186,8 @@ intel_batchbuffer_emit_reloc(struct intel_context *intel,
    ret = drm_intel_bo_emit_reloc(intel->batch.bo, 4*intel->batch.used,
                                  buffer, delta,
                                  read_domains, write_domain);
-   assert (ret == 0);
+   assert(ret == 0);
+   (void)ret;
 
    /*
    * Using the old buffer offset, write in what the right data would be, in case
@@ -191,7 +213,8 @@ intel_batchbuffer_emit_reloc_fenced(struct intel_context *intel,
    ret = drm_intel_bo_emit_reloc_fence(intel->batch.bo, 4*intel->batch.used,
                                        buffer, delta,
                                        read_domains, write_domain);
-   assert (ret == 0);
+   assert(ret == 0);
+   (void)ret;
 
    /*
    * Using the old buffer offset, write in what the right data would
@@ -213,6 +236,47 @@ intel_batchbuffer_data(struct intel_context *intel,
    intel->batch.used += bytes >> 2;
 }
 
+void
+intel_batchbuffer_cached_advance(struct intel_context *intel)
+{
+   struct cached_batch_item **prev = &intel->batch.cached_items, *item;
+   uint32_t sz = (intel->batch.used - intel->batch.emit) * sizeof(uint32_t);
+   uint32_t *start = intel->batch.map + intel->batch.emit;
+   uint16_t op = *start >> 16;
+
+   while (*prev) {
+      uint32_t *old;
+
+      item = *prev;
+      old = intel->batch.map + item->header;
+      if (op == *old >> 16) {
+         if (item->size == sz && memcmp(old, start, sz) == 0) {
+            if (prev != &intel->batch.cached_items) {
+               *prev = item->next;
+               item->next = intel->batch.cached_items;
+               intel->batch.cached_items = item;
+            }
+            intel->batch.used = intel->batch.emit;
+            return;
+         }
+
+         goto emit;
+      }
+      prev = &item->next;
+   }
+
+   item = malloc(sizeof(struct cached_batch_item));
+   if (item == NULL)
+      return;
+
+   item->next = intel->batch.cached_items;
+   intel->batch.cached_items = item;
+
+ emit:
+   item->size = sz;
+   item->header = intel->batch.emit;
+}
+
 /* Emit a pipelined flush to either flush render and texture cache for
  * reading from a FBO-drawn texture, or flush so that frontbuffer
  * render appears on the screen in DRI1.

src/mesa/drivers/dri/intel/intel_batchbuffer.h

@@ -101,9 +101,9 @@ intel_batchbuffer_begin(struct intel_context *intel, int n, bool is_blit)
 {
    intel_batchbuffer_require_space(intel, n * 4, is_blit);
 
+   intel->batch.emit = intel->batch.used;
 #ifdef DEBUG
-   intel->batch.emit.total = n;
-   intel->batch.emit.start_ptr = intel->batch.used;
+   intel->batch.total = n;
 #endif
 }
 
@@ -123,6 +123,8 @@ intel_batchbuffer_advance(struct intel_context *intel)
 #endif
 }
 
+void intel_batchbuffer_cached_advance(struct intel_context *intel);
+
 /* Here are the crusty old macros, to be removed:
  */
 #define BATCH_LOCALS
@@ -141,5 +143,6 @@ intel_batchbuffer_advance(struct intel_context *intel)
    } while (0)
 
 #define ADVANCE_BATCH() intel_batchbuffer_advance(intel);
+#define CACHED_BATCH() intel_batchbuffer_cached_advance(intel);
 
 #endif

src/mesa/drivers/dri/intel/intel_context.h

@@ -171,22 +171,14 @@ struct intel_context
    struct intel_batchbuffer {
       drm_intel_bo *bo;
+      struct cached_batch_item *cached_items;
 
-      uint16_t used;
-      uint16_t reserved_space;
+      uint16_t emit, total;
+      uint16_t used, reserved_space;
       uint32_t map[8192];
 #define BATCH_SZ (8192*sizeof(uint32_t))
 
       uint32_t state_batch_offset;
-
-#ifdef DEBUG
-      /** Tracking of BEGIN_BATCH()/OUT_BATCH()/ADVANCE_BATCH() debugging */
-      struct {
-         uint16_t total;
-         uint16_t start_ptr;
-      } emit;
-#endif
       bool is_blit;
    } batch;