i965g: more work on compiling, particularly the brw_draw files

Keith Whitwell 2009-10-25 00:02:16 +01:00
parent 4f7931bb35
commit 4dd2f6640b
33 changed files with 720 additions and 402 deletions

View File

@ -69,6 +69,7 @@
#include "util/u_stream.h"
#include "util/u_math.h"
#include "util/u_tile.h"
#include "util/u_prim.h"
#ifdef PIPE_SUBSYSTEM_WINDOWS_DISPLAY
@ -600,6 +601,32 @@ const char *pf_name( enum pipe_format format )
}
static const struct debug_named_value pipe_prim_names[] = {
#ifdef DEBUG
DEBUG_NAMED_VALUE(PIPE_PRIM_POINTS),
DEBUG_NAMED_VALUE(PIPE_PRIM_LINES),
DEBUG_NAMED_VALUE(PIPE_PRIM_LINE_LOOP),
DEBUG_NAMED_VALUE(PIPE_PRIM_LINE_STRIP),
DEBUG_NAMED_VALUE(PIPE_PRIM_TRIANGLES),
DEBUG_NAMED_VALUE(PIPE_PRIM_TRIANGLE_STRIP),
DEBUG_NAMED_VALUE(PIPE_PRIM_TRIANGLE_FAN),
DEBUG_NAMED_VALUE(PIPE_PRIM_QUADS),
DEBUG_NAMED_VALUE(PIPE_PRIM_QUAD_STRIP),
DEBUG_NAMED_VALUE(PIPE_PRIM_POLYGON),
#endif
DEBUG_NAMED_VALUE_END
};
const char *u_prim_name( unsigned prim )
{
return debug_dump_enum(pipe_prim_names, prim);
}
#ifdef DEBUG
void debug_dump_image(const char *prefix,
unsigned format, unsigned cpp,

View File

@ -135,4 +135,6 @@ static INLINE unsigned u_reduced_prim( unsigned pipe_prim )
}
}
const char *u_prim_name( unsigned pipe_prim );
#endif

View File

@ -32,6 +32,8 @@
#ifndef U_UPLOAD_MGR_H
#define U_UPLOAD_MGR_H
#include "pipe/p_error.h"
struct pipe_screen;
struct pipe_buffer;
struct u_upload_mgr;

View File

@ -61,7 +61,7 @@ C_SOURCES = \
brw_wm_state.c \
brw_wm_surface_state.c \
brw_bo.c \
intel_batchbuffer.c \
brw_batchbuffer.c \
intel_tex_layout.c
include ../../Makefile.template

View File

@ -0,0 +1,198 @@
/**************************************************************************
*
* Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
#include "brw_batchbuffer.h"
#include "brw_decode.h"
#include "brw_reg.h"
#include "brw_winsys.h"
void
brw_batchbuffer_reset(struct brw_batchbuffer *batch)
{
struct intel_context *intel = batch->intel;
if (batch->buf != NULL) {
batch->sws->bo_unreference(batch->buf);
batch->buf = NULL;
}
if (!batch->buffer && intel->ttm == GL_TRUE)
batch->buffer = malloc (intel->maxBatchSize);
batch->buf = batch->sws->bo_alloc(batch->sws,
BRW_BUFFER_TYPE_BATCH,
intel->maxBatchSize, 4096);
if (batch->buffer)
batch->map = batch->buffer;
else {
batch->sws->bo_map(batch->buf, GL_TRUE);
batch->map = batch->buf->virtual;
}
batch->size = intel->maxBatchSize;
batch->ptr = batch->map;
batch->dirty_state = ~0;
batch->cliprect_mode = IGNORE_CLIPRECTS;
}
struct brw_batchbuffer *
brw_batchbuffer_alloc(struct brw_winsys_screen *sws)
{
struct brw_batchbuffer *batch = CALLOC_STRUCT(brw_batchbuffer);
batch->sws = sws;
brw_batchbuffer_reset(batch);
return batch;
}
void
brw_batchbuffer_free(struct brw_batchbuffer *batch)
{
if (batch->map) {
dri_bo_unmap(batch->buf);
batch->map = NULL;
}
batch->sws->bo_unreference(batch->buf);
batch->buf = NULL;
FREE(batch);
}
void
_brw_batchbuffer_flush(struct brw_batchbuffer *batch, const char *file,
int line)
{
struct intel_context *intel = batch->intel;
GLuint used = batch->ptr - batch->map;
if (used == 0)
return;
if (intel->first_post_swapbuffers_batch == NULL) {
intel->first_post_swapbuffers_batch = intel->batch->buf;
batch->sws->bo_reference(intel->first_post_swapbuffers_batch);
}
if (INTEL_DEBUG & DEBUG_BATCH)
fprintf(stderr, "%s:%d: Batchbuffer flush with %db used\n", file, line,
used);
/* Emit a flush if the bufmgr doesn't do it for us. */
if (intel->always_flush_cache || !intel->ttm) {
*(GLuint *) (batch->ptr) = intel->vtbl.flush_cmd();
batch->ptr += 4;
used = batch->ptr - batch->map;
}
/* Round batchbuffer usage to 2 DWORDs. */
if ((used & 4) == 0) {
*(GLuint *) (batch->ptr) = 0; /* noop */
batch->ptr += 4;
used = batch->ptr - batch->map;
}
/* Mark the end of the buffer. */
*(GLuint *) (batch->ptr) = MI_BATCH_BUFFER_END; /* noop */
batch->ptr += 4;
used = batch->ptr - batch->map;
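/* Editor's note, worked example with illustrative numbers: if 20 bytes have
 * been emitted, used & 4 != 0, so no noop is added and the 4-byte
 * MI_BATCH_BUFFER_END brings the batch to 24 bytes, a multiple of 8
 * (2 DWORDs).  If 24 bytes had been emitted, the noop first pads to 28 so
 * that the END marker lands the total on 32.
 */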
batch->sws->bo_unmap(batch->buf);
batch->map = NULL;
batch->ptr = NULL;
batch->sws->bo_exec(batch->buf, used, NULL, 0, 0 );
if (INTEL_DEBUG & DEBUG_BATCH) {
dri_bo_map(batch->buf, GL_FALSE);
intel_decode(batch->buf->virtual, used / 4, batch->buf->offset,
brw->brw_screen->pci_id);
dri_bo_unmap(batch->buf);
}
if (INTEL_DEBUG & DEBUG_SYNC) {
fprintf(stderr, "waiting for idle\n");
dri_bo_map(batch->buf, GL_TRUE);
dri_bo_unmap(batch->buf);
}
/* Reset the buffer:
*/
brw_batchbuffer_reset(batch);
}
/* This is the only way buffers get added to the validate list.
*/
GLboolean
brw_batchbuffer_emit_reloc(struct brw_batchbuffer *batch,
struct brw_winsys_buffer *buffer,
uint32_t read_domains, uint32_t write_domain,
uint32_t delta)
{
int ret;
if (batch->ptr - batch->map > batch->buf->size)
_mesa_printf ("bad relocation ptr %p map %p offset %d size %d\n",
batch->ptr, batch->map, batch->ptr - batch->map, batch->buf->size);
ret = batch->sws->bo_emit_reloc(batch->buf,
read_domains,
write_domain,
delta,
batch->ptr - batch->map,
buffer);
/*
* Using the old buffer offset, write in what the right data would be, in case
* the buffer doesn't move and we can short-circuit the relocation processing
* in the kernel
*/
brw_batchbuffer_emit_dword (batch, buffer->offset + delta);
return GL_TRUE;
}
void
brw_batchbuffer_data(struct brw_batchbuffer *batch,
const void *data, GLuint bytes,
enum cliprect_mode cliprect_mode)
{
assert((bytes & 3) == 0);
brw_batchbuffer_require_space(batch, bytes);
__memcpy(batch->ptr, data, bytes);
batch->ptr += bytes;
}
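Editor's note: a minimal caller sketch for the interface this file introduces. The function, packet contents and domain flags are hypothetical; only brw_batchbuffer_data(), brw_batchbuffer_emit_reloc(), IGNORE_CLIPRECTS and I915_GEM_DOMAIN_VERTEX come from this commit, and the int return handling follows the prototypes in brw_batchbuffer.h rather than the void/GLboolean bodies above.

/* Hypothetical caller (not part of this commit): emit a two-DWORD packet,
 * then a relocation to a winsys buffer.  Opcode and domain values are
 * placeholders.
 */
static int emit_example_packet(struct brw_batchbuffer *batch,
                               struct brw_winsys_buffer *bo)
{
   uint32_t packet[2] = { 0, 0 };   /* placeholder opcode/length, pad */
   int ret;

   ret = brw_batchbuffer_data(batch, packet, sizeof(packet),
                              IGNORE_CLIPRECTS);
   if (ret != 0)
      return ret;

   /* Records the relocation at the current batch offset and writes the
    * buffer's presumed offset, so the kernel can skip the fixup if the
    * buffer does not move (see brw_batchbuffer_emit_reloc above).
    */
   return brw_batchbuffer_emit_reloc(batch, bo,
                                     I915_GEM_DOMAIN_VERTEX, 0, 0);
}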

View File

@ -33,18 +33,16 @@ void brw_batchbuffer_reset(struct brw_batchbuffer *batch);
* Consider it a convenience function wrapping multiple
* intel_buffer_dword() calls.
*/
void brw_batchbuffer_data(struct brw_batchbuffer *batch,
int brw_batchbuffer_data(struct brw_batchbuffer *batch,
const void *data, GLuint bytes,
enum cliprect_mode cliprect_mode);
void brw_batchbuffer_release_space(struct brw_batchbuffer *batch,
GLuint bytes);
GLboolean brw_batchbuffer_emit_reloc(struct brw_batchbuffer *batch,
struct brw_winsys_buffer *buffer,
uint32_t read_domains,
uint32_t write_domain,
uint32_t offset);
int brw_batchbuffer_emit_reloc(struct brw_batchbuffer *batch,
struct brw_winsys_buffer *buffer,
uint32_t read_domains,
uint32_t write_domain,
uint32_t offset);
/* Inline functions - might actually be better off with these
* non-inlined. Certainly better off switching all command packets to

View File

@ -57,7 +57,7 @@ static void calc_sane_viewport( const struct pipe_viewport_state *vp,
svp->far = 1;
}
static void prepare_cc_vp( struct brw_context *brw )
static int prepare_cc_vp( struct brw_context *brw )
{
struct brw_cc_viewport ccv;
struct sane_viewport svp;
@ -72,6 +72,8 @@ static void prepare_cc_vp( struct brw_context *brw )
brw->sws->bo_unreference(brw->cc.vp_bo);
brw->cc.vp_bo = brw_cache_data( &brw->cache, BRW_CC_VP, &ccv, NULL, 0 );
return 0;
}
const struct brw_tracked_state brw_cc_vp = {
@ -158,7 +160,7 @@ cc_unit_create_from_key(struct brw_context *brw, struct brw_cc_unit_key *key)
return bo;
}
static void prepare_cc_unit( struct brw_context *brw )
static int prepare_cc_unit( struct brw_context *brw )
{
struct brw_cc_unit_key key;
@ -172,6 +174,8 @@ static void prepare_cc_unit( struct brw_context *brw )
if (brw->cc.state_bo == NULL)
brw->cc.state_bo = cc_unit_create_from_key(brw, &key);
return 0;
}
const struct brw_tracked_state brw_cc_unit = {

View File

@ -146,7 +146,7 @@ static void compile_clip_prog( struct brw_context *brw,
/* Calculate interpolants for triangle and line rasterization.
*/
static void upload_clip_prog(struct brw_context *brw)
static int upload_clip_prog(struct brw_context *brw)
{
struct brw_clip_prog_key key;
@ -173,6 +173,8 @@ static void upload_clip_prog(struct brw_context *brw)
&brw->clip.prog_data);
if (brw->clip.prog_bo == NULL)
compile_clip_prog( brw, &key );
return 0;
}

View File

@ -159,7 +159,7 @@ clip_unit_create_from_key(struct brw_context *brw,
return bo;
}
static void upload_clip_unit( struct brw_context *brw )
static int upload_clip_unit( struct brw_context *brw )
{
struct brw_clip_unit_key key;
@ -173,6 +173,8 @@ static void upload_clip_unit( struct brw_context *brw )
if (brw->clip.state_bo == NULL) {
brw->clip.state_bo = clip_unit_create_from_key(brw, &key);
}
return 0;
}
const struct brw_tracked_state brw_clip_unit = {

View File

@ -105,7 +105,7 @@ struct pipe_context *brw_create_context(struct pipe_screen *screen)
brw->state.dirty.mesa = ~0;
brw->state.dirty.brw = ~0;
brw->emit_state_always = 0;
brw->flags.always_emit_state = 0;
make_empty_list(&brw->query.active_head);

View File

@ -182,6 +182,8 @@ struct brw_fragment_shader {
#define PIPE_NEW_FRAGMENT_CONSTANTS 0x2
#define PIPE_NEW_VERTEX_CONSTANTS 0x2
#define PIPE_NEW_CLIP 0x2
#define PIPE_NEW_INDEX_BUFFER 0x2
#define PIPE_NEW_INDEX_RANGE 0x2
#define BRW_NEW_URB_FENCE 0x1
@ -387,8 +389,8 @@ struct brw_cache {
*/
struct brw_tracked_state {
struct brw_state_flags dirty;
void (*prepare)( struct brw_context *brw );
void (*emit)( struct brw_context *brw );
int (*prepare)( struct brw_context *brw );
int (*emit)( struct brw_context *brw );
};
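Editor's note: prepare() and emit() now return int so that allocation and aperture failures can be propagated up to brw_validate_state()/brw_upload_state() and on to the draw path instead of aborting. A minimal sketch of the intended pattern follows; the atom itself is hypothetical and PIPE_ERROR_OUT_OF_MEMORY is assumed from Gallium's error codes (pipe/p_error.h is already included elsewhere in this commit).

/* Hypothetical state atom illustrating only the new calling convention:
 * prepare() returns 0 on success or a nonzero error code.
 */
static int prepare_example_unit(struct brw_context *brw)
{
   struct brw_winsys_buffer *bo;

   bo = brw->sws->bo_alloc(brw->sws, BRW_BUFFER_TYPE_BATCH, 64, 64);
   if (bo == NULL)
      return PIPE_ERROR_OUT_OF_MEMORY;   /* assumed Gallium error code */

   /* ... fill in and hang the bo off the context here ... */
   brw->sws->bo_unreference(bo);
   return 0;
}

const struct brw_tracked_state brw_example_unit = {
   .dirty = {
      .mesa  = 0,
      .brw   = BRW_NEW_BATCH,
      .cache = 0,
   },
   .prepare = prepare_example_unit,
};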
/* Flags for brw->state.cache.
@ -465,9 +467,7 @@ struct brw_context
GLuint primitive;
GLuint reduced_primitive;
GLboolean emit_state_always;
/* Active vertex program:
/* Active state from the state tracker:
*/
struct {
const struct brw_vertex_shader *vertex_shader;
@ -475,11 +475,31 @@ struct brw_context
const struct brw_blend_state *blend;
const struct brw_rasterizer_state *rast;
const struct brw_depth_stencil_alpha_state *zstencil;
struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS];
struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS];
unsigned num_vertex_elements;
unsigned num_vertex_buffers;
struct pipe_framebuffer_state fb;
struct pipe_viewport_state vp;
struct pipe_clip_state ucp;
struct pipe_buffer *vertex_constants;
struct pipe_buffer *fragment_constants;
/**
* Index buffer for this draw_prims call.
*
* Updates are signaled by PIPE_NEW_INDEX_BUFFER.
*/
struct pipe_buffer *index_buffer;
unsigned index_size;
/* Updates are signalled by PIPE_NEW_INDEX_RANGE:
*/
unsigned min_index;
unsigned max_index;
} curr;
struct {
@ -504,30 +524,26 @@ struct brw_context
struct brw_cached_batch_item *cached_batch_items;
struct {
struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS];
struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS];
unsigned num_vertex_element;
unsigned num_vertex_buffer;
struct u_upload_mgr *upload_vertex;
struct u_upload_mgr *upload_index;
/* Summary of size and varying of active arrays, so we can check
* for changes to this state:
/* Information on uploaded vertex buffers:
*/
struct brw_vertex_info info;
unsigned int min_index, max_index;
struct {
unsigned stride; /* in bytes between successive vertices */
unsigned offset; /* in bytes, of first vertex in bo */
unsigned vertex_count; /* count of valid vertices which may be accessed */
struct brw_winsys_buffer *bo;
} vb[PIPE_MAX_ATTRIBS];
struct {
} ve[PIPE_MAX_ATTRIBS];
unsigned nr_vb; /* currently the same as curr.num_vertex_buffers */
unsigned nr_ve; /* currently the same as curr.num_vertex_elements */
} vb;
struct {
/**
* Index buffer for this draw_prims call.
*
* Updates are signaled by BRW_NEW_INDICES.
*/
const struct _mesa_index_buffer *ib;
/* Updates to these fields are signaled by BRW_NEW_INDEX_BUFFER. */
struct brw_winsys_buffer *bo;
unsigned int offset;
@ -668,6 +684,14 @@ struct brw_context
int index;
GLboolean active;
} query;
struct {
unsigned always_emit_state:1;
unsigned always_flush_batch:1;
unsigned force_swtnl:1;
unsigned no_swtnl:1;
} flags;
/* Used to give every program string a unique id
*/
GLuint program_id;

View File

@ -48,7 +48,7 @@
* constants. That greatly reduces the demand for space in the CURBE.
* Some of the comments within are dated...
*/
static void calculate_curbe_offsets( struct brw_context *brw )
static int calculate_curbe_offsets( struct brw_context *brw )
{
/* CACHE_NEW_WM_PROG */
const GLuint nr_fp_regs = (brw->wm.prog_data->nr_params + 15) / 16;
@ -104,6 +104,8 @@ static void calculate_curbe_offsets( struct brw_context *brw )
brw->state.dirty.brw |= BRW_NEW_CURBE_OFFSETS;
}
return 0;
}
@ -157,7 +159,7 @@ static GLfloat fixed_plane[6][4] = {
* cache mechanism, but maybe would benefit from a comparison against
* the current uploaded set of constants.
*/
static void prepare_constant_buffer(struct brw_context *brw)
static int prepare_constant_buffer(struct brw_context *brw)
{
const GLuint sz = brw->curbe.total_size;
const GLuint bufsz = sz * 16 * sizeof(GLfloat);
@ -170,7 +172,7 @@ static void prepare_constant_buffer(struct brw_context *brw)
brw->curbe.last_buf = NULL;
brw->curbe.last_bufsz = 0;
}
return;
return 0;
}
buf = (GLfloat *) CALLOC(bufsz, 1);
@ -305,9 +307,11 @@ static void prepare_constant_buffer(struct brw_context *brw)
* flushes as necessary when doublebuffering of CURBEs isn't
* possible.
*/
return 0;
}
static void emit_constant_buffer(struct brw_context *brw)
static int emit_constant_buffer(struct brw_context *brw)
{
GLuint sz = brw->curbe.total_size;
@ -322,6 +326,7 @@ static void emit_constant_buffer(struct brw_context *brw)
(sz - 1) + brw->curbe.curbe_offset);
}
ADVANCE_BATCH();
return 0;
}
const struct brw_tracked_state brw_constant_buffer = {

View File

@ -26,15 +26,18 @@
**************************************************************************/
#include "util/u_prim.h"
#include "util/u_upload_mgr.h"
#include "brw_draw.h"
#include "brw_defines.h"
#include "brw_context.h"
#include "brw_state.h"
#include "brw_debug.h"
#include "brw_screen.h"
#include "brw_batchbuffer.h"
#define FILE_DEBUG_FLAG DEBUG_BATCH
static uint32_t prim_to_hw_prim[PIPE_PRIM_POLYGON+1] = {
_3DPRIM_POINTLIST,
@ -56,18 +59,21 @@ static uint32_t prim_to_hw_prim[PIPE_PRIM_POLYGON+1] = {
* programs be immune to the active primitive (ie. cope with all
* possibilities). That may not be realistic however.
*/
static GLuint brw_set_prim(struct brw_context *brw, unsigned prim)
static int brw_set_prim(struct brw_context *brw, unsigned prim )
{
if (BRW_DEBUG & DEBUG_PRIMS)
debug_printf("PRIM: %s\n", u_prim_name(prim));
if (prim != brw->primitive) {
unsigned reduced_prim;
brw->primitive = prim;
brw->state.dirty.brw |= BRW_NEW_PRIMITIVE;
if (reduced_prim[prim] != brw->reduced_primitive) {
brw->reduced_primitive = reduced_prim[prim];
reduced_prim = u_reduced_prim(prim);
if (reduced_prim != brw->reduced_primitive) {
brw->reduced_primitive = reduced_prim;
brw->state.dirty.brw |= BRW_NEW_REDUCED_PRIMITIVE;
}
}
@ -77,17 +83,14 @@ static GLuint brw_set_prim(struct brw_context *brw, unsigned prim)
static enum pipe_error brw_emit_prim(struct brw_context *brw,
unsigned prim,
unsigned start,
unsigned count,
boolean indexed,
uint32_t hw_prim)
static int brw_emit_prim(struct brw_context *brw,
unsigned start,
unsigned count,
boolean indexed,
uint32_t hw_prim)
{
struct brw_3d_primitive prim_packet;
if (INTEL_DEBUG & DEBUG_PRIMS)
debug_printf("PRIM: %s %d %d\n", u_prim_name(prim), start, count);
int ret;
prim_packet.header.opcode = CMD_3D_PRIM;
prim_packet.header.length = sizeof(prim_packet)/4 - 2;
@ -101,7 +104,7 @@ static enum pipe_error brw_emit_prim(struct brw_context *brw,
prim_packet.start_vert_location += brw->ib.start_vertex_offset;
prim_packet.instance_count = 1;
prim_packet.start_instance_location = 0;
prim_packet.base_vert_location = prim->basevertex;
prim_packet.base_vert_location = 0; // prim->basevertex; XXX: add this to gallium
/* If we're set to always flush, do it before and after the primitive emit.
@ -109,20 +112,20 @@ static enum pipe_error brw_emit_prim(struct brw_context *brw,
* and missed flushes of the render cache as it heads to other parts of
* the GPU besides the draw code.
*/
if (intel->always_flush_cache) {
BEGIN_BATCH(1, IGNORE_CLIPRECTS)
OUT_BATCH(intel->vtbl.flush_cmd());
if (0) {
BEGIN_BATCH(1, IGNORE_CLIPRECTS);
OUT_BATCH((CMD_MI_FLUSH << 16) | BRW_FLUSH_STATE_CACHE);
ADVANCE_BATCH();
}
if (prim_packet.verts_per_instance) {
ret = brw_batchbuffer_data( brw->intel.batch, &prim_packet,
ret = brw_batchbuffer_data( brw->batch, &prim_packet,
sizeof(prim_packet), LOOP_CLIPRECTS);
if (ret)
return ret;
}
if (intel->always_flush_cache) {
if (0) {
BEGIN_BATCH(1, IGNORE_CLIPRECTS);
OUT_BATCH(intel->vtbl.flush_cmd());
OUT_BATCH((CMD_MI_FLUSH << 16) | BRW_FLUSH_STATE_CACHE);
ADVANCE_BATCH();
}
@ -133,44 +136,24 @@ static enum pipe_error brw_emit_prim(struct brw_context *brw,
/* May fail if out of video memory for texture or vbo upload, or on
* fallback conditions.
*/
static GLboolean brw_try_draw_prims( struct brw_context *brw,
const struct gl_client_array *arrays[],
const struct _mesa_prim *prim,
GLuint nr_prims,
const struct _mesa_index_buffer *ib,
GLuint min_index,
GLuint max_index )
static int
try_draw_range_elements(struct brw_context *brw,
struct pipe_buffer *index_buffer,
unsigned hw_prim,
unsigned start, unsigned count)
{
struct brw_context *brw = brw_context(ctx);
GLboolean retval = GL_FALSE;
GLboolean warn = GL_FALSE;
GLboolean first_time = GL_TRUE;
uint32_t hw_prim;
GLuint i;
int ret;
if (ctx->NewState)
_mesa_update_state( ctx );
/* Bind all inputs, derive varying and size information:
*/
brw_merge_inputs( brw, arrays );
brw->ib.ib = ib;
brw->state.dirty.brw |= BRW_NEW_INDICES;
brw->vb.min_index = min_index;
brw->vb.max_index = max_index;
brw->state.dirty.brw |= BRW_NEW_VERTICES;
hw_prim = brw_set_prim(brw, prim[i].mode);
brw_validate_state(brw);
ret = brw_validate_state(brw);
if (ret)
return ret;
/* Check that we can fit our state in with our existing batchbuffer, or
* flush otherwise.
*/
ret = dri_bufmgr_check_aperture_space(brw->state.validated_bos,
brw->state.validated_bo_count);
ret = brw->sws->check_aperture_space(brw->sws,
brw->state.validated_bos,
brw->state.validated_bo_count);
if (ret)
return ret;
@ -178,12 +161,12 @@ static GLboolean brw_try_draw_prims( struct brw_context *brw,
if (ret)
return ret;
ret = brw_emit_prim(brw, &prim[i], hw_prim);
ret = brw_emit_prim(brw, start, count, index_buffer != NULL, hw_prim);
if (ret)
return ret;
if (intel->always_flush_batch)
brw_batchbuffer_flush(intel->batch);
if (brw->flags.always_flush_batch)
brw_batchbuffer_flush(brw->batch);
return 0;
}
@ -197,22 +180,45 @@ brw_draw_range_elements(struct pipe_context *pipe,
unsigned max_index,
unsigned mode, unsigned start, unsigned count)
{
enum pipe_error ret;
struct brw_context *brw = brw_context(pipe);
int ret;
uint32_t hw_prim;
if (!vbo_all_varyings_in_vbos(arrays)) {
if (!index_bounds_valid)
vbo_get_minmax_index(ctx, prim, ib, &min_index, &max_index);
hw_prim = brw_set_prim(brw, mode);
if (BRW_DEBUG & DEBUG_PRIMS)
debug_printf("PRIM: %s %d %d\n", u_prim_name(mode), start, count);
/* Potentially trigger upload of new index buffer.
*
* XXX: do we need to go through state validation to achieve this?
* Could just call upload code directly.
*/
if (brw->curr.index_buffer != index_buffer) {
pipe_buffer_reference( &brw->curr.index_buffer, index_buffer );
brw->state.dirty.mesa |= PIPE_NEW_INDEX_BUFFER;
}
/* XXX: do we really care?
*/
if (brw->curr.min_index != min_index ||
brw->curr.max_index != max_index)
{
brw->curr.min_index = min_index;
brw->curr.max_index = max_index;
brw->state.dirty.mesa |= PIPE_NEW_INDEX_RANGE;
}
/* Make a first attempt at drawing:
*/
ret = brw_try_draw_prims(ctx, arrays, prim, nr_prims, ib, min_index, max_index);
ret = try_draw_range_elements(brw, index_buffer, hw_prim, start, count );
/* Otherwise, flush and retry:
*/
if (ret != 0) {
brw_batchbuffer_flush(intel->batch);
ret = brw_try_draw_prims(ctx, arrays, prim, nr_prims, ib, min_index, max_index);
brw_batchbuffer_flush(brw->batch);
ret = try_draw_range_elements(brw, index_buffer, hw_prim, start, count );
assert(ret == 0);
}
@ -242,28 +248,37 @@ brw_draw_arrays(struct pipe_context *pipe, unsigned mode,
void brw_draw_init( struct brw_context *brw )
boolean brw_draw_init( struct brw_context *brw )
{
/* Register our drawing function:
*/
brw->base.draw_arrays = brw_draw_arrays;
brw->base.draw_elements = brw_draw_elements;
brw->base.draw_range_elements = brw_draw_range_elements;
/* Create helpers for uploading data in user buffers:
*/
brw->vb.upload_vertex = u_upload_create( &brw->brw_screen->base,
128 * 1024,
64,
PIPE_BUFFER_USAGE_VERTEX );
if (brw->vb.upload_vertex == NULL)
return FALSE;
brw->vb.upload_index = u_upload_create( &brw->brw_screen->base,
128 * 1024,
64,
PIPE_BUFFER_USAGE_INDEX );
if (brw->vb.upload_index == NULL)
return FALSE;
return TRUE;
}
void brw_draw_destroy( struct brw_context *brw )
void brw_draw_cleanup( struct brw_context *brw )
{
int i;
if (brw->vb.upload.bo != NULL) {
brw->sws->bo_unreference(brw->vb.upload.bo);
brw->vb.upload.bo = NULL;
}
for (i = 0; i < VERT_ATTRIB_MAX; i++) {
brw->sws->bo_unreference(brw->vb.inputs[i].bo);
brw->vb.inputs[i].bo = NULL;
}
u_upload_destroy( brw->vb.upload_vertex );
u_upload_destroy( brw->vb.upload_index );
brw->sws->bo_unreference(brw->ib.bo);
brw->ib.bo = NULL;

View File

@ -32,8 +32,7 @@
struct brw_context;
void brw_draw_init( struct brw_context *brw );
boolean brw_draw_init( struct brw_context *brw );
void brw_draw_cleanup( struct brw_context *brw );

View File

@ -26,21 +26,23 @@
**************************************************************************/
#include "pipe/p_context.h"
#include "pipe/p_error.h"
#include "util/u_upload_mgr.h"
#include "util/u_math.h"
#include "brw_draw.h"
#include "brw_defines.h"
#include "brw_context.h"
#include "brw_state.h"
#include "brw_fallback.h"
#include "brw_screen.h"
#include "brw_batchbuffer.h"
#include "brw_debug.h"
unsigned brw_translate_surface_format( unsigned id )
static unsigned brw_translate_surface_format( unsigned id )
{
switch (id) {
case PIPE_FORMAT_R64_FLOAT:
@ -186,70 +188,136 @@ static unsigned get_index_type(int type)
}
static boolean brw_prepare_vertices(struct brw_context *brw)
static int brw_prepare_vertices(struct brw_context *brw)
{
GLbitfield vs_inputs = brw->vs.prog_data->inputs_read;
unsigned int min_index = brw->curr.min_index;
unsigned int max_index = brw->curr.max_index;
GLuint i;
const unsigned char *ptr = NULL;
GLuint interleave = 0;
unsigned int min_index = brw->vb.min_index;
unsigned int max_index = brw->vb.max_index;
int ret;
struct brw_vertex_element *upload[VERT_ATTRIB_MAX];
GLuint nr_uploads = 0;
/* First build an array of pointers to ve's in vb.inputs_read
*/
if (0)
_mesa_printf("%s %d..%d\n", __FUNCTION__, min_index, max_index);
if (BRW_DEBUG & DEBUG_VERTS)
debug_printf("%s %d..%d\n", __FUNCTION__, min_index, max_index);
for (i = 0; i < brw->curr.num_vertex_buffers; i++) {
struct pipe_vertex_buffer *vb = &brw->curr.vertex_buffer[i];
struct brw_winsys_buffer *bo;
struct pipe_buffer *upload_buf;
unsigned offset;
if (BRW_DEBUG & DEBUG_VERTS)
debug_printf("%s vb[%d] user:%d offset:0x%x sz:0x%x stride:0x%x\n",
__FUNCTION__, i,
brw_buffer_is_user_buffer(vb->buffer),
vb->buffer_offset,
vb->buffer->size,
vb->stride);
for (i = 0; i < brw->vb.num_vertex_buffer; i++) {
struct brw_vertex_buffer *vb = brw->vb.vertex_buffer[i];
unsigned size = (vb->stride == 0 ?
vb->size :
vb->stride * (max_index + 1 - min_index));
if (brw_buffer_is_user_buffer(vb->buffer)) {
/* XXX: simplify this. Stop the state trackers from generating
* zero-stride buffers & have them use additional constants (or
* add support for >1 constant buffer) instead.
*/
unsigned size = (vb->stride == 0 ?
vb->buffer->size - vb->buffer_offset :
MAX2(vb->buffer->size - vb->buffer_offset,
vb->stride * (max_index + 1 - min_index)));
if (brw_buffer_is_user_buffer(vb->buffer)) {
u_upload_buffer( brw->upload_vertex,
min_index * vb->stride,
size,
&offset,
&buffer );
ret = u_upload_buffer( brw->vb.upload_vertex,
vb->buffer_offset + min_index * vb->stride,
size,
vb->buffer,
&offset,
&upload_buf );
if (ret)
return ret;
bo = brw_buffer(upload_buf)->bo;
assert(offset + size <= bo->size);
}
else
{
offset = 0;
buffer = vb->buffer;
offset = vb->buffer_offset;
bo = brw_buffer(vb->buffer)->bo;
}
assert(offset < bo->size);
/* Set up post-upload info about this vertex buffer:
*/
input->offset = (unsigned long)offset;
input->stride = vb->stride;
input->count = count;
brw->sws->bo_unreference(input->bo);
input->bo = intel_bufferobj_buffer(intel, intel_buffer,
INTEL_READ);
brw->sws->bo_reference(input->bo);
brw->vb.vb[i].offset = offset;
brw->vb.vb[i].stride = vb->stride;
brw->vb.vb[i].vertex_count = (vb->stride == 0 ?
1 :
(bo->size - offset) / vb->stride);
brw->sws->bo_unreference(brw->vb.vb[i].bo);
brw->vb.vb[i].bo = bo;
brw->sws->bo_reference(brw->vb.vb[i].bo);
assert(input->offset < input->bo->size);
assert(input->offset + size <= input->bo->size);
/* Don't need to retain this reference. We have a reference on
* the underlying winsys buffer:
*/
pipe_buffer_reference( &upload_buf, NULL );
}
brw->vb.nr_vb = i;
brw_prepare_query_begin(brw);
for (i = 0; i < brw->vb.nr_enabled; i++) {
struct brw_vertex_element *input = brw->vb.enabled[i];
brw_add_validated_bo(brw, input->bo);
for (i = 0; i < brw->vb.nr_vb; i++) {
brw_add_validated_bo(brw, brw->vb.vb[i].bo);
}
return 0;
}
static void brw_emit_vertices(struct brw_context *brw)
static int brw_emit_vertex_buffers( struct brw_context *brw )
{
int i;
/* If the VS doesn't read any inputs (calculating vertex position from
* a state variable for some reason, for example), just bail.
*
* The stale VB state stays in place, but they don't do anything unless
* a VE loads from them.
*/
if (brw->vb.nr_vb == 0) {
if (BRW_DEBUG & DEBUG_VERTS)
debug_printf("%s: no active vertex buffers\n", __FUNCTION__);
return 0;
}
/* Emit VB state packets.
*/
BEGIN_BATCH(1 + brw->vb.nr_vb * 4, IGNORE_CLIPRECTS);
OUT_BATCH((CMD_VERTEX_BUFFER << 16) |
((1 + brw->vb.nr_vb * 4) - 2));
for (i = 0; i < brw->vb.nr_vb; i++) {
OUT_BATCH((i << BRW_VB0_INDEX_SHIFT) |
BRW_VB0_ACCESS_VERTEXDATA |
(brw->vb.vb[i].stride << BRW_VB0_PITCH_SHIFT));
OUT_RELOC(brw->vb.vb[i].bo,
I915_GEM_DOMAIN_VERTEX, 0,
brw->vb.vb[i].offset);
if (BRW_IS_IGDNG(brw)) {
OUT_RELOC(brw->vb.vb[i].bo,
I915_GEM_DOMAIN_VERTEX, 0,
brw->vb.vb[i].bo->size - 1);
} else
OUT_BATCH(brw->vb.vb[i].stride ? brw->vb.vb[i].vertex_count : 0);
OUT_BATCH(0); /* Instance data step rate */
}
ADVANCE_BATCH();
return 0;
}
static int brw_emit_vertex_elements(struct brw_context *brw)
{
GLuint i;
@ -262,7 +330,7 @@ static void brw_emit_vertices(struct brw_context *brw)
* The stale VB state stays in place, but they don't do anything unless
* a VE loads from them.
*/
if (brw->vb.nr_enabled == 0) {
if (brw->vb.nr_ve == 0) {
BEGIN_BATCH(3, IGNORE_CLIPRECTS);
OUT_BATCH((CMD_VERTEX_ELEMENT << 16) | 1);
OUT_BATCH((0 << BRW_VE0_INDEX_SHIFT) |
@ -274,59 +342,23 @@ static void brw_emit_vertices(struct brw_context *brw)
(BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) |
(BRW_VE1_COMPONENT_STORE_1_FLT << BRW_VE1_COMPONENT_3_SHIFT));
ADVANCE_BATCH();
return;
return 0;
}
/* Now emit VB and VEP state packets.
/* Now emit vertex element (VEP) state packets.
*
* This still defines a hardware VB for each input, even if they
* are interleaved or from the same VBO. TBD if this makes a
* performance difference.
*/
BEGIN_BATCH(1 + brw->vb.nr_enabled * 4, IGNORE_CLIPRECTS);
OUT_BATCH((CMD_VERTEX_BUFFER << 16) |
((1 + brw->vb.nr_enabled * 4) - 2));
for (i = 0; i < brw->vb.nr_enabled; i++) {
struct brw_vertex_element *input = brw->vb.enabled[i];
OUT_BATCH((i << BRW_VB0_INDEX_SHIFT) |
BRW_VB0_ACCESS_VERTEXDATA |
(input->stride << BRW_VB0_PITCH_SHIFT));
OUT_RELOC(input->bo,
I915_GEM_DOMAIN_VERTEX, 0,
input->offset);
if (BRW_IS_IGDNG(brw)) {
if (input->stride) {
OUT_RELOC(input->bo,
I915_GEM_DOMAIN_VERTEX, 0,
input->offset + input->stride * input->count - 1);
} else {
assert(input->count == 1);
OUT_RELOC(input->bo,
I915_GEM_DOMAIN_VERTEX, 0,
input->offset + input->element_size - 1);
}
} else
OUT_BATCH(input->stride ? input->count : 0);
OUT_BATCH(0); /* Instance data step rate */
}
ADVANCE_BATCH();
BEGIN_BATCH(1 + brw->vb.nr_enabled * 2, IGNORE_CLIPRECTS);
OUT_BATCH((CMD_VERTEX_ELEMENT << 16) | ((1 + brw->vb.nr_enabled * 2) - 2));
for (i = 0; i < brw->vb.nr_enabled; i++) {
struct brw_vertex_element *input = brw->vb.enabled[i];
uint32_t format = get_surface_type(input->glarray->Type,
input->glarray->Size,
input->glarray->Format,
input->glarray->Normalized);
BEGIN_BATCH(1 + brw->curr.num_vertex_elements * 2, IGNORE_CLIPRECTS);
OUT_BATCH((CMD_VERTEX_ELEMENT << 16) | ((1 + brw->vb.nr_ve * 2) - 2));
for (i = 0; i < brw->vb.nr_ve; i++) {
const struct pipe_vertex_element *input = &brw->curr.vertex_element[i];
uint32_t format = brw_translate_surface_format( input->src_format );
uint32_t comp0 = BRW_VE1_COMPONENT_STORE_SRC;
uint32_t comp1 = BRW_VE1_COMPONENT_STORE_SRC;
uint32_t comp2 = BRW_VE1_COMPONENT_STORE_SRC;
uint32_t comp3 = BRW_VE1_COMPONENT_STORE_SRC;
switch (input->glarray->Size) {
switch (input->nr_components) {
case 0: comp0 = BRW_VE1_COMPONENT_STORE_0;
case 1: comp1 = BRW_VE1_COMPONENT_STORE_0;
case 2: comp2 = BRW_VE1_COMPONENT_STORE_0;
@ -352,11 +384,29 @@ static void brw_emit_vertices(struct brw_context *brw)
((i * 4) << BRW_VE1_DST_OFFSET_SHIFT));
}
ADVANCE_BATCH();
return 0;
}
static int brw_emit_vertices( struct brw_context *brw )
{
int ret;
ret = brw_emit_vertex_buffers( brw );
if (ret)
return ret;
ret = brw_emit_vertex_elements( brw );
if (ret)
return ret;
return 0;
}
const struct brw_tracked_state brw_vertices = {
.dirty = {
.mesa = 0,
.mesa = PIPE_NEW_INDEX_RANGE,
.brw = BRW_NEW_BATCH | BRW_NEW_VERTICES,
.cache = 0,
},
@ -364,104 +414,106 @@ const struct brw_tracked_state brw_vertices = {
.emit = brw_emit_vertices,
};
static void brw_prepare_indices(struct brw_context *brw)
static int brw_prepare_indices(struct brw_context *brw)
{
const struct _mesa_index_buffer *index_buffer = brw->ib.ib;
GLuint ib_size;
struct pipe_buffer *index_buffer = brw->curr.index_buffer;
struct brw_winsys_buffer *bo = NULL;
struct gl_buffer_object *bufferobj;
GLuint offset;
GLuint ib_type_size;
GLuint index_size;
GLuint ib_size;
int ret;
if (index_buffer == NULL)
return;
return 0;
ib_type_size = get_size(index_buffer->type);
ib_size = ib_type_size * index_buffer->count;
bufferobj = index_buffer->obj;;
if (BRW_DEBUG & DEBUG_VERTS)
debug_printf("%s: index_size:%d index_buffer->size:%d\n",
__FUNCTION__,
brw->curr.index_size,
brw->curr.index_buffer->size);
/* Turn into a proper VBO:
ib_size = index_buffer->size;
index_size = brw->curr.index_size;
/* Turn userbuffer into a proper hardware buffer?
*/
if (!_mesa_is_bufferobj(bufferobj)) {
brw->ib.start_vertex_offset = 0;
if (brw_buffer_is_user_buffer(index_buffer)) {
struct pipe_buffer *upload_buf;
/* Get new bufferobj, offset:
ret = u_upload_buffer( brw->vb.upload_index,
0,
ib_size,
index_buffer,
&offset,
&upload_buf );
if (ret)
return ret;
bo = brw_buffer(upload_buf)->bo;
brw->sws->bo_reference(bo);
pipe_buffer_reference( &upload_buf, NULL );
/* XXX: annotate the userbuffer with the upload information so
* that successive calls don't get re-uploaded.
*/
get_space(brw, ib_size, &bo, &offset);
/* Straight upload
*/
brw_bo_subdata(bo, offset, ib_size, index_buffer->ptr);
} else {
offset = (GLuint) (unsigned long) index_buffer->ptr;
brw->ib.start_vertex_offset = 0;
/* If the index buffer isn't aligned to its element size, we have to
* rebase it into a temporary.
*/
if ((get_size(index_buffer->type) - 1) & offset) {
GLubyte *map = ctx->Driver.MapBuffer(ctx,
GL_ELEMENT_ARRAY_BUFFER_ARB,
GL_DYNAMIC_DRAW_ARB,
bufferobj);
map += offset;
get_space(brw, ib_size, &bo, &offset);
dri_bo_subdata(bo, offset, ib_size, map);
ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER_ARB, bufferobj);
} else {
bo = intel_bufferobj_buffer(intel, intel_buffer_object(bufferobj),
INTEL_READ);
brw->sws->bo_reference(bo);
/* Use CMD_3D_PRIM's start_vertex_offset to avoid re-uploading
* the index buffer state when we're just moving the start index
* of our drawing.
*/
brw->ib.start_vertex_offset = offset / ib_type_size;
offset = 0;
ib_size = bo->size;
}
}
else {
bo = brw_buffer(index_buffer)->bo;
brw->sws->bo_reference(bo);
ib_size = bo->size;
offset = 0;
}
/* Use CMD_3D_PRIM's start_vertex_offset to avoid re-uploading the
* index buffer state when we're just moving the start index of our
* drawing.
*
* In gallium this will happen in the case where successive draw
* calls are made with (distinct?) userbuffers, but the upload_mgr
* places the data into a single winsys buffer.
*
* This statechange doesn't raise any state flags and is always
* just merged into the final draw packet:
*/
if (1) {
assert((offset & (index_size - 1)) == 0);
brw->ib.start_vertex_offset = offset / index_size;
}
/* These statechanges trigger a new CMD_INDEX_BUFFER packet:
*/
if (brw->ib.bo != bo ||
brw->ib.offset != offset ||
brw->ib.size != ib_size)
{
drm_intel_bo_unreference(brw->ib.bo);
brw->sws->bo_unreference(brw->ib.bo);
brw->ib.bo = bo;
brw->ib.offset = offset;
brw->ib.size = ib_size;
brw->state.dirty.brw |= BRW_NEW_INDEX_BUFFER;
} else {
drm_intel_bo_unreference(bo);
}
else {
brw->sws->bo_unreference(bo);
}
brw_add_validated_bo(brw, brw->ib.bo);
return 0;
}
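Editor's note, a worked example of the start_vertex_offset handling above (numbers are hypothetical): if u_upload_buffer() places a 16-bit user index array at byte offset 512 of its winsys buffer, index_size is 2 and start_vertex_offset becomes 512 / 2 = 256. brw_emit_prim() later adds that value to prim_packet.start_vert_location, so moving the start index of a draw only changes the CMD_3D_PRIM packet and does not by itself force a new CMD_INDEX_BUFFER packet.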
const struct brw_tracked_state brw_indices = {
.dirty = {
.mesa = 0,
.brw = BRW_NEW_INDICES,
.mesa = PIPE_NEW_INDEX_BUFFER,
.brw = 0,
.cache = 0,
},
.prepare = brw_prepare_indices,
};
static void brw_emit_index_buffer(struct brw_context *brw)
static int brw_emit_index_buffer(struct brw_context *brw)
{
const struct _mesa_index_buffer *index_buffer = brw->ib.ib;
if (index_buffer == NULL)
return;
/* Emit the indexbuffer packet:
*/
if (brw->ib.bo)
{
struct brw_indexbuffer ib;
@ -469,7 +521,7 @@ static void brw_emit_index_buffer(struct brw_context *brw)
ib.header.bits.opcode = CMD_INDEX_BUFFER;
ib.header.bits.length = sizeof(ib)/4 - 2;
ib.header.bits.index_format = get_index_type(index_buffer->type);
ib.header.bits.index_format = get_index_type(brw->ib.size);
ib.header.bits.cut_index_enable = 0;
BEGIN_BATCH(4, IGNORE_CLIPRECTS);
@ -483,6 +535,8 @@ static void brw_emit_index_buffer(struct brw_context *brw)
OUT_BATCH( 0 );
ADVANCE_BATCH();
}
return 0;
}
const struct brw_tracked_state brw_index_buffer = {

View File

@ -29,6 +29,7 @@
* Keith Whitwell <keith@tungstengraphics.com>
*/
#include "util/u_memory.h"
#include "brw_context.h"
#include "brw_defines.h"
@ -237,7 +238,7 @@ brw_resolve_cals(struct brw_compile *c)
struct brw_glsl_call *call, *next;
for (call = c->first_call; call; call = next) {
next = call->next;
_mesa_free(call);
FREE(call);
}
c->first_call = NULL;
}
@ -247,7 +248,7 @@ brw_resolve_cals(struct brw_compile *c)
struct brw_glsl_label *label, *next;
for (label = c->first_label; label; label = next) {
next = label->next;
_mesa_free(label);
FREE(label);
}
c->first_label = NULL;
}

View File

@ -28,7 +28,8 @@
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
#include "util/u_debug.h"
#include "brw_eu.h"
@ -52,7 +53,7 @@ void brw_print_reg( struct brw_reg hwreg )
"f"
};
_mesa_printf("%s%s",
debug_printf("%s%s",
hwreg.abs ? "abs/" : "",
hwreg.negate ? "-" : "");
@ -64,7 +65,7 @@ void brw_print_reg( struct brw_reg hwreg )
hwreg.hstride == BRW_HORIZONTAL_STRIDE_1 &&
hwreg.type == BRW_REGISTER_TYPE_F) {
/* vector register */
_mesa_printf("vec%d", hwreg.nr);
debug_printf("vec%d", hwreg.nr);
}
else if (hwreg.file == BRW_GENERAL_REGISTER_FILE &&
hwreg.vstride == BRW_VERTICAL_STRIDE_0 &&
@ -72,13 +73,13 @@ void brw_print_reg( struct brw_reg hwreg )
hwreg.hstride == BRW_HORIZONTAL_STRIDE_0 &&
hwreg.type == BRW_REGISTER_TYPE_F) {
/* "scalar" register */
_mesa_printf("scl%d.%d", hwreg.nr, hwreg.subnr / 4);
debug_printf("scl%d.%d", hwreg.nr, hwreg.subnr / 4);
}
else if (hwreg.file == BRW_IMMEDIATE_VALUE) {
_mesa_printf("imm %f", hwreg.dw1.f);
debug_printf("imm %f", hwreg.dw1.f);
}
else {
_mesa_printf("%s%d.%d<%d;%d,%d>:%s",
debug_printf("%s%d.%d<%d;%d,%d>:%s",
file[hwreg.file],
hwreg.nr,
hwreg.subnr / type_sz(hwreg.type),

View File

@ -315,24 +315,20 @@ const struct brw_tracked_state brw_polygon_stipple = {
static void upload_polygon_stipple_offset(struct brw_context *brw)
{
__DRIdrawablePrivate *dPriv = brw->intel.driDrawable;
struct brw_polygon_stipple_offset bpso;
memset(&bpso, 0, sizeof(bpso));
bpso.header.opcode = CMD_POLY_STIPPLE_OFFSET;
bpso.header.length = sizeof(bpso)/4-2;
/* If we're drawing to a system window (ctx->DrawBuffer->Name == 0),
* we have to invert the Y axis in order to match the OpenGL
* pixel coordinate system, and our offset must be matched
* to the window position. If we're drawing to a FBO
* (ctx->DrawBuffer->Name != 0), then our native pixel coordinate
* system works just fine, and there's no window system to
* worry about.
/* Never need to offset stipple coordinates.
*
* XXX: is it ever necessary to invert Y values?
*/
if (brw->intel.ctx.DrawBuffer->Name == 0) {
bpso.bits0.x_offset = (32 - (dPriv->x & 31)) & 31;
bpso.bits0.y_offset = (32 - ((dPriv->y + dPriv->h) & 31)) & 31;
if (0) {
int x = 0, y = 0, h = 0;
bpso.bits0.x_offset = (32 - (x & 31)) & 31;
bpso.bits0.y_offset = (32 - ((y + h) & 31)) & 31;
}
else {
bpso.bits0.y_offset = 0;

View File

@ -53,6 +53,9 @@ static void brw_note_fence( struct brw_context *brw, GLuint fence )
static GLuint brw_flush_cmd( void )
{
struct brw_mi_flush flush;
return 0;
flush.opcode = CMD_MI_FLUSH;
flush.pad = 0;
flush.flags = BRW_FLUSH_STATE_CACHE;

View File

@ -33,6 +33,25 @@
#include "brw_util.h"
#include "brw_wm.h"
/**
* Determine if the given fragment program uses GLSL features such
* as flow conditionals, loops, subroutines.
* Some GLSL shaders may use these features, others might not.
*/
GLboolean brw_wm_is_glsl(const struct brw_fragment_shader *fp)
{
return (fp->info.insn_count[TGSI_OPCODE_ARL] > 0 ||
fp->info.insn_count[TGSI_OPCODE_IF] > 0 ||
fp->info.insn_count[TGSI_OPCODE_ENDIF] > 0 || /* redundant - IF */
fp->info.insn_count[TGSI_OPCODE_CAL] > 0 ||
fp->info.insn_count[TGSI_OPCODE_BRK] > 0 || /* redundant - BGNLOOP */
fp->info.insn_count[TGSI_OPCODE_RET] > 0 || /* redundant - CAL */
fp->info.insn_count[TGSI_OPCODE_BGNLOOP] > 0);
}
static void brwBindProgram( struct brw_context *brw,
GLenum target,
struct gl_program *prog )

View File

@ -1,26 +1,11 @@
static void brw_merge_inputs( struct brw_context *brw,
const struct gl_client_array *arrays[])
void
brw_pipe_vertex_cleanup( struct brw_context *brw )
{
struct brw_vertex_info old = brw->vb.info;
GLuint i;
for (i = 0; i < VERT_ATTRIB_MAX; i++)
brw->sws->bo_unreference(brw->vb.inputs[i].bo);
memset(&brw->vb.inputs, 0, sizeof(brw->vb.inputs));
memset(&brw->vb.info, 0, sizeof(brw->vb.info));
for (i = 0; i < VERT_ATTRIB_MAX; i++) {
brw->vb.inputs[i].glarray = arrays[i];
brw->vb.inputs[i].attrib = (gl_vert_attrib) i;
if (arrays[i]->StrideB != 0)
brw->vb.info.sizes[i/16] |= (brw->vb.inputs[i].glarray->Size - 1) <<
((i%16) * 2);
brw->sws->bo_unreference(brw->vb.inputs[i].bo);
brw->vb.inputs[i].bo = NULL;
}
/* Raise statechanges if input sizes have changed. */
if (memcmp(brw->vb.info.sizes, old.sizes, sizeof(old.sizes)) != 0)
brw->state.dirty.brw |= BRW_NEW_INPUT_DIMENSIONS;
}

View File

@ -56,6 +56,14 @@ struct brw_transfer
unsigned offset;
};
struct brw_buffer
{
struct pipe_buffer base;
struct brw_winsys_buffer *bo;
void *ptr;
boolean is_user_buffer;
};
/*
* Cast wrappers
@ -72,5 +80,19 @@ brw_transfer(struct pipe_transfer *transfer)
return (struct brw_transfer *)transfer;
}
static INLINE struct brw_buffer *
brw_buffer(struct pipe_buffer *buffer)
{
return (struct brw_buffer *)buffer;
}
/* Pipe buffer helpers
*/
static INLINE boolean
brw_buffer_is_user_buffer( const struct pipe_buffer *buf )
{
return ((const struct brw_buffer *)buf)->is_user_buffer;
}
#endif /* BRW_SCREEN_H */

View File

@ -134,7 +134,7 @@ static void upload_sf_prog(struct brw_context *brw)
key.attrs = brw->vs.prog_data->outputs_written;
/* BRW_NEW_REDUCED_PRIMITIVE */
switch (brw->intel.reduced_primitive) {
switch (brw->reduced_primitive) {
case GL_TRIANGLES:
/* NOTE: We just use the edgeflag attribute as an indicator that
* unfilled triangles are active. We don't actually do the

View File

@ -40,19 +40,12 @@ static void upload_sf_vp(struct brw_context *brw)
const GLfloat depth_scale = 1.0F / ctx->DrawBuffer->_DepthMaxF;
struct brw_sf_viewport sfv;
GLfloat y_scale, y_bias;
const GLboolean render_to_fbo = (ctx->DrawBuffer->Name != 0);
const GLfloat *v = ctx->Viewport._WindowMap.m;
memset(&sfv, 0, sizeof(sfv));
if (render_to_fbo) {
y_scale = 1.0;
y_bias = 0;
}
else {
y_scale = -1.0;
y_bias = ctx->DrawBuffer->Height;
}
y_scale = 1.0;
y_bias = 0;
/* _NEW_VIEWPORT */
@ -73,20 +66,11 @@ static void upload_sf_vp(struct brw_context *brw)
* Note that the hardware's coordinates are inclusive, while Mesa's min is
* inclusive but max is exclusive.
*/
if (render_to_fbo) {
/* texmemory: Y=0=bottom */
sfv.scissor.xmin = ctx->DrawBuffer->_Xmin;
sfv.scissor.xmax = ctx->DrawBuffer->_Xmax - 1;
sfv.scissor.ymin = ctx->DrawBuffer->_Ymin;
sfv.scissor.ymax = ctx->DrawBuffer->_Ymax - 1;
}
else {
/* memory: Y=0=top */
sfv.scissor.xmin = ctx->DrawBuffer->_Xmin;
sfv.scissor.xmax = ctx->DrawBuffer->_Xmax - 1;
sfv.scissor.ymin = ctx->DrawBuffer->Height - ctx->DrawBuffer->_Ymax;
sfv.scissor.ymax = ctx->DrawBuffer->Height - ctx->DrawBuffer->_Ymin - 1;
}
/* Y=0=bottom */
sfv.scissor.xmin = ctx->DrawBuffer->_Xmin;
sfv.scissor.xmax = ctx->DrawBuffer->_Xmax - 1;
sfv.scissor.ymin = ctx->DrawBuffer->_Ymin;
sfv.scissor.ymax = ctx->DrawBuffer->_Ymax - 1;
brw->sws->bo_unreference(brw->sf.vp_bo);
brw->sf.vp_bo = brw_cache_data( &brw->cache, BRW_SF_VP, &sfv, NULL, 0 );
@ -151,7 +135,7 @@ sf_unit_populate_key(struct brw_context *brw, struct brw_sf_unit_key *key)
/* _NEW_LIGHT */
key->provoking_vertex = ctx->Light.ProvokingVertex;
key->render_to_fbo = brw->intel.ctx.DrawBuffer->Name != 0;
key->render_to_fbo = 1;
}
static struct brw_winsys_buffer *
@ -211,11 +195,6 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key,
else
sf.sf5.front_winding = BRW_FRONTWINDING_CW;
/* The viewport is inverted for rendering to a FBO, and that inverts
* polygon front/back orientation.
*/
sf.sf5.front_winding ^= key->render_to_fbo;
switch (key->cull_face) {
case GL_FRONT:
sf.sf6.cull_mode = BRW_CULLMODE_FRONT;
@ -245,7 +224,7 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key,
sf.sf6.line_width = 0;
/* _NEW_BUFFERS */
key->render_to_fbo = brw->intel.ctx.DrawBuffer->Name != 0;
key->render_to_fbo = 1;
if (!key->render_to_fbo) {
/* Rendering to an OpenGL window */
sf.sf6.point_rast_rule = BRW_RASTRULE_UPPER_RIGHT;

View File

@ -109,8 +109,8 @@ struct brw_surface_key {
/***********************************************************************
* brw_state.c
*/
void brw_validate_state(struct brw_context *brw);
void brw_upload_state(struct brw_context *brw);
int brw_validate_state(struct brw_context *brw);
int brw_upload_state(struct brw_context *brw);
void brw_init_state(struct brw_context *brw);
void brw_destroy_state(struct brw_context *brw);
@ -157,7 +157,7 @@ void brw_state_cache_bo_delete(struct brw_cache *cache, struct brw_winsys_buffer
/***********************************************************************
* brw_state_batch.c
*/
#define BRW_BATCH_STRUCT(brw, s) brw_batchbuffer_data( brw->intel.batch, (s), sizeof(*(s)), IGNORE_CLIPRECTS)
#define BRW_BATCH_STRUCT(brw, s) brw_batchbuffer_data( brw->batch, (s), sizeof(*(s)), IGNORE_CLIPRECTS)
#define BRW_CACHED_BATCH_STRUCT(brw, s) brw_cached_batch_struct( brw, (s), sizeof(*(s)) )
GLboolean brw_cached_batch_struct( struct brw_context *brw,

View File

@ -47,7 +47,7 @@ GLboolean brw_cached_batch_struct( struct brw_context *brw,
struct header *newheader = (struct header *)data;
if (brw->emit_state_always) {
brw_batchbuffer_data(brw->intel.batch, data, sz, IGNORE_CLIPRECTS);
brw_batchbuffer_data(brw->batch, data, sz, IGNORE_CLIPRECTS);
return GL_TRUE;
}
@ -74,7 +74,7 @@ GLboolean brw_cached_batch_struct( struct brw_context *brw,
emit:
memcpy(item->header, newheader, sz);
brw_batchbuffer_data(brw->intel.batch, data, sz, IGNORE_CLIPRECTS);
brw_batchbuffer_data(brw->batch, data, sz, IGNORE_CLIPRECTS);
return GL_TRUE;
}

View File

@ -12,13 +12,13 @@ static GLboolean check_fallbacks( struct brw_context *brw,
* use fallbacks. If we're forcing fallbacks, always
* use fallbacks.
*/
if (brw->intel.conformance_mode == 0)
if (brw->flags.no_swtnl)
return GL_FALSE;
if (brw->intel.conformance_mode == 2)
if (brw->flags.force_swtnl)
return GL_TRUE;
if (ctx->Polygon.SmoothFlag) {
if (brw->curr.rast->tmpl.smooth_polys) {
for (i = 0; i < nr_prims; i++)
if (reduced_prim[prim[i].mode] == GL_TRIANGLES)
return GL_TRUE;

View File

@ -161,6 +161,13 @@ struct brw_winsys_screen {
size_t size,
const void *data);
/* XXX: couldn't this be handled by returning true/false on
* bo_emit_reloc?
*/
boolean (*check_aperture_space)( struct brw_winsys_screen *iws,
struct brw_winsys_buffer **buffers,
unsigned count );
/**
* Map a buffer.
*/

View File

@ -158,7 +158,7 @@ static void do_wm_prog( struct brw_context *brw,
memcpy(&c->key, key, sizeof(*key));
c->fp = fp;
c->env_param = brw->intel.ctx.FragmentProgram.Parameters;
c->env_param = NULL; /*brw->intel.ctx.FragmentProgram.Parameters;*/
brw_init_compile(brw, &c->func);

View File

@ -131,9 +131,9 @@ struct brw_wm_ref {
GLuint insn:24;
};
struct brw_wm_constref {
struct brw_wm_imm_ref {
const struct brw_wm_ref *ref;
GLfloat constval;
GLfloat imm1f;
};
@ -232,8 +232,8 @@ struct brw_wm_compile {
struct brw_wm_instruction instruction[BRW_WM_MAX_INSN];
GLuint nr_insns;
struct brw_wm_constref constref[BRW_WM_MAX_CONST];
GLuint nr_constrefs;
struct brw_wm_imm_ref imm_ref[BRW_WM_MAX_CONST];
GLuint nr_imm_refs;
struct brw_wm_grf pass2_grf[BRW_WM_MAX_GRF/2];

View File

@ -7,34 +7,6 @@ static struct brw_reg get_dst_reg(struct brw_wm_compile *c,
const struct prog_instruction *inst,
GLuint component);
/**
* Determine if the given fragment program uses GLSL features such
* as flow conditionals, loops, subroutines.
* Some GLSL shaders may use these features, others might not.
*/
GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp)
{
int i;
for (i = 0; i < fp->Base.NumInstructions; i++) {
const struct prog_instruction *inst = &fp->Base.Instructions[i];
switch (inst->Opcode) {
case OPCODE_ARL:
case OPCODE_IF:
case OPCODE_ENDIF:
case OPCODE_CAL:
case OPCODE_BRK:
case OPCODE_RET:
case OPCODE_BGNLOOP:
return GL_TRUE;
default:
break;
}
}
return GL_FALSE;
}
static void
reclaim_temps(struct brw_wm_compile *c);

View File

@ -124,33 +124,33 @@ static const struct brw_wm_ref *get_param_ref( struct brw_wm_compile *c,
}
/** Return a ref to a constant/literal value */
static const struct brw_wm_ref *get_const_ref( struct brw_wm_compile *c,
const GLfloat *constval )
/** Return a ref to an immediate value */
static const struct brw_wm_ref *get_imm_ref( struct brw_wm_compile *c,
const GLfloat *imm1f )
{
GLuint i;
/* Search for an existing const value matching the request:
*/
for (i = 0; i < c->nr_constrefs; i++) {
if (c->constref[i].constval == *constval)
return c->constref[i].ref;
for (i = 0; i < c->nr_imm_refs; i++) {
if (c->imm_ref[i].imm1f == *imm1f)
return c->imm_ref[i].ref;
}
/* Else try to add a new one:
*/
if (c->nr_constrefs < BRW_WM_MAX_CONST) {
GLuint i = c->nr_constrefs++;
if (c->nr_imm_refs < BRW_WM_MAX_IMM) {
GLuint i = c->nr_imm_refs++;
/* A constant is a special type of parameter:
/* An immediate is a special type of parameter:
*/
c->constref[i].constval = *constval;
c->constref[i].ref = get_param_ref(c, constval);
c->imm_ref[i].imm1f = *imm1f;
c->imm_ref[i].ref = get_param_ref(c, imm1f);
return c->constref[i].ref;
return c->imm_ref[i].ref;
}
else {
_mesa_printf("%s: out of constrefs\n", __FUNCTION__);
_mesa_printf("%s: out of imm_refs\n", __FUNCTION__);
c->prog_data.error = 1;
return NULL;
}
@ -200,7 +200,7 @@ static const struct brw_wm_ref *pass0_get_reg( struct brw_wm_compile *c,
case PROGRAM_CONSTANT:
/* These are invariant:
*/
ref = get_const_ref(c, &plist->ParameterValues[idx][component]);
ref = get_imm_ref(c, &plist->ParameterValues[idx][component]);
break;
case PROGRAM_STATE_VAR:
@ -266,9 +266,9 @@ static const struct brw_wm_ref *get_fp_src_reg_ref( struct brw_wm_compile *c,
static const GLfloat const_one = 1.0;
if (component == SWIZZLE_ZERO)
src_ref = get_const_ref(c, &const_zero);
src_ref = get_imm_ref(c, &const_zero);
else if (component == SWIZZLE_ONE)
src_ref = get_const_ref(c, &const_one);
src_ref = get_imm_ref(c, &const_one);
else
src_ref = pass0_get_reg(c, src.File, src.Index, component);

View File

@ -554,7 +554,8 @@ st_draw_vbo(GLcontext *ctx,
/* Gallium probably doesn't want this in some cases. */
if (!index_bounds_valid)
vbo_get_minmax_index(ctx, prims, ib, &min_index, &max_index);
if (!vbo_all_varyings_in_vbos(arrays))
vbo_get_minmax_index(ctx, prims, ib, &min_index, &max_index);
/* sanity check for pointer arithmetic below */
assert(sizeof(arrays[0]->Ptr[0]) == 1);