Merge remote-tracking branch 'origin/master' into pipe-video

2011-05-21 16:43:12 +02:00 · 2011-05-21 16:43:12 +02:00 · aa63ebc48a
parent 120b55a96e 3c5e741862
commit aa63ebc48a
106 changed files with 14886 additions and 3903 deletions
--- a/configs/autoconf.in
+++ b/configs/autoconf.in
@ -58,6 +58,10 @@ INSTALL = @INSTALL@
 PYTHON2 = @PYTHON2@
 PYTHON_FLAGS = -t -O -O

+# Flex and Bison for GLSL compiler
+FLEX = @FLEX@
+BISON = @BISON@
+
 # Library names (base name)
 GL_LIB = GL
 GLU_LIB = GLU
--- a/configs/default
+++ b/configs/default
@ -38,6 +38,8 @@ MKLIB_OPTIONS =
 MKDEP = makedepend
 MKDEP_OPTIONS = -fdepend
 MAKE = make
+FLEX = flex
+BISON = bison

 # Use MINSTALL for installing libraries, INSTALL for everything else
 MINSTALL = $(SHELL) $(TOP)/bin/minstall
--- a/configure.ac
+++ b/configure.ac
@ -39,6 +39,12 @@ if test "x$MKDEP" = "x"; then
    AC_MSG_ERROR([makedepend is required to build Mesa])
 fi

+AC_PATH_PROG([FLEX], [flex])
+test "x$FLEX" = "x" && AC_MSG_ERROR([flex is needed to build Mesa])
+
+AC_PATH_PROG([BISON], [bison])
+test "x$BISON" = "x" && AC_MSG_ERROR([bison is needed to build Mesa])
+
 dnl Our fallback install-sh is a symlink to minstall. Use the existing
 dnl configuration in that case.
 AC_PROG_INSTALL
@ -1186,7 +1192,7 @@ if test "x$enable_egl" = xno; then
 fi
 if test "x$enable_egl" = xyes; then
    SRC_DIRS="$SRC_DIRS egl"
-    EGL_LIB_DEPS="$DLOPEN_LIBS -lpthread"
+    EGL_LIB_DEPS="$DLOPEN_LIBS $SELINUX_LIBS -lpthread"
    EGL_DRIVERS_DIRS=""
    if test "$enable_static" != yes; then
        # build egl_glx when libGL is built
@ -1602,7 +1608,7 @@ x*yes*)
 esac
 if test "x$enable_openvg" = xyes; then
    EGL_CLIENT_APIS="$EGL_CLIENT_APIS "'$(VG_LIB)'
-    VG_LIB_DEPS="$VG_LIB_DEPS -lpthread"
+    VG_LIB_DEPS="$VG_LIB_DEPS $SELINUX_LIBS -lpthread"
 fi

 AC_SUBST([VG_LIB_DEPS])
--- a/docs/relnotes-7.11.html
+++ b/docs/relnotes-7.11.html
@ -52,6 +52,7 @@ tbd
 <li>GL_EXT_packed_float (gallium r600)
 <li>GL_EXT_texture_compression_latc (gallium drivers, swrast)
 <li>GL_EXT_texture_compression_rgtc (gallium drivers, swrast, i965)
+<li>GL_EXT_texture_filter_anisotropic (swrast)
 <li>GL_EXT_texture_shared_exponent (gallium drivers, swrast)
 <li>GL_EXT_texture_sRGB_decode (gallium drivers, swrast, i965)
 <li>GL_EXT_texture_snorm (gallium drivers)
--- a/src/egl/drivers/dri2/platform_drm.c
+++ b/src/egl/drivers/dri2/platform_drm.c
@ -81,6 +81,11 @@ const int i965_chip_ids[] = {
   0x0116, /* PCI_CHIP_SANDYBRIDGE_M_GT2 */
   0x0122, /* PCI_CHIP_SANDYBRIDGE_GT2_PLUS */
   0x0126, /* PCI_CHIP_SANDYBRIDGE_M_GT2_PLUS */
+   0x0152, /* PCI_CHIP_IVYBRIDGE_GT1 */
+   0x0162, /* PCI_CHIP_IVYBRIDGE_GT2 */
+   0x0156, /* PCI_CHIP_IVYBRIDGE_M_GT1 */
+   0x0166, /* PCI_CHIP_IVYBRIDGE_M_GT2 */
+   0x015a, /* PCI_CHIP_IVYBRIDGE_S_GT1 */
   0x29a2, /* PCI_CHIP_I965_G */
   0x2992, /* PCI_CHIP_I965_Q */
   0x2982, /* PCI_CHIP_I965_G_1 */
--- a/src/egl/main/Makefile
+++ b/src/egl/main/Makefile
@ -61,11 +61,12 @@ LOCAL_LIBS += $(TOP)/src/egl/drivers/dri2/libegl_dri2.a
 ifneq ($(findstring x11, $(EGL_PLATFORMS)),)
 EGL_LIB_DEPS += $(XCB_DRI2_LIBS)
 endif
+EGL_LIB_DEPS += $(LIBUDEV_LIBS) $(DLOPEN_LIBS) $(LIBDRM_LIB) $(WAYLAND_LIBS)
+endif
+
 ifneq ($(findstring wayland, $(EGL_PLATFORMS)),)
 LOCAL_LIBS += $(TOP)/src/egl/wayland/wayland-drm/libwayland-drm.a
 endif
-EGL_LIB_DEPS += $(LIBUDEV_LIBS) $(DLOPEN_LIBS) $(LIBDRM_LIB) $(WAYLAND_LIBS)
-endif

 ifeq ($(filter glx, $(EGL_DRIVERS_DIRS)),glx)
 LOCAL_CFLAGS += -D_EGL_BUILT_IN_DRIVER_GLX
--- a/src/egl/wayland/wayland-drm/protocol/wayland-drm.xml
+++ b/src/egl/wayland/wayland-drm/protocol/wayland-drm.xml
@ -3,6 +3,12 @@
  <!-- drm support. This object is created by the server and published
       using the display's global event. -->
  <interface name="wl_drm" version="1">
+    <enum name="error">
+      <entry name="authenticate_fail" value="0"/>
+      <entry name="invalid_visual" value="1"/>
+      <entry name="invalid_name" value="2"/>
+    </enum>
+
    <!-- Call this request with the magic received from drmGetMagic().
         It will be passed on to the drmAuthMagic() or
         DRIAuthConnection() call.  This authentication must be
--- a/src/egl/wayland/wayland-drm/wayland-drm.c
+++ b/src/egl/wayland/wayland-drm/wayland-drm.c
@ -101,12 +101,9 @@ drm_create_buffer(struct wl_client *client, struct wl_drm *drm,
 	buffer->buffer.visual = visual;

 	if (visual->object.interface != &wl_visual_interface) {
-		/* FIXME: Define a real exception event instead of
-		 * abusing this one */
-		wl_client_post_event(client,
-				     (struct wl_object *) drm->display,
-				     WL_DISPLAY_INVALID_OBJECT, 0);
-		fprintf(stderr, "invalid visual in create_buffer\n");
+		wl_client_post_error(client, &drm->object,
+				     WL_DRM_ERROR_INVALID_VISUAL,
+				     "invalid visual");
 		return;
 	}

@ -116,12 +113,9 @@ drm_create_buffer(struct wl_client *client, struct wl_drm *drm,
 						 stride, visual);

 	if (buffer->driver_buffer == NULL) {
-		/* FIXME: Define a real exception event instead of
-		 * abusing this one */
-		wl_client_post_event(client,
-				     (struct wl_object *) drm->display,
-				     WL_DISPLAY_INVALID_OBJECT, 0);
-		fprintf(stderr, "failed to create image for name %d\n", name);
+		wl_client_post_error(client, &drm->object,
+				     WL_DRM_ERROR_INVALID_NAME,
+				     "invalid name");
 		return;
 	}

@ -140,9 +134,9 @@ drm_authenticate(struct wl_client *client,
 		 struct wl_drm *drm, uint32_t id)
 {
 	if (drm->callbacks->authenticate(drm->user_data, id) < 0)
-		wl_client_post_event(client,
-				     (struct wl_object *) drm->display,
-				     WL_DISPLAY_INVALID_OBJECT, 0);
+		wl_client_post_error(client, &drm->object,
+				     WL_DRM_ERROR_AUTHENTICATE_FAIL,
+				     "authenicate failed");
 	else
 		wl_client_post_event(client, &drm->object,
 				     WL_DRM_AUTHENTICATED);
@ -154,7 +148,8 @@ const static struct wl_drm_interface drm_interface = {
 };

 static void
-post_drm_device(struct wl_client *client, struct wl_object *global)
+post_drm_device(struct wl_client *client, 
+		struct wl_object *global, uint32_t version)
 {
 	struct wl_drm *drm = (struct wl_drm *) global;

--- a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
+++ b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
@ -73,6 +73,19 @@ lp_set_target_options(void)
 #endif
 #endif

+   /*
+    * LLVM revision 123367 switched the default stack alignment to 16 bytes on
+    * Linux (and several other Unices in later revisions), to match recent gcc
+    * versions.
+    *
+    * However our drivers can be loaded by old binary applications, still
+    * maintaining a 4 bytes stack alignment.  Therefore we must tell LLVM here
+    * to only assume a 4 bytes alignment for backwards compatibility.
+    */
+#if defined(PIPE_ARCH_X86)
+   llvm::StackAlignment = 4;
+#endif
+
 #if defined(DEBUG) || defined(PROFILE)
   llvm::NoFramePointerElim = true;
 #endif
@ -93,13 +106,23 @@ lp_set_target_options(void)
    * See also:
    * - http://llvm.org/bugs/show_bug.cgi?id=3287
    * - http://l4.me.uk/post/2009/06/07/llvm-wrinkle-3-configuration-what-configuration/
+    *
+    * The -disable-mmx global option can be specified only once.  Since we
+    * dynamically link against LLVM, it will reside in a separate shared object,
+    * which may or may not be deleted when this shared object is, so we use the
+    * llvm::DisablePrettyStackTrace variable (which we set below and should
+    * reside in the same shared library) to determine whether the -disable-mmx
+    * option has been set or not.
+    *
+    * Thankfully this ugly hack is not necessary on LLVM 2.9 onwards.
    */
-   static boolean first = TRUE;
-   if (first) {
+   if (!llvm::DisablePrettyStackTrace) {
+      static boolean first = TRUE;
      static const char* options[] = {
         "prog",
         "-disable-mmx"
      };
+      assert(first);
      llvm::cl::ParseCommandLineOptions(2, const_cast<char**>(options));
      first = FALSE;
   }
--- a/src/gallium/auxiliary/os/os_thread.h
+++ b/src/gallium/auxiliary/os/os_thread.h
@ -56,7 +56,14 @@ typedef pthread_t pipe_thread;
 static INLINE pipe_thread pipe_thread_create( void *(* routine)( void *), void *param )
 {
   pipe_thread thread;
-   if (pthread_create( &thread, NULL, routine, param ))
+   sigset_t saved_set, new_set;
+   int ret;
+
+   sigfillset(&new_set);
+   pthread_sigmask(SIG_SETMASK, &new_set, &saved_set);
+   ret = pthread_create( &thread, NULL, routine, param );
+   pthread_sigmask(SIG_SETMASK, &saved_set, NULL);
+   if (ret)
      return 0;
   return thread;
 }
--- a/src/gallium/auxiliary/util/u_vbuf_mgr.c
+++ b/src/gallium/auxiliary/util/u_vbuf_mgr.c
@ -581,7 +581,12 @@ static void u_vbuf_mgr_compute_max_index(struct u_vbuf_mgr_priv *mgr)
       * for that when dividing by stride. */
      unused = vb->stride -
               (mgr->ve->ve[i].src_offset + mgr->ve->src_format_size[i]);
-      assert(unused >= 0);
+
+      /* If src_offset is greater than stride (which means it's a buffer
+       * offset rather than a vertex offset)... */
+      if (unused < 0) {
+         unused = 0;
+      }

      /* Compute the maximum index for this vertex element. */
      max_index =
--- a/src/gallium/drivers/nv50/nv50_program.c
+++ b/src/gallium/drivers/nv50/nv50_program.c
@ -395,6 +395,9 @@ nv50_vertprog_prepare(struct nv50_translation_info *ti)
      }
   }

+   p->vp.clpd = p->max_out;
+   p->max_out += p->vp.clpd_nr;
+
   for (i = 0; i < TGSI_SEMANTIC_COUNT; ++i) {
      switch (ti->sysval_map[i]) {
      case 2:
--- a/src/gallium/drivers/nv50/nv50_shader_state.c
+++ b/src/gallium/drivers/nv50/nv50_shader_state.c
@ -170,6 +170,12 @@ nv50_vertprog_validate(struct nv50_context *nv50)
   struct nouveau_channel *chan = nv50->screen->base.channel;
   struct nv50_program *vp = nv50->vertprog;

+   if (nv50->clip.nr > vp->vp.clpd_nr) {
+      if (vp->translated)
+         nv50_program_destroy(nv50, vp);
+      vp->vp.clpd_nr = nv50->clip.nr;
+   }
+
   if (!nv50_program_validate(nv50, vp))
         return;

@ -369,7 +375,7 @@ nv50_fp_linkage_validate(struct nv50_context *nv50)
   m = nv50_vec4_map(map, 0, lin, &dummy, &vp->out[0]);

   for (c = 0; c < vp->vp.clpd_nr; ++c)
-      map[m++] |= vp->vp.clpd + c;
+      map[m++] = vp->vp.clpd + c;

   colors |= m << 8; /* adjust BFC0 id */

--- a/src/gallium/drivers/nv50/nv50_state_validate.c
+++ b/src/gallium/drivers/nv50/nv50_state_validate.c
@ -225,6 +225,9 @@ nv50_validate_clip(struct nv50_context *nv50)

   BEGIN_RING(chan, RING_3D(VP_CLIP_DISTANCE_ENABLE), 1);
   OUT_RING  (chan, (1 << nv50->clip.nr) - 1);
+
+   if (nv50->vertprog && nv50->clip.nr > nv50->vertprog->vp.clpd_nr)
+      nv50->dirty |= NV50_NEW_VERTPROG;
 }

 static void
--- a/src/gallium/drivers/nv50/nv50_tgsi_to_nc.c
+++ b/src/gallium/drivers/nv50/nv50_tgsi_to_nc.c
@ -1552,6 +1552,8 @@ static void
 bld_instruction(struct bld_context *bld,
                const struct tgsi_full_instruction *insn)
 {
+   struct nv50_program *prog = bld->ti->p;
+   const struct tgsi_full_dst_register *dreg = &insn->Dst[0];
   struct nv_value *src0;
   struct nv_value *src1;
   struct nv_value *src2;
@ -1990,6 +1992,31 @@ bld_instruction(struct bld_context *bld,

   FOR_EACH_DST0_ENABLED_CHANNEL(c, insn)
      emit_store(bld, insn, c, dst0[c]);
+
+   if (prog->type == PIPE_SHADER_VERTEX && prog->vp.clpd_nr &&
+       dreg->Register.File == TGSI_FILE_OUTPUT && !dreg->Register.Indirect &&
+       prog->out[dreg->Register.Index].sn == TGSI_SEMANTIC_POSITION) {
+
+      int p;
+      for (p = 0; p < prog->vp.clpd_nr; p++) {
+         struct nv_value *clipd = NULL;
+
+         for (c = 0; c < 4; c++) {
+            temp = new_value(bld->pc, NV_FILE_MEM_C(15), NV_TYPE_F32);
+            temp->reg.id = p * 4 + c;
+            temp = bld_insn_1(bld, NV_OP_LDA, temp);
+
+            clipd = clipd ?
+                        bld_insn_3(bld, NV_OP_MAD, dst0[c], temp, clipd) :
+                        bld_insn_2(bld, NV_OP_MUL, dst0[c], temp);
+         }
+
+         temp = bld_insn_1(bld, NV_OP_MOV, clipd);
+         temp->reg.file = NV_FILE_OUT;
+         temp->reg.id = bld->ti->p->vp.clpd + p;
+         temp->insn->fixed = 1;
+      }
+   }
 }

 static INLINE void
--- a/src/gallium/include/state_tracker/st_api.h
+++ b/src/gallium/include/state_tracker/st_api.h
@ -69,6 +69,15 @@ enum st_profile_type
 #define ST_PROFILE_OPENGL_ES1_MASK   (1 << ST_PROFILE_OPENGL_ES1)
 #define ST_PROFILE_OPENGL_ES2_MASK   (1 << ST_PROFILE_OPENGL_ES2)

+/**
+ * New context flags for GL 3.0 and beyond.
+ */
+#define ST_CONTEXT_FLAG_CORE_PROFILE        (1 << 0)
+#define ST_CONTEXT_FLAG_COMPATIBLE_PROFILE  (1 << 1)
+#define ST_CONTEXT_FLAG_FORWARD_COMPATIBLE  (1 << 2)
+#define ST_CONTEXT_FLAG_DEBUG               (1 << 3)
+#define ST_CONTEXT_FLAG_ROBUST_ACCESS       (1 << 4)
+
 /**
 * Used in st_context_iface->teximage.
 */
@ -207,21 +216,14 @@ struct st_context_attribs
    * The profile and minimal version to support.
    *
    * The valid profiles and versions are rendering API dependent.  The latest
-    * version satisfying the request should be returned, unless
-    * forward_compatiible is true.
+    * version satisfying the request should be returned, unless the
+    * ST_CONTEXT_FLAG_FORWARD_COMPATIBLE bit is set.
    */
   enum st_profile_type profile;
   int major, minor;

-   /**
-    * Enable debugging.
-    */
-   boolean debug;
-
-   /**
-    * Return the exact version and disallow the use of deprecated features.
-    */
-   boolean forward_compatible;
+   /** Mask of ST_CONTEXT_FLAG_x bits */
+   unsigned flags;

   /**
    * The visual of the framebuffers the context will be bound to.
--- a/src/gallium/state_trackers/dri/drm/dri2.c
+++ b/src/gallium/state_trackers/dri/drm/dri2.c
@ -481,6 +481,15 @@ dri2_create_image(__DRIscreen *_screen,
   enum pipe_format pf;

   tex_usage = PIPE_BIND_RENDER_TARGET | PIPE_BIND_SAMPLER_VIEW;
+   if (use & __DRI_IMAGE_USE_SCANOUT)
+      tex_usage |= PIPE_BIND_SCANOUT;
+   if (use & __DRI_IMAGE_USE_SHARE)
+      tex_usage |= PIPE_BIND_SHARED;
+   if (use & __DRI_IMAGE_USE_CURSOR) {
+      if (width != 64 || height != 64)
+         return NULL;
+      tex_usage |= PIPE_BIND_CURSOR;
+   }

   switch (format) {
   case __DRI_IMAGE_FORMAT_RGB565:
--- a/src/gallium/state_trackers/egl/common/egl_g3d_image.c
+++ b/src/gallium/state_trackers/egl/common/egl_g3d_image.c
@ -97,7 +97,8 @@ egl_g3d_create_drm_buffer(_EGLDisplay *dpy, _EGLImage *img,
   }

   valid_use = EGL_DRM_BUFFER_USE_SCANOUT_MESA |
-               EGL_DRM_BUFFER_USE_SHARE_MESA;
+               EGL_DRM_BUFFER_USE_SHARE_MESA |
+               EGL_DRM_BUFFER_USE_CURSOR_MESA;
   if (attrs.DRMBufferUseMESA & ~valid_use) {
      _eglLog(_EGL_DEBUG, "bad image use bit 0x%04x",
            attrs.DRMBufferUseMESA);
@ -122,6 +123,11 @@ egl_g3d_create_drm_buffer(_EGLDisplay *dpy, _EGLImage *img,
      templ.bind |= PIPE_BIND_SCANOUT;
   if (attrs.DRMBufferUseMESA & EGL_DRM_BUFFER_USE_SHARE_MESA)
      templ.bind |= PIPE_BIND_SHARED;
+   if (attrs.DRMBufferUseMESA & EGL_DRM_BUFFER_USE_CURSOR_MESA) {
+      if (attrs.Width != 64 || attrs.Height != 64)
+         return NULL;
+      templ.bind |= PIPE_BIND_CURSOR;
+   }

   return screen->resource_create(screen, &templ);
 }
--- a/src/gallium/state_trackers/egl/wayland/native_wayland.c
+++ b/src/gallium/state_trackers/egl/wayland/native_wayland.c
@ -61,31 +61,35 @@ wayland_display_get_configs (struct native_display *ndpy, int *num_configs)
 {
   struct wayland_display *display = wayland_display(ndpy);
   const struct native_config **configs;
+   int i;

   if (!display->config) {
      struct native_config *nconf;
-      enum pipe_format format;
-      display->config = CALLOC(1, sizeof(*display->config));
+      display->config = CALLOC(2, sizeof(*display->config));
      if (!display->config)
         return NULL;
-      nconf = &display->config->base;

-      nconf->buffer_mask =
-         (1 << NATIVE_ATTACHMENT_FRONT_LEFT) |
-         (1 << NATIVE_ATTACHMENT_BACK_LEFT);
+      for (i = 0; i < 2; ++i) {
+         nconf = &display->config[i].base;
+         
+         nconf->buffer_mask =
+            (1 << NATIVE_ATTACHMENT_FRONT_LEFT) |
+            (1 << NATIVE_ATTACHMENT_BACK_LEFT);
+         
+         nconf->window_bit = TRUE;
+         nconf->pixmap_bit = TRUE;
+      }

-      format = PIPE_FORMAT_B8G8R8A8_UNORM;
-
-      nconf->color_format = format;
-      nconf->window_bit = TRUE;
-      nconf->pixmap_bit = TRUE;
+      display->config[0].base.color_format = PIPE_FORMAT_B8G8R8A8_UNORM;
+      display->config[1].base.color_format = PIPE_FORMAT_B8G8R8X8_UNORM;
   }

-   configs = MALLOC(sizeof(*configs));
+   configs = MALLOC(2 * sizeof(*configs));
   if (configs) {
-      configs[0] = &display->config->base;
+      configs[0] = &display->config[0].base;
+      configs[1] = &display->config[1].base;
      if (num_configs)
-         *num_configs = 1;
+         *num_configs = 2;
   }

   return configs;
@ -368,9 +372,9 @@ wayland_create_pixmap_surface(struct native_display *ndpy,
   surface->type = WL_PIXMAP_SURFACE;
   surface->pix = egl_pixmap;

-   if (surface->pix->visual == wl_display_get_rgb_visual(display->dpy))
-      surface->color_format = PIPE_FORMAT_B8G8R8X8_UNORM;
-   else
+   if (nconf)
+      surface->color_format = nconf->color_format;
+   else /* FIXME: derive format from wl_visual */
      surface->color_format = PIPE_FORMAT_B8G8R8A8_UNORM;

   surface->attachment_mask = (1 << NATIVE_ATTACHMENT_FRONT_LEFT);
--- a/src/gallium/state_trackers/glx/xlib/glx_api.c
+++ b/src/gallium/state_trackers/glx/xlib/glx_api.c
@ -1018,15 +1018,18 @@ glXChooseVisual( Display *dpy, int screen, int *list )
 }


-PUBLIC GLXContext
-glXCreateContext( Display *dpy, XVisualInfo *visinfo,
-                  GLXContext share_list, Bool direct )
+/**
+ * Helper function used by other glXCreateContext functions.
+ */
+static GLXContext
+create_context(Display *dpy, XMesaVisual xmvis,
+               XMesaContext shareCtx, Bool direct,
+               unsigned major, unsigned minor,
+               unsigned profileMask, unsigned contextFlags)
 {
-   XMesaVisual xmvis;
   GLXContext glxCtx;
-   GLXContext shareCtx = share_list;

-   if (!dpy || !visinfo)
+   if (!dpy || !xmvis)
      return 0;

   glxCtx = CALLOC_STRUCT(__GLXcontextRec);
@ -1038,19 +1041,8 @@ glXCreateContext( Display *dpy, XVisualInfo *visinfo,
   XMesaGarbageCollect();
 #endif

-   xmvis = find_glx_visual( dpy, visinfo );
-   if (!xmvis) {
-      /* This visual wasn't found with glXChooseVisual() */
-      xmvis = create_glx_visual( dpy, visinfo );
-      if (!xmvis) {
-         /* unusable visual */
-         free(glxCtx);
-         return NULL;
-      }
-   }
-
-   glxCtx->xmesaContext = XMesaCreateContext(xmvis,
-                                   shareCtx ? shareCtx->xmesaContext : NULL);
+   glxCtx->xmesaContext = XMesaCreateContext(xmvis, shareCtx, major, minor,
+                                             profileMask, contextFlags);
   if (!glxCtx->xmesaContext) {
      free(glxCtx);
      return NULL;
@ -1064,6 +1056,29 @@ glXCreateContext( Display *dpy, XVisualInfo *visinfo,
 }


+/** Create a GL 1.0 compatibility context; NULL dpy/visinfo yields NULL. */
+PUBLIC GLXContext
+glXCreateContext( Display *dpy, XVisualInfo *visinfo,
+                  GLXContext shareCtx, Bool direct )
+{
+   XMesaVisual xmvis;
+
+   if (!dpy || !visinfo)   /* reject before the visual lookup below */
+      return NULL;
+
+   xmvis = find_glx_visual( dpy, visinfo );
+   if (!xmvis)   /* not found with glXChooseVisual(): create it now */
+      xmvis = create_glx_visual( dpy, visinfo );
+   if (!xmvis)
+      return NULL;   /* unusable visual */
+
+   return create_context(dpy, xmvis,
+                         shareCtx ? shareCtx->xmesaContext : NULL,
+                         direct,
+                         1, 0, GLX_CONTEXT_COMPATIBILITY_PROFILE_BIT_ARB, 0x0);
+}
+
+
 /* XXX these may have to be removed due to thread-safety issues. */
 static GLXContext MakeCurrent_PrevContext = 0;
 static GLXDrawable MakeCurrent_PrevDrawable = 0;
@ -2084,35 +2099,18 @@ glXQueryDrawable(Display *dpy, GLXDrawable draw, int attribute,

 PUBLIC GLXContext
 glXCreateNewContext( Display *dpy, GLXFBConfig config,
-                     int renderType, GLXContext shareList, Bool direct )
+                     int renderType, GLXContext shareCtx, Bool direct )
 {
-   GLXContext glxCtx;
-   GLXContext shareCtx = shareList;
   XMesaVisual xmvis = (XMesaVisual) config;

   if (!dpy || !config ||
       (renderType != GLX_RGBA_TYPE && renderType != GLX_COLOR_INDEX_TYPE))
      return 0;

-   glxCtx = CALLOC_STRUCT(__GLXcontextRec);
-   if (!glxCtx)
-      return 0;
-
-   /* deallocate unused windows/buffers */
-   XMesaGarbageCollect();
-
-   glxCtx->xmesaContext = XMesaCreateContext(xmvis,
-                                   shareCtx ? shareCtx->xmesaContext : NULL);
-   if (!glxCtx->xmesaContext) {
-      free(glxCtx);
-      return NULL;
-   }
-
-   glxCtx->isDirect = DEFAULT_DIRECT;
-   glxCtx->currentDpy = dpy;
-   glxCtx->xid = (XID) glxCtx;  /* self pointer */
-
-   return glxCtx;
+   return create_context(dpy, xmvis,
+                         shareCtx ? shareCtx->xmesaContext : NULL,
+                         direct,
+                         1, 0, GLX_CONTEXT_COMPATIBILITY_PROFILE_BIT_ARB, 0x0);
 }


@ -2315,32 +2313,19 @@ glXCreateGLXPixmapWithConfigSGIX(Display *dpy, GLXFBConfigSGIX config,

 PUBLIC GLXContext
 glXCreateContextWithConfigSGIX(Display *dpy, GLXFBConfigSGIX config,
-                               int render_type, GLXContext share_list,
+                               int renderType, GLXContext shareCtx,
                               Bool direct)
 {
   XMesaVisual xmvis = (XMesaVisual) config;
-   GLXContext glxCtx;
-   GLXContext shareCtx = share_list;

-   glxCtx = CALLOC_STRUCT(__GLXcontextRec);
-   if (!glxCtx)
+   if (!dpy || !config ||
+       (renderType != GLX_RGBA_TYPE && renderType != GLX_COLOR_INDEX_TYPE))
      return 0;

-   /* deallocate unused windows/buffers */
-   XMesaGarbageCollect();
-
-   glxCtx->xmesaContext = XMesaCreateContext(xmvis,
-                                   shareCtx ? shareCtx->xmesaContext : NULL);
-   if (!glxCtx->xmesaContext) {
-      free(glxCtx);
-      return NULL;
-   }
-
-   glxCtx->isDirect = DEFAULT_DIRECT;
-   glxCtx->currentDpy = dpy;
-   glxCtx->xid = (XID) glxCtx;  /* self pointer */
-
-   return glxCtx;
+   return create_context(dpy, xmvis,
+                         shareCtx ? shareCtx->xmesaContext : NULL,
+                         direct,
+                         1, 0, GLX_CONTEXT_COMPATIBILITY_PROFILE_BIT_ARB, 0x0);
 }


@ -2654,3 +2639,98 @@ glXReleaseTexImageEXT(Display *dpy, GLXDrawable drawable, int buffer)
   if (b)
      XMesaReleaseTexImage(dpy, b, buffer);
 }
+
+
+
+/*** GLX_ARB_create_context ***/
+
+/**
+ * Create a context from a GLX_ARB_create_context attribute list.
+ * attrib_list holds <name, value> pairs ended by 0; a NULL list selects
+ * the defaults (GL 1.0, no flags, core profile mask, RGBA render type).
+ * Returns NULL where an X/GLX error should eventually be generated.
+ */
+PUBLIC GLXContext
+glXCreateContextAttribsARB(Display *dpy, GLXFBConfig config,
+                           GLXContext shareCtx, Bool direct,
+                           const int *attrib_list)
+{
+   XMesaVisual xmvis = (XMesaVisual) config;
+   int majorVersion = 1, minorVersion = 0;
+   int contextFlags = 0x0;
+   int profileMask = GLX_CONTEXT_CORE_PROFILE_BIT_ARB;
+   int renderType = GLX_RGBA_TYPE;
+   unsigned i;
+   const int contextFlagsAll = (GLX_CONTEXT_DEBUG_BIT_ARB |
+                                GLX_CONTEXT_FORWARD_COMPATIBLE_BIT_ARB);
+
+   /* parse attrib_list; the loop test consumes the 0 terminator */
+   for (i = 0; attrib_list && attrib_list[i]; i++) {
+      switch (attrib_list[i]) {
+      case GLX_CONTEXT_MAJOR_VERSION_ARB:
+         majorVersion = attrib_list[++i];
+         break;
+      case GLX_CONTEXT_MINOR_VERSION_ARB:
+         minorVersion = attrib_list[++i];
+         break;
+      case GLX_CONTEXT_FLAGS_ARB:
+         contextFlags = attrib_list[++i];
+         break;
+      case GLX_CONTEXT_PROFILE_MASK_ARB:
+         profileMask = attrib_list[++i];
+         break;
+      case GLX_RENDER_TYPE:
+         renderType = attrib_list[++i];
+         break;
+      default:
+         /* bad attribute: XXX generate BadValue X Error */
+         return NULL;
+      }
+   }
+
+   /* check contextFlags */
+   if (contextFlags & ~contextFlagsAll) {
+      return NULL; /* generate BadValue X Error */
+   }
+
+   /* check profileMask */
+   if (profileMask != GLX_CONTEXT_CORE_PROFILE_BIT_ARB &&
+       profileMask != GLX_CONTEXT_COMPATIBILITY_PROFILE_BIT_ARB) {
+      return NULL; /* generate BadValue X Error */
+   }
+
+   /* check version (generate BadMatch if bad) */
+   switch (majorVersion) {
+   case 1:
+      if (minorVersion < 0 || minorVersion > 5)
+         return NULL;
+      break;
+   case 2:
+      if (minorVersion < 0 || minorVersion > 1)
+         return NULL;
+      break;
+   case 3:
+      if (minorVersion < 0 || minorVersion > 2)
+         return NULL;
+      break;
+   case 4:
+      if (minorVersion < 0 || minorVersion > 0)
+         return NULL;
+      break;
+   default:
+      return NULL;
+   }
+
+   if ((contextFlags & GLX_CONTEXT_FORWARD_COMPATIBLE_BIT_ARB) &&
+       majorVersion < 3)
+      return NULL; /* generate GLXBadProfileARB */
+
+   if (renderType == GLX_COLOR_INDEX_TYPE && majorVersion >= 3)
+      return NULL; /* generate BadMatch */
+
+   return create_context(dpy, xmvis,
+                         shareCtx ? shareCtx->xmesaContext : NULL,
+                         direct,
+                         majorVersion, minorVersion,
+                         profileMask, contextFlags);
+}
--- a/src/gallium/state_trackers/glx/xlib/xm_api.c
+++ b/src/gallium/state_trackers/glx/xlib/xm_api.c
@ -853,7 +853,9 @@ xmesa_init( Display *display )
 * \return an XMesaContext or NULL if error.
 */
 PUBLIC
-XMesaContext XMesaCreateContext( XMesaVisual v, XMesaContext share_list )
+XMesaContext XMesaCreateContext( XMesaVisual v, XMesaContext share_list,
+                                 GLuint major, GLuint minor,
+                                 GLuint profileMask, GLuint contextFlags)
 {
   XMesaDisplay xmdpy = xmesa_init_display(v->display);
   struct st_context_attribs attribs;
@ -874,6 +876,18 @@ XMesaContext XMesaCreateContext( XMesaVisual v, XMesaContext share_list )
   memset(&attribs, 0, sizeof(attribs));
   attribs.profile = ST_PROFILE_DEFAULT;
   attribs.visual = v->stvis;
+   attribs.major = major;
+   attribs.minor = minor;
+   if (contextFlags & GLX_CONTEXT_FORWARD_COMPATIBLE_BIT_ARB)
+      attribs.flags |= ST_CONTEXT_FLAG_FORWARD_COMPATIBLE;
+   if (contextFlags & GLX_CONTEXT_DEBUG_BIT_ARB)
+      attribs.flags |= ST_CONTEXT_FLAG_DEBUG;
+   if (contextFlags & GLX_CONTEXT_ROBUST_ACCESS_BIT_ARB)
+      attribs.flags |= ST_CONTEXT_FLAG_ROBUST_ACCESS;
+   if (profileMask & GLX_CONTEXT_CORE_PROFILE_BIT_ARB)
+      attribs.flags |= ST_CONTEXT_FLAG_CORE_PROFILE;
+   if (profileMask & GLX_CONTEXT_COMPATIBILITY_PROFILE_BIT_ARB)
+      attribs.flags |= ST_CONTEXT_FLAG_COMPATIBLE_PROFILE;

   c->st = stapi->create_context(stapi, xmdpy->smapi,
         &attribs, (share_list) ? share_list->st : NULL);
--- a/src/gallium/state_trackers/glx/xlib/xm_api.h
+++ b/src/gallium/state_trackers/glx/xlib/xm_api.h
@ -140,7 +140,10 @@ extern void XMesaDestroyVisual( XMesaVisual v );
 * Return:  an XMesaContext or NULL if error.
 */
 extern XMesaContext XMesaCreateContext( XMesaVisual v,
-					XMesaContext share_list );
+					XMesaContext share_list,
+                                        GLuint major, GLuint minor,
+                                        GLuint profileMask,
+                                        GLuint contextFlags);


 /*
--- a/src/gallium/winsys/r600/drm/r600_bo.c
+++ b/src/gallium/winsys/r600/drm/r600_bo.c
@ -38,7 +38,8 @@ struct r600_bo *r600_bo(struct radeon *radeon,
 {
 	struct r600_bo *bo;
 	struct radeon_bo *rbo;
-
+	uint32_t initial_domain;
+	  
 	if (binding & (PIPE_BIND_CONSTANT_BUFFER | PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER)) {
 		bo = r600_bomgr_bo_create(radeon->bomgr, size, alignment, *radeon->cfence);
 		if (bo) {
@ -46,7 +47,24 @@ struct r600_bo *r600_bo(struct radeon *radeon,
 		}
 	}

-	rbo = radeon_bo(radeon, 0, size, alignment);
+	if (binding & (PIPE_BIND_CONSTANT_BUFFER | PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER)) {
+		initial_domain = RADEON_GEM_DOMAIN_GTT;
+	} else {
+		switch(usage) {
+		case PIPE_USAGE_DYNAMIC:
+		case PIPE_USAGE_STREAM:
+		case PIPE_USAGE_STAGING:
+			initial_domain = RADEON_GEM_DOMAIN_GTT;
+			break;
+		case PIPE_USAGE_DEFAULT:
+		case PIPE_USAGE_STATIC:
+		case PIPE_USAGE_IMMUTABLE:
+		default:
+			initial_domain = RADEON_GEM_DOMAIN_VRAM;
+			break;
+		}
+	}
+	rbo = radeon_bo(radeon, 0, size, alignment, initial_domain);
 	if (rbo == NULL) {
 		return NULL;
 	}
@ -93,7 +111,7 @@ struct r600_bo *r600_bo_handle(struct radeon *radeon,
 	struct r600_bo *bo = calloc(1, sizeof(struct r600_bo));
 	struct radeon_bo *rbo;

-	rbo = bo->bo = radeon_bo(radeon, handle, 0, 0);
+	rbo = bo->bo = radeon_bo(radeon, handle, 0, 0, 0);
 	if (rbo == NULL) {
 		free(bo);
 		return NULL;
--- a/src/gallium/winsys/r600/drm/r600_priv.h
+++ b/src/gallium/winsys/r600/drm/r600_priv.h
@ -132,7 +132,7 @@ unsigned radeon_family_from_device(unsigned device);
 * radeon_bo.c
 */
 struct radeon_bo *radeon_bo(struct radeon *radeon, unsigned handle,
-			    unsigned size, unsigned alignment);
+			    unsigned size, unsigned alignment, unsigned initial_domain);
 void radeon_bo_reference(struct radeon *radeon, struct radeon_bo **dst,
 			 struct radeon_bo *src);
 int radeon_bo_wait(struct radeon *radeon, struct radeon_bo *bo);
--- a/src/gallium/winsys/r600/drm/radeon_bo.c
+++ b/src/gallium/winsys/r600/drm/radeon_bo.c
@ -71,7 +71,7 @@ static void radeon_bo_fixed_unmap(struct radeon *radeon, struct radeon_bo *bo)
 }

 struct radeon_bo *radeon_bo(struct radeon *radeon, unsigned handle,
-			unsigned size, unsigned alignment)
+			    unsigned size, unsigned alignment, unsigned initial_domain)
 {
 	struct radeon_bo *bo;
 	int r;
@ -115,7 +115,7 @@ struct radeon_bo *radeon_bo(struct radeon *radeon, unsigned handle,

 		args.size = size;
 		args.alignment = alignment;
-		args.initial_domain = RADEON_GEM_DOMAIN_CPU;
+		args.initial_domain = initial_domain;
 		args.flags = 0;
 		args.handle = 0;
 		r = drmCommandWriteRead(radeon->fd, DRM_RADEON_GEM_CREATE,
--- a/src/glsl/Makefile
+++ b/src/glsl/Makefile
@ -192,16 +192,16 @@ $(DRICORE_OBJ_DIR)/%.o : %.c
 	$(CC) -c $(INCLUDES) $(DRI_CFLAGS) $(DEFINES) $< -o $@

 glsl_lexer.cpp: glsl_lexer.ll
-	flex --nounistd -o$@  $<
+	$(FLEX) --nounistd -o$@  $<

 glsl_parser.cpp: glsl_parser.yy
-	bison -v -o "$@" -p "_mesa_glsl_" --defines=glsl_parser.h $<
+	$(BISON) -v -o "$@" -p "_mesa_glsl_" --defines=glsl_parser.h $<

 glcpp/glcpp-lex.c: glcpp/glcpp-lex.l
-	flex --nounistd -o$@  $<
+	$(FLEX) --nounistd -o$@  $<

 glcpp/glcpp-parse.c: glcpp/glcpp-parse.y
-	bison -v -o "$@" --defines=glcpp/glcpp-parse.h $<
+	$(BISON) -v -o "$@" --defines=glcpp/glcpp-parse.h $<

 builtin_compiler: $(GLSL2_OBJECTS) $(OBJECTS) builtin_stubs.o
 	$(APP_CXX) $(INCLUDES) $(CXXFLAGS) $(LDFLAGS) $(OBJECTS) $(GLSL2_OBJECTS) builtin_stubs.o -o $@
--- a/src/mapi/glapi/gen/gl_table.py
+++ b/src/mapi/glapi/gen/gl_table.py
@ -167,11 +167,18 @@ class PrintRemapTable(gl_XML.gl_print_base):

 		for f, index in abi_functions + functions:
 			arg_string = gl_XML.create_parameter_string( f.parameters, 0 )
-			cast = '%s (GLAPIENTRYP)(%s)' % (f.return_type, arg_string)

-			print '#define CALL_%s(disp, parameters) CALL_by_offset(disp, (%s), _gloffset_%s, parameters)' % (f.name, cast, f.name)
-			print '#define GET_%s(disp) GET_by_offset(disp, _gloffset_%s)' % (f.name, f.name)
-			print '#define SET_%s(disp, fn) SET_by_offset(disp, _gloffset_%s, fn)' % (f.name, f.name)
+			print 'typedef %s (GLAPIENTRYP _glptr_%s)(%s);' % (f.return_type, f.name, arg_string)
+			print '#define CALL_%s(disp, parameters) \\' % (f.name)
+			print '    (* GET_%s(disp)) parameters' % (f.name)
+			print 'static INLINE _glptr_%s GET_%s(struct _glapi_table *disp) {' % (f.name, f.name)
+			print '   return (_glptr_%s) (GET_by_offset(disp, _gloffset_%s));' % (f.name, f.name)
+			print '}'
+			print
+			print 'static INLINE void SET_%s(struct _glapi_table *disp, %s (GLAPIENTRYP fn)(%s)) {' % (f.name, f.return_type, arg_string)
+			print '   SET_by_offset(disp, _gloffset_%s, fn);' % (f.name)
+			print '}'
+			print

 		if alias_functions:
 			print ''
--- a/src/mesa/Makefile
+++ b/src/mesa/Makefile
@ -75,10 +75,10 @@ main/api_exec_es2.c: main/APIspec.xml main/es_generator.py main/APIspecutil.py m
 	$(PYTHON2) $(PYTHON_FLAGS) main/es_generator.py -S main/APIspec.xml -V GLES2.0 > $@

 program/program_parse.tab.c program/program_parse.tab.h: program/program_parse.y
-	bison -v -d --output=program/program_parse.tab.c $<
+	$(BISON) -v -d --output=program/program_parse.tab.c $<

 program/lex.yy.c: program/program_lexer.l
-	flex --never-interactive --outfile=$@ $<
+	$(FLEX) --never-interactive --outfile=$@ $<

 ######################################################################
 # Helper libraries used by many drivers:
--- a/src/mesa/drivers/dri/i965/Makefile
+++ b/src/mesa/drivers/dri/i965/Makefile
@ -96,7 +96,18 @@ DRIVER_SOURCES = \
 	gen6_urb.c \
 	gen6_viewport_state.c \
 	gen6_vs_state.c \
-	gen6_wm_state.c
+	gen6_wm_state.c \
+	gen7_cc_state.c \
+	gen7_clip_state.c \
+	gen7_disable.c \
+	gen7_misc_state.c \
+	gen7_sampler_state.c \
+	gen7_sf_state.c \
+	gen7_urb.c \
+	gen7_viewport_state.c \
+	gen7_vs_state.c \
+	gen7_wm_state.c \
+	gen7_wm_surface_state.c \

 C_SOURCES = \
 	$(COMMON_SOURCES) \
--- a/src/mesa/drivers/dri/i965/brw_clip.c
+++ b/src/mesa/drivers/dri/i965/brw_clip.c
@ -42,6 +42,8 @@
 #include "brw_state.h"
 #include "brw_clip.h"

+#include "../glsl/ralloc.h"
+
 #define FRONT_UNFILLED_BIT  0x1
 #define BACK_UNFILLED_BIT   0x2

@ -52,16 +54,19 @@ static void compile_clip_prog( struct brw_context *brw,
   struct intel_context *intel = &brw->intel;
   struct brw_clip_compile c;
   const GLuint *program;
+   void *mem_ctx;
   GLuint program_size;
   GLuint delta;
   GLuint i;
   GLuint header_regs;

   memset(&c, 0, sizeof(c));
+
+   mem_ctx = ralloc_context(NULL);
   
   /* Begin the compilation:
    */
-   brw_init_compile(brw, &c.func);
+   brw_init_compile(brw, &c.func, mem_ctx);

   c.func.single_program_flow = 1;

@ -150,6 +155,7 @@ static void compile_clip_prog( struct brw_context *brw,
 					program, program_size,
 					&c.prog_data, sizeof(c.prog_data),
 					&brw->clip.prog_data);
+   ralloc_free(mem_ctx);
 }

 /* Calculate interpolants for triangle and line rasterization.
--- a/src/mesa/drivers/dri/i965/brw_clip_line.c
+++ b/src/mesa/drivers/dri/i965/brw_clip_line.c
@ -133,10 +133,6 @@ static void clip_and_emit_line( struct brw_clip_compile *c )
   struct brw_indirect newvtx1   = brw_indirect(3, 0);
   struct brw_indirect plane_ptr = brw_indirect(4, 0);
   struct brw_instruction *plane_loop;
-   struct brw_instruction *plane_active;
-   struct brw_instruction *is_negative;
-   struct brw_instruction *is_neg2 = NULL;
-   struct brw_instruction *not_culled;
   struct brw_reg v1_null_ud = retype(vec1(brw_null_reg()), BRW_REGISTER_TYPE_UD);

   brw_MOV(p, get_addr_reg(vtx0),      brw_address(c->reg.vertex[0]));
@ -169,7 +165,7 @@ static void clip_and_emit_line( struct brw_clip_compile *c )
      brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
      brw_AND(p, v1_null_ud, c->reg.planemask, brw_imm_ud(1));
      
-      plane_active = brw_IF(p, BRW_EXECUTE_1);
+      brw_IF(p, BRW_EXECUTE_1);
      {
 	 if (c->key.nr_userclip)
 	    brw_MOV(p, c->reg.plane_equation, deref_4f(plane_ptr, 0));
@ -184,7 +180,7 @@ static void clip_and_emit_line( struct brw_clip_compile *c )
 	  */
 	 brw_set_conditionalmod(p, BRW_CONDITIONAL_L);
 	 brw_DP4(p, vec4(c->reg.dp1), deref_4f(vtx1, c->offset[VERT_RESULT_HPOS]), c->reg.plane_equation);
-	 is_negative = brw_IF(p, BRW_EXECUTE_1);
+	 brw_IF(p, BRW_EXECUTE_1);
 	 {
             /*
              * Both can be negative on GM965/G965 due to RHW workaround
@ -192,11 +188,11 @@ static void clip_and_emit_line( struct brw_clip_compile *c )
              */
             if (brw->has_negative_rhw_bug) {
                 brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_LE, c->reg.dp0, brw_imm_f(0.0));
-                 is_neg2 = brw_IF(p, BRW_EXECUTE_1);
+                 brw_IF(p, BRW_EXECUTE_1);
                 {
                     brw_clip_kill_thread(c);
                 }
-                 brw_ENDIF(p, is_neg2);
+                 brw_ENDIF(p);
             }

             brw_ADD(p, c->reg.t, c->reg.dp1, negate(c->reg.dp0));
@ -207,7 +203,7 @@ static void clip_and_emit_line( struct brw_clip_compile *c )
             brw_MOV(p, c->reg.t1, c->reg.t);
             brw_set_predicate_control(p, BRW_PREDICATE_NONE);
 	 } 
-	 is_negative = brw_ELSE(p, is_negative);
+	 brw_ELSE(p);
 	 {
             /* Coming back in.  We know that both cannot be negative
              * because the line would have been culled in that case.
@ -217,7 +213,7 @@ static void clip_and_emit_line( struct brw_clip_compile *c )
             /* Only on GM965/G965 */
             if (brw->has_negative_rhw_bug) {
                 brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_L, c->reg.dp0, brw_imm_f(0.0));
-                 is_neg2 = brw_IF(p, BRW_EXECUTE_1);
+                 brw_IF(p, BRW_EXECUTE_1);
             }

             {
@ -231,12 +227,12 @@ static void clip_and_emit_line( struct brw_clip_compile *c )
             }

             if (brw->has_negative_rhw_bug) {
-                 brw_ENDIF(p, is_neg2);
+                 brw_ENDIF(p);
             }
         }
-	 brw_ENDIF(p, is_negative);	 
+	 brw_ENDIF(p);
      }
-      brw_ENDIF(p, plane_active);
+      brw_ENDIF(p);
      
      /* plane_ptr++;
       */
@ -251,7 +247,7 @@ static void clip_and_emit_line( struct brw_clip_compile *c )

   brw_ADD(p, c->reg.t, c->reg.t0, c->reg.t1);
   brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_L, c->reg.t, brw_imm_f(1.0));
-   not_culled = brw_IF(p, BRW_EXECUTE_1);
+   brw_IF(p, BRW_EXECUTE_1);
   {
      brw_clip_interp_vertex(c, newvtx0, vtx0, vtx1, c->reg.t0, GL_FALSE);
      brw_clip_interp_vertex(c, newvtx1, vtx1, vtx0, c->reg.t1, GL_FALSE);
@ -259,7 +255,7 @@ static void clip_and_emit_line( struct brw_clip_compile *c )
      brw_clip_emit_vue(c, newvtx0, 1, 0, (_3DPRIM_LINESTRIP << 2) | R02_PRIM_START);
      brw_clip_emit_vue(c, newvtx1, 0, 1, (_3DPRIM_LINESTRIP << 2) | R02_PRIM_END); 
   }
-   brw_ENDIF(p, not_culled);
+   brw_ENDIF(p);
   brw_clip_kill_thread(c);
 }

--- a/src/mesa/drivers/dri/i965/brw_clip_tri.c
+++ b/src/mesa/drivers/dri/i965/brw_clip_tri.c
@ -134,7 +134,6 @@ void brw_clip_tri_init_vertices( struct brw_clip_compile *c )
 {
   struct brw_compile *p = &c->func;
   struct brw_reg tmp0 = c->reg.loopcount; /* handy temporary */
-   struct brw_instruction *is_rev;

   /* Initial list of indices for incoming vertexes:
    */
@ -148,21 +147,21 @@ void brw_clip_tri_init_vertices( struct brw_clip_compile *c )
   /* XXX: Is there an easier way to do this?  Need to reverse every
    * second tristrip element:  Can ignore sometimes?
    */
-   is_rev = brw_IF(p, BRW_EXECUTE_1);
+   brw_IF(p, BRW_EXECUTE_1);
   {   
      brw_MOV(p, get_element(c->reg.inlist, 0),  brw_address(c->reg.vertex[1]) );
      brw_MOV(p, get_element(c->reg.inlist, 1),  brw_address(c->reg.vertex[0]) );
      if (c->need_direction)
 	 brw_MOV(p, c->reg.dir, brw_imm_f(-1));
   }
-   is_rev = brw_ELSE(p, is_rev);
+   brw_ELSE(p);
   {
      brw_MOV(p, get_element(c->reg.inlist, 0),  brw_address(c->reg.vertex[0]) );
      brw_MOV(p, get_element(c->reg.inlist, 1),  brw_address(c->reg.vertex[1]) );
      if (c->need_direction)
 	 brw_MOV(p, c->reg.dir, brw_imm_f(1));
   }
-   brw_ENDIF(p, is_rev);
+   brw_ENDIF(p);

   brw_MOV(p, get_element(c->reg.inlist, 2),  brw_address(c->reg.vertex[2]) );
   brw_MOV(p, brw_vec8_grf(c->reg.outlist.nr, 0), brw_imm_f(0));
@ -174,7 +173,6 @@ void brw_clip_tri_init_vertices( struct brw_clip_compile *c )
 void brw_clip_tri_flat_shade( struct brw_clip_compile *c )
 {
   struct brw_compile *p = &c->func;
-   struct brw_instruction *is_poly, *is_trifan;
   struct brw_reg tmp0 = c->reg.loopcount; /* handy temporary */

   brw_AND(p, tmp0, get_element_ud(c->reg.R0, 2), brw_imm_ud(PRIM_MASK)); 
@ -184,12 +182,12 @@ void brw_clip_tri_flat_shade( struct brw_clip_compile *c )
 	   tmp0,
 	   brw_imm_ud(_3DPRIM_POLYGON));

-   is_poly = brw_IF(p, BRW_EXECUTE_1);
+   brw_IF(p, BRW_EXECUTE_1);
   {
      brw_clip_copy_colors(c, 1, 0);
      brw_clip_copy_colors(c, 2, 0);
   }
-   is_poly = brw_ELSE(p, is_poly);
+   brw_ELSE(p);
   {
      if (c->key.pv_first) {
 	 brw_CMP(p,
@ -197,24 +195,24 @@ void brw_clip_tri_flat_shade( struct brw_clip_compile *c )
 		 BRW_CONDITIONAL_EQ,
 		 tmp0,
 		 brw_imm_ud(_3DPRIM_TRIFAN));
-	 is_trifan = brw_IF(p, BRW_EXECUTE_1);
+	 brw_IF(p, BRW_EXECUTE_1);
 	 {
 	    brw_clip_copy_colors(c, 0, 1);
 	    brw_clip_copy_colors(c, 2, 1);
 	 }
-	 is_trifan = brw_ELSE(p, is_trifan);
+	 brw_ELSE(p);
 	 {
 	    brw_clip_copy_colors(c, 1, 0);
 	    brw_clip_copy_colors(c, 2, 0);
 	 }
-	 brw_ENDIF(p, is_trifan);
+	 brw_ENDIF(p);
      }
      else {
         brw_clip_copy_colors(c, 0, 2);
         brw_clip_copy_colors(c, 1, 2);
      }
   }
-   brw_ENDIF(p, is_poly);
+   brw_ENDIF(p);
 }


@ -232,10 +230,7 @@ void brw_clip_tri( struct brw_clip_compile *c )
   struct brw_indirect outlist_ptr = brw_indirect(5, 0);
   struct brw_indirect freelist_ptr = brw_indirect(6, 0);
   struct brw_instruction *plane_loop;
-   struct brw_instruction *plane_active;
   struct brw_instruction *vertex_loop;
-   struct brw_instruction *next_test;
-   struct brw_instruction *prev_test;
   
   brw_MOV(p, get_addr_reg(vtxPrev),     brw_address(c->reg.vertex[2]) );
   brw_MOV(p, get_addr_reg(plane_ptr),   brw_clip_plane0_address(c));
@ -251,7 +246,7 @@ void brw_clip_tri( struct brw_clip_compile *c )
      brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
      brw_AND(p, vec1(brw_null_reg()), c->reg.planemask, brw_imm_ud(1));
      
-      plane_active = brw_IF(p, BRW_EXECUTE_1);
+      brw_IF(p, BRW_EXECUTE_1);
      {
 	 /* vtxOut = freelist_ptr++ 
 	  */
@ -275,13 +270,13 @@ void brw_clip_tri( struct brw_clip_compile *c )
 	    /* IS_NEGATIVE(prev) */
 	    brw_set_conditionalmod(p, BRW_CONDITIONAL_L);
 	    brw_DP4(p, vec4(c->reg.dpPrev), deref_4f(vtxPrev, c->offset[VERT_RESULT_HPOS]), c->reg.plane_equation);
-	    prev_test = brw_IF(p, BRW_EXECUTE_1);
+	    brw_IF(p, BRW_EXECUTE_1);
 	    {
 	       /* IS_POSITIVE(next)
 		*/
 	       brw_set_conditionalmod(p, BRW_CONDITIONAL_GE);
 	       brw_DP4(p, vec4(c->reg.dp), deref_4f(vtx, c->offset[VERT_RESULT_HPOS]), c->reg.plane_equation);
-	       next_test = brw_IF(p, BRW_EXECUTE_1);
+	       brw_IF(p, BRW_EXECUTE_1);
 	       {

 		  /* Coming back in.
@ -307,10 +302,10 @@ void brw_clip_tri( struct brw_clip_compile *c )
 		  brw_ADD(p, c->reg.nr_verts, c->reg.nr_verts, brw_imm_ud(1));
 		  brw_MOV(p, get_addr_reg(vtxOut), brw_imm_uw(0) );
 	       }
-	       brw_ENDIF(p, next_test);
+	       brw_ENDIF(p);
 	       
 	    }
-	    prev_test = brw_ELSE(p, prev_test);
+	    brw_ELSE(p);
 	    {
 	       /* *outlist_ptr++ = vtxPrev;
 		* nr_verts++;
@ -323,7 +318,7 @@ void brw_clip_tri( struct brw_clip_compile *c )
 		*/
 	       brw_set_conditionalmod(p, BRW_CONDITIONAL_L);
 	       brw_DP4(p, vec4(c->reg.dp), deref_4f(vtx, c->offset[VERT_RESULT_HPOS]), c->reg.plane_equation);
-	       next_test = brw_IF(p, BRW_EXECUTE_1);
+	       brw_IF(p, BRW_EXECUTE_1);
 	       {
 		  /* Going out of bounds.  Avoid division by zero as we
 		   * know dp != dpPrev from DIFFERENT_SIGNS, above.
@ -349,9 +344,9 @@ void brw_clip_tri( struct brw_clip_compile *c )
 		  brw_ADD(p, c->reg.nr_verts, c->reg.nr_verts, brw_imm_ud(1));
 		  brw_MOV(p, get_addr_reg(vtxOut), brw_imm_uw(0) );
 	       } 	       
-	       brw_ENDIF(p, next_test);
+	       brw_ENDIF(p);
 	    }
-	    brw_ENDIF(p, prev_test);
+	    brw_ENDIF(p);
 	    
 	    /* vtxPrev = vtx;
 	     * inlist_ptr++;
@ -377,7 +372,7 @@ void brw_clip_tri( struct brw_clip_compile *c )
 	 brw_MOV(p, get_addr_reg(inlist_ptr), brw_address(c->reg.inlist));
 	 brw_MOV(p, get_addr_reg(outlist_ptr), brw_address(c->reg.outlist));
      }
-      brw_ENDIF(p, plane_active);
+      brw_ENDIF(p);
      
      /* plane_ptr++;
       */
@ -404,7 +399,7 @@ void brw_clip_tri( struct brw_clip_compile *c )
 void brw_clip_tri_emit_polygon(struct brw_clip_compile *c)
 {
   struct brw_compile *p = &c->func;
-   struct brw_instruction *loop, *if_insn;
+   struct brw_instruction *loop;

   /* for (loopcount = nr_verts-2; loopcount > 0; loopcount--)
    */
@ -414,7 +409,7 @@ void brw_clip_tri_emit_polygon(struct brw_clip_compile *c)
 	   c->reg.nr_verts,
 	   brw_imm_d(-2));

-   if_insn = brw_IF(p, BRW_EXECUTE_1);
+   brw_IF(p, BRW_EXECUTE_1);
   {
      struct brw_indirect v0 = brw_indirect(0, 0);
      struct brw_indirect vptr = brw_indirect(1, 0);
@ -441,7 +436,7 @@ void brw_clip_tri_emit_polygon(struct brw_clip_compile *c)

      brw_clip_emit_vue(c, v0, 0, 1, ((_3DPRIM_TRIFAN << 2) | R02_PRIM_END));
   }
-   brw_ENDIF(p, if_insn);
+   brw_ENDIF(p);
 }

 static void do_clip_tri( struct brw_clip_compile *c )
@ -455,14 +450,13 @@ static void do_clip_tri( struct brw_clip_compile *c )
 static void maybe_do_clip_tri( struct brw_clip_compile *c )
 {
   struct brw_compile *p = &c->func;
-   struct brw_instruction *do_clip;

   brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_NZ, c->reg.planemask, brw_imm_ud(0));
-   do_clip = brw_IF(p, BRW_EXECUTE_1);
+   brw_IF(p, BRW_EXECUTE_1);
   {
      do_clip_tri(c);
   }
-   brw_ENDIF(p, do_clip);
+   brw_ENDIF(p);
 }

 static void brw_clip_test( struct brw_clip_compile *c )
@ -481,7 +475,6 @@ static void brw_clip_test( struct brw_clip_compile *c )
    struct brw_indirect vt2 = brw_indirect(2, 0);

    struct brw_compile *p = &c->func;
-    struct brw_instruction *is_outside;
    struct brw_reg tmp0 = c->reg.loopcount; /* handy temporary */

    brw_MOV(p, get_addr_reg(vt0), brw_address(c->reg.vertex[0]));
@ -508,11 +501,11 @@ static void brw_clip_test( struct brw_clip_compile *c )
    brw_OR(p, tmp0, tmp0, get_element(t, 2));
    brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
    brw_AND(p, brw_null_reg(), tmp0, brw_imm_ud(0x1));
-    is_outside = brw_IF(p, BRW_EXECUTE_1);
+    brw_IF(p, BRW_EXECUTE_1);
    {
        brw_clip_kill_thread(c);
    }
-    brw_ENDIF(p, is_outside);
+    brw_ENDIF(p);
    brw_set_predicate_control(p, BRW_PREDICATE_NONE);

    /* some vertices are inside a plane, some are outside,need to clip */
@ -549,11 +542,11 @@ static void brw_clip_test( struct brw_clip_compile *c )
    brw_OR(p, tmp0, tmp0, get_element(t, 2));
    brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
    brw_AND(p, brw_null_reg(), tmp0, brw_imm_ud(0x1));
-    is_outside = brw_IF(p, BRW_EXECUTE_1);
+    brw_IF(p, BRW_EXECUTE_1);
    {
        brw_clip_kill_thread(c);
    }
-    brw_ENDIF(p, is_outside);
+    brw_ENDIF(p);
    brw_set_predicate_control(p, BRW_PREDICATE_NONE);

    /* some vertices are inside a plane, some are outside,need to clip */
@ -580,7 +573,6 @@ static void brw_clip_test( struct brw_clip_compile *c )

 void brw_emit_tri_clip( struct brw_clip_compile *c )
 {
-   struct brw_instruction *neg_rhw;
   struct brw_compile *p = &c->func;
   struct brw_context *brw = p->brw;
   brw_clip_tri_alloc_regs(c, 3 + c->key.nr_userclip + 6);
@ -594,11 +586,11 @@ void brw_emit_tri_clip( struct brw_clip_compile *c )
      brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
      brw_AND(p, brw_null_reg(), get_element_ud(c->reg.R0, 2), 
              brw_imm_ud(1<<20));
-      neg_rhw = brw_IF(p, BRW_EXECUTE_1); 
+      brw_IF(p, BRW_EXECUTE_1);
      {
         brw_clip_test(c);
      }
-      brw_ENDIF(p, neg_rhw);
+      brw_ENDIF(p);
   }
   /* Can't push into do_clip_tri because with polygon (or quad)
    * flatshading, need to apply the flatshade here because we don't
--- a/src/mesa/drivers/dri/i965/brw_clip_unfilled.c
+++ b/src/mesa/drivers/dri/i965/brw_clip_unfilled.c
@ -96,7 +96,6 @@ static void compute_tri_direction( struct brw_clip_compile *c )
 static void cull_direction( struct brw_clip_compile *c )
 {
   struct brw_compile *p = &c->func;
-   struct brw_instruction *ccw;
   GLuint conditional;

   assert (!(c->key.fill_ccw == CLIP_CULL &&
@ -113,11 +112,11 @@ static void cull_direction( struct brw_clip_compile *c )
 	   get_element(c->reg.dir, 2),
 	   brw_imm_f(0));
   
-   ccw = brw_IF(p, BRW_EXECUTE_1);
+   brw_IF(p, BRW_EXECUTE_1);
   {
      brw_clip_kill_thread(c);
   }
-   brw_ENDIF(p, ccw);
+   brw_ENDIF(p);
 }


@ -125,7 +124,6 @@ static void cull_direction( struct brw_clip_compile *c )
 static void copy_bfc( struct brw_clip_compile *c )
 {
   struct brw_compile *p = &c->func;
-   struct brw_instruction *ccw;
   GLuint conditional;

   /* Do we have any colors to copy? 
@ -149,7 +147,7 @@ static void copy_bfc( struct brw_clip_compile *c )
 	   get_element(c->reg.dir, 2),
 	   brw_imm_f(0));
   
-   ccw = brw_IF(p, BRW_EXECUTE_1);
+   brw_IF(p, BRW_EXECUTE_1);
   {
      GLuint i;

@ -165,7 +163,7 @@ static void copy_bfc( struct brw_clip_compile *c )
 		    byte_offset(c->reg.vertex[i], c->offset[VERT_RESULT_BFC1]));
      }
   }
-   brw_ENDIF(p, ccw);
+   brw_ENDIF(p);
 }


@ -205,7 +203,6 @@ static void compute_offset( struct brw_clip_compile *c )
 static void merge_edgeflags( struct brw_clip_compile *c )
 {
   struct brw_compile *p = &c->func;
-   struct brw_instruction *is_poly;
   struct brw_reg tmp0 = get_element_ud(c->reg.tmp0, 0);

   brw_AND(p, tmp0, get_element_ud(c->reg.R0, 2), brw_imm_ud(PRIM_MASK)); 
@ -218,7 +215,7 @@ static void merge_edgeflags( struct brw_clip_compile *c )
   /* Get away with using reg.vertex because we know that this is not
    * a _3DPRIM_TRISTRIP_REVERSE:
    */
-   is_poly = brw_IF(p, BRW_EXECUTE_1);
+   brw_IF(p, BRW_EXECUTE_1);
   {   
      brw_set_conditionalmod(p, BRW_CONDITIONAL_EQ);
      brw_AND(p, vec1(brw_null_reg()), get_element_ud(c->reg.R0, 2), brw_imm_ud(1<<8));
@ -230,7 +227,7 @@ static void merge_edgeflags( struct brw_clip_compile *c )
      brw_MOV(p, byte_offset(c->reg.vertex[2], c->offset[VERT_RESULT_EDGE]), brw_imm_f(0));
      brw_set_predicate_control(p, BRW_PREDICATE_NONE);
   }
-   brw_ENDIF(p, is_poly);
+   brw_ENDIF(p);
 }


@ -255,7 +252,6 @@ static void emit_lines(struct brw_clip_compile *c,
 {
   struct brw_compile *p = &c->func;
   struct brw_instruction *loop;
-   struct brw_instruction *draw_edge;
   struct brw_indirect v0 = brw_indirect(0, 0);
   struct brw_indirect v1 = brw_indirect(1, 0);
   struct brw_indirect v0ptr = brw_indirect(2, 0);
@ -300,12 +296,12 @@ static void emit_lines(struct brw_clip_compile *c,
 	      vec1(brw_null_reg()), BRW_CONDITIONAL_NZ, 
 	      deref_1f(v0, c->offset[VERT_RESULT_EDGE]),
 	      brw_imm_f(0));
-      draw_edge = brw_IF(p, BRW_EXECUTE_1);
+      brw_IF(p, BRW_EXECUTE_1);
      {
 	 brw_clip_emit_vue(c, v0, 1, 0, (_3DPRIM_LINESTRIP << 2) | R02_PRIM_START);
 	 brw_clip_emit_vue(c, v1, 1, 0, (_3DPRIM_LINESTRIP << 2) | R02_PRIM_END);
      }
-      brw_ENDIF(p, draw_edge);
+      brw_ENDIF(p);

      brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
      brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1));
@ -320,7 +316,6 @@ static void emit_points(struct brw_clip_compile *c,
 {
   struct brw_compile *p = &c->func;
   struct brw_instruction *loop;
-   struct brw_instruction *draw_point;

   struct brw_indirect v0 = brw_indirect(0, 0);
   struct brw_indirect v0ptr = brw_indirect(2, 0);
@ -339,14 +334,14 @@ static void emit_points(struct brw_clip_compile *c,
 	      vec1(brw_null_reg()), BRW_CONDITIONAL_NZ, 
 	      deref_1f(v0, c->offset[VERT_RESULT_EDGE]),
 	      brw_imm_f(0));
-      draw_point = brw_IF(p, BRW_EXECUTE_1);
+      brw_IF(p, BRW_EXECUTE_1);
      {
 	 if (do_offset)
 	    apply_one_offset(c, v0);

 	 brw_clip_emit_vue(c, v0, 1, 0, (_3DPRIM_POINTLIST << 2) | R02_PRIM_START | R02_PRIM_END);
      }
-      brw_ENDIF(p, draw_point);
+      brw_ENDIF(p);

      brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
      brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1));
@ -388,7 +383,6 @@ static void emit_primitives( struct brw_clip_compile *c,
 static void emit_unfilled_primitives( struct brw_clip_compile *c )
 {
   struct brw_compile *p = &c->func;
-   struct brw_instruction *ccw;

   /* Direction culling has already been done.
    */
@ -402,15 +396,15 @@ static void emit_unfilled_primitives( struct brw_clip_compile *c )
 	      get_element(c->reg.dir, 2),
 	      brw_imm_f(0));
   
-      ccw = brw_IF(p, BRW_EXECUTE_1);
+      brw_IF(p, BRW_EXECUTE_1);
      {
 	 emit_primitives(c, c->key.fill_ccw, c->key.offset_ccw);
      }
-      ccw = brw_ELSE(p, ccw);
+      brw_ELSE(p);
      {
 	 emit_primitives(c, c->key.fill_cw, c->key.offset_cw);
      }
-      brw_ENDIF(p, ccw);
+      brw_ENDIF(p);
   }
   else if (c->key.fill_cw != CLIP_CULL) {
      emit_primitives(c, c->key.fill_cw, c->key.offset_cw);
@ -426,22 +420,19 @@ static void emit_unfilled_primitives( struct brw_clip_compile *c )
 static void check_nr_verts( struct brw_clip_compile *c )
 {
   struct brw_compile *p = &c->func;
-   struct brw_instruction *if_insn;

   brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_L, c->reg.nr_verts, brw_imm_d(3));      
-   if_insn = brw_IF(p, BRW_EXECUTE_1);
+   brw_IF(p, BRW_EXECUTE_1);
   {
      brw_clip_kill_thread(c);
   }
-   brw_ENDIF(p, if_insn);
+   brw_ENDIF(p);
 }


 void brw_emit_unfilled_clip( struct brw_clip_compile *c )
 {
   struct brw_compile *p = &c->func;
-   struct brw_instruction *do_clip;
-   

   c->need_direction = ((c->key.offset_ccw || c->key.offset_cw) ||
 			(c->key.fill_ccw != c->key.fill_cw) ||
@ -488,14 +479,14 @@ void brw_emit_unfilled_clip( struct brw_clip_compile *c )
   
   brw_clip_init_clipmask(c);
   brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_NZ, c->reg.planemask, brw_imm_ud(0));
-   do_clip = brw_IF(p, BRW_EXECUTE_1);
+   brw_IF(p, BRW_EXECUTE_1);
   {
      brw_clip_init_planes(c);
      brw_clip_tri(c);
      check_nr_verts(c);
   }
-   brw_ENDIF(p, do_clip);
-   
+   brw_ENDIF(p);
+
   emit_unfilled_primitives(c);
   brw_clip_kill_thread(c);
 }
--- a/src/mesa/drivers/dri/i965/brw_clip_util.c
+++ b/src/mesa/drivers/dri/i965/brw_clip_util.c
@ -338,11 +338,10 @@ void brw_clip_ff_sync(struct brw_clip_compile *c)

    if (intel->needs_ff_sync) {
        struct brw_compile *p = &c->func;
-        struct brw_instruction *need_ff_sync;

        brw_set_conditionalmod(p, BRW_CONDITIONAL_Z);
        brw_AND(p, brw_null_reg(), c->reg.ff_sync, brw_imm_ud(0x1));
-        need_ff_sync = brw_IF(p, BRW_EXECUTE_1);
+        brw_IF(p, BRW_EXECUTE_1);
        {
            brw_OR(p, c->reg.ff_sync, c->reg.ff_sync, brw_imm_ud(0x1));
            brw_ff_sync(p,
@ -353,7 +352,7 @@ void brw_clip_ff_sync(struct brw_clip_compile *c)
 			1, /* response length */
 			0 /* eot */);
        }
-        brw_ENDIF(p, need_ff_sync);
+        brw_ENDIF(p);
        brw_set_predicate_control(p, BRW_PREDICATE_NONE);
    }
 }
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@ -161,7 +161,7 @@ GLboolean brwCreateContext( int api,
      but we're not sure how it's actually done for vertex order,
      that affect provoking vertex decision. Always use last vertex
      convention for quad primitive which works as expected for now. */
-   if (intel->gen == 6)
+   if (intel->gen >= 6)
       ctx->Const.QuadsFollowProvokingVertexConvention = GL_FALSE;

   if (intel->is_g4x || intel->gen >= 5) {
@ -178,8 +178,24 @@ GLboolean brwCreateContext( int api,
   }

   /* WM maximum threads is number of EUs times number of threads per EU. */
-   if (intel->gen >= 6) {
-      if (IS_GT2(intel->intelScreen->deviceID)) {
+   if (intel->gen >= 7) {
+      if (IS_IVB_GT1(intel->intelScreen->deviceID)) {
+	 brw->wm_max_threads = 86;
+	 brw->vs_max_threads = 36;
+	 brw->urb.size = 128;
+	 brw->urb.max_vs_entries = 512;
+	 brw->urb.max_gs_entries = 192;
+      } else if (IS_IVB_GT2(intel->intelScreen->deviceID)) {
+	 brw->wm_max_threads = 86;
+	 brw->vs_max_threads = 128;
+	 brw->urb.size = 256;
+	 brw->urb.max_vs_entries = 704;
+	 brw->urb.max_gs_entries = 320;
+      } else {
+	 assert(!"Unknown gen7 device.");
+      }
+   } else if (intel->gen == 6) {
+      if (IS_SNB_GT2(intel->intelScreen->deviceID)) {
 	 /* This could possibly be 80, but is supposed to require
 	  * disabling of WIZ hashing (bit 6 of GT_MODE, 0x20d0) and a
 	  * GPU reset to change.
@ -187,12 +203,12 @@ GLboolean brwCreateContext( int api,
 	 brw->wm_max_threads = 40;
 	 brw->vs_max_threads = 60;
 	 brw->urb.size = 64;            /* volume 5c.5 section 5.1 */
-	 brw->urb.max_vs_handles = 128; /* volume 2a (see 3DSTATE_URB) */
+	 brw->urb.max_vs_entries = 128; /* volume 2a (see 3DSTATE_URB) */
      } else {
 	 brw->wm_max_threads = 40;
 	 brw->vs_max_threads = 24;
 	 brw->urb.size = 32;            /* volume 5c.5 section 5.1 */
-	 brw->urb.max_vs_handles = 256; /* volume 2a (see 3DSTATE_URB) */
+	 brw->urb.max_vs_entries = 256; /* volume 2a (see 3DSTATE_URB) */
      }
   } else if (intel->gen == 5) {
      brw->urb.size = 1024;
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@ -120,32 +120,58 @@

 struct brw_context;

-#define BRW_NEW_URB_FENCE               0x1
-#define BRW_NEW_FRAGMENT_PROGRAM        0x2
-#define BRW_NEW_VERTEX_PROGRAM          0x4
-#define BRW_NEW_INPUT_DIMENSIONS        0x8
-#define BRW_NEW_CURBE_OFFSETS           0x10
-#define BRW_NEW_REDUCED_PRIMITIVE       0x20
-#define BRW_NEW_PRIMITIVE               0x40
-#define BRW_NEW_CONTEXT                 0x80
-#define BRW_NEW_WM_INPUT_DIMENSIONS     0x100
-#define BRW_NEW_PSP                     0x800
-#define BRW_NEW_WM_SURFACES		0x1000
-#define BRW_NEW_BINDING_TABLE		0x2000
-#define BRW_NEW_INDICES			0x4000
-#define BRW_NEW_VERTICES		0x8000
+enum brw_state_id {
+   BRW_STATE_URB_FENCE,
+   BRW_STATE_FRAGMENT_PROGRAM,
+   BRW_STATE_VERTEX_PROGRAM,
+   BRW_STATE_INPUT_DIMENSIONS,
+   BRW_STATE_CURBE_OFFSETS,
+   BRW_STATE_REDUCED_PRIMITIVE,
+   BRW_STATE_PRIMITIVE,
+   BRW_STATE_CONTEXT,
+   BRW_STATE_WM_INPUT_DIMENSIONS,
+   BRW_STATE_PSP,
+   BRW_STATE_WM_SURFACES,
+   BRW_STATE_VS_BINDING_TABLE,
+   BRW_STATE_GS_BINDING_TABLE,
+   BRW_STATE_PS_BINDING_TABLE,
+   BRW_STATE_INDICES,
+   BRW_STATE_VERTICES,
+   BRW_STATE_BATCH,
+   BRW_STATE_NR_WM_SURFACES,
+   BRW_STATE_NR_VS_SURFACES,
+   BRW_STATE_INDEX_BUFFER,
+   BRW_STATE_VS_CONSTBUF,
+   BRW_STATE_WM_CONSTBUF
+};
+
+#define BRW_NEW_URB_FENCE               (1 << BRW_STATE_URB_FENCE)
+#define BRW_NEW_FRAGMENT_PROGRAM        (1 << BRW_STATE_FRAGMENT_PROGRAM)
+#define BRW_NEW_VERTEX_PROGRAM          (1 << BRW_STATE_VERTEX_PROGRAM)
+#define BRW_NEW_INPUT_DIMENSIONS        (1 << BRW_STATE_INPUT_DIMENSIONS)
+#define BRW_NEW_CURBE_OFFSETS           (1 << BRW_STATE_CURBE_OFFSETS)
+#define BRW_NEW_REDUCED_PRIMITIVE       (1 << BRW_STATE_REDUCED_PRIMITIVE)
+#define BRW_NEW_PRIMITIVE               (1 << BRW_STATE_PRIMITIVE)
+#define BRW_NEW_CONTEXT                 (1 << BRW_STATE_CONTEXT)
+#define BRW_NEW_WM_INPUT_DIMENSIONS     (1 << BRW_STATE_WM_INPUT_DIMENSIONS)
+#define BRW_NEW_PSP                     (1 << BRW_STATE_PSP)
+#define BRW_NEW_WM_SURFACES		(1 << BRW_STATE_WM_SURFACES)
+#define BRW_NEW_VS_BINDING_TABLE	(1 << BRW_STATE_VS_BINDING_TABLE)
+#define BRW_NEW_GS_BINDING_TABLE	(1 << BRW_STATE_GS_BINDING_TABLE)
+#define BRW_NEW_PS_BINDING_TABLE	(1 << BRW_STATE_PS_BINDING_TABLE)
+#define BRW_NEW_INDICES			(1 << BRW_STATE_INDICES)
+#define BRW_NEW_VERTICES		(1 << BRW_STATE_VERTICES)
 /**
 * Used for any batch entry with a relocated pointer that will be used
 * by any 3D rendering.
 */
-#define BRW_NEW_BATCH			0x10000
+#define BRW_NEW_BATCH                  (1 << BRW_STATE_BATCH)
 /** \see brw.state.depth_region */
-#define BRW_NEW_DEPTH_BUFFER		0x20000
-#define BRW_NEW_NR_WM_SURFACES		0x40000
-#define BRW_NEW_NR_VS_SURFACES		0x80000
-#define BRW_NEW_INDEX_BUFFER		0x100000
-#define BRW_NEW_VS_CONSTBUF		0x200000
-#define BRW_NEW_WM_CONSTBUF		0x400000
+#define BRW_NEW_NR_WM_SURFACES         (1 << BRW_STATE_NR_WM_SURFACES)
+#define BRW_NEW_NR_VS_SURFACES         (1 << BRW_STATE_NR_VS_SURFACES)
+#define BRW_NEW_INDEX_BUFFER           (1 << BRW_STATE_INDEX_BUFFER)
+#define BRW_NEW_VS_CONSTBUF            (1 << BRW_STATE_VS_CONSTBUF)
+#define BRW_NEW_WM_CONSTBUF            (1 << BRW_STATE_WM_CONSTBUF)

 struct brw_state_flags {
   /** State update flags signalled by mesa internals */
@ -462,28 +488,6 @@ struct brw_context

   struct {
      struct brw_state_flags dirty;
-
-      /**
-       * \name Cached region pointers
-       *
-       * When the draw buffer is updated, often the depth buffer is not
-       * changed. Caching the pointer to the buffer's region allows us to
-       * detect when the buffer has in fact changed, and allows us to avoid
-       * updating the buffer's GPU state when it has not.
-       *
-       * The original of each cached pointer is an instance of
-       * \c intel_renderbuffer.region.
-       *
-       * \see brw_set_draw_region()
-       *
-       * \{
-       */
-
-      /** \see struct brw_tracked_state brw_depthbuffer */
-      struct intel_region *depth_region;
-
-      /** \} */
-
      /**
       * List of buffers accumulated in brw_validate_state to receive
       * drm_intel_bo_check_aperture treatment before exec, so we can
@ -567,8 +571,8 @@ struct brw_context

      GLboolean constrained;

-      GLuint max_vs_handles;	/* Maximum number of VS handles */
-      GLuint max_gs_handles;	/* Maximum number of GS handles */
+      GLuint max_vs_entries;	/* Maximum number of VS entries */
+      GLuint max_gs_entries;	/* Maximum number of GS entries */

      GLuint nr_vs_entries;
      GLuint nr_gs_entries;
@ -579,6 +583,8 @@ struct brw_context
      /* gen6:
       * The length of each URB entry owned by the VS (or GS), as
       * a number of 1024-bit (128-byte) rows.  Should be >= 1.
+       *
+       * gen7: Same meaning, but in 512-bit (64-byte) rows.
       */
      GLuint vs_size;
      GLuint gs_size;
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@ -48,6 +48,9 @@
 # define GEN4_3DPRIM_TOPOLOGY_TYPE_SHIFT            10
 # define GEN4_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL (0 << 15)
 # define GEN4_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM     (1 << 15)
+/* DW1 */
+# define GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL (0 << 8)
+# define GEN7_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM     (1 << 8)

 #define _3DPRIM_POINTLIST         0x01
 #define _3DPRIM_LINELIST          0x02
@ -832,6 +835,12 @@
 # define GEN6_BINDING_TABLE_MODIFY_GS	(1 << 9)
 # define GEN6_BINDING_TABLE_MODIFY_PS	(1 << 12)

+#define _3DSTATE_BINDING_TABLE_POINTERS_VS	0x7826 /* GEN7+ */
+#define _3DSTATE_BINDING_TABLE_POINTERS_HS	0x7827 /* GEN7+ */
+#define _3DSTATE_BINDING_TABLE_POINTERS_DS	0x7828 /* GEN7+ */
+#define _3DSTATE_BINDING_TABLE_POINTERS_GS	0x7829 /* GEN7+ */
+#define _3DSTATE_BINDING_TABLE_POINTERS_PS	0x782A /* GEN7+ */
+
 #define _3DSTATE_SAMPLER_STATE_POINTERS		0x7802 /* GEN6+ */
 # define PS_SAMPLER_STATE_CHANGE				(1 << 12)
 # define GS_SAMPLER_STATE_CHANGE				(1 << 9)
@ -840,6 +849,10 @@
 /* DW2: GS */
 /* DW3: PS */

+#define _3DSTATE_SAMPLER_STATE_POINTERS_VS	0x782B /* GEN7+ */
+#define _3DSTATE_SAMPLER_STATE_POINTERS_GS	0x782E /* GEN7+ */
+#define _3DSTATE_SAMPLER_STATE_POINTERS_PS	0x782F /* GEN7+ */
+
 #define CMD_VERTEX_BUFFER             0x7808
 # define BRW_VB0_INDEX_SHIFT		27
 # define GEN6_VB0_INDEX_SHIFT		26
@ -847,6 +860,7 @@
 # define BRW_VB0_ACCESS_INSTANCEDATA	(1 << 26)
 # define GEN6_VB0_ACCESS_VERTEXDATA	(0 << 20)
 # define GEN6_VB0_ACCESS_INSTANCEDATA	(1 << 20)
+# define GEN7_VB0_ADDRESS_MODIFYENABLE  (1 << 14)
 # define BRW_VB0_PITCH_SHIFT		0

 #define CMD_VERTEX_ELEMENT            0x7809
@ -874,18 +888,34 @@
 #define CMD_VF_STATISTICS_965          0x780b
 #define CMD_VF_STATISTICS_GM45        0x680b
 #define _3DSTATE_CC_STATE_POINTERS		0x780e /* GEN6+ */
+#define _3DSTATE_BLEND_STATE_POINTERS		0x7824 /* GEN7+ */
+#define _3DSTATE_DEPTH_STENCIL_STATE_POINTERS	0x7825 /* GEN7+ */

-#define _3DSTATE_URB				0x7805 /* GEN6+ */
+#define _3DSTATE_URB				0x7805 /* GEN6 */
 # define GEN6_URB_VS_SIZE_SHIFT				16
 # define GEN6_URB_VS_ENTRIES_SHIFT			0
 # define GEN6_URB_GS_ENTRIES_SHIFT			8
 # define GEN6_URB_GS_SIZE_SHIFT				0

+#define _3DSTATE_URB_VS                         0x7830 /* GEN7+ */
+#define _3DSTATE_URB_HS                         0x7831 /* GEN7+ */
+#define _3DSTATE_URB_DS                         0x7832 /* GEN7+ */
+#define _3DSTATE_URB_GS                         0x7833 /* GEN7+ */
+# define GEN7_URB_ENTRY_SIZE_SHIFT                      16
+# define GEN7_URB_STARTING_ADDRESS_SHIFT                25
+
+#define _3DSTATE_PUSH_CONSTANT_ALLOC_VS         0x7912 /* GEN7+ */
+#define _3DSTATE_PUSH_CONSTANT_ALLOC_PS         0x7916 /* GEN7+ */
+# define GEN7_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT         16
+
 #define _3DSTATE_VIEWPORT_STATE_POINTERS	0x780d /* GEN6+ */
 # define GEN6_CC_VIEWPORT_MODIFY			(1 << 12)
 # define GEN6_SF_VIEWPORT_MODIFY			(1 << 11)
 # define GEN6_CLIP_VIEWPORT_MODIFY			(1 << 10)

+#define _3DSTATE_VIEWPORT_STATE_POINTERS_CC	0x7823 /* GEN7+ */
+#define _3DSTATE_VIEWPORT_STATE_POINTERS_SF_CL	0x7821 /* GEN7+ */
+
 #define _3DSTATE_SCISSOR_STATE_POINTERS		0x780f /* GEN6+ */

 #define _3DSTATE_VS				0x7810 /* GEN6+ */
@ -914,6 +944,7 @@
 # define GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT	18
 /* DW4 */
 # define GEN6_GS_URB_READ_LENGTH_SHIFT			11
+# define GEN7_GS_INCLUDE_VERTEX_HANDLES		        (1 << 10)
 # define GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT		4
 # define GEN6_GS_DISPATCH_START_GRF_SHIFT		0
 /* DW5 */
@ -921,11 +952,25 @@
 # define GEN6_GS_STATISTICS_ENABLE			(1 << 10)
 # define GEN6_GS_SO_STATISTICS_ENABLE			(1 << 9)
 # define GEN6_GS_RENDERING_ENABLE			(1 << 8)
+# define GEN7_GS_ENABLE					(1 << 0)
 /* DW6 */
 # define GEN6_GS_ENABLE					(1 << 15)

+#define _3DSTATE_HS                             0x781B /* GEN7+ */
+#define _3DSTATE_TE                             0x781C /* GEN7+ */
+#define _3DSTATE_DS                             0x781D /* GEN7+ */
+
 #define _3DSTATE_CLIP				0x7812 /* GEN6+ */
 /* DW1 */
+# define GEN7_CLIP_WINDING_CW                           (0 << 20)
+# define GEN7_CLIP_WINDING_CCW                          (1 << 20)
+# define GEN7_CLIP_VERTEX_SUBPIXEL_PRECISION_8          (0 << 19)
+# define GEN7_CLIP_VERTEX_SUBPIXEL_PRECISION_4          (1 << 19)
+# define GEN7_CLIP_EARLY_CULL                           (1 << 18)
+# define GEN7_CLIP_CULLMODE_BOTH                        (0 << 16)
+# define GEN7_CLIP_CULLMODE_NONE                        (1 << 16)
+# define GEN7_CLIP_CULLMODE_FRONT                       (2 << 16)
+# define GEN7_CLIP_CULLMODE_BACK                        (3 << 16)
 # define GEN6_CLIP_STATISTICS_ENABLE			(1 << 10)
 /**
 * Just does cheap culling based on the clip distance.  Bits must be
@ -955,7 +1000,7 @@
 # define GEN6_CLIP_FORCE_ZERO_RTAINDEX			(1 << 5)

 #define _3DSTATE_SF				0x7813 /* GEN6+ */
-/* DW1 */
+/* DW1 (for gen6) */
 # define GEN6_SF_NUM_OUTPUTS_SHIFT			22
 # define GEN6_SF_SWIZZLE_ENABLE				(1 << 21)
 # define GEN6_SF_POINT_SPRITE_LOWERLEFT			(1 << 20)
@ -1031,6 +1076,27 @@
 /* DW18: attr 0-7 wrap shortest enables */
 /* DW19: attr 8-16 wrap shortest enables */

+/* On GEN7, many fields of 3DSTATE_SF were split out into a new command:
+ * 3DSTATE_SBE.  The remaining fields live in different DWords, but retain
+ * the same bit-offset.  The only new field:
+ */
+/* GEN7/DW1: */
+# define GEN7_SF_DEPTH_BUFFER_SURFACE_FORMAT_SHIFT	12
+
+#define _3DSTATE_SBE				0x781F /* GEN7+ */
+/* DW1 */
+# define GEN7_SBE_SWIZZLE_CONTROL_MODE			(1 << 28)
+# define GEN7_SBE_NUM_OUTPUTS_SHIFT			22
+# define GEN7_SBE_SWIZZLE_ENABLE			(1 << 21)
+# define GEN7_SBE_POINT_SPRITE_LOWERLEFT		(1 << 20)
+# define GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT		11
+# define GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT		4
+/* DW2-9: Attribute setup (same as DW8-15 of gen6 _3DSTATE_SF) */
+/* DW10: Point sprite texture coordinate enables */
+/* DW11: Constant interpolation enables */
+/* DW12: attr 0-7 wrap shortest enables */
+/* DW13: attr 8-15 wrap shortest enables */
+
 #define _3DSTATE_WM				0x7814 /* GEN6+ */
 /* DW1: kernel pointer */
 /* DW2 */
@ -1102,12 +1168,88 @@
 # define GEN6_CONSTANT_BUFFER_1_ENABLE			(1 << 13)
 # define GEN6_CONSTANT_BUFFER_0_ENABLE			(1 << 12)

+#define _3DSTATE_CONSTANT_HS                  0x7819 /* GEN7+ */
+#define _3DSTATE_CONSTANT_DS                  0x781A /* GEN7+ */
+
+/* 3DSTATE_WM for Gen7 */
+/* DW1 */
+# define GEN7_WM_STATISTICS_ENABLE			(1 << 31)
+# define GEN7_WM_DEPTH_CLEAR				(1 << 30)
+# define GEN7_WM_DISPATCH_ENABLE			(1 << 29)
+# define GEN6_WM_DEPTH_RESOLVE				(1 << 28)
+# define GEN7_WM_HIERARCHICAL_DEPTH_RESOLVE		(1 << 27)
+# define GEN7_WM_KILL_ENABLE				(1 << 25)
+# define GEN7_WM_PSCDEPTH_OFF			        (0 << 23)
+# define GEN7_WM_PSCDEPTH_ON			        (1 << 23)
+# define GEN7_WM_PSCDEPTH_ON_GE			        (2 << 23)
+# define GEN7_WM_PSCDEPTH_ON_LE			        (3 << 23)
+# define GEN7_WM_USES_SOURCE_DEPTH			(1 << 20)
+# define GEN7_WM_USES_SOURCE_W			        (1 << 19)
+# define GEN7_WM_POSITION_ZW_PIXEL			(0 << 17)
+# define GEN7_WM_POSITION_ZW_CENTROID			(2 << 17)
+# define GEN7_WM_POSITION_ZW_SAMPLE			(3 << 17)
+# define GEN7_WM_NONPERSPECTIVE_SAMPLE_BARYCENTRIC	(1 << 16)
+# define GEN7_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC	(1 << 15)
+# define GEN7_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC	(1 << 14)
+# define GEN7_WM_PERSPECTIVE_SAMPLE_BARYCENTRIC		(1 << 13)
+# define GEN7_WM_PERSPECTIVE_CENTROID_BARYCENTRIC	(1 << 12)
+# define GEN7_WM_PERSPECTIVE_PIXEL_BARYCENTRIC		(1 << 11)
+# define GEN7_WM_USES_INPUT_COVERAGE_MASK	        (1 << 10)
+# define GEN7_WM_LINE_END_CAP_AA_WIDTH_0_5		(0 << 8)
+# define GEN7_WM_LINE_END_CAP_AA_WIDTH_1_0		(1 << 8)
+# define GEN7_WM_LINE_END_CAP_AA_WIDTH_2_0		(2 << 8)
+# define GEN7_WM_LINE_END_CAP_AA_WIDTH_4_0		(3 << 8)
+# define GEN7_WM_LINE_AA_WIDTH_0_5			(0 << 6)
+# define GEN7_WM_LINE_AA_WIDTH_1_0			(1 << 6)
+# define GEN7_WM_LINE_AA_WIDTH_2_0			(2 << 6)
+# define GEN7_WM_LINE_AA_WIDTH_4_0			(3 << 6)
+# define GEN7_WM_POLYGON_STIPPLE_ENABLE			(1 << 4)
+# define GEN7_WM_LINE_STIPPLE_ENABLE			(1 << 3)
+# define GEN7_WM_POINT_RASTRULE_UPPER_RIGHT		(1 << 2)
+# define GEN7_WM_MSRAST_OFF_PIXEL			(0 << 0)
+# define GEN7_WM_MSRAST_OFF_PATTERN			(1 << 0)
+# define GEN7_WM_MSRAST_ON_PIXEL			(2 << 0)
+# define GEN7_WM_MSRAST_ON_PATTERN			(3 << 0)
+/* DW2 */
+# define GEN7_WM_MSDISPMODE_PERPIXEL			(1 << 31)
+
+#define _3DSTATE_PS				0x7820 /* GEN7+ */
+/* DW1: kernel pointer */
+/* DW2 */
+# define GEN7_PS_SPF_MODE				(1 << 31)
+# define GEN7_PS_VECTOR_MASK_ENABLE			(1 << 30)
+# define GEN7_PS_SAMPLER_COUNT_SHIFT			27
+# define GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT	18
+# define GEN7_PS_FLOATING_POINT_MODE_IEEE_754		(0 << 16)
+# define GEN7_PS_FLOATING_POINT_MODE_ALT		(1 << 16)
+/* DW3: scratch space */
+/* DW4 */
+# define GEN7_PS_MAX_THREADS_SHIFT			23
+# define GEN7_PS_PUSH_CONSTANT_ENABLE		        (1 << 11)
+# define GEN7_PS_ATTRIBUTE_ENABLE		        (1 << 10)
+# define GEN7_PS_OMASK_TO_RENDER_TARGET			(1 << 9)
+# define GEN7_PS_DUAL_SOURCE_BLEND_ENABLE		(1 << 7)
+# define GEN7_PS_POSOFFSET_NONE				(0 << 3)
+# define GEN7_PS_POSOFFSET_CENTROID			(2 << 3)
+# define GEN7_PS_POSOFFSET_SAMPLE			(3 << 3)
+# define GEN7_PS_32_DISPATCH_ENABLE			(1 << 2)
+# define GEN7_PS_16_DISPATCH_ENABLE			(1 << 1)
+# define GEN7_PS_8_DISPATCH_ENABLE			(1 << 0)
+/* DW5 */
+# define GEN7_PS_DISPATCH_START_GRF_SHIFT_0		16
+# define GEN7_PS_DISPATCH_START_GRF_SHIFT_1		8
+# define GEN7_PS_DISPATCH_START_GRF_SHIFT_2		0
+/* DW6: kernel 1 pointer */
+/* DW7: kernel 2 pointer */
+
+#define _3DSTATE_STREAMOUT                      0x781e /* GEN7+ */
+
 #define _3DSTATE_SAMPLE_MASK			0x7818 /* GEN6+ */

 #define _3DSTATE_DRAWING_RECTANGLE		0x7900
 #define _3DSTATE_BLEND_CONSTANT_COLOR		0x7901
 #define _3DSTATE_CHROMA_KEY			0x7904
-#define _3DSTATE_DEPTH_BUFFER			0x7905
+#define _3DSTATE_DEPTH_BUFFER			0x7905 /* GEN4-6 */
 #define _3DSTATE_POLY_STIPPLE_OFFSET		0x7906
 #define _3DSTATE_POLY_STIPPLE_PATTERN		0x7907
 #define _3DSTATE_LINE_STIPPLE_PATTERN		0x7908
@ -1132,7 +1274,12 @@
 #define _3DSTATE_STENCIL_BUFFER			0x790e /* ILK, SNB */
 #define _3DSTATE_HIER_DEPTH_BUFFER		0x790f /* ILK, SNB */

-#define _3DSTATE_CLEAR_PARAMS			0x7910 /* ILK+ */
+#define GEN7_3DSTATE_CLEAR_PARAMS		0x7804
+#define GEN7_3DSTATE_DEPTH_BUFFER		0x7805
+#define GEN7_3DSTATE_STENCIL_BUFFER		0x7806
+#define GEN7_3DSTATE_HIER_DEPTH_BUFFER		0x7807
+
+#define _3DSTATE_CLEAR_PARAMS			0x7910 /* ILK, SNB */
 # define DEPTH_CLEAR_VALID				(1 << 15)
 /* DW1: depth clear value */

--- a/src/mesa/drivers/dri/i965/brw_draw.c
+++ b/src/mesa/drivers/dri/i965/brw_draw.c
@ -182,6 +182,61 @@ static void brw_emit_prim(struct brw_context *brw,
   }
 }

+static void gen7_emit_prim(struct brw_context *brw,
+			   const struct _mesa_prim *prim,
+			   uint32_t hw_prim)
+{
+   struct intel_context *intel = &brw->intel;
+   int verts_per_instance;
+   int vertex_access_type;
+   int start_vertex_location;
+   int base_vertex_location;
+
+   DBG("PRIM: %s %d %d\n", _mesa_lookup_enum_by_nr(prim->mode),
+       prim->start, prim->count);
+
+   start_vertex_location = prim->start;
+   base_vertex_location = prim->basevertex;
+   if (prim->indexed) {
+      vertex_access_type = GEN7_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM;
+      start_vertex_location += brw->ib.start_vertex_offset;
+      base_vertex_location += brw->vb.start_vertex_bias;
+   } else {
+      vertex_access_type = GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL;
+      start_vertex_location += brw->vb.start_vertex_bias;
+   }
+
+   verts_per_instance = trim(prim->mode, prim->count);
+
+   /* If nothing to emit, just return. */
+   if (verts_per_instance == 0)
+      return;
+
+   /* If we're set to always flush, do it before and after the primitive emit.
+    * We want to catch both missed flushes that hurt instruction/state cache
+    * and missed flushes of the render cache as it heads to other parts of
+    * the driver besides the draw code.
+    */
+   if (intel->always_flush_cache) {
+      intel_batchbuffer_emit_mi_flush(intel);
+   }
+
+   BEGIN_BATCH(7);
+   OUT_BATCH(CMD_3D_PRIM << 16 | (7 - 2));
+   OUT_BATCH(hw_prim | vertex_access_type);
+   OUT_BATCH(verts_per_instance);
+   OUT_BATCH(start_vertex_location);
+   OUT_BATCH(1); // instance count
+   OUT_BATCH(0); // start instance location
+   OUT_BATCH(base_vertex_location);
+   ADVANCE_BATCH();
+
+   if (intel->always_flush_cache) {
+      intel_batchbuffer_emit_mi_flush(intel);
+   }
+}
+
+
 static void brw_merge_inputs( struct brw_context *brw,
 		       const struct gl_client_array *arrays[])
 {
@ -270,42 +325,6 @@ static GLboolean check_fallbacks( struct brw_context *brw,
 	    return GL_TRUE;
   }

-   /* BRW hardware doesn't handle GL_CLAMP texturing correctly;
-    * brw_wm_sampler_state:translate_wrap_mode() treats GL_CLAMP
-    * as GL_CLAMP_TO_EDGE instead.  If we're using GL_CLAMP, and
-    * we want strict conformance, force the fallback.
-    * Right now, we only do this for 2D textures.
-    */
-   {
-      int u;
-      for (u = 0; u < ctx->Const.MaxTextureCoordUnits; u++) {
-         struct gl_texture_unit *texUnit = &ctx->Texture.Unit[u];
-
-         if (texUnit->Enabled) {
-	    struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, u);
-
-            if (texUnit->Enabled & TEXTURE_1D_BIT) {
-               if (sampler->WrapS == GL_CLAMP) {
-                   return GL_TRUE;
-               }
-            }
-            if (texUnit->Enabled & TEXTURE_2D_BIT) {
-               if (sampler->WrapS == GL_CLAMP ||
-                   sampler->WrapT == GL_CLAMP) {
-                   return GL_TRUE;
-               }
-            }
-            if (texUnit->Enabled & TEXTURE_3D_BIT) {
-               if (sampler->WrapS == GL_CLAMP ||
-                   sampler->WrapT == GL_CLAMP ||
-                   sampler->WrapR == GL_CLAMP) {
-                   return GL_TRUE;
-               }
-            }
-         }
-      }
-   }
-      
   /* Nothing stopping us from the fast path now */
   return GL_FALSE;
 }
@ -415,7 +434,10 @@ static GLboolean brw_try_draw_prims( struct gl_context *ctx,
 	 brw_upload_state(brw);
      }

-      brw_emit_prim(brw, &prim[i], hw_prim);
+      if (intel->gen >= 7)
+	 gen7_emit_prim(brw, &prim[i], hw_prim);
+      else
+	 brw_emit_prim(brw, &prim[i], hw_prim);

      intel->no_batch_wrap = GL_FALSE;

--- a/src/mesa/drivers/dri/i965/brw_draw_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c
@ -570,6 +570,9 @@ static void brw_emit_vertices(struct brw_context *brw)
 	    dw0 = BRW_VB0_ACCESS_VERTEXDATA | (i << BRW_VB0_INDEX_SHIFT);
 	 }

+	 if (intel->gen >= 7)
+	    dw0 |= GEN7_VB0_ADDRESS_MODIFYENABLE;
+
 	 OUT_BATCH(dw0 | (buffer->stride << BRW_VB0_PITCH_SHIFT));
 	 OUT_RELOC(buffer->bo, I915_GEM_DOMAIN_VERTEX, 0, buffer->offset);
 	 if (intel->gen >= 5) {
--- a/src/mesa/drivers/dri/i965/brw_eu.c
+++ b/src/mesa/drivers/dri/i965/brw_eu.c
@ -34,6 +34,8 @@
 #include "brw_defines.h"
 #include "brw_eu.h"

+#include "../glsl/ralloc.h"
+
 /* Returns the corresponding conditional mod for swapping src0 and
 * src1 in e.g. CMP.
 */
@ -166,7 +168,8 @@ void brw_pop_insn_state( struct brw_compile *p )

 /***********************************************************************
 */
-void brw_init_compile( struct brw_context *brw, struct brw_compile *p )
+void
+brw_init_compile(struct brw_context *brw, struct brw_compile *p, void *mem_ctx)
 {
   p->brw = brw;
   p->nr_insn = 0;
@ -174,12 +177,20 @@ void brw_init_compile( struct brw_context *brw, struct brw_compile *p )
   p->compressed = false;
   memset(p->current, 0, sizeof(p->current[0]));

+   p->mem_ctx = mem_ctx;
+
   /* Some defaults?
    */
   brw_set_mask_control(p, BRW_MASK_ENABLE); /* what does this do? */
   brw_set_saturate(p, 0);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
   brw_set_predicate_control_flag_value(p, 0xff); 
+
+   /* Set up control flow stack */
+   p->if_stack_depth = 0;
+   p->if_stack_array_size = 16;
+   p->if_stack =
+      rzalloc_array(mem_ctx, struct brw_instruction *, p->if_stack_array_size);
 }


@ -295,7 +306,7 @@ brw_resolve_cals(struct brw_compile *c)
 	GLint offset = brw_sub_inst - brw_call_inst;

 	/* patch brw_inst1 to point to brw_inst2 */
-	brw_set_src1(brw_call_inst, brw_imm_d(offset * 16));
+	brw_set_src1(c, brw_call_inst, brw_imm_d(offset * 16));
    }

    /* free linked list of calls */
--- a/src/mesa/drivers/dri/i965/brw_eu.h
+++ b/src/mesa/drivers/dri/i965/brw_eu.h
@ -104,6 +104,8 @@ struct brw_compile {
   struct brw_instruction store[BRW_EU_MAX_INSN];
   GLuint nr_insn;

+   void *mem_ctx;
+
   /* Allow clients to push/pop instruction state:
    */
   struct brw_instruction stack[BRW_EU_MAX_INSN_STACK];
@ -115,6 +117,14 @@ struct brw_compile {
   bool compressed;
   struct brw_context *brw;

+   /* Control flow stacks:
+    * - if_stack contains IF and ELSE instructions which must be patched
+    *   (and popped) once the matching ENDIF instruction is encountered.
+    */
+   struct brw_instruction **if_stack;
+   int if_stack_depth;
+   int if_stack_array_size;
+
   struct brw_glsl_label *first_label;  /**< linked list of labels */
   struct brw_glsl_call *first_call;    /**< linked list of CALs */
 };
@ -784,7 +794,8 @@ void brw_set_predicate_inverse(struct brw_compile *p, bool predicate_inverse);
 void brw_set_conditionalmod( struct brw_compile *p, GLuint conditional );
 void brw_set_acc_write_control(struct brw_compile *p, GLuint value);

-void brw_init_compile( struct brw_context *, struct brw_compile *p );
+void brw_init_compile(struct brw_context *, struct brw_compile *p,
+		      void *mem_ctx);
 const GLuint *brw_get_program( struct brw_compile *p, GLuint *sz );


@ -950,12 +961,8 @@ struct brw_instruction *brw_IF(struct brw_compile *p,
 struct brw_instruction *gen6_IF(struct brw_compile *p, uint32_t conditional,
 				struct brw_reg src0, struct brw_reg src1);

-struct brw_instruction *brw_ELSE(struct brw_compile *p, 
-				 struct brw_instruction *if_insn);
-
-void brw_ENDIF(struct brw_compile *p, 
-	       struct brw_instruction *if_or_else_insn);
-
+void brw_ELSE(struct brw_compile *p);
+void brw_ENDIF(struct brw_compile *p);

 /* DO/WHILE loops:
 */
@ -1020,8 +1027,9 @@ void brw_math_invert( struct brw_compile *p,
 		      struct brw_reg dst,
 		      struct brw_reg src);

-void brw_set_src1( struct brw_instruction *insn,
-                          struct brw_reg reg );
+void brw_set_src1(struct brw_compile *p,
+		  struct brw_instruction *insn,
+		  struct brw_reg reg);

 void brw_set_uip_jip(struct brw_compile *p);

--- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@ -485,8 +485,6 @@ fs_visitor::emit_fragcoord_interpolation(ir_variable *ir)
 {
   fs_reg *reg = new(this->mem_ctx) fs_reg(this, ir->type);
   fs_reg wpos = *reg;
-   fs_reg neg_y = this->pixel_y;
-   neg_y.negate = true;
   bool flip = !ir->origin_upper_left ^ c->key.render_to_fbo;

   /* gl_FragCoord.x */
@ -1174,7 +1172,8 @@ fs_visitor::visit(ir_assignment *ir)
 }

 fs_inst *
-fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate)
+fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate,
+			      int sampler)
 {
   int mlen;
   int base_mrf = 1;
@ -1186,7 +1185,11 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate)

   if (ir->shadow_comparitor) {
      for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
-	 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i), coordinate);
+	 fs_inst *inst = emit(BRW_OPCODE_MOV,
+			      fs_reg(MRF, base_mrf + mlen + i), coordinate);
+	 if (i < 3 && c->key.gl_clamp_mask[i] & (1 << sampler))
+	    inst->saturate = true;
+
 	 coordinate.reg_offset++;
      }
      /* gen4's SIMD8 sampler always has the slots for u,v,r present. */
@ -1214,7 +1217,10 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate)
      mlen++;
   } else if (ir->op == ir_tex) {
      for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
-	 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i), coordinate);
+	 fs_inst *inst = emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i),
+			      coordinate);
+	 if (i < 3 && c->key.gl_clamp_mask[i] & (1 << sampler))
+	    inst->saturate = true;
 	 coordinate.reg_offset++;
      }
      /* gen4's SIMD8 sampler always has the slots for u,v,r present. */
@ -1228,7 +1234,11 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate)
      assert(ir->op == ir_txb || ir->op == ir_txl);

      for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
-	 emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i * 2), coordinate);
+	 fs_inst *inst = emit(BRW_OPCODE_MOV, fs_reg(MRF,
+						     base_mrf + mlen + i * 2),
+			      coordinate);
+	 if (i < 3 && c->key.gl_clamp_mask[i] & (1 << sampler))
+	    inst->saturate = true;
 	 coordinate.reg_offset++;
      }

@ -1279,6 +1289,7 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate)
   }
   inst->base_mrf = base_mrf;
   inst->mlen = mlen;
+   inst->header_present = true;

   if (simd16) {
      for (int i = 0; i < 4; i++) {
@ -1300,21 +1311,35 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate)
 * surprising in the disassembly.
 */
 fs_inst *
-fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate)
+fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate,
+			      int sampler)
 {
-   int mlen = 1; /* g0 header always present. */
-   int base_mrf = 1;
+   int mlen = 0;
+   int base_mrf = 2;
   int reg_width = c->dispatch_width / 8;
+   bool header_present = false;
+
+   if (ir->offset) {
+      /* The offsets set up by the ir_texture visitor are in the
+       * m1 header, so we can't go headerless.
+       */
+      header_present = true;
+      mlen++;
+      base_mrf--;
+   }

   for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
-      emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i * reg_width),
-	   coordinate);
+      fs_inst *inst = emit(BRW_OPCODE_MOV,
+			   fs_reg(MRF, base_mrf + mlen + i * reg_width),
+			   coordinate);
+      if (i < 3 && c->key.gl_clamp_mask[i] & (1 << sampler))
+	 inst->saturate = true;
      coordinate.reg_offset++;
   }
   mlen += ir->coordinate->type->vector_elements * reg_width;

   if (ir->shadow_comparitor) {
-      mlen = MAX2(mlen, 1 + 4 * reg_width);
+      mlen = MAX2(mlen, header_present + 4 * reg_width);

      ir->shadow_comparitor->accept(this);
      emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result);
@ -1328,7 +1353,7 @@ fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate)
      break;
   case ir_txb:
      ir->lod_info.bias->accept(this);
-      mlen = MAX2(mlen, 1 + 4 * reg_width);
+      mlen = MAX2(mlen, header_present + 4 * reg_width);
      emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result);
      mlen += reg_width;

@ -1337,7 +1362,7 @@ fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate)
      break;
   case ir_txl:
      ir->lod_info.lod->accept(this);
-      mlen = MAX2(mlen, 1 + 4 * reg_width);
+      mlen = MAX2(mlen, header_present + 4 * reg_width);
      emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result);
      mlen += reg_width;

@ -1350,6 +1375,81 @@ fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate)
   }
   inst->base_mrf = base_mrf;
   inst->mlen = mlen;
+   inst->header_present = header_present;
+
+   if (mlen > 11) {
+      fail("Message length >11 disallowed by hardware\n");
+   }
+
+   return inst;
+}
+
+fs_inst *
+fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate,
+			      int sampler)
+{
+   int mlen = 0;
+   int base_mrf = 2;
+   int reg_width = c->dispatch_width / 8;
+   bool header_present = false;
+
+   if (ir->offset) {
+      /* The offsets set up by the ir_texture visitor are in the
+       * m1 header, so we can't go headerless.
+       */
+      header_present = true;
+      mlen++;
+      base_mrf--;
+   }
+
+   if (ir->shadow_comparitor) {
+      ir->shadow_comparitor->accept(this);
+      emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result);
+      mlen += reg_width;
+   }
+
+   /* Set up the LOD info */
+   switch (ir->op) {
+   case ir_tex:
+      break;
+   case ir_txb:
+      ir->lod_info.bias->accept(this);
+      emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result);
+      mlen += reg_width;
+      break;
+   case ir_txl:
+      ir->lod_info.lod->accept(this);
+      emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result);
+      mlen += reg_width;
+      break;
+   case ir_txd:
+   case ir_txf:
+      assert(!"GLSL 1.30 features unsupported");
+      break;
+   }
+
+   /* Set up the coordinate */
+   for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
+      fs_inst *inst = emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen),
+			   coordinate);
+      if (i < 3 && c->key.gl_clamp_mask[i] & (1 << sampler))
+	 inst->saturate = true;
+      coordinate.reg_offset++;
+      mlen += reg_width;
+   }
+
+   /* Generate the SEND */
+   fs_inst *inst = NULL;
+   switch (ir->op) {
+   case ir_tex: inst = emit(FS_OPCODE_TEX, dst); break;
+   case ir_txb: inst = emit(FS_OPCODE_TXB, dst); break;
+   case ir_txl: inst = emit(FS_OPCODE_TXL, dst); break;
+   case ir_txd: inst = emit(FS_OPCODE_TXD, dst); break;
+   case ir_txf: assert(!"TXF unsupported.");
+   }
+   inst->base_mrf = base_mrf;
+   inst->mlen = mlen;
+   inst->header_present = header_present;

   if (mlen > 11) {
      fail("Message length >11 disallowed by hardware\n");
@ -1458,16 +1558,18 @@ fs_visitor::visit(ir_texture *ir)
    */
   fs_reg dst = fs_reg(this, glsl_type::vec4_type);

-   if (intel->gen < 5) {
-      inst = emit_texture_gen4(ir, dst, coordinate);
+   if (intel->gen >= 7) {
+      inst = emit_texture_gen7(ir, dst, coordinate, sampler);
+   } else if (intel->gen >= 5) {
+      inst = emit_texture_gen5(ir, dst, coordinate, sampler);
   } else {
-      inst = emit_texture_gen5(ir, dst, coordinate);
+      inst = emit_texture_gen4(ir, dst, coordinate, sampler);
   }

   /* If there's an offset, we already set up m1.  To avoid the implied move,
    * use the null register.  Otherwise, we want an implied move from g0.
    */
-   if (ir->offset != NULL)
+   if (ir->offset != NULL || !inst->header_present)
      inst->src[0] = fs_reg(brw_null_reg());
   else
      inst->src[0] = fs_reg(retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW));
@ -1546,12 +1648,9 @@ fs_visitor::visit(ir_swizzle *ir)
 void
 fs_visitor::visit(ir_discard *ir)
 {
-   fs_reg temp = fs_reg(this, glsl_type::uint_type);
-
   assert(ir->condition == NULL); /* FINISHME */

-   emit(FS_OPCODE_DISCARD_NOT, temp, reg_null_d);
-   emit(FS_OPCODE_DISCARD_AND, reg_null_d, temp);
+   emit(FS_OPCODE_DISCARD);
   kill_emitted = true;
 }

@ -1802,7 +1901,7 @@ fs_visitor::visit(ir_if *ir)
 {
   fs_inst *inst;

-   if (c->dispatch_width == 16) {
+   if (intel->gen != 6 && c->dispatch_width == 16) {
      fail("Can't support (non-uniform) control flow on 16-wide\n");
   }

@ -1811,7 +1910,7 @@ fs_visitor::visit(ir_if *ir)
    */
   this->base_ir = ir->condition;

-   if (intel->gen >= 6) {
+   if (intel->gen == 6) {
      emit_if_gen6(ir);
   } else {
      emit_bool_to_cond_code(ir->condition);
@ -2260,9 +2359,11 @@ fs_visitor::generate_fb_write(fs_inst *inst)

   if (inst->header_present) {
      if (intel->gen >= 6) {
+	 brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
 	 brw_MOV(p,
-		 brw_message_reg(inst->base_mrf),
-		 brw_vec8_grf(0, 0));
+		 retype(brw_message_reg(inst->base_mrf), BRW_REGISTER_TYPE_UD),
+		 retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
+	 brw_set_compression_control(p, BRW_COMPRESSION_NONE);

 	 if (inst->target > 0) {
 	    /* Set the render target index for choosing BLEND_STATE. */
@ -2271,20 +2372,14 @@ fs_visitor::generate_fb_write(fs_inst *inst)
 		    brw_imm_ud(inst->target));
 	 }

-	 /* Clear viewport index, render target array index. */
-	 brw_AND(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, 0, 0),
-			   BRW_REGISTER_TYPE_UD),
-		 retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UD),
-		 brw_imm_ud(0xf7ff));
-
 	 implied_header = brw_null_reg();
      } else {
 	 implied_header = retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW);
-      }

-      brw_MOV(p,
-	      brw_message_reg(inst->base_mrf + 1),
-	      brw_vec8_grf(1, 0));
+	 brw_MOV(p,
+		 brw_message_reg(inst->base_mrf + 1),
+		 brw_vec8_grf(1, 0));
+      }
   } else {
      implied_header = brw_null_reg();
   }
@ -2459,11 +2554,8 @@ fs_visitor::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
   int rlen = 4;
   uint32_t simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD8;

-   if (c->dispatch_width == 16) {
-      rlen = 8;
-      dst = vec16(dst);
+   if (c->dispatch_width == 16)
      simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
-   }

   if (intel->gen >= 5) {
      switch (inst->opcode) {
@ -2498,6 +2590,7 @@ fs_visitor::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
 	 /* Note that G45 and older determines shadow compare and dispatch width
 	  * from message length for most messages.
 	  */
+	 assert(c->dispatch_width == 8);
 	 msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE;
 	 if (inst->shadow_compare) {
 	    assert(inst->mlen == 6);
@ -2532,6 +2625,11 @@ fs_visitor::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
   }
   assert(msg_type != -1);

+   if (simd_mode == BRW_SAMPLER_SIMD_MODE_SIMD16) {
+      rlen = 8;
+      dst = vec16(dst);
+   }
+
   brw_SAMPLE(p,
 	      retype(dst, BRW_REGISTER_TYPE_UW),
 	      inst->base_mrf,
@ -2543,7 +2641,7 @@ fs_visitor::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
 	      rlen,
 	      inst->mlen,
 	      0,
-	      1,
+	      inst->header_present,
 	      simd_mode);
 }

@ -2611,56 +2709,60 @ fs_visitor::generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
 }

+/**
+ * Emit the instructions that implement a discard.
+ *
+ * Builds, in the flag register, a mask with 0s for the channels being
+ * discarded and ANDs it into g1.7 (gen6+) or g0.0 (older generations) so
+ * those channels are dropped from the pixels still to be written.
+ */
 void
-fs_visitor::generate_discard_not(fs_inst *inst, struct brw_reg mask)
+fs_visitor::generate_discard(fs_inst *inst)
 {
-   if (intel->gen >= 6) {
-      /* Gen6 no longer has the mask reg for us to just read the
-       * active channels from.  However, cmp updates just the channels
-       * of the flag reg that are enabled, so we can get at the
-       * channel enables that way.  In this step, make a reg of ones
-       * we'll compare to.
-       */
-      brw_MOV(p, mask, brw_imm_ud(1));
-   } else {
-      brw_push_insn_state(p);
-      brw_set_mask_control(p, BRW_MASK_DISABLE);
-      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
-      brw_NOT(p, mask, brw_mask_reg(1)); /* IMASK */
-      brw_pop_insn_state(p);
-   }
-}
+   struct brw_reg f0 = brw_flag_reg();

-void
-fs_visitor::generate_discard_and(fs_inst *inst, struct brw_reg mask)
-{
   if (intel->gen >= 6) {
-      struct brw_reg f0 = brw_flag_reg();
      struct brw_reg g1 = retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UW);
+      struct brw_reg some_register;

+      /* As of gen6, we no longer have the mask register to look at,
+       * so life gets a bit more complicated.
+       */
+
+      /* Load the flag register with all ones. */
      brw_push_insn_state(p);
      brw_set_mask_control(p, BRW_MASK_DISABLE);
-      brw_MOV(p, f0, brw_imm_uw(0xffff)); /* inactive channels undiscarded */
+      brw_MOV(p, f0, brw_imm_uw(0xffff));
      brw_pop_insn_state(p);

+      /* Do a comparison that should always fail, to produce 0s in the flag
+       * reg where we have active channels.
+       */
+      some_register = retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW);
      brw_CMP(p, retype(brw_null_reg(), BRW_REGISTER_TYPE_UD),
-	      BRW_CONDITIONAL_Z, mask, brw_imm_ud(0)); /* active channels fail test */
+	      BRW_CONDITIONAL_NZ, some_register, some_register);
+
      /* Undo CMP's whacking of predication*/
      brw_set_predicate_control(p, BRW_PREDICATE_NONE);

      brw_push_insn_state(p);
      brw_set_mask_control(p, BRW_MASK_DISABLE);
-      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
      brw_AND(p, g1, f0, g1);
      brw_pop_insn_state(p);
   } else {
      struct brw_reg g0 = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
-
-      mask = brw_uw1_reg(mask.file, mask.nr, 0);

      brw_push_insn_state(p);
      brw_set_mask_control(p, BRW_MASK_DISABLE);
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
-      brw_AND(p, g0, mask, g0);
+
+      /* Unlike gen6+, we still have the mask reg here, so we just need
+       * somewhere to invert that (containing channels to be disabled)
+       * so it can be ANDed with the mask of pixels still to be
+       * written. Use the flag reg for consistency with gen6+.
+       */
+      brw_NOT(p, f0, brw_mask_reg(1)); /* IMASK */
+      brw_AND(p, g0, f0, g0);
+
      brw_pop_insn_state(p);
   }
 }
@ -3008,7 +3104,6 @@ fs_visitor::calculate_live_intervals()
   int *use = ralloc_array(mem_ctx, int, num_vars);
   int loop_depth = 0;
   int loop_start = 0;
-   int bb_header_ip = 0;

   if (this->live_intervals_valid)
      return;
@ -3068,22 +3163,6 @@ fs_visitor::calculate_live_intervals()
      }

      ip++;
-
-      /* Set the basic block header IP.  This is used for determining
-       * if a complete def of single-register virtual GRF in a loop
-       * dominates a use in the same basic block.  It's a quick way to
-       * reduce the live interval range of most register used in a
-       * loop.
-       */
-      if (inst->opcode == BRW_OPCODE_IF ||
-	  inst->opcode == BRW_OPCODE_ELSE ||
-	  inst->opcode == BRW_OPCODE_ENDIF ||
-	  inst->opcode == BRW_OPCODE_DO ||
-	  inst->opcode == BRW_OPCODE_WHILE ||
-	  inst->opcode == BRW_OPCODE_BREAK ||
-	  inst->opcode == BRW_OPCODE_CONTINUE) {
-	 bb_header_ip = ip;
-      }
   }

   ralloc_free(this->virtual_grf_def);
@ -3324,7 +3403,7 @@ fs_visitor::register_coalesce()
 	 /* The gen6 MATH instruction can't handle source modifiers, so avoid
 	  * coalescing those for now.  We should do something more specific.
 	  */
-	 if (intel->gen == 6 && scan_inst->is_math() && has_source_modifiers) {
+	 if (intel->gen >= 6 && scan_inst->is_math() && has_source_modifiers) {
 	    interfered = true;
 	    break;
 	 }
@ -3722,11 +3801,8 @@ fs_visitor::generate_code()
   const char *last_annotation_string = NULL;
   ir_instruction *last_annotation_ir = NULL;

-   int if_stack_array_size = 16;
   int loop_stack_array_size = 16;
-   int if_stack_depth = 0, loop_stack_depth = 0;
-   brw_instruction **if_stack =
-      rzalloc_array(this->mem_ctx, brw_instruction *, if_stack_array_size);
+   int loop_stack_depth = 0;
   brw_instruction **loop_stack =
      rzalloc_array(this->mem_ctx, brw_instruction *, loop_stack_array_size);
   int *if_depth_in_loop =
@ -3831,27 +3907,20 @@ fs_visitor::generate_code()

      case BRW_OPCODE_IF:
 	 if (inst->src[0].file != BAD_FILE) {
-	    assert(intel->gen >= 6);
-	    if_stack[if_stack_depth] = gen6_IF(p, inst->conditional_mod, src[0], src[1]);
+	    /* The instruction has an embedded compare (only allowed on gen6) */
+	    assert(intel->gen == 6);
+	    gen6_IF(p, inst->conditional_mod, src[0], src[1]);
 	 } else {
-	    if_stack[if_stack_depth] = brw_IF(p, BRW_EXECUTE_8);
+	    brw_IF(p, c->dispatch_width == 16 ? BRW_EXECUTE_16 : BRW_EXECUTE_8);
 	 }
 	 if_depth_in_loop[loop_stack_depth]++;
-	 if_stack_depth++;
-	 if (if_stack_array_size <= if_stack_depth) {
-	    if_stack_array_size *= 2;
-	    if_stack = reralloc(this->mem_ctx, if_stack, brw_instruction *,
-			        if_stack_array_size);
-	 }
 	 break;

      case BRW_OPCODE_ELSE:
-	 if_stack[if_stack_depth - 1] =
-	    brw_ELSE(p, if_stack[if_stack_depth - 1]);
+	 brw_ELSE(p);
 	 break;
      case BRW_OPCODE_ENDIF:
-	 if_stack_depth--;
-	 brw_ENDIF(p , if_stack[if_stack_depth]);
+	 brw_ENDIF(p);
 	 if_depth_in_loop[loop_stack_depth]--;
 	 break;

@ -3935,11 +4004,8 @@ fs_visitor::generate_code()
      case FS_OPCODE_TXL:
 	 generate_tex(inst, dst, src[0]);
 	 break;
-      case FS_OPCODE_DISCARD_NOT:
-	 generate_discard_not(inst, dst);
-	 break;
-      case FS_OPCODE_DISCARD_AND:
-	 generate_discard_and(inst, src[0]);
+      case FS_OPCODE_DISCARD:
+	 generate_discard(inst);
 	 break;
      case FS_OPCODE_DDX:
 	 generate_ddx(inst, dst, src[0]);
@ -3993,7 +4059,6 @@ fs_visitor::generate_code()
      printf("\n");
   }

-   ralloc_free(if_stack);
   ralloc_free(loop_stack);
   ralloc_free(if_depth_in_loop);

--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@ -75,8 +75,7 @@ enum fs_opcodes {
   FS_OPCODE_TXB,
   FS_OPCODE_TXD,
   FS_OPCODE_TXL,
-   FS_OPCODE_DISCARD_NOT,
-   FS_OPCODE_DISCARD_AND,
+   FS_OPCODE_DISCARD,
   FS_OPCODE_SPILL,
   FS_OPCODE_UNSPILL,
   FS_OPCODE_PULL_CONSTANT_LOAD,
@ -500,8 +499,7 @@ public:
 			 struct brw_reg *src);
   void generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src);
   void generate_math(fs_inst *inst, struct brw_reg dst, struct brw_reg *src);
-   void generate_discard_not(fs_inst *inst, struct brw_reg temp);
-   void generate_discard_and(fs_inst *inst, struct brw_reg temp);
+   void generate_discard(fs_inst *inst);
   void generate_ddx(fs_inst *inst, struct brw_reg dst, struct brw_reg src);
   void generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src);
   void generate_spill(fs_inst *inst, struct brw_reg src);
@ -514,8 +512,12 @@ public:
   fs_reg *emit_general_interpolation(ir_variable *ir);
   void emit_interpolation_setup_gen4();
   void emit_interpolation_setup_gen6();
-   fs_inst *emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate);
-   fs_inst *emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate);
+   fs_inst *emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate,
+			      int sampler);
+   fs_inst *emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate,
+			      int sampler);
+   fs_inst *emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate,
+			      int sampler);
   fs_inst *emit_math(fs_opcodes op, fs_reg dst, fs_reg src0);
   fs_inst *emit_math(fs_opcodes op, fs_reg dst, fs_reg src0, fs_reg src1);
   bool try_emit_saturate(ir_expression *ir);
--- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
@ -244,6 +244,8 @@ fs_visitor::assign_regs()

      if (reg == -1) {
 	 fail("no register to spill\n");
+      } else if (intel->gen >= 7) {
+	 fail("no spilling support on gen7 yet\n");
      } else if (c->dispatch_width == 16) {
 	 fail("no spilling support on 16-wide yet\n");
      } else {
--- a/src/mesa/drivers/dri/i965/brw_gs.c
+++ b/src/mesa/drivers/dri/i965/brw_gs.c
@ -42,7 +42,7 @@
 #include "brw_state.h"
 #include "brw_gs.h"

-
+#include "../glsl/ralloc.h"

 static void compile_gs_prog( struct brw_context *brw,
 			     struct brw_gs_prog_key *key )
@ -50,8 +50,15 @@ static void compile_gs_prog( struct brw_context *brw,
   struct intel_context *intel = &brw->intel;
   struct brw_gs_compile c;
   const GLuint *program;
+   void *mem_ctx;
   GLuint program_size;

+   /* Gen6+: VF has already converted into polygon, and LINELOOP is
+    * converted to LINESTRIP at the beginning of the 3D pipeline.
+    */
+   if (intel->gen >= 6)
+      return;
+
   memset(&c, 0, sizeof(c));
   
   c.key = *key;
@ -67,10 +74,11 @@ static void compile_gs_prog( struct brw_context *brw,

   c.nr_bytes = c.nr_regs * REG_SIZE;

+   mem_ctx = NULL;
   
   /* Begin the compilation:
    */
-   brw_init_compile(brw, &c.func);
+   brw_init_compile(brw, &c.func, mem_ctx);

   c.func.single_program_flow = 1;

@ -84,12 +92,6 @@ static void compile_gs_prog( struct brw_context *brw,
    * already been weeded out by this stage:
    */

-   /* Gen6: VF has already converted into polygon, and LINELOOP is
-    * converted to LINESTRIP at the beginning of the 3D pipeline.
-    */
-   if (intel->gen == 6)
-      return;
-
   switch (key->primitive) {
   case GL_QUADS:
      brw_gs_quads( &c, key );
@ -101,6 +103,7 @@ static void compile_gs_prog( struct brw_context *brw,
      brw_gs_lines( &c );
      break;
   default:
+      ralloc_free(mem_ctx);
      return;
   }

@ -126,6 +129,7 @@ static void compile_gs_prog( struct brw_context *brw,
 				      program, program_size,
 				      &c.prog_data, sizeof(c.prog_data),
 				      &brw->gs.prog_data);
+   ralloc_free(mem_ctx);
 }

 static const GLenum gs_prim[GL_POLYGON+1] = {  
@ -164,7 +168,7 @@ static void populate_key( struct brw_context *brw,
      key->pv_first = GL_TRUE;
   }

-   key->need_gs_prog = (intel->gen == 6)
+   key->need_gs_prog = (intel->gen >= 6)
      ? 0
      : (brw->primitive == GL_QUADS ||
 	 brw->primitive == GL_QUAD_STRIP ||
--- a/src/mesa/drivers/dri/i965/brw_misc_state.c
+++ b/src/mesa/drivers/dri/i965/brw_misc_state.c
@ -32,6 +32,7 @@


 #include "intel_batchbuffer.h"
+#include "intel_fbo.h"
 #include "intel_regions.h"

 #include "brw_context.h"
@ -86,7 +87,10 @@ static void upload_binding_table_pointers(struct brw_context *brw)
 const struct brw_tracked_state brw_binding_table_pointers = {
   .dirty = {
      .mesa = 0,
-      .brw = BRW_NEW_BATCH | BRW_NEW_BINDING_TABLE,
+      .brw = BRW_NEW_BATCH
+	   | BRW_NEW_VS_BINDING_TABLE
+	   | BRW_NEW_GS_BINDING_TABLE
+	   | BRW_NEW_PS_BINDING_TABLE,
      .cache = 0,
   },
   .emit = upload_binding_table_pointers,
@ -118,7 +122,10 @@ static void upload_gen6_binding_table_pointers(struct brw_context *brw)
 const struct brw_tracked_state gen6_binding_table_pointers = {
   .dirty = {
      .mesa = 0,
-      .brw = BRW_NEW_BATCH | BRW_NEW_BINDING_TABLE,
+      .brw = BRW_NEW_BATCH
+	   | BRW_NEW_VS_BINDING_TABLE
+	   | BRW_NEW_GS_BINDING_TABLE
+	   | BRW_NEW_PS_BINDING_TABLE,
      .cache = 0,
   },
   .emit = upload_gen6_binding_table_pointers,
@ -187,18 +194,33 @@ const struct brw_tracked_state brw_psp_urb_cbs = {

 static void prepare_depthbuffer(struct brw_context *brw)
 {
-   struct intel_region *region = brw->state.depth_region;
+   struct intel_context *intel = &brw->intel;
+   struct gl_context *ctx = &intel->ctx;
+   struct gl_framebuffer *fb = ctx->DrawBuffer;   /* attachments are looked up on the current draw framebuffer */
+   struct intel_renderbuffer *drb = intel_get_renderbuffer(fb, BUFFER_DEPTH);   /* depth attachment; NULL-checked below */
+   struct intel_renderbuffer *srb = intel_get_renderbuffer(fb, BUFFER_STENCIL);   /* stencil attachment; NULL-checked below */

-   if (region != NULL)
-      brw_add_validated_bo(brw, region->buffer);
+   if (drb)
+      brw_add_validated_bo(brw, drb->region->buffer);   /* NOTE(review): drb->region is dereferenced without a check -- confirm it is always set for an attached renderbuffer */
+   if (srb)
+      brw_add_validated_bo(brw, srb->region->buffer);   /* NOTE(review): may be the same BO as depth for a combined depth/stencil buffer -- confirm brw_add_validated_bo() tolerates duplicates */
 }

 static void emit_depthbuffer(struct brw_context *brw)
 {
   struct intel_context *intel = &brw->intel;
-   struct intel_region *region = brw->state.depth_region;
+   struct gl_context *ctx = &intel->ctx;
+   struct gl_framebuffer *fb = ctx->DrawBuffer;
+   /* _NEW_BUFFERS */
+   struct intel_renderbuffer *irb = intel_get_renderbuffer(fb, BUFFER_DEPTH);
   unsigned int len;

+   /* If we're combined depth stencil but no depth is attached, look
+    * up stencil.
+    */
+   if (!irb)
+      irb = intel_get_renderbuffer(fb, BUFFER_STENCIL);
+
   if (intel->gen >= 6)
      len = 7;
   else if (intel->is_g4x || intel->gen == 5)
@ -206,7 +228,7 @@ static void emit_depthbuffer(struct brw_context *brw)
   else
      len = 5;

-   if (region == NULL) {
+   if (!irb) {
      BEGIN_BATCH(len);
      OUT_BATCH(_3DSTATE_DEPTH_BUFFER << 16 | (len - 2));
      OUT_BATCH((BRW_DEPTHFORMAT_D32_FLOAT << 18) |
@ -223,7 +245,9 @@ static void emit_depthbuffer(struct brw_context *brw)

      ADVANCE_BATCH();
   } else {
+      struct intel_region *region = irb->region;
      unsigned int format;
+      uint32_t tile_x, tile_y, offset;

      switch (region->cpp) {
      case 2:
@ -240,7 +264,8 @@ static void emit_depthbuffer(struct brw_context *brw)
 	 return;
      }

-      assert(region->tiling != I915_TILING_X);
+      offset = intel_region_tile_offsets(region, &tile_x, &tile_y);
+
      assert(intel->gen < 6 || region->tiling == I915_TILING_Y);

      BEGIN_BATCH(len);
@ -252,14 +277,16 @@ static void emit_depthbuffer(struct brw_context *brw)
 		(BRW_SURFACE_2D << 29));
      OUT_RELOC(region->buffer,
 		I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
-		0);
+		offset);
      OUT_BATCH((BRW_SURFACE_MIPMAPLAYOUT_BELOW << 1) |
 		((region->width - 1) << 6) |
 		((region->height - 1) << 19));
      OUT_BATCH(0);

      if (intel->is_g4x || intel->gen >= 5)
-         OUT_BATCH(0);
+         OUT_BATCH(tile_x | (tile_y << 16));
+      else
+	 assert(tile_x == 0 && tile_y == 0);

      if (intel->gen >= 6)
 	 OUT_BATCH(0);
@ -276,13 +303,10 @@ static void emit_depthbuffer(struct brw_context *brw)
   }
 }

-/**
- * \see brw_context.state.depth_region
- */
 const struct brw_tracked_state brw_depthbuffer = {
   .dirty = {
-      .mesa = 0,
-      .brw = BRW_NEW_DEPTH_BUFFER | BRW_NEW_BATCH,
+      .mesa = _NEW_BUFFERS,
+      .brw = BRW_NEW_BATCH,
      .cache = 0,
   },
   .prepare = prepare_depthbuffer,
@ -470,12 +494,15 @@ static void upload_invarient_state( struct brw_context *brw )

   if (intel->gen >= 6) {
      int i;
+      int len = intel->gen >= 7 ? 4 : 3;

-      BEGIN_BATCH(3);
-      OUT_BATCH(_3DSTATE_MULTISAMPLE << 16 | (3 - 2));
+      BEGIN_BATCH(len);
+      OUT_BATCH(_3DSTATE_MULTISAMPLE << 16 | (len - 2));
      OUT_BATCH(MS_PIXEL_LOCATION_CENTER |
 		MS_NUMSAMPLES_1);
      OUT_BATCH(0); /* positions for 4/8-sample */
+      if (intel->gen >= 7)
+	 OUT_BATCH(0);
      ADVANCE_BATCH();

      BEGIN_BATCH(2);
--- a/src/mesa/drivers/dri/i965/brw_sf.c
+++ b/src/mesa/drivers/dri/i965/brw_sf.c
@ -43,20 +43,24 @@
 #include "brw_sf.h"
 #include "brw_state.h"

+#include "../glsl/ralloc.h"
+
 static void compile_sf_prog( struct brw_context *brw,
 			     struct brw_sf_prog_key *key )
 {
   struct intel_context *intel = &brw->intel;
   struct brw_sf_compile c;
   const GLuint *program;
+   void *mem_ctx;
   GLuint program_size;
   GLuint i, idx;

   memset(&c, 0, sizeof(c));

+   mem_ctx = ralloc_context(NULL);
   /* Begin the compilation:
    */
-   brw_init_compile(brw, &c.func);
+   brw_init_compile(brw, &c.func, mem_ctx);

   c.key = *key;
   c.nr_attrs = brw_count_bits(c.key.attrs);
@ -124,6 +128,7 @@ static void compile_sf_prog( struct brw_context *brw,
 				      program, program_size,
 				      &c.prog_data, sizeof(c.prog_data),
 				      &brw->sf.prog_data);
+   ralloc_free(mem_ctx);
 }

 /* Calculate interpolants for triangle and line rasterization.
--- a/src/mesa/drivers/dri/i965/brw_sf_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_sf_emit.c
@ -81,7 +81,6 @@ static void copy_bfc( struct brw_sf_compile *c,
 static void do_twoside_color( struct brw_sf_compile *c )
 {
   struct brw_compile *p = &c->func;
-   struct brw_instruction *if_insn;
   GLuint backface_conditional = c->key.frontface_ccw ? BRW_CONDITIONAL_G : BRW_CONDITIONAL_L;

   /* Already done in clip program:
@ -104,7 +103,7 @@ static void do_twoside_color( struct brw_sf_compile *c )
    */
   brw_push_insn_state(p);
   brw_CMP(p, vec4(brw_null_reg()), backface_conditional, c->det, brw_imm_f(0));
-   if_insn = brw_IF(p, BRW_EXECUTE_4); 
+   brw_IF(p, BRW_EXECUTE_4);
   {
      switch (c->nr_verts) {
      case 3: copy_bfc(c, c->vert[2]);
@ -112,7 +111,7 @@ static void do_twoside_color( struct brw_sf_compile *c )
      case 1: copy_bfc(c, c->vert[0]);
      }
   }
-   brw_ENDIF(p, if_insn);
+   brw_ENDIF(p);
   brw_pop_insn_state(p);
 }

--- a/src/mesa/drivers/dri/i965/brw_state.h
+++ b/src/mesa/drivers/dri/i965/brw_state.h
@ -111,6 +111,26 @@ extern const struct brw_tracked_state gen6_vs_constants;
 extern const struct brw_tracked_state gen6_vs_state;
 extern const struct brw_tracked_state gen6_wm_constants;
 extern const struct brw_tracked_state gen6_wm_state;
+extern const struct brw_tracked_state gen7_depthbuffer;
+extern const struct brw_tracked_state gen7_blend_state_pointer;
+extern const struct brw_tracked_state gen7_cc_state_pointer;
+extern const struct brw_tracked_state gen7_cc_viewport_state_pointer;
+extern const struct brw_tracked_state gen7_clip_state;
+extern const struct brw_tracked_state gen7_depth_stencil_state_pointer;
+extern const struct brw_tracked_state gen7_disable_stages;
+extern const struct brw_tracked_state gen7_ps_state;
+extern const struct brw_tracked_state gen7_samplers;
+extern const struct brw_tracked_state gen7_sbe_state;
+extern const struct brw_tracked_state gen7_sf_clip_viewport;
+extern const struct brw_tracked_state gen7_sf_clip_viewport_state_pointer;
+extern const struct brw_tracked_state gen7_sf_state;
+extern const struct brw_tracked_state gen7_sol_state;
+extern const struct brw_tracked_state gen7_urb;
+extern const struct brw_tracked_state gen7_vs_state;
+extern const struct brw_tracked_state gen7_wm_constants;
+extern const struct brw_tracked_state gen7_wm_constant_surface;
+extern const struct brw_tracked_state gen7_wm_state;
+extern const struct brw_tracked_state gen7_wm_surfaces;

 /***********************************************************************
 * brw_state.c
@ -162,4 +182,27 @@ void brw_create_constant_surface(struct brw_context *brw,
 				 int width,
 				 uint32_t *out_offset);

+uint32_t brw_format_for_mesa_format(gl_format mesa_format);
+
+GLuint translate_tex_target(GLenum target);
+
+GLuint translate_tex_format(gl_format mesa_format,
+			    GLenum internal_format,
+			    GLenum depth_mode,
+			    GLenum srgb_decode);
+
+/* brw_wm_sampler_state.c */
+uint32_t translate_wrap_mode(GLenum wrap, bool using_nearest);
+void upload_default_color(struct brw_context *brw,
+			  struct gl_sampler_object *sampler,
+			  int unit);
+
+/* gen6_sf_state.c */
+uint32_t
+get_attr_override(struct brw_context *brw, int fs_attr, int two_side_color);
+
+/* gen7_misc_state.c */
+unsigned int
+gen7_depth_format(struct brw_context *brw);
+
 #endif
--- a/src/mesa/drivers/dri/i965/brw_state_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_state_upload.c
@ -170,6 +170,71 @@ static const struct brw_tracked_state *gen6_atoms[] =
   &brw_vertices,
 };

+const struct brw_tracked_state *gen7_atoms[] =   /* atom list selected by brw_init_state() when brw->intel.gen >= 7; NOTE(review): not static, unlike gen6_atoms above -- confirm external linkage is intended */
+{
+   &brw_check_fallback,
+
+   &brw_wm_input_sizes,
+   &brw_vs_prog,
+   &brw_gs_prog,
+   &brw_wm_prog,
+
+   /* Command packets: */
+   &brw_invarient_state,
+
+   /* must do before binding table pointers, cc state ptrs */
+   &brw_state_base_address,
+
+   &brw_cc_vp,
+   &gen7_cc_viewport_state_pointer, /* must do after brw_cc_vp */
+   &gen7_sf_clip_viewport,
+
+   &gen7_urb,
+   &gen6_blend_state,		/* must do before cc unit */
+   &gen6_color_calc_state,	/* must do before cc unit */
+   &gen6_depth_stencil_state,	/* must do before cc unit */
+   &gen7_blend_state_pointer,
+   &gen7_cc_state_pointer,
+   &gen7_depth_stencil_state_pointer,
+
+   &brw_vs_constants, /* Before vs_surfaces and constant_buffer */
+   &brw_wm_constants, /* Before wm_surfaces and constant_buffer */
+   &gen6_vs_constants, /* Before vs_state */
+   &gen7_wm_constants, /* Before wm_surfaces and constant_buffer */
+
+   &brw_vs_surfaces,		/* must do before unit */
+   &gen7_wm_constant_surface,	/* must do before wm surfaces/bind bo */
+   &gen7_wm_surfaces,		/* must do before samplers and unit */
+   &brw_wm_binding_table,
+
+   &gen7_samplers,
+
+   &gen7_disable_stages,
+   &gen7_vs_state,
+   &gen7_clip_state,
+   &gen7_sbe_state,
+   &gen7_sf_state,
+   &gen7_wm_state,
+   &gen7_ps_state,
+
+   &gen6_scissor_state,
+
+   &gen7_depthbuffer,
+
+   &brw_polygon_stipple,
+   &brw_polygon_stipple_offset,
+
+   &brw_line_stipple,
+   &brw_aa_line_parameters,
+
+   &brw_drawing_rect,
+
+   &brw_indices,
+   &brw_index_buffer,
+   &brw_vertices,
+};
+
+
 void brw_init_state( struct brw_context *brw )
 {
   const struct brw_tracked_state **atoms;
@ -177,7 +242,10 @@ void brw_init_state( struct brw_context *brw )

   brw_init_caches(brw);

-   if (brw->intel.gen >= 6) {
+   if (brw->intel.gen >= 7) {
+      atoms = gen7_atoms;
+      num_atoms = ARRAY_SIZE(gen7_atoms);
+   } else if (brw->intel.gen == 6) {
      atoms = gen6_atoms;
      num_atoms = ARRAY_SIZE(gen6_atoms);
   } else {
@ -299,16 +367,17 @@ static struct dirty_bit_map brw_bits[] = {
   DEFINE_BIT(BRW_NEW_WM_INPUT_DIMENSIONS),
   DEFINE_BIT(BRW_NEW_PSP),
   DEFINE_BIT(BRW_NEW_WM_SURFACES),
-   DEFINE_BIT(BRW_NEW_BINDING_TABLE),
   DEFINE_BIT(BRW_NEW_INDICES),
   DEFINE_BIT(BRW_NEW_INDEX_BUFFER),
   DEFINE_BIT(BRW_NEW_VERTICES),
   DEFINE_BIT(BRW_NEW_BATCH),
-   DEFINE_BIT(BRW_NEW_DEPTH_BUFFER),
   DEFINE_BIT(BRW_NEW_NR_WM_SURFACES),
   DEFINE_BIT(BRW_NEW_NR_VS_SURFACES),
   DEFINE_BIT(BRW_NEW_VS_CONSTBUF),
   DEFINE_BIT(BRW_NEW_WM_CONSTBUF),
+   DEFINE_BIT(BRW_NEW_VS_BINDING_TABLE),
+   DEFINE_BIT(BRW_NEW_GS_BINDING_TABLE),
+   DEFINE_BIT(BRW_NEW_PS_BINDING_TABLE),
   {0, 0, 0}
 };

--- a/src/mesa/drivers/dri/i965/brw_structs.h
+++ b/src/mesa/drivers/dri/i965/brw_structs.h
@ -1111,6 +1111,54 @@ struct brw_sampler_state
   } ss3;
 };

+struct gen7_sampler_state   /* NOTE(review): declared right after brw_sampler_state; presumably its gen7 counterpart -- confirm against the hardware docs */
+{
+   struct
+   {
+      GLuint aniso_algorithm:1;
+      GLuint lod_bias:13;
+      GLuint min_filter:3;
+      GLuint mag_filter:3;
+      GLuint mip_filter:2;
+      GLuint base_level:5;
+      GLuint pad1:1;
+      GLuint lod_preclamp:1;
+      GLuint default_color_mode:1;
+      GLuint pad0:1;
+      GLuint disable:1;
+   } ss0;   /* field widths above sum to 32 bits */
+
+   struct
+   {
+      GLuint cube_control_mode:1;
+      GLuint shadow_function:3;
+      GLuint pad:4;
+      GLuint max_lod:12;
+      GLuint min_lod:12;
+   } ss1;   /* field widths above sum to 32 bits */
+
+   struct
+   {
+      GLuint pad:5;
+      GLuint default_color_pointer:27;   /* NOTE(review): 27 bits after 5 pad bits; presumably a 32-byte-aligned offset with the low bits dropped -- confirm */
+   } ss2;   /* field widths above sum to 32 bits */
+
+   struct
+   {
+      GLuint r_wrap_mode:3;
+      GLuint t_wrap_mode:3;
+      GLuint s_wrap_mode:3;
+      GLuint pad:1;
+      GLuint non_normalized_coord:1;
+      GLuint trilinear_quality:2;
+      GLuint address_round:6;
+      GLuint max_aniso:3;
+      GLuint chroma_key_mode:1;
+      GLuint chroma_key_index:2;
+      GLuint chroma_key_enable:1;
+      GLuint pad0:6;
+   } ss3;   /* field widths above sum to 32 bits */
+};

 struct brw_clipper_viewport
 {
@ -1155,7 +1203,31 @@ struct gen6_sf_viewport {
   GLfloat m32;
 };

+struct gen7_sf_clip_viewport {   /* 6 + 2 + 4 + 4 = 16 members; 64 bytes if GLfloat and GLuint are both 4 bytes -- TODO confirm */
+   struct {
+      GLfloat m00;
+      GLfloat m11;
+      GLfloat m22;
+      GLfloat m30;
+      GLfloat m31;
+      GLfloat m32;
+   } viewport;   /* same six member names as gen6_sf_viewport declared just above */
+
+   GLuint pad0[2];   /* NOTE(review): GLuint here but GLfloat for pad1 below -- harmless if both are 4 bytes, confirm */
+
+   struct {
+      GLfloat xmin;
+      GLfloat xmax;
+      GLfloat ymin;
+      GLfloat ymax;
+   } guardband;
+
+   GLfloat pad1[4];
+};
+
 /* Documented in the subsystem/shared-functions/sampler chapter...
+ *
+ * vol5c Shared Functions - 1.13.4.1.1
 */
 struct brw_surface_state
 {
@ -1227,6 +1299,82 @@ struct brw_surface_state

 };

+/* volume 5c Shared Functions - 1.13.4.1.2 */
+struct gen7_surface_state   /* eight sub-structs ss0..ss7; each is either one GLuint or bit-fields whose widths sum to 32 */
+{
+   struct {
+      GLuint cube_pos_z:1;
+      GLuint cube_neg_z:1;
+      GLuint cube_pos_y:1;
+      GLuint cube_neg_y:1;
+      GLuint cube_pos_x:1;
+      GLuint cube_neg_x:1;
+      GLuint pad2:2;
+      GLuint render_cache_read_write:1;
+      GLuint pad1:1;
+      GLuint surface_array_spacing:1;
+      GLuint vert_line_stride_ofs:1;
+      GLuint vert_line_stride:1;
+      GLuint tile_walk:1;
+      GLuint tiled_surface:1;
+      GLuint horizontal_alignment:1;
+      GLuint vertical_alignment:2;
+      GLuint surface_format:9;     /**< BRW_SURFACEFORMAT_x */
+      GLuint pad0:1;
+      GLuint is_array:1;
+      GLuint surface_type:3;       /**< BRW_SURFACE_1D/2D/3D/CUBE */
+   } ss0;   /* field widths above sum to 32 bits */
+
+   struct {
+      GLuint base_addr;   /* a full GLuint, not a bit-field */
+   } ss1;
+
+   struct {
+      GLuint width:14;
+      GLuint pad1:2;
+      GLuint height:14;
+      GLuint pad0:2;
+   } ss2;   /* field widths above sum to 32 bits */
+
+   struct {
+      GLuint pitch:18;
+      GLuint pad:3;
+      GLuint depth:11;
+   } ss3;   /* field widths above sum to 32 bits */
+
+   struct {
+      GLuint multisample_position_palette_index:3;
+      GLuint num_multisamples:3;
+      GLuint multisampled_surface_storage_format:1;
+      GLuint render_target_view_extent:11;
+      GLuint min_array_elt:11;
+      GLuint rotation:2;
+      GLuint pad0:1;
+   } ss4;   /* field widths above sum to 32 bits */
+
+   struct {
+      GLuint mip_count:4;
+      GLuint min_lod:4;
+      GLuint pad1:12;
+      GLuint y_offset:4;
+      GLuint pad0:1;
+      GLuint x_offset:7;
+   } ss5;   /* field widths above sum to 32 bits */
+
+   struct {
+      GLuint pad; /* Multisample Control Surface stuff */
+   } ss6;
+
+   struct {
+      GLuint resource_min_lod:12;
+      GLuint pad0:16;
+      GLuint alpha_clear_color:1;
+      GLuint blue_clear_color:1;
+      GLuint green_clear_color:1;
+      GLuint red_clear_color:1;
+   } ss7;   /* field widths above sum to 32 bits */
+};
+

 struct brw_vertex_element_state
 {
@ -1516,6 +1664,7 @@ struct brw_instruction
 	 GLuint  pad0:12;
      } if_else;

+      /* This is also used for gen7 IF/ELSE instructions */
      struct
      {
 	 /* Signed jump distance to the ip to jump to if all channels
@ -1596,6 +1745,18 @@ struct brw_instruction
 	 GLuint end_of_thread:1;
      } sampler_gen5;

+      struct {   /* field widths below sum to 32 bits */
+	 GLuint binding_table_index:8;
+	 GLuint sampler:4;
+	 GLuint msg_type:5;
+	 GLuint simd_mode:2;
+	 GLuint header_present:1;   /* preceded by 19 bits of fields, as in urb_gen7 and gen7_dp */
+	 GLuint response_length:5;
+	 GLuint msg_length:4;
+	 GLuint pad1:2;
+	 GLuint end_of_thread:1;   /* last declared bit of the 32 */
+      } sampler_gen7;
+
      struct brw_urb_immediate urb;

      struct {
@ -1614,6 +1775,20 @@ struct brw_instruction
 	 GLuint end_of_thread:1;
      } urb_gen5;

+      struct {   /* field widths below sum to 32 bits */
+	 GLuint opcode:3;
+	 GLuint offset:11;
+	 GLuint swizzle_control:1;
+	 GLuint complete:1;
+	 GLuint per_slot_offset:1;
+	 GLuint pad0:2;
+	 GLuint header_present:1;   /* preceded by 19 bits of fields, as in sampler_gen7 and gen7_dp */
+	 GLuint response_length:5;
+	 GLuint msg_length:4;
+	 GLuint pad1:2;
+	 GLuint end_of_thread:1;   /* last declared bit of the 32 */
+      } urb_gen7;
+
      struct {
 	 GLuint binding_table_index:8;
 	 GLuint msg_control:4;  
@ -1706,6 +1881,22 @@ struct brw_instruction
 	 GLuint end_of_thread:1;
      } gen6_dp;

+      /* See vol5c.2 sections 2.11.2.1.5 and 2.11.21.2.2. */
+      struct {   /* field widths below sum to 32 bits */
+	 GLuint binding_table_index:8;
+	 GLuint msg_control:3;
+	 GLuint slot_group_select:1;
+	 GLuint pixel_scoreboard_clear:1;
+	 GLuint pad0:1;
+	 GLuint msg_type:4;
+	 GLuint pad1:1;
+	 GLuint header_present:1;   /* preceded by 19 bits of fields, as in sampler_gen7 and urb_gen7 */
+	 GLuint response_length:5;
+	 GLuint msg_length:4;
+	 GLuint pad2:2;
+	 GLuint end_of_thread:1;   /* last declared bit of the 32 */
+      } gen7_dp;
+
      struct {
 	 GLuint function_control:16;
 	 GLuint response_length:4;
--- a/src/mesa/drivers/dri/i965/brw_vs.c
+++ b/src/mesa/drivers/dri/i965/brw_vs.c
@ -37,7 +37,7 @@
 #include "program/prog_print.h"
 #include "program/prog_parameter.h"

-
+#include "../glsl/ralloc.h"

 static void do_vs_prog( struct brw_context *brw, 
 			struct brw_vertex_program *vp,
@ -47,13 +47,16 @@ static void do_vs_prog( struct brw_context *brw,
   GLuint program_size;
   const GLuint *program;
   struct brw_vs_compile c;
+   void *mem_ctx;
   int aux_size;
   int i;

   memset(&c, 0, sizeof(c));
   memcpy(&c.key, key, sizeof(*key));

-   brw_init_compile(brw, &c.func);
+   mem_ctx = ralloc_context(NULL);
+
+   brw_init_compile(brw, &c.func, mem_ctx);
   c.vp = vp;

   c.prog_data.outputs_written = vp->program.Base.OutputsWritten;
@ -108,6 +111,7 @@ static void do_vs_prog( struct brw_context *brw,
 				      program, program_size,
 				      &c.prog_data, aux_size,
 				      &brw->vs.prog_data);
+   ralloc_free(mem_ctx);
 }


--- a/src/mesa/drivers/dri/i965/brw_vs_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c
@ -432,7 +432,16 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
   /* See emit_vertex_write() for where the VUE's overhead on top of the
    * attributes comes from.
    */
-   if (intel->gen >= 6) {
+   if (intel->gen >= 7) {
+      int header_regs = 2;
+      if (c->key.nr_userclip)
+	 header_regs += 2;
+
+      /* Each attribute is 16 bytes (1 vec4), so dividing by 4 gives us the
+       * number of 64-byte (512-bit) units.
+       */
+      c->prog_data.urb_entry_size = (attributes_in_vue + header_regs + 3) / 4;
+   } else if (intel->gen == 6) {
      int header_regs = 2;
      if (c->key.nr_userclip)
 	 header_regs += 2;
@ -1039,7 +1048,6 @@ static void emit_lit_noalias( struct brw_vs_compile *c,
 			      struct brw_reg arg0 )
 {
   struct brw_compile *p = &c->func;
-   struct brw_instruction *if_insn;
   struct brw_reg tmp = dst;
   GLboolean need_tmp = (dst.file != BRW_GENERAL_REGISTER_FILE);

@ -1055,7 +1063,7 @@ static void emit_lit_noalias( struct brw_vs_compile *c,
    * BRW_EXECUTE_1 for all comparisions.
    */
   brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_G, brw_swizzle1(arg0,0), brw_imm_f(0));
-   if_insn = brw_IF(p, BRW_EXECUTE_8);
+   brw_IF(p, BRW_EXECUTE_8);
   {
      brw_MOV(p, brw_writemask(dst, WRITEMASK_Y), brw_swizzle1(arg0,0));

@ -1070,8 +1078,7 @@ static void emit_lit_noalias( struct brw_vs_compile *c,
 		 brw_swizzle1(arg0, 3),
 		 BRW_MATH_PRECISION_PARTIAL);      
   }
-
-   brw_ENDIF(p, if_insn);
+   brw_ENDIF(p);

   release_tmp(c, tmp);
 }
@ -1881,8 +1888,8 @@ void brw_vs_emit(struct brw_vs_compile *c )
   struct brw_context *brw = p->brw;
   struct intel_context *intel = &brw->intel;
   const GLuint nr_insns = c->vp->program.Base.NumInstructions;
-   GLuint insn, if_depth = 0, loop_depth = 0;
-   struct brw_instruction *if_inst[MAX_IF_DEPTH], *loop_inst[MAX_LOOP_DEPTH] = { 0 };
+   GLuint insn, loop_depth = 0;
+   struct brw_instruction *loop_inst[MAX_LOOP_DEPTH] = { 0 };
   int if_depth_in_loop[MAX_LOOP_DEPTH];
   const struct brw_indirect stack_index = brw_indirect(0, 0);   
   GLuint index;
@ -2102,23 +2109,20 @@ void brw_vs_emit(struct brw_vs_compile *c )
      case OPCODE_XPD:
 	 emit_xpd(p, dst, args[0], args[1]);
 	 break;
-      case OPCODE_IF:
-	 assert(if_depth < MAX_IF_DEPTH);
-	 if_inst[if_depth] = brw_IF(p, BRW_EXECUTE_8);
+      case OPCODE_IF: {
+	 struct brw_instruction *if_inst = brw_IF(p, BRW_EXECUTE_8);
 	 /* Note that brw_IF smashes the predicate_control field. */
-	 if_inst[if_depth]->header.predicate_control = get_predicate(inst);
+	 if_inst->header.predicate_control = get_predicate(inst);
 	 if_depth_in_loop[loop_depth]++;
-	 if_depth++;
 	 break;
+      }
      case OPCODE_ELSE:
 	 clear_current_const(c);
-	 assert(if_depth > 0);
-	 if_inst[if_depth-1] = brw_ELSE(p, if_inst[if_depth-1]);
+	 brw_ELSE(p);
 	 break;
      case OPCODE_ENDIF:
 	 clear_current_const(c);
-         assert(if_depth > 0);
-	 brw_ENDIF(p, if_inst[--if_depth]);
+	 brw_ENDIF(p);
 	 if_depth_in_loop[loop_depth]--;
 	 break;			
      case OPCODE_BGNLOOP:
--- a/src/mesa/drivers/dri/i965/brw_vs_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c
@ -165,7 +165,7 @@ static void upload_vs_surfaces(struct brw_context *brw)
   /* BRW_NEW_NR_VS_SURFACES */
   if (brw->vs.nr_surfaces == 0) {
      if (brw->vs.bind_bo_offset) {
-	 brw->state.dirty.brw |= BRW_NEW_BINDING_TABLE;
+	 brw->state.dirty.brw |= BRW_NEW_VS_BINDING_TABLE;
      }
      brw->vs.bind_bo_offset = 0;
      return;
@ -184,7 +184,7 @@ static void upload_vs_surfaces(struct brw_context *brw)
      bind[i] = brw->vs.surf_offset[i];
   }

-   brw->state.dirty.brw |= BRW_NEW_BINDING_TABLE;
+   brw->state.dirty.brw |= BRW_NEW_VS_BINDING_TABLE;
 }

 const struct brw_tracked_state brw_vs_surfaces = {
--- a/src/mesa/drivers/dri/i965/brw_vtbl.c
+++ b/src/mesa/drivers/dri/i965/brw_vtbl.c
@ -46,6 +46,8 @@
 #include "brw_vs.h"
 #include "brw_wm.h"

+#include "../glsl/ralloc.h"
+
 static void
 dri_bo_release(drm_intel_bo **bo)
 {
@ -64,15 +66,7 @@ static void brw_destroy_context( struct intel_context *intel )
   brw_destroy_state(brw);
   brw_draw_destroy( brw );
   brw_clear_validated_bos(brw);
-   if (brw->wm.compile_data) {
-      free(brw->wm.compile_data->instruction);
-      free(brw->wm.compile_data->vreg);
-      free(brw->wm.compile_data->refs);
-      free(brw->wm.compile_data->prog_instructions);
-      free(brw->wm.compile_data);
-   }
-
-   intel_region_release(&brw->state.depth_region);
+   ralloc_free(brw->wm.compile_data);

   dri_bo_release(&brw->curbe.curbe_bo);
   dri_bo_release(&brw->vs.prog_bo);
@ -97,13 +91,6 @@ static void brw_set_draw_region( struct intel_context *intel,
                                 struct intel_region *depth_region,
                                 GLuint num_color_regions)
 {
-   struct brw_context *brw = brw_context(&intel->ctx);
-
-   if (brw->state.depth_region != depth_region) {
-      brw->state.dirty.brw |= BRW_NEW_DEPTH_BUFFER;
-      intel_region_release(&brw->state.depth_region);
-      intel_region_reference(&brw->state.depth_region, depth_region);
-   }
 }


--- a/src/mesa/drivers/dri/i965/brw_wm.c
+++ b/src/mesa/drivers/dri/i965/brw_wm.c
@ -35,6 +35,8 @@
 #include "main/formats.h"
 #include "main/samplerobj.h"

+#include "../glsl/ralloc.h"
+
 /** Return number of src args for given instruction */
 GLuint brw_wm_nr_args( GLuint opcode )
 {
@ -193,7 +195,7 @@ static void do_wm_prog( struct brw_context *brw,

   c = brw->wm.compile_data;
   if (c == NULL) {
-      brw->wm.compile_data = calloc(1, sizeof(*brw->wm.compile_data));
+      brw->wm.compile_data = rzalloc(NULL, struct brw_wm_compile);
      c = brw->wm.compile_data;
      if (c == NULL) {
         /* Ouch - big out of memory problem.  Can't continue
@ -202,11 +204,10 @@ static void do_wm_prog( struct brw_context *brw,
          */
         return;
      }
-      c->instruction = calloc(1, BRW_WM_MAX_INSN * sizeof(*c->instruction));
-      c->prog_instructions = calloc(1, BRW_WM_MAX_INSN *
-					  sizeof(*c->prog_instructions));
-      c->vreg = calloc(1, BRW_WM_MAX_VREG * sizeof(*c->vreg));
-      c->refs = calloc(1, BRW_WM_MAX_REF * sizeof(*c->refs));
+      c->instruction = rzalloc_array(c, struct brw_wm_instruction, BRW_WM_MAX_INSN);
+      c->prog_instructions = rzalloc_array(c, struct prog_instruction, BRW_WM_MAX_INSN);
+      c->vreg = rzalloc_array(c, struct brw_wm_value, BRW_WM_MAX_VREG);
+      c->refs = rzalloc_array(c, struct brw_wm_ref, BRW_WM_MAX_REF);
   } else {
      void *instruction = c->instruction;
      void *prog_instructions = c->prog_instructions;
@ -223,7 +224,7 @@ static void do_wm_prog( struct brw_context *brw,
   c->fp = fp;
   c->env_param = brw->intel.ctx.FragmentProgram.Parameters;

-   brw_init_compile(brw, &c->func);
+   brw_init_compile(brw, &c->func, c);

   if (!brw_wm_fs_emit(brw, c)) {
      /* Fallback for fixed function and ARB_fp shaders. */
@ -409,6 +410,16 @@ static void brw_wm_populate_key( struct brw_context *brw,
 			  swizzles[GET_SWZ(t->_Swizzle, 1)],
 			  swizzles[GET_SWZ(t->_Swizzle, 2)],
 			  swizzles[GET_SWZ(t->_Swizzle, 3)]);
+
+	 if (sampler->MinFilter != GL_NEAREST &&
+	     sampler->MagFilter != GL_NEAREST) {
+	    if (sampler->WrapS == GL_CLAMP)
+	       key->gl_clamp_mask[0] |= 1 << i;
+	    if (sampler->WrapT == GL_CLAMP)
+	       key->gl_clamp_mask[1] |= 1 << i;
+	    if (sampler->WrapR == GL_CLAMP)
+	       key->gl_clamp_mask[2] |= 1 << i;
+	 }
      }
      else {
         key->tex_swizzles[i] = SWIZZLE_NOOP;
--- a/src/mesa/drivers/dri/i965/brw_wm.h
+++ b/src/mesa/drivers/dri/i965/brw_wm.h
@ -71,9 +71,9 @@ struct brw_wm_prog_key {
   GLuint shadowtex_mask:16;
   GLuint yuvtex_mask:16;
   GLuint yuvtex_swap_mask:16;	/* UV swaped */
+   uint16_t gl_clamp_mask[3];

   GLushort tex_swizzles[BRW_MAX_TEX_UNIT];
-
   GLushort drawable_height;
   GLbitfield64 vp_outputs_written;
   GLuint iz_lookup;
--- a/src/mesa/drivers/dri/i965/brw_wm_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_emit.c
@ -1100,11 +1100,16 @@ void emit_tex(struct brw_wm_compile *c,

   /* Emit the texcoords. */
   for (i = 0; i < nr_texcoords; i++) {
+      if (c->key.gl_clamp_mask[i] & (1 << sampler))
+	 brw_set_saturate(p, true);
+
      if (emit & (1<<i))
 	 brw_MOV(p, brw_message_reg(cur_mrf), arg[i]);
      else
 	 brw_MOV(p, brw_message_reg(cur_mrf), brw_imm_f(0));
      cur_mrf += mrf_per_channel;
+
+      brw_set_saturate(p, false);
   }

   /* Fill in the shadow comparison reference value. */
--- a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c
@ -44,19 +44,28 @@



-/* The brw (and related graphics cores) do not support GL_CLAMP.  The
- * Intel drivers for "other operating systems" implement GL_CLAMP as
- * GL_CLAMP_TO_EDGE, so the same is done here.
- */
-static GLuint translate_wrap_mode( GLenum wrap )
+uint32_t
+translate_wrap_mode(GLenum wrap, bool using_nearest)
 {
   switch( wrap ) {
   case GL_REPEAT: 
      return BRW_TEXCOORDMODE_WRAP;
-   case GL_CLAMP:  
-      return BRW_TEXCOORDMODE_CLAMP;
+   case GL_CLAMP:
+      /* GL_CLAMP is the weird mode where coordinates are clamped to
+       * [0.0, 1.0], so linear filtering of coordinates outside of
+       * [0.0, 1.0] gives you half edge texel value and half border
+       * color.  The fragment shader will clamp the coordinates, and
+       * we set clamp_border here, which gets the result desired.  We
+       * just use clamp(_to_edge) for nearest, because for nearest
+       * clamping to 1.0 gives border color instead of the desired
+       * edge texels.
+       */
+      if (using_nearest)
+	 return BRW_TEXCOORDMODE_CLAMP;
+      else
+	 return BRW_TEXCOORDMODE_CLAMP_BORDER;
   case GL_CLAMP_TO_EDGE: 
-      return BRW_TEXCOORDMODE_CLAMP; /* conform likes it this way */
+      return BRW_TEXCOORDMODE_CLAMP;
   case GL_CLAMP_TO_BORDER: 
      return BRW_TEXCOORDMODE_CLAMP_BORDER;
   case GL_MIRRORED_REPEAT: 
@ -66,7 +75,10 @@ static GLuint translate_wrap_mode( GLenum wrap )
   }
 }

-static void
+/**
+ * Upload SAMPLER_BORDER_COLOR_STATE.
+ */
+void
 upload_default_color(struct brw_context *brw, struct gl_sampler_object *sampler,
 		     int unit)
 {
@ -93,7 +105,7 @@ upload_default_color(struct brw_context *brw, struct gl_sampler_object *sampler,
      color[3] = sampler->BorderColor.f[3];
   }

-   if (intel->gen >= 5) {
+   if (intel->gen == 5 || intel->gen == 6) {
      struct gen5_sampler_default_color *sdc;

      sdc = brw_state_batch(brw, sizeof(*sdc), 32, &brw->wm.sdc_offset[unit]);
@ -151,11 +163,13 @@ static void brw_update_sampler_state(struct brw_context *brw,
   struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
   struct gl_texture_object *texObj = texUnit->_Current;
   struct gl_sampler_object *gl_sampler = _mesa_get_samplerobj(ctx, unit);
+   bool using_nearest = false;

   switch (gl_sampler->MinFilter) {
   case GL_NEAREST:
      sampler->ss0.min_filter = BRW_MAPFILTER_NEAREST;
      sampler->ss0.mip_filter = BRW_MIPFILTER_NONE;
+      using_nearest = true;
      break;
   case GL_LINEAR:
      sampler->ss0.min_filter = BRW_MAPFILTER_LINEAR;
@ -196,6 +210,7 @@ static void brw_update_sampler_state(struct brw_context *brw,
      switch (gl_sampler->MagFilter) {
      case GL_NEAREST:
 	 sampler->ss0.mag_filter = BRW_MAPFILTER_NEAREST;
+	 using_nearest = true;
 	 break;
      case GL_LINEAR:
 	 sampler->ss0.mag_filter = BRW_MAPFILTER_LINEAR;
@ -205,9 +220,12 @@ static void brw_update_sampler_state(struct brw_context *brw,
      }  
   }

-   sampler->ss1.r_wrap_mode = translate_wrap_mode(gl_sampler->WrapR);
-   sampler->ss1.s_wrap_mode = translate_wrap_mode(gl_sampler->WrapS);
-   sampler->ss1.t_wrap_mode = translate_wrap_mode(gl_sampler->WrapT);
+   sampler->ss1.r_wrap_mode = translate_wrap_mode(gl_sampler->WrapR,
+						  using_nearest);
+   sampler->ss1.s_wrap_mode = translate_wrap_mode(gl_sampler->WrapS,
+						  using_nearest);
+   sampler->ss1.t_wrap_mode = translate_wrap_mode(gl_sampler->WrapT,
+						  using_nearest);

   if (intel->gen >= 6 &&
       sampler->ss0.min_filter != sampler->ss0.mag_filter)
--- a/src/mesa/drivers/dri/i965/brw_wm_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_state.c
@ -31,6 +31,7 @@
                   


+#include "intel_fbo.h"
 #include "brw_context.h"
 #include "brw_state.h"
 #include "brw_defines.h"
@ -144,11 +145,11 @@ brw_prepare_wm_unit(struct brw_context *brw)
 				 (1 << FRAG_ATTRIB_WPOS)) != 0;
   wm->wm5.program_computes_depth = (fp->Base.OutputsWritten &
 				     BITFIELD64_BIT(FRAG_RESULT_DEPTH)) != 0;
-   /* BRW_NEW_DEPTH_BUFFER
+   /* _NEW_BUFFERS
    * Override for NULL depthbuffer case, required by the Pixel Shader Computed
    * Depth field.
    */
-   if (brw->state.depth_region == NULL)
+   if (!intel_get_renderbuffer(ctx->DrawBuffer, BUFFER_DEPTH))
      wm->wm5.program_computes_depth = 0;

   /* _NEW_COLOR */
@ -266,7 +267,6 @@ const struct brw_tracked_state brw_wm_unit = {
      .brw = (BRW_NEW_BATCH |
 	      BRW_NEW_FRAGMENT_PROGRAM |
 	      BRW_NEW_CURBE_OFFSETS |
-	      BRW_NEW_DEPTH_BUFFER |
 	      BRW_NEW_NR_WM_SURFACES),

      .cache = (CACHE_NEW_WM_PROG |
--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
@ -45,7 +45,8 @@
 #include "brw_defines.h"
 #include "brw_wm.h"

-static GLuint translate_tex_target( GLenum target )
+GLuint
+translate_tex_target(GLenum target)
 {
   switch (target) {
   case GL_TEXTURE_1D: 
@ -69,55 +70,61 @@ static GLuint translate_tex_target( GLenum target )
   }
 }

-static uint32_t brw_format_for_mesa_format[MESA_FORMAT_COUNT] =
+uint32_t
+brw_format_for_mesa_format(gl_format mesa_format)
 {
-   [MESA_FORMAT_L8] = BRW_SURFACEFORMAT_L8_UNORM,
-   [MESA_FORMAT_I8] = BRW_SURFACEFORMAT_I8_UNORM,
-   [MESA_FORMAT_A8] = BRW_SURFACEFORMAT_A8_UNORM,
-   [MESA_FORMAT_AL88] = BRW_SURFACEFORMAT_L8A8_UNORM,
-   [MESA_FORMAT_AL1616] = BRW_SURFACEFORMAT_L16A16_UNORM,
-   [MESA_FORMAT_R8] = BRW_SURFACEFORMAT_R8_UNORM,
-   [MESA_FORMAT_R16] = BRW_SURFACEFORMAT_R16_UNORM,
-   [MESA_FORMAT_RG88] = BRW_SURFACEFORMAT_R8G8_UNORM,
-   [MESA_FORMAT_RG1616] = BRW_SURFACEFORMAT_R16G16_UNORM,
-   [MESA_FORMAT_ARGB8888] = BRW_SURFACEFORMAT_B8G8R8A8_UNORM,
-   [MESA_FORMAT_XRGB8888] = BRW_SURFACEFORMAT_B8G8R8X8_UNORM,
-   [MESA_FORMAT_RGB565] = BRW_SURFACEFORMAT_B5G6R5_UNORM,
-   [MESA_FORMAT_ARGB1555] = BRW_SURFACEFORMAT_B5G5R5A1_UNORM,
-   [MESA_FORMAT_ARGB4444] = BRW_SURFACEFORMAT_B4G4R4A4_UNORM,
-   [MESA_FORMAT_YCBCR_REV] = BRW_SURFACEFORMAT_YCRCB_NORMAL,
-   [MESA_FORMAT_YCBCR] = BRW_SURFACEFORMAT_YCRCB_SWAPUVY,
-   [MESA_FORMAT_RGB_FXT1] = BRW_SURFACEFORMAT_FXT1,
-   [MESA_FORMAT_RGBA_FXT1] = BRW_SURFACEFORMAT_FXT1,
-   [MESA_FORMAT_RGB_DXT1] = BRW_SURFACEFORMAT_DXT1_RGB,
-   [MESA_FORMAT_RGBA_DXT1] = BRW_SURFACEFORMAT_BC1_UNORM,
-   [MESA_FORMAT_RGBA_DXT3] = BRW_SURFACEFORMAT_BC2_UNORM,
-   [MESA_FORMAT_RGBA_DXT5] = BRW_SURFACEFORMAT_BC3_UNORM,
-   [MESA_FORMAT_SRGB_DXT1] = BRW_SURFACEFORMAT_DXT1_RGB_SRGB,
-   [MESA_FORMAT_SRGBA_DXT1] = BRW_SURFACEFORMAT_BC1_UNORM_SRGB,
-   [MESA_FORMAT_SRGBA_DXT3] = BRW_SURFACEFORMAT_BC2_UNORM_SRGB,
-   [MESA_FORMAT_SRGBA_DXT5] = BRW_SURFACEFORMAT_BC3_UNORM_SRGB,
-   [MESA_FORMAT_SARGB8] = BRW_SURFACEFORMAT_B8G8R8A8_UNORM_SRGB,
-   [MESA_FORMAT_SLA8] = BRW_SURFACEFORMAT_L8A8_UNORM_SRGB,
-   [MESA_FORMAT_SL8] = BRW_SURFACEFORMAT_L8_UNORM_SRGB,
-   [MESA_FORMAT_DUDV8] = BRW_SURFACEFORMAT_R8G8_SNORM,
-   [MESA_FORMAT_SIGNED_R8] = BRW_SURFACEFORMAT_R8_SNORM,
-   [MESA_FORMAT_SIGNED_RG88_REV] = BRW_SURFACEFORMAT_R8G8_SNORM,
-   [MESA_FORMAT_SIGNED_RGBA8888_REV] = BRW_SURFACEFORMAT_R8G8B8A8_SNORM,
-   [MESA_FORMAT_SIGNED_R16] = BRW_SURFACEFORMAT_R16_SNORM,
-   [MESA_FORMAT_SIGNED_GR1616] = BRW_SURFACEFORMAT_R16G16_SNORM,
-   [MESA_FORMAT_RGBA_FLOAT32] = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT,
-   [MESA_FORMAT_RG_FLOAT32] = BRW_SURFACEFORMAT_R32G32_FLOAT,
-   [MESA_FORMAT_R_FLOAT32] = BRW_SURFACEFORMAT_R32_FLOAT,
-   [MESA_FORMAT_INTENSITY_FLOAT32] = BRW_SURFACEFORMAT_I32_FLOAT,
-   [MESA_FORMAT_LUMINANCE_FLOAT32] = BRW_SURFACEFORMAT_L32_FLOAT,
-   [MESA_FORMAT_ALPHA_FLOAT32] = BRW_SURFACEFORMAT_A32_FLOAT,
-   [MESA_FORMAT_LUMINANCE_ALPHA_FLOAT32] = BRW_SURFACEFORMAT_L32A32_FLOAT,
-   [MESA_FORMAT_RED_RGTC1] = BRW_SURFACEFORMAT_BC4_UNORM,
-   [MESA_FORMAT_SIGNED_RED_RGTC1] = BRW_SURFACEFORMAT_BC4_SNORM,
-   [MESA_FORMAT_RG_RGTC2] = BRW_SURFACEFORMAT_BC5_UNORM,
-   [MESA_FORMAT_SIGNED_RG_RGTC2] = BRW_SURFACEFORMAT_BC5_SNORM,
-};
+   /* Read-only lookup table; formats not listed here map to 0. */
+   static const uint32_t table[MESA_FORMAT_COUNT] = {
+      [MESA_FORMAT_L8] = BRW_SURFACEFORMAT_L8_UNORM,
+      [MESA_FORMAT_I8] = BRW_SURFACEFORMAT_I8_UNORM,
+      [MESA_FORMAT_A8] = BRW_SURFACEFORMAT_A8_UNORM,
+      [MESA_FORMAT_AL88] = BRW_SURFACEFORMAT_L8A8_UNORM,
+      [MESA_FORMAT_AL1616] = BRW_SURFACEFORMAT_L16A16_UNORM,
+      [MESA_FORMAT_R8] = BRW_SURFACEFORMAT_R8_UNORM,
+      [MESA_FORMAT_R16] = BRW_SURFACEFORMAT_R16_UNORM,
+      [MESA_FORMAT_RG88] = BRW_SURFACEFORMAT_R8G8_UNORM,
+      [MESA_FORMAT_RG1616] = BRW_SURFACEFORMAT_R16G16_UNORM,
+      [MESA_FORMAT_ARGB8888] = BRW_SURFACEFORMAT_B8G8R8A8_UNORM,
+      [MESA_FORMAT_XRGB8888] = BRW_SURFACEFORMAT_B8G8R8X8_UNORM,
+      [MESA_FORMAT_RGB565] = BRW_SURFACEFORMAT_B5G6R5_UNORM,
+      [MESA_FORMAT_ARGB1555] = BRW_SURFACEFORMAT_B5G5R5A1_UNORM,
+      [MESA_FORMAT_ARGB4444] = BRW_SURFACEFORMAT_B4G4R4A4_UNORM,
+      [MESA_FORMAT_YCBCR_REV] = BRW_SURFACEFORMAT_YCRCB_NORMAL,
+      [MESA_FORMAT_YCBCR] = BRW_SURFACEFORMAT_YCRCB_SWAPUVY,
+      [MESA_FORMAT_RGB_FXT1] = BRW_SURFACEFORMAT_FXT1,
+      [MESA_FORMAT_RGBA_FXT1] = BRW_SURFACEFORMAT_FXT1,
+      [MESA_FORMAT_RGB_DXT1] = BRW_SURFACEFORMAT_DXT1_RGB,
+      [MESA_FORMAT_RGBA_DXT1] = BRW_SURFACEFORMAT_BC1_UNORM,
+      [MESA_FORMAT_RGBA_DXT3] = BRW_SURFACEFORMAT_BC2_UNORM,
+      [MESA_FORMAT_RGBA_DXT5] = BRW_SURFACEFORMAT_BC3_UNORM,
+      [MESA_FORMAT_SRGB_DXT1] = BRW_SURFACEFORMAT_DXT1_RGB_SRGB,
+      [MESA_FORMAT_SRGBA_DXT1] = BRW_SURFACEFORMAT_BC1_UNORM_SRGB,
+      [MESA_FORMAT_SRGBA_DXT3] = BRW_SURFACEFORMAT_BC2_UNORM_SRGB,
+      [MESA_FORMAT_SRGBA_DXT5] = BRW_SURFACEFORMAT_BC3_UNORM_SRGB,
+      [MESA_FORMAT_SARGB8] = BRW_SURFACEFORMAT_B8G8R8A8_UNORM_SRGB,
+      [MESA_FORMAT_SLA8] = BRW_SURFACEFORMAT_L8A8_UNORM_SRGB,
+      [MESA_FORMAT_SL8] = BRW_SURFACEFORMAT_L8_UNORM_SRGB,
+      [MESA_FORMAT_DUDV8] = BRW_SURFACEFORMAT_R8G8_SNORM,
+      [MESA_FORMAT_SIGNED_R8] = BRW_SURFACEFORMAT_R8_SNORM,
+      [MESA_FORMAT_SIGNED_RG88_REV] = BRW_SURFACEFORMAT_R8G8_SNORM,
+      [MESA_FORMAT_SIGNED_RGBA8888_REV] = BRW_SURFACEFORMAT_R8G8B8A8_SNORM,
+      [MESA_FORMAT_SIGNED_R16] = BRW_SURFACEFORMAT_R16_SNORM,
+      [MESA_FORMAT_SIGNED_GR1616] = BRW_SURFACEFORMAT_R16G16_SNORM,
+      [MESA_FORMAT_RGBA_FLOAT32] = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT,
+      [MESA_FORMAT_RG_FLOAT32] = BRW_SURFACEFORMAT_R32G32_FLOAT,
+      [MESA_FORMAT_R_FLOAT32] = BRW_SURFACEFORMAT_R32_FLOAT,
+      [MESA_FORMAT_INTENSITY_FLOAT32] = BRW_SURFACEFORMAT_I32_FLOAT,
+      [MESA_FORMAT_LUMINANCE_FLOAT32] = BRW_SURFACEFORMAT_L32_FLOAT,
+      [MESA_FORMAT_ALPHA_FLOAT32] = BRW_SURFACEFORMAT_A32_FLOAT,
+      [MESA_FORMAT_LUMINANCE_ALPHA_FLOAT32] = BRW_SURFACEFORMAT_L32A32_FLOAT,
+      [MESA_FORMAT_RED_RGTC1] = BRW_SURFACEFORMAT_BC4_UNORM,
+      [MESA_FORMAT_SIGNED_RED_RGTC1] = BRW_SURFACEFORMAT_BC4_SNORM,
+      [MESA_FORMAT_RG_RGTC2] = BRW_SURFACEFORMAT_BC5_UNORM,
+      [MESA_FORMAT_SIGNED_RG_RGTC2] = BRW_SURFACEFORMAT_BC5_SNORM,
+   };
+   assert(mesa_format < MESA_FORMAT_COUNT);
+   return table[mesa_format];
+}

 bool
 brw_render_target_supported(gl_format format)
@ -139,13 +146,14 @@ brw_render_target_supported(gl_format format)
   /* Not exactly true, as some of those formats are not renderable.
    * But at least we know how to translate them.
    */
-   return brw_format_for_mesa_format[format] != 0;
+   return brw_format_for_mesa_format(format) != 0;
 }

-static GLuint translate_tex_format( gl_format mesa_format,
-                                    GLenum internal_format,
-				    GLenum depth_mode, 
-				    GLenum srgb_decode )
+GLuint
+translate_tex_format(gl_format mesa_format,
+		     GLenum internal_format,
+		     GLenum depth_mode,
+		     GLenum srgb_decode)
 {
   switch( mesa_format ) {

@ -176,9 +184,9 @@ static GLuint translate_tex_format( gl_format mesa_format,
   case MESA_FORMAT_SLA8:
   case MESA_FORMAT_SL8:
      if (srgb_decode == GL_DECODE_EXT)
-	 return brw_format_for_mesa_format[mesa_format];
+	 return brw_format_for_mesa_format(mesa_format);
      else if (srgb_decode == GL_SKIP_DECODE_EXT)
-	 return brw_format_for_mesa_format[_mesa_get_srgb_format_linear(mesa_format)];
+	 return brw_format_for_mesa_format(_mesa_get_srgb_format_linear(mesa_format));

   case MESA_FORMAT_RGBA_FLOAT32:
      /* The value of this BRW_SURFACEFORMAT is 0, which tricks the
@ -187,8 +195,8 @@ static GLuint translate_tex_format( gl_format mesa_format,
      return BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;

   default:
-      assert(brw_format_for_mesa_format[mesa_format] != 0);
-      return brw_format_for_mesa_format[mesa_format];
+      assert(brw_format_for_mesa_format(mesa_format) != 0);
+      return brw_format_for_mesa_format(mesa_format);
   }
 }

@ -442,6 +450,7 @@ brw_update_renderbuffer_surface(struct brw_context *brw,
   struct intel_renderbuffer *irb = intel_renderbuffer(rb);
   struct intel_region *region = irb->region;
   struct brw_surface_state *surf;
+   uint32_t tile_x, tile_y;

   surf = brw_state_batch(brw, sizeof(*surf), 32,
 			  &brw->wm.surf_offset[unit]);
@ -470,47 +479,29 @@ brw_update_renderbuffer_surface(struct brw_context *brw,
      /* without GL_EXT_framebuffer_sRGB we shouldn't bind sRGB
 	 surfaces to the blend/update as sRGB */
      if (ctx->Color.sRGBEnabled)
-	 surf->ss0.surface_format = brw_format_for_mesa_format[irb->Base.Format];
+	 surf->ss0.surface_format = brw_format_for_mesa_format(irb->Base.Format);
      else
 	 surf->ss0.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
      break;
   default:
      assert(brw_render_target_supported(irb->Base.Format));
-      surf->ss0.surface_format = brw_format_for_mesa_format[irb->Base.Format];
+      surf->ss0.surface_format = brw_format_for_mesa_format(irb->Base.Format);
   }

   surf->ss0.surface_type = BRW_SURFACE_2D;
-   if (region->tiling == I915_TILING_NONE) {
-      surf->ss1.base_addr = (region->draw_x +
-			    region->draw_y * region->pitch) * region->cpp;
-   } else {
-      uint32_t tile_base, tile_x, tile_y;
-      uint32_t pitch = region->pitch * region->cpp;
-
-      if (region->tiling == I915_TILING_X) {
-	 tile_x = region->draw_x % (512 / region->cpp);
-	 tile_y = region->draw_y % 8;
-	 tile_base = ((region->draw_y / 8) * (8 * pitch));
-	 tile_base += (region->draw_x - tile_x) / (512 / region->cpp) * 4096;
-      } else {
-	 /* Y */
-	 tile_x = region->draw_x % (128 / region->cpp);
-	 tile_y = region->draw_y % 32;
-	 tile_base = ((region->draw_y / 32) * (32 * pitch));
-	 tile_base += (region->draw_x - tile_x) / (128 / region->cpp) * 4096;
-      }
-      assert(brw->has_surface_tile_offset || (tile_x == 0 && tile_y == 0));
-      assert(tile_x % 4 == 0);
-      assert(tile_y % 2 == 0);
-      /* Note that the low bits of these fields are missing, so
-       * there's the possibility of getting in trouble.
-       */
-      surf->ss1.base_addr = tile_base;
-      surf->ss5.x_offset = tile_x / 4;
-      surf->ss5.y_offset = tile_y / 2;
-   }
+   /* reloc */
+   surf->ss1.base_addr = intel_region_tile_offsets(region, &tile_x, &tile_y);
   surf->ss1.base_addr += region->buffer->offset; /* reloc */

+   assert(brw->has_surface_tile_offset || (tile_x == 0 && tile_y == 0));
+   /* Note that the low bits of these fields are missing, so
+    * there's the possibility of getting in trouble.
+    */
+   assert(tile_x % 4 == 0);
+   assert(tile_y % 2 == 0);
+   surf->ss5.x_offset = tile_x / 4;
+   surf->ss5.y_offset = tile_y / 2;
+
   surf->ss2.width = rb->Width - 1;
   surf->ss2.height = rb->Height - 1;
   brw_set_surface_tiling(surf, region->tiling);
@ -554,12 +545,8 @@ prepare_wm_surfaces(struct brw_context *brw)
 	 struct intel_renderbuffer *irb = intel_renderbuffer(rb);
 	 struct intel_region *region = irb ? irb->region : NULL;

-	 if (region == NULL || region->buffer == NULL) {
-	    brw->intel.Fallback = GL_TRUE; /* boolean, not bitfield */
-	    return;
-	 }
-
-	 brw_add_validated_bo(brw, region->buffer);
+	 if (region)
+	    brw_add_validated_bo(brw, region->buffer);
 	 nr_surfaces = SURF_INDEX_DRAW(i) + 1;
      }
   }
@ -664,7 +651,7 @@ brw_wm_upload_binding_table(struct brw_context *brw)
      bind[i] = brw->wm.surf_offset[i];
   }

-   brw->state.dirty.brw |= BRW_NEW_BINDING_TABLE;
+   brw->state.dirty.brw |= BRW_NEW_PS_BINDING_TABLE;
 }

 const struct brw_tracked_state brw_wm_binding_table = {
--- a/src/mesa/drivers/dri/i965/gen6_cc.c
+++ b/src/mesa/drivers/dri/i965/gen6_cc.c
@ -39,8 +39,18 @@ prepare_blend_state(struct brw_context *brw)
   struct gen6_blend_state *blend;
   int b;
   int nr_draw_buffers = ctx->DrawBuffer->_NumColorDrawBuffers;
-   int size = sizeof(*blend) * nr_draw_buffers;
+   int size;

+   /* We need at least one BLEND_STATE written, because we might do
+    * thread dispatch even if _NumColorDrawBuffers is 0 (for example
+    * for computed depth or alpha test), which will do an FB write
+    * with render target 0, which will reference BLEND_STATE[0] for
+    * alpha test enable.
+    */
+   if (nr_draw_buffers == 0 && ctx->Color.AlphaEnabled)
+      nr_draw_buffers = 1;
+
+   size = sizeof(*blend) * nr_draw_buffers;
   blend = brw_state_batch(brw, size, 64, &brw->cc.blend_state_offset);

   memset(blend, 0, size);
--- a/src/mesa/drivers/dri/i965/gen6_clip_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_clip_state.c
@ -42,6 +42,7 @@ upload_clip_state(struct brw_context *brw)
   if (!ctx->Transform.DepthClamp)
      depth_clamp = GEN6_CLIP_Z_TEST;

+   /* _NEW_LIGHT */
   if (ctx->Light.ProvokingVertex == GL_FIRST_VERTEX_CONVENTION) {
      provoking =
 	 (0 << GEN6_CLIP_TRI_PROVOKE_SHIFT) |
@ -75,7 +76,7 @@ upload_clip_state(struct brw_context *brw)

 const struct brw_tracked_state gen6_clip_state = {
   .dirty = {
-      .mesa  = _NEW_TRANSFORM,
+      .mesa  = _NEW_TRANSFORM | _NEW_LIGHT,
      .brw   = BRW_NEW_CONTEXT,
      .cache = 0
   },
--- a/src/mesa/drivers/dri/i965/gen6_sf_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_sf_state.c
@ -32,7 +32,7 @@
 #include "main/macros.h"
 #include "intel_batchbuffer.h"

-static uint32_t
+uint32_t
 get_attr_override(struct brw_context *brw, int fs_attr, int two_side_color)
 {
   int attr_index = 0, i, vs_attr;
@ -103,6 +103,7 @@ upload_sf_state(struct brw_context *brw)
   int attr = 0;
   int urb_start;
   int two_side_color = (ctx->Light.Enabled && ctx->Light.Model.TwoSide);
+   float point_size;

   /* _NEW_TRANSFORM */
   if (ctx->Transform.ClipPlanesEnabled)
@ -209,8 +210,12 @@ upload_sf_state(struct brw_context *brw)
 	 ctx->Point._Attenuated))
      dw4 |= GEN6_SF_USE_STATE_POINT_WIDTH;

-   dw4 |= U_FIXED(CLAMP(ctx->Point.Size, 0.125, 255.875), 3) <<
-      GEN6_SF_POINT_WIDTH_SHIFT;
+   /* Clamp to ARB_point_parameters user limits */
+   point_size = CLAMP(ctx->Point.Size, ctx->Point.MinSize, ctx->Point.MaxSize);
+
+   /* Clamp to the hardware limits and convert to fixed point */
+   dw4 |= U_FIXED(CLAMP(point_size, 0.125, 255.875), 3);
+
   if (ctx->Point.SpriteOrigin == GL_LOWER_LEFT)
      dw1 |= GEN6_SF_POINT_SPRITE_LOWERLEFT;

--- a/src/mesa/drivers/dri/i965/gen6_urb.c
+++ b/src/mesa/drivers/dri/i965/gen6_urb.c
@ -42,8 +42,8 @@ prepare_urb( struct brw_context *brw )
   /* Calculate how many VS URB entries fit in the total URB size */
   nr_vs_entries = (brw->urb.size * 1024) / (brw->urb.vs_size * 128);

-   if (nr_vs_entries > brw->urb.max_vs_handles)
-      nr_vs_entries = brw->urb.max_vs_handles;
+   if (nr_vs_entries > brw->urb.max_vs_entries)
+      nr_vs_entries = brw->urb.max_vs_entries;

   /* According to volume 2a, nr_vs_entries must be a multiple of 4. */
   brw->urb.nr_vs_entries = ROUND_DOWN_TO(nr_vs_entries, 4);
--- a/src/mesa/drivers/dri/i965/gen6_wm_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_wm_state.c
@ -42,7 +42,7 @@ gen6_prepare_wm_push_constants(struct brw_context *brw)
   const struct brw_fragment_program *fp =
      brw_fragment_program_const(brw->fragment_program);

-   /* Updates the ParamaterValues[i] pointers for all parameters of the
+   /* Updates the ParameterValues[i] pointers for all parameters of the
    * basic type of PROGRAM_STATE_VAR.
    */
   /* XXX: Should this happen somewhere before to get our state flag set? */
--- a/src/mesa/drivers/dri/i965/gen7_cc_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_cc_state.c
@ -0,0 +1,89 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+#include "brw_util.h"
+#include "intel_batchbuffer.h"
+#include "main/macros.h"
+
+static void
+upload_cc_state_pointers(struct brw_context *brw)
+{
+   struct intel_context *intel = &brw->intel;
+
+   BEGIN_BATCH(2);
+   OUT_BATCH(_3DSTATE_CC_STATE_POINTERS << 16 | (2 - 2));
+   OUT_BATCH(brw->cc.state_offset | 1);
+   ADVANCE_BATCH();
+}
+
+const struct brw_tracked_state gen7_cc_state_pointer = {
+   .dirty = {
+      .mesa = 0,
+      .brw = BRW_NEW_BATCH,
+      .cache = CACHE_NEW_COLOR_CALC_STATE
+   },
+   .emit = upload_cc_state_pointers,
+};
+
+static void
+upload_blend_state_pointer(struct brw_context *brw)
+{
+   struct intel_context *intel = &brw->intel;
+
+   BEGIN_BATCH(2);
+   OUT_BATCH(_3DSTATE_BLEND_STATE_POINTERS << 16 | (2 - 2));
+   OUT_BATCH(brw->cc.blend_state_offset | 1);
+   ADVANCE_BATCH();
+}
+
+const struct brw_tracked_state gen7_blend_state_pointer = {
+   .dirty = {
+      .mesa = 0,
+      .brw = BRW_NEW_BATCH,
+      .cache = CACHE_NEW_BLEND_STATE
+   },
+   .emit = upload_blend_state_pointer,
+};
+
+static void
+upload_depth_stencil_state_pointer(struct brw_context *brw)
+{
+   struct intel_context *intel = &brw->intel;
+
+   BEGIN_BATCH(2);
+   OUT_BATCH(_3DSTATE_DEPTH_STENCIL_STATE_POINTERS << 16 | (2 - 2));
+   OUT_BATCH(brw->cc.depth_stencil_state_offset | 1);
+   ADVANCE_BATCH();
+}
+
+const struct brw_tracked_state gen7_depth_stencil_state_pointer = {
+   .dirty = {
+      .mesa = 0,
+      .brw = BRW_NEW_BATCH,
+      .cache = CACHE_NEW_DEPTH_STENCIL_STATE
+   },
+   .emit = upload_depth_stencil_state_pointer,
+};
--- a/src/mesa/drivers/dri/i965/gen7_clip_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_clip_state.c
@ -0,0 +1,113 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+#include "brw_util.h"
+#include "intel_batchbuffer.h"
+
+static void
+upload_clip_state(struct brw_context *brw)
+{
+   struct intel_context *intel = &brw->intel;
+   struct gl_context *ctx = &intel->ctx;
+   uint32_t depth_clamp = 0;
+   uint32_t provoking, userclip;
+   uint32_t dw1 = GEN6_CLIP_STATISTICS_ENABLE;
+
+   /* _NEW_BUFFERS */
+   GLboolean render_to_fbo = brw->intel.ctx.DrawBuffer->Name != 0;
+
+   dw1 |= GEN7_CLIP_EARLY_CULL;
+
+   /* _NEW_POLYGON */
+   if ((ctx->Polygon.FrontFace == GL_CCW) ^ render_to_fbo)
+      dw1 |= GEN7_CLIP_WINDING_CCW;
+
+   if (ctx->Polygon.CullFlag) {
+      switch (ctx->Polygon.CullFaceMode) {
+      case GL_FRONT:
+	 dw1 |= GEN7_CLIP_CULLMODE_FRONT;
+	 break;
+      case GL_BACK:
+	 dw1 |= GEN7_CLIP_CULLMODE_BACK;
+	 break;
+      case GL_FRONT_AND_BACK:
+	 dw1 |= GEN7_CLIP_CULLMODE_BOTH;
+	 break;
+      default:
+	 assert(!"Should not get here: invalid CullFlag");
+	 break;
+      }
+   } else {
+      dw1 |= GEN7_CLIP_CULLMODE_NONE;
+   }
+
+   /* _NEW_TRANSFORM */
+   if (!ctx->Transform.DepthClamp)
+      depth_clamp = GEN6_CLIP_Z_TEST;
+
+   /* _NEW_LIGHT */
+   if (ctx->Light.ProvokingVertex == GL_FIRST_VERTEX_CONVENTION) {
+      provoking =
+	 (0 << GEN6_CLIP_TRI_PROVOKE_SHIFT) |
+	 (1 << GEN6_CLIP_TRIFAN_PROVOKE_SHIFT) |
+	 (0 << GEN6_CLIP_LINE_PROVOKE_SHIFT);
+   } else {
+      provoking =
+	 (2 << GEN6_CLIP_TRI_PROVOKE_SHIFT) |
+	 (2 << GEN6_CLIP_TRIFAN_PROVOKE_SHIFT) |
+	 (1 << GEN6_CLIP_LINE_PROVOKE_SHIFT);
+   }
+
+   /* _NEW_TRANSFORM */
+   userclip = (1 << brw_count_bits(ctx->Transform.ClipPlanesEnabled)) - 1;
+
+   BEGIN_BATCH(4);
+   OUT_BATCH(_3DSTATE_CLIP << 16 | (4 - 2));
+   OUT_BATCH(dw1);
+   OUT_BATCH(GEN6_CLIP_ENABLE |
+	     GEN6_CLIP_API_OGL |
+	     GEN6_CLIP_MODE_NORMAL |
+	     GEN6_CLIP_XY_TEST |
+	     userclip << GEN6_USER_CLIP_CLIP_DISTANCES_SHIFT |
+	     depth_clamp |
+	     provoking);
+   OUT_BATCH(U_FIXED(0.125, 3) << GEN6_CLIP_MIN_POINT_WIDTH_SHIFT |
+             U_FIXED(255.875, 3) << GEN6_CLIP_MAX_POINT_WIDTH_SHIFT |
+             GEN6_CLIP_FORCE_ZERO_RTAINDEX);
+   ADVANCE_BATCH();
+}
+
+const struct brw_tracked_state gen7_clip_state = {
+   .dirty = {
+      .mesa  = (_NEW_BUFFERS |
+                _NEW_POLYGON |
+                _NEW_LIGHT |
+                _NEW_TRANSFORM),
+      .brw   = BRW_NEW_CONTEXT,
+      .cache = 0
+   },
+   .emit = upload_clip_state,
+};
--- a/src/mesa/drivers/dri/i965/gen7_disable.c
+++ b/src/mesa/drivers/dri/i965/gen7_disable.c
@ -0,0 +1,141 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+#include "intel_batchbuffer.h"
+
+/**
+ * Emits state packets for the pipeline stages this file leaves disabled:
+ * GS, HS, TE, DS and stream output (SOL).
+ *
+ * GS, HS and DS each get three packets (constants, the stage itself and
+ * its binding table pointer); TE and SOL get one packet each.  Apart from
+ * a few fields in the 3DSTATE_GS packet, every payload dword is zero.
+ *
+ * Asserts that no geometry shader program buffer is set.
+ *
+ * \param brw  i965 context the packets are emitted for.
+ */
+static void
+disable_stages(struct brw_context *brw)
+{
+   /* Not referenced by name below; presumably consumed by the
+    * BEGIN_BATCH/OUT_BATCH/ADVANCE_BATCH macros.
+    */
+   struct intel_context *intel = &brw->intel;
+
+   assert(brw->gs.prog_bo == NULL);
+
+   /* Disable the Geometry Shader (GS) Unit */
+   BEGIN_BATCH(7);
+   OUT_BATCH(_3DSTATE_CONSTANT_GS << 16 | (7 - 2));
+   OUT_BATCH(0);
+   OUT_BATCH(0);
+   OUT_BATCH(0);
+   OUT_BATCH(0);
+   OUT_BATCH(0);
+   OUT_BATCH(0);
+   ADVANCE_BATCH();
+
+   /* 3DSTATE_GS: no program, no samplers or binding table entries, zero
+    * max threads; dispatch GRF start, vertex-handle inclusion and the
+    * statistics bit are still programmed.
+    */
+   BEGIN_BATCH(7);
+   OUT_BATCH(_3DSTATE_GS << 16 | (7 - 2));
+   OUT_BATCH(0); /* prog_bo */
+   OUT_BATCH((0 << GEN6_GS_SAMPLER_COUNT_SHIFT) |
+	     (0 << GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
+   OUT_BATCH(0); /* scratch space base offset */
+   OUT_BATCH((1 << GEN6_GS_DISPATCH_START_GRF_SHIFT) |
+	     (0 << GEN6_GS_URB_READ_LENGTH_SHIFT) |
+	     GEN7_GS_INCLUDE_VERTEX_HANDLES |
+	     (0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT));
+   OUT_BATCH((0 << GEN6_GS_MAX_THREADS_SHIFT) |
+	     GEN6_GS_STATISTICS_ENABLE);
+   OUT_BATCH(0);
+   ADVANCE_BATCH();
+
+   /* Null GS binding table pointer. */
+   BEGIN_BATCH(2);
+   OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS_GS << 16 | (2 - 2));
+   OUT_BATCH(0);
+   ADVANCE_BATCH();
+
+   /* Disable the HS Unit */
+   BEGIN_BATCH(7);
+   OUT_BATCH(_3DSTATE_CONSTANT_HS << 16 | (7 - 2));
+   OUT_BATCH(0);
+   OUT_BATCH(0);
+   OUT_BATCH(0);
+   OUT_BATCH(0);
+   OUT_BATCH(0);
+   OUT_BATCH(0);
+   ADVANCE_BATCH();
+
+   BEGIN_BATCH(7);
+   OUT_BATCH(_3DSTATE_HS << 16 | (7 - 2));
+   OUT_BATCH(0);
+   OUT_BATCH(0);
+   OUT_BATCH(0);
+   OUT_BATCH(0);
+   OUT_BATCH(0);
+   OUT_BATCH(0);
+   ADVANCE_BATCH();
+
+   /* Null HS binding table pointer. */
+   BEGIN_BATCH(2);
+   OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS_HS << 16 | (2 - 2));
+   OUT_BATCH(0);
+   ADVANCE_BATCH();
+
+   /* Disable the TE */
+   BEGIN_BATCH(4);
+   OUT_BATCH(_3DSTATE_TE << 16 | (4 - 2));
+   OUT_BATCH(0);
+   OUT_BATCH(0);
+   OUT_BATCH(0);
+   ADVANCE_BATCH();
+
+   /* Disable the DS Unit */
+   BEGIN_BATCH(7);
+   OUT_BATCH(_3DSTATE_CONSTANT_DS << 16 | (7 - 2));
+   OUT_BATCH(0);
+   OUT_BATCH(0);
+   OUT_BATCH(0);
+   OUT_BATCH(0);
+   OUT_BATCH(0);
+   OUT_BATCH(0);
+   ADVANCE_BATCH();
+
+   /* Note: 3DSTATE_DS is emitted as 6 dwords, unlike the 7-dword GS/HS
+    * packets above.
+    */
+   BEGIN_BATCH(6);
+   OUT_BATCH(_3DSTATE_DS << 16 | (6 - 2));
+   OUT_BATCH(0);
+   OUT_BATCH(0);
+   OUT_BATCH(0);
+   OUT_BATCH(0);
+   OUT_BATCH(0);
+   ADVANCE_BATCH();
+
+   /* Null DS binding table pointer. */
+   BEGIN_BATCH(2);
+   OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS_DS << 16 | (2 - 2));
+   OUT_BATCH(0);
+   ADVANCE_BATCH();
+
+   /* Disable the SOL stage */
+   BEGIN_BATCH(3);
+   OUT_BATCH(_3DSTATE_STREAMOUT << 16 | (3 - 2));
+   OUT_BATCH(0);
+   OUT_BATCH(0);
+   ADVANCE_BATCH();
+}
+
+/* Tracked-state entry for disable_stages; BRW_NEW_BATCH is its only
+ * dirty flag.
+ */
+const struct brw_tracked_state gen7_disable_stages = {
+   .emit = disable_stages,
+   .dirty = { .mesa = 0, .brw = BRW_NEW_BATCH, .cache = 0 },
+};
--- a/src/mesa/drivers/dri/i965/gen7_misc_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_misc_state.c
@ -0,0 +1,144 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "intel_batchbuffer.h"
+#include "intel_regions.h"
+#include "intel_fbo.h"
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+
+/**
+ * Returns the BRW_DEPTHFORMAT_* value describing the depth buffer of the
+ * current draw framebuffer.
+ *
+ * The depth renderbuffer's region is used when a depth renderbuffer
+ * exists, otherwise the stencil renderbuffer's region.  The format is
+ * chosen from the region's bytes per pixel (cpp):
+ *   - 2: D16_UNORM
+ *   - 4: D32_FLOAT if intel->depth_buffer_is_float, else D24_UNORM_X8_UINT
+ *
+ * When there is no region to inspect (no depth/stencil renderbuffer, or a
+ * renderbuffer without a region) BRW_DEPTHFORMAT_D32_FLOAT is returned,
+ * which is the format emit_depthbuffer programs for its null surface.
+ *
+ * \param brw  i965 context whose ctx.DrawBuffer is examined.
+ * \return     a BRW_DEPTHFORMAT_* value; 0 for an unexpected cpp when
+ *             assertions are compiled out.
+ */
+unsigned int
+gen7_depth_format(struct brw_context *brw)
+{
+   struct intel_context *intel = &brw->intel;
+   struct gl_context *ctx = &intel->ctx;
+   struct gl_framebuffer *fb = ctx->DrawBuffer;
+   struct intel_renderbuffer *drb = intel_get_renderbuffer(fb, BUFFER_DEPTH);
+   struct intel_renderbuffer *srb = intel_get_renderbuffer(fb, BUFFER_STENCIL);
+   struct intel_region *region = NULL;
+
+   if (drb)
+      region = drb->region;
+   else if (srb)
+      region = srb->region;
+
+   /* Covers both "no renderbuffer" and "renderbuffer without a region";
+    * the latter would otherwise be dereferenced below.
+    */
+   if (region == NULL)
+      return BRW_DEPTHFORMAT_D32_FLOAT;
+
+   switch (region->cpp) {
+   case 2:
+      return BRW_DEPTHFORMAT_D16_UNORM;
+   case 4:
+      if (intel->depth_buffer_is_float)
+	 return BRW_DEPTHFORMAT_D32_FLOAT;
+      else
+	 return BRW_DEPTHFORMAT_D24_UNORM_X8_UINT;
+   default:
+      assert(!"Should not get here.");
+   }
+   return 0;
+}
+
+/**
+ * Emits the depth-related buffer packets for the current draw framebuffer:
+ * GEN7_3DSTATE_DEPTH_BUFFER followed by zeroed HIER_DEPTH_BUFFER,
+ * STENCIL_BUFFER and CLEAR_PARAMS packets.
+ *
+ * The region comes from the depth renderbuffer if present, otherwise from
+ * the stencil renderbuffer.  With no region a BRW_SURFACE_NULL depth
+ * buffer is programmed; otherwise the region (asserted to be Y-tiled) is
+ * described as a 2D surface with a relocation to its buffer.
+ *
+ * \param brw  i965 context the packets are emitted for.
+ */
+static void emit_depthbuffer(struct brw_context *brw)
+{
+   struct intel_context *intel = &brw->intel;
+   struct gl_context *ctx = &intel->ctx;
+   struct gl_framebuffer *fb = ctx->DrawBuffer;
+   struct intel_renderbuffer *drb = intel_get_renderbuffer(fb, BUFFER_DEPTH);
+   struct intel_renderbuffer *srb = intel_get_renderbuffer(fb, BUFFER_STENCIL);
+   struct intel_region *region = NULL;
+
+   /* _NEW_BUFFERS */
+   if (drb)
+      region = drb->region;
+   else if (srb)
+      region = srb->region;
+
+   if (region == NULL) {
+      /* Null depth surface; every remaining dword is zero. */
+      BEGIN_BATCH(7);
+      OUT_BATCH(GEN7_3DSTATE_DEPTH_BUFFER << 16 | (7 - 2));
+      OUT_BATCH((BRW_DEPTHFORMAT_D32_FLOAT << 18) |
+		(BRW_SURFACE_NULL << 29));
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      ADVANCE_BATCH();
+   } else {
+      uint32_t tile_x, tile_y, offset;
+
+      /* Returns the offset used for the relocation below and fills in
+       * tile_x/tile_y, which are emitted in the sixth dword.
+       */
+      offset = intel_region_tile_offsets(region, &tile_x, &tile_y);
+
+      assert(region->tiling == I915_TILING_Y);
+
+      BEGIN_BATCH(7);
+      OUT_BATCH(GEN7_3DSTATE_DEPTH_BUFFER << 16 | (7 - 2));
+      /* Pitch in bytes minus one, format, and the depth write enable bit
+       * taken from ctx->Depth.Mask.
+       */
+      OUT_BATCH(((region->pitch * region->cpp) - 1) |
+		(gen7_depth_format(brw) << 18) |
+		(0 << 22) /* no HiZ buffer */ |
+		(0 << 27) /* no stencil write */ |
+		((ctx->Depth.Mask != 0) << 28) |
+		(BRW_SURFACE_2D << 29));
+      OUT_RELOC(region->buffer,
+	        I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+		offset);
+      OUT_BATCH(((region->width - 1) << 4) | ((region->height - 1) << 18));
+      OUT_BATCH(0);
+      OUT_BATCH(tile_x | (tile_y << 16));
+      OUT_BATCH(0);
+      ADVANCE_BATCH();
+   }
+
+   /* The remaining packets are always emitted with zero payloads. */
+   BEGIN_BATCH(4);
+   OUT_BATCH(GEN7_3DSTATE_HIER_DEPTH_BUFFER << 16 | (4 - 2));
+   OUT_BATCH(0);
+   OUT_BATCH(0);
+   OUT_BATCH(0);
+   ADVANCE_BATCH();
+
+   BEGIN_BATCH(4);
+   OUT_BATCH(GEN7_3DSTATE_STENCIL_BUFFER << 16 | (4 - 2));
+   OUT_BATCH(0);
+   OUT_BATCH(0);
+   OUT_BATCH(0);
+   ADVANCE_BATCH();
+
+   BEGIN_BATCH(3);
+   OUT_BATCH(GEN7_3DSTATE_CLEAR_PARAMS << 16 | (3 - 2));
+   OUT_BATCH(0);
+   OUT_BATCH(0);
+   ADVANCE_BATCH();
+}
+
+/**
+ * Tracked-state entry for emit_depthbuffer.
+ *
+ * _NEW_DEPTH is listed in addition to _NEW_BUFFERS because
+ * emit_depthbuffer programs the depth write enable bit from
+ * ctx->Depth.Mask; without it a depth mask change alone would not cause
+ * the packet to be re-emitted.
+ *
+ * \see brw_context.state.depth_region
+ */
+const struct brw_tracked_state gen7_depthbuffer = {
+   .dirty = {
+      .mesa = (_NEW_BUFFERS | _NEW_DEPTH),
+      .brw = BRW_NEW_BATCH,
+      .cache = 0,
+   },
+   .emit = emit_depthbuffer,
+};
--- a/src/mesa/drivers/dri/i965/gen7_sampler_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_sampler_state.c
@ -0,0 +1,205 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+#include "intel_batchbuffer.h"
+
+#include "main/macros.h"
+#include "main/samplerobj.h"
+
+/**
+ * Sets the sampler state for a single unit.
+ *
+ * Fills in filter modes, anisotropy, wrap modes, shadow compare function,
+ * LOD bias/clamps and the default (border) color pointer from the GL
+ * sampler object returned by _mesa_get_samplerobj() for the unit.
+ *
+ * \param brw      i965 context.
+ * \param unit     texture unit index into ctx->Texture.Unit[].
+ * \param sampler  hardware sampler entry to fill in.  Only some fields are
+ *                 written; the caller in this file zeroes it beforehand.
+ */
+static void
+gen7_update_sampler_state(struct brw_context *brw, int unit,
+			  struct gen7_sampler_state *sampler)
+{
+   struct intel_context *intel = &brw->intel;
+   struct gl_context *ctx = &intel->ctx;
+   struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
+   struct gl_texture_object *texObj = texUnit->_Current;
+   struct gl_sampler_object *gl_sampler = _mesa_get_samplerobj(ctx, unit);
+   /* Set when either the min or the mag filter is GL_NEAREST; passed on to
+    * translate_wrap_mode().
+    */
+   bool using_nearest = false;
+
+   /* The GL min filter splits into a map filter and a mip filter. */
+   switch (gl_sampler->MinFilter) {
+   case GL_NEAREST:
+      sampler->ss0.min_filter = BRW_MAPFILTER_NEAREST;
+      sampler->ss0.mip_filter = BRW_MIPFILTER_NONE;
+      using_nearest = true;
+      break;
+   case GL_LINEAR:
+      sampler->ss0.min_filter = BRW_MAPFILTER_LINEAR;
+      sampler->ss0.mip_filter = BRW_MIPFILTER_NONE;
+      break;
+   case GL_NEAREST_MIPMAP_NEAREST:
+      sampler->ss0.min_filter = BRW_MAPFILTER_NEAREST;
+      sampler->ss0.mip_filter = BRW_MIPFILTER_NEAREST;
+      break;
+   case GL_LINEAR_MIPMAP_NEAREST:
+      sampler->ss0.min_filter = BRW_MAPFILTER_LINEAR;
+      sampler->ss0.mip_filter = BRW_MIPFILTER_NEAREST;
+      break;
+   case GL_NEAREST_MIPMAP_LINEAR:
+      sampler->ss0.min_filter = BRW_MAPFILTER_NEAREST;
+      sampler->ss0.mip_filter = BRW_MIPFILTER_LINEAR;
+      break;
+   case GL_LINEAR_MIPMAP_LINEAR:
+      sampler->ss0.min_filter = BRW_MAPFILTER_LINEAR;
+      sampler->ss0.mip_filter = BRW_MIPFILTER_LINEAR;
+      break;
+   default:
+      break;
+   }
+
+   /* Set Anisotropy: when enabled it overrides the min filter chosen above
+    * and the GL mag filter is not consulted at all.
+    */
+   if (gl_sampler->MaxAnisotropy > 1.0) {
+      sampler->ss0.min_filter = BRW_MAPFILTER_ANISOTROPIC;
+      sampler->ss0.mag_filter = BRW_MAPFILTER_ANISOTROPIC;
+
+      /* Ratios above 2 are encoded as (ratio - 2) / 2, capped at
+       * BRW_ANISORATIO_16.
+       */
+      if (gl_sampler->MaxAnisotropy > 2.0) {
+	 sampler->ss3.max_aniso = MIN2((gl_sampler->MaxAnisotropy - 2) / 2,
+				       BRW_ANISORATIO_16);
+      }
+   }
+   else {
+      switch (gl_sampler->MagFilter) {
+      case GL_NEAREST:
+	 sampler->ss0.mag_filter = BRW_MAPFILTER_NEAREST;
+	 using_nearest = true;
+	 break;
+      case GL_LINEAR:
+	 sampler->ss0.mag_filter = BRW_MAPFILTER_LINEAR;
+	 break;
+      default:
+	 break;
+      }
+   }
+
+   sampler->ss3.r_wrap_mode = translate_wrap_mode(gl_sampler->WrapR,
+						  using_nearest);
+   sampler->ss3.s_wrap_mode = translate_wrap_mode(gl_sampler->WrapS,
+						  using_nearest);
+   sampler->ss3.t_wrap_mode = translate_wrap_mode(gl_sampler->WrapT,
+						  using_nearest);
+
+   /* Cube-maps on 965 and later must use the same wrap mode for all 3
+    * coordinate dimensions.  Further, only CUBE and CLAMP are valid.
+    */
+   if (texObj->Target == GL_TEXTURE_CUBE_MAP) {
+      /* CUBE mode is used only for seamless cube maps where at least one
+       * of the filters is not GL_NEAREST; everything else gets CLAMP.
+       */
+      if (ctx->Texture.CubeMapSeamless &&
+	  (gl_sampler->MinFilter != GL_NEAREST ||
+	   gl_sampler->MagFilter != GL_NEAREST)) {
+	 sampler->ss3.r_wrap_mode = BRW_TEXCOORDMODE_CUBE;
+	 sampler->ss3.s_wrap_mode = BRW_TEXCOORDMODE_CUBE;
+	 sampler->ss3.t_wrap_mode = BRW_TEXCOORDMODE_CUBE;
+      } else {
+	 sampler->ss3.r_wrap_mode = BRW_TEXCOORDMODE_CLAMP;
+	 sampler->ss3.s_wrap_mode = BRW_TEXCOORDMODE_CLAMP;
+	 sampler->ss3.t_wrap_mode = BRW_TEXCOORDMODE_CLAMP;
+      }
+   } else if (texObj->Target == GL_TEXTURE_1D) {
+      /* There's a bug in 1D texture sampling - it actually pays
+       * attention to the wrap_t value, though it should not.
+       * Override the wrap_t value here to GL_REPEAT to keep
+       * any nonexistent border pixels from floating in.
+       */
+      sampler->ss3.t_wrap_mode = BRW_TEXCOORDMODE_WRAP;
+   }
+
+   /* Set shadow function: */
+   if (gl_sampler->CompareMode == GL_COMPARE_R_TO_TEXTURE_ARB) {
+      /* Shadowing is "enabled" by emitting a particular sampler
+       * message (sample_c).  So need to recompile WM program when
+       * shadow comparison is enabled on each/any texture unit.
+       */
+      sampler->ss1.shadow_function =
+	 intel_translate_shadow_compare_func(gl_sampler->CompareFunc);
+   }
+
+   /* Set LOD bias: the sum of the unit's and the sampler object's bias,
+    * clamped to [-16, 15] before fixed-point conversion.
+    */
+   sampler->ss0.lod_bias = S_FIXED(CLAMP(texUnit->LodBias +
+					 gl_sampler->LodBias, -16, 15), 8);
+
+   sampler->ss0.lod_preclamp = 1; /* OpenGL mode */
+   sampler->ss0.default_color_mode = 0; /* OpenGL/DX10 mode */
+
+   /* Set BaseMipLevel, MaxLOD, MinLOD:
+    *
+    * XXX: I don't think that using firstLevel, lastLevel works,
+    * because we always setup the surface state as if firstLevel ==
+    * level zero.  Probably have to subtract firstLevel from each of
+    * these:
+    */
+   sampler->ss0.base_level = U_FIXED(0, 1);
+
+   /* Both LOD limits are clamped to [0, 13]. */
+   sampler->ss1.max_lod = U_FIXED(CLAMP(gl_sampler->MaxLod, 0, 13), 8);
+   sampler->ss1.min_lod = U_FIXED(CLAMP(gl_sampler->MinLod, 0, 13), 8);
+
+   /* Presumably fills brw->wm.sdc_offset[unit], which is read just
+    * below -- confirm against upload_default_color().
+    */
+   upload_default_color(brw, gl_sampler, unit);
+
+   /* The pointer field stores the offset shifted right by 5 (presumably
+    * 32-byte units -- confirm against the hardware documentation).
+    */
+   sampler->ss2.default_color_pointer = brw->wm.sdc_offset[unit] >> 5;
+}
+
+
+/* Builds the sampler state table in one contiguous allocation.
+ *
+ * The table spans units 0 through the highest enabled texture unit;
+ * entries for disabled units inside that range are left zeroed.  Nothing
+ * is allocated when no unit is enabled.
+ *
+ * FIXME: simplify all the different new texture state flags.
+ */
+static void
+gen7_prepare_samplers(struct brw_context *brw)
+{
+   struct gl_context *ctx = &brw->intel.ctx;
+   struct gen7_sampler_state *table;
+   int unit;
+
+   /* sampler_count ends up one past the highest enabled unit (or 0). */
+   brw->wm.sampler_count = 0;
+   for (unit = BRW_MAX_TEX_UNIT - 1; unit >= 0; unit--) {
+      if (ctx->Texture.Unit[unit]._ReallyEnabled) {
+         brw->wm.sampler_count = unit + 1;
+         break;
+      }
+   }
+
+   if (brw->wm.sampler_count == 0)
+      return;
+
+   table = brw_state_batch(brw, brw->wm.sampler_count * sizeof(*table),
+                           32, &brw->wm.sampler_offset);
+   memset(table, 0, brw->wm.sampler_count * sizeof(*table));
+
+   for (unit = 0; unit < brw->wm.sampler_count; unit++) {
+      if (!ctx->Texture.Unit[unit]._ReallyEnabled)
+         continue;
+
+      gen7_update_sampler_state(brw, unit, &table[unit]);
+   }
+
+   brw->state.dirty.cache |= CACHE_NEW_SAMPLER;
+}
+
+/* Tracked-state entry for the sampler table; it has a prepare hook only,
+ * no emit hook.
+ */
+const struct brw_tracked_state gen7_samplers = {
+   .prepare = gen7_prepare_samplers,
+   .dirty = {
+      .mesa = _NEW_TEXTURE,
+      .brw = BRW_NEW_BATCH,
+      .cache = 0,
+   },
+};
--- a/src/mesa/drivers/dri/i965/gen7_sf_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_sf_state.c
@ -0,0 +1,266 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+#include "brw_util.h"
+#include "main/macros.h"
+#include "intel_batchbuffer.h"
+
+/**
+ * Emits the 14-dword _3DSTATE_SBE packet.
+ *
+ * Layout as written below:
+ *   - dw1:     swizzle enable, number of outputs (FS inputs read), URB
+ *              entry read length/offset, and the point sprite origin bit.
+ *   - dw2-9:   attribute overrides, two fragment inputs per dword.
+ *   - dw10:    point sprite texcoord replacement mask.
+ *   - dw11:    constant (flat) interpolation mask.
+ *   - dw12-13: wrap-shortest enables, always zero here.
+ *
+ * \param brw  i965 context; reads the current VS program data, fragment
+ *             program and GL light/point/transform state.
+ */
+static void
+upload_sbe_state(struct brw_context *brw)
+{
+   struct intel_context *intel = &brw->intel;
+   struct gl_context *ctx = &intel->ctx;
+   /* CACHE_NEW_VS_PROG */
+   uint32_t num_inputs = brw_count_bits(brw->vs.prog_data->outputs_written);
+   /* BRW_NEW_FRAGMENT_PROGRAM */
+   uint32_t num_outputs = brw_count_bits(brw->fragment_program->Base.InputsRead);
+   uint32_t dw1, dw10, dw11;
+   int i;
+   /* Next fragment input index to examine; carried across the dword loop. */
+   int attr = 0;
+   /* _NEW_TRANSFORM */
+   int urb_start = ctx->Transform.ClipPlanesEnabled ? 2 : 1;
+   /* _NEW_LIGHT */
+   int two_side_color = (ctx->Light.Enabled && ctx->Light.Model.TwoSide);
+
+   /* FINISHME: Attribute Swizzle Control Mode? */
+   /* The read length is half the VS output count, rounded up. */
+   dw1 =
+      GEN7_SBE_SWIZZLE_ENABLE |
+      num_outputs << GEN7_SBE_NUM_OUTPUTS_SHIFT |
+      (num_inputs + 1) / 2 << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT |
+      urb_start << GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT;
+
+   /* _NEW_POINT */
+   if (ctx->Point.SpriteOrigin == GL_LOWER_LEFT)
+      dw1 |= GEN6_SF_POINT_SPRITE_LOWERLEFT;
+
+   /* One bit per texcoord set (0-7) whose coordinates are replaced. */
+   dw10 = 0;
+   if (ctx->Point.PointSprite) {
+       for (i = 0; i < 8; i++) {
+	   if (ctx->Point.CoordReplace[i])
+	       dw10 |= (1 << i);
+       }
+   }
+
+   /* _NEW_LIGHT (flat shading) */
+   /* The COL0/COL1 bits of InputsRead are shifted down by one unless WPOS
+    * is read -- presumably because WPOS then occupies no attribute slot;
+    * confirm against the FS input layout.
+    */
+   dw11 = 0;
+   if (ctx->Light.ShadeModel == GL_FLAT) {
+       dw11 |= ((brw->fragment_program->Base.InputsRead & (FRAG_BIT_COL0 | FRAG_BIT_COL1)) >>
+                ((brw->fragment_program->Base.InputsRead & FRAG_BIT_WPOS) ? 0 : 1));
+   }
+
+   BEGIN_BATCH(14);
+   OUT_BATCH(_3DSTATE_SBE << 16 | (14 - 2));
+   OUT_BATCH(dw1);
+
+   /* Output dwords 2 through 9 */
+   for (i = 0; i < 8; i++) {
+      uint32_t attr_overrides = 0;
+
+      /* Low 16 bits: override for the next fragment input that is read. */
+      for (; attr < 64; attr++) {
+	 if (brw->fragment_program->Base.InputsRead & BITFIELD64_BIT(attr)) {
+	    attr_overrides |= get_attr_override(brw, attr, two_side_color);
+	    attr++;
+	    break;
+	 }
+      }
+
+      /* High 16 bits: override for the input read after that one. */
+      for (; attr < 64; attr++) {
+	 if (brw->fragment_program->Base.InputsRead & BITFIELD64_BIT(attr)) {
+	    attr_overrides |= get_attr_override(brw, attr, two_side_color) << 16;
+	    attr++;
+	    break;
+	 }
+      }
+      OUT_BATCH(attr_overrides);
+   }
+
+   OUT_BATCH(dw10); /* point sprite texcoord bitmask */
+   OUT_BATCH(dw11); /* constant interp bitmask */
+   OUT_BATCH(0); /* wrapshortest enables 0-7 */
+   OUT_BATCH(0); /* wrapshortest enables 8-15 */
+   ADVANCE_BATCH();
+}
+
+/* Tracked-state entry registering upload_sbe_state with the GL, brw and
+ * cache dirty flags that its inputs are annotated with.
+ */
+const struct brw_tracked_state gen7_sbe_state = {
+   .emit = upload_sbe_state,
+   .dirty = {
+      .mesa = _NEW_LIGHT | _NEW_POINT | _NEW_TRANSFORM,
+      .brw = BRW_NEW_CONTEXT | BRW_NEW_FRAGMENT_PROGRAM,
+      .cache = CACHE_NEW_VS_PROG,
+   },
+};
+
+/**
+ * Emits the 7-dword _3DSTATE_SF packet from current GL state.
+ *
+ *   - dw1: statistics/viewport transform enables, depth buffer format,
+ *          winding, polygon offset enables, front/back polygon modes.
+ *   - dw2: cull mode, scissor enable, line width and line antialiasing.
+ *   - dw3: point width (and whether state or the vertex supplies it) and
+ *          provoking vertex selection.
+ *   - dw4-6: polygon offset constant, scale and clamp as floats.
+ *
+ * \param brw  i965 context the packet is emitted for.
+ */
+static void
+upload_sf_state(struct brw_context *brw)
+{
+   struct intel_context *intel = &brw->intel;
+   struct gl_context *ctx = &intel->ctx;
+   uint32_t dw1, dw2, dw3;
+   float point_size;
+   /* _NEW_BUFFERS */
+   bool render_to_fbo = brw->intel.ctx.DrawBuffer->Name != 0;
+
+   dw1 = GEN6_SF_STATISTICS_ENABLE | GEN6_SF_VIEWPORT_TRANSFORM_ENABLE;
+
+   /* _NEW_BUFFERS */
+   dw1 |= (gen7_depth_format(brw) << GEN7_SF_DEPTH_BUFFER_SURFACE_FORMAT_SHIFT);
+
+   /* _NEW_POLYGON */
+   /* The winding is inverted when rendering to a user FBO. */
+   if ((ctx->Polygon.FrontFace == GL_CCW) ^ render_to_fbo)
+      dw1 |= GEN6_SF_WINDING_CCW;
+
+   if (ctx->Polygon.OffsetFill)
+       dw1 |= GEN6_SF_GLOBAL_DEPTH_OFFSET_SOLID;
+
+   if (ctx->Polygon.OffsetLine)
+       dw1 |= GEN6_SF_GLOBAL_DEPTH_OFFSET_WIREFRAME;
+
+   if (ctx->Polygon.OffsetPoint)
+       dw1 |= GEN6_SF_GLOBAL_DEPTH_OFFSET_POINT;
+
+   switch (ctx->Polygon.FrontMode) {
+   case GL_FILL:
+       dw1 |= GEN6_SF_FRONT_SOLID;
+       break;
+
+   case GL_LINE:
+       dw1 |= GEN6_SF_FRONT_WIREFRAME;
+       break;
+
+   case GL_POINT:
+       dw1 |= GEN6_SF_FRONT_POINT;
+       break;
+
+   default:
+       assert(0);
+       break;
+   }
+
+   switch (ctx->Polygon.BackMode) {
+   case GL_FILL:
+       dw1 |= GEN6_SF_BACK_SOLID;
+       break;
+
+   case GL_LINE:
+       dw1 |= GEN6_SF_BACK_WIREFRAME;
+       break;
+
+   case GL_POINT:
+       dw1 |= GEN6_SF_BACK_POINT;
+       break;
+
+   default:
+       assert(0);
+       break;
+   }
+
+   dw2 = 0;
+
+   if (ctx->Polygon.CullFlag) {
+      switch (ctx->Polygon.CullFaceMode) {
+      case GL_FRONT:
+	 dw2 |= GEN6_SF_CULL_FRONT;
+	 break;
+      case GL_BACK:
+	 dw2 |= GEN6_SF_CULL_BACK;
+	 break;
+      case GL_FRONT_AND_BACK:
+	 dw2 |= GEN6_SF_CULL_BOTH;
+	 break;
+      default:
+	 assert(0);
+	 break;
+      }
+   } else {
+      dw2 |= GEN6_SF_CULL_NONE;
+   }
+
+   /* _NEW_SCISSOR */
+   if (ctx->Scissor.Enabled)
+      dw2 |= GEN6_SF_SCISSOR_ENABLE;
+
+   /* _NEW_LINE */
+   /* Line width is clamped to [0.0, 7.99] before fixed-point conversion. */
+   dw2 |= U_FIXED(CLAMP(ctx->Line.Width, 0.0, 7.99), 7) <<
+      GEN6_SF_LINE_WIDTH_SHIFT;
+   if (ctx->Line.SmoothFlag) {
+      dw2 |= GEN6_SF_LINE_AA_ENABLE;
+      dw2 |= GEN6_SF_LINE_AA_MODE_TRUE;
+      dw2 |= GEN6_SF_LINE_END_CAP_WIDTH_1_0;
+   }
+
+   /* FINISHME: Last Pixel Enable?  Vertex Sub Pixel Precision Select?
+    * FINISHME: AA Line Distance Mode?
+    */
+
+   dw3 = 0;
+
+   /* _NEW_POINT */
+   /* Use the state point width unless the vertex program point size is
+    * enabled or point attenuation is active.  Note this also reads
+    * ctx->VertexProgram state, not only ctx->Point.
+    */
+   if (!(ctx->VertexProgram.PointSizeEnabled || ctx->Point._Attenuated))
+      dw3 |= GEN6_SF_USE_STATE_POINT_WIDTH;
+
+   /* Clamp to ARB_point_parameters user limits */
+   point_size = CLAMP(ctx->Point.Size, ctx->Point.MinSize, ctx->Point.MaxSize);
+
+   /* Clamp to the hardware limits and convert to fixed point */
+   dw3 |= U_FIXED(CLAMP(point_size, 0.125, 255.875), 3);
+
+   /* _NEW_LIGHT */
+   /* Provoking vertex selection: last-vertex convention uses 2/2/1 for
+    * tri/trifan/line; first-vertex only sets the trifan field to 1.
+    */
+   if (ctx->Light.ProvokingVertex != GL_FIRST_VERTEX_CONVENTION) {
+      dw3 |=
+	 (2 << GEN6_SF_TRI_PROVOKE_SHIFT) |
+	 (2 << GEN6_SF_TRIFAN_PROVOKE_SHIFT) |
+	 (1 << GEN6_SF_LINE_PROVOKE_SHIFT);
+   } else {
+      dw3 |= (1 << GEN6_SF_TRIFAN_PROVOKE_SHIFT);
+   }
+
+   BEGIN_BATCH(7);
+   OUT_BATCH(_3DSTATE_SF << 16 | (7 - 2));
+   OUT_BATCH(dw1);
+   OUT_BATCH(dw2);
+   OUT_BATCH(dw3);
+   OUT_BATCH_F(ctx->Polygon.OffsetUnits * 2); /* constant.  copied from gen4 */
+   OUT_BATCH_F(ctx->Polygon.OffsetFactor); /* scale */
+   OUT_BATCH_F(0.0); /* XXX: global depth offset clamp */
+   ADVANCE_BATCH();
+}
+
+/**
+ * Tracked-state entry for upload_sf_state.
+ *
+ * _NEW_PROGRAM is included because upload_sf_state reads
+ * ctx->VertexProgram.PointSizeEnabled when deciding whether to set
+ * GEN6_SF_USE_STATE_POINT_WIDTH; toggling that enable alone must cause
+ * the packet to be re-emitted.
+ */
+const struct brw_tracked_state gen7_sf_state = {
+   .dirty = {
+      .mesa  = (_NEW_LIGHT |
+		_NEW_PROGRAM |
+		_NEW_POLYGON |
+		_NEW_LINE |
+		_NEW_SCISSOR |
+		_NEW_BUFFERS |
+		_NEW_POINT),
+      .brw   = (BRW_NEW_CONTEXT),
+      .cache = CACHE_NEW_VS_PROG
+   },
+   .emit = upload_sf_state,
+};
--- a/src/mesa/drivers/dri/i965/gen7_urb.c
+++ b/src/mesa/drivers/dri/i965/gen7_urb.c
@ -0,0 +1,128 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "main/macros.h"
+#include "intel_batchbuffer.h"
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+
+/**
+ * The following diagram shows how we partition the URB:
+ *
+ *      8kB         8kB              Rest of the URB space
+ *   ____-____   ____-____   _________________-_________________
+ *  /         \ /         \ /                                   \
+ * +-------------------------------------------------------------+
+ * | VS Push   | FS Push   | VS                                  |
+ * | Constants | Constants | Handles                             |
+ * +-------------------------------------------------------------+
+ *
+ * Notably, push constants must be stored at the beginning of the URB
+ * space, while entries can be stored anywhere.  Ivybridge has a maximum
+ * constant buffer size of 16kB.
+ *
+ * Currently we split the constant buffer space evenly between VS and FS.
+ * This is probably not ideal, but simple.
+ *
+ * Ivybridge GT1 has 128kB of URB space.
+ * Ivybridge GT2 has 256kB of URB space.
+ *
+ * See "Volume 2a: 3D Pipeline," section 1.8.
+ */
+static void
+prepare_urb(struct brw_context *brw)
+{
+   int entry_space;   /* bytes left for URB entries */
+   int vs_entries;
+
+   /* CACHE_NEW_VS_PROG: the VS entry size is never programmed below 1. */
+   brw->urb.vs_size = MAX2(brw->vs.prog_data->urb_entry_size, 1);
+
+   /* The first 16kB of the URB are reserved for push constants; the
+    * remainder is available for entries.
+    */
+   entry_space = (brw->urb.size - 16) * 1024;
+
+   /* As many entries as fit (vs_size is counted in 64-byte units here),
+    * capped at the context's maximum.
+    */
+   vs_entries = entry_space / (brw->urb.vs_size * 64);
+   if (vs_entries > brw->urb.max_vs_entries)
+      vs_entries = brw->urb.max_vs_entries;
+
+   /* According to volume 2a, the entry count must be a multiple of 8. */
+   brw->urb.nr_vs_entries = ROUND_DOWN_TO(vs_entries, 8);
+
+   /* URB starting addresses are in multiples of 8kB, so 2 skips the two
+    * 8kB push constant regions.
+    */
+   brw->urb.vs_start = 2;
+}
+
+/**
+ * Emits the push constant allocation and URB partitioning packets using
+ * the values computed by prepare_urb.
+ *
+ * VS and PS each get a push constant allocation of 8 (the PS one at
+ * offset 8), matching the 8kB/8kB split in the diagram above prepare_urb.
+ * The VS gets the computed entry count, size and start address; GS, HS
+ * and DS are given zero entries at start address 2.
+ *
+ * Asserts that the VS and GS entry counts are multiples of 8 and that no
+ * GS program buffer is set.
+ *
+ * \param brw  i965 context the packets are emitted for.
+ */
+static void
+upload_urb(struct brw_context *brw)
+{
+   struct intel_context *intel = &brw->intel;
+
+   assert(brw->urb.nr_vs_entries % 8 == 0);
+   assert(brw->urb.nr_gs_entries % 8 == 0);
+   /* GS requirement */
+   assert(!brw->gs.prog_bo);
+
+   BEGIN_BATCH(2);
+   OUT_BATCH(_3DSTATE_PUSH_CONSTANT_ALLOC_VS << 16 | (2 - 2));
+   OUT_BATCH(8);
+   ADVANCE_BATCH();
+
+   /* Same size as the VS allocation, placed at offset 8. */
+   BEGIN_BATCH(2);
+   OUT_BATCH(_3DSTATE_PUSH_CONSTANT_ALLOC_PS << 16 | (2 - 2));
+   OUT_BATCH(8 | 8 << GEN7_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT);
+   ADVANCE_BATCH();
+
+   /* The entry size field is written as vs_size - 1. */
+   BEGIN_BATCH(2);
+   OUT_BATCH(_3DSTATE_URB_VS << 16 | (2 - 2));
+   OUT_BATCH(brw->urb.nr_vs_entries |
+             ((brw->urb.vs_size - 1) << GEN7_URB_ENTRY_SIZE_SHIFT) |
+	     (brw->urb.vs_start << GEN7_URB_STARTING_ADDRESS_SHIFT));
+   ADVANCE_BATCH();
+
+   /* Allocate the GS, HS, and DS zero space - we don't use them. */
+   BEGIN_BATCH(2);
+   OUT_BATCH(_3DSTATE_URB_GS << 16 | (2 - 2));
+   OUT_BATCH((0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
+             (2 << GEN7_URB_STARTING_ADDRESS_SHIFT));
+   ADVANCE_BATCH();
+
+   BEGIN_BATCH(2);
+   OUT_BATCH(_3DSTATE_URB_HS << 16 | (2 - 2));
+   OUT_BATCH((0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
+             (2 << GEN7_URB_STARTING_ADDRESS_SHIFT));
+   ADVANCE_BATCH();
+
+   BEGIN_BATCH(2);
+   OUT_BATCH(_3DSTATE_URB_DS << 16 | (2 - 2));
+   OUT_BATCH((0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
+             (2 << GEN7_URB_STARTING_ADDRESS_SHIFT));
+   ADVANCE_BATCH();
+}
+
+/* Tracked-state entry for URB setup: prepare_urb computes the partition,
+ * upload_urb emits it.
+ */
+const struct brw_tracked_state gen7_urb = {
+   .prepare = prepare_urb,
+   .emit = upload_urb,
+   .dirty = {
+      .mesa = 0,
+      .brw = BRW_NEW_CONTEXT,
+      .cache = CACHE_NEW_VS_PROG | CACHE_NEW_GS_PROG,
+   },
+};
--- a/src/mesa/drivers/dri/i965/gen7_viewport_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_viewport_state.c
@ -0,0 +1,106 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+#include "intel_batchbuffer.h"
+
+/**
+ * Builds the combined SF/CLIP viewport structure in batch state space.
+ *
+ * The guardband is set to [-1, 1] in x and y, and the viewport matrix
+ * elements are taken from ctx->Viewport._WindowMap.  For window-system
+ * framebuffers (DrawBuffer->Name == 0) the y axis is flipped: the y scale
+ * is negated and biased by the framebuffer height.  Depth scale and
+ * translate are divided by the framebuffer's _DepthMaxF.
+ *
+ * The resulting offset is stored in brw->sf.vp_offset and mirrored into
+ * brw->clip.vp_offset.
+ *
+ * \param brw  i965 context whose viewport state is built.
+ */
+static void
+prepare_sf_clip_viewport(struct brw_context *brw)
+{
+   struct gl_context *ctx = &brw->intel.ctx;
+   const GLfloat depth_scale = 1.0F / ctx->DrawBuffer->_DepthMaxF;
+   GLfloat y_scale, y_bias;
+   const GLboolean render_to_fbo = (ctx->DrawBuffer->Name != 0);
+   const GLfloat *v = ctx->Viewport._WindowMap.m;
+   struct gen7_sf_clip_viewport *vp;
+
+   /* Allocate the whole structure: sizeof(*vp), not sizeof(vp), which is
+    * only the size of the pointer and would make the writes below run
+    * past the allocation.
+    */
+   vp = brw_state_batch(brw, sizeof(*vp), 64, &brw->sf.vp_offset);
+   /* Also assign to clip.vp_offset in case something uses it. */
+   brw->clip.vp_offset = brw->sf.vp_offset;
+
+   /* Disable guardband clipping (see gen6_viewport_state.c for rationale). */
+   vp->guardband.xmin = -1.0;
+   vp->guardband.xmax = 1.0;
+   vp->guardband.ymin = -1.0;
+   vp->guardband.ymax = 1.0;
+
+   /* _NEW_BUFFERS */
+   if (render_to_fbo) {
+      y_scale = 1.0;
+      y_bias = 0;
+   } else {
+      y_scale = -1.0;
+      y_bias = ctx->DrawBuffer->Height;
+   }
+
+   /* _NEW_VIEWPORT */
+   vp->viewport.m00 = v[MAT_SX];
+   vp->viewport.m11 = v[MAT_SY] * y_scale;
+   vp->viewport.m22 = v[MAT_SZ] * depth_scale;
+   vp->viewport.m30 = v[MAT_TX];
+   vp->viewport.m31 = v[MAT_TY] * y_scale + y_bias;
+   vp->viewport.m32 = v[MAT_TZ] * depth_scale;
+}
+
+/**
+ * Emits the 2-dword packet that points the SF/CLIP units at the viewport
+ * state whose offset is stored in brw->sf.vp_offset.
+ *
+ * \param brw  i965 context the packet is emitted for.
+ */
+static void upload_sf_clip_viewport_state_pointer(struct brw_context *brw)
+{
+   struct intel_context *intel = &brw->intel;
+
+   BEGIN_BATCH(2);
+   OUT_BATCH(_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CL << 16 | (2 - 2));
+   OUT_BATCH(brw->sf.vp_offset);
+   ADVANCE_BATCH();
+}
+
+/* Tracked-state entry for the SF/CLIP viewport: the prepare hook builds
+ * the structure, the emit hook writes the pointer packet.
+ */
+const struct brw_tracked_state gen7_sf_clip_viewport = {
+   .prepare = prepare_sf_clip_viewport,
+   .emit = upload_sf_clip_viewport_state_pointer,
+   .dirty = {
+      .mesa = (_NEW_VIEWPORT | _NEW_BUFFERS),
+      .brw = BRW_NEW_BATCH,
+      .cache = 0,
+   },
+};
+
+/* ----------------------------------------------------- */
+
+/**
+ * Emits the 2-dword packet that points the CC unit at the viewport state
+ * whose offset is stored in brw->cc.vp_offset.
+ *
+ * \param brw  i965 context the packet is emitted for.
+ */
+static void upload_cc_viewport_state_pointer(struct brw_context *brw)
+{
+   struct intel_context *intel = &brw->intel;
+
+   BEGIN_BATCH(2);
+   OUT_BATCH(_3DSTATE_VIEWPORT_STATE_POINTERS_CC << 16 | (2 - 2));
+   OUT_BATCH(brw->cc.vp_offset);
+   ADVANCE_BATCH();
+}
+
+/* Tracked-state entry for the CC viewport pointer packet. */
+const struct brw_tracked_state gen7_cc_viewport_state_pointer = {
+   .emit = upload_cc_viewport_state_pointer,
+   .dirty = {
+      .mesa = 0,
+      .brw = BRW_NEW_BATCH,
+      .cache = CACHE_NEW_CC_VP,
+   },
+};
--- a/src/mesa/drivers/dri/i965/gen7_vs_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_vs_state.c
@ -0,0 +1,99 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+#include "brw_util.h"
+#include "program/prog_parameter.h"
+#include "program/prog_statevars.h"
+#include "intel_batchbuffer.h"
+
+static void
+upload_vs_state(struct brw_context *brw)
+{
+   struct intel_context *intel = &brw->intel;
+   struct gl_context *ctx = &intel->ctx; /* NOTE(review): not referenced below */
+   /* 1) Binding table pointer for the VS stage. */
+   BEGIN_BATCH(2);
+   OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS_VS << 16 | (2 - 2));
+   OUT_BATCH(brw->vs.bind_bo_offset);
+   ADVANCE_BATCH();
+   /* 2) 3DSTATE_CONSTANT_VS: seven dwords whether or not constants exist. */
+   if (brw->vs.push_const_size == 0) {
+      /* Disable the push constant buffers: every payload dword is zero. */
+      BEGIN_BATCH(7);
+      OUT_BATCH(_3DSTATE_CONSTANT_VS << 16 | (7 - 2));
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      ADVANCE_BATCH();
+   } else {
+      BEGIN_BATCH(7);
+      OUT_BATCH(_3DSTATE_CONSTANT_VS << 16 | (7 - 2));
+      OUT_BATCH(brw->vs.push_const_size);
+      OUT_BATCH(0);
+      /* Offset of the VS push constants.  NOTE(review): the old text cited
+       * "gen6_prepare_wm_contants" (WM, misspelled); confirm the VS producer.
+       */
+      OUT_BATCH(brw->vs.push_const_offset);
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      ADVANCE_BATCH();
+   }
+   /* 3) 3DSTATE_VS: kernel pointer, surface count, URB read and thread limit. */
+   BEGIN_BATCH(6);
+   OUT_BATCH(_3DSTATE_VS << 16 | (6 - 2));
+   OUT_RELOC(brw->vs.prog_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
+   OUT_BATCH((0 << GEN6_VS_SAMPLER_COUNT_SHIFT) |
+	     GEN6_VS_FLOATING_POINT_MODE_ALT |
+	     (brw->vs.nr_surfaces << GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
+   OUT_BATCH(0); /* scratch space base offset */
+   OUT_BATCH((1 << GEN6_VS_DISPATCH_START_GRF_SHIFT) |
+	     (brw->vs.prog_data->urb_read_length << GEN6_VS_URB_READ_LENGTH_SHIFT) |
+	     (0 << GEN6_VS_URB_ENTRY_READ_OFFSET_SHIFT));
+   /* The thread limit is programmed as (max - 1). */
+   OUT_BATCH(((brw->vs_max_threads - 1) << GEN6_VS_MAX_THREADS_SHIFT) |
+	     GEN6_VS_STATISTICS_ENABLE |
+	     GEN6_VS_ENABLE);
+   ADVANCE_BATCH();
+}
+
+const struct brw_tracked_state gen7_vs_state = { /* VS stage packets */
+   .dirty = {
+      .mesa  = _NEW_TRANSFORM | _NEW_PROGRAM_CONSTANTS,
+      .brw   = (BRW_NEW_CURBE_OFFSETS |
+                BRW_NEW_NR_VS_SURFACES |
+		BRW_NEW_URB_FENCE |
+		BRW_NEW_CONTEXT |
+		BRW_NEW_VERTEX_PROGRAM |
+		BRW_NEW_VS_BINDING_TABLE |
+		BRW_NEW_BATCH),
+      .cache = CACHE_NEW_VS_PROG
+   },
+   .emit = upload_vs_state, /* emit-only: no prepare hook */
+};
--- a/src/mesa/drivers/dri/i965/gen7_wm_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_wm_state.c
@ -0,0 +1,255 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <stdbool.h>
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+#include "brw_util.h"
+#include "brw_wm.h"
+#include "program/prog_parameter.h"
+#include "program/prog_statevars.h"
+#include "intel_batchbuffer.h"
+
+static void
+gen7_prepare_wm_constants(struct brw_context *brw)
+{
+   struct intel_context *intel = &brw->intel;
+   struct gl_context *ctx = &intel->ctx;
+   const struct brw_fragment_program *fp =
+      brw_fragment_program_const(brw->fragment_program);
+   /* Builds the WM push constant array through brw_state_batch(). */
+   /* Updates the ParameterValues[i] pointers for all parameters of the
+    * basic type of PROGRAM_STATE_VAR.
+    */
+   /* XXX: Should this happen somewhere before to get our state flag set? */
+   _mesa_load_state_parameters(ctx, fp->program.Base.Parameters);
+
+   /* BRW_NEW_FRAGMENT_PROGRAM */
+   if (brw->wm.prog_data->nr_params != 0) {
+      float *constants;
+      unsigned int i;
+      /* One float per param; 32 is presumably the alignment -- TODO confirm. */
+      constants = brw_state_batch(brw,
+				  brw->wm.prog_data->nr_params *
+				  sizeof(float),
+				  32, &brw->wm.push_const_offset);
+      /* Each value goes through convert_param() with its param_convert[i]. */
+      for (i = 0; i < brw->wm.prog_data->nr_params; i++) {
+	 constants[i] = convert_param(brw->wm.prog_data->param_convert[i],
+				      *brw->wm.prog_data->param[i]);
+      }
+      /* Debug dump, disabled by "if (0)": eight values per "g<N>:" line. */
+      if (0) {
+	 printf("WM constants:\n");
+	 for (i = 0; i < brw->wm.prog_data->nr_params; i++) {
+	    if ((i & 7) == 0)
+	       printf("g%d: ", brw->wm.prog_data->first_curbe_grf + i / 8);
+	    printf("%8f ", constants[i]);
+	    if ((i & 7) == 7)
+	       printf("\n");
+	 }
+	 if ((i & 7) != 0)
+	    printf("\n");
+	 printf("\n");
+      }
+   }
+}
+
+const struct brw_tracked_state gen7_wm_constants = { /* WM push constant data */
+   .dirty = {
+      .mesa  = _NEW_PROGRAM_CONSTANTS,
+      .brw   = (BRW_NEW_BATCH | BRW_NEW_FRAGMENT_PROGRAM),
+      .cache = 0,
+   },
+   .prepare = gen7_prepare_wm_constants, /* prepare-only: no emit hook */
+};
+
+static void
+upload_wm_state(struct brw_context *brw)
+{
+   struct intel_context *intel = &brw->intel;
+   struct gl_context *ctx = &intel->ctx;
+   const struct brw_fragment_program *fp =
+      brw_fragment_program_const(brw->fragment_program);
+   bool writes_depth = false;
+   uint32_t dw1;
+   /* Build DWord 1 of 3DSTATE_WM from GL and fragment program state. */
+   dw1 = 0;
+   dw1 |= GEN7_WM_STATISTICS_ENABLE;
+   dw1 |= GEN7_WM_LINE_AA_WIDTH_1_0;
+   dw1 |= GEN7_WM_LINE_END_CAP_AA_WIDTH_0_5;
+
+   /* _NEW_LINE */
+   if (ctx->Line.StippleFlag)
+      dw1 |= GEN7_WM_LINE_STIPPLE_ENABLE;
+
+   /* _NEW_POLYGONSTIPPLE */
+   if (ctx->Polygon.StippleFlag)
+      dw1 |= GEN7_WM_POLYGON_STIPPLE_ENABLE;
+
+   /* BRW_NEW_FRAGMENT_PROGRAM */
+   if (fp->program.Base.InputsRead & (1 << FRAG_ATTRIB_WPOS))
+      dw1 |= GEN7_WM_USES_SOURCE_DEPTH | GEN7_WM_USES_SOURCE_W;
+   if (fp->program.Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) {
+      writes_depth = true;
+      dw1 |= GEN7_WM_PSCDEPTH_ON;
+   }
+
+   /* _NEW_COLOR */
+   if (fp->program.UsesKill || ctx->Color.AlphaEnabled)
+      dw1 |= GEN7_WM_KILL_ENABLE;
+
+   /* _NEW_BUFFERS: dispatch only if color writes, depth writes or kill apply */
+   if (brw_color_buffer_write_enabled(brw) || writes_depth ||
+       dw1 & GEN7_WM_KILL_ENABLE) {
+      dw1 |= GEN7_WM_DISPATCH_ENABLE;
+   }
+   /* Set unconditionally. */
+   dw1 |= GEN7_WM_PERSPECTIVE_PIXEL_BARYCENTRIC;
+
+   BEGIN_BATCH(3);
+   OUT_BATCH(_3DSTATE_WM << 16 | (3 - 2));
+   OUT_BATCH(dw1);
+   OUT_BATCH(0); /* last dword is always zero here */
+   ADVANCE_BATCH();
+}
+
+const struct brw_tracked_state gen7_wm_state = { /* 3DSTATE_WM packet */
+   .dirty = {
+      .mesa  = (_NEW_LINE | _NEW_POLYGON | _NEW_POLYGONSTIPPLE |
+	        _NEW_COLOR | _NEW_BUFFERS), /* NOTE(review): no _NEW_POLYGON tag in upload_wm_state */
+      .brw   = (BRW_NEW_CURBE_OFFSETS |
+		BRW_NEW_FRAGMENT_PROGRAM |
+                BRW_NEW_NR_WM_SURFACES |
+		BRW_NEW_URB_FENCE |
+		BRW_NEW_BATCH),
+      .cache = 0,
+   },
+   .emit = upload_wm_state, /* emit-only: no prepare hook */
+};
+
+static void
+upload_ps_state(struct brw_context *brw)
+{
+   struct intel_context *intel = &brw->intel;
+   uint32_t dw2, dw4, dw5;
+   /* Pointer packets first: PS binding table, then PS sampler state. */
+   BEGIN_BATCH(2);
+   OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS_PS << 16 | (2 - 2));
+   OUT_BATCH(brw->wm.bind_bo_offset);
+   ADVANCE_BATCH();
+
+   /* CACHE_NEW_SAMPLER */
+   BEGIN_BATCH(2);
+   OUT_BATCH(_3DSTATE_SAMPLER_STATE_POINTERS_PS << 16 | (2 - 2));
+   OUT_BATCH(brw->wm.sampler_offset);
+   ADVANCE_BATCH();
+
+   /* CACHE_NEW_WM_PROG */
+   if (brw->wm.prog_data->nr_params == 0) {
+      /* Disable the push constant buffers. */
+      BEGIN_BATCH(7);
+      OUT_BATCH(_3DSTATE_CONSTANT_PS << 16 | (7 - 2));
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      ADVANCE_BATCH();
+   } else {
+      BEGIN_BATCH(7);
+      OUT_BATCH(_3DSTATE_CONSTANT_PS << 16 | (7 - 2));
+      /* Length: nr_params rounded up to dispatch_width, then divided by 8. */
+      OUT_BATCH(ALIGN(brw->wm.prog_data->nr_params,
+		      brw->wm.prog_data->dispatch_width) / 8);
+      OUT_BATCH(0);
+      /* Pointer to the WM constant buffer.  Covered by the set of
+       * state flags from gen7_prepare_wm_constants
+       */
+      OUT_BATCH(brw->wm.push_const_offset);
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      ADVANCE_BATCH();
+   }
+   /* Assemble DWords 2, 4 and 5 of 3DSTATE_PS. */
+   dw2 = dw4 = dw5 = 0;
+   /* The sampler count is programmed in units of four, rounded up. */
+   dw2 |= (ALIGN(brw->wm.sampler_count, 4) / 4) << GEN7_PS_SAMPLER_COUNT_SHIFT;
+
+   /* BRW_NEW_NR_WM_SURFACES */
+   dw2 |= brw->wm.nr_surfaces << GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT;
+
+   /* OpenGL non-ieee floating point mode */
+   dw2 |= GEN7_PS_FLOATING_POINT_MODE_ALT;
+
+   /* CACHE_NEW_SAMPLER -- NOTE(review): tag looks misplaced; sampler_count is read above */
+   dw4 |= (brw->wm_max_threads - 1) << GEN7_PS_MAX_THREADS_SHIFT;
+
+   /* CACHE_NEW_WM_PROG */
+   if (brw->wm.prog_data->nr_params > 0)
+      dw4 |= GEN7_PS_PUSH_CONSTANT_ENABLE;
+
+   /* BRW_NEW_FRAGMENT_PROGRAM */
+   if (brw->fragment_program->Base.InputsRead != 0)
+      dw4 |= GEN7_PS_ATTRIBUTE_ENABLE;
+   /* Any dispatch_width other than 8 selects the 16-wide enable bit. */
+   if (brw->wm.prog_data->dispatch_width == 8)
+      dw4 |= GEN7_PS_8_DISPATCH_ENABLE;
+   else
+      dw4 |= GEN7_PS_16_DISPATCH_ENABLE;
+
+   /* BRW_NEW_CURBE_OFFSETS */
+   dw5 |= (brw->wm.prog_data->first_curbe_grf <<
+	   GEN7_PS_DISPATCH_START_GRF_SHIFT_0);
+
+   BEGIN_BATCH(8);
+   OUT_BATCH(_3DSTATE_PS << 16 | (8 - 2));
+   OUT_RELOC(brw->wm.prog_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
+   OUT_BATCH(dw2);
+   OUT_BATCH(0); /* scratch space base offset */
+   OUT_BATCH(dw4);
+   OUT_BATCH(dw5);
+   /* FINISHME: need to upload the SIMD16 program */
+   OUT_BATCH(0); /* kernel 1 pointer */
+   OUT_BATCH(0); /* kernel 2 pointer */
+   ADVANCE_BATCH();
+}
+
+const struct brw_tracked_state gen7_ps_state = { /* PS pointers, constants and 3DSTATE_PS */
+   .dirty = {
+      .mesa  = (_NEW_LINE | _NEW_POLYGON | _NEW_POLYGONSTIPPLE |
+		_NEW_PROGRAM_CONSTANTS), /* NOTE(review): upload_ps_state reads no gl_context fields */
+      .brw   = (BRW_NEW_CURBE_OFFSETS |
+		BRW_NEW_FRAGMENT_PROGRAM |
+                BRW_NEW_NR_WM_SURFACES |
+		BRW_NEW_PS_BINDING_TABLE |
+		BRW_NEW_URB_FENCE |
+		BRW_NEW_BATCH),
+      .cache = (CACHE_NEW_SAMPLER |
+		CACHE_NEW_WM_PROG)
+   },
+   .emit = upload_ps_state, /* emit-only: no prepare hook */
+};
--- a/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c
@ -0,0 +1,397 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+#include "main/mtypes.h"
+#include "main/samplerobj.h"
+#include "main/texstore.h"
+#include "program/prog_parameter.h"
+
+#include "intel_mipmap_tree.h"
+#include "intel_batchbuffer.h"
+#include "intel_tex.h"
+#include "intel_fbo.h"
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+#include "brw_wm.h"
+
+/*
+ * Set the ss0 tiling fields of a surface state from an I915_TILING_* mode.
+ * Unrecognized modes leave both fields as they were.
+ */
+static void
+gen7_set_surface_tiling(struct gen7_surface_state *surf, uint32_t tiling)
+{
+   if (tiling == I915_TILING_NONE) {
+      surf->ss0.tile_walk = 0;
+      surf->ss0.tiled_surface = 0;
+   } else if (tiling == I915_TILING_X) {
+      surf->ss0.tile_walk = BRW_TILEWALK_XMAJOR;
+      surf->ss0.tiled_surface = 1;
+   } else if (tiling == I915_TILING_Y) {
+      surf->ss0.tile_walk = BRW_TILEWALK_YMAJOR;
+      surf->ss0.tiled_surface = 1;
+   }
+}
+
+static void
+gen7_update_texture_surface(struct gl_context *ctx, GLuint unit)
+{
+   struct brw_context *brw = brw_context(ctx);
+   struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
+   struct intel_texture_object *intelObj = intel_texture_object(tObj);
+   struct gl_texture_image *firstImage = tObj->Image[0][tObj->BaseLevel];
+   struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit);
+   const GLuint surf_index = SURF_INDEX_TEXTURE(unit);
+   struct gen7_surface_state *surf;
+   /* Get a surface state (offset saved in surf_offset[surf_index]) and zero it. */
+   surf = brw_state_batch(brw, sizeof(*surf), 32,
+			 &brw->wm.surf_offset[surf_index]);
+   memset(surf, 0, sizeof(*surf));
+   /* Type and format come from the texture target and its base-level image. */
+   surf->ss0.surface_type = translate_tex_target(tObj->Target);
+   surf->ss0.surface_format = translate_tex_format(firstImage->TexFormat,
+                                                   firstImage->InternalFormat,
+                                                   sampler->DepthMode,
+                                                   sampler->sRGBDecode);
+   if (tObj->Target == GL_TEXTURE_CUBE_MAP) { /* set all six cube face bits */
+      surf->ss0.cube_pos_x = 1;
+      surf->ss0.cube_pos_y = 1;
+      surf->ss0.cube_pos_z = 1;
+      surf->ss0.cube_neg_x = 1;
+      surf->ss0.cube_neg_y = 1;
+      surf->ss0.cube_neg_z = 1;
+   }
+
+   gen7_set_surface_tiling(surf, intelObj->mt->region->tiling);
+
+   /* ss0 remaining fields:
+    * - is_array
+    * - vertical_alignment
+    * - horizontal_alignment
+    * - vert_line_stride (exists on gen6 but we ignore it)
+    * - vert_line_stride_ofs (exists on gen6 but we ignore it)
+    * - surface_array_spacing
+    * - render_cache_read_write (exists on gen6 but ignored here)
+    */
+
+   surf->ss1.base_addr = intelObj->mt->region->buffer->offset; /* reloc */
+   /* Width, height, depth and pitch are all stored minus one. */
+   surf->ss2.width = firstImage->Width - 1;
+   surf->ss2.height = firstImage->Height - 1;
+
+   surf->ss3.pitch = (intelObj->mt->region->pitch * intelObj->mt->cpp) - 1;
+   surf->ss3.depth = firstImage->Depth - 1;
+
+   /* ss4: ignored? */
+
+   surf->ss5.mip_count = intelObj->_MaxLevel - tObj->BaseLevel;
+   surf->ss5.min_lod = 0;
+
+   /* ss5 remaining fields:
+    * - x_offset (N/A for textures?)
+    * - y_offset (ditto)
+    * - cache_control
+    */
+
+   /* Emit relocation to surface contents */
+   drm_intel_bo_emit_reloc(brw->intel.batch.bo,
+			   brw->wm.surf_offset[surf_index] +
+			   offsetof(struct gen7_surface_state, ss1),
+			   intelObj->mt->region->buffer, 0,
+			   I915_GEM_DOMAIN_SAMPLER, 0);
+}
+
+/**
+ * Create the constant buffer surface.  Vertex/fragment shader constants will
+ * be read from this buffer with Data Port Read instructions/messages.
+ */
+static void
+gen7_create_constant_surface(struct brw_context *brw,
+			     drm_intel_bo *bo,
+			     int width,
+			     uint32_t *out_offset)
+{
+   const GLint w = width - 1;
+   struct gen7_surface_state *surf;
+   /* The offset of the new surface state is written to *out_offset. */
+   surf = brw_state_batch(brw, sizeof(*surf), 32, out_offset);
+   memset(surf, 0, sizeof(*surf));
+
+   surf->ss0.surface_type = BRW_SURFACE_BUFFER;
+   surf->ss0.surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
+
+   surf->ss0.render_cache_read_write = 1;
+
+   assert(bo);
+   surf->ss1.base_addr = bo->offset; /* reloc */
+   /* (width - 1) is split across the width, height and depth fields. */
+   surf->ss2.width = w & 0x7f;            /* bits 6:0 of size or width */
+   surf->ss2.height = (w >> 7) & 0x1fff;  /* bits 19:7 of size or width */
+   surf->ss3.depth = (w >> 20) & 0x7f;    /* bits 26:20 of size or width */
+   surf->ss3.pitch = (width * 16) - 1; /* ignored?? */
+   gen7_set_surface_tiling(surf, I915_TILING_NONE); /* always untiled (old note read "tiling now allowed") */
+
+   /* Emit relocation to surface contents.  Section 5.1.1 of the gen4
+    * bspec ("Data Cache") says that the data cache does not exist as
+    * a separate cache and is just the sampler cache.
+    */
+   drm_intel_bo_emit_reloc(brw->intel.batch.bo,
+			   (*out_offset +
+			    offsetof(struct gen7_surface_state, ss1)),
+			   bo, 0,
+			   I915_GEM_DOMAIN_SAMPLER, 0);
+}
+
+/**
+ * Updates surface / buffer for fragment shader constant buffer, if
+ * one is required.
+ *
+ * This consumes the state updates for the constant buffer, and produces
+ * BRW_NEW_WM_SURFACES to get picked up by brw_prepare_wm_surfaces for
+ * inclusion in the binding table.
+ */
+static void upload_wm_constant_surface(struct brw_context *brw)
+{
+   GLuint surf = SURF_INDEX_FRAG_CONST_BUFFER;
+   struct brw_fragment_program *fp =
+      (struct brw_fragment_program *) brw->fragment_program;
+   const struct gl_program_parameter_list *params =
+      fp->program.Base.Parameters;
+
+   /* If there's no constant buffer, then no surface BO is needed to point at
+    * it.
+    */
+   if (brw->wm.const_bo == 0) {
+      if (brw->wm.surf_offset[surf]) { /* flag a change only if a slot was set */
+	 brw->state.dirty.brw |= BRW_NEW_WM_SURFACES;
+	 brw->wm.surf_offset[surf] = 0;
+      }
+      return;
+   }
+   /* The surface is sized by the parameter list's NumParameters. */
+   gen7_create_constant_surface(brw, brw->wm.const_bo, params->NumParameters,
+			        &brw->wm.surf_offset[surf]);
+   brw->state.dirty.brw |= BRW_NEW_WM_SURFACES;
+}
+
+const struct brw_tracked_state gen7_wm_constant_surface = { /* WM constant buffer surface */
+   .dirty = {
+      .mesa = 0,
+      .brw = (BRW_NEW_WM_CONSTBUF |
+	      BRW_NEW_BATCH),
+      .cache = 0
+   },
+   .emit = upload_wm_constant_surface, /* emit-only: no prepare hook */
+};
+
+static void
+gen7_update_null_renderbuffer_surface(struct brw_context *brw, unsigned unit)
+{
+   struct gen7_surface_state *surf;
+   /* Zeroed surface state of type NULL; its offset goes in surf_offset[unit]. */
+   surf = brw_state_batch(brw, sizeof(*surf), 32,
+			 &brw->wm.surf_offset[unit]);
+   memset(surf, 0, sizeof(*surf));
+
+   surf->ss0.surface_type = BRW_SURFACE_NULL;
+   surf->ss0.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
+}
+
+/**
+ * Sets up a surface state structure to point at the given region.
+ * While it is only used for the front/back buffer currently, it should be
+ * usable for further buffers when doing ARB_draw_buffer support.
+ */
+static void
+gen7_update_renderbuffer_surface(struct brw_context *brw,
+				 struct gl_renderbuffer *rb,
+				 unsigned int unit)
+{
+   struct intel_context *intel = &brw->intel;
+   struct gl_context *ctx = &intel->ctx;
+   struct intel_renderbuffer *irb = intel_renderbuffer(rb);
+   struct intel_region *region = irb->region;
+   struct gen7_surface_state *surf;
+   uint32_t tile_x, tile_y;
+   /* Get a surface state (offset saved in surf_offset[unit]) and zero it. */
+   surf = brw_state_batch(brw, sizeof(*surf), 32,
+			  &brw->wm.surf_offset[unit]);
+   memset(surf, 0, sizeof(*surf));
+   /* Pick the hardware format; some Mesa formats are remapped below. */
+   switch (irb->Base.Format) {
+   case MESA_FORMAT_XRGB8888:
+      /* XRGB is handled as ARGB because the chips in this family
+       * cannot render to XRGB targets.  This means that we have to
+       * mask writes to alpha (ala glColorMask) and reconfigure the
+       * alpha blending hardware to use GL_ONE (or GL_ZERO) for
+       * cases where GL_DST_ALPHA (or GL_ONE_MINUS_DST_ALPHA) is
+       * used.
+       */
+      surf->ss0.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
+      break;
+   case MESA_FORMAT_INTENSITY_FLOAT32:
+   case MESA_FORMAT_LUMINANCE_FLOAT32:
+      /* For these formats, we just need to read/write the first
+       * channel into R, which is to say that we just treat them as
+       * GL_RED.
+       */
+      surf->ss0.surface_format = BRW_SURFACEFORMAT_R32_FLOAT;
+      break;
+   case MESA_FORMAT_SARGB8:
+      /* without GL_EXT_framebuffer_sRGB we shouldn't bind sRGB
+	 surfaces to the blend/update as sRGB */
+      if (ctx->Color.sRGBEnabled)
+	 surf->ss0.surface_format = brw_format_for_mesa_format(irb->Base.Format);
+      else
+	 surf->ss0.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
+      break;
+   default:
+      assert(brw_render_target_supported(irb->Base.Format));
+      surf->ss0.surface_format = brw_format_for_mesa_format(irb->Base.Format);
+   }
+
+   surf->ss0.surface_type = BRW_SURFACE_2D;
+   /* reloc */
+   surf->ss1.base_addr = intel_region_tile_offsets(region, &tile_x, &tile_y);
+   surf->ss1.base_addr += region->buffer->offset; /* reloc */
+   /* The x/y remainder is stored in ss5 as tile_x / 4 and tile_y / 2. */
+   assert(brw->has_surface_tile_offset);
+   /* Note that the low bits of these fields are missing, so
+    * there's the possibility of getting in trouble.
+    */
+   assert(tile_x % 4 == 0);
+   assert(tile_y % 2 == 0);
+   surf->ss5.x_offset = tile_x / 4;
+   surf->ss5.y_offset = tile_y / 2;
+
+   surf->ss2.width = rb->Width - 1;
+   surf->ss2.height = rb->Height - 1;
+   gen7_set_surface_tiling(surf, region->tiling);
+   surf->ss3.pitch = (region->pitch * region->cpp) - 1;
+   /* Reloc delta: base_addr minus the buffer offset added to it above. */
+   drm_intel_bo_emit_reloc(brw->intel.batch.bo,
+			   brw->wm.surf_offset[unit] +
+			   offsetof(struct gen7_surface_state, ss1),
+			   region->buffer,
+			   surf->ss1.base_addr - region->buffer->offset,
+			   I915_GEM_DOMAIN_RENDER,
+			   I915_GEM_DOMAIN_RENDER);
+}
+
+static void
+prepare_wm_surfaces(struct brw_context *brw)
+{
+   struct gl_context *ctx = &brw->intel.ctx;
+   int i;
+   int nr_surfaces = 0;
+   /* nr_surfaces ends up as the last SURF_INDEX_* assigned below, plus one. */
+   if (ctx->DrawBuffer->_NumColorDrawBuffers >= 1) {
+      for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) {
+	 struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[i];
+	 struct intel_renderbuffer *irb = intel_renderbuffer(rb);
+	 struct intel_region *region = irb ? irb->region : NULL;
+	 /* A draw buffer with no region still counts toward nr_surfaces. */
+	 if (region)
+	    brw_add_validated_bo(brw, region->buffer);
+	 nr_surfaces = SURF_INDEX_DRAW(i) + 1;
+      }
+   }
+   /* The fragment constant buffer, when present, is validated as well. */
+   if (brw->wm.const_bo) {
+      brw_add_validated_bo(brw, brw->wm.const_bo);
+      nr_surfaces = SURF_INDEX_FRAG_CONST_BUFFER + 1;
+   }
+   /* Texture buffers are validated for units with _ReallyEnabled set. */
+   for (i = 0; i < BRW_MAX_TEX_UNIT; i++) {
+      const struct gl_texture_unit *texUnit = &ctx->Texture.Unit[i];
+      struct gl_texture_object *tObj = texUnit->_Current;
+      struct intel_texture_object *intelObj = intel_texture_object(tObj);
+
+      if (texUnit->_ReallyEnabled) {
+	 brw_add_validated_bo(brw, intelObj->mt->region->buffer);
+	 nr_surfaces = SURF_INDEX_TEXTURE(i) + 1;
+      }
+   }
+
+   /* Have to update this in our prepare, since the unit's prepare
+    * relies on it.
+    */
+   if (brw->wm.nr_surfaces != nr_surfaces) {
+      brw->wm.nr_surfaces = nr_surfaces;
+      brw->state.dirty.brw |= BRW_NEW_NR_WM_SURFACES;
+   }
+}
+
+/**
+ * Constructs the set of surface state objects pointed to by the
+ * binding table.
+ */
+static void
+upload_wm_surfaces(struct brw_context *brw)
+{
+   struct gl_context *ctx = &brw->intel.ctx;
+   GLuint i;
+
+   /* _NEW_BUFFERS | _NEW_COLOR */
+   /* Update surfaces for drawing buffers */
+   if (ctx->DrawBuffer->_NumColorDrawBuffers >= 1) {
+      for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) {
+	 if (intel_renderbuffer(ctx->DrawBuffer->_ColorDrawBuffers[i])) {
+	    gen7_update_renderbuffer_surface(brw,
+	       ctx->DrawBuffer->_ColorDrawBuffers[i], i);
+	 } else {
+	    gen7_update_null_renderbuffer_surface(brw, i);
+	 }
+      }
+   } else { /* no color draw buffers: a null surface goes in slot 0 */
+      gen7_update_null_renderbuffer_surface(brw, 0);
+   }
+
+   /* Update surfaces for textures */
+   for (i = 0; i < BRW_MAX_TEX_UNIT; i++) {
+      const struct gl_texture_unit *texUnit = &ctx->Texture.Unit[i];
+      const GLuint surf = SURF_INDEX_TEXTURE(i);
+
+      /* _NEW_TEXTURE */
+      if (texUnit->_ReallyEnabled) {
+	 gen7_update_texture_surface(ctx, i);
+      } else {
+         brw->wm.surf_offset[surf] = 0;
+      }
+   }
+   /* BRW_NEW_WM_SURFACES is raised on every call. */
+   brw->state.dirty.brw |= BRW_NEW_WM_SURFACES;
+}
+
+const struct brw_tracked_state gen7_wm_surfaces = { /* render target and texture surfaces */
+   .dirty = {
+      .mesa = (_NEW_COLOR |
+               _NEW_TEXTURE |
+               _NEW_BUFFERS),
+      .brw = BRW_NEW_BATCH,
+      .cache = 0
+   },
+   .prepare = prepare_wm_surfaces, /* validates buffers, updates nr_surfaces */
+   .emit = upload_wm_surfaces, /* builds the surface states */
+};
--- a/src/mesa/drivers/dri/intel/intel_chipset.h
+++ b/src/mesa/drivers/dri/intel/intel_chipset.h
@ -80,6 +80,12 @@
 #define PCI_CHIP_SANDYBRIDGE_M_GT2_PLUS	0x0126
 #define PCI_CHIP_SANDYBRIDGE_S		0x010A	/* Server */

+#define PCI_CHIP_IVYBRIDGE_GT1          0x0152  /* Desktop */
+#define PCI_CHIP_IVYBRIDGE_GT2          0x0162
+#define PCI_CHIP_IVYBRIDGE_M_GT1        0x0156  /* Mobile */
+#define PCI_CHIP_IVYBRIDGE_M_GT2        0x0166
+#define PCI_CHIP_IVYBRIDGE_S_GT1        0x015a  /* Server */
+
 #define IS_MOBILE(devid)	(devid == PCI_CHIP_I855_GM || \
 				 devid == PCI_CHIP_I915_GM || \
 				 devid == PCI_CHIP_I945_GM || \
@ -125,21 +131,33 @@
 /* Compat macro for intel_decode.c */
 #define IS_IRONLAKE(devid)	IS_GEN5(devid)

-#define IS_GT1(devid)		(devid == PCI_CHIP_SANDYBRIDGE_GT1 || \
+#define IS_SNB_GT1(devid)	(devid == PCI_CHIP_SANDYBRIDGE_GT1 || \
 				 devid == PCI_CHIP_SANDYBRIDGE_M_GT1 || \
 				 devid == PCI_CHIP_SANDYBRIDGE_S)

-#define IS_GT2(devid)		(devid == PCI_CHIP_SANDYBRIDGE_GT2 || \
+#define IS_SNB_GT2(devid)	(devid == PCI_CHIP_SANDYBRIDGE_GT2 || \
 				 devid == PCI_CHIP_SANDYBRIDGE_GT2_PLUS	|| \
 				 devid == PCI_CHIP_SANDYBRIDGE_M_GT2 || \
 				 devid == PCI_CHIP_SANDYBRIDGE_M_GT2_PLUS)

-#define IS_GEN6(devid)		(IS_GT1(devid) || IS_GT2(devid))
+#define IS_GEN6(devid)		(IS_SNB_GT1(devid) || IS_SNB_GT2(devid))
+
+#define IS_IVB_GT1(devid)       (devid == PCI_CHIP_IVYBRIDGE_GT1 || \
+				 devid == PCI_CHIP_IVYBRIDGE_M_GT1 || \
+				 devid == PCI_CHIP_IVYBRIDGE_S_GT1) /* desktop, mobile, server */
+/* GT2: desktop and mobile IDs only; no server GT2 ID is defined above. */
+#define IS_IVB_GT2(devid)       (devid == PCI_CHIP_IVYBRIDGE_GT2 || \
+				 devid == PCI_CHIP_IVYBRIDGE_M_GT2)
+/* True for any Ivybridge device ID covered by the two macros above. */
+#define IS_IVYBRIDGE(devid)     (IS_IVB_GT1(devid) || IS_IVB_GT2(devid))
+/* Gen7 is defined purely as Ivybridge here. */
+#define IS_GEN7(devid)	        IS_IVYBRIDGE(devid)

 #define IS_965(devid)		(IS_GEN4(devid) || \
 				 IS_G4X(devid) || \
 				 IS_GEN5(devid) || \
-				 IS_GEN6(devid))
+				 IS_GEN6(devid) || \
+				 IS_GEN7(devid))

 #define IS_9XX(devid)		(IS_915(devid) || \
 				 IS_945(devid) || \
--- a/src/mesa/drivers/dri/intel/intel_context.c
+++ b/src/mesa/drivers/dri/intel/intel_context.c
@ -173,6 +173,17 @@ intelGetString(struct gl_context * ctx, GLenum name)
      case PCI_CHIP_SANDYBRIDGE_S:
 	 chipset = "Intel(R) Sandybridge Server";
 	 break;
+      case PCI_CHIP_IVYBRIDGE_GT1:
+      case PCI_CHIP_IVYBRIDGE_GT2:
+	 chipset = "Intel(R) Ivybridge Desktop";
+	 break;
+      case PCI_CHIP_IVYBRIDGE_M_GT1:
+      case PCI_CHIP_IVYBRIDGE_M_GT2:
+	 chipset = "Intel(R) Ivybridge Mobile";
+	 break;
+      case PCI_CHIP_IVYBRIDGE_S_GT1:
+	 chipset = "Intel(R) Ivybridge Server";
+	 break;
      default:
         chipset = "Unknown Intel Chipset";
         break;
@ -653,7 +664,10 @@ intelInitContext(struct intel_context *intel,

   intel->has_xrgb_textures = GL_TRUE;
   intel->gen = intelScreen->gen;
-   if (IS_GEN6(intel->intelScreen->deviceID)) {
+   if (IS_GEN7(intel->intelScreen->deviceID)) {
+      intel->needs_ff_sync = GL_TRUE;
+      intel->has_luminance_srgb = GL_TRUE;
+   } else if (IS_GEN6(intel->intelScreen->deviceID)) {
      intel->needs_ff_sync = GL_TRUE;
      intel->has_luminance_srgb = GL_TRUE;
   } else if (IS_GEN5(intel->intelScreen->deviceID)) {
--- a/src/mesa/drivers/dri/intel/intel_regions.c
+++ b/src/mesa/drivers/dri/intel/intel_regions.c
@ -524,3 +524,38 @@ intel_region_buffer(struct intel_context *intel,

   return region->buffer;
 }
+
+/**
+ * Split a region's draw offset into a tile-aligned base offset plus the
+ * x/y remainder inside that tile.  Rendering to a tiled buffer needs a
+ * page-aligned base, but render-to-texture points the surface at a mipmap
+ * level that may start in the middle of a tile.
+ *
+ * Returns the aligned byte offset; *tile_x / *tile_y receive the remainder
+ * (always 0 for untiled regions).
+ */
+uint32_t
+intel_region_tile_offsets(struct intel_region *region,
+			  uint32_t *tile_x,
+			  uint32_t *tile_y)
+{
+   const uint32_t row_bytes = region->cpp * region->pitch;
+   uint32_t tile_w, tile_h;
+
+   if (region->tiling == I915_TILING_NONE) {
+      *tile_x = *tile_y = 0;
+      return region->draw_y * row_bytes + region->draw_x * region->cpp;
+   }
+   if (region->tiling == I915_TILING_X) {
+      tile_w = 512 / region->cpp;   /* 512-byte rows, 8 rows per tile */
+      tile_h = 8;
+   } else {
+      assert(region->tiling == I915_TILING_Y);
+      tile_w = 128 / region->cpp;   /* 128-byte rows, 32 rows per tile */
+      tile_h = 32;
+   }
+   *tile_x = region->draw_x % tile_w;
+   *tile_y = region->draw_y % tile_h;
+   return (region->draw_x - *tile_x) / tile_w * 4096 +
+	  (region->draw_y / tile_h) * (tile_h * row_bytes);
+}
--- a/src/mesa/drivers/dri/intel/intel_regions.h
+++ b/src/mesa/drivers/dri/intel/intel_regions.h
@ -142,6 +142,10 @@ drm_intel_bo *intel_region_buffer(struct intel_context *intel,
 				  struct intel_region *region,
 				  GLuint flag);

+uint32_t intel_region_tile_offsets(struct intel_region *region,
+				   uint32_t *tile_x,
+				   uint32_t *tile_y);
+
 void _mesa_copy_rect(GLubyte * dst,
                GLuint cpp,
                GLuint dst_pitch,
--- a/src/mesa/drivers/dri/intel/intel_screen.c
+++ b/src/mesa/drivers/dri/intel/intel_screen.c
@ -556,7 +556,9 @@ __DRIconfig **intelInitScreen2(__DRIscreen *psp)
      intelScreen->deviceID = strtod(devid_override, NULL);
   }

-   if (IS_GEN6(intelScreen->deviceID)) {
+   if (IS_GEN7(intelScreen->deviceID)) {
+      intelScreen->gen = 7;
+   } else if (IS_GEN6(intelScreen->deviceID)) {
      intelScreen->gen = 6;
   } else if (IS_GEN5(intelScreen->deviceID)) {
      intelScreen->gen = 5;
--- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.c
@ -534,10 +534,10 @@ int rc_get_max_index(
 	rc_register_file file)
 {
 	struct max_data data;
+	struct rc_instruction * inst;
 	data.Max = 0;
 	data.HasFileType = 0;
 	data.File = file;
-	struct rc_instruction * inst;
 	for (inst = c->Program.Instructions.Next;
 					inst != &c->Program.Instructions;
 					inst = inst->Next) {
--- a/src/mesa/drivers/dri/r300/compiler/radeon_variable.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_variable.c
@ -292,20 +292,20 @@ struct rc_variable * rc_variable(
 }

 static void get_variable_helper(
-	struct rc_list ** aborted_list,
 	struct rc_list ** variable_list,
-	unsigned int aborted,
 	struct rc_variable * variable)
 {
-	if (aborted) {
-		rc_list_add(aborted_list, rc_list(&variable->C->Pool, variable));
-	} else {
-		rc_list_add(variable_list, rc_list(&variable->C->Pool, variable));
+	struct rc_list * list_ptr;
+	for (list_ptr = *variable_list; list_ptr; list_ptr = list_ptr->Next) {
+		if (readers_intersect(variable, list_ptr->Item)) {
+			rc_variable_add_friend(list_ptr->Item, variable);
+			return;
+		}
 	}
+	rc_list_add(variable_list, rc_list(&variable->C->Pool, variable));
 }

 static void get_variable_pair_helper(
-	struct rc_list ** aborted_list,
 	struct rc_list ** variable_list,
 	struct radeon_compiler * c,
 	struct rc_instruction * inst,
@@ -338,8 +338,7 @@ static void get_variable_pair_helper(
 	}
 	new_var = rc_variable(c, file, sub_inst->DestIndex, writemask,
 								&reader_data);
-	get_variable_helper(aborted_list, variable_list, reader_data.Abort,
-								new_var);
+	get_variable_helper(variable_list, new_var);
 }

 /**
@@ -352,10 +351,7 @@ static void get_variable_pair_helper(
 struct rc_list * rc_get_variables(struct radeon_compiler * c)
 {
 	struct rc_instruction * inst;
-	struct rc_list * aborted_list = NULL;
 	struct rc_list * variable_list = NULL;
-	struct rc_list * var_ptr;
-	struct rc_list * search_ptr;

 	for (inst = c->Program.Instructions.Next;
 					inst != &c->Program.Instructions;
@@ -372,43 +368,15 @@ struct rc_list * rc_get_variables(struct radeon_compiler * c)
 			new_var = rc_variable(c, inst->U.I.DstReg.File,
 				inst->U.I.DstReg.Index,
 				inst->U.I.DstReg.WriteMask, &reader_data);
-			get_variable_helper(&aborted_list, &variable_list,
-						reader_data.Abort, new_var);
+			get_variable_helper(&variable_list, new_var);
 		} else {
-			get_variable_pair_helper(&aborted_list, &variable_list,
-					c, inst, &inst->U.P.RGB);
-			get_variable_pair_helper(&aborted_list, &variable_list,
-					c, inst, &inst->U.P.Alpha);
+			get_variable_pair_helper(&variable_list, c, inst,
+							&inst->U.P.RGB);
+			get_variable_pair_helper(&variable_list, c, inst,
+							&inst->U.P.Alpha);
 		}
 	}

-	/* The aborted_list contains a list of variables that might share a
-	 * reader with another variable.  We need to search through this list
-	 * and pair together variables that do share the same reader.
-	 */
-	while (aborted_list) {
-		struct rc_list * search_ptr_next;
-		struct rc_variable * var;
-		var_ptr = aborted_list;
-		for (var = var_ptr->Item; var; var = var->Friend) {
-
-			search_ptr = var_ptr->Next;
-			while(search_ptr) {
-				search_ptr_next = search_ptr->Next;
-				if (readers_intersect(var, search_ptr->Item)){
-					rc_list_remove(&aborted_list,
-							search_ptr);
-					rc_variable_add_friend(var,
-							search_ptr->Item);
-				}
-				search_ptr = search_ptr_next;
-			}
-		}
-		rc_list_remove(&aborted_list, var_ptr);
-		rc_list_add(&variable_list, rc_list(
-			&((struct rc_variable*)(var_ptr->Item))->C->Pool,
-			var_ptr->Item));
-	}
 	return variable_list;
 }

--- a/src/mesa/main/api_validate.c
+++ b/src/mesa/main/api_validate.c
@@ -116,17 +116,39 @@ check_valid_to_render(struct gl_context *ctx, const char *function)
      break;
 #endif

-#if FEATURE_ES1 || FEATURE_GL
+#if FEATURE_ES1
   case API_OPENGLES:
-   case API_OPENGL:
-      /* For regular OpenGL, only draw if we have vertex positions
-       * (regardless of whether or not we have a vertex program/shader). */
-      if (!ctx->Array.ArrayObj->Vertex.Enabled &&
-	  !ctx->Array.ArrayObj->VertexAttrib[0].Enabled)
+      /* For OpenGL ES, only draw if we have vertex positions
+       */
+      if (!ctx->Array.ArrayObj->Vertex.Enabled)
 	 return GL_FALSE;
      break;
 #endif

+#if FEATURE_GL
+   case API_OPENGL:
+      {
+         const struct gl_shader_program *vsProg =
+            ctx->Shader.CurrentVertexProgram;
+         GLboolean haveVertexShader = (vsProg && vsProg->LinkStatus);
+         GLboolean haveVertexProgram = ctx->VertexProgram._Enabled;
+         if (haveVertexShader || haveVertexProgram) {
+            /* Draw regardless of whether or not we have any vertex arrays.
+             * (Ex: could draw a point using a constant vertex pos)
+             */
+            return GL_TRUE;
+         }
+         else {
+            /* Draw if we have vertex positions (GL_VERTEX_ARRAY or generic
+             * array [0]).
+             */
+            return (ctx->Array.ArrayObj->Vertex.Enabled ||
+                    ctx->Array.ArrayObj->VertexAttrib[0].Enabled);
+         }
+      }
+      break;
+#endif
+
   default:
      ASSERT_NO_FEATURE();
   }
--- a/src/mesa/main/arrayobj.c
+++ b/src/mesa/main/arrayobj.c
@@ -289,9 +289,10 @@ remove_array_object( struct gl_context *ctx, struct gl_array_object *obj )
 static GLuint
 update_min(GLuint min, struct gl_client_array *array)
 {
-   _mesa_update_array_max_element(array);
-   if (array->Enabled)
+   if (array->Enabled) {
+      _mesa_update_array_max_element(array);
      return MIN2(min, array->_MaxElement);
+   }
   else
      return min;
 }
--- a/src/mesa/main/blend.c
+++ b/src/mesa/main/blend.c
@@ -447,7 +447,7 @@ _mesa_BlendEquationSeparateEXT( GLenum modeRGB, GLenum modeA )
 /**
 * Set separate blend equations for one color buffer/target.
 */
-void
+void GLAPIENTRY
 _mesa_BlendEquationSeparatei(GLuint buf, GLenum modeRGB, GLenum modeA)
 {
   GET_CURRENT_CONTEXT(ctx);
--- a/src/mesa/main/blend.h
+++ b/src/mesa/main/blend.h
@@ -68,7 +68,7 @@ extern void GLAPIENTRY
 _mesa_BlendEquationSeparateEXT( GLenum modeRGB, GLenum modeA );


-extern void
+extern void GLAPIENTRY
 _mesa_BlendEquationSeparatei(GLuint buf, GLenum modeRGB, GLenum modeA);


--- a/src/mesa/main/bufferobj.c
+++ b/src/mesa/main/bufferobj.c
@@ -1416,14 +1416,13 @@ _mesa_MapBufferRange(GLenum target, GLintptr offset, GLsizeiptr length,
      return NULL;
   }

-   if (access & GL_MAP_READ_BIT) {
-      if ((access & GL_MAP_INVALIDATE_RANGE_BIT) ||
-          (access & GL_MAP_INVALIDATE_BUFFER_BIT) ||
-          (access & GL_MAP_UNSYNCHRONIZED_BIT)) {
-         _mesa_error(ctx, GL_INVALID_OPERATION,
-                     "glMapBufferRange(invalid access flags)");
-         return NULL;
-      }
+   if ((access & GL_MAP_READ_BIT) &&
+       (access & (GL_MAP_INVALIDATE_RANGE_BIT |
+                  GL_MAP_INVALIDATE_BUFFER_BIT |
+                  GL_MAP_UNSYNCHRONIZED_BIT))) {
+      _mesa_error(ctx, GL_INVALID_OPERATION,
+                  "glMapBufferRange(invalid access flags)");
+      return NULL;
   }

   if ((access & GL_MAP_FLUSH_EXPLICIT_BIT) &&
--- a/src/mesa/main/dlist.c
+++ b/src/mesa/main/dlist.c
@@ -5351,7 +5351,7 @@ save_SetFragmentShaderConstantATI(GLuint dst, const GLfloat *value)
 }
 #endif

-static void
+static void GLAPIENTRY
 save_Attr1fNV(GLenum attr, GLfloat x)
 {
   GET_CURRENT_CONTEXT(ctx);
@@ -5372,7 +5372,7 @@ save_Attr1fNV(GLenum attr, GLfloat x)
   }
 }

-static void
+static void GLAPIENTRY
 save_Attr2fNV(GLenum attr, GLfloat x, GLfloat y)
 {
   GET_CURRENT_CONTEXT(ctx);
@@ -5394,7 +5394,7 @@ save_Attr2fNV(GLenum attr, GLfloat x, GLfloat y)
   }
 }

-static void
+static void GLAPIENTRY
 save_Attr3fNV(GLenum attr, GLfloat x, GLfloat y, GLfloat z)
 {
   GET_CURRENT_CONTEXT(ctx);
@@ -5417,7 +5417,7 @@ save_Attr3fNV(GLenum attr, GLfloat x, GLfloat y, GLfloat z)
   }
 }

-static void
+static void GLAPIENTRY
 save_Attr4fNV(GLenum attr, GLfloat x, GLfloat y, GLfloat z, GLfloat w)
 {
   GET_CURRENT_CONTEXT(ctx);
@@ -5442,7 +5442,7 @@ save_Attr4fNV(GLenum attr, GLfloat x, GLfloat y, GLfloat z, GLfloat w)
 }


-static void
+static void GLAPIENTRY
 save_Attr1fARB(GLenum attr, GLfloat x)
 {
   GET_CURRENT_CONTEXT(ctx);
@@ -5463,7 +5463,7 @@ save_Attr1fARB(GLenum attr, GLfloat x)
   }
 }

-static void
+static void GLAPIENTRY
 save_Attr2fARB(GLenum attr, GLfloat x, GLfloat y)
 {
   GET_CURRENT_CONTEXT(ctx);
@@ -5485,7 +5485,7 @@ save_Attr2fARB(GLenum attr, GLfloat x, GLfloat y)
   }
 }

-static void
+static void GLAPIENTRY
 save_Attr3fARB(GLenum attr, GLfloat x, GLfloat y, GLfloat z)
 {
   GET_CURRENT_CONTEXT(ctx);
@@ -5508,7 +5508,7 @@ save_Attr3fARB(GLenum attr, GLfloat x, GLfloat y, GLfloat z)
   }
 }

-static void
+static void GLAPIENTRY
 save_Attr4fARB(GLenum attr, GLfloat x, GLfloat y, GLfloat z, GLfloat w)
 {
   GET_CURRENT_CONTEXT(ctx);
@@ -7040,7 +7040,7 @@ exec_GetTexParameterIuiv(GLenum target, GLenum pname, GLuint *params)


 /* GL_ARB_instanced_arrays */
-static void
+static void GLAPIENTRY
 save_VertexAttribDivisor(GLuint index, GLuint divisor)
 {
   GET_CURRENT_CONTEXT(ctx);
@@ -7058,7 +7058,7 @@ save_VertexAttribDivisor(GLuint index, GLuint divisor)


 /* GL_NV_texture_barrier */
-static void
+static void GLAPIENTRY
 save_TextureBarrierNV(void)
 {
   GET_CURRENT_CONTEXT(ctx);
@@ -7071,7 +7071,7 @@ save_TextureBarrierNV(void)


 /* GL_ARB_sampler_objects */
-static void
+static void GLAPIENTRY
 save_BindSampler(GLuint unit, GLuint sampler)
 {
   Node *n;
--- a/src/mesa/main/es_generator.py
+++ b/src/mesa/main/es_generator.py
@@ -195,6 +195,10 @@ print """
 #include "main/api_exec.h"

 #if FEATURE_%s
+
+#ifndef GLAPIENTRYP
+#define GLAPIENTRYP GL_APIENTRYP
+#endif
 """ % (versionHeader, versionExtHeader, shortname.upper())

 # Everyone needs these types.
--- a/src/mesa/main/extensions.c
+++ b/src/mesa/main/extensions.c
@@ -503,6 +503,7 @@ _mesa_enable_sw_extensions(struct gl_context *ctx)
   ctx->Extensions.EXT_texture_env_add = GL_TRUE;
   ctx->Extensions.EXT_texture_env_combine = GL_TRUE;
   ctx->Extensions.EXT_texture_env_dot3 = GL_TRUE;
+   ctx->Extensions.EXT_texture_filter_anisotropic = GL_TRUE;
   ctx->Extensions.EXT_texture_mirror_clamp = GL_TRUE;
   ctx->Extensions.EXT_texture_lod_bias = GL_TRUE;
   ctx->Extensions.EXT_texture_shared_exponent = GL_TRUE;
--- a/src/mesa/main/fbobject.c
+++ b/src/mesa/main/fbobject.c
@@ -418,18 +418,16 @@ _mesa_validate_framebuffer(struct gl_context *ctx, struct gl_framebuffer *fb)
         case GL_RG:
            fb->_Status = GL_FRAMEBUFFER_UNSUPPORTED;
            return;
-         case GL_RGB:
+
+         default:
            switch (rb->Format) {
+            /* XXX This list is likely incomplete. */
            case MESA_FORMAT_RGB9_E5_FLOAT:
               fb->_Status = GL_FRAMEBUFFER_UNSUPPORTED;
               return;
            default:;
+               /* render buffer format is supported by software rendering */
            }
-            break;
-
-         default:
-            /* render buffer format is supported by software rendering */
-            ;
         }
      }
   }
--- a/src/mesa/main/glapidispatch.h
+++ b/src/mesa/main/glapidispatch.h
--- a/src/mesa/main/texgetimage.c
+++ b/src/mesa/main/texgetimage.c
@@ -418,21 +418,24 @@ get_tex_memcpy(struct gl_context *ctx, GLenum format, GLenum type, GLvoid *pixel
        texObj->Target == GL_TEXTURE_RECTANGLE ||
        (texObj->Target >= GL_TEXTURE_CUBE_MAP_POSITIVE_X &&
         texObj->Target <= GL_TEXTURE_CUBE_MAP_NEGATIVE_Z))) {
-      if (texImage->TexFormat == MESA_FORMAT_ARGB8888 &&
+      if ((texImage->TexFormat == MESA_FORMAT_ARGB8888 ||
+             texImage->TexFormat == MESA_FORMAT_SARGB8) &&
          format == GL_BGRA &&
-          type == GL_UNSIGNED_BYTE &&
+          (type == GL_UNSIGNED_BYTE || type == GL_UNSIGNED_INT_8_8_8_8_REV) &&
          !ctx->Pack.SwapBytes &&
          _mesa_little_endian()) {
         memCopy = GL_TRUE;
      }
-      else if (texImage->TexFormat == MESA_FORMAT_AL88 &&
+      else if ((texImage->TexFormat == MESA_FORMAT_AL88 ||
+                  texImage->TexFormat == MESA_FORMAT_SLA8) &&
               format == GL_LUMINANCE_ALPHA &&
               type == GL_UNSIGNED_BYTE &&
               !ctx->Pack.SwapBytes &&
               _mesa_little_endian()) {
         memCopy = GL_TRUE;
      }
-      else if (texImage->TexFormat == MESA_FORMAT_L8 &&
+      else if ((texImage->TexFormat == MESA_FORMAT_L8 ||
+                  texImage->TexFormat == MESA_FORMAT_SL8) &&
               format == GL_LUMINANCE &&
               type == GL_UNSIGNED_BYTE) {
         memCopy = GL_TRUE;
--- a/src/mesa/main/texrender.c
+++ b/src/mesa/main/texrender.c
@@ -530,7 +530,6 @@ update_wrapper(struct gl_context *ctx, struct gl_renderbuffer_attachment *att)
 {
   struct texture_renderbuffer *trb
      = (struct texture_renderbuffer *) att->Renderbuffer;
-   GLuint unused;

   (void) ctx;
   ASSERT(trb);
@@ -603,10 +602,8 @@ update_wrapper(struct gl_context *ctx, struct gl_renderbuffer_attachment *att)
      trb->Base._BaseFormat = GL_RGBA;
      break;
   default:
-      _mesa_format_to_type_and_comps(trb->TexImage->TexFormat,
-                                     &trb->Base.DataType, &unused);
-      trb->Base._BaseFormat =
-         _mesa_base_fbo_format(ctx, trb->TexImage->InternalFormat);
+      trb->Base.DataType = CHAN_TYPE;
+      trb->Base._BaseFormat = GL_RGBA;
   }
   trb->Base.Data = trb->TexImage->Data;
 }
--- a/src/mesa/state_tracker/st_atom.c
+++ b/src/mesa/state_tracker/st_atom.c
@@ -56,6 +56,7 @@ static const struct st_tracked_state *atoms[] =
   &st_update_scissor,
   &st_update_blend,
   &st_update_sampler,
+   &st_update_vertex_texture,
   &st_update_texture,
   &st_update_framebuffer,
   &st_update_msaa,
--- a/src/mesa/state_tracker/st_atom.h
+++ b/src/mesa/state_tracker/st_atom.h
@@ -60,6 +60,7 @@ extern const struct st_tracked_state st_update_blend;
 extern const struct st_tracked_state st_update_msaa;
 extern const struct st_tracked_state st_update_sampler;
 extern const struct st_tracked_state st_update_texture;
+extern const struct st_tracked_state st_update_vertex_texture;
 extern const struct st_tracked_state st_finalize_textures;
 extern const struct st_tracked_state st_update_fs_constants;
 extern const struct st_tracked_state st_update_gs_constants;
--- a/src/mesa/state_tracker/st_atom_sampler.c
+++ b/src/mesa/state_tracker/st_atom_sampler.c
@@ -120,14 +120,110 @@ gl_filter_to_img_filter(GLenum filter)
   }
 }

+static void convert_sampler(struct st_context *st,
+			    struct pipe_sampler_state *sampler,
+			    GLuint texUnit)
+{
+    struct gl_texture_object *texobj;
+    struct gl_sampler_object *msamp;

-static void 
-update_samplers(struct st_context *st)
+    texobj = st->ctx->Texture.Unit[texUnit]._Current;
+    if (!texobj) {
+	texobj = st_get_default_texture(st);
+    }
+
+    msamp = _mesa_get_samplerobj(st->ctx, texUnit);
+
+    memset(sampler, 0, sizeof(*sampler));
+    sampler->wrap_s = gl_wrap_xlate(msamp->WrapS);
+    sampler->wrap_t = gl_wrap_xlate(msamp->WrapT);
+    sampler->wrap_r = gl_wrap_xlate(msamp->WrapR);
+
+    sampler->min_img_filter = gl_filter_to_img_filter(msamp->MinFilter);
+    sampler->min_mip_filter = gl_filter_to_mip_filter(msamp->MinFilter);
+    sampler->mag_img_filter = gl_filter_to_img_filter(msamp->MagFilter);
+
+    if (texobj->Target != GL_TEXTURE_RECTANGLE_ARB)
+       sampler->normalized_coords = 1;
+
+    sampler->lod_bias = st->ctx->Texture.Unit[texUnit].LodBias +
+       msamp->LodBias;
+
+    sampler->min_lod = CLAMP(msamp->MinLod,
+			     0.0f,
+			     (GLfloat) texobj->MaxLevel - texobj->BaseLevel);
+    sampler->max_lod = MIN2((GLfloat) texobj->MaxLevel - texobj->BaseLevel,
+			    msamp->MaxLod);
+    if (sampler->max_lod < sampler->min_lod) {
+       /* The GL spec doesn't seem to specify what to do in this case.
+	* Swap the values.
+	*/
+       float tmp = sampler->max_lod;
+       sampler->max_lod = sampler->min_lod;
+       sampler->min_lod = tmp;
+       assert(sampler->min_lod <= sampler->max_lod);
+    }
+
+    if (msamp->BorderColor.ui[0] ||
+	msamp->BorderColor.ui[1] ||
+	msamp->BorderColor.ui[2] ||
+	msamp->BorderColor.ui[3]) {
+       struct gl_texture_image *teximg;
+
+       teximg = texobj->Image[0][texobj->BaseLevel];
+
+       st_translate_color(msamp->BorderColor.f,
+			  teximg ? teximg->_BaseFormat : GL_RGBA,
+			  sampler->border_color);
+    }
+
+    sampler->max_anisotropy = (msamp->MaxAnisotropy == 1.0 ?
+			       0 : (GLuint) msamp->MaxAnisotropy);
+
+    /* only care about ARB_shadow, not SGI shadow */
+    if (msamp->CompareMode == GL_COMPARE_R_TO_TEXTURE) {
+       sampler->compare_mode = PIPE_TEX_COMPARE_R_TO_TEXTURE;
+       sampler->compare_func
+	  = st_compare_func_to_pipe(msamp->CompareFunc);
+    }
+
+    sampler->seamless_cube_map =
+       st->ctx->Texture.CubeMapSeamless || msamp->CubeMapSeamless;
+}
+
+static void
+update_vertex_samplers(struct st_context *st)
 {
   struct gl_vertex_program *vprog = st->ctx->VertexProgram._Current;
+   GLuint su;
+
+   st->state.num_vertex_samplers = 0;
+
+   /* loop over sampler units (aka tex image units) */
+   for (su = 0; su < st->ctx->Const.MaxVertexTextureImageUnits; su++) {
+      struct pipe_sampler_state *sampler = st->state.vertex_samplers + su;
+
+      if (vprog->Base.SamplersUsed & (1 << su)) {
+	 GLuint texUnit;
+
+	 texUnit = vprog->Base.SamplerUnits[su];
+
+	 convert_sampler(st, sampler, texUnit);
+
+	 st->state.num_vertex_samplers = su + 1;
+
+	 cso_single_vertex_sampler(st->cso_context, su, sampler);
+      } else {
+	 cso_single_vertex_sampler(st->cso_context, su, NULL);
+      }
+   }
+   cso_single_vertex_sampler_done(st->cso_context);
+}
+
+static void
+update_fragment_samplers(struct st_context *st)
+{
   struct gl_fragment_program *fprog = st->ctx->FragmentProgram._Current;
-   const GLbitfield samplersUsed = (vprog->Base.SamplersUsed |
-                                    fprog->Base.SamplersUsed);
   GLuint su;

   st->state.num_samplers = 0;
@@ -136,97 +232,34 @@ update_samplers(struct st_context *st)
   for (su = 0; su < st->ctx->Const.MaxTextureImageUnits; su++) {
      struct pipe_sampler_state *sampler = st->state.samplers + su;

-      memset(sampler, 0, sizeof(*sampler));

-      if (samplersUsed & (1 << su)) {
-         struct gl_texture_object *texobj;
-         struct gl_texture_image *teximg;
-         struct gl_sampler_object *msamp;
+      if (fprog->Base.SamplersUsed & (1 << su)) {
         GLuint texUnit;

-         if (fprog->Base.SamplersUsed & (1 << su))
-            texUnit = fprog->Base.SamplerUnits[su];
-         else
-            texUnit = vprog->Base.SamplerUnits[su];
+	 texUnit = fprog->Base.SamplerUnits[su];

-         texobj = st->ctx->Texture.Unit[texUnit]._Current;
-         if (!texobj) {
-            texobj = st_get_default_texture(st);
-         }
-
-         teximg = texobj->Image[0][texobj->BaseLevel];
-
-         msamp = _mesa_get_samplerobj(st->ctx, texUnit);
-
-         sampler->wrap_s = gl_wrap_xlate(msamp->WrapS);
-         sampler->wrap_t = gl_wrap_xlate(msamp->WrapT);
-         sampler->wrap_r = gl_wrap_xlate(msamp->WrapR);
-
-         sampler->min_img_filter = gl_filter_to_img_filter(msamp->MinFilter);
-         sampler->min_mip_filter = gl_filter_to_mip_filter(msamp->MinFilter);
-         sampler->mag_img_filter = gl_filter_to_img_filter(msamp->MagFilter);
-
-         if (texobj->Target != GL_TEXTURE_RECTANGLE_ARB)
-            sampler->normalized_coords = 1;
-
-         sampler->lod_bias = st->ctx->Texture.Unit[texUnit].LodBias +
-            msamp->LodBias;
-
-         sampler->min_lod = CLAMP(msamp->MinLod,
-                                  0.0f,
-                                  (GLfloat) texobj->MaxLevel - texobj->BaseLevel);
-         sampler->max_lod = MIN2((GLfloat) texobj->MaxLevel - texobj->BaseLevel,
-                                 msamp->MaxLod);
-         if (sampler->max_lod < sampler->min_lod) {
-            /* The GL spec doesn't seem to specify what to do in this case.
-             * Swap the values.
-             */
-            float tmp = sampler->max_lod;
-            sampler->max_lod = sampler->min_lod;
-            sampler->min_lod = tmp;
-            assert(sampler->min_lod <= sampler->max_lod);
-         }
-
-         st_translate_color(msamp->BorderColor.f,
-                            teximg ? teximg->_BaseFormat : GL_RGBA,
-                            sampler->border_color);
-
-	 sampler->max_anisotropy = (msamp->MaxAnisotropy == 1.0 ?
-                                    0 : (GLuint) msamp->MaxAnisotropy);
-
-         /* only care about ARB_shadow, not SGI shadow */
-         if (msamp->CompareMode == GL_COMPARE_R_TO_TEXTURE) {
-            sampler->compare_mode = PIPE_TEX_COMPARE_R_TO_TEXTURE;
-            sampler->compare_func
-               = st_compare_func_to_pipe(msamp->CompareFunc);
-         }
-
-         sampler->seamless_cube_map =
-               st->ctx->Texture.CubeMapSeamless || msamp->CubeMapSeamless;
+	 convert_sampler(st, sampler, texUnit);

         st->state.num_samplers = su + 1;

         /*printf("%s su=%u non-null\n", __FUNCTION__, su);*/
         cso_single_sampler(st->cso_context, su, sampler);
-         if (su < st->ctx->Const.MaxVertexTextureImageUnits) {
-            cso_single_vertex_sampler(st->cso_context, su, sampler);
-         }
      }
      else {
         /*printf("%s su=%u null\n", __FUNCTION__, su);*/
         cso_single_sampler(st->cso_context, su, NULL);
-         if (su < st->ctx->Const.MaxVertexTextureImageUnits) {
-            cso_single_vertex_sampler(st->cso_context, su, NULL);
-         }
      }
   }

   cso_single_sampler_done(st->cso_context);
-   if (st->ctx->Const.MaxVertexTextureImageUnits > 0) {
-      cso_single_vertex_sampler_done(st->cso_context);
-   }
 }

+static void
+update_samplers(struct st_context *st)
+{
+    update_fragment_samplers(st);
+    update_vertex_samplers(st);
+}

 const struct st_tracked_state st_update_sampler = {
   "st_update_sampler",					/* name */
--- a/Show More
+++ b/Show More