crocus: initial gallium driver for Intel gfx 4-7

This is a gallium driver for the Intel gfx 4-7 GPUs.

It was initially cloned from the iris driver by Ilia Mirkin,
then I ported over large reams of code from i965 until it worked.

Acked-by: Jason Ekstrand <jason@jlekstrand.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11146>
Dave Airlie 2021-06-01 13:14:51 +10:00
parent 8da92b5c0a
commit f3630548f1
51 changed files with 28508 additions and 6 deletions


@@ -231,6 +231,7 @@ with_gallium_v3d = gallium_drivers.contains('v3d')
with_gallium_panfrost = gallium_drivers.contains('panfrost')
with_gallium_etnaviv = gallium_drivers.contains('etnaviv')
with_gallium_tegra = gallium_drivers.contains('tegra')
with_gallium_crocus = gallium_drivers.contains('crocus')
with_gallium_iris = gallium_drivers.contains('iris')
with_gallium_i915 = gallium_drivers.contains('i915')
with_gallium_svga = gallium_drivers.contains('svga')
@@ -284,7 +285,7 @@ with_broadcom_vk = _vulkan_drivers.contains('broadcom')
with_any_vk = _vulkan_drivers.length() != 0
with_any_broadcom = with_gallium_vc4 or with_gallium_v3d or with_broadcom_vk
with_any_intel = with_dri_i965 or with_intel_vk or with_gallium_iris
with_any_intel = with_dri_i965 or with_intel_vk or with_gallium_iris or with_gallium_crocus
if with_swrast_vk and not with_gallium_softpipe
error('swrast vulkan requires gallium swrast')
@@ -795,7 +796,7 @@ if with_gallium_st_nine
error('The nine state tracker requires gallium softpipe/llvmpipe.')
elif not (with_gallium_radeonsi or with_gallium_nouveau or with_gallium_r600
or with_gallium_r300 or with_gallium_svga or with_gallium_i915
or with_gallium_iris)
or with_gallium_iris or with_gallium_crocus)
error('The nine state tracker requires at least one non-swrast gallium driver.')
endif
if not with_dri3


@@ -67,7 +67,7 @@ option(
choices : [
'auto', 'kmsro', 'radeonsi', 'r300', 'r600', 'nouveau', 'freedreno',
'swrast', 'v3d', 'vc4', 'etnaviv', 'tegra', 'i915', 'svga', 'virgl',
'swr', 'panfrost', 'iris', 'lima', 'zink', 'd3d12', 'asahi'
'swr', 'panfrost', 'iris', 'lima', 'zink', 'd3d12', 'asahi', 'crocus'
],
description : 'List of gallium drivers to build. If this is set to auto all drivers applicable to the target OS/architecture will be built'
)


@@ -70,6 +70,7 @@ static const struct pipe_loader_ops pipe_loader_drm_ops;
static const struct drm_driver_descriptor *driver_descriptors[] = {
&i915_driver_descriptor,
&iris_driver_descriptor,
&crocus_driver_descriptor,
&nouveau_driver_descriptor,
&r300_driver_descriptor,
&r600_driver_descriptor,


@@ -112,6 +112,26 @@ DRM_DRIVER_DESCRIPTOR(iris, iris_driconf, ARRAY_SIZE(iris_driconf))
DRM_DRIVER_DESCRIPTOR_STUB(iris)
#endif
#ifdef GALLIUM_CROCUS
#include "crocus/drm/crocus_drm_public.h"
static struct pipe_screen *
pipe_crocus_create_screen(int fd, const struct pipe_screen_config *config)
{
struct pipe_screen *screen;
screen = crocus_drm_screen_create(fd, config);
return screen ? debug_screen_wrap(screen) : NULL;
}
const driOptionDescription crocus_driconf[] = {
#include "crocus/driinfo_crocus.h"
};
DRM_DRIVER_DESCRIPTOR(crocus, crocus_driconf, ARRAY_SIZE(crocus_driconf))
#else
DRM_DRIVER_DESCRIPTOR_STUB(crocus)
#endif
#ifdef GALLIUM_NOUVEAU
#include "nouveau/drm/nouveau_drm_public.h"


@@ -6,6 +6,7 @@ struct pipe_screen_config;
extern const struct drm_driver_descriptor i915_driver_descriptor;
extern const struct drm_driver_descriptor iris_driver_descriptor;
extern const struct drm_driver_descriptor crocus_driver_descriptor;
extern const struct drm_driver_descriptor nouveau_driver_descriptor;
extern const struct drm_driver_descriptor r300_driver_descriptor;
extern const struct drm_driver_descriptor r600_driver_descriptor;

File diff suppressed because it is too large


@@ -0,0 +1,325 @@
/*
* Copyright © 2017 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#ifndef CROCUS_BATCH_DOT_H
#define CROCUS_BATCH_DOT_H
#include <stdbool.h>
#include <stdint.h>
#include <string.h>
#include "util/u_dynarray.h"
#include "common/intel_decoder.h"
#include "drm-uapi/i915_drm.h"
#include "crocus_fence.h"
#include "crocus_fine_fence.h"
#include "crocus_bufmgr.h"
/* The kernel assumes batchbuffers are smaller than 256kB. */
#define MAX_BATCH_SIZE (256 * 1024)
/* 3DSTATE_BINDING_TABLE_POINTERS has a U16 offset from Surface State Base
* Address, which means that we can't put binding tables beyond 64kB. This
* effectively limits the maximum statebuffer size to 64kB.
*/
#define MAX_STATE_SIZE (64 * 1024)
/* Our target batch size - flush approximately at this point. */
#define BATCH_SZ (20 * 1024)
#define STATE_SZ (16 * 1024)
enum crocus_batch_name {
CROCUS_BATCH_RENDER,
CROCUS_BATCH_COMPUTE,
};
#define CROCUS_BATCH_COUNT 2
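/**
 * A GPU address for relocation-based batches: a BO plus an offset into it,
 * along with the RELOC_* flags to apply when the relocation is emitted.
 */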
struct crocus_address {
struct crocus_bo *bo;
int32_t offset;
uint32_t reloc_flags;
};
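/** A growable array of drm_i915_gem_relocation_entry for one buffer. */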
struct crocus_reloc_list {
struct drm_i915_gem_relocation_entry *relocs;
int reloc_count;
int reloc_array_size;
};
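/**
 * A command or state buffer that can be grown at runtime: the backing BO,
 * its CPU mapping and write cursor, the relocation list emitted against it,
 * and bookkeeping for the old BO while a grow is in progress.
 */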
struct crocus_growing_bo {
struct crocus_bo *bo;
void *map;
void *map_next;
struct crocus_bo *partial_bo;
void *partial_bo_map;
unsigned partial_bytes;
struct crocus_reloc_list relocs;
unsigned used;
};
struct crocus_batch {
struct crocus_context *ice;
struct crocus_screen *screen;
struct pipe_debug_callback *dbg;
struct pipe_device_reset_callback *reset;
/** What batch is this? (e.g. CROCUS_BATCH_RENDER/COMPUTE) */
enum crocus_batch_name name;
/** buffers: command, state */
struct crocus_growing_bo command, state;
/** Size of the primary batch if we've moved on to a secondary. */
unsigned primary_batch_size;
bool state_base_address_emitted;
uint8_t pipe_controls_since_last_cs_stall;
uint32_t hw_ctx_id;
uint32_t valid_reloc_flags;
bool use_shadow_copy;
bool no_wrap;
/** The validation list */
struct drm_i915_gem_exec_object2 *validation_list;
struct crocus_bo **exec_bos;
int exec_count;
int exec_array_size;
/** Whether INTEL_BLACKHOLE_RENDER is enabled in the batch (aka first
* instruction is an MI_BATCH_BUFFER_END).
*/
bool noop_enabled;
/**
* A list of crocus_syncobjs associated with this batch.
*
* The first list entry will always be a signalling sync-point, indicating
* that this batch has completed. The others are likely to be sync-points
* to wait on before executing the batch.
*/
struct util_dynarray syncobjs;
/** A list of drm_i915_exec_fences to have execbuf signal or wait on */
struct util_dynarray exec_fences;
/** The amount of aperture space (in bytes) used by all exec_bos */
int aperture_space;
struct {
/** Uploader to use for sequence numbers */
struct u_upload_mgr *uploader;
/** GPU buffer and CPU map where our seqno's will be written. */
struct crocus_state_ref ref;
uint32_t *map;
/** The sequence number to write the next time we add a fence. */
uint32_t next;
} fine_fences;
/** A seqno (and syncobj) for the last batch that was submitted. */
struct crocus_fine_fence *last_fence;
/** List of other batches which we might need to flush to use a BO */
struct crocus_batch *other_batches[CROCUS_BATCH_COUNT - 1];
struct {
/**
* Set of struct crocus_bo * that have been rendered to within this
* batchbuffer and would need flushing before being used from another
* cache domain that isn't coherent with it (i.e. the sampler).
*/
struct hash_table *render;
/**
* Set of struct crocus_bo * that have been used as a depth buffer within
* this batchbuffer and would need flushing before being used from
* another cache domain that isn't coherent with it (i.e. the sampler).
*/
struct set *depth;
} cache;
struct intel_batch_decode_ctx decoder;
struct hash_table_u64 *state_sizes;
/** Have we emitted any draw calls to this batch? */
bool contains_draw;
/** Batch contains fence signal operation. */
bool contains_fence_signal;
};
static inline bool
batch_has_fine_fence(struct crocus_batch *batch)
{
return !!batch->fine_fences.uploader;
}
#define BATCH_HAS_FINE_FENCES(batch) (!!(batch)->fine_fences.uploader)
void crocus_init_batch(struct crocus_context *ctx,
enum crocus_batch_name name,
int priority);
void crocus_batch_free(struct crocus_batch *batch);
void crocus_batch_maybe_flush(struct crocus_batch *batch, unsigned estimate);
void _crocus_batch_flush(struct crocus_batch *batch, const char *file, int line);
#define crocus_batch_flush(batch) _crocus_batch_flush((batch), __FILE__, __LINE__)
bool crocus_batch_references(struct crocus_batch *batch, struct crocus_bo *bo);
bool crocus_batch_prepare_noop(struct crocus_batch *batch, bool noop_enable);
#define RELOC_WRITE EXEC_OBJECT_WRITE
#define RELOC_NEEDS_GGTT EXEC_OBJECT_NEEDS_GTT
/* Inverted meaning, but using the same bit...emit_reloc will flip it. */
#define RELOC_32BIT EXEC_OBJECT_SUPPORTS_48B_ADDRESS
void crocus_use_pinned_bo(struct crocus_batch *batch, struct crocus_bo *bo,
bool writable);
uint64_t crocus_command_reloc(struct crocus_batch *batch, uint32_t batch_offset,
struct crocus_bo *target, uint32_t target_offset,
unsigned int reloc_flags);
uint64_t crocus_state_reloc(struct crocus_batch *batch, uint32_t batch_offset,
struct crocus_bo *target, uint32_t target_offset,
unsigned int reloc_flags);
enum pipe_reset_status crocus_batch_check_for_reset(struct crocus_batch *batch);
void crocus_grow_buffer(struct crocus_batch *batch, bool grow_state,
unsigned used, unsigned new_size);
static inline unsigned
crocus_batch_bytes_used(struct crocus_batch *batch)
{
return batch->command.map_next - batch->command.map;
}
/**
 * Ensure the current command buffer has \param size bytes of space
 * remaining.  If not, this flushes the batch once it has reached its
 * target size, or grows the command buffer (up to MAX_BATCH_SIZE) when
 * wrapping is not allowed.
 *
 * Most callers want crocus_get_command_space() instead.
 */
static inline void
crocus_require_command_space(struct crocus_batch *batch, unsigned size)
{
const unsigned required_bytes = crocus_batch_bytes_used(batch) + size;
unsigned used = crocus_batch_bytes_used(batch);
if (required_bytes >= BATCH_SZ && !batch->no_wrap) {
crocus_batch_flush(batch);
} else if (used + size >= batch->command.bo->size) {
const unsigned new_size =
MIN2(batch->command.bo->size + batch->command.bo->size / 2,
MAX_BATCH_SIZE);
crocus_grow_buffer(batch, false, used, new_size);
batch->command.map_next = (void *)batch->command.map + used;
assert(crocus_batch_bytes_used(batch) + size < batch->command.bo->size);
}
}
/**
* Allocate space in the current command buffer, and return a pointer
* to the mapped area so the caller can write commands there.
*
* This should be called whenever emitting commands.
*/
static inline void *
crocus_get_command_space(struct crocus_batch *batch, unsigned bytes)
{
crocus_require_command_space(batch, bytes);
void *map = batch->command.map_next;
batch->command.map_next += bytes;
return map;
}
/**
* Helper to emit GPU commands - allocates space, copies them there.
*/
static inline void
crocus_batch_emit(struct crocus_batch *batch, const void *data, unsigned size)
{
void *map = crocus_get_command_space(batch, size);
memcpy(map, data, size);
}
/**
* Get a pointer to the batch's signalling syncobj. Does not refcount.
*/
static inline struct crocus_syncobj *
crocus_batch_get_signal_syncobj(struct crocus_batch *batch)
{
/* The signalling syncobj is the first one in the list. */
struct crocus_syncobj *syncobj =
((struct crocus_syncobj **)util_dynarray_begin(&batch->syncobjs))[0];
return syncobj;
}
/**
* Take a reference to the batch's signalling syncobj.
*
* Callers can use this to wait for the current batch under construction
* to complete (after flushing it).
*/
static inline void
crocus_batch_reference_signal_syncobj(struct crocus_batch *batch,
struct crocus_syncobj **out_syncobj)
{
struct crocus_syncobj *syncobj = crocus_batch_get_signal_syncobj(batch);
crocus_syncobj_reference(batch->screen, out_syncobj, syncobj);
}
/**
* Record the size of a piece of state for use in INTEL_DEBUG=bat printing.
*/
static inline void
crocus_record_state_size(struct hash_table_u64 *ht, uint32_t offset_from_base,
uint32_t size)
{
if (ht) {
_mesa_hash_table_u64_insert(ht, offset_from_base,
(void *)(uintptr_t)size);
}
}
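/** Return true if a CPU pointer falls within the batch's mapped state buffer. */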
static inline bool
crocus_ptr_in_state_buffer(struct crocus_batch *batch, void *p)
{
return (char *)p >= (char *)batch->state.map &&
(char *)p < (char *)batch->state.map + batch->state.bo->size;
}
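/**
 * Flush the batch if adding \param size more bytes of state would push the
 * state buffer past its target size (STATE_SZ).
 */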
static inline void
crocus_require_statebuffer_space(struct crocus_batch *batch, int size)
{
if (batch->state.used + size >= STATE_SZ)
crocus_batch_flush(batch);
}
#endif


@@ -0,0 +1,836 @@
/*
* Copyright © 2017 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include <stdio.h>
#include "pipe/p_defines.h"
#include "pipe/p_state.h"
#include "pipe/p_context.h"
#include "pipe/p_screen.h"
#include "util/format/u_format.h"
#include "util/u_inlines.h"
#include "util/u_surface.h"
#include "util/ralloc.h"
#include "intel/blorp/blorp.h"
#include "crocus_context.h"
#include "crocus_resource.h"
#include "crocus_screen.h"
void crocus_blitter_begin(struct crocus_context *ice, enum crocus_blitter_op op, bool render_cond)
{
util_blitter_save_vertex_shader(ice->blitter, ice->shaders.uncompiled[MESA_SHADER_VERTEX]);
util_blitter_save_tessctrl_shader(ice->blitter, ice->shaders.uncompiled[MESA_SHADER_TESS_CTRL]);
util_blitter_save_tesseval_shader(ice->blitter, ice->shaders.uncompiled[MESA_SHADER_TESS_EVAL]);
util_blitter_save_geometry_shader(ice->blitter, ice->shaders.uncompiled[MESA_SHADER_GEOMETRY]);
util_blitter_save_so_targets(ice->blitter, ice->state.so_targets,
(struct pipe_stream_output_target**)ice->state.so_target);
util_blitter_save_vertex_buffer_slot(ice->blitter, ice->state.vertex_buffers);
util_blitter_save_vertex_elements(ice->blitter, (void *)ice->state.cso_vertex_elements);
if (op & CROCUS_SAVE_FRAGMENT_STATE) {
util_blitter_save_blend(ice->blitter, ice->state.cso_blend);
util_blitter_save_depth_stencil_alpha(ice->blitter, ice->state.cso_zsa);
util_blitter_save_stencil_ref(ice->blitter, &ice->state.stencil_ref);
util_blitter_save_fragment_shader(ice->blitter, ice->shaders.uncompiled[MESA_SHADER_FRAGMENT]);
util_blitter_save_sample_mask(ice->blitter, ice->state.sample_mask);
util_blitter_save_rasterizer(ice->blitter, ice->state.cso_rast);
util_blitter_save_scissor(ice->blitter, &ice->state.scissors[0]);
util_blitter_save_viewport(ice->blitter, &ice->state.viewports[0]);
util_blitter_save_fragment_constant_buffer_slot(ice->blitter, &ice->state.shaders[MESA_SHADER_FRAGMENT].constbufs[0]);
}
if (!render_cond)
util_blitter_save_render_condition(ice->blitter,
(struct pipe_query *)ice->condition.query,
ice->condition.condition,
ice->condition.mode);
// util_blitter_save_scissor(ice->blitter, &ice->scissors[0]);
if (op & CROCUS_SAVE_FRAMEBUFFER)
util_blitter_save_framebuffer(ice->blitter, &ice->state.framebuffer);
if (op & CROCUS_SAVE_TEXTURES) {
util_blitter_save_fragment_sampler_states(ice->blitter, 1, (void **)ice->state.shaders[MESA_SHADER_FRAGMENT].samplers);
util_blitter_save_fragment_sampler_views(ice->blitter, 1, (struct pipe_sampler_view **)ice->state.shaders[MESA_SHADER_FRAGMENT].textures);
}
}
/**
* Helper function for handling mirror image blits.
*
* If coord0 > coord1, swap them and return "true" (mirrored).
*/
static bool
apply_mirror(float *coord0, float *coord1)
{
if (*coord0 > *coord1) {
float tmp = *coord0;
*coord0 = *coord1;
*coord1 = tmp;
return true;
}
return false;
}
/**
* Compute the number of pixels to clip for each side of a rect
*
* \param x0 The rect's left coordinate
* \param y0 The rect's bottom coordinate
* \param x1 The rect's right coordinate
* \param y1 The rect's top coordinate
* \param min_x The clipping region's left coordinate
* \param min_y The clipping region's bottom coordinate
* \param max_x The clipping region's right coordinate
* \param max_y The clipping region's top coordinate
* \param clipped_x0 The number of pixels to clip from the left side
* \param clipped_y0 The number of pixels to clip from the bottom side
* \param clipped_x1 The number of pixels to clip from the right side
* \param clipped_y1 The number of pixels to clip from the top side
*
* \return false if we clip everything away, true otherwise
*/
static inline bool
compute_pixels_clipped(float x0, float y0, float x1, float y1,
float min_x, float min_y, float max_x, float max_y,
float *clipped_x0, float *clipped_y0,
float *clipped_x1, float *clipped_y1)
{
/* If we are going to clip everything away, stop. */
if (!(min_x <= max_x &&
min_y <= max_y &&
x0 <= max_x &&
y0 <= max_y &&
min_x <= x1 &&
min_y <= y1 &&
x0 <= x1 &&
y0 <= y1)) {
return false;
}
if (x0 < min_x)
*clipped_x0 = min_x - x0;
else
*clipped_x0 = 0;
if (max_x < x1)
*clipped_x1 = x1 - max_x;
else
*clipped_x1 = 0;
if (y0 < min_y)
*clipped_y0 = min_y - y0;
else
*clipped_y0 = 0;
if (max_y < y1)
*clipped_y1 = y1 - max_y;
else
*clipped_y1 = 0;
return true;
}
/**
* Clips a coordinate (left, right, top or bottom) for the src or dst rect
* (whichever requires the largest clip) and adjusts the coordinate
* for the other rect accordingly.
*
* \param mirror true if mirroring is required
* \param src the source rect coordinate (for example src_x0)
* \param dst0 the dst rect coordinate (for example dst_x0)
* \param dst1 the opposite dst rect coordinate (for example dst_x1)
* \param clipped_dst0 number of pixels to clip from the dst coordinate
* \param clipped_dst1 number of pixels to clip from the opposite dst coordinate
* \param scale the src vs dst scale involved for that coordinate
* \param is_left_or_bottom true if we are clipping the left or bottom sides
* of the rect.
*/
static void
clip_coordinates(bool mirror,
float *src, float *dst0, float *dst1,
float clipped_dst0,
float clipped_dst1,
float scale,
bool is_left_or_bottom)
{
/* When clipping we need to add or subtract pixels from the original
* coordinates depending on whether we are acting on the left/bottom
* or right/top sides of the rect respectively. We assume we have to
* add them in the code below, and multiply by -1 when we should
* subtract.
*/
int mult = is_left_or_bottom ? 1 : -1;
if (!mirror) {
*dst0 += clipped_dst0 * mult;
*src += clipped_dst0 * scale * mult;
} else {
*dst1 -= clipped_dst1 * mult;
*src += clipped_dst1 * scale * mult;
}
}
/**
* Apply a scissor rectangle to blit coordinates.
*
* Returns true if the blit was entirely scissored away.
*/
static bool
apply_blit_scissor(const struct pipe_scissor_state *scissor,
float *src_x0, float *src_y0,
float *src_x1, float *src_y1,
float *dst_x0, float *dst_y0,
float *dst_x1, float *dst_y1,
bool mirror_x, bool mirror_y)
{
float clip_dst_x0, clip_dst_x1, clip_dst_y0, clip_dst_y1;
/* Compute number of pixels to scissor away. */
if (!compute_pixels_clipped(*dst_x0, *dst_y0, *dst_x1, *dst_y1,
scissor->minx, scissor->miny,
scissor->maxx, scissor->maxy,
&clip_dst_x0, &clip_dst_y0,
&clip_dst_x1, &clip_dst_y1))
return true;
// XXX: comments assume source clipping, which we don't do
/* When clipping any of the two rects we need to adjust the coordinates
* in the other rect considering the scaling factor involved. To obtain
* the best precision we want to make sure that we only clip once per
* side to avoid accumulating errors due to the scaling adjustment.
*
* For example, if src_x0 and dst_x0 need both to be clipped we want to
* avoid the situation where we clip src_x0 first, then adjust dst_x0
* accordingly but then we realize that the resulting dst_x0 still needs
* to be clipped, so we clip dst_x0 and adjust src_x0 again. Because we are
* applying scaling factors to adjust the coordinates in each clipping
* pass we lose some precision and that can affect the results of the
* blorp blit operation slightly. What we want to do here is detect the
* rect that we should clip first for each side so that when we adjust
* the other rect we ensure the resulting coordinate does not need to be
* clipped again.
*
* The code below implements this by comparing the number of pixels that
* we need to clip for each side of both rects considering the scales
* involved. For example, clip_src_x0 represents the number of pixels
* to be clipped for the src rect's left side, so if clip_src_x0 = 5,
* clip_dst_x0 = 4 and scale_x = 2 it means that we are clipping more
* from the dst rect so we should clip dst_x0 only and adjust src_x0.
* This is because clipping 4 pixels in the dst is equivalent to
* clipping 4 * 2 = 8 > 5 in the src.
*/
if (*src_x0 == *src_x1 || *src_y0 == *src_y1
|| *dst_x0 == *dst_x1 || *dst_y0 == *dst_y1)
return true;
float scale_x = (float) (*src_x1 - *src_x0) / (*dst_x1 - *dst_x0);
float scale_y = (float) (*src_y1 - *src_y0) / (*dst_y1 - *dst_y0);
/* Clip left side */
clip_coordinates(mirror_x, src_x0, dst_x0, dst_x1,
clip_dst_x0, clip_dst_x1, scale_x, true);
/* Clip right side */
clip_coordinates(mirror_x, src_x1, dst_x1, dst_x0,
clip_dst_x1, clip_dst_x0, scale_x, false);
/* Clip bottom side */
clip_coordinates(mirror_y, src_y0, dst_y0, dst_y1,
clip_dst_y0, clip_dst_y1, scale_y, true);
/* Clip top side */
clip_coordinates(mirror_y, src_y1, dst_y1, dst_y0,
clip_dst_y1, clip_dst_y0, scale_y, false);
/* Check for invalid bounds
* Can't blit for 0-dimensions
*/
return *src_x0 == *src_x1 || *src_y0 == *src_y1
|| *dst_x0 == *dst_x1 || *dst_y0 == *dst_y1;
}
void
crocus_blorp_surf_for_resource(struct crocus_vtable *vtbl,
struct isl_device *isl_dev,
struct blorp_surf *surf,
struct pipe_resource *p_res,
enum isl_aux_usage aux_usage,
unsigned level,
bool is_render_target)
{
struct crocus_resource *res = (void *) p_res;
assert(!crocus_resource_unfinished_aux_import(res));
if (isl_aux_usage_has_hiz(aux_usage) &&
!crocus_resource_level_has_hiz(res, level))
aux_usage = ISL_AUX_USAGE_NONE;
*surf = (struct blorp_surf) {
.surf = &res->surf,
.addr = (struct blorp_address) {
.buffer = res->bo,
.offset = res->offset,
.reloc_flags = is_render_target ? EXEC_OBJECT_WRITE : 0,
.mocs = crocus_mocs(res->bo, isl_dev),
},
.aux_usage = aux_usage,
};
if (aux_usage != ISL_AUX_USAGE_NONE) {
surf->aux_surf = &res->aux.surf;
surf->aux_addr = (struct blorp_address) {
.buffer = res->aux.bo,
.offset = res->aux.offset,
.reloc_flags = is_render_target ? EXEC_OBJECT_WRITE : 0,
.mocs = crocus_mocs(res->bo, isl_dev),
};
surf->clear_color =
crocus_resource_get_clear_color(res);
}
}
static void
tex_cache_flush_hack(struct crocus_batch *batch,
enum isl_format view_format,
enum isl_format surf_format)
{
/* The WaSamplerCacheFlushBetweenRedescribedSurfaceReads workaround says:
*
* "Currently Sampler assumes that a surface would not have two
* different format associate with it. It will not properly cache
* the different views in the MT cache, causing a data corruption."
*
* We may need to handle this for texture views in general someday, but
* for now we handle it here, as it hurts copies and blits particularly
* badly because they often reinterpret formats.
*
* If the BO hasn't been referenced yet this batch, we assume that the
* texture cache doesn't contain any relevant data nor need flushing.
*
* Icelake (Gen11+) claims to fix this issue, but seems to still have
* issues with ASTC formats.
*/
bool need_flush = view_format != surf_format;
if (!need_flush)
return;
const char *reason =
"workaround: WaSamplerCacheFlushBetweenRedescribedSurfaceReads";
crocus_emit_pipe_control_flush(batch, reason, PIPE_CONTROL_CS_STALL);
crocus_emit_pipe_control_flush(batch, reason,
PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE);
}
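/**
 * Return the crocus_resource backing the requested aspect: the separate
 * stencil resource for PIPE_MASK_S, otherwise the resource itself.
 */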
static struct crocus_resource *
crocus_resource_for_aspect(const struct intel_device_info *devinfo,
struct pipe_resource *p_res, unsigned pipe_mask)
{
if (pipe_mask == PIPE_MASK_S) {
struct crocus_resource *junk, *s_res;
crocus_get_depth_stencil_resources(devinfo, p_res, &junk, &s_res);
return s_res;
} else {
return (struct crocus_resource *)p_res;
}
}
static enum pipe_format
pipe_format_for_aspect(enum pipe_format format, unsigned pipe_mask)
{
if (pipe_mask == PIPE_MASK_S) {
return util_format_stencil_only(format);
} else if (pipe_mask == PIPE_MASK_Z) {
return util_format_get_depth_only(format);
} else {
return format;
}
}
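/**
 * Fallback blit path using util_blitter.  Drops the alpha channel from the
 * mask when the destination format has no alpha.
 */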
static void
crocus_u_blitter(struct crocus_context *ice,
const struct pipe_blit_info *info)
{
struct pipe_blit_info dinfo = *info;
if (!util_format_has_alpha(dinfo.dst.resource->format))
dinfo.mask &= ~PIPE_MASK_A;
crocus_blitter_begin(ice, CROCUS_SAVE_FRAMEBUFFER | CROCUS_SAVE_TEXTURES | CROCUS_SAVE_FRAGMENT_STATE, info->render_condition_enable);
util_blitter_blit(ice->blitter, &dinfo);
}
/**
* The pipe->blit() driver hook.
*
* This performs a blit between two surfaces, which copies data but may
* also perform format conversion, scaling, flipping, and so on.
*/
static void
crocus_blit(struct pipe_context *ctx, const struct pipe_blit_info *info)
{
struct crocus_context *ice = (void *) ctx;
struct crocus_screen *screen = (struct crocus_screen *)ctx->screen;
const struct intel_device_info *devinfo = &screen->devinfo;
struct crocus_batch *batch = &ice->batches[CROCUS_BATCH_RENDER];
enum blorp_batch_flags blorp_flags = 0;
/* We don't support color masking. */
assert((info->mask & PIPE_MASK_RGBA) == PIPE_MASK_RGBA ||
(info->mask & PIPE_MASK_RGBA) == 0);
if (info->render_condition_enable)
if (!crocus_check_conditional_render(ice))
return;
if (devinfo->ver <= 5) {
if (!screen->vtbl.blit_blt(batch, info)) {
if (!util_format_is_depth_or_stencil(info->src.resource->format) &&
info->dst.resource->target != PIPE_TEXTURE_3D)
goto use_blorp;
if (!util_blitter_is_blit_supported(ice->blitter, info)) {
if (util_format_is_depth_or_stencil(info->src.resource->format)) {
struct pipe_blit_info depth_blit = *info;
depth_blit.mask = PIPE_MASK_Z;
crocus_blitter_begin(ice, CROCUS_SAVE_FRAMEBUFFER | CROCUS_SAVE_TEXTURES | CROCUS_SAVE_FRAGMENT_STATE, info->render_condition_enable);
util_blitter_blit(ice->blitter, &depth_blit);
struct pipe_surface *dst_view, dst_templ;
util_blitter_default_dst_texture(&dst_templ, info->dst.resource, info->dst.level, info->dst.box.z);
dst_view = ctx->create_surface(ctx, info->dst.resource, &dst_templ);
crocus_blitter_begin(ice, CROCUS_SAVE_FRAMEBUFFER | CROCUS_SAVE_TEXTURES | CROCUS_SAVE_FRAGMENT_STATE, info->render_condition_enable);
util_blitter_clear_depth_stencil(ice->blitter, dst_view, PIPE_CLEAR_STENCIL,
0, 0, info->dst.box.x, info->dst.box.y,
info->dst.box.width, info->dst.box.height);
crocus_blitter_begin(ice, CROCUS_SAVE_FRAMEBUFFER | CROCUS_SAVE_TEXTURES | CROCUS_SAVE_FRAGMENT_STATE, info->render_condition_enable);
util_blitter_stencil_fallback(ice->blitter,
info->dst.resource,
info->dst.level,
&info->dst.box,
info->src.resource,
info->src.level,
&info->src.box, NULL);
}
return;
}
crocus_u_blitter(ice, info);
}
return;
}
if (devinfo->ver == 6) {
if (info->src.resource->target == PIPE_TEXTURE_3D &&
info->dst.resource->target == PIPE_TEXTURE_3D) {
crocus_u_blitter(ice, info);
return;
}
}
use_blorp:
if (info->render_condition_enable) {
if (ice->state.predicate == CROCUS_PREDICATE_STATE_USE_BIT)
blorp_flags |= BLORP_BATCH_PREDICATE_ENABLE;
}
float src_x0 = info->src.box.x;
float src_x1 = info->src.box.x + info->src.box.width;
float src_y0 = info->src.box.y;
float src_y1 = info->src.box.y + info->src.box.height;
float dst_x0 = info->dst.box.x;
float dst_x1 = info->dst.box.x + info->dst.box.width;
float dst_y0 = info->dst.box.y;
float dst_y1 = info->dst.box.y + info->dst.box.height;
bool mirror_x = apply_mirror(&src_x0, &src_x1);
bool mirror_y = apply_mirror(&src_y0, &src_y1);
enum blorp_filter filter;
if (info->scissor_enable) {
bool noop = apply_blit_scissor(&info->scissor,
&src_x0, &src_y0, &src_x1, &src_y1,
&dst_x0, &dst_y0, &dst_x1, &dst_y1,
mirror_x, mirror_y);
if (noop)
return;
}
if (abs(info->dst.box.width) == abs(info->src.box.width) &&
abs(info->dst.box.height) == abs(info->src.box.height)) {
if (info->src.resource->nr_samples > 1 &&
info->dst.resource->nr_samples <= 1) {
/* The OpenGL ES 3.2 specification, section 16.2.1, says:
*
* "If the read framebuffer is multisampled (its effective
* value of SAMPLE_BUFFERS is one) and the draw framebuffer
* is not (its value of SAMPLE_BUFFERS is zero), the samples
* corresponding to each pixel location in the source are
* converted to a single sample before being written to the
* destination. The filter parameter is ignored. If the
* source formats are integer types or stencil values, a
* single samples value is selected for each pixel. If the
* source formats are floating-point or normalized types,
* the sample values for each pixel are resolved in an
* implementation-dependent manner. If the source formats
* are depth values, sample values are resolved in an
* implementation-dependent manner where the result will be
* between the minimum and maximum depth values in the pixel."
*
* When selecting a single sample, we always choose sample 0.
*/
if (util_format_is_depth_or_stencil(info->src.format) ||
util_format_is_pure_integer(info->src.format)) {
filter = BLORP_FILTER_SAMPLE_0;
} else {
filter = BLORP_FILTER_AVERAGE;
}
} else {
/* The OpenGL 4.6 specification, section 18.3.1, says:
*
* "If the source and destination dimensions are identical,
* no filtering is applied."
*
* Using BLORP_FILTER_NONE will also handle the upsample case by
* replicating the one value in the source to all values in the
* destination.
*/
filter = BLORP_FILTER_NONE;
}
} else if (info->filter == PIPE_TEX_FILTER_LINEAR) {
filter = BLORP_FILTER_BILINEAR;
} else {
filter = BLORP_FILTER_NEAREST;
}
struct blorp_batch blorp_batch;
blorp_batch_init(&ice->blorp, &blorp_batch, batch, blorp_flags);
float src_z_step = (float)info->src.box.depth / (float)info->dst.box.depth;
/* There is no interpolation to the pixel center during rendering, so
* add the 0.5 offset ourselves here.
*/
float depth_center_offset = 0;
if (info->src.resource->target == PIPE_TEXTURE_3D)
depth_center_offset = 0.5 / info->dst.box.depth * info->src.box.depth;
/* Perform a blit for each aspect requested by the caller. PIPE_MASK_R is
* used to represent the color aspect. */
unsigned aspect_mask = info->mask & (PIPE_MASK_R | PIPE_MASK_ZS);
while (aspect_mask) {
unsigned aspect = 1 << u_bit_scan(&aspect_mask);
struct crocus_resource *src_res =
crocus_resource_for_aspect(devinfo, info->src.resource, aspect);
struct crocus_resource *dst_res =
crocus_resource_for_aspect(devinfo, info->dst.resource, aspect);
enum pipe_format src_pfmt =
pipe_format_for_aspect(info->src.format, aspect);
enum pipe_format dst_pfmt =
pipe_format_for_aspect(info->dst.format, aspect);
if (crocus_resource_unfinished_aux_import(src_res))
crocus_resource_finish_aux_import(ctx->screen, src_res);
if (crocus_resource_unfinished_aux_import(dst_res))
crocus_resource_finish_aux_import(ctx->screen, dst_res);
struct crocus_format_info src_fmt =
crocus_format_for_usage(devinfo, src_pfmt, ISL_SURF_USAGE_TEXTURE_BIT);
enum isl_aux_usage src_aux_usage =
crocus_resource_texture_aux_usage(src_res);
crocus_resource_prepare_texture(ice, src_res, src_fmt.fmt,
info->src.level, 1, info->src.box.z,
info->src.box.depth);
// crocus_emit_buffer_barrier_for(batch, src_res->bo,
// CROCUS_DOMAIN_OTHER_READ);
struct crocus_format_info dst_fmt =
crocus_format_for_usage(devinfo, dst_pfmt,
ISL_SURF_USAGE_RENDER_TARGET_BIT);
enum isl_aux_usage dst_aux_usage =
crocus_resource_render_aux_usage(ice, dst_res, info->dst.level,
dst_fmt.fmt, false);
struct blorp_surf src_surf, dst_surf;
crocus_blorp_surf_for_resource(&screen->vtbl, &screen->isl_dev, &src_surf,
&src_res->base, src_aux_usage,
info->src.level, false);
crocus_blorp_surf_for_resource(&screen->vtbl, &screen->isl_dev, &dst_surf,
&dst_res->base, dst_aux_usage,
info->dst.level, true);
crocus_resource_prepare_render(ice, dst_res, info->dst.level,
info->dst.box.z, info->dst.box.depth,
dst_aux_usage);
// crocus_emit_buffer_barrier_for(batch, dst_res->bo,
// CROCUS_DOMAIN_RENDER_WRITE);
if (crocus_batch_references(batch, src_res->bo))
tex_cache_flush_hack(batch, src_fmt.fmt, src_res->surf.format);
if (dst_res->base.target == PIPE_BUFFER) {
util_range_add(&dst_res->base, &dst_res->valid_buffer_range,
dst_x0, dst_x1);
}
struct isl_swizzle src_swiz = pipe_to_isl_swizzles(src_fmt.swizzles);
struct isl_swizzle dst_swiz = pipe_to_isl_swizzles(dst_fmt.swizzles);
for (int slice = 0; slice < info->dst.box.depth; slice++) {
unsigned dst_z = info->dst.box.z + slice;
float src_z = info->src.box.z + slice * src_z_step +
depth_center_offset;
crocus_batch_maybe_flush(batch, 1500);
blorp_blit(&blorp_batch,
&src_surf, info->src.level, src_z,
src_fmt.fmt, src_swiz,
&dst_surf, info->dst.level, dst_z,
dst_fmt.fmt, dst_swiz,
src_x0, src_y0, src_x1, src_y1,
dst_x0, dst_y0, dst_x1, dst_y1,
filter, mirror_x, mirror_y);
}
tex_cache_flush_hack(batch, src_fmt.fmt, src_res->surf.format);
crocus_resource_finish_render(ice, dst_res, info->dst.level,
info->dst.box.z, info->dst.box.depth,
dst_aux_usage);
}
blorp_batch_finish(&blorp_batch);
crocus_flush_and_dirty_for_history(ice, batch, (struct crocus_resource *)
info->dst.resource,
PIPE_CONTROL_RENDER_TARGET_FLUSH,
"cache history: post-blit");
}
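/**
 * Choose the auxiliary surface usage for a raw copy: keep MCS (with the
 * stencil exception noted below), and disable aux for everything else.
 */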
static void
get_copy_region_aux_settings(struct crocus_resource *res,
enum isl_aux_usage *out_aux_usage,
bool is_render_target)
{
switch (res->aux.usage) {
case ISL_AUX_USAGE_MCS:
/* A stencil resolve operation must be performed prior to doing resource
 * copies or CPU access.
 * (see HSD 1209978162)
 */
if (is_render_target && isl_surf_usage_is_stencil(res->surf.usage)) {
*out_aux_usage = ISL_AUX_USAGE_NONE;
} else {
*out_aux_usage = res->aux.usage;
}
break;
default:
*out_aux_usage = ISL_AUX_USAGE_NONE;
break;
}
}
/**
* Perform a GPU-based raw memory copy between compatible view classes.
*
* Does not perform any flushing - the new data may still be left in the
* render cache, and old data may remain in other caches.
*
* Wraps blorp_copy() and blorp_buffer_copy().
*/
void
crocus_copy_region(struct blorp_context *blorp,
struct crocus_batch *batch,
struct pipe_resource *dst,
unsigned dst_level,
unsigned dstx, unsigned dsty, unsigned dstz,
struct pipe_resource *src,
unsigned src_level,
const struct pipe_box *src_box)
{
struct blorp_batch blorp_batch;
struct crocus_context *ice = blorp->driver_ctx;
struct crocus_screen *screen = (void *) ice->ctx.screen;
const struct intel_device_info *devinfo = &screen->devinfo;
struct crocus_resource *src_res = (void *) src;
struct crocus_resource *dst_res = (void *) dst;
if (devinfo->ver <= 5) {
if (screen->vtbl.copy_region_blt(batch, dst_res,
dst_level, dstx, dsty, dstz,
src_res, src_level, src_box))
return;
}
enum isl_aux_usage src_aux_usage, dst_aux_usage;
get_copy_region_aux_settings(src_res, &src_aux_usage,
false);
get_copy_region_aux_settings(dst_res, &dst_aux_usage,
true);
if (crocus_batch_references(batch, src_res->bo))
tex_cache_flush_hack(batch, ISL_FORMAT_UNSUPPORTED, src_res->surf.format);
if (dst->target == PIPE_BUFFER)
util_range_add(&dst_res->base, &dst_res->valid_buffer_range, dstx, dstx + src_box->width);
if (dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) {
struct blorp_address src_addr = {
.buffer = crocus_resource_bo(src), .offset = src_box->x,
};
struct blorp_address dst_addr = {
.buffer = crocus_resource_bo(dst), .offset = dstx,
.reloc_flags = EXEC_OBJECT_WRITE,
};
crocus_batch_maybe_flush(batch, 1500);
blorp_batch_init(&ice->blorp, &blorp_batch, batch, 0);
blorp_buffer_copy(&blorp_batch, src_addr, dst_addr, src_box->width);
blorp_batch_finish(&blorp_batch);
} else {
// XXX: what about one surface being a buffer and not the other?
struct blorp_surf src_surf, dst_surf;
crocus_blorp_surf_for_resource(&screen->vtbl, &screen->isl_dev, &src_surf,
src, src_aux_usage, src_level, false);
crocus_blorp_surf_for_resource(&screen->vtbl, &screen->isl_dev, &dst_surf,
dst, dst_aux_usage, dst_level, true);
crocus_resource_prepare_access(ice, src_res, src_level, 1,
src_box->z, src_box->depth,
src_aux_usage, false);
crocus_resource_prepare_access(ice, dst_res, dst_level, 1,
dstz, src_box->depth,
dst_aux_usage, false);
blorp_batch_init(&ice->blorp, &blorp_batch, batch, 0);
for (int slice = 0; slice < src_box->depth; slice++) {
crocus_batch_maybe_flush(batch, 1500);
blorp_copy(&blorp_batch, &src_surf, src_level, src_box->z + slice,
&dst_surf, dst_level, dstz + slice,
src_box->x, src_box->y, dstx, dsty,
src_box->width, src_box->height);
}
blorp_batch_finish(&blorp_batch);
crocus_resource_finish_write(ice, dst_res, dst_level, dstz,
src_box->depth, dst_aux_usage);
}
tex_cache_flush_hack(batch, ISL_FORMAT_UNSUPPORTED, src_res->surf.format);
}
static struct crocus_batch *
get_preferred_batch(struct crocus_context *ice, struct crocus_bo *bo)
{
/* If the compute batch is already using this buffer, we'd prefer to
* continue queueing in the compute batch.
*/
if (crocus_batch_references(&ice->batches[CROCUS_BATCH_COMPUTE], bo))
return &ice->batches[CROCUS_BATCH_COMPUTE];
/* Otherwise default to the render batch. */
return &ice->batches[CROCUS_BATCH_RENDER];
}
/**
* The pipe->resource_copy_region() driver hook.
*
* This implements ARB_copy_image semantics - a raw memory copy between
* compatible view classes.
*/
static void
crocus_resource_copy_region(struct pipe_context *ctx,
struct pipe_resource *p_dst,
unsigned dst_level,
unsigned dstx, unsigned dsty, unsigned dstz,
struct pipe_resource *p_src,
unsigned src_level,
const struct pipe_box *src_box)
{
struct crocus_context *ice = (void *) ctx;
struct crocus_batch *batch = &ice->batches[CROCUS_BATCH_RENDER];
struct crocus_screen *screen = (struct crocus_screen *)ctx->screen;
const struct intel_device_info *devinfo = &screen->devinfo;
struct crocus_resource *src = (void *) p_src;
struct crocus_resource *dst = (void *) p_dst;
if (crocus_resource_unfinished_aux_import(src))
crocus_resource_finish_aux_import(ctx->screen, src);
if (crocus_resource_unfinished_aux_import(dst))
crocus_resource_finish_aux_import(ctx->screen, dst);
/* Use MI_COPY_MEM_MEM for tiny (<= 16 byte, % 4) buffer copies. */
if (p_src->target == PIPE_BUFFER && p_dst->target == PIPE_BUFFER &&
(src_box->width % 4 == 0) && src_box->width <= 16 &&
screen->vtbl.copy_mem_mem) {
struct crocus_bo *dst_bo = crocus_resource_bo(p_dst);
batch = get_preferred_batch(ice, dst_bo);
crocus_batch_maybe_flush(batch, 24 + 5 * (src_box->width / 4));
crocus_emit_pipe_control_flush(batch,
"stall for MI_COPY_MEM_MEM copy_region",
PIPE_CONTROL_CS_STALL);
screen->vtbl.copy_mem_mem(batch, dst_bo, dstx, crocus_resource_bo(p_src),
src_box->x, src_box->width);
return;
}
if (devinfo->ver < 6 && util_format_is_depth_or_stencil(p_dst->format)) {
util_resource_copy_region(ctx, p_dst, dst_level, dstx, dsty, dstz,
p_src, src_level, src_box);
return;
}
crocus_copy_region(&ice->blorp, batch, p_dst, dst_level, dstx, dsty, dstz,
p_src, src_level, src_box);
if (util_format_is_depth_and_stencil(p_dst->format) &&
util_format_has_stencil(util_format_description(p_src->format)) &&
devinfo->ver >= 6) {
struct crocus_resource *junk, *s_src_res, *s_dst_res;
crocus_get_depth_stencil_resources(devinfo, p_src, &junk, &s_src_res);
crocus_get_depth_stencil_resources(devinfo, p_dst, &junk, &s_dst_res);
crocus_copy_region(&ice->blorp, batch, &s_dst_res->base, dst_level, dstx,
dsty, dstz, &s_src_res->base, src_level, src_box);
}
crocus_flush_and_dirty_for_history(ice, batch, dst,
PIPE_CONTROL_RENDER_TARGET_FLUSH,
"cache history: post copy_region");
}
void
crocus_init_blit_functions(struct pipe_context *ctx)
{
ctx->blit = crocus_blit;
ctx->resource_copy_region = crocus_resource_copy_region;
}


@@ -0,0 +1,399 @@
/*
* Copyright © 2018 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
/**
* @file crocus_blorp.c
*
* ============================= GENXML CODE =============================
* [This file is compiled once per generation.]
* =======================================================================
*
* GenX specific code for working with BLORP (blitting, resolves, clears
* on the 3D engine). This provides the driver-specific hooks needed to
* implement the BLORP API.
*
* See crocus_blit.c, crocus_clear.c, and so on.
*/
#include <assert.h>
#include "crocus_batch.h"
#include "crocus_resource.h"
#include "crocus_context.h"
#include "util/u_upload_mgr.h"
#include "intel/common/intel_l3_config.h"
#include "blorp/blorp_genX_exec.h"
#if GFX_VER <= 5
#include "gen4_blorp_exec.h"
#endif
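/**
 * Allocate \param size bytes (with the given alignment) in the batch's
 * dynamic state buffer, flushing or growing it as needed.  Returns a CPU
 * pointer to the space and stores its offset (and, optionally, the backing
 * BO).
 */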
static uint32_t *
stream_state(struct crocus_batch *batch,
unsigned size,
unsigned alignment,
uint32_t *out_offset,
struct crocus_bo **out_bo)
{
uint32_t offset = ALIGN(batch->state.used, alignment);
if (offset + size >= STATE_SZ && !batch->no_wrap) {
crocus_batch_flush(batch);
offset = ALIGN(batch->state.used, alignment);
} else if (offset + size >= batch->state.bo->size) {
const unsigned new_size =
MIN2(batch->state.bo->size + batch->state.bo->size / 2,
MAX_STATE_SIZE);
crocus_grow_buffer(batch, true, batch->state.used, new_size);
assert(offset + size < batch->state.bo->size);
}
crocus_record_state_size(batch->state_sizes, offset, size);
batch->state.used = offset + size;
*out_offset = offset;
/* If the caller has asked for a BO, we leave them the responsibility of
* adding bo->gtt_offset (say, by handing an address to genxml). If not,
* we assume they want the offset from a base address.
*/
if (out_bo)
*out_bo = batch->state.bo;
return (uint32_t *)batch->state.map + (offset >> 2);
}
static void *
blorp_emit_dwords(struct blorp_batch *blorp_batch, unsigned n)
{
struct crocus_batch *batch = blorp_batch->driver_batch;
return crocus_get_command_space(batch, n * sizeof(uint32_t));
}
static uint64_t
blorp_emit_reloc(struct blorp_batch *blorp_batch, UNUSED void *location,
struct blorp_address addr, uint32_t delta)
{
struct crocus_batch *batch = blorp_batch->driver_batch;
uint32_t offset;
if (GFX_VER < 6 && crocus_ptr_in_state_buffer(batch, location)) {
offset = (char *)location - (char *)batch->state.map;
return crocus_state_reloc(batch, offset,
addr.buffer, addr.offset + delta,
addr.reloc_flags);
}
assert(!crocus_ptr_in_state_buffer(batch, location));
offset = (char *)location - (char *)batch->command.map;
return crocus_command_reloc(batch, offset,
addr.buffer, addr.offset + delta,
addr.reloc_flags);
}
static void
blorp_surface_reloc(struct blorp_batch *blorp_batch, uint32_t ss_offset,
struct blorp_address addr, uint32_t delta)
{
struct crocus_batch *batch = blorp_batch->driver_batch;
struct crocus_bo *bo = addr.buffer;
uint64_t reloc_val =
crocus_state_reloc(batch, ss_offset, bo, addr.offset + delta,
addr.reloc_flags);
void *reloc_ptr = (void *)batch->state.map + ss_offset;
*(uint32_t *)reloc_ptr = reloc_val;
}
static uint64_t
blorp_get_surface_address(struct blorp_batch *blorp_batch,
struct blorp_address addr)
{
/* We'll let blorp_surface_reloc write the address. */
return 0ull;
}
#if GFX_VER >= 7
static struct blorp_address
blorp_get_surface_base_address(struct blorp_batch *blorp_batch)
{
struct crocus_batch *batch = blorp_batch->driver_batch;
return (struct blorp_address) {
.buffer = batch->state.bo,
.offset = 0
};
}
#endif
static void *
blorp_alloc_dynamic_state(struct blorp_batch *blorp_batch,
uint32_t size,
uint32_t alignment,
uint32_t *offset)
{
struct crocus_batch *batch = blorp_batch->driver_batch;
return stream_state(batch, size, alignment, offset, NULL);
}
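/**
 * Allocate a binding table plus \param num_entries surface states from the
 * dynamic state buffer, filling the table with the surface state offsets.
 */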
static void
blorp_alloc_binding_table(struct blorp_batch *blorp_batch,
unsigned num_entries,
unsigned state_size,
unsigned state_alignment,
uint32_t *bt_offset,
uint32_t *surface_offsets,
void **surface_maps)
{
struct crocus_batch *batch = blorp_batch->driver_batch;
uint32_t *bt_map = stream_state(batch, num_entries * sizeof(uint32_t), 32,
bt_offset, NULL);
for (unsigned i = 0; i < num_entries; i++) {
surface_maps[i] = stream_state(batch,
state_size, state_alignment,
&(surface_offsets)[i], NULL);
bt_map[i] = surface_offsets[i];
}
}
static void *
blorp_alloc_vertex_buffer(struct blorp_batch *blorp_batch,
uint32_t size,
struct blorp_address *addr)
{
struct crocus_batch *batch = blorp_batch->driver_batch;
struct crocus_bo *bo;
uint32_t offset;
void *map = stream_state(batch, size, 64,
&offset, &bo);
*addr = (struct blorp_address) {
.buffer = bo,
.offset = offset,
.reloc_flags = RELOC_32BIT,
#if GFX_VER >= 7
.mocs = crocus_mocs(bo, &batch->screen->isl_dev),
#endif
};
return map;
}
/**
 * No-op on gfx4-7: there is nothing to invalidate for 48-bit vertex buffer
 * address transitions here.
 */
static void
blorp_vf_invalidate_for_vb_48b_transitions(struct blorp_batch *blorp_batch,
const struct blorp_address *addrs,
UNUSED uint32_t *sizes,
unsigned num_vbs)
{
}
static struct blorp_address
blorp_get_workaround_address(struct blorp_batch *blorp_batch)
{
struct crocus_batch *batch = blorp_batch->driver_batch;
return (struct blorp_address) {
.buffer = batch->ice->workaround_bo,
.offset = batch->ice->workaround_offset,
};
}
static void
blorp_flush_range(UNUSED struct blorp_batch *blorp_batch,
UNUSED void *start,
UNUSED size_t size)
{
/* All allocated states come from the batch which we will flush before we
* submit it. There's nothing for us to do here.
*/
}
#if GFX_VER >= 7
static const struct intel_l3_config *
blorp_get_l3_config(struct blorp_batch *blorp_batch)
{
struct crocus_batch *batch = blorp_batch->driver_batch;
return batch->screen->l3_config_3d;
}
#else /* GFX_VER < 7 */
static void
blorp_emit_urb_config(struct blorp_batch *blorp_batch,
unsigned vs_entry_size,
UNUSED unsigned sf_entry_size)
{
struct crocus_batch *batch = blorp_batch->driver_batch;
#if GFX_VER <= 5
batch->screen->vtbl.calculate_urb_fence(batch, 0, vs_entry_size, sf_entry_size);
#else
genX(upload_urb)(batch, vs_entry_size, false, vs_entry_size);
#endif
}
#endif
static void
crocus_blorp_exec(struct blorp_batch *blorp_batch,
const struct blorp_params *params)
{
struct crocus_context *ice = blorp_batch->blorp->driver_ctx;
struct crocus_batch *batch = blorp_batch->driver_batch;
/* Flush the sampler and render caches. We definitely need to flush the
* sampler cache so that we get updated contents from the render cache for
* the glBlitFramebuffer() source. Also, we are sometimes warned in the
* docs to flush the cache between reinterpretations of the same surface
* data with different formats, which blorp does for stencil and depth
* data.
*/
if (params->src.enabled)
crocus_cache_flush_for_read(batch, params->src.addr.buffer);
if (params->dst.enabled) {
crocus_cache_flush_for_render(batch, params->dst.addr.buffer,
params->dst.view.format,
params->dst.aux_usage);
}
if (params->depth.enabled)
crocus_cache_flush_for_depth(batch, params->depth.addr.buffer);
if (params->stencil.enabled)
crocus_cache_flush_for_depth(batch, params->stencil.addr.buffer);
crocus_require_command_space(batch, 1400);
crocus_require_statebuffer_space(batch, 600);
batch->no_wrap = true;
#if GFX_VER == 6
/* Emit workaround flushes when we switch from drawing to blorping. */
crocus_emit_post_sync_nonzero_flush(batch);
#endif
#if GFX_VER >= 6
crocus_emit_depth_stall_flushes(batch);
#endif
blorp_emit(blorp_batch, GENX(3DSTATE_DRAWING_RECTANGLE), rect) {
rect.ClippedDrawingRectangleXMax = MAX2(params->x1, params->x0) - 1;
rect.ClippedDrawingRectangleYMax = MAX2(params->y1, params->y0) - 1;
}
batch->screen->vtbl.update_surface_base_address(batch);
crocus_handle_always_flush_cache(batch);
batch->contains_draw = true;
blorp_exec(blorp_batch, params);
batch->no_wrap = false;
crocus_handle_always_flush_cache(batch);
/* We've smashed all state compared to what the normal 3D pipeline
* rendering tracks for GL.
*/
uint64_t skip_bits = (CROCUS_DIRTY_POLYGON_STIPPLE |
CROCUS_DIRTY_GEN7_SO_BUFFERS |
CROCUS_DIRTY_SO_DECL_LIST |
CROCUS_DIRTY_LINE_STIPPLE |
CROCUS_ALL_DIRTY_FOR_COMPUTE |
CROCUS_DIRTY_GEN6_SCISSOR_RECT |
CROCUS_DIRTY_GEN75_VF |
CROCUS_DIRTY_SF_CL_VIEWPORT);
uint64_t skip_stage_bits = (CROCUS_ALL_STAGE_DIRTY_FOR_COMPUTE |
CROCUS_STAGE_DIRTY_UNCOMPILED_VS |
CROCUS_STAGE_DIRTY_UNCOMPILED_TCS |
CROCUS_STAGE_DIRTY_UNCOMPILED_TES |
CROCUS_STAGE_DIRTY_UNCOMPILED_GS |
CROCUS_STAGE_DIRTY_UNCOMPILED_FS |
CROCUS_STAGE_DIRTY_SAMPLER_STATES_VS |
CROCUS_STAGE_DIRTY_SAMPLER_STATES_TCS |
CROCUS_STAGE_DIRTY_SAMPLER_STATES_TES |
CROCUS_STAGE_DIRTY_SAMPLER_STATES_GS);
if (!ice->shaders.uncompiled[MESA_SHADER_TESS_EVAL]) {
/* BLORP disabled tessellation, that's fine for the next draw */
skip_stage_bits |= CROCUS_STAGE_DIRTY_TCS |
CROCUS_STAGE_DIRTY_TES |
CROCUS_STAGE_DIRTY_CONSTANTS_TCS |
CROCUS_STAGE_DIRTY_CONSTANTS_TES |
CROCUS_STAGE_DIRTY_BINDINGS_TCS |
CROCUS_STAGE_DIRTY_BINDINGS_TES;
}
if (!ice->shaders.uncompiled[MESA_SHADER_GEOMETRY]) {
/* BLORP disabled geometry shaders, that's fine for the next draw */
skip_stage_bits |= CROCUS_STAGE_DIRTY_GS |
CROCUS_STAGE_DIRTY_CONSTANTS_GS |
CROCUS_STAGE_DIRTY_BINDINGS_GS;
}
/* we can skip flagging CROCUS_DIRTY_DEPTH_BUFFER, if
* BLORP_BATCH_NO_EMIT_DEPTH_STENCIL is set.
*/
if (blorp_batch->flags & BLORP_BATCH_NO_EMIT_DEPTH_STENCIL)
skip_bits |= CROCUS_DIRTY_DEPTH_BUFFER;
if (!params->wm_prog_data)
skip_bits |= CROCUS_DIRTY_GEN6_BLEND_STATE;
ice->state.dirty |= ~skip_bits;
ice->state.stage_dirty |= ~skip_stage_bits;
ice->urb.vsize = 0;
ice->urb.gs_present = false;
ice->urb.gsize = 0;
ice->urb.tess_present = false;
ice->urb.hsize = 0;
ice->urb.dsize = 0;
if (params->dst.enabled) {
crocus_render_cache_add_bo(batch, params->dst.addr.buffer,
params->dst.view.format,
params->dst.aux_usage);
}
if (params->depth.enabled)
crocus_depth_cache_add_bo(batch, params->depth.addr.buffer);
if (params->stencil.enabled)
crocus_depth_cache_add_bo(batch, params->stencil.addr.buffer);
}
static void
blorp_measure_start(struct blorp_batch *blorp_batch,
const struct blorp_params *params)
{
}
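/**
 * Initialize BLORP for this context and plug in the crocus shader cache
 * lookup/upload hooks and the batch execution callback.
 */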
void
genX(init_blorp)(struct crocus_context *ice)
{
struct crocus_screen *screen = (struct crocus_screen *)ice->ctx.screen;
blorp_init(&ice->blorp, ice, &screen->isl_dev);
ice->blorp.compiler = screen->compiler;
ice->blorp.lookup_shader = crocus_blorp_lookup_shader;
ice->blorp.upload_shader = crocus_blorp_upload_shader;
ice->blorp.exec = crocus_blorp_exec;
}


@@ -0,0 +1,337 @@
/*
* Copyright © 2018 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
/* blt command encoding for gen4/5 */
#include "crocus_context.h"
#include "crocus_genx_macros.h"
#include "crocus_genx_protos.h"
#include "crocus_resource.h"
#define FILE_DEBUG_FLAG DEBUG_BLIT
#if GFX_VER <= 5
static bool validate_blit_for_blt(struct crocus_batch *batch,
const struct pipe_blit_info *info)
{
/* If the source and destination are the same size with no mirroring,
* the rectangles are within the size of the texture and there is no
* scissor, then we can probably use the blit engine.
*/
if (info->dst.box.width != info->src.box.width ||
info->dst.box.height != info->src.box.height)
return false;
if (info->scissor_enable)
return false;
if (info->dst.box.height < 0 || info->src.box.height < 0)
return false;
if (info->dst.box.depth > 1 || info->src.box.depth > 1)
return false;
return true;
}
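/**
 * Return the surface pitch in the units XY_SRC_COPY_BLT expects: bytes for
 * linear surfaces, dwords for tiled ones.
 */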
static inline int crocus_resource_blt_pitch(struct crocus_resource *res)
{
int pitch = res->surf.row_pitch_B;
if (res->surf.tiling != ISL_TILING_LINEAR)
pitch /= 4;
return pitch;
}
static uint32_t
color_depth_for_cpp(int cpp)
{
switch (cpp) {
case 4: return COLOR_DEPTH__32bit;
case 2: return COLOR_DEPTH__565;
case 1: return COLOR_DEPTH__8bit;
default:
unreachable("not reached");
}
}
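/**
 * Emit a single XY_SRC_COPY_BLT for one rectangle, adjusting coordinates
 * and pitches for tiling and for formats wider than 32bpp.
 */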
static bool emit_copy_blt(struct crocus_batch *batch,
struct crocus_resource *src,
struct crocus_resource *dst,
unsigned cpp,
int32_t src_pitch,
unsigned src_offset,
int32_t dst_pitch,
unsigned dst_offset,
uint16_t src_x, uint16_t src_y,
uint16_t dst_x, uint16_t dst_y,
uint16_t w, uint16_t h)
{
uint32_t src_tile_w, src_tile_h;
uint32_t dst_tile_w, dst_tile_h;
int dst_y2 = dst_y + h;
int dst_x2 = dst_x + w;
DBG("%s src:buf(%p)/%d+%d %d,%d dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n",
__func__,
src, src_pitch, src_offset, src_x, src_y,
dst, dst_pitch, dst_offset, dst_x, dst_y, w, h);
isl_get_tile_dims(src->surf.tiling, cpp, &src_tile_w, &src_tile_h);
isl_get_tile_dims(dst->surf.tiling, cpp, &dst_tile_w, &dst_tile_h);
/* For Tiled surfaces, the pitch has to be a multiple of the Tile width
* (X direction width of the Tile). This is ensured while allocating the
* buffer object.
*/
assert(src->surf.tiling == ISL_TILING_LINEAR || (src_pitch % src_tile_w) == 0);
assert(dst->surf.tiling == ISL_TILING_LINEAR || (dst_pitch % dst_tile_w) == 0);
/* For big formats (such as floating point), do the copy using 16 or
* 32bpp and multiply the coordinates.
*/
if (cpp > 4) {
if (cpp % 4 == 2) {
dst_x *= cpp / 2;
dst_x2 *= cpp / 2;
src_x *= cpp / 2;
cpp = 2;
} else {
assert(cpp % 4 == 0);
dst_x *= cpp / 4;
dst_x2 *= cpp / 4;
src_x *= cpp / 4;
cpp = 4;
}
}
/* For tiled source and destination, pitch value should be specified
* as a number of Dwords.
*/
if (dst->surf.tiling != ISL_TILING_LINEAR)
dst_pitch /= 4;
if (src->surf.tiling != ISL_TILING_LINEAR)
src_pitch /= 4;
assert(cpp <= 4);
crocus_emit_cmd(batch, GENX(XY_SRC_COPY_BLT), xyblt) {
xyblt.RasterOperation = 0xCC;
xyblt.DestinationTilingEnable = dst->surf.tiling != ISL_TILING_LINEAR;
xyblt.SourceTilingEnable = src->surf.tiling != ISL_TILING_LINEAR;
xyblt.SourceBaseAddress = ro_bo(src->bo, src_offset);
xyblt.DestinationBaseAddress = rw_bo(dst->bo, dst_offset);
xyblt.ColorDepth = color_depth_for_cpp(cpp);
xyblt._32bppByteMask = cpp == 4 ? 0x3 : 0x1;
xyblt.DestinationX1Coordinate = dst_x;
xyblt.DestinationY1Coordinate = dst_y;
xyblt.DestinationX2Coordinate = dst_x2;
xyblt.DestinationY2Coordinate = dst_y2;
xyblt.DestinationPitch = dst_pitch;
xyblt.SourceX1Coordinate = src_x;
xyblt.SourceY1Coordinate = src_y;
xyblt.SourcePitch = src_pitch;
};
crocus_emit_mi_flush(batch);
return true;
}
static bool crocus_emit_blt(struct crocus_batch *batch,
struct crocus_resource *src,
struct crocus_resource *dst,
unsigned dst_level,
unsigned dst_x, unsigned dst_y,
unsigned dst_z,
unsigned src_level,
const struct pipe_box *src_box)
{
const struct isl_format_layout *src_fmtl = isl_format_get_layout(src->surf.format);
unsigned src_cpp = src_fmtl->bpb / 8;
const struct isl_format_layout *dst_fmtl = isl_format_get_layout(dst->surf.format);
const unsigned dst_cpp = dst_fmtl->bpb / 8;
uint16_t src_x, src_y;
uint32_t src_image_x, src_image_y, dst_image_x, dst_image_y;
uint32_t src_width = src_box->width, src_height = src_box->height;
/* gen4/5 can't handle Y tiled blits. */
if (src->surf.tiling == ISL_TILING_Y0 || dst->surf.tiling == ISL_TILING_Y0)
return false;
if (src->surf.format != dst->surf.format)
return false;
if (src_cpp != dst_cpp)
return false;
src_x = src_box->x;
src_y = src_box->y;
assert(src_cpp == dst_cpp);
crocus_resource_get_image_offset(src, src_level, src_box->z, &src_image_x,
&src_image_y);
if (util_format_is_compressed(src->base.format)) {
int bw = util_format_get_blockwidth(src->base.format);
int bh = util_format_get_blockheight(src->base.format);
assert(src_x % bw == 0);
assert(src_y % bh == 0);
src_x /= (int)bw;
src_y /= (int)bh;
src_width = DIV_ROUND_UP(src_width, (int)bw);
src_height = DIV_ROUND_UP(src_height, (int)bh);
}
crocus_resource_get_image_offset(dst, dst_level, dst_z, &dst_image_x,
&dst_image_y);
if (util_format_is_compressed(dst->base.format)) {
int bw = util_format_get_blockwidth(dst->base.format);
int bh = util_format_get_blockheight(dst->base.format);
assert(dst_x % bw == 0);
assert(dst_y % bh == 0);
dst_x /= (int)bw;
dst_y /= (int)bh;
}
src_x += src_image_x;
src_y += src_image_y;
dst_x += dst_image_x;
dst_y += dst_image_y;
/* According to the Ivy Bridge PRM, Vol1 Part4, section 1.2.1.2 (Graphics
* Data Size Limitations):
*
* The BLT engine is capable of transferring very large quantities of
* graphics data. Any graphics data read from and written to the
* destination is permitted to represent a number of pixels that
* occupies up to 65,536 scan lines and up to 32,768 bytes per scan line
* at the destination. The maximum number of pixels that may be
* represented per scan lines worth of graphics data depends on the
* color depth.
*
* The blitter's pitch is a signed 16-bit integer, but measured in bytes
* for linear surfaces and DWords for tiled surfaces. So the maximum
* pitch is 32k linear and 128k tiled.
*/
if (crocus_resource_blt_pitch(src) >= 32768 ||
crocus_resource_blt_pitch(dst) >= 32768) {
return false;
}
/* We need to split the blit into chunks that each fit within the blitter's
* restrictions. We can't use a chunk size of 32768 because we need to
* ensure that src_tile_x + chunk_size fits. We choose 16384 because it's
* a nice round power of two, big enough that performance won't suffer, and
* small enough to guarantee everything fits.
*/
const uint32_t max_chunk_size = 16384;
for (uint32_t chunk_x = 0; chunk_x < src_width; chunk_x += max_chunk_size) {
for (uint32_t chunk_y = 0; chunk_y < src_height; chunk_y += max_chunk_size) {
const uint32_t chunk_w = MIN2(max_chunk_size, src_width - chunk_x);
const uint32_t chunk_h = MIN2(max_chunk_size, src_height - chunk_y);
ASSERTED uint32_t z_offset_el, array_offset;
uint32_t src_offset, src_tile_x, src_tile_y;
isl_tiling_get_intratile_offset_el(src->surf.tiling,
src_cpp * 8, src->surf.row_pitch_B,
src->surf.array_pitch_el_rows,
src_x + chunk_x, src_y + chunk_y, 0, 0,
&src_offset,
&src_tile_x, &src_tile_y,
&z_offset_el, &array_offset);
assert(z_offset_el == 0);
assert(array_offset == 0);
uint32_t dst_offset, dst_tile_x, dst_tile_y;
isl_tiling_get_intratile_offset_el(dst->surf.tiling,
dst_cpp * 8, dst->surf.row_pitch_B,
dst->surf.array_pitch_el_rows,
dst_x + chunk_x, dst_y + chunk_y, 0, 0,
&dst_offset,
&dst_tile_x, &dst_tile_y,
&z_offset_el, &array_offset);
assert(z_offset_el == 0);
assert(array_offset == 0);
if (!emit_copy_blt(batch, src, dst,
src_cpp, src->surf.row_pitch_B,
src_offset,
dst->surf.row_pitch_B, dst_offset,
src_tile_x, src_tile_y,
dst_tile_x, dst_tile_y,
chunk_w, chunk_h)) {
return false;
}
}
}
return true;
}
static bool crocus_blit_blt(struct crocus_batch *batch,
const struct pipe_blit_info *info)
{
if (!validate_blit_for_blt(batch, info))
return false;
return crocus_emit_blt(batch,
(struct crocus_resource *)info->src.resource,
(struct crocus_resource *)info->dst.resource,
info->dst.level,
info->dst.box.x,
info->dst.box.y,
info->dst.box.z,
info->src.level,
&info->src.box);
}
static bool crocus_copy_region_blt(struct crocus_batch *batch,
struct crocus_resource *dst,
unsigned dst_level,
unsigned dstx, unsigned dsty, unsigned dstz,
struct crocus_resource *src,
unsigned src_level,
const struct pipe_box *src_box)
{
if (dst->base.target == PIPE_BUFFER || src->base.target == PIPE_BUFFER)
return false;
return crocus_emit_blt(batch,
src,
dst,
dst_level,
dstx, dsty, dstz,
src_level,
src_box);
}
#endif
void
genX(init_blt)(struct crocus_screen *screen)
{
#if GFX_VER <= 5
screen->vtbl.blit_blt = crocus_blit_blt;
screen->vtbl.copy_region_blt = crocus_copy_region_blt;
#else
screen->vtbl.blit_blt = NULL;
screen->vtbl.copy_region_blt = NULL;
#endif
}

File diff suppressed because it is too large

View File

@ -0,0 +1,331 @@
/*
* Copyright © 2017 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#ifndef CROCUS_BUFMGR_H
#define CROCUS_BUFMGR_H
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/types.h>
#include "util/macros.h"
#include "util/u_atomic.h"
#include "util/list.h"
#include "pipe/p_defines.h"
struct crocus_batch;
struct intel_device_info;
struct pipe_debug_callback;
#define CROCUS_BINDER_SIZE (64 * 1024)
#define CROCUS_MAX_BINDERS 100
struct crocus_bo {
/**
* Size in bytes of the buffer object.
*
* The size may be larger than the size originally requested for the
* allocation, such as being aligned to page size.
*/
uint64_t size;
/** Buffer manager context associated with this buffer object */
struct crocus_bufmgr *bufmgr;
/** The GEM handle for this buffer object. */
uint32_t gem_handle;
/**
* Virtual address of the buffer inside the PPGTT (Per-Process Graphics
* Translation Table).
*
* Although each hardware context has its own VMA, we assign BO's to the
* same address in all contexts, for simplicity.
*/
uint64_t gtt_offset;
/**
* The validation list index for this buffer, or -1 when not in a batch.
* Note that a single buffer may be in multiple batches (contexts), and
* this is a global field, which refers to the last batch using the BO.
* It should not be considered authoritative, but can be used to avoid a
* linear walk of the validation list in the common case by guessing that
* exec_bos[bo->index] == bo and confirming whether that's the case.
*
* XXX: this is not ideal now that we have more than one batch per context,
* XXX: as the index will flop back and forth between the render index and
* XXX: compute index...
*/
unsigned index;
/**
* Boolean of whether the GPU is definitely not accessing the buffer.
*
* This is only valid when reusable, since non-reusable
* buffers are those that have been shared with other
* processes, so we don't know their state.
*/
bool idle;
int refcount;
const char *name;
uint64_t kflags;
/**
* Kernel-assigned global name for this object
*
* List contains both flink named and prime fd'd objects
*/
unsigned global_name;
/**
* Current tiling mode
*/
uint32_t tiling_mode;
uint32_t swizzle_mode;
uint32_t stride;
time_t free_time;
/** Mapped address for the buffer, saved across map/unmap cycles */
void *map_cpu;
/** GTT virtual address for the buffer, saved across map/unmap cycles */
void *map_gtt;
/** WC CPU address for the buffer, saved across map/unmap cycles */
void *map_wc;
/** BO cache list */
struct list_head head;
/** List of GEM handle exports of this buffer (bo_export) */
struct list_head exports;
/**
* Boolean of whether this buffer can be re-used
*/
bool reusable;
/**
* Boolean of whether this buffer has been shared with an external client.
*/
bool external;
/**
* Boolean of whether this buffer is cache coherent
*/
bool cache_coherent;
/**
* Boolean of whether this buffer points into user memory
*/
bool userptr;
/** Pre-computed hash using _mesa_hash_pointer for cache tracking sets */
uint32_t hash;
};
#define BO_ALLOC_ZEROED (1 << 0)
#define BO_ALLOC_COHERENT (1 << 1)
/**
* Allocate a buffer object.
*
* Buffer objects are not necessarily initially mapped into CPU virtual
* address space or graphics device aperture. They must be mapped
* using crocus_bo_map() to be used by the CPU.
*/
struct crocus_bo *crocus_bo_alloc(struct crocus_bufmgr *bufmgr,
const char *name, uint64_t size);
/**
* Allocate a tiled buffer object.
*
* Alignment for tiled objects is set automatically; the 'flags'
* argument provides a hint about how the object will be used initially.
*
* Valid tiling formats are:
* I915_TILING_NONE
* I915_TILING_X
* I915_TILING_Y
*/
struct crocus_bo *crocus_bo_alloc_tiled(struct crocus_bufmgr *bufmgr,
const char *name, uint64_t size,
uint32_t alignment,
uint32_t tiling_mode, uint32_t pitch,
unsigned flags);
struct crocus_bo *crocus_bo_create_userptr(struct crocus_bufmgr *bufmgr,
const char *name, void *ptr,
size_t size);
/** Takes a reference on a buffer object */
static inline void
crocus_bo_reference(struct crocus_bo *bo)
{
p_atomic_inc(&bo->refcount);
}
/**
* Releases a reference on a buffer object, freeing the data if
* no references remain.
*/
void crocus_bo_unreference(struct crocus_bo *bo);
#define MAP_READ PIPE_MAP_READ
#define MAP_WRITE PIPE_MAP_WRITE
#define MAP_ASYNC PIPE_MAP_UNSYNCHRONIZED
#define MAP_PERSISTENT PIPE_MAP_PERSISTENT
#define MAP_COHERENT PIPE_MAP_COHERENT
/* internal */
#define MAP_INTERNAL_MASK (0xff << 24)
#define MAP_RAW (0x01 << 24)
#define MAP_FLAGS (MAP_READ | MAP_WRITE | MAP_ASYNC | \
MAP_PERSISTENT | MAP_COHERENT | MAP_INTERNAL_MASK)
/**
* Maps the buffer into userspace.
*
* This function will block waiting for any existing execution on the
* buffer to complete, first. The resulting mapping is returned.
*/
MUST_CHECK void *crocus_bo_map(struct pipe_debug_callback *dbg,
struct crocus_bo *bo, unsigned flags);
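/* A minimal usage sketch (hypothetical caller, error handling omitted):
 *
 *    struct crocus_bo *bo = crocus_bo_alloc(bufmgr, "scratch", 4096);
 *    uint32_t *map = crocus_bo_map(NULL, bo, MAP_WRITE);
 *    if (map) {
 *       map[0] = 0xdeadbeef;
 *       crocus_bo_unmap(bo);
 *    }
 *    crocus_bo_unreference(bo);
 */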
/**
* Reduces the refcount on the userspace mapping of the buffer
* object.
*/
static inline int crocus_bo_unmap(struct crocus_bo *bo) { return 0; }
/**
* Waits for rendering to an object by the GPU to have completed.
*
* This is not required for any access to the BO by bo_map,
* bo_subdata, etc. It is merely a way for the driver to implement
* glFinish.
*/
void crocus_bo_wait_rendering(struct crocus_bo *bo);
/**
* Unref a buffer manager instance.
*/
void crocus_bufmgr_unref(struct crocus_bufmgr *bufmgr);
/**
* Get the current tiling (and resulting swizzling) mode for the bo.
*
* \param buf Buffer to get tiling mode for
* \param tiling_mode returned tiling mode
* \param swizzle_mode returned swizzling mode
*/
int crocus_bo_get_tiling(struct crocus_bo *bo, uint32_t *tiling_mode,
uint32_t *swizzle_mode);
/**
* Create a visible name for a buffer which can be used by other apps
*
* \param buf Buffer to create a name for
* \param name Returned name
*/
int crocus_bo_flink(struct crocus_bo *bo, uint32_t *name);
/**
* Is this buffer shared with external clients (exported)?
*/
static inline bool
crocus_bo_is_external(const struct crocus_bo *bo)
{
return bo->external;
}
/**
* Returns 1 if mapping the buffer for write could cause the process
* to block, due to the object being active in the GPU.
*/
int crocus_bo_busy(struct crocus_bo *bo);
/**
* Specify the volatility of the buffer.
* \param bo Buffer to create a name for
* \param madv The purgeable status
*
* Use I915_MADV_DONTNEED to mark the buffer as purgeable, and it will be
* reclaimed under memory pressure. If you subsequently require the buffer,
* then you must pass I915_MADV_WILLNEED to mark the buffer as required.
*
* Returns 1 if the buffer was retained, or 0 if it was discarded whilst
* marked as I915_MADV_DONTNEED.
*/
int crocus_bo_madvise(struct crocus_bo *bo, int madv);
/* drm_bacon_bufmgr_gem.c */
struct crocus_bufmgr *
crocus_bufmgr_get_for_fd(struct intel_device_info *devinfo, int fd,
bool bo_reuse);
int crocus_bufmgr_get_fd(struct crocus_bufmgr *bufmgr);
struct crocus_bo *crocus_bo_gem_create_from_name(struct crocus_bufmgr *bufmgr,
const char *name,
unsigned handle);
int crocus_bo_wait(struct crocus_bo *bo, int64_t timeout_ns);
uint32_t crocus_create_hw_context(struct crocus_bufmgr *bufmgr);
uint32_t crocus_clone_hw_context(struct crocus_bufmgr *bufmgr, uint32_t ctx_id);
#define CROCUS_CONTEXT_LOW_PRIORITY ((I915_CONTEXT_MIN_USER_PRIORITY - 1) / 2)
#define CROCUS_CONTEXT_MEDIUM_PRIORITY (I915_CONTEXT_DEFAULT_PRIORITY)
#define CROCUS_CONTEXT_HIGH_PRIORITY ((I915_CONTEXT_MAX_USER_PRIORITY + 1) / 2)
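/* Assuming the stock i915 UAPI values (I915_CONTEXT_MIN_USER_PRIORITY = -1023,
 * I915_CONTEXT_DEFAULT_PRIORITY = 0, I915_CONTEXT_MAX_USER_PRIORITY = 1023),
 * these evaluate to -512, 0 and 512 respectively.
 */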
int crocus_hw_context_set_priority(struct crocus_bufmgr *bufmgr,
uint32_t ctx_id, int priority);
void crocus_destroy_hw_context(struct crocus_bufmgr *bufmgr, uint32_t ctx_id);
int crocus_bo_export_dmabuf(struct crocus_bo *bo, int *prime_fd);
struct crocus_bo *crocus_bo_import_dmabuf(struct crocus_bufmgr *bufmgr,
int prime_fd, uint32_t tiling,
uint32_t stride);
/**
* Exports a bo as a GEM handle into a given DRM file descriptor
* \param bo Buffer to export
* \param drm_fd File descriptor where the new handle is created
* \param out_handle Pointer to store the new handle
*
* Returns 0 if the buffer was successfully exported, a non zero error code
* otherwise.
*/
int crocus_bo_export_gem_handle_for_device(struct crocus_bo *bo, int drm_fd,
uint32_t *out_handle);
uint32_t crocus_bo_export_gem_handle(struct crocus_bo *bo);
int crocus_reg_read(struct crocus_bufmgr *bufmgr, uint32_t offset,
uint64_t *out);
int drm_ioctl(int fd, unsigned long request, void *arg);
#endif /* CROCUS_BUFMGR_H */

View File

@ -0,0 +1,859 @@
/*
* Copyright © 2017 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include <stdio.h>
#include <errno.h>
#include "pipe/p_defines.h"
#include "pipe/p_state.h"
#include "pipe/p_context.h"
#include "pipe/p_screen.h"
#include "util/u_inlines.h"
#include "util/u_surface.h"
#include "util/format/u_format.h"
#include "util/u_upload_mgr.h"
#include "util/ralloc.h"
#include "crocus_context.h"
#include "crocus_resource.h"
#include "crocus_screen.h"
#include "intel/compiler/brw_compiler.h"
#include "util/format_srgb.h"
static bool
crocus_is_color_fast_clear_compatible(struct crocus_context *ice,
enum isl_format format,
const union isl_color_value color)
{
if (isl_format_has_int_channel(format)) {
perf_debug(&ice->dbg, "Integer fast clear not enabled for %s",
isl_format_get_name(format));
return false;
}
for (int i = 0; i < 4; i++) {
if (!isl_format_has_color_component(format, i)) {
continue;
}
if (color.f32[i] != 0.0f && color.f32[i] != 1.0f) {
return false;
}
}
return true;
}
static bool
can_fast_clear_color(struct crocus_context *ice,
struct pipe_resource *p_res,
unsigned level,
const struct pipe_box *box,
bool render_condition_enabled,
enum isl_format format,
enum isl_format render_format,
union isl_color_value color)
{
struct crocus_resource *res = (void *) p_res;
if (INTEL_DEBUG & DEBUG_NO_FAST_CLEAR)
return false;
if (!isl_aux_usage_has_fast_clears(res->aux.usage))
return false;
/* Check for partial clear */
if (box->x > 0 || box->y > 0 ||
box->width < minify(p_res->width0, level) ||
box->height < minify(p_res->height0, level)) {
return false;
}
/* Avoid conditional fast clears to maintain correct tracking of the aux
* state (see crocus_resource_finish_write for more info). Note that partial
* fast clears (if they existed) would not pose a problem with conditional
* rendering.
*/
if (render_condition_enabled &&
ice->state.predicate == CROCUS_PREDICATE_STATE_USE_BIT) {
return false;
}
/* We store clear colors as floats or uints as needed. If there are
* texture views in play, the formats will not properly be respected
* during resolves because the resolve operations only know about the
* resource and not the renderbuffer.
*/
if (isl_format_srgb_to_linear(render_format) !=
isl_format_srgb_to_linear(format)) {
return false;
}
/* XXX: if (irb->mt->supports_fast_clear)
* see intel_miptree_create_for_dri_image()
*/
if (!crocus_is_color_fast_clear_compatible(ice, format, color))
return false;
return true;
}
static union isl_color_value
convert_fast_clear_color(struct crocus_context *ice,
struct crocus_resource *res,
enum isl_format render_format,
const union isl_color_value color)
{
union isl_color_value override_color = color;
struct pipe_resource *p_res = (void *) res;
const enum pipe_format format = p_res->format;
const struct util_format_description *desc =
util_format_description(format);
unsigned colormask = util_format_colormask(desc);
if (util_format_is_intensity(format) ||
util_format_is_luminance(format) ||
util_format_is_luminance_alpha(format)) {
override_color.u32[1] = override_color.u32[0];
override_color.u32[2] = override_color.u32[0];
if (util_format_is_intensity(format))
override_color.u32[3] = override_color.u32[0];
} else {
for (int chan = 0; chan < 3; chan++) {
if (!(colormask & (1 << chan)))
override_color.u32[chan] = 0;
}
}
if (util_format_is_unorm(format)) {
for (int i = 0; i < 4; i++)
override_color.f32[i] = CLAMP(override_color.f32[i], 0.0f, 1.0f);
} else if (util_format_is_snorm(format)) {
for (int i = 0; i < 4; i++)
override_color.f32[i] = CLAMP(override_color.f32[i], -1.0f, 1.0f);
} else if (util_format_is_pure_uint(format)) {
for (int i = 0; i < 4; i++) {
unsigned bits = util_format_get_component_bits(
format, UTIL_FORMAT_COLORSPACE_RGB, i);
if (bits < 32) {
uint32_t max = (1u << bits) - 1;
override_color.u32[i] = MIN2(override_color.u32[i], max);
}
}
} else if (util_format_is_pure_sint(format)) {
for (int i = 0; i < 4; i++) {
unsigned bits = util_format_get_component_bits(
format, UTIL_FORMAT_COLORSPACE_RGB, i);
if (bits < 32) {
int32_t max = (1 << (bits - 1)) - 1;
int32_t min = -(1 << (bits - 1));
override_color.i32[i] = CLAMP(override_color.i32[i], min, max);
}
}
} else if (format == PIPE_FORMAT_R11G11B10_FLOAT ||
format == PIPE_FORMAT_R9G9B9E5_FLOAT) {
/* these packed float formats only store unsigned values */
for (int i = 0; i < 4; i++)
override_color.f32[i] = MAX2(override_color.f32[i], 0.0f);
}
if (!(colormask & 1 << 3)) {
if (util_format_is_pure_integer(format))
override_color.u32[3] = 1;
else
override_color.f32[3] = 1.0f;
}
/* Handle linear to SRGB conversion */
if (isl_format_is_srgb(render_format)) {
for (int i = 0; i < 3; i++) {
override_color.f32[i] =
util_format_linear_to_srgb_float(override_color.f32[i]);
}
}
return override_color;
}
static void
fast_clear_color(struct crocus_context *ice,
struct crocus_resource *res,
unsigned level,
const struct pipe_box *box,
enum isl_format format,
union isl_color_value color,
enum blorp_batch_flags blorp_flags)
{
struct crocus_batch *batch = &ice->batches[CROCUS_BATCH_RENDER];
struct crocus_screen *screen = batch->screen;
struct pipe_resource *p_res = (void *) res;
color = convert_fast_clear_color(ice, res, format, color);
bool color_changed = !!memcmp(&res->aux.clear_color, &color,
sizeof(color));
if (color_changed) {
/* If we are clearing to a new clear value, we need to resolve fast
* clears from other levels/layers first, since we can't have different
* levels/layers with different fast clear colors.
*/
for (unsigned res_lvl = 0; res_lvl < res->surf.levels; res_lvl++) {
const unsigned level_layers =
crocus_get_num_logical_layers(res, res_lvl);
for (unsigned layer = 0; layer < level_layers; layer++) {
if (res_lvl == level &&
layer >= box->z &&
layer < box->z + box->depth) {
/* We're going to clear this layer anyway. Leave it alone. */
continue;
}
enum isl_aux_state aux_state =
crocus_resource_get_aux_state(res, res_lvl, layer);
if (aux_state != ISL_AUX_STATE_CLEAR &&
aux_state != ISL_AUX_STATE_PARTIAL_CLEAR &&
aux_state != ISL_AUX_STATE_COMPRESSED_CLEAR) {
/* This slice doesn't have any fast-cleared bits. */
continue;
}
/* If we got here, then the level may have fast-clear bits that use
* the old clear value. We need to do a color resolve to get rid
* of their use of the clear color before we can change it.
* Fortunately, few applications ever change their clear color at
* different levels/layers, so this shouldn't happen often.
*/
crocus_resource_prepare_access(ice, res,
res_lvl, 1, layer, 1,
res->aux.usage,
false);
perf_debug(&ice->dbg,
"Resolving resource (%p) level %d, layer %d: color changing from "
"(%0.2f, %0.2f, %0.2f, %0.2f) to "
"(%0.2f, %0.2f, %0.2f, %0.2f)\n",
res, res_lvl, layer,
res->aux.clear_color.f32[0],
res->aux.clear_color.f32[1],
res->aux.clear_color.f32[2],
res->aux.clear_color.f32[3],
color.f32[0], color.f32[1], color.f32[2], color.f32[3]);
}
}
}
crocus_resource_set_clear_color(ice, res, color);
/* If the buffer is already in ISL_AUX_STATE_CLEAR, and the color hasn't
* changed, the clear is redundant and can be skipped.
*/
const enum isl_aux_state aux_state =
crocus_resource_get_aux_state(res, level, box->z);
if (!color_changed && box->depth == 1 && aux_state == ISL_AUX_STATE_CLEAR)
return;
/* Ivybridge PRM Vol 2, Part 1, "11.7 MCS Buffer for Render Target(s)":
*
* "Any transition from any value in {Clear, Render, Resolve} to a
* different value in {Clear, Render, Resolve} requires end of pipe
* synchronization."
*
* In other words, fast clear ops are not properly synchronized with
* other drawing. We need to use a PIPE_CONTROL to ensure that the
* contents of the previous draw hit the render target before we resolve
* and again afterwards to ensure that the resolve is complete before we
* do any more regular drawing.
*/
crocus_emit_end_of_pipe_sync(batch,
"fast clear: pre-flush",
PIPE_CONTROL_RENDER_TARGET_FLUSH);
/* If we reach this point, we need to fast clear to change the state to
* ISL_AUX_STATE_CLEAR, or to update the fast clear color (or both).
*/
blorp_flags |= color_changed ? 0 : BLORP_BATCH_NO_UPDATE_CLEAR_COLOR;
struct blorp_batch blorp_batch;
blorp_batch_init(&ice->blorp, &blorp_batch, batch, blorp_flags);
struct blorp_surf surf;
crocus_blorp_surf_for_resource(&screen->vtbl, &batch->screen->isl_dev, &surf,
p_res, res->aux.usage, level, true);
/* In newer gens (> 9), the hardware will do a linear -> sRGB conversion of
* the clear color during the fast clear, if the surface format is of sRGB
* type. We use the linear version of the surface format here to prevent
* that from happening, since we already do our own linear -> sRGB
* conversion in convert_fast_clear_color().
*/
blorp_fast_clear(&blorp_batch, &surf, isl_format_srgb_to_linear(format),
ISL_SWIZZLE_IDENTITY,
level, box->z, box->depth,
box->x, box->y, box->x + box->width,
box->y + box->height);
blorp_batch_finish(&blorp_batch);
crocus_emit_end_of_pipe_sync(batch,
"fast clear: post flush",
PIPE_CONTROL_RENDER_TARGET_FLUSH);
crocus_resource_set_aux_state(ice, res, level, box->z,
box->depth, ISL_AUX_STATE_CLEAR);
ice->state.stage_dirty |= CROCUS_ALL_STAGE_DIRTY_BINDINGS;
return;
}
static void
clear_color(struct crocus_context *ice,
struct pipe_resource *p_res,
unsigned level,
const struct pipe_box *box,
bool render_condition_enabled,
enum isl_format format,
struct isl_swizzle swizzle,
union isl_color_value color)
{
struct crocus_resource *res = (void *) p_res;
struct crocus_batch *batch = &ice->batches[CROCUS_BATCH_RENDER];
struct crocus_screen *screen = batch->screen;
const struct intel_device_info *devinfo = &batch->screen->devinfo;
enum blorp_batch_flags blorp_flags = 0;
if (render_condition_enabled) {
if (!crocus_check_conditional_render(ice))
return;
if (ice->state.predicate == CROCUS_PREDICATE_STATE_USE_BIT)
blorp_flags |= BLORP_BATCH_PREDICATE_ENABLE;
}
if (p_res->target == PIPE_BUFFER)
util_range_add(&res->base, &res->valid_buffer_range, box->x, box->x + box->width);
crocus_batch_maybe_flush(batch, 1500);
bool can_fast_clear = can_fast_clear_color(ice, p_res, level, box,
render_condition_enabled,
res->surf.format, format, color);
if (can_fast_clear) {
fast_clear_color(ice, res, level, box, format, color,
blorp_flags);
return;
}
bool color_write_disable[4] = { false, false, false, false };
enum isl_aux_usage aux_usage =
crocus_resource_render_aux_usage(ice, res, format,
false, false);
crocus_resource_prepare_render(ice, res, level,
box->z, box->depth, aux_usage);
struct blorp_surf surf;
crocus_blorp_surf_for_resource(&screen->vtbl, &batch->screen->isl_dev, &surf,
p_res, aux_usage, level, true);
struct blorp_batch blorp_batch;
blorp_batch_init(&ice->blorp, &blorp_batch, batch, blorp_flags);
if (!isl_format_supports_rendering(devinfo, format) &&
isl_format_is_rgbx(format))
format = isl_format_rgbx_to_rgba(format);
blorp_clear(&blorp_batch, &surf, format, swizzle,
level, box->z, box->depth, box->x, box->y,
box->x + box->width, box->y + box->height,
color, color_write_disable);
blorp_batch_finish(&blorp_batch);
crocus_flush_and_dirty_for_history(ice, batch, res,
PIPE_CONTROL_RENDER_TARGET_FLUSH,
"cache history: post color clear");
crocus_resource_finish_render(ice, res, level,
box->z, box->depth, aux_usage);
}
static bool
can_fast_clear_depth(struct crocus_context *ice,
struct crocus_resource *res,
unsigned level,
const struct pipe_box *box,
bool render_condition_enabled,
float depth)
{
struct pipe_resource *p_res = (void *) res;
struct pipe_context *ctx = (void *) ice;
struct crocus_screen *screen = (void *) ctx->screen;
const struct intel_device_info *devinfo = &screen->devinfo;
if (devinfo->ver < 6)
return false;
if (INTEL_DEBUG & DEBUG_NO_FAST_CLEAR)
return false;
/* Check for partial clears */
if (box->x > 0 || box->y > 0 ||
box->width < u_minify(p_res->width0, level) ||
box->height < u_minify(p_res->height0, level)) {
return false;
}
/* Avoid conditional fast clears to maintain correct tracking of the aux
* state (see crocus_resource_finish_write for more info). Note that partial
* fast clears would not pose a problem with conditional rendering.
*/
if (render_condition_enabled &&
ice->state.predicate == CROCUS_PREDICATE_STATE_USE_BIT) {
return false;
}
if (!crocus_resource_level_has_hiz(res, level))
return false;
if (res->base.format == PIPE_FORMAT_Z16_UNORM) {
/* From the Sandy Bridge PRM, volume 2 part 1, page 314:
*
* "[DevSNB+]: Several cases exist where Depth Buffer Clear cannot be
* enabled (the legacy method of clearing must be performed):
*
* - DevSNB{W/A}]: When depth buffer format is D16_UNORM and the
* width of the map (LOD0) is not multiple of 16, fast clear
* optimization must be disabled.
*/
if (devinfo->ver == 6 &&
(minify(res->surf.phys_level0_sa.width,
level) % 16) != 0)
return false;
}
return true;
}
static void
fast_clear_depth(struct crocus_context *ice,
struct crocus_resource *res,
unsigned level,
const struct pipe_box *box,
float depth)
{
struct pipe_resource *p_res = (void *) res;
struct crocus_batch *batch = &ice->batches[CROCUS_BATCH_RENDER];
/* Quantize the clear value to what can be stored in the actual depth
* buffer. This makes the following check more accurate because it now
* checks if the actual depth bits will match. It also prevents us from
* getting a too-accurate depth value during depth testing or when sampling
* with HiZ enabled.
*/
const unsigned nbits = p_res->format == PIPE_FORMAT_Z16_UNORM ? 16 : 24;
const uint32_t depth_max = (1 << nbits) - 1;
depth = p_res->format == PIPE_FORMAT_Z32_FLOAT ? depth :
(unsigned)(depth * depth_max) / (float)depth_max;
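/* For example, clearing a Z24 buffer to 0.5 yields
 * (unsigned)(0.5 * 0xffffff) / (float)0xffffff ~= 0.49999997, i.e. a value
 * the 24-bit buffer can represent exactly.
 */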
bool update_clear_depth = false;
/* If we're clearing to a new clear value, then we need to resolve any clear
* flags out of the HiZ buffer into the real depth buffer.
*/
if (res->aux.clear_color.f32[0] != depth) {
for (unsigned res_level = 0; res_level < res->surf.levels; res_level++) {
if (!crocus_resource_level_has_hiz(res, res_level))
continue;
const unsigned level_layers =
crocus_get_num_logical_layers(res, res_level);
for (unsigned layer = 0; layer < level_layers; layer++) {
if (res_level == level &&
layer >= box->z &&
layer < box->z + box->depth) {
/* We're going to clear this layer anyway. Leave it alone. */
continue;
}
enum isl_aux_state aux_state =
crocus_resource_get_aux_state(res, res_level, layer);
if (aux_state != ISL_AUX_STATE_CLEAR &&
aux_state != ISL_AUX_STATE_COMPRESSED_CLEAR) {
/* This slice doesn't have any fast-cleared bits. */
continue;
}
/* If we got here, then the level may have fast-clear bits that
* use the old clear value. We need to do a depth resolve to get
* rid of their use of the clear value before we can change it.
* Fortunately, few applications ever change their depth clear
* value so this shouldn't happen often.
*/
crocus_hiz_exec(ice, batch, res, res_level, layer, 1,
ISL_AUX_OP_FULL_RESOLVE, false);
crocus_resource_set_aux_state(ice, res, res_level, layer, 1,
ISL_AUX_STATE_RESOLVED);
}
}
const union isl_color_value clear_value = { .f32 = {depth, } };
crocus_resource_set_clear_color(ice, res, clear_value);
update_clear_depth = true;
}
for (unsigned l = 0; l < box->depth; l++) {
enum isl_aux_state aux_state =
crocus_resource_level_has_hiz(res, level) ?
crocus_resource_get_aux_state(res, level, box->z + l) :
ISL_AUX_STATE_AUX_INVALID;
if (update_clear_depth || aux_state != ISL_AUX_STATE_CLEAR) {
if (aux_state == ISL_AUX_STATE_CLEAR) {
perf_debug(&ice->dbg, "Performing HiZ clear just to update the "
"depth clear value\n");
}
crocus_hiz_exec(ice, batch, res, level,
box->z + l, 1, ISL_AUX_OP_FAST_CLEAR,
update_clear_depth);
}
}
crocus_resource_set_aux_state(ice, res, level, box->z, box->depth,
ISL_AUX_STATE_CLEAR);
ice->state.dirty |= CROCUS_DIRTY_DEPTH_BUFFER;
}
static void
clear_depth_stencil(struct crocus_context *ice,
struct pipe_resource *p_res,
unsigned level,
const struct pipe_box *box,
bool render_condition_enabled,
bool clear_depth,
bool clear_stencil,
float depth,
uint8_t stencil)
{
struct crocus_resource *res = (void *) p_res;
struct crocus_batch *batch = &ice->batches[CROCUS_BATCH_RENDER];
struct crocus_screen *screen = batch->screen;
enum blorp_batch_flags blorp_flags = 0;
if (render_condition_enabled) {
if (!crocus_check_conditional_render(ice))
return;
if (ice->state.predicate == CROCUS_PREDICATE_STATE_USE_BIT)
blorp_flags |= BLORP_BATCH_PREDICATE_ENABLE;
}
crocus_batch_maybe_flush(batch, 1500);
struct crocus_resource *z_res;
struct crocus_resource *stencil_res;
struct blorp_surf z_surf;
struct blorp_surf stencil_surf;
crocus_get_depth_stencil_resources(&batch->screen->devinfo, p_res, &z_res, &stencil_res);
if (z_res && clear_depth &&
can_fast_clear_depth(ice, z_res, level, box, render_condition_enabled,
depth)) {
fast_clear_depth(ice, z_res, level, box, depth);
crocus_flush_and_dirty_for_history(ice, batch, res, 0,
"cache history: post fast Z clear");
clear_depth = false;
z_res = NULL;
}
/* At this point, we might have fast cleared the depth buffer. So if there's
* no stencil clear pending, return early.
*/
if (!(clear_depth || (clear_stencil && stencil_res))) {
return;
}
if (clear_depth && z_res) {
const enum isl_aux_usage aux_usage =
crocus_resource_render_aux_usage(ice, z_res, level, z_res->surf.format,
false);
crocus_resource_prepare_render(ice, z_res, level, box->z, box->depth,
aux_usage);
crocus_blorp_surf_for_resource(&screen->vtbl, &batch->screen->isl_dev,
&z_surf, &z_res->base, aux_usage,
level, true);
}
struct blorp_batch blorp_batch;
blorp_batch_init(&ice->blorp, &blorp_batch, batch, blorp_flags);
uint8_t stencil_mask = clear_stencil && stencil_res ? 0xff : 0;
if (stencil_mask) {
crocus_resource_prepare_access(ice, stencil_res, level, 1, box->z,
box->depth, stencil_res->aux.usage, false);
crocus_blorp_surf_for_resource(&screen->vtbl, &batch->screen->isl_dev,
&stencil_surf, &stencil_res->base,
stencil_res->aux.usage, level, true);
}
blorp_clear_depth_stencil(&blorp_batch, &z_surf, &stencil_surf,
level, box->z, box->depth,
box->x, box->y,
box->x + box->width,
box->y + box->height,
clear_depth && z_res, depth,
stencil_mask, stencil);
blorp_batch_finish(&blorp_batch);
crocus_flush_and_dirty_for_history(ice, batch, res, 0,
"cache history: post slow ZS clear");
if (clear_depth && z_res) {
crocus_resource_finish_render(ice, z_res, level,
box->z, box->depth, z_surf.aux_usage);
}
if (stencil_mask) {
crocus_resource_finish_write(ice, stencil_res, level, box->z, box->depth,
stencil_res->aux.usage);
}
}
/**
* The pipe->clear() driver hook.
*
* This clears buffers attached to the current draw framebuffer.
*/
static void
crocus_clear(struct pipe_context *ctx,
unsigned buffers,
const struct pipe_scissor_state *scissor_state,
const union pipe_color_union *p_color,
double depth,
unsigned stencil)
{
struct crocus_context *ice = (void *) ctx;
struct pipe_framebuffer_state *cso_fb = &ice->state.framebuffer;
struct crocus_screen *screen = (void *) ctx->screen;
const struct intel_device_info *devinfo = &screen->devinfo;
assert(buffers != 0);
struct pipe_box box = {
.width = cso_fb->width,
.height = cso_fb->height,
};
if (scissor_state) {
box.x = scissor_state->minx;
box.y = scissor_state->miny;
box.width = MIN2(box.width, scissor_state->maxx - scissor_state->minx);
box.height = MIN2(box.height, scissor_state->maxy - scissor_state->miny);
}
if (buffers & PIPE_CLEAR_DEPTHSTENCIL) {
if (devinfo->ver < 6) {
crocus_blitter_begin(ice, CROCUS_SAVE_FRAGMENT_STATE, true);
util_blitter_clear(ice->blitter, cso_fb->width, cso_fb->height,
util_framebuffer_get_num_layers(cso_fb),
buffers & PIPE_CLEAR_DEPTHSTENCIL, p_color, depth, stencil, false);
} else {
struct pipe_surface *psurf = cso_fb->zsbuf;
box.depth = psurf->u.tex.last_layer - psurf->u.tex.first_layer + 1;
box.z = psurf->u.tex.first_layer;
clear_depth_stencil(ice, psurf->texture, psurf->u.tex.level, &box, true,
buffers & PIPE_CLEAR_DEPTH,
buffers & PIPE_CLEAR_STENCIL,
depth, stencil);
}
buffers &= ~PIPE_CLEAR_DEPTHSTENCIL;
}
if (buffers & PIPE_CLEAR_COLOR) {
/* pipe_color_union and isl_color_value are interchangeable */
union isl_color_value *color = (void *) p_color;
for (unsigned i = 0; i < cso_fb->nr_cbufs; i++) {
if (buffers & (PIPE_CLEAR_COLOR0 << i)) {
struct pipe_surface *psurf = cso_fb->cbufs[i];
struct crocus_surface *isurf = (void *) psurf;
box.depth = psurf->u.tex.last_layer - psurf->u.tex.first_layer + 1;
box.z = psurf->u.tex.first_layer;
clear_color(ice, psurf->texture, psurf->u.tex.level, &box,
true, isurf->view.format, isurf->view.swizzle,
*color);
}
}
}
}
/**
* The pipe->clear_texture() driver hook.
*
* This clears the given texture resource.
*/
static void
crocus_clear_texture(struct pipe_context *ctx,
struct pipe_resource *p_res,
unsigned level,
const struct pipe_box *box,
const void *data)
{
struct crocus_context *ice = (void *) ctx;
struct crocus_screen *screen = (void *) ctx->screen;
const struct intel_device_info *devinfo = &screen->devinfo;
struct crocus_resource *res = (void *) p_res;
if (devinfo->ver < 6) {
util_clear_texture(ctx, p_res,
level, box, data);
return;
}
if (crocus_resource_unfinished_aux_import(res))
crocus_resource_finish_aux_import(ctx->screen, res);
if (util_format_is_depth_or_stencil(p_res->format)) {
const struct util_format_unpack_description *fmt_unpack =
util_format_unpack_description(p_res->format);
float depth = 0.0;
uint8_t stencil = 0;
if (fmt_unpack->unpack_z_float)
fmt_unpack->unpack_z_float(&depth, 0, data, 0, 1, 1);
if (fmt_unpack->unpack_s_8uint)
fmt_unpack->unpack_s_8uint(&stencil, 0, data, 0, 1, 1);
clear_depth_stencil(ice, p_res, level, box, true, true, true,
depth, stencil);
} else {
union isl_color_value color;
struct crocus_resource *res = (void *) p_res;
enum isl_format format = res->surf.format;
if (!isl_format_supports_rendering(devinfo, format)) {
const struct isl_format_layout *fmtl = isl_format_get_layout(format);
// XXX: actually just get_copy_format_for_bpb from BLORP
// XXX: don't cut and paste this
switch (fmtl->bpb) {
case 8: format = ISL_FORMAT_R8_UINT; break;
case 16: format = ISL_FORMAT_R8G8_UINT; break;
case 24: format = ISL_FORMAT_R8G8B8_UINT; break;
case 32: format = ISL_FORMAT_R8G8B8A8_UINT; break;
case 48: format = ISL_FORMAT_R16G16B16_UINT; break;
case 64: format = ISL_FORMAT_R16G16B16A16_UINT; break;
case 96: format = ISL_FORMAT_R32G32B32_UINT; break;
case 128: format = ISL_FORMAT_R32G32B32A32_UINT; break;
default:
unreachable("Unknown format bpb");
}
/* No aux surfaces for non-renderable surfaces */
assert(res->aux.usage == ISL_AUX_USAGE_NONE);
}
isl_color_value_unpack(&color, format, data);
clear_color(ice, p_res, level, box, true, format,
ISL_SWIZZLE_IDENTITY, color);
}
}
/**
* The pipe->clear_render_target() driver hook.
*
* This clears the given render target surface.
*/
static void
crocus_clear_render_target(struct pipe_context *ctx,
struct pipe_surface *psurf,
const union pipe_color_union *p_color,
unsigned dst_x, unsigned dst_y,
unsigned width, unsigned height,
bool render_condition_enabled)
{
struct crocus_context *ice = (void *) ctx;
struct crocus_surface *isurf = (void *) psurf;
struct pipe_box box = {
.x = dst_x,
.y = dst_y,
.z = psurf->u.tex.first_layer,
.width = width,
.height = height,
.depth = psurf->u.tex.last_layer - psurf->u.tex.first_layer + 1
};
/* pipe_color_union and isl_color_value are interchangeable */
union isl_color_value *color = (void *) p_color;
clear_color(ice, psurf->texture, psurf->u.tex.level, &box,
render_condition_enabled,
isurf->view.format, isurf->view.swizzle, *color);
}
/**
* The pipe->clear_depth_stencil() driver hook.
*
* This clears the given depth/stencil surface.
*/
static void
crocus_clear_depth_stencil(struct pipe_context *ctx,
struct pipe_surface *psurf,
unsigned flags,
double depth,
unsigned stencil,
unsigned dst_x, unsigned dst_y,
unsigned width, unsigned height,
bool render_condition_enabled)
{
return;
#if 0
struct crocus_context *ice = (void *) ctx;
struct pipe_box box = {
.x = dst_x,
.y = dst_y,
.z = psurf->u.tex.first_layer,
.width = width,
.height = height,
.depth = psurf->u.tex.last_layer - psurf->u.tex.first_layer + 1
};
uint32_t blit_flags = 0;
assert(util_format_is_depth_or_stencil(psurf->texture->format));
crocus_blitter_begin(ice, CROCUS_SAVE_FRAGMENT_STATE);
util_blitter_clear(ice->blitter, width, height,
1, flags, NULL, depth, stencil, render_condition_enabled);
#if 0
clear_depth_stencil(ice, psurf->texture, psurf->u.tex.level, &box,
render_condition_enabled,
flags & PIPE_CLEAR_DEPTH, flags & PIPE_CLEAR_STENCIL,
depth, stencil);
#endif
#endif
}
void
crocus_init_clear_functions(struct pipe_context *ctx)
{
ctx->clear = crocus_clear;
ctx->clear_texture = crocus_clear_texture;
ctx->clear_render_target = crocus_clear_render_target;
ctx->clear_depth_stencil = crocus_clear_depth_stencil;
}

View File

@ -0,0 +1,336 @@
/*
* Copyright © 2017 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include <stdio.h>
#include <time.h>
#include "pipe/p_defines.h"
#include "pipe/p_state.h"
#include "util/ralloc.h"
#include "util/u_inlines.h"
#include "util/format/u_format.h"
#include "util/u_upload_mgr.h"
#include "drm-uapi/i915_drm.h"
#include "crocus_context.h"
#include "crocus_resource.h"
#include "crocus_screen.h"
#include "common/intel_defines.h"
#include "common/intel_sample_positions.h"
/**
* The pipe->set_debug_callback() driver hook.
*/
static void
crocus_set_debug_callback(struct pipe_context *ctx,
const struct pipe_debug_callback *cb)
{
struct crocus_context *ice = (struct crocus_context *)ctx;
if (cb)
ice->dbg = *cb;
else
memset(&ice->dbg, 0, sizeof(ice->dbg));
}
static bool
crocus_init_identifier_bo(struct crocus_context *ice)
{
void *bo_map;
bo_map = crocus_bo_map(NULL, ice->workaround_bo, MAP_READ | MAP_WRITE);
if (!bo_map)
return false;
ice->workaround_bo->kflags |= EXEC_OBJECT_CAPTURE;
ice->workaround_offset = ALIGN(
intel_debug_write_identifiers(bo_map, 4096, "Crocus") + 8, 8);
crocus_bo_unmap(ice->workaround_bo);
return true;
}
/**
* Called from the batch module when it detects a GPU hang.
*
* In this case, we've lost our GEM context, and can't rely on any existing
* state on the GPU. We must mark everything dirty and wipe away any saved
* assumptions about the last known state of the GPU.
*/
void
crocus_lost_context_state(struct crocus_batch *batch)
{
/* The batch module doesn't have a crocus_context, because we want to
* avoid introducing lots of layering violations. Unfortunately, here
* we do need to inform the context of batch catastrophe. We know the
* batch is one of our context's, so hackily claw our way back.
*/
struct crocus_context *ice = batch->ice;
struct crocus_screen *screen = batch->screen;
if (batch->name == CROCUS_BATCH_RENDER) {
screen->vtbl.init_render_context(batch);
} else if (batch->name == CROCUS_BATCH_COMPUTE) {
screen->vtbl.init_compute_context(batch);
} else {
unreachable("unhandled batch reset");
}
ice->state.dirty = ~0ull;
memset(ice->state.last_grid, 0, sizeof(ice->state.last_grid));
batch->state_base_address_emitted = false;
screen->vtbl.lost_genx_state(ice, batch);
}
static enum pipe_reset_status
crocus_get_device_reset_status(struct pipe_context *ctx)
{
struct crocus_context *ice = (struct crocus_context *)ctx;
enum pipe_reset_status worst_reset = PIPE_NO_RESET;
/* Check the reset status of each batch's hardware context, and take the
* worst status (if one was guilty, proclaim guilt).
*/
for (int i = 0; i < ice->batch_count; i++) {
/* This will also recreate the hardware contexts as necessary, so any
* future queries will show no resets. We only want to report once.
*/
enum pipe_reset_status batch_reset =
crocus_batch_check_for_reset(&ice->batches[i]);
if (batch_reset == PIPE_NO_RESET)
continue;
if (worst_reset == PIPE_NO_RESET) {
worst_reset = batch_reset;
} else {
/* GUILTY < INNOCENT < UNKNOWN */
worst_reset = MIN2(worst_reset, batch_reset);
}
}
if (worst_reset != PIPE_NO_RESET && ice->reset.reset)
ice->reset.reset(ice->reset.data, worst_reset);
return worst_reset;
}
static void
crocus_set_device_reset_callback(struct pipe_context *ctx,
const struct pipe_device_reset_callback *cb)
{
struct crocus_context *ice = (struct crocus_context *)ctx;
if (cb)
ice->reset = *cb;
else
memset(&ice->reset, 0, sizeof(ice->reset));
}
static void
crocus_get_sample_position(struct pipe_context *ctx,
unsigned sample_count,
unsigned sample_index,
float *out_value)
{
union {
struct {
float x[16];
float y[16];
} a;
struct {
float _0XOffset, _1XOffset, _2XOffset, _3XOffset,
_4XOffset, _5XOffset, _6XOffset, _7XOffset,
_8XOffset, _9XOffset, _10XOffset, _11XOffset,
_12XOffset, _13XOffset, _14XOffset, _15XOffset;
float _0YOffset, _1YOffset, _2YOffset, _3YOffset,
_4YOffset, _5YOffset, _6YOffset, _7YOffset,
_8YOffset, _9YOffset, _10YOffset, _11YOffset,
_12YOffset, _13YOffset, _14YOffset, _15YOffset;
} v;
} u;
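/* The INTEL_SAMPLE_POS_*X macros initialize the named _<N>XOffset/_<N>YOffset
 * fields of 'v'; those fields alias the x[]/y[] arrays of 'a', so the
 * requested sample can then be looked up by index.
 */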
switch (sample_count) {
case 1: INTEL_SAMPLE_POS_1X(u.v._); break;
case 2: INTEL_SAMPLE_POS_2X(u.v._); break;
case 4: INTEL_SAMPLE_POS_4X(u.v._); break;
case 8: INTEL_SAMPLE_POS_8X(u.v._); break;
case 16: INTEL_SAMPLE_POS_16X(u.v._); break;
default: unreachable("invalid sample count");
}
out_value[0] = u.a.x[sample_index];
out_value[1] = u.a.y[sample_index];
}
/**
* Destroy a context, freeing any associated memory.
*/
static void
crocus_destroy_context(struct pipe_context *ctx)
{
struct crocus_context *ice = (struct crocus_context *)ctx;
struct crocus_screen *screen = (struct crocus_screen *)ctx->screen;
if (ctx->stream_uploader)
u_upload_destroy(ctx->stream_uploader);
if (ice->blitter)
util_blitter_destroy(ice->blitter);
screen->vtbl.destroy_state(ice);
crocus_destroy_program_cache(ice);
u_upload_destroy(ice->query_buffer_uploader);
crocus_bo_unreference(ice->workaround_bo);
slab_destroy_child(&ice->transfer_pool);
crocus_batch_free(&ice->batches[CROCUS_BATCH_RENDER]);
if (ice->batches[CROCUS_BATCH_COMPUTE].ice)
crocus_batch_free(&ice->batches[CROCUS_BATCH_COMPUTE]);
ralloc_free(ice);
}
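/* Invoke the generation-specific variant of 'func' (gfx4_func, gfx45_func,
 * ... gfx75_func) selected by the device's verx10 value.
 */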
#define genX_call(devinfo, func, ...) \
switch ((devinfo)->verx10) { \
case 75: \
gfx75_##func(__VA_ARGS__); \
break; \
case 70: \
gfx7_##func(__VA_ARGS__); \
break; \
case 60: \
gfx6_##func(__VA_ARGS__); \
break; \
case 50: \
gfx5_##func(__VA_ARGS__); \
break; \
case 45: \
gfx45_##func(__VA_ARGS__); \
break; \
case 40: \
gfx4_##func(__VA_ARGS__); \
break; \
default: \
unreachable("Unknown hardware generation"); \
}
/**
* Create a context.
*
* This is where each context begins.
*/
struct pipe_context *
crocus_create_context(struct pipe_screen *pscreen, void *priv, unsigned flags)
{
struct crocus_screen *screen = (struct crocus_screen*)pscreen;
const struct intel_device_info *devinfo = &screen->devinfo;
struct crocus_context *ice = rzalloc(NULL, struct crocus_context);
if (!ice)
return NULL;
struct pipe_context *ctx = &ice->ctx;
ctx->screen = pscreen;
ctx->priv = priv;
ctx->stream_uploader = u_upload_create_default(ctx);
if (!ctx->stream_uploader) {
free(ctx);
return NULL;
}
ctx->const_uploader = ctx->stream_uploader;
ctx->destroy = crocus_destroy_context;
ctx->set_debug_callback = crocus_set_debug_callback;
ctx->set_device_reset_callback = crocus_set_device_reset_callback;
ctx->get_device_reset_status = crocus_get_device_reset_status;
ctx->get_sample_position = crocus_get_sample_position;
ice->shaders.urb_size = devinfo->urb.size;
crocus_init_context_fence_functions(ctx);
crocus_init_blit_functions(ctx);
crocus_init_clear_functions(ctx);
crocus_init_program_functions(ctx);
crocus_init_resource_functions(ctx);
crocus_init_flush_functions(ctx);
crocus_init_program_cache(ice);
slab_create_child(&ice->transfer_pool, &screen->transfer_pool);
ice->query_buffer_uploader =
u_upload_create(ctx, 4096, PIPE_BIND_CUSTOM, PIPE_USAGE_STAGING,
0);
ice->workaround_bo =
crocus_bo_alloc(screen->bufmgr, "workaround", 4096);
if (!ice->workaround_bo)
return NULL;
if (!crocus_init_identifier_bo(ice))
return NULL;
genX_call(devinfo, init_state, ice);
genX_call(devinfo, init_blorp, ice);
genX_call(devinfo, init_query, ice);
ice->blitter = util_blitter_create(&ice->ctx);
if (ice->blitter == NULL)
return NULL;
int priority = 0;
if (flags & PIPE_CONTEXT_HIGH_PRIORITY)
priority = INTEL_CONTEXT_HIGH_PRIORITY;
if (flags & PIPE_CONTEXT_LOW_PRIORITY)
priority = INTEL_CONTEXT_LOW_PRIORITY;
ice->batch_count = devinfo->ver >= 7 ? CROCUS_BATCH_COUNT : 1;
for (int i = 0; i < ice->batch_count; i++) {
crocus_init_batch(ice, (enum crocus_batch_name) i,
priority);
}
ice->urb.size = devinfo->urb.size;
screen->vtbl.init_render_context(&ice->batches[CROCUS_BATCH_RENDER]);
if (ice->batch_count > 1)
screen->vtbl.init_compute_context(&ice->batches[CROCUS_BATCH_COMPUTE]);
return ctx;
}
bool
crocus_sw_check_cond_render(struct crocus_context *ice)
{
struct crocus_query *q = ice->condition.query;
union pipe_query_result result;
bool wait = ice->condition.mode == PIPE_RENDER_COND_WAIT ||
ice->condition.mode == PIPE_RENDER_COND_BY_REGION_WAIT;
if (!q)
return true;
bool ret = ice->ctx.get_query_result(&ice->ctx, (void *)q, wait, &result);
if (!ret)
return true;
return ice->condition.condition ? result.u64 == 0 : result.u64 != 0;
}

View File

@ -0,0 +1,955 @@
/*
* Copyright © 2017 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef CROCUS_CONTEXT_H
#define CROCUS_CONTEXT_H
#include "pipe/p_context.h"
#include "pipe/p_state.h"
#include "util/u_debug.h"
#include "intel/blorp/blorp.h"
#include "intel/dev/intel_debug.h"
#include "intel/compiler/brw_compiler.h"
#include "crocus_batch.h"
#include "crocus_fence.h"
#include "crocus_resource.h"
#include "crocus_screen.h"
#include "util/u_blitter.h"
struct crocus_bo;
struct crocus_context;
struct blorp_batch;
struct blorp_params;
#define CROCUS_MAX_TEXTURE_BUFFER_SIZE (1 << 27)
#define CROCUS_MAX_TEXTURE_SAMPLERS 32
/* CROCUS_MAX_ABOS and CROCUS_MAX_SSBOS must be the same. */
#define CROCUS_MAX_ABOS 16
#define CROCUS_MAX_SSBOS 16
#define CROCUS_MAX_VIEWPORTS 16
#define CROCUS_MAX_CLIP_PLANES 8
enum crocus_param_domain {
BRW_PARAM_DOMAIN_BUILTIN = 0,
BRW_PARAM_DOMAIN_IMAGE,
};
enum {
DRI_CONF_BO_REUSE_DISABLED,
DRI_CONF_BO_REUSE_ALL
};
#define BRW_PARAM(domain, val) (BRW_PARAM_DOMAIN_##domain << 24 | (val))
#define BRW_PARAM_DOMAIN(param) ((uint32_t)(param) >> 24)
#define BRW_PARAM_VALUE(param) ((uint32_t)(param) & 0x00ffffff)
#define BRW_PARAM_IMAGE(idx, offset) BRW_PARAM(IMAGE, ((idx) << 8) | (offset))
#define BRW_PARAM_IMAGE_IDX(value) (BRW_PARAM_VALUE(value) >> 8)
#define BRW_PARAM_IMAGE_OFFSET(value)(BRW_PARAM_VALUE(value) & 0xf)
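/* For example, BRW_PARAM_IMAGE(2, 3) packs to 0x01000203 (domain IMAGE in
 * the top byte), so BRW_PARAM_IMAGE_IDX() recovers 2 and
 * BRW_PARAM_IMAGE_OFFSET() recovers 3.
 */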
/**
* Dirty flags. When state changes, we flag some combination of these
* to indicate that particular GPU commands need to be re-emitted.
*
* Each bit typically corresponds to a single 3DSTATE_* command packet, but
* in rare cases they map to a group of related packets that need to be
* emitted together.
*
* See crocus_upload_render_state().
*/
#define CROCUS_DIRTY_COLOR_CALC_STATE (1ull << 0)
#define CROCUS_DIRTY_POLYGON_STIPPLE (1ull << 1)
#define CROCUS_DIRTY_CC_VIEWPORT (1ull << 2)
#define CROCUS_DIRTY_SF_CL_VIEWPORT (1ull << 3)
#define CROCUS_DIRTY_RASTER (1ull << 4)
#define CROCUS_DIRTY_CLIP (1ull << 5)
#define CROCUS_DIRTY_LINE_STIPPLE (1ull << 6)
#define CROCUS_DIRTY_VERTEX_ELEMENTS (1ull << 7)
#define CROCUS_DIRTY_VERTEX_BUFFERS (1ull << 8)
#define CROCUS_DIRTY_DRAWING_RECTANGLE (1ull << 9)
#define CROCUS_DIRTY_GEN6_URB (1ull << 10)
#define CROCUS_DIRTY_DEPTH_BUFFER (1ull << 11)
#define CROCUS_DIRTY_WM (1ull << 12)
#define CROCUS_DIRTY_SO_DECL_LIST (1ull << 13)
#define CROCUS_DIRTY_STREAMOUT (1ull << 14)
#define CROCUS_DIRTY_GEN4_CONSTANT_COLOR (1ull << 15)
#define CROCUS_DIRTY_GEN4_CURBE (1ull << 16)
#define CROCUS_DIRTY_GEN4_URB_FENCE (1ull << 17)
#define CROCUS_DIRTY_GEN5_PIPELINED_POINTERS (1ull << 18)
#define CROCUS_DIRTY_GEN5_BINDING_TABLE_POINTERS (1ull << 19)
#define CROCUS_DIRTY_GEN6_BLEND_STATE (1ull << 20)
#define CROCUS_DIRTY_GEN6_SCISSOR_RECT (1ull << 21)
#define CROCUS_DIRTY_GEN6_WM_DEPTH_STENCIL (1ull << 22)
#define CROCUS_DIRTY_GEN6_MULTISAMPLE (1ull << 23)
#define CROCUS_DIRTY_GEN6_SAMPLE_MASK (1ull << 24)
#define CROCUS_DIRTY_GEN7_SBE (1ull << 25)
#define CROCUS_DIRTY_GEN7_L3_CONFIG (1ull << 26)
#define CROCUS_DIRTY_GEN7_SO_BUFFERS (1ull << 27)
#define CROCUS_DIRTY_GEN75_VF (1ull << 28)
#define CROCUS_DIRTY_RENDER_RESOLVES_AND_FLUSHES (1ull << 29)
#define CROCUS_DIRTY_COMPUTE_RESOLVES_AND_FLUSHES (1ull << 30)
#define CROCUS_DIRTY_VF_STATISTICS (1ull << 31)
#define CROCUS_DIRTY_GEN4_CLIP_PROG (1ull << 32)
#define CROCUS_DIRTY_GEN4_SF_PROG (1ull << 33)
#define CROCUS_DIRTY_GEN4_FF_GS_PROG (1ull << 34)
#define CROCUS_DIRTY_GEN6_SAMPLER_STATE_POINTERS (1ull << 35)
#define CROCUS_DIRTY_GEN6_SVBI (1ull << 36)
#define CROCUS_ALL_DIRTY_FOR_COMPUTE (CROCUS_DIRTY_COMPUTE_RESOLVES_AND_FLUSHES)
#define CROCUS_ALL_DIRTY_FOR_RENDER (~CROCUS_ALL_DIRTY_FOR_COMPUTE)
/**
* Per-stage dirty flags. When state changes, we flag some combination of
* these to indicate that particular GPU commands need to be re-emitted.
* Unlike the CROCUS_DIRTY_* flags, these are shader stage-specific and can be
* indexed by shifting the mask by the shader stage index.
*
* See crocus_upload_render_state().
*/
#define CROCUS_STAGE_DIRTY_SAMPLER_STATES_VS (1ull << 0)
#define CROCUS_STAGE_DIRTY_SAMPLER_STATES_TCS (1ull << 1)
#define CROCUS_STAGE_DIRTY_SAMPLER_STATES_TES (1ull << 2)
#define CROCUS_STAGE_DIRTY_SAMPLER_STATES_GS (1ull << 3)
#define CROCUS_STAGE_DIRTY_SAMPLER_STATES_PS (1ull << 4)
#define CROCUS_STAGE_DIRTY_SAMPLER_STATES_CS (1ull << 5)
#define CROCUS_STAGE_DIRTY_UNCOMPILED_VS (1ull << 6)
#define CROCUS_STAGE_DIRTY_UNCOMPILED_TCS (1ull << 7)
#define CROCUS_STAGE_DIRTY_UNCOMPILED_TES (1ull << 8)
#define CROCUS_STAGE_DIRTY_UNCOMPILED_GS (1ull << 9)
#define CROCUS_STAGE_DIRTY_UNCOMPILED_FS (1ull << 10)
#define CROCUS_STAGE_DIRTY_UNCOMPILED_CS (1ull << 11)
#define CROCUS_STAGE_DIRTY_VS (1ull << 12)
#define CROCUS_STAGE_DIRTY_TCS (1ull << 13)
#define CROCUS_STAGE_DIRTY_TES (1ull << 14)
#define CROCUS_STAGE_DIRTY_GS (1ull << 15)
#define CROCUS_STAGE_DIRTY_FS (1ull << 16)
#define CROCUS_STAGE_DIRTY_CS (1ull << 17)
#define CROCUS_SHIFT_FOR_STAGE_DIRTY_CONSTANTS 18
#define CROCUS_STAGE_DIRTY_CONSTANTS_VS (1ull << 18)
#define CROCUS_STAGE_DIRTY_CONSTANTS_TCS (1ull << 19)
#define CROCUS_STAGE_DIRTY_CONSTANTS_TES (1ull << 20)
#define CROCUS_STAGE_DIRTY_CONSTANTS_GS (1ull << 21)
#define CROCUS_STAGE_DIRTY_CONSTANTS_FS (1ull << 22)
#define CROCUS_STAGE_DIRTY_CONSTANTS_CS (1ull << 23)
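/* Per the stage-indexing convention described above, e.g.
 * (CROCUS_STAGE_DIRTY_CONSTANTS_VS << MESA_SHADER_FRAGMENT) ==
 * CROCUS_STAGE_DIRTY_CONSTANTS_FS, and the constants flag for stage 's' is
 * (1ull << (CROCUS_SHIFT_FOR_STAGE_DIRTY_CONSTANTS + s)).
 */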
#define CROCUS_STAGE_DIRTY_BINDINGS_VS (1ull << 24)
#define CROCUS_STAGE_DIRTY_BINDINGS_TCS (1ull << 25)
#define CROCUS_STAGE_DIRTY_BINDINGS_TES (1ull << 26)
#define CROCUS_STAGE_DIRTY_BINDINGS_GS (1ull << 27)
#define CROCUS_STAGE_DIRTY_BINDINGS_FS (1ull << 28)
#define CROCUS_STAGE_DIRTY_BINDINGS_CS (1ull << 29)
#define CROCUS_ALL_STAGE_DIRTY_FOR_COMPUTE (CROCUS_STAGE_DIRTY_CS | \
CROCUS_STAGE_DIRTY_SAMPLER_STATES_CS | \
CROCUS_STAGE_DIRTY_UNCOMPILED_CS | \
CROCUS_STAGE_DIRTY_CONSTANTS_CS | \
CROCUS_STAGE_DIRTY_BINDINGS_CS)
#define CROCUS_ALL_STAGE_DIRTY_FOR_RENDER (~CROCUS_ALL_STAGE_DIRTY_FOR_COMPUTE)
#define CROCUS_ALL_STAGE_DIRTY_BINDINGS (CROCUS_STAGE_DIRTY_BINDINGS_VS | \
CROCUS_STAGE_DIRTY_BINDINGS_TCS | \
CROCUS_STAGE_DIRTY_BINDINGS_TES | \
CROCUS_STAGE_DIRTY_BINDINGS_GS | \
CROCUS_STAGE_DIRTY_BINDINGS_FS | \
CROCUS_STAGE_DIRTY_BINDINGS_CS)
#define CROCUS_RENDER_STAGE_DIRTY_CONSTANTS (CROCUS_STAGE_DIRTY_CONSTANTS_VS | \
CROCUS_STAGE_DIRTY_CONSTANTS_TCS | \
CROCUS_STAGE_DIRTY_CONSTANTS_TES | \
CROCUS_STAGE_DIRTY_CONSTANTS_GS | \
CROCUS_STAGE_DIRTY_CONSTANTS_FS)
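/* Illustrative sketch (not part of the driver): because these flags are laid
* out in gl_shader_stage order, a per-stage flag can be computed by adding
* the stage index to the shift base, e.g.
*
*    uint64_t flag = 1ull << (CROCUS_SHIFT_FOR_STAGE_DIRTY_CONSTANTS + stage);
*
* which for stage == MESA_SHADER_FRAGMENT yields
* CROCUS_STAGE_DIRTY_CONSTANTS_FS.
*/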
/**
* Non-orthogonal state (NOS) dependency flags.
*
* Shader programs may depend on non-orthogonal state. These flags are
* used to indicate that a shader's key depends on the state provided by
* a certain Gallium CSO. Changing any CSOs marked as a dependency will
* cause the driver to re-compute the shader key, possibly triggering a
* shader recompile.
*/
enum crocus_nos_dep {
CROCUS_NOS_FRAMEBUFFER,
CROCUS_NOS_DEPTH_STENCIL_ALPHA,
CROCUS_NOS_RASTERIZER,
CROCUS_NOS_BLEND,
CROCUS_NOS_LAST_VUE_MAP,
CROCUS_NOS_TEXTURES,
CROCUS_NOS_VERTEX_ELEMENTS,
CROCUS_NOS_COUNT,
};
struct crocus_depth_stencil_alpha_state;
/**
* Cache IDs for the in-memory program cache (ice->shaders.cache).
*/
enum crocus_program_cache_id {
CROCUS_CACHE_VS = MESA_SHADER_VERTEX,
CROCUS_CACHE_TCS = MESA_SHADER_TESS_CTRL,
CROCUS_CACHE_TES = MESA_SHADER_TESS_EVAL,
CROCUS_CACHE_GS = MESA_SHADER_GEOMETRY,
CROCUS_CACHE_FS = MESA_SHADER_FRAGMENT,
CROCUS_CACHE_CS = MESA_SHADER_COMPUTE,
CROCUS_CACHE_BLORP,
CROCUS_CACHE_SF,
CROCUS_CACHE_CLIP,
CROCUS_CACHE_FF_GS,
};
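/* Illustrative sketch (not part of the driver): legacy fixed-function
* programs use the non-stage cache IDs, so a gen4/5 SF program might be
* looked up as
*
*    shader = crocus_find_cached_shader(ice, CROCUS_CACHE_SF,
*                                       sizeof(key), &key);
*
* where `shader` and `key` are hypothetical names used only for this
* example; the real call sites live in the state upload code.
*/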
/** @{
*
* Defines for PIPE_CONTROL operations, which trigger cache flushes,
* synchronization, pipelined memory writes, and so on.
*
* The bits here are not the actual hardware values. The actual fields
* move between various generations, so we just have flags for each
* potential operation, and use genxml to encode the actual packet.
*/
enum pipe_control_flags
{
PIPE_CONTROL_FLUSH_LLC = (1 << 1),
PIPE_CONTROL_LRI_POST_SYNC_OP = (1 << 2),
PIPE_CONTROL_STORE_DATA_INDEX = (1 << 3),
PIPE_CONTROL_CS_STALL = (1 << 4),
PIPE_CONTROL_GLOBAL_SNAPSHOT_COUNT_RESET = (1 << 5),
PIPE_CONTROL_SYNC_GFDT = (1 << 6),
PIPE_CONTROL_TLB_INVALIDATE = (1 << 7),
PIPE_CONTROL_MEDIA_STATE_CLEAR = (1 << 8),
PIPE_CONTROL_WRITE_IMMEDIATE = (1 << 9),
PIPE_CONTROL_WRITE_DEPTH_COUNT = (1 << 10),
PIPE_CONTROL_WRITE_TIMESTAMP = (1 << 11),
PIPE_CONTROL_DEPTH_STALL = (1 << 12),
PIPE_CONTROL_RENDER_TARGET_FLUSH = (1 << 13),
PIPE_CONTROL_INSTRUCTION_INVALIDATE = (1 << 14),
PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE = (1 << 15),
PIPE_CONTROL_INDIRECT_STATE_POINTERS_DISABLE = (1 << 16),
PIPE_CONTROL_NOTIFY_ENABLE = (1 << 17),
PIPE_CONTROL_FLUSH_ENABLE = (1 << 18),
PIPE_CONTROL_DATA_CACHE_FLUSH = (1 << 19),
PIPE_CONTROL_VF_CACHE_INVALIDATE = (1 << 20),
PIPE_CONTROL_CONST_CACHE_INVALIDATE = (1 << 21),
PIPE_CONTROL_STATE_CACHE_INVALIDATE = (1 << 22),
PIPE_CONTROL_STALL_AT_SCOREBOARD = (1 << 23),
PIPE_CONTROL_DEPTH_CACHE_FLUSH = (1 << 24),
PIPE_CONTROL_TILE_CACHE_FLUSH = (1 << 25),
};
#define PIPE_CONTROL_CACHE_FLUSH_BITS \
(PIPE_CONTROL_DEPTH_CACHE_FLUSH | \
PIPE_CONTROL_DATA_CACHE_FLUSH | \
PIPE_CONTROL_RENDER_TARGET_FLUSH)
#define PIPE_CONTROL_CACHE_INVALIDATE_BITS \
(PIPE_CONTROL_STATE_CACHE_INVALIDATE | \
PIPE_CONTROL_CONST_CACHE_INVALIDATE | \
PIPE_CONTROL_VF_CACHE_INVALIDATE | \
PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE | \
PIPE_CONTROL_INSTRUCTION_INVALIDATE)
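/* Illustrative sketch (not part of the driver): callers usually OR these
* bits together when emitting a flush, e.g. an end-of-pipe style flush
* could look like
*
*    crocus_emit_pipe_control_flush(batch, "example reason",
*                                   PIPE_CONTROL_CS_STALL |
*                                   PIPE_CONTROL_CACHE_FLUSH_BITS);
*
* The exact combination is generation- and situation-specific.
*/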
enum crocus_predicate_state {
/* The first two states are used if we can determine whether to draw
* without having to look at the values in the query object buffer. This
* will happen if there is no conditional render in progress, if the query
* object is already completed or if something else has already added
* samples to the preliminary result.
*/
CROCUS_PREDICATE_STATE_RENDER,
CROCUS_PREDICATE_STATE_DONT_RENDER,
/* In this case whether to draw or not depends on the result of an
* MI_PREDICATE command so the predicate enable bit needs to be checked.
*/
CROCUS_PREDICATE_STATE_USE_BIT,
/* In this case, either MI_PREDICATE doesn't exist or we lack the
* necessary kernel features to use it. Stall for the query result.
*/
CROCUS_PREDICATE_STATE_STALL_FOR_QUERY,
};
/** @} */
/**
* An uncompiled, API-facing shader. This is the Gallium CSO for shaders.
* It primarily contains the NIR for the shader.
*
* Each API-facing shader can be compiled into multiple shader variants,
* based on non-orthogonal state dependencies, recorded in the shader key.
*
* See crocus_compiled_shader, which represents a compiled shader variant.
*/
struct crocus_uncompiled_shader {
struct nir_shader *nir;
struct pipe_stream_output_info stream_output;
/* A SHA1 of the serialized NIR for the disk cache. */
unsigned char nir_sha1[20];
unsigned program_id;
/** Bitfield of (1 << CROCUS_NOS_*) flags. */
unsigned nos;
/** Have any shader variants been compiled yet? */
bool compiled_once;
/** Should we use ALT mode for math? Useful for ARB programs. */
bool use_alt_mode;
bool needs_edge_flag;
/** Constant data scraped from the shader by nir_opt_large_constants */
struct pipe_resource *const_data;
/** Surface state for const_data */
struct crocus_state_ref const_data_state;
};
enum crocus_surface_group {
CROCUS_SURFACE_GROUP_RENDER_TARGET,
CROCUS_SURFACE_GROUP_RENDER_TARGET_READ,
CROCUS_SURFACE_GROUP_SOL,
CROCUS_SURFACE_GROUP_CS_WORK_GROUPS,
CROCUS_SURFACE_GROUP_TEXTURE,
CROCUS_SURFACE_GROUP_TEXTURE_GATHER,
CROCUS_SURFACE_GROUP_IMAGE,
CROCUS_SURFACE_GROUP_UBO,
CROCUS_SURFACE_GROUP_SSBO,
CROCUS_SURFACE_GROUP_COUNT,
};
enum {
/* Invalid value for a binding table index. */
CROCUS_SURFACE_NOT_USED = 0xa0a0a0a0,
};
struct crocus_binding_table {
uint32_t size_bytes;
/** Number of surfaces in each group, before compacting. */
uint32_t sizes[CROCUS_SURFACE_GROUP_COUNT];
/** Initial offset of each group. */
uint32_t offsets[CROCUS_SURFACE_GROUP_COUNT];
/** Mask of surfaces used in each group. */
uint64_t used_mask[CROCUS_SURFACE_GROUP_COUNT];
};
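/* Illustrative sketch (not part of the driver): state upload typically maps
* a group-local index to a binding table index (BTI) with the helpers
* declared later in this header, e.g.
*
*    uint32_t bti = crocus_group_index_to_bti(&shader->bt,
*                                             CROCUS_SURFACE_GROUP_TEXTURE,
*                                             tex_index);
*
* A result of CROCUS_SURFACE_NOT_USED means the shader never accesses that
* surface.  `shader` and `tex_index` are hypothetical names used only for
* this example.
*/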
/**
* A compiled shader variant, containing a pointer to the GPU assembly,
* as well as program data and other packets needed by state upload.
*
* There can be several crocus_compiled_shader variants per API-level shader
* (crocus_uncompiled_shader), due to state-based recompiles (brw_*_prog_key).
*/
struct crocus_compiled_shader {
/** Reference to the uploaded assembly. */
uint32_t offset;
/* asm size in map */
uint32_t map_size;
/** The program data (owned by the program cache hash table) */
struct brw_stage_prog_data *prog_data;
uint32_t prog_data_size;
/** A list of system values to be uploaded as uniforms. */
enum brw_param_builtin *system_values;
unsigned num_system_values;
/** Number of constbufs expected by the shader. */
unsigned num_cbufs;
/**
* Derived 3DSTATE_STREAMOUT and 3DSTATE_SO_DECL_LIST packets
* (the VUE-based information for transform feedback outputs).
*/
uint32_t *streamout;
struct crocus_binding_table bt;
uint32_t bind_bo_offset;
uint32_t surf_offset[128]; /* TODO */
};
/**
* API context state that is replicated per shader stage.
*/
struct crocus_shader_state {
/** Uniform Buffers */
struct pipe_constant_buffer constbufs[PIPE_MAX_CONSTANT_BUFFERS];
bool sysvals_need_upload;
/** Shader Storage Buffers */
struct pipe_shader_buffer ssbo[PIPE_MAX_SHADER_BUFFERS];
/** Shader Storage Images (image load store) */
struct crocus_image_view image[PIPE_MAX_SHADER_IMAGES];
struct crocus_sampler_state *samplers[CROCUS_MAX_TEXTURE_SAMPLERS];
struct crocus_sampler_view *textures[CROCUS_MAX_TEXTURE_SAMPLERS];
/** Bitfield of which constant buffers are bound (non-null). */
uint32_t bound_cbufs;
/** Bitfield of which image views are bound (non-null). */
uint32_t bound_image_views;
/** Bitfield of which sampler views are bound (non-null). */
uint32_t bound_sampler_views;
/** Bitfield of which shader storage buffers are bound (non-null). */
uint32_t bound_ssbos;
/** Bitfield of which shader storage buffers are writable. */
uint32_t writable_ssbos;
uint32_t sampler_offset;
};
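/* Illustrative sketch (not part of the driver): the bound_* fields are plain
* bitfields indexed by slot, so upload code can walk only the live slots,
* e.g.
*
*    uint32_t bound = shs->bound_cbufs;
*    while (bound) {
*       const int i = u_bit_scan(&bound);
*       // ... process shs->constbufs[i] ...
*    }
*
* `shs` is a hypothetical pointer to a struct crocus_shader_state.
*/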
/**
* The API context (derived from pipe_context).
*
* Most driver state is tracked here.
*/
struct crocus_context {
struct pipe_context ctx;
/** A debug callback for KHR_debug output. */
struct pipe_debug_callback dbg;
/** A device reset status callback for notifying that the GPU is hosed. */
struct pipe_device_reset_callback reset;
/** Slab allocator for crocus_transfer_map objects. */
struct slab_child_pool transfer_pool;
struct blorp_context blorp;
int batch_count;
struct crocus_batch batches[CROCUS_BATCH_COUNT];
struct u_upload_mgr *query_buffer_uploader;
struct blitter_context *blitter;
struct {
struct {
/**
* Either the value of BaseVertex for indexed draw calls or the value
* of the argument <first> for non-indexed draw calls.
*/
int firstvertex;
int baseinstance;
} params;
/**
* Are the above values the ones stored in the draw_params buffer?
* If so, we can compare them against new values to see if anything
* changed. If not, we need to assume they changed.
*/
bool params_valid;
/**
* Resource and offset that store the draw parameters, either from the
* indirect buffer or from the buffer that stores the previous values
* for non-indirect draws.
*/
struct crocus_state_ref draw_params;
struct {
/**
* The value of DrawID.  This always comes in from its own vertex
* buffer since it's not part of the indirect draw parameters.
*/
int drawid;
/**
* Stores whether this is an indexed or non-indexed draw (~0/0).  Useful
* to calculate BaseVertex as an AND of firstvertex and is_indexed_draw.
*/
int is_indexed_draw;
} derived_params;
/**
* Resource and offset used for GL_ARB_shader_draw_parameters which
* contains parameters that are not present in the indirect buffer,
* such as drawid and is_indexed_draw.  They will go in their own
* vertex element.
*/
struct crocus_state_ref derived_draw_params;
} draw;
struct {
struct crocus_uncompiled_shader *uncompiled[MESA_SHADER_STAGES];
struct crocus_compiled_shader *prog[MESA_SHADER_STAGES];
struct brw_vue_map *last_vue_map;
struct crocus_bo *cache_bo;
uint32_t cache_next_offset;
void *cache_bo_map;
struct hash_table *cache;
unsigned urb_size;
/* gen 4/5 clip/sf progs */
struct crocus_compiled_shader *clip_prog;
struct crocus_compiled_shader *sf_prog;
/* gen4/5 prims, gen6 streamout */
struct crocus_compiled_shader *ff_gs_prog;
uint32_t clip_offset;
uint32_t sf_offset;
uint32_t wm_offset;
uint32_t vs_offset;
uint32_t gs_offset;
uint32_t cc_offset;
/** Is a GS or TES outputting points or lines? */
bool output_topology_is_points_or_lines;
/* Track last VS URB entry size */
unsigned last_vs_entry_size;
/**
* Scratch buffers for various sizes and stages.
*
* Indexed by the "Per-Thread Scratch Space" field's 4-bit encoding,
* and shader stage.
*/
struct crocus_bo *scratch_bos[1 << 4][MESA_SHADER_STAGES];
} shaders;
struct {
struct crocus_query *query;
bool condition;
enum pipe_render_cond_flag mode;
} condition;
struct intel_perf_context *perf_ctx;
struct {
uint64_t dirty;
uint64_t stage_dirty;
uint64_t stage_dirty_for_nos[CROCUS_NOS_COUNT];
unsigned num_viewports;
unsigned sample_mask;
struct crocus_blend_state *cso_blend;
struct crocus_rasterizer_state *cso_rast;
struct crocus_depth_stencil_alpha_state *cso_zsa;
struct crocus_vertex_element_state *cso_vertex_elements;
struct pipe_blend_color blend_color;
struct pipe_poly_stipple poly_stipple;
struct pipe_viewport_state viewports[CROCUS_MAX_VIEWPORTS];
struct pipe_scissor_state scissors[CROCUS_MAX_VIEWPORTS];
struct pipe_stencil_ref stencil_ref;
struct pipe_framebuffer_state framebuffer;
struct pipe_clip_state clip_planes;
float default_outer_level[4];
float default_inner_level[2];
/** Bitfield of which vertex buffers are bound (non-null). */
uint32_t bound_vertex_buffers;
struct pipe_vertex_buffer vertex_buffers[16];
uint32_t vb_end[16];
bool primitive_restart;
unsigned cut_index;
enum pipe_prim_type prim_mode:8;
bool prim_is_points_or_lines;
uint8_t vertices_per_patch;
bool window_space_position;
/** The last compute group size */
uint32_t last_block[3];
/** The last compute grid size */
uint32_t last_grid[3];
/** Reference to the BO containing the compute grid size */
struct crocus_state_ref grid_size;
/**
* Array of aux usages for drawing, altered to account for any
* self-dependencies from resources bound for sampling and rendering.
*/
enum isl_aux_usage draw_aux_usage[BRW_MAX_DRAW_BUFFERS];
/** Aux usage of the fb's depth buffer (which may or may not exist). */
enum isl_aux_usage hiz_usage;
/** Bitfield of whether color blending is enabled for RT[i] */
uint8_t blend_enables;
/** Are depth writes enabled? (Depth buffer may or may not exist.) */
bool depth_writes_enabled;
/** Are stencil writes enabled? (Stencil buffer may or may not exist.) */
bool stencil_writes_enabled;
/** GenX-specific current state */
struct crocus_genx_state *genx;
struct crocus_shader_state shaders[MESA_SHADER_STAGES];
/** Does the vertex shader use shader draw parameters? */
bool vs_uses_draw_params;
bool vs_uses_derived_draw_params;
bool vs_needs_sgvs_element;
bool vs_uses_vertexid;
bool vs_uses_instanceid;
/** Does the vertex shader use the edge flag? */
bool vs_needs_edge_flag;
struct pipe_stream_output_target *so_target[PIPE_MAX_SO_BUFFERS];
bool streamout_active;
int so_targets;
bool statistics_counters_enabled;
/** Current conditional rendering mode */
enum crocus_predicate_state predicate;
bool predicate_supported;
/**
* Query BO with a MI_PREDICATE_RESULT snapshot calculated on the
* render context that needs to be uploaded to the compute context.
*/
struct crocus_bo *compute_predicate;
/** Is a PIPE_QUERY_PRIMITIVES_GENERATED query active? */
bool prims_generated_query_active;
/** 3DSTATE_STREAMOUT and 3DSTATE_SO_DECL_LIST packets */
uint32_t *streamout;
/**
* Resources containing streamed state which our render context
* currently points to. Used to re-add these to the validation
* list when we start a new batch and haven't resubmitted commands.
*/
struct {
struct pipe_resource *res;
uint32_t offset;
uint32_t size;
uint32_t index_size;
bool prim_restart;
} index_buffer;
uint32_t sf_vp_address;
uint32_t clip_vp_address;
uint32_t cc_vp_address;
uint32_t stats_wm;
float global_depth_offset_clamp;
uint32_t last_xfb_verts_per_prim;
uint64_t svbi;
} state;
/* BRW_NEW_URB_ALLOCATIONS:
*/
struct {
uint32_t vsize; /* vertex size plus header in urb registers */
uint32_t gsize; /* GS output size in urb registers */
uint32_t hsize; /* Tessellation control output size in urb registers */
uint32_t dsize; /* Tessellation evaluation output size in urb registers */
uint32_t csize; /* constant buffer size in urb registers */
uint32_t sfsize; /* setup data size in urb registers */
bool constrained;
uint32_t nr_vs_entries;
uint32_t nr_hs_entries;
uint32_t nr_ds_entries;
uint32_t nr_gs_entries;
uint32_t nr_clip_entries;
uint32_t nr_sf_entries;
uint32_t nr_cs_entries;
uint32_t vs_start;
uint32_t hs_start;
uint32_t ds_start;
uint32_t gs_start;
uint32_t clip_start;
uint32_t sf_start;
uint32_t cs_start;
/**
* URB size in the current configuration. The units this is expressed
* in are somewhat inconsistent, see intel_device_info::urb::size.
*
* FINISHME: Represent the URB size consistently in KB on all platforms.
*/
uint32_t size;
/* True if the most recently sent _3DSTATE_URB message allocated
* URB space for the GS.
*/
bool gs_present;
/* True if the most recently sent _3DSTATE_URB message allocated
* URB space for the HS and DS.
*/
bool tess_present;
} urb;
/* GEN4/5 curbe */
struct {
unsigned wm_start;
unsigned wm_size;
unsigned clip_start;
unsigned clip_size;
unsigned vs_start;
unsigned vs_size;
unsigned total_size;
struct crocus_resource *curbe_res;
unsigned curbe_offset;
} curbe;
/**
* A buffer containing a marker + description of the driver. This buffer is
* added to all execbuf syscalls so that we can identify the driver that
* generated a hang by looking at the content of the buffer in the error
* state. It is also used for hardware workarounds that require scratch
* writes or reads from some unimportant memory. To avoid overwriting the
* debug data, use the workaround_offset field for workarounds.
*/
struct crocus_bo *workaround_bo;
unsigned workaround_offset;
};
#define perf_debug(dbg, ...) do { \
if (INTEL_DEBUG & DEBUG_PERF) \
dbg_printf(__VA_ARGS__); \
if (unlikely(dbg)) \
pipe_debug_message(dbg, PERF_INFO, __VA_ARGS__); \
} while(0)
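/* Illustrative usage sketch (not part of the driver): performance warnings
* are typically reported against the context's KHR_debug callback, e.g.
*
*    perf_debug(&ice->dbg, "Stalling on query result.\n");
*
* The message text here is an assumption for illustration.
*/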
struct pipe_context *
crocus_create_context(struct pipe_screen *screen, void *priv, unsigned flags);
void crocus_lost_context_state(struct crocus_batch *batch);
void crocus_init_blit_functions(struct pipe_context *ctx);
void crocus_init_clear_functions(struct pipe_context *ctx);
void crocus_init_program_functions(struct pipe_context *ctx);
void crocus_init_resource_functions(struct pipe_context *ctx);
bool crocus_update_compiled_shaders(struct crocus_context *ice);
void crocus_update_compiled_compute_shader(struct crocus_context *ice);
void crocus_fill_cs_push_const_buffer(struct brw_cs_prog_data *cs_prog_data,
unsigned threads, uint32_t *dst);
/* crocus_blit.c */
enum crocus_blitter_op
{
CROCUS_SAVE_TEXTURES = 1,
CROCUS_SAVE_FRAMEBUFFER = 2,
CROCUS_SAVE_FRAGMENT_STATE = 4,
CROCUS_DISABLE_RENDER_COND = 8,
};
void crocus_blitter_begin(struct crocus_context *ice, enum crocus_blitter_op op, bool render_cond);
void crocus_blorp_surf_for_resource(struct crocus_vtable *vtbl,
struct isl_device *isl_dev,
struct blorp_surf *surf,
struct pipe_resource *p_res,
enum isl_aux_usage aux_usage,
unsigned level,
bool is_render_target);
void crocus_copy_region(struct blorp_context *blorp,
struct crocus_batch *batch,
struct pipe_resource *dst,
unsigned dst_level,
unsigned dstx, unsigned dsty, unsigned dstz,
struct pipe_resource *src,
unsigned src_level,
const struct pipe_box *src_box);
/* crocus_draw.c */
void crocus_draw_vbo(struct pipe_context *ctx,
const struct pipe_draw_info *info,
unsigned drawid_offset,
const struct pipe_draw_indirect_info *indirect,
const struct pipe_draw_start_count_bias *draws,
unsigned num_draws);
void crocus_launch_grid(struct pipe_context *, const struct pipe_grid_info *);
/* crocus_pipe_control.c */
void crocus_emit_pipe_control_flush(struct crocus_batch *batch,
const char *reason, uint32_t flags);
void crocus_emit_pipe_control_write(struct crocus_batch *batch,
const char *reason, uint32_t flags,
struct crocus_bo *bo, uint32_t offset,
uint64_t imm);
void crocus_emit_mi_flush(struct crocus_batch *batch);
void crocus_emit_depth_stall_flushes(struct crocus_batch *batch);
void crocus_emit_post_sync_nonzero_flush(struct crocus_batch *batch);
void crocus_emit_end_of_pipe_sync(struct crocus_batch *batch,
const char *reason, uint32_t flags);
void crocus_flush_all_caches(struct crocus_batch *batch);
#define crocus_handle_always_flush_cache(batch) \
do { \
if (unlikely(batch->screen->driconf.always_flush_cache)) \
crocus_flush_all_caches(batch); \
} while (0)
void crocus_init_flush_functions(struct pipe_context *ctx);
/* crocus_program.c */
const struct shader_info *crocus_get_shader_info(const struct crocus_context *ice,
gl_shader_stage stage);
struct crocus_bo *crocus_get_scratch_space(struct crocus_context *ice,
unsigned per_thread_scratch,
gl_shader_stage stage);
uint32_t crocus_group_index_to_bti(const struct crocus_binding_table *bt,
enum crocus_surface_group group,
uint32_t index);
uint32_t crocus_bti_to_group_index(const struct crocus_binding_table *bt,
enum crocus_surface_group group,
uint32_t bti);
/* crocus_disk_cache.c */
void crocus_disk_cache_store(struct disk_cache *cache,
const struct crocus_uncompiled_shader *ish,
const struct crocus_compiled_shader *shader,
void *map,
const void *prog_key,
uint32_t prog_key_size);
struct crocus_compiled_shader *
crocus_disk_cache_retrieve(struct crocus_context *ice,
const struct crocus_uncompiled_shader *ish,
const void *prog_key,
uint32_t prog_key_size);
/* crocus_program_cache.c */
void crocus_init_program_cache(struct crocus_context *ice);
void crocus_destroy_program_cache(struct crocus_context *ice);
void crocus_print_program_cache(struct crocus_context *ice);
struct crocus_compiled_shader *crocus_find_cached_shader(struct crocus_context *ice,
enum crocus_program_cache_id,
uint32_t key_size,
const void *key);
struct crocus_compiled_shader *crocus_upload_shader(struct crocus_context *ice,
enum crocus_program_cache_id,
uint32_t key_size,
const void *key,
const void *assembly,
uint32_t asm_size,
struct brw_stage_prog_data *,
uint32_t prog_data_size,
uint32_t *streamout,
enum brw_param_builtin *sysv,
unsigned num_system_values,
unsigned num_cbufs,
const struct crocus_binding_table *bt);
const void *crocus_find_previous_compile(const struct crocus_context *ice,
enum crocus_program_cache_id cache_id,
unsigned program_string_id);
bool crocus_blorp_lookup_shader(struct blorp_batch *blorp_batch,
const void *key,
uint32_t key_size,
uint32_t *kernel_out,
void *prog_data_out);
bool crocus_blorp_upload_shader(struct blorp_batch *blorp_batch,
uint32_t stage,
const void *key, uint32_t key_size,
const void *kernel, uint32_t kernel_size,
const struct brw_stage_prog_data *prog_data,
uint32_t prog_data_size,
uint32_t *kernel_out,
void *prog_data_out);
/* crocus_resolve.c */
void crocus_predraw_resolve_inputs(struct crocus_context *ice,
struct crocus_batch *batch,
bool *draw_aux_buffer_disabled,
gl_shader_stage stage,
bool consider_framebuffer);
void crocus_predraw_resolve_framebuffer(struct crocus_context *ice,
struct crocus_batch *batch,
bool *draw_aux_buffer_disabled);
void crocus_postdraw_update_resolve_tracking(struct crocus_context *ice,
struct crocus_batch *batch);
void crocus_cache_sets_clear(struct crocus_batch *batch);
void crocus_flush_depth_and_render_caches(struct crocus_batch *batch);
void crocus_cache_flush_for_read(struct crocus_batch *batch, struct crocus_bo *bo);
void crocus_cache_flush_for_render(struct crocus_batch *batch,
struct crocus_bo *bo,
enum isl_format format,
enum isl_aux_usage aux_usage);
void crocus_render_cache_add_bo(struct crocus_batch *batch,
struct crocus_bo *bo,
enum isl_format format,
enum isl_aux_usage aux_usage);
void crocus_cache_flush_for_depth(struct crocus_batch *batch, struct crocus_bo *bo);
void crocus_depth_cache_add_bo(struct crocus_batch *batch, struct crocus_bo *bo);
int crocus_get_driver_query_info(struct pipe_screen *pscreen, unsigned index,
struct pipe_driver_query_info *info);
int crocus_get_driver_query_group_info(struct pipe_screen *pscreen,
unsigned index,
struct pipe_driver_query_group_info *info);
struct pipe_rasterizer_state *crocus_get_rast_state(struct crocus_context *ctx);
bool crocus_sw_check_cond_render(struct crocus_context *ice);
static inline bool crocus_check_conditional_render(struct crocus_context *ice)
{
if (ice->state.predicate == CROCUS_PREDICATE_STATE_STALL_FOR_QUERY)
return crocus_sw_check_cond_render(ice);
return ice->state.predicate != CROCUS_PREDICATE_STATE_DONT_RENDER;
}
#ifdef genX
# include "crocus_genx_protos.h"
#else
# define genX(x) gfx4_##x
# include "crocus_genx_protos.h"
# undef genX
# define genX(x) gfx45_##x
# include "crocus_genx_protos.h"
# undef genX
# define genX(x) gfx5_##x
# include "crocus_genx_protos.h"
# undef genX
# define genX(x) gfx6_##x
# include "crocus_genx_protos.h"
# undef genX
# define genX(x) gfx7_##x
# include "crocus_genx_protos.h"
# undef genX
# define genX(x) gfx75_##x
# include "crocus_genx_protos.h"
# undef genX
#endif
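/* Illustrative note (not part of the driver): in per-generation compilation
* units, genX(foo) expands to that unit's prefix (e.g. gfx7_foo), while
* generic code hits the #else branch above and gets the prototypes declared
* once for every supported generation.  `foo` is a placeholder name used
* only for this example.
*/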
#endif

View File

@ -0,0 +1,58 @@
/*
* Copyright © 2018 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef CROCUS_DEFINES_H
#define CROCUS_DEFINES_H
/**
* @file crocus_defines.h
*
* Random hardware #defines that we're not using GENXML for.
*/
#define MI_PREDICATE (0xC << 23)
# define MI_PREDICATE_LOADOP_KEEP (0 << 6)
# define MI_PREDICATE_LOADOP_LOAD (2 << 6)
# define MI_PREDICATE_LOADOP_LOADINV (3 << 6)
# define MI_PREDICATE_COMBINEOP_SET (0 << 3)
# define MI_PREDICATE_COMBINEOP_AND (1 << 3)
# define MI_PREDICATE_COMBINEOP_OR (2 << 3)
# define MI_PREDICATE_COMBINEOP_XOR (3 << 3)
# define MI_PREDICATE_COMPAREOP_TRUE (0 << 0)
# define MI_PREDICATE_COMPAREOP_FALSE (1 << 0)
# define MI_PREDICATE_COMPAREOP_SRCS_EQUAL (2 << 0)
# define MI_PREDICATE_COMPAREOP_DELTAS_EQUAL (3 << 0)
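/* Illustrative sketch (not part of this header): conditional rendering
* typically loads the query results into MI_PREDICATE_SRC0/SRC1 and then
* emits a single command dword such as
*
*    MI_PREDICATE | MI_PREDICATE_LOADOP_LOADINV |
*    MI_PREDICATE_COMBINEOP_SET | MI_PREDICATE_COMPAREOP_SRCS_EQUAL
*
* so that MI_PREDICATE_RESULT ends up nonzero when the two sources differ.
*/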
/* Predicate registers */
#define MI_PREDICATE_SRC0 0x2400
#define MI_PREDICATE_SRC1 0x2408
#define MI_PREDICATE_DATA 0x2410
#define MI_PREDICATE_RESULT 0x2418
#define MI_PREDICATE_RESULT_1 0x241C
#define MI_PREDICATE_RESULT_2 0x2214
#define CS_GPR(n) (0x2600 + (n) * 8)
/* The number of bits in our TIMESTAMP queries. */
#define TIMESTAMP_BITS 36
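/* Illustrative sketch (not part of this header): since raw timestamps wrap
* at 2^TIMESTAMP_BITS ticks, query code typically masks deltas to that
* width, e.g.
*
*    uint64_t delta = (end - start) & ((1ull << TIMESTAMP_BITS) - 1);
*
* `end` and `start` are hypothetical raw counter reads.
*/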
#endif

View File

@ -0,0 +1,263 @@
/*
* Copyright © 2018 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
/**
* @file crocus_disk_cache.c
*
* Functions for interacting with the on-disk shader cache.
*/
#include <stdio.h>
#include <stdint.h>
#include <assert.h>
#include <string.h>
#include "compiler/nir/nir.h"
#include "util/blob.h"
#include "util/build_id.h"
#include "util/disk_cache.h"
#include "util/mesa-sha1.h"
#include "crocus_context.h"
static bool debug = false;
/**
* Compute a disk cache key for the given uncompiled shader and NOS key.
*/
static void
crocus_disk_cache_compute_key(struct disk_cache *cache,
const struct crocus_uncompiled_shader *ish,
const void *orig_prog_key,
uint32_t prog_key_size,
cache_key cache_key)
{
/* Create a copy of the program key with program_string_id zeroed out.
* It's essentially random data which we don't want to include in our
* hashing and comparisons. We'll set a proper value on a cache hit.
*/
union brw_any_prog_key prog_key;
memcpy(&prog_key, orig_prog_key, prog_key_size);
prog_key.base.program_string_id = 0;
uint8_t data[sizeof(prog_key) + sizeof(ish->nir_sha1)];
uint32_t data_size = prog_key_size + sizeof(ish->nir_sha1);
memcpy(data, ish->nir_sha1, sizeof(ish->nir_sha1));
memcpy(data + sizeof(ish->nir_sha1), &prog_key, prog_key_size);
disk_cache_compute_key(cache, data, data_size, cache_key);
}
/**
* Store the given compiled shader in the disk cache.
*
* This should only be called on newly compiled shaders. No checking is
* done to prevent repeated stores of the same shader.
*/
void
crocus_disk_cache_store(struct disk_cache *cache,
const struct crocus_uncompiled_shader *ish,
const struct crocus_compiled_shader *shader,
void *map,
const void *prog_key,
uint32_t prog_key_size)
{
#ifdef ENABLE_SHADER_CACHE
if (!cache)
return;
gl_shader_stage stage = ish->nir->info.stage;
const struct brw_stage_prog_data *prog_data = shader->prog_data;
cache_key cache_key;
crocus_disk_cache_compute_key(cache, ish, prog_key, prog_key_size, cache_key);
if (debug) {
char sha1[41];
_mesa_sha1_format(sha1, cache_key);
fprintf(stderr, "[mesa disk cache] storing %s\n", sha1);
}
struct blob blob;
blob_init(&blob);
/* We write the following data to the cache blob:
*
* 1. Prog data (must come first because it has the assembly size)
* 2. Assembly code
* 3. Number of entries in the system value array
* 4. System value array
* 5. Legacy param array (only used for compute workgroup ID)
* 6. Binding table
*/
blob_write_bytes(&blob, shader->prog_data, brw_prog_data_size(stage));
blob_write_bytes(&blob, map + shader->offset, shader->prog_data->program_size);
blob_write_bytes(&blob, &shader->num_system_values, sizeof(unsigned));
blob_write_bytes(&blob, shader->system_values,
shader->num_system_values * sizeof(enum brw_param_builtin));
blob_write_bytes(&blob, prog_data->param,
prog_data->nr_params * sizeof(uint32_t));
blob_write_bytes(&blob, &shader->bt, sizeof(shader->bt));
disk_cache_put(cache, cache_key, blob.data, blob.size, NULL);
blob_finish(&blob);
#endif
}
/**
* Search for a compiled shader in the disk cache. If found, upload it
* to the in-memory program cache so we can use it.
*/
struct crocus_compiled_shader *
crocus_disk_cache_retrieve(struct crocus_context *ice,
const struct crocus_uncompiled_shader *ish,
const void *prog_key,
uint32_t key_size)
{
#ifdef ENABLE_SHADER_CACHE
struct crocus_screen *screen = (void *) ice->ctx.screen;
struct disk_cache *cache = screen->disk_cache;
gl_shader_stage stage = ish->nir->info.stage;
if (!cache)
return NULL;
cache_key cache_key;
crocus_disk_cache_compute_key(cache, ish, prog_key, key_size, cache_key);
if (debug) {
char sha1[41];
_mesa_sha1_format(sha1, cache_key);
fprintf(stderr, "[mesa disk cache] retrieving %s: ", sha1);
}
size_t size;
void *buffer = disk_cache_get(screen->disk_cache, cache_key, &size);
if (debug)
fprintf(stderr, "%s\n", buffer ? "found" : "missing");
if (!buffer)
return NULL;
const uint32_t prog_data_size = brw_prog_data_size(stage);
struct brw_stage_prog_data *prog_data = ralloc_size(NULL, prog_data_size);
const void *assembly;
uint32_t num_system_values;
uint32_t *system_values = NULL;
uint32_t *so_decls = NULL;
struct blob_reader blob;
blob_reader_init(&blob, buffer, size);
blob_copy_bytes(&blob, prog_data, prog_data_size);
assembly = blob_read_bytes(&blob, prog_data->program_size);
num_system_values = blob_read_uint32(&blob);
if (num_system_values) {
system_values =
ralloc_array(NULL, enum brw_param_builtin, num_system_values);
blob_copy_bytes(&blob, system_values,
num_system_values * sizeof(enum brw_param_builtin));
}
prog_data->param = NULL;
prog_data->pull_param = NULL;
assert(prog_data->nr_pull_params == 0);
if (prog_data->nr_params) {
prog_data->param = ralloc_array(NULL, uint32_t, prog_data->nr_params);
blob_copy_bytes(&blob, prog_data->param,
prog_data->nr_params * sizeof(uint32_t));
}
struct crocus_binding_table bt;
blob_copy_bytes(&blob, &bt, sizeof(bt));
if ((stage == MESA_SHADER_VERTEX ||
stage == MESA_SHADER_TESS_EVAL ||
stage == MESA_SHADER_GEOMETRY) && screen->devinfo.ver > 6) {
struct brw_vue_prog_data *vue_prog_data = (void *) prog_data;
so_decls = screen->vtbl.create_so_decl_list(&ish->stream_output,
&vue_prog_data->vue_map);
}
/* System values and uniforms are stored in constant buffer 0, so the
* user-facing UBOs start at index one. If any constant buffer is needed,
* constant buffer 0 will also be needed, so account for it.
*/
unsigned num_cbufs = ish->nir->info.num_ubos;
if (num_cbufs || ish->nir->num_uniforms)
num_cbufs++;
if (num_system_values)
num_cbufs++;
/* Upload our newly read shader to the in-memory program cache and
* return it to the caller.
*/
struct crocus_compiled_shader *shader =
crocus_upload_shader(ice, stage, key_size, prog_key, assembly,
prog_data->program_size,
prog_data, prog_data_size, so_decls, system_values,
num_system_values, num_cbufs, &bt);
free(buffer);
return shader;
#else
return NULL;
#endif
}
/**
* Initialize the on-disk shader cache.
*/
void
crocus_disk_cache_init(struct crocus_screen *screen)
{
#ifdef ENABLE_SHADER_CACHE
if (INTEL_DEBUG & DEBUG_DISK_CACHE_DISABLE_MASK)
return;
/* array length = print length + nul char + 1 extra to verify it's unused */
char renderer[13];
UNUSED int len =
snprintf(renderer, sizeof(renderer), "crocus_%04x", screen->pci_id);
assert(len == sizeof(renderer) - 2);
const struct build_id_note *note =
build_id_find_nhdr_for_addr(crocus_disk_cache_init);
assert(note && build_id_length(note) == 20); /* sha1 */
const uint8_t *id_sha1 = build_id_data(note);
assert(id_sha1);
char timestamp[41];
_mesa_sha1_format(timestamp, id_sha1);
const uint64_t driver_flags =
brw_get_compiler_config_value(screen->compiler);
screen->disk_cache = disk_cache_create(renderer, timestamp, driver_flags);
#endif
}

View File

@ -0,0 +1,511 @@
/*
* Copyright © 2017 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
/**
* @file crocus_draw.c
*
* The main driver hooks for drawing and launching compute shaders.
*/
#include <stdio.h>
#include <errno.h>
#include "pipe/p_defines.h"
#include "pipe/p_state.h"
#include "pipe/p_context.h"
#include "pipe/p_screen.h"
#include "util/u_draw.h"
#include "util/u_inlines.h"
#include "util/u_transfer.h"
#include "util/u_upload_mgr.h"
#include "intel/compiler/brw_compiler.h"
#include "intel/compiler/brw_eu_defines.h"
#include "crocus_context.h"
#include "crocus_defines.h"
#include "util/u_prim_restart.h"
#include "indices/u_primconvert.h"
#include "util/u_prim.h"
static bool
prim_is_points_or_lines(enum pipe_prim_type mode)
{
/* We don't need to worry about adjacency - it can only be used with
* geometry shaders, and we don't care about this info when GS is on.
*/
return mode == PIPE_PRIM_POINTS ||
mode == PIPE_PRIM_LINES ||
mode == PIPE_PRIM_LINE_LOOP ||
mode == PIPE_PRIM_LINE_STRIP;
}
static bool
can_cut_index_handle_restart_index(struct crocus_context *ice,
const struct pipe_draw_info *draw)
{
switch (draw->index_size) {
case 1:
return draw->restart_index == 0xff;
case 2:
return draw->restart_index == 0xffff;
case 4:
return draw->restart_index == 0xffffffff;
default:
unreachable("illegal index size\n");
}
return false;
}
static bool
can_cut_index_handle_prim(struct crocus_context *ice,
const struct pipe_draw_info *draw)
{
struct crocus_screen *screen = (struct crocus_screen*)ice->ctx.screen;
const struct intel_device_info *devinfo = &screen->devinfo;
/* Haswell can do it all. */
if (devinfo->is_haswell)
return true;
if (!can_cut_index_handle_restart_index(ice, draw))
return false;
switch (draw->mode) {
case PIPE_PRIM_POINTS:
case PIPE_PRIM_LINES:
case PIPE_PRIM_LINE_STRIP:
case PIPE_PRIM_TRIANGLES:
case PIPE_PRIM_TRIANGLE_STRIP:
case PIPE_PRIM_LINES_ADJACENCY:
case PIPE_PRIM_LINE_STRIP_ADJACENCY:
case PIPE_PRIM_TRIANGLES_ADJACENCY:
case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY:
return true;
default:
break;
}
return false;
}
/**
* Record the current primitive mode and restart information, flagging
* related packets as dirty if necessary.
*
* This must be called before updating compiled shaders, because the patch
* information informs the TCS key.
*/
static void
crocus_update_draw_info(struct crocus_context *ice,
const struct pipe_draw_info *info,
const struct pipe_draw_start_count_bias *draw)
{
struct crocus_screen *screen = (struct crocus_screen *)ice->ctx.screen;
enum pipe_prim_type mode = info->mode;
if (screen->devinfo.ver < 6) {
/* Slight optimization to avoid the GS program when not needed:
*/
struct pipe_rasterizer_state *rs_state = crocus_get_rast_state(ice);
if (mode == PIPE_PRIM_QUAD_STRIP && !rs_state->flatshade &&
rs_state->fill_front == PIPE_POLYGON_MODE_FILL &&
rs_state->fill_back == PIPE_POLYGON_MODE_FILL)
mode = PIPE_PRIM_TRIANGLE_STRIP;
if (mode == PIPE_PRIM_QUADS &&
draw->count == 4 &&
!rs_state->flatshade &&
rs_state->fill_front == PIPE_POLYGON_MODE_FILL &&
rs_state->fill_back == PIPE_POLYGON_MODE_FILL)
mode = PIPE_PRIM_TRIANGLE_FAN;
}
if (ice->state.prim_mode != mode) {
ice->state.prim_mode = mode;
if (screen->devinfo.ver < 6)
ice->state.dirty |= CROCUS_DIRTY_GEN4_CLIP_PROG | CROCUS_DIRTY_GEN4_SF_PROG;
if (screen->devinfo.ver <= 6)
ice->state.dirty |= CROCUS_DIRTY_GEN4_FF_GS_PROG;
if (screen->devinfo.ver >= 7)
ice->state.dirty |= CROCUS_DIRTY_GEN7_SBE;
/* For XY Clip enables */
bool points_or_lines = prim_is_points_or_lines(mode);
if (points_or_lines != ice->state.prim_is_points_or_lines) {
ice->state.prim_is_points_or_lines = points_or_lines;
ice->state.dirty |= CROCUS_DIRTY_CLIP;
}
}
if (info->mode == PIPE_PRIM_PATCHES &&
ice->state.vertices_per_patch != info->vertices_per_patch) {
ice->state.vertices_per_patch = info->vertices_per_patch;
/* This is needed for key->input_vertices */
ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_UNCOMPILED_TCS;
/* Flag constants dirty for gl_PatchVerticesIn if needed. */
const struct shader_info *tcs_info =
crocus_get_shader_info(ice, MESA_SHADER_TESS_CTRL);
if (tcs_info &&
BITSET_TEST(tcs_info->system_values_read, SYSTEM_VALUE_VERTICES_IN)) {
ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_CONSTANTS_TCS;
ice->state.shaders[MESA_SHADER_TESS_CTRL].sysvals_need_upload = true;
}
}
const unsigned cut_index = info->primitive_restart ? info->restart_index :
ice->state.cut_index;
if (ice->state.primitive_restart != info->primitive_restart ||
ice->state.cut_index != cut_index) {
if (screen->devinfo.is_haswell)
ice->state.dirty |= CROCUS_DIRTY_GEN75_VF;
ice->state.primitive_restart = info->primitive_restart;
ice->state.cut_index = cut_index;
}
}
/**
* Update shader draw parameters, flagging VF packets as dirty if necessary.
*/
static void
crocus_update_draw_parameters(struct crocus_context *ice,
const struct pipe_draw_info *info,
unsigned drawid_offset,
const struct pipe_draw_indirect_info *indirect,
const struct pipe_draw_start_count_bias *draw)
{
bool changed = false;
if (ice->state.vs_uses_draw_params) {
struct crocus_state_ref *draw_params = &ice->draw.draw_params;
if (indirect && indirect->buffer) {
pipe_resource_reference(&draw_params->res, indirect->buffer);
draw_params->offset =
indirect->offset + (info->index_size ? 12 : 8);
changed = true;
ice->draw.params_valid = false;
} else {
int firstvertex = info->index_size ? draw->index_bias : draw->start;
if (!ice->draw.params_valid ||
ice->draw.params.firstvertex != firstvertex ||
ice->draw.params.baseinstance != info->start_instance) {
changed = true;
ice->draw.params.firstvertex = firstvertex;
ice->draw.params.baseinstance = info->start_instance;
ice->draw.params_valid = true;
u_upload_data(ice->ctx.stream_uploader, 0,
sizeof(ice->draw.params), 4, &ice->draw.params,
&draw_params->offset, &draw_params->res);
}
}
}
if (ice->state.vs_uses_derived_draw_params) {
struct crocus_state_ref *derived_params = &ice->draw.derived_draw_params;
int is_indexed_draw = info->index_size ? -1 : 0;
if (ice->draw.derived_params.drawid != drawid_offset ||
ice->draw.derived_params.is_indexed_draw != is_indexed_draw) {
changed = true;
ice->draw.derived_params.drawid = drawid_offset;
ice->draw.derived_params.is_indexed_draw = is_indexed_draw;
u_upload_data(ice->ctx.stream_uploader, 0,
sizeof(ice->draw.derived_params), 4,
&ice->draw.derived_params, &derived_params->offset,
&derived_params->res);
}
}
if (changed) {
ice->state.dirty |= CROCUS_DIRTY_VERTEX_BUFFERS |
CROCUS_DIRTY_VERTEX_ELEMENTS;
}
}
static void
crocus_indirect_draw_vbo(struct crocus_context *ice,
const struct pipe_draw_info *dinfo,
unsigned drawid_offset,
const struct pipe_draw_indirect_info *dindirect,
const struct pipe_draw_start_count_bias *draws)
{
struct crocus_batch *batch = &ice->batches[CROCUS_BATCH_RENDER];
struct crocus_screen *screen = batch->screen;
struct pipe_draw_info info = *dinfo;
struct pipe_draw_indirect_info indirect = *dindirect;
const struct intel_device_info *devinfo = &batch->screen->devinfo;
if (devinfo->is_haswell && indirect.indirect_draw_count &&
ice->state.predicate == CROCUS_PREDICATE_STATE_USE_BIT) {
/* Upload MI_PREDICATE_RESULT to GPR15. */
screen->vtbl.load_register_reg64(batch, CS_GPR(15), MI_PREDICATE_RESULT);
}
uint64_t orig_dirty = ice->state.dirty;
uint64_t orig_stage_dirty = ice->state.stage_dirty;
for (int i = 0; i < indirect.draw_count; i++) {
crocus_batch_maybe_flush(batch, 1500);
crocus_require_statebuffer_space(batch, 2400);
crocus_update_draw_parameters(ice, &info, drawid_offset + i, &indirect, draws);
screen->vtbl.upload_render_state(ice, batch, &info, drawid_offset + i, &indirect, draws);
ice->state.dirty &= ~CROCUS_ALL_DIRTY_FOR_RENDER;
ice->state.stage_dirty &= ~CROCUS_ALL_STAGE_DIRTY_FOR_RENDER;
indirect.offset += indirect.stride;
}
if (devinfo->is_haswell && indirect.indirect_draw_count &&
ice->state.predicate == CROCUS_PREDICATE_STATE_USE_BIT) {
/* Restore MI_PREDICATE_RESULT. */
screen->vtbl.load_register_reg64(batch, MI_PREDICATE_RESULT, CS_GPR(15));
}
/* Put this back for post-draw resolves; we'll clear it again after. */
ice->state.dirty = orig_dirty;
ice->state.stage_dirty = orig_stage_dirty;
}
static void
crocus_simple_draw_vbo(struct crocus_context *ice,
const struct pipe_draw_info *draw,
unsigned drawid_offset,
const struct pipe_draw_indirect_info *indirect,
const struct pipe_draw_start_count_bias *sc)
{
struct crocus_batch *batch = &ice->batches[CROCUS_BATCH_RENDER];
struct crocus_screen *screen = batch->screen;
crocus_batch_maybe_flush(batch, 1500);
crocus_require_statebuffer_space(batch, 2400);
crocus_update_draw_parameters(ice, draw, drawid_offset, indirect, sc);
screen->vtbl.upload_render_state(ice, batch, draw, drawid_offset, indirect, sc);
}
static void
crocus_draw_vbo_get_vertex_count(struct pipe_context *ctx,
const struct pipe_draw_info *info_in,
unsigned drawid_offset,
const struct pipe_draw_indirect_info *indirect)
{
struct crocus_screen *screen = (struct crocus_screen *)ctx->screen;
struct pipe_draw_info info = *info_in;
struct pipe_draw_start_count_bias draw;
uint32_t val = screen->vtbl.get_so_offset(indirect->count_from_stream_output);
draw.start = 0;
draw.count = val;
ctx->draw_vbo(ctx, &info, drawid_offset, NULL, &draw, 1);
}
/**
* The pipe->draw_vbo() driver hook. Performs a draw on the GPU.
*/
void
crocus_draw_vbo(struct pipe_context *ctx,
const struct pipe_draw_info *info,
unsigned drawid_offset,
const struct pipe_draw_indirect_info *indirect,
const struct pipe_draw_start_count_bias *draws,
unsigned num_draws)
{
if (num_draws > 1) {
util_draw_multi(ctx, info, drawid_offset, indirect, draws, num_draws);
return;
}
if (!indirect && (!draws[0].count || !info->instance_count))
return;
struct crocus_context *ice = (struct crocus_context *) ctx;
struct crocus_screen *screen = (struct crocus_screen*)ice->ctx.screen;
struct crocus_batch *batch = &ice->batches[CROCUS_BATCH_RENDER];
if (!crocus_check_conditional_render(ice))
return;
if (info->primitive_restart && !can_cut_index_handle_prim(ice, info)) {
util_draw_vbo_without_prim_restart(ctx, info, drawid_offset,
indirect, draws);
return;
}
if (indirect && indirect->count_from_stream_output &&
!screen->devinfo.is_haswell) {
crocus_draw_vbo_get_vertex_count(ctx, info, drawid_offset, indirect);
return;
}
/* The hardware is capable of removing dangling vertices on its own; however,
* prior to Gen6, we sometimes convert quads into trifans (and quad strips
* into tristrips), since pre-Gen6 hardware requires a GS to render quads.
* Manually trim dangling vertices from such draw calls here so that those
* vertices won't get drawn when we convert to trifans/tristrips.
*/
if (screen->devinfo.ver < 6) {
if (info->mode == PIPE_PRIM_QUADS || info->mode == PIPE_PRIM_QUAD_STRIP) {
bool trim = u_trim_pipe_prim(info->mode, (unsigned *)&draws[0].count);
if (!trim)
return;
}
}
/* We can't safely re-emit 3DSTATE_SO_BUFFERS because it may zero the
* write offsets, changing the behavior.
*/
if (unlikely(INTEL_DEBUG & DEBUG_REEMIT)) {
ice->state.dirty |= CROCUS_ALL_DIRTY_FOR_RENDER & ~CROCUS_DIRTY_GEN7_SO_BUFFERS;
ice->state.stage_dirty |= CROCUS_ALL_STAGE_DIRTY_FOR_RENDER;
}
/* Emit Sandybridge workaround flushes on every primitive, for safety. */
if (screen->devinfo.ver == 6)
crocus_emit_post_sync_nonzero_flush(batch);
crocus_update_draw_info(ice, info, draws);
if (!crocus_update_compiled_shaders(ice))
return;
if (ice->state.dirty & CROCUS_DIRTY_RENDER_RESOLVES_AND_FLUSHES) {
bool draw_aux_buffer_disabled[BRW_MAX_DRAW_BUFFERS] = { };
for (gl_shader_stage stage = 0; stage < MESA_SHADER_COMPUTE; stage++) {
if (ice->shaders.prog[stage])
crocus_predraw_resolve_inputs(ice, batch, draw_aux_buffer_disabled,
stage, true);
}
crocus_predraw_resolve_framebuffer(ice, batch, draw_aux_buffer_disabled);
}
crocus_handle_always_flush_cache(batch);
if (indirect && indirect->buffer)
crocus_indirect_draw_vbo(ice, info, drawid_offset, indirect, draws);
else
crocus_simple_draw_vbo(ice, info, drawid_offset, indirect, draws);
crocus_handle_always_flush_cache(batch);
crocus_postdraw_update_resolve_tracking(ice, batch);
ice->state.dirty &= ~CROCUS_ALL_DIRTY_FOR_RENDER;
ice->state.stage_dirty &= ~CROCUS_ALL_STAGE_DIRTY_FOR_RENDER;
}
static void
crocus_update_grid_size_resource(struct crocus_context *ice,
const struct pipe_grid_info *grid)
{
struct crocus_state_ref *grid_ref = &ice->state.grid_size;
const struct crocus_compiled_shader *shader = ice->shaders.prog[MESA_SHADER_COMPUTE];
bool grid_needs_surface = shader->bt.used_mask[CROCUS_SURFACE_GROUP_CS_WORK_GROUPS];
if (grid->indirect) {
pipe_resource_reference(&grid_ref->res, grid->indirect);
grid_ref->offset = grid->indirect_offset;
/* Zero out the grid size so that the next non-indirect grid launch will
* re-upload it properly.
*/
memset(ice->state.last_grid, 0, sizeof(ice->state.last_grid));
} else if (memcmp(ice->state.last_grid, grid->grid, sizeof(grid->grid)) != 0) {
memcpy(ice->state.last_grid, grid->grid, sizeof(grid->grid));
u_upload_data(ice->ctx.const_uploader, 0, sizeof(grid->grid), 4,
grid->grid, &grid_ref->offset, &grid_ref->res);
}
/* Skip flagging the CS bindings dirty if the shader doesn't use the grid size surface. */
if (!grid_needs_surface)
return;
ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_BINDINGS_CS;
}
void
crocus_launch_grid(struct pipe_context *ctx, const struct pipe_grid_info *grid)
{
struct crocus_context *ice = (struct crocus_context *) ctx;
struct crocus_batch *batch = &ice->batches[CROCUS_BATCH_COMPUTE];
struct crocus_screen *screen = batch->screen;
if (!crocus_check_conditional_render(ice))
return;
if (unlikely(INTEL_DEBUG & DEBUG_REEMIT)) {
ice->state.dirty |= CROCUS_ALL_DIRTY_FOR_COMPUTE;
ice->state.stage_dirty |= CROCUS_ALL_STAGE_DIRTY_FOR_COMPUTE;
}
/* We can't do resolves on the compute engine, so awkwardly, we have to
* do them on the render batch...
*/
if (ice->state.dirty & CROCUS_DIRTY_COMPUTE_RESOLVES_AND_FLUSHES) {
crocus_predraw_resolve_inputs(ice, &ice->batches[CROCUS_BATCH_RENDER], NULL,
MESA_SHADER_COMPUTE, false);
}
crocus_batch_maybe_flush(batch, 1500);
crocus_require_statebuffer_space(batch, 2500);
crocus_update_compiled_compute_shader(ice);
if (memcmp(ice->state.last_block, grid->block, sizeof(grid->block)) != 0) {
memcpy(ice->state.last_block, grid->block, sizeof(grid->block));
ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_CONSTANTS_CS;
ice->state.shaders[MESA_SHADER_COMPUTE].sysvals_need_upload = true;
}
crocus_update_grid_size_resource(ice, grid);
if (ice->state.compute_predicate) {
screen->vtbl.emit_compute_predicate(batch);
ice->state.compute_predicate = NULL;
}
crocus_handle_always_flush_cache(batch);
screen->vtbl.upload_compute_state(ice, batch, grid);
crocus_handle_always_flush_cache(batch);
ice->state.dirty &= ~CROCUS_ALL_DIRTY_FOR_COMPUTE;
ice->state.stage_dirty &= ~CROCUS_ALL_STAGE_DIRTY_FOR_COMPUTE;
/* Note: since compute shaders can't access the framebuffer, there's
* no need to call crocus_postdraw_update_resolve_tracking.
*/
}

View File

@ -0,0 +1,571 @@
/*
* Copyright © 2018 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
/**
* @file crocus_fence.c
*
* Fences for driver and IPC serialisation, scheduling and synchronisation.
*/
#include "util/u_inlines.h"
#include "intel/common/intel_gem.h"
#include "crocus_batch.h"
#include "crocus_bufmgr.h"
#include "crocus_context.h"
#include "crocus_fence.h"
#include "crocus_screen.h"
static uint32_t
gem_syncobj_create(int fd, uint32_t flags)
{
struct drm_syncobj_create args = {
.flags = flags,
};
intel_ioctl(fd, DRM_IOCTL_SYNCOBJ_CREATE, &args);
return args.handle;
}
static void
gem_syncobj_destroy(int fd, uint32_t handle)
{
struct drm_syncobj_destroy args = {
.handle = handle,
};
intel_ioctl(fd, DRM_IOCTL_SYNCOBJ_DESTROY, &args);
}
/**
* Make a new sync-point.
*/
struct crocus_syncobj *
crocus_create_syncobj(struct crocus_screen *screen)
{
struct crocus_syncobj *syncobj = malloc(sizeof(*syncobj));
if (!syncobj)
return NULL;
syncobj->handle = gem_syncobj_create(screen->fd, 0);
assert(syncobj->handle);
pipe_reference_init(&syncobj->ref, 1);
return syncobj;
}
void
crocus_syncobj_destroy(struct crocus_screen *screen,
struct crocus_syncobj *syncobj)
{
gem_syncobj_destroy(screen->fd, syncobj->handle);
free(syncobj);
}
/**
* Add a sync-point to the batch, with the given flags.
*
* \p flags One of I915_EXEC_FENCE_WAIT or I915_EXEC_FENCE_SIGNAL.
*/
void
crocus_batch_add_syncobj(struct crocus_batch *batch,
struct crocus_syncobj *syncobj, unsigned flags)
{
struct drm_i915_gem_exec_fence *fence =
util_dynarray_grow(&batch->exec_fences, struct drm_i915_gem_exec_fence, 1);
*fence = (struct drm_i915_gem_exec_fence){
.handle = syncobj->handle,
.flags = flags,
};
struct crocus_syncobj **store =
util_dynarray_grow(&batch->syncobjs, struct crocus_syncobj *, 1);
*store = NULL;
crocus_syncobj_reference(batch->screen, store, syncobj);
}
/**
* Walk through a batch's dependencies (any I915_EXEC_FENCE_WAIT syncobjs)
* and unreference any which have already passed.
*
* A rarely-used batch (typically the compute batch) accumulates references
* to stale render batches that are no longer of interest, so we can free
* those up.
*/
static void
clear_stale_syncobjs(struct crocus_batch *batch)
{
struct crocus_screen *screen = batch->screen;
int n = util_dynarray_num_elements(&batch->syncobjs, struct crocus_syncobj *);
assert(n == util_dynarray_num_elements(&batch->exec_fences,
struct drm_i915_gem_exec_fence));
/* Skip the first syncobj, as it's the signalling one. */
for (int i = n - 1; i > 1; i--) {
struct crocus_syncobj **syncobj =
util_dynarray_element(&batch->syncobjs, struct crocus_syncobj *, i);
struct drm_i915_gem_exec_fence *fence =
util_dynarray_element(&batch->exec_fences,
struct drm_i915_gem_exec_fence, i);
assert(fence->flags & I915_EXEC_FENCE_WAIT);
if (crocus_wait_syncobj(&screen->base, *syncobj, 0))
continue;
/* This sync object has already passed, there's no need to continue
* marking it as a dependency; we can stop holding on to the reference.
*/
crocus_syncobj_reference(screen, syncobj, NULL);
/* Remove it from the lists; move the last element here. */
struct crocus_syncobj **nth_syncobj =
util_dynarray_pop_ptr(&batch->syncobjs, struct crocus_syncobj *);
struct drm_i915_gem_exec_fence *nth_fence =
util_dynarray_pop_ptr(&batch->exec_fences,
struct drm_i915_gem_exec_fence);
if (syncobj != nth_syncobj) {
*syncobj = *nth_syncobj;
memcpy(fence, nth_fence, sizeof(*fence));
}
}
}
/* ------------------------------------------------------------------- */
struct pipe_fence_handle {
struct pipe_reference ref;
struct pipe_context *unflushed_ctx;
struct crocus_fine_fence *fine[CROCUS_BATCH_COUNT];
};
static void
crocus_fence_destroy(struct pipe_screen *p_screen,
struct pipe_fence_handle *fence)
{
struct crocus_screen *screen = (struct crocus_screen *)p_screen;
for (unsigned i = 0; i < ARRAY_SIZE(fence->fine); i++)
crocus_fine_fence_reference(screen, &fence->fine[i], NULL);
free(fence);
}
static void
crocus_fence_reference(struct pipe_screen *p_screen,
struct pipe_fence_handle **dst,
struct pipe_fence_handle *src)
{
if (pipe_reference(&(*dst)->ref, &src->ref))
crocus_fence_destroy(p_screen, *dst);
*dst = src;
}
bool
crocus_wait_syncobj(struct pipe_screen *p_screen,
struct crocus_syncobj *syncobj, int64_t timeout_nsec)
{
if (!syncobj)
return false;
struct crocus_screen *screen = (struct crocus_screen *)p_screen;
struct drm_syncobj_wait args = {
.handles = (uintptr_t)&syncobj->handle,
.count_handles = 1,
.timeout_nsec = timeout_nsec,
};
return intel_ioctl(screen->fd, DRM_IOCTL_SYNCOBJ_WAIT, &args);
}
static void
crocus_fence_flush(struct pipe_context *ctx,
struct pipe_fence_handle **out_fence, unsigned flags)
{
struct crocus_screen *screen = (void *)ctx->screen;
struct crocus_context *ice = (struct crocus_context *)ctx;
const bool deferred = flags & PIPE_FLUSH_DEFERRED;
if (!deferred) {
for (unsigned i = 0; i < ice->batch_count; i++)
crocus_batch_flush(&ice->batches[i]);
}
if (!out_fence)
return;
struct pipe_fence_handle *fence = calloc(1, sizeof(*fence));
if (!fence)
return;
pipe_reference_init(&fence->ref, 1);
if (deferred)
fence->unflushed_ctx = ctx;
for (unsigned b = 0; b < ice->batch_count; b++) {
struct crocus_batch *batch = &ice->batches[b];
if (deferred && crocus_batch_bytes_used(batch) > 0) {
struct crocus_fine_fence *fine =
crocus_fine_fence_new(batch, CROCUS_FENCE_BOTTOM_OF_PIPE);
crocus_fine_fence_reference(screen, &fence->fine[b], fine);
crocus_fine_fence_reference(screen, &fine, NULL);
} else {
/* This batch has no commands queued up (perhaps we just flushed,
* or all the commands are on the other batch). Wait for the last
* syncobj on this engine - unless it's already finished by now.
*/
if (crocus_fine_fence_signaled(batch->last_fence))
continue;
crocus_fine_fence_reference(screen, &fence->fine[b],
batch->last_fence);
}
}
crocus_fence_reference(ctx->screen, out_fence, NULL);
*out_fence = fence;
}
static void
crocus_fence_await(struct pipe_context *ctx, struct pipe_fence_handle *fence)
{
struct crocus_context *ice = (struct crocus_context *)ctx;
/* Unflushed fences from the same context are no-ops. */
if (ctx && ctx == fence->unflushed_ctx)
return;
for (unsigned i = 0; i < ARRAY_SIZE(fence->fine); i++) {
struct crocus_fine_fence *fine = fence->fine[i];
if (crocus_fine_fence_signaled(fine))
continue;
for (unsigned b = 0; b < ice->batch_count; b++) {
struct crocus_batch *batch = &ice->batches[b];
/* We're going to make any future work in this batch wait for our
* fence to have gone by. But any currently queued work doesn't
* need to wait. Flush the batch now, so it can happen sooner.
*/
crocus_batch_flush(batch);
/* Before adding a new reference, clean out any stale ones. */
clear_stale_syncobjs(batch);
crocus_batch_add_syncobj(batch, fine->syncobj, I915_EXEC_FENCE_WAIT);
}
}
}
#define NSEC_PER_SEC (1000 * USEC_PER_SEC)
#define USEC_PER_SEC (1000 * MSEC_PER_SEC)
#define MSEC_PER_SEC (1000)
static uint64_t
gettime_ns(void)
{
struct timespec current;
clock_gettime(CLOCK_MONOTONIC, &current);
return (uint64_t)current.tv_sec * NSEC_PER_SEC + current.tv_nsec;
}
static uint64_t
rel2abs(uint64_t timeout)
{
if (timeout == 0)
return 0;
uint64_t current_time = gettime_ns();
uint64_t max_timeout = (uint64_t)INT64_MAX - current_time;
timeout = MIN2(max_timeout, timeout);
return current_time + timeout;
}
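/* Worked example (numbers are hypothetical): with the monotonic clock at
 * 1,000,000,000 ns, a relative timeout of 5 seconds becomes an absolute
 * deadline of 6,000,000,000 ns for DRM_IOCTL_SYNCOBJ_WAIT; an oversized
 * relative timeout is clamped so the sum never exceeds INT64_MAX.
 */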
static bool
crocus_fence_finish(struct pipe_screen *p_screen, struct pipe_context *ctx,
struct pipe_fence_handle *fence, uint64_t timeout)
{
struct crocus_context *ice = (struct crocus_context *)ctx;
struct crocus_screen *screen = (struct crocus_screen *)p_screen;
/* If we created the fence with PIPE_FLUSH_DEFERRED, we may not have
* flushed yet. Check if our syncobj is the current batch's signalling
* syncobj - if so, we haven't flushed and need to now.
*
* The Gallium docs mention that a flush will occur if \p ctx matches
* the context the fence was created with. It may be NULL, so we check
* that it matches first.
*/
if (ctx && ctx == fence->unflushed_ctx) {
for (unsigned i = 0; i < ice->batch_count; i++) {
struct crocus_fine_fence *fine = fence->fine[i];
if (crocus_fine_fence_signaled(fine))
continue;
if (fine->syncobj == crocus_batch_get_signal_syncobj(&ice->batches[i]))
crocus_batch_flush(&ice->batches[i]);
}
/* The fence is no longer deferred. */
fence->unflushed_ctx = NULL;
}
unsigned int handle_count = 0;
uint32_t handles[ARRAY_SIZE(fence->fine)];
for (unsigned i = 0; i < ARRAY_SIZE(fence->fine); i++) {
struct crocus_fine_fence *fine = fence->fine[i];
if (crocus_fine_fence_signaled(fine))
continue;
handles[handle_count++] = fine->syncobj->handle;
}
if (handle_count == 0)
return true;
struct drm_syncobj_wait args = {
.handles = (uintptr_t)handles,
.count_handles = handle_count,
.timeout_nsec = rel2abs(timeout),
.flags = DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL
};
if (fence->unflushed_ctx) {
/* This fence had a deferred flush from another context. We can't
* safely flush it here, because the context might be bound to a
* different thread, and poking at its internals wouldn't be safe.
*
* Instead, use the WAIT_FOR_SUBMIT flag to block and hope that
* another thread submits the work.
*/
args.flags |= DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT;
}
return intel_ioctl(screen->fd, DRM_IOCTL_SYNCOBJ_WAIT, &args) == 0;
}
#ifndef SYNC_IOC_MAGIC
/* duplicated from linux/sync_file.h to avoid build-time dependency
* on new (v4.7) kernel headers. Once distros are mostly using
* something newer than v4.7, drop this and #include <linux/sync_file.h>
* instead.
*/
struct sync_merge_data {
char name[32];
__s32 fd2;
__s32 fence;
__u32 flags;
__u32 pad;
};
#define SYNC_IOC_MAGIC '>'
#define SYNC_IOC_MERGE _IOWR(SYNC_IOC_MAGIC, 3, struct sync_merge_data)
#endif
static int
sync_merge_fd(int sync_fd, int new_fd)
{
if (sync_fd == -1)
return new_fd;
if (new_fd == -1)
return sync_fd;
struct sync_merge_data args = {
.name = "crocus fence",
.fd2 = new_fd,
.fence = -1,
};
intel_ioctl(sync_fd, SYNC_IOC_MERGE, &args);
close(new_fd);
close(sync_fd);
return args.fence;
}
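/* Example (illustrative): crocus_fence_get_fd() below calls this once per
 * still-pending batch, so a fence spanning e.g. the render batch and a
 * compute batch collapses into a single sync_file fd that only signals once
 * both have completed.
 */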
static int
crocus_fence_get_fd(struct pipe_screen *p_screen,
struct pipe_fence_handle *fence)
{
struct crocus_screen *screen = (struct crocus_screen *)p_screen;
int fd = -1;
/* Deferred fences aren't supported. */
if (fence->unflushed_ctx)
return -1;
for (unsigned i = 0; i < ARRAY_SIZE(fence->fine); i++) {
struct crocus_fine_fence *fine = fence->fine[i];
if (crocus_fine_fence_signaled(fine))
continue;
struct drm_syncobj_handle args = {
.handle = fine->syncobj->handle,
.flags = DRM_SYNCOBJ_HANDLE_TO_FD_FLAGS_EXPORT_SYNC_FILE,
.fd = -1,
};
intel_ioctl(screen->fd, DRM_IOCTL_SYNCOBJ_HANDLE_TO_FD, &args);
fd = sync_merge_fd(fd, args.fd);
}
if (fd == -1) {
/* Our fence has no syncobj's recorded. This means that all of the
* batches had already completed, their syncobj's had been signalled,
* and so we didn't bother to record them. But we're being asked to
* export such a fence. So export a dummy already-signalled syncobj.
*/
struct drm_syncobj_handle args = {
.flags = DRM_SYNCOBJ_HANDLE_TO_FD_FLAGS_EXPORT_SYNC_FILE,
.fd = -1,
};
args.handle = gem_syncobj_create(screen->fd, DRM_SYNCOBJ_CREATE_SIGNALED);
intel_ioctl(screen->fd, DRM_IOCTL_SYNCOBJ_HANDLE_TO_FD, &args);
gem_syncobj_destroy(screen->fd, args.handle);
return args.fd;
}
return fd;
}
static void
crocus_fence_create_fd(struct pipe_context *ctx, struct pipe_fence_handle **out,
int fd, enum pipe_fd_type type)
{
assert(type == PIPE_FD_TYPE_NATIVE_SYNC || type == PIPE_FD_TYPE_SYNCOBJ);
struct crocus_screen *screen = (struct crocus_screen *)ctx->screen;
struct drm_syncobj_handle args = {
.fd = fd,
};
if (type == PIPE_FD_TYPE_NATIVE_SYNC) {
args.flags = DRM_SYNCOBJ_FD_TO_HANDLE_FLAGS_IMPORT_SYNC_FILE;
args.handle = gem_syncobj_create(screen->fd, DRM_SYNCOBJ_CREATE_SIGNALED);
}
if (intel_ioctl(screen->fd, DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE, &args) == -1) {
fprintf(stderr, "DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE failed: %s\n",
strerror(errno));
if (type == PIPE_FD_TYPE_NATIVE_SYNC)
gem_syncobj_destroy(screen->fd, args.handle);
*out = NULL;
return;
}
struct crocus_syncobj *syncobj = malloc(sizeof(*syncobj));
if (!syncobj) {
*out = NULL;
return;
}
syncobj->handle = args.handle;
pipe_reference_init(&syncobj->ref, 1);
struct crocus_fine_fence *fine = calloc(1, sizeof(*fine));
if (!fine) {
free(syncobj);
*out = NULL;
return;
}
static const uint32_t zero = 0;
/* Fences work in terms of crocus_fine_fence, but we don't actually have a
* seqno for an imported fence. So, create a fake one which always
* returns as 'not signaled' so we fall back to using the sync object.
*/
fine->seqno = UINT32_MAX;
fine->map = &zero;
fine->syncobj = syncobj;
fine->flags = CROCUS_FENCE_END;
pipe_reference_init(&fine->reference, 1);
struct pipe_fence_handle *fence = calloc(1, sizeof(*fence));
if (!fence) {
free(fine);
free(syncobj);
*out = NULL;
return;
}
pipe_reference_init(&fence->ref, 1);
fence->fine[0] = fine;
*out = fence;
}
static void
crocus_fence_signal(struct pipe_context *ctx, struct pipe_fence_handle *fence)
{
struct crocus_context *ice = (struct crocus_context *)ctx;
if (ctx == fence->unflushed_ctx)
return;
for (unsigned b = 0; b < ice->batch_count; b++) {
for (unsigned i = 0; i < ARRAY_SIZE(fence->fine); i++) {
struct crocus_fine_fence *fine = fence->fine[i];
/* already signaled fence skipped */
if (crocus_fine_fence_signaled(fine))
continue;
ice->batches[b].contains_fence_signal = true;
crocus_batch_add_syncobj(&ice->batches[b], fine->syncobj,
I915_EXEC_FENCE_SIGNAL);
}
}
}
void
crocus_init_screen_fence_functions(struct pipe_screen *screen)
{
screen->fence_reference = crocus_fence_reference;
screen->fence_finish = crocus_fence_finish;
screen->fence_get_fd = crocus_fence_get_fd;
}
void
crocus_init_context_fence_functions(struct pipe_context *ctx)
{
ctx->flush = crocus_fence_flush;
ctx->create_fence_fd = crocus_fence_create_fd;
ctx->fence_server_sync = crocus_fence_await;
ctx->fence_server_signal = crocus_fence_signal;
}

View File

@ -0,0 +1,60 @@
/*
* Copyright © 2018 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#ifndef CROCUS_FENCE_H
#define CROCUS_FENCE_H
#include "util/u_inlines.h"
struct pipe_screen;
struct crocus_screen;
struct crocus_batch;
struct crocus_syncobj {
struct pipe_reference ref;
uint32_t handle;
};
void crocus_init_context_fence_functions(struct pipe_context *ctx);
void crocus_init_screen_fence_functions(struct pipe_screen *screen);
struct crocus_syncobj *crocus_create_syncobj(struct crocus_screen *screen);
void crocus_syncobj_destroy(struct crocus_screen *, struct crocus_syncobj *);
void crocus_batch_add_syncobj(struct crocus_batch *batch,
struct crocus_syncobj *syncobj,
unsigned flags);
bool crocus_wait_syncobj(struct pipe_screen *screen,
struct crocus_syncobj *syncobj,
int64_t timeout_nsec);
static inline void
crocus_syncobj_reference(struct crocus_screen *screen,
struct crocus_syncobj **dst,
struct crocus_syncobj *src)
{
if (pipe_reference(&(*dst)->ref, &src->ref))
crocus_syncobj_destroy(screen, *dst);
*dst = src;
}
#endif

View File

@ -0,0 +1,85 @@
#include "crocus_context.h"
#include "crocus_fine_fence.h"
#include "util/u_upload_mgr.h"
static void
crocus_fine_fence_reset(struct crocus_batch *batch)
{
u_upload_alloc(batch->fine_fences.uploader,
0, sizeof(uint64_t), sizeof(uint64_t),
&batch->fine_fences.ref.offset, &batch->fine_fences.ref.res,
(void **)&batch->fine_fences.map);
WRITE_ONCE(*batch->fine_fences.map, 0);
batch->fine_fences.next++;
}
void
crocus_fine_fence_init(struct crocus_batch *batch)
{
batch->fine_fences.ref.res = NULL;
batch->fine_fences.next = 0;
if (batch_has_fine_fence(batch))
crocus_fine_fence_reset(batch);
}
static uint32_t
crocus_fine_fence_next(struct crocus_batch *batch)
{
if (!batch_has_fine_fence(batch))
return UINT32_MAX;
uint32_t seqno = batch->fine_fences.next++;
if (batch->fine_fences.next == 0)
crocus_fine_fence_reset(batch);
return seqno;
}
void
crocus_fine_fence_destroy(struct crocus_screen *screen,
struct crocus_fine_fence *fine)
{
crocus_syncobj_reference(screen, &fine->syncobj, NULL);
pipe_resource_reference(&fine->ref.res, NULL);
free(fine);
}
struct crocus_fine_fence *
crocus_fine_fence_new(struct crocus_batch *batch, unsigned flags)
{
struct crocus_fine_fence *fine = calloc(1, sizeof(*fine));
if (!fine)
return NULL;
pipe_reference_init(&fine->reference, 1);
fine->seqno = crocus_fine_fence_next(batch);
crocus_syncobj_reference(batch->screen, &fine->syncobj,
crocus_batch_get_signal_syncobj(batch));
if (!batch_has_fine_fence(batch))
return fine;
pipe_resource_reference(&fine->ref.res, batch->fine_fences.ref.res);
fine->ref.offset = batch->fine_fences.ref.offset;
fine->map = batch->fine_fences.map;
fine->flags = flags;
unsigned pc;
if (flags & CROCUS_FENCE_TOP_OF_PIPE) {
pc = PIPE_CONTROL_WRITE_IMMEDIATE | PIPE_CONTROL_CS_STALL;
} else {
pc = PIPE_CONTROL_WRITE_IMMEDIATE |
PIPE_CONTROL_RENDER_TARGET_FLUSH |
PIPE_CONTROL_TILE_CACHE_FLUSH |
PIPE_CONTROL_DEPTH_CACHE_FLUSH |
PIPE_CONTROL_DATA_CACHE_FLUSH;
}
crocus_emit_pipe_control_write(batch, "fence: fine", pc,
crocus_resource_bo(fine->ref.res),
fine->ref.offset,
fine->seqno);
return fine;
}

View File

@ -0,0 +1,109 @@
/*
* Copyright © 2020 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#ifndef CROCUS_FINE_FENCE_DOT_H
#define CROCUS_FINE_FENCE_DOT_H
#include <stdbool.h>
#include <stdint.h>
#include "crocus_screen.h"
#include "crocus_resource.h"
/**
* A lightweight sequence number fence.
*
* We emit PIPE_CONTROLs inside a batch (possibly in the middle)
* which update a monotonically increasing, 32-bit counter. We
* can then check if that moment has passed by either:
*
* 1. Checking on the CPU by snooping on the DWord via a coherent map
*
* 2. Blocking on the GPU with MI_SEMAPHORE_WAIT from a second batch
* (relying on mid-batch preemption to switch GPU execution to the
* batch that writes it).
*/
struct crocus_fine_fence {
struct pipe_reference reference;
/** Buffer where the seqno lives */
struct crocus_state_ref ref;
/** Coherent CPU map of the buffer containing the seqno DWord. */
const uint32_t *map;
/**
* A drm_syncobj which will be signaled at the end of the
* batch that writes this seqno. This can be used to block until
* the seqno has definitely passed (but may wait longer than necessary).
*/
struct crocus_syncobj *syncobj;
#define CROCUS_FENCE_BOTTOM_OF_PIPE 0x0 /**< Written by bottom-of-pipe flush */
#define CROCUS_FENCE_TOP_OF_PIPE 0x1 /**< Written by top-of-pipe flush */
#define CROCUS_FENCE_END 0x2 /**< Written at the end of a batch */
/** Information about the type of flush involved (see CROCUS_FENCE_*) */
uint32_t flags;
/**
* Sequence number expected to be written by the flush we inserted
* when creating this fence. The crocus_fine_fence is 'signaled' when *@map
* (written by the flush on the GPU) is greater-than-or-equal to @seqno.
*/
uint32_t seqno;
};
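/* Illustrative example (numbers are hypothetical): if the fence's
 * PIPE_CONTROL wrote seqno 40 and the GPU has since advanced *@map to 42,
 * the fence reads back as signaled; if *@map still reads 38, callers fall
 * back to waiting on @syncobj, which only signals once the whole batch has
 * executed.
 */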
void crocus_fine_fence_init(struct crocus_batch *batch);
struct crocus_fine_fence *crocus_fine_fence_new(struct crocus_batch *batch,
unsigned flags);
void crocus_fine_fence_destroy(struct crocus_screen *screen,
struct crocus_fine_fence *sq);
static inline void
crocus_fine_fence_reference(struct crocus_screen *screen,
struct crocus_fine_fence **dst,
struct crocus_fine_fence *src)
{
if (pipe_reference(&(*dst)->reference, &src->reference))
crocus_fine_fence_destroy(screen, *dst);
*dst = src;
}
/**
* Return true if this seqno has passed.
*
* NULL is considered signaled.
*/
static inline bool
crocus_fine_fence_signaled(const struct crocus_fine_fence *sq)
{
if (sq && !sq->map)
return false;
return !sq || (READ_ONCE(*sq->map) >= sq->seqno);
}
#endif

View File

@ -0,0 +1,576 @@
/*
* Copyright © 2017 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
/**
* @file crocus_formats.c
*
* Converts Gallium formats (PIPE_FORMAT_*) to hardware ones (ISL_FORMAT_*).
* Provides information about which formats support what features.
*/
#include "util/bitscan.h"
#include "util/macros.h"
#include "util/format/u_format.h"
#include "crocus_resource.h"
#include "crocus_screen.h"
static enum isl_format
crocus_isl_format_for_pipe_format(enum pipe_format pf)
{
static const enum isl_format table[PIPE_FORMAT_COUNT] = {
[0 ... PIPE_FORMAT_COUNT-1] = ISL_FORMAT_UNSUPPORTED,
[PIPE_FORMAT_B8G8R8A8_UNORM] = ISL_FORMAT_B8G8R8A8_UNORM,
[PIPE_FORMAT_B8G8R8X8_UNORM] = ISL_FORMAT_B8G8R8X8_UNORM,
[PIPE_FORMAT_B5G5R5A1_UNORM] = ISL_FORMAT_B5G5R5A1_UNORM,
[PIPE_FORMAT_B4G4R4A4_UNORM] = ISL_FORMAT_B4G4R4A4_UNORM,
[PIPE_FORMAT_B5G6R5_UNORM] = ISL_FORMAT_B5G6R5_UNORM,
[PIPE_FORMAT_R10G10B10A2_UNORM] = ISL_FORMAT_R10G10B10A2_UNORM,
[PIPE_FORMAT_Z16_UNORM] = ISL_FORMAT_R16_UNORM,
[PIPE_FORMAT_Z32_UNORM] = ISL_FORMAT_R32_UNORM,
[PIPE_FORMAT_Z32_FLOAT] = ISL_FORMAT_R32_FLOAT,
/* We translate the combined depth/stencil formats to depth only here */
[PIPE_FORMAT_Z24_UNORM_S8_UINT] = ISL_FORMAT_R24_UNORM_X8_TYPELESS,
[PIPE_FORMAT_Z24X8_UNORM] = ISL_FORMAT_R24_UNORM_X8_TYPELESS,
[PIPE_FORMAT_Z32_FLOAT_S8X24_UINT] = ISL_FORMAT_R32_FLOAT,
[PIPE_FORMAT_S8_UINT] = ISL_FORMAT_R8_UINT,
[PIPE_FORMAT_X24S8_UINT] = ISL_FORMAT_R8_UINT,
[PIPE_FORMAT_X32_S8X24_UINT] = ISL_FORMAT_R8_UINT,
[PIPE_FORMAT_R64_FLOAT] = ISL_FORMAT_R64_FLOAT,
[PIPE_FORMAT_R64G64_FLOAT] = ISL_FORMAT_R64G64_FLOAT,
[PIPE_FORMAT_R64G64B64_FLOAT] = ISL_FORMAT_R64G64B64_FLOAT,
[PIPE_FORMAT_R64G64B64A64_FLOAT] = ISL_FORMAT_R64G64B64A64_FLOAT,
[PIPE_FORMAT_R32_FLOAT] = ISL_FORMAT_R32_FLOAT,
[PIPE_FORMAT_R32G32_FLOAT] = ISL_FORMAT_R32G32_FLOAT,
[PIPE_FORMAT_R32G32B32_FLOAT] = ISL_FORMAT_R32G32B32_FLOAT,
[PIPE_FORMAT_R32G32B32A32_FLOAT] = ISL_FORMAT_R32G32B32A32_FLOAT,
[PIPE_FORMAT_R32_UNORM] = ISL_FORMAT_R32_UNORM,
[PIPE_FORMAT_R32G32_UNORM] = ISL_FORMAT_R32G32_UNORM,
[PIPE_FORMAT_R32G32B32_UNORM] = ISL_FORMAT_R32G32B32_UNORM,
[PIPE_FORMAT_R32G32B32A32_UNORM] = ISL_FORMAT_R32G32B32A32_UNORM,
[PIPE_FORMAT_R32_USCALED] = ISL_FORMAT_R32_USCALED,
[PIPE_FORMAT_R32G32_USCALED] = ISL_FORMAT_R32G32_USCALED,
[PIPE_FORMAT_R32G32B32_USCALED] = ISL_FORMAT_R32G32B32_USCALED,
[PIPE_FORMAT_R32G32B32A32_USCALED] = ISL_FORMAT_R32G32B32A32_USCALED,
[PIPE_FORMAT_R32_SNORM] = ISL_FORMAT_R32_SNORM,
[PIPE_FORMAT_R32G32_SNORM] = ISL_FORMAT_R32G32_SNORM,
[PIPE_FORMAT_R32G32B32_SNORM] = ISL_FORMAT_R32G32B32_SNORM,
[PIPE_FORMAT_R32G32B32A32_SNORM] = ISL_FORMAT_R32G32B32A32_SNORM,
[PIPE_FORMAT_R32_SSCALED] = ISL_FORMAT_R32_SSCALED,
[PIPE_FORMAT_R32G32_SSCALED] = ISL_FORMAT_R32G32_SSCALED,
[PIPE_FORMAT_R32G32B32_SSCALED] = ISL_FORMAT_R32G32B32_SSCALED,
[PIPE_FORMAT_R32G32B32A32_SSCALED] = ISL_FORMAT_R32G32B32A32_SSCALED,
[PIPE_FORMAT_R16_UNORM] = ISL_FORMAT_R16_UNORM,
[PIPE_FORMAT_R16G16_UNORM] = ISL_FORMAT_R16G16_UNORM,
[PIPE_FORMAT_R16G16B16_UNORM] = ISL_FORMAT_R16G16B16_UNORM,
[PIPE_FORMAT_R16G16B16A16_UNORM] = ISL_FORMAT_R16G16B16A16_UNORM,
[PIPE_FORMAT_R16_USCALED] = ISL_FORMAT_R16_USCALED,
[PIPE_FORMAT_R16G16_USCALED] = ISL_FORMAT_R16G16_USCALED,
[PIPE_FORMAT_R16G16B16_USCALED] = ISL_FORMAT_R16G16B16_USCALED,
[PIPE_FORMAT_R16G16B16A16_USCALED] = ISL_FORMAT_R16G16B16A16_USCALED,
[PIPE_FORMAT_R16_SNORM] = ISL_FORMAT_R16_SNORM,
[PIPE_FORMAT_R16G16_SNORM] = ISL_FORMAT_R16G16_SNORM,
[PIPE_FORMAT_R16G16B16_SNORM] = ISL_FORMAT_R16G16B16_SNORM,
[PIPE_FORMAT_R16G16B16A16_SNORM] = ISL_FORMAT_R16G16B16A16_SNORM,
[PIPE_FORMAT_R16_SSCALED] = ISL_FORMAT_R16_SSCALED,
[PIPE_FORMAT_R16G16_SSCALED] = ISL_FORMAT_R16G16_SSCALED,
[PIPE_FORMAT_R16G16B16_SSCALED] = ISL_FORMAT_R16G16B16_SSCALED,
[PIPE_FORMAT_R16G16B16A16_SSCALED] = ISL_FORMAT_R16G16B16A16_SSCALED,
[PIPE_FORMAT_R8_UNORM] = ISL_FORMAT_R8_UNORM,
[PIPE_FORMAT_R8G8_UNORM] = ISL_FORMAT_R8G8_UNORM,
[PIPE_FORMAT_R8G8B8_UNORM] = ISL_FORMAT_R8G8B8_UNORM,
[PIPE_FORMAT_R8G8B8A8_UNORM] = ISL_FORMAT_R8G8B8A8_UNORM,
[PIPE_FORMAT_R8_USCALED] = ISL_FORMAT_R8_USCALED,
[PIPE_FORMAT_R8G8_USCALED] = ISL_FORMAT_R8G8_USCALED,
[PIPE_FORMAT_R8G8B8_USCALED] = ISL_FORMAT_R8G8B8_USCALED,
[PIPE_FORMAT_R8G8B8A8_USCALED] = ISL_FORMAT_R8G8B8A8_USCALED,
[PIPE_FORMAT_R8_SNORM] = ISL_FORMAT_R8_SNORM,
[PIPE_FORMAT_R8G8_SNORM] = ISL_FORMAT_R8G8_SNORM,
[PIPE_FORMAT_R8G8B8_SNORM] = ISL_FORMAT_R8G8B8_SNORM,
[PIPE_FORMAT_R8G8B8A8_SNORM] = ISL_FORMAT_R8G8B8A8_SNORM,
[PIPE_FORMAT_R8_SSCALED] = ISL_FORMAT_R8_SSCALED,
[PIPE_FORMAT_R8G8_SSCALED] = ISL_FORMAT_R8G8_SSCALED,
[PIPE_FORMAT_R8G8B8_SSCALED] = ISL_FORMAT_R8G8B8_SSCALED,
[PIPE_FORMAT_R8G8B8A8_SSCALED] = ISL_FORMAT_R8G8B8A8_SSCALED,
[PIPE_FORMAT_R32_FIXED] = ISL_FORMAT_R32_SFIXED,
[PIPE_FORMAT_R32G32_FIXED] = ISL_FORMAT_R32G32_SFIXED,
[PIPE_FORMAT_R32G32B32_FIXED] = ISL_FORMAT_R32G32B32_SFIXED,
[PIPE_FORMAT_R32G32B32A32_FIXED] = ISL_FORMAT_R32G32B32A32_SFIXED,
[PIPE_FORMAT_R16_FLOAT] = ISL_FORMAT_R16_FLOAT,
[PIPE_FORMAT_R16G16_FLOAT] = ISL_FORMAT_R16G16_FLOAT,
[PIPE_FORMAT_R16G16B16_FLOAT] = ISL_FORMAT_R16G16B16_FLOAT,
[PIPE_FORMAT_R16G16B16A16_FLOAT] = ISL_FORMAT_R16G16B16A16_FLOAT,
[PIPE_FORMAT_R8G8B8_SRGB] = ISL_FORMAT_R8G8B8_UNORM_SRGB,
[PIPE_FORMAT_B8G8R8A8_SRGB] = ISL_FORMAT_B8G8R8A8_UNORM_SRGB,
[PIPE_FORMAT_B8G8R8X8_SRGB] = ISL_FORMAT_B8G8R8X8_UNORM_SRGB,
[PIPE_FORMAT_R8G8B8A8_SRGB] = ISL_FORMAT_R8G8B8A8_UNORM_SRGB,
[PIPE_FORMAT_DXT1_RGB] = ISL_FORMAT_BC1_UNORM,
[PIPE_FORMAT_DXT1_RGBA] = ISL_FORMAT_BC1_UNORM,
[PIPE_FORMAT_DXT3_RGBA] = ISL_FORMAT_BC2_UNORM,
[PIPE_FORMAT_DXT5_RGBA] = ISL_FORMAT_BC3_UNORM,
[PIPE_FORMAT_DXT1_SRGB] = ISL_FORMAT_BC1_UNORM_SRGB,
[PIPE_FORMAT_DXT1_SRGBA] = ISL_FORMAT_BC1_UNORM_SRGB,
[PIPE_FORMAT_DXT3_SRGBA] = ISL_FORMAT_BC2_UNORM_SRGB,
[PIPE_FORMAT_DXT5_SRGBA] = ISL_FORMAT_BC3_UNORM_SRGB,
[PIPE_FORMAT_RGTC1_UNORM] = ISL_FORMAT_BC4_UNORM,
[PIPE_FORMAT_RGTC1_SNORM] = ISL_FORMAT_BC4_SNORM,
[PIPE_FORMAT_RGTC2_UNORM] = ISL_FORMAT_BC5_UNORM,
[PIPE_FORMAT_RGTC2_SNORM] = ISL_FORMAT_BC5_SNORM,
[PIPE_FORMAT_R10G10B10A2_USCALED] = ISL_FORMAT_R10G10B10A2_USCALED,
[PIPE_FORMAT_R11G11B10_FLOAT] = ISL_FORMAT_R11G11B10_FLOAT,
[PIPE_FORMAT_R9G9B9E5_FLOAT] = ISL_FORMAT_R9G9B9E5_SHAREDEXP,
[PIPE_FORMAT_R1_UNORM] = ISL_FORMAT_R1_UNORM,
[PIPE_FORMAT_R10G10B10X2_USCALED] = ISL_FORMAT_R10G10B10X2_USCALED,
[PIPE_FORMAT_B10G10R10A2_UNORM] = ISL_FORMAT_B10G10R10A2_UNORM,
[PIPE_FORMAT_R8G8B8X8_UNORM] = ISL_FORMAT_R8G8B8X8_UNORM,
[PIPE_FORMAT_I8_UNORM] = ISL_FORMAT_R8_UNORM,
[PIPE_FORMAT_I16_UNORM] = ISL_FORMAT_R16_UNORM,
[PIPE_FORMAT_I8_SNORM] = ISL_FORMAT_R8_SNORM,
[PIPE_FORMAT_I16_SNORM] = ISL_FORMAT_R16_SNORM,
[PIPE_FORMAT_I16_FLOAT] = ISL_FORMAT_R16_FLOAT,
[PIPE_FORMAT_I32_FLOAT] = ISL_FORMAT_R32_FLOAT,
[PIPE_FORMAT_L8_UINT] = ISL_FORMAT_L8_UINT,
[PIPE_FORMAT_L8_UNORM] = ISL_FORMAT_L8_UNORM,
[PIPE_FORMAT_L8_SNORM] = ISL_FORMAT_R8_SNORM,
[PIPE_FORMAT_L8_SINT] = ISL_FORMAT_L8_SINT,
[PIPE_FORMAT_L16_UNORM] = ISL_FORMAT_L16_UNORM,
[PIPE_FORMAT_L16_SNORM] = ISL_FORMAT_R16_SNORM,
[PIPE_FORMAT_L16_FLOAT] = ISL_FORMAT_L16_FLOAT,
[PIPE_FORMAT_L32_FLOAT] = ISL_FORMAT_L32_FLOAT,
[PIPE_FORMAT_A8_UNORM] = ISL_FORMAT_A8_UNORM,
[PIPE_FORMAT_A16_UNORM] = ISL_FORMAT_A16_UNORM,
[PIPE_FORMAT_A16_FLOAT] = ISL_FORMAT_A16_FLOAT,
[PIPE_FORMAT_A32_FLOAT] = ISL_FORMAT_A32_FLOAT,
[PIPE_FORMAT_L8A8_UNORM] = ISL_FORMAT_L8A8_UNORM,
[PIPE_FORMAT_L16A16_UNORM] = ISL_FORMAT_L16A16_UNORM,
[PIPE_FORMAT_L16A16_FLOAT] = ISL_FORMAT_L16A16_FLOAT,
[PIPE_FORMAT_L32A32_FLOAT] = ISL_FORMAT_L32A32_FLOAT,
/* Sadly, we have to use luminance[-alpha] formats for sRGB decoding. */
[PIPE_FORMAT_R8_SRGB] = ISL_FORMAT_L8_UNORM_SRGB,
[PIPE_FORMAT_L8_SRGB] = ISL_FORMAT_L8_UNORM_SRGB,
[PIPE_FORMAT_L8A8_SRGB] = ISL_FORMAT_L8A8_UNORM_SRGB,
[PIPE_FORMAT_R10G10B10A2_SSCALED] = ISL_FORMAT_R10G10B10A2_SSCALED,
[PIPE_FORMAT_R10G10B10A2_SNORM] = ISL_FORMAT_R10G10B10A2_SNORM,
[PIPE_FORMAT_B10G10R10A2_USCALED] = ISL_FORMAT_B10G10R10A2_USCALED,
[PIPE_FORMAT_B10G10R10A2_SSCALED] = ISL_FORMAT_B10G10R10A2_SSCALED,
[PIPE_FORMAT_B10G10R10A2_SNORM] = ISL_FORMAT_B10G10R10A2_SNORM,
[PIPE_FORMAT_R8_UINT] = ISL_FORMAT_R8_UINT,
[PIPE_FORMAT_R8G8_UINT] = ISL_FORMAT_R8G8_UINT,
[PIPE_FORMAT_R8G8B8_UINT] = ISL_FORMAT_R8G8B8_UINT,
[PIPE_FORMAT_R8G8B8A8_UINT] = ISL_FORMAT_R8G8B8A8_UINT,
[PIPE_FORMAT_R8_SINT] = ISL_FORMAT_R8_SINT,
[PIPE_FORMAT_R8G8_SINT] = ISL_FORMAT_R8G8_SINT,
[PIPE_FORMAT_R8G8B8_SINT] = ISL_FORMAT_R8G8B8_SINT,
[PIPE_FORMAT_R8G8B8A8_SINT] = ISL_FORMAT_R8G8B8A8_SINT,
[PIPE_FORMAT_R16_UINT] = ISL_FORMAT_R16_UINT,
[PIPE_FORMAT_R16G16_UINT] = ISL_FORMAT_R16G16_UINT,
[PIPE_FORMAT_R16G16B16_UINT] = ISL_FORMAT_R16G16B16_UINT,
[PIPE_FORMAT_R16G16B16A16_UINT] = ISL_FORMAT_R16G16B16A16_UINT,
[PIPE_FORMAT_R16_SINT] = ISL_FORMAT_R16_SINT,
[PIPE_FORMAT_R16G16_SINT] = ISL_FORMAT_R16G16_SINT,
[PIPE_FORMAT_R16G16B16_SINT] = ISL_FORMAT_R16G16B16_SINT,
[PIPE_FORMAT_R16G16B16A16_SINT] = ISL_FORMAT_R16G16B16A16_SINT,
[PIPE_FORMAT_R32_UINT] = ISL_FORMAT_R32_UINT,
[PIPE_FORMAT_R32G32_UINT] = ISL_FORMAT_R32G32_UINT,
[PIPE_FORMAT_R32G32B32_UINT] = ISL_FORMAT_R32G32B32_UINT,
[PIPE_FORMAT_R32G32B32A32_UINT] = ISL_FORMAT_R32G32B32A32_UINT,
[PIPE_FORMAT_R32_SINT] = ISL_FORMAT_R32_SINT,
[PIPE_FORMAT_R32G32_SINT] = ISL_FORMAT_R32G32_SINT,
[PIPE_FORMAT_R32G32B32_SINT] = ISL_FORMAT_R32G32B32_SINT,
[PIPE_FORMAT_R32G32B32A32_SINT] = ISL_FORMAT_R32G32B32A32_SINT,
[PIPE_FORMAT_B10G10R10A2_UINT] = ISL_FORMAT_B10G10R10A2_UINT,
[PIPE_FORMAT_ETC1_RGB8] = ISL_FORMAT_ETC1_RGB8,
[PIPE_FORMAT_R8G8B8X8_SRGB] = ISL_FORMAT_R8G8B8X8_UNORM_SRGB,
[PIPE_FORMAT_B10G10R10X2_UNORM] = ISL_FORMAT_B10G10R10X2_UNORM,
[PIPE_FORMAT_R16G16B16X16_UNORM] = ISL_FORMAT_R16G16B16X16_UNORM,
[PIPE_FORMAT_R16G16B16X16_FLOAT] = ISL_FORMAT_R16G16B16X16_FLOAT,
[PIPE_FORMAT_R32G32B32X32_FLOAT] = ISL_FORMAT_R32G32B32X32_FLOAT,
[PIPE_FORMAT_R10G10B10A2_UINT] = ISL_FORMAT_R10G10B10A2_UINT,
[PIPE_FORMAT_B5G6R5_SRGB] = ISL_FORMAT_B5G6R5_UNORM_SRGB,
[PIPE_FORMAT_BPTC_RGBA_UNORM] = ISL_FORMAT_BC7_UNORM,
[PIPE_FORMAT_BPTC_SRGBA] = ISL_FORMAT_BC7_UNORM_SRGB,
[PIPE_FORMAT_BPTC_RGB_FLOAT] = ISL_FORMAT_BC6H_SF16,
[PIPE_FORMAT_BPTC_RGB_UFLOAT] = ISL_FORMAT_BC6H_UF16,
[PIPE_FORMAT_ETC2_RGB8] = ISL_FORMAT_ETC2_RGB8,
[PIPE_FORMAT_ETC2_SRGB8] = ISL_FORMAT_ETC2_SRGB8,
[PIPE_FORMAT_ETC2_RGB8A1] = ISL_FORMAT_ETC2_RGB8_PTA,
[PIPE_FORMAT_ETC2_SRGB8A1] = ISL_FORMAT_ETC2_SRGB8_PTA,
[PIPE_FORMAT_ETC2_RGBA8] = ISL_FORMAT_ETC2_EAC_RGBA8,
[PIPE_FORMAT_ETC2_SRGBA8] = ISL_FORMAT_ETC2_EAC_SRGB8_A8,
[PIPE_FORMAT_ETC2_R11_UNORM] = ISL_FORMAT_EAC_R11,
[PIPE_FORMAT_ETC2_R11_SNORM] = ISL_FORMAT_EAC_SIGNED_R11,
[PIPE_FORMAT_ETC2_RG11_UNORM] = ISL_FORMAT_EAC_RG11,
[PIPE_FORMAT_ETC2_RG11_SNORM] = ISL_FORMAT_EAC_SIGNED_RG11,
[PIPE_FORMAT_FXT1_RGB] = ISL_FORMAT_FXT1,
[PIPE_FORMAT_FXT1_RGBA] = ISL_FORMAT_FXT1,
[PIPE_FORMAT_ASTC_4x4] = ISL_FORMAT_ASTC_LDR_2D_4X4_FLT16,
[PIPE_FORMAT_ASTC_5x4] = ISL_FORMAT_ASTC_LDR_2D_5X4_FLT16,
[PIPE_FORMAT_ASTC_5x5] = ISL_FORMAT_ASTC_LDR_2D_5X5_FLT16,
[PIPE_FORMAT_ASTC_6x5] = ISL_FORMAT_ASTC_LDR_2D_6X5_FLT16,
[PIPE_FORMAT_ASTC_6x6] = ISL_FORMAT_ASTC_LDR_2D_6X6_FLT16,
[PIPE_FORMAT_ASTC_8x5] = ISL_FORMAT_ASTC_LDR_2D_8X5_FLT16,
[PIPE_FORMAT_ASTC_8x6] = ISL_FORMAT_ASTC_LDR_2D_8X6_FLT16,
[PIPE_FORMAT_ASTC_8x8] = ISL_FORMAT_ASTC_LDR_2D_8X8_FLT16,
[PIPE_FORMAT_ASTC_10x5] = ISL_FORMAT_ASTC_LDR_2D_10X5_FLT16,
[PIPE_FORMAT_ASTC_10x6] = ISL_FORMAT_ASTC_LDR_2D_10X6_FLT16,
[PIPE_FORMAT_ASTC_10x8] = ISL_FORMAT_ASTC_LDR_2D_10X8_FLT16,
[PIPE_FORMAT_ASTC_10x10] = ISL_FORMAT_ASTC_LDR_2D_10X10_FLT16,
[PIPE_FORMAT_ASTC_12x10] = ISL_FORMAT_ASTC_LDR_2D_12X10_FLT16,
[PIPE_FORMAT_ASTC_12x12] = ISL_FORMAT_ASTC_LDR_2D_12X12_FLT16,
[PIPE_FORMAT_ASTC_4x4_SRGB] = ISL_FORMAT_ASTC_LDR_2D_4X4_U8SRGB,
[PIPE_FORMAT_ASTC_5x4_SRGB] = ISL_FORMAT_ASTC_LDR_2D_5X4_U8SRGB,
[PIPE_FORMAT_ASTC_5x5_SRGB] = ISL_FORMAT_ASTC_LDR_2D_5X5_U8SRGB,
[PIPE_FORMAT_ASTC_6x5_SRGB] = ISL_FORMAT_ASTC_LDR_2D_6X5_U8SRGB,
[PIPE_FORMAT_ASTC_6x6_SRGB] = ISL_FORMAT_ASTC_LDR_2D_6X6_U8SRGB,
[PIPE_FORMAT_ASTC_8x5_SRGB] = ISL_FORMAT_ASTC_LDR_2D_8X5_U8SRGB,
[PIPE_FORMAT_ASTC_8x6_SRGB] = ISL_FORMAT_ASTC_LDR_2D_8X6_U8SRGB,
[PIPE_FORMAT_ASTC_8x8_SRGB] = ISL_FORMAT_ASTC_LDR_2D_8X8_U8SRGB,
[PIPE_FORMAT_ASTC_10x5_SRGB] = ISL_FORMAT_ASTC_LDR_2D_10X5_U8SRGB,
[PIPE_FORMAT_ASTC_10x6_SRGB] = ISL_FORMAT_ASTC_LDR_2D_10X6_U8SRGB,
[PIPE_FORMAT_ASTC_10x8_SRGB] = ISL_FORMAT_ASTC_LDR_2D_10X8_U8SRGB,
[PIPE_FORMAT_ASTC_10x10_SRGB] = ISL_FORMAT_ASTC_LDR_2D_10X10_U8SRGB,
[PIPE_FORMAT_ASTC_12x10_SRGB] = ISL_FORMAT_ASTC_LDR_2D_12X10_U8SRGB,
[PIPE_FORMAT_ASTC_12x12_SRGB] = ISL_FORMAT_ASTC_LDR_2D_12X12_U8SRGB,
[PIPE_FORMAT_A1B5G5R5_UNORM] = ISL_FORMAT_A1B5G5R5_UNORM,
/* We support these so that we know the API expects no alpha channel.
* Otherwise, the state tracker would just give us a format with alpha
* and we wouldn't know to override the swizzle to 1.
*/
[PIPE_FORMAT_R16G16B16X16_UINT] = ISL_FORMAT_R16G16B16A16_UINT,
[PIPE_FORMAT_R16G16B16X16_SINT] = ISL_FORMAT_R16G16B16A16_SINT,
[PIPE_FORMAT_R32G32B32X32_UINT] = ISL_FORMAT_R32G32B32A32_UINT,
[PIPE_FORMAT_R32G32B32X32_SINT] = ISL_FORMAT_R32G32B32A32_SINT,
[PIPE_FORMAT_R10G10B10X2_SNORM] = ISL_FORMAT_R10G10B10A2_SNORM,
};
assert(pf < PIPE_FORMAT_COUNT);
return table[pf];
}
static enum isl_format
get_render_format(enum pipe_format pformat, enum isl_format def_format)
{
switch (pformat) {
case PIPE_FORMAT_A16_UNORM: return ISL_FORMAT_R16_UNORM;
case PIPE_FORMAT_A16_FLOAT: return ISL_FORMAT_R16_FLOAT;
case PIPE_FORMAT_A32_FLOAT: return ISL_FORMAT_R32_FLOAT;
case PIPE_FORMAT_I8_UNORM: return ISL_FORMAT_R8_UNORM;
case PIPE_FORMAT_I16_UNORM: return ISL_FORMAT_R16_UNORM;
case PIPE_FORMAT_I16_FLOAT: return ISL_FORMAT_R16_FLOAT;
case PIPE_FORMAT_I32_FLOAT: return ISL_FORMAT_R32_FLOAT;
case PIPE_FORMAT_L8_UNORM: return ISL_FORMAT_R8_UNORM;
case PIPE_FORMAT_L8_UINT: return ISL_FORMAT_R8_UINT;
case PIPE_FORMAT_L8_SINT: return ISL_FORMAT_R8_SINT;
case PIPE_FORMAT_L16_UNORM: return ISL_FORMAT_R16_UNORM;
case PIPE_FORMAT_L16_FLOAT: return ISL_FORMAT_R16_FLOAT;
case PIPE_FORMAT_L32_FLOAT: return ISL_FORMAT_R32_FLOAT;
case PIPE_FORMAT_L8A8_UNORM: return ISL_FORMAT_R8G8_UNORM;
case PIPE_FORMAT_L16A16_UNORM: return ISL_FORMAT_R16G16_UNORM;
case PIPE_FORMAT_L16A16_FLOAT: return ISL_FORMAT_R16G16_FLOAT;
case PIPE_FORMAT_L32A32_FLOAT: return ISL_FORMAT_R32G32_FLOAT;
default:
return def_format;
}
}
struct crocus_format_info
crocus_format_for_usage(const struct intel_device_info *devinfo,
enum pipe_format pformat,
isl_surf_usage_flags_t usage)
{
struct crocus_format_info info = { crocus_isl_format_for_pipe_format(pformat),
{ PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W } };
if (info.fmt == ISL_FORMAT_UNSUPPORTED)
return info;
if (pformat == PIPE_FORMAT_A8_UNORM) {
info.fmt = ISL_FORMAT_A8_UNORM;
}
if (usage & ISL_SURF_USAGE_RENDER_TARGET_BIT)
info.fmt = get_render_format(pformat, info.fmt);
if (devinfo->ver < 6) {
if (pformat == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT)
info.fmt = ISL_FORMAT_R32_FLOAT_X8X24_TYPELESS;
if (pformat == PIPE_FORMAT_X32_S8X24_UINT)
info.fmt = ISL_FORMAT_X32_TYPELESS_G8X24_UINT;
if (pformat == PIPE_FORMAT_X24S8_UINT)
info.fmt = ISL_FORMAT_X24_TYPELESS_G8_UINT;
}
const struct isl_format_layout *fmtl = isl_format_get_layout(info.fmt);
if (util_format_is_snorm(pformat)) {
if (util_format_is_intensity(pformat)) {
info.swizzles[0] = PIPE_SWIZZLE_X;
info.swizzles[1] = PIPE_SWIZZLE_X;
info.swizzles[2] = PIPE_SWIZZLE_X;
info.swizzles[3] = PIPE_SWIZZLE_X;
} else if (util_format_is_luminance(pformat)) {
info.swizzles[0] = PIPE_SWIZZLE_X;
info.swizzles[1] = PIPE_SWIZZLE_X;
info.swizzles[2] = PIPE_SWIZZLE_X;
info.swizzles[3] = PIPE_SWIZZLE_1;
} else if (util_format_is_luminance_alpha(pformat)) {
info.swizzles[0] = PIPE_SWIZZLE_X;
info.swizzles[1] = PIPE_SWIZZLE_X;
info.swizzles[2] = PIPE_SWIZZLE_X;
info.swizzles[3] = PIPE_SWIZZLE_Y;
} else if (util_format_is_alpha(pformat)) {
info.swizzles[0] = PIPE_SWIZZLE_0;
info.swizzles[1] = PIPE_SWIZZLE_0;
info.swizzles[2] = PIPE_SWIZZLE_0;
info.swizzles[3] = PIPE_SWIZZLE_X;
}
}
/* When faking RGBX pipe formats with RGBA ISL formats, override alpha. */
if (!util_format_has_alpha(pformat) && fmtl->channels.a.type != ISL_VOID) {
info.swizzles[0] = PIPE_SWIZZLE_X;
info.swizzles[1] = PIPE_SWIZZLE_Y;
info.swizzles[2] = PIPE_SWIZZLE_Z;
info.swizzles[3] = PIPE_SWIZZLE_1;
}
/* We choose RGBA over RGBX for rendering, since the hardware doesn't support
* rendering to RGBX. However, when this internal override is used on Gen9+,
* fast clears don't work correctly.
*
* i965 fixes this by pretending to not support RGBX formats, and the higher
* layers of Mesa pick the RGBA format instead. Gallium doesn't work that
* way, and might choose a different format, like BGRX instead of RGBX,
* which will also cause problems when sampling from a surface fast cleared
* as RGBX. So we always choose RGBA instead of RGBX explicitly
* here.
*/
if (isl_format_is_rgbx(info.fmt) &&
!isl_format_supports_rendering(devinfo, info.fmt) &&
(usage & ISL_SURF_USAGE_RENDER_TARGET_BIT)) {
info.fmt = isl_format_rgbx_to_rgba(info.fmt);
info.swizzles[0] = PIPE_SWIZZLE_X;
info.swizzles[1] = PIPE_SWIZZLE_Y;
info.swizzles[2] = PIPE_SWIZZLE_Z;
info.swizzles[3] = PIPE_SWIZZLE_1;
}
return info;
}
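/* Worked example (illustrative, not an exhaustive contract): requesting
 * PIPE_FORMAT_I8_SNORM maps to ISL_FORMAT_R8_SNORM via the table above, and
 * because it is a snorm intensity format the swizzle becomes XXXX, so all
 * four channels read back the single stored component.
 */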
/**
* The pscreen->is_format_supported() driver hook.
*
* Returns true if the given format is supported for the given usage
* (PIPE_BIND_*) and sample count.
*/
bool
crocus_is_format_supported(struct pipe_screen *pscreen,
enum pipe_format pformat,
enum pipe_texture_target target,
unsigned sample_count, unsigned storage_sample_count,
unsigned usage)
{
struct crocus_screen *screen = (struct crocus_screen *)pscreen;
const struct intel_device_info *devinfo = &screen->devinfo;
if (!util_is_power_of_two_or_zero(sample_count))
return false;
if (devinfo->ver >= 7) {
if (sample_count > 8 || sample_count == 2)
return false;
} else if (devinfo->ver == 6) {
if (sample_count > 4 || sample_count == 2)
return false;
} else if (sample_count > 1) {
return false;
}
if (pformat == PIPE_FORMAT_NONE)
return true;
enum isl_format format = crocus_isl_format_for_pipe_format(pformat);
if (format == ISL_FORMAT_UNSUPPORTED)
return false;
/* no stencil texturing prior to haswell */
if (!devinfo->is_haswell) {
if (pformat == PIPE_FORMAT_S8_UINT ||
pformat == PIPE_FORMAT_X24S8_UINT ||
pformat == PIPE_FORMAT_S8X24_UINT ||
pformat == PIPE_FORMAT_X32_S8X24_UINT)
return false;
}
const struct isl_format_layout *fmtl = isl_format_get_layout(format);
const bool is_integer = isl_format_has_int_channel(format);
bool supported = true;
if (sample_count > 1)
supported &= isl_format_supports_multisampling(devinfo, format);
if (usage & PIPE_BIND_DEPTH_STENCIL) {
supported &= format == ISL_FORMAT_R32_FLOAT_X8X24_TYPELESS ||
format == ISL_FORMAT_R32_FLOAT ||
format == ISL_FORMAT_R24_UNORM_X8_TYPELESS ||
format == ISL_FORMAT_R16_UNORM ||
format == ISL_FORMAT_R8_UINT;
}
if (usage & PIPE_BIND_RENDER_TARGET) {
/* Alpha and luminance-alpha formats other than A8_UNORM are not
* renderable.
*
* For BLORP, we can apply the swizzle in the shader. But for
* general rendering, this would mean recompiling the shader, which
* we'd like to avoid doing. So we mark these formats non-renderable.
*
* We do support A8_UNORM as it's required and is renderable.
*/
if (pformat != PIPE_FORMAT_A8_UNORM &&
(util_format_is_alpha(pformat) ||
util_format_is_luminance_alpha(pformat)))
supported = false;
enum isl_format rt_format = format;
if (isl_format_is_rgbx(format) &&
!isl_format_supports_rendering(devinfo, format))
rt_format = isl_format_rgbx_to_rgba(format);
supported &= isl_format_supports_rendering(devinfo, rt_format);
if (!is_integer)
supported &= isl_format_supports_alpha_blending(devinfo, rt_format);
}
if (usage & PIPE_BIND_SHADER_IMAGE) {
/* Dataport doesn't support compression, and we can't resolve an MCS
* compressed surface. (Buffer images may have sample count of 0.)
*/
supported &= sample_count == 0;
supported &= isl_format_supports_typed_writes(devinfo, format);
supported &= isl_has_matching_typed_storage_image_format(devinfo, format);
}
if (usage & PIPE_BIND_SAMPLER_VIEW) {
supported &= isl_format_supports_sampling(devinfo, format);
bool ignore_filtering = false;
if (is_integer)
ignore_filtering = true;
/* The format tables say pre-gen5 can't filter these, but we still want to
* expose them for sampling, so skip the filtering check.
*/
if (devinfo->ver < 5 && (format == ISL_FORMAT_R32G32B32A32_FLOAT ||
format == ISL_FORMAT_R24_UNORM_X8_TYPELESS ||
format == ISL_FORMAT_R32_FLOAT ||
format == ISL_FORMAT_R32_FLOAT_X8X24_TYPELESS))
ignore_filtering = true;
if (!ignore_filtering)
supported &= isl_format_supports_filtering(devinfo, format);
/* Don't advertise 3-component RGB formats for non-buffer textures.
* This ensures that they are renderable from an API perspective since
* the state tracker will fall back to RGBA or RGBX, which are
* renderable. We want to render internally for copies and blits,
* even if the application doesn't.
*
* Buffer textures don't need to be renderable, so we support real RGB.
* This is useful for PBO upload, and 32-bit RGB support is mandatory.
*/
if (target != PIPE_BUFFER)
supported &= fmtl->bpb != 24 && fmtl->bpb != 48 && fmtl->bpb != 96;
}
if (usage & PIPE_BIND_VERTEX_BUFFER) {
supported &= isl_format_supports_vertex_fetch(devinfo, format);
if (!devinfo->is_haswell) {
/* W/A: Pre-Haswell, the hardware doesn't really support the formats
* we'd like to use here, so upload everything as UINT and fix it in
* the shader
*/
if (format == ISL_FORMAT_R10G10B10A2_UNORM ||
format == ISL_FORMAT_B10G10R10A2_UNORM ||
format == ISL_FORMAT_R10G10B10A2_SNORM ||
format == ISL_FORMAT_B10G10R10A2_SNORM ||
format == ISL_FORMAT_R10G10B10A2_USCALED ||
format == ISL_FORMAT_B10G10R10A2_USCALED ||
format == ISL_FORMAT_R10G10B10A2_SSCALED ||
format == ISL_FORMAT_B10G10R10A2_SSCALED)
supported = true;
if (format == ISL_FORMAT_R8G8B8_SINT ||
format == ISL_FORMAT_R8G8B8_UINT ||
format == ISL_FORMAT_R16G16B16_SINT ||
format == ISL_FORMAT_R16G16B16_UINT)
supported = true;
}
}
if (usage & PIPE_BIND_INDEX_BUFFER) {
supported &= format == ISL_FORMAT_R8_UINT ||
format == ISL_FORMAT_R16_UINT ||
format == ISL_FORMAT_R32_UINT;
}
return supported;
}
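/* Example query (hypothetical call, showing how the state tracker might reach
 * this hook): asking whether RGBA8 works as a 4x MSAA render target,
 *
 *    crocus_is_format_supported(pscreen, PIPE_FORMAT_R8G8B8A8_UNORM,
 *                               PIPE_TEXTURE_2D, 4, 4,
 *                               PIPE_BIND_RENDER_TARGET);
 *
 * returns true only when the device generation allows 4x MSAA and ISL reports
 * multisampling, rendering, and alpha-blending support for the format.
 */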

View File

@ -0,0 +1,164 @@
/*
* Copyright © 2019 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
/**
* Macro and function definitions needed in order to use genxml.
*
* This should only be included in sources compiled per-generation.
*/
#include "crocus_batch.h"
#include "genxml/gen_macros.h"
#define __gen_address_type struct crocus_address
#define __gen_user_data struct crocus_batch
#define __gen_combine_address crocus_combine_address
static inline void *
__gen_get_batch_dwords(struct crocus_batch *batch, unsigned dwords)
{
return crocus_get_command_space(batch, dwords * sizeof(uint32_t));
}
static inline struct crocus_address
__gen_address_offset(struct crocus_address addr, uint64_t offset)
{
addr.offset += offset;
return addr;
}
static uint64_t
__gen_combine_address(struct crocus_batch *batch, void *location,
struct crocus_address addr, uint32_t delta)
{
uint32_t offset = (char *)location - (char *)batch->command.map;
if (addr.bo == NULL) {
return addr.offset + delta;
} else {
if (GFX_VER < 6 && crocus_ptr_in_state_buffer(batch, location)) {
offset = (char *) location - (char *) batch->state.map;
return crocus_state_reloc(batch, offset, addr.bo,
addr.offset + delta,
addr.reloc_flags);
}
assert(!crocus_ptr_in_state_buffer(batch, location));
offset = (char *) location - (char *) batch->command.map;
return crocus_command_reloc(batch, offset, addr.bo,
addr.offset + delta,
addr.reloc_flags);
}
}
#define __gen_address_type struct crocus_address
#define __gen_user_data struct crocus_batch
#define __genxml_cmd_length(cmd) cmd ## _length
#define __genxml_cmd_length_bias(cmd) cmd ## _length_bias
#define __genxml_cmd_header(cmd) cmd ## _header
#define __genxml_cmd_pack(cmd) cmd ## _pack
#define __genxml_reg_num(cmd) cmd ## _num
#include "genxml/genX_pack.h"
#include "genxml/gen_macros.h"
#include "genxml/genX_bits.h"
/* CS_GPR(15) is reserved for combining conditional rendering predicates
* with GL_ARB_indirect_parameters draw number predicates.
*/
#define MI_BUILDER_NUM_ALLOC_GPRS 15
#include "common/mi_builder.h"
#define _crocus_pack_command(batch, cmd, dst, name) \
for (struct cmd name = { __genxml_cmd_header(cmd) }, \
*_dst = (void *)(dst); __builtin_expect(_dst != NULL, 1); \
({ __genxml_cmd_pack(cmd)(batch, (void *)_dst, &name); \
_dst = NULL; \
}))
#define crocus_pack_command(cmd, dst, name) \
_crocus_pack_command(NULL, cmd, dst, name)
#define _crocus_pack_state(batch, cmd, dst, name) \
for (struct cmd name = {}, \
*_dst = (void *)(dst); __builtin_expect(_dst != NULL, 1); \
__genxml_cmd_pack(cmd)(batch, (void *)_dst, &name), \
_dst = NULL)
#define crocus_pack_state(cmd, dst, name) \
_crocus_pack_state(NULL, cmd, dst, name)
#define crocus_emit_cmd(batch, cmd, name) \
_crocus_pack_command(batch, cmd, __gen_get_batch_dwords(batch, __genxml_cmd_length(cmd)), name)
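/* Illustrative use of the emit helpers (the field name comes from genxml and
 * is only an example): the block body fills in the template struct, and the
 * dwords are packed straight into the batch when the block closes:
 *
 *    crocus_emit_cmd(batch, GENX(PIPE_CONTROL), pc) {
 *       pc.CommandStreamerStallEnable = true;
 *    }
 */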
#define crocus_emit_merge(batch, dwords0, dwords1, num_dwords) \
do { \
uint32_t *dw = __gen_get_batch_dwords(batch, num_dwords); \
for (uint32_t i = 0; i < num_dwords; i++) \
dw[i] = (dwords0)[i] | (dwords1)[i]; \
VG(VALGRIND_CHECK_MEM_IS_DEFINED(dw, num_dwords)); \
} while (0)
#define crocus_emit_reg(batch, reg, name) \
for (struct reg name = {}, *_cont = (struct reg *)1; _cont != NULL; \
({ \
uint32_t _dw[__genxml_cmd_length(reg)]; \
__genxml_cmd_pack(reg)(NULL, _dw, &name); \
for (unsigned i = 0; i < __genxml_cmd_length(reg); i++) { \
crocus_emit_cmd(batch, GENX(MI_LOAD_REGISTER_IMM), lri) { \
lri.RegisterOffset = __genxml_reg_num(reg); \
lri.DataDWord = _dw[i]; \
} \
} \
_cont = NULL; \
}))
/**
* crocus_address constructor helpers:
*
* When using these to construct a CSO, pass NULL for \p bo, and manually
* pin the BO later. Otherwise, genxml's address handling will add the
* BO to the current batch's validation list at CSO creation time, rather
* than at draw time as desired.
*/
UNUSED static struct crocus_address
ro_bo(struct crocus_bo *bo, uint64_t offset)
{
return (struct crocus_address) { .bo = bo, .offset = offset, .reloc_flags = RELOC_32BIT };
}
UNUSED static struct crocus_address
rw_bo(struct crocus_bo *bo, uint64_t offset)
{
return (struct crocus_address) { .bo = bo, .offset = offset, .reloc_flags = RELOC_32BIT | RELOC_WRITE };
}
UNUSED static struct crocus_address
ggtt_bo(struct crocus_bo *bo, uint64_t offset)
{
return (struct crocus_address) { .bo = bo, .offset = offset, .reloc_flags = RELOC_WRITE | RELOC_NEEDS_GGTT };
}
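/* Illustrative usage (variable names are hypothetical): use ro_bo() for data
 * the GPU only reads and rw_bo() for buffers it will write through the
 * relocated address:
 *
 *    struct crocus_address push = ro_bo(const_bo, const_offset);
 *    struct crocus_address dst  = rw_bo(query_bo, snapshot_offset);
 *
 * As noted above, pass NULL for the BO when baking an address into a CSO and
 * pin the BO at draw time instead.
 */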

View File

@ -0,0 +1,56 @@
/*
* Copyright © 2019 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
/* GenX-specific function declarations.
*
* Don't include this directly, it will be included by crocus_context.h.
*
* NOTE: This header can be included multiple times, from the same file.
*/
/* crocus_state.c */
void genX(init_state)(struct crocus_context *ice);
void genX(init_screen_state)(struct crocus_screen *screen);
void genX(upload_urb)(struct crocus_batch *batch,
unsigned vs_size,
bool gs_present,
unsigned gs_size);
void genX(emit_hashing_mode)(struct crocus_context *ice,
struct crocus_batch *batch,
unsigned width, unsigned height,
unsigned scale);
/* crocus_blorp.c */
void genX(init_blorp)(struct crocus_context *ice);
/* crocus_query.c */
void genX(init_query)(struct crocus_context *ice);
void genX(init_screen_query)(struct crocus_screen *screen);
void genX(math_add32_gpr0)(struct crocus_context *ice,
struct crocus_batch *batch,
uint32_t x);
void genX(math_div32_gpr0)(struct crocus_context *ice,
struct crocus_batch *batch,
uint32_t D);
/* crocus_blt.c */
void genX(init_blt)(struct crocus_screen *screen);

View File

@ -0,0 +1,484 @@
/*
* Copyright © 2019 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include "crocus_monitor.h"
#include <xf86drm.h>
#include "crocus_screen.h"
#include "crocus_context.h"
#include "perf/intel_perf.h"
#include "perf/intel_perf_query.h"
#include "perf/intel_perf_regs.h"
struct crocus_monitor_object {
int num_active_counters;
int *active_counters;
size_t result_size;
unsigned char *result_buffer;
struct intel_perf_query_object *query;
};
int
crocus_get_monitor_info(struct pipe_screen *pscreen, unsigned index,
struct pipe_driver_query_info *info)
{
const struct crocus_screen *screen = (struct crocus_screen *)pscreen;
assert(screen->monitor_cfg);
if (!screen->monitor_cfg)
return 0;
const struct crocus_monitor_config *monitor_cfg = screen->monitor_cfg;
if (!info) {
/* return the number of metrics */
return monitor_cfg->num_counters;
}
const struct intel_perf_config *perf_cfg = monitor_cfg->perf_cfg;
const int group = monitor_cfg->counters[index].group;
const int counter_index = monitor_cfg->counters[index].counter;
struct intel_perf_query_counter *counter =
&perf_cfg->queries[group].counters[counter_index];
info->group_id = group;
info->name = counter->name;
info->query_type = PIPE_QUERY_DRIVER_SPECIFIC + index;
if (counter->type == INTEL_PERF_COUNTER_TYPE_THROUGHPUT)
info->result_type = PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE;
else
info->result_type = PIPE_DRIVER_QUERY_RESULT_TYPE_CUMULATIVE;
switch (counter->data_type) {
case INTEL_PERF_COUNTER_DATA_TYPE_BOOL32:
case INTEL_PERF_COUNTER_DATA_TYPE_UINT32:
info->type = PIPE_DRIVER_QUERY_TYPE_UINT;
info->max_value.u32 = 0;
break;
case INTEL_PERF_COUNTER_DATA_TYPE_UINT64:
info->type = PIPE_DRIVER_QUERY_TYPE_UINT64;
info->max_value.u64 = 0;
break;
case INTEL_PERF_COUNTER_DATA_TYPE_FLOAT:
case INTEL_PERF_COUNTER_DATA_TYPE_DOUBLE:
info->type = PIPE_DRIVER_QUERY_TYPE_FLOAT;
info->max_value.u64 = -1;
break;
default:
assert(false);
break;
}
/* indicates that this is an OA query, not a pipeline statistics query */
info->flags = PIPE_DRIVER_QUERY_FLAG_BATCH;
return 1;
}
typedef void (*bo_unreference_t)(void *);
typedef void *(*bo_map_t)(void *, void *, unsigned flags);
typedef void (*bo_unmap_t)(void *);
typedef void (*emit_mi_report_t)(void *, void *, uint32_t, uint32_t);
typedef void (*emit_mi_flush_t)(void *);
typedef void (*capture_frequency_stat_register_t)(void *, void *,
uint32_t );
typedef void (*store_register_mem64_t)(void *ctx, void *bo,
uint32_t reg, uint32_t offset);
typedef bool (*batch_references_t)(void *batch, void *bo);
typedef void (*bo_wait_rendering_t)(void *bo);
typedef int (*bo_busy_t)(void *bo);
static void *
crocus_oa_bo_alloc(void *bufmgr, const char *name, uint64_t size)
{
return crocus_bo_alloc(bufmgr, name, size);
}
#if 0
static void
crocus_monitor_emit_mi_flush(struct crocus_context *ice)
{
const int flags = PIPE_CONTROL_RENDER_TARGET_FLUSH |
PIPE_CONTROL_INSTRUCTION_INVALIDATE |
PIPE_CONTROL_CONST_CACHE_INVALIDATE |
PIPE_CONTROL_DATA_CACHE_FLUSH |
PIPE_CONTROL_DEPTH_CACHE_FLUSH |
PIPE_CONTROL_VF_CACHE_INVALIDATE |
PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
PIPE_CONTROL_CS_STALL;
crocus_emit_pipe_control_flush(&ice->batches[CROCUS_BATCH_RENDER],
"OA metrics", flags);
}
#endif
static void
crocus_monitor_emit_mi_report_perf_count(void *c,
void *bo,
uint32_t offset_in_bytes,
uint32_t report_id)
{
struct crocus_context *ice = c;
struct crocus_batch *batch = &ice->batches[CROCUS_BATCH_RENDER];
struct crocus_screen *screen = batch->screen;
screen->vtbl.emit_mi_report_perf_count(batch, bo, offset_in_bytes, report_id);
}
static void
crocus_monitor_batchbuffer_flush(void *c, const char *file, int line)
{
struct crocus_context *ice = c;
_crocus_batch_flush(&ice->batches[CROCUS_BATCH_RENDER], __FILE__, __LINE__);
}
#if 0
static void
crocus_monitor_capture_frequency_stat_register(void *ctx,
void *bo,
uint32_t bo_offset)
{
struct crocus_context *ice = ctx;
struct crocus_batch *batch = &ice->batches[CROCUS_BATCH_RENDER];
ice->vtbl.store_register_mem32(batch, GEN9_RPSTAT0, bo, bo_offset, false);
}
static void
crocus_monitor_store_register_mem64(void *ctx, void *bo,
uint32_t reg, uint32_t offset)
{
struct crocus_context *ice = ctx;
struct crocus_batch *batch = &ice->batches[CROCUS_BATCH_RENDER];
ice->vtbl.store_register_mem64(batch, reg, bo, offset, false);
}
#endif
static bool
crocus_monitor_init_metrics(struct crocus_screen *screen)
{
struct crocus_monitor_config *monitor_cfg =
rzalloc(screen, struct crocus_monitor_config);
struct intel_perf_config *perf_cfg = NULL;
if (unlikely(!monitor_cfg))
goto allocation_error;
perf_cfg = intel_perf_new(monitor_cfg);
if (unlikely(!perf_cfg))
goto allocation_error;
monitor_cfg->perf_cfg = perf_cfg;
perf_cfg->vtbl.bo_alloc = crocus_oa_bo_alloc;
perf_cfg->vtbl.bo_unreference = (bo_unreference_t)crocus_bo_unreference;
perf_cfg->vtbl.bo_map = (bo_map_t)crocus_bo_map;
perf_cfg->vtbl.bo_unmap = (bo_unmap_t)crocus_bo_unmap;
perf_cfg->vtbl.emit_mi_report_perf_count =
(emit_mi_report_t)crocus_monitor_emit_mi_report_perf_count;
perf_cfg->vtbl.batchbuffer_flush = crocus_monitor_batchbuffer_flush;
perf_cfg->vtbl.batch_references = (batch_references_t)crocus_batch_references;
perf_cfg->vtbl.bo_wait_rendering =
(bo_wait_rendering_t)crocus_bo_wait_rendering;
perf_cfg->vtbl.bo_busy = (bo_busy_t)crocus_bo_busy;
intel_perf_init_metrics(perf_cfg, &screen->devinfo, screen->fd, false, false);
screen->monitor_cfg = monitor_cfg;
/* a gallium "group" is equivalent to a gen "query"
* a gallium "query" is equivalent to a gen "query_counter"
*
* Each gen_query supports a specific number of query_counters. To
* allocate the array of crocus_monitor_counter, we need an upper bound
* (ignoring duplicate query_counters).
*/
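/* Illustrative numbers (hypothetical): three gen queries exposing 10, 12 and
 * 7 counters give an upper bound of 29 slots; duplicate counter names are
 * filtered below, so num_counters may end up smaller.
 */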
int gen_query_counters_count = 0;
for (int gen_query_id = 0;
gen_query_id < perf_cfg->n_queries;
++gen_query_id) {
gen_query_counters_count += perf_cfg->queries[gen_query_id].n_counters;
}
monitor_cfg->counters = rzalloc_size(monitor_cfg,
sizeof(struct crocus_monitor_counter) *
gen_query_counters_count);
if (unlikely(!monitor_cfg->counters))
goto allocation_error;
int crocus_monitor_id = 0;
for (int group = 0; group < perf_cfg->n_queries; ++group) {
for (int counter = 0;
counter < perf_cfg->queries[group].n_counters;
++counter) {
/* Check previously identified metrics to filter out duplicates. The
* user is not helped by having the same metric available in several
* groups. (n^2 algorithm).
*/
bool duplicate = false;
for (int existing_group = 0;
existing_group < group && !duplicate;
++existing_group) {
for (int existing_counter = 0;
existing_counter < perf_cfg->queries[existing_group].n_counters && !duplicate;
++existing_counter) {
const char *current_name =
perf_cfg->queries[group].counters[counter].name;
const char *existing_name =
perf_cfg->queries[existing_group].counters[existing_counter].name;
if (strcmp(current_name, existing_name) == 0) {
duplicate = true;
}
}
}
if (duplicate)
continue;
monitor_cfg->counters[crocus_monitor_id].group = group;
monitor_cfg->counters[crocus_monitor_id].counter = counter;
++crocus_monitor_id;
}
}
monitor_cfg->num_counters = crocus_monitor_id;
return monitor_cfg->num_counters;
 allocation_error:
/* monitor_cfg was allocated with ralloc; perf_cfg and the counters array
 * are parented to it, so a single ralloc_free releases everything.
 */
ralloc_free(monitor_cfg);
return false;
}
int
crocus_get_monitor_group_info(struct pipe_screen *pscreen,
unsigned group_index,
struct pipe_driver_query_group_info *info)
{
struct crocus_screen *screen = (struct crocus_screen *)pscreen;
if (!screen->monitor_cfg) {
if (!crocus_monitor_init_metrics(screen))
return 0;
}
const struct crocus_monitor_config *monitor_cfg = screen->monitor_cfg;
const struct intel_perf_config *perf_cfg = monitor_cfg->perf_cfg;
if (!info) {
/* return the count that can be queried */
return perf_cfg->n_queries;
}
if (group_index >= perf_cfg->n_queries) {
/* out of range */
return 0;
}
struct intel_perf_query_info *query = &perf_cfg->queries[group_index];
info->name = query->name;
info->max_active_queries = query->n_counters;
info->num_queries = query->n_counters;
return 1;
}
static void
crocus_init_monitor_ctx(struct crocus_context *ice)
{
struct crocus_screen *screen = (struct crocus_screen *) ice->ctx.screen;
struct crocus_monitor_config *monitor_cfg = screen->monitor_cfg;
ice->perf_ctx = intel_perf_new_context(ice);
if (unlikely(!ice->perf_ctx))
return;
struct intel_perf_context *perf_ctx = ice->perf_ctx;
struct intel_perf_config *perf_cfg = monitor_cfg->perf_cfg;
intel_perf_init_context(perf_ctx,
perf_cfg,
ice,
ice,
screen->bufmgr,
&screen->devinfo,
ice->batches[CROCUS_BATCH_RENDER].hw_ctx_id,
screen->fd);
}
/* entry point for GenPerfMonitorsAMD */
struct crocus_monitor_object *
crocus_create_monitor_object(struct crocus_context *ice,
unsigned num_queries,
unsigned *query_types)
{
struct crocus_screen *screen = (struct crocus_screen *) ice->ctx.screen;
struct crocus_monitor_config *monitor_cfg = screen->monitor_cfg;
struct intel_perf_config *perf_cfg = monitor_cfg->perf_cfg;
struct intel_perf_query_object *query_obj = NULL;
/* initialize perf context if this has not already been done. This
* function is the first entry point that carries the gl context.
*/
if (ice->perf_ctx == NULL) {
crocus_init_monitor_ctx(ice);
}
struct intel_perf_context *perf_ctx = ice->perf_ctx;
assert(num_queries > 0);
int query_index = query_types[0] - PIPE_QUERY_DRIVER_SPECIFIC;
assert(query_index < monitor_cfg->num_counters);
const int group = monitor_cfg->counters[query_index].group;
struct crocus_monitor_object *monitor =
calloc(1, sizeof(struct crocus_monitor_object));
if (unlikely(!monitor))
goto allocation_failure;
monitor->num_active_counters = num_queries;
monitor->active_counters = calloc(num_queries, sizeof(int));
if (unlikely(!monitor->active_counters))
goto allocation_failure;
for (int i = 0; i < num_queries; ++i) {
unsigned current_query = query_types[i];
unsigned current_query_index = current_query - PIPE_QUERY_DRIVER_SPECIFIC;
/* all queries must be in the same group */
assert(current_query_index < monitor_cfg->num_counters);
assert(monitor_cfg->counters[current_query_index].group == group);
monitor->active_counters[i] =
monitor_cfg->counters[current_query_index].counter;
}
/* create the intel_perf_query */
query_obj = intel_perf_new_query(perf_ctx, group);
if (unlikely(!query_obj))
goto allocation_failure;
monitor->query = query_obj;
monitor->result_size = perf_cfg->queries[group].data_size;
monitor->result_buffer = calloc(1, monitor->result_size);
if (unlikely(!monitor->result_buffer))
goto allocation_failure;
return monitor;
allocation_failure:
if (monitor) {
free(monitor->active_counters);
free(monitor->result_buffer);
}
free(query_obj);
free(monitor);
return NULL;
}
void
crocus_destroy_monitor_object(struct pipe_context *ctx,
struct crocus_monitor_object *monitor)
{
struct crocus_context *ice = (struct crocus_context *)ctx;
intel_perf_delete_query(ice->perf_ctx, monitor->query);
free(monitor->result_buffer);
monitor->result_buffer = NULL;
free(monitor->active_counters);
monitor->active_counters = NULL;
free(monitor);
}
bool
crocus_begin_monitor(struct pipe_context *ctx,
struct crocus_monitor_object *monitor)
{
struct crocus_context *ice = (void *) ctx;
struct intel_perf_context *perf_ctx = ice->perf_ctx;
return intel_perf_begin_query(perf_ctx, monitor->query);
}
bool
crocus_end_monitor(struct pipe_context *ctx,
struct crocus_monitor_object *monitor)
{
struct crocus_context *ice = (void *) ctx;
struct intel_perf_context *perf_ctx = ice->perf_ctx;
intel_perf_end_query(perf_ctx, monitor->query);
return true;
}
bool
crocus_get_monitor_result(struct pipe_context *ctx,
struct crocus_monitor_object *monitor,
bool wait,
union pipe_numeric_type_union *result)
{
struct crocus_context *ice = (void *) ctx;
struct intel_perf_context *perf_ctx = ice->perf_ctx;
struct crocus_batch *batch = &ice->batches[CROCUS_BATCH_RENDER];
bool monitor_ready =
intel_perf_is_query_ready(perf_ctx, monitor->query, batch);
if (!monitor_ready) {
if (!wait)
return false;
intel_perf_wait_query(perf_ctx, monitor->query, batch);
}
assert(intel_perf_is_query_ready(perf_ctx, monitor->query, batch));
unsigned bytes_written;
intel_perf_get_query_data(perf_ctx, monitor->query, batch,
monitor->result_size,
(unsigned*) monitor->result_buffer,
&bytes_written);
if (bytes_written != monitor->result_size)
return false;
/* copy metrics into the batch result */
for (int i = 0; i < monitor->num_active_counters; ++i) {
int current_counter = monitor->active_counters[i];
const struct intel_perf_query_info *info =
intel_perf_query_info(monitor->query);
const struct intel_perf_query_counter *counter =
&info->counters[current_counter];
assert(intel_perf_query_counter_get_size(counter));
switch (counter->data_type) {
case INTEL_PERF_COUNTER_DATA_TYPE_UINT64:
result[i].u64 = *(uint64_t*)(monitor->result_buffer + counter->offset);
break;
case INTEL_PERF_COUNTER_DATA_TYPE_FLOAT:
result[i].f = *(float*)(monitor->result_buffer + counter->offset);
break;
case INTEL_PERF_COUNTER_DATA_TYPE_UINT32:
case INTEL_PERF_COUNTER_DATA_TYPE_BOOL32:
result[i].u64 = *(uint32_t*)(monitor->result_buffer + counter->offset);
break;
case INTEL_PERF_COUNTER_DATA_TYPE_DOUBLE: {
double v = *(double*)(monitor->result_buffer + counter->offset);
result[i].f = v;
break;
}
default:
unreachable("unexpected counter data type");
}
}
return true;
}

View File

@ -0,0 +1,72 @@
/*
* Copyright © 2019 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef CROCUS_MONITOR_H
#define CROCUS_MONITOR_H
#include "pipe/p_screen.h"
struct crocus_monitor_counter {
int group;
int counter;
};
struct crocus_monitor_config {
struct intel_perf_config *perf_cfg;
/* gallium requires an index for each counter */
int num_counters;
struct crocus_monitor_counter *counters;
};
int crocus_get_monitor_info(struct pipe_screen *pscreen, unsigned index,
struct pipe_driver_query_info *info);
int crocus_get_monitor_group_info(struct pipe_screen *pscreen,
unsigned index,
struct pipe_driver_query_group_info *info);
struct crocus_context;
struct crocus_screen;
struct crocus_monitor_object *
crocus_create_monitor_object(struct crocus_context *ice,
unsigned num_queries,
unsigned *query_types);
struct pipe_query;
void crocus_destroy_monitor_object(struct pipe_context *ctx,
struct crocus_monitor_object *monitor);
bool
crocus_begin_monitor(struct pipe_context *ctx,
struct crocus_monitor_object *monitor);
bool
crocus_end_monitor(struct pipe_context *ctx,
struct crocus_monitor_object *monitor);
bool
crocus_get_monitor_result(struct pipe_context *ctx,
struct crocus_monitor_object *monitor,
bool wait,
union pipe_numeric_type_union *result);
#endif

View File

@ -0,0 +1,74 @@
/*
* Copyright © 2017 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef CROCUS_PIPE_H
#define CROCUS_PIPE_H
#include "pipe/p_defines.h"
#include "compiler/shader_enums.h"
static inline gl_shader_stage
stage_from_pipe(enum pipe_shader_type pstage)
{
static const gl_shader_stage stages[PIPE_SHADER_TYPES] = {
[PIPE_SHADER_VERTEX] = MESA_SHADER_VERTEX,
[PIPE_SHADER_TESS_CTRL] = MESA_SHADER_TESS_CTRL,
[PIPE_SHADER_TESS_EVAL] = MESA_SHADER_TESS_EVAL,
[PIPE_SHADER_GEOMETRY] = MESA_SHADER_GEOMETRY,
[PIPE_SHADER_FRAGMENT] = MESA_SHADER_FRAGMENT,
[PIPE_SHADER_COMPUTE] = MESA_SHADER_COMPUTE,
};
return stages[pstage];
}
static inline enum pipe_shader_type
stage_to_pipe(gl_shader_stage stage)
{
static const enum pipe_shader_type pstages[MESA_SHADER_STAGES] = {
[MESA_SHADER_VERTEX] = PIPE_SHADER_VERTEX,
[MESA_SHADER_TESS_CTRL] = PIPE_SHADER_TESS_CTRL,
[MESA_SHADER_TESS_EVAL] = PIPE_SHADER_TESS_EVAL,
[MESA_SHADER_GEOMETRY] = PIPE_SHADER_GEOMETRY,
[MESA_SHADER_FRAGMENT] = PIPE_SHADER_FRAGMENT,
[MESA_SHADER_COMPUTE] = PIPE_SHADER_COMPUTE,
};
return pstages[stage];
}
/**
* Convert an swizzle enumeration (i.e. PIPE_SWIZZLE_X) to one of the HW's
* "Shader Channel Select" enumerations (i.e. SCS_RED). The mappings are
*
* SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W, SWIZZLE_ZERO, SWIZZLE_ONE
* 0 1 2 3 4 5
* 4 5 6 7 0 1
* SCS_RED, SCS_GREEN, SCS_BLUE, SCS_ALPHA, SCS_ZERO, SCS_ONE
*
* which is simply adding 4 then modding by 8 (or anding with 7).
*/
static inline enum isl_channel_select
pipe_swizzle_to_isl_channel(enum pipe_swizzle swizzle)
{
return (swizzle + 4) & 7;
}
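/* A couple of worked examples of the mapping above (illustrative, using the
 * numeric values from the table): PIPE_SWIZZLE_X (0) maps to (0 + 4) & 7 = 4,
 * i.e. SCS_RED, while PIPE_SWIZZLE_ZERO (4) maps to (4 + 4) & 7 = 0,
 * i.e. SCS_ZERO.
 */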
#endif

View File

@ -0,0 +1,368 @@
/*
* Copyright © 2017 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
/**
* @file crocus_pipe_control.c
*
* PIPE_CONTROL is the main flushing and synchronization primitive on Intel
* GPUs. It can invalidate caches, stall until rendering reaches various
* stages of completion, write to memory, and other things. In a way, it's
* a swiss army knife command - it has all kinds of capabilities, but some
* significant limitations as well.
*
* Unfortunately, it's notoriously complicated and difficult to use. Many
* sub-commands can't be used together. Some are meant to be used at the
* top of the pipeline (invalidating caches before drawing), while some are
* meant to be used at the end (stalling or flushing after drawing).
*
* Also, there's a list of restrictions a mile long, which vary by generation.
* Do this before doing that, or suffer the consequences (usually a GPU hang).
*
* This file contains helpers for emitting them safely. You can simply call
* crocus_emit_pipe_control_flush() with the desired operations (as logical
* PIPE_CONTROL_* bits), and it will take care of splitting it into multiple
* PIPE_CONTROL commands as necessary. The per-generation workarounds are
* applied in crocus_emit_raw_pipe_control() in crocus_state.c.
*/
#include "crocus_context.h"
#include "util/hash_table.h"
#include "util/set.h"
/**
* Emit a PIPE_CONTROL with various flushing flags.
*
* The caller is responsible for deciding what flags are appropriate for the
* given generation.
*/
void
crocus_emit_pipe_control_flush(struct crocus_batch *batch,
const char *reason,
uint32_t flags)
{
const struct intel_device_info *devinfo = &batch->screen->devinfo;
if (devinfo->ver >= 6 &&
(flags & PIPE_CONTROL_CACHE_FLUSH_BITS) &&
(flags & PIPE_CONTROL_CACHE_INVALIDATE_BITS)) {
/* A pipe control command with flush and invalidate bits set
* simultaneously is an inherently racy operation on Gen6+ if the
* contents of the flushed caches were intended to become visible from
* any of the invalidated caches. Split it in two PIPE_CONTROLs, the
* first one should stall the pipeline to make sure that the flushed R/W
* caches are coherent with memory once the specified R/O caches are
* invalidated. On pre-Gen6 hardware the (implicit) R/O cache
* invalidation seems to happen at the bottom of the pipeline together
* with any write cache flush, so this shouldn't be a concern. In order
* to ensure a full stall, we do an end-of-pipe sync.
*/
crocus_emit_end_of_pipe_sync(batch, reason,
flags & PIPE_CONTROL_CACHE_FLUSH_BITS);
flags &= ~(PIPE_CONTROL_CACHE_FLUSH_BITS | PIPE_CONTROL_CS_STALL);
}
batch->screen->vtbl.emit_raw_pipe_control(batch, reason, flags, NULL, 0, 0);
}
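/* A minimal usage sketch (illustrative, not prescriptive): a caller that has
 * rendered to a texture and now wants to sample from it on Gen6+ could simply
 * combine the flush and invalidate bits in a single call and let the splitting
 * logic above handle the ordering, e.g.:
 *
 *    crocus_emit_pipe_control_flush(batch, "example: render-to-texture",
 *                                   PIPE_CONTROL_RENDER_TARGET_FLUSH |
 *                                   PIPE_CONTROL_CS_STALL |
 *                                   PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE);
 *
 * The exact flag choice is still the caller's responsibility and varies by
 * generation.
 */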
/**
* Emit a PIPE_CONTROL that writes to a buffer object.
*
* \p flags should contain one of the following items:
* - PIPE_CONTROL_WRITE_IMMEDIATE
* - PIPE_CONTROL_WRITE_TIMESTAMP
* - PIPE_CONTROL_WRITE_DEPTH_COUNT
*/
void
crocus_emit_pipe_control_write(struct crocus_batch *batch,
const char *reason, uint32_t flags,
struct crocus_bo *bo, uint32_t offset,
uint64_t imm)
{
batch->screen->vtbl.emit_raw_pipe_control(batch, reason, flags, bo, offset, imm);
}
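/* For instance, a hedged sketch of a fence-style immediate write (fence_bo
 * and the offset here are placeholders for illustration, not names from this
 * file):
 *
 *    crocus_emit_pipe_control_write(batch, "example: fence",
 *                                   PIPE_CONTROL_WRITE_IMMEDIATE |
 *                                   PIPE_CONTROL_CS_STALL,
 *                                   fence_bo, 0, 1);
 */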
/**
* Restriction [DevSNB, DevIVB]:
*
* Prior to changing Depth/Stencil Buffer state (i.e. any combination of
* 3DSTATE_DEPTH_BUFFER, 3DSTATE_CLEAR_PARAMS, 3DSTATE_STENCIL_BUFFER,
* 3DSTATE_HIER_DEPTH_BUFFER) SW must first issue a pipelined depth stall
* (PIPE_CONTROL with Depth Stall bit set), followed by a pipelined depth
* cache flush (PIPE_CONTROL with Depth Flush Bit set), followed by
* another pipelined depth stall (PIPE_CONTROL with Depth Stall bit set),
* unless SW can otherwise guarantee that the pipeline from WM onwards is
* already flushed (e.g., via a preceding MI_FLUSH).
*/
void
crocus_emit_depth_stall_flushes(struct crocus_batch *batch)
{
UNUSED const struct intel_device_info *devinfo = &batch->screen->devinfo;
assert(devinfo->ver >= 6);
crocus_emit_pipe_control_flush(batch, "depth stall", PIPE_CONTROL_DEPTH_STALL);
crocus_emit_pipe_control_flush(batch, "depth stall", PIPE_CONTROL_DEPTH_CACHE_FLUSH);
crocus_emit_pipe_control_flush(batch, "depth stall", PIPE_CONTROL_DEPTH_STALL);
}
/*
* From Sandybridge PRM, volume 2, "1.7.2 End-of-Pipe Synchronization":
*
* Write synchronization is a special case of end-of-pipe
* synchronization that requires that the render cache and/or depth
* related caches are flushed to memory, where the data will become
* globally visible. This type of synchronization is required prior to
* SW (CPU) actually reading the result data from memory, or initiating
* an operation that will use as a read surface (such as a texture
* surface) a previous render target and/or depth/stencil buffer
*
* From Haswell PRM, volume 2, part 1, "End-of-Pipe Synchronization":
*
* Exercising the write cache flush bits (Render Target Cache Flush
* Enable, Depth Cache Flush Enable, DC Flush) in PIPE_CONTROL only
* ensures the write caches are flushed and doesn't guarantee the data
* is globally visible.
*
* SW can track the completion of the end-of-pipe-synchronization by
* using "Notify Enable" and "PostSync Operation - Write Immediate
* Data" in the PIPE_CONTROL command.
*/
void
crocus_emit_end_of_pipe_sync(struct crocus_batch *batch,
const char *reason, uint32_t flags)
{
const struct intel_device_info *devinfo = &batch->screen->devinfo;
if (devinfo->ver >= 6) {
/* From Sandybridge PRM, volume 2, "1.7.3.1 Writing a Value to Memory":
*
* "The most common action to perform upon reaching a synchronization
* point is to write a value out to memory. An immediate value
* (included with the synchronization command) may be written."
*
* From Broadwell PRM, volume 7, "End-of-Pipe Synchronization":
*
* "In case the data flushed out by the render engine is to be read
* back in to the render engine in coherent manner, then the render
* engine has to wait for the fence completion before accessing the
* flushed data. This can be achieved by following means on various
* products: PIPE_CONTROL command with CS Stall and the required
* write caches flushed with Post-Sync-Operation as Write Immediate
* Data.
*
* Example:
* - Workload-1 (3D/GPGPU/MEDIA)
* - PIPE_CONTROL (CS Stall, Post-Sync-Operation Write Immediate
* Data, Required Write Cache Flush bits set)
* - Workload-2 (Can use the data produce or output by Workload-1)
*/
crocus_emit_pipe_control_write(batch, reason,
flags | PIPE_CONTROL_CS_STALL |
PIPE_CONTROL_WRITE_IMMEDIATE,
batch->ice->workaround_bo,
batch->ice->workaround_offset, 0);
if (batch->screen->devinfo.is_haswell) {
#define GEN7_3DPRIM_START_INSTANCE 0x243C
batch->screen->vtbl.load_register_mem32(batch, GEN7_3DPRIM_START_INSTANCE,
batch->ice->workaround_bo,
batch->ice->workaround_offset);
}
} else {
/* On gen4-5, a regular pipe control seems to suffice. */
crocus_emit_pipe_control_flush(batch, reason, flags);
}
}
/* Emit a pipelined flush to either flush render and texture cache for
* reading from an FBO-drawn texture, or flush so that frontbuffer
* render appears on the screen in DRI1.
*
* This is also used for the always_flush_cache driconf debug option.
*/
void
crocus_emit_mi_flush(struct crocus_batch *batch)
{
const struct intel_device_info *devinfo = &batch->screen->devinfo;
int flags = PIPE_CONTROL_RENDER_TARGET_FLUSH;
if (devinfo->ver >= 6) {
flags |= PIPE_CONTROL_INSTRUCTION_INVALIDATE |
PIPE_CONTROL_CONST_CACHE_INVALIDATE |
PIPE_CONTROL_DATA_CACHE_FLUSH |
PIPE_CONTROL_DEPTH_CACHE_FLUSH |
PIPE_CONTROL_VF_CACHE_INVALIDATE |
PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
PIPE_CONTROL_CS_STALL;
}
crocus_emit_pipe_control_flush(batch, "mi flush", flags);
}
/**
* Emits a PIPE_CONTROL with a non-zero post-sync operation, for
* implementing two workarounds on gen6. From section 1.4.7.1
* "PIPE_CONTROL" of the Sandy Bridge PRM volume 2 part 1:
*
* [DevSNB-C+{W/A}] Before any depth stall flush (including those
* produced by non-pipelined state commands), software needs to first
* send a PIPE_CONTROL with no bits set except Post-Sync Operation !=
* 0.
*
* [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush Enable
* =1, a PIPE_CONTROL with any non-zero post-sync-op is required.
*
* And the workaround for these two requires this workaround first:
*
* [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent
* BEFORE the pipe-control with a post-sync op and no write-cache
* flushes.
*
* And this last workaround is tricky because of the requirements on
* that bit. From section 1.4.7.2.3 "Stall" of the Sandy Bridge PRM
* volume 2 part 1:
*
* "1 of the following must also be set:
* - Render Target Cache Flush Enable ([12] of DW1)
* - Depth Cache Flush Enable ([0] of DW1)
* - Stall at Pixel Scoreboard ([1] of DW1)
* - Depth Stall ([13] of DW1)
* - Post-Sync Operation ([13] of DW1)
* - Notify Enable ([8] of DW1)"
*
* The cache flushes require the workaround flush that triggered this
* one, so we can't use it. Depth stall would trigger the same.
* Post-sync nonzero is what triggered this second workaround, so we
* can't use that one either. Notify enable is IRQs, which aren't
* really our business. That leaves only stall at scoreboard.
*/
void
crocus_emit_post_sync_nonzero_flush(struct crocus_batch *batch)
{
crocus_emit_pipe_control_flush(batch, "nonzero",
PIPE_CONTROL_CS_STALL |
PIPE_CONTROL_STALL_AT_SCOREBOARD);
crocus_emit_pipe_control_write(batch, "nonzero",
PIPE_CONTROL_WRITE_IMMEDIATE,
batch->ice->workaround_bo,
batch->ice->workaround_offset, 0);
}
/**
* Flush and invalidate all caches (for debugging purposes).
*/
void
crocus_flush_all_caches(struct crocus_batch *batch)
{
crocus_emit_pipe_control_flush(batch, "debug: flush all caches",
PIPE_CONTROL_CS_STALL |
PIPE_CONTROL_DATA_CACHE_FLUSH |
PIPE_CONTROL_DEPTH_CACHE_FLUSH |
PIPE_CONTROL_RENDER_TARGET_FLUSH |
PIPE_CONTROL_VF_CACHE_INVALIDATE |
PIPE_CONTROL_INSTRUCTION_INVALIDATE |
PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
PIPE_CONTROL_CONST_CACHE_INVALIDATE |
PIPE_CONTROL_STATE_CACHE_INVALIDATE);
}
static void
crocus_texture_barrier(struct pipe_context *ctx, unsigned flags)
{
struct crocus_context *ice = (void *) ctx;
struct crocus_batch *render_batch = &ice->batches[CROCUS_BATCH_RENDER];
struct crocus_batch *compute_batch = &ice->batches[CROCUS_BATCH_COMPUTE];
const struct intel_device_info *devinfo = &render_batch->screen->devinfo;
if (devinfo->ver < 6) {
crocus_emit_mi_flush(render_batch);
return;
}
if (render_batch->contains_draw) {
crocus_batch_maybe_flush(render_batch, 48);
crocus_emit_pipe_control_flush(render_batch,
"API: texture barrier (1/2)",
(flags == 1 ? PIPE_CONTROL_DEPTH_CACHE_FLUSH : 0) |
PIPE_CONTROL_RENDER_TARGET_FLUSH |
PIPE_CONTROL_CS_STALL);
crocus_emit_pipe_control_flush(render_batch,
"API: texture barrier (2/2)",
PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE);
}
if (compute_batch->contains_draw) {
crocus_batch_maybe_flush(compute_batch, 48);
crocus_emit_pipe_control_flush(compute_batch,
"API: texture barrier (1/2)",
PIPE_CONTROL_CS_STALL);
crocus_emit_pipe_control_flush(compute_batch,
"API: texture barrier (2/2)",
PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE);
}
}
static void
crocus_memory_barrier(struct pipe_context *ctx, unsigned flags)
{
struct crocus_context *ice = (void *) ctx;
unsigned bits = PIPE_CONTROL_DATA_CACHE_FLUSH | PIPE_CONTROL_CS_STALL;
const struct intel_device_info *devinfo = &ice->batches[0].screen->devinfo;
assert(devinfo->ver == 7);
if (flags & (PIPE_BARRIER_VERTEX_BUFFER |
PIPE_BARRIER_INDEX_BUFFER |
PIPE_BARRIER_INDIRECT_BUFFER)) {
bits |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
}
if (flags & PIPE_BARRIER_CONSTANT_BUFFER) {
bits |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
PIPE_CONTROL_CONST_CACHE_INVALIDATE;
}
if (flags & (PIPE_BARRIER_TEXTURE | PIPE_BARRIER_FRAMEBUFFER)) {
bits |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
PIPE_CONTROL_RENDER_TARGET_FLUSH;
}
/* Typed surface messages are handled by the render cache on IVB, so we
* need to flush it too.
*/
if (!devinfo->is_haswell)
bits |= PIPE_CONTROL_RENDER_TARGET_FLUSH;
for (int i = 0; i < ice->batch_count; i++) {
if (ice->batches[i].contains_draw) {
crocus_batch_maybe_flush(&ice->batches[i], 24);
crocus_emit_pipe_control_flush(&ice->batches[i], "API: memory barrier",
bits);
}
}
}
void
crocus_init_flush_functions(struct pipe_context *ctx)
{
ctx->memory_barrier = crocus_memory_barrier;
ctx->texture_barrier = crocus_texture_barrier;
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,347 @@
/*
* Copyright © 2017 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
/**
* @file crocus_program_cache.c
*
* The in-memory program cache. This is basically a hash table mapping
* API-specified shaders and a state key to a compiled variant. It also
* takes care of uploading shader assembly into a BO for use on the GPU.
*/
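/* A rough sketch of the typical flow (hypothetical caller; the cache_id, key,
 * and compile outputs here are illustrative placeholders, since the real
 * shader-variant code lives elsewhere in the driver):
 *
 *    struct crocus_compiled_shader *shader =
 *       crocus_find_cached_shader(ice, cache_id, sizeof(key), &key);
 *    if (!shader)
 *       shader = crocus_upload_shader(ice, cache_id, sizeof(key), &key,
 *                                     assembly, asm_size, prog_data,
 *                                     prog_data_size, streamout, system_values,
 *                                     num_system_values, num_cbufs, &bt);
 *
 * On a hit the existing variant (and its offset into the cache BO) is reused;
 * on a miss the assembly is copied into the cache BO and a new entry is added.
 */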
#include <stdio.h>
#include <errno.h>
#include "pipe/p_defines.h"
#include "pipe/p_state.h"
#include "pipe/p_context.h"
#include "pipe/p_screen.h"
#include "util/u_atomic.h"
#include "util/u_upload_mgr.h"
#include "compiler/nir/nir.h"
#include "compiler/nir/nir_builder.h"
#include "intel/compiler/brw_compiler.h"
#include "intel/compiler/brw_eu.h"
#include "intel/compiler/brw_nir.h"
#include "crocus_context.h"
#include "crocus_resource.h"
struct keybox {
uint16_t size;
enum crocus_program_cache_id cache_id;
uint8_t data[0];
};
static struct keybox *
make_keybox(void *mem_ctx, enum crocus_program_cache_id cache_id,
const void *key, uint32_t key_size)
{
struct keybox *keybox =
ralloc_size(mem_ctx, sizeof(struct keybox) + key_size);
keybox->cache_id = cache_id;
keybox->size = key_size;
memcpy(keybox->data, key, key_size);
return keybox;
}
static uint32_t
keybox_hash(const void *void_key)
{
const struct keybox *key = void_key;
return _mesa_hash_data(&key->cache_id, key->size + sizeof(key->cache_id));
}
static bool
keybox_equals(const void *void_a, const void *void_b)
{
const struct keybox *a = void_a, *b = void_b;
if (a->size != b->size)
return false;
return memcmp(a->data, b->data, a->size) == 0;
}
struct crocus_compiled_shader *
crocus_find_cached_shader(struct crocus_context *ice,
enum crocus_program_cache_id cache_id,
uint32_t key_size, const void *key)
{
struct keybox *keybox = make_keybox(NULL, cache_id, key, key_size);
struct hash_entry *entry =
_mesa_hash_table_search(ice->shaders.cache, keybox);
ralloc_free(keybox);
return entry ? entry->data : NULL;
}
const void *
crocus_find_previous_compile(const struct crocus_context *ice,
enum crocus_program_cache_id cache_id,
unsigned program_string_id)
{
hash_table_foreach(ice->shaders.cache, entry) {
const struct keybox *keybox = entry->key;
const struct brw_base_prog_key *key = (const void *)keybox->data;
if (keybox->cache_id == cache_id &&
key->program_string_id == program_string_id) {
return keybox->data;
}
}
return NULL;
}
/**
* Look for an existing entry in the cache that has identical assembly code.
*
* This is useful for programs generating shaders at runtime, where multiple
* distinct shaders (from an API perspective) may compile to the same assembly
* in our backend. This saves space in the program cache buffer.
*/
static const struct crocus_compiled_shader *
find_existing_assembly(struct hash_table *cache, void *map,
const void *assembly, unsigned assembly_size)
{
hash_table_foreach (cache, entry) {
const struct crocus_compiled_shader *existing = entry->data;
if (existing->map_size != assembly_size)
continue;
if (memcmp(map + existing->offset, assembly, assembly_size) == 0)
return existing;
}
return NULL;
}
static void
crocus_cache_new_bo(struct crocus_context *ice,
uint32_t new_size)
{
struct crocus_screen *screen = (struct crocus_screen *)ice->ctx.screen;
struct crocus_bo *new_bo;
new_bo = crocus_bo_alloc(screen->bufmgr, "program cache", new_size);
void *map = crocus_bo_map(NULL, new_bo, MAP_READ | MAP_WRITE |
MAP_ASYNC | MAP_PERSISTENT);
if (ice->shaders.cache_next_offset != 0) {
memcpy(map, ice->shaders.cache_bo_map, ice->shaders.cache_next_offset);
}
crocus_bo_unmap(ice->shaders.cache_bo);
crocus_bo_unreference(ice->shaders.cache_bo);
ice->shaders.cache_bo = new_bo;
ice->shaders.cache_bo_map = map;
if (screen->devinfo.ver == 4) {
/* reemit all shaders on GEN4 only. */
ice->state.dirty |= CROCUS_DIRTY_CLIP | CROCUS_DIRTY_RASTER |
CROCUS_DIRTY_WM;
}
ice->batches[CROCUS_BATCH_RENDER].state_base_address_emitted = false;
ice->batches[CROCUS_BATCH_COMPUTE].state_base_address_emitted = false;
/* unset state base address */
}
static uint32_t
crocus_alloc_item_data(struct crocus_context *ice, uint32_t size)
{
if (ice->shaders.cache_next_offset + size > ice->shaders.cache_bo->size) {
uint32_t new_size = ice->shaders.cache_bo->size * 2;
while (ice->shaders.cache_next_offset + size > new_size)
new_size *= 2;
crocus_cache_new_bo(ice, new_size);
}
uint32_t offset = ice->shaders.cache_next_offset;
/* Programs are always 64-byte aligned, so set up the next one now */
ice->shaders.cache_next_offset = ALIGN(offset + size, 64);
return offset;
}
struct crocus_compiled_shader *
crocus_upload_shader(struct crocus_context *ice,
enum crocus_program_cache_id cache_id, uint32_t key_size,
const void *key, const void *assembly, uint32_t asm_size,
struct brw_stage_prog_data *prog_data,
uint32_t prog_data_size, uint32_t *streamout,
enum brw_param_builtin *system_values,
unsigned num_system_values, unsigned num_cbufs,
const struct crocus_binding_table *bt)
{
struct hash_table *cache = ice->shaders.cache;
struct crocus_compiled_shader *shader =
rzalloc_size(cache, sizeof(struct crocus_compiled_shader));
const struct crocus_compiled_shader *existing = find_existing_assembly(
cache, ice->shaders.cache_bo_map, assembly, asm_size);
/* If we can find a matching prog in the cache already, then reuse the
* existing stuff without creating a new copy in the underlying buffer
* object. This is notably useful for programs generating shaders at
* runtime, where multiple shaders may compile to the same thing in our
* backend.
*/
if (existing) {
shader->offset = existing->offset;
shader->map_size = existing->map_size;
} else {
shader->offset = crocus_alloc_item_data(ice, asm_size);
shader->map_size = asm_size;
memcpy(ice->shaders.cache_bo_map + shader->offset, assembly, asm_size);
}
shader->prog_data = prog_data;
shader->prog_data_size = prog_data_size;
shader->streamout = streamout;
shader->system_values = system_values;
shader->num_system_values = num_system_values;
shader->num_cbufs = num_cbufs;
shader->bt = *bt;
ralloc_steal(shader, shader->prog_data);
if (prog_data_size > 16) {
ralloc_steal(shader->prog_data, prog_data->param);
ralloc_steal(shader->prog_data, prog_data->pull_param);
}
ralloc_steal(shader, shader->streamout);
ralloc_steal(shader, shader->system_values);
struct keybox *keybox = make_keybox(shader, cache_id, key, key_size);
_mesa_hash_table_insert(ice->shaders.cache, keybox, shader);
return shader;
}
bool
crocus_blorp_lookup_shader(struct blorp_batch *blorp_batch, const void *key,
uint32_t key_size, uint32_t *kernel_out,
void *prog_data_out)
{
struct blorp_context *blorp = blorp_batch->blorp;
struct crocus_context *ice = blorp->driver_ctx;
struct crocus_compiled_shader *shader =
crocus_find_cached_shader(ice, CROCUS_CACHE_BLORP, key_size, key);
if (!shader)
return false;
*kernel_out = shader->offset;
*((void **)prog_data_out) = shader->prog_data;
return true;
}
bool
crocus_blorp_upload_shader(struct blorp_batch *blorp_batch, uint32_t stage,
const void *key, uint32_t key_size,
const void *kernel, uint32_t kernel_size,
const struct brw_stage_prog_data *prog_data_templ,
uint32_t prog_data_size, uint32_t *kernel_out,
void *prog_data_out)
{
struct blorp_context *blorp = blorp_batch->blorp;
struct crocus_context *ice = blorp->driver_ctx;
struct brw_stage_prog_data *prog_data = ralloc_size(NULL, prog_data_size);
memcpy(prog_data, prog_data_templ, prog_data_size);
struct crocus_binding_table bt;
memset(&bt, 0, sizeof(bt));
struct crocus_compiled_shader *shader = crocus_upload_shader(
ice, CROCUS_CACHE_BLORP, key_size, key, kernel, kernel_size, prog_data,
prog_data_size, NULL, NULL, 0, 0, &bt);
*kernel_out = shader->offset;
*((void **)prog_data_out) = shader->prog_data;
return true;
}
void
crocus_init_program_cache(struct crocus_context *ice)
{
struct crocus_screen *screen = (struct crocus_screen *)ice->ctx.screen;
ice->shaders.cache =
_mesa_hash_table_create(ice, keybox_hash, keybox_equals);
ice->shaders.cache_bo =
crocus_bo_alloc(screen->bufmgr, "program_cache", 16384);
ice->shaders.cache_bo_map =
crocus_bo_map(NULL, ice->shaders.cache_bo,
MAP_READ | MAP_WRITE | MAP_ASYNC | MAP_PERSISTENT);
}
void
crocus_destroy_program_cache(struct crocus_context *ice)
{
for (int i = 0; i < MESA_SHADER_STAGES; i++) {
ice->shaders.prog[i] = NULL;
}
if (ice->shaders.cache_bo) {
crocus_bo_unmap(ice->shaders.cache_bo);
crocus_bo_unreference(ice->shaders.cache_bo);
ice->shaders.cache_bo_map = NULL;
ice->shaders.cache_bo = NULL;
}
ralloc_free(ice->shaders.cache);
}
static const char *
cache_name(enum crocus_program_cache_id cache_id)
{
if (cache_id == CROCUS_CACHE_BLORP)
return "BLORP";
if (cache_id == CROCUS_CACHE_SF)
return "SF";
if (cache_id == CROCUS_CACHE_CLIP)
return "CLIP";
if (cache_id == CROCUS_CACHE_FF_GS)
return "FF_GS";
return _mesa_shader_stage_to_string(cache_id);
}
void
crocus_print_program_cache(struct crocus_context *ice)
{
struct crocus_screen *screen = (struct crocus_screen *)ice->ctx.screen;
const struct intel_device_info *devinfo = &screen->devinfo;
hash_table_foreach(ice->shaders.cache, entry) {
const struct keybox *keybox = entry->key;
struct crocus_compiled_shader *shader = entry->data;
fprintf(stderr, "%s:\n", cache_name(keybox->cache_id));
brw_disassemble(devinfo, ice->shaders.cache_bo_map + shader->offset, 0,
shader->prog_data->program_size, NULL, stderr);
}
}

View File

@ -0,0 +1,996 @@
/*
* Copyright © 2017 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
/**
* @file crocus_query.c
*
* ============================= GENXML CODE =============================
* [This file is compiled once per generation.]
* =======================================================================
*
* Query object support. This allows measuring various simple statistics
* via counters on the GPU. We use GenX code for MI_MATH calculations.
*/
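/* From the state tracker's point of view this is driven through the standard
 * gallium query hooks wired up at the bottom of this file; a minimal sketch
 * of a caller (illustrative only):
 *
 *    struct pipe_query *q = ctx->create_query(ctx, PIPE_QUERY_TIME_ELAPSED, 0);
 *    ctx->begin_query(ctx, q);
 *    ... draw ...
 *    ctx->end_query(ctx, q);
 *    union pipe_query_result result;
 *    ctx->get_query_result(ctx, q, true, &result);
 *    ctx->destroy_query(ctx, q);
 */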
#include <stdio.h>
#include <errno.h>
#include "perf/intel_perf.h"
#include "pipe/p_defines.h"
#include "pipe/p_state.h"
#include "pipe/p_context.h"
#include "pipe/p_screen.h"
#include "util/u_inlines.h"
#include "util/u_upload_mgr.h"
#include "crocus_context.h"
#include "crocus_defines.h"
#include "crocus_fence.h"
#include "crocus_monitor.h"
#include "crocus_resource.h"
#include "crocus_screen.h"
#include "crocus_genx_macros.h"
#if GFX_VER == 6
// TODO: Add these to genxml?
#define SO_PRIM_STORAGE_NEEDED(n) (0x2280)
#define SO_NUM_PRIMS_WRITTEN(n) (0x2288)
// TODO: remove HS/DS/CS
#define GFX6_IA_VERTICES_COUNT_num 0x2310
#define GFX6_IA_PRIMITIVES_COUNT_num 0x2318
#define GFX6_VS_INVOCATION_COUNT_num 0x2320
#define GFX6_HS_INVOCATION_COUNT_num 0x2300
#define GFX6_DS_INVOCATION_COUNT_num 0x2308
#define GFX6_GS_INVOCATION_COUNT_num 0x2328
#define GFX6_GS_PRIMITIVES_COUNT_num 0x2330
#define GFX6_CL_INVOCATION_COUNT_num 0x2338
#define GFX6_CL_PRIMITIVES_COUNT_num 0x2340
#define GFX6_PS_INVOCATION_COUNT_num 0x2348
#define GFX6_CS_INVOCATION_COUNT_num 0x2290
#define GFX6_PS_DEPTH_COUNT_num 0x2350
#elif GFX_VER == 7
#define SO_PRIM_STORAGE_NEEDED(n) (GENX(SO_PRIM_STORAGE_NEEDED0_num) + (n) * 8)
#define SO_NUM_PRIMS_WRITTEN(n) (GENX(SO_NUM_PRIMS_WRITTEN0_num) + (n) * 8)
#endif
struct crocus_query {
enum pipe_query_type type;
int index;
bool ready;
bool stalled;
uint64_t result;
struct crocus_state_ref query_state_ref;
struct crocus_query_snapshots *map;
struct crocus_syncobj *syncobj;
int batch_idx;
struct crocus_monitor_object *monitor;
/* Fence for PIPE_QUERY_GPU_FINISHED. */
struct pipe_fence_handle *fence;
};
struct crocus_query_snapshots {
/** crocus_render_condition's saved MI_PREDICATE_RESULT value. */
uint64_t predicate_result;
/** Have the start/end snapshots landed? */
uint64_t snapshots_landed;
/** Starting and ending counter snapshots */
uint64_t start;
uint64_t end;
};
struct crocus_query_so_overflow {
uint64_t predicate_result;
uint64_t snapshots_landed;
struct {
uint64_t prim_storage_needed[2];
uint64_t num_prims[2];
} stream[4];
};
#if GFX_VERx10 == 75
static struct mi_value
query_mem64(struct crocus_query *q, uint32_t offset)
{
return mi_mem64(rw_bo(crocus_resource_bo(q->query_state_ref.res),
q->query_state_ref.offset + offset));
}
#endif
/**
* Is this type of query written by PIPE_CONTROL?
*/
static bool
crocus_is_query_pipelined(struct crocus_query *q)
{
switch (q->type) {
case PIPE_QUERY_OCCLUSION_COUNTER:
case PIPE_QUERY_OCCLUSION_PREDICATE:
case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
case PIPE_QUERY_TIMESTAMP:
case PIPE_QUERY_TIMESTAMP_DISJOINT:
case PIPE_QUERY_TIME_ELAPSED:
return true;
default:
return false;
}
}
static void
mark_available(struct crocus_context *ice, struct crocus_query *q)
{
#if GFX_VERx10 == 75
struct crocus_batch *batch = &ice->batches[q->batch_idx];
struct crocus_screen *screen = batch->screen;
unsigned flags = PIPE_CONTROL_WRITE_IMMEDIATE;
unsigned offset = offsetof(struct crocus_query_snapshots, snapshots_landed);
struct crocus_bo *bo = crocus_resource_bo(q->query_state_ref.res);
offset += q->query_state_ref.offset;
if (!crocus_is_query_pipelined(q)) {
screen->vtbl.store_data_imm64(batch, bo, offset, true);
} else {
/* Order available *after* the query results. */
flags |= PIPE_CONTROL_FLUSH_ENABLE;
crocus_emit_pipe_control_write(batch, "query: mark available",
flags, bo, offset, true);
}
#endif
}
/**
* Write a pipelined snapshot (such as PS_DEPTH_COUNT or a timestamp) into the
* query object's buffer at the given offset via a PIPE_CONTROL.
*/
static void
crocus_pipelined_write(struct crocus_batch *batch,
struct crocus_query *q,
enum pipe_control_flags flags,
unsigned offset)
{
struct crocus_bo *bo = crocus_resource_bo(q->query_state_ref.res);
crocus_emit_pipe_control_write(batch, "query: pipelined snapshot write",
flags,
bo, offset, 0ull);
}
static void
write_value(struct crocus_context *ice, struct crocus_query *q, unsigned offset)
{
struct crocus_batch *batch = &ice->batches[q->batch_idx];
#if GFX_VER >= 6
struct crocus_screen *screen = batch->screen;
struct crocus_bo *bo = crocus_resource_bo(q->query_state_ref.res);
#endif
if (!crocus_is_query_pipelined(q)) {
crocus_emit_pipe_control_flush(batch,
"query: non-pipelined snapshot write",
PIPE_CONTROL_CS_STALL |
PIPE_CONTROL_STALL_AT_SCOREBOARD);
q->stalled = true;
}
switch (q->type) {
case PIPE_QUERY_OCCLUSION_COUNTER:
case PIPE_QUERY_OCCLUSION_PREDICATE:
case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
crocus_pipelined_write(&ice->batches[CROCUS_BATCH_RENDER], q,
PIPE_CONTROL_WRITE_DEPTH_COUNT |
PIPE_CONTROL_DEPTH_STALL,
offset);
break;
case PIPE_QUERY_TIME_ELAPSED:
case PIPE_QUERY_TIMESTAMP:
case PIPE_QUERY_TIMESTAMP_DISJOINT:
crocus_pipelined_write(&ice->batches[CROCUS_BATCH_RENDER], q,
PIPE_CONTROL_WRITE_TIMESTAMP,
offset);
break;
case PIPE_QUERY_PRIMITIVES_GENERATED:
#if GFX_VER >= 6
screen->vtbl.store_register_mem64(batch,
q->index == 0 ?
GENX(CL_INVOCATION_COUNT_num) :
SO_PRIM_STORAGE_NEEDED(q->index),
bo, offset, false);
#endif
break;
case PIPE_QUERY_PRIMITIVES_EMITTED:
#if GFX_VER >= 6
screen->vtbl.store_register_mem64(batch,
SO_NUM_PRIMS_WRITTEN(q->index),
bo, offset, false);
#endif
break;
case PIPE_QUERY_PIPELINE_STATISTICS_SINGLE: {
#if GFX_VER >= 6
static const uint32_t index_to_reg[] = {
GENX(IA_VERTICES_COUNT_num),
GENX(IA_PRIMITIVES_COUNT_num),
GENX(VS_INVOCATION_COUNT_num),
GENX(GS_INVOCATION_COUNT_num),
GENX(GS_PRIMITIVES_COUNT_num),
GENX(CL_INVOCATION_COUNT_num),
GENX(CL_PRIMITIVES_COUNT_num),
GENX(PS_INVOCATION_COUNT_num),
GENX(HS_INVOCATION_COUNT_num),
GENX(DS_INVOCATION_COUNT_num),
GENX(CS_INVOCATION_COUNT_num),
};
uint32_t reg = index_to_reg[q->index];
#if GFX_VER == 6
/* Gfx6 GS code counts full primitives, that is, it won't count individual
* triangles in a triangle strip. Use CL_INVOCATION_COUNT for that.
*/
if (q->index == PIPE_STAT_QUERY_GS_PRIMITIVES)
reg = GENX(CL_INVOCATION_COUNT_num);
#endif
screen->vtbl.store_register_mem64(batch, reg, bo, offset, false);
#endif
break;
}
default:
assert(false);
}
}
#if GFX_VER >= 6
static void
write_overflow_values(struct crocus_context *ice, struct crocus_query *q, bool end)
{
struct crocus_batch *batch = &ice->batches[CROCUS_BATCH_RENDER];
struct crocus_screen *screen = batch->screen;
uint32_t count = q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE ? 1 : 4;
struct crocus_bo *bo = crocus_resource_bo(q->query_state_ref.res);
uint32_t offset = q->query_state_ref.offset;
crocus_emit_pipe_control_flush(batch,
"query: write SO overflow snapshots",
PIPE_CONTROL_CS_STALL |
PIPE_CONTROL_STALL_AT_SCOREBOARD);
for (uint32_t i = 0; i < count; i++) {
int s = q->index + i;
int g_idx = offset + offsetof(struct crocus_query_so_overflow,
stream[s].num_prims[end]);
int w_idx = offset + offsetof(struct crocus_query_so_overflow,
stream[s].prim_storage_needed[end]);
screen->vtbl.store_register_mem64(batch, SO_NUM_PRIMS_WRITTEN(s),
bo, g_idx, false);
screen->vtbl.store_register_mem64(batch, SO_PRIM_STORAGE_NEEDED(s),
bo, w_idx, false);
}
}
#endif
static uint64_t
crocus_raw_timestamp_delta(uint64_t time0, uint64_t time1)
{
if (time0 > time1) {
return (1ULL << TIMESTAMP_BITS) + time1 - time0;
} else {
return time1 - time0;
}
}
static bool
stream_overflowed(struct crocus_query_so_overflow *so, int s)
{
return (so->stream[s].prim_storage_needed[1] -
so->stream[s].prim_storage_needed[0]) !=
(so->stream[s].num_prims[1] - so->stream[s].num_prims[0]);
}
static void
calculate_result_on_cpu(const struct intel_device_info *devinfo,
struct crocus_query *q)
{
switch (q->type) {
case PIPE_QUERY_OCCLUSION_PREDICATE:
case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
q->result = q->map->end != q->map->start;
break;
case PIPE_QUERY_TIMESTAMP:
case PIPE_QUERY_TIMESTAMP_DISJOINT:
/* The timestamp is the single starting snapshot. */
q->result = intel_device_info_timebase_scale(devinfo, q->map->start);
q->result &= (1ull << TIMESTAMP_BITS) - 1;
break;
case PIPE_QUERY_TIME_ELAPSED:
q->result = crocus_raw_timestamp_delta(q->map->start, q->map->end);
q->result = intel_device_info_timebase_scale(devinfo, q->result);
q->result &= (1ull << TIMESTAMP_BITS) - 1;
break;
case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
q->result = stream_overflowed((void *) q->map, q->index);
break;
case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
q->result = false;
for (int i = 0; i < MAX_VERTEX_STREAMS; i++)
q->result |= stream_overflowed((void *) q->map, i);
break;
case PIPE_QUERY_PIPELINE_STATISTICS_SINGLE:
q->result = q->map->end - q->map->start;
/* WaDividePSInvocationCountBy4:HSW,BDW */
if (GFX_VER == 7 && devinfo->is_haswell && q->index == PIPE_STAT_QUERY_PS_INVOCATIONS)
q->result /= 4;
break;
case PIPE_QUERY_OCCLUSION_COUNTER:
case PIPE_QUERY_PRIMITIVES_GENERATED:
case PIPE_QUERY_PRIMITIVES_EMITTED:
default:
q->result = q->map->end - q->map->start;
break;
}
q->ready = true;
}
#if GFX_VERx10 == 75
/**
* Calculate the streamout overflow for stream \p idx:
*
* (num_prims[1] - num_prims[0]) - (storage_needed[1] - storage_needed[0])
*/
static struct mi_value
calc_overflow_for_stream(struct mi_builder *b,
struct crocus_query *q,
int idx)
{
#define C(counter, i) query_mem64(q, \
offsetof(struct crocus_query_so_overflow, stream[idx].counter[i]))
return mi_isub(b, mi_isub(b, C(num_prims, 1), C(num_prims, 0)),
mi_isub(b, C(prim_storage_needed, 1),
C(prim_storage_needed, 0)));
#undef C
}
/**
* Calculate whether any stream has overflowed.
*/
static struct mi_value
calc_overflow_any_stream(struct mi_builder *b, struct crocus_query *q)
{
struct mi_value stream_result[MAX_VERTEX_STREAMS];
for (int i = 0; i < MAX_VERTEX_STREAMS; i++)
stream_result[i] = calc_overflow_for_stream(b, q, i);
struct mi_value result = stream_result[0];
for (int i = 1; i < MAX_VERTEX_STREAMS; i++)
result = mi_ior(b, result, stream_result[i]);
return result;
}
static bool
query_is_boolean(enum pipe_query_type type)
{
switch (type) {
case PIPE_QUERY_OCCLUSION_PREDICATE:
case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
return true;
default:
return false;
}
}
/**
* Calculate the result using MI_MATH.
*/
static struct mi_value
calculate_result_on_gpu(const struct intel_device_info *devinfo,
struct mi_builder *b,
struct crocus_query *q)
{
struct mi_value result;
struct mi_value start_val =
query_mem64(q, offsetof(struct crocus_query_snapshots, start));
struct mi_value end_val =
query_mem64(q, offsetof(struct crocus_query_snapshots, end));
switch (q->type) {
case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
result = calc_overflow_for_stream(b, q, q->index);
break;
case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
result = calc_overflow_any_stream(b, q);
break;
case PIPE_QUERY_TIMESTAMP: {
/* TODO: This discards any fractional bits of the timebase scale.
* We would need to do a bit of fixed point math on the CS ALU, or
* launch an actual shader to calculate this with full precision.
*/
uint32_t scale = 1000000000ull / devinfo->timestamp_frequency;
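/* For example, assuming the ~12.5 MHz timestamp clock typical of these
 * generations, scale works out to 80 (nanoseconds per GPU tick).
 */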
result = mi_iand(b, mi_imm((1ull << 36) - 1),
mi_imul_imm(b, start_val, scale));
break;
}
case PIPE_QUERY_TIME_ELAPSED: {
/* TODO: This discards fractional bits (see above). */
uint32_t scale = 1000000000ull / devinfo->timestamp_frequency;
result = mi_imul_imm(b, mi_isub(b, end_val, start_val), scale);
break;
}
default:
result = mi_isub(b, end_val, start_val);
break;
}
/* WaDividePSInvocationCountBy4:HSW,BDW */
if (GFX_VER == 7 && devinfo->is_haswell &&
q->type == PIPE_QUERY_PIPELINE_STATISTICS_SINGLE &&
q->index == PIPE_STAT_QUERY_PS_INVOCATIONS)
result = mi_ushr32_imm(b, result, 2);
if (query_is_boolean(q->type))
result = mi_iand(b, mi_nz(b, result), mi_imm(1));
return result;
}
#endif
static struct pipe_query *
crocus_create_query(struct pipe_context *ctx,
unsigned query_type,
unsigned index)
{
struct crocus_query *q = calloc(1, sizeof(struct crocus_query));
q->type = query_type;
q->index = index;
q->monitor = NULL;
if (q->type == PIPE_QUERY_PIPELINE_STATISTICS_SINGLE &&
q->index == PIPE_STAT_QUERY_CS_INVOCATIONS)
q->batch_idx = CROCUS_BATCH_COMPUTE;
else
q->batch_idx = CROCUS_BATCH_RENDER;
return (struct pipe_query *) q;
}
static struct pipe_query *
crocus_create_batch_query(struct pipe_context *ctx,
unsigned num_queries,
unsigned *query_types)
{
struct crocus_context *ice = (void *) ctx;
struct crocus_query *q = calloc(1, sizeof(struct crocus_query));
if (unlikely(!q))
return NULL;
q->type = PIPE_QUERY_DRIVER_SPECIFIC;
q->index = -1;
q->monitor = crocus_create_monitor_object(ice, num_queries, query_types);
if (unlikely(!q->monitor)) {
free(q);
return NULL;
}
return (struct pipe_query *) q;
}
static void
crocus_destroy_query(struct pipe_context *ctx, struct pipe_query *p_query)
{
struct crocus_query *query = (void *) p_query;
struct crocus_screen *screen = (void *) ctx->screen;
if (query->monitor) {
crocus_destroy_monitor_object(ctx, query->monitor);
query->monitor = NULL;
} else {
crocus_syncobj_reference(screen, &query->syncobj, NULL);
screen->base.fence_reference(ctx->screen, &query->fence, NULL);
}
free(query);
}
static bool
crocus_begin_query(struct pipe_context *ctx, struct pipe_query *query)
{
struct crocus_context *ice = (void *) ctx;
struct crocus_query *q = (void *) query;
if (q->monitor)
return crocus_begin_monitor(ctx, q->monitor);
void *ptr = NULL;
uint32_t size;
if (q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE ||
q->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE)
size = sizeof(struct crocus_query_so_overflow);
else
size = sizeof(struct crocus_query_snapshots);
u_upload_alloc(ice->query_buffer_uploader, 0,
size, size, &q->query_state_ref.offset,
&q->query_state_ref.res, &ptr);
if (!crocus_resource_bo(q->query_state_ref.res))
return false;
q->map = ptr;
if (!q->map)
return false;
q->result = 0ull;
q->ready = false;
WRITE_ONCE(q->map->snapshots_landed, false);
if (q->type == PIPE_QUERY_PRIMITIVES_GENERATED && q->index == 0) {
ice->state.prims_generated_query_active = true;
ice->state.dirty |= CROCUS_DIRTY_STREAMOUT | CROCUS_DIRTY_CLIP;
}
#if GFX_VER <= 5
if (q->type == PIPE_QUERY_OCCLUSION_COUNTER ||
q->type == PIPE_QUERY_OCCLUSION_PREDICATE) {
ice->state.stats_wm++;
ice->state.dirty |= CROCUS_DIRTY_WM | CROCUS_DIRTY_COLOR_CALC_STATE;
}
#endif
#if GFX_VER >= 6
if (q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE ||
q->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE)
write_overflow_values(ice, q, false);
else
#endif
write_value(ice, q,
q->query_state_ref.offset +
offsetof(struct crocus_query_snapshots, start));
return true;
}
static bool
crocus_end_query(struct pipe_context *ctx, struct pipe_query *query)
{
struct crocus_context *ice = (void *) ctx;
struct crocus_query *q = (void *) query;
if (q->monitor)
return crocus_end_monitor(ctx, q->monitor);
if (q->type == PIPE_QUERY_GPU_FINISHED) {
ctx->flush(ctx, &q->fence, PIPE_FLUSH_DEFERRED);
return true;
}
struct crocus_batch *batch = &ice->batches[q->batch_idx];
if (q->type == PIPE_QUERY_TIMESTAMP) {
crocus_begin_query(ctx, query);
crocus_batch_reference_signal_syncobj(batch, &q->syncobj);
mark_available(ice, q);
return true;
}
#if GFX_VER <= 5
if (q->type == PIPE_QUERY_OCCLUSION_COUNTER ||
q->type == PIPE_QUERY_OCCLUSION_PREDICATE) {
ice->state.stats_wm--;
ice->state.dirty |= CROCUS_DIRTY_WM | CROCUS_DIRTY_COLOR_CALC_STATE;
}
#endif
if (q->type == PIPE_QUERY_PRIMITIVES_GENERATED && q->index == 0) {
ice->state.prims_generated_query_active = false;
ice->state.dirty |= CROCUS_DIRTY_STREAMOUT | CROCUS_DIRTY_CLIP;
}
#if GFX_VER >= 6
if (q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE ||
q->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE)
write_overflow_values(ice, q, true);
else
#endif
write_value(ice, q,
q->query_state_ref.offset +
offsetof(struct crocus_query_snapshots, end));
crocus_batch_reference_signal_syncobj(batch, &q->syncobj);
mark_available(ice, q);
return true;
}
/**
* See if the snapshots have landed for a query, and if so, compute the
* result and mark it ready. Does not flush (unlike crocus_get_query_result).
*/
static void
crocus_check_query_no_flush(struct crocus_context *ice, struct crocus_query *q)
{
struct crocus_screen *screen = (void *) ice->ctx.screen;
const struct intel_device_info *devinfo = &screen->devinfo;
if (!q->ready && READ_ONCE(q->map->snapshots_landed)) {
calculate_result_on_cpu(devinfo, q);
}
}
static bool
crocus_get_query_result(struct pipe_context *ctx,
struct pipe_query *query,
bool wait,
union pipe_query_result *result)
{
struct crocus_context *ice = (void *) ctx;
struct crocus_query *q = (void *) query;
if (q->monitor)
return crocus_get_monitor_result(ctx, q->monitor, wait, result->batch);
struct crocus_screen *screen = (void *) ctx->screen;
const struct intel_device_info *devinfo = &screen->devinfo;
if (unlikely(screen->no_hw)) {
result->u64 = 0;
return true;
}
if (!q->ready) {
struct crocus_batch *batch = &ice->batches[q->batch_idx];
if (q->syncobj == crocus_batch_get_signal_syncobj(batch))
crocus_batch_flush(batch);
#if GFX_VERx10 == 75
while (!READ_ONCE(q->map->snapshots_landed)) {
if (wait)
crocus_wait_syncobj(ctx->screen, q->syncobj, INT64_MAX);
else
return false;
}
assert(READ_ONCE(q->map->snapshots_landed));
#else
if (wait)
crocus_wait_syncobj(ctx->screen, q->syncobj, INT64_MAX);
#endif
calculate_result_on_cpu(devinfo, q);
}
assert(q->ready);
result->u64 = q->result;
return true;
}
#if GFX_VER == 7
static void
crocus_get_query_result_resource(struct pipe_context *ctx,
struct pipe_query *query,
bool wait,
enum pipe_query_value_type result_type,
int index,
struct pipe_resource *p_res,
unsigned offset)
{
struct crocus_context *ice = (void *) ctx;
struct crocus_query *q = (void *) query;
struct crocus_batch *batch = &ice->batches[q->batch_idx];
struct crocus_screen *screen = batch->screen;
const struct intel_device_info *devinfo = &batch->screen->devinfo;
struct crocus_resource *res = (void *) p_res;
struct crocus_bo *query_bo = crocus_resource_bo(q->query_state_ref.res);
struct crocus_bo *dst_bo = crocus_resource_bo(p_res);
unsigned snapshots_landed_offset =
offsetof(struct crocus_query_snapshots, snapshots_landed);
res->bind_history |= PIPE_BIND_QUERY_BUFFER;
if (index == -1) {
/* They're asking for the availability of the result. If we still
* have commands queued up which produce the result, submit them
* now so that progress happens. Either way, copy the snapshots
* landed field to the destination resource.
*/
if (q->syncobj == crocus_batch_get_signal_syncobj(batch))
crocus_batch_flush(batch);
screen->vtbl.copy_mem_mem(batch, dst_bo, offset,
query_bo, snapshots_landed_offset,
result_type <= PIPE_QUERY_TYPE_U32 ? 4 : 8);
return;
}
if (!q->ready && READ_ONCE(q->map->snapshots_landed)) {
/* The final snapshots happen to have landed, so let's just compute
* the result on the CPU now...
*/
calculate_result_on_cpu(devinfo, q);
}
if (q->ready) {
/* We happen to have the result on the CPU, so just copy it. */
if (result_type <= PIPE_QUERY_TYPE_U32) {
screen->vtbl.store_data_imm32(batch, dst_bo, offset, q->result);
} else {
screen->vtbl.store_data_imm64(batch, dst_bo, offset, q->result);
}
/* Make sure the result lands before they bind the QBO elsewhere
 * and use the result.
*/
// XXX: Why? i965 doesn't do this.
crocus_emit_pipe_control_flush(batch,
"query: unknown QBO flushing hack",
PIPE_CONTROL_CS_STALL);
return;
}
#if GFX_VERx10 == 75
bool predicated = !wait && !q->stalled;
struct mi_builder b;
mi_builder_init(&b, &batch->screen->devinfo, batch);
struct mi_value result = calculate_result_on_gpu(devinfo, &b, q);
struct mi_value dst =
result_type <= PIPE_QUERY_TYPE_U32 ? mi_mem32(rw_bo(dst_bo, offset))
: mi_mem64(rw_bo(dst_bo, offset));
if (predicated) {
mi_store(&b, mi_reg32(MI_PREDICATE_RESULT),
mi_mem64(ro_bo(query_bo, snapshots_landed_offset)));
mi_store_if(&b, dst, result);
} else {
mi_store(&b, dst, result);
}
#endif
}
#endif
static void
crocus_set_active_query_state(struct pipe_context *ctx, bool enable)
{
struct crocus_context *ice = (void *) ctx;
if (ice->state.statistics_counters_enabled == enable)
return;
// XXX: most packets aren't paying attention to this yet, because it'd
// have to be done dynamically at draw time, which is a pain
ice->state.statistics_counters_enabled = enable;
ice->state.dirty |= CROCUS_DIRTY_CLIP |
CROCUS_DIRTY_RASTER |
CROCUS_DIRTY_STREAMOUT |
CROCUS_DIRTY_WM;
ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_GS |
CROCUS_STAGE_DIRTY_TCS |
CROCUS_STAGE_DIRTY_TES |
CROCUS_STAGE_DIRTY_VS;
}
static void
set_predicate_enable(struct crocus_context *ice, bool value)
{
if (value)
ice->state.predicate = CROCUS_PREDICATE_STATE_RENDER;
else
ice->state.predicate = CROCUS_PREDICATE_STATE_DONT_RENDER;
}
#if GFX_VER == 7
static void
set_predicate_for_result(struct crocus_context *ice,
struct crocus_query *q,
bool inverted)
{
struct crocus_batch *batch = &ice->batches[CROCUS_BATCH_RENDER];
struct crocus_bo *bo = crocus_resource_bo(q->query_state_ref.res);
#if GFX_VERx10 != 75
/* IVB doesn't have enough MI for this */
if (q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE ||
q->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE) {
ice->state.predicate = CROCUS_PREDICATE_STATE_STALL_FOR_QUERY;
return;
}
#endif
/* The CPU doesn't have the query result yet; use hardware predication */
ice->state.predicate = CROCUS_PREDICATE_STATE_USE_BIT;
/* Ensure the memory is coherent for MI_LOAD_REGISTER_* commands. */
crocus_emit_pipe_control_flush(batch,
"conditional rendering: set predicate",
PIPE_CONTROL_FLUSH_ENABLE);
q->stalled = true;
#if GFX_VERx10 != 75
struct crocus_screen *screen = batch->screen;
screen->vtbl.load_register_mem64(batch, MI_PREDICATE_SRC0, bo,
q->query_state_ref.offset + offsetof(struct crocus_query_snapshots, start));
screen->vtbl.load_register_mem64(batch, MI_PREDICATE_SRC1, bo,
q->query_state_ref.offset + offsetof(struct crocus_query_snapshots, end));
uint32_t mi_predicate = MI_PREDICATE | MI_PREDICATE_COMBINEOP_SET |
MI_PREDICATE_COMPAREOP_SRCS_EQUAL;
if (inverted)
mi_predicate |= MI_PREDICATE_LOADOP_LOAD;
else
mi_predicate |= MI_PREDICATE_LOADOP_LOADINV;
crocus_batch_emit(batch, &mi_predicate, sizeof(uint32_t));
#else
struct mi_builder b;
mi_builder_init(&b, &batch->screen->devinfo, batch);
struct mi_value result;
switch (q->type) {
case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
result = calc_overflow_for_stream(&b, q, q->index);
break;
case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
result = calc_overflow_any_stream(&b, q);
break;
default: {
/* PIPE_QUERY_OCCLUSION_* */
struct mi_value start =
query_mem64(q, offsetof(struct crocus_query_snapshots, start));
struct mi_value end =
query_mem64(q, offsetof(struct crocus_query_snapshots, end));
result = mi_isub(&b, end, start);
break;
}
}
result = inverted ? mi_z(&b, result) : mi_nz(&b, result);
result = mi_iand(&b, result, mi_imm(1));
/* We immediately set the predicate on the render batch, as all the
* counters come from 3D operations. However, we may need to predicate
* a compute dispatch, which executes in a different GEM context and has
* a different MI_PREDICATE_RESULT register. So, we save the result to
* memory and reload it in crocus_launch_grid.
*/
mi_value_ref(&b, result);
mi_store(&b, mi_reg64(MI_PREDICATE_SRC0), result);
mi_store(&b, mi_reg64(MI_PREDICATE_SRC1), mi_imm(0));
unsigned mi_predicate = MI_PREDICATE | MI_PREDICATE_LOADOP_LOADINV |
MI_PREDICATE_COMBINEOP_SET |
MI_PREDICATE_COMPAREOP_SRCS_EQUAL;
crocus_batch_emit(batch, &mi_predicate, sizeof(uint32_t));
mi_store(&b, query_mem64(q, offsetof(struct crocus_query_snapshots,
predicate_result)), result);
#endif
ice->state.compute_predicate = bo;
}
#endif
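/* Hook for pipe->render_condition(): if the query result is already known,
 * rendering is statically enabled or disabled; otherwise gfx7 parts program
 * MI_PREDICATE from the query BO, and older parts fall back to stalling on
 * the query result.
 */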
static void
crocus_render_condition(struct pipe_context *ctx,
struct pipe_query *query,
bool condition,
enum pipe_render_cond_flag mode)
{
struct crocus_context *ice = (void *) ctx;
struct crocus_query *q = (void *) query;
/* The old condition isn't relevant; we'll update it if necessary */
ice->state.compute_predicate = NULL;
ice->condition.query = q;
ice->condition.condition = condition;
ice->condition.mode = mode;
if (!q) {
ice->state.predicate = CROCUS_PREDICATE_STATE_RENDER;
return;
}
crocus_check_query_no_flush(ice, q);
if (q->result || q->ready) {
set_predicate_enable(ice, (q->result != 0) ^ condition);
} else {
if (mode == PIPE_RENDER_COND_NO_WAIT ||
mode == PIPE_RENDER_COND_BY_REGION_NO_WAIT) {
perf_debug(&ice->dbg, "Conditional rendering demoted from "
"\"no wait\" to \"wait\".");
}
#if GFX_VER == 7
set_predicate_for_result(ice, q, condition);
#else
ice->state.predicate = CROCUS_PREDICATE_STATE_STALL_FOR_QUERY;
#endif
}
}
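/* Called when a hardware-predicated condition has to be resolved on the CPU
 * (e.g. by paths that can't use MI_PREDICATE): wait for the query result and
 * turn the condition into a static render/don't-render decision.
 */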
static void
crocus_resolve_conditional_render(struct crocus_context *ice)
{
struct pipe_context *ctx = (void *) ice;
struct crocus_query *q = ice->condition.query;
struct pipe_query *query = (void *) q;
union pipe_query_result result;
if (ice->state.predicate != CROCUS_PREDICATE_STATE_USE_BIT)
return;
assert(q);
crocus_get_query_result(ctx, query, true, &result);
set_predicate_enable(ice, (q->result != 0) ^ ice->condition.condition);
}
#if GFX_VER >= 7
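/* Reload the predicate that set_predicate_for_result() saved to memory: the
 * compute batch runs in a different GEM context with its own
 * MI_PREDICATE_RESULT register, so it is re-derived from the BO recorded in
 * ice->state.compute_predicate before a predicated dispatch.
 */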
static void
crocus_emit_compute_predicate(struct crocus_batch *batch)
{
struct crocus_context *ice = batch->ice;
struct crocus_screen *screen = batch->screen;
screen->vtbl.load_register_mem32(batch, MI_PREDICATE_SRC0,
ice->state.compute_predicate, 0);
screen->vtbl.load_register_imm32(batch, MI_PREDICATE_SRC1, 0);
unsigned mi_predicate = MI_PREDICATE | MI_PREDICATE_LOADOP_LOADINV |
MI_PREDICATE_COMBINEOP_SET |
MI_PREDICATE_COMPAREOP_SRCS_EQUAL;
crocus_batch_emit(batch, &mi_predicate, sizeof(uint32_t));
}
#endif
void
genX(init_screen_query)(struct crocus_screen *screen)
{
screen->vtbl.resolve_conditional_render = crocus_resolve_conditional_render;
#if GFX_VER >= 7
screen->vtbl.emit_compute_predicate = crocus_emit_compute_predicate;
#endif
}
void
genX(init_query)(struct crocus_context *ice)
{
struct pipe_context *ctx = &ice->ctx;
ctx->create_query = crocus_create_query;
ctx->create_batch_query = crocus_create_batch_query;
ctx->destroy_query = crocus_destroy_query;
ctx->begin_query = crocus_begin_query;
ctx->end_query = crocus_end_query;
ctx->get_query_result = crocus_get_query_result;
#if GFX_VER == 7
ctx->get_query_result_resource = crocus_get_query_result_resource;
#endif
ctx->set_active_query_state = crocus_set_active_query_state;
ctx->render_condition = crocus_render_condition;
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,501 @@
/*
* Copyright 2017 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef CROCUS_RESOURCE_H
#define CROCUS_RESOURCE_H
#include "pipe/p_state.h"
#include "util/u_inlines.h"
#include "util/u_range.h"
#include "intel/isl/isl.h"
#include "crocus_bufmgr.h"
struct crocus_batch;
struct crocus_context;
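/* Enough mip levels for the largest surface we expose (16384x16384 on gfx7,
 * i.e. levels 0..14).
 */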
#define CROCUS_MAX_MIPLEVELS 15
struct crocus_format_info {
enum isl_format fmt;
enum pipe_swizzle swizzles[4];
};
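/* PIPE_SWIZZLE_X..W are 0..3 and PIPE_SWIZZLE_0/1 are 4/5, while
 * ISL_CHANNEL_SELECT_RED..ALPHA are 4..7 and ZERO/ONE are 0/1, so adding 4
 * and masking with 7 converts one enum into the other.
 */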
static inline enum isl_channel_select
pipe_to_isl_swizzle(const enum pipe_swizzle pswz, bool green_to_blue)
{
unsigned swz = (pswz + 4) & 7;
return (green_to_blue && swz == ISL_CHANNEL_SELECT_GREEN) ? ISL_CHANNEL_SELECT_BLUE : swz;
}
static inline struct isl_swizzle
pipe_to_isl_swizzles(const enum pipe_swizzle pswz[4])
{
struct isl_swizzle swz;
swz.r = pipe_to_isl_swizzle(pswz[0], false);
swz.g = pipe_to_isl_swizzle(pswz[1], false);
swz.b = pipe_to_isl_swizzle(pswz[2], false);
swz.a = pipe_to_isl_swizzle(pswz[3], false);
return swz;
}
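/* Compose a view swizzle (vswz) on top of a format swizzle (fswz): each
 * output channel picks the format channel the view requests, with 0/1
 * passed through unchanged.
 */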
static inline void
crocus_combine_swizzle(enum pipe_swizzle outswz[4],
const enum pipe_swizzle fswz[4],
const enum pipe_swizzle vswz[4])
{
for (unsigned i = 0; i < 4; i++) {
switch (vswz[i]) {
case PIPE_SWIZZLE_X: outswz[i] = fswz[0]; break;
case PIPE_SWIZZLE_Y: outswz[i] = fswz[1]; break;
case PIPE_SWIZZLE_Z: outswz[i] = fswz[2]; break;
case PIPE_SWIZZLE_W: outswz[i] = fswz[3]; break;
case PIPE_SWIZZLE_1: outswz[i] = PIPE_SWIZZLE_1; break;
case PIPE_SWIZZLE_0: outswz[i] = PIPE_SWIZZLE_0; break;
default: unreachable("invalid swizzle");
}
}
}
/**
* Resources represent a GPU buffer object or image (mipmap tree).
*
* They contain the storage (BO) and layout information (ISL surface).
*/
struct crocus_resource {
struct pipe_resource base;
enum pipe_format internal_format;
/**
* The ISL surface layout information for this resource.
*
* This is not filled out for PIPE_BUFFER resources, but is guaranteed
* to be zeroed. Note that this also guarantees that res->surf.tiling
* will be ISL_TILING_LINEAR, so it's safe to check that.
*/
struct isl_surf surf;
/** Backing storage for the resource */
struct crocus_bo *bo;
/** offset at which data starts in the BO */
uint64_t offset;
/**
* A bitfield of PIPE_BIND_* indicating how this resource was bound
* in the past. Only meaningful for PIPE_BUFFER; used for flushing.
*/
unsigned bind_history;
/**
* A bitfield of MESA_SHADER_* stages indicating where this resource
* was bound.
*/
unsigned bind_stages;
/**
* For PIPE_BUFFER resources, a range which may contain valid data.
*
* This is a conservative estimate of what part of the buffer contains
* valid data that we have to preserve. The rest of the buffer is
* considered invalid, and we can promote writes to that region to
* be unsynchronized writes, avoiding blit copies.
*/
struct util_range valid_buffer_range;
/**
* Auxiliary buffer information (CCS, MCS, or HiZ).
*/
struct {
/** The surface layout for the auxiliary buffer. */
struct isl_surf surf;
/** The buffer object containing the auxiliary data. */
struct crocus_bo *bo;
/** Offset into 'bo' where the auxiliary surface starts. */
uint32_t offset;
struct {
struct isl_surf surf;
/** Offset into 'bo' where the auxiliary surface starts. */
uint32_t offset;
} extra_aux;
/**
* Fast clear color for this surface. For depth surfaces, the clear
* value is stored as a float32 in the red component.
*/
union isl_color_value clear_color;
/**
* \brief The type of auxiliary compression used by this resource.
*
* This describes the type of auxiliary compression that is intended to
* be used by this resource. An aux usage of ISL_AUX_USAGE_NONE means
* that auxiliary compression is permanently disabled. An aux usage
* other than ISL_AUX_USAGE_NONE does not imply that auxiliary
* compression will always be enabled for this surface.
*/
enum isl_aux_usage usage;
/**
* \brief Maps miptree slices to their current aux state.
*
* This two-dimensional array is indexed as [level][layer] and stores an
* aux state for each slice.
*/
enum isl_aux_state **state;
/**
* If (1 << level) is set, HiZ is enabled for that miplevel.
*/
uint16_t has_hiz;
} aux;
/**
* \brief Shadow miptree for sampling when the main surface isn't supported by the sampler HW.
*
* To workaround various sampler bugs and limitations, we blit the main
* texture into a new texture that can be sampled.
*
* This miptree may be used for:
* - Stencil texturing (pre-BDW) as required by GL_ARB_stencil_texturing.
*/
struct crocus_resource *shadow;
bool shadow_needs_update;
/**
* For external surfaces, this is format that was used to create or import
* the surface. For internal surfaces, this will always be
* PIPE_FORMAT_NONE.
*/
enum pipe_format external_format;
/**
* For external surfaces, this is DRM format modifier that was used to
* create or import the surface. For internal surfaces, this will always
* be DRM_FORMAT_MOD_INVALID.
*/
const struct isl_drm_modifier_info *mod_info;
/**
* The screen the resource was originally created with, stored for refcounting.
*/
struct pipe_screen *orig_screen;
};
/**
* A simple <resource, offset> tuple for storing a reference to a
* piece of state stored in a GPU buffer object.
*/
struct crocus_state_ref {
struct pipe_resource *res;
uint32_t offset;
};
/**
* Gallium CSO for sampler views (texture views).
*
* In addition to the normal pipe_resource, this adds an ISL view
* which may reinterpret the format or restrict levels/layers.
*
* These can also be linear texture buffers.
*/
struct crocus_sampler_view {
struct pipe_sampler_view base;
struct isl_view view;
struct isl_view gather_view;
enum pipe_swizzle swizzle[4];
union isl_color_value clear_color;
/* A short-cut (not a reference) to the actual resource being viewed.
* Multi-planar (or depth+stencil) images may have multiple resources
* chained together; this skips having to traverse base->texture->*.
*/
struct crocus_resource *res;
};
/**
* Image view representation.
*/
struct crocus_image_view {
struct pipe_image_view base;
struct isl_view view;
};
/**
* Gallium CSO for surfaces (framebuffer attachments).
*
* A view of a surface that can be bound to a color render target or
* depth/stencil attachment.
*/
struct crocus_surface {
struct pipe_surface base;
struct isl_view view;
struct isl_view read_view;
struct isl_surf surf;
union isl_color_value clear_color;
struct pipe_resource *align_res;
};
/**
* Transfer object - information about a buffer mapping.
*/
struct crocus_transfer {
struct pipe_transfer base;
struct pipe_debug_callback *dbg;
void *buffer;
void *ptr;
/** A linear staging resource for GPU-based copy_region transfers. */
struct pipe_resource *staging;
struct blorp_context *blorp;
struct crocus_batch *batch;
bool dest_had_defined_contents;
bool has_swizzling;
void (*unmap)(struct crocus_transfer *);
};
/**
* Unwrap a pipe_resource to get the underlying crocus_bo (for convenience).
*/
static inline struct crocus_bo *
crocus_resource_bo(struct pipe_resource *p_res)
{
struct crocus_resource *res = (void *) p_res;
return res->bo;
}
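/* Memory object control state for a BO: external (imported/exported)
 * buffers get the external MOCS entry from ISL, everything else the default.
 */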
static inline uint32_t
crocus_mocs(const struct crocus_bo *bo,
const struct isl_device *dev)
{
return isl_mocs(dev, 0, bo && crocus_bo_is_external(bo));
}
struct crocus_format_info crocus_format_for_usage(const struct intel_device_info *,
enum pipe_format pf,
isl_surf_usage_flags_t usage);
struct pipe_resource *crocus_resource_get_separate_stencil(struct pipe_resource *);
void crocus_get_depth_stencil_resources(const struct intel_device_info *devinfo,
struct pipe_resource *res,
struct crocus_resource **out_z,
struct crocus_resource **out_s);
bool crocus_resource_set_clear_color(struct crocus_context *ice,
struct crocus_resource *res,
union isl_color_value color);
union isl_color_value
crocus_resource_get_clear_color(const struct crocus_resource *res);
void crocus_init_screen_resource_functions(struct pipe_screen *pscreen);
void crocus_dirty_for_history(struct crocus_context *ice,
struct crocus_resource *res);
uint32_t crocus_flush_bits_for_history(struct crocus_resource *res);
void crocus_flush_and_dirty_for_history(struct crocus_context *ice,
struct crocus_batch *batch,
struct crocus_resource *res,
uint32_t extra_flags,
const char *reason);
unsigned crocus_get_num_logical_layers(const struct crocus_resource *res,
unsigned level);
void crocus_resource_disable_aux(struct crocus_resource *res);
#define INTEL_REMAINING_LAYERS UINT32_MAX
#define INTEL_REMAINING_LEVELS UINT32_MAX
void
crocus_hiz_exec(struct crocus_context *ice,
struct crocus_batch *batch,
struct crocus_resource *res,
unsigned int level, unsigned int start_layer,
unsigned int num_layers, enum isl_aux_op op,
bool update_clear_depth);
/**
* Prepare a miptree for access
*
* This function should be called prior to any access to miptree in order to
* perform any needed resolves.
*
* \param[in] start_level The first mip level to be accessed
*
* \param[in] num_levels The number of miplevels to be accessed or
* INTEL_REMAINING_LEVELS to indicate every level
* above start_level will be accessed
*
* \param[in] start_layer The first array slice or 3D layer to be accessed
*
* \param[in] num_layers The number of array slices or 3D layers be
* accessed or INTEL_REMAINING_LAYERS to indicate
* every layer above start_layer will be accessed
*
* \param[in]  aux_usage       The auxiliary compression usage with which the
*                             access will be done; this must be
*                             ISL_AUX_USAGE_NONE for uncompressed miptrees
*
* \param[in] fast_clear_supported Whether or not the access will support
* fast clears in the miptree's auxiliary
* compression format
*/
void
crocus_resource_prepare_access(struct crocus_context *ice,
struct crocus_resource *res,
uint32_t start_level, uint32_t num_levels,
uint32_t start_layer, uint32_t num_layers,
enum isl_aux_usage aux_usage,
bool fast_clear_supported);
/**
* Complete a write operation
*
* This function should be called after any operation writes to a miptree.
* This will update the miptree's compression state so that future resolves
* happen correctly. Technically, this function can be called before the
* write occurs but the caller must ensure that they don't interlace
* crocus_resource_prepare_access and crocus_resource_finish_write calls to
* overlapping layer/level ranges.
*
* \param[in] level The mip level that was written
*
* \param[in] start_layer The first array slice or 3D layer written
*
* \param[in] num_layers The number of array slices or 3D layers
* written or INTEL_REMAINING_LAYERS to indicate
* every layer above start_layer was written
*
* \param[in] written_with_aux Whether or not the write was done with
* auxiliary compression enabled
*/
void
crocus_resource_finish_write(struct crocus_context *ice,
struct crocus_resource *res, uint32_t level,
uint32_t start_layer, uint32_t num_layers,
enum isl_aux_usage aux_usage);
/** Get the auxiliary compression state of a miptree slice */
enum isl_aux_state
crocus_resource_get_aux_state(const struct crocus_resource *res,
uint32_t level, uint32_t layer);
/**
* Set the auxiliary compression state of a miptree slice range
*
* This function directly sets the auxiliary compression state of a slice
* range of a miptree. It only modifies data structures and does not do any
* resolves. This should only be called by code which directly performs
* compression operations such as fast clears and resolves. Most code should
* use crocus_resource_prepare_access or crocus_resource_finish_write.
*/
void
crocus_resource_set_aux_state(struct crocus_context *ice,
struct crocus_resource *res, uint32_t level,
uint32_t start_layer, uint32_t num_layers,
enum isl_aux_state aux_state);
/**
* Prepare a miptree for raw access
*
* This helper prepares the miptree for access that knows nothing about any
* sort of compression whatsoever. This is useful when mapping the surface or
* using it with the blitter.
*/
static inline void
crocus_resource_access_raw(struct crocus_context *ice,
struct crocus_resource *res,
uint32_t level, uint32_t layer,
uint32_t num_layers,
bool write)
{
crocus_resource_prepare_access(ice, res, level, 1, layer, num_layers,
ISL_AUX_USAGE_NONE, false);
if (write) {
crocus_resource_finish_write(ice, res, level, layer, num_layers,
ISL_AUX_USAGE_NONE);
}
}
void
crocus_resource_get_image_offset(struct crocus_resource *res,
uint32_t level, uint32_t z,
uint32_t *x, uint32_t *y);
static inline enum isl_aux_usage
crocus_resource_texture_aux_usage(const struct crocus_resource *res)
{
return res->aux.usage == ISL_AUX_USAGE_MCS ? ISL_AUX_USAGE_MCS : ISL_AUX_USAGE_NONE;
}
void crocus_resource_prepare_texture(struct crocus_context *ice,
struct crocus_resource *res,
enum isl_format view_format,
uint32_t start_level, uint32_t num_levels,
uint32_t start_layer, uint32_t num_layers);
static inline bool
crocus_resource_unfinished_aux_import(struct crocus_resource *res)
{
return res->base.next != NULL && res->mod_info &&
res->mod_info->aux_usage != ISL_AUX_USAGE_NONE;
}
void crocus_resource_finish_aux_import(struct pipe_screen *pscreen,
struct crocus_resource *res);
bool crocus_has_invalid_primary(const struct crocus_resource *res,
unsigned start_level, unsigned num_levels,
unsigned start_layer, unsigned num_layers);
void crocus_resource_check_level_layer(const struct crocus_resource *res,
uint32_t level, uint32_t layer);
bool crocus_resource_level_has_hiz(const struct crocus_resource *res,
uint32_t level);
bool crocus_has_color_unresolved(const struct crocus_resource *res,
unsigned start_level, unsigned num_levels,
unsigned start_layer, unsigned num_layers);
enum isl_aux_usage crocus_resource_render_aux_usage(struct crocus_context *ice,
struct crocus_resource *res,
enum isl_format render_fmt,
bool blend_enabled,
bool draw_aux_disabled);
void crocus_resource_prepare_render(struct crocus_context *ice,
struct crocus_resource *res, uint32_t level,
uint32_t start_layer, uint32_t layer_count,
enum isl_aux_usage aux_usage);
void crocus_resource_finish_render(struct crocus_context *ice,
struct crocus_resource *res, uint32_t level,
uint32_t start_layer, uint32_t layer_count,
enum isl_aux_usage aux_usage);
#endif

View File

@ -0,0 +1,829 @@
/*
* Copyright © 2017 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
/**
* @file crocus_screen.c
*
* Screen related driver hooks and capability lists.
*
* A program may use multiple rendering contexts (crocus_context), but
* they all share a common screen (crocus_screen). Global driver state
* can be stored in the screen; it may be accessed by multiple threads.
*/
#include <stdio.h>
#include <errno.h>
#include <sys/ioctl.h>
#include "pipe/p_defines.h"
#include "pipe/p_state.h"
#include "pipe/p_context.h"
#include "pipe/p_screen.h"
#include "util/debug.h"
#include "util/u_inlines.h"
#include "util/format/u_format.h"
#include "util/u_transfer_helper.h"
#include "util/u_upload_mgr.h"
#include "util/ralloc.h"
#include "util/xmlconfig.h"
#include "drm-uapi/i915_drm.h"
#include "crocus_context.h"
#include "crocus_defines.h"
#include "crocus_fence.h"
#include "crocus_pipe.h"
#include "crocus_resource.h"
#include "crocus_screen.h"
#include "intel/compiler/brw_compiler.h"
#include "intel/common/intel_gem.h"
#include "intel/common/intel_l3_config.h"
#include "crocus_monitor.h"
#define genX_call(devinfo, func, ...) \
switch ((devinfo)->verx10) { \
case 75: \
gfx75_##func(__VA_ARGS__); \
break; \
case 70: \
gfx7_##func(__VA_ARGS__); \
break; \
case 60: \
gfx6_##func(__VA_ARGS__); \
break; \
case 50: \
gfx5_##func(__VA_ARGS__); \
break; \
case 45: \
gfx45_##func(__VA_ARGS__); \
break; \
case 40: \
gfx4_##func(__VA_ARGS__); \
break; \
default: \
unreachable("Unknown hardware generation"); \
}
static void
crocus_flush_frontbuffer(struct pipe_screen *_screen,
struct pipe_context *_pipe,
struct pipe_resource *resource,
unsigned level, unsigned layer,
void *context_private, struct pipe_box *box)
{
}
static const char *
crocus_get_vendor(struct pipe_screen *pscreen)
{
return "Intel";
}
static const char *
crocus_get_device_vendor(struct pipe_screen *pscreen)
{
return "Intel";
}
static const char *
crocus_get_name(struct pipe_screen *pscreen)
{
struct crocus_screen *screen = (struct crocus_screen *)pscreen;
static char buf[128];
const char *name = intel_get_device_name(screen->pci_id);
if (!name)
name = "Intel Unknown";
snprintf(buf, sizeof(buf), "Mesa %s", name);
return buf;
}
static uint64_t
get_aperture_size(int fd)
{
struct drm_i915_gem_get_aperture aperture = {};
intel_ioctl(fd, DRM_IOCTL_I915_GEM_GET_APERTURE, &aperture);
return aperture.aper_size;
}
static int
crocus_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
{
struct crocus_screen *screen = (struct crocus_screen *)pscreen;
const struct intel_device_info *devinfo = &screen->devinfo;
switch (param) {
case PIPE_CAP_NPOT_TEXTURES:
case PIPE_CAP_ANISOTROPIC_FILTER:
case PIPE_CAP_POINT_SPRITE:
case PIPE_CAP_OCCLUSION_QUERY:
case PIPE_CAP_TEXTURE_SWIZZLE:
case PIPE_CAP_TEXTURE_MIRROR_CLAMP_TO_EDGE:
case PIPE_CAP_BLEND_EQUATION_SEPARATE:
case PIPE_CAP_FRAGMENT_SHADER_TEXTURE_LOD:
case PIPE_CAP_FRAGMENT_SHADER_DERIVATIVES:
case PIPE_CAP_VERTEX_SHADER_SATURATE:
case PIPE_CAP_PRIMITIVE_RESTART:
case PIPE_CAP_PRIMITIVE_RESTART_FIXED_INDEX:
case PIPE_CAP_INDEP_BLEND_ENABLE:
case PIPE_CAP_RGB_OVERRIDE_DST_ALPHA_BLEND:
case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT:
case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER:
case PIPE_CAP_DEPTH_CLIP_DISABLE:
case PIPE_CAP_TGSI_INSTANCEID:
case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR:
case PIPE_CAP_MIXED_COLORBUFFER_FORMATS:
case PIPE_CAP_SEAMLESS_CUBE_MAP:
case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
case PIPE_CAP_CONDITIONAL_RENDER:
case PIPE_CAP_TEXTURE_BARRIER:
case PIPE_CAP_VERTEX_COLOR_UNCLAMPED:
case PIPE_CAP_START_INSTANCE:
case PIPE_CAP_BUFFER_MAP_PERSISTENT_COHERENT:
case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
case PIPE_CAP_MIXED_FRAMEBUFFER_SIZES:
case PIPE_CAP_TGSI_VS_LAYER_VIEWPORT:
case PIPE_CAP_TGSI_TES_LAYER_VIEWPORT:
case PIPE_CAP_ACCELERATED:
case PIPE_CAP_UMA:
case PIPE_CAP_CLIP_HALFZ:
case PIPE_CAP_TGSI_TEXCOORD:
case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
case PIPE_CAP_SIGNED_VERTEX_BUFFER_OFFSET:
case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
case PIPE_CAP_POLYGON_OFFSET_CLAMP:
case PIPE_CAP_TGSI_TEX_TXF_LZ:
case PIPE_CAP_MULTISAMPLE_Z_RESOLVE:
case PIPE_CAP_CLEAR_TEXTURE:
case PIPE_CAP_TGSI_VOTE:
case PIPE_CAP_TGSI_VS_WINDOW_SPACE_POSITION:
case PIPE_CAP_TEXTURE_GATHER_SM5:
case PIPE_CAP_TGSI_ARRAY_COMPONENTS:
case PIPE_CAP_GLSL_TESS_LEVELS_AS_INPUTS:
case PIPE_CAP_NIR_COMPACT_ARRAYS:
case PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL:
case PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL:
case PIPE_CAP_INVALIDATE_BUFFER:
case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS:
case PIPE_CAP_CS_DERIVED_SYSTEM_VALUES_SUPPORTED:
case PIPE_CAP_FENCE_SIGNAL:
case PIPE_CAP_DEMOTE_TO_HELPER_INVOCATION:
return true;
case PIPE_CAP_INT64:
case PIPE_CAP_INT64_DIVMOD:
case PIPE_CAP_TGSI_BALLOT:
case PIPE_CAP_PACKED_UNIFORMS:
case PIPE_CAP_GL_CLAMP:
return false;
case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION:
return devinfo->ver <= 5;
case PIPE_CAP_TEXTURE_QUERY_LOD:
case PIPE_CAP_QUERY_TIME_ELAPSED:
return devinfo->ver >= 5;
case PIPE_CAP_DRAW_INDIRECT:
case PIPE_CAP_MULTI_DRAW_INDIRECT:
case PIPE_CAP_MULTI_DRAW_INDIRECT_PARAMS:
case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT:
case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE:
case PIPE_CAP_STREAM_OUTPUT_INTERLEAVE_BUFFERS:
case PIPE_CAP_TGSI_CLOCK:
case PIPE_CAP_TGSI_TXQS:
case PIPE_CAP_COMPUTE:
case PIPE_CAP_SAMPLER_VIEW_TARGET:
case PIPE_CAP_SHADER_SAMPLES_IDENTICAL:
case PIPE_CAP_TGSI_PACK_HALF_FLOAT:
case PIPE_CAP_GL_SPIRV:
case PIPE_CAP_GL_SPIRV_VARIABLE_POINTERS:
case PIPE_CAP_COMPUTE_SHADER_DERIVATIVES:
case PIPE_CAP_DOUBLES:
return devinfo->ver >= 7;
case PIPE_CAP_QUERY_BUFFER_OBJECT:
case PIPE_CAP_ROBUST_BUFFER_ACCESS_BEHAVIOR:
return devinfo->is_haswell;
case PIPE_CAP_CULL_DISTANCE:
case PIPE_CAP_QUERY_PIPELINE_STATISTICS_SINGLE:
case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME:
case PIPE_CAP_SAMPLE_SHADING:
case PIPE_CAP_CUBE_MAP_ARRAY:
case PIPE_CAP_QUERY_SO_OVERFLOW:
case PIPE_CAP_TEXTURE_MULTISAMPLE:
case PIPE_CAP_CONDITIONAL_RENDER_INVERTED:
case PIPE_CAP_QUERY_TIMESTAMP:
case PIPE_CAP_TEXTURE_BUFFER_OBJECTS:
case PIPE_CAP_INDEP_BLEND_FUNC:
case PIPE_CAP_TEXTURE_SHADOW_LOD:
case PIPE_CAP_LOAD_CONSTBUF:
case PIPE_CAP_DRAW_PARAMETERS:
case PIPE_CAP_CLEAR_SCISSORED:
return devinfo->ver >= 6;
case PIPE_CAP_FBFETCH:
return devinfo->verx10 >= 45 ? BRW_MAX_DRAW_BUFFERS : 0;
case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS:
return devinfo->ver >= 6 ? 1 : 0;
case PIPE_CAP_MAX_RENDER_TARGETS:
return BRW_MAX_DRAW_BUFFERS;
case PIPE_CAP_MAX_TEXTURE_2D_SIZE:
if (devinfo->ver >= 7)
return 16384;
else
return 8192;
case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
if (devinfo->ver >= 7)
return CROCUS_MAX_MIPLEVELS; /* 16384x16384 */
else
return CROCUS_MAX_MIPLEVELS - 1; /* 8192x8192 */
case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
return 12; /* 2048x2048 */
case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS:
return (devinfo->ver >= 6) ? 4 : 0;
case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS:
return devinfo->ver >= 7 ? 2048 : 512;
case PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_COMPONENTS:
return BRW_MAX_SOL_BINDINGS / CROCUS_MAX_SOL_BUFFERS;
case PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS:
return BRW_MAX_SOL_BINDINGS;
case PIPE_CAP_GLSL_FEATURE_LEVEL: {
if (devinfo->is_haswell)
return 460;
else if (devinfo->ver >= 7)
return 420;
else if (devinfo->ver >= 6)
return 330;
return 120;
}
case PIPE_CAP_GLSL_FEATURE_LEVEL_COMPATIBILITY:
return devinfo->ver < 6 ? 120 : 130;
case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT:
/* 3DSTATE_CONSTANT_XS requires the start of UBOs to be 32B aligned */
return 32;
case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT:
return CROCUS_MAP_BUFFER_ALIGNMENT;
case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:
/* Choose a cacheline (64 bytes) so that we can safely have the CPU and
* GPU writing the same SSBO on non-coherent systems (Atom CPUs). With
* UBOs, the GPU never writes, so there's no problem. For an SSBO, the
* GPU and the CPU can be updating disjoint regions of the buffer
* simultaneously and that will break if the regions overlap the same
* cacheline.
*/
return devinfo->ver >= 7 ? 64 : 0;
case PIPE_CAP_MAX_SHADER_BUFFER_SIZE:
return devinfo->ver >= 7 ? (1 << 27) : 0;
case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT:
return 16; // XXX: u_screen says 256 is the minimum value...
case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER:
return true;
case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE:
return CROCUS_MAX_TEXTURE_BUFFER_SIZE;
case PIPE_CAP_MAX_VIEWPORTS:
return devinfo->ver >= 6 ? 16 : 1;
case PIPE_CAP_MAX_GEOMETRY_OUTPUT_VERTICES:
return devinfo->ver >= 6 ? 256 : 0;
case PIPE_CAP_MAX_GEOMETRY_TOTAL_OUTPUT_COMPONENTS:
return devinfo->ver >= 6 ? 1024 : 0;
case PIPE_CAP_MAX_GS_INVOCATIONS:
return devinfo->ver >= 7 ? 32 : 1;
case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS:
if (devinfo->ver >= 7)
return 4;
else if (devinfo->ver == 6)
return 1;
else
return 0;
case PIPE_CAP_MIN_TEXTURE_GATHER_OFFSET:
if (devinfo->ver >= 7)
return -32;
else if (devinfo->ver == 6)
return -8;
else
return 0;
case PIPE_CAP_MAX_TEXTURE_GATHER_OFFSET:
if (devinfo->ver >= 7)
return 31;
else if (devinfo->ver == 6)
return 7;
else
return 0;
case PIPE_CAP_MAX_VERTEX_STREAMS:
return devinfo->ver >= 7 ? 4 : 1;
case PIPE_CAP_VENDOR_ID:
return 0x8086;
case PIPE_CAP_DEVICE_ID:
return screen->pci_id;
case PIPE_CAP_VIDEO_MEMORY: {
/* Once a batch uses more than 75% of the maximum mappable size, we
* assume that there's some fragmentation, and we start doing extra
* flushing, etc. That's the big cliff apps will care about.
*/
const unsigned gpu_mappable_megabytes =
(screen->aperture_bytes * 3 / 4) / (1024 * 1024);
const long system_memory_pages = sysconf(_SC_PHYS_PAGES);
const long system_page_size = sysconf(_SC_PAGE_SIZE);
if (system_memory_pages <= 0 || system_page_size <= 0)
return -1;
const uint64_t system_memory_bytes =
(uint64_t) system_memory_pages * (uint64_t) system_page_size;
const unsigned system_memory_megabytes =
(unsigned) (system_memory_bytes / (1024 * 1024));
return MIN2(system_memory_megabytes, gpu_mappable_megabytes);
}
case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
case PIPE_CAP_MAX_VARYINGS:
return (screen->devinfo.ver >= 6) ? 32 : 16;
case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
/* AMD_pinned_memory assumes the flexibility of using client memory
* for any buffer (incl. vertex buffers) which rules out the prospect
* of using snooped buffers, as using snooped buffers without
* cognizance is likely to be detrimental to performance and require
* extensive checking in the driver for correctness, e.g. to prevent
* illegal snoop <-> snoop transfers.
*/
return devinfo->has_llc;
case PIPE_CAP_THROTTLE:
return screen->driconf.disable_throttling ? 0 : 1;
case PIPE_CAP_CONTEXT_PRIORITY_MASK:
return PIPE_CONTEXT_PRIORITY_LOW |
PIPE_CONTEXT_PRIORITY_MEDIUM |
PIPE_CONTEXT_PRIORITY_HIGH;
case PIPE_CAP_FRONTEND_NOOP:
return true;
// XXX: don't hardcode 00:00:02.0 PCI here
case PIPE_CAP_PCI_GROUP:
return 0;
case PIPE_CAP_PCI_BUS:
return 0;
case PIPE_CAP_PCI_DEVICE:
return 2;
case PIPE_CAP_PCI_FUNCTION:
return 0;
default:
return u_pipe_screen_get_param_defaults(pscreen, param);
}
return 0;
}
static float
crocus_get_paramf(struct pipe_screen *pscreen, enum pipe_capf param)
{
struct crocus_screen *screen = (struct crocus_screen *)pscreen;
const struct intel_device_info *devinfo = &screen->devinfo;
switch (param) {
case PIPE_CAPF_MAX_LINE_WIDTH:
case PIPE_CAPF_MAX_LINE_WIDTH_AA:
if (devinfo->ver >= 6)
return 7.375f;
else
return 7.0f;
case PIPE_CAPF_MAX_POINT_WIDTH:
case PIPE_CAPF_MAX_POINT_WIDTH_AA:
return 255.0f;
case PIPE_CAPF_MAX_TEXTURE_ANISOTROPY:
return 16.0f;
case PIPE_CAPF_MAX_TEXTURE_LOD_BIAS:
return 15.0f;
case PIPE_CAPF_MIN_CONSERVATIVE_RASTER_DILATE:
case PIPE_CAPF_MAX_CONSERVATIVE_RASTER_DILATE:
case PIPE_CAPF_CONSERVATIVE_RASTER_DILATE_GRANULARITY:
return 0.0f;
default:
unreachable("unknown param");
}
}
static int
crocus_get_shader_param(struct pipe_screen *pscreen,
enum pipe_shader_type p_stage,
enum pipe_shader_cap param)
{
gl_shader_stage stage = stage_from_pipe(p_stage);
struct crocus_screen *screen = (struct crocus_screen *)pscreen;
const struct intel_device_info *devinfo = &screen->devinfo;
if (devinfo->ver < 6 &&
p_stage != PIPE_SHADER_VERTEX &&
p_stage != PIPE_SHADER_FRAGMENT)
return 0;
if (devinfo->ver == 6 &&
p_stage != PIPE_SHADER_VERTEX &&
p_stage != PIPE_SHADER_FRAGMENT &&
p_stage != PIPE_SHADER_GEOMETRY)
return 0;
/* this is probably not totally correct.. but it's a start: */
switch (param) {
case PIPE_SHADER_CAP_MAX_INSTRUCTIONS:
return stage == MESA_SHADER_FRAGMENT ? 1024 : 16384;
case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS:
case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS:
case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS:
return stage == MESA_SHADER_FRAGMENT ? 1024 : 0;
case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH:
return UINT_MAX;
case PIPE_SHADER_CAP_MAX_INPUTS:
if (stage == MESA_SHADER_VERTEX ||
stage == MESA_SHADER_GEOMETRY)
return 16; /* Gen7 vec4 geom backend */
return 32;
case PIPE_SHADER_CAP_MAX_OUTPUTS:
return 32;
case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE:
return 16 * 1024 * sizeof(float);
case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:
return devinfo->ver >= 6 ? 16 : 1;
case PIPE_SHADER_CAP_MAX_TEMPS:
return 256; /* GL_MAX_PROGRAM_TEMPORARIES_ARB */
case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED:
return 0;
case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR:
case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR:
case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR:
case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR:
/* Lie about these to avoid st/mesa's GLSL IR lowering of indirects,
* which we don't want. Our compiler backend will check brw_compiler's
* options and call nir_lower_indirect_derefs appropriately anyway.
*/
return true;
case PIPE_SHADER_CAP_SUBROUTINES:
return 0;
case PIPE_SHADER_CAP_INTEGERS:
return 1;
case PIPE_SHADER_CAP_INT64_ATOMICS:
case PIPE_SHADER_CAP_FP16:
return 0;
case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS:
case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS:
return devinfo->is_haswell ? CROCUS_MAX_TEXTURE_SAMPLERS : 16;
case PIPE_SHADER_CAP_MAX_SHADER_IMAGES:
if (devinfo->ver >= 7 &&
(p_stage == PIPE_SHADER_FRAGMENT ||
p_stage == PIPE_SHADER_COMPUTE))
return CROCUS_MAX_TEXTURE_SAMPLERS;
return 0;
case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
return devinfo->ver >= 7 ? (CROCUS_MAX_ABOS + CROCUS_MAX_SSBOS) : 0;
case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTERS:
case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTER_BUFFERS:
return 0;
case PIPE_SHADER_CAP_PREFERRED_IR:
return PIPE_SHADER_IR_NIR;
case PIPE_SHADER_CAP_SUPPORTED_IRS:
return 1 << PIPE_SHADER_IR_NIR;
case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_LDEXP_SUPPORTED:
return 1;
case PIPE_SHADER_CAP_LOWER_IF_THRESHOLD:
case PIPE_SHADER_CAP_TGSI_SKIP_MERGE_REGISTERS:
case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED:
case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
case PIPE_SHADER_CAP_FP16_DERIVATIVES:
case PIPE_SHADER_CAP_INT16:
case PIPE_SHADER_CAP_GLSL_16BIT_CONSTS:
case PIPE_SHADER_CAP_FP16_CONST_BUFFERS:
return 0;
default:
unreachable("unknown shader param");
}
}
static int
crocus_get_compute_param(struct pipe_screen *pscreen,
enum pipe_shader_ir ir_type,
enum pipe_compute_cap param,
void *ret)
{
struct crocus_screen *screen = (struct crocus_screen *)pscreen;
const struct intel_device_info *devinfo = &screen->devinfo;
const unsigned max_threads = MIN2(64, devinfo->max_cs_threads);
const uint32_t max_invocations = 32 * max_threads;
if (devinfo->ver < 7)
return 0;
#define RET(x) do { \
if (ret) \
memcpy(ret, x, sizeof(x)); \
return sizeof(x); \
} while (0)
switch (param) {
case PIPE_COMPUTE_CAP_ADDRESS_BITS:
RET((uint32_t []){ 32 });
case PIPE_COMPUTE_CAP_IR_TARGET:
if (ret)
strcpy(ret, "gen");
return 4;
case PIPE_COMPUTE_CAP_GRID_DIMENSION:
RET((uint64_t []) { 3 });
case PIPE_COMPUTE_CAP_MAX_GRID_SIZE:
RET(((uint64_t []) { 65535, 65535, 65535 }));
case PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE:
/* MaxComputeWorkGroupSize[0..2] */
RET(((uint64_t []) {max_invocations, max_invocations, max_invocations}));
case PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK:
/* MaxComputeWorkGroupInvocations */
RET((uint64_t []) { max_invocations });
case PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE:
/* MaxComputeSharedMemorySize */
RET((uint64_t []) { 64 * 1024 });
case PIPE_COMPUTE_CAP_IMAGES_SUPPORTED:
RET((uint32_t []) { 1 });
case PIPE_COMPUTE_CAP_SUBGROUP_SIZE:
RET((uint32_t []) { BRW_SUBGROUP_SIZE });
case PIPE_COMPUTE_CAP_MAX_VARIABLE_THREADS_PER_BLOCK:
RET((uint64_t []) { max_invocations });
case PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE:
case PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY:
case PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS:
case PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE:
case PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE:
case PIPE_COMPUTE_CAP_MAX_INPUT_SIZE:
// XXX: I think these are for Clover...
return 0;
default:
unreachable("unknown compute param");
}
}
static uint64_t
crocus_get_timestamp(struct pipe_screen *pscreen)
{
struct crocus_screen *screen = (struct crocus_screen *) pscreen;
const unsigned TIMESTAMP = 0x2358;
uint64_t result;
crocus_reg_read(screen->bufmgr, TIMESTAMP | 1, &result);
result = intel_device_info_timebase_scale(&screen->devinfo, result);
result &= (1ull << TIMESTAMP_BITS) - 1;
return result;
}
void
crocus_screen_destroy(struct crocus_screen *screen)
{
u_transfer_helper_destroy(screen->base.transfer_helper);
crocus_bufmgr_unref(screen->bufmgr);
disk_cache_destroy(screen->disk_cache);
close(screen->winsys_fd);
ralloc_free(screen);
}
static void
crocus_screen_unref(struct pipe_screen *pscreen)
{
crocus_pscreen_unref(pscreen);
}
static void
crocus_query_memory_info(struct pipe_screen *pscreen,
struct pipe_memory_info *info)
{
}
static const void *
crocus_get_compiler_options(struct pipe_screen *pscreen,
enum pipe_shader_ir ir,
enum pipe_shader_type pstage)
{
struct crocus_screen *screen = (struct crocus_screen *) pscreen;
gl_shader_stage stage = stage_from_pipe(pstage);
assert(ir == PIPE_SHADER_IR_NIR);
return screen->compiler->glsl_compiler_options[stage].NirOptions;
}
static struct disk_cache *
crocus_get_disk_shader_cache(struct pipe_screen *pscreen)
{
struct crocus_screen *screen = (struct crocus_screen *) pscreen;
return screen->disk_cache;
}
static const struct intel_l3_config *
crocus_get_default_l3_config(const struct intel_device_info *devinfo,
bool compute)
{
bool wants_dc_cache = true;
bool has_slm = compute;
const struct intel_l3_weights w =
intel_get_default_l3_weights(devinfo, wants_dc_cache, has_slm);
return intel_get_l3_config(devinfo, w);
}
static void
crocus_shader_debug_log(void *data, const char *fmt, ...)
{
struct pipe_debug_callback *dbg = data;
unsigned id = 0;
va_list args;
if (!dbg->debug_message)
return;
va_start(args, fmt);
dbg->debug_message(dbg->data, &id, PIPE_DEBUG_TYPE_SHADER_INFO, fmt, args);
va_end(args);
}
static void
crocus_shader_perf_log(void *data, const char *fmt, ...)
{
struct pipe_debug_callback *dbg = data;
unsigned id = 0;
va_list args;
va_start(args, fmt);
if (unlikely(INTEL_DEBUG & DEBUG_PERF)) {
va_list args_copy;
va_copy(args_copy, args);
vfprintf(stderr, fmt, args_copy);
va_end(args_copy);
}
if (dbg->debug_message) {
dbg->debug_message(dbg->data, &id, PIPE_DEBUG_TYPE_PERF_INFO, fmt, args);
}
va_end(args);
}
static bool
crocus_detect_swizzling(struct crocus_screen *screen)
{
/* Broadwell PRM says:
*
* "Before Gen8, there was a historical configuration control field to
* swizzle address bit[6] for in X/Y tiling modes. This was set in three
* different places: TILECTL[1:0], ARB_MODE[5:4], and
* DISP_ARB_CTL[14:13].
*
* For Gen8 and subsequent generations, the swizzle fields are all
* reserved, and the CPU's memory controller performs all address
* swizzling modifications."
*/
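/* The answer is stored in screen->has_swizzling and handed to
 * isl_device_init(), so CPU mapping of X/Y-tiled surfaces can undo the
 * bit-6 address swizzle.
 */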
uint32_t tiling = I915_TILING_X;
uint32_t swizzle_mode = 0;
struct crocus_bo *buffer =
crocus_bo_alloc_tiled(screen->bufmgr, "swizzle test", 32768,
0, tiling, 512, 0);
if (buffer == NULL)
return false;
crocus_bo_get_tiling(buffer, &tiling, &swizzle_mode);
crocus_bo_unreference(buffer);
return swizzle_mode != I915_BIT_6_SWIZZLE_NONE;
}
struct pipe_screen *
crocus_screen_create(int fd, const struct pipe_screen_config *config)
{
struct crocus_screen *screen = rzalloc(NULL, struct crocus_screen);
if (!screen)
return NULL;
if (!intel_get_device_info_from_fd(fd, &screen->devinfo)) {
ralloc_free(screen);
return NULL;
}
screen->pci_id = screen->devinfo.chipset_id;
screen->no_hw = screen->devinfo.no_hw;
/* crocus only handles gfx4-7; newer parts are handled by iris */
if (screen->devinfo.ver >= 8) {
ralloc_free(screen);
return NULL;
}
p_atomic_set(&screen->refcount, 1);
screen->aperture_bytes = get_aperture_size(fd);
if (getenv("INTEL_NO_HW") != NULL)
screen->no_hw = true;
bool bo_reuse = false;
int bo_reuse_mode = driQueryOptioni(config->options, "bo_reuse");
switch (bo_reuse_mode) {
case DRI_CONF_BO_REUSE_DISABLED:
break;
case DRI_CONF_BO_REUSE_ALL:
bo_reuse = true;
break;
}
screen->bufmgr = crocus_bufmgr_get_for_fd(&screen->devinfo, fd, bo_reuse);
if (!screen->bufmgr)
return NULL;
screen->fd = crocus_bufmgr_get_fd(screen->bufmgr);
screen->winsys_fd = fd;
screen->has_swizzling = crocus_detect_swizzling(screen);
brw_process_intel_debug_variable();
screen->driconf.dual_color_blend_by_location =
driQueryOptionb(config->options, "dual_color_blend_by_location");
screen->driconf.disable_throttling =
driQueryOptionb(config->options, "disable_throttling");
screen->driconf.always_flush_cache =
driQueryOptionb(config->options, "always_flush_cache");
screen->precompile = env_var_as_boolean("shader_precompile", true);
isl_device_init(&screen->isl_dev, &screen->devinfo,
screen->has_swizzling);
screen->compiler = brw_compiler_create(screen, &screen->devinfo);
screen->compiler->shader_debug_log = crocus_shader_debug_log;
screen->compiler->shader_perf_log = crocus_shader_perf_log;
screen->compiler->supports_pull_constants = false;
screen->compiler->supports_shader_constants = false;
screen->compiler->compact_params = false;
screen->compiler->constant_buffer_0_is_relative = true;
if (screen->devinfo.ver == 7) {
screen->l3_config_3d = crocus_get_default_l3_config(&screen->devinfo, false);
screen->l3_config_cs = crocus_get_default_l3_config(&screen->devinfo, true);
}
crocus_disk_cache_init(screen);
slab_create_parent(&screen->transfer_pool,
sizeof(struct crocus_transfer), 64);
screen->subslice_total = intel_device_info_subslice_total(&screen->devinfo);
assert(screen->subslice_total >= 1);
struct pipe_screen *pscreen = &screen->base;
crocus_init_screen_fence_functions(pscreen);
crocus_init_screen_resource_functions(pscreen);
pscreen->destroy = crocus_screen_unref;
pscreen->get_name = crocus_get_name;
pscreen->get_vendor = crocus_get_vendor;
pscreen->get_device_vendor = crocus_get_device_vendor;
pscreen->get_param = crocus_get_param;
pscreen->get_shader_param = crocus_get_shader_param;
pscreen->get_compute_param = crocus_get_compute_param;
pscreen->get_paramf = crocus_get_paramf;
pscreen->get_compiler_options = crocus_get_compiler_options;
pscreen->get_disk_shader_cache = crocus_get_disk_shader_cache;
pscreen->is_format_supported = crocus_is_format_supported;
pscreen->context_create = crocus_create_context;
pscreen->flush_frontbuffer = crocus_flush_frontbuffer;
pscreen->get_timestamp = crocus_get_timestamp;
pscreen->query_memory_info = crocus_query_memory_info;
pscreen->get_driver_query_group_info = crocus_get_monitor_group_info;
pscreen->get_driver_query_info = crocus_get_monitor_info;
genX_call(&screen->devinfo, init_screen_state, screen);
genX_call(&screen->devinfo, init_screen_query, screen);
return pscreen;
}

View File

@ -0,0 +1,253 @@
/*
* Copyright © 2017 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef CROCUS_SCREEN_H
#define CROCUS_SCREEN_H
#include "pipe/p_screen.h"
#include "pipe/p_state.h"
#include "frontend/drm_driver.h"
#include "util/disk_cache.h"
#include "util/slab.h"
#include "util/u_screen.h"
#include "intel/dev/intel_device_info.h"
#include "intel/isl/isl.h"
#include "crocus_bufmgr.h"
#include "compiler/shader_enums.h"
struct crocus_monitor_config;
struct crocus_resource;
struct crocus_context;
struct crocus_sampler_state;
struct brw_vue_map;
struct brw_tcs_prog_key;
struct brw_tes_prog_key;
struct brw_cs_prog_key;
struct brw_wm_prog_key;
struct brw_vs_prog_key;
struct brw_gs_prog_key;
struct shader_info;
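/* Force single, untorn reads/writes through a volatile cast, for fields
 * shared between threads without a lock.
 */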
#define READ_ONCE(x) (*(volatile __typeof__(x) *)&(x))
#define WRITE_ONCE(x, v) *(volatile __typeof__(x) *)&(x) = (v)
#define CROCUS_MAX_TEXTURE_SAMPLERS 32
#define CROCUS_MAX_SOL_BUFFERS 4
#define CROCUS_MAP_BUFFER_ALIGNMENT 64
/**
* Virtual table for generation-specific (genxml) function calls.
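*
* Entries are filled in by the per-generation genX(init_*) functions,
* reached through the genX_call() dispatch in crocus_screen.c.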
*/
struct crocus_vtable {
void (*destroy_state)(struct crocus_context *ice);
void (*init_render_context)(struct crocus_batch *batch);
void (*init_compute_context)(struct crocus_batch *batch);
void (*upload_render_state)(struct crocus_context *ice,
struct crocus_batch *batch,
const struct pipe_draw_info *draw,
unsigned drawid_offset,
const struct pipe_draw_indirect_info *indirect,
const struct pipe_draw_start_count_bias *sc);
void (*update_surface_base_address)(struct crocus_batch *batch);
void (*upload_compute_state)(struct crocus_context *ice,
struct crocus_batch *batch,
const struct pipe_grid_info *grid);
void (*rebind_buffer)(struct crocus_context *ice,
struct crocus_resource *res);
void (*resolve_conditional_render)(struct crocus_context *ice);
void (*emit_compute_predicate)(struct crocus_batch *batch);
void (*load_register_reg32)(struct crocus_batch *batch, uint32_t dst,
uint32_t src);
void (*load_register_reg64)(struct crocus_batch *batch, uint32_t dst,
uint32_t src);
void (*load_register_imm32)(struct crocus_batch *batch, uint32_t reg,
uint32_t val);
void (*load_register_imm64)(struct crocus_batch *batch, uint32_t reg,
uint64_t val);
void (*load_register_mem32)(struct crocus_batch *batch, uint32_t reg,
struct crocus_bo *bo, uint32_t offset);
void (*load_register_mem64)(struct crocus_batch *batch, uint32_t reg,
struct crocus_bo *bo, uint32_t offset);
void (*store_register_mem32)(struct crocus_batch *batch, uint32_t reg,
struct crocus_bo *bo, uint32_t offset,
bool predicated);
void (*store_register_mem64)(struct crocus_batch *batch, uint32_t reg,
struct crocus_bo *bo, uint32_t offset,
bool predicated);
void (*store_data_imm32)(struct crocus_batch *batch,
struct crocus_bo *bo, uint32_t offset,
uint32_t value);
void (*store_data_imm64)(struct crocus_batch *batch,
struct crocus_bo *bo, uint32_t offset,
uint64_t value);
void (*copy_mem_mem)(struct crocus_batch *batch,
struct crocus_bo *dst_bo, uint32_t dst_offset,
struct crocus_bo *src_bo, uint32_t src_offset,
unsigned bytes);
void (*emit_raw_pipe_control)(struct crocus_batch *batch,
const char *reason, uint32_t flags,
struct crocus_bo *bo, uint32_t offset,
uint64_t imm);
void (*emit_mi_report_perf_count)(struct crocus_batch *batch,
struct crocus_bo *bo,
uint32_t offset_in_bytes,
uint32_t report_id);
uint32_t *(*create_so_decl_list)(const struct pipe_stream_output_info *sol,
const struct brw_vue_map *vue_map);
void (*populate_vs_key)(const struct crocus_context *ice,
const struct shader_info *info,
gl_shader_stage last_stage,
struct brw_vs_prog_key *key);
void (*populate_tcs_key)(const struct crocus_context *ice,
struct brw_tcs_prog_key *key);
void (*populate_tes_key)(const struct crocus_context *ice,
const struct shader_info *info,
gl_shader_stage last_stage,
struct brw_tes_prog_key *key);
void (*populate_gs_key)(const struct crocus_context *ice,
const struct shader_info *info,
gl_shader_stage last_stage,
struct brw_gs_prog_key *key);
void (*populate_fs_key)(const struct crocus_context *ice,
const struct shader_info *info,
struct brw_wm_prog_key *key);
void (*populate_cs_key)(const struct crocus_context *ice,
struct brw_cs_prog_key *key);
void (*lost_genx_state)(struct crocus_context *ice, struct crocus_batch *batch);
void (*finish_batch)(struct crocus_batch *batch); /* haswell only */
void (*upload_urb_fence)(struct crocus_batch *batch); /* gen4/5 only */
bool (*blit_blt)(struct crocus_batch *batch,
const struct pipe_blit_info *info);
bool (*copy_region_blt)(struct crocus_batch *batch,
struct crocus_resource *dst,
unsigned dst_level,
unsigned dstx, unsigned dsty, unsigned dstz,
struct crocus_resource *src,
unsigned src_level,
const struct pipe_box *src_box);
bool (*calculate_urb_fence)(struct crocus_batch *batch, unsigned csize,
unsigned vsize, unsigned sfsize);
void (*batch_reset_dirty)(struct crocus_batch *batch);
unsigned (*translate_prim_type)(enum pipe_prim_type prim, uint8_t verts_per_patch);
void (*update_so_strides)(struct crocus_context *ice,
uint16_t *strides);
uint32_t (*get_so_offset)(struct pipe_stream_output_target *tgt);
};
struct crocus_screen {
struct pipe_screen base;
uint32_t refcount;
/** Global slab allocator for crocus_transfer_map objects */
struct slab_parent_pool transfer_pool;
/** drm device file descriptor, shared with bufmgr, do not close. */
int fd;
/**
* drm device file descriptor used for window system integration, owned
* by crocus_screen; it can be a different DRM instance than fd.
*/
int winsys_fd;
/** PCI ID for our GPU device */
int pci_id;
bool no_hw;
struct crocus_vtable vtbl;
/** Global program_string_id counter (see get_program_string_id()) */
unsigned program_id;
/** Precompile shaders at link time? (Can be disabled for debugging.) */
bool precompile;
/** driconf options and application workarounds */
struct {
/** Dual color blend by location instead of index (for broken apps) */
bool dual_color_blend_by_location;
bool disable_throttling;
bool always_flush_cache;
} driconf;
unsigned subslice_total;
uint64_t aperture_bytes;
struct intel_device_info devinfo;
struct isl_device isl_dev;
struct crocus_bufmgr *bufmgr;
struct brw_compiler *compiler;
struct crocus_monitor_config *monitor_cfg;
bool has_swizzling;
const struct intel_l3_config *l3_config_3d;
const struct intel_l3_config *l3_config_cs;
struct disk_cache *disk_cache;
};
struct pipe_screen *
crocus_screen_create(int fd, const struct pipe_screen_config *config);
void crocus_screen_destroy(struct crocus_screen *screen);
UNUSED static inline struct pipe_screen *
crocus_pscreen_ref(struct pipe_screen *pscreen)
{
struct crocus_screen *screen = (struct crocus_screen *) pscreen;
p_atomic_inc(&screen->refcount);
return pscreen;
}
UNUSED static inline void
crocus_pscreen_unref(struct pipe_screen *pscreen)
{
struct crocus_screen *screen = (struct crocus_screen *) pscreen;
if (p_atomic_dec_zero(&screen->refcount))
crocus_screen_destroy(screen);
}
bool
crocus_is_format_supported(struct pipe_screen *pscreen,
enum pipe_format format,
enum pipe_texture_target target,
unsigned sample_count,
unsigned storage_sample_count,
unsigned usage);
void crocus_disk_cache_init(struct crocus_screen *screen);
#endif

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,16 @@
Quick TODO list from what I can see:
General:
Re-emit SURFACE_STATE_BASE_ADDRESS at the top of every batch
Gen4:
rgb32 issue
Gen5:
rgb32 issue
Gen6:
vec4 push constants
Gen7:

View File

@ -0,0 +1,11 @@
// crocus specific driconf options
DRI_CONF_SECTION_DEBUG
DRI_CONF_DUAL_COLOR_BLEND_BY_LOCATION(false)
DRI_CONF_DISABLE_THROTTLING(false)
DRI_CONF_ALWAYS_FLUSH_CACHE(false)
DRI_CONF_SECTION_END
DRI_CONF_SECTION_PERFORMANCE
DRI_CONF_OPT_E(bo_reuse, 1, 0, 1, "Buffer object reuse",)
DRI_CONF_SECTION_END

View File

@ -0,0 +1,190 @@
/*
* Copyright © 2016 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
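/* Helpers for the gfx4/5 BLORP path: pipelined state structures are emitted
 * into the batch's dynamic state buffer, and shader kernels live in the
 * context's program cache BO, so both are addressed as BO + offset.
 */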
static inline struct blorp_address
dynamic_state_address(struct blorp_batch *blorp_batch, uint32_t offset)
{
struct crocus_batch *batch = blorp_batch->driver_batch;
return (struct blorp_address) {
.buffer = batch->state.bo,
.offset = offset,
};
}
static inline struct blorp_address
instruction_state_address(struct blorp_batch *blorp_batch, uint32_t offset)
{
struct crocus_batch *batch = blorp_batch->driver_batch;
return (struct blorp_address) {
.buffer = batch->ice->shaders.cache_bo,
.offset = offset,
};
}
static struct blorp_address
blorp_emit_vs_state(struct blorp_batch *blorp_batch)
{
struct crocus_batch *batch = blorp_batch->driver_batch;
uint32_t offset;
blorp_emit_dynamic(blorp_batch, GENX(VS_STATE), vs, 64, &offset) {
vs.Enable = false;
vs.URBEntryAllocationSize = batch->ice->urb.vsize - 1;
#if GFX_VER == 5
vs.NumberofURBEntries = batch->ice->urb.nr_vs_entries >> 2;
#else
vs.NumberofURBEntries = batch->ice->urb.nr_vs_entries;
#endif
}
return dynamic_state_address(blorp_batch, offset);
}
static struct blorp_address
blorp_emit_sf_state(struct blorp_batch *blorp_batch,
const struct blorp_params *params)
{
struct crocus_batch *batch = blorp_batch->driver_batch;
const struct brw_sf_prog_data *prog_data = params->sf_prog_data;
uint32_t offset;
blorp_emit_dynamic(blorp_batch, GENX(SF_STATE), sf, 64, &offset) {
#if GFX_VER == 4
sf.KernelStartPointer =
instruction_state_address(blorp_batch, params->sf_prog_kernel);
#else
sf.KernelStartPointer = params->sf_prog_kernel;
#endif
sf.GRFRegisterCount = DIV_ROUND_UP(prog_data->total_grf, 16) - 1;
sf.VertexURBEntryReadLength = prog_data->urb_read_length;
sf.VertexURBEntryReadOffset = BRW_SF_URB_ENTRY_READ_OFFSET;
sf.DispatchGRFStartRegisterForURBData = 3;
sf.URBEntryAllocationSize = batch->ice->urb.sfsize - 1;
sf.NumberofURBEntries = batch->ice->urb.nr_sf_entries;
#if GFX_VER == 5
sf.MaximumNumberofThreads = MIN2(48, batch->ice->urb.nr_sf_entries) - 1;
#else
sf.MaximumNumberofThreads = MIN2(24, batch->ice->urb.nr_sf_entries) - 1;
#endif
sf.ViewportTransformEnable = false;
sf.CullMode = CULLMODE_NONE;
}
return dynamic_state_address(blorp_batch, offset);
}
static struct blorp_address
blorp_emit_wm_state(struct blorp_batch *blorp_batch,
const struct blorp_params *params)
{
const struct brw_wm_prog_data *prog_data = params->wm_prog_data;
uint32_t offset;
blorp_emit_dynamic(blorp_batch, GENX(WM_STATE), wm, 64, &offset) {
if (params->src.enabled) {
/* Iron Lake can't do sampler prefetch */
wm.SamplerCount = (GFX_VER != 5);
wm.BindingTableEntryCount = 2;
uint32_t sampler = blorp_emit_sampler_state(blorp_batch);
wm.SamplerStatePointer = dynamic_state_address(blorp_batch, sampler);
}
if (prog_data) {
wm.DispatchGRFStartRegisterForConstantSetupData0 =
prog_data->base.dispatch_grf_start_reg;
wm.SetupURBEntryReadLength = prog_data->num_varying_inputs * 2;
wm.SetupURBEntryReadOffset = 0;
wm.DepthCoefficientURBReadOffset = 1;
wm.PixelShaderKillsPixel = prog_data->uses_kill;
wm.ThreadDispatchEnable = true;
wm.EarlyDepthTestEnable = true;
wm._8PixelDispatchEnable = prog_data->dispatch_8;
wm._16PixelDispatchEnable = prog_data->dispatch_16;
wm._32PixelDispatchEnable = prog_data->dispatch_32;
#if GFX_VER == 4
wm.KernelStartPointer0 =
instruction_state_address(blorp_batch, params->wm_prog_kernel);
wm.GRFRegisterCount0 = brw_wm_prog_data_reg_blocks(prog_data, wm, 0);
#else
wm.KernelStartPointer0 = params->wm_prog_kernel +
brw_wm_prog_data_prog_offset(prog_data, wm, 0);
wm.KernelStartPointer1 = params->wm_prog_kernel +
brw_wm_prog_data_prog_offset(prog_data, wm, 1);
wm.KernelStartPointer2 = params->wm_prog_kernel +
brw_wm_prog_data_prog_offset(prog_data, wm, 2);
wm.GRFRegisterCount0 = brw_wm_prog_data_reg_blocks(prog_data, wm, 0);
wm.GRFRegisterCount1 = brw_wm_prog_data_reg_blocks(prog_data, wm, 1);
wm.GRFRegisterCount2 = brw_wm_prog_data_reg_blocks(prog_data, wm, 2);
#endif
}
wm.MaximumNumberofThreads =
blorp_batch->blorp->compiler->devinfo->max_wm_threads - 1;
}
return dynamic_state_address(blorp_batch, offset);
}
static struct blorp_address
blorp_emit_color_calc_state(struct blorp_batch *blorp_batch)
{
uint32_t cc_viewport = blorp_emit_cc_viewport(blorp_batch);
uint32_t offset;
blorp_emit_dynamic(blorp_batch, GENX(COLOR_CALC_STATE), cc, 64, &offset) {
cc.CCViewportStatePointer = dynamic_state_address(blorp_batch, cc_viewport);
}
return dynamic_state_address(blorp_batch, offset);
}
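/* gfx4/5 have no per-stage inline 3DSTATE packets; the fixed-function
 * pipeline is described by indirect state structures referenced from
 * 3DSTATE_PIPELINED_POINTERS, followed by the URB fence and CURB setup.
 */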
static void
blorp_emit_pipeline(struct blorp_batch *blorp_batch,
const struct blorp_params *params)
{
struct crocus_batch *batch = blorp_batch->driver_batch;
/* Size the URB for this blit, then bind the whole gfx4-5 fixed-function
 * pipeline in one shot via the pipelined state pointers. */
emit_urb_config(blorp_batch, params, NULL);
blorp_emit(blorp_batch, GENX(3DSTATE_PIPELINED_POINTERS), pp) {
pp.PointertoVSState = blorp_emit_vs_state(blorp_batch);
pp.GSEnable = false;
pp.ClipEnable = false;
pp.PointertoSFState = blorp_emit_sf_state(blorp_batch, params);
pp.PointertoWMState = blorp_emit_wm_state(blorp_batch, params);
pp.PointertoColorCalcState = blorp_emit_color_calc_state(blorp_batch);
}
/* The URB fence must be re-issued for the new allocation; the zeroed
 * CS_URB_STATE and CONSTANT_BUFFER packets leave the push constant
 * buffer disabled. */
batch->screen->vtbl.upload_urb_fence(batch);
blorp_emit(blorp_batch, GENX(CS_URB_STATE), curb);
blorp_emit(blorp_batch, GENX(CONSTANT_BUFFER), curb);
}

View File

@ -0,0 +1,90 @@
# Copyright © 2017-2019 Intel Corporation
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
files_libcrocus = files(
'gen4_blorp_exec.h',
'driinfo_crocus.h',
'crocus_batch.c',
'crocus_batch.h',
'crocus_blit.c',
'crocus_bufmgr.c',
'crocus_bufmgr.h',
'crocus_clear.c',
'crocus_context.c',
'crocus_context.h',
'crocus_draw.c',
'crocus_fence.c',
'crocus_fence.h',
'crocus_fine_fence.c',
'crocus_fine_fence.h',
'crocus_formats.c',
'crocus_genx_macros.h',
'crocus_genx_protos.h',
'crocus_monitor.c',
'crocus_pipe.h',
'crocus_pipe_control.c',
'crocus_program.c',
'crocus_program_cache.c',
'crocus_resolve.c',
'crocus_resource.c',
'crocus_resource.h',
'crocus_screen.c',
'crocus_screen.h',
'crocus_disk_cache.c',
)
crocus_per_hw_ver_libs = []
# One static library per hardware generation (gfx 4.0 through 7.5); each is
# compiled with the matching GFX_VERx10 so the genxml state packing is
# specialized per generation.
foreach v : ['40', '45', '50', '60', '70', '75']
crocus_per_hw_ver_libs += static_library(
'crocus_per_hw_ver@0@'.format(v),
['crocus_blorp.c', 'crocus_query.c', 'crocus_state.c', 'crocus_blt.c', gen_xml_pack],
include_directories : [inc_include, inc_src, inc_mapi, inc_mesa, inc_gallium, inc_gallium_aux, inc_intel],
c_args : [
no_override_init_args, c_sse2_args,
'-DGFX_VERx10=@0@'.format(v),
],
gnu_symbol_visibility : 'hidden',
dependencies : [dep_libdrm, dep_valgrind, idep_nir_headers],
)
endforeach
libcrocus = static_library(
'crocus',
[files_libcrocus, gen_xml_pack],
include_directories : [
inc_src, inc_include, inc_gallium, inc_gallium_aux, inc_intel,
inc_gallium_drivers,
# these should not be necessary, but main/macros.h...
inc_mesa, inc_mapi
],
c_args : [c_sse2_args],
cpp_args : [c_sse2_args],
gnu_symbol_visibility : 'hidden',
dependencies : [dep_libdrm, dep_valgrind, idep_genxml, idep_libintel_common, idep_nir_headers],
link_with : [
crocus_per_hw_ver_libs, libintel_compiler, libintel_dev, libisl,
libblorp, libintel_perf
],
)
driver_crocus = declare_dependency(
compile_args : '-DGALLIUM_CROCUS',
link_with : [libcrocus, libcrocuswinsys],
)

View File

@ -129,6 +129,12 @@ if with_gallium_tegra
else
driver_tegra = declare_dependency()
endif
if with_gallium_crocus
subdir('winsys/crocus/drm')
subdir('drivers/crocus')
else
driver_crocus = declare_dependency()
endif
if with_gallium_iris
subdir('winsys/iris/drm')
subdir('drivers/iris')

View File

@ -64,7 +64,7 @@ libgallium_nine = shared_library(
dep_selinux, dep_libdrm, dep_llvm, dep_thread,
idep_xmlconfig, idep_mesautil, idep_nir,
driver_swrast, driver_r300, driver_r600, driver_radeonsi, driver_nouveau,
driver_i915, driver_svga, driver_iris
driver_i915, driver_svga, driver_iris, driver_crocus
],
name_prefix : '',
version : '.'.join(nine_version),

View File

@ -58,7 +58,7 @@ libgallium_dri = shared_library(
driver_kmsro, driver_v3d, driver_vc4, driver_freedreno, driver_etnaviv,
driver_tegra, driver_i915, driver_svga, driver_virgl,
driver_swr, driver_panfrost, driver_iris, driver_lima, driver_zink, driver_d3d12,
driver_asahi
driver_asahi, driver_crocus
],
# Will be deleted during installation, see install_megadrivers.py
install : true,
@ -98,6 +98,7 @@ foreach d : [[with_gallium_kmsro, [
[with_gallium_panfrost, 'panfrost_dri.so'],
[with_gallium_etnaviv, 'etnaviv_dri.so'],
[with_gallium_tegra, 'tegra_dri.so'],
[with_gallium_crocus, 'crocus_dri.so'],
[with_gallium_iris, 'iris_dri.so'],
[with_gallium_i915, 'i915_dri.so'],
[with_gallium_r300, 'r300_dri.so'],

View File

@ -42,6 +42,10 @@ DEFINE_LOADER_DRM_ENTRYPOINT(i915)
DEFINE_LOADER_DRM_ENTRYPOINT(iris)
#endif
#if defined(GALLIUM_CROCUS)
DEFINE_LOADER_DRM_ENTRYPOINT(crocus)
#endif
#if defined(GALLIUM_NOUVEAU)
DEFINE_LOADER_DRM_ENTRYPOINT(nouveau)
#endif

View File

@ -0,0 +1,33 @@
/*
* Copyright © 2017 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#ifndef CROCUS_DRM_PUBLIC_H
#define CROCUS_DRM_PUBLIC_H
struct pipe_screen;
struct pipe_screen_config;
struct pipe_screen *
crocus_drm_screen_create(int drm_fd, const struct pipe_screen_config *config);
#endif /* CROCUS_DRM_PUBLIC_H */

View File

@ -0,0 +1,39 @@
/*
* Copyright © 2017 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include <unistd.h>
#include <fcntl.h>
#include "util/os_file.h"
#include "crocus_drm_public.h"
#include "crocus/crocus_screen.h"
struct pipe_screen *
crocus_drm_screen_create(int fd, const struct pipe_screen_config *config)
{
int newfd = os_dupfd_cloexec(fd);
if (newfd < 0)
return NULL;
return crocus_screen_create(newfd, config);
}
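#if 0
/* Hypothetical caller sketch, not part of this patch: because the screen
 * takes a CLOEXEC duplicate of the fd, the caller keeps ownership of its
 * own descriptor and may close it once the screen has been created.
 * (Assumes a filled-in pipe_screen_config named 'config'.) */
int fd = open("/dev/dri/renderD128", O_RDWR | O_CLOEXEC);
struct pipe_screen *screen = crocus_drm_screen_create(fd, &config);
close(fd);
#endif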

View File

@ -0,0 +1,29 @@
# Copyright © 2017 Intel Corporation
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
libcrocuswinsys = static_library(
'crocuswinsys',
files('crocus_drm_winsys.c'),
include_directories : [
inc_src, inc_include,
inc_gallium, inc_gallium_aux, inc_gallium_drivers,
],
gnu_symbol_visibility : 'hidden',
)

View File

@ -829,7 +829,7 @@ decode_dynamic_state_pointers(struct intel_batch_decode_ctx *ctx,
struct intel_field_iterator iter;
intel_field_iterator_init(&iter, inst, p, 0, false);
while (intel_field_iterator_next(&iter)) {
if (str_ends_with(iter.name, "Pointer")) {
if (str_ends_with(iter.name, "Pointer") || !strncmp(iter.name, "Pointer", 7)) {
state_offset = iter.raw_value;
break;
}
@ -900,6 +900,13 @@ decode_3dstate_cc_state_pointers(struct intel_batch_decode_ctx *ctx,
decode_dynamic_state_pointers(ctx, "COLOR_CALC_STATE", p, 1);
}
static void
decode_3dstate_ds_state_pointers(struct intel_batch_decode_ctx *ctx,
const uint32_t *p)
{
decode_dynamic_state_pointers(ctx, "DEPTH_STENCIL_STATE", p, 1);
}
static void
decode_3dstate_scissor_state_pointers(struct intel_batch_decode_ctx *ctx,
const uint32_t *p)
@ -1208,6 +1215,7 @@ struct custom_decoder {
{ "3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP", decode_3dstate_viewport_state_pointers_sf_clip },
{ "3DSTATE_BLEND_STATE_POINTERS", decode_3dstate_blend_state_pointers },
{ "3DSTATE_CC_STATE_POINTERS", decode_3dstate_cc_state_pointers },
{ "3DSTATE_DEPTH_STENCIL_STATE_POINTERS", decode_3dstate_ds_state_pointers },
{ "3DSTATE_SCISSOR_STATE_POINTERS", decode_3dstate_scissor_state_pointers },
{ "3DSTATE_SLICE_TABLE_STATE_POINTERS", decode_3dstate_slice_table_state_pointers },
{ "MI_LOAD_REGISTER_IMM", decode_load_register_imm },

View File

@ -76,6 +76,7 @@ static const struct {
{ 0x8086, "i915", i915_chip_ids, ARRAY_SIZE(i915_chip_ids) },
{ 0x8086, "i965", i965_chip_ids, ARRAY_SIZE(i965_chip_ids) },
{ 0x8086, "iris", NULL, -1, is_kernel_i915 },
{ 0x8086, "crocus", NULL, -1, is_kernel_i915 },
{ 0x1002, "radeon", r100_chip_ids, ARRAY_SIZE(r100_chip_ids) },
{ 0x1002, "r200", r200_chip_ids, ARRAY_SIZE(r200_chip_ids) },
{ 0x1002, "r300", r300_chip_ids, ARRAY_SIZE(r300_chip_ids) },