From 4861835d1cc07e5068694905b5a3538303f6de32 Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Tue, 3 Nov 2015 12:51:32 -0800
Subject: [PATCH 001/287] i965: Fix the fs_visitor GS constructor to take
 shader_time_index.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Jason reworked this so it isn't simply ST_GS anymore...it's either -1
(not enabled) or an actual offset.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
---
 src/mesa/drivers/dri/i965/brw_fs.h           | 3 ++-
 src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 5 +++--
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index 8058b344b7a..caf56555981 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -105,7 +105,8 @@ public:
               void *mem_ctx,
               struct brw_gs_compile *gs_compile,
               struct brw_gs_prog_data *prog_data,
-              const nir_shader *shader);
+              const nir_shader *shader,
+              int shader_time_index);
    void init();
    ~fs_visitor();
 
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index 5c57944ca39..b6d1c3b6d4a 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -1112,13 +1112,14 @@ fs_visitor::fs_visitor(const struct brw_compiler *compiler, void *log_data,
                        void *mem_ctx,
                        struct brw_gs_compile *c,
                        struct brw_gs_prog_data *prog_data,
-                       const nir_shader *shader)
+                       const nir_shader *shader,
+                       int shader_time_index)
    : backend_shader(compiler, log_data, mem_ctx, shader,
                     &prog_data->base.base),
      key(&c->key), gs_compile(c),
      prog_data(&prog_data->base.base), prog(NULL),
      dispatch_width(8),
-     shader_time_index(ST_GS),
+     shader_time_index(shader_time_index),
      bld(fs_builder(this, dispatch_width).at_end())
 {
    init();

From c9541a74e4d179ad844bdf8af1e3de541c5b14c2 Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Wed, 23 Sep 2015 20:52:19 -0700
Subject: [PATCH 002/287] i965: Add scalar GS input lowering code.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We really ought to compute the VUE map at link time and stash it, rather
than recomputing it here, but with the mess of program structures I
wasn't sure where to put it.  We can improve that later.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
---
 src/mesa/drivers/dri/i965/brw_nir.c | 44 +++++++++++++++++++++++++----
 1 file changed, 39 insertions(+), 5 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_nir.c b/src/mesa/drivers/dri/i965/brw_nir.c
index 11f111382f4..a7a5eb511cd 100644
--- a/src/mesa/drivers/dri/i965/brw_nir.c
+++ b/src/mesa/drivers/dri/i965/brw_nir.c
@@ -56,7 +56,8 @@ remap_vs_attrs(nir_block *block, void *closure)
 }
 
 static void
-brw_nir_lower_inputs(nir_shader *nir, bool is_scalar)
+brw_nir_lower_inputs(const struct brw_device_info *devinfo,
+                     nir_shader *nir, bool is_scalar)
 {
    switch (nir->stage) {
    case MESA_SHADER_VERTEX:
@@ -90,11 +91,43 @@ brw_nir_lower_inputs(nir_shader *nir, bool is_scalar)
          }
       }
       break;
-   case MESA_SHADER_GEOMETRY:
-      foreach_list_typed(nir_variable, var, node, &nir->inputs) {
-         var->data.driver_location = var->data.location;
+   case MESA_SHADER_GEOMETRY: {
+      if (!is_scalar) {
+         foreach_list_typed(nir_variable, var, node, &nir->inputs) {
+            var->data.driver_location = var->data.location;
+         }
+      } else {
+         /* The GLSL linker will have already matched up GS inputs and
+          * the outputs of prior stages.  The driver does extend VS outputs
+          * in some cases, but only for legacy OpenGL or Gen4-5 hardware,
+          * neither of which offer geometry shader support.  So we can
+          * safely ignore that.
+          *
+          * For SSO pipelines, we use a fixed VUE map layout based on variable
+          * locations, so we can rely on rendezvous-by-location to make this
+          * work.
+          *
+          * However, we need to ignore VARYING_SLOT_PRIMITIVE_ID, as it's not
+          * written by previous stages and shows up via payload magic.
+          */
+         struct brw_vue_map input_vue_map;
+         GLbitfield64 inputs_read =
+            nir->info.inputs_read & ~VARYING_BIT_PRIMITIVE_ID;
+         brw_compute_vue_map(devinfo, &input_vue_map, inputs_read,
+                             nir->info.separate_shader);
+
+         /* Start with the slot for the variable's base. */
+         foreach_list_typed(nir_variable, var, node, &nir->inputs) {
+            assert(input_vue_map.varying_to_slot[var->data.location] != -1);
+            var->data.driver_location =
+               input_vue_map.varying_to_slot[var->data.location];
+         }
+
+         /* Inputs are stored in vec4 slots, so use type_size_vec4(). */
+         nir_lower_io(nir, nir_var_shader_in, type_size_vec4);
       }
       break;
+   }
    case MESA_SHADER_FRAGMENT:
       assert(is_scalar);
       nir_assign_var_locations(&nir->inputs, &nir->num_inputs,
@@ -187,6 +220,7 @@ brw_create_nir(struct brw_context *brw,
                bool is_scalar)
 {
    struct gl_context *ctx = &brw->ctx;
+   const struct brw_device_info *devinfo = brw->intelScreen->devinfo;
    const nir_shader_compiler_options *options =
       ctx->Const.ShaderCompilerOptions[stage].NirOptions;
    static const nir_lower_tex_options tex_options = {
@@ -230,7 +264,7 @@ brw_create_nir(struct brw_context *brw,
    /* Get rid of split copies */
    nir_optimize(nir, is_scalar);
 
-   brw_nir_lower_inputs(nir, is_scalar);
+   brw_nir_lower_inputs(devinfo, nir, is_scalar);
    brw_nir_lower_outputs(nir, is_scalar);
    nir_assign_var_locations(&nir->uniforms,
                             &nir->num_uniforms,

From 36fd65381756ed1b8f774f7fcdd555941a3d39e1 Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Wed, 11 Mar 2015 23:14:31 -0700
Subject: [PATCH 003/287] i965: Add scalar geometry shader support.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This is hidden behind INTEL_SCALAR_GS=1 for now, as we don't yet support
instanced geometry shaders, and Orbital Explorer's shader spills like
crazy.  But the infrastructure is in place, and it's largely working.

v2: Lots of rebasing.

v3: (feedback from Kristian Høgsberg)
- Handle stride and subreg_offset correctly for ATTRs; use a helper.
- Fix missing emit_shader_time_end() call.
- Delete dead code after early EOT in static vertex case to avoid
  tripping asserts in emit_shader_time_end().
- Use proper D/UD type in intexp2().
- Fix "EndPrimitve" and "to that" typos.
- Assert that invocations == 1 so we know this is missing.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
---
 src/mesa/drivers/dri/i965/brw_fs.cpp          | 210 +++++++++-
 src/mesa/drivers/dri/i965/brw_fs.h            |  17 +-
 src/mesa/drivers/dri/i965/brw_fs_nir.cpp      | 391 ++++++++++++++++++
 src/mesa/drivers/dri/i965/brw_fs_visitor.cpp  |  49 ++-
 .../drivers/dri/i965/brw_vec4_gs_visitor.cpp  |  25 ++
 5 files changed, 667 insertions(+), 25 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 5ab8c15bc0c..4cc962613b3 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -43,6 +43,7 @@
 #include "brw_wm.h"
 #include "brw_fs.h"
 #include "brw_cs.h"
+#include "brw_vec4_gs_visitor.h"
 #include "brw_cfg.h"
 #include "brw_dead_control_flow.h"
 #include "main/uniforms.h"
@@ -1360,6 +1361,57 @@ fs_visitor::emit_discard_jump()
    discard_jump->predicate_inverse = true;
 }
 
+void
+fs_visitor::emit_gs_thread_end()
+{
+   assert(stage == MESA_SHADER_GEOMETRY);
+
+   struct brw_gs_prog_data *gs_prog_data =
+      (struct brw_gs_prog_data *) prog_data;
+
+   if (gs_compile->control_data_header_size_bits > 0) {
+      emit_gs_control_data_bits(this->final_gs_vertex_count);
+   }
+
+   const fs_builder abld = bld.annotate("thread end");
+   fs_inst *inst;
+
+   if (gs_prog_data->static_vertex_count != -1) {
+      foreach_in_list_reverse(fs_inst, prev, &this->instructions) {
+         if (prev->opcode == SHADER_OPCODE_URB_WRITE_SIMD8 ||
+             prev->opcode == SHADER_OPCODE_URB_WRITE_SIMD8_MASKED ||
+             prev->opcode == SHADER_OPCODE_URB_WRITE_SIMD8_PER_SLOT ||
+             prev->opcode == SHADER_OPCODE_URB_WRITE_SIMD8_MASKED_PER_SLOT) {
+            prev->eot = true;
+
+            /* Delete now dead instructions. */
+            foreach_in_list_reverse_safe(exec_node, dead, &this->instructions) {
+               if (dead == prev)
+                  break;
+               dead->remove();
+            }
+            return;
+         } else if (prev->is_control_flow() || prev->has_side_effects()) {
+            break;
+         }
+      }
+      fs_reg hdr = abld.vgrf(BRW_REGISTER_TYPE_UD, 1);
+      abld.MOV(hdr, fs_reg(retype(brw_vec8_grf(1, 0), BRW_REGISTER_TYPE_UD)));
+      inst = abld.emit(SHADER_OPCODE_URB_WRITE_SIMD8, reg_undef, hdr);
+      inst->mlen = 1;
+   } else {
+      fs_reg payload = abld.vgrf(BRW_REGISTER_TYPE_UD, 2);
+      fs_reg *sources = ralloc_array(mem_ctx, fs_reg, 2);
+      sources[0] = fs_reg(retype(brw_vec8_grf(1, 0), BRW_REGISTER_TYPE_UD));
+      sources[1] = this->final_gs_vertex_count;
+      abld.LOAD_PAYLOAD(payload, sources, 2, 2);
+      inst = abld.emit(SHADER_OPCODE_URB_WRITE_SIMD8, reg_undef, payload);
+      inst->mlen = 2;
+   }
+   inst->eot = true;
+   inst->offset = 0;
+}
+
 void
 fs_visitor::assign_curb_setup()
 {
@@ -1531,6 +1583,26 @@ fs_visitor::assign_urb_setup()
    this->first_non_payload_grf += prog_data->num_varying_inputs * 2;
 }
 
+void
+fs_visitor::convert_attr_sources_to_hw_regs(fs_inst *inst)
+{
+   for (int i = 0; i < inst->sources; i++) {
+      if (inst->src[i].file == ATTR) {
+         int grf = payload.num_regs +
+                   prog_data->curb_read_length +
+                   inst->src[i].reg +
+                   inst->src[i].reg_offset;
+
+         inst->src[i].file = HW_REG;
+         inst->src[i].fixed_hw_reg =
+            stride(byte_offset(retype(brw_vec8_grf(grf, 0), inst->src[i].type),
+                               inst->src[i].subreg_offset),
+                   inst->exec_size * inst->src[i].stride,
+                   inst->exec_size, inst->src[i].stride);
+      }
+   }
+}
+
 void
 fs_visitor::assign_vs_urb_setup()
 {
@@ -1548,24 +1620,44 @@ fs_visitor::assign_vs_urb_setup()
 
    /* Rewrite all ATTR file references to the hw grf that they land in. */
    foreach_block_and_inst(block, fs_inst, inst, cfg) {
-      for (int i = 0; i < inst->sources; i++) {
-         if (inst->src[i].file == ATTR) {
-            int grf = payload.num_regs +
-                      prog_data->curb_read_length +
-                      inst->src[i].reg +
-                      inst->src[i].reg_offset;
-
-            inst->src[i].file = HW_REG;
-            inst->src[i].fixed_hw_reg =
-               stride(byte_offset(retype(brw_vec8_grf(grf, 0), inst->src[i].type),
-                                  inst->src[i].subreg_offset),
-                      inst->exec_size * inst->src[i].stride,
-                      inst->exec_size, inst->src[i].stride);
-         }
-      }
+      convert_attr_sources_to_hw_regs(inst);
    }
 }
 
+void
+fs_visitor::assign_gs_urb_setup()
+{
+   assert(stage == MESA_SHADER_GEOMETRY);
+
+   brw_vue_prog_data *vue_prog_data = (brw_vue_prog_data *) prog_data;
+
+   first_non_payload_grf +=
+      8 * vue_prog_data->urb_read_length * nir->info.gs.vertices_in;
+
+   const unsigned first_icp_handle = payload.num_regs -
+      (vue_prog_data->include_vue_handles ? nir->info.gs.vertices_in : 0);
+
+   foreach_block_and_inst(block, fs_inst, inst, cfg) {
+      /* Lower URB_READ_SIMD8 opcodes into real messages. */
+      if (inst->opcode == SHADER_OPCODE_URB_READ_SIMD8) {
+         assert(inst->src[0].file == IMM);
+         inst->src[0] = retype(brw_vec8_grf(first_icp_handle +
+                                            inst->src[0].fixed_hw_reg.dw1.ud,
+                                            0), BRW_REGISTER_TYPE_UD);
+         /* for now, assume constant - we can do per-slot offsets later */
+         assert(inst->src[1].file == IMM);
+         inst->offset = inst->src[1].fixed_hw_reg.dw1.ud;
+         inst->src[1] = fs_reg();
+         inst->mlen = 1;
+         inst->base_mrf = -1;
+      }
+
+      /* Rewrite all ATTR file references to HW_REGs. */
+      convert_attr_sources_to_hw_regs(inst);
+   }
+}
+
+
 /**
  * Split large virtual GRFs into separate components if we can.
  *
@@ -4762,6 +4854,45 @@ fs_visitor::setup_vs_payload()
  *    conveying the data, and thereby reduce push constant usage.
  *
  */
+void
+fs_visitor::setup_gs_payload()
+{
+   assert(stage == MESA_SHADER_GEOMETRY);
+
+   struct brw_gs_prog_data *gs_prog_data =
+      (struct brw_gs_prog_data *) prog_data;
+   struct brw_vue_prog_data *vue_prog_data =
+      (struct brw_vue_prog_data *) prog_data;
+
+   /* R0: thread header, R1: output URB handles */
+   payload.num_regs = 2;
+
+   if (gs_prog_data->include_primitive_id) {
+      /* R2: Primitive ID 0..7 */
+      payload.num_regs++;
+   }
+
+   /* Use a maximum of 32 registers for push-model inputs. */
+   const unsigned max_push_components = 32;
+
+   /* If pushing our inputs would take too many registers, reduce the URB read
+    * length (which is in HWords, or 8 registers), and resort to pulling.
+    *
+    * Note that the GS reads <URB Read Length> HWords for every vertex - so we
+    * have to multiply by VerticesIn to obtain the total storage requirement.
+    */
+   if (8 * vue_prog_data->urb_read_length * nir->info.gs.vertices_in >
+       max_push_components) {
+      gs_prog_data->base.include_vue_handles = true;
+
+      /* R3..RN: ICP Handles for each incoming vertex (when using pull model) */
+      payload.num_regs += nir->info.gs.vertices_in;
+
+      vue_prog_data->urb_read_length =
+         ROUND_DOWN_TO(max_push_components / nir->info.gs.vertices_in, 8) / 8;
+   }
+}
+
 void
 fs_visitor::setup_cs_payload()
 {
@@ -5018,6 +5149,55 @@ fs_visitor::run_vs(gl_clip_plane *clip_planes)
    return !failed;
 }
 
+bool
+fs_visitor::run_gs()
+{
+   assert(stage == MESA_SHADER_GEOMETRY);
+
+   setup_gs_payload();
+
+   this->final_gs_vertex_count = vgrf(glsl_type::uint_type);
+
+   if (gs_compile->control_data_header_size_bits > 0) {
+      /* Create a VGRF to store accumulated control data bits. */
+      this->control_data_bits = vgrf(glsl_type::uint_type);
+
+      /* If we're outputting more than 32 control data bits, then EmitVertex()
+       * will set control_data_bits to 0 after emitting the first vertex.
+       * Otherwise, we need to initialize it to 0 here.
+       */
+      if (gs_compile->control_data_header_size_bits <= 32) {
+         const fs_builder abld = bld.annotate("initialize control data bits");
+         abld.MOV(this->control_data_bits, fs_reg(0u));
+      }
+   }
+
+   if (shader_time_index >= 0)
+      emit_shader_time_begin();
+
+   emit_nir_code();
+
+   emit_gs_thread_end();
+
+   if (shader_time_index >= 0)
+      emit_shader_time_end();
+
+   if (failed)
+      return false;
+
+   calculate_cfg();
+
+   optimize();
+
+   assign_curb_setup();
+   assign_gs_urb_setup();
+
+   fixup_3src_null_dest();
+   allocate_registers();
+
+   return !failed;
+}
+
 bool
 fs_visitor::run_fs(bool do_rep_send)
 {
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index caf56555981..2dfcab1c51a 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -132,18 +132,22 @@ public:
 
    bool run_fs(bool do_rep_send);
    bool run_vs(gl_clip_plane *clip_planes);
+   bool run_gs();
    bool run_cs();
    void optimize();
    void allocate_registers();
    void setup_payload_gen4();
    void setup_payload_gen6();
    void setup_vs_payload();
+   void setup_gs_payload();
    void setup_cs_payload();
    void fixup_3src_null_dest();
    void assign_curb_setup();
    void calculate_urb_setup();
    void assign_urb_setup();
+   void convert_attr_sources_to_hw_regs(fs_inst *inst);
    void assign_vs_urb_setup();
+   void assign_gs_urb_setup();
    bool assign_regs(bool allow_spilling);
    void assign_regs_trivial();
    void calculate_payload_ranges(int payload_node_count,
@@ -281,7 +285,16 @@ public:
                                  fs_reg color1, fs_reg color2,
                                  fs_reg src0_alpha, unsigned components);
    void emit_fb_writes();
-   void emit_urb_writes();
+   void emit_urb_writes(const fs_reg &gs_vertex_count = fs_reg());
+   void set_gs_stream_control_data_bits(const fs_reg &vertex_count,
+                                        unsigned stream_id);
+   void emit_gs_control_data_bits(const fs_reg &vertex_count);
+   void emit_gs_end_primitive(const nir_src &vertex_count_nir_src);
+   void emit_gs_vertex(const nir_src &vertex_count_nir_src,
+                       unsigned stream_id);
+   void emit_gs_thread_end();
+   void emit_gs_input_load(const fs_reg &dst, const nir_src &vertex_src,
+                           unsigned offset, unsigned num_components);
    void emit_cs_terminate();
    fs_reg *emit_cs_local_invocation_id_setup();
    fs_reg *emit_cs_work_group_id_setup();
@@ -389,6 +402,8 @@ public:
    fs_reg delta_xy[BRW_WM_BARYCENTRIC_INTERP_MODE_COUNT];
    fs_reg shader_start_time;
    fs_reg userplane[MAX_CLIP_PLANES];
+   fs_reg final_gs_vertex_count;
+   fs_reg control_data_bits;
 
    unsigned grf_used;
    bool spilled_any_registers;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index 7eeff93e465..b6eab069a1f 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -28,6 +28,7 @@
 #include "program/prog_to_nir.h"
 #include "brw_fs.h"
 #include "brw_fs_surface_builder.h"
+#include "brw_vec4_gs_visitor.h"
 #include "brw_nir.h"
 #include "brw_fs_surface_builder.h"
 #include "brw_vec4_gs_visitor.h"
@@ -102,6 +103,7 @@ fs_visitor::nir_setup_outputs()
 
       switch (stage) {
       case MESA_SHADER_VERTEX:
+      case MESA_SHADER_GEOMETRY:
          for (unsigned int i = 0; i < ALIGN(type_size_scalar(var->type), 4) / 4; i++) {
             int output = var->data.location + i;
             this->outputs[output] = offset(reg, bld, 4 * i);
@@ -1194,6 +1196,375 @@ emit_pixel_interpolater_send(const fs_builder &bld,
    return inst;
 }
 
+/**
+ * Computes 1 << x, given a D/UD register containing some value x.
+ */
+static fs_reg
+intexp2(const fs_builder &bld, const fs_reg &x)
+{
+   assert(x.type == BRW_REGISTER_TYPE_UD || x.type == BRW_REGISTER_TYPE_D);
+
+   fs_reg result = bld.vgrf(x.type, 1);
+   fs_reg one = bld.vgrf(x.type, 1);
+
+   bld.MOV(one, retype(fs_reg(1), one.type));
+   bld.SHL(result, one, x);
+   return result;
+}
+
+void
+fs_visitor::emit_gs_end_primitive(const nir_src &vertex_count_nir_src)
+{
+   assert(stage == MESA_SHADER_GEOMETRY);
+
+   struct brw_gs_prog_data *gs_prog_data =
+      (struct brw_gs_prog_data *) prog_data;
+
+   /* We can only do EndPrimitive() functionality when the control data
+    * consists of cut bits.  Fortunately, the only time it isn't is when the
+    * output type is points, in which case EndPrimitive() is a no-op.
+    */
+   if (gs_prog_data->control_data_format !=
+       GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_CUT) {
+      return;
+   }
+
+   /* Cut bits use one bit per vertex. */
+   assert(gs_compile->control_data_bits_per_vertex == 1);
+
+   fs_reg vertex_count = get_nir_src(vertex_count_nir_src);
+   vertex_count.type = BRW_REGISTER_TYPE_UD;
+
+   /* Cut bit n should be set to 1 if EndPrimitive() was called after emitting
+    * vertex n, 0 otherwise.  So all we need to do here is mark bit
+    * (vertex_count - 1) % 32 in the cut_bits register to indicate that
+    * EndPrimitive() was called after emitting vertex (vertex_count - 1);
+    * fs_visitor::emit_gs_control_data_bits() will take care of the rest.
+    *
+    * Note that if EndPrimitive() is called before emitting any vertices, this
+    * will cause us to set bit 31 of the control_data_bits register to 1.
+    * That's fine because:
+    *
+    * - If max_vertices < 32, then vertex number 31 (zero-based) will never be
+    *   output, so the hardware will ignore cut bit 31.
+    *
+    * - If max_vertices == 32, then vertex number 31 is guaranteed to be the
+    *   last vertex, so setting cut bit 31 has no effect (since the primitive
+    *   is automatically ended when the GS terminates).
+    *
+    * - If max_vertices > 32, then emit_gs_vertex() will reset the
+    *   control_data_bits register to 0 when the first vertex is emitted.
+    */
+
+   const fs_builder abld = bld.annotate("end primitive");
+
+   /* control_data_bits |= 1 << ((vertex_count - 1) % 32) */
+   fs_reg prev_count = bld.vgrf(BRW_REGISTER_TYPE_UD, 1);
+   abld.ADD(prev_count, vertex_count, fs_reg(0xffffffffu));
+   fs_reg mask = intexp2(abld, prev_count);
+   /* Note: we're relying on the fact that the GEN SHL instruction only pays
+    * attention to the lower 5 bits of its second source argument, so on this
+    * architecture, 1 << (vertex_count - 1) is equivalent to 1 <<
+    * ((vertex_count - 1) % 32).
+    */
+   abld.OR(this->control_data_bits, this->control_data_bits, mask);
+}
+
+void
+fs_visitor::emit_gs_control_data_bits(const fs_reg &vertex_count)
+{
+   assert(stage == MESA_SHADER_GEOMETRY);
+   assert(gs_compile->control_data_bits_per_vertex != 0);
+
+   struct brw_gs_prog_data *gs_prog_data =
+      (struct brw_gs_prog_data *) prog_data;
+
+   const fs_builder abld = bld.annotate("emit control data bits");
+   const fs_builder fwa_bld = bld.exec_all();
+
+   /* We use a single UD register to accumulate control data bits (32 bits
+    * for each of the SIMD8 channels).  So we need to write a DWord (32 bits)
+    * at a time.
+    *
+    * Unfortunately, the URB_WRITE_SIMD8 message uses 128-bit (OWord) offsets.
+    * We have to select a 128-bit group via the Global and Per-Slot Offsets,
+    * then use the Channel Mask phase to enable/disable which DWord within that
+    * group to write.  (Remember, different SIMD8 channels may have emitted
+    * different numbers of vertices, so we may need per-slot offsets.)
+    *
+    * Channel masking presents an annoying problem: we may have to replicate
+    * the data up to 4 times:
+    *
+    * Msg = Handles, Per-Slot Offsets, Channel Masks, Data, Data, Data, Data.
+    *
+    * To avoid penalizing shaders that emit a small number of vertices, we
+    * can avoid these sometimes: if the size of the control data header is
+    * <= 128 bits, then there is only 1 OWord.  All SIMD8 channels will land
+    * in the same 128-bit group, so we can skip per-slot offsets.
+    *
+    * Similarly, if the control data header is <= 32 bits, there is only one
+    * DWord, so we can skip channel masks.
+    */
+   enum opcode opcode = SHADER_OPCODE_URB_WRITE_SIMD8;
+
+   fs_reg channel_mask, per_slot_offset;
+
+   if (gs_compile->control_data_header_size_bits > 32) {
+      opcode = SHADER_OPCODE_URB_WRITE_SIMD8_MASKED;
+      channel_mask = vgrf(glsl_type::uint_type);
+   }
+
+   if (gs_compile->control_data_header_size_bits > 128) {
+      opcode = SHADER_OPCODE_URB_WRITE_SIMD8_MASKED_PER_SLOT;
+      per_slot_offset = vgrf(glsl_type::uint_type);
+   }
+
+   /* Figure out which DWord we're trying to write to using the formula:
+    *
+    *    dword_index = (vertex_count - 1) * bits_per_vertex / 32
+    *
+    * Since bits_per_vertex is a power of two, and is known at compile
+    * time, this can be optimized to:
+    *
+    *    dword_index = (vertex_count - 1) >> (6 - fls(bits_per_vertex))
+    */
+   if (opcode != SHADER_OPCODE_URB_WRITE_SIMD8) {
+      fs_reg dword_index = bld.vgrf(BRW_REGISTER_TYPE_UD, 1);
+      fs_reg prev_count = bld.vgrf(BRW_REGISTER_TYPE_UD, 1);
+      abld.ADD(prev_count, vertex_count, fs_reg(0xffffffffu));
+      unsigned log2_bits_per_vertex =
+         _mesa_fls(gs_compile->control_data_bits_per_vertex);
+      abld.SHR(dword_index, prev_count, fs_reg(6u - log2_bits_per_vertex));
+
+      if (per_slot_offset.file != BAD_FILE) {
+         /* Set the per-slot offset to dword_index / 4, so that we'll write to
+          * the appropriate OWord within the control data header.
+          */
+         abld.SHR(per_slot_offset, dword_index, fs_reg(2u));
+      }
+
+      /* Set the channel masks to 1 << (dword_index % 4), so that we'll
+       * write to the appropriate DWORD within the OWORD.
+       */
+      fs_reg channel = bld.vgrf(BRW_REGISTER_TYPE_UD, 1);
+      fwa_bld.AND(channel, dword_index, fs_reg(3u));
+      channel_mask = intexp2(fwa_bld, channel);
+      /* Then the channel masks need to be in bits 23:16. */
+      fwa_bld.SHL(channel_mask, channel_mask, fs_reg(16u));
+   }
+
+   /* Store the control data bits in the message payload and send it. */
+   int mlen = 2;
+   if (channel_mask.file != BAD_FILE)
+      mlen += 4; /* channel masks, plus 3 extra copies of the data */
+   if (per_slot_offset.file != BAD_FILE)
+      mlen++;
+
+   fs_reg payload = bld.vgrf(BRW_REGISTER_TYPE_UD, mlen);
+   fs_reg *sources = ralloc_array(mem_ctx, fs_reg, mlen);
+   int i = 0;
+   sources[i++] = fs_reg(retype(brw_vec8_grf(1, 0), BRW_REGISTER_TYPE_UD));
+   if (per_slot_offset.file != BAD_FILE)
+      sources[i++] = per_slot_offset;
+   if (channel_mask.file != BAD_FILE)
+      sources[i++] = channel_mask;
+   while (i < mlen) {
+      sources[i++] = this->control_data_bits;
+   }
+
+   abld.LOAD_PAYLOAD(payload, sources, mlen, mlen);
+   fs_inst *inst = abld.emit(opcode, reg_undef, payload);
+   inst->mlen = mlen;
+   /* We need to increment Global Offset by 256-bits to make room for
+    * Broadwell's extra "Vertex Count" payload at the beginning of the
+    * URB entry.  Since this is an OWord message, Global Offset is counted
+    * in 128-bit units, so we must set it to 2.
+    */
+   if (gs_prog_data->static_vertex_count == -1)
+      inst->offset = 2;
+}
+
+void
+fs_visitor::set_gs_stream_control_data_bits(const fs_reg &vertex_count,
+                                            unsigned stream_id)
+{
+   /* control_data_bits |= stream_id << ((2 * (vertex_count - 1)) % 32) */
+
+   /* Note: we are calling this *before* increasing vertex_count, so the
+    * vertex_count argument == vertex_count - 1 in the formula above.
+    */
+
+   /* Stream mode uses 2 bits per vertex */
+   assert(gs_compile->control_data_bits_per_vertex == 2);
+
+   /* Must be a valid stream */
+   assert(stream_id >= 0 && stream_id < MAX_VERTEX_STREAMS);
+
+   /* Control data bits are initialized to 0 so we don't have to set any
+    * bits when sending vertices to stream 0.
+    */
+   if (stream_id == 0)
+      return;
+
+   const fs_builder abld = bld.annotate("set stream control data bits", NULL);
+
+   /* reg::sid = stream_id */
+   fs_reg sid = bld.vgrf(BRW_REGISTER_TYPE_UD, 1);
+   abld.MOV(sid, fs_reg(stream_id));
+
+   /* reg:shift_count = 2 * (vertex_count - 1) */
+   fs_reg shift_count = bld.vgrf(BRW_REGISTER_TYPE_UD, 1);
+   abld.SHL(shift_count, vertex_count, fs_reg(1u));
+
+   /* Note: we're relying on the fact that the GEN SHL instruction only pays
+    * attention to the lower 5 bits of its second source argument, so on this
+    * architecture, stream_id << 2 * (vertex_count - 1) is equivalent to
+    * stream_id << ((2 * (vertex_count - 1)) % 32).
+    */
+   fs_reg mask = bld.vgrf(BRW_REGISTER_TYPE_UD, 1);
+   abld.SHL(mask, sid, shift_count);
+   abld.OR(this->control_data_bits, this->control_data_bits, mask);
+}
+
+void
+fs_visitor::emit_gs_vertex(const nir_src &vertex_count_nir_src,
+                           unsigned stream_id)
+{
+   assert(stage == MESA_SHADER_GEOMETRY);
+
+   struct brw_gs_prog_data *gs_prog_data =
+      (struct brw_gs_prog_data *) prog_data;
+
+   fs_reg vertex_count = get_nir_src(vertex_count_nir_src);
+   vertex_count.type = BRW_REGISTER_TYPE_UD;
+
+   /* Haswell and later hardware ignores the "Render Stream Select" bits
+    * from the 3DSTATE_STREAMOUT packet when the SOL stage is disabled,
+    * and instead sends all primitives down the pipeline for rasterization.
+    * If the SOL stage is enabled, "Render Stream Select" is honored and
+    * primitives bound to non-zero streams are discarded after stream output.
+    *
+    * Since the only purpose of primitives sent to non-zero streams is to
+    * be recorded by transform feedback, we can simply discard all geometry
+    * bound to these streams when transform feedback is disabled.
+    */
+   if (stream_id > 0 && !nir->info.has_transform_feedback_varyings)
+      return;
+
+   /* If we're outputting 32 control data bits or less, then we can wait
+    * until the shader is over to output them all.  Otherwise we need to
+    * output them as we go.  Now is the time to do it, since we're about to
+    * output the vertex_count'th vertex, so it's guaranteed that the
+    * control data bits associated with the (vertex_count - 1)th vertex are
+    * correct.
+    */
+   if (gs_compile->control_data_header_size_bits > 32) {
+      const fs_builder abld =
+         bld.annotate("emit vertex: emit control data bits");
+
+      /* Only emit control data bits if we've finished accumulating a batch
+       * of 32 bits.  This is the case when:
+       *
+       *     (vertex_count * bits_per_vertex) % 32 == 0
+       *
+       * (in other words, when the last 5 bits of vertex_count *
+       * bits_per_vertex are 0).  Assuming bits_per_vertex == 2^n for some
+       * integer n (which is always the case, since bits_per_vertex is
+       * always 1 or 2), this is equivalent to requiring that the last 5-n
+       * bits of vertex_count are 0:
+       *
+       *     vertex_count & (2^(5-n) - 1) == 0
+       *
+       * 2^(5-n) == 2^5 / 2^n == 32 / bits_per_vertex, so this is
+       * equivalent to:
+       *
+       *     vertex_count & (32 / bits_per_vertex - 1) == 0
+       *
+       * TODO: If vertex_count is an immediate, we could do some of this math
+       *       at compile time...
+       */
+      fs_inst *inst =
+         abld.AND(bld.null_reg_d(), vertex_count,
+                  fs_reg(32u / gs_compile->control_data_bits_per_vertex - 1u));
+      inst->conditional_mod = BRW_CONDITIONAL_Z;
+
+      abld.IF(BRW_PREDICATE_NORMAL);
+      /* If vertex_count is 0, then no control data bits have been
+       * accumulated yet, so we can skip emitting them.
+       */
+      abld.CMP(bld.null_reg_d(), vertex_count, fs_reg(0u),
+               BRW_CONDITIONAL_NEQ);
+      abld.IF(BRW_PREDICATE_NORMAL);
+      emit_gs_control_data_bits(vertex_count);
+      abld.emit(BRW_OPCODE_ENDIF);
+
+      /* Reset control_data_bits to 0 so we can start accumulating a new
+       * batch.
+       *
+       * Note: in the case where vertex_count == 0, this neutralizes the
+       * effect of any call to EndPrimitive() that the shader may have
+       * made before outputting its first vertex.
+       */
+      inst = abld.MOV(this->control_data_bits, fs_reg(0u));
+      inst->force_writemask_all = true;
+      abld.emit(BRW_OPCODE_ENDIF);
+   }
+
+   emit_urb_writes(vertex_count);
+
+   /* In stream mode we have to set control data bits for all vertices
+    * unless we have disabled control data bits completely (which we do
+    * for GL_POINTS outputs that don't use streams).
+    */
+   if (gs_compile->control_data_header_size_bits > 0 &&
+       gs_prog_data->control_data_format ==
+          GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_SID) {
+      set_gs_stream_control_data_bits(vertex_count, stream_id);
+   }
+}
+
+void
+fs_visitor::emit_gs_input_load(const fs_reg &dst,
+                               const nir_src &vertex_src,
+                               unsigned input_offset,
+                               unsigned num_components)
+{
+   const brw_vue_prog_data *vue_prog_data = (const brw_vue_prog_data *) prog_data;
+   const unsigned vertex = nir_src_as_const_value(vertex_src)->u[0];
+
+   const unsigned array_stride = vue_prog_data->urb_read_length * 8;
+
+   const bool pushed = 4 * input_offset < array_stride;
+
+   if (input_offset == 0) {
+      /* This is the VUE header, containing VARYING_SLOT_LAYER [.y],
+       * VARYING_SLOT_VIEWPORT [.z], and VARYING_SLOT_PSIZ [.w].
+       * Only gl_PointSize is available as a GS input, so they must
+       * be asking for that input.
+       */
+      if (pushed) {
+         bld.MOV(dst, fs_reg(ATTR, array_stride * vertex + 3, dst.type));
+      } else {
+         fs_reg tmp = bld.vgrf(dst.type, 4);
+         fs_inst *inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8, tmp,
+                                  fs_reg(vertex), fs_reg(0));
+         inst->regs_written = 4;
+         bld.MOV(dst, offset(tmp, bld, 3));
+      }
+   } else {
+      if (pushed) {
+         int index = vertex * array_stride + 4 * input_offset;
+         for (unsigned i = 0; i < num_components; i++) {
+            bld.MOV(offset(dst, bld, i), fs_reg(ATTR, index + i, dst.type));
+         }
+      } else {
+         fs_inst *inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8, dst,
+                                  fs_reg(vertex), fs_reg(input_offset));
+         inst->regs_written = num_components;
+      }
+   }
+}
+
 void
 fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr)
 {
@@ -1579,6 +1950,14 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
       break;
    }
 
+   case nir_intrinsic_load_per_vertex_input_indirect:
+      assert(!"Not allowed");
+      /* fallthrough */
+   case nir_intrinsic_load_per_vertex_input:
+      emit_gs_input_load(dest, instr->src[0], instr->const_index[0],
+                         instr->num_components);
+      break;
+
    /* Handle ARB_gpu_shader5 interpolation intrinsics
     *
     * It's worth a quick word of explanation as to why we handle the full
@@ -1929,6 +2308,18 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
       break;
    }
 
+   case nir_intrinsic_emit_vertex_with_counter:
+      emit_gs_vertex(instr->src[0], instr->const_index[0]);
+      break;
+
+   case nir_intrinsic_end_primitive_with_counter:
+      emit_gs_end_primitive(instr->src[0]);
+      break;
+
+   case nir_intrinsic_set_vertex_count:
+      bld.MOV(this->final_gs_vertex_count, get_nir_src(instr->src[0]));
+      break;
+
    default:
       unreachable("unknown intrinsic");
    }
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index b6d1c3b6d4a..ef92098286c 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -880,7 +880,7 @@ void fs_visitor::compute_clip_distance(gl_clip_plane *clip_planes)
 }
 
 void
-fs_visitor::emit_urb_writes()
+fs_visitor::emit_urb_writes(const fs_reg &gs_vertex_count)
 {
    int slot, urb_offset, length;
    int starting_urb_offset = 0;
@@ -916,9 +916,13 @@ fs_visitor::emit_urb_writes()
       return;
    }
 
+   opcode opcode = SHADER_OPCODE_URB_WRITE_SIMD8;
+   int header_size = 1;
+   fs_reg per_slot_offsets;
+
    if (stage == MESA_SHADER_GEOMETRY) {
       const struct brw_gs_prog_data *gs_prog_data =
-         (const struct brw_gs_prog_data *) prog_data;
+         (const struct brw_gs_prog_data *) this->prog_data;
 
       /* We need to increment the Global Offset to skip over the control data
        * header and the extra "Vertex Count" field (1 HWord) at the beginning
@@ -927,6 +931,27 @@ fs_visitor::emit_urb_writes()
       starting_urb_offset = 2 * gs_prog_data->control_data_header_size_hwords;
       if (gs_prog_data->static_vertex_count == -1)
          starting_urb_offset += 2;
+
+      /* We also need to use per-slot offsets.  The per-slot offset is the
+       * Vertex Count.  SIMD8 mode processes 8 different primitives at a
+       * time; each may output a different number of vertices.
+       */
+      opcode = SHADER_OPCODE_URB_WRITE_SIMD8_PER_SLOT;
+      header_size++;
+
+      /* The URB offset is in 128-bit units, so we need to multiply by 2 */
+      const int output_vertex_size_owords =
+         gs_prog_data->output_vertex_size_hwords * 2;
+
+      fs_reg offset;
+      if (gs_vertex_count.file == IMM) {
+         per_slot_offsets = fs_reg(output_vertex_size_owords *
+                                   gs_vertex_count.fixed_hw_reg.dw1.ud);
+      } else {
+         per_slot_offsets = vgrf(glsl_type::int_type);
+         bld.MUL(per_slot_offsets, gs_vertex_count,
+                 fs_reg(output_vertex_size_owords));
+      }
    }
 
    length = 0;
@@ -1023,19 +1048,25 @@ fs_visitor::emit_urb_writes()
       if (length == 8 || last)
          flush = true;
       if (flush) {
-         fs_reg *payload_sources = ralloc_array(mem_ctx, fs_reg, length + 1);
-         fs_reg payload = fs_reg(GRF, alloc.allocate(length + 1),
+         fs_reg *payload_sources =
+            ralloc_array(mem_ctx, fs_reg, length + header_size);
+         fs_reg payload = fs_reg(GRF, alloc.allocate(length + header_size),
                                  BRW_REGISTER_TYPE_F);
          payload_sources[0] =
             fs_reg(retype(brw_vec8_grf(1, 0), BRW_REGISTER_TYPE_UD));
 
-         memcpy(&payload_sources[1], sources, length * sizeof sources[0]);
-         abld.LOAD_PAYLOAD(payload, payload_sources, length + 1, 1);
+         if (opcode == SHADER_OPCODE_URB_WRITE_SIMD8_PER_SLOT)
+            payload_sources[1] = per_slot_offsets;
 
-         fs_inst *inst =
-            abld.emit(SHADER_OPCODE_URB_WRITE_SIMD8, reg_undef, payload);
+         memcpy(&payload_sources[header_size], sources,
+                length * sizeof sources[0]);
+
+         abld.LOAD_PAYLOAD(payload, payload_sources, length + header_size,
+                           header_size);
+
+         fs_inst *inst = abld.emit(opcode, reg_undef, payload);
          inst->eot = last && stage == MESA_SHADER_VERTEX;
-         inst->mlen = length + 1;
+         inst->mlen = length + header_size;
          inst->offset = urb_offset;
          urb_offset = starting_urb_offset + slot + 1;
          length = 0;
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
index cfb5cd95cb1..49c10837334 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
@@ -29,6 +29,7 @@
 
 #include "brw_vec4_gs_visitor.h"
 #include "gen6_gs_visitor.h"
+#include "brw_fs.h"
 
 namespace brw {
 
@@ -812,6 +813,30 @@ brw_compile_gs(const struct brw_compiler *compiler, void *log_data,
     * program.
     */
 
+   if (compiler->scalar_gs) {
+      /* TODO: Support instanced GS.  We have basically no tests... */
+      assert(prog_data->invocations == 1);
+
+      fs_visitor v(compiler, log_data, mem_ctx, &c, prog_data, shader,
+                   shader_time_index);
+      if (v.run_gs()) {
+         prog_data->base.dispatch_mode = DISPATCH_MODE_SIMD8;
+
+         fs_generator g(compiler, log_data, mem_ctx, &c.key,
+                        &prog_data->base.base, v.promoted_constants,
+                        false, "GS");
+         if (unlikely(INTEL_DEBUG & DEBUG_GS)) {
+            const char *label =
+               shader->info.label ? shader->info.label : "unnamed";
+            char *name = ralloc_asprintf(mem_ctx, "%s geometry shader %s",
+                                         label, shader->info.name);
+            g.enable_debug(name);
+         }
+         g.generate_code(v.cfg, 8);
+         return g.get_assembly(final_assembly_size);
+      }
+   }
+
    if (compiler->devinfo->gen >= 7) {
       /* Compile the geometry shader in DUAL_OBJECT dispatch mode, if we can do
        * so without spilling. If the GS invocations count > 1, then we can't use

From 7f9122c9680a882fee5a9d5a8e09c3e3b7466937 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <marek.olsak@amd.com>
Date: Wed, 28 Oct 2015 12:59:38 +0100
Subject: [PATCH 004/287] gallium/radeon: always return the last SDMA fence on
 SDMA flush if needed
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
---
 src/gallium/drivers/radeon/r600_pipe_common.c | 11 +++++++----
 src/gallium/drivers/radeon/r600_pipe_common.h |  1 +
 2 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c b/src/gallium/drivers/radeon/r600_pipe_common.c
index 0ad36849645..56977c06869 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.c
+++ b/src/gallium/drivers/radeon/r600_pipe_common.c
@@ -192,13 +192,15 @@ static void r600_flush_dma_ring(void *ctx, unsigned flags,
 	struct r600_common_context *rctx = (struct r600_common_context *)ctx;
 	struct radeon_winsys_cs *cs = rctx->rings.dma.cs;
 
-	if (!cs->cdw) {
-		return;
-	}
+	if (!cs->cdw)
+		goto done;
 
 	rctx->rings.dma.flushing = true;
-	rctx->ws->cs_flush(cs, flags, fence, 0);
+	rctx->ws->cs_flush(cs, flags, &rctx->last_sdma_fence, 0);
 	rctx->rings.dma.flushing = false;
+done:
+	if (fence)
+		rctx->ws->fence_reference(fence, rctx->last_sdma_fence);
 }
 
 static enum pipe_reset_status r600_get_reset_status(struct pipe_context *ctx)
@@ -297,6 +299,7 @@ void r600_common_context_cleanup(struct r600_common_context *rctx)
 	if (rctx->allocator_so_filled_size) {
 		u_suballocator_destroy(rctx->allocator_so_filled_size);
 	}
+	rctx->ws->fence_reference(&rctx->last_sdma_fence, NULL);
 }
 
 void r600_context_add_resource_size(struct pipe_context *ctx, struct pipe_resource *r)
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h
index c300c0b3332..b7f1a234baf 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -384,6 +384,7 @@ struct r600_common_context {
 	enum radeon_family		family;
 	enum chip_class			chip_class;
 	struct r600_rings		rings;
+	struct pipe_fence_handle	*last_sdma_fence;
 	unsigned			initial_gfx_cs_size;
 	unsigned			gpu_reset_counter;
 

From 3b37155a68acc351cba86a1fa142bd0de2192d4c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <marek.olsak@amd.com>
Date: Wed, 28 Oct 2015 13:50:08 +0100
Subject: [PATCH 005/287] gallium/radeon: allow returning SDMA fences from
 pipe->flush
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

pipe->flush never returned SDMA fences. This fixes it.
This is only an issue on amdgpu where fences can signal out of order.

Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
---
 src/gallium/drivers/radeon/r600_pipe_common.c | 62 ++++++++++++++++---
 1 file changed, 55 insertions(+), 7 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c b/src/gallium/drivers/radeon/r600_pipe_common.c
index 56977c06869..79e624ea12b 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.c
+++ b/src/gallium/drivers/radeon/r600_pipe_common.c
@@ -31,6 +31,7 @@
 #include "util/u_memory.h"
 #include "util/u_format_s3tc.h"
 #include "util/u_upload_mgr.h"
+#include "os/os_time.h"
 #include "vl/vl_decoder.h"
 #include "vl/vl_video_buffer.h"
 #include "radeon/radeon_video.h"
@@ -40,6 +41,12 @@
 #define HAVE_LLVM 0
 #endif
 
+struct r600_multi_fence {
+	struct pipe_reference reference;
+	struct pipe_fence_handle *gfx;
+	struct pipe_fence_handle *sdma;
+};
+
 /*
  * pipe_context
  */
@@ -174,16 +181,38 @@ static void r600_flush_from_st(struct pipe_context *ctx,
 			       struct pipe_fence_handle **fence,
 			       unsigned flags)
 {
+	struct pipe_screen *screen = ctx->screen;
 	struct r600_common_context *rctx = (struct r600_common_context *)ctx;
 	unsigned rflags = 0;
+	struct pipe_fence_handle *gfx_fence = NULL;
+	struct pipe_fence_handle *sdma_fence = NULL;
 
 	if (flags & PIPE_FLUSH_END_OF_FRAME)
 		rflags |= RADEON_FLUSH_END_OF_FRAME;
 
 	if (rctx->rings.dma.cs) {
-		rctx->rings.dma.flush(rctx, rflags, NULL);
+		rctx->rings.dma.flush(rctx, rflags, fence ? &sdma_fence : NULL);
+	}
+	rctx->rings.gfx.flush(rctx, rflags, fence ? &gfx_fence : NULL);
+
+	/* Both engines can signal out of order, so we need to keep both fences. */
+	if (gfx_fence || sdma_fence) {
+		struct r600_multi_fence *multi_fence =
+			CALLOC_STRUCT(r600_multi_fence);
+		if (!multi_fence) {
+			/* Drop the references we hold so they don't leak on OOM. */
+			rctx->ws->fence_reference(&sdma_fence, NULL);
+			rctx->ws->fence_reference(&gfx_fence, NULL);
+			return;
+		}
+
+		multi_fence->reference.count = 1;
+		multi_fence->gfx = gfx_fence;
+		multi_fence->sdma = sdma_fence;
+
+		screen->fence_reference(screen, fence, NULL);
+		*fence = (struct pipe_fence_handle*)multi_fence;
 	}
-	rctx->rings.gfx.flush(rctx, rflags, fence);
 }
 
 static void r600_flush_dma_ring(void *ctx, unsigned flags,
@@ -757,12 +782,19 @@ static int r600_get_driver_query_info(struct pipe_screen *screen,
 }
 
 static void r600_fence_reference(struct pipe_screen *screen,
-				 struct pipe_fence_handle **ptr,
-				 struct pipe_fence_handle *fence)
+				 struct pipe_fence_handle **dst,
+				 struct pipe_fence_handle *src)
 {
-	struct radeon_winsys *rws = ((struct r600_common_screen*)screen)->ws;
+	struct radeon_winsys *ws = ((struct r600_common_screen*)screen)->ws;
+	struct r600_multi_fence **rdst = (struct r600_multi_fence **)dst;
+	struct r600_multi_fence *rsrc = (struct r600_multi_fence *)src;
 
-	rws->fence_reference(ptr, fence);
+	if (pipe_reference(&(*rdst)->reference, &rsrc->reference)) {
+		ws->fence_reference(&(*rdst)->gfx, NULL);
+		ws->fence_reference(&(*rdst)->sdma, NULL);
+		FREE(*rdst);
+	}
+        *rdst = rsrc;
 }
 
 static boolean r600_fence_finish(struct pipe_screen *screen,
@@ -770,8 +802,24 @@ static boolean r600_fence_finish(struct pipe_screen *screen,
 				 uint64_t timeout)
 {
 	struct radeon_winsys *rws = ((struct r600_common_screen*)screen)->ws;
+	struct r600_multi_fence *rfence = (struct r600_multi_fence *)fence;
+	int64_t abs_timeout = os_time_get_absolute_timeout(timeout);
 
-	return rws->fence_wait(rws, fence, timeout);
+	if (rfence->sdma) {
+		if (!rws->fence_wait(rws, rfence->sdma, timeout))
+			return false;
+
+		/* Recompute the timeout after waiting. */
+		if (timeout && timeout != PIPE_TIMEOUT_INFINITE) {
+			int64_t time = os_time_get_nano();
+			timeout = abs_timeout > time ? abs_timeout - time : 0;
+		}
+	}
+
+	if (!rfence->gfx)
+		return true;
+
+	return rws->fence_wait(rws, rfence->gfx, timeout);
 }
 
 static bool r600_interpret_tiling(struct r600_common_screen *rscreen,

From cf3121ed1885b257217dbac24a131dbfd5f8e438 Mon Sep 17 00:00:00 2001
From: Matt Turner <mattst88@gmail.com>
Date: Fri, 30 Oct 2015 10:07:23 -0700
Subject: [PATCH 006/287] i965/vec4: Send from GRF in atomic operations.

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
---
 .../drivers/dri/i965/brw_vec4_visitor.cpp     | 30 +++++++++++--------
 1 file changed, 18 insertions(+), 12 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index b8f90f2aa20..606fbd06278 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -1183,24 +1183,27 @@ vec4_visitor::gs_end_primitive()
 
 void
 vec4_visitor::emit_untyped_atomic(unsigned atomic_op, unsigned surf_index,
-                                  dst_reg dst, src_reg offset,
+                                  dst_reg dst, src_reg surf_offset,
                                   src_reg src0, src_reg src1)
 {
-   unsigned mlen = 0;
+   unsigned mlen = 1 + (src0.file != BAD_FILE) + (src1.file != BAD_FILE);
+   src_reg src_payload(this, glsl_type::uint_type, mlen);
+   dst_reg payload(src_payload);
+   payload.writemask = WRITEMASK_X;
 
    /* Set the atomic operation offset. */
-   emit(MOV(brw_writemask(brw_uvec_mrf(8, mlen, 0), WRITEMASK_X), offset));
-   mlen++;
+   emit(MOV(offset(payload, 0), surf_offset));
+   unsigned i = 1;
 
    /* Set the atomic operation arguments. */
    if (src0.file != BAD_FILE) {
-      emit(MOV(brw_writemask(brw_uvec_mrf(8, mlen, 0), WRITEMASK_X), src0));
-      mlen++;
+      emit(MOV(offset(payload, i), src0));
+      i++;
    }
 
    if (src1.file != BAD_FILE) {
-      emit(MOV(brw_writemask(brw_uvec_mrf(8, mlen, 0), WRITEMASK_X), src1));
-      mlen++;
+      emit(MOV(offset(payload, i), src1));
+      i++;
    }
 
    /* Emit the instruction.  Note that this maps to the normal SIMD8
@@ -1208,24 +1211,27 @@ vec4_visitor::emit_untyped_atomic(unsigned atomic_op, unsigned surf_index,
     * unused channels will be masked out.
     */
    vec4_instruction *inst = emit(SHADER_OPCODE_UNTYPED_ATOMIC, dst,
-                                 brw_message_reg(0),
+                                 src_payload,
                                  src_reg(surf_index), src_reg(atomic_op));
    inst->mlen = mlen;
 }
 
 void
 vec4_visitor::emit_untyped_surface_read(unsigned surf_index, dst_reg dst,
-                                        src_reg offset)
+                                        src_reg surf_offset)
 {
+   dst_reg offset(this, glsl_type::uint_type);
+   offset.writemask = WRITEMASK_X;
+
    /* Set the surface read offset. */
-   emit(MOV(brw_writemask(brw_uvec_mrf(8, 0, 0), WRITEMASK_X), offset));
+   emit(MOV(offset, surf_offset));
 
    /* Emit the instruction.  Note that this maps to the normal SIMD8
     * untyped surface read message, but that's OK because unused
     * channels will be masked out.
     */
    vec4_instruction *inst = emit(SHADER_OPCODE_UNTYPED_SURFACE_READ, dst,
-                                 brw_message_reg(0),
+                                 src_reg(offset),
                                  src_reg(surf_index), src_reg(1));
    inst->mlen = 1;
 }

From 4bc16ad2176efda5f8c59e222b4735ee35c434b5 Mon Sep 17 00:00:00 2001
From: Jordan Justen <jordan.l.justen@intel.com>
Date: Fri, 23 Oct 2015 16:10:02 -0700
Subject: [PATCH 007/287] mesa: rename UniformBlockStageIndex to
 InterfaceBlockStageIndex
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Jordan Justen <jordan.l.justen@intel.com>
Cc: Samuel Iglesias Gonsálvez <siglesias@igalia.com>
Cc: Iago Toral <itoral@igalia.com>
Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
Reviewed-by: Juha-Pekka Heikkila <juhapekka.heikkila@gmail.com>
---
 src/glsl/link_uniform_initializers.cpp |  2 +-
 src/glsl/linker.cpp                    | 16 ++++++++--------
 src/glsl/standalone_scaffolding.cpp    |  4 ++--
 src/mesa/main/mtypes.h                 | 11 ++++++-----
 src/mesa/main/shader_query.cpp         |  2 +-
 src/mesa/main/shaderobj.c              |  4 ++--
 src/mesa/main/uniforms.c               |  4 ++--
 7 files changed, 22 insertions(+), 21 deletions(-)

diff --git a/src/glsl/link_uniform_initializers.cpp b/src/glsl/link_uniform_initializers.cpp
index 682a4eef13c..58d21e5125e 100644
--- a/src/glsl/link_uniform_initializers.cpp
+++ b/src/glsl/link_uniform_initializers.cpp
@@ -178,7 +178,7 @@ set_block_binding(gl_shader_program *prog, const char *block_name, int binding)
 
       /* This is a field of a UBO.  val is the binding index. */
       for (int i = 0; i < MESA_SHADER_STAGES; i++) {
-         int stage_index = prog->UniformBlockStageIndex[i][block_index];
+         int stage_index = prog->InterfaceBlockStageIndex[i][block_index];
 
          if (stage_index != -1) {
             struct gl_shader *sh = prog->_LinkedShaders[i];
diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp
index c35d87acea6..9dcc2a76c9a 100644
--- a/src/glsl/linker.cpp
+++ b/src/glsl/linker.cpp
@@ -1174,10 +1174,10 @@ interstage_cross_validate_uniform_blocks(struct gl_shader_program *prog)
    for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
       struct gl_shader *sh = prog->_LinkedShaders[i];
 
-      prog->UniformBlockStageIndex[i] = ralloc_array(prog, int,
-						     max_num_uniform_blocks);
+      prog->InterfaceBlockStageIndex[i] = ralloc_array(prog, int,
+                                                       max_num_uniform_blocks);
       for (unsigned int j = 0; j < max_num_uniform_blocks; j++)
-	 prog->UniformBlockStageIndex[i][j] = -1;
+	 prog->InterfaceBlockStageIndex[i][j] = -1;
 
       if (sh == NULL)
 	 continue;
@@ -1194,7 +1194,7 @@ interstage_cross_validate_uniform_blocks(struct gl_shader_program *prog)
 	    return false;
 	 }
 
-	 prog->UniformBlockStageIndex[i][index] = j;
+	 prog->InterfaceBlockStageIndex[i][index] = j;
       }
    }
 
@@ -2836,9 +2836,9 @@ check_resources(struct gl_context *ctx, struct gl_shader_program *prog)
       }
 
       for (unsigned j = 0; j < MESA_SHADER_STAGES; j++) {
-	 if (prog->UniformBlockStageIndex[j][i] != -1) {
+	 if (prog->InterfaceBlockStageIndex[j][i] != -1) {
             struct gl_shader *sh = prog->_LinkedShaders[j];
-            int stage_index = prog->UniformBlockStageIndex[j][i];
+            int stage_index = prog->InterfaceBlockStageIndex[j][i];
             if (sh && sh->BufferInterfaceBlocks[stage_index].IsShaderStorage) {
                shader_blocks[j]++;
                total_shader_storage_blocks++;
@@ -2955,7 +2955,7 @@ check_image_resources(struct gl_context *ctx, struct gl_shader_program *prog)
          total_image_units += sh->NumImages;
 
          for (unsigned j = 0; j < prog->NumBufferInterfaceBlocks; j++) {
-            int stage_index = prog->UniformBlockStageIndex[i][j];
+            int stage_index = prog->InterfaceBlockStageIndex[i][j];
             if (stage_index != -1 && sh->BufferInterfaceBlocks[stage_index].IsShaderStorage)
                total_shader_storage_blocks++;
          }
@@ -3734,7 +3734,7 @@ build_program_resource_list(struct gl_shader_program *shProg)
       int block_index = shProg->UniformStorage[i].block_index;
       if (block_index != -1) {
          for (unsigned j = 0; j < MESA_SHADER_STAGES; j++) {
-             if (shProg->UniformBlockStageIndex[j][block_index] != -1)
+             if (shProg->InterfaceBlockStageIndex[j][block_index] != -1)
                 stageref |= (1 << j);
          }
       }
diff --git a/src/glsl/standalone_scaffolding.cpp b/src/glsl/standalone_scaffolding.cpp
index eccf094b5cd..fe1d820f2ea 100644
--- a/src/glsl/standalone_scaffolding.cpp
+++ b/src/glsl/standalone_scaffolding.cpp
@@ -120,8 +120,8 @@ _mesa_clear_shader_program_data(struct gl_shader_program *shProg)
    shProg->NumShaderStorageBlocks = 0;
 
    for (i = 0; i < MESA_SHADER_STAGES; i++) {
-      ralloc_free(shProg->UniformBlockStageIndex[i]);
-      shProg->UniformBlockStageIndex[i] = NULL;
+      ralloc_free(shProg->InterfaceBlockStageIndex[i]);
+      shProg->InterfaceBlockStageIndex[i] = NULL;
    }
 
    ralloc_free(shProg->AtomicBuffers);
diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
index d6c1eb8511e..fdb3b3df318 100644
--- a/src/mesa/main/mtypes.h
+++ b/src/mesa/main/mtypes.h
@@ -2716,13 +2716,14 @@ struct gl_shader_program
    struct gl_uniform_block **ShaderStorageBlocks;
 
    /**
-    * Indices into the _LinkedShaders's UniformBlocks[] array for each stage
-    * they're used in, or -1.
+    * Indices into the BufferInterfaceBlocks[] array for each stage they're
+    * used in, or -1.
     *
-    * This is used to maintain the Binding values of the stage's UniformBlocks[]
-    * and to answer the GL_UNIFORM_BLOCK_REFERENCED_BY_*_SHADER queries.
+    * This is used to maintain the Binding values of the stage's
+    * BufferInterfaceBlocks[] and to answer the
+    * GL_UNIFORM_BLOCK_REFERENCED_BY_*_SHADER queries.
     */
-   int *UniformBlockStageIndex[MESA_SHADER_STAGES];
+   int *InterfaceBlockStageIndex[MESA_SHADER_STAGES];
 
    /**
     * Map of active uniform names to locations
diff --git a/src/mesa/main/shader_query.cpp b/src/mesa/main/shader_query.cpp
index dd51bba3386..5cb877b0104 100644
--- a/src/mesa/main/shader_query.cpp
+++ b/src/mesa/main/shader_query.cpp
@@ -980,7 +980,7 @@ is_resource_referenced(struct gl_shader_program *shProg,
       return RESOURCE_ATC(res)->StageReferences[stage];
 
    if (res->Type == GL_UNIFORM_BLOCK || res->Type == GL_SHADER_STORAGE_BLOCK)
-      return shProg->UniformBlockStageIndex[stage][index] != -1;
+      return shProg->InterfaceBlockStageIndex[stage][index] != -1;
 
    return res->StageReferences & (1 << stage);
 }
diff --git a/src/mesa/main/shaderobj.c b/src/mesa/main/shaderobj.c
index ffc71931fec..203ccef7fc4 100644
--- a/src/mesa/main/shaderobj.c
+++ b/src/mesa/main/shaderobj.c
@@ -294,8 +294,8 @@ _mesa_clear_shader_program_data(struct gl_shader_program *shProg)
    shProg->BufferInterfaceBlocks = NULL;
    shProg->NumBufferInterfaceBlocks = 0;
    for (i = 0; i < MESA_SHADER_STAGES; i++) {
-      ralloc_free(shProg->UniformBlockStageIndex[i]);
-      shProg->UniformBlockStageIndex[i] = NULL;
+      ralloc_free(shProg->InterfaceBlockStageIndex[i]);
+      shProg->InterfaceBlockStageIndex[i] = NULL;
    }
 
    ralloc_free(shProg->AtomicBuffers);
diff --git a/src/mesa/main/uniforms.c b/src/mesa/main/uniforms.c
index bc235380d97..758ca2456df 100644
--- a/src/mesa/main/uniforms.c
+++ b/src/mesa/main/uniforms.c
@@ -1026,7 +1026,7 @@ _mesa_UniformBlockBinding(GLuint program,
       shProg->BufferInterfaceBlocks[uniformBlockIndex].Binding = uniformBlockBinding;
 
       for (i = 0; i < MESA_SHADER_STAGES; i++) {
-	 int stage_index = shProg->UniformBlockStageIndex[i][uniformBlockIndex];
+	 int stage_index = shProg->InterfaceBlockStageIndex[i][uniformBlockIndex];
 
 	 if (stage_index != -1) {
 	    struct gl_shader *sh = shProg->_LinkedShaders[i];
@@ -1079,7 +1079,7 @@ _mesa_ShaderStorageBlockBinding(GLuint program,
       shProg->BufferInterfaceBlocks[shaderStorageBlockIndex].Binding = shaderStorageBlockBinding;
 
       for (i = 0; i < MESA_SHADER_STAGES; i++) {
-	 int stage_index = shProg->UniformBlockStageIndex[i][shaderStorageBlockIndex];
+	 int stage_index = shProg->InterfaceBlockStageIndex[i][shaderStorageBlockIndex];
 
 	 if (stage_index != -1) {
 	    struct gl_shader *sh = shProg->_LinkedShaders[i];

From 531be601d5f9ac4f8a9cc77240ba865fda077709 Mon Sep 17 00:00:00 2001
From: Jason Ekstrand <jason.ekstrand@intel.com>
Date: Wed, 28 Oct 2015 10:11:11 -0700
Subject: [PATCH 008/287] nir: Unexpose _impl versions of copy_prop and dce
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/glsl/nir/nir.h                    | 2 --
 src/glsl/nir/nir_opt_copy_propagate.c | 2 +-
 src/glsl/nir/nir_opt_dce.c            | 2 +-
 3 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
index ac422514d52..874a03966be 100644
--- a/src/glsl/nir/nir.h
+++ b/src/glsl/nir/nir.h
@@ -2004,12 +2004,10 @@ bool nir_opt_constant_folding(nir_shader *shader);
 
 bool nir_opt_global_to_local(nir_shader *shader);
 
-bool nir_copy_prop_impl(nir_function_impl *impl);
 bool nir_copy_prop(nir_shader *shader);
 
 bool nir_opt_cse(nir_shader *shader);
 
-bool nir_opt_dce_impl(nir_function_impl *impl);
 bool nir_opt_dce(nir_shader *shader);
 
 bool nir_opt_dead_cf(nir_shader *shader);
diff --git a/src/glsl/nir/nir_opt_copy_propagate.c b/src/glsl/nir/nir_opt_copy_propagate.c
index 71367d001bb..96520f8a361 100644
--- a/src/glsl/nir/nir_opt_copy_propagate.c
+++ b/src/glsl/nir/nir_opt_copy_propagate.c
@@ -256,7 +256,7 @@ copy_prop_block(nir_block *block, void *_state)
    return true;
 }
 
-bool
+static bool
 nir_copy_prop_impl(nir_function_impl *impl)
 {
    bool progress = false;
diff --git a/src/glsl/nir/nir_opt_dce.c b/src/glsl/nir/nir_opt_dce.c
index e0ebdc61c2f..603252825c3 100644
--- a/src/glsl/nir/nir_opt_dce.c
+++ b/src/glsl/nir/nir_opt_dce.c
@@ -145,7 +145,7 @@ delete_block_cb(nir_block *block, void *_state)
    return true;
 }
 
-bool
+static bool
 nir_opt_dce_impl(nir_function_impl *impl)
 {
    struct exec_list *worklist = ralloc(NULL, struct exec_list);

From aea40091f003f8772afce3562b0f8c6a17dad07f Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Mon, 2 Nov 2015 21:02:37 -0800
Subject: [PATCH 009/287] nir: Properly invalidate metadata in
 nir_lower_global_vars_to_local().

v2: Preserve nir_metadata_live_variables as well (caught by Jason).

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
Reviewed-by: Eduardo Lima Mitev <elima@igalia.com>
---
 src/glsl/nir/nir_lower_global_vars_to_local.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/glsl/nir/nir_lower_global_vars_to_local.c b/src/glsl/nir/nir_lower_global_vars_to_local.c
index fab236611a5..dcd091ae2fa 100644
--- a/src/glsl/nir/nir_lower_global_vars_to_local.c
+++ b/src/glsl/nir/nir_lower_global_vars_to_local.c
@@ -100,6 +100,9 @@ nir_lower_global_vars_to_local(nir_shader *shader)
          exec_node_remove(&var->node);
          var->data.mode = nir_var_local;
          exec_list_push_tail(&impl->locals, &var->node);
+         nir_metadata_preserve(impl, nir_metadata_block_index |
+                                     nir_metadata_dominance |
+                                     nir_metadata_live_variables);
          progress = true;
       }
    }

From 8bb44510fca5315bbdd61502c72c22c7198c0daf Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Mon, 2 Nov 2015 21:05:08 -0800
Subject: [PATCH 010/287] nir: Properly invalidate metadata in
 nir_split_var_copies().

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
Reviewed-by: Eduardo Lima Mitev <elima@igalia.com>
Cc: mesa-stable@lists.freedesktop.org
---
 src/glsl/nir/nir_split_var_copies.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/src/glsl/nir/nir_split_var_copies.c b/src/glsl/nir/nir_split_var_copies.c
index d2ea58a8b7c..d463f7bdae9 100644
--- a/src/glsl/nir/nir_split_var_copies.c
+++ b/src/glsl/nir/nir_split_var_copies.c
@@ -271,6 +271,11 @@ split_var_copies_impl(nir_function_impl *impl)
 
    ralloc_free(state.dead_ctx);
 
+   if (state.progress) {
+      nir_metadata_preserve(impl, nir_metadata_block_index |
+                                  nir_metadata_dominance);
+   }
+
    return state.progress;
 }
 

From 4cb7546066f3f06b8030b8fce78f82469b0c6980 Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Mon, 2 Nov 2015 21:28:26 -0800
Subject: [PATCH 011/287] nir: Properly invalidate metadata in
 nir_remove_dead_variables().

v2: Preserve live_variables too (Jason).

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
Reviewed-by: Eduardo Lima Mitev <elima@igalia.com>
---
 src/glsl/nir/nir_remove_dead_variables.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/src/glsl/nir/nir_remove_dead_variables.c b/src/glsl/nir/nir_remove_dead_variables.c
index d6783e78803..530a8475ed5 100644
--- a/src/glsl/nir/nir_remove_dead_variables.c
+++ b/src/glsl/nir/nir_remove_dead_variables.c
@@ -126,8 +126,14 @@ nir_remove_dead_variables(nir_shader *shader)
    progress = remove_dead_vars(&shader->globals, live) || progress;
 
    nir_foreach_overload(shader, overload) {
-      if (overload->impl)
-         progress = remove_dead_vars(&overload->impl->locals, live) || progress;
+      if (overload->impl) {
+         if (remove_dead_vars(&overload->impl->locals, live)) {
+            nir_metadata_preserve(overload->impl, nir_metadata_block_index |
+                                                  nir_metadata_dominance |
+                                                  nir_metadata_live_variables);
+            progress = true;
+         }
+      }
    }
 
    _mesa_set_destroy(live, NULL);

From 0f037bd71ffe083c05cd0867ef54bce91ff84243 Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Mon, 2 Nov 2015 21:21:25 -0800
Subject: [PATCH 012/287] nir: Properly invalidate metadata in
 nir_opt_copy_prop().

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
Reviewed-by: Eduardo Lima Mitev <elima@igalia.com>
Cc: mesa-stable@lists.freedesktop.org
---
 src/glsl/nir/nir_opt_copy_propagate.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/src/glsl/nir/nir_opt_copy_propagate.c b/src/glsl/nir/nir_opt_copy_propagate.c
index 96520f8a361..7d8bdd7f2ca 100644
--- a/src/glsl/nir/nir_opt_copy_propagate.c
+++ b/src/glsl/nir/nir_opt_copy_propagate.c
@@ -262,6 +262,12 @@ nir_copy_prop_impl(nir_function_impl *impl)
    bool progress = false;
 
    nir_foreach_block(impl, copy_prop_block, &progress);
+
+   if (progress) {
+      nir_metadata_preserve(impl, nir_metadata_block_index |
+                                  nir_metadata_dominance);
+   }
+
    return progress;
 }
 

From bc3942e2970c60a816cf954b1fa4d416d0852bd9 Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Mon, 2 Nov 2015 21:38:56 -0800
Subject: [PATCH 013/287] nir: Properly invalidate metadata in
 nir_lower_vec_to_movs().

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
Reviewed-by: Eduardo Lima Mitev <elima@igalia.com>
Cc: mesa-stable@lists.freedesktop.org
---
 src/glsl/nir/nir_lower_vec_to_movs.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/src/glsl/nir/nir_lower_vec_to_movs.c b/src/glsl/nir/nir_lower_vec_to_movs.c
index c08b721dae4..736a66c8639 100644
--- a/src/glsl/nir/nir_lower_vec_to_movs.c
+++ b/src/glsl/nir/nir_lower_vec_to_movs.c
@@ -288,6 +288,11 @@ nir_lower_vec_to_movs_impl(nir_function_impl *impl)
 
    nir_foreach_block(impl, lower_vec_to_movs_block, &state);
 
+   if (state.progress) {
+      nir_metadata_preserve(impl, nir_metadata_block_index |
+                                  nir_metadata_dominance);
+   }
+
    return state.progress;
 }
 

From 59bbe2681b73c3795b7298e2486d5fde7c464ed5 Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Mon, 2 Nov 2015 21:43:40 -0800
Subject: [PATCH 014/287] nir: Properly invalidate metadata in
 nir_opt_remove_phis().

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
Reviewed-by: Eduardo Lima Mitev <elima@igalia.com>
Cc: mesa-stable@lists.freedesktop.org
---
 src/glsl/nir/nir_opt_remove_phis.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/src/glsl/nir/nir_opt_remove_phis.c b/src/glsl/nir/nir_opt_remove_phis.c
index 5bdf7ef4da7..66d37544115 100644
--- a/src/glsl/nir/nir_opt_remove_phis.c
+++ b/src/glsl/nir/nir_opt_remove_phis.c
@@ -108,6 +108,11 @@ remove_phis_impl(nir_function_impl *impl)
 
    nir_foreach_block(impl, remove_phis_block, &progress);
 
+   if (progress) {
+      nir_metadata_preserve(impl, nir_metadata_block_index |
+                                  nir_metadata_dominance);
+   }
+
    return progress;
 }
 

From 39b4dfe6ab1003863778a25c091c080e098833ec Mon Sep 17 00:00:00 2001
From: Oded Gabbay <oded.gabbay@gmail.com>
Date: Tue, 3 Nov 2015 10:36:01 +0200
Subject: [PATCH 015/287] llvmpipe: use simple coeffs calc for 128bit vectors

There are currently two methods in llvmpipe code to calculate coeffs to
be used as inputs for the fragment shader. The two methods use slightly
different ways to do the floating point calculations and thus produce
slightly different results.

The decision which method to use is determined by the size of the vector
that is used by the platform.

For vectors with size of more than 128bit, a single-step method is used,
in which coeffs_init_simple() + attribs_update_simple() are called.

For vectors with size of 128bit or less, a two-step method is used, in
which coeffs_init() + attribs_update() are called.

This causes some piglit tests (clip-distance-bulk-copy,
interface-vs-unnamed-to-fs-unnamed) to fail when using platforms with
128bit vectors (such as ppc64le or x86-64 without AVX).

This patch makes platforms with 128bit vectors use the single-step
method (aka "simple" method) instead of the two-step method.
This would make the resulting coeffs identical between more platforms,
make sure the piglit tests pass, and make debugging and maintainability
a bit easier as the generated LLVM IR will be the same for more platforms.

The performance impact is negligible for x86-64 without AVX, and
basically non-existent for ppc64le, as it can be seen from the following
benchmarking results:

- glxspheres, on ppc64le:

   - original code:  4.892745317 frames/sec 5.460303857 Mpixels/sec
   - with the patch: 4.932083873 frames/sec 5.504205571 Mpixels/sec
   - Additional 0.8% performance boost

- glxspheres, on x86-64 without AVX:

   - original code:  20.16418809 frames/sec 22.50323395 Mpixels/sec
   - with the patch: 20.31328989 frames/sec 22.66963152 Mpixels/sec
   - Additional 0.74% performance boost

- glmark2, on ppc64le:

  - original code:  score of 58
  - with my change: score of 57

- glmark2, on x86-64 without AVX:

  - original code:  score of 175
  - with the patch: score of 167
  - Impact of -4.5% on performance

- OpenArena, on ppc64le:

  - original code:  3398 frames 1719.0 seconds 2.0 fps
                    255.0/505.9/2773.0/0.0 ms

  - with the patch: 3398 frames 1690.4 seconds 2.0 fps
                    241.0/497.5/2563.0/0.2 ms

  - 29 seconds faster with the patch, which is about 2%

- OpenArena, on x86-64 without AVX:

  - original code:  3398 frames 239.6 seconds 14.2 fps
                    38.0/70.5/719.0/14.6 ms

  - with the patch: 3398 frames 244.4 seconds 13.9 fps
                    38.0/71.9/697.0/14.3 ms

  - 0.3 fps slower with the patch (about 2%)

Additional details can be found at:
http://lists.freedesktop.org/archives/mesa-dev/2015-October/098635.html

Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
Reviewed-by: Roland Scheidegger <sroland@vmware.com>
---
 src/gallium/drivers/llvmpipe/lp_bld_interp.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/llvmpipe/lp_bld_interp.c b/src/gallium/drivers/llvmpipe/lp_bld_interp.c
index df262fa4716..ceac86abe1d 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_interp.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_interp.c
@@ -746,7 +746,12 @@ lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld,
 
    pos_init(bld, x0, y0);
 
-   if (coeff_type.length > 4) {
+   /*
+    * Simple method (single step interpolation) may be slower if vector length
+    * is just 4, but the results are different (generally less accurate) with
+    * the other method, so always use more accurate version.
+    */
+   if (1) {
       bld->simple_interp = TRUE;
       {
          /* XXX this should use a global static table */

From 9285ed98f7557722fbb94f47c5bc138ef5dd9c70 Mon Sep 17 00:00:00 2001
From: Roland Scheidegger <sroland@vmware.com>
Date: Tue, 27 Oct 2015 05:34:00 +0100
Subject: [PATCH 016/287] llvmpipe: add cache for compressed textures

compressed textures are very slow because decoding is rather complex
(and because there's no jit code to decode them either, for non-technical
reasons).
Thus, add some texture cache which holds a couple of decoded blocks.
Right now this handles only s3tc format albeit it could be extended to work
with other formats rather trivially as long as the result of decode fits into
32bit per texel (ideally, rgtc actually would decode to more than 8 bits
per channel, but even then making it work for it shouldn't be too difficult).
This can improve performance noticeably but don't expect wonders (uncompressed
is unsurprisingly still faster). It's also possible it might be slower in
some cases (using nearest filtering for example or if there's otherwise not
many cache hits, the cache is only direct mapped which isn't great).
Also, actual decode of a block relies on util code, thus even though always
full blocks are decoded it is done texel by texel - this could obviously
benefit greatly from simd-optimized code decoding full blocks at once...
Note the cache is per (raster) thread, and currently only used for fragment
shaders.

Reviewed-by: Jose Fonseca <jfonseca@vmware.com>
---
 src/gallium/auxiliary/Makefile.sources        |   2 +
 src/gallium/auxiliary/draw/draw_llvm.c        |   5 +-
 src/gallium/auxiliary/gallivm/lp_bld_format.c |  56 +++
 src/gallium/auxiliary/gallivm/lp_bld_format.h |  56 ++-
 .../auxiliary/gallivm/lp_bld_format_aos.c     |  31 +-
 .../auxiliary/gallivm/lp_bld_format_cached.c  | 374 ++++++++++++++++++
 .../auxiliary/gallivm/lp_bld_format_soa.c     |  37 +-
 src/gallium/auxiliary/gallivm/lp_bld_sample.h |  13 +
 .../auxiliary/gallivm/lp_bld_sample_aos.c     |   6 +-
 .../auxiliary/gallivm/lp_bld_sample_soa.c     |  42 ++
 src/gallium/auxiliary/gallivm/lp_bld_tgsi.h   |   2 +
 .../auxiliary/gallivm/lp_bld_tgsi_soa.c       |   5 +
 src/gallium/drivers/llvmpipe/lp_jit.c         |   3 +
 src/gallium/drivers/llvmpipe/lp_jit.h         |   8 +-
 src/gallium/drivers/llvmpipe/lp_rast.c        |  44 ++-
 src/gallium/drivers/llvmpipe/lp_state_fs.c    |   4 +-
 src/gallium/drivers/llvmpipe/lp_test_format.c |  36 +-
 src/gallium/drivers/llvmpipe/lp_tex_sample.c  |  19 +
 src/gallium/drivers/llvmpipe/lp_tex_sample.h  |   5 +-
 19 files changed, 730 insertions(+), 18 deletions(-)
 create mode 100644 src/gallium/auxiliary/gallivm/lp_bld_format.c
 create mode 100644 src/gallium/auxiliary/gallivm/lp_bld_format_cached.c

diff --git a/src/gallium/auxiliary/Makefile.sources b/src/gallium/auxiliary/Makefile.sources
index 9df4e265b5b..6e22ced4e41 100644
--- a/src/gallium/auxiliary/Makefile.sources
+++ b/src/gallium/auxiliary/Makefile.sources
@@ -378,7 +378,9 @@ GALLIVM_SOURCES := \
 	gallivm/lp_bld_flow.h \
 	gallivm/lp_bld_format_aos_array.c \
 	gallivm/lp_bld_format_aos.c \
+	gallivm/lp_bld_format_cached.c \
 	gallivm/lp_bld_format_float.c \
+	gallivm/lp_bld_format.c \
 	gallivm/lp_bld_format.h \
 	gallivm/lp_bld_format_soa.c \
 	gallivm/lp_bld_format_srgb.c \
diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c
index b1e1bcbee04..8435991fb6b 100644
--- a/src/gallium/auxiliary/draw/draw_llvm.c
+++ b/src/gallium/auxiliary/draw/draw_llvm.c
@@ -625,6 +625,7 @@ generate_vs(struct draw_llvm_variant *variant,
                      inputs,
                      outputs,
                      context_ptr,
+                     NULL,
                      draw_sampler,
                      &llvm->draw->vs.vertex_shader->info,
                      NULL);
@@ -749,7 +750,8 @@ generate_fetch(struct gallivm_state *gallivm,
                                     lp_float32_vec4_type(),
                                     FALSE,
                                     map_ptr,
-                                    zero, zero, zero);
+                                    zero, zero, zero,
+                                    NULL);
       LLVMBuildStore(builder, val, temp_ptr);
    }
    lp_build_endif(&if_ctx);
@@ -2193,6 +2195,7 @@ draw_gs_llvm_generate(struct draw_llvm *llvm,
                      NULL,
                      outputs,
                      context_ptr,
+                     NULL,
                      sampler,
                      &llvm->draw->gs.geometry_shader->info,
                      (const struct lp_build_tgsi_gs_iface *)&gs_iface);
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format.c b/src/gallium/auxiliary/gallivm/lp_bld_format.c
new file mode 100644
index 00000000000..a82fd8feee8
--- /dev/null
+++ b/src/gallium/auxiliary/gallivm/lp_bld_format.c
@@ -0,0 +1,56 @@
+/**************************************************************************
+ *
+ * Copyright 2010 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ **************************************************************************/
+
+
+#include "lp_bld_format.h"
+
+/* Build the LLVM struct type matching struct lp_build_format_cache: an i32
+ * data array (16 per block), an i64 tag array, i64 debug counters if enabled. */
+LLVMTypeRef
+lp_build_format_cache_type(struct gallivm_state *gallivm)
+{
+   LLVMTypeRef elem_types[LP_BUILD_FORMAT_CACHE_MEMBER_COUNT];
+   LLVMTypeRef s;
+
+   elem_types[LP_BUILD_FORMAT_CACHE_MEMBER_DATA] =
+         LLVMArrayType(LLVMInt32TypeInContext(gallivm->context),
+                       LP_BUILD_FORMAT_CACHE_SIZE * 16);
+   elem_types[LP_BUILD_FORMAT_CACHE_MEMBER_TAGS] =
+         LLVMArrayType(LLVMInt64TypeInContext(gallivm->context),
+                       LP_BUILD_FORMAT_CACHE_SIZE);
+#if LP_BUILD_FORMAT_CACHE_DEBUG
+   elem_types[LP_BUILD_FORMAT_CACHE_MEMBER_ACCESS_TOTAL] =
+         LLVMInt64TypeInContext(gallivm->context);
+   elem_types[LP_BUILD_FORMAT_CACHE_MEMBER_ACCESS_MISS] =
+         LLVMInt64TypeInContext(gallivm->context);
+#endif
+
+   s = LLVMStructTypeInContext(gallivm->context, elem_types,
+                               LP_BUILD_FORMAT_CACHE_MEMBER_COUNT, 0);
+
+   return s;
+}
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format.h b/src/gallium/auxiliary/gallivm/lp_bld_format.h
index 969f1f6cc94..5c866f420bd 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_format.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_format.h
@@ -44,6 +44,45 @@ struct lp_type;
 struct lp_build_context;
 
 
+#define LP_BUILD_FORMAT_CACHE_DEBUG 0
+/*
+ * Block cache
+ *
+ * Optional block cache to be used when unpacking big pixel blocks.
+ * The size (number of cached blocks) must be a power of 2.
+ */
+
+#define LP_BUILD_FORMAT_CACHE_SIZE 128
+
+/*
+ * Decoded 4x4 blocks and their tags. cache_data needs 16 byte alignment.
+ */
+struct lp_build_format_cache
+{
+   PIPE_ALIGN_VAR(16) uint32_t cache_data[LP_BUILD_FORMAT_CACHE_SIZE][4][4];
+   uint64_t cache_tags[LP_BUILD_FORMAT_CACHE_SIZE]; /* cached block address */
+#if LP_BUILD_FORMAT_CACHE_DEBUG
+   uint64_t cache_access_total;
+   uint64_t cache_access_miss;
+#endif
+};
+
+/* Member indices of the LLVM struct built by lp_build_format_cache_type(). */
+enum {
+   LP_BUILD_FORMAT_CACHE_MEMBER_DATA = 0,
+   LP_BUILD_FORMAT_CACHE_MEMBER_TAGS,
+#if LP_BUILD_FORMAT_CACHE_DEBUG
+   LP_BUILD_FORMAT_CACHE_MEMBER_ACCESS_TOTAL,
+   LP_BUILD_FORMAT_CACHE_MEMBER_ACCESS_MISS,
+#endif
+   LP_BUILD_FORMAT_CACHE_MEMBER_COUNT
+};
+
+
+LLVMTypeRef
+lp_build_format_cache_type(struct gallivm_state *gallivm);
+
+
 /*
  * AoS
  */
@@ -66,7 +105,8 @@ lp_build_fetch_rgba_aos(struct gallivm_state *gallivm,
                         LLVMValueRef base_ptr,
                         LLVMValueRef offset,
                         LLVMValueRef i,
-                        LLVMValueRef j);
+                        LLVMValueRef j,
+                        LLVMValueRef cache);
 
 LLVMValueRef
 lp_build_fetch_rgba_aos_array(struct gallivm_state *gallivm,
@@ -107,13 +147,13 @@ lp_build_fetch_rgba_soa(struct gallivm_state *gallivm,
                         LLVMValueRef offsets,
                         LLVMValueRef i,
                         LLVMValueRef j,
+                        LLVMValueRef cache,
                         LLVMValueRef rgba_out[4]);
 
 /*
  * YUV
  */
 
-
 LLVMValueRef
 lp_build_fetch_subsampled_rgba_aos(struct gallivm_state *gallivm,
                                    const struct util_format_description *format_desc,
@@ -123,6 +163,18 @@ lp_build_fetch_subsampled_rgba_aos(struct gallivm_state *gallivm,
                                    LLVMValueRef i,
                                    LLVMValueRef j);
 
+
+LLVMValueRef
+lp_build_fetch_cached_texels(struct gallivm_state *gallivm,
+                             const struct util_format_description *format_desc,
+                             unsigned n,
+                             LLVMValueRef base_ptr,
+                             LLVMValueRef offset,
+                             LLVMValueRef i,
+                             LLVMValueRef j,
+                             LLVMValueRef cache);
+
+
 /*
  * special float formats
  */
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c
index ddf3ad1dfc6..a41b30bbb96 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c
@@ -370,7 +370,8 @@ lp_build_fetch_rgba_aos(struct gallivm_state *gallivm,
                         LLVMValueRef base_ptr,
                         LLVMValueRef offset,
                         LLVMValueRef i,
-                        LLVMValueRef j)
+                        LLVMValueRef j,
+                        LLVMValueRef cache)
 {
    LLVMBuilderRef builder = gallivm->builder;
    unsigned num_pixels = type.length / 4;
@@ -502,6 +503,34 @@ lp_build_fetch_rgba_aos(struct gallivm_state *gallivm,
       return tmp;
    }
 
+   /*
+    * s3tc rgb formats
+    */
+
+   if (format_desc->layout == UTIL_FORMAT_LAYOUT_S3TC && cache) {
+      struct lp_type tmp_type;
+      LLVMValueRef tmp;
+
+      memset(&tmp_type, 0, sizeof tmp_type);
+      tmp_type.width = 8;
+      tmp_type.length = num_pixels * 4;
+      tmp_type.norm = TRUE;
+
+      tmp = lp_build_fetch_cached_texels(gallivm,
+                                         format_desc,
+                                         num_pixels,
+                                         base_ptr,
+                                         offset,
+                                         i, j,
+                                         cache);
+
+      lp_build_conv(gallivm,
+                    tmp_type, type,
+                    &tmp, 1, &tmp, 1);
+
+       return tmp;
+   }
+
    /*
     * Fallback to util_format_description::fetch_rgba_8unorm().
     */
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_cached.c b/src/gallium/auxiliary/gallivm/lp_bld_format_cached.c
new file mode 100644
index 00000000000..b683e7f960c
--- /dev/null
+++ b/src/gallium/auxiliary/gallivm/lp_bld_format_cached.c
@@ -0,0 +1,374 @@
+/**************************************************************************
+ *
+ * Copyright 2015 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "lp_bld_format.h"
+#include "lp_bld_type.h"
+#include "lp_bld_struct.h"
+#include "lp_bld_const.h"
+#include "lp_bld_flow.h"
+#include "lp_bld_swizzle.h"
+
+#include "util/u_math.h"
+
+
+/**
+ * @file
+ * Complex block-compression based formats are handled here by using a cache,
+ * so re-decoding of every pixel is not required.
+ * Especially for bilinear filtering, texel reuse is very high hence even
+ * a small cache helps.
+ * The elements in the cache are the decoded blocks - currently things
+ * are restricted to formats which are 4x4 block based, and the decoded
+ * texels must fit into 4x8 bits.
+ * The cache is direct mapped so hitrates aren't all that great and cache
+ * thrashing could happen.
+ *
+ * @author Roland Scheidegger <sroland@vmware.com>
+ */
+
+/* Debug only: emit IR adding count to the i64 counter at struct member index. */
+#if LP_BUILD_FORMAT_CACHE_DEBUG
+static void
+update_cache_access(struct gallivm_state *gallivm,
+                    LLVMValueRef ptr,
+                    unsigned count,
+                    unsigned index)
+{
+   LLVMBuilderRef builder = gallivm->builder;
+   LLVMValueRef member_ptr, cache_access;
+
+   assert(index == LP_BUILD_FORMAT_CACHE_MEMBER_ACCESS_TOTAL ||
+          index == LP_BUILD_FORMAT_CACHE_MEMBER_ACCESS_MISS);
+
+   member_ptr = lp_build_struct_get_ptr(gallivm, ptr, index, "");
+   cache_access = LLVMBuildLoad(builder, member_ptr, "cache_access");
+   cache_access = LLVMBuildAdd(builder, cache_access,
+                               LLVMConstInt(LLVMInt64TypeInContext(gallivm->context),
+                                                                   count, 0), "");
+   LLVMBuildStore(builder, cache_access, member_ptr);
+}
+#endif
+
+/* Emit IR storing tag_value and the four vectors in col at slot hash_index. */
+static void
+store_cached_block(struct gallivm_state *gallivm,
+                   LLVMValueRef *col,
+                   LLVMValueRef tag_value,
+                   LLVMValueRef hash_index,
+                   LLVMValueRef cache)
+{
+   LLVMBuilderRef builder = gallivm->builder;
+   LLVMValueRef ptr, indices[3];
+   LLVMTypeRef type_ptr4x32;
+   unsigned count;
+   /* Write the tag for this slot first. */
+   type_ptr4x32 = LLVMPointerType(LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), 4), 0);
+   indices[0] = lp_build_const_int32(gallivm, 0);
+   indices[1] = lp_build_const_int32(gallivm, LP_BUILD_FORMAT_CACHE_MEMBER_TAGS);
+   indices[2] = hash_index;
+   ptr = LLVMBuildGEP(builder, cache, indices, Elements(indices), "");
+   LLVMBuildStore(builder, tag_value, ptr);
+   /* Then the four i32x4 vectors; a slot spans 16 data array elements. */
+   indices[1] = lp_build_const_int32(gallivm, LP_BUILD_FORMAT_CACHE_MEMBER_DATA);
+   hash_index = LLVMBuildMul(builder, hash_index,
+                             lp_build_const_int32(gallivm, 16), "");
+   for (count = 0; count < 4; count++) {
+      indices[2] = hash_index;
+      ptr = LLVMBuildGEP(builder, cache, indices, Elements(indices), "");
+      ptr = LLVMBuildBitCast(builder, ptr, type_ptr4x32, "");
+      LLVMBuildStore(builder, col[count], ptr);
+      hash_index = LLVMBuildAdd(builder, hash_index,
+                                lp_build_const_int32(gallivm, 4), "");
+   }
+}
+
+/* Emit a load of the cache data array element at index; returns the value. */
+static LLVMValueRef
+lookup_cached_pixel(struct gallivm_state *gallivm,
+                    LLVMValueRef ptr,
+                    LLVMValueRef index)
+{
+   LLVMBuilderRef builder = gallivm->builder;
+   LLVMValueRef member_ptr, indices[3];
+
+   indices[0] = lp_build_const_int32(gallivm, 0);
+   indices[1] = lp_build_const_int32(gallivm, LP_BUILD_FORMAT_CACHE_MEMBER_DATA);
+   indices[2] = index;
+   member_ptr = LLVMBuildGEP(builder, ptr, indices, Elements(indices), "");
+   return LLVMBuildLoad(builder, member_ptr, "cache_data");
+}
+
+/* Emit a load of the cache tag array element at index; returns the value. */
+static LLVMValueRef
+lookup_tag_data(struct gallivm_state *gallivm,
+                LLVMValueRef ptr,
+                LLVMValueRef index)
+{
+   LLVMBuilderRef builder = gallivm->builder;
+   LLVMValueRef member_ptr, indices[3];
+
+   indices[0] = lp_build_const_int32(gallivm, 0);
+   indices[1] = lp_build_const_int32(gallivm, LP_BUILD_FORMAT_CACHE_MEMBER_TAGS);
+   indices[2] = index;
+   member_ptr = LLVMBuildGEP(builder, ptr, indices, Elements(indices), "");
+   return LLVMBuildLoad(builder, member_ptr, "tag_data");
+}
+
+/* Emit IR decoding the 4x4 block at ptr_addr and storing it at hash_index. */
+static void
+update_cached_block(struct gallivm_state *gallivm,
+                    const struct util_format_description *format_desc,
+                    LLVMValueRef ptr_addr,
+                    LLVMValueRef hash_index,
+                    LLVMValueRef cache)
+
+{
+   LLVMBuilderRef builder = gallivm->builder;
+   LLVMTypeRef i8t = LLVMInt8TypeInContext(gallivm->context);
+   LLVMTypeRef pi8t = LLVMPointerType(i8t, 0);
+   LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
+   LLVMTypeRef i32x4 = LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), 4);
+   LLVMValueRef function;
+   LLVMValueRef tag_value, tmp_ptr;
+   LLVMValueRef col[4];
+   unsigned i, j;
+
+   /*
+    * Use format_desc->fetch_rgba_8unorm() for each pixel in the block.
+    * This doesn't actually make any sense whatsoever, someone would need
+    * to write a function doing this for all pixels in a block (either as
+    * an external c function or with generated code). Don't ask.
+    */
+
+   {
+      /*
+       * Function to call looks like:
+       *   fetch(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j)
+       */
+      LLVMTypeRef ret_type;
+      LLVMTypeRef arg_types[4];
+      LLVMTypeRef function_type;
+
+      assert(format_desc->fetch_rgba_8unorm);
+
+      ret_type = LLVMVoidTypeInContext(gallivm->context);
+      arg_types[0] = pi8t;
+      arg_types[1] = pi8t;
+      arg_types[2] = i32t;
+      arg_types[3] = i32t;
+      function_type = LLVMFunctionType(ret_type, arg_types,
+                                       Elements(arg_types), 0);
+
+      /* make const pointer for the C fetch_rgba_8unorm function */
+      function = lp_build_const_int_pointer(gallivm,
+         func_to_pointer((func_pointer) format_desc->fetch_rgba_8unorm));
+
+      /* cast the callee pointer to the function's type */
+      function = LLVMBuildBitCast(builder, function,
+                                  LLVMPointerType(function_type, 0),
+                                  "cast callee");
+   }
+   /* Scratch buffer the C fetch function writes the decoded texels into. */
+   tmp_ptr = lp_build_array_alloca(gallivm, i32x4,
+                                   lp_build_const_int32(gallivm, 16),
+                                   "tmp_decode_store");
+   tmp_ptr = LLVMBuildBitCast(builder, tmp_ptr, pi8t, "");
+
+   /*
+    * Invoke format_desc->fetch_rgba_8unorm() for each pixel.
+    * This is going to be really really slow.
+    * Note: the block store format is actually
+    * x0y0x0y1x0y2x0y3 x1y0x1y1x1y2x1y3 ...
+    */
+   for (i = 0; i < 4; ++i) {
+      for (j = 0; j < 4; ++j) {
+         LLVMValueRef args[4];
+         LLVMValueRef dst_offset = lp_build_const_int32(gallivm, (i * 4 + j) * 4);
+
+         /*
+          * Note we actually supply a pointer to the start of the block,
+          * not the start of the texture.
+          */
+         args[0] = LLVMBuildGEP(gallivm->builder, tmp_ptr, &dst_offset, 1, "");
+         args[1] = ptr_addr;
+         args[2] = LLVMConstInt(i32t, i, 0);
+         args[3] = LLVMConstInt(i32t, j, 0);
+         LLVMBuildCall(builder, function, args, Elements(args), "");
+      }
+   }
+
+   /* Finally store the block - pointless mem copy + update tag. */
+   tmp_ptr = LLVMBuildBitCast(builder, tmp_ptr, LLVMPointerType(i32x4, 0), "");
+   for (i = 0; i < 4; ++i) {
+      LLVMValueRef tmp_offset = lp_build_const_int32(gallivm, i);
+      LLVMValueRef ptr = LLVMBuildGEP(gallivm->builder, tmp_ptr, &tmp_offset, 1, "");
+      col[i] = LLVMBuildLoad(builder, ptr, "");
+   }
+   /* The tag is the block's address, converted to a 64-bit integer. */
+   tag_value = LLVMBuildPtrToInt(gallivm->builder, ptr_addr,
+                                 LLVMInt64TypeInContext(gallivm->context), "");
+   store_cached_block(gallivm, col, tag_value, hash_index, cache);
+}
+
+
+/*
+ * Do a cached lookup of n texels at (i, j) within the 4x4 blocks found at
+ * base_ptr + offset; blocks missing from the cache are decoded first.
+ * Returns (vectors of) 4x8 rgba aos value
+ */
+LLVMValueRef
+lp_build_fetch_cached_texels(struct gallivm_state *gallivm,
+                             const struct util_format_description *format_desc,
+                             unsigned n,
+                             LLVMValueRef base_ptr,
+                             LLVMValueRef offset,
+                             LLVMValueRef i,
+                             LLVMValueRef j,
+                             LLVMValueRef cache)
+
+{
+   LLVMBuilderRef builder = gallivm->builder;
+   unsigned count, low_bit, log2size;
+   LLVMValueRef color, offset_stored, addr, ptr_addrtrunc, tmp;
+   LLVMValueRef ij_index, hash_index, hash_mask, block_index;
+   LLVMTypeRef i8t = LLVMInt8TypeInContext(gallivm->context);
+   LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
+   LLVMTypeRef i64t = LLVMInt64TypeInContext(gallivm->context);
+   struct lp_type type;
+   struct lp_build_context bld32;
+   memset(&type, 0, sizeof type);
+   type.width = 32;
+   type.length = n;
+
+   assert(format_desc->block.width == 4);
+   assert(format_desc->block.height == 4);
+
+   lp_build_context_init(&bld32, gallivm, type);
+
+   /*
+    * compute hash - we use direct mapped cache, the hash function could
+    *                be better but it needs to be simple
+    * per-element:
+    *    compare address (base_ptr + offset) with tag stored at hash index
+    *    if not equal decode/store block, update tag
+    *    extract color from cache
+    *    assemble result vector
+    */
+
+   /* TODO: not ideal with 32bit pointers... */
+
+   low_bit = util_logbase2(format_desc->block.bits / 8);
+   log2size = util_logbase2(LP_BUILD_FORMAT_CACHE_SIZE);
+   addr = LLVMBuildPtrToInt(builder, base_ptr, i64t, "");
+   ptr_addrtrunc = LLVMBuildPtrToInt(builder, base_ptr, i32t, "");
+   ptr_addrtrunc = lp_build_broadcast_scalar(&bld32, ptr_addrtrunc);
+   /* For the hash function, first shift out the unused lowest bits. Then just
+      do some xor with address bits - only use lower 32bits */
+   ptr_addrtrunc = LLVMBuildAdd(builder, offset, ptr_addrtrunc, "");
+   ptr_addrtrunc = LLVMBuildLShr(builder, ptr_addrtrunc,
+                                 lp_build_const_int_vec(gallivm, type, low_bit), "");
+   /* This only really makes sense for size 64,128,256 */
+   hash_index = ptr_addrtrunc;
+   ptr_addrtrunc = LLVMBuildLShr(builder, ptr_addrtrunc,
+                                 lp_build_const_int_vec(gallivm, type, 2*log2size), "");
+   hash_index = LLVMBuildXor(builder, ptr_addrtrunc, hash_index, "");
+   tmp = LLVMBuildLShr(builder, hash_index,
+                       lp_build_const_int_vec(gallivm, type, log2size), "");
+   hash_index = LLVMBuildXor(builder, hash_index, tmp, "");
+
+   hash_mask = lp_build_const_int_vec(gallivm, type, LP_BUILD_FORMAT_CACHE_SIZE - 1);
+   hash_index = LLVMBuildAnd(builder, hash_index, hash_mask, "");
+   ij_index = LLVMBuildShl(builder, i, lp_build_const_int_vec(gallivm, type, 2), "");
+   ij_index = LLVMBuildAdd(builder, ij_index, j, "");
+   block_index = LLVMBuildShl(builder, hash_index,
+                              lp_build_const_int_vec(gallivm, type, 4), "");
+   block_index = LLVMBuildAdd(builder, ij_index, block_index, "");
+   /* Vector case: check, decode and load each of the n texels separately. */
+   if (n > 1) {
+      color = LLVMGetUndef(LLVMVectorType(i32t, n));
+      for (count = 0; count < n; count++) {
+         LLVMValueRef index, cond, colorx;
+         LLVMValueRef block_indexx, hash_indexx, addrx, offsetx, ptr_addrx;
+         struct lp_build_if_state if_ctx;
+
+         index = lp_build_const_int32(gallivm, count);
+         offsetx = LLVMBuildExtractElement(builder, offset, index, "");
+         addrx = LLVMBuildZExt(builder, offsetx, i64t, "");
+         addrx = LLVMBuildAdd(builder, addrx, addr, "");
+         block_indexx = LLVMBuildExtractElement(builder, block_index, index, "");
+         hash_indexx = LLVMBuildLShr(builder, block_indexx,
+                                     lp_build_const_int32(gallivm, 4), "");
+         offset_stored = lookup_tag_data(gallivm, cache, hash_indexx);
+         cond = LLVMBuildICmp(builder, LLVMIntNE, offset_stored, addrx, "");
+
+         lp_build_if(&if_ctx, gallivm, cond);
+         {
+            ptr_addrx = LLVMBuildIntToPtr(builder, addrx,
+                                          LLVMPointerType(i8t, 0), "");
+            update_cached_block(gallivm, format_desc, ptr_addrx, hash_indexx, cache);
+#if LP_BUILD_FORMAT_CACHE_DEBUG
+            update_cache_access(gallivm, cache, 1,
+                                LP_BUILD_FORMAT_CACHE_MEMBER_ACCESS_MISS);
+#endif
+         }
+         lp_build_endif(&if_ctx);
+
+         colorx = lookup_cached_pixel(gallivm, cache, block_indexx);
+
+         color = LLVMBuildInsertElement(builder, color, colorx,
+                                        lp_build_const_int32(gallivm, count), "");
+      }
+   }
+   else {
+      LLVMValueRef cond;
+      struct lp_build_if_state if_ctx;
+
+      tmp = LLVMBuildZExt(builder, offset, i64t, "");
+      addr = LLVMBuildAdd(builder, tmp, addr, "");
+      offset_stored = lookup_tag_data(gallivm, cache, hash_index);
+      cond = LLVMBuildICmp(builder, LLVMIntNE, offset_stored, addr, "");
+
+      lp_build_if(&if_ctx, gallivm, cond);
+      {
+         tmp = LLVMBuildIntToPtr(builder, addr, LLVMPointerType(i8t, 0), "");
+         update_cached_block(gallivm, format_desc, tmp, hash_index, cache);
+#if LP_BUILD_FORMAT_CACHE_DEBUG
+         update_cache_access(gallivm, cache, 1,
+                             LP_BUILD_FORMAT_CACHE_MEMBER_ACCESS_MISS);
+#endif
+      }
+      lp_build_endif(&if_ctx);
+
+      color = lookup_cached_pixel(gallivm, cache, block_index);
+   }
+#if LP_BUILD_FORMAT_CACHE_DEBUG
+   update_cache_access(gallivm, cache, n,
+                       LP_BUILD_FORMAT_CACHE_MEMBER_ACCESS_TOTAL);
+#endif
+   return LLVMBuildBitCast(builder, color, LLVMVectorType(i8t, n * 4), "");
+}
+
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c
index afaabc08790..42aef8376f8 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c
@@ -346,6 +346,7 @@ lp_build_rgba8_to_fi32_soa(struct gallivm_state *gallivm,
  * \param i, j  the sub-block pixel coordinates.  For non-compressed formats
  *              these will always be (0,0).  For compressed formats, i will
  *              be in [0, block_width-1] and j will be in [0, block_height-1].
+ * \param cache  optional value pointing to a lp_build_format_cache structure
  */
 void
 lp_build_fetch_rgba_soa(struct gallivm_state *gallivm,
@@ -355,6 +356,7 @@ lp_build_fetch_rgba_soa(struct gallivm_state *gallivm,
                         LLVMValueRef offset,
                         LLVMValueRef i,
                         LLVMValueRef j,
+                        LLVMValueRef cache,
                         LLVMValueRef rgba_out[4])
 {
    LLVMBuilderRef builder = gallivm->builder;
@@ -473,7 +475,7 @@ lp_build_fetch_rgba_soa(struct gallivm_state *gallivm,
       tmp_type.norm = TRUE;
 
       tmp = lp_build_fetch_rgba_aos(gallivm, format_desc, tmp_type,
-                                    TRUE, base_ptr, offset, i, j);
+                                    TRUE, base_ptr, offset, i, j, cache);
 
       lp_build_rgba8_to_fi32_soa(gallivm,
                                 type,
@@ -483,6 +485,37 @@ lp_build_fetch_rgba_soa(struct gallivm_state *gallivm,
       return;
    }
 
+   if (format_desc->layout == UTIL_FORMAT_LAYOUT_S3TC &&
+       /* non-srgb case is already handled above */
+       format_desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB &&
+       type.floating && type.width == 32 &&
+       (type.length == 1 || (type.length % 4 == 0)) &&
+       cache) {
+      const struct util_format_description *format_decompressed;
+      LLVMValueRef packed;
+      packed = lp_build_fetch_cached_texels(gallivm,
+                                            format_desc,
+                                            type.length,
+                                            base_ptr,
+                                            offset,
+                                            i, j,
+                                            cache);
+      packed = LLVMBuildBitCast(builder, packed,
+                                lp_build_int_vec_type(gallivm, type), "");
+      /*
+       * The values are now packed so they match ordinary srgb RGBA8 format,
+       * hence need to use matching format for unpack.
+       */
+      format_decompressed = util_format_description(PIPE_FORMAT_R8G8B8A8_SRGB);
+
+      lp_build_unpack_rgba_soa(gallivm,
+                               format_decompressed,
+                               type,
+                               packed, rgba_out);
+
+      return;
+   }
+
    /*
     * Fallback to calling lp_build_fetch_rgba_aos for each pixel.
     *
@@ -524,7 +557,7 @@ lp_build_fetch_rgba_soa(struct gallivm_state *gallivm,
          /* Get a single float[4]={R,G,B,A} pixel */
          tmp = lp_build_fetch_rgba_aos(gallivm, format_desc, tmp_type,
                                        TRUE, base_ptr, offset_elem,
-                                       i_elem, j_elem);
+                                       i_elem, j_elem, cache);
 
          /*
           * Insert the AoS tmp value channels into the SoA result vectors at
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.h b/src/gallium/auxiliary/gallivm/lp_bld_sample.h
index eba758da6ae..a6f0eff42f6 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.h
@@ -99,6 +99,7 @@ struct lp_sampler_params
    unsigned sampler_index;
    unsigned sample_key;
    LLVMValueRef context_ptr;
+   LLVMValueRef thread_data_ptr;
    const LLVMValueRef *coords;
    const LLVMValueRef *offsets;
    LLVMValueRef lod;
@@ -267,6 +268,17 @@ struct lp_sampler_dynamic_state
                    struct gallivm_state *gallivm,
                    LLVMValueRef context_ptr,
                    unsigned sampler_unit);
+
+   /**
+    * Obtain texture cache (returns ptr to lp_build_format_cache), given
+    * the thread data pointer and the texture unit.
+    * This hook is optional: no caching will be done if it's NULL.
+    */
+   LLVMValueRef
+   (*cache_ptr)(const struct lp_sampler_dynamic_state *state,
+                struct gallivm_state *gallivm,
+                LLVMValueRef thread_data_ptr,
+                unsigned unit);
 };
 
 
@@ -356,6 +368,7 @@ struct lp_build_sample_context
    LLVMValueRef img_stride_array;
    LLVMValueRef base_ptr;
    LLVMValueRef mip_offsets;
+   LLVMValueRef cache;
 
    /** Integer vector with texture width, height, depth */
    LLVMValueRef int_size;
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c
index d7fde810a76..729c5b8f6ef 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c
@@ -593,7 +593,8 @@ lp_build_sample_fetch_image_nearest(struct lp_build_sample_context *bld,
                                       TRUE,
                                       data_ptr, offset,
                                       x_subcoord,
-                                      y_subcoord);
+                                      y_subcoord,
+                                      bld->cache);
    }
 
    *colors = rgba8;
@@ -933,7 +934,8 @@ lp_build_sample_fetch_image_linear(struct lp_build_sample_context *bld,
                                                TRUE,
                                                data_ptr, offset[k][j][i],
                                                x_subcoord[i],
-                                               y_subcoord[j]);
+                                               y_subcoord[j],
+                                               bld->cache);
             }
 
             neighbors[k][j][i] = rgba8;
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
index 26bfa0d2677..e21933ffc85 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
@@ -161,6 +161,7 @@ lp_build_sample_texel_soa(struct lp_build_sample_context *bld,
                            bld->texel_type,
                            data_ptr, offset,
                            i, j,
+                           bld->cache,
                            texel_out);
 
    /*
@@ -2389,6 +2390,7 @@ lp_build_fetch_texel(struct lp_build_sample_context *bld,
                            bld->texel_type,
                            bld->base_ptr, offset,
                            i, j,
+                           bld->cache,
                            colors_out);
 
    if (out_of_bound_ret_zero) {
@@ -2442,6 +2444,7 @@ lp_build_sample_soa_code(struct gallivm_state *gallivm,
                          unsigned texture_index,
                          unsigned sampler_index,
                          LLVMValueRef context_ptr,
+                         LLVMValueRef thread_data_ptr,
                          const LLVMValueRef *coords,
                          const LLVMValueRef *offsets,
                          const struct lp_derivatives *derivs, /* optional */
@@ -2707,6 +2710,11 @@ lp_build_sample_soa_code(struct gallivm_state *gallivm,
                                                 context_ptr, texture_index);
    /* Note that mip_offsets is an array[level] of offsets to texture images */
 
+   if (dynamic_state->cache_ptr && thread_data_ptr) {
+      bld.cache = dynamic_state->cache_ptr(dynamic_state, gallivm,
+                                           thread_data_ptr, texture_index);
+   }
+
    /* width, height, depth as single int vector */
    if (dims <= 1) {
       bld.int_size = tex_width;
@@ -2883,6 +2891,7 @@ lp_build_sample_soa_code(struct gallivm_state *gallivm,
          bld4.base_ptr = bld.base_ptr;
          bld4.mip_offsets = bld.mip_offsets;
          bld4.int_size = bld.int_size;
+         bld4.cache = bld.cache;
 
          bld4.vector_width = lp_type_width(type4);
 
@@ -3081,12 +3090,14 @@ lp_build_sample_gen_func(struct gallivm_state *gallivm,
    LLVMValueRef offsets[3] = { NULL };
    LLVMValueRef lod = NULL;
    LLVMValueRef context_ptr;
+   LLVMValueRef thread_data_ptr = NULL;
    LLVMValueRef texel_out[4];
    struct lp_derivatives derivs;
    struct lp_derivatives *deriv_ptr = NULL;
    unsigned num_param = 0;
    unsigned i, num_coords, num_derivs, num_offsets, layer;
    enum lp_sampler_lod_control lod_control;
+   boolean need_cache = FALSE;
 
    lod_control = (sample_key & LP_SAMPLER_LOD_CONTROL_MASK) >>
                     LP_SAMPLER_LOD_CONTROL_SHIFT;
@@ -3094,8 +3105,19 @@ lp_build_sample_gen_func(struct gallivm_state *gallivm,
    get_target_info(static_texture_state->target,
                    &num_coords, &num_derivs, &num_offsets, &layer);
 
+   if (dynamic_state->cache_ptr) {
+      const struct util_format_description *format_desc;
+      format_desc = util_format_description(static_texture_state->format);
+      if (format_desc && format_desc->layout == UTIL_FORMAT_LAYOUT_S3TC) {
+         need_cache = TRUE;
+      }
+   }
+
    /* "unpack" arguments */
    context_ptr = LLVMGetParam(function, num_param++);
+   if (need_cache) {
+      thread_data_ptr = LLVMGetParam(function, num_param++);
+   }
    for (i = 0; i < num_coords; i++) {
       coords[i] = LLVMGetParam(function, num_param++);
    }
@@ -3146,6 +3168,7 @@ lp_build_sample_gen_func(struct gallivm_state *gallivm,
                             texture_index,
                             sampler_index,
                             context_ptr,
+                            thread_data_ptr,
                             coords,
                             offsets,
                             deriv_ptr,
@@ -3189,6 +3212,7 @@ lp_build_sample_soa_func(struct gallivm_state *gallivm,
    const LLVMValueRef *offsets = params->offsets;
    const struct lp_derivatives *derivs = params->derivs;
    enum lp_sampler_lod_control lod_control;
+   boolean need_cache = FALSE;
 
    lod_control = (sample_key & LP_SAMPLER_LOD_CONTROL_MASK) >>
                     LP_SAMPLER_LOD_CONTROL_SHIFT;
@@ -3196,6 +3220,17 @@ lp_build_sample_soa_func(struct gallivm_state *gallivm,
    get_target_info(static_texture_state->target,
                    &num_coords, &num_derivs, &num_offsets, &layer);
 
+   if (dynamic_state->cache_ptr) {
+      const struct util_format_description *format_desc;
+      format_desc = util_format_description(static_texture_state->format);
+      if (format_desc && format_desc->layout == UTIL_FORMAT_LAYOUT_S3TC) {
+         /*
+          * This is not 100% correct, if we have cache but the
+          * util_format_s3tc_prefer is true the cache won't get used
+          * regardless (could hook up the block decode there...) */
+         need_cache = TRUE;
+      }
+   }
    /*
     * texture function matches are found by name.
     * Thus the name has to include both the texture and sampler unit
@@ -3221,6 +3256,9 @@ lp_build_sample_soa_func(struct gallivm_state *gallivm,
        */
 
       arg_types[num_param++] = LLVMTypeOf(params->context_ptr);
+      if (need_cache) {
+         arg_types[num_param++] = LLVMTypeOf(params->thread_data_ptr);
+      }
       for (i = 0; i < num_coords; i++) {
          arg_types[num_param++] = LLVMTypeOf(coords[0]);
          assert(LLVMTypeOf(coords[0]) == LLVMTypeOf(coords[i]));
@@ -3280,6 +3318,9 @@ lp_build_sample_soa_func(struct gallivm_state *gallivm,
 
    num_args = 0;
    args[num_args++] = params->context_ptr;
+   if (need_cache) {
+      args[num_args++] = params->thread_data_ptr;
+   }
    for (i = 0; i < num_coords; i++) {
       args[num_args++] = coords[i];
    }
@@ -3384,6 +3425,7 @@ lp_build_sample_soa(const struct lp_static_texture_state *static_texture_state,
                                params->texture_index,
                                params->sampler_index,
                                params->context_ptr,
+                               params->thread_data_ptr,
                                params->coords,
                                params->offsets,
                                params->derivs,
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
index 2ca9c6194b3..cc4549778a3 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
@@ -230,6 +230,7 @@ lp_build_tgsi_soa(struct gallivm_state *gallivm,
                   const LLVMValueRef (*inputs)[4],
                   LLVMValueRef (*outputs)[4],
                   LLVMValueRef context_ptr,
+                  LLVMValueRef thread_data_ptr,
                   struct lp_build_sampler_soa *sampler,
                   const struct tgsi_shader_info *info,
                   const struct lp_build_tgsi_gs_iface *gs_iface);
@@ -447,6 +448,7 @@ struct lp_build_tgsi_soa_context
    const LLVMValueRef (*inputs)[TGSI_NUM_CHANNELS];
    LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS];
    LLVMValueRef context_ptr;
+   LLVMValueRef thread_data_ptr;
 
    const struct lp_build_sampler_soa *sampler;
 
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
index fae604e2f9c..7d2cd9a9e73 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
@@ -2321,6 +2321,7 @@ emit_tex( struct lp_build_tgsi_soa_context *bld,
    params.texture_index = unit;
    params.sampler_index = unit;
    params.context_ptr = bld->context_ptr;
+   params.thread_data_ptr = bld->thread_data_ptr;
    params.coords = coords;
    params.offsets = offsets;
    params.lod = lod;
@@ -2488,6 +2489,7 @@ emit_sample(struct lp_build_tgsi_soa_context *bld,
    params.texture_index = texture_unit;
    params.sampler_index = sampler_unit;
    params.context_ptr = bld->context_ptr;
+   params.thread_data_ptr = bld->thread_data_ptr;
    params.coords = coords;
    params.offsets = offsets;
    params.lod = lod;
@@ -2608,6 +2610,7 @@ emit_fetch_texels( struct lp_build_tgsi_soa_context *bld,
    params.texture_index = unit;
    params.sampler_index = unit;
    params.context_ptr = bld->context_ptr;
+   params.thread_data_ptr = bld->thread_data_ptr;
    params.coords = coords;
    params.offsets = offsets;
    params.derivs = NULL;
@@ -3858,6 +3861,7 @@ lp_build_tgsi_soa(struct gallivm_state *gallivm,
                   const LLVMValueRef (*inputs)[TGSI_NUM_CHANNELS],
                   LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS],
                   LLVMValueRef context_ptr,
+                  LLVMValueRef thread_data_ptr,
                   struct lp_build_sampler_soa *sampler,
                   const struct tgsi_shader_info *info,
                   const struct lp_build_tgsi_gs_iface *gs_iface)
@@ -3893,6 +3897,7 @@ lp_build_tgsi_soa(struct gallivm_state *gallivm,
    bld.bld_base.info = info;
    bld.indirect_files = info->indirect_files;
    bld.context_ptr = context_ptr;
+   bld.thread_data_ptr = thread_data_ptr;
 
    /*
     * If the number of temporaries is rather large then we just
diff --git a/src/gallium/drivers/llvmpipe/lp_jit.c b/src/gallium/drivers/llvmpipe/lp_jit.c
index 9acde4f1b06..b915c1d64ff 100644
--- a/src/gallium/drivers/llvmpipe/lp_jit.c
+++ b/src/gallium/drivers/llvmpipe/lp_jit.c
@@ -36,6 +36,7 @@
 #include "util/u_memory.h"
 #include "gallivm/lp_bld_init.h"
 #include "gallivm/lp_bld_debug.h"
+#include "gallivm/lp_bld_format.h"
 #include "lp_context.h"
 #include "lp_jit.h"
 
@@ -208,6 +209,8 @@ lp_jit_create_types(struct lp_fragment_shader_variant *lp)
       LLVMTypeRef elem_types[LP_JIT_THREAD_DATA_COUNT];
       LLVMTypeRef thread_data_type;
 
+      elem_types[LP_JIT_THREAD_DATA_CACHE] =
+            LLVMPointerType(lp_build_format_cache_type(gallivm), 0);
       elem_types[LP_JIT_THREAD_DATA_COUNTER] = LLVMInt64TypeInContext(lc);
       elem_types[LP_JIT_THREAD_DATA_RASTER_STATE_VIEWPORT_INDEX] =
             LLVMInt32TypeInContext(lc);
diff --git a/src/gallium/drivers/llvmpipe/lp_jit.h b/src/gallium/drivers/llvmpipe/lp_jit.h
index 097fa7dce7c..9db26f2cba9 100644
--- a/src/gallium/drivers/llvmpipe/lp_jit.h
+++ b/src/gallium/drivers/llvmpipe/lp_jit.h
@@ -43,6 +43,7 @@
 #include "lp_texture.h"
 
 
+struct lp_build_format_cache;
 struct lp_fragment_shader_variant;
 struct llvmpipe_screen;
 
@@ -189,6 +190,7 @@ enum {
 
 struct lp_jit_thread_data
 {
+   struct lp_build_format_cache *cache;
    uint64_t vis_counter;
 
    /*
@@ -201,12 +203,16 @@ struct lp_jit_thread_data
 
 
 enum {
-   LP_JIT_THREAD_DATA_COUNTER = 0,
+   LP_JIT_THREAD_DATA_CACHE = 0,
+   LP_JIT_THREAD_DATA_COUNTER,
    LP_JIT_THREAD_DATA_RASTER_STATE_VIEWPORT_INDEX,
    LP_JIT_THREAD_DATA_COUNT
 };
 
 
+#define lp_jit_thread_data_cache(_gallivm, _ptr) \
+   lp_build_struct_get(_gallivm, _ptr, LP_JIT_THREAD_DATA_CACHE, "cache")
+
 #define lp_jit_thread_data_counter(_gallivm, _ptr) \
    lp_build_struct_get_ptr(_gallivm, _ptr, LP_JIT_THREAD_DATA_COUNTER, "counter")
 
diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c
index c726707c062..d22e50777fa 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast.c
+++ b/src/gallium/drivers/llvmpipe/lp_rast.c
@@ -43,6 +43,7 @@
 #include "lp_query.h"
 #include "lp_rast.h"
 #include "lp_rast_priv.h"
+#include "gallivm/lp_bld_format.h"
 #include "gallivm/lp_bld_debug.h"
 #include "lp_scene.h"
 #include "lp_tex_sample.h"
@@ -664,6 +665,17 @@ rasterize_scene(struct lp_rasterizer_task *task,
 {
    task->scene = scene;
 
+   /* Clear the cache tags. This should not always be necessary but
+      simpler for now. */
+#if LP_USE_TEXTURE_CACHE
+   memset(task->thread_data.cache->cache_tags, 0,
+          sizeof(task->thread_data.cache->cache_tags));
+#if LP_BUILD_FORMAT_CACHE_DEBUG
+   task->thread_data.cache->cache_access_total = 0;
+   task->thread_data.cache->cache_access_miss = 0;
+#endif
+#endif
+
    if (!task->rast->no_rast && !scene->discard) {
       /* loop over scene bins, rasterize each */
       {
@@ -679,6 +691,20 @@ rasterize_scene(struct lp_rasterizer_task *task,
    }
 
 
+#if LP_BUILD_FORMAT_CACHE_DEBUG
+   {
+      uint64_t total, miss;
+      total = task->thread_data.cache->cache_access_total;
+      miss = task->thread_data.cache->cache_access_miss;
+      if (total) {
+         debug_printf("thread %d cache access %llu miss %llu hit rate %f\n",
+                 task->thread_index, (long long unsigned)total,
+                 (long long unsigned)miss,
+                 (float)(total - miss)/(float)total);
+      }
+   }
+#endif
+
    if (scene->fence) {
       lp_fence_signal(scene->fence);
    }
@@ -866,10 +892,15 @@ lp_rast_create( unsigned num_threads )
       goto no_full_scenes;
    }
 
-   for (i = 0; i < Elements(rast->tasks); i++) {
+   for (i = 0; i < MAX2(1, num_threads); i++) {
       struct lp_rasterizer_task *task = &rast->tasks[i];
       task->rast = rast;
       task->thread_index = i;
+      task->thread_data.cache = align_malloc(sizeof(struct lp_build_format_cache),
+                                             16);
+      if (!task->thread_data.cache) {
+         goto no_thread_data_cache;
+      }
    }
 
    rast->num_threads = num_threads;
@@ -885,6 +916,14 @@ lp_rast_create( unsigned num_threads )
 
    return rast;
 
+no_thread_data_cache:
+   /* rast->num_threads isn't assigned yet; bound by the num_threads arg. */
+   for (i = 0; i < MAX2(1, num_threads); i++) {
+      if (rast->tasks[i].thread_data.cache)
+         align_free(rast->tasks[i].thread_data.cache);
+   }
+
+   lp_scene_queue_destroy(rast->full_scenes);
 no_full_scenes:
    FREE(rast);
 no_rast:
@@ -923,6 +962,9 @@ void lp_rast_destroy( struct lp_rasterizer *rast )
       pipe_semaphore_destroy(&rast->tasks[i].work_ready);
       pipe_semaphore_destroy(&rast->tasks[i].work_done);
    }
+   for (i = 0; i < MAX2(1, rast->num_threads); i++) {
+      align_free(rast->tasks[i].thread_data.cache);
+   }
 
    /* for synchronizing rasterization threads */
    pipe_barrier_destroy( &rast->barrier );
diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c
index fd6c49aacd8..f55f6b4fa4f 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_fs.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c
@@ -421,7 +421,7 @@ generate_fs_loop(struct gallivm_state *gallivm,
    lp_build_tgsi_soa(gallivm, tokens, type, &mask,
                      consts_ptr, num_consts_ptr, &system_values,
                      interp->inputs,
-                     outputs, context_ptr,
+                     outputs, context_ptr, thread_data_ptr,
                      sampler, &shader->info.base, NULL);
 
    /* Alpha test */
@@ -2303,8 +2303,8 @@ generate_fragment(struct llvmpipe_context *lp,
    lp_build_name(dady_ptr, "dady");
    lp_build_name(color_ptr_ptr, "color_ptr_ptr");
    lp_build_name(depth_ptr, "depth");
-   lp_build_name(thread_data_ptr, "thread_data");
    lp_build_name(mask_input, "mask_input");
+   lp_build_name(thread_data_ptr, "thread_data");
    lp_build_name(stride_ptr, "stride_ptr");
    lp_build_name(depth_stride, "depth_stride");
 
diff --git a/src/gallium/drivers/llvmpipe/lp_test_format.c b/src/gallium/drivers/llvmpipe/lp_test_format.c
index d9abd1ae37c..0640a217874 100644
--- a/src/gallium/drivers/llvmpipe/lp_test_format.c
+++ b/src/gallium/drivers/llvmpipe/lp_test_format.c
@@ -44,6 +44,9 @@
 
 #include "lp_test.h"
 
+#define USE_TEXTURE_CACHE 1
+
+static struct lp_build_format_cache *cache_ptr;
 
 void
 write_tsv_header(FILE *fp)
@@ -71,7 +74,7 @@ write_tsv_row(FILE *fp,
 
 typedef void
 (*fetch_ptr_t)(void *unpacked, const void *packed,
-               unsigned i, unsigned j);
+               unsigned i, unsigned j, struct lp_build_format_cache *cache);
 
 
 static LLVMValueRef
@@ -83,7 +86,7 @@ add_fetch_rgba_test(struct gallivm_state *gallivm, unsigned verbose,
    LLVMContextRef context = gallivm->context;
    LLVMModuleRef module = gallivm->module;
    LLVMBuilderRef builder = gallivm->builder;
-   LLVMTypeRef args[4];
+   LLVMTypeRef args[5];
    LLVMValueRef func;
    LLVMValueRef packed_ptr;
    LLVMValueRef offset = LLVMConstNull(LLVMInt32TypeInContext(context));
@@ -92,6 +95,7 @@ add_fetch_rgba_test(struct gallivm_state *gallivm, unsigned verbose,
    LLVMValueRef j;
    LLVMBasicBlockRef block;
    LLVMValueRef rgba;
+   LLVMValueRef cache = NULL;
 
    util_snprintf(name, sizeof name, "fetch_%s_%s", desc->short_name,
                  type.floating ? "float" : "unorm8");
@@ -99,6 +103,7 @@ add_fetch_rgba_test(struct gallivm_state *gallivm, unsigned verbose,
    args[0] = LLVMPointerType(lp_build_vec_type(gallivm, type), 0);
    args[1] = LLVMPointerType(LLVMInt8TypeInContext(context), 0);
    args[3] = args[2] = LLVMInt32TypeInContext(context);
+   args[4] = LLVMPointerType(lp_build_format_cache_type(gallivm), 0);
 
    func = LLVMAddFunction(module, name,
                           LLVMFunctionType(LLVMVoidTypeInContext(context),
@@ -109,11 +114,15 @@ add_fetch_rgba_test(struct gallivm_state *gallivm, unsigned verbose,
    i = LLVMGetParam(func, 2);
    j = LLVMGetParam(func, 3);
 
+   if (cache_ptr) {
+      cache = LLVMGetParam(func, 4);
+   }
+
    block = LLVMAppendBasicBlockInContext(context, func, "entry");
    LLVMPositionBuilderAtEnd(builder, block);
 
    rgba = lp_build_fetch_rgba_aos(gallivm, desc, type, TRUE,
-                                  packed_ptr, offset, i, j);
+                                  packed_ptr, offset, i, j, cache);
 
    LLVMBuildStore(builder, rgba, rgba_ptr);
 
@@ -170,7 +179,7 @@ test_format_float(unsigned verbose, FILE *fp,
 
                memset(unpacked, 0, sizeof unpacked);
 
-               fetch_ptr(unpacked, packed, j, i);
+               fetch_ptr(unpacked, packed, j, i, cache_ptr);
 
                for(k = 0; k < 4; ++k) {
                   if (util_double_inf_sign(test->unpacked[i][j][k]) != util_inf_sign(unpacked[k])) {
@@ -187,6 +196,11 @@ test_format_float(unsigned verbose, FILE *fp,
                   }
                }
 
+               /* Ignore errors in S3TC for now */
+               if (desc->layout == UTIL_FORMAT_LAYOUT_S3TC) {
+                  match = TRUE;
+               }
+
                if (!match) {
                   printf("FAILED\n");
                   printf("  Packed: %02x %02x %02x %02x\n",
@@ -261,7 +275,7 @@ test_format_unorm8(unsigned verbose, FILE *fp,
 
                memset(unpacked, 0, sizeof unpacked);
 
-               fetch_ptr(unpacked, packed, j, i);
+               fetch_ptr(unpacked, packed, j, i, cache_ptr);
 
                match = TRUE;
                for(k = 0; k < 4; ++k) {
@@ -277,6 +291,11 @@ test_format_unorm8(unsigned verbose, FILE *fp,
                      match = FALSE;
                }
 
+               /* Ignore errors in S3TC as we only implement a poor man approach */
+               if (desc->layout == UTIL_FORMAT_LAYOUT_S3TC) {
+                  match = TRUE;
+               }
+
                if (!match) {
                   printf("FAILED\n");
                   printf("  Packed: %02x %02x %02x %02x\n",
@@ -334,6 +353,10 @@ test_all(unsigned verbose, FILE *fp)
 
    util_format_s3tc_init();
 
+#if USE_TEXTURE_CACHE
+   cache_ptr = align_malloc(sizeof(struct lp_build_format_cache), 16);
+#endif
+
    for (format = 1; format < PIPE_FORMAT_COUNT; ++format) {
       const struct util_format_description *format_desc;
 
@@ -363,6 +386,9 @@ test_all(unsigned verbose, FILE *fp)
            success = FALSE;
       }
    }
+#if USE_TEXTURE_CACHE
+   align_free(cache_ptr);
+#endif
 
    return success;
 }
diff --git a/src/gallium/drivers/llvmpipe/lp_tex_sample.c b/src/gallium/drivers/llvmpipe/lp_tex_sample.c
index 316d1c55082..217abe963b7 100644
--- a/src/gallium/drivers/llvmpipe/lp_tex_sample.c
+++ b/src/gallium/drivers/llvmpipe/lp_tex_sample.c
@@ -221,6 +221,21 @@ LP_LLVM_SAMPLER_MEMBER(lod_bias,   LP_JIT_SAMPLER_LOD_BIAS, TRUE)
 LP_LLVM_SAMPLER_MEMBER(border_color, LP_JIT_SAMPLER_BORDER_COLOR, FALSE)
 
 
+#if LP_USE_TEXTURE_CACHE
+static LLVMValueRef
+lp_llvm_texture_cache_ptr(const struct lp_sampler_dynamic_state *base,
+                          struct gallivm_state *gallivm,
+                          LLVMValueRef thread_data_ptr,
+                          unsigned unit)
+{
+   /* One cache (read from the thread data) serves all units; unit is unused */
+   (void)unit;
+
+   return lp_jit_thread_data_cache(gallivm, thread_data_ptr);
+}
+#endif
+
+
 static void
 lp_llvm_sampler_soa_destroy(struct lp_build_sampler_soa *sampler)
 {
@@ -314,6 +329,10 @@ lp_llvm_sampler_soa_create(const struct lp_sampler_static_state *static_state)
    sampler->dynamic_state.base.lod_bias = lp_llvm_sampler_lod_bias;
    sampler->dynamic_state.base.border_color = lp_llvm_sampler_border_color;
 
+#if LP_USE_TEXTURE_CACHE
+   sampler->dynamic_state.base.cache_ptr = lp_llvm_texture_cache_ptr;
+#endif
+
    sampler->dynamic_state.static_state = static_state;
 
    return &sampler->base;
diff --git a/src/gallium/drivers/llvmpipe/lp_tex_sample.h b/src/gallium/drivers/llvmpipe/lp_tex_sample.h
index f4aff226ce1..939131e7975 100644
--- a/src/gallium/drivers/llvmpipe/lp_tex_sample.h
+++ b/src/gallium/drivers/llvmpipe/lp_tex_sample.h
@@ -34,6 +34,10 @@
 
 struct lp_sampler_static_state;
 
+/**
+ * Whether texture cache is used for s3tc textures.
+ */
+#define LP_USE_TEXTURE_CACHE 1
 
 /**
  * Pure-LLVM texture sampling code generator.
@@ -42,5 +46,4 @@ struct lp_sampler_static_state;
 struct lp_build_sampler_soa *
 lp_llvm_sampler_soa_create(const struct lp_sampler_static_state *key);
 
-
 #endif /* LP_TEX_SAMPLE_H */

From fb77da89f51fd82d5cee95400acb20ad74d9e7bc Mon Sep 17 00:00:00 2001
From: Timothy Arceri <t_arceri@yahoo.com.au>
Date: Sat, 31 Oct 2015 10:31:37 +1100
Subject: [PATCH 017/287] i965: add support for image AoA

V3: clamp array index to the correct size (the size of the current array
rather than the inner array), as suggested by Francisco Jerez.

V2: avoid useless zero-initialization and addition for the first AoA level,
avoid redundant temporary, make use of type_size_scalar(), rename aoa_size
to element_size, assign the indirect indexing temporary directly to
image.reladdr, and replace while loop with a for loop. All suggested
by Francisco Jerez.

Reviewed-by: Francisco Jerez <currojerez@riseup.net>
---
 src/mesa/drivers/dri/i965/brw_fs_nir.cpp      | 30 ++++++++++---------
 .../drivers/dri/i965/brw_nir_uniforms.cpp     |  2 ++
 2 files changed, 18 insertions(+), 14 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index b6eab069a1f..e7a39ff741c 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -1062,18 +1062,17 @@ fs_visitor::get_nir_image_deref(const nir_deref_var *deref)
    fs_reg image(UNIFORM, deref->var->data.driver_location,
                 BRW_REGISTER_TYPE_UD);
 
-   if (deref->deref.child) {
-      const nir_deref_array *deref_array =
-         nir_deref_as_array(deref->deref.child);
-      assert(deref->deref.child->deref_type == nir_deref_type_array &&
-             deref_array->deref.child == NULL);
-      const unsigned size = glsl_get_length(deref->var->type);
+   for (const nir_deref *tail = &deref->deref; tail->child;
+        tail = tail->child) {
+      const nir_deref_array *deref_array = nir_deref_as_array(tail->child);
+      assert(tail->child->deref_type == nir_deref_type_array);
+      const unsigned size = glsl_get_length(tail->type);
+      const unsigned element_size = type_size_scalar(deref_array->deref.type);
       const unsigned base = MIN2(deref_array->base_offset, size - 1);
-
-      image = offset(image, bld, base * BRW_IMAGE_PARAM_SIZE);
+      image = offset(image, bld, base * element_size);
 
       if (deref_array->deref_array_type == nir_deref_array_type_indirect) {
-         fs_reg *tmp = new(mem_ctx) fs_reg(vgrf(glsl_type::int_type));
+         fs_reg tmp = vgrf(glsl_type::int_type);
 
          if (devinfo->gen == 7 && !devinfo->is_haswell) {
             /* IVB hangs when trying to access an invalid surface index with
@@ -1084,15 +1083,18 @@ fs_visitor::get_nir_image_deref(const nir_deref_var *deref)
              * of the possible outcomes of the hang.  Clamp the index to
              * prevent access outside of the array bounds.
              */
-            bld.emit_minmax(*tmp, retype(get_nir_src(deref_array->indirect),
-                                         BRW_REGISTER_TYPE_UD),
+            bld.emit_minmax(tmp, retype(get_nir_src(deref_array->indirect),
+                                        BRW_REGISTER_TYPE_UD),
                             fs_reg(size - base - 1), BRW_CONDITIONAL_L);
          } else {
-            bld.MOV(*tmp, get_nir_src(deref_array->indirect));
+            bld.MOV(tmp, get_nir_src(deref_array->indirect));
          }
 
-         bld.MUL(*tmp, *tmp, fs_reg(BRW_IMAGE_PARAM_SIZE));
-         image.reladdr = tmp;
+         bld.MUL(tmp, tmp, fs_reg(element_size));
+         if (image.reladdr)
+            bld.ADD(*image.reladdr, *image.reladdr, tmp);
+         else
+            image.reladdr = new(mem_ctx) fs_reg(tmp);
       }
    }
 
diff --git a/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp b/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp
index d3326e9fb86..87b383919df 100644
--- a/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp
+++ b/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp
@@ -98,6 +98,8 @@ brw_nir_setup_glsl_uniform(gl_shader_stage stage, nir_variable *var,
       if (storage->type->is_image()) {
          brw_setup_image_uniform_values(stage, stage_prog_data,
                                         uniform_index, storage);
+         uniform_index +=
+            BRW_IMAGE_PARAM_SIZE * MAX2(storage->array_elements, 1);
       } else {
          gl_constant_value *components = storage->storage;
          unsigned vector_count = (MAX2(storage->array_elements, 1) *

From 5b75dbd7be09fdc80eff8141ef47c63a6a913c98 Mon Sep 17 00:00:00 2001
From: Timothy Arceri <t_arceri@yahoo.com.au>
Date: Fri, 16 Oct 2015 10:28:47 +1100
Subject: [PATCH 018/287] i965: enable ARB_arrays_of_arrays
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reviewed-by: Samuel Iglesias Gonsálvez <siglesias@igalia.com>
---
 src/mesa/drivers/dri/i965/intel_extensions.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/mesa/drivers/dri/i965/intel_extensions.c b/src/mesa/drivers/dri/i965/intel_extensions.c
index 4643ea3e87b..386b63c123d 100644
--- a/src/mesa/drivers/dri/i965/intel_extensions.c
+++ b/src/mesa/drivers/dri/i965/intel_extensions.c
@@ -174,6 +174,7 @@ intelInitExtensions(struct gl_context *ctx)
 
    assert(brw->gen >= 4);
 
+   ctx->Extensions.ARB_arrays_of_arrays = true;
    ctx->Extensions.ARB_buffer_storage = true;
    ctx->Extensions.ARB_clear_texture = true;
    ctx->Extensions.ARB_clip_control = true;

From 6e3b380387378e9f8e92eed3dc4a95767857b0de Mon Sep 17 00:00:00 2001
From: Timothy Arceri <t_arceri@yahoo.com.au>
Date: Fri, 16 Oct 2015 10:28:48 +1100
Subject: [PATCH 019/287] docs: Mark AoA as done for i965

Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
---
 docs/GL3.txt              | 4 ++--
 docs/relnotes/11.1.0.html | 1 +
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/docs/GL3.txt b/docs/GL3.txt
index 7f6b8c9ef27..7abdcd8dea1 100644
--- a/docs/GL3.txt
+++ b/docs/GL3.txt
@@ -149,7 +149,7 @@ GL 4.2, GLSL 4.20:
 
 GL 4.3, GLSL 4.30:
 
-  GL_ARB_arrays_of_arrays                              started (Timothy)
+  GL_ARB_arrays_of_arrays                              DONE (i965)
   GL_ARB_ES3_compatibility                             DONE (all drivers that support GLSL 3.30)
   GL_ARB_clear_buffer_object                           DONE (all drivers)
   GL_ARB_compute_shader                                in progress (jljusten)
@@ -209,7 +209,7 @@ GL 4.5, GLSL 4.50:
 
 These are the extensions cherry-picked to make GLES 3.1
 GLES3.1, GLSL ES 3.1
-  GL_ARB_arrays_of_arrays                              started (Timothy)
+  GL_ARB_arrays_of_arrays                              DONE (i965)
   GL_ARB_compute_shader                                in progress (jljusten)
   GL_ARB_draw_indirect                                 DONE (i965, nvc0, r600, radeonsi, llvmpipe, softpipe)
   GL_ARB_explicit_uniform_location                     DONE (all drivers that support GLSL)
diff --git a/docs/relnotes/11.1.0.html b/docs/relnotes/11.1.0.html
index 7160244fcb4..86549d7672b 100644
--- a/docs/relnotes/11.1.0.html
+++ b/docs/relnotes/11.1.0.html
@@ -44,6 +44,7 @@ Note: some of the new features are only available with certain drivers.
 </p>
 
 <ul>
+<li>GL_ARB_arrays_of_arrays on i965</li>
 <li>GL_ARB_blend_func_extended on freedreno (a3xx)</li>
 <li>GL_ARB_copy_image on radeonsi</li>
 <li>GL_ARB_gpu_shader_fp64 on r600 for Cypress/Cayman/Aruba chips</li>

From f6b3c163f954c4fb5a525af39ce906f63b445e89 Mon Sep 17 00:00:00 2001
From: Timothy Arceri <t_arceri@yahoo.com.au>
Date: Wed, 4 Nov 2015 14:50:49 +1100
Subject: [PATCH 020/287] glsl: remove old TODO
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

SSBO support now exists as of commits f24e5e and f408a13dd30.

Reviewed-by: Tapani Pälli <tapani.palli@intel.com>
Acked-by: Matt Turner <mattst88@gmail.com>
---
 src/glsl/linker.cpp | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp
index 9dcc2a76c9a..3ad295587f8 100644
--- a/src/glsl/linker.cpp
+++ b/src/glsl/linker.cpp
@@ -3799,11 +3799,6 @@ build_program_resource_list(struct gl_shader_program *shProg)
             return;
       }
    }
-
-   /* TODO - following extensions will require more resource types:
-    *
-    *    GL_ARB_shader_storage_buffer_object
-    */
 }
 
 /**

From 8e4cf900f0af9eb8a72c81a0e5e393906b11764a Mon Sep 17 00:00:00 2001
From: Timothy Arceri <t_arceri@yahoo.com.au>
Date: Wed, 4 Nov 2015 08:41:29 +1100
Subject: [PATCH 021/287] glsl: make sure to only add subroutines to resource
 list
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Overlooked in 763cd8c080353.

Reviewed-by: Tapani Pälli <tapani.palli@intel.com>
---
 src/glsl/linker.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp
index 3ad295587f8..26c02986be4 100644
--- a/src/glsl/linker.cpp
+++ b/src/glsl/linker.cpp
@@ -3776,7 +3776,8 @@ build_program_resource_list(struct gl_shader_program *shProg)
          continue;
 
       for (int j = MESA_SHADER_VERTEX; j < MESA_SHADER_STAGES; j++) {
-         if (!shProg->UniformStorage[i].opaque[j].active)
+         if (!shProg->UniformStorage[i].opaque[j].active ||
+             !shProg->UniformStorage[i].type->is_subroutine())
             continue;
 
          type = _mesa_shader_stage_to_subroutine_uniform((gl_shader_stage)j);

From 13b19aa815661cd17b74c8694b6c466bfaf75740 Mon Sep 17 00:00:00 2001
From: Ryan Houdek <sonicadvance1@gmail.com>
Date: Mon, 2 Nov 2015 19:30:18 -0600
Subject: [PATCH 022/287] mesa: expose support for GL_EXT_buffer_storage

This extension requires ES 3.1 since it relies on glMemoryBarrier.
For testing purposes I temporarily moved glMemoryBarrier to be an ES 3.0
function.
This has been tested with the piglit test posted to the mailing list and with
the Dolphin emulator.

Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
---
 docs/relnotes/11.1.0.html     | 1 +
 src/mapi/glapi/gen/es_EXT.xml | 9 +++++++++
 src/mesa/main/extensions.c    | 1 +
 3 files changed, 11 insertions(+)

diff --git a/docs/relnotes/11.1.0.html b/docs/relnotes/11.1.0.html
index 86549d7672b..c35d91f4329 100644
--- a/docs/relnotes/11.1.0.html
+++ b/docs/relnotes/11.1.0.html
@@ -56,6 +56,7 @@ Note: some of the new features are only available with certain drivers.
 <li>GL_ARB_texture_barrier / GL_NV_texture_barrier on i965</li>
 <li>GL_ARB_texture_query_lod on softpipe</li>
 <li>GL_ARB_texture_view on radeonsi</li>
+<li>GL_EXT_buffer_storage implemented for when ES 3.1 support is gained</li>
 <li>GL_EXT_draw_elements_base_vertex on all drivers</li>
 <li>GL_OES_draw_elements_base_vertex on all drivers</li>
 <li>EGL_KHR_create_context on softpipe, llvmpipe</li>
diff --git a/src/mapi/glapi/gen/es_EXT.xml b/src/mapi/glapi/gen/es_EXT.xml
index bf20e4801cc..9a777a24c61 100644
--- a/src/mapi/glapi/gen/es_EXT.xml
+++ b/src/mapi/glapi/gen/es_EXT.xml
@@ -905,4 +905,13 @@
 
 </category>
 
+<category name="GL_EXT_buffer_storage" number="239">
+    <function name="BufferStorageEXT" alias="BufferStorage" es2="3.1">
+        <param name="target" type="GLenum"/>
+        <param name="size" type="GLsizeiptr"/>
+        <param name="data" type="const GLvoid *"/>
+        <param name="flags" type="GLbitfield"/>
+    </function>
+</category>
+
 </OpenGLAPI>
diff --git a/src/mesa/main/extensions.c b/src/mesa/main/extensions.c
index d964f030ecb..bdc68175bf2 100644
--- a/src/mesa/main/extensions.c
+++ b/src/mesa/main/extensions.c
@@ -222,6 +222,7 @@ static const struct extension extension_table[] = {
    { "GL_EXT_blend_color",                         o(EXT_blend_color),                         GLL,            1995 },
    { "GL_EXT_blend_equation_separate",             o(EXT_blend_equation_separate),             GL,             2003 },
    { "GL_EXT_blend_func_separate",                 o(EXT_blend_func_separate),                 GLL,            1999 },
+   { "GL_EXT_buffer_storage",                      o(ARB_buffer_storage),                                 ES31, 2015 },
    { "GL_EXT_discard_framebuffer",                 o(dummy_true),                                    ES1 | ES2, 2009 },
    { "GL_EXT_blend_minmax",                        o(EXT_blend_minmax),                        GLL | ES1 | ES2, 1995 },
    { "GL_EXT_blend_subtract",                      o(dummy_true),                              GLL,            1995 },

From d56a1478a8006af48aa65ab62e676e5f974f1ec3 Mon Sep 17 00:00:00 2001
From: Ben Widawsky <benjamin.widawsky@intel.com>
Date: Tue, 13 Oct 2015 20:50:25 -0700
Subject: [PATCH 023/287] i965/meta: Assert fast clears and rep clears never
 overlap
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

There is nothing wrong with the code today, but it is easy to break this
invariant when modifying the code, and this simple assertion should catch
such driver implementation failures quickly.

Cc: Kristian Høgsberg <krh@bitplanet.net>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Reviewed-by: Chad Versace <chad.versace@intel.com>
Reviewed-by: Neil Roberts <neil@linux.intel.com>
---
 src/mesa/drivers/dri/i965/brw_meta_fast_clear.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c b/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c
index fbde3f04204..69fe7b4aa5b 100644
--- a/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c
+++ b/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c
@@ -536,6 +536,8 @@ brw_meta_fast_clear(struct brw_context *brw, struct gl_framebuffer *fb,
       }
    }
 
+   assert((fast_clear_buffers & rep_clear_buffers) == 0);
+
    if (!(fast_clear_buffers | rep_clear_buffers)) {
       if (plain_clear_buffers)
          /* If we only have plain clears, skip the meta save/restore. */

From c19443bc8b68ef4697ead1998286e42bd4d8a572 Mon Sep 17 00:00:00 2001
From: Roland Scheidegger <sroland@vmware.com>
Date: Wed, 4 Nov 2015 14:21:43 +0100
Subject: [PATCH 024/287] gallivm: fix sampling for s3tc srgb formats when
 using texture cache

This actually stored the values as 8bit linear values in the cache,
then did another srgb->linear conversion...
We don't want to do the former (decoding 8bit srgb values to 8bit linear
completely defeats the purpose of srgb in the first place), so just decode
to 8bit srgb.
Fixes piglit.spec.ext_texture_srgb.texwrap formats-s3tc tests.
---
 src/gallium/auxiliary/gallivm/lp_bld_format_soa.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c
index 42aef8376f8..8bae94af3d7 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c
@@ -492,9 +492,11 @@ lp_build_fetch_rgba_soa(struct gallivm_state *gallivm,
        (type.length == 1 || (type.length % 4 == 0)) &&
        cache) {
       const struct util_format_description *format_decompressed;
+      const struct util_format_description *flinear_desc;
       LLVMValueRef packed;
+      flinear_desc = util_format_description(util_format_linear(format_desc->format));
       packed = lp_build_fetch_cached_texels(gallivm,
-                                            format_desc,
+                                            flinear_desc,
                                             type.length,
                                             base_ptr,
                                             offset,

From c3d7caa1e006f00c3544a79a0be7d78904ce4177 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= <apinheiro@igalia.com>
Date: Thu, 22 Oct 2015 22:22:14 +0200
Subject: [PATCH 025/287] i965: check inst->predicate when clearing flag_live
 at dead code eliminate

Detected by Matt Turner while reviewing commit
a59359ecd22154cc2b3f88bb8c599f21af8a3934

Reviewed-by: Matt Turner <mattst88@gmail.com>
---
 src/mesa/drivers/dri/i965/brw_fs_dead_code_eliminate.cpp   | 2 +-
 src/mesa/drivers/dri/i965/brw_vec4_dead_code_eliminate.cpp | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs_dead_code_eliminate.cpp b/src/mesa/drivers/dri/i965/brw_fs_dead_code_eliminate.cpp
index 4b5548a9dc5..1eaf1478877 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_dead_code_eliminate.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_dead_code_eliminate.cpp
@@ -105,7 +105,7 @@ fs_visitor::dead_code_eliminate()
             }
          }
 
-         if (inst->writes_flag()) {
+         if (inst->writes_flag() && !inst->predicate) {
             BITSET_CLEAR(flag_live, inst->flag_subreg);
          }
 
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_dead_code_eliminate.cpp b/src/mesa/drivers/dri/i965/brw_vec4_dead_code_eliminate.cpp
index 284e0a8d0a5..e8a51d6e066 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_dead_code_eliminate.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_dead_code_eliminate.cpp
@@ -145,7 +145,7 @@ vec4_visitor::dead_code_eliminate()
             }
          }
 
-         if (inst->writes_flag()) {
+         if (inst->writes_flag() && !inst->predicate) {
             for (unsigned c = 0; c < 4; c++)
                BITSET_CLEAR(flag_live, c);
          }

From fa6efbd27d1c725f38e960005d8806521bd58156 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Sat, 31 Oct 2015 07:44:23 -0600
Subject: [PATCH 026/287] util/indices: replace #define tokens with enum type

To ease debugging in gdb.

Reviewed-by: Charmaine Lee <charmainel@vmware.com>
---
 src/gallium/auxiliary/indices/u_indices.c     | 45 ++++-----
 src/gallium/auxiliary/indices/u_indices.h     | 96 ++++++++++---------
 .../auxiliary/indices/u_unfilled_indices.c    | 34 +++----
 3 files changed, 90 insertions(+), 85 deletions(-)

diff --git a/src/gallium/auxiliary/indices/u_indices.c b/src/gallium/auxiliary/indices/u_indices.c
index c25594b4b7a..436f8f008cb 100644
--- a/src/gallium/auxiliary/indices/u_indices.c
+++ b/src/gallium/auxiliary/indices/u_indices.c
@@ -68,17 +68,18 @@ static void translate_memcpy_uint( const void *in,
  * \param out_nr  returns number of new vertices
  * \param out_translate  returns the translation function to use by the caller
  */
-int u_index_translator( unsigned hw_mask,
-                        unsigned prim,
-                        unsigned in_index_size,
-                        unsigned nr,
-                        unsigned in_pv,
-                        unsigned out_pv,
-                        unsigned prim_restart,
-                        unsigned *out_prim,
-                        unsigned *out_index_size,
-                        unsigned *out_nr,
-                        u_translate_func *out_translate )
+enum indices_mode
+u_index_translator(unsigned hw_mask,
+                   unsigned prim,
+                   unsigned in_index_size,
+                   unsigned nr,
+                   unsigned in_pv,
+                   unsigned out_pv,
+                   unsigned prim_restart,
+                   unsigned *out_prim,
+                   unsigned *out_index_size,
+                   unsigned *out_nr,
+                   u_translate_func *out_translate)
 {
    unsigned in_idx;
    unsigned out_idx;
@@ -204,17 +205,17 @@ int u_index_translator( unsigned hw_mask,
  * \param out_nr  returns new number of vertices to draw
  * \param out_generate  returns pointer to the generator function
  */
-int u_index_generator( unsigned hw_mask,
-                       unsigned prim,
-                       unsigned start,
-                       unsigned nr,
-                       unsigned in_pv,
-                       unsigned out_pv,
-                       unsigned *out_prim,
-                       unsigned *out_index_size,
-                       unsigned *out_nr,
-                       u_generate_func *out_generate )
-
+enum indices_mode
+u_index_generator(unsigned hw_mask,
+                  unsigned prim,
+                  unsigned start,
+                  unsigned nr,
+                  unsigned in_pv,
+                  unsigned out_pv,
+                  unsigned *out_prim,
+                  unsigned *out_index_size,
+                  unsigned *out_nr,
+                  u_generate_func *out_generate)
 {
    unsigned out_idx;
 
diff --git a/src/gallium/auxiliary/indices/u_indices.h b/src/gallium/auxiliary/indices/u_indices.h
index e01201e4b04..4483eb81337 100644
--- a/src/gallium/auxiliary/indices/u_indices.h
+++ b/src/gallium/auxiliary/indices/u_indices.h
@@ -67,66 +67,68 @@ typedef void (*u_generate_func)( unsigned start,
 /* Return codes describe the translate/generate operation.  Caller may
  * be able to reuse translated indices under some circumstances.
  */
-#define U_TRANSLATE_ERROR  -1
-#define U_TRANSLATE_NORMAL  1
-#define U_TRANSLATE_MEMCPY  2
-#define U_GENERATE_LINEAR   3
-#define U_GENERATE_REUSABLE 4
-#define U_GENERATE_ONE_OFF  5
-
+enum indices_mode {
+   U_TRANSLATE_ERROR = -1,
+   U_TRANSLATE_NORMAL = 1,
+   U_TRANSLATE_MEMCPY = 2,
+   U_GENERATE_LINEAR  = 3,
+   U_GENERATE_REUSABLE= 4,
+   U_GENERATE_ONE_OFF = 5,
+};
 
 void u_index_init( void );
 
-int u_index_translator( unsigned hw_mask,
-                        unsigned prim,
-                        unsigned in_index_size,
-                        unsigned nr,
-                        unsigned in_pv,   /* API */
-                        unsigned out_pv,  /* hardware */
-                        unsigned prim_restart,
-                        unsigned *out_prim,
-                        unsigned *out_index_size,
-                        unsigned *out_nr,
-                        u_translate_func *out_translate );
+enum indices_mode
+u_index_translator(unsigned hw_mask,
+                   unsigned prim,
+                   unsigned in_index_size,
+                   unsigned nr,
+                   unsigned in_pv,   /* API */
+                   unsigned out_pv,  /* hardware */
+                   unsigned prim_restart,
+                   unsigned *out_prim,
+                   unsigned *out_index_size,
+                   unsigned *out_nr,
+                   u_translate_func *out_translate);
 
 /* Note that even when generating it is necessary to know what the
  * API's PV is, as the indices generated will depend on whether it is
  * the same as hardware or not, and in the case of triangle strips,
  * whether it is first or last.
  */
-int u_index_generator( unsigned hw_mask,
-                       unsigned prim,
-                       unsigned start,
-                       unsigned nr,
-                       unsigned in_pv,   /* API */
-                       unsigned out_pv,  /* hardware */
-                       unsigned *out_prim,
-                       unsigned *out_index_size,
-                       unsigned *out_nr,
-                       u_generate_func *out_generate );
+enum indices_mode
+u_index_generator(unsigned hw_mask,
+                  unsigned prim,
+                  unsigned start,
+                  unsigned nr,
+                  unsigned in_pv,   /* API */
+                  unsigned out_pv,  /* hardware */
+                  unsigned *out_prim,
+                  unsigned *out_index_size,
+                  unsigned *out_nr,
+                  u_generate_func *out_generate);
 
 
 void u_unfilled_init( void );
 
-int u_unfilled_translator( unsigned prim,
-                           unsigned in_index_size,
-                           unsigned nr,
-                           unsigned unfilled_mode,
-                           unsigned *out_prim,
-                           unsigned *out_index_size,
-                           unsigned *out_nr,
-                           u_translate_func *out_translate );
-
-int u_unfilled_generator( unsigned prim,
-                          unsigned start,
-                          unsigned nr,
-                          unsigned unfilled_mode,
-                          unsigned *out_prim,
-                          unsigned *out_index_size,
-                          unsigned *out_nr,
-                          u_generate_func *out_generate );
-
-
+enum indices_mode
+u_unfilled_translator(unsigned prim,
+                      unsigned in_index_size,
+                      unsigned nr,
+                      unsigned unfilled_mode,
+                      unsigned *out_prim,
+                      unsigned *out_index_size,
+                      unsigned *out_nr,
+                      u_translate_func *out_translate);
 
+enum indices_mode
+u_unfilled_generator(unsigned prim,
+                     unsigned start,
+                     unsigned nr,
+                     unsigned unfilled_mode,
+                     unsigned *out_prim,
+                     unsigned *out_index_size,
+                     unsigned *out_nr,
+                     u_generate_func *out_generate);
 
 #endif
diff --git a/src/gallium/auxiliary/indices/u_unfilled_indices.c b/src/gallium/auxiliary/indices/u_unfilled_indices.c
index 121877a60fb..fc974f8b946 100644
--- a/src/gallium/auxiliary/indices/u_unfilled_indices.c
+++ b/src/gallium/auxiliary/indices/u_unfilled_indices.c
@@ -111,14 +111,15 @@ static unsigned nr_lines( unsigned prim,
                               
 
 
-int u_unfilled_translator( unsigned prim,
-                        unsigned in_index_size,
-                        unsigned nr,
-                        unsigned unfilled_mode,
-                        unsigned *out_prim,
-                        unsigned *out_index_size,
-                        unsigned *out_nr,
-                        u_translate_func *out_translate )
+enum indices_mode
+u_unfilled_translator(unsigned prim,
+                      unsigned in_index_size,
+                      unsigned nr,
+                      unsigned unfilled_mode,
+                      unsigned *out_prim,
+                      unsigned *out_index_size,
+                      unsigned *out_nr,
+                      u_translate_func *out_translate)
 {
    unsigned in_idx;
    unsigned out_idx;
@@ -170,14 +171,15 @@ int u_unfilled_translator( unsigned prim,
  * different front/back fill modes, that can be handled with the
  * 'draw' module.
  */
-int u_unfilled_generator( unsigned prim,
-                          unsigned start,
-                          unsigned nr,
-                          unsigned unfilled_mode,
-                          unsigned *out_prim,
-                          unsigned *out_index_size,
-                          unsigned *out_nr,
-                          u_generate_func *out_generate )
+enum indices_mode
+u_unfilled_generator(unsigned prim,
+                     unsigned start,
+                     unsigned nr,
+                     unsigned unfilled_mode,
+                     unsigned *out_prim,
+                     unsigned *out_index_size,
+                     unsigned *out_nr,
+                     u_generate_func *out_generate)
 {
    unsigned out_idx;
 

From 3f98c812b30d739b744d70267a28a25afcaa8b13 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Sat, 31 Oct 2015 07:44:49 -0600
Subject: [PATCH 027/287] svga: use new enum indices_mode type

Reviewed-by: Charmaine Lee <charmainel@vmware.com>
---
 src/gallium/drivers/svga/svga_draw_arrays.c   | 3 ++-
 src/gallium/drivers/svga/svga_draw_elements.c | 3 ++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/svga/svga_draw_arrays.c b/src/gallium/drivers/svga/svga_draw_arrays.c
index caf4b17de16..acb2e95e747 100644
--- a/src/gallium/drivers/svga/svga_draw_arrays.c
+++ b/src/gallium/drivers/svga/svga_draw_arrays.c
@@ -204,7 +204,8 @@ svga_hwtnl_draw_arrays(struct svga_hwtnl *hwtnl,
                        unsigned prim, unsigned start, unsigned count,
                        unsigned start_instance, unsigned instance_count)
 {
-   unsigned gen_prim, gen_size, gen_nr, gen_type;
+   unsigned gen_prim, gen_size, gen_nr;
+   enum indices_mode gen_type;
    u_generate_func gen_func;
    enum pipe_error ret = PIPE_OK;
    unsigned api_pv = hwtnl->api_pv;
diff --git a/src/gallium/drivers/svga/svga_draw_elements.c b/src/gallium/drivers/svga/svga_draw_elements.c
index 9df8f6e9beb..0213409ef29 100644
--- a/src/gallium/drivers/svga/svga_draw_elements.c
+++ b/src/gallium/drivers/svga/svga_draw_elements.c
@@ -133,7 +133,8 @@ svga_hwtnl_draw_range_elements(struct svga_hwtnl *hwtnl,
                                unsigned prim, unsigned start, unsigned count,
                                unsigned start_instance, unsigned instance_count)
 {
-   unsigned gen_prim, gen_size, gen_nr, gen_type;
+   unsigned gen_prim, gen_size, gen_nr;
+   enum indices_mode gen_type;
    u_translate_func gen_func;
    enum pipe_error ret = PIPE_OK;
 

From e450d4371a4166f57a7e412d2c1e68aa1162a703 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Wed, 28 Oct 2015 19:02:38 -0600
Subject: [PATCH 028/287] u_vbuf: add some const qualifiers

Trivial.
---
 src/gallium/auxiliary/util/u_vbuf.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/gallium/auxiliary/util/u_vbuf.c b/src/gallium/auxiliary/util/u_vbuf.c
index b31ada138b8..9ddd9222e7e 100644
--- a/src/gallium/auxiliary/util/u_vbuf.c
+++ b/src/gallium/auxiliary/util/u_vbuf.c
@@ -998,7 +998,7 @@ u_vbuf_upload_buffers(struct u_vbuf *mgr,
    return PIPE_OK;
 }
 
-static boolean u_vbuf_need_minmax_index(struct u_vbuf *mgr)
+static boolean u_vbuf_need_minmax_index(const struct u_vbuf *mgr)
 {
    /* See if there are any per-vertex attribs which will be uploaded or
     * translated. Use bitmasks to get the info instead of looping over vertex
@@ -1009,7 +1009,7 @@ static boolean u_vbuf_need_minmax_index(struct u_vbuf *mgr)
             mgr->ve->noninstance_vb_mask_any & mgr->nonzero_stride_vb_mask)) != 0;
 }
 
-static boolean u_vbuf_mapping_vertex_buffer_blocks(struct u_vbuf *mgr)
+static boolean u_vbuf_mapping_vertex_buffer_blocks(const struct u_vbuf *mgr)
 {
    /* Return true if there are hw buffers which don't need to be translated.
     *

From 149ac1fe43a87ee4219f9979dcce2de7964c31a9 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Wed, 28 Oct 2015 19:05:27 -0600
Subject: [PATCH 029/287] u_vbuf: minor code reformatting / line wrapping

Trivial.
---
 src/gallium/auxiliary/util/u_vbuf.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/src/gallium/auxiliary/util/u_vbuf.c b/src/gallium/auxiliary/util/u_vbuf.c
index 9ddd9222e7e..54e9e717104 100644
--- a/src/gallium/auxiliary/util/u_vbuf.c
+++ b/src/gallium/auxiliary/util/u_vbuf.c
@@ -1004,9 +1004,11 @@ static boolean u_vbuf_need_minmax_index(const struct u_vbuf *mgr)
     * translated. Use bitmasks to get the info instead of looping over vertex
     * elements. */
    return (mgr->ve->used_vb_mask &
-           ((mgr->user_vb_mask | mgr->incompatible_vb_mask |
+           ((mgr->user_vb_mask |
+             mgr->incompatible_vb_mask |
              mgr->ve->incompatible_vb_mask_any) &
-            mgr->ve->noninstance_vb_mask_any & mgr->nonzero_stride_vb_mask)) != 0;
+            mgr->ve->noninstance_vb_mask_any &
+            mgr->nonzero_stride_vb_mask)) != 0;
 }
 
 static boolean u_vbuf_mapping_vertex_buffer_blocks(const struct u_vbuf *mgr)
@@ -1016,8 +1018,10 @@ static boolean u_vbuf_mapping_vertex_buffer_blocks(const struct u_vbuf *mgr)
     * We could query whether each buffer is busy, but that would
     * be way more costly than this. */
    return (mgr->ve->used_vb_mask &
-           (~mgr->user_vb_mask & ~mgr->incompatible_vb_mask &
-            mgr->ve->compatible_vb_mask_all & mgr->ve->noninstance_vb_mask_any &
+           (~mgr->user_vb_mask &
+            ~mgr->incompatible_vb_mask &
+            mgr->ve->compatible_vb_mask_all &
+            mgr->ve->noninstance_vb_mask_any &
             mgr->nonzero_stride_vb_mask)) != 0;
 }
 

From d31481e70ab0da293d4c3010815f643f161b7168 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Tue, 3 Nov 2015 14:34:15 -0700
Subject: [PATCH 030/287] svga: implement 'white_fragments' option for VGPU10
 fragment shaders

When we emulate XOR logicop mode with blend-subtract, we need to ensure
that the fragment shader always emits white.  We had this implemented
for VGPU9, but not VGPU10.

VMware bug 1545492.

Reviewed-by: Charmaine Lee <charmainel@vmware.com>
---
 src/gallium/drivers/svga/svga_tgsi_vgpu10.c | 35 ++++++++++++++++++---
 1 file changed, 30 insertions(+), 5 deletions(-)

diff --git a/src/gallium/drivers/svga/svga_tgsi_vgpu10.c b/src/gallium/drivers/svga/svga_tgsi_vgpu10.c
index e70ee689c59..9b7ab16103f 100644
--- a/src/gallium/drivers/svga/svga_tgsi_vgpu10.c
+++ b/src/gallium/drivers/svga/svga_tgsi_vgpu10.c
@@ -2672,6 +2672,7 @@ emit_temporaries_declaration(struct svga_shader_emitter_v10 *emit)
    }
    else if (emit->unit == PIPE_SHADER_FRAGMENT) {
       if (emit->key.fs.alpha_func != SVGA3D_CMP_ALWAYS ||
+          emit->key.fs.white_fragments ||
           emit->key.fs.write_color0_to_n_cbufs > 1) {
          /* Allocate a temp to hold the output color */
          emit->fs.color_tmp_index = total_temps;
@@ -6369,8 +6370,11 @@ emit_alpha_test_instructions(struct svga_shader_emitter_v10 *emit,
    emit_src_register(emit, &tmp_src_x);
    end_emit_instruction(emit);
 
-   /* If we don't need to broadcast the color below, emit final color here */
-   if (emit->key.fs.write_color0_to_n_cbufs <= 1) {
+   /* If we don't need to broadcast the color below or set fragments to
+    * white, emit final color here.
+    */
+   if (emit->key.fs.write_color0_to_n_cbufs <= 1 &&
+       !emit->key.fs.white_fragments) {
       /* MOV output.color, tempcolor */
       emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &color_dst,
                            &color_src, FALSE);     /* XXX saturate? */
@@ -6380,10 +6384,28 @@ emit_alpha_test_instructions(struct svga_shader_emitter_v10 *emit,
 }
 
 
+/**
+ * When we need to emit white for all fragments (for emulating XOR logicop
+ * mode), this function copies white into the temporary color output register.
+ */
+static void
+emit_set_color_white(struct svga_shader_emitter_v10 *emit,
+                     unsigned fs_color_tmp_index)
+{
+   struct tgsi_full_dst_register color_dst =
+      make_dst_temp_reg(fs_color_tmp_index);
+   struct tgsi_full_src_register white =
+      make_immediate_reg_float(emit, 1.0f);
+
+   emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &color_dst, &white, FALSE);
+}
+
+
 /**
  * Emit instructions for writing a single color output to multiple
  * color buffers.
- * This is used when the TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS
+ * This is used when key.fs.white_fragments is true, or when the
+ * TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS
  * property is set and the number of render targets is greater than one.
  * \param fs_color_tmp_index  index of the temp register that holds the
  *                            color to broadcast.
@@ -6398,7 +6420,6 @@ emit_broadcast_color_instructions(struct svga_shader_emitter_v10 *emit,
       make_src_temp_reg(fs_color_tmp_index);
 
    assert(emit->unit == PIPE_SHADER_FRAGMENT);
-   assert(n > 1);
 
    for (i = 0; i < n; i++) {
       unsigned output_reg = emit->fs.color_out_index[i];
@@ -6440,7 +6461,11 @@ emit_post_helpers(struct svga_shader_emitter_v10 *emit)
       if (emit->key.fs.alpha_func != SVGA3D_CMP_ALWAYS) {
          emit_alpha_test_instructions(emit, fs_color_tmp_index);
       }
-      if (emit->key.fs.write_color0_to_n_cbufs > 1) {
+      if (emit->key.fs.white_fragments) {
+         emit_set_color_white(emit, fs_color_tmp_index);
+      }
+      if (emit->key.fs.write_color0_to_n_cbufs > 1 ||
+          emit->key.fs.white_fragments) {
          emit_broadcast_color_instructions(emit, fs_color_tmp_index);
       }
    }

From bdf6cef0333bf7278e2e2347aaae399288e87dcd Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Sat, 31 Oct 2015 07:02:36 -0600
Subject: [PATCH 031/287] vbo: fix another GL_LINE_LOOP bug

Very long line loops which spanned 3 or more vertex buffers were not
handled correctly and could result in stray lines.

The piglit lineloop test draws 10000 vertices by default, and is not
long enough to trigger this.  Even 'lineloop -count 100000' doesn't
trigger the bug.

For future reference, the issue can be reproduced by changing Mesa's
VBO_VERT_BUFFER_SIZE to 4096 and changing the piglit lineloop test to
use glVertex2f(), draw 3 loops instead of 1, and specifying -count
1023.

Acked-by: Sinclair Yeh <syeh@vmware.com>
Reviewed-by: Roland Scheidegger <sroland@vmware.com>
---
 src/mesa/vbo/vbo_exec_api.c  | 11 +++++++++--
 src/mesa/vbo/vbo_exec_draw.c |  1 +
 2 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/src/mesa/vbo/vbo_exec_api.c b/src/mesa/vbo/vbo_exec_api.c
index a614b26cae4..7534599c313 100644
--- a/src/mesa/vbo/vbo_exec_api.c
+++ b/src/mesa/vbo/vbo_exec_api.c
@@ -114,6 +114,7 @@ static void vbo_exec_wrap_buffers( struct vbo_exec_context *exec )
       if (_mesa_inside_begin_end(exec->ctx)) {
 	 exec->vtx.prim[0].mode = exec->ctx->Driver.CurrentExecPrimitive;
 	 exec->vtx.prim[0].begin = 0;
+         exec->vtx.prim[0].end = 0;
 	 exec->vtx.prim[0].start = 0;
 	 exec->vtx.prim[0].count = 0;
 	 exec->vtx.prim_count++;
@@ -846,17 +847,23 @@ static void GLAPIENTRY vbo_exec_End( void )
          /* We're finishing drawing a line loop.  Append 0th vertex onto
           * end of vertex buffer so we can draw it as a line strip.
           */
-         const fi_type *src = exec->vtx.buffer_map;
+         const fi_type *src = exec->vtx.buffer_map +
+            last_prim->start * exec->vtx.vertex_size;
          fi_type *dst = exec->vtx.buffer_map +
             exec->vtx.vert_count * exec->vtx.vertex_size;
 
          /* copy 0th vertex to end of buffer */
          memcpy(dst, src, exec->vtx.vertex_size * sizeof(fi_type));
 
-         assert(last_prim->start == 0);
          last_prim->start++;  /* skip vertex0 */
          /* note that last_prim->count stays unchanged */
          last_prim->mode = GL_LINE_STRIP;
+
+         /* Increment the vertex count so the next primitive doesn't
+          * overwrite the last vertex which we just added.
+          */
+         exec->vtx.vert_count++;
+         exec->vtx.buffer_ptr += exec->vtx.vertex_size;
       }
 
       try_vbo_merge(exec);
diff --git a/src/mesa/vbo/vbo_exec_draw.c b/src/mesa/vbo/vbo_exec_draw.c
index ed5d9e947b0..0d42618f246 100644
--- a/src/mesa/vbo/vbo_exec_draw.c
+++ b/src/mesa/vbo/vbo_exec_draw.c
@@ -117,6 +117,7 @@ vbo_copy_vertices( struct vbo_exec_context *exec )
           * subtract one from last_prim->start) so that we copy the 0th vertex
           * to the next vertex buffer.
           */
+         assert(last_prim->start > 0);
          src -= sz;
       }
       /* fall-through */

From 5bbd522452cfe86fc600203fe1a9b056582e2000 Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Wed, 4 Nov 2015 14:26:37 -0500
Subject: [PATCH 032/287] mesa/tests: add glBufferStorageEXT to ES 3.1 dispatch
 list

I thought that aliased functions didn't need to be added, but that might
only be if the function aliases something in the same {desktop,ES}
space. Resolves the dispatch sanity test failure.

Fixes: 13b19aa81 (mesa: expose support for GL_EXT_buffer_storage)
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=92824
Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
---
 src/mesa/main/tests/dispatch_sanity.cpp | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/mesa/main/tests/dispatch_sanity.cpp b/src/mesa/main/tests/dispatch_sanity.cpp
index ac2d2332df8..abe0f432572 100644
--- a/src/mesa/main/tests/dispatch_sanity.cpp
+++ b/src/mesa/main/tests/dispatch_sanity.cpp
@@ -2506,5 +2506,8 @@ const struct function gles31_functions_possible[] = {
    /* GL_OES_texture_storage_multisample_2d_array */
    { "glTexStorage3DMultisampleOES", 31, -1 },
 
+   /* GL_EXT_buffer_storage */
+   { "glBufferStorageEXT", 31, -1 },
+
    { NULL, 0, -1 },
  };

From 4a951f1c0847353101d28db583e1dd397fdce9ba Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Wed, 4 Nov 2015 13:13:39 -0800
Subject: [PATCH 033/287] vc4: Fix dumping the size of BOs allocated/cached.

60MB of cached BOs are a lot less scary than 600MB.
---
 src/gallium/drivers/vc4/vc4_bufmgr.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/vc4/vc4_bufmgr.c b/src/gallium/drivers/vc4/vc4_bufmgr.c
index f7b41f5816d..171a5544bea 100644
--- a/src/gallium/drivers/vc4/vc4_bufmgr.c
+++ b/src/gallium/drivers/vc4/vc4_bufmgr.c
@@ -42,9 +42,9 @@ vc4_bo_dump_stats(struct vc4_screen *screen)
         struct vc4_bo_cache *cache = &screen->bo_cache;
 
         fprintf(stderr, "  BOs allocated:   %d\n", screen->bo_count);
-        fprintf(stderr, "  BOs size:        %dkb\n", screen->bo_size / 102);
+        fprintf(stderr, "  BOs size:        %dkb\n", screen->bo_size / 1024);
         fprintf(stderr, "  BOs cached:      %d\n", cache->bo_count);
-        fprintf(stderr, "  BOs cached size: %dkb\n", cache->bo_size / 102);
+        fprintf(stderr, "  BOs cached size: %dkb\n", cache->bo_size / 1024);
 
         if (!list_empty(&cache->time_list)) {
                 struct vc4_bo *first = LIST_ENTRY(struct vc4_bo,

From 3f7c96c36cb18a9e4616d373369a130416884bf9 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Wed, 4 Nov 2015 13:10:28 -0800
Subject: [PATCH 034/287] vc4: Print the rounded shader size in debug output.

It's surprising to see "0kb" printed for debug on short shaders, while
4kb alignment won't be surprising.
---
 src/gallium/drivers/vc4/vc4_bufmgr.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/drivers/vc4/vc4_bufmgr.c b/src/gallium/drivers/vc4/vc4_bufmgr.c
index 171a5544bea..52ba8ab19ef 100644
--- a/src/gallium/drivers/vc4/vc4_bufmgr.c
+++ b/src/gallium/drivers/vc4/vc4_bufmgr.c
@@ -428,7 +428,7 @@ vc4_bo_alloc_shader(struct vc4_screen *screen, const void *data, uint32_t size)
         screen->bo_count++;
         screen->bo_size += bo->size;
         if (dump_stats) {
-                fprintf(stderr, "Allocated shader %dkb:\n", size / 1024);
+                fprintf(stderr, "Allocated shader %dkb:\n", bo->size / 1024);
                 vc4_bo_dump_stats(screen);
         }
 

From 6d3a24bce80a32063aedfe568efd5532aea4c875 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Wed, 4 Nov 2015 13:27:16 -0800
Subject: [PATCH 035/287] vc4: When the create ioctl fails, free our cache and
 try again.

This greatly increases the pressure you can put on the driver before
create fails.  Ultimately we need to let the kernel take control of
our cached BOs and just take them from us (and other clients)
directly, but this is a very easy patch for the moment.

Cc: "11.0" <mesa-stable@lists.freedesktop.org>
---
 src/gallium/drivers/vc4/vc4_bufmgr.c | 29 +++++++++++++++++++++++-----
 1 file changed, 24 insertions(+), 5 deletions(-)

diff --git a/src/gallium/drivers/vc4/vc4_bufmgr.c b/src/gallium/drivers/vc4/vc4_bufmgr.c
index 52ba8ab19ef..2f822f04c21 100644
--- a/src/gallium/drivers/vc4/vc4_bufmgr.c
+++ b/src/gallium/drivers/vc4/vc4_bufmgr.c
@@ -36,6 +36,9 @@
 
 static bool dump_stats = false;
 
+static void
+vc4_bo_cache_free_all(struct vc4_bo_cache *cache);
+
 static void
 vc4_bo_dump_stats(struct vc4_screen *screen)
 {
@@ -136,6 +139,8 @@ vc4_bo_alloc(struct vc4_screen *screen, uint32_t size, const char *name)
         bo->name = name;
         bo->private = true;
 
+        bool cleared_and_retried = false;
+retry:
         if (!using_vc4_simulator) {
                 struct drm_vc4_create_bo create;
                 memset(&create, 0, sizeof(create));
@@ -157,6 +162,12 @@ vc4_bo_alloc(struct vc4_screen *screen, uint32_t size, const char *name)
                 assert(create.size >= size);
         }
         if (ret != 0) {
+                if (!list_empty(&screen->bo_cache.time_list) &&
+                    !cleared_and_retried) {
+                        cleared_and_retried = true;
+                        vc4_bo_cache_free_all(&screen->bo_cache);
+                        goto retry;
+                }
                 fprintf(stderr, "create ioctl failure\n");
                 abort();
         }
@@ -248,6 +259,18 @@ free_stale_bos(struct vc4_screen *screen, time_t time)
         }
 }
 
+static void
+vc4_bo_cache_free_all(struct vc4_bo_cache *cache)
+{
+        pipe_mutex_lock(cache->lock);
+        list_for_each_entry_safe(struct vc4_bo, bo, &cache->time_list,
+                                 time_list) {
+                vc4_bo_remove_from_cache(cache, bo);
+                vc4_bo_free(bo);
+        }
+        pipe_mutex_unlock(cache->lock);
+}
+
 void
 vc4_bo_last_unreference_locked_timed(struct vc4_bo *bo, time_t time)
 {
@@ -600,11 +623,7 @@ vc4_bufmgr_destroy(struct pipe_screen *pscreen)
         struct vc4_screen *screen = vc4_screen(pscreen);
         struct vc4_bo_cache *cache = &screen->bo_cache;
 
-        list_for_each_entry_safe(struct vc4_bo, bo, &cache->time_list,
-                                 time_list) {
-                vc4_bo_remove_from_cache(cache, bo);
-                vc4_bo_free(bo);
-        }
+        vc4_bo_cache_free_all(cache);
 
         if (dump_stats) {
                 fprintf(stderr, "BO stats after screen destroy:\n");

From bb73fc4cb82c1abdf47aa373c78c2a85fe29b3ec Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Wed, 4 Nov 2015 22:42:41 -0500
Subject: [PATCH 036/287] nouveau: relax fence emit space assert

We also have the "reserved for kick" space available. Some of my earlier
changes can probably be removed, but this is a quick fix for some of the
rarer fallout.

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
Cc: <mesa-stable@lists.freedesktop.org>
---
 src/gallium/drivers/nouveau/nv30/nv30_screen.c | 2 +-
 src/gallium/drivers/nouveau/nv50/nv50_screen.c | 2 +-
 src/gallium/drivers/nouveau/nvc0/nvc0_screen.c | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/gallium/drivers/nouveau/nv30/nv30_screen.c b/src/gallium/drivers/nouveau/nv30/nv30_screen.c
index bdecb0a32b3..794a0898eaf 100644
--- a/src/gallium/drivers/nouveau/nv30/nv30_screen.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_screen.c
@@ -353,7 +353,7 @@ nv30_screen_fence_emit(struct pipe_screen *pscreen, uint32_t *sequence)
 
    *sequence = ++screen->base.fence.sequence;
 
-   assert(PUSH_AVAIL(push) >= 3);
+   assert(PUSH_AVAIL(push) + push->rsvd_kick >= 3);
    PUSH_DATA (push, NV30_3D_FENCE_OFFSET |
               (2 /* size */ << 18) | (7 /* subchan */ << 13));
    PUSH_DATA (push, 0);
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
index a9e0c478322..de2150ca08c 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
@@ -392,7 +392,7 @@ nv50_screen_fence_emit(struct pipe_screen *pscreen, u32 *sequence)
    /* we need to do it after possible flush in MARK_RING */
    *sequence = ++screen->base.fence.sequence;
 
-   assert(PUSH_AVAIL(push) >= 5);
+   assert(PUSH_AVAIL(push) + push->rsvd_kick >= 5);
    PUSH_DATA (push, NV50_FIFO_PKHDR(NV50_3D(QUERY_ADDRESS_HIGH), 4));
    PUSH_DATAh(push, screen->fence.bo->offset);
    PUSH_DATA (push, screen->fence.bo->offset);
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
index 6ad3980911d..3b543929f3c 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
@@ -547,7 +547,7 @@ nvc0_screen_fence_emit(struct pipe_screen *pscreen, u32 *sequence)
    /* we need to do it after possible flush in MARK_RING */
    *sequence = ++screen->base.fence.sequence;
 
-   assert(PUSH_AVAIL(push) >= 5);
+   assert(PUSH_AVAIL(push) + push->rsvd_kick >= 5);
    PUSH_DATA (push, NVC0_FIFO_PKHDR_SQ(NVC0_3D(QUERY_ADDRESS_HIGH), 4));
    PUSH_DATAh(push, screen->fence.bo->offset);
    PUSH_DATA (push, screen->fence.bo->offset);

From 56774e63028b2997a7d8c0abb5009a4c79f9a453 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= <apinheiro@igalia.com>
Date: Tue, 20 Oct 2015 13:08:09 +0200
Subject: [PATCH 037/287] i965/vec4: select predicate based on writemask for
 sel emissions

Equivalent to commit 8ac3b525c but with sel operations. In this case
we select the PredCtrl based on the writemask.

This patch helps on cases like this:
 1: cmp.l.f0.0 vgrf40.0.x:F, vgrf0.zzzz:F, vgrf7.xxxx:F
 2: cmp.nz.f0.0 null:D, vgrf40.xxxx:D, 0D
 3: (+f0.0) sel vgrf41.0.x:UD, vgrf6.xxxx:UD, vgrf5.xxxx:UD

In this case, cmod propagation can't optimize instruction #2, because
instructions #1 and #2 have different writemasks, and we can't directly
update instruction #2's writemask because our code thinks that the sel at
instruction #3 reads all four channels of the flag, when it actually
only reads .x.

So, with this patch, the previous case becomes this:
 1: cmp.l.f0.0 vgrf40.0.x:F, vgrf0.zzzz:F, vgrf7.xxxx:F
 2: cmp.nz.f0.0 null:D, vgrf40.xxxx:D, 0D
 3: (+f0.0.x) sel vgrf41.0.x:UD, vgrf6.xxxx:UD, vgrf5.xxxx:UD

Now only the x channel of the flag is used, allowing dead code
elimination to update the writemask of the second instruction:
 1: cmp.l.f0.0 vgrf40.0.x:F, vgrf0.zzzz:F, vgrf7.xxxx:F
 2: cmp.nz.f0.0 null.x:D, vgrf40.xxxx:D, 0D
 3: (+f0.0.x) sel vgrf41.0.x:UD, vgrf6.xxxx:UD, vgrf5.xxxx:UD

So now cmod propagation can simplify out #2:
 1: cmp.l.f0.0 vgrf40.0.x:F, attr18.wwww:F, vgrf7.xxxx:F
 2: (+f0.0.x) sel vgrf41.0.x:UD, vgrf6.xxxx:UD, vgrf5.xxxx:UD

Shader-db numbers:
total instructions in shared programs: 6235835 -> 6228008 (-0.13%)
instructions in affected programs:     219850 -> 212023 (-3.56%)
total loops in shared programs:        1979 -> 1979 (0.00%)
helped:                                1192
HURT:                                  0
---
 src/mesa/drivers/dri/i965/brw_vec4_nir.cpp | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
index 8ca8ddb98fb..b848810ebc7 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
@@ -1407,7 +1407,23 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
    case nir_op_bcsel:
       emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ));
       inst = emit(BRW_OPCODE_SEL, dst, op[1], op[2]);
-      inst->predicate = BRW_PREDICATE_NORMAL;
+      switch (dst.writemask) {
+      case WRITEMASK_X:
+         inst->predicate = BRW_PREDICATE_ALIGN16_REPLICATE_X;
+         break;
+      case WRITEMASK_Y:
+         inst->predicate = BRW_PREDICATE_ALIGN16_REPLICATE_Y;
+         break;
+      case WRITEMASK_Z:
+         inst->predicate = BRW_PREDICATE_ALIGN16_REPLICATE_Z;
+         break;
+      case WRITEMASK_W:
+         inst->predicate = BRW_PREDICATE_ALIGN16_REPLICATE_W;
+         break;
+      default:
+         inst->predicate = BRW_PREDICATE_NORMAL;
+         break;
+      }
       break;
 
    case nir_op_fdot_replicated2:

From 5c6f21579d7db802f4db96bae8b166e7409afabe Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Tue, 3 Nov 2015 17:15:24 -0800
Subject: [PATCH 038/287] nir: Rename live_variables to live_ssa_defs.

This computes liveness of SSA values, not nir_variables.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
---
 src/glsl/nir/nir.h                            |  4 ++--
 src/glsl/nir/nir_from_ssa.c                   |  2 +-
 src/glsl/nir/nir_live_variables.c             | 12 ++++++------
 src/glsl/nir/nir_lower_global_vars_to_local.c |  2 +-
 src/glsl/nir/nir_metadata.c                   |  4 ++--
 src/glsl/nir/nir_opt_dead_cf.c                |  2 +-
 src/glsl/nir/nir_remove_dead_variables.c      |  2 +-
 7 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
index 874a03966be..f8de40d0d13 100644
--- a/src/glsl/nir/nir.h
+++ b/src/glsl/nir/nir.h
@@ -1310,7 +1310,7 @@ typedef enum {
    nir_metadata_none = 0x0,
    nir_metadata_block_index = 0x1,
    nir_metadata_dominance = 0x2,
-   nir_metadata_live_variables = 0x4,
+   nir_metadata_live_ssa_defs = 0x4,
 } nir_metadata;
 
 typedef struct {
@@ -1986,7 +1986,7 @@ bool nir_lower_gs_intrinsics(nir_shader *shader);
 
 bool nir_normalize_cubemap_coords(nir_shader *shader);
 
-void nir_live_variables_impl(nir_function_impl *impl);
+void nir_live_ssa_defs_impl(nir_function_impl *impl);
 bool nir_ssa_defs_interfere(nir_ssa_def *a, nir_ssa_def *b);
 
 void nir_convert_to_ssa_impl(nir_function_impl *impl);
diff --git a/src/glsl/nir/nir_from_ssa.c b/src/glsl/nir/nir_from_ssa.c
index eaf883dbaa0..f2797f72c8e 100644
--- a/src/glsl/nir/nir_from_ssa.c
+++ b/src/glsl/nir/nir_from_ssa.c
@@ -777,7 +777,7 @@ nir_convert_from_ssa_impl(nir_function_impl *impl, bool phi_webs_only)
    nir_metadata_preserve(impl, nir_metadata_block_index |
                                nir_metadata_dominance);
 
-   nir_metadata_require(impl, nir_metadata_live_variables |
+   nir_metadata_require(impl, nir_metadata_live_ssa_defs |
                               nir_metadata_dominance);
 
    nir_foreach_block(impl, coalesce_phi_nodes_block, &state);
diff --git a/src/glsl/nir/nir_live_variables.c b/src/glsl/nir/nir_live_variables.c
index 1c96dcf36c5..05f79d7bc61 100644
--- a/src/glsl/nir/nir_live_variables.c
+++ b/src/glsl/nir/nir_live_variables.c
@@ -42,7 +42,7 @@
  * block but not in the live-in of the block containing the phi node.
  */
 
-struct live_variables_state {
+struct live_ssa_defs_state {
    unsigned num_ssa_defs;
    unsigned bitset_words;
 
@@ -52,7 +52,7 @@ struct live_variables_state {
 static bool
 index_ssa_def(nir_ssa_def *def, void *void_state)
 {
-   struct live_variables_state *state = void_state;
+   struct live_ssa_defs_state *state = void_state;
 
    if (def->parent_instr->type == nir_instr_type_ssa_undef)
       def->live_index = 0;
@@ -77,7 +77,7 @@ index_ssa_definitions_block(nir_block *block, void *state)
 static bool
 init_liveness_block(nir_block *block, void *void_state)
 {
-   struct live_variables_state *state = void_state;
+   struct live_ssa_defs_state *state = void_state;
 
    block->live_in = reralloc(block, block->live_in, BITSET_WORD,
                              state->bitset_words);
@@ -129,7 +129,7 @@ set_ssa_def_dead(nir_ssa_def *def, void *void_live)
  */
 static bool
 propagate_across_edge(nir_block *pred, nir_block *succ,
-                      struct live_variables_state *state)
+                      struct live_ssa_defs_state *state)
 {
    NIR_VLA(BITSET_WORD, live, state->bitset_words);
    memcpy(live, succ->live_in, state->bitset_words * sizeof *live);
@@ -165,9 +165,9 @@ propagate_across_edge(nir_block *pred, nir_block *succ,
 }
 
 void
-nir_live_variables_impl(nir_function_impl *impl)
+nir_live_ssa_defs_impl(nir_function_impl *impl)
 {
-   struct live_variables_state state;
+   struct live_ssa_defs_state state;
 
    /* We start at 1 because we reserve the index value of 0 for ssa_undef
     * instructions.  Those are never live, so their liveness information
diff --git a/src/glsl/nir/nir_lower_global_vars_to_local.c b/src/glsl/nir/nir_lower_global_vars_to_local.c
index dcd091ae2fa..d549ee79bb4 100644
--- a/src/glsl/nir/nir_lower_global_vars_to_local.c
+++ b/src/glsl/nir/nir_lower_global_vars_to_local.c
@@ -102,7 +102,7 @@ nir_lower_global_vars_to_local(nir_shader *shader)
          exec_list_push_tail(&impl->locals, &var->node);
          nir_metadata_preserve(impl, nir_metadata_block_index |
                                      nir_metadata_dominance |
-                                     nir_metadata_live_variables);
+                                     nir_metadata_live_ssa_defs);
          progress = true;
       }
    }
diff --git a/src/glsl/nir/nir_metadata.c b/src/glsl/nir/nir_metadata.c
index a03e12456a1..6de981f430f 100644
--- a/src/glsl/nir/nir_metadata.c
+++ b/src/glsl/nir/nir_metadata.c
@@ -39,8 +39,8 @@ nir_metadata_require(nir_function_impl *impl, nir_metadata required)
       nir_index_blocks(impl);
    if (NEEDS_UPDATE(nir_metadata_dominance))
       nir_calc_dominance_impl(impl);
-   if (NEEDS_UPDATE(nir_metadata_live_variables))
-      nir_live_variables_impl(impl);
+   if (NEEDS_UPDATE(nir_metadata_live_ssa_defs))
+      nir_live_ssa_defs_impl(impl);
 
 #undef NEEDS_UPDATE
 
diff --git a/src/glsl/nir/nir_opt_dead_cf.c b/src/glsl/nir/nir_opt_dead_cf.c
index 0d4819b5158..356e926ffe3 100644
--- a/src/glsl/nir/nir_opt_dead_cf.c
+++ b/src/glsl/nir/nir_opt_dead_cf.c
@@ -204,7 +204,7 @@ loop_is_dead(nir_loop *loop)
       return false;
 
    nir_function_impl *impl = nir_cf_node_get_function(&loop->cf_node);
-   nir_metadata_require(impl, nir_metadata_live_variables |
+   nir_metadata_require(impl, nir_metadata_live_ssa_defs |
                               nir_metadata_dominance);
 
    for (nir_block *cur = after->imm_dom; cur != before; cur = cur->imm_dom) {
diff --git a/src/glsl/nir/nir_remove_dead_variables.c b/src/glsl/nir/nir_remove_dead_variables.c
index 530a8475ed5..8f0833c7e24 100644
--- a/src/glsl/nir/nir_remove_dead_variables.c
+++ b/src/glsl/nir/nir_remove_dead_variables.c
@@ -130,7 +130,7 @@ nir_remove_dead_variables(nir_shader *shader)
          if (remove_dead_vars(&overload->impl->locals, live)) {
             nir_metadata_preserve(overload->impl, nir_metadata_block_index |
                                                   nir_metadata_dominance |
-                                                  nir_metadata_live_variables);
+                                                  nir_metadata_live_ssa_defs);
             progress = true;
          }
       }

From b9f8e729c88ad0d934422976a20a7c765016fcb8 Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Tue, 3 Nov 2015 17:16:49 -0800
Subject: [PATCH 039/287] nir: Rename nir_live_variables.c to nir_liveness.c.

It doesn't actually operate on variables.

Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
---
 src/glsl/Makefile.sources                             | 2 +-
 src/glsl/nir/{nir_live_variables.c => nir_liveness.c} | 0
 2 files changed, 1 insertion(+), 1 deletion(-)
 rename src/glsl/nir/{nir_live_variables.c => nir_liveness.c} (100%)

diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources
index ca870367640..0266f290ccb 100644
--- a/src/glsl/Makefile.sources
+++ b/src/glsl/Makefile.sources
@@ -37,7 +37,7 @@ NIR_FILES = \
 	nir/nir_intrinsics.h \
 	nir/nir_instr_set.c \
 	nir/nir_instr_set.h \
-	nir/nir_live_variables.c \
+	nir/nir_liveness.c \
 	nir/nir_lower_alu_to_scalar.c \
 	nir/nir_lower_atomics.c \
 	nir/nir_lower_clip.c \
diff --git a/src/glsl/nir/nir_live_variables.c b/src/glsl/nir/nir_liveness.c
similarity index 100%
rename from src/glsl/nir/nir_live_variables.c
rename to src/glsl/nir/nir_liveness.c

From 5048da974e68a05b86a0cec494e1380e81978684 Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Wed, 28 Jan 2015 23:58:43 -0800
Subject: [PATCH 040/287] i965: Handle 16x MSAA in IMS dimension munging code.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Neil Roberts <neil@linux.intel.com>
Reviewed-by: Ben Widawsky <ben@bwidawsk.net>
---
 src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
index b6e35205727..0802b92502c 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
@@ -416,9 +416,13 @@ intel_miptree_create_layout(struct brw_context *brw,
             width0 = ALIGN(width0, 2) * 4;
             height0 = ALIGN(height0, 2) * 2;
             break;
+         case 16:
+            width0 = ALIGN(width0, 2) * 4;
+            height0 = ALIGN(height0, 2) * 4;
+            break;
          default:
-            /* num_samples should already have been quantized to 0, 1, 2, 4, or
-             * 8.
+            /* num_samples should already have been quantized to 0, 1, 2, 4, 8
+             * or 16.
              */
             unreachable("not reached");
          }

From 20250e854eca3209133d592d98559ac474a5f60f Mon Sep 17 00:00:00 2001
From: Neil Roberts <neil@linux.intel.com>
Date: Wed, 16 Sep 2015 11:48:42 +0100
Subject: [PATCH 041/287] i965: Program 16x MSAA sample positions.

This is the standard pattern used by the other 3D graphics API.

BDW has slots for these values, but they aren't actually used until
SKL. Even though the documentation for BDW says they must be zero, it
doesn't seem to cause any harm to program them anyway.

The comment above for the 8x sample positions says that the hardware
implements centroid interpolation by picking the centre-most sample
that is inside the primitive. That implies that it might be worthwhile
to pick a pattern that includes 0.5,0.5. However by experimentation
this doesn't seem to actually be the case. With the sample positions
in this patch, if I modify the piglit test below so that it instead
reports the centroid position, it reports 0.492188,0.421875 which
doesn't match any of the positions. If I modify the sample positions
so that they include one at exactly 0.5,0.5 it doesn't help and it
reports another position which is even further from the center for
some reason.

arb_gpu_shader5-interpolateAtSample-different

Kenneth Graunke experimented with some other patterns that have a
higher standard deviation but I think after some discussion it was
decided that it would be better to pick the same pattern as the other
graphics API in case there are games that rely on this pattern.

(Based on a patch by Kenneth Graunke)

Cc: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Ben Widawsky <ben at bwidawsk.net>
---
 .../drivers/dri/i965/brw_multisample_state.h  | 26 +++++++++++++++++++
 .../drivers/dri/i965/gen6_multisample_state.c |  3 +++
 .../drivers/dri/i965/gen8_multisample_state.c | 12 ++++-----
 3 files changed, 34 insertions(+), 7 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_multisample_state.h b/src/mesa/drivers/dri/i965/brw_multisample_state.h
index 26633e72983..42a7fd35121 100644
--- a/src/mesa/drivers/dri/i965/brw_multisample_state.h
+++ b/src/mesa/drivers/dri/i965/brw_multisample_state.h
@@ -81,3 +81,29 @@ brw_multisample_positions_4x = 0xae2ae662;
  */
 static const uint32_t
 brw_multisample_positions_8x[] = { 0xdbb39d79, 0x3ff55117 };
+
+/**
+ * Sample positions:
+ *
+ *    0 1 2 3 4 5 6 7 8 9 a b c d e f
+ * 0   15
+ * 1                  9
+ * 2         10
+ * 3                        7
+ * 4                               13
+ * 5                1
+ * 6        4
+ * 7                          3
+ * 8 12
+ * 9                    0
+ * a            2
+ * b                            6
+ * c     11
+ * d                      5
+ * e              8
+ * f                             14
+ */
+static const uint32_t
+brw_multisample_positions_16x[] = {
+   0xc75a7599, 0xb3dbad36, 0x2c42816e, 0x10eff408
+};
diff --git a/src/mesa/drivers/dri/i965/gen6_multisample_state.c b/src/mesa/drivers/dri/i965/gen6_multisample_state.c
index 8444c0c9bae..49c6ebabfba 100644
--- a/src/mesa/drivers/dri/i965/gen6_multisample_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_multisample_state.c
@@ -48,6 +48,9 @@ gen6_get_sample_position(struct gl_context *ctx,
    case 8:
       bits = brw_multisample_positions_8x[index >> 2] >> (8 * (index & 3));
       break;
+   case 16:
+      bits = brw_multisample_positions_16x[index >> 2] >> (8 * (index & 3));
+      break;
    default:
       unreachable("Not implemented");
    }
diff --git a/src/mesa/drivers/dri/i965/gen8_multisample_state.c b/src/mesa/drivers/dri/i965/gen8_multisample_state.c
index 75cbe06c522..4427f15996d 100644
--- a/src/mesa/drivers/dri/i965/gen8_multisample_state.c
+++ b/src/mesa/drivers/dri/i965/gen8_multisample_state.c
@@ -52,13 +52,11 @@ gen8_emit_3dstate_sample_pattern(struct brw_context *brw)
    BEGIN_BATCH(9);
    OUT_BATCH(_3DSTATE_SAMPLE_PATTERN << 16 | (9 - 2));
 
-   /* 16x MSAA
-    * XXX: Need to program these.
-    */
-   OUT_BATCH(0);
-   OUT_BATCH(0);
-   OUT_BATCH(0);
-   OUT_BATCH(0);
+   /* 16x MSAA */
+   OUT_BATCH(brw_multisample_positions_16x[0]); /* positions  3,  2,  1,  0 */
+   OUT_BATCH(brw_multisample_positions_16x[1]); /* positions  7,  6,  5,  4 */
+   OUT_BATCH(brw_multisample_positions_16x[2]); /* positions 11, 10,  9,  8 */
+   OUT_BATCH(brw_multisample_positions_16x[3]); /* positions 15, 14, 13, 12 */
 
    /* 8x MSAA */
    OUT_BATCH(brw_multisample_positions_8x[1]); /* sample positions 7654 */

From e386fb0dee40d0f2342b43b6750b64c8174463a9 Mon Sep 17 00:00:00 2001
From: Neil Roberts <neil@linux.intel.com>
Date: Tue, 8 Sep 2015 15:52:09 +0100
Subject: [PATCH 042/287] i965/fs/skl+: Use ld2dms_w instead of ld2dms

In order to support 16x MSAA, skl+ has a wider version of ld2dms that
takes two parameters for the MCS data. The MCS data retrieved from the
ld_mcs instruction already returns 4 or 8 registers and is documented
to return zeroes for the mcsh value when the sample count is less than
16.

v2: Use get_lowered_simd_width to fall back to SIMD8 instructions when
    the message length would be too long in SIMD16.
Reviewed-by: Ben Widawsky <ben@bwidawsk.net>
---
 src/mesa/drivers/dri/i965/brw_defines.h       |  3 ++
 src/mesa/drivers/dri/i965/brw_disasm.c        |  1 +
 src/mesa/drivers/dri/i965/brw_fs.cpp          | 42 ++++++++++++++++++-
 .../drivers/dri/i965/brw_fs_generator.cpp     |  5 +++
 src/mesa/drivers/dri/i965/brw_fs_visitor.cpp  |  9 ++--
 src/mesa/drivers/dri/i965/brw_shader.cpp      |  5 +++
 6 files changed, 60 insertions(+), 5 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index 6433cffc919..0396e13d0c2 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -964,6 +964,8 @@ enum opcode {
    FS_OPCODE_TXB_LOGICAL,
    SHADER_OPCODE_TXF_CMS,
    SHADER_OPCODE_TXF_CMS_LOGICAL,
+   SHADER_OPCODE_TXF_CMS_W,
+   SHADER_OPCODE_TXF_CMS_W_LOGICAL,
    SHADER_OPCODE_TXF_UMS,
    SHADER_OPCODE_TXF_UMS_LOGICAL,
    SHADER_OPCODE_TXF_MCS,
@@ -1539,6 +1541,7 @@ enum brw_message_target {
 #define GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO   17
 #define GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO_C 18
 #define HSW_SAMPLER_MESSAGE_SAMPLE_DERIV_COMPARE 20
+#define GEN9_SAMPLER_MESSAGE_SAMPLE_LD2DMS_W     28
 #define GEN7_SAMPLER_MESSAGE_SAMPLE_LD_MCS       29
 #define GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DMS       30
 #define GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DSS       31
diff --git a/src/mesa/drivers/dri/i965/brw_disasm.c b/src/mesa/drivers/dri/i965/brw_disasm.c
index df747107188..fd93beaec19 100644
--- a/src/mesa/drivers/dri/i965/brw_disasm.c
+++ b/src/mesa/drivers/dri/i965/brw_disasm.c
@@ -622,6 +622,7 @@ static const char *const gen5_sampler_msg_type[] = {
    [GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO]   = "gather4_po",
    [GEN7_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO_C] = "gather4_po_c",
    [HSW_SAMPLER_MESSAGE_SAMPLE_DERIV_COMPARE] = "sample_d_c",
+   [GEN9_SAMPLER_MESSAGE_SAMPLE_LD2DMS_W]     = "ld2dms_w",
    [GEN7_SAMPLER_MESSAGE_SAMPLE_LD_MCS]       = "ld_mcs",
    [GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DMS]       = "ld2dms",
    [GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DSS]       = "ld2dss",
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 4cc962613b3..f5294195656 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -717,6 +717,7 @@ fs_inst::components_read(unsigned i) const
    case SHADER_OPCODE_TXS_LOGICAL:
    case FS_OPCODE_TXB_LOGICAL:
    case SHADER_OPCODE_TXF_CMS_LOGICAL:
+   case SHADER_OPCODE_TXF_CMS_W_LOGICAL:
    case SHADER_OPCODE_TXF_UMS_LOGICAL:
    case SHADER_OPCODE_TXF_MCS_LOGICAL:
    case SHADER_OPCODE_LOD_LOGICAL:
@@ -732,6 +733,9 @@ fs_inst::components_read(unsigned i) const
       /* Texture offset. */
       else if (i == 7)
          return 2;
+      /* MCS */
+      else if (i == 5 && opcode == SHADER_OPCODE_TXF_CMS_W_LOGICAL)
+         return 2;
       else
          return 1;
 
@@ -896,6 +900,7 @@ fs_visitor::implied_mrf_writes(fs_inst *inst)
    case SHADER_OPCODE_TXD:
    case SHADER_OPCODE_TXF:
    case SHADER_OPCODE_TXF_CMS:
+   case SHADER_OPCODE_TXF_CMS_W:
    case SHADER_OPCODE_TXF_MCS:
    case SHADER_OPCODE_TG4:
    case SHADER_OPCODE_TG4_OFFSET:
@@ -3920,17 +3925,31 @@ lower_sampler_logical_send_gen7(const fs_builder &bld, fs_inst *inst, opcode op,
       coordinate_done = true;
       break;
    case SHADER_OPCODE_TXF_CMS:
+   case SHADER_OPCODE_TXF_CMS_W:
    case SHADER_OPCODE_TXF_UMS:
    case SHADER_OPCODE_TXF_MCS:
-      if (op == SHADER_OPCODE_TXF_UMS || op == SHADER_OPCODE_TXF_CMS) {
+      if (op == SHADER_OPCODE_TXF_UMS ||
+          op == SHADER_OPCODE_TXF_CMS ||
+          op == SHADER_OPCODE_TXF_CMS_W) {
          bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_UD), sample_index);
          length++;
       }
 
-      if (op == SHADER_OPCODE_TXF_CMS) {
+      if (op == SHADER_OPCODE_TXF_CMS || op == SHADER_OPCODE_TXF_CMS_W) {
          /* Data from the multisample control surface. */
          bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_UD), mcs);
          length++;
+
+         /* On Gen9+ we'll use ld2dms_w instead which has two registers for
+          * the MCS data.
+          */
+         if (op == SHADER_OPCODE_TXF_CMS_W) {
+            bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_UD),
+                    mcs.file == IMM ?
+                    mcs :
+                    offset(mcs, bld, 1));
+            length++;
+         }
       }
 
       /* There is no offsetting for this message; just copy in the integer
@@ -4144,6 +4163,10 @@ fs_visitor::lower_logical_sends()
          lower_sampler_logical_send(ibld, inst, SHADER_OPCODE_TXF_CMS);
          break;
 
+      case SHADER_OPCODE_TXF_CMS_W_LOGICAL:
+         lower_sampler_logical_send(ibld, inst, SHADER_OPCODE_TXF_CMS_W);
+         break;
+
       case SHADER_OPCODE_TXF_UMS_LOGICAL:
          lower_sampler_logical_send(ibld, inst, SHADER_OPCODE_TXF_UMS);
          break;
@@ -4336,6 +4359,21 @@ get_lowered_simd_width(const struct brw_device_info *devinfo,
       else
          return inst->exec_size;
 
+   case SHADER_OPCODE_TXF_CMS_W_LOGICAL: {
+      /* This opcode can take up to 6 arguments which means that in some
+       * circumstances it can end up with a message that is too long in SIMD16
+       * mode.
+       */
+      const unsigned coord_components = inst->src[8].fixed_hw_reg.dw1.ud;
+      /* First three arguments are the sample index and the two arguments for
+       * the MCS data.
+       */
+      if ((coord_components + 3) * 2 > MAX_SAMPLER_MESSAGE_SIZE)
+         return 8;
+      else
+         return inst->exec_size;
+   }
+
    case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL:
    case SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL:
    case SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL:
diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
index e207a77fdc1..28fb620279b 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -741,6 +741,10 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src
       case SHADER_OPCODE_TXF:
 	 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LD;
 	 break;
+      case SHADER_OPCODE_TXF_CMS_W:
+         assert(devinfo->gen >= 9);
+         msg_type = GEN9_SAMPLER_MESSAGE_SAMPLE_LD2DMS_W;
+         break;
       case SHADER_OPCODE_TXF_CMS:
          if (devinfo->gen >= 7)
             msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DMS;
@@ -2050,6 +2054,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
       case SHADER_OPCODE_TXD:
       case SHADER_OPCODE_TXF:
       case SHADER_OPCODE_TXF_CMS:
+      case SHADER_OPCODE_TXF_CMS_W:
       case SHADER_OPCODE_TXF_UMS:
       case SHADER_OPCODE_TXF_MCS:
       case SHADER_OPCODE_TXL:
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index ef92098286c..94a9c1b68f2 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -208,8 +208,8 @@ fs_visitor::emit_mcs_fetch(const fs_reg &coordinate, unsigned components,
    fs_inst *inst = bld.emit(SHADER_OPCODE_TXF_MCS_LOGICAL, dest, srcs,
                             ARRAY_SIZE(srcs));
 
-   /* We only care about one reg of response, but the sampler always writes
-    * 4/8.
+   /* We only care about one or two regs of response, but the sampler always
+    * writes 4/8.
     */
    inst->regs_written = 4 * dispatch_width / 8;
 
@@ -295,7 +295,10 @@ fs_visitor::emit_texture(ir_texture_opcode op,
       opcode = SHADER_OPCODE_TXF_LOGICAL;
       break;
    case ir_txf_ms:
-      opcode = SHADER_OPCODE_TXF_CMS_LOGICAL;
+      if (devinfo->gen >= 9)
+         opcode = SHADER_OPCODE_TXF_CMS_W_LOGICAL;
+      else
+         opcode = SHADER_OPCODE_TXF_CMS_LOGICAL;
       break;
    case ir_txs:
    case ir_query_levels:
diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp
index 4ea297ade4c..0312024ed1b 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -351,6 +351,10 @@ brw_instruction_name(enum opcode op)
       return "txf_cms";
    case SHADER_OPCODE_TXF_CMS_LOGICAL:
       return "txf_cms_logical";
+   case SHADER_OPCODE_TXF_CMS_W:
+      return "txf_cms_w";
+   case SHADER_OPCODE_TXF_CMS_W_LOGICAL:
+      return "txf_cms_w_logical";
    case SHADER_OPCODE_TXF_UMS:
       return "txf_ums";
    case SHADER_OPCODE_TXF_UMS_LOGICAL:
@@ -787,6 +791,7 @@ backend_instruction::is_tex() const
            opcode == SHADER_OPCODE_TXD ||
            opcode == SHADER_OPCODE_TXF ||
            opcode == SHADER_OPCODE_TXF_CMS ||
+           opcode == SHADER_OPCODE_TXF_CMS_W ||
            opcode == SHADER_OPCODE_TXF_UMS ||
            opcode == SHADER_OPCODE_TXF_MCS ||
            opcode == SHADER_OPCODE_TXL ||

From 4ef27745c8ed5153464db22950a90d74d2ef4435 Mon Sep 17 00:00:00 2001
From: Neil Roberts <neil@linux.intel.com>
Date: Wed, 9 Sep 2015 15:59:36 +0100
Subject: [PATCH 043/287] i965/vec4/skl+: Use ld2dms_w instead of ld2dms

In order to support 16x MSAA, skl+ has a wider version of ld2dms that
takes two parameters for the MCS data. The MCS data in the response
still fits in a single register so we just need to ensure we copy both
values rather than just the lower one.

Acked-by: Ben Widawsky <ben@bwidawsk.net>
---
 src/mesa/drivers/dri/i965/brw_vec4.cpp           |  1 +
 src/mesa/drivers/dri/i965/brw_vec4_generator.cpp |  5 +++++
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp   | 14 ++++++++++++--
 3 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index 01eb1580953..8350a024e88 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -339,6 +339,7 @@ vec4_visitor::implied_mrf_writes(vec4_instruction *inst)
    case SHADER_OPCODE_TXD:
    case SHADER_OPCODE_TXF:
    case SHADER_OPCODE_TXF_CMS:
+   case SHADER_OPCODE_TXF_CMS_W:
    case SHADER_OPCODE_TXF_MCS:
    case SHADER_OPCODE_TXS:
    case SHADER_OPCODE_TG4:
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
index 8bc21df5ffc..f0ad903c572 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
@@ -135,6 +135,10 @@ generate_tex(struct brw_codegen *p,
       case SHADER_OPCODE_TXF:
 	 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LD;
 	 break;
+      case SHADER_OPCODE_TXF_CMS_W:
+         assert(devinfo->gen >= 9);
+         msg_type = GEN9_SAMPLER_MESSAGE_SAMPLE_LD2DMS_W;
+         break;
       case SHADER_OPCODE_TXF_CMS:
          if (devinfo->gen >= 7)
             msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DMS;
@@ -1313,6 +1317,7 @@ generate_code(struct brw_codegen *p,
       case SHADER_OPCODE_TXD:
       case SHADER_OPCODE_TXF:
       case SHADER_OPCODE_TXF_CMS:
+      case SHADER_OPCODE_TXF_CMS_W:
       case SHADER_OPCODE_TXF_MCS:
       case SHADER_OPCODE_TXL:
       case SHADER_OPCODE_TXS:
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index 606fbd06278..7d949896bcc 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -900,7 +900,8 @@ vec4_visitor::emit_texture(ir_texture_opcode op,
    case ir_txl: opcode = SHADER_OPCODE_TXL; break;
    case ir_txd: opcode = SHADER_OPCODE_TXD; break;
    case ir_txf: opcode = SHADER_OPCODE_TXF; break;
-   case ir_txf_ms: opcode = SHADER_OPCODE_TXF_CMS; break;
+   case ir_txf_ms: opcode = (devinfo->gen >= 9 ? SHADER_OPCODE_TXF_CMS_W :
+                             SHADER_OPCODE_TXF_CMS); break;
    case ir_txs: opcode = SHADER_OPCODE_TXS; break;
    case ir_tg4: opcode = offset_value.file != BAD_FILE
                          ? SHADER_OPCODE_TG4_OFFSET : SHADER_OPCODE_TG4; break;
@@ -992,7 +993,16 @@ vec4_visitor::emit_texture(ir_texture_opcode op,
       } else if (op == ir_txf_ms) {
          emit(MOV(dst_reg(MRF, param_base + 1, sample_index.type, WRITEMASK_X),
                   sample_index));
-         if (devinfo->gen >= 7) {
+         if (opcode == SHADER_OPCODE_TXF_CMS_W) {
+            /* MCS data is stored in the first two channels of ‘mcs’, but we
+             * need to get it into the .y and .z channels of the second vec4
+             * of params.
+             */
+            mcs.swizzle = BRW_SWIZZLE4(0, 0, 1, 1);
+            emit(MOV(dst_reg(MRF, param_base + 1,
+                             glsl_type::uint_type, WRITEMASK_YZ),
+                     mcs));
+         } else if (devinfo->gen >= 7) {
             /* MCS data is in the first channel of `mcs`, but we need to get it into
              * the .y channel of the second vec4 of params, so replicate .x across
              * the whole vec4 and then mask off everything except .y

From 1a97cac767425b22e56fe698127795bc287bb773 Mon Sep 17 00:00:00 2001
From: Neil Roberts <neil@linux.intel.com>
Date: Tue, 15 Sep 2015 16:34:35 +0100
Subject: [PATCH 044/287] i965/fs: Add a sampler program key for whether the
 texture is 16x MSAA

When 16x MSAA is used for sampling with texelFetch the compiler needs
to use a different instruction which passes more arguments for the MCS
data. Previously on skl+ it was unconditionally using this new
instruction. However since 16x MSAA is probably going to be pretty
rare, it is probably worthwhile to avoid using this instruction for
the other sample counts. In order to do that this patch adds a new
member to brw_sampler_prog_key_data to track when a sampler refers to
a buffer with 16 samples.

Note that this isn't done for the vec4 backend because it wouldn't
change how many registers it uses.

Acked-by: Ben Widawsky <ben@bwidawsk.net>
---
 src/mesa/drivers/dri/i965/brw_compiler.h     | 7 +++++++
 src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 2 +-
 src/mesa/drivers/dri/i965/brw_wm.c           | 8 ++++++++
 3 files changed, 16 insertions(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/brw_compiler.h b/src/mesa/drivers/dri/i965/brw_compiler.h
index 91eabaf7787..f022f3829be 100644
--- a/src/mesa/drivers/dri/i965/brw_compiler.h
+++ b/src/mesa/drivers/dri/i965/brw_compiler.h
@@ -142,6 +142,13 @@ struct brw_sampler_prog_key_data {
     */
    uint32_t compressed_multisample_layout_mask;
 
+   /**
+    * Whether this sampler is using 16x multisampling. If so fetching from
+    * this sampler will be handled with a different instruction, ld2dms_w
+    * instead of ld2dms.
+    */
+   uint32_t msaa_16;
+
    /**
     * For Sandybridge, which shader w/a we need for gather quirks.
     */
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index 94a9c1b68f2..213c9120b50 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -295,7 +295,7 @@ fs_visitor::emit_texture(ir_texture_opcode op,
       opcode = SHADER_OPCODE_TXF_LOGICAL;
       break;
    case ir_txf_ms:
-      if (devinfo->gen >= 9)
+      if ((key_tex->msaa_16 & (1 << sampler)))
          opcode = SHADER_OPCODE_TXF_CMS_W_LOGICAL;
       else
          opcode = SHADER_OPCODE_TXF_CMS_LOGICAL;
diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c
index 5c49db9e63e..8d9ed3a6c33 100644
--- a/src/mesa/drivers/dri/i965/brw_wm.c
+++ b/src/mesa/drivers/dri/i965/brw_wm.c
@@ -212,6 +212,9 @@ brw_debug_recompile_sampler_key(struct brw_context *brw,
    found |= key_debug(brw, "compressed multisample layout",
                       old_key->compressed_multisample_layout_mask,
                       key->compressed_multisample_layout_mask);
+   found |= key_debug(brw, "16x msaa",
+                      old_key->msaa_16,
+                      key->msaa_16);
 
    for (unsigned int i = 0; i < MAX_SAMPLERS; i++) {
       found |= key_debug(brw, "textureGather workarounds",
@@ -371,6 +374,11 @@ brw_populate_sampler_prog_key_data(struct gl_context *ctx,
          if (brw->gen >= 7 &&
              intel_tex->mt->msaa_layout == INTEL_MSAA_LAYOUT_CMS) {
             key->compressed_multisample_layout_mask |= 1 << s;
+
+            if (intel_tex->mt->num_samples >= 16) {
+               assert(brw->gen >= 9);
+               key->msaa_16 |= 1 << s;
+            }
          }
       }
    }

From b4c2e6054fe830c299113b143622bcd2158cd257 Mon Sep 17 00:00:00 2001
From: Neil Roberts <neil@linux.intel.com>
Date: Wed, 9 Sep 2015 14:36:42 +0100
Subject: [PATCH 045/287] i965: Support calculating the bits needed to set up
 16x MSAA

The gen7_surface_msaa_bits function already returns the right values
for 16 samples but it just needs its assert to be relaxed.

Reviewed-by: Ben Widawsky <ben@bwidawsk.net>
---
 src/mesa/drivers/dri/i965/gen7_wm_surface_state.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c b/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c
index 5080f1c3fe4..438caefdd4a 100644
--- a/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c
@@ -78,7 +78,7 @@ gen7_surface_msaa_bits(unsigned num_samples, enum intel_msaa_layout layout)
 {
    uint32_t ss4 = 0;
 
-   assert(num_samples <= 8);
+   assert(num_samples <= 16);
 
    /* The SURFACE_MULTISAMPLECOUNT_X enums are simply log2(num_samples) << 3. */
    ss4 |= (ffs(MAX2(num_samples, 1)) - 1) << 3;

From bf6bd7eaf09fadc516a1e46635ed8590f4d88535 Mon Sep 17 00:00:00 2001
From: Neil Roberts <neil@linux.intel.com>
Date: Wed, 9 Sep 2015 14:38:08 +0100
Subject: [PATCH 046/287] i965: Support allocating the MCS buffer for 16x MSAA

When 16 samples are used the MCS buffer needs 64 bits per pixel.

Reviewed-by: Ben Widawsky <ben@bwidawsk.net>
---
 src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
index 0802b92502c..b1a7632d82f 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
@@ -1427,6 +1427,12 @@ intel_miptree_alloc_mcs(struct brw_context *brw,
        */
       format = MESA_FORMAT_R_UINT32;
       break;
+   case 16:
+      /* 64 bits/pixel are required for MCS data when using 16x MSAA (4 bits
+       * for each sample).
+       */
+      format = MESA_FORMAT_RG_UINT32;
+      break;
    default:
       unreachable("Unrecognized sample count in intel_miptree_alloc_mcs");
    };

From a6804654283a9d03bee92d61eee5b1d036c8db68 Mon Sep 17 00:00:00 2001
From: Neil Roberts <neil@linux.intel.com>
Date: Wed, 9 Sep 2015 17:44:17 +0100
Subject: [PATCH 047/287] i965/fs/skl+: Fix calculating gl_SampleID for 16x
 MSAA

In order to accommodate 16x MSAA, the starting sample pair index is now
3 bits rather than 2 on SKL+.

Reviewed-by: Ben Widawsky <ben@bwidawsk.net>
Reviewed-by: Anuj Phogat <anuj.phogat@gmail.com>
---
 src/mesa/drivers/dri/i965/brw_fs.cpp | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index f5294195656..cb2536263dd 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -1313,9 +1313,15 @@ fs_visitor::emit_sampleid_setup()
        * are sample 1 of subspan 0; the third group is sample 0 of
        * subspan 1, and finally sample 1 of subspan 1.
        */
+
+      /* SKL+ has an extra bit for the Starting Sample Pair Index to
+       * accommodate 16x MSAA.
+       */
+      unsigned sspi_mask = devinfo->gen >= 9 ? 0x1c0 : 0xc0;
+
       abld.exec_all().group(1, 0)
           .AND(t1, fs_reg(retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_D)),
-               fs_reg(0xc0));
+               fs_reg(sspi_mask));
       abld.exec_all().group(1, 0).SHR(t1, t1, fs_reg(5));
 
       /* This works for both SIMD8 and SIMD16 */

From 1a22b12fc51e80c20c700f93904ffd12caa73473 Mon Sep 17 00:00:00 2001
From: Neil Roberts <neil@linux.intel.com>
Date: Fri, 11 Sep 2015 18:09:46 +0100
Subject: [PATCH 048/287] i965/meta: Support 16x MSAA in the meta stencil blit

For 16x MSAA the destination rectangle is now drawn at 4x4 times the
size and the shader code to calculate the sample number is adjusted
accordingly.

Acked-by: Ben Widawsky <ben@bwidawsk.net>
---
 .../drivers/dri/i965/brw_meta_stencil_blit.c  | 22 ++++++++++++++-----
 1 file changed, 17 insertions(+), 5 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_meta_stencil_blit.c b/src/mesa/drivers/dri/i965/brw_meta_stencil_blit.c
index cbbb919c6ee..4e9aa949506 100644
--- a/src/mesa/drivers/dri/i965/brw_meta_stencil_blit.c
+++ b/src/mesa/drivers/dri/i965/brw_meta_stencil_blit.c
@@ -163,6 +163,13 @@ static const char *fs_tmpl =
    "      txl_coords.x = ((X & int(0xfff8)) >> 2) | (X & int(0x1));\n"
    "      txl_coords.y = ((Y & int(0xfffc)) >> 1) | (Y & int(0x1));\n"
    "      sample_index = (X & 0x4) | (Y & 0x2) | ((X & 0x2) >> 1);\n"
+   "      break;\n"
+   "   case 16:\n"
+   "      txl_coords.x = ((X & int(0xfff8)) >> 2) | (X & int(0x1));\n"
+   "      txl_coords.y = ((Y & int(0xfff8)) >> 2) | (Y & int(0x1));\n"
+   "      sample_index = (((Y & 0x4) << 1) | (X & 0x4) | (Y & 0x2) |\n"
+   "                      ((X & 0x2) >> 1));\n"
+   "      break;\n"
    "   }\n"
    "}\n"
    "\n"
@@ -313,11 +320,16 @@ adjust_msaa(struct blit_dims *dims, int num_samples)
       dims->dst_x0 *= 2;
       dims->dst_x1 *= 2;
    } else if (num_samples) {
-      const int x_num_samples = num_samples / 2;
-      dims->dst_x0 = ROUND_DOWN_TO(dims->dst_x0 * x_num_samples, num_samples);
-      dims->dst_y0 = ROUND_DOWN_TO(dims->dst_y0 * 2, 4);
-      dims->dst_x1 = ALIGN(dims->dst_x1 * x_num_samples, num_samples);
-      dims->dst_y1 = ALIGN(dims->dst_y1 * 2, 4);
+      const int y_num_samples = num_samples >= 16 ? 4 : 2;
+      const int x_num_samples = num_samples / y_num_samples;
+      dims->dst_x0 = ROUND_DOWN_TO(dims->dst_x0 * x_num_samples,
+                                   x_num_samples * 2);
+      dims->dst_y0 = ROUND_DOWN_TO(dims->dst_y0 * y_num_samples,
+                                   y_num_samples * 2);
+      dims->dst_x1 = ALIGN(dims->dst_x1 * x_num_samples,
+                           x_num_samples * 2);
+      dims->dst_y1 = ALIGN(dims->dst_y1 * y_num_samples,
+                           y_num_samples * 2);
    }
 }
 

From 2dd76ec16e599bd919962f439b59fdd73e85ff94 Mon Sep 17 00:00:00 2001
From: Neil Roberts <neil@linux.intel.com>
Date: Wed, 16 Sep 2015 17:43:33 +0100
Subject: [PATCH 049/287] meta: Support 16x MSAA in the multisample scaled blit
 shader

v2: Fix the x_scale in the shader. Remove the doubts in the commit
    message.
Reviewed-by: Anuj Phogat <anuj.phogat@gmail.com>
---
 src/mesa/drivers/common/meta.h                |  2 ++
 src/mesa/drivers/common/meta_blit.c           | 29 ++++++++++++-------
 .../drivers/dri/i965/gen6_multisample_state.c | 14 +++++++++
 src/mesa/main/mtypes.h                        | 15 +++++++++-
 4 files changed, 49 insertions(+), 11 deletions(-)

diff --git a/src/mesa/drivers/common/meta.h b/src/mesa/drivers/common/meta.h
index 23fa209905d..d742eaa9f67 100644
--- a/src/mesa/drivers/common/meta.h
+++ b/src/mesa/drivers/common/meta.h
@@ -285,9 +285,11 @@ enum blit_msaa_shader {
    BLIT_2X_MSAA_SHADER_2D_MULTISAMPLE_SCALED_RESOLVE,
    BLIT_4X_MSAA_SHADER_2D_MULTISAMPLE_SCALED_RESOLVE,
    BLIT_8X_MSAA_SHADER_2D_MULTISAMPLE_SCALED_RESOLVE,
+   BLIT_16X_MSAA_SHADER_2D_MULTISAMPLE_SCALED_RESOLVE,
    BLIT_2X_MSAA_SHADER_2D_MULTISAMPLE_ARRAY_SCALED_RESOLVE,
    BLIT_4X_MSAA_SHADER_2D_MULTISAMPLE_ARRAY_SCALED_RESOLVE,
    BLIT_8X_MSAA_SHADER_2D_MULTISAMPLE_ARRAY_SCALED_RESOLVE,
+   BLIT_16X_MSAA_SHADER_2D_MULTISAMPLE_ARRAY_SCALED_RESOLVE,
    BLIT_MSAA_SHADER_COUNT,
 };
 
diff --git a/src/mesa/drivers/common/meta_blit.c b/src/mesa/drivers/common/meta_blit.c
index 5972a5af0c9..b92c2e2f22b 100644
--- a/src/mesa/drivers/common/meta_blit.c
+++ b/src/mesa/drivers/common/meta_blit.c
@@ -72,20 +72,25 @@ setup_glsl_msaa_blit_scaled_shader(struct gl_context *ctx,
    char *sample_map_expr = rzalloc_size(mem_ctx, 1);
    char *texel_fetch_macro = rzalloc_size(mem_ctx, 1);
    const char *sampler_array_suffix = "";
-   float y_scale;
+   float x_scale, y_scale;
    enum blit_msaa_shader shader_index;
 
    assert(src_rb);
    samples = MAX2(src_rb->NumSamples, 1);
-   y_scale = samples * 0.5;
+
+   if (samples == 16)
+      x_scale = 4.0;
+   else
+      x_scale = 2.0;
+   y_scale = samples / x_scale;
 
    /* We expect only power of 2 samples in source multisample buffer. */
    assert(samples > 0 && _mesa_is_pow_two(samples));
    while (samples >> (shader_offset + 1)) {
       shader_offset++;
    }
-   /* Update the assert if we plan to support more than 8X MSAA. */
-   assert(shader_offset > 0 && shader_offset < 4);
+   /* Update the assert if we plan to support more than 16X MSAA. */
+   assert(shader_offset > 0 && shader_offset <= 4);
 
    assert(target == GL_TEXTURE_2D_MULTISAMPLE ||
           target == GL_TEXTURE_2D_MULTISAMPLE_ARRAY);
@@ -129,6 +134,10 @@ setup_glsl_msaa_blit_scaled_shader(struct gl_context *ctx,
       sample_number =  "sample_map[int(2 * fract(coord.x) + 8 * fract(coord.y))]";
       sample_map = ctx->Const.SampleMap8x;
       break;
+   case 16:
+      sample_number =  "sample_map[int(4 * fract(coord.x) + 16 * fract(coord.y))]";
+      sample_map = ctx->Const.SampleMap16x;
+      break;
    default:
       sample_number = NULL;
       sample_map = NULL;
@@ -184,9 +193,9 @@ setup_glsl_msaa_blit_scaled_shader(struct gl_context *ctx,
                                "{\n"
                                "%s"
                                "   vec2 interp;\n"
-                               "   const vec2 scale = vec2(2.0f, %ff);\n"
-                               "   const vec2 scale_inv = vec2(0.5f, %ff);\n"
-                               "   const vec2 s_0_offset = vec2(0.25f, %ff);\n"
+                               "   const vec2 scale = vec2(%ff, %ff);\n"
+                               "   const vec2 scale_inv = vec2(%ff, %ff);\n"
+                               "   const vec2 s_0_offset = vec2(%ff, %ff);\n"
                                "   vec2 s_0_coord, s_1_coord, s_2_coord, s_3_coord;\n"
                                "   vec4 s_0_color, s_1_color, s_2_color, s_3_color;\n"
                                "   vec4 x_0_color, x_1_color;\n"
@@ -219,9 +228,9 @@ setup_glsl_msaa_blit_scaled_shader(struct gl_context *ctx,
                                "}\n",
                                sampler_array_suffix,
                                sample_map_expr,
-                               y_scale,
-                               1.0f / y_scale,
-                               1.0f / samples,
+                               x_scale, y_scale,
+                               1.0f / x_scale, 1.0f / y_scale,
+                               0.5f / x_scale, 0.5f / y_scale,
                                texel_fetch_macro);
 
    _mesa_meta_compile_and_link_program(ctx, vs_source, fs_source, name,
diff --git a/src/mesa/drivers/dri/i965/gen6_multisample_state.c b/src/mesa/drivers/dri/i965/gen6_multisample_state.c
index 49c6ebabfba..8eb620de56b 100644
--- a/src/mesa/drivers/dri/i965/gen6_multisample_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_multisample_state.c
@@ -91,6 +91,17 @@ gen6_get_sample_position(struct gl_context *ctx,
  *           | 6 | 7 |                      | 7 | 1 |
  *           ---------                      ---------
  *
+ * 16X MSAA sample index layout  16x MSAA sample number layout
+ *         -----------------            -----------------
+ *         | 0 | 1 | 2 | 3 |            |15 |10 | 9 | 7 |
+ *         -----------------            -----------------
+ *         | 4 | 5 | 6 | 7 |            | 4 | 1 | 3 |13 |
+ *         -----------------            -----------------
+ *         | 8 | 9 |10 |11 |            |12 | 2 | 0 | 6 |
+ *         -----------------            -----------------
+ *         |12 |13 |14 |15 |            |11 | 8 | 5 |14 |
+ *         -----------------            -----------------
+ *
  * A sample map is used to map sample indices to sample numbers.
  */
 void
@@ -99,10 +110,13 @@ gen6_set_sample_maps(struct gl_context *ctx)
    uint8_t map_2x[2] = {0, 1};
    uint8_t map_4x[4] = {0, 1, 2, 3};
    uint8_t map_8x[8] = {5, 2, 4, 6, 0, 3, 7, 1};
+   uint8_t map_16x[16] = { 15, 10, 9, 7, 4, 1, 3, 13,
+                           12, 2, 0, 6, 11, 8, 5, 14 };
 
    memcpy(ctx->Const.SampleMap2x, map_2x, sizeof(map_2x));
    memcpy(ctx->Const.SampleMap4x, map_4x, sizeof(map_4x));
    memcpy(ctx->Const.SampleMap8x, map_8x, sizeof(map_8x));
+   memcpy(ctx->Const.SampleMap16x, map_16x, sizeof(map_16x));
 }
 
 /**
diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
index fdb3b3df318..05c546e00a0 100644
--- a/src/mesa/main/mtypes.h
+++ b/src/mesa/main/mtypes.h
@@ -3578,11 +3578,24 @@ struct gl_constants
     * below:
     *    SampleMap8x = {a, b, c, d, e, f, g, h};
     *
-    * Follow the logic for other sample counts.
+    * Follow the logic for sample counts 2-8.
+    *
+    * For 16x the sample indices are laid out as a 4x4 grid as follows:
+    *
+    *            -----------------
+    *            | 0 | 1 | 2 | 3 |
+    *            -----------------
+    *            | 4 | 5 | 6 | 7 |
+    *            -----------------
+    *            | 8 | 9 |10 |11 |
+    *            -----------------
+    *            |12 |13 |14 |15 |
+    *            -----------------
     */
    uint8_t SampleMap2x[2];
    uint8_t SampleMap4x[4];
    uint8_t SampleMap8x[8];
+   uint8_t SampleMap16x[16];
 
    /** GL_ARB_shader_atomic_counters */
    GLuint MaxAtomicBufferBindings;

From b080b3d54d99dfb46b5e8a6eb94fdbdeb937f255 Mon Sep 17 00:00:00 2001
From: Neil Roberts <neil@linux.intel.com>
Date: Thu, 22 Oct 2015 10:55:35 +0200
Subject: [PATCH 050/287] meta/blit: Always try to enable GL_ARB_sample_shading
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Previously this extension was only enabled when blitting between two
multisampled buffers. However I don't think it does any harm to just
enable it all the time. The ‘enable’ option is used instead of
‘require’ so that the shader will still compile if the extension isn't
available in the cases where it isn't used. This will make the next
patch simpler because it wants to add another optional extension.

Reviewed-by: Anuj Phogat <anuj.phogat@gmail.com>
---
 src/mesa/drivers/common/meta_blit.c | 16 ++--------------
 1 file changed, 2 insertions(+), 14 deletions(-)

diff --git a/src/mesa/drivers/common/meta_blit.c b/src/mesa/drivers/common/meta_blit.c
index b92c2e2f22b..496ce458824 100644
--- a/src/mesa/drivers/common/meta_blit.c
+++ b/src/mesa/drivers/common/meta_blit.c
@@ -357,17 +357,11 @@ setup_glsl_msaa_blit_shader(struct gl_context *ctx,
        shader_index == BLIT_MSAA_SHADER_2D_MULTISAMPLE_ARRAY_DEPTH_COPY ||
        shader_index == BLIT_MSAA_SHADER_2D_MULTISAMPLE_DEPTH_COPY) {
       char *sample_index;
-      const char *arb_sample_shading_extension_string;
 
       if (dst_is_msaa) {
-         arb_sample_shading_extension_string = "#extension GL_ARB_sample_shading : enable";
          sample_index = "gl_SampleID";
          name = "depth MSAA copy";
       } else {
-         /* Don't need that extension, since we're drawing to a single-sampled
-          * destination.
-          */
-         arb_sample_shading_extension_string = "";
          /* From the GL 4.3 spec:
           *
           *     "If there is a multisample buffer (the value of SAMPLE_BUFFERS
@@ -397,7 +391,7 @@ setup_glsl_msaa_blit_shader(struct gl_context *ctx,
       fs_source = ralloc_asprintf(mem_ctx,
                                   "#version 130\n"
                                   "#extension GL_ARB_texture_multisample : enable\n"
-                                  "%s\n"
+                                  "#extension GL_ARB_sample_shading : enable\n"
                                   "uniform sampler2DMS%s texSampler;\n"
                                   "in %s texCoords;\n"
                                   "out vec4 out_color;\n"
@@ -406,7 +400,6 @@ setup_glsl_msaa_blit_shader(struct gl_context *ctx,
                                   "{\n"
                                   "   gl_FragDepth = texelFetch(texSampler, i%s(texCoords), %s).r;\n"
                                   "}\n",
-                                  arb_sample_shading_extension_string,
                                   sampler_array_suffix,
                                   texcoord_type,
                                   texcoord_type,
@@ -416,14 +409,12 @@ setup_glsl_msaa_blit_shader(struct gl_context *ctx,
        * sample).  Yes, this is ridiculous.
        */
       char *sample_resolve;
-      const char *arb_sample_shading_extension_string;
       const char *merge_function;
       name = ralloc_asprintf(mem_ctx, "%svec4 MSAA %s",
                              vec4_prefix,
                              dst_is_msaa ? "copy" : "resolve");
 
       if (dst_is_msaa) {
-         arb_sample_shading_extension_string = "#extension GL_ARB_sample_shading : enable";
          sample_resolve = ralloc_asprintf(mem_ctx, "   out_color = texelFetch(texSampler, i%s(texCoords), gl_SampleID);", texcoord_type);
          merge_function = "";
       } else {
@@ -439,8 +430,6 @@ setup_glsl_msaa_blit_shader(struct gl_context *ctx,
                "vec4 merge(vec4 a, vec4 b) { return (a + b); }\n";
          }
 
-         arb_sample_shading_extension_string = "";
-
          /* We're assuming power of two samples for this resolution procedure.
           *
           * To avoid losing any floating point precision if the samples all
@@ -496,7 +485,7 @@ setup_glsl_msaa_blit_shader(struct gl_context *ctx,
       fs_source = ralloc_asprintf(mem_ctx,
                                   "#version 130\n"
                                   "#extension GL_ARB_texture_multisample : enable\n"
-                                  "%s\n"
+                                  "#extension GL_ARB_sample_shading : enable\n"
                                   "#define gvec4 %svec4\n"
                                   "uniform %ssampler2DMS%s texSampler;\n"
                                   "in %s texCoords;\n"
@@ -507,7 +496,6 @@ setup_glsl_msaa_blit_shader(struct gl_context *ctx,
                                   "{\n"
                                   "%s\n" /* sample_resolve */
                                   "}\n",
-                                  arb_sample_shading_extension_string,
                                   vec4_prefix,
                                   vec4_prefix,
                                   sampler_array_suffix,

From aa3f9aaf31e9056a255f9e0472ebdfdaa60abe54 Mon Sep 17 00:00:00 2001
From: Neil Roberts <neil@linux.intel.com>
Date: Mon, 28 Sep 2015 18:22:32 +0100
Subject: [PATCH 051/287] mesa/meta: Use interpolateAtOffset for 16x MSAA copy
 blit

Previously there was a problem in i965 where if 16x MSAA is used then
some of the sample positions are exactly on the 0 x or y axis. When
the MSAA copy blit shader interpolates the texture coordinates at
these sample positions it was possible that it would jump to a
neighboring texel due to rounding errors. It is likely that these
positions would be used on 16x MSAA because that is where they are
defined to be in D3D.

To fix that, this patch makes the blit shader use interpolateAtOffset
whenever 16x MSAA is used and the GL_ARB_gpu_shader5 extension is
available. This forces it to interpolate the texture coordinates at
the pixel center to avoid these problematic positions.

This fixes ext_framebuffer_multisample-unaligned-blit and
ext_framebuffer_multisample-clip-and-scissor-blit with 16x MSAA on
SKL+.

v2: Use interpolateAtOffset instead of interpolateAtSample
v3: Always try to enable GL_ARB_gpu_shader5 in the shader
    [Ian Romanick]

Reviewed-by: Anuj Phogat <anuj.phogat@gmail.com>
---
 src/mesa/drivers/common/meta_blit.c | 39 +++++++++++++++++++++++++++--
 1 file changed, 37 insertions(+), 2 deletions(-)

diff --git a/src/mesa/drivers/common/meta_blit.c b/src/mesa/drivers/common/meta_blit.c
index 496ce458824..4a2444af0f9 100644
--- a/src/mesa/drivers/common/meta_blit.c
+++ b/src/mesa/drivers/common/meta_blit.c
@@ -357,10 +357,16 @@ setup_glsl_msaa_blit_shader(struct gl_context *ctx,
        shader_index == BLIT_MSAA_SHADER_2D_MULTISAMPLE_ARRAY_DEPTH_COPY ||
        shader_index == BLIT_MSAA_SHADER_2D_MULTISAMPLE_DEPTH_COPY) {
       char *sample_index;
+      const char *tex_coords = "texCoords";
 
       if (dst_is_msaa) {
          sample_index = "gl_SampleID";
          name = "depth MSAA copy";
+
+         if (ctx->Extensions.ARB_gpu_shader5 && samples >= 16) {
+            /* See comment below for the color copy */
+            tex_coords = "interpolateAtOffset(texCoords, vec2(0.0))";
+         }
       } else {
          /* From the GL 4.3 spec:
           *
@@ -392,17 +398,19 @@ setup_glsl_msaa_blit_shader(struct gl_context *ctx,
                                   "#version 130\n"
                                   "#extension GL_ARB_texture_multisample : enable\n"
                                   "#extension GL_ARB_sample_shading : enable\n"
+                                  "#extension GL_ARB_gpu_shader5 : enable\n"
                                   "uniform sampler2DMS%s texSampler;\n"
                                   "in %s texCoords;\n"
                                   "out vec4 out_color;\n"
                                   "\n"
                                   "void main()\n"
                                   "{\n"
-                                  "   gl_FragDepth = texelFetch(texSampler, i%s(texCoords), %s).r;\n"
+                                  "   gl_FragDepth = texelFetch(texSampler, i%s(%s), %s).r;\n"
                                   "}\n",
                                   sampler_array_suffix,
                                   texcoord_type,
                                   texcoord_type,
+                                  tex_coords,
                                   sample_index);
    } else {
       /* You can create 2D_MULTISAMPLE textures with 0 sample count (meaning 1
@@ -415,7 +423,33 @@ setup_glsl_msaa_blit_shader(struct gl_context *ctx,
                              dst_is_msaa ? "copy" : "resolve");
 
       if (dst_is_msaa) {
-         sample_resolve = ralloc_asprintf(mem_ctx, "   out_color = texelFetch(texSampler, i%s(texCoords), gl_SampleID);", texcoord_type);
+         const char *tex_coords;
+
+         if (ctx->Extensions.ARB_gpu_shader5 && samples >= 16) {
+            /* If interpolateAtOffset is available then it will be used to
+             * force the interpolation to the center. This is required at
+             * least on Intel hardware because it is possible to have a sample
+             * position on the 0 x or y axis which means it will lie exactly
+             * on the pixel boundary. If we let the hardware interpolate the
+             * coordinates at one of these positions then it is possible for
+             * it to jump to a neighboring texel when converting to ints due
+             * to rounding errors. This is only done for >= 16x MSAA because
+             * it probably has some overhead. It is more likely that some
+             * hardware will use one of these problematic positions at 16x
+             * MSAA because in that case in D3D they are defined to be at
+             * these positions.
+             */
+            tex_coords = "interpolateAtOffset(texCoords, vec2(0.0))";
+         } else {
+            tex_coords = "texCoords";
+         }
+
+         sample_resolve =
+            ralloc_asprintf(mem_ctx,
+                            "   out_color = texelFetch(texSampler, "
+                            "i%s(%s), gl_SampleID);",
+                            texcoord_type, tex_coords);
+
          merge_function = "";
       } else {
          int i;
@@ -486,6 +520,7 @@ setup_glsl_msaa_blit_shader(struct gl_context *ctx,
                                   "#version 130\n"
                                   "#extension GL_ARB_texture_multisample : enable\n"
                                   "#extension GL_ARB_sample_shading : enable\n"
+                                  "#extension GL_ARB_gpu_shader5 : enable\n"
                                   "#define gvec4 %svec4\n"
                                   "uniform %ssampler2DMS%s texSampler;\n"
                                   "in %s texCoords;\n"

From 6c5f371a27f901d5bc60cf5a2a11cf6629f96f78 Mon Sep 17 00:00:00 2001
From: Neil Roberts <neil@linux.intel.com>
Date: Mon, 7 Sep 2015 18:23:14 +0100
Subject: [PATCH 052/287] i965/skl+: Enable support for 16x multisampling

Reviewed-by: Ben Widawsky <ben@bwidawsk.net>
---
 src/mesa/drivers/dri/i965/brw_context.c  | 6 ++++++
 src/mesa/drivers/dri/i965/intel_screen.c | 5 ++++-
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c
index 3b125448e14..ac6045dbba9 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -84,6 +84,12 @@ brw_query_samples_for_format(struct gl_context *ctx, GLenum target,
 
    switch (brw->gen) {
    case 9:
+      samples[0] = 16;
+      samples[1] = 8;
+      samples[2] = 4;
+      samples[3] = 2;
+      return 4;
+
    case 8:
       samples[0] = 8;
       samples[1] = 4;
diff --git a/src/mesa/drivers/dri/i965/intel_screen.c b/src/mesa/drivers/dri/i965/intel_screen.c
index fb95fb629ad..d64ebade769 100644
--- a/src/mesa/drivers/dri/i965/intel_screen.c
+++ b/src/mesa/drivers/dri/i965/intel_screen.c
@@ -1178,12 +1178,15 @@ intel_detect_timestamp(struct intel_screen *screen)
 const int*
 intel_supported_msaa_modes(const struct intel_screen  *screen)
 {
+   static const int gen9_modes[] = {16, 8, 4, 2, 0, -1};
    static const int gen8_modes[] = {8, 4, 2, 0, -1};
    static const int gen7_modes[] = {8, 4, 0, -1};
    static const int gen6_modes[] = {4, 0, -1};
    static const int gen4_modes[] = {0, -1};
 
-   if (screen->devinfo->gen >= 8) {
+   if (screen->devinfo->gen >= 9) {
+      return gen9_modes;
+   } else if (screen->devinfo->gen >= 8) {
       return gen8_modes;
    } else if (screen->devinfo->gen >= 7) {
       return gen7_modes;

From 027b64a55afc0fe8efcf9f6217192807e285c830 Mon Sep 17 00:00:00 2001
From: Iago Toral Quiroga <itoral@igalia.com>
Date: Fri, 30 Oct 2015 08:39:11 +0100
Subject: [PATCH 053/287] i965/fs: Do not mark direct used surfaces in
 VARYING_PULL_CONSTANT_LOAD

Right now the generator marks direct surfaces as used but leaves marking of
indirect surfaces to the caller. Just make the callers handle marking in both
cases for consistency.

v2: Use const and remove useless surf_index temporary (Curro)

Reviewed-by: Francisco Jerez <currojerez@riseup.net>
---
 src/mesa/drivers/dri/i965/brw_fs.cpp           | 7 ++++---
 src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 8 --------
 src/mesa/drivers/dri/i965/brw_fs_nir.cpp       | 6 ++++--
 3 files changed, 8 insertions(+), 13 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index cb2536263dd..a813746cffc 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -2012,7 +2012,7 @@ fs_visitor::demote_pull_constants()
 
          /* Set up the annotation tracking for new generated instructions. */
          const fs_builder ibld(this, block, inst);
-         fs_reg surf_index(stage_prog_data->binding_table.pull_constants_start);
+         const unsigned index = stage_prog_data->binding_table.pull_constants_start;
          fs_reg dst = vgrf(glsl_type::float_type);
 
          assert(inst->src[i].stride == 0);
@@ -2020,16 +2020,17 @@ fs_visitor::demote_pull_constants()
          /* Generate a pull load into dst. */
          if (inst->src[i].reladdr) {
             VARYING_PULL_CONSTANT_LOAD(ibld, dst,
-                                       surf_index,
+                                       fs_reg(index),
                                        *inst->src[i].reladdr,
                                        pull_index);
             inst->src[i].reladdr = NULL;
             inst->src[i].stride = 1;
+            brw_mark_surface_used(prog_data, index);
          } else {
             const fs_builder ubld = ibld.exec_all().group(8, 0);
             fs_reg offset = fs_reg((unsigned)(pull_index * 4) & ~15);
             ubld.emit(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD,
-                      dst, surf_index, offset);
+                      dst, fs_reg(index), offset);
             inst->src[i].set_smear(pull_index & 3);
          }
 
diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
index 28fb620279b..87152634c73 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -1349,8 +1349,6 @@ fs_generator::generate_varying_pull_constant_load(fs_inst *inst,
                            inst->header_size != 0,
                            simd_mode,
                            return_format);
-
-   brw_mark_surface_used(prog_data, surf_index);
 }
 
 void
@@ -1395,8 +1393,6 @@ fs_generator::generate_varying_pull_constant_load_gen7(fs_inst *inst,
                               simd_mode,
                               0);
 
-      brw_mark_surface_used(prog_data, surf_index);
-
    } else {
 
       struct brw_reg addr = vec1(retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD));
@@ -1427,10 +1423,6 @@ fs_generator::generate_varying_pull_constant_load_gen7(fs_inst *inst,
                               false /* header */,
                               simd_mode,
                               0);
-
-      /* visitor knows more than we do about the surface limit required,
-       * so has already done marking.
-       */
    }
 }
 
diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index e7a39ff741c..50b8218e934 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -1827,8 +1827,10 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
       fs_reg surf_index;
 
       if (const_index) {
-         surf_index = fs_reg(stage_prog_data->binding_table.ubo_start +
-                             const_index->u[0]);
+         const unsigned index = stage_prog_data->binding_table.ubo_start +
+                                const_index->u[0];
+         surf_index = fs_reg(index);
+         brw_mark_surface_used(prog_data, index);
       } else {
          /* The block index is not a constant. Evaluate the index expression
           * per-channel and add the base UBO index; we have to select a value

From d7013988fb1d1c277e1fbce8623abddc43f78e05 Mon Sep 17 00:00:00 2001
From: Iago Toral Quiroga <itoral@igalia.com>
Date: Fri, 30 Oct 2015 08:48:57 +0100
Subject: [PATCH 054/287] i965/fs: Do not mark used direct surfaces in
 UNIFORM_PULL_CONSTANT_LOAD

Right now the generator marks direct surfaces as used but leaves marking of
indirect surfaces to the caller. Just make the callers handle marking in both
cases for consistency.

Reviewed-by: Francisco Jerez <currojerez@riseup.net>
---
 src/mesa/drivers/dri/i965/brw_fs.cpp           |  2 +-
 src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 10 ----------
 2 files changed, 1 insertion(+), 11 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index a813746cffc..629fbbdf01b 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -2025,7 +2025,6 @@ fs_visitor::demote_pull_constants()
                                        pull_index);
             inst->src[i].reladdr = NULL;
             inst->src[i].stride = 1;
-            brw_mark_surface_used(prog_data, index);
          } else {
             const fs_builder ubld = ibld.exec_all().group(8, 0);
             fs_reg offset = fs_reg((unsigned)(pull_index * 4) & ~15);
@@ -2033,6 +2032,7 @@ fs_visitor::demote_pull_constants()
                       dst, fs_reg(index), offset);
             inst->src[i].set_smear(pull_index & 3);
          }
+         brw_mark_surface_used(prog_data, index);
 
          /* Rewrite the instruction to use the temporary VGRF. */
          inst->src[i].file = GRF;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
index 87152634c73..c73257a8d21 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -1184,8 +1184,6 @@ fs_generator::generate_uniform_pull_constant_load(fs_inst *inst,
 
    brw_oword_block_read(p, dst, brw_message_reg(inst->base_mrf),
 			read_offset, surf_index);
-
-   brw_mark_surface_used(prog_data, surf_index);
 }
 
 void
@@ -1246,9 +1244,6 @@ fs_generator::generate_uniform_pull_constant_load_gen7(fs_inst *inst,
                               header_present,
                               BRW_SAMPLER_SIMD_MODE_SIMD4X2,
                               0);
-
-      brw_mark_surface_used(prog_data, surf_index);
-
    } else {
 
       struct brw_reg addr = vec1(retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD));
@@ -1278,11 +1273,6 @@ fs_generator::generate_uniform_pull_constant_load_gen7(fs_inst *inst,
                               0);
 
       brw_pop_insn_state(p);
-
-      /* visitor knows more than we do about the surface limit required,
-       * so has already done marking.
-       */
-
    }
 }
 

From 6105d1d0a02c7eea83b327965713be3bada306f7 Mon Sep 17 00:00:00 2001
From: Iago Toral Quiroga <itoral@igalia.com>
Date: Fri, 30 Oct 2015 10:24:12 +0100
Subject: [PATCH 055/287] i965/vec4: Do not mark used direct surfaces in
 VS_OPCODE_PULL_CONSTANT_LOAD

Right now the generator marks direct surfaces as used but leaves marking of
indirect surfaces to the caller. Just make the callers handle marking in both
cases for consistency.

v2: Use const, do not add unnecessary temporary (Curro)

Reviewed-by: Francisco Jerez <currojerez@riseup.net>
---
 src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 9 ---------
 src/mesa/drivers/dri/i965/brw_vec4_nir.cpp       | 6 ++++--
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp   | 6 ++++--
 3 files changed, 8 insertions(+), 13 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
index f0ad903c572..d9252ef4c89 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
@@ -929,8 +929,6 @@ generate_pull_constant_load(struct brw_codegen *p,
 			   2, /* mlen */
                            true, /* header_present */
 			   1 /* rlen */);
-
-   brw_mark_surface_used(&prog_data->base, surf_index);
 }
 
 static void
@@ -985,9 +983,6 @@ generate_pull_constant_load_gen7(struct brw_codegen *p,
                               inst->header_size != 0,
                               BRW_SAMPLER_SIMD_MODE_SIMD4X2,
                               0);
-
-      brw_mark_surface_used(&prog_data->base, surf_index.dw1.ud);
-
    } else {
 
       struct brw_reg addr = vec1(retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD));
@@ -1017,10 +1012,6 @@ generate_pull_constant_load_gen7(struct brw_codegen *p,
                               inst->header_size != 0,
                               BRW_SAMPLER_SIMD_MODE_SIMD4X2,
                               0);
-
-      /* visitor knows more than we do about the surface limit required,
-       * so has already done marking.
-       */
    }
 }
 
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
index b848810ebc7..e6c018e52ae 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
@@ -749,8 +749,10 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
          /* The block index is a constant, so just emit the binding table entry
           * as an immediate.
           */
-         surf_index = src_reg(prog_data->base.binding_table.ubo_start +
-                              const_block_index->u[0]);
+         const unsigned index = prog_data->base.binding_table.ubo_start +
+                                const_block_index->u[0];
+         surf_index = src_reg(index);
+         brw_mark_surface_used(&prog_data->base, index);
       } else {
          /* The block index is not a constant. Evaluate the index expression
           * per-channel and add the base UBO index; we have to select a value
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index 7d949896bcc..94759afd166 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -1745,14 +1745,16 @@ vec4_visitor::emit_pull_constant_load(bblock_t *block, vec4_instruction *inst,
 				      int base_offset)
 {
    int reg_offset = base_offset + orig_src.reg_offset;
-   src_reg index = src_reg(prog_data->base.binding_table.pull_constants_start);
+   const unsigned index = prog_data->base.binding_table.pull_constants_start;
    src_reg offset = get_pull_constant_offset(block, inst, orig_src.reladdr,
                                              reg_offset);
 
    emit_pull_constant_load_reg(temp,
-                               index,
+                               src_reg(index),
                                offset,
                                block, inst);
+
+   brw_mark_surface_used(&prog_data->base, index);
 }
 
 /**

From eca4c43a33c5c1bb63c8aa9d0506ed2ba3f9d8cb Mon Sep 17 00:00:00 2001
From: Iago Toral Quiroga <itoral@igalia.com>
Date: Fri, 30 Oct 2015 10:57:47 +0100
Subject: [PATCH 056/287] i965/vec4: Do not mark used surfaces in
 VS_OPCODE_GET_BUFFER_SIZE

Do it in the visitor, like we do for other opcodes.

v2: use const, get rid of useless surf_index temporary (Curro)

Reviewed-by: Francisco Jerez <currojerez@riseup.net>
---
 src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 2 --
 src/mesa/drivers/dri/i965/brw_vec4_nir.cpp       | 8 +++++---
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
index d9252ef4c89..693f5835412 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
@@ -955,8 +955,6 @@ generate_get_buffer_size(struct brw_codegen *p,
               inst->header_size > 0,
               BRW_SAMPLER_SIMD_MODE_SIMD4X2,
               BRW_SAMPLER_RETURN_FORMAT_SINT32);
-
-   brw_mark_surface_used(&prog_data->base, surf_index.dw1.ud);
 }
 
 static void
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
index e6c018e52ae..e0d5a14981a 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
@@ -427,15 +427,15 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
       nir_const_value *const_uniform_block = nir_src_as_const_value(instr->src[0]);
       unsigned ssbo_index = const_uniform_block ? const_uniform_block->u[0] : 0;
 
-      src_reg surf_index = src_reg(prog_data->base.binding_table.ssbo_start +
-                                   ssbo_index);
+      const unsigned index =
+         prog_data->base.binding_table.ssbo_start + ssbo_index;
       dst_reg result_dst = get_nir_dest(instr->dest);
       vec4_instruction *inst = new(mem_ctx)
          vec4_instruction(VS_OPCODE_GET_BUFFER_SIZE, result_dst);
 
       inst->base_mrf = 2;
       inst->mlen = 1; /* always at least one */
-      inst->src[1] = src_reg(surf_index);
+      inst->src[1] = src_reg(index);
 
       /* MRF for the first parameter */
       src_reg lod = src_reg(0);
@@ -444,6 +444,8 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
       emit(MOV(dst_reg(MRF, param_base, glsl_type::int_type, writemask), lod));
 
       emit(inst);
+
+      brw_mark_surface_used(&prog_data->base, index);
       break;
    }
 

From eea3c907cc480a105224b21be51d62bc64ea1057 Mon Sep 17 00:00:00 2001
From: Iago Toral Quiroga <itoral@igalia.com>
Date: Fri, 30 Oct 2015 11:10:02 +0100
Subject: [PATCH 057/287] i965/fs: Do not mark used surfaces in
 FS_OPCODE_GET_BUFFER_SIZE

Do it in the visitor, like we do for other opcodes.

v2: use const, get rid of useless surf_index temporary (Curro)

Reviewed-by: Francisco Jerez <currojerez@riseup.net>
---
 src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 2 --
 src/mesa/drivers/dri/i965/brw_fs_nir.cpp       | 6 ++++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
index c73257a8d21..974219f3ece 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -666,8 +666,6 @@ fs_generator::generate_get_buffer_size(fs_inst *inst,
               inst->header_size > 0,
               simd_mode,
               BRW_SAMPLER_RETURN_FORMAT_SINT32);
-
-   brw_mark_surface_used(prog_data, surf_index.dw1.ud);
 }
 
 void
diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index 50b8218e934..b6f4c52c50f 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -2275,12 +2275,14 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
                                   BRW_REGISTER_TYPE_UD);
       bld.LOAD_PAYLOAD(src_payload, &source, 1, 0);
 
-      fs_reg surf_index = fs_reg(prog_data->binding_table.ssbo_start + ssbo_index);
+      const unsigned index = prog_data->binding_table.ssbo_start + ssbo_index;
       fs_inst *inst = bld.emit(FS_OPCODE_GET_BUFFER_SIZE, dest,
-                               src_payload, surf_index);
+                               src_payload, fs_reg(index));
       inst->header_size = 0;
       inst->mlen = mlen;
       bld.emit(inst);
+
+      brw_mark_surface_used(prog_data, index);
       break;
    }
 

From e587590a83588133d7a9044e3935585f675bbb30 Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Thu, 5 Nov 2015 00:33:22 -0500
Subject: [PATCH 058/287] st/mesa: account for texture views when doing
 CopyImageSubData
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
---
 src/mesa/state_tracker/st_cb_copyimage.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/src/mesa/state_tracker/st_cb_copyimage.c b/src/mesa/state_tracker/st_cb_copyimage.c
index 75114cdb712..03a7294e7c9 100644
--- a/src/mesa/state_tracker/st_cb_copyimage.c
+++ b/src/mesa/state_tracker/st_cb_copyimage.c
@@ -552,6 +552,10 @@ st_CopyImageSubData(struct gl_context *ctx,
       src_res = src->pt;
       src_level = src_image->Level;
       src_z += src_image->Face;
+      if (src_image->TexObject->Immutable) {
+         src_level += src_image->TexObject->MinLevel;
+         src_z += src_image->TexObject->MinLayer;
+      }
    } else {
       struct st_renderbuffer *src = st_renderbuffer(src_renderbuffer);
       src_res = src->texture;
@@ -563,6 +567,10 @@ st_CopyImageSubData(struct gl_context *ctx,
       dst_res = dst->pt;
       dst_level = dst_image->Level;
       dst_z += dst_image->Face;
+      if (dst_image->TexObject->Immutable) {
+         dst_level += dst_image->TexObject->MinLevel;
+         dst_z += dst_image->TexObject->MinLayer;
+      }
    } else {
       struct st_renderbuffer *dst = st_renderbuffer(dst_renderbuffer);
       dst_res = dst->texture;

From fc76cc05e39839c0933320f28b4cc9041d4e7770 Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Fri, 30 Oct 2015 03:17:35 -0400
Subject: [PATCH 059/287] gallium: expose a debug message callback settable by
 context owner
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This will allow gallium drivers to send messages to KHR_debug endpoints.

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
---
 src/gallium/auxiliary/util/u_debug.c | 14 ++++++++++++++
 src/gallium/auxiliary/util/u_debug.h | 20 ++++++++++++++++++++
 src/gallium/docs/source/context.rst  |  3 +++
 src/gallium/include/pipe/p_context.h |  8 ++++++++
 src/gallium/include/pipe/p_defines.h | 12 ++++++++++++
 src/gallium/include/pipe/p_state.h   | 25 +++++++++++++++++++++++++
 6 files changed, 82 insertions(+)

diff --git a/src/gallium/auxiliary/util/u_debug.c b/src/gallium/auxiliary/util/u_debug.c
index 7388a499c74..702953673ba 100644
--- a/src/gallium/auxiliary/util/u_debug.c
+++ b/src/gallium/auxiliary/util/u_debug.c
@@ -70,6 +70,20 @@ void _debug_vprintf(const char *format, va_list ap)
 #endif
 }
 
+void
+_pipe_debug_message(
+   struct pipe_debug_callback *cb,
+   unsigned *id,
+   enum pipe_debug_type type,
+   const char *fmt, ...)
+{
+   va_list args;
+   va_start(args, fmt);
+   if (cb && cb->debug_message)
+      cb->debug_message(cb->data, id, type, fmt, args);
+   va_end(args);
+}
+
 
 void
 debug_disable_error_message_boxes(void)
diff --git a/src/gallium/auxiliary/util/u_debug.h b/src/gallium/auxiliary/util/u_debug.h
index 926063a1918..aaf223c6f68 100644
--- a/src/gallium/auxiliary/util/u_debug.h
+++ b/src/gallium/auxiliary/util/u_debug.h
@@ -42,6 +42,7 @@
 #include "os/os_misc.h"
 
 #include "pipe/p_format.h"
+#include "pipe/p_defines.h"
 
 
 #ifdef	__cplusplus
@@ -262,6 +263,25 @@ void _debug_assert_fail(const char *expr,
    _debug_printf("error: %s\n", __msg)
 #endif
 
+/**
+ * Output a debug log message to the debug info callback.  The variadic part
+ * begins with the printf-style format, so a bare format string is accepted.
+ */
+#define pipe_debug_message(cb, type, ...) do { \
+   static unsigned pipe_debug_msg_id_ = 0; \
+   _pipe_debug_message(cb, &pipe_debug_msg_id_, \
+                       PIPE_DEBUG_TYPE_ ## type, __VA_ARGS__); \
+} while (0)
+
+struct pipe_debug_callback;
+
+void
+_pipe_debug_message(
+   struct pipe_debug_callback *cb,
+   unsigned *id,
+   enum pipe_debug_type type,
+   const char *fmt, ...) _util_printf_format(4, 5);
+
 
 /**
  * Used by debug_dump_enum and debug_dump_flags to describe symbols.
diff --git a/src/gallium/docs/source/context.rst b/src/gallium/docs/source/context.rst
index a7d08d2c7f9..dbc087700b5 100644
--- a/src/gallium/docs/source/context.rst
+++ b/src/gallium/docs/source/context.rst
@@ -84,6 +84,9 @@ objects. They all follow simple, one-method binding calls, e.g.
     levels. This corresponds to GL's ``PATCH_DEFAULT_OUTER_LEVEL``.
   * ``default_inner_level`` is the default value for the inner tessellation
     levels. This corresponds to GL's ``PATCH_DEFAULT_INNER_LEVEL``.
+* ``set_debug_callback`` sets the callback to be used for reporting
+  various debug messages, eventually reported via KHR_debug and
+  similar mechanisms.
 
 
 Sampler Views
diff --git a/src/gallium/include/pipe/p_context.h b/src/gallium/include/pipe/p_context.h
index 6f9fe767404..5adbd18e690 100644
--- a/src/gallium/include/pipe/p_context.h
+++ b/src/gallium/include/pipe/p_context.h
@@ -45,6 +45,7 @@ struct pipe_blit_info;
 struct pipe_box;
 struct pipe_clip_state;
 struct pipe_constant_buffer;
+struct pipe_debug_callback;
 struct pipe_depth_stencil_alpha_state;
 struct pipe_draw_info;
 struct pipe_fence_handle;
@@ -238,6 +239,13 @@ struct pipe_context {
                           const float default_outer_level[4],
                           const float default_inner_level[2]);
 
+   /**
+    * Sets the debug callback. If the pointer is null, then no callback is
+    * set, otherwise a copy of the data should be made.
+    */
+   void (*set_debug_callback)(struct pipe_context *,
+                              const struct pipe_debug_callback *);
+
    /**
     * Bind an array of shader buffers that will be used by a shader.
     * Any buffers that were previously bound to the specified range
diff --git a/src/gallium/include/pipe/p_defines.h b/src/gallium/include/pipe/p_defines.h
index b15c8809c1d..d6f87ccae12 100644
--- a/src/gallium/include/pipe/p_defines.h
+++ b/src/gallium/include/pipe/p_defines.h
@@ -868,6 +868,18 @@ struct pipe_driver_query_group_info
    unsigned num_queries;
 };
 
+enum pipe_debug_type
+{
+   PIPE_DEBUG_TYPE_OUT_OF_MEMORY = 1,
+   PIPE_DEBUG_TYPE_ERROR,
+   PIPE_DEBUG_TYPE_SHADER_INFO,
+   PIPE_DEBUG_TYPE_PERF_INFO,
+   PIPE_DEBUG_TYPE_INFO,
+   PIPE_DEBUG_TYPE_FALLBACK,
+   PIPE_DEBUG_TYPE_CONFORMANCE,
+};
+
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/gallium/include/pipe/p_state.h b/src/gallium/include/pipe/p_state.h
index 4bf8d46c686..6bdf03a8b2b 100644
--- a/src/gallium/include/pipe/p_state.h
+++ b/src/gallium/include/pipe/p_state.h
@@ -684,6 +684,31 @@ struct pipe_compute_state
    unsigned req_input_mem; /**< Required size of the INPUT resource. */
 };
 
+/**
+ * Structure that contains a callback for debug messages from the driver back
+ * to the state tracker.
+ */
+struct pipe_debug_callback
+{
+   /**
+    * Callback for the driver to report debug/performance/etc information back
+    * to the state tracker.
+    *
+    * \param data       user-supplied data pointer
+    * \param id         message type identifier, if pointed value is 0, then a
+    *                   new id is assigned
+    * \param type       PIPE_DEBUG_TYPE_*
+    * \param fmt        printf-style format string
+    * \param args       args for format string
+    */
+   void (*debug_message)(void *data,
+                         unsigned *id,
+                         enum pipe_debug_type type,
+                         const char *fmt,
+                         va_list args);
+   void *data;
+};
+
 #ifdef __cplusplus
 }
 #endif

From c93c9d220baa60fdd0e685a072a61857d3a2846b Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Fri, 30 Oct 2015 23:28:01 -0400
Subject: [PATCH 060/287] st/mesa: set debug callback for debug contexts
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Reviewed-by: Brian Paul <brianp@vmware.com>
---
 src/mesa/state_tracker/st_manager.c | 57 +++++++++++++++++++++++++++++
 1 file changed, 57 insertions(+)

diff --git a/src/mesa/state_tracker/st_manager.c b/src/mesa/state_tracker/st_manager.c
index 7abd128e719..d0d261f4fde 100644
--- a/src/mesa/state_tracker/st_manager.c
+++ b/src/mesa/state_tracker/st_manager.c
@@ -623,6 +623,58 @@ st_context_destroy(struct st_context_iface *stctxi)
    st_destroy_context(st);
 }
 
+/**
+ * pipe_debug_callback hook: map a PIPE_DEBUG_TYPE_* onto a GL debug
+ * source/type/severity and forward the message to _mesa_gl_vdebug().
+ * Unrecognized types are reported like PIPE_DEBUG_TYPE_INFO.
+ *
+ * \param data   the st_context registered with the callback
+ * \param id     message id pointer, passed through unchanged
+ * \param ptype  PIPE_DEBUG_TYPE_*
+ * \param fmt    printf-style format string
+ * \param args   arguments for fmt
+ */
+static void
+st_debug_message(void *data,
+                 unsigned *id,
+                 enum pipe_debug_type ptype,
+                 const char *fmt,
+                 va_list args)
+{
+   struct st_context *st = data;
+   enum mesa_debug_source source;
+   enum mesa_debug_type type;
+   enum mesa_debug_severity severity;
+
+   switch (ptype) {
+   case PIPE_DEBUG_TYPE_OUT_OF_MEMORY:
+   case PIPE_DEBUG_TYPE_ERROR:
+      source = MESA_DEBUG_SOURCE_API;
+      type = MESA_DEBUG_TYPE_ERROR;
+      severity = MESA_DEBUG_SEVERITY_MEDIUM;
+      break;
+   case PIPE_DEBUG_TYPE_SHADER_INFO:
+      source = MESA_DEBUG_SOURCE_SHADER_COMPILER;
+      type = MESA_DEBUG_TYPE_OTHER;
+      severity = MESA_DEBUG_SEVERITY_NOTIFICATION;
+      break;
+   case PIPE_DEBUG_TYPE_PERF_INFO:
+   case PIPE_DEBUG_TYPE_FALLBACK:
+      source = MESA_DEBUG_SOURCE_API;
+      type = MESA_DEBUG_TYPE_PERFORMANCE;
+      severity = MESA_DEBUG_SEVERITY_NOTIFICATION;
+      break;
+   case PIPE_DEBUG_TYPE_INFO:
+   case PIPE_DEBUG_TYPE_CONFORMANCE:
+   default: /* unknown type: never leave source/type/severity unset */
+      source = MESA_DEBUG_SOURCE_API;
+      type = MESA_DEBUG_TYPE_OTHER;
+      severity = MESA_DEBUG_SEVERITY_NOTIFICATION;
+      break;
+   }
+   _mesa_gl_vdebug(st->ctx, id, source, type, severity, fmt, args);
+}
+
 static struct st_context_iface *
 st_api_create_context(struct st_api *stapi, struct st_manager *smapi,
                       const struct st_context_attribs *attribs,
@@ -677,6 +729,11 @@ st_api_create_context(struct st_api *stapi, struct st_manager *smapi,
          return NULL;
       }
       st->ctx->Const.ContextFlags |= GL_CONTEXT_FLAG_DEBUG_BIT;
+
+      if (pipe->set_debug_callback) {
+         struct pipe_debug_callback cb = { st_debug_message, st };
+         pipe->set_debug_callback(pipe, &cb);
+      }
    }
 
    if (attribs->flags & ST_CONTEXT_FLAG_FORWARD_COMPATIBLE)

From 6706cc1671bfd8e6c021db8b68815959fa7fceba Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Fri, 30 Oct 2015 23:25:59 -0400
Subject: [PATCH 061/287] st/clover: provide a path for drivers to call through
 to pfn_notify

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>

[ Francisco Jerez: Clean up clover::context interface by passing
  around a function object. ]
---
 .../state_trackers/clover/api/context.cpp     |  7 ++++++-
 .../state_trackers/clover/core/context.cpp    |  5 +++--
 .../state_trackers/clover/core/context.hpp    |  7 ++++++-
 .../state_trackers/clover/core/queue.cpp      | 21 +++++++++++++++++++
 4 files changed, 36 insertions(+), 4 deletions(-)

diff --git a/src/gallium/state_trackers/clover/api/context.cpp b/src/gallium/state_trackers/clover/api/context.cpp
index 021eea36f6e..c0cd2d32b95 100644
--- a/src/gallium/state_trackers/clover/api/context.cpp
+++ b/src/gallium/state_trackers/clover/api/context.cpp
@@ -45,8 +45,13 @@ clCreateContext(const cl_context_properties *d_props, cl_uint num_devs,
          throw error(CL_INVALID_PROPERTY);
    }
 
+   const auto notify = (!pfn_notify ? context::notify_action() :
+                        [=](const char *s) {
+                           pfn_notify(s, NULL, 0, user_data);
+                        });
+
    ret_error(r_errcode, CL_SUCCESS);
-   return desc(new context(props, devs));
+   return desc(new context(props, devs, notify));
 
 } catch (error &e) {
    ret_error(r_errcode, e);
diff --git a/src/gallium/state_trackers/clover/core/context.cpp b/src/gallium/state_trackers/clover/core/context.cpp
index bf4df39dc2a..c3e20829384 100644
--- a/src/gallium/state_trackers/clover/core/context.cpp
+++ b/src/gallium/state_trackers/clover/core/context.cpp
@@ -25,8 +25,9 @@
 using namespace clover;
 
 context::context(const property_list &props,
-                 const ref_vector<device> &devs) :
-   props(props), devs(devs) {
+                 const ref_vector<device> &devs,
+                 const notify_action &notify) :
+   notify(notify), props(props), devs(devs) {
 }
 
 bool
diff --git a/src/gallium/state_trackers/clover/core/context.hpp b/src/gallium/state_trackers/clover/core/context.hpp
index 0ec4ff4a231..7b22ccae78f 100644
--- a/src/gallium/state_trackers/clover/core/context.hpp
+++ b/src/gallium/state_trackers/clover/core/context.hpp
@@ -36,7 +36,10 @@ namespace clover {
       typedef clover::property_list<cl_context_properties> property_list;
 
    public:
-      context(const property_list &props, const ref_vector<device> &devs);
+      typedef std::function<void (const char *)> notify_action;
+
+      context(const property_list &props, const ref_vector<device> &devs,
+              const notify_action &notify);
 
       context(const context &ctx) = delete;
       context &
@@ -53,6 +56,8 @@ namespace clover {
       device_range
       devices() const;
 
+      const notify_action notify;
+
    private:
       property_list props;
       const std::vector<intrusive_ref<device>> devs;
diff --git a/src/gallium/state_trackers/clover/core/queue.cpp b/src/gallium/state_trackers/clover/core/queue.cpp
index 4aaf67de241..24d71f186e0 100644
--- a/src/gallium/state_trackers/clover/core/queue.cpp
+++ b/src/gallium/state_trackers/clover/core/queue.cpp
@@ -24,15 +24,36 @@
 #include "core/event.hpp"
 #include "pipe/p_screen.h"
 #include "pipe/p_context.h"
+#include "pipe/p_state.h"
 
 using namespace clover;
 
+namespace {
+   void
+   debug_notify_callback(void *data, // command_queue that registered us
+                         unsigned *id,
+                         enum pipe_debug_type type,
+                         const char *fmt,
+                         va_list args) {
+      const command_queue *queue = (const command_queue *)data;
+      char buffer[1024]; // vsnprintf truncates longer messages to fit
+      vsnprintf(buffer, sizeof(buffer), fmt, args);
+      queue->context().notify(buffer); // id and type are not forwarded
+   }
+}
+
 command_queue::command_queue(clover::context &ctx, clover::device &dev,
                              cl_command_queue_properties props) :
    context(ctx), device(dev), props(props) {
    pipe = dev.pipe->context_create(dev.pipe, NULL, PIPE_CONTEXT_COMPUTE_ONLY);
    if (!pipe)
       throw error(CL_INVALID_DEVICE);
+
+   if (ctx.notify) {
+      struct pipe_debug_callback cb = { &debug_notify_callback, this };
+      if (pipe->set_debug_callback)
+         pipe->set_debug_callback(pipe, &cb);
+   }
 }
 
 command_queue::~command_queue() {

From 4335b28840be53ad3c230a4f2dfc2262bf56a0a7 Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Fri, 30 Oct 2015 17:23:22 -0400
Subject: [PATCH 062/287] nouveau: add support for sending debug messages via
 KHR_debug

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
---
 src/gallium/drivers/nouveau/nouveau_context.h |  4 ++++
 src/gallium/drivers/nouveau/nouveau_screen.c  | 19 +++++++++++++++++++
 .../drivers/nouveau/nv30/nv30_context.c       |  1 +
 .../drivers/nouveau/nv50/nv50_context.c       |  1 +
 .../drivers/nouveau/nvc0/nvc0_context.c       |  1 +
 5 files changed, 26 insertions(+)

diff --git a/src/gallium/drivers/nouveau/nouveau_context.h b/src/gallium/drivers/nouveau/nouveau_context.h
index decb2714ede..a8189b82f82 100644
--- a/src/gallium/drivers/nouveau/nouveau_context.h
+++ b/src/gallium/drivers/nouveau/nouveau_context.h
@@ -14,6 +14,7 @@ struct nouveau_context {
 
    struct nouveau_client *client;
    struct nouveau_pushbuf *pushbuf;
+   struct pipe_debug_callback debug;
 
    bool vbo_dirty;
 
@@ -63,6 +64,9 @@ nouveau_context(struct pipe_context *pipe)
 void
 nouveau_context_init_vdec(struct nouveau_context *);
 
+void
+nouveau_context_init(struct nouveau_context *);
+
 void
 nouveau_scratch_runout_release(struct nouveau_context *);
 
diff --git a/src/gallium/drivers/nouveau/nouveau_screen.c b/src/gallium/drivers/nouveau/nouveau_screen.c
index 47603b0b7fd..21d431788ec 100644
--- a/src/gallium/drivers/nouveau/nouveau_screen.c
+++ b/src/gallium/drivers/nouveau/nouveau_screen.c
@@ -18,6 +18,7 @@
 
 #include "nouveau_winsys.h"
 #include "nouveau_screen.h"
+#include "nouveau_context.h"
 #include "nouveau_fence.h"
 #include "nouveau_mm.h"
 #include "nouveau_buffer.h"
@@ -238,3 +239,21 @@ nouveau_screen_fini(struct nouveau_screen *screen)
 
    nouveau_device_del(&screen->device);
 }
+
+static void
+nouveau_set_debug_callback(struct pipe_context *pipe,
+                           const struct pipe_debug_callback *cb)
+{
+   struct nouveau_context *context = nouveau_context(pipe);
+
+   if (cb)
+      context->debug = *cb; /* struct copy; the cb pointer is not kept */
+   else
+      memset(&context->debug, 0, sizeof(context->debug)); /* NULL: clear */
+}
+
+void
+nouveau_context_init(struct nouveau_context *context)
+{
+   context->pipe.set_debug_callback = nouveau_set_debug_callback;
+}
diff --git a/src/gallium/drivers/nouveau/nv30/nv30_context.c b/src/gallium/drivers/nouveau/nv30/nv30_context.c
index a36fd57fae7..3ed088912e2 100644
--- a/src/gallium/drivers/nouveau/nv30/nv30_context.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_context.c
@@ -242,6 +242,7 @@ nv30_context_create(struct pipe_screen *pscreen, void *priv, unsigned ctxflags)
    if (debug_get_bool_option("NV30_SWTNL", false))
       nv30->draw_flags |= NV30_NEW_SWTNL;
 
+   nouveau_context_init(&nv30->base);
    nv30->sample_mask = 0xffff;
    nv30_vbo_init(pipe);
    nv30_query_init(pipe);
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_context.c b/src/gallium/drivers/nouveau/nv50/nv50_context.c
index 4108f48005e..7867c2df7f3 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_context.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_context.c
@@ -306,6 +306,7 @@ nv50_create(struct pipe_screen *pscreen, void *priv, unsigned ctxflags)
    }
    nv50->base.pushbuf->kick_notify = nv50_default_kick_notify;
 
+   nouveau_context_init(&nv50->base);
    nv50_init_query_functions(nv50);
    nv50_init_surface_functions(nv50);
    nv50_init_state_functions(nv50);
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.c b/src/gallium/drivers/nouveau/nvc0/nvc0_context.c
index f7604f11788..82ed5a1864e 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.c
@@ -309,6 +309,7 @@ nvc0_create(struct pipe_screen *pscreen, void *priv, unsigned ctxflags)
    pipe->memory_barrier = nvc0_memory_barrier;
    pipe->get_sample_position = nvc0_context_get_sample_position;
 
+   nouveau_context_init(&nvc0->base);
    nvc0_init_query_functions(nvc0);
    nvc0_init_surface_functions(nvc0);
    nvc0_init_state_functions(nvc0);

From 4f6cd5fad03757e371b66049dcd42855e4853c14 Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Fri, 30 Oct 2015 18:41:09 -0400
Subject: [PATCH 063/287] nv50,nvc0: provide debug messages with shader
 compilation stats

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
---
 src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h   | 1 +
 src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp | 2 ++
 src/gallium/drivers/nouveau/nv50/nv50_program.c        | 8 +++++++-
 src/gallium/drivers/nouveau/nv50/nv50_program.h        | 3 ++-
 src/gallium/drivers/nouveau/nv50/nv50_shader_state.c   | 2 +-
 src/gallium/drivers/nouveau/nv50/nv50_state.c          | 3 ++-
 src/gallium/drivers/nouveau/nvc0/nvc0_compute.c        | 2 +-
 src/gallium/drivers/nouveau/nvc0/nvc0_context.h        | 3 ++-
 src/gallium/drivers/nouveau/nvc0/nvc0_program.c        | 8 +++++++-
 src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c   | 2 +-
 src/gallium/drivers/nouveau/nvc0/nvc0_state.c          | 3 ++-
 11 files changed, 28 insertions(+), 9 deletions(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
index c0cab3299b5..b49bf9d53bc 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
@@ -96,6 +96,7 @@ struct nv50_ir_prog_info
       uint32_t tlsSpace;  /* required local memory per thread */
       uint32_t *code;
       uint32_t codeSize;
+      uint32_t instructions;
       uint8_t sourceRep;  /* NV50_PROGRAM_IR */
       const void *source;
       void *relocData;
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp
index afc8ff1374f..4390a726d1c 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp
@@ -373,6 +373,7 @@ Program::emitBinary(struct nv50_ir_prog_info *info)
    if (!code)
       return false;
    emit->setCodeLocation(code, binSize);
+   info->bin.instructions = 0;
 
    for (ArrayList::Iterator fi = allFuncs.iterator(); !fi.end(); fi.next()) {
       Function *fn = reinterpret_cast<Function *>(fi.get());
@@ -382,6 +383,7 @@ Program::emitBinary(struct nv50_ir_prog_info *info)
       for (int b = 0; b < fn->bbCount; ++b) {
          for (Instruction *i = fn->bbArray[b]->getEntry(); i; i = i->next) {
             emit->emitInstruction(i);
+            info->bin.instructions++;
             if (i->sType == TYPE_F64 || i->dType == TYPE_F64)
                info->io.fp64 = true;
          }
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_program.c b/src/gallium/drivers/nouveau/nv50/nv50_program.c
index 299629b6438..89e7a338283 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_program.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_program.c
@@ -318,7 +318,8 @@ nv50_program_create_strmout_state(const struct nv50_ir_prog_info *info,
 }
 
 bool
-nv50_program_translate(struct nv50_program *prog, uint16_t chipset)
+nv50_program_translate(struct nv50_program *prog, uint16_t chipset,
+                       struct pipe_debug_callback *debug)
 {
    struct nv50_ir_prog_info *info;
    int ret;
@@ -406,6 +407,11 @@ nv50_program_translate(struct nv50_program *prog, uint16_t chipset)
       prog->so = nv50_program_create_strmout_state(info,
                                                    &prog->pipe.stream_output);
 
+   pipe_debug_message(debug, SHADER_INFO,
+                      "type: %d, local: %d, gpr: %d, inst: %d, bytes: %d",
+                      prog->type, info->bin.tlsSpace, prog->max_gpr,
+                      info->bin.instructions, info->bin.codeSize);
+
 out:
    FREE(info);
    return !ret;
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_program.h b/src/gallium/drivers/nouveau/nv50/nv50_program.h
index 24cc96567d7..7a33eb11d6d 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_program.h
+++ b/src/gallium/drivers/nouveau/nv50/nv50_program.h
@@ -106,7 +106,8 @@ struct nv50_program {
    struct nv50_stream_output_state *so;
 };
 
-bool nv50_program_translate(struct nv50_program *, uint16_t chipset);
+bool nv50_program_translate(struct nv50_program *, uint16_t chipset,
+                            struct pipe_debug_callback *);
 bool nv50_program_upload_code(struct nv50_context *, struct nv50_program *);
 void nv50_program_destroy(struct nv50_context *, struct nv50_program *);
 
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c b/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c
index 9b911043132..8e4b2b42bda 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c
@@ -113,7 +113,7 @@ nv50_program_validate(struct nv50_context *nv50, struct nv50_program *prog)
 {
    if (!prog->translated) {
       prog->translated = nv50_program_translate(
-         prog, nv50->screen->base.device->chipset);
+         prog, nv50->screen->base.device->chipset, &nv50->base.debug);
       if (!prog->translated)
          return false;
    } else
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_state.c b/src/gallium/drivers/nouveau/nv50/nv50_state.c
index 6c8c9f0b4e6..d27f12ca94b 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_state.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_state.c
@@ -727,7 +727,8 @@ nv50_sp_state_create(struct pipe_context *pipe,
       prog->pipe.stream_output = cso->stream_output;
 
    prog->translated = nv50_program_translate(
-         prog, nv50_context(pipe)->screen->base.device->chipset);
+         prog, nv50_context(pipe)->screen->base.device->chipset,
+         &nouveau_context(pipe)->debug);
 
    return (void *)prog;
 }
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c b/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c
index e33af042620..2e7c790e9ee 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c
@@ -120,7 +120,7 @@ nvc0_compute_validate_program(struct nvc0_context *nvc0)
 
    if (!prog->translated) {
       prog->translated = nvc0_program_translate(
-         prog, nvc0->screen->base.device->chipset);
+         prog, nvc0->screen->base.device->chipset, &nvc0->base.debug);
       if (!prog->translated)
          return false;
    }
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
index 4af83c53224..39b73ecb0c2 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
@@ -224,7 +224,8 @@ void nvc0_default_kick_notify(struct nouveau_pushbuf *);
 extern struct draw_stage *nvc0_draw_render_stage(struct nvc0_context *);
 
 /* nvc0_program.c */
-bool nvc0_program_translate(struct nvc0_program *, uint16_t chipset);
+bool nvc0_program_translate(struct nvc0_program *, uint16_t chipset,
+                            struct pipe_debug_callback *);
 bool nvc0_program_upload_code(struct nvc0_context *, struct nvc0_program *);
 void nvc0_program_destroy(struct nvc0_context *, struct nvc0_program *);
 void nvc0_program_library_upload(struct nvc0_context *);
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
index 68048f9d6c0..43d7c7b1123 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
@@ -517,7 +517,8 @@ nvc0_program_dump(struct nvc0_program *prog)
 #endif
 
 bool
-nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset)
+nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset,
+                       struct pipe_debug_callback *debug)
 {
    struct nv50_ir_prog_info *info;
    int ret;
@@ -639,6 +640,11 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset)
       prog->tfb = nvc0_program_create_tfb_state(info,
                                                 &prog->pipe.stream_output);
 
+   pipe_debug_message(debug, SHADER_INFO,
+                      "type: %d, local: %d, gpr: %d, inst: %d, bytes: %d",
+                      prog->type, info->bin.tlsSpace, prog->num_gprs,
+                      info->bin.instructions, info->bin.codeSize);
+
 out:
    FREE(info);
    return !ret;
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c b/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c
index 8595800592c..7e2e9992fe8 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c
@@ -72,7 +72,7 @@ nvc0_program_validate(struct nvc0_context *nvc0, struct nvc0_program *prog)
 
    if (!prog->translated) {
       prog->translated = nvc0_program_translate(
-         prog, nvc0->screen->base.device->chipset);
+         prog, nvc0->screen->base.device->chipset, &nvc0->base.debug);
       if (!prog->translated)
          return false;
    }
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
index ba1714da010..5dce5f0e65d 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
@@ -681,7 +681,8 @@ nvc0_sp_state_create(struct pipe_context *pipe,
       prog->pipe.stream_output = cso->stream_output;
 
    prog->translated = nvc0_program_translate(
-      prog, nvc0_context(pipe)->screen->base.device->chipset);
+      prog, nvc0_context(pipe)->screen->base.device->chipset,
+      &nouveau_context(pipe)->debug);
 
    return (void *)prog;
 }

From ba093a099af13a630c255b34dc5d315760248e5f Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Fri, 30 Oct 2015 20:44:57 -0400
Subject: [PATCH 064/287] nouveau: send back a debug message when waiting for a
 fence to complete

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
---
 src/gallium/drivers/nouveau/nouveau_buffer.c     | 13 +++++++------
 src/gallium/drivers/nouveau/nouveau_context.h    |  1 +
 src/gallium/drivers/nouveau/nouveau_fence.c      | 14 ++++++++++++--
 src/gallium/drivers/nouveau/nouveau_fence.h      |  4 +++-
 src/gallium/drivers/nouveau/nouveau_screen.c     |  2 +-
 src/gallium/drivers/nouveau/nv30/nv30_screen.c   |  2 +-
 src/gallium/drivers/nouveau/nv50/nv50_screen.c   |  2 +-
 src/gallium/drivers/nouveau/nv50/nv50_vbo.c      |  2 +-
 src/gallium/drivers/nouveau/nvc0/nvc0_screen.c   |  2 +-
 src/gallium/drivers/nouveau/nvc0/nvc0_transfer.c |  4 ++--
 10 files changed, 30 insertions(+), 16 deletions(-)

diff --git a/src/gallium/drivers/nouveau/nouveau_buffer.c b/src/gallium/drivers/nouveau/nouveau_buffer.c
index 72e070b5f06..68e69beb08f 100644
--- a/src/gallium/drivers/nouveau/nouveau_buffer.c
+++ b/src/gallium/drivers/nouveau/nouveau_buffer.c
@@ -225,21 +225,22 @@ nouveau_transfer_write(struct nouveau_context *nv, struct nouveau_transfer *tx,
  * for write/read by waiting on the buffer's relevant fences.
  */
 static inline bool
-nouveau_buffer_sync(struct nv04_resource *buf, unsigned rw)
+nouveau_buffer_sync(struct nouveau_context *nv,
+                    struct nv04_resource *buf, unsigned rw)
 {
    if (rw == PIPE_TRANSFER_READ) {
       if (!buf->fence_wr)
          return true;
       NOUVEAU_DRV_STAT_RES(buf, buf_non_kernel_fence_sync_count,
                            !nouveau_fence_signalled(buf->fence_wr));
-      if (!nouveau_fence_wait(buf->fence_wr))
+      if (!nouveau_fence_wait(buf->fence_wr, &nv->debug))
          return false;
    } else {
       if (!buf->fence)
          return true;
       NOUVEAU_DRV_STAT_RES(buf, buf_non_kernel_fence_sync_count,
                            !nouveau_fence_signalled(buf->fence));
-      if (!nouveau_fence_wait(buf->fence))
+      if (!nouveau_fence_wait(buf->fence, &nv->debug))
          return false;
 
       nouveau_fence_ref(NULL, &buf->fence);
@@ -478,7 +479,7 @@ nouveau_buffer_transfer_map(struct pipe_context *pipe,
       if (unlikely(usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE)) {
          /* Discarding was not possible, must sync because
           * subsequent transfers might use UNSYNCHRONIZED. */
-         nouveau_buffer_sync(buf, usage & PIPE_TRANSFER_READ_WRITE);
+         nouveau_buffer_sync(nv, buf, usage & PIPE_TRANSFER_READ_WRITE);
       } else
       if (usage & PIPE_TRANSFER_DISCARD_RANGE) {
          /* The whole range is being discarded, so it doesn't matter what was
@@ -490,7 +491,7 @@ nouveau_buffer_transfer_map(struct pipe_context *pipe,
          if (usage & PIPE_TRANSFER_DONTBLOCK)
             map = NULL;
          else
-            nouveau_buffer_sync(buf, usage & PIPE_TRANSFER_READ_WRITE);
+            nouveau_buffer_sync(nv, buf, usage & PIPE_TRANSFER_READ_WRITE);
       } else {
          /* It is expected that the returned buffer be a representation of the
           * data in question, so we must copy it over from the buffer. */
@@ -615,7 +616,7 @@ nouveau_resource_map_offset(struct nouveau_context *nv,
    if (res->mm) {
       unsigned rw;
       rw = (flags & NOUVEAU_BO_WR) ? PIPE_TRANSFER_WRITE : PIPE_TRANSFER_READ;
-      nouveau_buffer_sync(res, rw);
+      nouveau_buffer_sync(nv, res, rw);
       if (nouveau_bo_map(res->bo, 0, NULL))
          return NULL;
    } else {
diff --git a/src/gallium/drivers/nouveau/nouveau_context.h b/src/gallium/drivers/nouveau/nouveau_context.h
index a8189b82f82..c3bbb11bd60 100644
--- a/src/gallium/drivers/nouveau/nouveau_context.h
+++ b/src/gallium/drivers/nouveau/nouveau_context.h
@@ -2,6 +2,7 @@
 #define __NOUVEAU_CONTEXT_H__
 
 #include "pipe/p_context.h"
+#include "pipe/p_state.h"
 #include <nouveau.h>
 
 #define NOUVEAU_MAX_SCRATCH_BUFS 4
diff --git a/src/gallium/drivers/nouveau/nouveau_fence.c b/src/gallium/drivers/nouveau/nouveau_fence.c
index 21cf2b9ae5e..d3a34060952 100644
--- a/src/gallium/drivers/nouveau/nouveau_fence.c
+++ b/src/gallium/drivers/nouveau/nouveau_fence.c
@@ -23,6 +23,7 @@
 #include "nouveau_screen.h"
 #include "nouveau_winsys.h"
 #include "nouveau_fence.h"
+#include "os/os_time.h"
 
 #ifdef PIPE_OS_UNIX
 #include <sched.h>
@@ -182,10 +183,11 @@ nouveau_fence_signalled(struct nouveau_fence *fence)
 }
 
 bool
-nouveau_fence_wait(struct nouveau_fence *fence)
+nouveau_fence_wait(struct nouveau_fence *fence, struct pipe_debug_callback *debug)
 {
    struct nouveau_screen *screen = fence->screen;
    uint32_t spins = 0;
+   int64_t start = 0;
 
    /* wtf, someone is waiting on a fence in flush_notify handler? */
    assert(fence->state != NOUVEAU_FENCE_STATE_EMITTING);
@@ -206,11 +208,19 @@ nouveau_fence_wait(struct nouveau_fence *fence)
    if (fence == screen->fence.current)
       nouveau_fence_next(screen);
 
+   if (debug && debug->debug_message)
+      start = os_time_get_nano();
+
    do {
       nouveau_fence_update(screen, false);
 
-      if (fence->state == NOUVEAU_FENCE_STATE_SIGNALLED)
+      if (fence->state == NOUVEAU_FENCE_STATE_SIGNALLED) {
+         if (debug && debug->debug_message)
+            pipe_debug_message(debug, PERF_INFO,
+                               "stalled %.3f ms waiting for fence",
+                               (os_time_get_nano() - start) / 1000000.f);
          return true;
+      }
       if (!spins)
          NOUVEAU_DRV_STAT(screen, any_non_kernel_fence_sync_count, 1);
       spins++;
diff --git a/src/gallium/drivers/nouveau/nouveau_fence.h b/src/gallium/drivers/nouveau/nouveau_fence.h
index 2efcab2172d..0fa9d020f50 100644
--- a/src/gallium/drivers/nouveau/nouveau_fence.h
+++ b/src/gallium/drivers/nouveau/nouveau_fence.h
@@ -11,6 +11,8 @@
 #define NOUVEAU_FENCE_STATE_FLUSHED   3
 #define NOUVEAU_FENCE_STATE_SIGNALLED 4
 
+struct pipe_debug_callback;
+
 struct nouveau_fence_work {
    struct list_head list;
    void (*func)(void *);
@@ -34,7 +36,7 @@ bool nouveau_fence_new(struct nouveau_screen *, struct nouveau_fence **,
 bool nouveau_fence_work(struct nouveau_fence *, void (*)(void *), void *);
 void nouveau_fence_update(struct nouveau_screen *, bool flushed);
 void nouveau_fence_next(struct nouveau_screen *);
-bool nouveau_fence_wait(struct nouveau_fence *);
+bool nouveau_fence_wait(struct nouveau_fence *, struct pipe_debug_callback *);
 bool nouveau_fence_signalled(struct nouveau_fence *);
 
 void nouveau_fence_unref_bo(void *data); /* generic unref bo callback */
diff --git a/src/gallium/drivers/nouveau/nouveau_screen.c b/src/gallium/drivers/nouveau/nouveau_screen.c
index 21d431788ec..a6065e45aaa 100644
--- a/src/gallium/drivers/nouveau/nouveau_screen.c
+++ b/src/gallium/drivers/nouveau/nouveau_screen.c
@@ -76,7 +76,7 @@ nouveau_screen_fence_finish(struct pipe_screen *screen,
    if (!timeout)
       return nouveau_fence_signalled(nouveau_fence(pfence));
 
-   return nouveau_fence_wait(nouveau_fence(pfence));
+   return nouveau_fence_wait(nouveau_fence(pfence), NULL);
 }
 
 
diff --git a/src/gallium/drivers/nouveau/nv30/nv30_screen.c b/src/gallium/drivers/nouveau/nv30/nv30_screen.c
index 794a0898eaf..44aac22010f 100644
--- a/src/gallium/drivers/nouveau/nv30/nv30_screen.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_screen.c
@@ -383,7 +383,7 @@ nv30_screen_destroy(struct pipe_screen *pscreen)
        * _current_ one, and remove both.
        */
       nouveau_fence_ref(screen->base.fence.current, &current);
-      nouveau_fence_wait(current);
+      nouveau_fence_wait(current, NULL);
       nouveau_fence_ref(NULL, &current);
       nouveau_fence_ref(NULL, &screen->base.fence.current);
    }
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
index de2150ca08c..5dda98141de 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
@@ -350,7 +350,7 @@ nv50_screen_destroy(struct pipe_screen *pscreen)
        * _current_ one, and remove both.
        */
       nouveau_fence_ref(screen->base.fence.current, &current);
-      nouveau_fence_wait(current);
+      nouveau_fence_wait(current, NULL);
       nouveau_fence_ref(NULL, &current);
       nouveau_fence_ref(NULL, &screen->base.fence.current);
    }
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
index 9fa6fceeefa..9aa593f919e 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
@@ -636,7 +636,7 @@ nv50_draw_elements(struct nv50_context *nv50, bool shorten,
        * pushbuf submit, but it's probably not a big performance difference.
        */
       if (buf->fence_wr && !nouveau_fence_signalled(buf->fence_wr))
-         nouveau_fence_wait(buf->fence_wr);
+         nouveau_fence_wait(buf->fence_wr, &nv50->base.debug);
 
       while (instance_count--) {
          BEGIN_NV04(push, NV50_3D(VERTEX_BEGIN_GL), 1);
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
index 3b543929f3c..7d96977c24b 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
@@ -415,7 +415,7 @@ nvc0_screen_destroy(struct pipe_screen *pscreen)
        * _current_ one, and remove both.
        */
       nouveau_fence_ref(screen->base.fence.current, &current);
-      nouveau_fence_wait(current);
+      nouveau_fence_wait(current, NULL);
       nouveau_fence_ref(NULL, &current);
       nouveau_fence_ref(NULL, &screen->base.fence.current);
    }
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_transfer.c b/src/gallium/drivers/nouveau/nvc0/nvc0_transfer.c
index d459dd61c19..279c7e93cc8 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_transfer.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_transfer.c
@@ -340,8 +340,8 @@ nvc0_mt_sync(struct nvc0_context *nvc0, struct nv50_miptree *mt, unsigned usage)
       return !nouveau_bo_wait(mt->base.bo, access, nvc0->base.client);
    }
    if (usage & PIPE_TRANSFER_WRITE)
-      return !mt->base.fence || nouveau_fence_wait(mt->base.fence);
-   return !mt->base.fence_wr || nouveau_fence_wait(mt->base.fence_wr);
+      return !mt->base.fence || nouveau_fence_wait(mt->base.fence, &nvc0->base.debug);
+   return !mt->base.fence_wr || nouveau_fence_wait(mt->base.fence_wr, &nvc0->base.debug);
 }
 
 void *

From 5ae37ae6151623303300047d7465d199df8199a4 Mon Sep 17 00:00:00 2001
From: Roland Scheidegger <sroland@vmware.com>
Date: Thu, 5 Nov 2015 18:00:40 +0100
Subject: [PATCH 065/287] llvmpipe: disable texture cache

There are some weird problems with 8-wide vectors.
---
 src/gallium/drivers/llvmpipe/lp_tex_sample.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/drivers/llvmpipe/lp_tex_sample.h b/src/gallium/drivers/llvmpipe/lp_tex_sample.h
index 939131e7975..e26d608c9eb 100644
--- a/src/gallium/drivers/llvmpipe/lp_tex_sample.h
+++ b/src/gallium/drivers/llvmpipe/lp_tex_sample.h
@@ -37,7 +37,7 @@ struct lp_sampler_static_state;
 /**
  * Whether texture cache is used for s3tc textures.
  */
-#define LP_USE_TEXTURE_CACHE 1
+#define LP_USE_TEXTURE_CACHE 0
 
 /**
  * Pure-LLVM texture sampling code generator.

From 8dcf807cb43383590ba193c7ff20b8a98e4a9f65 Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Tue, 13 Oct 2015 15:30:03 -0700
Subject: [PATCH 066/287] i965: Fix scalar VS float[] and vec2[] output arrays.

The scalar VS backend has never handled float[] and vec2[] outputs
correctly (my original code was broken).  Outputs need to be padded
out to vec4 slots.

In fs_visitor::nir_setup_outputs(), we tried to process each vec4 slot
by looping from 0 to ALIGN(type_size_scalar(type), 4) / 4.  However,
this is wrong: type_size_scalar() for a float[2] would return 2, or
for vec2[2] it would return 4.  This looked like a single slot, even
though in reality each array element would be stored in separate vec4
slots.

Because of this bug, outputs[] and output_components[] would not get
initialized for the second element's VARYING_SLOT, which meant
emit_urb_writes() would skip writing them.  Nothing used those values,
and dead code elimination threw a party.

To fix this, we introduce a new type_size_vec4_times_4() function which
pads array elements correctly, but still counts in scalar components,
generating correct indices in store_output intrinsics.

Normally, varying packing avoids this problem by turning varyings into
vec4s.  So this doesn't actually fix any Piglit or dEQP tests today.
However, if varying packing is disabled, things would be broken.
Tessellation shaders can't use varying packing, so this fixes various
tcs-input Piglit tests on a branch of mine.

v2: Shorten the implementation of type_size_4x to a single line (caught
    by Connor Abbott), and rename it to type_size_vec4_times_4()
    (renaming suggested by Jason Ekstrand).  Use type_size_vec4
    rather than using type_size_vec4_times_4 and then dividing by 4.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
---
 src/mesa/drivers/dri/i965/brw_fs.cpp     | 13 +++++++++++++
 src/mesa/drivers/dri/i965/brw_fs_nir.cpp |  2 +-
 src/mesa/drivers/dri/i965/brw_nir.c      |  3 ++-
 src/mesa/drivers/dri/i965/brw_shader.h   |  1 +
 4 files changed, 17 insertions(+), 2 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 629fbbdf01b..ad94fa479e2 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -513,6 +513,19 @@ type_size_scalar(const struct glsl_type *type)
    return 0;
 }
 
+/**
+ * Returns the number of scalar components needed to store type, assuming
+ * that vectors are padded out to vec4.
+ *
+ * This has the packing rules of type_size_vec4(), but counts components
+ * similar to type_size_scalar().
+ */
+extern "C" int
+type_size_vec4_times_4(const struct glsl_type *type)
+{
+   return 4 * type_size_vec4(type);
+}
+
 /**
  * Create a MOV to read the timestamp register.
  *
diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index b6f4c52c50f..261518605b7 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -104,7 +104,7 @@ fs_visitor::nir_setup_outputs()
       switch (stage) {
       case MESA_SHADER_VERTEX:
       case MESA_SHADER_GEOMETRY:
-         for (unsigned int i = 0; i < ALIGN(type_size_scalar(var->type), 4) / 4; i++) {
+         for (int i = 0; i < type_size_vec4(var->type); i++) {
             int output = var->data.location + i;
             this->outputs[output] = offset(reg, bld, 4 * i);
             this->output_components[output] = vector_elements;
diff --git a/src/mesa/drivers/dri/i965/brw_nir.c b/src/mesa/drivers/dri/i965/brw_nir.c
index a7a5eb511cd..dece208233f 100644
--- a/src/mesa/drivers/dri/i965/brw_nir.c
+++ b/src/mesa/drivers/dri/i965/brw_nir.c
@@ -150,7 +150,8 @@ brw_nir_lower_outputs(nir_shader *nir, bool is_scalar)
    case MESA_SHADER_GEOMETRY:
       if (is_scalar) {
          nir_assign_var_locations(&nir->outputs, &nir->num_outputs,
-                                  type_size_scalar);
+                                  type_size_vec4_times_4);
+         nir_lower_io(nir, nir_var_shader_out, type_size_vec4_times_4);
       } else {
          nir_foreach_variable(var, &nir->outputs)
             var->data.driver_location = var->data.location;
diff --git a/src/mesa/drivers/dri/i965/brw_shader.h b/src/mesa/drivers/dri/i965/brw_shader.h
index 6a2dfc9bbb6..29baebf0cc1 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.h
+++ b/src/mesa/drivers/dri/i965/brw_shader.h
@@ -277,6 +277,7 @@ bool brw_cs_precompile(struct gl_context *ctx,
 
 int type_size_scalar(const struct glsl_type *type);
 int type_size_vec4(const struct glsl_type *type);
+int type_size_vec4_times_4(const struct glsl_type *type);
 
 bool is_scalar_shader_stage(const struct brw_compiler *compiler, int stage);
 

From e0b896c86c92c4dd02aea7fb5eb8eabe089b9e58 Mon Sep 17 00:00:00 2001
From: Julien Isorce <julien.isorce@gmail.com>
Date: Thu, 5 Nov 2015 08:24:44 +0000
Subject: [PATCH 067/287] st/va: indent vlVaQuerySurfaceAttributes and
 vlVaCreateSurfaces2
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Some lines were using 4 indentation spaces instead of 3.

Signed-off-by: Julien Isorce <j.isorce@samsung.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Emil Velikov <emil.l.velikov@gmail.com>
---
 src/gallium/state_trackers/va/surface.c | 498 ++++++++++++------------
 1 file changed, 249 insertions(+), 249 deletions(-)

diff --git a/src/gallium/state_trackers/va/surface.c b/src/gallium/state_trackers/va/surface.c
index 8f406e09990..59815aa4c5a 100644
--- a/src/gallium/state_trackers/va/surface.c
+++ b/src/gallium/state_trackers/va/surface.c
@@ -311,101 +311,101 @@ VAStatus
 vlVaQuerySurfaceAttributes(VADriverContextP ctx, VAConfigID config,
                            VASurfaceAttrib *attrib_list, unsigned int *num_attribs)
 {
-    vlVaDriver *drv;
-    VASurfaceAttrib *attribs;
-    struct pipe_screen *pscreen;
-    int i;
+   vlVaDriver *drv;
+   VASurfaceAttrib *attribs;
+   struct pipe_screen *pscreen;
+   int i;
 
-    if (config == VA_INVALID_ID)
-        return VA_STATUS_ERROR_INVALID_CONFIG;
+   if (config == VA_INVALID_ID)
+      return VA_STATUS_ERROR_INVALID_CONFIG;
 
-    if (!attrib_list && !num_attribs)
-        return VA_STATUS_ERROR_INVALID_PARAMETER;
+   if (!attrib_list && !num_attribs)
+      return VA_STATUS_ERROR_INVALID_PARAMETER;
 
-    if (!attrib_list) {
-        *num_attribs = VASurfaceAttribCount;
-        return VA_STATUS_SUCCESS;
-    }
+   if (!attrib_list) {
+      *num_attribs = VASurfaceAttribCount;
+      return VA_STATUS_SUCCESS;
+   }
 
-    if (!ctx)
-       return VA_STATUS_ERROR_INVALID_CONTEXT;
+   if (!ctx)
+      return VA_STATUS_ERROR_INVALID_CONTEXT;
 
-    drv = VL_VA_DRIVER(ctx);
+   drv = VL_VA_DRIVER(ctx);
 
-    if (!drv)
-        return VA_STATUS_ERROR_INVALID_CONTEXT;
+   if (!drv)
+      return VA_STATUS_ERROR_INVALID_CONTEXT;
 
-    pscreen = VL_VA_PSCREEN(ctx);
+   pscreen = VL_VA_PSCREEN(ctx);
 
-    if (!pscreen)
-       return VA_STATUS_ERROR_INVALID_CONTEXT;
+   if (!pscreen)
+      return VA_STATUS_ERROR_INVALID_CONTEXT;
 
-    attribs = CALLOC(VASurfaceAttribCount, sizeof(VASurfaceAttrib));
+   attribs = CALLOC(VASurfaceAttribCount, sizeof(VASurfaceAttrib));
 
-    if (!attribs)
-        return VA_STATUS_ERROR_ALLOCATION_FAILED;
+   if (!attribs)
+      return VA_STATUS_ERROR_ALLOCATION_FAILED;
 
-    i = 0;
+   i = 0;
 
-    if (config == PIPE_VIDEO_PROFILE_UNKNOWN) {
-       /* vlVaCreateConfig returns PIPE_VIDEO_PROFILE_UNKNOWN
+   if (config == PIPE_VIDEO_PROFILE_UNKNOWN) {
+      /* vlVaCreateConfig returns PIPE_VIDEO_PROFILE_UNKNOWN
           only for VAEntrypointVideoProc. */
-       attribs[i].type = VASurfaceAttribPixelFormat;
-       attribs[i].value.type = VAGenericValueTypeInteger;
-       attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE;
-       attribs[i].value.value.i = VA_FOURCC_BGRA;
-       i++;
+      attribs[i].type = VASurfaceAttribPixelFormat;
+      attribs[i].value.type = VAGenericValueTypeInteger;
+      attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE;
+      attribs[i].value.value.i = VA_FOURCC_BGRA;
+      i++;
 
-       attribs[i].type = VASurfaceAttribPixelFormat;
-       attribs[i].value.type = VAGenericValueTypeInteger;
-       attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE;
-       attribs[i].value.value.i = VA_FOURCC_RGBA;
-       i++;
-    } else {
-       /* Assume VAEntrypointVLD for now. */
-       attribs[i].type = VASurfaceAttribPixelFormat;
-       attribs[i].value.type = VAGenericValueTypeInteger;
-       attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE;
-       attribs[i].value.value.i = VA_FOURCC_NV12;
-       i++;
-    }
+      attribs[i].type = VASurfaceAttribPixelFormat;
+      attribs[i].value.type = VAGenericValueTypeInteger;
+      attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE;
+      attribs[i].value.value.i = VA_FOURCC_RGBA;
+      i++;
+   } else {
+      /* Assume VAEntrypointVLD for now. */
+      attribs[i].type = VASurfaceAttribPixelFormat;
+      attribs[i].value.type = VAGenericValueTypeInteger;
+      attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE;
+      attribs[i].value.value.i = VA_FOURCC_NV12;
+      i++;
+   }
 
-    attribs[i].type = VASurfaceAttribMemoryType;
-    attribs[i].value.type = VAGenericValueTypeInteger;
-    attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE;
-    attribs[i].value.value.i = VA_SURFACE_ATTRIB_MEM_TYPE_VA |
-        VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME;
-    i++;
+   attribs[i].type = VASurfaceAttribMemoryType;
+   attribs[i].value.type = VAGenericValueTypeInteger;
+   attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE;
+   attribs[i].value.value.i = VA_SURFACE_ATTRIB_MEM_TYPE_VA |
+         VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME;
+   i++;
 
-    attribs[i].type = VASurfaceAttribExternalBufferDescriptor;
-    attribs[i].value.type = VAGenericValueTypePointer;
-    attribs[i].flags = VA_SURFACE_ATTRIB_SETTABLE;
-    attribs[i].value.value.p = NULL; /* ignore */
-    i++;
+   attribs[i].type = VASurfaceAttribExternalBufferDescriptor;
+   attribs[i].value.type = VAGenericValueTypePointer;
+   attribs[i].flags = VA_SURFACE_ATTRIB_SETTABLE;
+   attribs[i].value.value.p = NULL; /* ignore */
+   i++;
 
-    attribs[i].type = VASurfaceAttribMaxWidth;
-    attribs[i].value.type = VAGenericValueTypeInteger;
-    attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE;
-    attribs[i].value.value.i = vl_video_buffer_max_size(pscreen);
-    i++;
+   attribs[i].type = VASurfaceAttribMaxWidth;
+   attribs[i].value.type = VAGenericValueTypeInteger;
+   attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE;
+   attribs[i].value.value.i = vl_video_buffer_max_size(pscreen);
+   i++;
 
-    attribs[i].type = VASurfaceAttribMaxHeight;
-    attribs[i].value.type = VAGenericValueTypeInteger;
-    attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE;
-    attribs[i].value.value.i = vl_video_buffer_max_size(pscreen);
-    i++;
+   attribs[i].type = VASurfaceAttribMaxHeight;
+   attribs[i].value.type = VAGenericValueTypeInteger;
+   attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE;
+   attribs[i].value.value.i = vl_video_buffer_max_size(pscreen);
+   i++;
 
-    if (i > *num_attribs) {
-        *num_attribs = i;
-        FREE(attribs);
-        return VA_STATUS_ERROR_MAX_NUM_EXCEEDED;
-    }
+   if (i > *num_attribs) {
+      *num_attribs = i;
+      FREE(attribs);
+      return VA_STATUS_ERROR_MAX_NUM_EXCEEDED;
+   }
 
-    *num_attribs = i;
-    memcpy(attrib_list, attribs, i * sizeof(VASurfaceAttrib));
-    FREE(attribs);
+   *num_attribs = i;
+   memcpy(attrib_list, attribs, i * sizeof(VASurfaceAttrib));
+   FREE(attribs);
 
-    return VA_STATUS_SUCCESS;
+   return VA_STATUS_SUCCESS;
 }
 
 static VAStatus
@@ -414,75 +414,75 @@ suface_from_external_memory(VADriverContextP ctx, vlVaSurface *surface,
                             int index, VASurfaceID *surfaces,
                             struct pipe_video_buffer *templat)
 {
-    vlVaDriver *drv;
-    struct pipe_screen *pscreen;
-    struct pipe_resource *resource;
-    struct pipe_resource res_templ;
-    struct winsys_handle whandle;
-    struct pipe_resource *resources[VL_NUM_COMPONENTS];
+   vlVaDriver *drv;
+   struct pipe_screen *pscreen;
+   struct pipe_resource *resource;
+   struct pipe_resource res_templ;
+   struct winsys_handle whandle;
+   struct pipe_resource *resources[VL_NUM_COMPONENTS];
 
-    if (!ctx)
-        return VA_STATUS_ERROR_INVALID_PARAMETER;
+   if (!ctx)
+      return VA_STATUS_ERROR_INVALID_PARAMETER;
 
-    pscreen = VL_VA_PSCREEN(ctx);
-    drv = VL_VA_DRIVER(ctx);
+   pscreen = VL_VA_PSCREEN(ctx);
+   drv = VL_VA_DRIVER(ctx);
 
-    if (!memory_attibute || !memory_attibute->buffers ||
-        index > memory_attibute->num_buffers)
-        return VA_STATUS_ERROR_INVALID_PARAMETER;
+   if (!memory_attibute || !memory_attibute->buffers ||
+       index > memory_attibute->num_buffers)
+      return VA_STATUS_ERROR_INVALID_PARAMETER;
 
-    if (surface->templat.width != memory_attibute->width ||
-        surface->templat.height != memory_attibute->height ||
-        memory_attibute->num_planes < 1)
-        return VA_STATUS_ERROR_INVALID_PARAMETER;
+   if (surface->templat.width != memory_attibute->width ||
+       surface->templat.height != memory_attibute->height ||
+       memory_attibute->num_planes < 1)
+      return VA_STATUS_ERROR_INVALID_PARAMETER;
 
-    switch (memory_attibute->pixel_format) {
-    case VA_FOURCC_RGBA:
-    case VA_FOURCC_RGBX:
-    case VA_FOURCC_BGRA:
-    case VA_FOURCC_BGRX:
-        if (memory_attibute->num_planes != 1)
-            return VA_STATUS_ERROR_INVALID_PARAMETER;
-        break;
-    default:
-        return VA_STATUS_ERROR_INVALID_PARAMETER;
-    }
+   switch (memory_attibute->pixel_format) {
+   case VA_FOURCC_RGBA:
+   case VA_FOURCC_RGBX:
+   case VA_FOURCC_BGRA:
+   case VA_FOURCC_BGRX:
+      if (memory_attibute->num_planes != 1)
+         return VA_STATUS_ERROR_INVALID_PARAMETER;
+      break;
+   default:
+      return VA_STATUS_ERROR_INVALID_PARAMETER;
+   }
 
-    memset(&res_templ, 0, sizeof(res_templ));
-    res_templ.target = PIPE_TEXTURE_2D;
-    res_templ.last_level = 0;
-    res_templ.depth0 = 1;
-    res_templ.array_size = 1;
-    res_templ.width0 = memory_attibute->width;
-    res_templ.height0 = memory_attibute->height;
-    res_templ.format = surface->templat.buffer_format;
-    res_templ.bind = PIPE_BIND_SAMPLER_VIEW;
-    res_templ.usage = PIPE_USAGE_DEFAULT;
+   memset(&res_templ, 0, sizeof(res_templ));
+   res_templ.target = PIPE_TEXTURE_2D;
+   res_templ.last_level = 0;
+   res_templ.depth0 = 1;
+   res_templ.array_size = 1;
+   res_templ.width0 = memory_attibute->width;
+   res_templ.height0 = memory_attibute->height;
+   res_templ.format = surface->templat.buffer_format;
+   res_templ.bind = PIPE_BIND_SAMPLER_VIEW;
+   res_templ.usage = PIPE_USAGE_DEFAULT;
 
-    memset(&whandle, 0, sizeof(struct winsys_handle));
-    whandle.type = DRM_API_HANDLE_TYPE_FD;
-    whandle.handle = memory_attibute->buffers[index];
-    whandle.stride = memory_attibute->pitches[index];
+   memset(&whandle, 0, sizeof(struct winsys_handle));
+   whandle.type = DRM_API_HANDLE_TYPE_FD;
+   whandle.handle = memory_attibute->buffers[index];
+   whandle.stride = memory_attibute->pitches[index];
 
-    resource = pscreen->resource_from_handle(pscreen, &res_templ, &whandle);
+   resource = pscreen->resource_from_handle(pscreen, &res_templ, &whandle);
 
-    if (!resource)
-       return VA_STATUS_ERROR_ALLOCATION_FAILED;
-
-    memset(resources, 0, sizeof resources);
-    resources[0] = resource;
-
-    surface->buffer = vl_video_buffer_create_ex2(drv->pipe, templat, resources);
-    if (!surface->buffer)
-        return VA_STATUS_ERROR_ALLOCATION_FAILED;
-
-    util_dynarray_init(&surface->subpics);
-    surfaces[index] = handle_table_add(drv->htab, surface);
-
-    if (!surfaces[index])
+   if (!resource)
       return VA_STATUS_ERROR_ALLOCATION_FAILED;
 
-    return VA_STATUS_SUCCESS;
+   memset(resources, 0, sizeof resources);
+   resources[0] = resource;
+
+   surface->buffer = vl_video_buffer_create_ex2(drv->pipe, templat, resources);
+   if (!surface->buffer)
+      return VA_STATUS_ERROR_ALLOCATION_FAILED;
+
+   util_dynarray_init(&surface->subpics);
+   surfaces[index] = handle_table_add(drv->htab, surface);
+
+   if (!surfaces[index])
+      return VA_STATUS_ERROR_ALLOCATION_FAILED;
+
+   return VA_STATUS_SUCCESS;
 }
 
 VAStatus
@@ -491,143 +491,143 @@ vlVaCreateSurfaces2(VADriverContextP ctx, unsigned int format,
                     VASurfaceID *surfaces, unsigned int num_surfaces,
                     VASurfaceAttrib *attrib_list, unsigned int num_attribs)
 {
-    vlVaDriver *drv;
-    VASurfaceAttribExternalBuffers *memory_attibute;
-    struct pipe_video_buffer templat;
-    struct pipe_screen *pscreen;
-    int i;
-    int memory_type;
-    int expected_fourcc;
-    VAStatus vaStatus;
+   vlVaDriver *drv;
+   VASurfaceAttribExternalBuffers *memory_attibute;
+   struct pipe_video_buffer templat;
+   struct pipe_screen *pscreen;
+   int i;
+   int memory_type;
+   int expected_fourcc;
+   VAStatus vaStatus;
 
-    if (!ctx)
-       return VA_STATUS_ERROR_INVALID_CONTEXT;
+   if (!ctx)
+      return VA_STATUS_ERROR_INVALID_CONTEXT;
 
-    if (!(width && height))
-       return VA_STATUS_ERROR_INVALID_IMAGE_FORMAT;
+   if (!(width && height))
+      return VA_STATUS_ERROR_INVALID_IMAGE_FORMAT;
 
-    drv = VL_VA_DRIVER(ctx);
+   drv = VL_VA_DRIVER(ctx);
 
-    if (!drv)
-        return VA_STATUS_ERROR_INVALID_CONTEXT;
+   if (!drv)
+      return VA_STATUS_ERROR_INVALID_CONTEXT;
 
-    pscreen = VL_VA_PSCREEN(ctx);
+   pscreen = VL_VA_PSCREEN(ctx);
 
-    if (!pscreen)
-        return VA_STATUS_ERROR_INVALID_CONTEXT;
+   if (!pscreen)
+      return VA_STATUS_ERROR_INVALID_CONTEXT;
 
-    /* Default. */
-    memory_attibute = NULL;
-    memory_type = VA_SURFACE_ATTRIB_MEM_TYPE_VA;
-    expected_fourcc = 0;
+   /* Default. */
+   memory_attibute = NULL;
+   memory_type = VA_SURFACE_ATTRIB_MEM_TYPE_VA;
+   expected_fourcc = 0;
 
-    for (i = 0; i < num_attribs && attrib_list; i++) {
-        if ((attrib_list[i].type == VASurfaceAttribPixelFormat) &&
-            (attrib_list[i].flags & VA_SURFACE_ATTRIB_SETTABLE)) {
-            if (attrib_list[i].value.type != VAGenericValueTypeInteger)
-                return VA_STATUS_ERROR_INVALID_PARAMETER;
-            expected_fourcc = attrib_list[i].value.value.i;
-        }
+   for (i = 0; i < num_attribs && attrib_list; i++) {
+      if ((attrib_list[i].type == VASurfaceAttribPixelFormat) &&
+          (attrib_list[i].flags & VA_SURFACE_ATTRIB_SETTABLE)) {
+         if (attrib_list[i].value.type != VAGenericValueTypeInteger)
+            return VA_STATUS_ERROR_INVALID_PARAMETER;
+         expected_fourcc = attrib_list[i].value.value.i;
+      }
 
-        if ((attrib_list[i].type == VASurfaceAttribMemoryType) &&
-            (attrib_list[i].flags & VA_SURFACE_ATTRIB_SETTABLE)) {
+      if ((attrib_list[i].type == VASurfaceAttribMemoryType) &&
+          (attrib_list[i].flags & VA_SURFACE_ATTRIB_SETTABLE)) {
 
-            if (attrib_list[i].value.type != VAGenericValueTypeInteger)
-                return VA_STATUS_ERROR_INVALID_PARAMETER;
+         if (attrib_list[i].value.type != VAGenericValueTypeInteger)
+            return VA_STATUS_ERROR_INVALID_PARAMETER;
 
-            switch (attrib_list[i].value.value.i) {
-                case VA_SURFACE_ATTRIB_MEM_TYPE_VA:
-                case VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME:
-                   memory_type = attrib_list[i].value.value.i;
-                   break;
-                default:
-                   return VA_STATUS_ERROR_UNSUPPORTED_MEMORY_TYPE;
-            }
-        }
-
-        if ((attrib_list[i].type == VASurfaceAttribExternalBufferDescriptor) &&
-            (attrib_list[i].flags == VA_SURFACE_ATTRIB_SETTABLE)) {
-            if (attrib_list[i].value.type != VAGenericValueTypePointer)
-                return VA_STATUS_ERROR_INVALID_PARAMETER;
-            memory_attibute = (VASurfaceAttribExternalBuffers *)attrib_list[i].value.value.p;
-        }
-    }
-
-    if (VA_RT_FORMAT_YUV420 != format &&
-        VA_RT_FORMAT_YUV422 != format &&
-        VA_RT_FORMAT_YUV444 != format &&
-        VA_RT_FORMAT_RGB32  != format) {
-        return VA_STATUS_ERROR_UNSUPPORTED_RT_FORMAT;
-    }
-
-    switch (memory_type) {
-        case VA_SURFACE_ATTRIB_MEM_TYPE_VA:
+         switch (attrib_list[i].value.value.i) {
+         case VA_SURFACE_ATTRIB_MEM_TYPE_VA:
+         case VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME:
+            memory_type = attrib_list[i].value.value.i;
             break;
-        case VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME:
-            if (!memory_attibute)
-               return VA_STATUS_ERROR_INVALID_PARAMETER;
+         default:
+            return VA_STATUS_ERROR_UNSUPPORTED_MEMORY_TYPE;
+         }
+      }
 
-            expected_fourcc = memory_attibute->pixel_format;
-            break;
-        default:
-            assert(0);
-    }
+      if ((attrib_list[i].type == VASurfaceAttribExternalBufferDescriptor) &&
+          (attrib_list[i].flags == VA_SURFACE_ATTRIB_SETTABLE)) {
+         if (attrib_list[i].value.type != VAGenericValueTypePointer)
+            return VA_STATUS_ERROR_INVALID_PARAMETER;
+         memory_attibute = (VASurfaceAttribExternalBuffers *)attrib_list[i].value.value.p;
+      }
+   }
 
-    memset(&templat, 0, sizeof(templat));
+   if (VA_RT_FORMAT_YUV420 != format &&
+       VA_RT_FORMAT_YUV422 != format &&
+       VA_RT_FORMAT_YUV444 != format &&
+       VA_RT_FORMAT_RGB32  != format) {
+      return VA_STATUS_ERROR_UNSUPPORTED_RT_FORMAT;
+   }
 
-    if (expected_fourcc) {
-       templat.buffer_format = VaFourccToPipeFormat(expected_fourcc);
-       templat.interlaced = 0;
-    } else {
-        templat.buffer_format = pscreen->get_video_param
-        (
-           pscreen,
-           PIPE_VIDEO_PROFILE_UNKNOWN,
-           PIPE_VIDEO_ENTRYPOINT_BITSTREAM,
-           PIPE_VIDEO_CAP_PREFERED_FORMAT
-        );
-        templat.interlaced = pscreen->get_video_param
-        (
-           pscreen,
-           PIPE_VIDEO_PROFILE_UNKNOWN,
-           PIPE_VIDEO_ENTRYPOINT_BITSTREAM,
-           PIPE_VIDEO_CAP_PREFERS_INTERLACED
-        );
-    }
+   switch (memory_type) {
+   case VA_SURFACE_ATTRIB_MEM_TYPE_VA:
+      break;
+   case VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME:
+      if (!memory_attibute)
+         return VA_STATUS_ERROR_INVALID_PARAMETER;
 
-    templat.chroma_format = ChromaToPipe(format);
+      expected_fourcc = memory_attibute->pixel_format;
+      break;
+   default:
+      assert(0);
+   }
 
-    templat.width = width;
-    templat.height = height;
+   memset(&templat, 0, sizeof(templat));
 
-    memset(surfaces, VA_INVALID_ID, num_surfaces * sizeof(VASurfaceID));
+   if (expected_fourcc) {
+      templat.buffer_format = VaFourccToPipeFormat(expected_fourcc);
+      templat.interlaced = 0;
+   } else {
+      templat.buffer_format = pscreen->get_video_param
+            (
+               pscreen,
+               PIPE_VIDEO_PROFILE_UNKNOWN,
+               PIPE_VIDEO_ENTRYPOINT_BITSTREAM,
+               PIPE_VIDEO_CAP_PREFERED_FORMAT
+               );
+      templat.interlaced = pscreen->get_video_param
+            (
+               pscreen,
+               PIPE_VIDEO_PROFILE_UNKNOWN,
+               PIPE_VIDEO_ENTRYPOINT_BITSTREAM,
+               PIPE_VIDEO_CAP_PREFERS_INTERLACED
+               );
+   }
 
-    for (i = 0; i < num_surfaces; i++) {
-        vlVaSurface *surf = CALLOC(1, sizeof(vlVaSurface));
-        if (!surf)
+   templat.chroma_format = ChromaToPipe(format);
+
+   templat.width = width;
+   templat.height = height;
+
+   memset(surfaces, VA_INVALID_ID, num_surfaces * sizeof(VASurfaceID));
+
+   for (i = 0; i < num_surfaces; i++) {
+      vlVaSurface *surf = CALLOC(1, sizeof(vlVaSurface));
+      if (!surf)
+         goto no_res;
+
+      surf->templat = templat;
+
+      switch (memory_type) {
+      case VA_SURFACE_ATTRIB_MEM_TYPE_VA:
+         surf->buffer = drv->pipe->create_video_buffer(drv->pipe, &templat);
+         if (!surf->buffer)
             goto no_res;
+         util_dynarray_init(&surf->subpics);
+         surfaces[i] = handle_table_add(drv->htab, surf);
+         break;
+      case VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME:
+         vaStatus = suface_from_external_memory(ctx, surf, memory_attibute, i, surfaces, &templat);
+         if (vaStatus != VA_STATUS_SUCCESS)
+            goto no_res;
+         break;
+      default:
+         assert(0);
+      }
+   }
 
-        surf->templat = templat;
-
-        switch (memory_type) {
-            case VA_SURFACE_ATTRIB_MEM_TYPE_VA:
-                surf->buffer = drv->pipe->create_video_buffer(drv->pipe, &templat);
-                if (!surf->buffer)
-                    goto no_res;
-                util_dynarray_init(&surf->subpics);
-                surfaces[i] = handle_table_add(drv->htab, surf);
-                break;
-            case VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME:
-                vaStatus = suface_from_external_memory(ctx, surf, memory_attibute, i, surfaces, &templat);
-                if (vaStatus != VA_STATUS_SUCCESS)
-                  goto no_res;
-                break;
-            default:
-                assert(0);
-        }
-    }
-
-    return VA_STATUS_SUCCESS;
+   return VA_STATUS_SUCCESS;
 
 no_res:
    if (i)
@@ -707,7 +707,7 @@ vlVaQueryVideoProcPipelineCaps(VADriverContextP ctx, VAContextID context,
       return VA_STATUS_ERROR_INVALID_CONTEXT;
 
    if (!pipeline_cap)
-   return VA_STATUS_ERROR_INVALID_PARAMETER;
+      return VA_STATUS_ERROR_INVALID_PARAMETER;
 
    if (num_filters && !filters)
       return VA_STATUS_ERROR_INVALID_PARAMETER;

From 497bde6727260e7719c680dc483b10c0751a3fcd Mon Sep 17 00:00:00 2001
From: Julien Isorce <julien.isorce@gmail.com>
Date: Thu, 5 Nov 2015 08:24:45 +0000
Subject: [PATCH 068/287] st/va: fix memory leak on error in
 vlVaCreateSurfaces2
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Found by coverity: CID #1337953

Signed-off-by: Julien Isorce <j.isorce@samsung.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Emil Velikov <emil.l.velikov@gmail.com>
---
 src/gallium/state_trackers/va/surface.c | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/src/gallium/state_trackers/va/surface.c b/src/gallium/state_trackers/va/surface.c
index 59815aa4c5a..3db21c3de39 100644
--- a/src/gallium/state_trackers/va/surface.c
+++ b/src/gallium/state_trackers/va/surface.c
@@ -479,8 +479,10 @@ suface_from_external_memory(VADriverContextP ctx, vlVaSurface *surface,
    util_dynarray_init(&surface->subpics);
    surfaces[index] = handle_table_add(drv->htab, surface);
 
-   if (!surfaces[index])
+   if (!surfaces[index]) {
+      surface->buffer->destroy(surface->buffer);
       return VA_STATUS_ERROR_ALLOCATION_FAILED;
+   }
 
    return VA_STATUS_SUCCESS;
 }
@@ -612,15 +614,19 @@ vlVaCreateSurfaces2(VADriverContextP ctx, unsigned int format,
       switch (memory_type) {
       case VA_SURFACE_ATTRIB_MEM_TYPE_VA:
          surf->buffer = drv->pipe->create_video_buffer(drv->pipe, &templat);
-         if (!surf->buffer)
+         if (!surf->buffer) {
+            FREE(surf);
             goto no_res;
+         }
          util_dynarray_init(&surf->subpics);
          surfaces[i] = handle_table_add(drv->htab, surf);
          break;
       case VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME:
          vaStatus = suface_from_external_memory(ctx, surf, memory_attibute, i, surfaces, &templat);
-         if (vaStatus != VA_STATUS_SUCCESS)
+         if (vaStatus != VA_STATUS_SUCCESS) {
+            FREE(surf);
             goto no_res;
+         }
          break;
       default:
          assert(0);

From 581111c4d67c65305dcae83789ac504deeec9da2 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Thu, 5 Nov 2015 19:03:39 -0700
Subject: [PATCH 069/287] mesa: report enum name in glClientActiveTexture()
 error string

As we do for glActiveTexture().  Trivial.
---
 src/mesa/main/texstate.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/mesa/main/texstate.c b/src/mesa/main/texstate.c
index cb147fac476..9d88554d945 100644
--- a/src/mesa/main/texstate.c
+++ b/src/mesa/main/texstate.c
@@ -330,7 +330,8 @@ _mesa_ClientActiveTexture(GLenum texture)
       return;
 
    if (texUnit >= ctx->Const.MaxTextureCoordUnits) {
-      _mesa_error(ctx, GL_INVALID_ENUM, "glClientActiveTexture(texture)");
+      _mesa_error(ctx, GL_INVALID_ENUM, "glClientActiveTexture(texture=%s)",
+                  _mesa_enum_to_string(texture));
       return;
    }
 

From d68226087cf5f2f686d6c8f3377c5a1dec3d8bc4 Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Thu, 5 Nov 2015 23:12:52 -0500
Subject: [PATCH 070/287] nvc0: reintroduce BGRA4 format support

Commit 342e68dc60 (nvc0: remove BGRA4 format support) removed the
support to fix a WoW trace. However after further experimentation, I was
able to get the blit to work by using a different "fake" format in the
2d engine.

The reason why this worked on nv50 is that nv50 falls back to the 3d
blit path in case either the src or the dst aren't "faithfully"
supported, while nvc0 only does it for the dst format. RG8 is better
supported by the nvc0 2d engine than R16.

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
---
 src/gallium/drivers/nouveau/nv50/nv50_formats.c | 2 --
 src/gallium/drivers/nouveau/nvc0/nvc0_surface.c | 2 +-
 2 files changed, 1 insertion(+), 3 deletions(-)

diff --git a/src/gallium/drivers/nouveau/nv50/nv50_formats.c b/src/gallium/drivers/nouveau/nv50/nv50_formats.c
index 80f92be682d..49a93bf1d91 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_formats.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_formats.c
@@ -203,10 +203,8 @@ const struct nv50_format nv50_format_table[PIPE_FORMAT_COUNT] =
    F3B(B5G6R5_UNORM, B5G6R5_UNORM, C2, C1, C0, xx, UNORM, 5_6_5, TD),
    C4B(B5G5R5A1_UNORM, BGR5_A1_UNORM, C2, C1, C0, C3, UNORM, 5_5_5_1, TD),
    F3B(B5G5R5X1_UNORM, BGR5_X1_UNORM, C2, C1, C0, xx, UNORM, 5_5_5_1, TD),
-#if NOUVEAU_DRIVER != 0xc0
    C4B(B4G4R4A4_UNORM, NONE, C2, C1, C0, C3, UNORM, 4_4_4_4, T),
    F3B(B4G4R4X4_UNORM, NONE, C2, C1, C0, xx, UNORM, 4_4_4_4, T),
-#endif
    F3B(R9G9B9E5_FLOAT, NONE, C0, C1, C2, xx, FLOAT, 9_9_9_E5, T),
 
    C4A(R10G10B10A2_UNORM, RGB10_A2_UNORM, C0, C1, C2, C3, UNORM, 10_10_10_2,
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
index be123349148..5f47bad22f3 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
@@ -67,7 +67,7 @@ nvc0_2d_format(enum pipe_format format, bool dst, bool dst_src_equal)
    case 1:
       return NV50_SURFACE_FORMAT_R8_UNORM;
    case 2:
-      return NV50_SURFACE_FORMAT_R16_UNORM;
+      return NV50_SURFACE_FORMAT_RG8_UNORM;
    case 4:
       return NV50_SURFACE_FORMAT_BGRA8_UNORM;
    case 8:

From 99597d033a62bdfa31148714f4d2c40f84655a5a Mon Sep 17 00:00:00 2001
From: Rob Clark <robdclark@gmail.com>
Date: Wed, 21 Oct 2015 10:57:15 -0400
Subject: [PATCH 071/287] nir: some small cleanups

The various cf nodes all get allocated w/ shader as their ralloc_parent,
so let's make this more explicit.  Plus a couple of other corrections/
clarifications.

Signed-off-by: Rob Clark <robclark@freedesktop.org>
Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
---
 src/glsl/nir/nir.c | 18 +++++++++---------
 src/glsl/nir/nir.h | 10 +++++-----
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/src/glsl/nir/nir.c b/src/glsl/nir/nir.c
index 5f03095d673..bb7a5fa5835 100644
--- a/src/glsl/nir/nir.c
+++ b/src/glsl/nir/nir.c
@@ -302,9 +302,9 @@ nir_function_impl_create(nir_function_overload *overload)
 }
 
 nir_block *
-nir_block_create(void *mem_ctx)
+nir_block_create(nir_shader *shader)
 {
-   nir_block *block = ralloc(mem_ctx, nir_block);
+   nir_block *block = ralloc(shader, nir_block);
 
    cf_init(&block->cf_node, nir_cf_node_block);
 
@@ -330,19 +330,19 @@ src_init(nir_src *src)
 }
 
 nir_if *
-nir_if_create(void *mem_ctx)
+nir_if_create(nir_shader *shader)
 {
-   nir_if *if_stmt = ralloc(mem_ctx, nir_if);
+   nir_if *if_stmt = ralloc(shader, nir_if);
 
    cf_init(&if_stmt->cf_node, nir_cf_node_if);
    src_init(&if_stmt->condition);
 
-   nir_block *then = nir_block_create(mem_ctx);
+   nir_block *then = nir_block_create(shader);
    exec_list_make_empty(&if_stmt->then_list);
    exec_list_push_tail(&if_stmt->then_list, &then->cf_node.node);
    then->cf_node.parent = &if_stmt->cf_node;
 
-   nir_block *else_stmt = nir_block_create(mem_ctx);
+   nir_block *else_stmt = nir_block_create(shader);
    exec_list_make_empty(&if_stmt->else_list);
    exec_list_push_tail(&if_stmt->else_list, &else_stmt->cf_node.node);
    else_stmt->cf_node.parent = &if_stmt->cf_node;
@@ -351,13 +351,13 @@ nir_if_create(void *mem_ctx)
 }
 
 nir_loop *
-nir_loop_create(void *mem_ctx)
+nir_loop_create(nir_shader *shader)
 {
-   nir_loop *loop = ralloc(mem_ctx, nir_loop);
+   nir_loop *loop = ralloc(shader, nir_loop);
 
    cf_init(&loop->cf_node, nir_cf_node_loop);
 
-   nir_block *body = nir_block_create(mem_ctx);
+   nir_block *body = nir_block_create(shader);
    exec_list_make_empty(&loop->body);
    exec_list_push_tail(&loop->body, &body->cf_node.node);
    body->cf_node.parent = &loop->cf_node;
diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
index f8de40d0d13..ef39df5dc51 100644
--- a/src/glsl/nir/nir.h
+++ b/src/glsl/nir/nir.h
@@ -394,10 +394,10 @@ typedef struct {
     */
    bool is_packed;
 
-   /** set of nir_instr's where this register is used (read from) */
+   /** set of nir_src's where this register is used (read from) */
    struct list_head uses;
 
-   /** set of nir_instr's where this register is defined (written to) */
+   /** set of nir_dest's where this register is defined (written to) */
    struct list_head defs;
 
    /** set of nir_if's where this register is used as a condition */
@@ -1621,9 +1621,9 @@ nir_function_overload *nir_function_overload_create(nir_function *func);
 
 nir_function_impl *nir_function_impl_create(nir_function_overload *func);
 
-nir_block *nir_block_create(void *mem_ctx);
-nir_if *nir_if_create(void *mem_ctx);
-nir_loop *nir_loop_create(void *mem_ctx);
+nir_block *nir_block_create(nir_shader *shader);
+nir_if *nir_if_create(nir_shader *shader);
+nir_loop *nir_loop_create(nir_shader *shader);
 
 nir_function_impl *nir_cf_node_get_function(nir_cf_node *node);
 

From 8f55ebe802ea930d14eef9cd622aeb9a8d989e01 Mon Sep 17 00:00:00 2001
From: Boyan Ding <boyan.j.ding@gmail.com>
Date: Fri, 16 Oct 2015 15:15:38 +0800
Subject: [PATCH 072/287] freedreno/ir3: Use nir_foreach_variable

Signed-off-by: Boyan Ding <boyan.j.ding@gmail.com>
Signed-off-by: Rob Clark <robclark@freedesktop.org>
---
 src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
index 8c9234b3847..157dc73a3c6 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
@@ -2325,17 +2325,17 @@ emit_instructions(struct ir3_compile *ctx)
 	}
 
 	/* Setup inputs: */
-	foreach_list_typed(nir_variable, var, node, &ctx->s->inputs) {
+	nir_foreach_variable(var, &ctx->s->inputs) {
 		setup_input(ctx, var);
 	}
 
 	/* Setup outputs: */
-	foreach_list_typed(nir_variable, var, node, &ctx->s->outputs) {
+	nir_foreach_variable(var, &ctx->s->outputs) {
 		setup_output(ctx, var);
 	}
 
 	/* Setup variables (which should only be arrays): */
-	foreach_list_typed(nir_variable, var, node, &ctx->s->globals) {
+	nir_foreach_variable(var, &ctx->s->globals) {
 		declare_var(ctx, var);
 	}
 

From 6f5e0c08a477c6872e8be6d1b09aea97db7fe125 Mon Sep 17 00:00:00 2001
From: Guillaume Charifi <guillaume.charifi@sfr.fr>
Date: Fri, 6 Nov 2015 11:17:25 -0500
Subject: [PATCH 073/287] freedreno: add a305 support

Signed-off-by: Rob Clark <robclark@freedesktop.org>
---
 src/gallium/drivers/freedreno/freedreno_screen.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c b/src/gallium/drivers/freedreno/freedreno_screen.c
index 9f8c33263fb..7ee1a3fa9cf 100644
--- a/src/gallium/drivers/freedreno/freedreno_screen.c
+++ b/src/gallium/drivers/freedreno/freedreno_screen.c
@@ -549,6 +549,7 @@ fd_screen_create(struct fd_device *dev)
 	case 220:
 		fd2_screen_init(pscreen);
 		break;
+	case 305:
 	case 307:
 	case 320:
 	case 330:

From 7465e161248b94d0bd1cdae6fc4c501ecfcf9b0b Mon Sep 17 00:00:00 2001
From: Rob Clark <robclark@freedesktop.org>
Date: Tue, 27 Oct 2015 11:33:32 -0400
Subject: [PATCH 074/287] freedreno: update generated headers

Signed-off-by: Rob Clark <robclark@freedesktop.org>
---
 src/gallium/drivers/freedreno/a2xx/a2xx.xml.h |  5 +-
 src/gallium/drivers/freedreno/a3xx/a3xx.xml.h |  5 +-
 src/gallium/drivers/freedreno/a4xx/a4xx.xml.h | 65 ++++++++++++-------
 src/gallium/drivers/freedreno/a4xx/fd4_emit.c | 12 ----
 .../drivers/freedreno/adreno_common.xml.h     |  5 +-
 .../drivers/freedreno/adreno_pm4.xml.h        |  5 +-
 6 files changed, 54 insertions(+), 43 deletions(-)

diff --git a/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h b/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h
index 2853787a340..ef235734755 100644
--- a/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h
+++ b/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h
@@ -8,13 +8,14 @@ http://github.com/freedreno/envytools/
 git clone https://github.com/freedreno/envytools.git
 
 The rules-ng-ng source files this header was generated from are:
-- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml               (    364 bytes, from 2015-05-20 20:03:07)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml               (    398 bytes, from 2015-09-24 17:25:31)
 - /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml  (   1453 bytes, from 2015-05-20 20:03:07)
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml          (  32901 bytes, from 2015-05-20 20:03:14)
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml (  10755 bytes, from 2015-09-14 20:46:55)
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml    (  14968 bytes, from 2015-05-20 20:12:27)
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml          (  67771 bytes, from 2015-09-14 20:46:55)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml          (  63970 bytes, from 2015-09-14 20:50:12)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml          (  63914 bytes, from 2015-10-27 17:13:16)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml         (   1773 bytes, from 2015-09-24 17:30:00)
 
 Copyright (C) 2013-2015 by the following authors:
 - Rob Clark <robdclark@gmail.com> (robclark)
diff --git a/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h b/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h
index 4bbcb33614c..b5e1ddadde0 100644
--- a/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h
+++ b/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h
@@ -8,13 +8,14 @@ http://github.com/freedreno/envytools/
 git clone https://github.com/freedreno/envytools.git
 
 The rules-ng-ng source files this header was generated from are:
-- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml               (    364 bytes, from 2015-05-20 20:03:07)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml               (    398 bytes, from 2015-09-24 17:25:31)
 - /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml  (   1453 bytes, from 2015-05-20 20:03:07)
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml          (  32901 bytes, from 2015-05-20 20:03:14)
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml (  10755 bytes, from 2015-09-14 20:46:55)
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml    (  14968 bytes, from 2015-05-20 20:12:27)
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml          (  67771 bytes, from 2015-09-14 20:46:55)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml          (  63970 bytes, from 2015-09-14 20:50:12)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml          (  63914 bytes, from 2015-10-27 17:13:16)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml         (   1773 bytes, from 2015-09-24 17:30:00)
 
 Copyright (C) 2013-2015 by the following authors:
 - Rob Clark <robdclark@gmail.com> (robclark)
diff --git a/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h b/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h
index 819f5b14a17..9f970365464 100644
--- a/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h
+++ b/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h
@@ -8,13 +8,14 @@ http://github.com/freedreno/envytools/
 git clone https://github.com/freedreno/envytools.git
 
 The rules-ng-ng source files this header was generated from are:
-- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml               (    364 bytes, from 2015-05-20 20:03:07)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml               (    398 bytes, from 2015-09-24 17:25:31)
 - /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml  (   1453 bytes, from 2015-05-20 20:03:07)
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml          (  32901 bytes, from 2015-05-20 20:03:14)
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml (  10755 bytes, from 2015-09-14 20:46:55)
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml    (  14968 bytes, from 2015-05-20 20:12:27)
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml          (  67771 bytes, from 2015-09-14 20:46:55)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml          (  63970 bytes, from 2015-09-14 20:50:12)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml          (  63914 bytes, from 2015-10-27 17:13:16)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml         (   1773 bytes, from 2015-09-24 17:30:00)
 
 Copyright (C) 2013-2015 by the following authors:
 - Rob Clark <robdclark@gmail.com> (robclark)
@@ -489,8 +490,8 @@ static inline uint32_t A4XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR(enum adreno_r
 	return ((val) << A4XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR__SHIFT) & A4XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR__MASK;
 }
 
-#define REG_A4XX_RB_BLEND_RED					0x000020f3
-#define A4XX_RB_BLEND_RED_UINT__MASK				0x00007fff
+#define REG_A4XX_RB_BLEND_RED					0x000020f0
+#define A4XX_RB_BLEND_RED_UINT__MASK				0x0000ffff
 #define A4XX_RB_BLEND_RED_UINT__SHIFT				0
 static inline uint32_t A4XX_RB_BLEND_RED_UINT(uint32_t val)
 {
@@ -503,8 +504,16 @@ static inline uint32_t A4XX_RB_BLEND_RED_FLOAT(float val)
 	return ((util_float_to_half(val)) << A4XX_RB_BLEND_RED_FLOAT__SHIFT) & A4XX_RB_BLEND_RED_FLOAT__MASK;
 }
 
-#define REG_A4XX_RB_BLEND_GREEN					0x000020f4
-#define A4XX_RB_BLEND_GREEN_UINT__MASK				0x00007fff
+#define REG_A4XX_RB_BLEND_RED_F32				0x000020f1
+#define A4XX_RB_BLEND_RED_F32__MASK				0xffffffff
+#define A4XX_RB_BLEND_RED_F32__SHIFT				0
+static inline uint32_t A4XX_RB_BLEND_RED_F32(float val)
+{
+	return ((fui(val)) << A4XX_RB_BLEND_RED_F32__SHIFT) & A4XX_RB_BLEND_RED_F32__MASK;
+}
+
+#define REG_A4XX_RB_BLEND_GREEN					0x000020f2
+#define A4XX_RB_BLEND_GREEN_UINT__MASK				0x0000ffff
 #define A4XX_RB_BLEND_GREEN_UINT__SHIFT				0
 static inline uint32_t A4XX_RB_BLEND_GREEN_UINT(uint32_t val)
 {
@@ -517,8 +526,16 @@ static inline uint32_t A4XX_RB_BLEND_GREEN_FLOAT(float val)
 	return ((util_float_to_half(val)) << A4XX_RB_BLEND_GREEN_FLOAT__SHIFT) & A4XX_RB_BLEND_GREEN_FLOAT__MASK;
 }
 
-#define REG_A4XX_RB_BLEND_BLUE					0x000020f5
-#define A4XX_RB_BLEND_BLUE_UINT__MASK				0x00007fff
+#define REG_A4XX_RB_BLEND_GREEN_F32				0x000020f3
+#define A4XX_RB_BLEND_GREEN_F32__MASK				0xffffffff
+#define A4XX_RB_BLEND_GREEN_F32__SHIFT				0
+static inline uint32_t A4XX_RB_BLEND_GREEN_F32(float val)
+{
+	return ((fui(val)) << A4XX_RB_BLEND_GREEN_F32__SHIFT) & A4XX_RB_BLEND_GREEN_F32__MASK;
+}
+
+#define REG_A4XX_RB_BLEND_BLUE					0x000020f4
+#define A4XX_RB_BLEND_BLUE_UINT__MASK				0x0000ffff
 #define A4XX_RB_BLEND_BLUE_UINT__SHIFT				0
 static inline uint32_t A4XX_RB_BLEND_BLUE_UINT(uint32_t val)
 {
@@ -531,8 +548,16 @@ static inline uint32_t A4XX_RB_BLEND_BLUE_FLOAT(float val)
 	return ((util_float_to_half(val)) << A4XX_RB_BLEND_BLUE_FLOAT__SHIFT) & A4XX_RB_BLEND_BLUE_FLOAT__MASK;
 }
 
+#define REG_A4XX_RB_BLEND_BLUE_F32				0x000020f5
+#define A4XX_RB_BLEND_BLUE_F32__MASK				0xffffffff
+#define A4XX_RB_BLEND_BLUE_F32__SHIFT				0
+static inline uint32_t A4XX_RB_BLEND_BLUE_F32(float val)
+{
+	return ((fui(val)) << A4XX_RB_BLEND_BLUE_F32__SHIFT) & A4XX_RB_BLEND_BLUE_F32__MASK;
+}
+
 #define REG_A4XX_RB_BLEND_ALPHA					0x000020f6
-#define A4XX_RB_BLEND_ALPHA_UINT__MASK				0x00007fff
+#define A4XX_RB_BLEND_ALPHA_UINT__MASK				0x0000ffff
 #define A4XX_RB_BLEND_ALPHA_UINT__SHIFT				0
 static inline uint32_t A4XX_RB_BLEND_ALPHA_UINT(uint32_t val)
 {
@@ -545,6 +570,14 @@ static inline uint32_t A4XX_RB_BLEND_ALPHA_FLOAT(float val)
 	return ((util_float_to_half(val)) << A4XX_RB_BLEND_ALPHA_FLOAT__SHIFT) & A4XX_RB_BLEND_ALPHA_FLOAT__MASK;
 }
 
+#define REG_A4XX_RB_BLEND_ALPHA_F32				0x000020f7
+#define A4XX_RB_BLEND_ALPHA_F32__MASK				0xffffffff
+#define A4XX_RB_BLEND_ALPHA_F32__SHIFT				0
+static inline uint32_t A4XX_RB_BLEND_ALPHA_F32(float val)
+{
+	return ((fui(val)) << A4XX_RB_BLEND_ALPHA_F32__SHIFT) & A4XX_RB_BLEND_ALPHA_F32__MASK;
+}
+
 #define REG_A4XX_RB_ALPHA_CONTROL				0x000020f8
 #define A4XX_RB_ALPHA_CONTROL_ALPHA_REF__MASK			0x000000ff
 #define A4XX_RB_ALPHA_CONTROL_ALPHA_REF__SHIFT			0
@@ -2645,20 +2678,6 @@ static inline uint32_t A4XX_PC_HS_PARAM_PRIMTYPE(enum adreno_pa_su_sc_draw val)
 
 #define REG_A4XX_UNKNOWN_20EF					0x000020ef
 
-#define REG_A4XX_UNKNOWN_20F0					0x000020f0
-
-#define REG_A4XX_UNKNOWN_20F1					0x000020f1
-
-#define REG_A4XX_UNKNOWN_20F2					0x000020f2
-
-#define REG_A4XX_UNKNOWN_20F7					0x000020f7
-#define A4XX_UNKNOWN_20F7__MASK					0xffffffff
-#define A4XX_UNKNOWN_20F7__SHIFT				0
-static inline uint32_t A4XX_UNKNOWN_20F7(float val)
-{
-	return ((fui(val)) << A4XX_UNKNOWN_20F7__SHIFT) & A4XX_UNKNOWN_20F7__MASK;
-}
-
 #define REG_A4XX_UNKNOWN_2152					0x00002152
 
 #define REG_A4XX_UNKNOWN_2153					0x00002153
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c
index cf5dd7b0f17..848a82fee7e 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c
@@ -699,15 +699,6 @@ fd4_emit_restore(struct fd_context *ctx)
 	OUT_PKT0(ring, REG_A4XX_UNKNOWN_20EF, 1);
 	OUT_RING(ring, 0x00000000);
 
-	OUT_PKT0(ring, REG_A4XX_UNKNOWN_20F0, 1);
-	OUT_RING(ring, 0x00000000);
-
-	OUT_PKT0(ring, REG_A4XX_UNKNOWN_20F1, 1);
-	OUT_RING(ring, 0x00000000);
-
-	OUT_PKT0(ring, REG_A4XX_UNKNOWN_20F2, 1);
-	OUT_RING(ring, 0x00000000);
-
 	OUT_PKT0(ring, REG_A4XX_RB_BLEND_RED, 4);
 	OUT_RING(ring, A4XX_RB_BLEND_RED_UINT(0) |
 			A4XX_RB_BLEND_RED_FLOAT(0.0));
@@ -718,9 +709,6 @@ fd4_emit_restore(struct fd_context *ctx)
 	OUT_RING(ring, A4XX_RB_BLEND_ALPHA_UINT(0x7fff) |
 			A4XX_RB_BLEND_ALPHA_FLOAT(1.0));
 
-	OUT_PKT0(ring, REG_A4XX_UNKNOWN_20F7, 1);
-	OUT_RING(ring, 0x3f800000);
-
 	OUT_PKT0(ring, REG_A4XX_UNKNOWN_2152, 1);
 	OUT_RING(ring, 0x00000000);
 
diff --git a/src/gallium/drivers/freedreno/adreno_common.xml.h b/src/gallium/drivers/freedreno/adreno_common.xml.h
index 906368c0efa..ca3d2ac3fca 100644
--- a/src/gallium/drivers/freedreno/adreno_common.xml.h
+++ b/src/gallium/drivers/freedreno/adreno_common.xml.h
@@ -8,13 +8,14 @@ http://github.com/freedreno/envytools/
 git clone https://github.com/freedreno/envytools.git
 
 The rules-ng-ng source files this header was generated from are:
-- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml               (    364 bytes, from 2015-05-20 20:03:07)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml               (    398 bytes, from 2015-09-24 17:25:31)
 - /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml  (   1453 bytes, from 2015-05-20 20:03:07)
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml          (  32901 bytes, from 2015-05-20 20:03:14)
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml (  10755 bytes, from 2015-09-14 20:46:55)
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml    (  14968 bytes, from 2015-05-20 20:12:27)
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml          (  67771 bytes, from 2015-09-14 20:46:55)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml          (  63970 bytes, from 2015-09-14 20:50:12)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml          (  63914 bytes, from 2015-10-27 17:13:16)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml         (   1773 bytes, from 2015-09-24 17:30:00)
 
 Copyright (C) 2013-2015 by the following authors:
 - Rob Clark <robdclark@gmail.com> (robclark)
diff --git a/src/gallium/drivers/freedreno/adreno_pm4.xml.h b/src/gallium/drivers/freedreno/adreno_pm4.xml.h
index 490cf5beaf0..f095e3061b2 100644
--- a/src/gallium/drivers/freedreno/adreno_pm4.xml.h
+++ b/src/gallium/drivers/freedreno/adreno_pm4.xml.h
@@ -8,13 +8,14 @@ http://github.com/freedreno/envytools/
 git clone https://github.com/freedreno/envytools.git
 
 The rules-ng-ng source files this header was generated from are:
-- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml               (    364 bytes, from 2015-05-20 20:03:07)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml               (    398 bytes, from 2015-09-24 17:25:31)
 - /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml  (   1453 bytes, from 2015-05-20 20:03:07)
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml          (  32901 bytes, from 2015-05-20 20:03:14)
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml (  10755 bytes, from 2015-09-14 20:46:55)
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml    (  14968 bytes, from 2015-05-20 20:12:27)
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml          (  67771 bytes, from 2015-09-14 20:46:55)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml          (  63970 bytes, from 2015-09-14 20:50:12)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml          (  63914 bytes, from 2015-10-27 17:13:16)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml         (   1773 bytes, from 2015-09-24 17:30:00)
 
 Copyright (C) 2013-2015 by the following authors:
 - Rob Clark <robdclark@gmail.com> (robclark)

From 6459e780ae44d8826322e0dc2466d0ee6d9e9800 Mon Sep 17 00:00:00 2001
From: Rob Clark <robclark@freedesktop.org>
Date: Tue, 27 Oct 2015 11:38:34 -0400
Subject: [PATCH 075/287] freedreno/a4xx: fix blend color

Signed-off-by: Rob Clark <robclark@freedesktop.org>
---
 src/gallium/drivers/freedreno/a4xx/fd4_emit.c | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c
index 848a82fee7e..26b58718cd8 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c
@@ -613,15 +613,19 @@ fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
 
 	if (dirty & FD_DIRTY_BLEND_COLOR) {
 		struct pipe_blend_color *bcolor = &ctx->blend_color;
-		OUT_PKT0(ring, REG_A4XX_RB_BLEND_RED, 4);
-		OUT_RING(ring, A4XX_RB_BLEND_RED_UINT(bcolor->color[0] * 255.0) |
+		OUT_PKT0(ring, REG_A4XX_RB_BLEND_RED, 8);
+		OUT_RING(ring, A4XX_RB_BLEND_RED_UINT(bcolor->color[0] * 65535.0) |
 				A4XX_RB_BLEND_RED_FLOAT(bcolor->color[0]));
-		OUT_RING(ring, A4XX_RB_BLEND_GREEN_UINT(bcolor->color[1] * 255.0) |
+		OUT_RING(ring, A4XX_RB_BLEND_RED_F32(bcolor->color[0]));
+		OUT_RING(ring, A4XX_RB_BLEND_GREEN_UINT(bcolor->color[1] * 65535.0) |
 				A4XX_RB_BLEND_GREEN_FLOAT(bcolor->color[1]));
-		OUT_RING(ring, A4XX_RB_BLEND_BLUE_UINT(bcolor->color[2] * 255.0) |
+		OUT_RING(ring, A4XX_RB_BLEND_GREEN_F32(bcolor->color[1]));
+		OUT_RING(ring, A4XX_RB_BLEND_BLUE_UINT(bcolor->color[2] * 65535.0) |
 				A4XX_RB_BLEND_BLUE_FLOAT(bcolor->color[2]));
-		OUT_RING(ring, A4XX_RB_BLEND_ALPHA_UINT(bcolor->color[3] * 255.0) |
+		OUT_RING(ring, A4XX_RB_BLEND_BLUE_F32(bcolor->color[2]));
+		OUT_RING(ring, A4XX_RB_BLEND_ALPHA_UINT(bcolor->color[3] * 65535.0) |
 				A4XX_RB_BLEND_ALPHA_FLOAT(bcolor->color[3]));
+		OUT_RING(ring, A4XX_RB_BLEND_ALPHA_F32(bcolor->color[3]));
 	}
 
 	if (dirty & FD_DIRTY_VERTTEX) {

From bfc245e9ac430aab0b3c4c2a1b7767793f2854b0 Mon Sep 17 00:00:00 2001
From: Julien Isorce <j.isorce@samsung.com>
Date: Fri, 6 Nov 2015 09:45:11 +0000
Subject: [PATCH 076/287] st/va: properly indent buffer.c, config.c, image.c
 and picture.c

Some lines were using 4 indentation spaces instead of 3.

Signed-off-by: Julien Isorce <j.isorce@samsung.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Emil Velikov <emil.l.velikov@gmail.com>
---
 src/gallium/state_trackers/va/buffer.c  | 14 ++---
 src/gallium/state_trackers/va/config.c  | 12 ++--
 src/gallium/state_trackers/va/image.c   |  4 +-
 src/gallium/state_trackers/va/picture.c | 80 ++++++++++++-------------
 4 files changed, 55 insertions(+), 55 deletions(-)

diff --git a/src/gallium/state_trackers/va/buffer.c b/src/gallium/state_trackers/va/buffer.c
index 71a65037757..47bf35ac725 100644
--- a/src/gallium/state_trackers/va/buffer.c
+++ b/src/gallium/state_trackers/va/buffer.c
@@ -152,11 +152,11 @@ vlVaUnmapBuffer(VADriverContextP ctx, VABufferID buf_id)
       return VA_STATUS_ERROR_INVALID_BUFFER;
 
    if (buf->derived_surface.resource) {
-     if (!buf->derived_surface.transfer)
-        return VA_STATUS_ERROR_INVALID_BUFFER;
+      if (!buf->derived_surface.transfer)
+         return VA_STATUS_ERROR_INVALID_BUFFER;
 
-     pipe_buffer_unmap(drv->pipe, buf->derived_surface.transfer);
-     buf->derived_surface.transfer = NULL;
+      pipe_buffer_unmap(drv->pipe, buf->derived_surface.transfer);
+      buf->derived_surface.transfer = NULL;
    }
 
    return VA_STATUS_SUCCESS;
@@ -175,10 +175,10 @@ vlVaDestroyBuffer(VADriverContextP ctx, VABufferID buf_id)
       return VA_STATUS_ERROR_INVALID_BUFFER;
 
    if (buf->derived_surface.resource) {
-     if (buf->export_refcount > 0)
-       return VA_STATUS_ERROR_INVALID_BUFFER;
+      if (buf->export_refcount > 0)
+         return VA_STATUS_ERROR_INVALID_BUFFER;
 
-     pipe_resource_reference(&buf->derived_surface.resource, NULL);
+      pipe_resource_reference(&buf->derived_surface.resource, NULL);
    }
 
    FREE(buf->data);
diff --git a/src/gallium/state_trackers/va/config.c b/src/gallium/state_trackers/va/config.c
index 0f47aacdbd6..a545a18c1e0 100644
--- a/src/gallium/state_trackers/va/config.c
+++ b/src/gallium/state_trackers/va/config.c
@@ -71,8 +71,8 @@ vlVaQueryConfigEntrypoints(VADriverContextP ctx, VAProfile profile,
    *num_entrypoints = 0;
 
    if (profile == VAProfileNone) {
-       entrypoint_list[(*num_entrypoints)++] = VAEntrypointVideoProc;
-       return VA_STATUS_SUCCESS;
+      entrypoint_list[(*num_entrypoints)++] = VAEntrypointVideoProc;
+      return VA_STATUS_SUCCESS;
    }
 
    p = ProfileToPipe(profile);
@@ -104,7 +104,7 @@ vlVaGetConfigAttributes(VADriverContextP ctx, VAProfile profile, VAEntrypoint en
          value = VA_RT_FORMAT_YUV420;
          break;
       case VAConfigAttribRateControl:
-	 value = VA_RC_NONE;
+         value = VA_RC_NONE;
          break;
       default:
          value = VA_ATTRIB_NOT_SUPPORTED;
@@ -127,8 +127,8 @@ vlVaCreateConfig(VADriverContextP ctx, VAProfile profile, VAEntrypoint entrypoin
       return VA_STATUS_ERROR_INVALID_CONTEXT;
 
    if (profile == VAProfileNone && entrypoint == VAEntrypointVideoProc) {
-       *config_id = PIPE_VIDEO_PROFILE_UNKNOWN;
-       return VA_STATUS_SUCCESS;
+      *config_id = PIPE_VIDEO_PROFILE_UNKNOWN;
+      return VA_STATUS_SUCCESS;
    }
 
    p = ProfileToPipe(profile);
@@ -167,7 +167,7 @@ vlVaQueryConfigAttributes(VADriverContextP ctx, VAConfigID config_id, VAProfile
 
    if (config_id == PIPE_VIDEO_PROFILE_UNKNOWN) {
       *entrypoint = VAEntrypointVideoProc;
-       *num_attribs = 0;
+      *num_attribs = 0;
       return VA_STATUS_SUCCESS;
    }
 
diff --git a/src/gallium/state_trackers/va/image.c b/src/gallium/state_trackers/va/image.c
index c6d0c5abf65..ae07da857e1 100644
--- a/src/gallium/state_trackers/va/image.c
+++ b/src/gallium/state_trackers/va/image.c
@@ -447,8 +447,8 @@ vlVaPutImage(VADriverContextP ctx, VASurfaceID surface, VAImageID image,
       tmp_buf = drv->pipe->create_video_buffer(drv->pipe, &surf->templat);
 
       if (!tmp_buf) {
-          surf->templat.buffer_format = old_surf_format;
-          return VA_STATUS_ERROR_ALLOCATION_FAILED;
+         surf->templat.buffer_format = old_surf_format;
+         return VA_STATUS_ERROR_ALLOCATION_FAILED;
       }
 
       surf->buffer->destroy(surf->buffer);
diff --git a/src/gallium/state_trackers/va/picture.c b/src/gallium/state_trackers/va/picture.c
index e850689005d..644b8488ec2 100644
--- a/src/gallium/state_trackers/va/picture.c
+++ b/src/gallium/state_trackers/va/picture.c
@@ -65,7 +65,7 @@ vlVaBeginPicture(VADriverContextP ctx, VAContextID context_id, VASurfaceID rende
       if ((context->target->buffer_format != PIPE_FORMAT_B8G8R8A8_UNORM  &&
            context->target->buffer_format != PIPE_FORMAT_R8G8B8A8_UNORM) ||
            context->target->interlaced)
-          return VA_STATUS_ERROR_UNIMPLEMENTED;
+         return VA_STATUS_ERROR_UNIMPLEMENTED;
       return VA_STATUS_SUCCESS;
    }
 
@@ -717,60 +717,60 @@ handleVASliceDataBufferType(vlVaContext *context, vlVaBuffer *buf)
 static VAStatus
 handleVAProcPipelineParameterBufferType(vlVaDriver *drv, vlVaContext *context, vlVaBuffer *buf)
 {
-    struct u_rect src_rect;
-    struct u_rect dst_rect;
-    struct u_rect *dirty_area;
-    vlVaSurface *src_surface;
-    VAProcPipelineParameterBuffer *pipeline_param;
-    struct pipe_surface **surfaces;
-    struct pipe_screen *screen;
-    struct pipe_surface *psurf;
+   struct u_rect src_rect;
+   struct u_rect dst_rect;
+   struct u_rect *dirty_area;
+   vlVaSurface *src_surface;
+   VAProcPipelineParameterBuffer *pipeline_param;
+   struct pipe_surface **surfaces;
+   struct pipe_screen *screen;
+   struct pipe_surface *psurf;
 
-    if (!drv || !context)
-       return VA_STATUS_ERROR_INVALID_CONTEXT;
+   if (!drv || !context)
+      return VA_STATUS_ERROR_INVALID_CONTEXT;
 
-    if (!buf || !buf->data)
-       return VA_STATUS_ERROR_INVALID_BUFFER;
+   if (!buf || !buf->data)
+      return VA_STATUS_ERROR_INVALID_BUFFER;
 
-    if (!context->target)
-        return VA_STATUS_ERROR_INVALID_SURFACE;
+   if (!context->target)
+      return VA_STATUS_ERROR_INVALID_SURFACE;
 
-    pipeline_param = (VAProcPipelineParameterBuffer *)buf->data;
+   pipeline_param = (VAProcPipelineParameterBuffer *)buf->data;
 
-    src_surface = handle_table_get(drv->htab, pipeline_param->surface);
-    if (!src_surface || !src_surface->buffer)
-       return VA_STATUS_ERROR_INVALID_SURFACE;
+   src_surface = handle_table_get(drv->htab, pipeline_param->surface);
+   if (!src_surface || !src_surface->buffer)
+      return VA_STATUS_ERROR_INVALID_SURFACE;
 
-    surfaces = context->target->get_surfaces(context->target);
+   surfaces = context->target->get_surfaces(context->target);
 
-    if (!surfaces || !surfaces[0])
-        return VA_STATUS_ERROR_INVALID_SURFACE;
+   if (!surfaces || !surfaces[0])
+      return VA_STATUS_ERROR_INVALID_SURFACE;
 
-    screen = drv->pipe->screen;
+   screen = drv->pipe->screen;
 
-    psurf = surfaces[0];
+   psurf = surfaces[0];
 
-    src_rect.x0 = pipeline_param->surface_region->x;
-    src_rect.y0 = pipeline_param->surface_region->y;
-    src_rect.x1 = pipeline_param->surface_region->x + pipeline_param->surface_region->width;
-    src_rect.y1 = pipeline_param->surface_region->y + pipeline_param->surface_region->height;
+   src_rect.x0 = pipeline_param->surface_region->x;
+   src_rect.y0 = pipeline_param->surface_region->y;
+   src_rect.x1 = pipeline_param->surface_region->x + pipeline_param->surface_region->width;
+   src_rect.y1 = pipeline_param->surface_region->y + pipeline_param->surface_region->height;
 
-    dst_rect.x0 = pipeline_param->output_region->x;
-    dst_rect.y0 = pipeline_param->output_region->y;
-    dst_rect.x1 = pipeline_param->output_region->x + pipeline_param->output_region->width;
-    dst_rect.y1 = pipeline_param->output_region->y + pipeline_param->output_region->height;
+   dst_rect.x0 = pipeline_param->output_region->x;
+   dst_rect.y0 = pipeline_param->output_region->y;
+   dst_rect.x1 = pipeline_param->output_region->x + pipeline_param->output_region->width;
+   dst_rect.y1 = pipeline_param->output_region->y + pipeline_param->output_region->height;
 
-    dirty_area = vl_screen_get_dirty_area(drv->vscreen);
+   dirty_area = vl_screen_get_dirty_area(drv->vscreen);
 
-    vl_compositor_clear_layers(&drv->cstate);
-    vl_compositor_set_buffer_layer(&drv->cstate, &drv->compositor, 0, src_surface->buffer, &src_rect, NULL, VL_COMPOSITOR_WEAVE);
-    vl_compositor_set_layer_dst_area(&drv->cstate, 0, &dst_rect);
-    vl_compositor_render(&drv->cstate, &drv->compositor, psurf, dirty_area, true);
+   vl_compositor_clear_layers(&drv->cstate);
+   vl_compositor_set_buffer_layer(&drv->cstate, &drv->compositor, 0, src_surface->buffer, &src_rect, NULL, VL_COMPOSITOR_WEAVE);
+   vl_compositor_set_layer_dst_area(&drv->cstate, 0, &dst_rect);
+   vl_compositor_render(&drv->cstate, &drv->compositor, psurf, dirty_area, true);
 
-    screen->fence_reference(screen, &src_surface->fence, NULL);
-    drv->pipe->flush(drv->pipe, &src_surface->fence, 0);
+   screen->fence_reference(screen, &src_surface->fence, NULL);
+   drv->pipe->flush(drv->pipe, &src_surface->fence, 0);
 
-    return VA_STATUS_SUCCESS;
+   return VA_STATUS_SUCCESS;
 }
 
 VAStatus

From bf6acbb2db4baaf18ae5a139142acf06e84d1b9c Mon Sep 17 00:00:00 2001
From: Julien Isorce <j.isorce@samsung.com>
Date: Fri, 6 Nov 2015 09:45:17 +0000
Subject: [PATCH 077/287] st/va: properly use brackets in
 vlVaAcquireBufferHandle's switch

In "switch (mem_type)" the brackets were surrounding "case+default"
instead of "case" only.

Signed-off-by: Julien Isorce <j.isorce@samsung.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Emil Velikov <emil.l.velikov@gmail.com>
---
 src/gallium/state_trackers/va/buffer.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/src/gallium/state_trackers/va/buffer.c b/src/gallium/state_trackers/va/buffer.c
index 47bf35ac725..769305e2999 100644
--- a/src/gallium/state_trackers/va/buffer.c
+++ b/src/gallium/state_trackers/va/buffer.c
@@ -280,15 +280,14 @@ vlVaAcquireBufferHandle(VADriverContextP ctx, VABufferID buf_id,
 
          buf_info->handle = (intptr_t)whandle.handle;
          break;
+      }
       default:
          return VA_STATUS_ERROR_UNSUPPORTED_MEMORY_TYPE;
       }
-   }
-
-   buf_info->type = buf->type;
-   buf_info->mem_type = mem_type;
-   buf_info->mem_size = buf->num_elements * buf->size;
 
+      buf_info->type = buf->type;
+      buf_info->mem_type = mem_type;
+      buf_info->mem_size = buf->num_elements * buf->size;
    }
 
    buf->export_refcount++;

From 42a5e143a8d58a0ad15dd5747449eb4b57c87177 Mon Sep 17 00:00:00 2001
From: Julien Isorce <j.isorce@samsung.com>
Date: Fri, 6 Nov 2015 09:45:19 +0000
Subject: [PATCH 078/287] vl/buffers: add RGBX and BGRX to the supported
 formats

Useful if one wants to create RGBX or BGRX surfaces.
The infrastructure is such that it required just a
few definitions to support these formats.

Signed-off-by: Julien Isorce <j.isorce@samsung.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Emil Velikov <emil.l.velikov@gmail.com>
---
 src/gallium/auxiliary/vl/vl_video_buffer.c | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/src/gallium/auxiliary/vl/vl_video_buffer.c b/src/gallium/auxiliary/vl/vl_video_buffer.c
index 5e0ae0ecb8b..6cd2557a892 100644
--- a/src/gallium/auxiliary/vl/vl_video_buffer.c
+++ b/src/gallium/auxiliary/vl/vl_video_buffer.c
@@ -62,6 +62,18 @@ const enum pipe_format const_resource_formats_VUYA[3] = {
    PIPE_FORMAT_NONE
 };
 
+const enum pipe_format const_resource_formats_YUVX[3] = {
+   PIPE_FORMAT_R8G8B8X8_UNORM,
+   PIPE_FORMAT_NONE,
+   PIPE_FORMAT_NONE
+};
+
+const enum pipe_format const_resource_formats_VUYX[3] = {
+   PIPE_FORMAT_B8G8R8X8_UNORM,
+   PIPE_FORMAT_NONE,
+   PIPE_FORMAT_NONE
+};
+
 const enum pipe_format const_resource_formats_YUYV[3] = {
    PIPE_FORMAT_R8G8_R8B8_UNORM,
    PIPE_FORMAT_NONE,
@@ -102,6 +114,12 @@ vl_video_buffer_formats(struct pipe_screen *screen, enum pipe_format format)
    case PIPE_FORMAT_B8G8R8A8_UNORM:
       return const_resource_formats_VUYA;
 
+   case PIPE_FORMAT_R8G8B8X8_UNORM:
+      return const_resource_formats_YUVX;
+
+   case PIPE_FORMAT_B8G8R8X8_UNORM:
+      return const_resource_formats_VUYX;
+
    case PIPE_FORMAT_YUYV:
       return const_resource_formats_YUYV;
 

From cc1e5c972eff8c774c93c8dc51d89b550d00633e Mon Sep 17 00:00:00 2001
From: Julien Isorce <j.isorce@samsung.com>
Date: Fri, 6 Nov 2015 09:45:22 +0000
Subject: [PATCH 079/287] st/va: add support for RGBX and BGRX in VPP

Before it was only possible to convert a NV12 surface to
RGBA or BGRA. This patch uses the same post processing
function, "handleVAProcPipelineParameterBufferType", but
adds definitions for RGBX and BGRX.

This patch also makes vlVaQuerySurfaceAttributes more generic
to avoid copy and pasting the same lines.

Signed-off-by: Julien Isorce <j.isorce@samsung.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Emil Velikov <emil.l.velikov@gmail.com>
---
 src/gallium/state_trackers/va/picture.c |  5 ++--
 src/gallium/state_trackers/va/surface.c | 36 ++++++++++++++-----------
 2 files changed, 23 insertions(+), 18 deletions(-)

diff --git a/src/gallium/state_trackers/va/picture.c b/src/gallium/state_trackers/va/picture.c
index 644b8488ec2..d6cdbea197d 100644
--- a/src/gallium/state_trackers/va/picture.c
+++ b/src/gallium/state_trackers/va/picture.c
@@ -59,11 +59,12 @@ vlVaBeginPicture(VADriverContextP ctx, VAContextID context_id, VASurfaceID rende
       return VA_STATUS_ERROR_INVALID_SURFACE;
 
    context->target = surf->buffer;
-
    if (!context->decoder) {
       /* VPP */
       if ((context->target->buffer_format != PIPE_FORMAT_B8G8R8A8_UNORM  &&
-           context->target->buffer_format != PIPE_FORMAT_R8G8B8A8_UNORM) ||
+           context->target->buffer_format != PIPE_FORMAT_R8G8B8A8_UNORM  &&
+           context->target->buffer_format != PIPE_FORMAT_B8G8R8X8_UNORM  &&
+           context->target->buffer_format != PIPE_FORMAT_R8G8B8X8_UNORM) ||
            context->target->interlaced)
          return VA_STATUS_ERROR_UNIMPLEMENTED;
       return VA_STATUS_SUCCESS;
diff --git a/src/gallium/state_trackers/va/surface.c b/src/gallium/state_trackers/va/surface.c
index 3db21c3de39..589d6860b6a 100644
--- a/src/gallium/state_trackers/va/surface.c
+++ b/src/gallium/state_trackers/va/surface.c
@@ -45,6 +45,11 @@
 
 #include <va/va_drmcommon.h>
 
+static const enum pipe_format vpp_surface_formats[] = {
+   PIPE_FORMAT_B8G8R8A8_UNORM, PIPE_FORMAT_R8G8B8A8_UNORM,
+   PIPE_FORMAT_B8G8R8X8_UNORM, PIPE_FORMAT_R8G8B8X8_UNORM
+};
+
 VAStatus
 vlVaCreateSurfaces(VADriverContextP ctx, int width, int height, int format,
                    int num_surfaces, VASurfaceID *surfaces)
@@ -314,7 +319,9 @@ vlVaQuerySurfaceAttributes(VADriverContextP ctx, VAConfigID config,
    vlVaDriver *drv;
    VASurfaceAttrib *attribs;
    struct pipe_screen *pscreen;
-   int i;
+   int i, j;
+
+   STATIC_ASSERT(ARRAY_SIZE(vpp_surface_formats) <= VL_VA_MAX_IMAGE_FORMATS);
 
    if (config == VA_INVALID_ID)
       return VA_STATUS_ERROR_INVALID_CONFIG;
@@ -323,7 +330,7 @@ vlVaQuerySurfaceAttributes(VADriverContextP ctx, VAConfigID config,
       return VA_STATUS_ERROR_INVALID_PARAMETER;
 
    if (!attrib_list) {
-      *num_attribs = VASurfaceAttribCount;
+      *num_attribs = VL_VA_MAX_IMAGE_FORMATS + VASurfaceAttribCount;
       return VA_STATUS_SUCCESS;
    }
 
@@ -340,27 +347,24 @@ vlVaQuerySurfaceAttributes(VADriverContextP ctx, VAConfigID config,
    if (!pscreen)
       return VA_STATUS_ERROR_INVALID_CONTEXT;
 
-   attribs = CALLOC(VASurfaceAttribCount, sizeof(VASurfaceAttrib));
+   attribs = CALLOC(VL_VA_MAX_IMAGE_FORMATS + VASurfaceAttribCount,
+                    sizeof(VASurfaceAttrib));
 
    if (!attribs)
       return VA_STATUS_ERROR_ALLOCATION_FAILED;
 
    i = 0;
 
+   /* vlVaCreateConfig returns PIPE_VIDEO_PROFILE_UNKNOWN
+    * only for VAEntrypointVideoProc. */
    if (config == PIPE_VIDEO_PROFILE_UNKNOWN) {
-      /* vlVaCreateConfig returns PIPE_VIDEO_PROFILE_UNKNOWN
-          only for VAEntrypointVideoProc. */
-      attribs[i].type = VASurfaceAttribPixelFormat;
-      attribs[i].value.type = VAGenericValueTypeInteger;
-      attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE;
-      attribs[i].value.value.i = VA_FOURCC_BGRA;
-      i++;
-
-      attribs[i].type = VASurfaceAttribPixelFormat;
-      attribs[i].value.type = VAGenericValueTypeInteger;
-      attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE;
-      attribs[i].value.value.i = VA_FOURCC_RGBA;
-      i++;
+      for (j = 0; j < ARRAY_SIZE(vpp_surface_formats); ++j) {
+         attribs[i].type = VASurfaceAttribPixelFormat;
+         attribs[i].value.type = VAGenericValueTypeInteger;
+         attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE;
+         attribs[i].value.value.i = PipeFormatToVaFourcc(vpp_surface_formats[j]);
+         i++;
+      }
    } else {
       /* Assume VAEntrypointVLD for now. */
       attribs[i].type = VASurfaceAttribPixelFormat;

From ed55def44febbe1662ddcc0c33a23308899ce488 Mon Sep 17 00:00:00 2001
From: Boyuan Zhang <boyuan.zhang@amd.com>
Date: Wed, 23 Sep 2015 10:11:07 +0200
Subject: [PATCH 080/287] st/vaapi: fix vaapi VC-1 simple/main corruption v2
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Apply the start code fix only to advanced profile.

v2 (chk): add commit message

Signed-off-by: Boyuan Zhang <boyuan.zhang@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Cc: "10.6 11.0" <mesa-stable@lists.freedesktop.org>
---
 src/gallium/state_trackers/va/picture.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/gallium/state_trackers/va/picture.c b/src/gallium/state_trackers/va/picture.c
index d6cdbea197d..5e7841a0521 100644
--- a/src/gallium/state_trackers/va/picture.c
+++ b/src/gallium/state_trackers/va/picture.c
@@ -694,8 +694,10 @@ handleVASliceDataBufferType(vlVaContext *context, vlVaBuffer *buf)
           bufHasStartcode(buf, 0x0000010b, 32))
          break;
 
+      if (context->decoder->profile == PIPE_VIDEO_PROFILE_VC1_ADVANCED) {
          buffers[num_buffers] = (void *const)&start_code_vc1;
          sizes[num_buffers++] = sizeof(start_code_vc1);
+      }
       break;
    case PIPE_VIDEO_FORMAT_MPEG4:
       if (bufHasStartcode(buf, 0x000001, 24))

From 6bad554d98004e6c8ab46e8cbe73f3b3024e55c5 Mon Sep 17 00:00:00 2001
From: Boyuan Zhang <boyuan.zhang@amd.com>
Date: Wed, 23 Sep 2015 10:11:08 +0200
Subject: [PATCH 081/287] radeon/uvd: fix VC-1 simple/main profile decode v2
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We just needed to set the extra width/height fields to get this working.

v2 (chk): rebased, CC stable added, commit message added, fixed coding style

Signed-off-by: Boyuan Zhang <boyuan.zhang@amd.com>
Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Cc: "10.6 11.0" <mesa-stable@lists.freedesktop.org>
---
 src/gallium/drivers/radeon/radeon_uvd.c   | 6 ++++++
 src/gallium/drivers/radeon/radeon_video.c | 3 +--
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/radeon/radeon_uvd.c b/src/gallium/drivers/radeon/radeon_uvd.c
index 33b01361aa5..0c643e5cd59 100644
--- a/src/gallium/drivers/radeon/radeon_uvd.c
+++ b/src/gallium/drivers/radeon/radeon_uvd.c
@@ -947,6 +947,12 @@ static void ruvd_end_frame(struct pipe_video_codec *decoder,
 	dec->msg->body.decode.width_in_samples = dec->base.width;
 	dec->msg->body.decode.height_in_samples = dec->base.height;
 
+	if ((picture->profile == PIPE_VIDEO_PROFILE_VC1_SIMPLE) ||
+	    (picture->profile == PIPE_VIDEO_PROFILE_VC1_MAIN)) {
+		dec->msg->body.decode.width_in_samples = align(dec->msg->body.decode.width_in_samples, 16) / 16;
+		dec->msg->body.decode.height_in_samples = align(dec->msg->body.decode.height_in_samples, 16) / 16;
+	}
+
 	dec->msg->body.decode.dpb_size = dec->dpb.res->buf->size;
 	dec->msg->body.decode.bsd_size = bs_size;
 	dec->msg->body.decode.db_pitch = dec->base.width;
diff --git a/src/gallium/drivers/radeon/radeon_video.c b/src/gallium/drivers/radeon/radeon_video.c
index 32bfc32073b..f56c6cf6cb4 100644
--- a/src/gallium/drivers/radeon/radeon_video.c
+++ b/src/gallium/drivers/radeon/radeon_video.c
@@ -244,8 +244,7 @@ int rvid_get_video_param(struct pipe_screen *screen,
 				return codec != PIPE_VIDEO_FORMAT_MPEG4;
 			return true;
 		case PIPE_VIDEO_FORMAT_VC1:
-			/* FIXME: VC-1 simple/main profile is broken */
-			return profile == PIPE_VIDEO_PROFILE_VC1_ADVANCED;
+			return true;
 		case PIPE_VIDEO_FORMAT_HEVC:
 			/* Carrizo only supports HEVC Main */
 			return rscreen->family >= CHIP_CARRIZO &&

From 91f188710ad8dce79936c5d28fd7b9a91b6d870a Mon Sep 17 00:00:00 2001
From: Jordan Justen <jordan.l.justen@intel.com>
Date: Fri, 9 Oct 2015 14:16:05 -0700
Subject: [PATCH 082/287] glsl: Add new barrier functions for compute shaders

When these functions are called in GLSL code, we create an intrinsic
function call:

 * groupMemoryBarrier => __intrinsic_group_memory_barrier
 * memoryBarrierAtomicCounter => __intrinsic_memory_barrier_atomic_counter
 * memoryBarrierBuffer => __intrinsic_memory_barrier_buffer
 * memoryBarrierImage => __intrinsic_memory_barrier_image
 * memoryBarrierShared => __intrinsic_memory_barrier_shared

v2:
 * Consolidate with memoryBarrier function/intrinsic creation (curro)

v3:
 * Instead of add_memory_barrier_function, add an intrinsic_name
   parameter to _memory_barrier (curro)

Signed-off-by: Jordan Justen <jordan.l.justen@intel.com>
Reviewed-by: Francisco Jerez <currojerez@riseup.net>
---
 src/glsl/builtin_functions.cpp | 55 ++++++++++++++++++++++++++++++----
 1 file changed, 49 insertions(+), 6 deletions(-)

diff --git a/src/glsl/builtin_functions.cpp b/src/glsl/builtin_functions.cpp
index 509a57b8813..13494446b59 100644
--- a/src/glsl/builtin_functions.cpp
+++ b/src/glsl/builtin_functions.cpp
@@ -458,10 +458,16 @@ fp64(const _mesa_glsl_parse_state *state)
    return state->has_double();
 }
 
+static bool
+compute_shader(const _mesa_glsl_parse_state *state)
+{
+   return state->stage == MESA_SHADER_COMPUTE;
+}
+
 static bool
 barrier_supported(const _mesa_glsl_parse_state *state)
 {
-   return state->stage == MESA_SHADER_COMPUTE ||
+   return compute_shader(state) ||
           state->stage == MESA_SHADER_TESS_CTRL;
 }
 
@@ -785,8 +791,8 @@ private:
 
    ir_function_signature *_memory_barrier_intrinsic(
       builtin_available_predicate avail);
-   ir_function_signature *_memory_barrier(
-      builtin_available_predicate avail);
+   ir_function_signature *_memory_barrier(const char *intrinsic_name,
+                                          builtin_available_predicate avail);
 
    ir_function_signature *_shader_clock_intrinsic(builtin_available_predicate avail,
                                                   const glsl_type *type);
@@ -963,6 +969,21 @@ builtin_builder::create_intrinsics()
    add_function("__intrinsic_memory_barrier",
                 _memory_barrier_intrinsic(shader_image_load_store),
                 NULL);
+   add_function("__intrinsic_group_memory_barrier",
+                _memory_barrier_intrinsic(compute_shader),
+                NULL);
+   add_function("__intrinsic_memory_barrier_atomic_counter",
+                _memory_barrier_intrinsic(compute_shader),
+                NULL);
+   add_function("__intrinsic_memory_barrier_buffer",
+                _memory_barrier_intrinsic(compute_shader),
+                NULL);
+   add_function("__intrinsic_memory_barrier_image",
+                _memory_barrier_intrinsic(compute_shader),
+                NULL);
+   add_function("__intrinsic_memory_barrier_shared",
+                _memory_barrier_intrinsic(compute_shader),
+                NULL);
 
    add_function("__intrinsic_shader_clock",
                 _shader_clock_intrinsic(shader_clock,
@@ -2754,7 +2775,28 @@ builtin_builder::create_builtins()
    add_image_functions(true);
 
    add_function("memoryBarrier",
-                _memory_barrier(shader_image_load_store),
+                _memory_barrier("__intrinsic_memory_barrier",
+                                shader_image_load_store),
+                NULL);
+   add_function("groupMemoryBarrier",
+                _memory_barrier("__intrinsic_group_memory_barrier",
+                                compute_shader),
+                NULL);
+   add_function("memoryBarrierAtomicCounter",
+                _memory_barrier("__intrinsic_memory_barrier_atomic_counter",
+                                compute_shader),
+                NULL);
+   add_function("memoryBarrierBuffer",
+                _memory_barrier("__intrinsic_memory_barrier_buffer",
+                                compute_shader),
+                NULL);
+   add_function("memoryBarrierImage",
+                _memory_barrier("__intrinsic_memory_barrier_image",
+                                compute_shader),
+                NULL);
+   add_function("memoryBarrierShared",
+                _memory_barrier("__intrinsic_memory_barrier_shared",
+                                compute_shader),
                 NULL);
 
    add_function("clock2x32ARB",
@@ -5264,10 +5306,11 @@ builtin_builder::_memory_barrier_intrinsic(builtin_available_predicate avail)
 }
 
 ir_function_signature *
-builtin_builder::_memory_barrier(builtin_available_predicate avail)
+builtin_builder::_memory_barrier(const char *intrinsic_name,
+                                 builtin_available_predicate avail)
 {
    MAKE_SIG(glsl_type::void_type, avail, 0);
-   body.emit(call(shader->symbols->get_function("__intrinsic_memory_barrier"),
+   body.emit(call(shader->symbols->get_function(intrinsic_name),
                   NULL, sig->parameters));
    return sig;
 }

From 9d65f3208bbded17119c7ad38f4b692d3ed00635 Mon Sep 17 00:00:00 2001
From: Jordan Justen <jordan.l.justen@intel.com>
Date: Sat, 10 Oct 2015 08:59:42 -0700
Subject: [PATCH 083/287] nir: Add new barrier functions for compute shaders

When these functions are called in glsl-ir, we create a corresponding
nir intrinsic function call.

Signed-off-by: Jordan Justen <jordan.l.justen@intel.com>
Reviewed-by: Francisco Jerez <currojerez@riseup.net>
---
 src/glsl/nir/glsl_to_nir.cpp  | 15 +++++++++++++++
 src/glsl/nir/nir_intrinsics.h | 11 +++++++++++
 2 files changed, 26 insertions(+)

diff --git a/src/glsl/nir/glsl_to_nir.cpp b/src/glsl/nir/glsl_to_nir.cpp
index 57aba5be0f5..facb9fa4a7a 100644
--- a/src/glsl/nir/glsl_to_nir.cpp
+++ b/src/glsl/nir/glsl_to_nir.cpp
@@ -719,6 +719,16 @@ nir_visitor::visit(ir_call *ir)
          op = nir_intrinsic_ssbo_atomic_comp_swap;
       } else if (strcmp(ir->callee_name(), "__intrinsic_shader_clock") == 0) {
          op = nir_intrinsic_shader_clock;
+      } else if (strcmp(ir->callee_name(), "__intrinsic_group_memory_barrier") == 0) {
+         op = nir_intrinsic_group_memory_barrier;
+      } else if (strcmp(ir->callee_name(), "__intrinsic_memory_barrier_atomic_counter") == 0) {
+         op = nir_intrinsic_memory_barrier_atomic_counter;
+      } else if (strcmp(ir->callee_name(), "__intrinsic_memory_barrier_buffer") == 0) {
+         op = nir_intrinsic_memory_barrier_buffer;
+      } else if (strcmp(ir->callee_name(), "__intrinsic_memory_barrier_image") == 0) {
+         op = nir_intrinsic_memory_barrier_image;
+      } else if (strcmp(ir->callee_name(), "__intrinsic_memory_barrier_shared") == 0) {
+         op = nir_intrinsic_memory_barrier_shared;
       } else {
          unreachable("not reached");
       }
@@ -821,6 +831,11 @@ nir_visitor::visit(ir_call *ir)
          break;
       }
       case nir_intrinsic_memory_barrier:
+      case nir_intrinsic_group_memory_barrier:
+      case nir_intrinsic_memory_barrier_atomic_counter:
+      case nir_intrinsic_memory_barrier_buffer:
+      case nir_intrinsic_memory_barrier_image:
+      case nir_intrinsic_memory_barrier_shared:
          nir_instr_insert_after_cf_list(this->cf_node_list, &instr->instr);
          break;
       case nir_intrinsic_shader_clock:
diff --git a/src/glsl/nir/nir_intrinsics.h b/src/glsl/nir/nir_intrinsics.h
index c2b6fe7166d..36fb2861c16 100644
--- a/src/glsl/nir/nir_intrinsics.h
+++ b/src/glsl/nir/nir_intrinsics.h
@@ -91,6 +91,17 @@ BARRIER(memory_barrier)
  */
 INTRINSIC(shader_clock, 0, ARR(), true, 1, 0, 0, NIR_INTRINSIC_CAN_ELIMINATE)
 
+/*
+ * Memory barrier with semantics analogous to the compute shader
+ * groupMemoryBarrier(), memoryBarrierAtomicCounter(), memoryBarrierBuffer(),
+ * memoryBarrierImage() and memoryBarrierShared() GLSL intrinsics.
+ */
+BARRIER(group_memory_barrier)
+BARRIER(memory_barrier_atomic_counter)
+BARRIER(memory_barrier_buffer)
+BARRIER(memory_barrier_image)
+BARRIER(memory_barrier_shared)
+
 /** A conditional discard, with a single boolean source. */
 INTRINSIC(discard_if, 1, ARR(1), false, 0, 0, 0, 0)
 

From faa119307035787f5e421dd6a9eb4d0101de963b Mon Sep 17 00:00:00 2001
From: Jordan Justen <jordan.l.justen@intel.com>
Date: Sat, 10 Oct 2015 13:00:04 -0700
Subject: [PATCH 084/287] i965/nir/fs: Implement new barrier functions for
 compute shaders

For these nir intrinsics, we emit the same code as
nir_intrinsic_memory_barrier:

 * nir_intrinsic_memory_barrier_atomic_counter
 * nir_intrinsic_memory_barrier_buffer
 * nir_intrinsic_memory_barrier_image

We treat these nir intrinsics as no-ops:
 * nir_intrinsic_group_memory_barrier
 * nir_intrinsic_memory_barrier_shared

v3:
 * Add comment for no-op cases (curro)

v4:
 * Moving comment to a separate patch authored by curro

Signed-off-by: Jordan Justen <jordan.l.justen@intel.com>
Reviewed-by: Francisco Jerez <currojerez@riseup.net>
---
 src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index 261518605b7..5d2dd18552a 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -1697,6 +1697,9 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
       break;
    }
 
+   case nir_intrinsic_memory_barrier_atomic_counter:
+   case nir_intrinsic_memory_barrier_buffer:
+   case nir_intrinsic_memory_barrier_image:
    case nir_intrinsic_memory_barrier: {
       const fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_UD, 16 / dispatch_width);
       bld.emit(SHADER_OPCODE_MEMORY_FENCE, tmp)
@@ -1704,6 +1707,10 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
       break;
    }
 
+   case nir_intrinsic_group_memory_barrier:
+   case nir_intrinsic_memory_barrier_shared:
+      break;
+
    case nir_intrinsic_shader_clock: {
       /* We cannot do anything if there is an event, so ignore it for now */
       fs_reg shader_clock = get_timestamp(bld);

From 51694072218b5ae84b5d8f98ee2172d7c5d61b31 Mon Sep 17 00:00:00 2001
From: Francisco Jerez <currojerez@riseup.net>
Date: Fri, 6 Nov 2015 13:19:56 -0800
Subject: [PATCH 085/287] i965/nir/fs: Add comment for no-op memory barrier
 functions

Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
---
 src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index 5d2dd18552a..02b9f5bbc8a 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -1709,6 +1709,25 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
 
    case nir_intrinsic_group_memory_barrier:
    case nir_intrinsic_memory_barrier_shared:
+      /* We treat these workgroup-level barriers as no-ops.  This should be
+       * safe at present and as long as:
+       *
+       *  - Memory access instructions are not subsequently reordered by the
+       *    compiler back-end.
+       *
+       *  - All threads from a given compute shader workgroup fit within a
+       *    single subslice and therefore talk to the same HDC shared unit
+       *    which supposedly guarantees ordering and coherency between threads
+       *    from the same workgroup.  This may change in the future when we
+       *    start splitting workgroups across multiple subslices.
+       *
+       *  - The context is not in fault-and-stream mode, which could cause
+       *    memory transactions (including to SLM) prior to the barrier to be
+       *    replayed after the barrier if a pagefault occurs.  This shouldn't
+       *    be a problem up to and including SKL because fault-and-stream is
+       *    not usable due to hardware issues, but that's likely to change in
+       *    the future.
+       */
       break;
 
    case nir_intrinsic_shader_clock: {

From 12c850d01ce2bf364f2b1719154df789d43a7a59 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Thu, 5 Nov 2015 14:32:34 +0100
Subject: [PATCH 086/287] nvc0/ir: Add support for double immediates

Add support for encoding double immediates (up to 20 bits of precision)
into the generated nvc0 machine-code.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
---
 src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
index fd103146c72..8784f3b0a21 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
@@ -323,6 +323,14 @@ CodeEmitterNVC0::setImmediate(const Instruction *i, const int s)
    assert(imm);
    u32 = imm->reg.data.u32;
 
+   if ((code[0] & 0xf) == 0x1) {
+      // double immediate
+      uint64_t u64 = imm->reg.data.u64;
+      assert(!(u64 & 0x00000fffffffffffULL));
+      assert(!(code[1] & 0xc000));
+      code[0] |= ((u64 >> 44) & 0x3f) << 26;
+      code[1] |= 0xc000 | (u64 >> 50);
+   } else
    if ((code[0] & 0xf) == 0x2) {
       // LIMM
       code[0] |= (u32 & 0x3f) << 26;

From b487b55f7d08c00f2efabc097c7138403528893f Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Thu, 5 Nov 2015 14:32:35 +0100
Subject: [PATCH 087/287] gm107/ir: Add support for double immediates

Add support for encoding double immediates (up to 20 bits of precision)
into the generated gm107 machine-code.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
---
 src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
index a327d572470..7e6ed842d54 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
@@ -310,9 +310,12 @@ CodeEmitterGM107::emitIMMD(int pos, int len, const ValueRef &ref)
    uint32_t val = imm->reg.data.u32;
 
    if (len == 19) {
-      if (isFloatType(insn->sType)) {
+      if (insn->sType == TYPE_F32 || insn->sType == TYPE_F16) {
          assert(!(val & 0x00000fff));
          val >>= 12;
+      } else if (insn->sType == TYPE_F64) {
+         assert(!(imm->reg.data.u64 & 0x00000fffffffffffULL));
+         val = imm->reg.data.u64 >> 44;
       }
       assert(!(val & 0xfff00000) || (val & 0xfff00000) == 0xfff00000);
       emitField( 56,   1, (val & 0x80000) >> 19);

From 11e3dac36e7b992e30efbce4473451c4e1ac617f Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Fri, 6 Nov 2015 17:18:01 -0500
Subject: [PATCH 088/287] nv50/ir: allow movs with TYPE_F64 destinations to be
 split

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
---
 src/gallium/drivers/nouveau/codegen/nv50_ir_build_util.cpp | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_build_util.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_build_util.cpp
index 19418c0e0f1..ece6ce40643 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_build_util.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_build_util.cpp
@@ -555,6 +555,12 @@ BuildUtil::split64BitOpPostRA(Function *fn, Instruction *i,
    switch (i->dType) {
    case TYPE_U64: hTy = TYPE_U32; break;
    case TYPE_S64: hTy = TYPE_S32; break;
+   case TYPE_F64:
+      if (i->op == OP_MOV) {
+         hTy = TYPE_U32;
+         break;
+      }
+      /* fallthrough */
    default:
       return NULL;
    }

From 2437f0085372355980864454964749ac8231ca44 Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Fri, 6 Nov 2015 17:58:42 -0500
Subject: [PATCH 089/287] nv50/ir: disallow 64-bit immediates on nv50 targets

No instructions are able to load short immediates like nvc0 can.

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
---
 src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp
index f3ddcaa5199..94cf0f0e05e 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp
@@ -343,7 +343,7 @@ TargetNV50::insnCanLoad(const Instruction *i, int s,
    }
 
    if (sf == FILE_IMMEDIATE)
-      return true;
+      return ldSize <= 4;
 
 
    // Check if memory access is encodable:

From 428506ece2c7627392d0f02c7f83021caa46bb4f Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Thu, 5 Nov 2015 14:32:36 +0100
Subject: [PATCH 090/287] nv50/ir: Add support for merge-s to the
 ConstantFolding pass

This allows later passes like LoadPropagation to properly deal with 64
bit immediates.

If the new 64 bit load this introduces does not get optimized away then
split64BitOpPostRA() will split this into 2 instructions again.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
---
 .../drivers/nouveau/codegen/nv50_ir_peephole.cpp  | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
index 44f74c61304..8e241f1ebc4 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
@@ -447,6 +447,7 @@ ConstantFolding::expr(Instruction *i,
 {
    struct Storage *const a = &imm0.reg, *const b = &imm1.reg;
    struct Storage res;
+   uint8_t fixSrc0Size = 0;
 
    memset(&res.data, 0, sizeof(res.data));
 
@@ -589,6 +590,18 @@ ConstantFolding::expr(Instruction *i,
       // the second argument will not be constant, but that can happen.
       res.data.u32 = a->data.u32 + b->data.u32;
       break;
+   case OP_MERGE:
+      switch (i->dType) {
+      case TYPE_U64:
+      case TYPE_S64:
+      case TYPE_F64:
+         res.data.u64 = (((uint64_t)b->data.u32) << 32) | a->data.u32;
+         fixSrc0Size = 8;
+         break;
+      default:
+         return;
+      }
+      break;
    default:
       return;
    }
@@ -602,6 +615,8 @@ ConstantFolding::expr(Instruction *i,
    i->setSrc(1, NULL);
 
    i->getSrc(0)->reg.data = res.data;
+   if (fixSrc0Size)
+      i->getSrc(0)->reg.size = fixSrc0Size;
 
    switch (i->op) {
    case OP_MAD:

From 9f2f8bda6e060cb85f6e099a4ad65c58cde36ba0 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Thu, 5 Nov 2015 14:32:37 +0100
Subject: [PATCH 091/287] nvc0/ir: Teach insnCanLoad about double immediates

Teach insnCanLoad about double immediates.

Together with the "Add support for merge-s to the ConstantFolding pass"
patch, this turns the following (nvc0) code:
  1: mov u32 $r2 0x00000000 (8)
  2: mov u32 $r3 0x3fe00000 (8)
  3: add f64 $r0d $r0d $r2d (8)

Into:
  1: add f64 $r0d $r0d 0.500000 (8)

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
---
 .../nouveau/codegen/nv50_ir_target_nvc0.cpp   | 25 ++++++++++++++-----
 1 file changed, 19 insertions(+), 6 deletions(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp
index 27df0eba66b..8f59d86a72f 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp
@@ -338,17 +338,30 @@ TargetNVC0::insnCanLoad(const Instruction *i, int s,
    if (sf == FILE_IMMEDIATE) {
       Storage &reg = ld->getSrc(0)->asImm()->reg;
 
-      if (typeSizeof(i->sType) > 4)
-         return false;
-      if (opInfo[i->op].immdBits != 0xffffffff) {
-         if (i->sType == TYPE_F32) {
+      if (opInfo[i->op].immdBits != 0xffffffff || typeSizeof(i->sType) > 4) {
+         switch (i->sType) {
+         case TYPE_F64:
+            if (reg.data.u64 & 0x00000fffffffffffULL)
+               return false;
+            break;
+         case TYPE_F32:
             if (reg.data.u32 & 0xfff)
                return false;
-         } else
-         if (i->sType == TYPE_S32 || i->sType == TYPE_U32) {
+            break;
+         case TYPE_S32:
+         case TYPE_U32:
             // with u32, 0xfffff counts as 0xffffffff as well
             if (reg.data.s32 > 0x7ffff || reg.data.s32 < -0x80000)
                return false;
+            break;
+         case TYPE_U8:
+         case TYPE_S8:
+         case TYPE_U16:
+         case TYPE_S16:
+         case TYPE_F16:
+            break;
+         default:
+            return false;
          }
       } else
       if (i->op == OP_MAD || i->op == OP_FMA) {

From f979d3cfec2b336801fe59ccd264111f403428f5 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Thu, 5 Nov 2015 14:32:38 +0100
Subject: [PATCH 092/287] nv50/ir: Add support for 64bit immediates to
 checkSwapSrc01

Now that we support 64 bit immediates in insnCanLoad, we need to swap
64 bit immediate sources too for optimal effect.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
---
 .../drivers/nouveau/codegen/nv50_ir_peephole.cpp      | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
index 8e241f1ebc4..b952c760a21 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
@@ -155,7 +155,7 @@ private:
    void checkSwapSrc01(Instruction *);
 
    bool isCSpaceLoad(Instruction *);
-   bool isImmd32Load(Instruction *);
+   bool isImmdLoad(Instruction *);
    bool isAttribOrSharedLoad(Instruction *);
 };
 
@@ -166,9 +166,10 @@ LoadPropagation::isCSpaceLoad(Instruction *ld)
 }
 
 bool
-LoadPropagation::isImmd32Load(Instruction *ld)
+LoadPropagation::isImmdLoad(Instruction *ld)
 {
-   if (!ld || (ld->op != OP_MOV) || (typeSizeof(ld->dType) != 4))
+   if (!ld || (ld->op != OP_MOV) ||
+       ((typeSizeof(ld->dType) != 4) && (typeSizeof(ld->dType) != 8)))
       return false;
    return ld->src(0).getFile() == FILE_IMMEDIATE;
 }
@@ -201,8 +202,8 @@ LoadPropagation::checkSwapSrc01(Instruction *insn)
       else
          return;
    } else
-   if (isImmd32Load(i0)) {
-      if (!isCSpaceLoad(i1) && !isImmd32Load(i1))
+   if (isImmdLoad(i0)) {
+      if (!isCSpaceLoad(i1) && !isImmdLoad(i1))
          insn->swapSources(0, 1);
       else
          return;

From 76957389fc6952e59c1f0f1cbdf74f6949a7a956 Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Sun, 22 Feb 2015 19:49:49 -0500
Subject: [PATCH 093/287] nv50/ir: add fp64 opcode emission support for G200
 (NVA0)

rcp/rsq still need to be emulated before full fp64 support can be provided.

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
---
 .../nouveau/codegen/nv50_ir_emit_nv50.cpp     | 94 +++++++++++++++++--
 1 file changed, 84 insertions(+), 10 deletions(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp
index 9f1e4b803d5..ee115b581b8 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp
@@ -96,9 +96,12 @@ private:
    void emitUADD(const Instruction *);
    void emitAADD(const Instruction *);
    void emitFADD(const Instruction *);
+   void emitDADD(const Instruction *);
    void emitIMUL(const Instruction *);
    void emitFMUL(const Instruction *);
+   void emitDMUL(const Instruction *);
    void emitFMAD(const Instruction *);
+   void emitDMAD(const Instruction *);
    void emitIMAD(const Instruction *);
    void emitISAD(const Instruction *);
 
@@ -954,11 +957,13 @@ CodeEmitterNV50::emitMINMAX(const Instruction *i)
          assert(0);
          break;
       }
-      code[1] |= i->src(0).mod.abs() << 20;
-      code[1] |= i->src(0).mod.neg() << 26;
-      code[1] |= i->src(1).mod.abs() << 19;
-      code[1] |= i->src(1).mod.neg() << 27;
    }
+
+   code[1] |= i->src(0).mod.abs() << 20;
+   code[1] |= i->src(0).mod.neg() << 26;
+   code[1] |= i->src(1).mod.abs() << 19;
+   code[1] |= i->src(1).mod.neg() << 27;
+
    emitForm_MAD(i);
 }
 
@@ -993,6 +998,26 @@ CodeEmitterNV50::emitFMAD(const Instruction *i)
    }
 }
 
+void
+CodeEmitterNV50::emitDMAD(const Instruction *i)
+{
+   const int neg_mul = i->src(0).mod.neg() ^ i->src(1).mod.neg();
+   const int neg_add = i->src(2).mod.neg();
+
+   assert(i->encSize == 8);
+   assert(!i->saturate);
+
+   code[1] = 0x40000000;
+   code[0] = 0xe0000000;
+
+   code[1] |= neg_mul << 26;
+   code[1] |= neg_add << 27;
+
+   roundMode_MAD(i);
+
+   emitForm_MAD(i);
+}
+
 void
 CodeEmitterNV50::emitFADD(const Instruction *i)
 {
@@ -1027,6 +1052,25 @@ CodeEmitterNV50::emitFADD(const Instruction *i)
    }
 }
 
+void
+CodeEmitterNV50::emitDADD(const Instruction *i)
+{
+   const int neg0 = i->src(0).mod.neg();
+   const int neg1 = i->src(1).mod.neg() ^ ((i->op == OP_SUB) ? 1 : 0);
+
+   assert(!(i->src(0).mod | i->src(1).mod).abs());
+   assert(!i->saturate);
+   assert(i->encSize == 8);
+
+   code[1] = 0x60000000;
+   code[0] = 0xe0000000;
+
+   emitForm_ADD(i);
+
+   code[1] |= neg0 << 26;
+   code[1] |= neg1 << 27;
+}
+
 void
 CodeEmitterNV50::emitUADD(const Instruction *i)
 {
@@ -1120,6 +1164,25 @@ CodeEmitterNV50::emitFMUL(const Instruction *i)
    }
 }
 
+void
+CodeEmitterNV50::emitDMUL(const Instruction *i)
+{
+   const int neg = (i->src(0).mod ^ i->src(1).mod).neg();
+
+   assert(!i->saturate);
+   assert(i->encSize == 8);
+
+   code[1] = 0x80000000;
+   code[0] = 0xe0000000;
+
+   if (neg)
+      code[1] |= 0x08000000;
+
+   roundMode_CVT(i->rnd);
+
+   emitForm_MAD(i);
+}
+
 void
 CodeEmitterNV50::emitIMAD(const Instruction *i)
 {
@@ -1181,9 +1244,11 @@ CodeEmitterNV50::emitSET(const Instruction *i)
    code[0] = 0x30000000;
    code[1] = 0x60000000;
 
-   emitCondCode(i->asCmp()->setCond, i->sType, 32 + 14);
-
    switch (i->sType) {
+   case TYPE_F64:
+      code[0] = 0xe0000000;
+      code[1] = 0xe0000000;
+      break;
    case TYPE_F32: code[0] |= 0x80000000; break;
    case TYPE_S32: code[1] |= 0x0c000000; break;
    case TYPE_U32: code[1] |= 0x04000000; break;
@@ -1193,6 +1258,9 @@ CodeEmitterNV50::emitSET(const Instruction *i)
       assert(0);
       break;
    }
+
+   emitCondCode(i->asCmp()->setCond, i->sType, 32 + 14);
+
    if (i->src(0).mod.neg()) code[1] |= 0x04000000;
    if (i->src(1).mod.neg()) code[1] |= 0x08000000;
    if (i->src(0).mod.abs()) code[1] |= 0x00100000;
@@ -1756,7 +1824,9 @@ CodeEmitterNV50::emitInstruction(Instruction *insn)
       break;
    case OP_ADD:
    case OP_SUB:
-      if (isFloatType(insn->dType))
+      if (insn->dType == TYPE_F64)
+         emitDADD(insn);
+      else if (isFloatType(insn->dType))
          emitFADD(insn);
       else if (insn->getDef(0)->reg.file == FILE_ADDRESS)
          emitAADD(insn);
@@ -1764,14 +1834,18 @@ CodeEmitterNV50::emitInstruction(Instruction *insn)
          emitUADD(insn);
       break;
    case OP_MUL:
-      if (isFloatType(insn->dType))
+      if (insn->dType == TYPE_F64)
+         emitDMUL(insn);
+      else if (isFloatType(insn->dType))
          emitFMUL(insn);
       else
          emitIMUL(insn);
       break;
    case OP_MAD:
    case OP_FMA:
-      if (isFloatType(insn->dType))
+      if (insn->dType == TYPE_F64)
+         emitDMAD(insn);
+      else if (isFloatType(insn->dType))
          emitFMAD(insn);
       else
          emitIMAD(insn);
@@ -1943,7 +2017,7 @@ CodeEmitterNV50::getMinEncodingSize(const Instruction *i) const
 {
    const Target::OpInfo &info = targ->getOpInfo(i);
 
-   if (info.minEncSize > 4)
+   if (info.minEncSize > 4 || i->dType == TYPE_F64)
       return 8;
 
    // check constraints on dst and src operands

From 2f9aaed7499499679d44e47b7a070df237f77683 Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Fri, 6 Nov 2015 19:13:35 -0500
Subject: [PATCH 094/287] nv50/ir: add support for const-folding OP_CVT with
 F64 source/dest

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
---
 .../nouveau/codegen/nv50_ir_build_util.cpp    | 12 +++++++
 .../nouveau/codegen/nv50_ir_build_util.h      |  2 ++
 .../nouveau/codegen/nv50_ir_peephole.cpp      | 31 +++++++++++++++++++
 3 files changed, 45 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_build_util.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_build_util.cpp
index ece6ce40643..dca799dd9b5 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_build_util.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_build_util.cpp
@@ -392,12 +392,24 @@ BuildUtil::mkImm(float f)
    return mkImm(u.u32);
 }
 
+ImmediateValue *
+BuildUtil::mkImm(double d)
+{
+   return new_ImmediateValue(prog, d);
+}
+
 Value *
 BuildUtil::loadImm(Value *dst, float f)
 {
    return mkOp1v(OP_MOV, TYPE_F32, dst ? dst : getScratch(), mkImm(f));
 }
 
+Value *
+BuildUtil::loadImm(Value *dst, double d)
+{
+   return mkOp1v(OP_MOV, TYPE_F64, dst ? dst : getScratch(), mkImm(d));
+}
+
 Value *
 BuildUtil::loadImm(Value *dst, uint32_t u)
 {
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_build_util.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_build_util.h
index 0d544581697..8f3bf77949c 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_build_util.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_build_util.h
@@ -90,12 +90,14 @@ public:
    void mkClobber(DataFile file, uint32_t regMask, int regUnitLog2);
 
    ImmediateValue *mkImm(float);
+   ImmediateValue *mkImm(double);
    ImmediateValue *mkImm(uint32_t);
    ImmediateValue *mkImm(uint64_t);
 
    ImmediateValue *mkImm(int i) { return mkImm((uint32_t)i); }
 
    Value *loadImm(Value *dst, float);
+   Value *loadImm(Value *dst, double);
    Value *loadImm(Value *dst, uint32_t);
    Value *loadImm(Value *dst, uint64_t);
 
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
index b952c760a21..f0955978dc8 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
@@ -1164,6 +1164,11 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
 #define CASE(type, dst, fmin, fmax, imin, imax, umin, umax) \
    case type: \
       switch (i->sType) { \
+      case TYPE_F64: \
+         res.data.dst = util_iround(i->saturate ? \
+                                    CLAMP(imm0.reg.data.f64, fmin, fmax) : \
+                                    imm0.reg.data.f64); \
+         break; \
       case TYPE_F32: \
          res.data.dst = util_iround(i->saturate ? \
                                     CLAMP(imm0.reg.data.f32, fmin, fmax) : \
@@ -1201,6 +1206,11 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
       CASE(TYPE_S32, s32, INT32_MIN, INT32_MAX, INT32_MIN, INT32_MAX, 0, INT32_MAX);
       case TYPE_F32:
          switch (i->sType) {
+         case TYPE_F64:
+            res.data.f32 = i->saturate ?
+               CLAMP(imm0.reg.data.f64, 0.0f, 1.0f) :
+               imm0.reg.data.f64;
+            break;
          case TYPE_F32:
             res.data.f32 = i->saturate ?
                CLAMP(imm0.reg.data.f32, 0.0f, 1.0f) :
@@ -1215,6 +1225,27 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
          }
          i->setSrc(0, bld.mkImm(res.data.f32));
          break;
+      case TYPE_F64:
+         switch (i->sType) {
+         case TYPE_F64:
+            res.data.f64 = i->saturate ?
+               CLAMP(imm0.reg.data.f64, 0.0f, 1.0f) :
+               imm0.reg.data.f64;
+            break;
+         case TYPE_F32:
+            res.data.f64 = i->saturate ?
+               CLAMP(imm0.reg.data.f32, 0.0f, 1.0f) :
+               imm0.reg.data.f32;
+            break;
+         case TYPE_U16: res.data.f64 = (double) imm0.reg.data.u16; break;
+         case TYPE_U32: res.data.f64 = (double) imm0.reg.data.u32; break;
+         case TYPE_S16: res.data.f64 = (double) imm0.reg.data.s16; break;
+         case TYPE_S32: res.data.f64 = (double) imm0.reg.data.s32; break;
+         default:
+            return;
+         }
+         i->setSrc(0, bld.mkImm(res.data.f64));
+         break;
       default:
          return;
       }

From 393d0c336bc766a123e139ae85383663f81e00d1 Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Fri, 6 Nov 2015 19:28:29 -0500
Subject: [PATCH 095/287] nv50/ir: properly set the type of the constant
 folding result

This removes the hack used for merge, which only covers a fraction of
the cases.

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
---
 src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
index f0955978dc8..0f1dcf0dacd 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
@@ -448,7 +448,7 @@ ConstantFolding::expr(Instruction *i,
 {
    struct Storage *const a = &imm0.reg, *const b = &imm1.reg;
    struct Storage res;
-   uint8_t fixSrc0Size = 0;
+   DataType type = i->dType;
 
    memset(&res.data, 0, sizeof(res.data));
 
@@ -590,6 +590,7 @@ ConstantFolding::expr(Instruction *i,
       // The two arguments to pfetch are logically added together. Normally
       // the second argument will not be constant, but that can happen.
       res.data.u32 = a->data.u32 + b->data.u32;
+      type = TYPE_U32;
       break;
    case OP_MERGE:
       switch (i->dType) {
@@ -597,7 +598,6 @@ ConstantFolding::expr(Instruction *i,
       case TYPE_S64:
       case TYPE_F64:
          res.data.u64 = (((uint64_t)b->data.u32) << 32) | a->data.u32;
-         fixSrc0Size = 8;
          break;
       default:
          return;
@@ -616,8 +616,8 @@ ConstantFolding::expr(Instruction *i,
    i->setSrc(1, NULL);
 
    i->getSrc(0)->reg.data = res.data;
-   if (fixSrc0Size)
-      i->getSrc(0)->reg.size = fixSrc0Size;
+   i->getSrc(0)->reg.type = type;
+   i->getSrc(0)->reg.size = typeSizeof(type);
 
    switch (i->op) {
    case OP_MAD:

From 8e9ade7eb3582fc541700ade1d232a329da890b0 Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Sat, 7 Nov 2015 00:41:05 -0500
Subject: [PATCH 096/287] nv50/ir: allow emission of immediates in imul/imad
 ops

Nothing actually uses this yet (due to complications), but the emission
logic is right.

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
---
 .../drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp      | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp
index ee115b581b8..7e0fb532565 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp
@@ -1125,7 +1125,10 @@ CodeEmitterNV50::emitIMUL(const Instruction *i)
 
    if (i->encSize == 8) {
       code[1] = (i->sType == TYPE_S16) ? (0x8000 | 0x4000) : 0x0000;
-      emitForm_MAD(i);
+      if (i->src(1).getFile() == FILE_IMMEDIATE)
+         emitForm_IMM(i);
+      else
+         emitForm_MAD(i);
    } else {
       if (i->sType == TYPE_S16)
          code[0] |= 0x8100;
@@ -1199,7 +1202,10 @@ CodeEmitterNV50::emitIMAD(const Instruction *i)
    code[1] |= neg1 << 27;
    code[1] |= neg2 << 26;
 
-   emitForm_MAD(i);
+   if (i->src(1).getFile() == FILE_IMMEDIATE)
+      emitForm_IMM(i);
+   else
+      emitForm_MAD(i);
 
    if (i->flagsSrc >= 0) {
       // add with carry from $cX

From c3e527f93d4281ad6e2ca165eaf6ff588e4faefa Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <marek.olsak@amd.com>
Date: Sat, 31 Oct 2015 01:03:42 +0100
Subject: [PATCH 097/287] radeonsi: only enable write confirmation on the last
 CP DMA packet
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This should improve performance for big copies that need to be split.

Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
---
 src/gallium/drivers/radeonsi/si_cp_dma.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_cp_dma.c b/src/gallium/drivers/radeonsi/si_cp_dma.c
index d4bd7b28cf3..c5636444e62 100644
--- a/src/gallium/drivers/radeonsi/si_cp_dma.c
+++ b/src/gallium/drivers/radeonsi/si_cp_dma.c
@@ -48,6 +48,7 @@ static void si_emit_cp_dma_copy_buffer(struct si_context *sctx,
 {
 	struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
 	uint32_t sync_flag = flags & R600_CP_DMA_SYNC ? S_411_CP_SYNC(1) : 0;
+	uint32_t wr_confirm = !(flags & R600_CP_DMA_SYNC) ? S_414_DISABLE_WR_CONFIRM(1) : 0;
 	uint32_t raw_wait = flags & SI_CP_DMA_RAW_WAIT ? S_414_RAW_WAIT(1) : 0;
 	uint32_t sel = flags & CIK_CP_DMA_USE_L2 ?
 			   S_411_SRC_SEL(V_411_SRC_ADDR_TC_L2) |
@@ -70,7 +71,7 @@ static void si_emit_cp_dma_copy_buffer(struct si_context *sctx,
 		radeon_emit(cs, sync_flag | ((src_va >> 32) & 0xffff)); /* CP_SYNC [31] | SRC_ADDR_HI [15:0] */
 		radeon_emit(cs, dst_va);			/* DST_ADDR_LO [31:0] */
 		radeon_emit(cs, (dst_va >> 32) & 0xffff);	/* DST_ADDR_HI [15:0] */
-		radeon_emit(cs, size | raw_wait);		/* COMMAND [29:22] | BYTE_COUNT [20:0] */
+		radeon_emit(cs, size | wr_confirm | raw_wait);	/* COMMAND [29:22] | BYTE_COUNT [20:0] */
 	}
 }
 
@@ -81,6 +82,7 @@ static void si_emit_cp_dma_clear_buffer(struct si_context *sctx,
 {
 	struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
 	uint32_t sync_flag = flags & R600_CP_DMA_SYNC ? S_411_CP_SYNC(1) : 0;
+	uint32_t wr_confirm = !(flags & R600_CP_DMA_SYNC) ? S_414_DISABLE_WR_CONFIRM(1) : 0;
 	uint32_t raw_wait = flags & SI_CP_DMA_RAW_WAIT ? S_414_RAW_WAIT(1) : 0;
 	uint32_t dst_sel = flags & CIK_CP_DMA_USE_L2 ? S_411_DSL_SEL(V_411_DST_ADDR_TC_L2) : 0;
 
@@ -101,7 +103,7 @@ static void si_emit_cp_dma_clear_buffer(struct si_context *sctx,
 		radeon_emit(cs, sync_flag | S_411_SRC_SEL(V_411_DATA)); /* CP_SYNC [31] | SRC_SEL[30:29] */
 		radeon_emit(cs, dst_va);			/* DST_ADDR_LO [31:0] */
 		radeon_emit(cs, (dst_va >> 32) & 0xffff);	/* DST_ADDR_HI [15:0] */
-		radeon_emit(cs, size | raw_wait);		/* COMMAND [29:22] | BYTE_COUNT [20:0] */
+		radeon_emit(cs, size | wr_confirm | raw_wait);	/* COMMAND [29:22] | BYTE_COUNT [20:0] */
 	}
 }
 

From 89da3b4458762a76de2774118bbb53953f01c562 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <marek.olsak@amd.com>
Date: Sat, 31 Oct 2015 01:21:01 +0100
Subject: [PATCH 098/287] radeonsi: unify CP DMA code determining various flags
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

v2: don't call get_flush_flags twice per function

Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
---
 src/gallium/drivers/radeonsi/si_cp_dma.c | 51 +++++++++++-------------
 1 file changed, 23 insertions(+), 28 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_cp_dma.c b/src/gallium/drivers/radeonsi/si_cp_dma.c
index c5636444e62..993fb44328c 100644
--- a/src/gallium/drivers/radeonsi/si_cp_dma.c
+++ b/src/gallium/drivers/radeonsi/si_cp_dma.c
@@ -107,6 +107,21 @@ static void si_emit_cp_dma_clear_buffer(struct si_context *sctx,
 	}
 }
 
+static unsigned get_flush_flags(struct si_context *sctx, bool is_framebuffer)
+{
+	if (is_framebuffer)
+		return SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER;
+
+	return SI_CONTEXT_INV_TC_L1 |
+	       (sctx->b.chip_class == SI ? SI_CONTEXT_INV_TC_L2 : 0) |
+	       SI_CONTEXT_INV_KCACHE;
+}
+
+static unsigned get_tc_l2_flag(struct si_context *sctx, bool is_framebuffer)
+{
+	return is_framebuffer || sctx->b.chip_class == SI ? 0 : CIK_CP_DMA_USE_L2;
+}
+
 /* The max number of bytes to copy per packet. */
 #define CP_DMA_MAX_BYTE_COUNT ((1 << 21) - 8)
 
@@ -115,7 +130,8 @@ static void si_clear_buffer(struct pipe_context *ctx, struct pipe_resource *dst,
 			    bool is_framebuffer)
 {
 	struct si_context *sctx = (struct si_context*)ctx;
-	unsigned flush_flags, tc_l2_flag;
+	unsigned tc_l2_flag = get_tc_l2_flag(sctx, is_framebuffer);
+	unsigned flush_flags = get_flush_flags(sctx, is_framebuffer);
 
 	if (!size)
 		return;
@@ -139,19 +155,8 @@ static void si_clear_buffer(struct pipe_context *ctx, struct pipe_resource *dst,
 
 	uint64_t va = r600_resource(dst)->gpu_address + offset;
 
-	/* Flush the caches where the resource is bound. */
-	if (is_framebuffer) {
-		flush_flags = SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER;
-		tc_l2_flag = 0;
-	} else {
-		flush_flags = SI_CONTEXT_INV_TC_L1 |
-			      (sctx->b.chip_class == SI ? SI_CONTEXT_INV_TC_L2 : 0) |
-			      SI_CONTEXT_INV_KCACHE;
-		tc_l2_flag = sctx->b.chip_class == SI ? 0 : CIK_CP_DMA_USE_L2;
-	}
-
-	sctx->b.flags |= SI_CONTEXT_PS_PARTIAL_FLUSH |
-			 flush_flags;
+	/* Flush the caches. */
+	sctx->b.flags |= SI_CONTEXT_PS_PARTIAL_FLUSH | flush_flags;
 
 	while (size) {
 		unsigned byte_count = MIN2(size, CP_DMA_MAX_BYTE_COUNT);
@@ -195,7 +200,8 @@ void si_copy_buffer(struct si_context *sctx,
 		    uint64_t dst_offset, uint64_t src_offset, unsigned size,
 		    bool is_framebuffer)
 {
-	unsigned flush_flags, tc_l2_flag;
+	unsigned tc_l2_flag = get_tc_l2_flag(sctx, is_framebuffer);
+	unsigned flush_flags = get_flush_flags(sctx, is_framebuffer);
 
 	if (!size)
 		return;
@@ -209,19 +215,8 @@ void si_copy_buffer(struct si_context *sctx,
 	dst_offset += r600_resource(dst)->gpu_address;
 	src_offset += r600_resource(src)->gpu_address;
 
-	/* Flush the caches where the resource is bound. */
-	if (is_framebuffer) {
-		flush_flags = SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER;
-		tc_l2_flag = 0;
-	} else {
-		flush_flags = SI_CONTEXT_INV_TC_L1 |
-			      (sctx->b.chip_class == SI ? SI_CONTEXT_INV_TC_L2 : 0) |
-			      SI_CONTEXT_INV_KCACHE;
-		tc_l2_flag = sctx->b.chip_class == SI ? 0 : CIK_CP_DMA_USE_L2;
-	}
-
-	sctx->b.flags |= SI_CONTEXT_PS_PARTIAL_FLUSH |
-			 flush_flags;
+	/* Flush the caches. */
+	sctx->b.flags |= SI_CONTEXT_PS_PARTIAL_FLUSH | flush_flags;
 
 	while (size) {
 		unsigned sync_flags = tc_l2_flag;

From fc0416ef5d7775b00f13a5fa83620abb7b1669a0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <marek.olsak@amd.com>
Date: Sat, 31 Oct 2015 01:33:42 +0100
Subject: [PATCH 099/287] radeonsi: unify CP DMA preparation logic
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
---
 src/gallium/drivers/radeonsi/si_cp_dma.c | 71 ++++++++++++------------
 1 file changed, 34 insertions(+), 37 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_cp_dma.c b/src/gallium/drivers/radeonsi/si_cp_dma.c
index 993fb44328c..2e39a24071b 100644
--- a/src/gallium/drivers/radeonsi/si_cp_dma.c
+++ b/src/gallium/drivers/radeonsi/si_cp_dma.c
@@ -122,6 +122,36 @@ static unsigned get_tc_l2_flag(struct si_context *sctx, bool is_framebuffer)
 	return is_framebuffer || sctx->b.chip_class == SI ? 0 : CIK_CP_DMA_USE_L2;
 }
 
+static void si_cp_dma_prepare(struct si_context *sctx, struct pipe_resource *dst,
+			      struct pipe_resource *src, unsigned byte_count,
+			      unsigned remaining_size, unsigned *flags)
+{
+	si_need_cs_space(sctx);
+
+	/* This must be done after need_cs_space. */
+	radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
+				  (struct r600_resource*)dst,
+				  RADEON_USAGE_WRITE, RADEON_PRIO_CP_DMA);
+	if (src)
+		radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
+					  (struct r600_resource*)src,
+					  RADEON_USAGE_READ, RADEON_PRIO_CP_DMA);
+
+	/* Flush the caches for the first copy only.
+	 * Also wait for the previous CP DMA operations.
+	 */
+	if (sctx->b.flags) {
+		si_emit_cache_flush(sctx, NULL);
+		*flags |= SI_CP_DMA_RAW_WAIT;
+	}
+
+	/* Do the synchronization after the last dma, so that all data
+	 * is written to memory.
+	 */
+	if (byte_count == remaining_size)
+		*flags |= R600_CP_DMA_SYNC;
+}
+
 /* The max number of bytes to copy per packet. */
 #define CP_DMA_MAX_BYTE_COUNT ((1 << 21) - 8)
 
@@ -162,23 +192,7 @@ static void si_clear_buffer(struct pipe_context *ctx, struct pipe_resource *dst,
 		unsigned byte_count = MIN2(size, CP_DMA_MAX_BYTE_COUNT);
 		unsigned dma_flags = tc_l2_flag;
 
-		si_need_cs_space(sctx);
-
-		/* This must be done after need_cs_space. */
-		radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
-				      (struct r600_resource*)dst, RADEON_USAGE_WRITE,
-				      RADEON_PRIO_CP_DMA);
-
-		/* Flush the caches for the first copy only.
-		 * Also wait for the previous CP DMA operations. */
-		if (sctx->b.flags) {
-			si_emit_cache_flush(sctx, NULL);
-			dma_flags |= SI_CP_DMA_RAW_WAIT; /* same as WAIT_UNTIL=CP_DMA_IDLE */
-		}
-
-		/* Do the synchronization after the last copy, so that all data is written to memory. */
-		if (size == byte_count)
-			dma_flags |= R600_CP_DMA_SYNC;
+		si_cp_dma_prepare(sctx, dst, NULL, byte_count, size, &dma_flags);
 
 		/* Emit the clear packet. */
 		si_emit_cp_dma_clear_buffer(sctx, va, byte_count, value, dma_flags);
@@ -219,29 +233,12 @@ void si_copy_buffer(struct si_context *sctx,
 	sctx->b.flags |= SI_CONTEXT_PS_PARTIAL_FLUSH | flush_flags;
 
 	while (size) {
-		unsigned sync_flags = tc_l2_flag;
+		unsigned dma_flags = tc_l2_flag;
 		unsigned byte_count = MIN2(size, CP_DMA_MAX_BYTE_COUNT);
 
-		si_need_cs_space(sctx);
+		si_cp_dma_prepare(sctx, dst, src, byte_count, size, &dma_flags);
 
-		/* Flush the caches for the first copy only. Also wait for old CP DMA packets to complete. */
-		if (sctx->b.flags) {
-			si_emit_cache_flush(sctx, NULL);
-			sync_flags |= SI_CP_DMA_RAW_WAIT;
-		}
-
-		/* Do the synchronization after the last copy, so that all data is written to memory. */
-		if (size == byte_count) {
-			sync_flags |= R600_CP_DMA_SYNC;
-		}
-
-		/* This must be done after r600_need_cs_space. */
-		radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, (struct r600_resource*)src,
-				      RADEON_USAGE_READ, RADEON_PRIO_CP_DMA);
-		radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, (struct r600_resource*)dst,
-				      RADEON_USAGE_WRITE, RADEON_PRIO_CP_DMA);
-
-		si_emit_cp_dma_copy_buffer(sctx, dst_offset, src_offset, byte_count, sync_flags);
+		si_emit_cp_dma_copy_buffer(sctx, dst_offset, src_offset, byte_count, dma_flags);
 
 		size -= byte_count;
 		src_offset += byte_count;

From 2658777f468e8c0d71669a043ff7401672717622 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <marek.olsak@amd.com>
Date: Sun, 1 Nov 2015 13:43:26 +0100
Subject: [PATCH 100/287] radeonsi: add workarounds for CP DMA to stay on the
 fast path
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

v2: set emit_scratch_reloc, add a NULL check

Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
---
 src/gallium/drivers/radeonsi/si_cp_dma.c | 93 ++++++++++++++++++++++--
 1 file changed, 88 insertions(+), 5 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_cp_dma.c b/src/gallium/drivers/radeonsi/si_cp_dma.c
index 2e39a24071b..418b2cf65c5 100644
--- a/src/gallium/drivers/radeonsi/si_cp_dma.c
+++ b/src/gallium/drivers/radeonsi/si_cp_dma.c
@@ -152,8 +152,10 @@ static void si_cp_dma_prepare(struct si_context *sctx, struct pipe_resource *dst
 		*flags |= R600_CP_DMA_SYNC;
 }
 
+/* Alignment for optimal performance. */
+#define CP_DMA_ALIGNMENT	32
 /* The max number of bytes to copy per packet. */
-#define CP_DMA_MAX_BYTE_COUNT ((1 << 21) - 8)
+#define CP_DMA_MAX_BYTE_COUNT	((1 << 21) - CP_DMA_ALIGNMENT)
 
 static void si_clear_buffer(struct pipe_context *ctx, struct pipe_resource *dst,
 			    unsigned offset, unsigned size, unsigned value,
@@ -209,11 +211,51 @@ static void si_clear_buffer(struct pipe_context *ctx, struct pipe_resource *dst,
 		r600_resource(dst)->TC_L2_dirty = true;
 }
 
+/**
+ * Realign the CP DMA engine. This must be done after a copy with an unaligned
+ * size.
+ *
+ * \param size  Remaining size to the CP DMA alignment.
+ */
+static void si_cp_dma_realign_engine(struct si_context *sctx, unsigned size)
+{
+	uint64_t va;
+	unsigned dma_flags = 0;
+	unsigned scratch_size = CP_DMA_ALIGNMENT * 2;
+
+	assert(size < CP_DMA_ALIGNMENT);
+
+	/* Use the scratch buffer as the dummy buffer. The 3D engine should be
+	 * idle at this point.
+	 */
+	if (!sctx->scratch_buffer ||
+	    sctx->scratch_buffer->b.b.width0 < scratch_size) {
+		r600_resource_reference(&sctx->scratch_buffer, NULL);
+		sctx->scratch_buffer =
+			si_resource_create_custom(&sctx->screen->b.b,
+						  PIPE_USAGE_DEFAULT,
+						  scratch_size);
+		if (!sctx->scratch_buffer)
+			return;
+		sctx->emit_scratch_reloc = true;
+	}
+
+	si_cp_dma_prepare(sctx, &sctx->scratch_buffer->b.b,
+			  &sctx->scratch_buffer->b.b, size, size, &dma_flags);
+
+	va = sctx->scratch_buffer->gpu_address;
+	si_emit_cp_dma_copy_buffer(sctx, va, va + CP_DMA_ALIGNMENT, size,
+				   dma_flags);
+}
+
 void si_copy_buffer(struct si_context *sctx,
 		    struct pipe_resource *dst, struct pipe_resource *src,
 		    uint64_t dst_offset, uint64_t src_offset, unsigned size,
 		    bool is_framebuffer)
 {
+	uint64_t main_dst_offset, main_src_offset;
+	unsigned skipped_size = 0;
+	unsigned realign_size = 0;
 	unsigned tc_l2_flag = get_tc_l2_flag(sctx, is_framebuffer);
 	unsigned flush_flags = get_flush_flags(sctx, is_framebuffer);
 
@@ -229,22 +271,63 @@ void si_copy_buffer(struct si_context *sctx,
 	dst_offset += r600_resource(dst)->gpu_address;
 	src_offset += r600_resource(src)->gpu_address;
 
+	/* If the size is not aligned, we must add a dummy copy at the end
+	 * just to align the internal counter. Otherwise, the DMA engine
+	 * would slow down by an order of magnitude for following copies.
+	 */
+	if (size % CP_DMA_ALIGNMENT)
+		realign_size = CP_DMA_ALIGNMENT - (size % CP_DMA_ALIGNMENT);
+
+	/* If the copy begins unaligned, we must start copying from the next
+	 * aligned block and the skipped part should be copied after everything
+	 * else has been copied. Only the src alignment matters, not dst.
+	 */
+	if (src_offset % CP_DMA_ALIGNMENT) {
+		skipped_size = CP_DMA_ALIGNMENT - (src_offset % CP_DMA_ALIGNMENT);
+		/* The main part will be skipped if the size is too small. */
+		skipped_size = MIN2(skipped_size, size);
+		size -= skipped_size;
+	}
+
 	/* Flush the caches. */
 	sctx->b.flags |= SI_CONTEXT_PS_PARTIAL_FLUSH | flush_flags;
 
+	/* This is the main part doing the copying. Src is always aligned. */
+	main_dst_offset = dst_offset + skipped_size;
+	main_src_offset = src_offset + skipped_size;
+
 	while (size) {
 		unsigned dma_flags = tc_l2_flag;
 		unsigned byte_count = MIN2(size, CP_DMA_MAX_BYTE_COUNT);
 
-		si_cp_dma_prepare(sctx, dst, src, byte_count, size, &dma_flags);
+		si_cp_dma_prepare(sctx, dst, src, byte_count,
+				  size + skipped_size + realign_size,
+				  &dma_flags);
 
-		si_emit_cp_dma_copy_buffer(sctx, dst_offset, src_offset, byte_count, dma_flags);
+		si_emit_cp_dma_copy_buffer(sctx, main_dst_offset, main_src_offset,
+					   byte_count, dma_flags);
 
 		size -= byte_count;
-		src_offset += byte_count;
-		dst_offset += byte_count;
+		main_src_offset += byte_count;
+		main_dst_offset += byte_count;
 	}
 
+	/* Copy the part we skipped because src wasn't aligned. */
+	if (skipped_size) {
+		unsigned dma_flags = tc_l2_flag;
+
+		si_cp_dma_prepare(sctx, dst, src, skipped_size,
+				  skipped_size + realign_size,
+				  &dma_flags);
+
+		si_emit_cp_dma_copy_buffer(sctx, dst_offset, src_offset,
+					   skipped_size, dma_flags);
+	}
+
+	/* Finally, realign the engine if the size wasn't aligned. */
+	if (realign_size)
+		si_cp_dma_realign_engine(sctx, realign_size);
+
 	/* Flush the caches again in case the 3D engine has been prefetching
 	 * the resource. */
 	sctx->b.flags |= flush_flags;

From d57ede92b7832f01df2aa5755c8c34b4de4866d4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <marek.olsak@amd.com>
Date: Tue, 3 Nov 2015 12:20:18 +0100
Subject: [PATCH 101/287] radeonsi: add register definitions for Stoney

There are a few non-Stoney changes too.

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
---
 src/gallium/drivers/radeonsi/sid.h | 322 +++++++++++++++++++++++++++++
 1 file changed, 322 insertions(+)

diff --git a/src/gallium/drivers/radeonsi/sid.h b/src/gallium/drivers/radeonsi/sid.h
index 4bb24572b90..0c48340beef 100644
--- a/src/gallium/drivers/radeonsi/sid.h
+++ b/src/gallium/drivers/radeonsi/sid.h
@@ -3608,6 +3608,9 @@
 #define   S_00B854_WAVES_PER_SH(x)                                    (((x) & 0x3F) << 0) /* mask is 0x3FF on CIK */
 #define   G_00B854_WAVES_PER_SH(x)                                    (((x) >> 0) & 0x3F) /* mask is 0x3FF on CIK */
 #define   C_00B854_WAVES_PER_SH                                       0xFFFFFFC0 /* mask is 0x3FF on CIK */
+#define   S_00B854_WAVES_PER_SH_CIK(x)                                (((x) & 0x3FF) << 0)
+#define   G_00B854_WAVES_PER_SH_CIK(x)                                (((x) >> 0) & 0x3FF)
+#define   C_00B854_WAVES_PER_SH_CIK                                   0xFFFFFC00
 #define   S_00B854_TG_PER_CU(x)                                       (((x) & 0x0F) << 12)
 #define   G_00B854_TG_PER_CU(x)                                       (((x) >> 12) & 0x0F)
 #define   C_00B854_TG_PER_CU                                          0xFFFF0FFF
@@ -5211,6 +5214,296 @@
 #define     V_028714_SPI_SHADER_UINT16_ABGR                         0x07
 #define     V_028714_SPI_SHADER_SINT16_ABGR                         0x08
 #define     V_028714_SPI_SHADER_32_ABGR                             0x09
+/* Stoney */
+#define R_028754_SX_PS_DOWNCONVERT                                      0x028754
+#define   S_028754_MRT0(x)                                            (((x) & 0x0F) << 0)
+#define   G_028754_MRT0(x)                                            (((x) >> 0) & 0x0F)
+#define   C_028754_MRT0                                               0xFFFFFFF0
+#define     V_028754_SX_RT_EXPORT_NO_CONVERSION				0
+#define     V_028754_SX_RT_EXPORT_32_R					1
+#define     V_028754_SX_RT_EXPORT_32_A					2
+#define     V_028754_SX_RT_EXPORT_10_11_11				3
+#define     V_028754_SX_RT_EXPORT_2_10_10_10				4
+#define     V_028754_SX_RT_EXPORT_8_8_8_8				5
+#define     V_028754_SX_RT_EXPORT_5_6_5					6
+#define     V_028754_SX_RT_EXPORT_1_5_5_5				7
+#define     V_028754_SX_RT_EXPORT_4_4_4_4				8
+#define     V_028754_SX_RT_EXPORT_16_16_GR				9
+#define     V_028754_SX_RT_EXPORT_16_16_AR				10
+#define   S_028754_MRT1(x)                                            (((x) & 0x0F) << 4)
+#define   G_028754_MRT1(x)                                            (((x) >> 4) & 0x0F)
+#define   C_028754_MRT1                                               0xFFFFFF0F
+#define   S_028754_MRT2(x)                                            (((x) & 0x0F) << 8)
+#define   G_028754_MRT2(x)                                            (((x) >> 8) & 0x0F)
+#define   C_028754_MRT2                                               0xFFFFF0FF
+#define   S_028754_MRT3(x)                                            (((x) & 0x0F) << 12)
+#define   G_028754_MRT3(x)                                            (((x) >> 12) & 0x0F)
+#define   C_028754_MRT3                                               0xFFFF0FFF
+#define   S_028754_MRT4(x)                                            (((x) & 0x0F) << 16)
+#define   G_028754_MRT4(x)                                            (((x) >> 16) & 0x0F)
+#define   C_028754_MRT4                                               0xFFF0FFFF
+#define   S_028754_MRT5(x)                                            (((x) & 0x0F) << 20)
+#define   G_028754_MRT5(x)                                            (((x) >> 20) & 0x0F)
+#define   C_028754_MRT5                                               0xFF0FFFFF
+#define   S_028754_MRT6(x)                                            (((x) & 0x0F) << 24)
+#define   G_028754_MRT6(x)                                            (((x) >> 24) & 0x0F)
+#define   C_028754_MRT6                                               0xF0FFFFFF
+#define   S_028754_MRT7(x)                                            (((x) & 0x0F) << 28)
+#define   G_028754_MRT7(x)                                            (((x) >> 28) & 0x0F)
+#define   C_028754_MRT7                                               0x0FFFFFFF
+#define R_028758_SX_BLEND_OPT_EPSILON                                   0x028758
+#define   S_028758_MRT0_EPSILON(x)                                    (((x) & 0x0F) << 0)
+#define   G_028758_MRT0_EPSILON(x)                                    (((x) >> 0) & 0x0F)
+#define   C_028758_MRT0_EPSILON                                       0xFFFFFFF0
+#define      V_028758_EXACT						0
+#define      V_028758_11BIT_FORMAT					1
+#define      V_028758_10BIT_FORMAT					3
+#define      V_028758_8BIT_FORMAT					7
+#define      V_028758_6BIT_FORMAT					11
+#define      V_028758_5BIT_FORMAT					13
+#define      V_028758_4BIT_FORMAT					15
+#define   S_028758_MRT1_EPSILON(x)                                    (((x) & 0x0F) << 4)
+#define   G_028758_MRT1_EPSILON(x)                                    (((x) >> 4) & 0x0F)
+#define   C_028758_MRT1_EPSILON                                       0xFFFFFF0F
+#define   S_028758_MRT2_EPSILON(x)                                    (((x) & 0x0F) << 8)
+#define   G_028758_MRT2_EPSILON(x)                                    (((x) >> 8) & 0x0F)
+#define   C_028758_MRT2_EPSILON                                       0xFFFFF0FF
+#define   S_028758_MRT3_EPSILON(x)                                    (((x) & 0x0F) << 12)
+#define   G_028758_MRT3_EPSILON(x)                                    (((x) >> 12) & 0x0F)
+#define   C_028758_MRT3_EPSILON                                       0xFFFF0FFF
+#define   S_028758_MRT4_EPSILON(x)                                    (((x) & 0x0F) << 16)
+#define   G_028758_MRT4_EPSILON(x)                                    (((x) >> 16) & 0x0F)
+#define   C_028758_MRT4_EPSILON                                       0xFFF0FFFF
+#define   S_028758_MRT5_EPSILON(x)                                    (((x) & 0x0F) << 20)
+#define   G_028758_MRT5_EPSILON(x)                                    (((x) >> 20) & 0x0F)
+#define   C_028758_MRT5_EPSILON                                       0xFF0FFFFF
+#define   S_028758_MRT6_EPSILON(x)                                    (((x) & 0x0F) << 24)
+#define   G_028758_MRT6_EPSILON(x)                                    (((x) >> 24) & 0x0F)
+#define   C_028758_MRT6_EPSILON                                       0xF0FFFFFF
+#define   S_028758_MRT7_EPSILON(x)                                    (((x) & 0x0F) << 28)
+#define   G_028758_MRT7_EPSILON(x)                                    (((x) >> 28) & 0x0F)
+#define   C_028758_MRT7_EPSILON                                       0x0FFFFFFF
+#define R_02875C_SX_BLEND_OPT_CONTROL                                   0x02875C
+#define   S_02875C_MRT0_COLOR_OPT_DISABLE(x)                          (((x) & 0x1) << 0)
+#define   G_02875C_MRT0_COLOR_OPT_DISABLE(x)                          (((x) >> 0) & 0x1)
+#define   C_02875C_MRT0_COLOR_OPT_DISABLE                             0xFFFFFFFE
+#define   S_02875C_MRT0_ALPHA_OPT_DISABLE(x)                          (((x) & 0x1) << 1)
+#define   G_02875C_MRT0_ALPHA_OPT_DISABLE(x)                          (((x) >> 1) & 0x1)
+#define   C_02875C_MRT0_ALPHA_OPT_DISABLE                             0xFFFFFFFD
+#define   S_02875C_MRT1_COLOR_OPT_DISABLE(x)                          (((x) & 0x1) << 4)
+#define   G_02875C_MRT1_COLOR_OPT_DISABLE(x)                          (((x) >> 4) & 0x1)
+#define   C_02875C_MRT1_COLOR_OPT_DISABLE                             0xFFFFFFEF
+#define   S_02875C_MRT1_ALPHA_OPT_DISABLE(x)                          (((x) & 0x1) << 5)
+#define   G_02875C_MRT1_ALPHA_OPT_DISABLE(x)                          (((x) >> 5) & 0x1)
+#define   C_02875C_MRT1_ALPHA_OPT_DISABLE                             0xFFFFFFDF
+#define   S_02875C_MRT2_COLOR_OPT_DISABLE(x)                          (((x) & 0x1) << 8)
+#define   G_02875C_MRT2_COLOR_OPT_DISABLE(x)                          (((x) >> 8) & 0x1)
+#define   C_02875C_MRT2_COLOR_OPT_DISABLE                             0xFFFFFEFF
+#define   S_02875C_MRT2_ALPHA_OPT_DISABLE(x)                          (((x) & 0x1) << 9)
+#define   G_02875C_MRT2_ALPHA_OPT_DISABLE(x)                          (((x) >> 9) & 0x1)
+#define   C_02875C_MRT2_ALPHA_OPT_DISABLE                             0xFFFFFDFF
+#define   S_02875C_MRT3_COLOR_OPT_DISABLE(x)                          (((x) & 0x1) << 12)
+#define   G_02875C_MRT3_COLOR_OPT_DISABLE(x)                          (((x) >> 12) & 0x1)
+#define   C_02875C_MRT3_COLOR_OPT_DISABLE                             0xFFFFEFFF
+#define   S_02875C_MRT3_ALPHA_OPT_DISABLE(x)                          (((x) & 0x1) << 13)
+#define   G_02875C_MRT3_ALPHA_OPT_DISABLE(x)                          (((x) >> 13) & 0x1)
+#define   C_02875C_MRT3_ALPHA_OPT_DISABLE                             0xFFFFDFFF
+#define   S_02875C_MRT4_COLOR_OPT_DISABLE(x)                          (((x) & 0x1) << 16)
+#define   G_02875C_MRT4_COLOR_OPT_DISABLE(x)                          (((x) >> 16) & 0x1)
+#define   C_02875C_MRT4_COLOR_OPT_DISABLE                             0xFFFEFFFF
+#define   S_02875C_MRT4_ALPHA_OPT_DISABLE(x)                          (((x) & 0x1) << 17)
+#define   G_02875C_MRT4_ALPHA_OPT_DISABLE(x)                          (((x) >> 17) & 0x1)
+#define   C_02875C_MRT4_ALPHA_OPT_DISABLE                             0xFFFDFFFF
+#define   S_02875C_MRT5_COLOR_OPT_DISABLE(x)                          (((x) & 0x1) << 20)
+#define   G_02875C_MRT5_COLOR_OPT_DISABLE(x)                          (((x) >> 20) & 0x1)
+#define   C_02875C_MRT5_COLOR_OPT_DISABLE                             0xFFEFFFFF
+#define   S_02875C_MRT5_ALPHA_OPT_DISABLE(x)                          (((x) & 0x1) << 21)
+#define   G_02875C_MRT5_ALPHA_OPT_DISABLE(x)                          (((x) >> 21) & 0x1)
+#define   C_02875C_MRT5_ALPHA_OPT_DISABLE                             0xFFDFFFFF
+#define   S_02875C_MRT6_COLOR_OPT_DISABLE(x)                          (((x) & 0x1) << 24)
+#define   G_02875C_MRT6_COLOR_OPT_DISABLE(x)                          (((x) >> 24) & 0x1)
+#define   C_02875C_MRT6_COLOR_OPT_DISABLE                             0xFEFFFFFF
+#define   S_02875C_MRT6_ALPHA_OPT_DISABLE(x)                          (((x) & 0x1) << 25)
+#define   G_02875C_MRT6_ALPHA_OPT_DISABLE(x)                          (((x) >> 25) & 0x1)
+#define   C_02875C_MRT6_ALPHA_OPT_DISABLE                             0xFDFFFFFF
+#define   S_02875C_MRT7_COLOR_OPT_DISABLE(x)                          (((x) & 0x1) << 28)
+#define   G_02875C_MRT7_COLOR_OPT_DISABLE(x)                          (((x) >> 28) & 0x1)
+#define   C_02875C_MRT7_COLOR_OPT_DISABLE                             0xEFFFFFFF
+#define   S_02875C_MRT7_ALPHA_OPT_DISABLE(x)                          (((x) & 0x1) << 29)
+#define   G_02875C_MRT7_ALPHA_OPT_DISABLE(x)                          (((x) >> 29) & 0x1)
+#define   C_02875C_MRT7_ALPHA_OPT_DISABLE                             0xDFFFFFFF
+#define   S_02875C_PIXEN_ZERO_OPT_DISABLE(x)                          (((x) & 0x1) << 31)
+#define   G_02875C_PIXEN_ZERO_OPT_DISABLE(x)                          (((x) >> 31) & 0x1)
+#define   C_02875C_PIXEN_ZERO_OPT_DISABLE                             0x7FFFFFFF
+#define R_028760_SX_MRT0_BLEND_OPT                                      0x028760
+#define   S_028760_COLOR_SRC_OPT(x)                                   (((x) & 0x07) << 0)
+#define   G_028760_COLOR_SRC_OPT(x)                                   (((x) >> 0) & 0x07)
+#define   C_028760_COLOR_SRC_OPT                                      0xFFFFFFF8
+#define     V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_ALL			0
+#define     V_028760_BLEND_OPT_PRESERVE_ALL_IGNORE_NONE			1
+#define     V_028760_BLEND_OPT_PRESERVE_C1_IGNORE_C0			2
+#define     V_028760_BLEND_OPT_PRESERVE_C0_IGNORE_C1			3
+#define     V_028760_BLEND_OPT_PRESERVE_A1_IGNORE_A0			4
+#define     V_028760_BLEND_OPT_PRESERVE_A0_IGNORE_A1			5
+#define     V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_A0			6
+#define     V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_NONE		7
+#define   S_028760_COLOR_DST_OPT(x)                                   (((x) & 0x07) << 4)
+#define   G_028760_COLOR_DST_OPT(x)                                   (((x) >> 4) & 0x07)
+#define   C_028760_COLOR_DST_OPT                                      0xFFFFFF8F
+#define   S_028760_COLOR_COMB_FCN(x)                                  (((x) & 0x07) << 8)
+#define   G_028760_COLOR_COMB_FCN(x)                                  (((x) >> 8) & 0x07)
+#define   C_028760_COLOR_COMB_FCN                                     0xFFFFF8FF
+#define     V_028760_OPT_COMB_NONE					0
+#define     V_028760_OPT_COMB_ADD					1
+#define     V_028760_OPT_COMB_SUBTRACT					2
+#define     V_028760_OPT_COMB_MIN					3
+#define     V_028760_OPT_COMB_MAX					4
+#define     V_028760_OPT_COMB_REVSUBTRACT				5
+#define     V_028760_OPT_COMB_BLEND_DISABLED				6
+#define     V_028760_OPT_COMB_SAFE_ADD					7
+#define   S_028760_ALPHA_SRC_OPT(x)                                   (((x) & 0x07) << 16)
+#define   G_028760_ALPHA_SRC_OPT(x)                                   (((x) >> 16) & 0x07)
+#define   C_028760_ALPHA_SRC_OPT                                      0xFFF8FFFF
+#define   S_028760_ALPHA_DST_OPT(x)                                   (((x) & 0x07) << 20)
+#define   G_028760_ALPHA_DST_OPT(x)                                   (((x) >> 20) & 0x07)
+#define   C_028760_ALPHA_DST_OPT                                      0xFF8FFFFF
+#define   S_028760_ALPHA_COMB_FCN(x)                                  (((x) & 0x07) << 24)
+#define   G_028760_ALPHA_COMB_FCN(x)                                  (((x) >> 24) & 0x07)
+#define   C_028760_ALPHA_COMB_FCN                                     0xF8FFFFFF
+#define R_028764_SX_MRT1_BLEND_OPT                                      0x028764
+#define   S_028764_COLOR_SRC_OPT(x)                                   (((x) & 0x07) << 0)
+#define   G_028764_COLOR_SRC_OPT(x)                                   (((x) >> 0) & 0x07)
+#define   C_028764_COLOR_SRC_OPT                                      0xFFFFFFF8
+#define   S_028764_COLOR_DST_OPT(x)                                   (((x) & 0x07) << 4)
+#define   G_028764_COLOR_DST_OPT(x)                                   (((x) >> 4) & 0x07)
+#define   C_028764_COLOR_DST_OPT                                      0xFFFFFF8F
+#define   S_028764_COLOR_COMB_FCN(x)                                  (((x) & 0x07) << 8)
+#define   G_028764_COLOR_COMB_FCN(x)                                  (((x) >> 8) & 0x07)
+#define   C_028764_COLOR_COMB_FCN                                     0xFFFFF8FF
+#define   S_028764_ALPHA_SRC_OPT(x)                                   (((x) & 0x07) << 16)
+#define   G_028764_ALPHA_SRC_OPT(x)                                   (((x) >> 16) & 0x07)
+#define   C_028764_ALPHA_SRC_OPT                                      0xFFF8FFFF
+#define   S_028764_ALPHA_DST_OPT(x)                                   (((x) & 0x07) << 20)
+#define   G_028764_ALPHA_DST_OPT(x)                                   (((x) >> 20) & 0x07)
+#define   C_028764_ALPHA_DST_OPT                                      0xFF8FFFFF
+#define   S_028764_ALPHA_COMB_FCN(x)                                  (((x) & 0x07) << 24)
+#define   G_028764_ALPHA_COMB_FCN(x)                                  (((x) >> 24) & 0x07)
+#define   C_028764_ALPHA_COMB_FCN                                     0xF8FFFFFF
+#define R_028768_SX_MRT2_BLEND_OPT                                      0x028768
+#define   S_028768_COLOR_SRC_OPT(x)                                   (((x) & 0x07) << 0)
+#define   G_028768_COLOR_SRC_OPT(x)                                   (((x) >> 0) & 0x07)
+#define   C_028768_COLOR_SRC_OPT                                      0xFFFFFFF8
+#define   S_028768_COLOR_DST_OPT(x)                                   (((x) & 0x07) << 4)
+#define   G_028768_COLOR_DST_OPT(x)                                   (((x) >> 4) & 0x07)
+#define   C_028768_COLOR_DST_OPT                                      0xFFFFFF8F
+#define   S_028768_COLOR_COMB_FCN(x)                                  (((x) & 0x07) << 8)
+#define   G_028768_COLOR_COMB_FCN(x)                                  (((x) >> 8) & 0x07)
+#define   C_028768_COLOR_COMB_FCN                                     0xFFFFF8FF
+#define   S_028768_ALPHA_SRC_OPT(x)                                   (((x) & 0x07) << 16)
+#define   G_028768_ALPHA_SRC_OPT(x)                                   (((x) >> 16) & 0x07)
+#define   C_028768_ALPHA_SRC_OPT                                      0xFFF8FFFF
+#define   S_028768_ALPHA_DST_OPT(x)                                   (((x) & 0x07) << 20)
+#define   G_028768_ALPHA_DST_OPT(x)                                   (((x) >> 20) & 0x07)
+#define   C_028768_ALPHA_DST_OPT                                      0xFF8FFFFF
+#define   S_028768_ALPHA_COMB_FCN(x)                                  (((x) & 0x07) << 24)
+#define   G_028768_ALPHA_COMB_FCN(x)                                  (((x) >> 24) & 0x07)
+#define   C_028768_ALPHA_COMB_FCN                                     0xF8FFFFFF
+#define R_02876C_SX_MRT3_BLEND_OPT                                      0x02876C
+#define   S_02876C_COLOR_SRC_OPT(x)                                   (((x) & 0x07) << 0)
+#define   G_02876C_COLOR_SRC_OPT(x)                                   (((x) >> 0) & 0x07)
+#define   C_02876C_COLOR_SRC_OPT                                      0xFFFFFFF8
+#define   S_02876C_COLOR_DST_OPT(x)                                   (((x) & 0x07) << 4)
+#define   G_02876C_COLOR_DST_OPT(x)                                   (((x) >> 4) & 0x07)
+#define   C_02876C_COLOR_DST_OPT                                      0xFFFFFF8F
+#define   S_02876C_COLOR_COMB_FCN(x)                                  (((x) & 0x07) << 8)
+#define   G_02876C_COLOR_COMB_FCN(x)                                  (((x) >> 8) & 0x07)
+#define   C_02876C_COLOR_COMB_FCN                                     0xFFFFF8FF
+#define   S_02876C_ALPHA_SRC_OPT(x)                                   (((x) & 0x07) << 16)
+#define   G_02876C_ALPHA_SRC_OPT(x)                                   (((x) >> 16) & 0x07)
+#define   C_02876C_ALPHA_SRC_OPT                                      0xFFF8FFFF
+#define   S_02876C_ALPHA_DST_OPT(x)                                   (((x) & 0x07) << 20)
+#define   G_02876C_ALPHA_DST_OPT(x)                                   (((x) >> 20) & 0x07)
+#define   C_02876C_ALPHA_DST_OPT                                      0xFF8FFFFF
+#define   S_02876C_ALPHA_COMB_FCN(x)                                  (((x) & 0x07) << 24)
+#define   G_02876C_ALPHA_COMB_FCN(x)                                  (((x) >> 24) & 0x07)
+#define   C_02876C_ALPHA_COMB_FCN                                     0xF8FFFFFF
+#define R_028770_SX_MRT4_BLEND_OPT                                      0x028770
+#define   S_028770_COLOR_SRC_OPT(x)                                   (((x) & 0x07) << 0)
+#define   G_028770_COLOR_SRC_OPT(x)                                   (((x) >> 0) & 0x07)
+#define   C_028770_COLOR_SRC_OPT                                      0xFFFFFFF8
+#define   S_028770_COLOR_DST_OPT(x)                                   (((x) & 0x07) << 4)
+#define   G_028770_COLOR_DST_OPT(x)                                   (((x) >> 4) & 0x07)
+#define   C_028770_COLOR_DST_OPT                                      0xFFFFFF8F
+#define   S_028770_COLOR_COMB_FCN(x)                                  (((x) & 0x07) << 8)
+#define   G_028770_COLOR_COMB_FCN(x)                                  (((x) >> 8) & 0x07)
+#define   C_028770_COLOR_COMB_FCN                                     0xFFFFF8FF
+#define   S_028770_ALPHA_SRC_OPT(x)                                   (((x) & 0x07) << 16)
+#define   G_028770_ALPHA_SRC_OPT(x)                                   (((x) >> 16) & 0x07)
+#define   C_028770_ALPHA_SRC_OPT                                      0xFFF8FFFF
+#define   S_028770_ALPHA_DST_OPT(x)                                   (((x) & 0x07) << 20)
+#define   G_028770_ALPHA_DST_OPT(x)                                   (((x) >> 20) & 0x07)
+#define   C_028770_ALPHA_DST_OPT                                      0xFF8FFFFF
+#define   S_028770_ALPHA_COMB_FCN(x)                                  (((x) & 0x07) << 24)
+#define   G_028770_ALPHA_COMB_FCN(x)                                  (((x) >> 24) & 0x07)
+#define   C_028770_ALPHA_COMB_FCN                                     0xF8FFFFFF
+#define R_028774_SX_MRT5_BLEND_OPT                                      0x028774
+#define   S_028774_COLOR_SRC_OPT(x)                                   (((x) & 0x07) << 0)
+#define   G_028774_COLOR_SRC_OPT(x)                                   (((x) >> 0) & 0x07)
+#define   C_028774_COLOR_SRC_OPT                                      0xFFFFFFF8
+#define   S_028774_COLOR_DST_OPT(x)                                   (((x) & 0x07) << 4)
+#define   G_028774_COLOR_DST_OPT(x)                                   (((x) >> 4) & 0x07)
+#define   C_028774_COLOR_DST_OPT                                      0xFFFFFF8F
+#define   S_028774_COLOR_COMB_FCN(x)                                  (((x) & 0x07) << 8)
+#define   G_028774_COLOR_COMB_FCN(x)                                  (((x) >> 8) & 0x07)
+#define   C_028774_COLOR_COMB_FCN                                     0xFFFFF8FF
+#define   S_028774_ALPHA_SRC_OPT(x)                                   (((x) & 0x07) << 16)
+#define   G_028774_ALPHA_SRC_OPT(x)                                   (((x) >> 16) & 0x07)
+#define   C_028774_ALPHA_SRC_OPT                                      0xFFF8FFFF
+#define   S_028774_ALPHA_DST_OPT(x)                                   (((x) & 0x07) << 20)
+#define   G_028774_ALPHA_DST_OPT(x)                                   (((x) >> 20) & 0x07)
+#define   C_028774_ALPHA_DST_OPT                                      0xFF8FFFFF
+#define   S_028774_ALPHA_COMB_FCN(x)                                  (((x) & 0x07) << 24)
+#define   G_028774_ALPHA_COMB_FCN(x)                                  (((x) >> 24) & 0x07)
+#define   C_028774_ALPHA_COMB_FCN                                     0xF8FFFFFF
+#define R_028778_SX_MRT6_BLEND_OPT                                      0x028778
+#define   S_028778_COLOR_SRC_OPT(x)                                   (((x) & 0x07) << 0)
+#define   G_028778_COLOR_SRC_OPT(x)                                   (((x) >> 0) & 0x07)
+#define   C_028778_COLOR_SRC_OPT                                      0xFFFFFFF8
+#define   S_028778_COLOR_DST_OPT(x)                                   (((x) & 0x07) << 4)
+#define   G_028778_COLOR_DST_OPT(x)                                   (((x) >> 4) & 0x07)
+#define   C_028778_COLOR_DST_OPT                                      0xFFFFFF8F
+#define   S_028778_COLOR_COMB_FCN(x)                                  (((x) & 0x07) << 8)
+#define   G_028778_COLOR_COMB_FCN(x)                                  (((x) >> 8) & 0x07)
+#define   C_028778_COLOR_COMB_FCN                                     0xFFFFF8FF
+#define   S_028778_ALPHA_SRC_OPT(x)                                   (((x) & 0x07) << 16)
+#define   G_028778_ALPHA_SRC_OPT(x)                                   (((x) >> 16) & 0x07)
+#define   C_028778_ALPHA_SRC_OPT                                      0xFFF8FFFF
+#define   S_028778_ALPHA_DST_OPT(x)                                   (((x) & 0x07) << 20)
+#define   G_028778_ALPHA_DST_OPT(x)                                   (((x) >> 20) & 0x07)
+#define   C_028778_ALPHA_DST_OPT                                      0xFF8FFFFF
+#define   S_028778_ALPHA_COMB_FCN(x)                                  (((x) & 0x07) << 24)
+#define   G_028778_ALPHA_COMB_FCN(x)                                  (((x) >> 24) & 0x07)
+#define   C_028778_ALPHA_COMB_FCN                                     0xF8FFFFFF
+#define R_02877C_SX_MRT7_BLEND_OPT                                      0x02877C
+#define   S_02877C_COLOR_SRC_OPT(x)                                   (((x) & 0x07) << 0)
+#define   G_02877C_COLOR_SRC_OPT(x)                                   (((x) >> 0) & 0x07)
+#define   C_02877C_COLOR_SRC_OPT                                      0xFFFFFFF8
+#define   S_02877C_COLOR_DST_OPT(x)                                   (((x) & 0x07) << 4)
+#define   G_02877C_COLOR_DST_OPT(x)                                   (((x) >> 4) & 0x07)
+#define   C_02877C_COLOR_DST_OPT                                      0xFFFFFF8F
+#define   S_02877C_COLOR_COMB_FCN(x)                                  (((x) & 0x07) << 8)
+#define   G_02877C_COLOR_COMB_FCN(x)                                  (((x) >> 8) & 0x07)
+#define   C_02877C_COLOR_COMB_FCN                                     0xFFFFF8FF
+#define   S_02877C_ALPHA_SRC_OPT(x)                                   (((x) & 0x07) << 16)
+#define   G_02877C_ALPHA_SRC_OPT(x)                                   (((x) >> 16) & 0x07)
+#define   C_02877C_ALPHA_SRC_OPT                                      0xFFF8FFFF
+#define   S_02877C_ALPHA_DST_OPT(x)                                   (((x) & 0x07) << 20)
+#define   G_02877C_ALPHA_DST_OPT(x)                                   (((x) >> 20) & 0x07)
+#define   C_02877C_ALPHA_DST_OPT                                      0xFF8FFFFF
+#define   S_02877C_ALPHA_COMB_FCN(x)                                  (((x) & 0x07) << 24)
+#define   G_02877C_ALPHA_COMB_FCN(x)                                  (((x) >> 24) & 0x07)
+#define   C_02877C_ALPHA_COMB_FCN                                     0xF8FFFFFF
+/*        */
 #define R_028780_CB_BLEND0_CONTROL                                      0x028780
 #define   S_028780_COLOR_SRCBLEND(x)                                  (((x) & 0x1F) << 0)
 #define   G_028780_COLOR_SRCBLEND(x)                                  (((x) >> 0) & 0x1F)
@@ -5473,6 +5766,7 @@
 #define     V_028808_CB_ELIMINATE_FAST_CLEAR                        0x02
 #define     V_028808_CB_RESOLVE                                     0x03
 #define     V_028808_CB_FMASK_DECOMPRESS                            0x05
+#define     V_028808_CB_DCC_DECOMPRESS                              0x06
 #define   S_028808_ROP3(x)                                            (((x) & 0xFF) << 16)
 #define   G_028808_ROP3(x)                                            (((x) >> 16) & 0xFF)
 #define   C_028808_ROP3                                               0xFF00FFFF
@@ -5551,6 +5845,11 @@
 #define     V_02880C_EXPORT_GREATER_THAN_Z                          2
 #define     V_02880C_EXPORT_RESERVED                                3
 /*     */
+/* Stoney */
+#define   S_02880C_DUAL_QUAD_DISABLE(x)                               (((x) & 0x1) << 15)
+#define   G_02880C_DUAL_QUAD_DISABLE(x)                               (((x) >> 15) & 0x1)
+#define   C_02880C_DUAL_QUAD_DISABLE                                  0xFFFF7FFF
+/*        */
 #define R_028810_PA_CL_CLIP_CNTL                                        0x028810
 #define   S_028810_UCP_ENA_0(x)                                       (((x) & 0x1) << 0)
 #define   G_028810_UCP_ENA_0(x)                                       (((x) >> 0) & 0x1)
@@ -6132,6 +6431,9 @@
 #define     V_028A40_GS_SCENARIO_G                                  0x03
 #define     V_028A40_GS_SCENARIO_C                                  0x04
 #define     V_028A40_SPRITE_EN                                      0x05
+#define   S_028A40_RESERVED_0(x)                                      (((x) & 0x1) << 3)
+#define   G_028A40_RESERVED_0(x)                                      (((x) >> 3) & 0x1)
+#define   C_028A40_RESERVED_0                                         0xFFFFFFF7
 #define   S_028A40_CUT_MODE(x)                                        (((x) & 0x03) << 4)
 #define   G_028A40_CUT_MODE(x)                                        (((x) >> 4) & 0x03)
 #define   C_028A40_CUT_MODE                                           0xFFFFFFCF
@@ -6139,12 +6441,19 @@
 #define     V_028A40_GS_CUT_512                                     0x01
 #define     V_028A40_GS_CUT_256                                     0x02
 #define     V_028A40_GS_CUT_128                                     0x03
+#define   S_028A40_RESERVED_1(x)                                      (((x) & 0x1F) << 6)
+#define   G_028A40_RESERVED_1(x)                                      (((x) >> 6) & 0x1F)
+#define   C_028A40_RESERVED_1                                         0xFFFFF83F
 #define   S_028A40_GS_C_PACK_EN(x)                                    (((x) & 0x1) << 11)
 #define   G_028A40_GS_C_PACK_EN(x)                                    (((x) >> 11) & 0x1)
 #define   C_028A40_GS_C_PACK_EN                                       0xFFFFF7FF
+#define   S_028A40_RESERVED_2(x)                                      (((x) & 0x1) << 12)
+#define   G_028A40_RESERVED_2(x)                                      (((x) >> 12) & 0x1)
+#define   C_028A40_RESERVED_2                                         0xFFFFEFFF
 #define   S_028A40_ES_PASSTHRU(x)                                     (((x) & 0x1) << 13)
 #define   G_028A40_ES_PASSTHRU(x)                                     (((x) >> 13) & 0x1)
 #define   C_028A40_ES_PASSTHRU                                        0xFFFFDFFF
+/* SI-CIK */
 #define   S_028A40_COMPUTE_MODE(x)                                    (((x) & 0x1) << 14)
 #define   G_028A40_COMPUTE_MODE(x)                                    (((x) >> 14) & 0x1)
 #define   C_028A40_COMPUTE_MODE                                       0xFFFFBFFF
@@ -6154,6 +6463,7 @@
 #define   S_028A40_ELEMENT_INFO_EN(x)                                 (((x) & 0x1) << 16)
 #define   G_028A40_ELEMENT_INFO_EN(x)                                 (((x) >> 16) & 0x1)
 #define   C_028A40_ELEMENT_INFO_EN                                    0xFFFEFFFF
+/*        */
 #define   S_028A40_PARTIAL_THD_AT_EOI(x)                              (((x) & 0x1) << 17)
 #define   G_028A40_PARTIAL_THD_AT_EOI(x)                              (((x) >> 17) & 0x1)
 #define   C_028A40_PARTIAL_THD_AT_EOI                                 0xFFFDFFFF
@@ -6339,6 +6649,9 @@
 #define   C_028A7C_RDREQ_POLICY                                       0xFFFFFF3F
 #define     V_028A7C_VGT_POLICY_LRU                                 0x00
 #define     V_028A7C_VGT_POLICY_STREAM                              0x01
+#define   S_028A7C_RDREQ_POLICY_VI(x)                                 (((x) & 0x1) << 6)
+#define   G_028A7C_RDREQ_POLICY_VI(x)                                 (((x) >> 6) & 0x1)
+#define   C_028A7C_RDREQ_POLICY_VI                                    0xFFFFFFBF
 #define   S_028A7C_ATC(x)                                             (((x) & 0x1) << 8)
 #define   G_028A7C_ATC(x)                                             (((x) >> 8) & 0x1)
 #define   C_028A7C_ATC                                                0xFFFFFEFF
@@ -6715,6 +7028,9 @@
 #define     V_028B6C_VGT_POLICY_BYPASS                              0x02
 /*     */
 /* VI */
+#define   S_028B6C_RDREQ_POLICY_VI(x)                                 (((x) & 0x1) << 15)
+#define   G_028B6C_RDREQ_POLICY_VI(x)                                 (((x) >> 15) & 0x1)
+#define   C_028B6C_RDREQ_POLICY_VI                                    0xFFFF7FFF
 #define   S_028B6C_DISTRIBUTION_MODE(x)                               (((x) & 0x03) << 17)
 #define   G_028B6C_DISTRIBUTION_MODE(x)                               (((x) >> 17) & 0x03)
 #define   C_028B6C_DISTRIBUTION_MODE                                  0xFFF9FFFF
@@ -7317,6 +7633,12 @@
 #define   S_028C3C_AA_MASK_X1Y1(x)                                    (((x) & 0xFFFF) << 16)
 #define   G_028C3C_AA_MASK_X1Y1(x)                                    (((x) >> 16) & 0xFFFF)
 #define   C_028C3C_AA_MASK_X1Y1                                       0x0000FFFF
+/* Stoney */
+#define R_028C40_PA_SC_SHADER_CONTROL                                   0x028C40
+#define   S_028C40_REALIGN_DQUADS_AFTER_N_WAVES(x)                    (((x) & 0x03) << 0)
+#define   G_028C40_REALIGN_DQUADS_AFTER_N_WAVES(x)                    (((x) >> 0) & 0x03)
+#define   C_028C40_REALIGN_DQUADS_AFTER_N_WAVES                       0xFFFFFFFC
+/*        */
 #define R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL                            0x028C58
 #define   S_028C58_VTX_REUSE_DEPTH(x)                                 (((x) & 0xFF) << 0)
 #define   G_028C58_VTX_REUSE_DEPTH(x)                                 (((x) >> 0) & 0xFF)

From c839174d55216cf1da5cdc4bf0f735ab8359d221 Mon Sep 17 00:00:00 2001
From: Jason Ekstrand <jason@jlekstrand.net>
Date: Thu, 22 Oct 2015 16:53:27 -0700
Subject: [PATCH 102/287] nir/validate: Add better validation of load/store
 types

Reviewed-by: Connor Abbott <cwabbott0@gmail.com>
---
 src/glsl/nir/nir_validate.c | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/src/glsl/nir/nir_validate.c b/src/glsl/nir/nir_validate.c
index c6fedf9b1ad..a42e830fd72 100644
--- a/src/glsl/nir/nir_validate.c
+++ b/src/glsl/nir/nir_validate.c
@@ -398,15 +398,27 @@ validate_intrinsic_instr(nir_intrinsic_instr *instr, validate_state *state)
    }
 
    switch (instr->intrinsic) {
-   case nir_intrinsic_load_var:
+   case nir_intrinsic_load_var: {
+      const struct glsl_type *type =
+         nir_deref_tail(&instr->variables[0]->deref)->type;
+      assert(glsl_type_is_vector_or_scalar(type));
+      assert(instr->num_components == glsl_get_vector_elements(type));
       assert(instr->variables[0]->var->data.mode != nir_var_shader_out);
       break;
-   case nir_intrinsic_store_var:
+   }
+   case nir_intrinsic_store_var: {
+      const struct glsl_type *type =
+         nir_deref_tail(&instr->variables[0]->deref)->type;
+      assert(glsl_type_is_vector_or_scalar(type));
+      assert(instr->num_components == glsl_get_vector_elements(type));
       assert(instr->variables[0]->var->data.mode != nir_var_shader_in &&
              instr->variables[0]->var->data.mode != nir_var_uniform &&
              instr->variables[0]->var->data.mode != nir_var_shader_storage);
       break;
+   }
    case nir_intrinsic_copy_var:
+      assert(nir_deref_tail(&instr->variables[0]->deref)->type ==
+             nir_deref_tail(&instr->variables[1]->deref)->type);
       assert(instr->variables[0]->var->data.mode != nir_var_shader_in &&
              instr->variables[0]->var->data.mode != nir_var_uniform &&
              instr->variables[0]->var->data.mode != nir_var_shader_storage);

From d43e16b1638cdadc7fcff2007b106e2a559dae7d Mon Sep 17 00:00:00 2001
From: Jason Ekstrand <jason.ekstrand@intel.com>
Date: Thu, 5 Nov 2015 16:37:47 -0800
Subject: [PATCH 103/287] i965/fs: Use regs_read/written for post-RA scheduling
 in calculate_deps

Previously, we were assuming that everything read/wrote exactly 1 logical
GRF (1 in SIMD8 and 2 in SIMD16).  This isn't actually true.  In
particular, the PLN instruction reads 2 logical registers in one of the
components.  This commit changes post-RA scheduling to use regs_read and
regs_written instead so that we add enough dependencies.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=92770
Reviewed-by: Matt Turner <mattst88@gmail.com>
Reviewed-by: Connor Abbott <cwabbott0@gmail.com>
---
 .../dri/i965/brw_schedule_instructions.cpp        | 15 ++++-----------
 1 file changed, 4 insertions(+), 11 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
index 88c45f74333..d21bc677c82 100644
--- a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
+++ b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
@@ -927,7 +927,6 @@ fs_instruction_scheduler::calculate_deps()
     * granular level.
     */
    schedule_node *last_fixed_grf_write = NULL;
-   int reg_width = v->dispatch_width / 8;
 
    /* The last instruction always needs to still be the last
     * instruction.  Either it's flow control (IF, ELSE, ENDIF, DO,
@@ -964,10 +963,7 @@ fs_instruction_scheduler::calculate_deps()
                     (inst->src[i].fixed_hw_reg.file ==
                      BRW_GENERAL_REGISTER_FILE)) {
             if (post_reg_alloc) {
-               int size = reg_width;
-               if (inst->src[i].fixed_hw_reg.vstride == BRW_VERTICAL_STRIDE_0)
-                  size = 1;
-               for (int r = 0; r < size; r++)
+               for (int r = 0; r < inst->regs_read(i); r++)
                   add_dep(last_grf_write[inst->src[i].fixed_hw_reg.nr + r], n);
             } else {
                add_dep(last_fixed_grf_write, n);
@@ -1031,7 +1027,7 @@ fs_instruction_scheduler::calculate_deps()
       } else if (inst->dst.file == HW_REG &&
                  inst->dst.fixed_hw_reg.file == BRW_GENERAL_REGISTER_FILE) {
          if (post_reg_alloc) {
-            for (int r = 0; r < reg_width; r++)
+            for (int r = 0; r < inst->regs_written; r++)
                last_grf_write[inst->dst.fixed_hw_reg.nr + r] = n;
          } else {
             last_fixed_grf_write = n;
@@ -1093,10 +1089,7 @@ fs_instruction_scheduler::calculate_deps()
                     (inst->src[i].fixed_hw_reg.file ==
                      BRW_GENERAL_REGISTER_FILE)) {
             if (post_reg_alloc) {
-               int size = reg_width;
-               if (inst->src[i].fixed_hw_reg.vstride == BRW_VERTICAL_STRIDE_0)
-                  size = 1;
-               for (int r = 0; r < size; r++)
+               for (int r = 0; r < inst->regs_read(i); r++)
                   add_dep(n, last_grf_write[inst->src[i].fixed_hw_reg.nr + r], 0);
             } else {
                add_dep(n, last_fixed_grf_write, 0);
@@ -1159,7 +1152,7 @@ fs_instruction_scheduler::calculate_deps()
       } else if (inst->dst.file == HW_REG &&
                  inst->dst.fixed_hw_reg.file == BRW_GENERAL_REGISTER_FILE) {
          if (post_reg_alloc) {
-            for (int r = 0; r < reg_width; r++)
+            for (int r = 0; r < inst->regs_written; r++)
                last_grf_write[inst->dst.fixed_hw_reg.nr + r] = n;
          } else {
             last_fixed_grf_write = n;

From 7d90e570f311066d1fd1eaafe681a8c939c86bae Mon Sep 17 00:00:00 2001
From: Jason Ekstrand <jason.ekstrand@intel.com>
Date: Fri, 1 May 2015 11:26:40 -0700
Subject: [PATCH 104/287] nir/types: Add an is_vector_or_scalar helper

Reviewed-by: Connor Abbott <cwabbott0@gmail.com>
---
 src/glsl/nir/nir_types.cpp | 6 ++++++
 src/glsl/nir/nir_types.h   | 1 +
 2 files changed, 7 insertions(+)

diff --git a/src/glsl/nir/nir_types.cpp b/src/glsl/nir/nir_types.cpp
index 965f42320be..135591ab97d 100644
--- a/src/glsl/nir/nir_types.cpp
+++ b/src/glsl/nir/nir_types.cpp
@@ -143,6 +143,12 @@ glsl_type_is_scalar(const struct glsl_type *type)
    return type->is_scalar();
 }
 
+bool
+glsl_type_is_vector_or_scalar(const struct glsl_type *type)
+{
+   return type->is_vector() || type->is_scalar();
+}
+
 bool
 glsl_type_is_matrix(const struct glsl_type *type)
 {
diff --git a/src/glsl/nir/nir_types.h b/src/glsl/nir/nir_types.h
index 60d561b25ee..b0b51842a43 100644
--- a/src/glsl/nir/nir_types.h
+++ b/src/glsl/nir/nir_types.h
@@ -70,6 +70,7 @@ unsigned glsl_get_record_location_offset(const struct glsl_type *type,
 bool glsl_type_is_void(const struct glsl_type *type);
 bool glsl_type_is_vector(const struct glsl_type *type);
 bool glsl_type_is_scalar(const struct glsl_type *type);
+bool glsl_type_is_vector_or_scalar(const struct glsl_type *type);
 bool glsl_type_is_matrix(const struct glsl_type *type);
 
 const struct glsl_type *glsl_void_type(void);

From 6c731d85666abb61c49e5b4affa196545f5ac086 Mon Sep 17 00:00:00 2001
From: Jason Ekstrand <jason.ekstrand@intel.com>
Date: Sat, 7 Nov 2015 12:01:50 -0800
Subject: [PATCH 105/287] nir: Add a nir_deref_tail helper

Reviewed-by: Connor Abbott <cwabbott0@gmail.com>
---
 src/glsl/nir/nir.h                  |  9 +++++++++
 src/glsl/nir/nir_lower_var_copies.c | 15 ++-------------
 src/glsl/nir/nir_split_var_copies.c | 12 ++----------
 3 files changed, 13 insertions(+), 23 deletions(-)

diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
index ef39df5dc51..2559ef2a456 100644
--- a/src/glsl/nir/nir.h
+++ b/src/glsl/nir/nir.h
@@ -785,6 +785,15 @@ NIR_DEFINE_CAST(nir_deref_as_var, nir_deref, nir_deref_var, deref)
 NIR_DEFINE_CAST(nir_deref_as_array, nir_deref, nir_deref_array, deref)
 NIR_DEFINE_CAST(nir_deref_as_struct, nir_deref, nir_deref_struct, deref)
 
+/* Returns the last deref in the chain. */
+static inline nir_deref *
+nir_deref_tail(nir_deref *deref)
+{
+   while (deref->child)
+      deref = deref->child;
+   return deref;
+}
+
 typedef struct {
    nir_instr instr;
 
diff --git a/src/glsl/nir/nir_lower_var_copies.c b/src/glsl/nir/nir_lower_var_copies.c
index 21672901f04..98c107aa50e 100644
--- a/src/glsl/nir/nir_lower_var_copies.c
+++ b/src/glsl/nir/nir_lower_var_copies.c
@@ -53,17 +53,6 @@ deref_next_wildcard_parent(nir_deref *deref)
    return NULL;
 }
 
-/* Returns the last deref in the chain.
- */
-static nir_deref *
-get_deref_tail(nir_deref *deref)
-{
-   while (deref->child)
-      deref = deref->child;
-
-   return deref;
-}
-
 /* This function recursively walks the given deref chain and replaces the
  * given copy instruction with an equivalent sequence load/store
  * operations.
@@ -121,8 +110,8 @@ emit_copy_load_store(nir_intrinsic_instr *copy_instr,
    } else {
       /* In this case, we have no wildcards anymore, so all we have to do
        * is just emit the load and store operations. */
-      src_tail = get_deref_tail(src_tail);
-      dest_tail = get_deref_tail(dest_tail);
+      src_tail = nir_deref_tail(src_tail);
+      dest_tail = nir_deref_tail(dest_tail);
 
       assert(src_tail->type == dest_tail->type);
 
diff --git a/src/glsl/nir/nir_split_var_copies.c b/src/glsl/nir/nir_split_var_copies.c
index d463f7bdae9..bfbef72c1ab 100644
--- a/src/glsl/nir/nir_split_var_copies.c
+++ b/src/glsl/nir/nir_split_var_copies.c
@@ -67,14 +67,6 @@ struct split_var_copies_state {
    bool progress;
 };
 
-static nir_deref *
-get_deref_tail(nir_deref *deref)
-{
-   while (deref->child != NULL)
-      deref = deref->child;
-   return deref;
-}
-
 /* Recursively constructs deref chains to split a copy instruction into
  * multiple (if needed) copy instructions with full-length deref chains.
  * External callers of this function should pass the tail and head of the
@@ -227,8 +219,8 @@ split_var_copies_block(nir_block *block, void *void_state)
 
       nir_deref *dest_head = &intrinsic->variables[0]->deref;
       nir_deref *src_head = &intrinsic->variables[1]->deref;
-      nir_deref *dest_tail = get_deref_tail(dest_head);
-      nir_deref *src_tail = get_deref_tail(src_head);
+      nir_deref *dest_tail = nir_deref_tail(dest_head);
+      nir_deref *src_tail = nir_deref_tail(src_head);
 
       switch (glsl_get_base_type(src_tail->type)) {
       case GLSL_TYPE_ARRAY:

From 87711183ac35d85ca7d2c2ee67536fe689d6bef3 Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@gmail.com>
Date: Sat, 31 Oct 2015 16:19:43 +1000
Subject: [PATCH 106/287] virgl: wrap ret assignment with braces to do correct
 thing

Coverity reported that ret could only be 0 or 1, since it
was setting ret = fn() > 0, instead of doing (ret = fn()) > 0.

Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 src/gallium/winsys/virgl/drm/virgl_drm_winsys.c     | 2 +-
 src/gallium/winsys/virgl/vtest/virgl_vtest_winsys.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/gallium/winsys/virgl/drm/virgl_drm_winsys.c b/src/gallium/winsys/virgl/drm/virgl_drm_winsys.c
index d77ebd6ca15..b5d4435e5e6 100644
--- a/src/gallium/winsys/virgl/drm/virgl_drm_winsys.c
+++ b/src/gallium/winsys/virgl/drm/virgl_drm_winsys.c
@@ -309,7 +309,7 @@ virgl_drm_winsys_resource_cache_create(struct virgl_winsys *qws,
    while (curr != &qdws->delayed) {
       curr_res = LIST_ENTRY(struct virgl_hw_res, curr, head);
 
-      if (!res && (ret = virgl_is_res_compat(qdws, curr_res, size, bind, format) > 0))
+      if (!res && ((ret = virgl_is_res_compat(qdws, curr_res, size, bind, format)) > 0))
          res = curr_res;
       else if (os_time_timeout(curr_res->start, curr_res->end, now)) {
          LIST_DEL(&curr_res->head);
diff --git a/src/gallium/winsys/virgl/vtest/virgl_vtest_winsys.c b/src/gallium/winsys/virgl/vtest/virgl_vtest_winsys.c
index b19c4561493..9c9ec044591 100644
--- a/src/gallium/winsys/virgl/vtest/virgl_vtest_winsys.c
+++ b/src/gallium/winsys/virgl/vtest/virgl_vtest_winsys.c
@@ -343,7 +343,7 @@ virgl_vtest_winsys_resource_cache_create(struct virgl_winsys *vws,
    while (curr != &vtws->delayed) {
       curr_res = LIST_ENTRY(struct virgl_hw_res, curr, head);
 
-      if (!res && (ret = virgl_is_res_compat(vtws, curr_res, size, bind, format) > 0))
+      if (!res && ((ret = virgl_is_res_compat(vtws, curr_res, size, bind, format)) > 0))
          res = curr_res;
       else if (os_time_timeout(curr_res->start, curr_res->end, now)) {
          LIST_DEL(&curr_res->head);

From 0f5b1409fd2f9b26c45e750a37947d27c892ee60 Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@gmail.com>
Date: Sun, 8 Nov 2015 07:55:17 +1000
Subject: [PATCH 107/287] llvmpipe: disable front updates for now

As pointed out by Emil, this sometimes hangs; it appears to be due to
threading.

We need to rethink how this stuff works for llvmpipe.

Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 src/gallium/drivers/llvmpipe/lp_texture.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/drivers/llvmpipe/lp_texture.c b/src/gallium/drivers/llvmpipe/lp_texture.c
index 7862ac8f217..82868814581 100644
--- a/src/gallium/drivers/llvmpipe/lp_texture.c
+++ b/src/gallium/drivers/llvmpipe/lp_texture.c
@@ -805,7 +805,7 @@ llvmpipe_init_screen_resource_funcs(struct pipe_screen *screen)
 #endif
 
    screen->resource_create = llvmpipe_resource_create;
-   screen->resource_create_front = llvmpipe_resource_create_front;
+/*   screen->resource_create_front = llvmpipe_resource_create_front; */
    screen->resource_destroy = llvmpipe_resource_destroy;
    screen->resource_from_handle = llvmpipe_resource_from_handle;
    screen->resource_get_handle = llvmpipe_resource_get_handle;

From 53cbb11707a502a31bb9f0380d730840245ee9b2 Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Fri, 6 Nov 2015 00:44:10 -0500
Subject: [PATCH 108/287] nouveau: avoid queueing too much work onto a single
 fence

Force the fence to get kicked off, which won't actually wait for its
completion, but any additional work will be put onto a fresh list.

This fixes crashes in teximage-colors --benchmark with too many active
maps.

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
---
 src/gallium/drivers/nouveau/nouveau_fence.c | 68 +++++++++++++--------
 src/gallium/drivers/nouveau/nouveau_fence.h |  1 +
 2 files changed, 43 insertions(+), 26 deletions(-)

diff --git a/src/gallium/drivers/nouveau/nouveau_fence.c b/src/gallium/drivers/nouveau/nouveau_fence.c
index d3a34060952..691553ae7e4 100644
--- a/src/gallium/drivers/nouveau/nouveau_fence.c
+++ b/src/gallium/drivers/nouveau/nouveau_fence.c
@@ -59,26 +59,6 @@ nouveau_fence_trigger_work(struct nouveau_fence *fence)
    }
 }
 
-bool
-nouveau_fence_work(struct nouveau_fence *fence,
-                   void (*func)(void *), void *data)
-{
-   struct nouveau_fence_work *work;
-
-   if (!fence || fence->state == NOUVEAU_FENCE_STATE_SIGNALLED) {
-      func(data);
-      return true;
-   }
-
-   work = CALLOC_STRUCT(nouveau_fence_work);
-   if (!work)
-      return false;
-   work->func = func;
-   work->data = data;
-   LIST_ADD(&work->list, &fence->work);
-   return true;
-}
-
 void
 nouveau_fence_emit(struct nouveau_fence *fence)
 {
@@ -182,12 +162,10 @@ nouveau_fence_signalled(struct nouveau_fence *fence)
    return fence->state == NOUVEAU_FENCE_STATE_SIGNALLED;
 }
 
-bool
-nouveau_fence_wait(struct nouveau_fence *fence, struct pipe_debug_callback *debug)
+static bool
+nouveau_fence_kick(struct nouveau_fence *fence)
 {
    struct nouveau_screen *screen = fence->screen;
-   uint32_t spins = 0;
-   int64_t start = 0;
 
    /* wtf, someone is waiting on a fence in flush_notify handler? */
    assert(fence->state != NOUVEAU_FENCE_STATE_EMITTING);
@@ -208,12 +186,25 @@ nouveau_fence_wait(struct nouveau_fence *fence, struct pipe_debug_callback *debu
    if (fence == screen->fence.current)
       nouveau_fence_next(screen);
 
+   nouveau_fence_update(screen, false);
+
+   return true;
+}
+
+bool
+nouveau_fence_wait(struct nouveau_fence *fence, struct pipe_debug_callback *debug)
+{
+   struct nouveau_screen *screen = fence->screen;
+   uint32_t spins = 0;
+   int64_t start = 0;
+
    if (debug && debug->debug_message)
       start = os_time_get_nano();
 
-   do {
-      nouveau_fence_update(screen, false);
+   if (!nouveau_fence_kick(fence))
+      return false;
 
+   do {
       if (fence->state == NOUVEAU_FENCE_STATE_SIGNALLED) {
          if (debug && debug->debug_message)
             pipe_debug_message(debug, PERF_INFO,
@@ -228,6 +219,8 @@ nouveau_fence_wait(struct nouveau_fence *fence, struct pipe_debug_callback *debu
       if (!(spins % 8)) /* donate a few cycles */
          sched_yield();
 #endif
+
+      nouveau_fence_update(screen, false);
    } while (spins < NOUVEAU_FENCE_MAX_SPINS);
 
    debug_printf("Wait on fence %u (ack = %u, next = %u) timed out !\n",
@@ -259,3 +252,26 @@ nouveau_fence_unref_bo(void *data)
 
    nouveau_bo_ref(NULL, &bo);
 }
+
+bool
+nouveau_fence_work(struct nouveau_fence *fence,
+                   void (*func)(void *), void *data)
+{
+   struct nouveau_fence_work *work;
+
+   if (!fence || fence->state == NOUVEAU_FENCE_STATE_SIGNALLED) {
+      func(data);
+      return true;
+   }
+
+   work = CALLOC_STRUCT(nouveau_fence_work);
+   if (!work)
+      return false;
+   work->func = func;
+   work->data = data;
+   LIST_ADD(&work->list, &fence->work);
+   p_atomic_inc(&fence->work_count);
+   if (fence->work_count > 64)
+      nouveau_fence_kick(fence);
+   return true;
+}
diff --git a/src/gallium/drivers/nouveau/nouveau_fence.h b/src/gallium/drivers/nouveau/nouveau_fence.h
index 0fa9d020f50..f10016da826 100644
--- a/src/gallium/drivers/nouveau/nouveau_fence.h
+++ b/src/gallium/drivers/nouveau/nouveau_fence.h
@@ -25,6 +25,7 @@ struct nouveau_fence {
    int state;
    int ref;
    uint32_t sequence;
+   uint32_t work_count;
    struct list_head work;
 };
 

From af218217d71152df8562b7f087086197f28080fe Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Sat, 7 Nov 2015 18:47:40 -0500
Subject: [PATCH 109/287] nv50/ir: only take abs value when computing high
 result

Not reachable from TGSI since it only has UMUL, no IMUL. However it's
surprising that setting argument types to s32 will cause sign to get
lost.

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
---
 src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp
index eec502be798..75164ef0641 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp
@@ -75,7 +75,7 @@ expandIntegerMUL(BuildUtil *bld, Instruction *mul)
    s[0] = mul->getSrc(0);
    s[1] = mul->getSrc(1);
 
-   if (isSignedType(mul->sType)) {
+   if (isSignedType(mul->sType) && highResult) {
       s[0] = bld->getSSA(fullSize);
       s[1] = bld->getSSA(fullSize);
       bld->mkOp1(OP_ABS, mul->sType, s[0], mul->getSrc(0));

From e06238cb9e50e3b994d5abac921ad800692a90af Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Sat, 7 Nov 2015 18:48:55 -0500
Subject: [PATCH 110/287] nv50/ir: fix emission of s[] args in certain
 situations

There might only be a single arg (e.g. cvt), so use mode rather than
looking at the source directly. Also we don't want to rely on the type
of the value, which can be unreliable, but instead use the
instruction's. This works out well since mkSplit doesn't adjust the
type.

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
---
 src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp
index 7e0fb532565..0b5288218d1 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp
@@ -441,9 +441,9 @@ CodeEmitterNV50::setSrcFileBits(const Instruction *i, int enc)
       return;
 
    if ((mode & 3) == 1) {
-      const int pos = i->src(1).getFile() == FILE_IMMEDIATE ? 13 : 14;
+      const int pos = ((mode >> 2) & 3) == 3 ? 13 : 14;
 
-      switch (i->getSrc(0)->reg.type) {
+      switch (i->sType) {
       case TYPE_U8:
          break;
       case TYPE_U16:

From ffb60e77882d2da9f42a76d602114cdb26dd25bc Mon Sep 17 00:00:00 2001
From: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Date: Fri, 6 Nov 2015 00:33:48 +0100
Subject: [PATCH 111/287] nvc0: enable compute support on Fermi

Although the compute support is still not complete because textures and
surfaces need to be implemented, it allows launching very simple compute
kernels, like one which reads MP performance counters.

This turns on PIPE_CAP_COMPUTE and PIPE_SHADER_COMPUTE.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
---
 src/gallium/drivers/nouveau/nvc0/nvc0_screen.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
index 7d96977c24b..7f8ce21a348 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
@@ -186,7 +186,7 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
    case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
       return (class_3d >= NVE4_3D_CLASS) ? 1 : 0;
    case PIPE_CAP_COMPUTE:
-      return (class_3d == NVE4_3D_CLASS) ? 1 : 0;
+      return (class_3d <= NVE4_3D_CLASS) ? 1 : 0;
    case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER:
       return nouveau_screen(pscreen)->vram_domain & NOUVEAU_BO_VRAM ? 1 : 0;
 
@@ -245,7 +245,7 @@ nvc0_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
          return 0;
       break;
    case PIPE_SHADER_COMPUTE:
-      if (class_3d != NVE4_3D_CLASS)
+      if (class_3d > NVE4_3D_CLASS)
          return 0;
       break;
    default:

From d115e47099b6c3ceb27d0c462eb559df6d1f9fd7 Mon Sep 17 00:00:00 2001
From: Leo Liu <leo.liu@amd.com>
Date: Thu, 5 Nov 2015 11:22:22 -0500
Subject: [PATCH 112/287] st/va: fix build fails with pipe loader
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

There is no dev in drv, and dev should be from vl_screen here

Signed-off-by: Leo Liu <leo.liu@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
---
 src/gallium/state_trackers/va/context.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/gallium/state_trackers/va/context.c b/src/gallium/state_trackers/va/context.c
index ec9e0488d85..25fa9058edb 100644
--- a/src/gallium/state_trackers/va/context.c
+++ b/src/gallium/state_trackers/va/context.c
@@ -151,8 +151,9 @@ VA_DRIVER_INIT_FUNC(VADriverContextP ctx)
 #if GALLIUM_STATIC_TARGETS
       drv->vscreen->pscreen = dd_create_screen(drm_fd);
 #else
-      if (pipe_loader_drm_probe_fd(&drv->dev, drm_fd))
-         drv->vscreen->pscreen = pipe_loader_create_screen(drv->dev, PIPE_SEARCH_DIR);
+      if (pipe_loader_drm_probe_fd(&drv->vscreen->dev, drm_fd))
+         drv->vscreen->pscreen =
+           pipe_loader_create_screen(drv->vscreen->dev, PIPE_SEARCH_DIR);
 #endif
 
       if (!drv->vscreen->pscreen)

From 7da86e0ec0cd38dcf58db97bb5c8a0eff9a3dd15 Mon Sep 17 00:00:00 2001
From: Leo Liu <leo.liu@amd.com>
Date: Wed, 4 Nov 2015 16:24:26 -0500
Subject: [PATCH 113/287] vl: add drm support for vl_screen
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This will allow the state trackers to use render nodes
with screen creation

v2: dup fd for pipe loader

Signed-off-by: Leo Liu <leo.liu@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
---
 src/gallium/auxiliary/Makefile.sources   |  3 +-
 src/gallium/auxiliary/vl/vl_winsys.h     |  6 ++
 src/gallium/auxiliary/vl/vl_winsys_drm.c | 77 ++++++++++++++++++++++++
 3 files changed, 85 insertions(+), 1 deletion(-)
 create mode 100644 src/gallium/auxiliary/vl/vl_winsys_drm.c

diff --git a/src/gallium/auxiliary/Makefile.sources b/src/gallium/auxiliary/Makefile.sources
index 6e22ced4e41..82ef5ecfce4 100644
--- a/src/gallium/auxiliary/Makefile.sources
+++ b/src/gallium/auxiliary/Makefile.sources
@@ -349,7 +349,8 @@ VL_SOURCES := \
 
 # XXX: Nuke this as our dri targets no longer depend on VL.
 VL_WINSYS_SOURCES := \
-	vl/vl_winsys_dri.c
+	vl/vl_winsys_dri.c \
+	vl/vl_winsys_drm.c
 
 VL_STUB_SOURCES := \
 	vl/vl_stubs.c
diff --git a/src/gallium/auxiliary/vl/vl_winsys.h b/src/gallium/auxiliary/vl/vl_winsys.h
index f6b47c964f9..df01917466f 100644
--- a/src/gallium/auxiliary/vl/vl_winsys.h
+++ b/src/gallium/auxiliary/vl/vl_winsys.h
@@ -66,4 +66,10 @@ vl_screen_set_next_timestamp(struct vl_screen *vscreen, uint64_t stamp);
 void*
 vl_screen_get_private(struct vl_screen *vscreen);
 
+struct vl_screen*
+vl_drm_screen_create(int fd);
+
+void
+vl_drm_screen_destroy(struct vl_screen *vscreen);
+
 #endif
diff --git a/src/gallium/auxiliary/vl/vl_winsys_drm.c b/src/gallium/auxiliary/vl/vl_winsys_drm.c
new file mode 100644
index 00000000000..1167fcf6a90
--- /dev/null
+++ b/src/gallium/auxiliary/vl/vl_winsys_drm.c
@@ -0,0 +1,77 @@
+/**************************************************************************
+ *
+ * Copyright 2015 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include <assert.h>
+
+#include "pipe/p_screen.h"
+#include "pipe-loader/pipe_loader.h"
+#include "state_tracker/drm_driver.h"
+
+#include "util/u_memory.h"
+#include "vl/vl_winsys.h"
+
+struct vl_screen*
+vl_drm_screen_create(int fd)
+{
+   struct vl_screen *vscreen;
+
+   vscreen = CALLOC_STRUCT(vl_screen);
+   if (!vscreen)
+      return NULL;
+
+#if GALLIUM_STATIC_TARGETS
+   vscreen->pscreen = dd_create_screen(fd);
+#else
+   if (pipe_loader_drm_probe_fd(&vscreen->dev, dup(fd))) {
+      vscreen->pscreen =
+         pipe_loader_create_screen(vscreen->dev, PIPE_SEARCH_DIR);
+      if (!vscreen->pscreen)
+         pipe_loader_release(&vscreen->dev, 1);
+   }
+#endif
+
+   if (!vscreen->pscreen) {
+      FREE(vscreen);
+      return NULL;
+   }
+
+   return vscreen;
+}
+
+void
+vl_drm_screen_destroy(struct vl_screen *vscreen)
+{
+   assert(vscreen);
+
+   vscreen->pscreen->destroy(vscreen->pscreen);
+
+#if !GALLIUM_STATIC_TARGETS
+   pipe_loader_release(&vscreen->dev, 1);
+#endif
+
+   FREE(vscreen);
+}

From 25526d77b1b69822145bcf11411e9398426e2984 Mon Sep 17 00:00:00 2001
From: Leo Liu <leo.liu@amd.com>
Date: Thu, 5 Nov 2015 11:56:37 -0500
Subject: [PATCH 114/287] st/va: use vl screen drm support from vl_wys_drm
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

v2: move the dup to vl_wys_drm for pipe loader

Signed-off-by: Leo Liu <leo.liu@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
---
 src/gallium/state_trackers/va/context.c | 24 +++---------------------
 1 file changed, 3 insertions(+), 21 deletions(-)

diff --git a/src/gallium/state_trackers/va/context.c b/src/gallium/state_trackers/va/context.c
index 25fa9058edb..98c4104da48 100644
--- a/src/gallium/state_trackers/va/context.c
+++ b/src/gallium/state_trackers/va/context.c
@@ -28,8 +28,6 @@
 
 #include "pipe/p_screen.h"
 #include "pipe/p_video_codec.h"
-#include "pipe-loader/pipe_loader.h"
-#include "state_tracker/drm_driver.h"
 #include "util/u_memory.h"
 #include "util/u_handle_table.h"
 #include "util/u_video.h"
@@ -133,32 +131,16 @@ VA_DRIVER_INIT_FUNC(VADriverContextP ctx)
          return VA_STATUS_ERROR_INVALID_PARAMETER;
       }
 
-#if GALLIUM_STATIC_TARGETS
       drm_fd = drm_info->fd;
-#else
-      drm_fd = dup(drm_info->fd);
-#endif
 
       if (drm_fd < 0) {
          FREE(drv);
          return VA_STATUS_ERROR_INVALID_PARAMETER;
       }
 
-      drv->vscreen = CALLOC_STRUCT(vl_screen);
+      drv->vscreen = vl_drm_screen_create(drm_fd);
       if (!drv->vscreen)
          goto error_screen;
-
-#if GALLIUM_STATIC_TARGETS
-      drv->vscreen->pscreen = dd_create_screen(drm_fd);
-#else
-      if (pipe_loader_drm_probe_fd(&drv->vscreen->dev, drm_fd))
-         drv->vscreen->pscreen =
-           pipe_loader_create_screen(drv->vscreen->dev, PIPE_SEARCH_DIR);
-#endif
-
-      if (!drv->vscreen->pscreen)
-         goto error_pipe;
-
       }
       break;
    default:
@@ -203,7 +185,7 @@ error_pipe:
    if (ctx->display_type == VA_DISPLAY_GLX || ctx->display_type == VA_DISPLAY_X11)
       vl_screen_destroy(drv->vscreen);
    else
-      FREE(drv->vscreen);
+      vl_drm_screen_destroy(drv->vscreen);
 
 error_screen:
    FREE(drv);
@@ -343,7 +325,7 @@ vlVaTerminate(VADriverContextP ctx)
    if (ctx->display_type == VA_DISPLAY_GLX || ctx->display_type == VA_DISPLAY_X11)
       vl_screen_destroy(drv->vscreen);
    else
-      FREE(drv->vscreen);
+      vl_drm_screen_destroy(drv->vscreen);
    handle_table_destroy(drv->htab);
    FREE(drv);
 

From 519502d08fe5fefebd618e191eec4462dae3bd9a Mon Sep 17 00:00:00 2001
From: Leo Liu <leo.liu@amd.com>
Date: Wed, 4 Nov 2015 16:38:28 -0500
Subject: [PATCH 115/287] st/omx: add headless support
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This will allow dec/enc/transcode without X

v2:  use env override even with X,
     use loader_open_device instead of open
v3:  clean up

Signed-off-by: Leo Liu <leo.liu@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
---
 src/gallium/state_trackers/omx/entrypoint.c | 45 ++++++++++++++++-----
 1 file changed, 35 insertions(+), 10 deletions(-)

diff --git a/src/gallium/state_trackers/omx/entrypoint.c b/src/gallium/state_trackers/omx/entrypoint.c
index a7656660e8c..7df90b16a84 100644
--- a/src/gallium/state_trackers/omx/entrypoint.c
+++ b/src/gallium/state_trackers/omx/entrypoint.c
@@ -38,6 +38,7 @@
 
 #include "os/os_thread.h"
 #include "util/u_memory.h"
+#include "loader/loader.h"
 
 #include "entrypoint.h"
 #include "vid_dec.h"
@@ -47,6 +48,8 @@ pipe_static_mutex(omx_lock);
 static Display *omx_display = NULL;
 static struct vl_screen *omx_screen = NULL;
 static unsigned omx_usecount = 0;
+static const char *omx_render_node = NULL;
+static int drm_fd;
 
 int omx_component_library_Setup(stLoaderComponentType **stComponents)
 {
@@ -73,18 +76,30 @@ struct vl_screen *omx_get_screen(void)
    pipe_mutex_lock(omx_lock);
 
    if (!omx_display) {
-      omx_display = XOpenDisplay(NULL);
-      if (!omx_display) {
-         pipe_mutex_unlock(omx_lock);
-         return NULL;
+      omx_render_node = debug_get_option("OMX_RENDER_NODE", NULL);
+      if (!omx_render_node) {
+         omx_display = XOpenDisplay(NULL);
+         if (!omx_display)
+            goto error;
       }
    }
 
    if (!omx_screen) {
-      omx_screen = vl_screen_create(omx_display, 0);
-      if (!omx_screen) {
-         pipe_mutex_unlock(omx_lock);
-         return NULL;
+      if (omx_render_node) {
+         drm_fd = loader_open_device(omx_render_node);
+         if (drm_fd < 0)
+            goto error;
+         omx_screen = vl_drm_screen_create(drm_fd);
+         if (!omx_screen) {
+            close(drm_fd);
+            goto error;
+         }
+      } else {
+         omx_screen = vl_screen_create(omx_display, 0);
+         if (!omx_screen) {
+            XCloseDisplay(omx_display);
+            goto error;
+         }
       }
    }
 
@@ -92,14 +107,24 @@ struct vl_screen *omx_get_screen(void)
 
    pipe_mutex_unlock(omx_lock);
    return omx_screen;
+
+error:
+   pipe_mutex_unlock(omx_lock);
+   return NULL;
 }
 
 void omx_put_screen(void)
 {
    pipe_mutex_lock(omx_lock);
    if ((--omx_usecount) == 0) {
-      vl_screen_destroy(omx_screen);
-      XCloseDisplay(omx_display);
+      if (!omx_render_node) {
+         vl_screen_destroy(omx_screen);
+         if (omx_display)
+            XCloseDisplay(omx_display);
+      } else {
+         close(drm_fd);
+         vl_drm_screen_destroy(omx_screen);
+      }
       omx_screen = NULL;
       omx_display = NULL;
    }

From df4f9b0236e3dcfb94eb160b599657a315822c82 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig at amd.com>
Date: Fri, 6 Nov 2015 15:15:56 -0500
Subject: [PATCH 116/287] radeon/uvd: add H.265/HEVC to legal notes
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
---
 docs/README.UVD | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/README.UVD b/docs/README.UVD
index 38ea8646a64..b0f4b9d8c67 100644
--- a/docs/README.UVD
+++ b/docs/README.UVD
@@ -2,8 +2,8 @@ The software may implement third party technologies (e.g. third party
 libraries) that are not licensed to you by AMD and for which you may need
 to obtain licenses from other parties.  Unless explicitly stated otherwise,
 these third party technologies are not licensed hereunder.  Such third
-party technologies include, but are not limited, to H.264, MPEG-2, MPEG-4,
-AVC, and VC-1.
+party technologies include, but are not limited, to H.264, H.265, HEVC, MPEG-2,
+MPEG-4, AVC, and VC-1.
 
 For MPEG-2 Encoding Products ANY USE OF THIS PRODUCT IN ANY MANNER OTHER
 THAN PERSONAL USE THAT COMPLIES WITH THE MPEG-2 STANDARD FOR ENCODING VIDEO

From 24abbaff9ad177624c2b4906c7d94f5d91ac3cc0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Michel=20D=C3=A4nzer?= <michel.daenzer@amd.com>
Date: Thu, 21 Aug 2014 18:30:44 +0900
Subject: [PATCH 117/287] winsys/radeon: Use CPU page size instead of
 hardcoding 4096 bytes v3
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fixes GPUVM conflicts with non-4K page size.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=92738

v2: Replace sanitization of VM base address alignment with a comment
    explaining why that's not necessary.
v3: Use unsigned instead of long as the type for the size_align member.
    (Marek)

Cc: mesa-stable@lists.freedesktop.org
Reviewed-by: Christian König <christian.koenig@amd.com> (v1)
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
---
 src/gallium/winsys/radeon/drm/radeon_drm_bo.c | 30 ++++++++++++-------
 1 file changed, 19 insertions(+), 11 deletions(-)

diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
index 2878c8f5744..7f395b704c7 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
@@ -76,6 +76,9 @@ struct radeon_bomgr {
     bool va;
     uint64_t va_offset;
     struct list_head va_holes;
+
+    /* BO size alignment */
+    unsigned size_align;
 };
 
 static inline struct radeon_bomgr *radeon_bomgr(struct pb_manager *mgr)
@@ -188,8 +191,10 @@ static uint64_t radeon_bomgr_find_va(struct radeon_bomgr *mgr, uint64_t size, ui
     struct radeon_bo_va_hole *hole, *n;
     uint64_t offset = 0, waste = 0;
 
-    alignment = MAX2(alignment, 4096);
-    size = align(size, 4096);
+    /* All VM address space holes will implicitly start aligned to the
+     * size alignment, so we don't need to sanitize the alignment here
+     */
+    size = align(size, mgr->size_align);
 
     pipe_mutex_lock(mgr->bo_va_mutex);
     /* first look for a hole */
@@ -246,7 +251,7 @@ static void radeon_bomgr_free_va(struct radeon_bomgr *mgr, uint64_t va, uint64_t
 {
     struct radeon_bo_va_hole *hole;
 
-    size = align(size, 4096);
+    size = align(size, mgr->size_align);
 
     pipe_mutex_lock(mgr->bo_va_mutex);
     if ((va + size) == mgr->va_offset) {
@@ -357,9 +362,9 @@ static void radeon_bo_destroy(struct pb_buffer *_buf)
     pipe_mutex_destroy(bo->map_mutex);
 
     if (bo->initial_domain & RADEON_DOMAIN_VRAM)
-        bo->rws->allocated_vram -= align(bo->base.size, 4096);
+        bo->rws->allocated_vram -= align(bo->base.size, mgr->size_align);
     else if (bo->initial_domain & RADEON_DOMAIN_GTT)
-        bo->rws->allocated_gtt -= align(bo->base.size, 4096);
+        bo->rws->allocated_gtt -= align(bo->base.size, mgr->size_align);
     FREE(bo);
 }
 
@@ -644,9 +649,9 @@ static struct pb_buffer *radeon_bomgr_create_bo(struct pb_manager *_mgr,
     }
 
     if (rdesc->initial_domains & RADEON_DOMAIN_VRAM)
-        rws->allocated_vram += align(size, 4096);
+        rws->allocated_vram += align(size, mgr->size_align);
     else if (rdesc->initial_domains & RADEON_DOMAIN_GTT)
-        rws->allocated_gtt += align(size, 4096);
+        rws->allocated_gtt += align(size, mgr->size_align);
 
     return &bo->base;
 }
@@ -720,6 +725,9 @@ struct pb_manager *radeon_bomgr_create(struct radeon_drm_winsys *rws)
     mgr->va_offset = rws->va_start;
     list_inithead(&mgr->va_holes);
 
+    /* TTM aligns the BO size to the CPU page size */
+    mgr->size_align = sysconf(_SC_PAGESIZE);
+
     return &mgr->base;
 }
 
@@ -882,7 +890,7 @@ radeon_winsys_bo_create(struct radeon_winsys *rws,
      * BOs. Aligning this here helps the cached bufmgr. Especially small BOs,
      * like constant/uniform buffers, can benefit from better and more reuse.
      */
-    size = align(size, 4096);
+    size = align(size, mgr->size_align);
 
     /* Only set one usage bit each for domains and flags, or the cache manager
      * might consider different sets of domains / flags compatible
@@ -993,7 +1001,7 @@ static struct pb_buffer *radeon_winsys_bo_from_ptr(struct radeon_winsys *rws,
         pipe_mutex_unlock(mgr->bo_handles_mutex);
     }
 
-    ws->allocated_gtt += align(bo->base.size, 4096);
+    ws->allocated_gtt += align(bo->base.size, mgr->size_align);
 
     return (struct pb_buffer*)bo;
 }
@@ -1130,9 +1138,9 @@ done:
     bo->initial_domain = radeon_bo_get_initial_domain((void*)bo);
 
     if (bo->initial_domain & RADEON_DOMAIN_VRAM)
-        ws->allocated_vram += align(bo->base.size, 4096);
+        ws->allocated_vram += align(bo->base.size, mgr->size_align);
     else if (bo->initial_domain & RADEON_DOMAIN_GTT)
-        ws->allocated_gtt += align(bo->base.size, 4096);
+        ws->allocated_gtt += align(bo->base.size, mgr->size_align);
 
     return (struct pb_buffer*)bo;
 

From e524df5ef32fe8fada0695417bd86413d836a6bb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jos=C3=A9=20Fonseca?= <jfonseca@vmware.com>
Date: Thu, 22 Mar 2012 12:16:17 +0000
Subject: [PATCH 118/287] st/wgl: Don't rely on GDI to bookkeep pixelformat for
 us.

This allows apitrace's retracediff script to be used on Windows to retrace
and compare two builds of a Mesa-based opengl32.dll/ICD side-by-side.

See also https://github.com/apitrace/apitrace/commit/e4a4f15f5b92e0abbd24d7d053da25f8278c9f64
---
 src/gallium/state_trackers/wgl/stw_context.c     | 9 +++------
 src/gallium/state_trackers/wgl/stw_framebuffer.c | 4 +++-
 2 files changed, 6 insertions(+), 7 deletions(-)

diff --git a/src/gallium/state_trackers/wgl/stw_context.c b/src/gallium/state_trackers/wgl/stw_context.c
index 3e99cc44db7..19f06203e91 100644
--- a/src/gallium/state_trackers/wgl/stw_context.c
+++ b/src/gallium/state_trackers/wgl/stw_context.c
@@ -160,21 +160,18 @@ stw_create_context_attribs(HDC hdc, INT iLayerPlane, DHGLRC hShareContext,
    if (iLayerPlane != 0)
       return 0;
 
-   iPixelFormat = GetPixelFormat(hdc);
-   if(!iPixelFormat)
-      return 0;
-
    /*
     * GDI only knows about displayable pixel formats, so determine the pixel
     * format from the framebuffer.
     *
-    * TODO: Remove the GetPixelFormat() above, and stop relying on GDI.
+    * This also allows to use a OpenGL DLL / ICD without installing.
     */
    fb = stw_framebuffer_from_hdc( hdc );
    if (fb) {
-      assert(iPixelFormat == fb->iDisplayablePixelFormat);
       iPixelFormat = fb->iPixelFormat;
       stw_framebuffer_release(fb);
+   } else {
+      return 0;
    }
 
    pfi = stw_pixelformat_get_info( iPixelFormat );
diff --git a/src/gallium/state_trackers/wgl/stw_framebuffer.c b/src/gallium/state_trackers/wgl/stw_framebuffer.c
index 7b34fcbb5ed..cd8990d54f1 100644
--- a/src/gallium/state_trackers/wgl/stw_framebuffer.c
+++ b/src/gallium/state_trackers/wgl/stw_framebuffer.c
@@ -466,7 +466,9 @@ DrvSetPixelFormat(HDC hdc, LONG iPixelFormat)
     * avoid opengl32.dll's wglCreateContext to fail */
    if (GetPixelFormat(hdc) == 0) {
       BOOL bRet = SetPixelFormat(hdc, iPixelFormat, NULL);
-      assert(bRet);
+      if (!bRet) {
+	  debug_printf("SetPixelFormat failed\n");
+      }
    }
 
    return TRUE;

From 8083943e2e5b3f2a7201650a586b597dff481d43 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Mon, 1 Jun 2015 08:45:07 -0600
Subject: [PATCH 119/287] st/wgl: reorder pixel formats to put MSAA formats
 last
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

And put 8-bit/channel formats before 5/6/5 formats.

The ChoosePixelFormat() function seems to be finicky about format
selection.  Putting the MSAA formats after the non-MSAA formats
means most apps get a low-numbered format.  Now we generally get
the same pixel format regardless of whether using vgpu9 or 10.

VMware bug 1455030

Reviewed-by: José Fonseca <jfonseca@vmware.com>
---
 .../state_trackers/wgl/stw_pixelformat.c      | 61 ++++++++++---------
 1 file changed, 32 insertions(+), 29 deletions(-)

diff --git a/src/gallium/state_trackers/wgl/stw_pixelformat.c b/src/gallium/state_trackers/wgl/stw_pixelformat.c
index db6cf8ee30f..ef6158d3645 100644
--- a/src/gallium/state_trackers/wgl/stw_pixelformat.c
+++ b/src/gallium/state_trackers/wgl/stw_pixelformat.c
@@ -74,10 +74,11 @@ stw_pf_color[] = {
    /* no-alpha */
    { PIPE_FORMAT_B8G8R8X8_UNORM,    { 8,  8,  8,  0}, {16,  8,  0,  0} },
    { PIPE_FORMAT_X8R8G8B8_UNORM,    { 8,  8,  8,  0}, { 8, 16, 24,  0} },
-   { PIPE_FORMAT_B5G6R5_UNORM,      { 5,  6,  5,  0}, {11,  5,  0,  0} },
    /* alpha */
    { PIPE_FORMAT_B8G8R8A8_UNORM,    { 8,  8,  8,  8}, {16,  8,  0, 24} },
    { PIPE_FORMAT_A8R8G8B8_UNORM,    { 8,  8,  8,  8}, { 8, 16, 24,  0} },
+   /* shallow bit depths */
+   { PIPE_FORMAT_B5G6R5_UNORM,      { 5,  6,  5,  0}, {11,  5,  0,  0} },
 #if 0
    { PIPE_FORMAT_R10G10B10A2_UNORM, {10, 10, 10,  2}, { 0, 10, 20, 30} },
 #endif
@@ -214,14 +215,15 @@ stw_pixelformat_add(
 
 
 /**
- * Add the depth/stencil/accum/ms variants for a particular color format.
+ * Add the depth/stencil/accum/ms variants for a list of color formats.
  */
 static unsigned
-add_color_format_variants(const struct stw_pf_color_info *color,
+add_color_format_variants(const struct stw_pf_color_info *color_formats,
+                          unsigned num_color_formats,
                           boolean extended)
 {
    struct pipe_screen *screen = stw_dev->screen;
-   unsigned ms, db, ds, acc;
+   unsigned cfmt, ms, db, ds, acc;
    unsigned bind_flags = PIPE_BIND_RENDER_TARGET;
    unsigned num_added = 0;
    int force_samples = 0;
@@ -245,27 +247,31 @@ add_color_format_variants(const struct stw_pf_color_info *color,
       if (force_samples && samples != force_samples)
          continue;
 
-      if (!screen->is_format_supported(screen, color->format,
-                                       PIPE_TEXTURE_2D, samples, bind_flags)) {
-         continue;
-      }
+      for (cfmt = 0; cfmt < num_color_formats; cfmt++) {
+         if (!screen->is_format_supported(screen, color_formats[cfmt].format,
+                                          PIPE_TEXTURE_2D, samples,
+                                          bind_flags)) {
+            continue;
+         }
 
-      for (db = 0; db < Elements(stw_pf_doublebuffer); db++) {
-         unsigned doublebuffer = stw_pf_doublebuffer[db];
+         for (db = 0; db < Elements(stw_pf_doublebuffer); db++) {
+            unsigned doublebuffer = stw_pf_doublebuffer[db];
 
-         for (ds = 0; ds < Elements(stw_pf_depth_stencil); ds++) {
-            const struct stw_pf_depth_info *depth = &stw_pf_depth_stencil[ds];
+            for (ds = 0; ds < Elements(stw_pf_depth_stencil); ds++) {
+               const struct stw_pf_depth_info *depth = &stw_pf_depth_stencil[ds];
 
-            if (!screen->is_format_supported(screen, depth->format,
-                                             PIPE_TEXTURE_2D, samples,
-                                             PIPE_BIND_DEPTH_STENCIL)) {
-               continue;
-            }
+               if (!screen->is_format_supported(screen, depth->format,
+                                                PIPE_TEXTURE_2D, samples,
+                                                PIPE_BIND_DEPTH_STENCIL)) {
+                  continue;
+               }
 
-            for (acc = 0; acc < 2; acc++) {
-               stw_pixelformat_add(stw_dev, extended, color, depth,
-                                   acc * 16, doublebuffer, samples);
-               num_added++;
+               for (acc = 0; acc < 2; acc++) {
+                  stw_pixelformat_add(stw_dev, extended, &color_formats[cfmt],
+                                      depth,
+                                      acc * 16, doublebuffer, samples);
+                  num_added++;
+               }
             }
          }
       }
@@ -278,22 +284,19 @@ add_color_format_variants(const struct stw_pf_color_info *color,
 void
 stw_pixelformat_init( void )
 {
-   unsigned i;
-   unsigned num_formats = 0;
+   unsigned num_formats;
 
    assert( !stw_dev->pixelformat_count );
    assert( !stw_dev->pixelformat_extended_count );
 
    /* normal, displayable formats */
-   for (i = 0; i < Elements(stw_pf_color); i++) {
-      num_formats += add_color_format_variants(&stw_pf_color[i], FALSE);
-   }
+   num_formats = add_color_format_variants(stw_pf_color,
+                                           Elements(stw_pf_color), FALSE);
    assert(num_formats > 0);
 
    /* extended, pbuffer-only formats */
-   for (i = 0; i < Elements(stw_pf_color_extended); i++) {
-      add_color_format_variants(&stw_pf_color_extended[i], TRUE);
-   }
+   add_color_format_variants(stw_pf_color_extended,
+                             Elements(stw_pf_color_extended), TRUE);
 
    assert( stw_dev->pixelformat_count <= stw_dev->pixelformat_extended_count );
    assert( stw_dev->pixelformat_extended_count <= STW_MAX_PIXELFORMATS );

From 75d1e363ff142b273b30d8cd236bb1c92c1b2f27 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Mon, 15 Jun 2015 19:14:42 -0600
Subject: [PATCH 120/287] st/wgl: fix double-present on swapbuffers bug
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The stw_st_framebuffer_present_locked() function was getting called
twice per SwapBuffers.  First, when st_context_iface::flush() was
called from DrvSwapBuffers() because the ST_FLUSH_FRONT flag was
given.  Second, by stw_st_swap_framebuffer_locked() which does the
actual SwapBuffers.

Two code changes:
1. Pass ST_FLUSH_END_OF_FRAME, instead of ST_FLUSH_FRONT.
2. Move the implementation of stw_flush_current_locked() into
DrvSwapBuffers() since it's not called anywhere else.

Not much change in perf for benchmarks like Lightsmark, but some simple
Mesa demos are measurably faster.

Reviewed-by: José Fonseca <jfonseca@vmware.com>
---
 src/gallium/state_trackers/wgl/stw_context.c  | 12 ------------
 src/gallium/state_trackers/wgl/stw_context.h  |  1 -
 .../state_trackers/wgl/stw_framebuffer.c      | 19 ++++++++++++-------
 3 files changed, 12 insertions(+), 20 deletions(-)

diff --git a/src/gallium/state_trackers/wgl/stw_context.c b/src/gallium/state_trackers/wgl/stw_context.c
index 19f06203e91..93f61e76efa 100644
--- a/src/gallium/state_trackers/wgl/stw_context.c
+++ b/src/gallium/state_trackers/wgl/stw_context.c
@@ -473,18 +473,6 @@ fail:
    return ret;
 }
 
-/**
- * Flush the current context if it is bound to the framebuffer.
- */
-void
-stw_flush_current_locked( struct stw_framebuffer *fb )
-{
-   struct stw_context *ctx = stw_current_context();
-
-   if (ctx && ctx->current_framebuffer == fb) {
-      ctx->st->flush(ctx->st, ST_FLUSH_FRONT, NULL);
-   }
-}
 
 /**
  * Notify the current context that the framebuffer has become invalid.
diff --git a/src/gallium/state_trackers/wgl/stw_context.h b/src/gallium/state_trackers/wgl/stw_context.h
index c66c166de2e..6bfa7150d6f 100644
--- a/src/gallium/state_trackers/wgl/stw_context.h
+++ b/src/gallium/state_trackers/wgl/stw_context.h
@@ -60,7 +60,6 @@ HDC stw_get_current_dc( void );
 
 BOOL stw_make_current( HDC hdc, DHGLRC dhglrc );
 
-void stw_flush_current_locked( struct stw_framebuffer *fb );
 void stw_notify_current_locked( struct stw_framebuffer *fb );
 
 #endif /* STW_CONTEXT_H */
diff --git a/src/gallium/state_trackers/wgl/stw_framebuffer.c b/src/gallium/state_trackers/wgl/stw_framebuffer.c
index cd8990d54f1..fbe77b23d9c 100644
--- a/src/gallium/state_trackers/wgl/stw_framebuffer.c
+++ b/src/gallium/state_trackers/wgl/stw_framebuffer.c
@@ -605,15 +605,20 @@ DrvSwapBuffers(HDC hdc)
       return TRUE;
    }
 
-   /* Display the HUD */
    ctx = stw_current_context();
-   if (ctx && ctx->hud) {
-      struct pipe_resource *back =
-         stw_get_framebuffer_resource(fb->stfb, ST_ATTACHMENT_BACK_LEFT);
-      hud_draw(ctx->hud, back);
-   }
+   if (ctx) {
+      if (ctx->hud) {
+         /* Display the HUD */
+         struct pipe_resource *back =
+            stw_get_framebuffer_resource(fb->stfb, ST_ATTACHMENT_BACK_LEFT);
+         hud_draw(ctx->hud, back);
+      }
 
-   stw_flush_current_locked(fb);
+      if (ctx->current_framebuffer == fb) {
+         /* flush current context */
+         ctx->st->flush(ctx->st, ST_FLUSH_END_OF_FRAME, NULL);
+      }
+   }
 
    return stw_st_swap_framebuffer_locked(hdc, fb->stfb);
 }

From 28f6faca518843b1ee30644a7bfb7aadd6551df1 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Mon, 6 Jul 2015 14:53:06 -0600
Subject: [PATCH 121/287] st/wgl: add null pointer check for HUD texture

Fixes crash when using HUD with Nobel Clinician Viewer.

Reviewed-by: Jose Fonseca <jfonseca@vmware.com>
---
 src/gallium/state_trackers/wgl/stw_framebuffer.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/gallium/state_trackers/wgl/stw_framebuffer.c b/src/gallium/state_trackers/wgl/stw_framebuffer.c
index fbe77b23d9c..11f60b6c763 100644
--- a/src/gallium/state_trackers/wgl/stw_framebuffer.c
+++ b/src/gallium/state_trackers/wgl/stw_framebuffer.c
@@ -611,7 +611,9 @@ DrvSwapBuffers(HDC hdc)
          /* Display the HUD */
          struct pipe_resource *back =
             stw_get_framebuffer_resource(fb->stfb, ST_ATTACHMENT_BACK_LEFT);
-         hud_draw(ctx->hud, back);
+         if (back) {
+            hud_draw(ctx->hud, back);
+         }
       }
 
       if (ctx->current_framebuffer == fb) {

From 3ea3727998add8ba201e48934febc96be2cbdb99 Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Mon, 9 Nov 2015 07:13:29 -0500
Subject: [PATCH 122/287] docs: note that ARB_copy_image was added to nv50,
 nvc0 in this release

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
---
 docs/relnotes/11.1.0.html | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/relnotes/11.1.0.html b/docs/relnotes/11.1.0.html
index c35d91f4329..11fbdfff236 100644
--- a/docs/relnotes/11.1.0.html
+++ b/docs/relnotes/11.1.0.html
@@ -46,7 +46,7 @@ Note: some of the new features are only available with certain drivers.
 <ul>
 <li>GL_ARB_arrays_of_arrays on i965</li>
 <li>GL_ARB_blend_func_extended on freedreno (a3xx)</li>
-<li>GL_ARB_copy_image on radeonsi</li>
+<li>GL_ARB_copy_image on nv50, nvc0, radeonsi</li>
 <li>GL_ARB_gpu_shader_fp64 on r600 for Cypress/Cayman/Aruba chips</li>
 <li>GL_ARB_gpu_shader5 on r600 for Evergreen and later chips</li>
 <li>GL_ARB_shader_clock on i965 (gen7+)</li>

From a4a46fe3fa566b2918f7323e7f0eede17f118f03 Mon Sep 17 00:00:00 2001
From: Timothy Arceri <timothy.arceri@collabora.com>
Date: Sat, 7 Nov 2015 10:53:53 +1100
Subject: [PATCH 123/287] glsl: simplify interface block stream qualifier
 validation

Qualifiers on member variables are redundant; all we need to do
is check whether each one matches the stream associated with the
block and throw an error if it does not.

Reviewed-by: Samuel Iglesias Gonsalvez <siglesias@igalia.com>
Cc: Emil Velikov <emil.l.velikov@gmail.com>
---
 src/glsl/ast_to_hir.cpp   | 27 +++++++++++++--------------
 src/glsl/nir/glsl_types.h | 10 +---------
 2 files changed, 14 insertions(+), 23 deletions(-)

diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp
index 030653079d9..5a22820c692 100644
--- a/src/glsl/ast_to_hir.cpp
+++ b/src/glsl/ast_to_hir.cpp
@@ -5964,8 +5964,19 @@ ast_process_structure_or_interface_block(exec_list *instructions,
          fields[i].sample = qual->flags.q.sample ? 1 : 0;
          fields[i].patch = qual->flags.q.patch ? 1 : 0;
 
-         /* Only save explicitly defined streams in block's field */
-         fields[i].stream = qual->flags.q.explicit_stream ? qual->stream : -1;
+         /* From Section 4.4.2.3 (Geometry Outputs) of the GLSL 4.50 spec:
+          *
+          *   "A block member may be declared with a stream identifier, but
+          *   the specified stream must match the stream associated with the
+          *   containing block."
+          */
+         if (qual->flags.q.explicit_stream &&
+             qual->stream != layout->stream) {
+            _mesa_glsl_error(&loc, state, "stream layout qualifier on "
+                             "interface block member `%s' does not match "
+                             "the interface block (%d vs %d)",
+                             fields[i].name, qual->stream, layout->stream);
+         }
 
          if (qual->flags.q.row_major || qual->flags.q.column_major) {
             if (!qual->flags.q.uniform && !qual->flags.q.buffer) {
@@ -6267,18 +6278,6 @@ ast_interface_block::hir(exec_list *instructions,
 
    state->struct_specifier_depth--;
 
-   for (unsigned i = 0; i < num_variables; i++) {
-      if (fields[i].stream != -1 &&
-          (unsigned) fields[i].stream != this->layout.stream) {
-         _mesa_glsl_error(&loc, state,
-                          "stream layout qualifier on "
-                          "interface block member `%s' does not match "
-                          "the interface block (%d vs %d)",
-                          fields[i].name, fields[i].stream,
-                          this->layout.stream);
-      }
-   }
-
    if (!redeclaring_per_vertex) {
       validate_identifier(this->block_name, loc, state);
 
diff --git a/src/glsl/nir/glsl_types.h b/src/glsl/nir/glsl_types.h
index 52ca8260da7..1f17ad5c5b0 100644
--- a/src/glsl/nir/glsl_types.h
+++ b/src/glsl/nir/glsl_types.h
@@ -828,13 +828,6 @@ struct glsl_struct_field {
     */
    unsigned patch:1;
 
-   /**
-    * For interface blocks, it has a value if this variable uses multiple vertex
-    * streams (as in ir_variable::stream). -1 otherwise.
-    */
-   int stream;
-
-
    /**
     * Image qualifiers, applicable to buffer variables defined in shader
     * storage buffer objects (SSBOs)
@@ -847,8 +840,7 @@ struct glsl_struct_field {
 
    glsl_struct_field(const struct glsl_type *_type, const char *_name)
       : type(_type), name(_name), location(-1), interpolation(0), centroid(0),
-        sample(0), matrix_layout(GLSL_MATRIX_LAYOUT_INHERITED), patch(0),
-        stream(-1)
+        sample(0), matrix_layout(GLSL_MATRIX_LAYOUT_INHERITED), patch(0)
    {
       /* empty */
    }

From 8b28b3553139c19efed6d54d0a21315867371864 Mon Sep 17 00:00:00 2001
From: Jordan Justen <jordan.l.justen@intel.com>
Date: Sun, 15 Mar 2015 13:53:06 -0700
Subject: [PATCH 124/287] glsl: Parse shared keyword for compute shader
 variables

v2:
 * Move shared parsing under storage qualifiers (tarceri)
 * Fail to compile if shared is used in non-compute shader (tarceri)
 * Use separate shared_storage bit for shared variables (tarceri)

Signed-off-by: Jordan Justen <jordan.l.justen@intel.com>
Reviewed-by: Timothy Arceri <timothy.arceri@collabora.com>
---
 src/glsl/ast.h          | 1 +
 src/glsl/ast_to_hir.cpp | 6 ++++++
 src/glsl/ast_type.cpp   | 3 ++-
 src/glsl/glsl_lexer.ll  | 2 ++
 src/glsl/glsl_parser.yy | 6 ++++++
 5 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/src/glsl/ast.h b/src/glsl/ast.h
index e803e6d7675..1b75234d578 100644
--- a/src/glsl/ast.h
+++ b/src/glsl/ast.h
@@ -448,6 +448,7 @@ struct ast_type_qualifier {
 	 unsigned patch:1;
 	 unsigned uniform:1;
 	 unsigned buffer:1;
+	 unsigned shared_storage:1;
 	 unsigned smooth:1;
 	 unsigned flat:1;
 	 unsigned noperspective:1;
diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp
index 5a22820c692..a8eaecc54c3 100644
--- a/src/glsl/ast_to_hir.cpp
+++ b/src/glsl/ast_to_hir.cpp
@@ -3089,6 +3089,12 @@ apply_type_qualifier_to_variable(const struct ast_type_qualifier *qual,
                        "members");
    }
 
+   if (qual->flags.q.shared_storage && state->stage != MESA_SHADER_COMPUTE) {
+      _mesa_glsl_error(loc, state,
+                       "the shared storage qualifiers can only be used with "
+                       "compute shaders");
+   }
+
    if (qual->flags.q.row_major || qual->flags.q.column_major) {
       validate_matrix_layout_for_type(state, loc, var->type, var);
    }
diff --git a/src/glsl/ast_type.cpp b/src/glsl/ast_type.cpp
index 08a4504296b..79134c19893 100644
--- a/src/glsl/ast_type.cpp
+++ b/src/glsl/ast_type.cpp
@@ -85,7 +85,8 @@ ast_type_qualifier::has_storage() const
           || this->flags.q.in
           || this->flags.q.out
           || this->flags.q.uniform
-          || this->flags.q.buffer;
+          || this->flags.q.buffer
+          || this->flags.q.shared_storage;
 }
 
 bool
diff --git a/src/glsl/glsl_lexer.ll b/src/glsl/glsl_lexer.ll
index 21428177c97..e59f93e10ef 100644
--- a/src/glsl/glsl_lexer.ll
+++ b/src/glsl/glsl_lexer.ll
@@ -414,6 +414,8 @@ writeonly      KEYWORD_WITH_ALT(420, 300, 420, 310, yyextra->ARB_shader_image_lo
 
 atomic_uint     KEYWORD_WITH_ALT(420, 300, 420, 310, yyextra->ARB_shader_atomic_counters_enable, ATOMIC_UINT);
 
+shared          KEYWORD_WITH_ALT(430, 310, 430, 310, yyextra->ARB_compute_shader_enable, SHARED);
+
 struct		return STRUCT;
 void		return VOID_TOK;
 
diff --git a/src/glsl/glsl_parser.yy b/src/glsl/glsl_parser.yy
index 4636435f191..4ac8e45b63a 100644
--- a/src/glsl/glsl_parser.yy
+++ b/src/glsl/glsl_parser.yy
@@ -165,6 +165,7 @@ static bool match_layout_qualifier(const char *s1, const char *s2,
 %token IMAGE1DSHADOW IMAGE2DSHADOW IMAGE1DARRAYSHADOW IMAGE2DARRAYSHADOW
 %token COHERENT VOLATILE RESTRICT READONLY WRITEONLY
 %token ATOMIC_UINT
+%token SHARED
 %token STRUCT VOID_TOK WHILE
 %token <identifier> IDENTIFIER TYPE_IDENTIFIER NEW_IDENTIFIER
 %type <identifier> any_identifier
@@ -1929,6 +1930,11 @@ storage_qualifier:
       memset(& $$, 0, sizeof($$));
       $$.flags.q.buffer = 1;
    }
+   | SHARED
+   {
+      memset(& $$, 0, sizeof($$));
+      $$.flags.q.shared_storage = 1;
+   }
    ;
 
 memory_qualifier:

From 007d96730e03ae208b7baa981122b821e72efe92 Mon Sep 17 00:00:00 2001
From: Jordan Justen <jordan.l.justen@intel.com>
Date: Tue, 28 Jul 2015 14:56:49 -0700
Subject: [PATCH 125/287] glsl: Align comments on variables types

v2:
 * Split from patch to add ir_var_shader_shared (tarceri)

Signed-off-by: Jordan Justen <jordan.l.justen@intel.com>
Reviewed-by: Timothy Arceri <timothy.arceri@collabora.com>
---
 src/glsl/ir.h | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/src/glsl/ir.h b/src/glsl/ir.h
index 9c9f22d018b..e576db5da71 100644
--- a/src/glsl/ir.h
+++ b/src/glsl/ir.h
@@ -322,18 +322,18 @@ protected:
  * Variable storage classes
  */
 enum ir_variable_mode {
-   ir_var_auto = 0,     /**< Function local variables and globals. */
-   ir_var_uniform,      /**< Variable declared as a uniform. */
-   ir_var_shader_storage,   /**< Variable declared as an ssbo. */
+   ir_var_auto = 0,             /**< Function local variables and globals. */
+   ir_var_uniform,              /**< Variable declared as a uniform. */
+   ir_var_shader_storage,       /**< Variable declared as an ssbo. */
    ir_var_shader_in,
    ir_var_shader_out,
    ir_var_function_in,
    ir_var_function_out,
    ir_var_function_inout,
-   ir_var_const_in,	/**< "in" param that must be a constant expression */
-   ir_var_system_value, /**< Ex: front-face, instance-id, etc. */
-   ir_var_temporary,	/**< Temporary variable generated during compilation. */
-   ir_var_mode_count	/**< Number of variable modes */
+   ir_var_const_in,             /**< "in" param that must be a constant expression */
+   ir_var_system_value,         /**< Ex: front-face, instance-id, etc. */
+   ir_var_temporary,            /**< Temporary variable generated during compilation. */
+   ir_var_mode_count            /**< Number of variable modes */
 };
 
 /**

From c0ac4740a75f0701637df1ea82c450bb8c31a63a Mon Sep 17 00:00:00 2001
From: Jordan Justen <jordan.l.justen@intel.com>
Date: Sun, 8 Nov 2015 19:07:43 -0800
Subject: [PATCH 126/287] glsl: Add space to shader_storage in print_visitor

Signed-off-by: Jordan Justen <jordan.l.justen@intel.com>
Reviewed-by: Timothy Arceri <timothy.arceri@collabora.com>
---
 src/glsl/ir_print_visitor.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/glsl/ir_print_visitor.cpp b/src/glsl/ir_print_visitor.cpp
index b9196900ea6..211ac76d3d0 100644
--- a/src/glsl/ir_print_visitor.cpp
+++ b/src/glsl/ir_print_visitor.cpp
@@ -173,7 +173,7 @@ void ir_print_visitor::visit(ir_variable *ir)
    const char *const samp = (ir->data.sample) ? "sample " : "";
    const char *const patc = (ir->data.patch) ? "patch " : "";
    const char *const inv = (ir->data.invariant) ? "invariant " : "";
-   const char *const mode[] = { "", "uniform ", "shader_storage",
+   const char *const mode[] = { "", "uniform ", "shader_storage ",
                                 "shader_in ", "shader_out ",
                                 "in ", "out ", "inout ",
 			        "const_in ", "sys ", "temporary " };

From 32746fc9b49db5eccdc228e12cbce4734f1020f9 Mon Sep 17 00:00:00 2001
From: Jordan Justen <jordan.l.justen@intel.com>
Date: Tue, 28 Jul 2015 14:56:49 -0700
Subject: [PATCH 127/287] glsl: Add shared variable type

Shared variables are stored in a common pool accessible by all threads
in a compute shader local work group.

These variables are similar to OpenCL's local/__local variables.

Signed-off-by: Jordan Justen <jordan.l.justen@intel.com>
Reviewed-by: Timothy Arceri <timothy.arceri@collabora.com>
---
 src/glsl/ir.h                 | 1 +
 src/glsl/ir_print_visitor.cpp | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/glsl/ir.h b/src/glsl/ir.h
index e576db5da71..32a766ef0f0 100644
--- a/src/glsl/ir.h
+++ b/src/glsl/ir.h
@@ -325,6 +325,7 @@ enum ir_variable_mode {
    ir_var_auto = 0,             /**< Function local variables and globals. */
    ir_var_uniform,              /**< Variable declared as a uniform. */
    ir_var_shader_storage,       /**< Variable declared as an ssbo. */
+   ir_var_shader_shared,        /**< Variable declared as shared. */
    ir_var_shader_in,
    ir_var_shader_out,
    ir_var_function_in,
diff --git a/src/glsl/ir_print_visitor.cpp b/src/glsl/ir_print_visitor.cpp
index 211ac76d3d0..42b03fdea52 100644
--- a/src/glsl/ir_print_visitor.cpp
+++ b/src/glsl/ir_print_visitor.cpp
@@ -174,7 +174,7 @@ void ir_print_visitor::visit(ir_variable *ir)
    const char *const patc = (ir->data.patch) ? "patch " : "";
    const char *const inv = (ir->data.invariant) ? "invariant " : "";
    const char *const mode[] = { "", "uniform ", "shader_storage ",
-                                "shader_in ", "shader_out ",
+                                "shader_shared ", "shader_in ", "shader_out ",
                                 "in ", "out ", "inout ",
 			        "const_in ", "sys ", "temporary " };
    STATIC_ASSERT(ARRAY_SIZE(mode) == ir_var_mode_count);

From fb3da129d1086df21122f032dd64d4b1c480cb70 Mon Sep 17 00:00:00 2001
From: Jordan Justen <jordan.l.justen@intel.com>
Date: Tue, 28 Jul 2015 15:00:47 -0700
Subject: [PATCH 128/287] glsl: Use shared storage variable type for shared
 variables

Signed-off-by: Jordan Justen <jordan.l.justen@intel.com>
Reviewed-by: Timothy Arceri <timothy.arceri@collabora.com>
---
 src/glsl/ast_to_hir.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp
index a8eaecc54c3..6f5f3c1b245 100644
--- a/src/glsl/ast_to_hir.cpp
+++ b/src/glsl/ast_to_hir.cpp
@@ -2790,6 +2790,8 @@ apply_type_qualifier_to_variable(const struct ast_type_qualifier *qual,
       var->data.mode = ir_var_uniform;
    else if (qual->flags.q.buffer)
       var->data.mode = ir_var_shader_storage;
+   else if (qual->flags.q.shared_storage)
+      var->data.mode = ir_var_shader_shared;
 
    if (!is_parameter && is_varying_var(var, state->stage)) {
       /* User-defined ins/outs are not permitted in compute shaders. */

From 855a3ca598734273f53c5e316f1d825aea3dd6da Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Mon, 9 Nov 2015 08:51:47 -0800
Subject: [PATCH 129/287] vc4: Fix a compiler warning.

---
 src/gallium/drivers/vc4/vc4_resource.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/drivers/vc4/vc4_resource.c b/src/gallium/drivers/vc4/vc4_resource.c
index 122bda0bac6..146929637ec 100644
--- a/src/gallium/drivers/vc4/vc4_resource.c
+++ b/src/gallium/drivers/vc4/vc4_resource.c
@@ -668,7 +668,7 @@ vc4_get_shadow_index_buffer(struct pipe_context *pctx,
         uint16_t *dst = data;
 
         struct pipe_transfer *src_transfer = NULL;
-        uint32_t *src;
+        const uint32_t *src;
         if (ib->user_buffer) {
                 src = ib->user_buffer;
         } else {

From 84608e07e7f45b14a77e4f771484f0091a6e7c14 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Tue, 27 Oct 2015 16:14:05 -0700
Subject: [PATCH 130/287] vc4: Add CL dumping for GL_ARRAY_PRIMITIVE.

---
 src/gallium/drivers/vc4/vc4_cl_dump.c | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/vc4/vc4_cl_dump.c b/src/gallium/drivers/vc4/vc4_cl_dump.c
index 476d2b5b0b1..a719f276b2e 100644
--- a/src/gallium/drivers/vc4/vc4_cl_dump.c
+++ b/src/gallium/drivers/vc4/vc4_cl_dump.c
@@ -183,6 +183,21 @@ dump_VC4_PACKET_GL_INDEXED_PRIMITIVE(void *cl, uint32_t offset, uint32_t hw_offs
                 offset + 9, hw_offset + 9, *max_index);
 }
 
+static void
+dump_VC4_PACKET_GL_ARRAY_PRIMITIVE(void *cl, uint32_t offset, uint32_t hw_offset)
+{
+        uint8_t *b = cl + offset;          /* byte 0; low 3 bits select the primitive name */
+        uint32_t *count = cl + offset + 1; /* 32-bit value at byte 1, printed as "verts" */
+        uint32_t *start = cl + offset + 5; /* 32-bit value at byte 5, printed as "start" */
+        /* Print one stderr line per field: CL offset, hw offset, field value. */
+        fprintf(stderr, "0x%08x 0x%08x:      0x%02x %s\n",
+                offset, hw_offset, b[0], u_prim_name(b[0] & 0x7));
+        fprintf(stderr, "0x%08x 0x%08x:      %d verts\n",
+                offset + 1, hw_offset + 1, *count);
+        fprintf(stderr, "0x%08x 0x%08x:      0x%08x start\n",
+                offset + 5, hw_offset + 5, *start);
+}
+
 static void
 dump_VC4_PACKET_FLAT_SHADE_FLAGS(void *cl, uint32_t offset, uint32_t hw_offset)
 {
@@ -380,7 +395,7 @@ static const struct packet_info {
         PACKET_DUMP(VC4_PACKET_LOAD_TILE_BUFFER_GENERAL),
 
         PACKET_DUMP(VC4_PACKET_GL_INDEXED_PRIMITIVE),
-        PACKET(VC4_PACKET_GL_ARRAY_PRIMITIVE),
+        PACKET_DUMP(VC4_PACKET_GL_ARRAY_PRIMITIVE),
 
         PACKET(VC4_PACKET_COMPRESSED_PRIMITIVE),
         PACKET(VC4_PACKET_CLIPPED_COMPRESSED_PRIMITIVE),

From eb8fb0064dbde7a363c2f99466a51b346b09a029 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Fri, 6 Nov 2015 11:07:25 -0800
Subject: [PATCH 131/287] vc4: Return GL_OUT_OF_MEMORY when buffer allocation
 fails.

I was afraid our callers weren't prepared for this, but it looks like
at least for resource creation, mesa/st throws an error appropriately.

Cc: "11.0" <mesa-stable@lists.freedesktop.org>
---
 src/gallium/drivers/vc4/vc4_bufmgr.c   |  5 +--
 src/gallium/drivers/vc4/vc4_resource.c | 46 ++++++++++++++++----------
 2 files changed, 32 insertions(+), 19 deletions(-)

diff --git a/src/gallium/drivers/vc4/vc4_bufmgr.c b/src/gallium/drivers/vc4/vc4_bufmgr.c
index 2f822f04c21..21e3bde2ee2 100644
--- a/src/gallium/drivers/vc4/vc4_bufmgr.c
+++ b/src/gallium/drivers/vc4/vc4_bufmgr.c
@@ -168,8 +168,9 @@ retry:
                         vc4_bo_cache_free_all(&screen->bo_cache);
                         goto retry;
                 }
-                fprintf(stderr, "create ioctl failure\n");
-                abort();
+
+                free(bo);
+                return NULL;
         }
 
         screen->bo_count++;
diff --git a/src/gallium/drivers/vc4/vc4_resource.c b/src/gallium/drivers/vc4/vc4_resource.c
index 146929637ec..bb723845531 100644
--- a/src/gallium/drivers/vc4/vc4_resource.c
+++ b/src/gallium/drivers/vc4/vc4_resource.c
@@ -35,11 +35,12 @@
 
 static bool miptree_debug = false;
 
-static void
+static bool
 vc4_resource_bo_alloc(struct vc4_resource *rsc)
 {
         struct pipe_resource *prsc = &rsc->base.b;
         struct pipe_screen *pscreen = prsc->screen;
+        struct vc4_bo *bo;
 
         if (miptree_debug) {
                 fprintf(stderr, "alloc %p: size %d + offset %d -> %d\n",
@@ -51,12 +52,18 @@ vc4_resource_bo_alloc(struct vc4_resource *rsc)
                         rsc->cube_map_stride * (prsc->array_size - 1));
         }
 
-        vc4_bo_unreference(&rsc->bo);
-        rsc->bo = vc4_bo_alloc(vc4_screen(pscreen),
-                               rsc->slices[0].offset +
-                               rsc->slices[0].size +
-                               rsc->cube_map_stride * (prsc->array_size - 1),
-                               "resource");
+        bo = vc4_bo_alloc(vc4_screen(pscreen),
+                          rsc->slices[0].offset +
+                          rsc->slices[0].size +
+                          rsc->cube_map_stride * (prsc->array_size - 1),
+                          "resource");
+        if (bo) {
+                vc4_bo_unreference(&rsc->bo);
+                rsc->bo = bo;
+                return true;
+        } else {
+                return false;
+        }
 }
 
 static void
@@ -101,21 +108,27 @@ vc4_resource_transfer_map(struct pipe_context *pctx,
         char *buf;
 
         if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) {
-                vc4_resource_bo_alloc(rsc);
+                if (vc4_resource_bo_alloc(rsc)) {
 
-                /* If it might be bound as one of our vertex buffers, make
-                 * sure we re-emit vertex buffer state.
-                 */
-                if (prsc->bind & PIPE_BIND_VERTEX_BUFFER)
-                        vc4->dirty |= VC4_DIRTY_VTXBUF;
+                        /* If it might be bound as one of our vertex buffers,
+                         * make sure we re-emit vertex buffer state.
+                         */
+                        if (prsc->bind & PIPE_BIND_VERTEX_BUFFER)
+                                vc4->dirty |= VC4_DIRTY_VTXBUF;
+                } else {
+                        /* If we failed to reallocate, flush everything so
+                         * that we don't violate any syncing requirements.
+                         */
+                        vc4_flush(pctx);
+                }
         } else if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
                 if (vc4_cl_references_bo(pctx, rsc->bo)) {
                         if ((usage & PIPE_TRANSFER_DISCARD_RANGE) &&
                             prsc->last_level == 0 &&
                             prsc->width0 == box->width &&
                             prsc->height0 == box->height &&
-                            prsc->depth0 == box->depth) {
-                                vc4_resource_bo_alloc(rsc);
+                            prsc->depth0 == box->depth &&
+                            vc4_resource_bo_alloc(rsc)) {
                                 if (prsc->bind & PIPE_BIND_VERTEX_BUFFER)
                                         vc4->dirty |= VC4_DIRTY_VTXBUF;
                         } else {
@@ -389,8 +402,7 @@ vc4_resource_create(struct pipe_screen *pscreen,
                 rsc->vc4_format = get_resource_texture_format(prsc);
 
         vc4_setup_slices(rsc);
-        vc4_resource_bo_alloc(rsc);
-        if (!rsc->bo)
+        if (!vc4_resource_bo_alloc(rsc))
                 goto fail;
 
         return prsc;

From 5980389bbf98b8186ba6a06392d92b82fa9efad3 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Mon, 9 Nov 2015 08:56:01 -0800
Subject: [PATCH 132/287] vc4: Return NULL when we can't make our shadow for a
 sampler view.

I'm not sure that what the caller does (just leaving a NULL sampler at
this slot) is appropriate, but it fixes the immediate crash.

Cc: "11.0" <mesa-stable@lists.freedesktop.org>
---
 src/gallium/drivers/vc4/vc4_state.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/gallium/drivers/vc4/vc4_state.c b/src/gallium/drivers/vc4/vc4_state.c
index 78aa344ab1d..7317695b652 100644
--- a/src/gallium/drivers/vc4/vc4_state.c
+++ b/src/gallium/drivers/vc4/vc4_state.c
@@ -583,6 +583,10 @@ vc4_create_sampler_view(struct pipe_context *pctx, struct pipe_resource *prsc,
                 tmpl.last_level = cso->u.tex.last_level - cso->u.tex.first_level;
 
                 prsc = vc4_resource_create(pctx->screen, &tmpl);
+                if (!prsc) {
+                        free(so);
+                        return NULL;
+                }
                 rsc = vc4_resource(prsc);
                 clone = vc4_resource(prsc);
                 clone->shadow_parent = &shadow_parent->base.b;

From 437d7b611972c52fac32cb54038d3b278f66fd5a Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Mon, 9 Nov 2015 09:12:20 -0800
Subject: [PATCH 133/287] vc4: Avoid loading undefined (newly-allocated) FBO
 contents.

Since X has undefined contents in new pixmaps, it will allocate new
textures for an FBO and draw to them without an explicit clear.  For
VC4, it's much faster to emit a clear than the load of the actual
undefined memory contents, so just do that instead.
---
 src/gallium/drivers/vc4/vc4_state.c | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/src/gallium/drivers/vc4/vc4_state.c b/src/gallium/drivers/vc4/vc4_state.c
index 7317695b652..a234ce53b20 100644
--- a/src/gallium/drivers/vc4/vc4_state.c
+++ b/src/gallium/drivers/vc4/vc4_state.c
@@ -420,6 +420,23 @@ vc4_set_framebuffer_state(struct pipe_context *pctx,
         cso->width = framebuffer->width;
         cso->height = framebuffer->height;
 
+        /* If we're binding to uninitialized buffers, no need to load their
+         * contents before drawing.
+         */
+        if (cso->cbufs[0]) {
+                struct vc4_resource *rsc =
+                        vc4_resource(cso->cbufs[0]->texture);
+                if (!rsc->writes)
+                        vc4->cleared |= PIPE_CLEAR_COLOR0;
+        }
+
+        if (cso->zsbuf) {
+                struct vc4_resource *rsc =
+                        vc4_resource(cso->zsbuf->texture);
+                if (!rsc->writes)
+                        vc4->cleared |= PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL;
+        }
+
         /* Nonzero texture mipmap levels are laid out as if they were in
          * power-of-two-sized spaces.  The renderbuffer config infers its
          * stride from the width parameter, so we need to configure our

From db54673b54ee7e2c49d5aa6f77d2fff3eed0aa24 Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Fri, 9 Oct 2015 15:49:49 -0700
Subject: [PATCH 134/287] nir: Store PatchInputsRead and PatchOutputsWritten in
 nir_shader_info.

These tessellation shader related fields need plumbing through NIR.

v2: Use uint32_t instead of uint64_t to match the source type of
    GLbitfield (caught by Iago Toral).

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
---
 src/glsl/nir/glsl_to_nir.cpp | 2 ++
 src/glsl/nir/nir.h           | 5 +++++
 2 files changed, 7 insertions(+)

diff --git a/src/glsl/nir/glsl_to_nir.cpp b/src/glsl/nir/glsl_to_nir.cpp
index facb9fa4a7a..ec6bdc8acb0 100644
--- a/src/glsl/nir/glsl_to_nir.cpp
+++ b/src/glsl/nir/glsl_to_nir.cpp
@@ -160,6 +160,8 @@ glsl_to_nir(const struct gl_shader_program *shader_prog,
    shader->info.num_images = sh->NumImages;
    shader->info.inputs_read = sh->Program->InputsRead;
    shader->info.outputs_written = sh->Program->OutputsWritten;
+   shader->info.patch_inputs_read = sh->Program->PatchInputsRead;
+   shader->info.patch_outputs_written = sh->Program->PatchOutputsWritten;
    shader->info.system_values_read = sh->Program->SystemValuesRead;
    shader->info.uses_texture_gather = sh->Program->UsesGather;
    shader->info.uses_clip_distance_out =
diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
index 2559ef2a456..13ebbcae564 100644
--- a/src/glsl/nir/nir.h
+++ b/src/glsl/nir/nir.h
@@ -1490,6 +1490,11 @@ typedef struct nir_shader_info {
    /* Which system values are actually read */
    uint64_t system_values_read;
 
+   /* Which patch inputs are actually read */
+   uint32_t patch_inputs_read;
+   /* Which patch outputs are actually written */
+   uint32_t patch_outputs_written;
+
    /* Whether or not this shader ever uses textureGather() */
    bool uses_texture_gather;
 

From 6f4216232921370e513e56d38cf894c94ae63fa6 Mon Sep 17 00:00:00 2001
From: Jose Fonseca <jfonseca@vmware.com>
Date: Mon, 9 Nov 2015 22:25:27 +0000
Subject: [PATCH 135/287] st/mesa: Destroy buffer object's mutex.

Ideally we should have a _mesa_cleanup_buffer_object function in
src/mesa/bufferobj.c so that the destruction logic resides in a single
place.

Reviewed-by: Brian Paul <brianp@vmware.com>
---
 src/mesa/state_tracker/st_cb_bufferobjects.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/mesa/state_tracker/st_cb_bufferobjects.c b/src/mesa/state_tracker/st_cb_bufferobjects.c
index 8afd336779f..5d20b26d26e 100644
--- a/src/mesa/state_tracker/st_cb_bufferobjects.c
+++ b/src/mesa/state_tracker/st_cb_bufferobjects.c
@@ -83,6 +83,7 @@ st_bufferobj_free(struct gl_context *ctx, struct gl_buffer_object *obj)
    if (st_obj->buffer)
       pipe_resource_reference(&st_obj->buffer, NULL);
 
+   mtx_destroy(&st_obj->Base.Mutex);
    free(st_obj->Base.Label);
    free(st_obj);
 }

From 30fe8eaa8e65e656d2bc95f499791eebea7d7dc9 Mon Sep 17 00:00:00 2001
From: Connor Abbott <cwabbott0@gmail.com>
Date: Fri, 30 Oct 2015 23:32:50 -0400
Subject: [PATCH 136/287] nir/glsl: make evaluate_rvalue() return a nir_ssa_def
 *

A long time ago, before NIR was even merged to master, glsl_to_nir used
registers and these sources were actually register sources. But nowadays
everything in glsl_to_nir is an SSA value, so stop pretending that by
evaluating an rvalue we can get an arbitrary nir_src. Most importantly,
we need this since the builder takes nir_ssa_def * sources directly.

Signed-off-by: Connor Abbott <cwabbott0@gmail.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/glsl/nir/glsl_to_nir.cpp | 90 +++++++++++++++++++++---------------
 1 file changed, 53 insertions(+), 37 deletions(-)

diff --git a/src/glsl/nir/glsl_to_nir.cpp b/src/glsl/nir/glsl_to_nir.cpp
index ec6bdc8acb0..3b8fb00abcf 100644
--- a/src/glsl/nir/glsl_to_nir.cpp
+++ b/src/glsl/nir/glsl_to_nir.cpp
@@ -73,7 +73,7 @@ public:
 private:
    void create_overload(ir_function_signature *ir, nir_function *function);
    void add_instr(nir_instr *instr, unsigned num_components);
-   nir_src evaluate_rvalue(ir_rvalue *ir);
+   nir_ssa_def *evaluate_rvalue(ir_rvalue *ir);
 
    nir_alu_instr *emit(nir_op op, unsigned dest_size, nir_src *srcs);
    nir_alu_instr *emit(nir_op op, unsigned dest_size, nir_src src1);
@@ -560,7 +560,8 @@ nir_visitor::visit(ir_loop *ir)
 void
 nir_visitor::visit(ir_if *ir)
 {
-   nir_src condition = evaluate_rvalue(ir->condition);
+   nir_src condition =
+      nir_src_for_ssa(evaluate_rvalue(ir->condition));
 
    exec_list *old_list = this->cf_node_list;
 
@@ -591,7 +592,8 @@ nir_visitor::visit(ir_discard *ir)
    if (ir->condition) {
       discard = nir_intrinsic_instr_create(this->shader,
                                            nir_intrinsic_discard_if);
-      discard->src[0] = evaluate_rvalue(ir->condition);
+      discard->src[0] =
+         nir_src_for_ssa(evaluate_rvalue(ir->condition));
    } else {
       discard = nir_intrinsic_instr_create(this->shader, nir_intrinsic_discard);
    }
@@ -792,7 +794,8 @@ nir_visitor::visit(ir_call *ir)
          /* Set the address argument, extending the coordinate vector to four
           * components.
           */
-         const nir_src src_addr = evaluate_rvalue((ir_dereference *)param);
+         const nir_src src_addr =
+            nir_src_for_ssa(evaluate_rvalue((ir_dereference *)param));
          nir_alu_instr *instr_addr = nir_alu_instr_create(shader, nir_op_vec4);
          nir_ssa_dest_init(&instr_addr->instr, &instr_addr->dest.dest, 4, NULL);
 
@@ -813,7 +816,8 @@ nir_visitor::visit(ir_call *ir)
           * images.
           */
          if (type->sampler_dimensionality == GLSL_SAMPLER_DIM_MS) {
-            instr->src[1] = evaluate_rvalue((ir_dereference *)param);
+            instr->src[1] =
+               nir_src_for_ssa(evaluate_rvalue((ir_dereference *)param));
             param = param->get_next();
          } else {
             instr->src[1] = nir_src_for_ssa(&instr_undef->def);
@@ -821,12 +825,14 @@ nir_visitor::visit(ir_call *ir)
 
          /* Set the intrinsic parameters. */
          if (!param->is_tail_sentinel()) {
-            instr->src[2] = evaluate_rvalue((ir_dereference *)param);
+            instr->src[2] =
+               nir_src_for_ssa(evaluate_rvalue((ir_dereference *)param));
             param = param->get_next();
          }
 
          if (!param->is_tail_sentinel()) {
-            instr->src[3] = evaluate_rvalue((ir_dereference *)param);
+            instr->src[3] =
+               nir_src_for_ssa(evaluate_rvalue((ir_dereference *)param));
             param = param->get_next();
          }
          nir_instr_insert_after_cf_list(this->cf_node_list, &instr->instr);
@@ -864,7 +870,7 @@ nir_visitor::visit(ir_call *ir)
             op = nir_intrinsic_store_ssbo_indirect;
             ralloc_free(instr);
             instr = nir_intrinsic_instr_create(shader, op);
-            instr->src[2] = evaluate_rvalue(offset);
+            instr->src[2] = nir_src_for_ssa(evaluate_rvalue(offset));
             instr->const_index[0] = 0;
          } else {
             instr->const_index[0] = const_offset->value.u[0];
@@ -872,10 +878,10 @@ nir_visitor::visit(ir_call *ir)
 
          instr->const_index[1] = write_mask->value.u[0];
 
-         instr->src[0] = evaluate_rvalue(val);
+         instr->src[0] = nir_src_for_ssa(evaluate_rvalue(val));
          instr->num_components = val->type->vector_elements;
 
-         instr->src[1] = evaluate_rvalue(block);
+         instr->src[1] = nir_src_for_ssa(evaluate_rvalue(block));
          nir_instr_insert_after_cf_list(this->cf_node_list, &instr->instr);
          break;
       }
@@ -892,14 +898,14 @@ nir_visitor::visit(ir_call *ir)
             op = nir_intrinsic_load_ssbo_indirect;
             ralloc_free(instr);
             instr = nir_intrinsic_instr_create(shader, op);
-            instr->src[1] = evaluate_rvalue(offset);
+            instr->src[1] = nir_src_for_ssa(evaluate_rvalue(offset));
             instr->const_index[0] = 0;
             dest = &instr->dest;
          } else {
             instr->const_index[0] = const_offset->value.u[0];
          }
 
-         instr->src[0] = evaluate_rvalue(block);
+         instr->src[0] = nir_src_for_ssa(evaluate_rvalue(block));
 
          const glsl_type *type = ir->return_deref->var->type;
          instr->num_components = type->vector_elements;
@@ -959,24 +965,24 @@ nir_visitor::visit(ir_call *ir)
          /* Block index */
          exec_node *param = ir->actual_parameters.get_head();
          ir_instruction *inst = (ir_instruction *) param;
-         instr->src[0] = evaluate_rvalue(inst->as_rvalue());
+         instr->src[0] = nir_src_for_ssa(evaluate_rvalue(inst->as_rvalue()));
 
          /* Offset */
          param = param->get_next();
          inst = (ir_instruction *) param;
-         instr->src[1] = evaluate_rvalue(inst->as_rvalue());
+         instr->src[1] = nir_src_for_ssa(evaluate_rvalue(inst->as_rvalue()));
 
          /* data1 parameter (this is always present) */
          param = param->get_next();
          inst = (ir_instruction *) param;
-         instr->src[2] = evaluate_rvalue(inst->as_rvalue());
+         instr->src[2] = nir_src_for_ssa(evaluate_rvalue(inst->as_rvalue()));
 
          /* data2 parameter (only with atomic_comp_swap) */
          if (param_count == 4) {
             assert(op == nir_intrinsic_ssbo_atomic_comp_swap);
             param = param->get_next();
             inst = (ir_instruction *) param;
-            instr->src[3] = evaluate_rvalue(inst->as_rvalue());
+            instr->src[3] = nir_src_for_ssa(evaluate_rvalue(inst->as_rvalue()));
          }
 
          /* Atomic result */
@@ -1039,7 +1045,7 @@ nir_visitor::visit(ir_assignment *ir)
 
       if (ir->condition) {
          nir_if *if_stmt = nir_if_create(this->shader);
-         if_stmt->condition = evaluate_rvalue(ir->condition);
+         if_stmt->condition = nir_src_for_ssa(evaluate_rvalue(ir->condition));
          nir_cf_node_insert_end(this->cf_node_list, &if_stmt->cf_node);
          nir_instr_insert_after_cf_list(&if_stmt->then_list, &copy->instr);
       } else {
@@ -1052,7 +1058,7 @@ nir_visitor::visit(ir_assignment *ir)
 
    ir->lhs->accept(this);
    nir_deref_var *lhs_deref = this->deref_head;
-   nir_src src = evaluate_rvalue(ir->rhs);
+   nir_src src = nir_src_for_ssa(evaluate_rvalue(ir->rhs));
 
    if (ir->write_mask != (1 << num_components) - 1 && ir->write_mask != 0) {
       /*
@@ -1115,7 +1121,7 @@ nir_visitor::visit(ir_assignment *ir)
 
    if (ir->condition) {
       nir_if *if_stmt = nir_if_create(this->shader);
-      if_stmt->condition = evaluate_rvalue(ir->condition);
+      if_stmt->condition = nir_src_for_ssa(evaluate_rvalue(ir->condition));
       nir_cf_node_insert_end(this->cf_node_list, &if_stmt->cf_node);
       nir_instr_insert_after_cf_list(&if_stmt->then_list, &store->instr);
    } else {
@@ -1171,7 +1177,7 @@ nir_visitor::add_instr(nir_instr *instr, unsigned num_components)
    this->result = instr;
 }
 
-nir_src
+nir_ssa_def *
 nir_visitor::evaluate_rvalue(ir_rvalue* ir)
 {
    ir->accept(this);
@@ -1192,7 +1198,7 @@ nir_visitor::evaluate_rvalue(ir_rvalue* ir)
    nir_dest *dest = get_instr_dest(this->result);
    assert(dest->is_ssa);
 
-   return nir_src_for_ssa(&dest->ssa);
+   return &dest->ssa;
 }
 
 nir_alu_instr *
@@ -1248,9 +1254,9 @@ nir_visitor::visit(ir_expression *ir)
       nir_intrinsic_instr *load = nir_intrinsic_instr_create(this->shader, op);
       load->num_components = ir->type->vector_elements;
       load->const_index[0] = const_index ? const_index->value.u[0] : 0; /* base offset */
-      load->src[0] = evaluate_rvalue(ir->operands[0]);
+      load->src[0] = nir_src_for_ssa(evaluate_rvalue(ir->operands[0]));
       if (!const_index)
-         load->src[1] = evaluate_rvalue(ir->operands[1]);
+         load->src[1] = nir_src_for_ssa(evaluate_rvalue(ir->operands[1]));
       add_instr(&load->instr, ir->type->vector_elements);
 
       /*
@@ -1328,7 +1334,7 @@ nir_visitor::visit(ir_expression *ir)
 
       if (intrin->intrinsic == nir_intrinsic_interp_var_at_offset ||
           intrin->intrinsic == nir_intrinsic_interp_var_at_sample)
-         intrin->src[0] = evaluate_rvalue(ir->operands[1]);
+         intrin->src[0] = nir_src_for_ssa(evaluate_rvalue(ir->operands[1]));
 
       add_instr(&intrin->instr, deref->type->vector_elements);
 
@@ -1357,7 +1363,7 @@ nir_visitor::visit(ir_expression *ir)
 
    nir_src srcs[4];
    for (unsigned i = 0; i < ir->get_num_operands(); i++)
-      srcs[i] = evaluate_rvalue(ir->operands[i]);
+      srcs[i] = nir_src_for_ssa(evaluate_rvalue(ir->operands[i]));
 
    glsl_base_type types[4];
    for (unsigned i = 0; i < ir->get_num_operands(); i++)
@@ -1565,7 +1571,7 @@ nir_visitor::visit(ir_expression *ir)
          this->shader,
          nir_intrinsic_get_buffer_size);
       load->num_components = ir->type->vector_elements;
-      load->src[0] = evaluate_rvalue(ir->operands[0]);
+      load->src[0] = nir_src_for_ssa(evaluate_rvalue(ir->operands[0]));
       add_instr(&load->instr, ir->type->vector_elements);
       return;
    }
@@ -1908,7 +1914,7 @@ nir_visitor::visit(ir_swizzle *ir)
 {
    nir_alu_instr *instr = emit(supports_ints ? nir_op_imov : nir_op_fmov,
                                ir->type->vector_elements,
-                               evaluate_rvalue(ir->val));
+                               nir_src_for_ssa(evaluate_rvalue(ir->val)));
 
    unsigned swizzle[4] = { ir->mask.x, ir->mask.y, ir->mask.z, ir->mask.w };
    for (unsigned i = 0; i < ir->type->vector_elements; i++)
@@ -2018,19 +2024,22 @@ nir_visitor::visit(ir_texture *ir)
 
    if (ir->coordinate != NULL) {
       instr->coord_components = ir->coordinate->type->vector_elements;
-      instr->src[src_number].src = evaluate_rvalue(ir->coordinate);
+      instr->src[src_number].src =
+         nir_src_for_ssa(evaluate_rvalue(ir->coordinate));
       instr->src[src_number].src_type = nir_tex_src_coord;
       src_number++;
    }
 
    if (ir->projector != NULL) {
-      instr->src[src_number].src = evaluate_rvalue(ir->projector);
+      instr->src[src_number].src =
+         nir_src_for_ssa(evaluate_rvalue(ir->projector));
       instr->src[src_number].src_type = nir_tex_src_projector;
       src_number++;
    }
 
    if (ir->shadow_comparitor != NULL) {
-      instr->src[src_number].src = evaluate_rvalue(ir->shadow_comparitor);
+      instr->src[src_number].src =
+         nir_src_for_ssa(evaluate_rvalue(ir->shadow_comparitor));
       instr->src[src_number].src_type = nir_tex_src_comparitor;
       src_number++;
    }
@@ -2044,7 +2053,8 @@ nir_visitor::visit(ir_texture *ir)
          for (unsigned i = 0; i < const_offset->type->vector_elements; i++)
             instr->const_offset[i] = const_offset->value.i[i];
       } else {
-         instr->src[src_number].src = evaluate_rvalue(ir->offset);
+         instr->src[src_number].src =
+            nir_src_for_ssa(evaluate_rvalue(ir->offset));
          instr->src[src_number].src_type = nir_tex_src_offset;
          src_number++;
       }
@@ -2052,7 +2062,8 @@ nir_visitor::visit(ir_texture *ir)
 
    switch (ir->op) {
    case ir_txb:
-      instr->src[src_number].src = evaluate_rvalue(ir->lod_info.bias);
+      instr->src[src_number].src =
+         nir_src_for_ssa(evaluate_rvalue(ir->lod_info.bias));
       instr->src[src_number].src_type = nir_tex_src_bias;
       src_number++;
       break;
@@ -2061,23 +2072,27 @@ nir_visitor::visit(ir_texture *ir)
    case ir_txf:
    case ir_txs:
       if (ir->lod_info.lod != NULL) {
-         instr->src[src_number].src = evaluate_rvalue(ir->lod_info.lod);
+         instr->src[src_number].src =
+            nir_src_for_ssa(evaluate_rvalue(ir->lod_info.lod));
          instr->src[src_number].src_type = nir_tex_src_lod;
          src_number++;
       }
       break;
 
    case ir_txd:
-      instr->src[src_number].src = evaluate_rvalue(ir->lod_info.grad.dPdx);
+      instr->src[src_number].src =
+         nir_src_for_ssa(evaluate_rvalue(ir->lod_info.grad.dPdx));
       instr->src[src_number].src_type = nir_tex_src_ddx;
       src_number++;
-      instr->src[src_number].src = evaluate_rvalue(ir->lod_info.grad.dPdy);
+      instr->src[src_number].src =
+         nir_src_for_ssa(evaluate_rvalue(ir->lod_info.grad.dPdy));
       instr->src[src_number].src_type = nir_tex_src_ddy;
       src_number++;
       break;
 
    case ir_txf_ms:
-      instr->src[src_number].src = evaluate_rvalue(ir->lod_info.sample_index);
+      instr->src[src_number].src =
+         nir_src_for_ssa(evaluate_rvalue(ir->lod_info.sample_index));
       instr->src[src_number].src_type = nir_tex_src_ms_index;
       src_number++;
       break;
@@ -2152,7 +2167,8 @@ nir_visitor::visit(ir_dereference_array *ir)
       deref->base_offset = const_index->value.u[0];
    } else {
       deref->deref_array_type = nir_deref_array_type_indirect;
-      deref->indirect = evaluate_rvalue(ir->array_index);
+      deref->indirect =
+         nir_src_for_ssa(evaluate_rvalue(ir->array_index));
    }
 
    ir->array->accept(this);

From a60e990dd25e2fb45799a7a5cfa58abc9abf7665 Mon Sep 17 00:00:00 2001
From: Connor Abbott <cwabbott0@gmail.com>
Date: Fri, 30 Oct 2015 23:47:46 -0400
Subject: [PATCH 137/287] nir/glsl: convert nir_visitor::result to a
 nir_ssa_def *

Its only user now returns a nir_ssa_def *, and we'll need this since the
builder returns a nir_ssa_def *.

Signed-off-by: Connor Abbott <cwabbott0@gmail.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/glsl/nir/glsl_to_nir.cpp | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/src/glsl/nir/glsl_to_nir.cpp b/src/glsl/nir/glsl_to_nir.cpp
index 3b8fb00abcf..10faa370ad1 100644
--- a/src/glsl/nir/glsl_to_nir.cpp
+++ b/src/glsl/nir/glsl_to_nir.cpp
@@ -87,7 +87,7 @@ private:
    nir_shader *shader;
    nir_function_impl *impl;
    exec_list *cf_node_list;
-   nir_instr *result; /* result of the expression tree last visited */
+   nir_ssa_def *result; /* result of the expression tree last visited */
 
    nir_deref_var *evaluate_deref(nir_instr *mem_ctx, ir_instruction *ir);
 
@@ -1174,7 +1174,11 @@ nir_visitor::add_instr(nir_instr *instr, unsigned num_components)
       nir_ssa_dest_init(instr, dest, num_components, NULL);
 
    nir_instr_insert_after_cf_list(this->cf_node_list, instr);
-   this->result = instr;
+
+   if (dest) {
+      assert(dest->is_ssa);
+      this->result = &dest->ssa;
+   }
 }
 
 nir_ssa_def *
@@ -1195,10 +1199,7 @@ nir_visitor::evaluate_rvalue(ir_rvalue* ir)
       add_instr(&load_instr->instr, ir->type->vector_elements);
    }
 
-   nir_dest *dest = get_instr_dest(this->result);
-   assert(dest->is_ssa);
-
-   return &dest->ssa;
+   return this->result;
 }
 
 nir_alu_instr *

From fbbfb7c0250b105555a9869e80bc9c77adf1b997 Mon Sep 17 00:00:00 2001
From: Connor Abbott <cwabbott0@gmail.com>
Date: Fri, 30 Oct 2015 23:56:49 -0400
Subject: [PATCH 138/287] nir/glsl: make emit() take nir_ssa_def * sources

Again, this matches what the builder will have to do.

Signed-off-by: Connor Abbott <cwabbott0@gmail.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/glsl/nir/glsl_to_nir.cpp | 36 ++++++++++++++++++------------------
 1 file changed, 18 insertions(+), 18 deletions(-)

diff --git a/src/glsl/nir/glsl_to_nir.cpp b/src/glsl/nir/glsl_to_nir.cpp
index 10faa370ad1..689c3e0dcaa 100644
--- a/src/glsl/nir/glsl_to_nir.cpp
+++ b/src/glsl/nir/glsl_to_nir.cpp
@@ -75,12 +75,12 @@ private:
    void add_instr(nir_instr *instr, unsigned num_components);
    nir_ssa_def *evaluate_rvalue(ir_rvalue *ir);
 
-   nir_alu_instr *emit(nir_op op, unsigned dest_size, nir_src *srcs);
-   nir_alu_instr *emit(nir_op op, unsigned dest_size, nir_src src1);
-   nir_alu_instr *emit(nir_op op, unsigned dest_size, nir_src src1,
-                       nir_src src2);
-   nir_alu_instr *emit(nir_op op, unsigned dest_size, nir_src src1,
-                       nir_src src2, nir_src src3);
+   nir_alu_instr *emit(nir_op op, unsigned dest_size, nir_ssa_def **srcs);
+   nir_alu_instr *emit(nir_op op, unsigned dest_size, nir_ssa_def *src1);
+   nir_alu_instr *emit(nir_op op, unsigned dest_size, nir_ssa_def *src1,
+                       nir_ssa_def *src2);
+   nir_alu_instr *emit(nir_op op, unsigned dest_size, nir_ssa_def *src1,
+                       nir_ssa_def *src2, nir_ssa_def *src3);
 
    bool supports_ints;
 
@@ -1203,38 +1203,38 @@ nir_visitor::evaluate_rvalue(ir_rvalue* ir)
 }
 
 nir_alu_instr *
-nir_visitor::emit(nir_op op, unsigned dest_size, nir_src *srcs)
+nir_visitor::emit(nir_op op, unsigned dest_size, nir_ssa_def **srcs)
 {
    nir_alu_instr *instr = nir_alu_instr_create(this->shader, op);
    for (unsigned i = 0; i < nir_op_infos[op].num_inputs; i++)
-      instr->src[i].src = srcs[i];
+      instr->src[i].src = nir_src_for_ssa(srcs[i]);
    instr->dest.write_mask = (1 << dest_size) - 1;
    add_instr(&instr->instr, dest_size);
    return instr;
 }
 
 nir_alu_instr *
-nir_visitor::emit(nir_op op, unsigned dest_size, nir_src src1)
+nir_visitor::emit(nir_op op, unsigned dest_size, nir_ssa_def *src1)
 {
    assert(nir_op_infos[op].num_inputs == 1);
    return emit(op, dest_size, &src1);
 }
 
 nir_alu_instr *
-nir_visitor::emit(nir_op op, unsigned dest_size, nir_src src1,
-                  nir_src src2)
+nir_visitor::emit(nir_op op, unsigned dest_size, nir_ssa_def *src1,
+                  nir_ssa_def *src2)
 {
    assert(nir_op_infos[op].num_inputs == 2);
-   nir_src srcs[] = { src1, src2 };
+   nir_ssa_def *srcs[] = { src1, src2 };
    return emit(op, dest_size, srcs);
 }
 
 nir_alu_instr *
-nir_visitor::emit(nir_op op, unsigned dest_size, nir_src src1,
-                  nir_src src2, nir_src src3)
+nir_visitor::emit(nir_op op, unsigned dest_size, nir_ssa_def *src1,
+                  nir_ssa_def *src2, nir_ssa_def *src3)
 {
    assert(nir_op_infos[op].num_inputs == 3);
-   nir_src srcs[] = { src1, src2, src3 };
+   nir_ssa_def *srcs[] = { src1, src2, src3 };
    return emit(op, dest_size, srcs);
 }
 
@@ -1362,9 +1362,9 @@ nir_visitor::visit(ir_expression *ir)
       break;
    }
 
-   nir_src srcs[4];
+   nir_ssa_def *srcs[4];
    for (unsigned i = 0; i < ir->get_num_operands(); i++)
-      srcs[i] = nir_src_for_ssa(evaluate_rvalue(ir->operands[i]));
+      srcs[i] = evaluate_rvalue(ir->operands[i]);
 
    glsl_base_type types[4];
    for (unsigned i = 0; i < ir->get_num_operands(); i++)
@@ -1915,7 +1915,7 @@ nir_visitor::visit(ir_swizzle *ir)
 {
    nir_alu_instr *instr = emit(supports_ints ? nir_op_imov : nir_op_fmov,
                                ir->type->vector_elements,
-                               nir_src_for_ssa(evaluate_rvalue(ir->val)));
+                               evaluate_rvalue(ir->val));
 
    unsigned swizzle[4] = { ir->mask.x, ir->mask.y, ir->mask.z, ir->mask.w };
    for (unsigned i = 0; i < ir->type->vector_elements; i++)

From 213f86416f5a107d3f517e049df27fc5c5a9a28e Mon Sep 17 00:00:00 2001
From: Connor Abbott <cwabbott0@gmail.com>
Date: Sat, 31 Oct 2015 16:31:59 -0400
Subject: [PATCH 139/287] nir/glsl: switch to using the builder

v2: use nir_builder_cf_insert (Ken)

Signed-off-by: Connor Abbott <cwabbott0@gmail.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/glsl/nir/glsl_to_nir.cpp | 696 +++++++++++++----------------------
 1 file changed, 257 insertions(+), 439 deletions(-)

diff --git a/src/glsl/nir/glsl_to_nir.cpp b/src/glsl/nir/glsl_to_nir.cpp
index 689c3e0dcaa..b10d1923e0a 100644
--- a/src/glsl/nir/glsl_to_nir.cpp
+++ b/src/glsl/nir/glsl_to_nir.cpp
@@ -27,6 +27,7 @@
 
 #include "glsl_to_nir.h"
 #include "nir_control_flow.h"
+#include "nir_builder.h"
 #include "ir_visitor.h"
 #include "ir_hierarchical_visitor.h"
 #include "ir.h"
@@ -86,7 +87,7 @@ private:
 
    nir_shader *shader;
    nir_function_impl *impl;
-   exec_list *cf_node_list;
+   nir_builder b;
    nir_ssa_def *result; /* result of the expression tree last visited */
 
    nir_deref_var *evaluate_deref(nir_instr *mem_ctx, ir_instruction *ir);
@@ -535,7 +536,8 @@ nir_visitor::visit(ir_function_signature *ir)
 
       this->is_global = false;
 
-      this->cf_node_list = &impl->body;
+      nir_builder_init(&b, impl);
+      b.cursor = nir_after_cf_list(&impl->body);
       visit_exec_list(&ir->body, this);
 
       this->is_global = true;
@@ -547,14 +549,12 @@ nir_visitor::visit(ir_function_signature *ir)
 void
 nir_visitor::visit(ir_loop *ir)
 {
-   exec_list *old_list = this->cf_node_list;
-
    nir_loop *loop = nir_loop_create(this->shader);
-   nir_cf_node_insert_end(old_list, &loop->cf_node);
-   this->cf_node_list = &loop->body;
-   visit_exec_list(&ir->body_instructions, this);
+   nir_builder_cf_insert(&b, &loop->cf_node);
 
-   this->cf_node_list = old_list;
+   b.cursor = nir_after_cf_list(&loop->body);
+   visit_exec_list(&ir->body_instructions, this);
+   b.cursor = nir_after_cf_node(&loop->cf_node);
 }
 
 void
@@ -563,19 +563,17 @@ nir_visitor::visit(ir_if *ir)
    nir_src condition =
       nir_src_for_ssa(evaluate_rvalue(ir->condition));
 
-   exec_list *old_list = this->cf_node_list;
-
    nir_if *if_stmt = nir_if_create(this->shader);
    if_stmt->condition = condition;
-   nir_cf_node_insert_end(old_list, &if_stmt->cf_node);
+   nir_builder_cf_insert(&b, &if_stmt->cf_node);
 
-   this->cf_node_list = &if_stmt->then_list;
+   b.cursor = nir_after_cf_list(&if_stmt->then_list);
    visit_exec_list(&ir->then_instructions, this);
 
-   this->cf_node_list = &if_stmt->else_list;
+   b.cursor = nir_after_cf_list(&if_stmt->else_list);
    visit_exec_list(&ir->else_instructions, this);
 
-   this->cf_node_list = old_list;
+   b.cursor = nir_after_cf_node(&if_stmt->cf_node);
 }
 
 void
@@ -597,7 +595,8 @@ nir_visitor::visit(ir_discard *ir)
    } else {
       discard = nir_intrinsic_instr_create(this->shader, nir_intrinsic_discard);
    }
-   nir_instr_insert_after_cf_list(this->cf_node_list, &discard->instr);
+
+   nir_builder_instr_insert(&b, &discard->instr);
 }
 
 void
@@ -606,7 +605,7 @@ nir_visitor::visit(ir_emit_vertex *ir)
    nir_intrinsic_instr *instr =
       nir_intrinsic_instr_create(this->shader, nir_intrinsic_emit_vertex);
    instr->const_index[0] = ir->stream_id();
-   nir_instr_insert_after_cf_list(this->cf_node_list, &instr->instr);
+   nir_builder_instr_insert(&b, &instr->instr);
 }
 
 void
@@ -615,7 +614,7 @@ nir_visitor::visit(ir_end_primitive *ir)
    nir_intrinsic_instr *instr =
       nir_intrinsic_instr_create(this->shader, nir_intrinsic_end_primitive);
    instr->const_index[0] = ir->stream_id();
-   nir_instr_insert_after_cf_list(this->cf_node_list, &instr->instr);
+   nir_builder_instr_insert(&b, &instr->instr);
 }
 
 void
@@ -634,7 +633,7 @@ nir_visitor::visit(ir_loop_jump *ir)
    }
 
    nir_jump_instr *instr = nir_jump_instr_create(this->shader, type);
-   nir_instr_insert_after_cf_list(this->cf_node_list, &instr->instr);
+   nir_builder_instr_insert(&b, &instr->instr);
 }
 
 void
@@ -649,7 +648,7 @@ nir_visitor::visit(ir_return *ir)
    }
 
    nir_jump_instr *instr = nir_jump_instr_create(this->shader, nir_jump_return);
-   nir_instr_insert_after_cf_list(this->cf_node_list, &instr->instr);
+   nir_builder_instr_insert(&b, &instr->instr);
 }
 
 void
@@ -748,7 +747,7 @@ nir_visitor::visit(ir_call *ir)
             (ir_dereference *) ir->actual_parameters.get_head();
          instr->variables[0] = evaluate_deref(&instr->instr, param);
          nir_ssa_dest_init(&instr->instr, &instr->dest, 1, NULL);
-         nir_instr_insert_after_cf_list(this->cf_node_list, &instr->instr);
+         nir_builder_instr_insert(&b, &instr->instr);
          break;
       }
       case nir_intrinsic_image_load:
@@ -765,8 +764,7 @@ nir_visitor::visit(ir_call *ir)
       case nir_intrinsic_image_size: {
          nir_ssa_undef_instr *instr_undef =
             nir_ssa_undef_instr_create(shader, 1);
-         nir_instr_insert_after_cf_list(this->cf_node_list,
-                                        &instr_undef->instr);
+         nir_builder_instr_insert(&b, &instr_undef->instr);
 
          /* Set the image variable dereference. */
          exec_node *param = ir->actual_parameters.get_head();
@@ -787,29 +785,25 @@ nir_visitor::visit(ir_call *ir)
 
          if (op == nir_intrinsic_image_size ||
              op == nir_intrinsic_image_samples) {
-            nir_instr_insert_after_cf_list(this->cf_node_list, &instr->instr);
+            nir_builder_instr_insert(&b, &instr->instr);
             break;
          }
 
          /* Set the address argument, extending the coordinate vector to four
           * components.
           */
-         const nir_src src_addr =
-            nir_src_for_ssa(evaluate_rvalue((ir_dereference *)param));
-         nir_alu_instr *instr_addr = nir_alu_instr_create(shader, nir_op_vec4);
-         nir_ssa_dest_init(&instr_addr->instr, &instr_addr->dest.dest, 4, NULL);
+         nir_ssa_def *src_addr =
+            evaluate_rvalue((ir_dereference *)param);
+         nir_ssa_def *srcs[4];
 
          for (int i = 0; i < 4; i++) {
-            if (i < type->coordinate_components()) {
-               instr_addr->src[i].src = src_addr;
-               instr_addr->src[i].swizzle[0] = i;
-            } else {
-               instr_addr->src[i].src = nir_src_for_ssa(&instr_undef->def);
-            }
+            if (i < type->coordinate_components())
+               srcs[i] = nir_channel(&b, src_addr, i);
+            else
+               srcs[i] = &instr_undef->def;
          }
 
-         nir_instr_insert_after_cf_list(cf_node_list, &instr_addr->instr);
-         instr->src[0] = nir_src_for_ssa(&instr_addr->dest.dest.ssa);
+         instr->src[0] = nir_src_for_ssa(nir_vec(&b, srcs, 4));
          param = param->get_next();
 
          /* Set the sample argument, which is undefined for single-sample
@@ -835,7 +829,7 @@ nir_visitor::visit(ir_call *ir)
                nir_src_for_ssa(evaluate_rvalue((ir_dereference *)param));
             param = param->get_next();
          }
-         nir_instr_insert_after_cf_list(this->cf_node_list, &instr->instr);
+         nir_builder_instr_insert(&b, &instr->instr);
          break;
       }
       case nir_intrinsic_memory_barrier:
@@ -844,11 +838,11 @@ nir_visitor::visit(ir_call *ir)
       case nir_intrinsic_memory_barrier_buffer:
       case nir_intrinsic_memory_barrier_image:
       case nir_intrinsic_memory_barrier_shared:
-         nir_instr_insert_after_cf_list(this->cf_node_list, &instr->instr);
+         nir_builder_instr_insert(&b, &instr->instr);
          break;
       case nir_intrinsic_shader_clock:
          nir_ssa_dest_init(&instr->instr, &instr->dest, 1, NULL);
-         nir_instr_insert_after_cf_list(this->cf_node_list, &instr->instr);
+         nir_builder_instr_insert(&b, &instr->instr);
          break;
       case nir_intrinsic_store_ssbo: {
          exec_node *param = ir->actual_parameters.get_head();
@@ -882,7 +876,7 @@ nir_visitor::visit(ir_call *ir)
          instr->num_components = val->type->vector_elements;
 
          instr->src[1] = nir_src_for_ssa(evaluate_rvalue(block));
-         nir_instr_insert_after_cf_list(this->cf_node_list, &instr->instr);
+         nir_builder_instr_insert(&b, &instr->instr);
          break;
       }
       case nir_intrinsic_load_ssbo: {
@@ -917,7 +911,7 @@ nir_visitor::visit(ir_call *ir)
          /* Insert the created nir instruction now since in the case of boolean
           * result we will need to emit another instruction after it
           */
-         nir_instr_insert_after_cf_list(this->cf_node_list, &instr->instr);
+         nir_builder_instr_insert(&b, &instr->instr);
 
          /*
           * In SSBO/UBO's, a true boolean value is any non-zero value, but we
@@ -925,26 +919,19 @@ nir_visitor::visit(ir_call *ir)
           * comparison.
           */
          if (type->base_type == GLSL_TYPE_BOOL) {
-            nir_load_const_instr *const_zero =
-               nir_load_const_instr_create(shader, 1);
-            const_zero->value.u[0] = 0;
-            nir_instr_insert_after_cf_list(this->cf_node_list,
-                                           &const_zero->instr);
-
             nir_alu_instr *load_ssbo_compare =
                nir_alu_instr_create(shader, nir_op_ine);
             load_ssbo_compare->src[0].src.is_ssa = true;
             load_ssbo_compare->src[0].src.ssa = &instr->dest.ssa;
-            load_ssbo_compare->src[1].src.is_ssa = true;
-            load_ssbo_compare->src[1].src.ssa = &const_zero->def;
+            load_ssbo_compare->src[1].src =
+               nir_src_for_ssa(nir_imm_int(&b, 0));
             for (unsigned i = 0; i < type->vector_elements; i++)
                load_ssbo_compare->src[1].swizzle[i] = 0;
             nir_ssa_dest_init(&load_ssbo_compare->instr,
                               &load_ssbo_compare->dest.dest,
                               type->vector_elements, NULL);
             load_ssbo_compare->dest.write_mask = (1 << type->vector_elements) - 1;
-            nir_instr_insert_after_cf_list(this->cf_node_list,
-                                           &load_ssbo_compare->instr);
+            nir_builder_instr_insert(&b, &load_ssbo_compare->instr);
             dest = &load_ssbo_compare->dest.dest;
          }
          break;
@@ -989,7 +976,7 @@ nir_visitor::visit(ir_call *ir)
          assert(ir->return_deref);
          nir_ssa_dest_init(&instr->instr, &instr->dest,
                            ir->return_deref->type->vector_elements, NULL);
-         nir_instr_insert_after_cf_list(this->cf_node_list, &instr->instr);
+         nir_builder_instr_insert(&b, &instr->instr);
          break;
       }
       default:
@@ -1005,8 +992,7 @@ nir_visitor::visit(ir_call *ir)
             evaluate_deref(&store_instr->instr, ir->return_deref);
          store_instr->src[0] = nir_src_for_ssa(&dest->ssa);
 
-         nir_instr_insert_after_cf_list(this->cf_node_list,
-                                        &store_instr->instr);
+         nir_builder_instr_insert(&b, &store_instr->instr);
       }
 
       return;
@@ -1026,7 +1012,7 @@ nir_visitor::visit(ir_call *ir)
    }
 
    instr->return_deref = evaluate_deref(&instr->instr, ir->return_deref);
-   nir_instr_insert_after_cf_list(this->cf_node_list, &instr->instr);
+   nir_builder_instr_insert(&b, &instr->instr);
 }
 
 void
@@ -1046,10 +1032,11 @@ nir_visitor::visit(ir_assignment *ir)
       if (ir->condition) {
          nir_if *if_stmt = nir_if_create(this->shader);
          if_stmt->condition = nir_src_for_ssa(evaluate_rvalue(ir->condition));
-         nir_cf_node_insert_end(this->cf_node_list, &if_stmt->cf_node);
+         nir_builder_cf_insert(&b, &if_stmt->cf_node);
          nir_instr_insert_after_cf_list(&if_stmt->then_list, &copy->instr);
+         b.cursor = nir_after_cf_node(&if_stmt->cf_node);
       } else {
-         nir_instr_insert_after_cf_list(this->cf_node_list, &copy->instr);
+         nir_builder_instr_insert(&b, &copy->instr);
       }
       return;
    }
@@ -1058,7 +1045,7 @@ nir_visitor::visit(ir_assignment *ir)
 
    ir->lhs->accept(this);
    nir_deref_var *lhs_deref = this->deref_head;
-   nir_src src = nir_src_for_ssa(evaluate_rvalue(ir->rhs));
+   nir_ssa_def *src = evaluate_rvalue(ir->rhs);
 
    if (ir->write_mask != (1 << num_components) - 1 && ir->write_mask != 0) {
       /*
@@ -1074,42 +1061,25 @@ nir_visitor::visit(ir_assignment *ir)
       nir_ssa_dest_init(&load->instr, &load->dest, num_components, NULL);
       load->variables[0] = lhs_deref;
       ralloc_steal(load, load->variables[0]);
-      nir_instr_insert_after_cf_list(this->cf_node_list, &load->instr);
+      nir_builder_instr_insert(&b, &load->instr);
 
-      nir_op vec_op;
-      switch (ir->lhs->type->vector_elements) {
-         case 1: vec_op = nir_op_imov; break;
-         case 2: vec_op = nir_op_vec2; break;
-         case 3: vec_op = nir_op_vec3; break;
-         case 4: vec_op = nir_op_vec4; break;
-         default: unreachable("Invalid number of components"); break;
-      }
-      nir_alu_instr *vec = nir_alu_instr_create(this->shader, vec_op);
-      nir_ssa_dest_init(&vec->instr, &vec->dest.dest, num_components, NULL);
-      vec->dest.write_mask = (1 << num_components) - 1;
+      nir_ssa_def *srcs[4];
 
       unsigned component = 0;
       for (unsigned i = 0; i < ir->lhs->type->vector_elements; i++) {
          if (ir->write_mask & (1 << i)) {
-            vec->src[i].src = src;
-
             /* GLSL IR will give us the input to the write-masked assignment
              * in a single packed vector.  So, for example, if the
              * writemask is xzw, then we have to swizzle x -> x, y -> z,
              * and z -> w and get the y component from the load.
              */
-            vec->src[i].swizzle[0] = component++;
+            srcs[i] = nir_channel(&b, src, component++);
          } else {
-            vec->src[i].src.is_ssa = true;
-            vec->src[i].src.ssa = &load->dest.ssa;
-            vec->src[i].swizzle[0] = i;
+            srcs[i] = nir_channel(&b, &load->dest.ssa, i);
          }
       }
 
-      nir_instr_insert_after_cf_list(this->cf_node_list, &vec->instr);
-
-      src.is_ssa = true;
-      src.ssa = &vec->dest.dest.ssa;
+      src = nir_vec(&b, srcs, ir->lhs->type->vector_elements);
    }
 
    nir_intrinsic_instr *store =
@@ -1117,15 +1087,16 @@ nir_visitor::visit(ir_assignment *ir)
    store->num_components = ir->lhs->type->vector_elements;
    nir_deref *store_deref = nir_copy_deref(store, &lhs_deref->deref);
    store->variables[0] = nir_deref_as_var(store_deref);
-   store->src[0] = src;
+   store->src[0] = nir_src_for_ssa(src);
 
    if (ir->condition) {
       nir_if *if_stmt = nir_if_create(this->shader);
       if_stmt->condition = nir_src_for_ssa(evaluate_rvalue(ir->condition));
-      nir_cf_node_insert_end(this->cf_node_list, &if_stmt->cf_node);
+      nir_builder_cf_insert(&b, &if_stmt->cf_node);
       nir_instr_insert_after_cf_list(&if_stmt->then_list, &store->instr);
+      b.cursor = nir_after_cf_node(&if_stmt->cf_node);
    } else {
-      nir_instr_insert_after_cf_list(this->cf_node_list, &store->instr);
+      nir_builder_instr_insert(&b, &store->instr);
    }
 }
 
@@ -1173,7 +1144,7 @@ nir_visitor::add_instr(nir_instr *instr, unsigned num_components)
    if (dest)
       nir_ssa_dest_init(instr, dest, num_components, NULL);
 
-   nir_instr_insert_after_cf_list(this->cf_node_list, instr);
+   nir_builder_instr_insert(&b, instr);
 
    if (dest) {
       assert(dest->is_ssa);
@@ -1202,42 +1173,6 @@ nir_visitor::evaluate_rvalue(ir_rvalue* ir)
    return this->result;
 }
 
-nir_alu_instr *
-nir_visitor::emit(nir_op op, unsigned dest_size, nir_ssa_def **srcs)
-{
-   nir_alu_instr *instr = nir_alu_instr_create(this->shader, op);
-   for (unsigned i = 0; i < nir_op_infos[op].num_inputs; i++)
-      instr->src[i].src = nir_src_for_ssa(srcs[i]);
-   instr->dest.write_mask = (1 << dest_size) - 1;
-   add_instr(&instr->instr, dest_size);
-   return instr;
-}
-
-nir_alu_instr *
-nir_visitor::emit(nir_op op, unsigned dest_size, nir_ssa_def *src1)
-{
-   assert(nir_op_infos[op].num_inputs == 1);
-   return emit(op, dest_size, &src1);
-}
-
-nir_alu_instr *
-nir_visitor::emit(nir_op op, unsigned dest_size, nir_ssa_def *src1,
-                  nir_ssa_def *src2)
-{
-   assert(nir_op_infos[op].num_inputs == 2);
-   nir_ssa_def *srcs[] = { src1, src2 };
-   return emit(op, dest_size, srcs);
-}
-
-nir_alu_instr *
-nir_visitor::emit(nir_op op, unsigned dest_size, nir_ssa_def *src1,
-                  nir_ssa_def *src2, nir_ssa_def *src3)
-{
-   assert(nir_op_infos[op].num_inputs == 3);
-   nir_ssa_def *srcs[] = { src1, src2, src3 };
-   return emit(op, dest_size, srcs);
-}
-
 void
 nir_visitor::visit(ir_expression *ir)
 {
@@ -1265,22 +1200,8 @@ nir_visitor::visit(ir_expression *ir)
        * a true boolean to be ~0. Fix this up with a != 0 comparison.
        */
 
-      if (ir->type->base_type == GLSL_TYPE_BOOL) {
-         nir_load_const_instr *const_zero = nir_load_const_instr_create(shader, 1);
-         const_zero->value.u[0] = 0;
-         nir_instr_insert_after_cf_list(this->cf_node_list, &const_zero->instr);
-
-         nir_alu_instr *compare = nir_alu_instr_create(shader, nir_op_ine);
-         compare->src[0].src.is_ssa = true;
-         compare->src[0].src.ssa = &load->dest.ssa;
-         compare->src[1].src.is_ssa = true;
-         compare->src[1].src.ssa = &const_zero->def;
-         for (unsigned i = 0; i < ir->type->vector_elements; i++)
-            compare->src[1].swizzle[i] = 0;
-         compare->dest.write_mask = (1 << ir->type->vector_elements) - 1;
-
-         add_instr(&compare->instr, ir->type->vector_elements);
-      }
+      if (ir->type->base_type == GLSL_TYPE_BOOL)
+         this->result = nir_ine(&b, &load->dest.ssa, nir_imm_int(&b, 0));
 
       return;
    }
@@ -1340,19 +1261,12 @@ nir_visitor::visit(ir_expression *ir)
       add_instr(&intrin->instr, deref->type->vector_elements);
 
       if (swizzle) {
-         nir_alu_instr *mov = nir_alu_instr_create(shader, nir_op_imov);
-         mov->dest.write_mask = (1 << swizzle->type->vector_elements) - 1;
-         mov->src[0].src.is_ssa = true;
-         mov->src[0].src.ssa = &intrin->dest.ssa;
+         unsigned swiz[4] = {
+            swizzle->mask.x, swizzle->mask.y, swizzle->mask.z, swizzle->mask.w
+         };
 
-         mov->src[0].swizzle[0] = swizzle->mask.x;
-         mov->src[0].swizzle[1] = swizzle->mask.y;
-         mov->src[0].swizzle[2] = swizzle->mask.z;
-         mov->src[0].swizzle[3] = swizzle->mask.w;
-         for (unsigned i = deref->type->vector_elements; i < 4; i++)
-            mov->src[0].swizzle[i] = 0;
-
-         add_instr(&mov->instr, swizzle->type->vector_elements);
+         result = nir_swizzle(&b, result, swiz,
+                              swizzle->type->vector_elements, false);
       }
 
       return;
@@ -1379,53 +1293,48 @@ nir_visitor::visit(ir_expression *ir)
    else
       out_type = GLSL_TYPE_FLOAT;
 
-   unsigned dest_size = ir->type->vector_elements;
-
-   nir_alu_instr *instr;
-   nir_op op;
-
    switch (ir->operation) {
-   case ir_unop_bit_not: emit(nir_op_inot, dest_size, srcs); break;
+   case ir_unop_bit_not: result = nir_inot(&b, srcs[0]); break;
    case ir_unop_logic_not:
-      emit(supports_ints ? nir_op_inot : nir_op_fnot, dest_size, srcs);
+      result = supports_ints ? nir_inot(&b, srcs[0]) : nir_fnot(&b, srcs[0]);
       break;
    case ir_unop_neg:
-      instr = emit(types[0] == GLSL_TYPE_FLOAT ? nir_op_fneg : nir_op_ineg,
-                   dest_size, srcs);
+      result = (types[0] == GLSL_TYPE_FLOAT) ? nir_fneg(&b, srcs[0])
+                                             : nir_ineg(&b, srcs[0]);
       break;
    case ir_unop_abs:
-      instr = emit(types[0] == GLSL_TYPE_FLOAT ? nir_op_fabs : nir_op_iabs,
-                   dest_size, srcs);
+      result = (types[0] == GLSL_TYPE_FLOAT) ? nir_fabs(&b, srcs[0])
+                                             : nir_iabs(&b, srcs[0]);
       break;
    case ir_unop_saturate:
       assert(types[0] == GLSL_TYPE_FLOAT);
-      instr = emit(nir_op_fsat, dest_size, srcs);
+      result = nir_fsat(&b, srcs[0]);
       break;
    case ir_unop_sign:
-      emit(types[0] == GLSL_TYPE_FLOAT ? nir_op_fsign : nir_op_isign,
-           dest_size, srcs);
+      result = (types[0] == GLSL_TYPE_FLOAT) ? nir_fsign(&b, srcs[0])
+                                             : nir_isign(&b, srcs[0]);
       break;
-   case ir_unop_rcp:  emit(nir_op_frcp, dest_size, srcs);  break;
-   case ir_unop_rsq:  emit(nir_op_frsq, dest_size, srcs);  break;
-   case ir_unop_sqrt: emit(nir_op_fsqrt, dest_size, srcs); break;
+   case ir_unop_rcp:  result = nir_frcp(&b, srcs[0]);  break;
+   case ir_unop_rsq:  result = nir_frsq(&b, srcs[0]);  break;
+   case ir_unop_sqrt: result = nir_fsqrt(&b, srcs[0]); break;
    case ir_unop_exp:  unreachable("ir_unop_exp should have been lowered");
    case ir_unop_log:  unreachable("ir_unop_log should have been lowered");
-   case ir_unop_exp2: emit(nir_op_fexp2, dest_size, srcs); break;
-   case ir_unop_log2: emit(nir_op_flog2, dest_size, srcs); break;
+   case ir_unop_exp2: result = nir_fexp2(&b, srcs[0]); break;
+   case ir_unop_log2: result = nir_flog2(&b, srcs[0]); break;
    case ir_unop_i2f:
-      emit(supports_ints ? nir_op_i2f : nir_op_fmov, dest_size, srcs);
+      result = supports_ints ? nir_i2f(&b, srcs[0]) : nir_fmov(&b, srcs[0]);
       break;
    case ir_unop_u2f:
-      emit(supports_ints ? nir_op_u2f : nir_op_fmov, dest_size, srcs);
+      result = supports_ints ? nir_u2f(&b, srcs[0]) : nir_fmov(&b, srcs[0]);
       break;
    case ir_unop_b2f:
-      emit(supports_ints ? nir_op_b2f : nir_op_fmov, dest_size, srcs);
+      result = supports_ints ? nir_b2f(&b, srcs[0]) : nir_fmov(&b, srcs[0]);
       break;
-   case ir_unop_f2i:  emit(nir_op_f2i, dest_size, srcs);   break;
-   case ir_unop_f2u:  emit(nir_op_f2u, dest_size, srcs);   break;
-   case ir_unop_f2b:  emit(nir_op_f2b, dest_size, srcs);   break;
-   case ir_unop_i2b:  emit(nir_op_i2b, dest_size, srcs);   break;
-   case ir_unop_b2i:  emit(nir_op_b2i, dest_size, srcs);   break;
+   case ir_unop_f2i:  result = nir_f2i(&b, srcs[0]);   break;
+   case ir_unop_f2u:  result = nir_f2u(&b, srcs[0]);   break;
+   case ir_unop_f2b:  result = nir_f2b(&b, srcs[0]);   break;
+   case ir_unop_i2b:  result = nir_i2b(&b, srcs[0]);   break;
+   case ir_unop_b2i:  result = nir_b2i(&b, srcs[0]);   break;
    case ir_unop_i2u:
    case ir_unop_u2i:
    case ir_unop_bitcast_i2f:
@@ -1434,132 +1343,132 @@ nir_visitor::visit(ir_expression *ir)
    case ir_unop_bitcast_f2u:
    case ir_unop_subroutine_to_int:
       /* no-op */
-      emit(nir_op_imov, dest_size, srcs);
+      result = nir_imov(&b, srcs[0]);
       break;
    case ir_unop_any:
       switch (ir->operands[0]->type->vector_elements) {
       case 2:
-         emit(supports_ints ? nir_op_bany2 : nir_op_fany2,
-              dest_size, srcs);
+         result = supports_ints ? nir_bany2(&b, srcs[0])
+                                : nir_fany2(&b, srcs[0]);
          break;
       case 3:
-         emit(supports_ints ? nir_op_bany3 : nir_op_fany3,
-              dest_size, srcs);
+         result = supports_ints ? nir_bany3(&b, srcs[0])
+                                : nir_fany3(&b, srcs[0]);
          break;
       case 4:
-         emit(supports_ints ? nir_op_bany4 : nir_op_fany4,
-              dest_size, srcs);
+         result = supports_ints ? nir_bany4(&b, srcs[0])
+                                : nir_fany4(&b, srcs[0]);
          break;
       default:
          unreachable("not reached");
       }
       break;
-   case ir_unop_trunc: emit(nir_op_ftrunc, dest_size, srcs); break;
-   case ir_unop_ceil:  emit(nir_op_fceil,  dest_size, srcs); break;
-   case ir_unop_floor: emit(nir_op_ffloor, dest_size, srcs); break;
-   case ir_unop_fract: emit(nir_op_ffract, dest_size, srcs); break;
-   case ir_unop_round_even: emit(nir_op_fround_even, dest_size, srcs); break;
-   case ir_unop_sin:   emit(nir_op_fsin,   dest_size, srcs); break;
-   case ir_unop_cos:   emit(nir_op_fcos,   dest_size, srcs); break;
-   case ir_unop_dFdx:        emit(nir_op_fddx,        dest_size, srcs); break;
-   case ir_unop_dFdy:        emit(nir_op_fddy,        dest_size, srcs); break;
-   case ir_unop_dFdx_fine:   emit(nir_op_fddx_fine,   dest_size, srcs); break;
-   case ir_unop_dFdy_fine:   emit(nir_op_fddy_fine,   dest_size, srcs); break;
-   case ir_unop_dFdx_coarse: emit(nir_op_fddx_coarse, dest_size, srcs); break;
-   case ir_unop_dFdy_coarse: emit(nir_op_fddy_coarse, dest_size, srcs); break;
+   case ir_unop_trunc: result = nir_ftrunc(&b, srcs[0]); break;
+   case ir_unop_ceil:  result = nir_fceil(&b, srcs[0]); break;
+   case ir_unop_floor: result = nir_ffloor(&b, srcs[0]); break;
+   case ir_unop_fract: result = nir_ffract(&b, srcs[0]); break;
+   case ir_unop_round_even: result = nir_fround_even(&b, srcs[0]); break;
+   case ir_unop_sin:   result = nir_fsin(&b, srcs[0]); break;
+   case ir_unop_cos:   result = nir_fcos(&b, srcs[0]); break;
+   case ir_unop_dFdx:        result = nir_fddx(&b, srcs[0]); break;
+   case ir_unop_dFdy:        result = nir_fddy(&b, srcs[0]); break;
+   case ir_unop_dFdx_fine:   result = nir_fddx_fine(&b, srcs[0]); break;
+   case ir_unop_dFdy_fine:   result = nir_fddy_fine(&b, srcs[0]); break;
+   case ir_unop_dFdx_coarse: result = nir_fddx_coarse(&b, srcs[0]); break;
+   case ir_unop_dFdy_coarse: result = nir_fddy_coarse(&b, srcs[0]); break;
    case ir_unop_pack_snorm_2x16:
-      emit(nir_op_pack_snorm_2x16, dest_size, srcs);
+      result = nir_pack_snorm_2x16(&b, srcs[0]);
       break;
    case ir_unop_pack_snorm_4x8:
-      emit(nir_op_pack_snorm_4x8, dest_size, srcs);
+      result = nir_pack_snorm_4x8(&b, srcs[0]);
       break;
    case ir_unop_pack_unorm_2x16:
-      emit(nir_op_pack_unorm_2x16, dest_size, srcs);
+      result = nir_pack_unorm_2x16(&b, srcs[0]);
       break;
    case ir_unop_pack_unorm_4x8:
-      emit(nir_op_pack_unorm_4x8, dest_size, srcs);
+      result = nir_pack_unorm_4x8(&b, srcs[0]);
       break;
    case ir_unop_pack_half_2x16:
-      emit(nir_op_pack_half_2x16, dest_size, srcs);
+      result = nir_pack_half_2x16(&b, srcs[0]);
       break;
    case ir_unop_unpack_snorm_2x16:
-      emit(nir_op_unpack_snorm_2x16, dest_size, srcs);
+      result = nir_unpack_snorm_2x16(&b, srcs[0]);
       break;
    case ir_unop_unpack_snorm_4x8:
-      emit(nir_op_unpack_snorm_4x8, dest_size, srcs);
+      result = nir_unpack_snorm_4x8(&b, srcs[0]);
       break;
    case ir_unop_unpack_unorm_2x16:
-      emit(nir_op_unpack_unorm_2x16, dest_size, srcs);
+      result = nir_unpack_unorm_2x16(&b, srcs[0]);
       break;
    case ir_unop_unpack_unorm_4x8:
-      emit(nir_op_unpack_unorm_4x8, dest_size, srcs);
+      result = nir_unpack_unorm_4x8(&b, srcs[0]);
       break;
    case ir_unop_unpack_half_2x16:
-      emit(nir_op_unpack_half_2x16, dest_size, srcs);
+      result = nir_unpack_half_2x16(&b, srcs[0]);
       break;
    case ir_unop_unpack_half_2x16_split_x:
-      emit(nir_op_unpack_half_2x16_split_x, dest_size, srcs);
+      result = nir_unpack_half_2x16_split_x(&b, srcs[0]);
       break;
    case ir_unop_unpack_half_2x16_split_y:
-      emit(nir_op_unpack_half_2x16_split_y, dest_size, srcs);
+      result = nir_unpack_half_2x16_split_y(&b, srcs[0]);
       break;
    case ir_unop_bitfield_reverse:
-      emit(nir_op_bitfield_reverse, dest_size, srcs);
+      result = nir_bitfield_reverse(&b, srcs[0]);
       break;
    case ir_unop_bit_count:
-      emit(nir_op_bit_count, dest_size, srcs);
+      result = nir_bit_count(&b, srcs[0]);
       break;
    case ir_unop_find_msb:
       switch (types[0]) {
       case GLSL_TYPE_UINT:
-         emit(nir_op_ufind_msb, dest_size, srcs);
+         result = nir_ufind_msb(&b, srcs[0]);
          break;
       case GLSL_TYPE_INT:
-         emit(nir_op_ifind_msb, dest_size, srcs);
+         result = nir_ifind_msb(&b, srcs[0]);
          break;
       default:
          unreachable("Invalid type for findMSB()");
       }
       break;
    case ir_unop_find_lsb:
-      emit(nir_op_find_lsb,  dest_size, srcs);
+      result = nir_find_lsb(&b, srcs[0]);
       break;
 
    case ir_unop_noise:
       switch (ir->type->vector_elements) {
       case 1:
          switch (ir->operands[0]->type->vector_elements) {
-            case 1: emit(nir_op_fnoise1_1, dest_size, srcs); break;
-            case 2: emit(nir_op_fnoise1_2, dest_size, srcs); break;
-            case 3: emit(nir_op_fnoise1_3, dest_size, srcs); break;
-            case 4: emit(nir_op_fnoise1_4, dest_size, srcs); break;
+            case 1: result = nir_fnoise1_1(&b, srcs[0]); break;
+            case 2: result = nir_fnoise1_2(&b, srcs[0]); break;
+            case 3: result = nir_fnoise1_3(&b, srcs[0]); break;
+            case 4: result = nir_fnoise1_4(&b, srcs[0]); break;
             default: unreachable("not reached");
          }
          break;
       case 2:
          switch (ir->operands[0]->type->vector_elements) {
-            case 1: emit(nir_op_fnoise2_1, dest_size, srcs); break;
-            case 2: emit(nir_op_fnoise2_2, dest_size, srcs); break;
-            case 3: emit(nir_op_fnoise2_3, dest_size, srcs); break;
-            case 4: emit(nir_op_fnoise2_4, dest_size, srcs); break;
+            case 1: result = nir_fnoise2_1(&b, srcs[0]); break;
+            case 2: result = nir_fnoise2_2(&b, srcs[0]); break;
+            case 3: result = nir_fnoise2_3(&b, srcs[0]); break;
+            case 4: result = nir_fnoise2_4(&b, srcs[0]); break;
             default: unreachable("not reached");
          }
          break;
       case 3:
          switch (ir->operands[0]->type->vector_elements) {
-            case 1: emit(nir_op_fnoise3_1, dest_size, srcs); break;
-            case 2: emit(nir_op_fnoise3_2, dest_size, srcs); break;
-            case 3: emit(nir_op_fnoise3_3, dest_size, srcs); break;
-            case 4: emit(nir_op_fnoise3_4, dest_size, srcs); break;
+            case 1: result = nir_fnoise3_1(&b, srcs[0]); break;
+            case 2: result = nir_fnoise3_2(&b, srcs[0]); break;
+            case 3: result = nir_fnoise3_3(&b, srcs[0]); break;
+            case 4: result = nir_fnoise3_4(&b, srcs[0]); break;
             default: unreachable("not reached");
          }
          break;
       case 4:
          switch (ir->operands[0]->type->vector_elements) {
-            case 1: emit(nir_op_fnoise4_1, dest_size, srcs); break;
-            case 2: emit(nir_op_fnoise4_2, dest_size, srcs); break;
-            case 3: emit(nir_op_fnoise4_3, dest_size, srcs); break;
-            case 4: emit(nir_op_fnoise4_4, dest_size, srcs); break;
+            case 1: result = nir_fnoise4_1(&b, srcs[0]); break;
+            case 2: result = nir_fnoise4_2(&b, srcs[0]); break;
+            case 3: result = nir_fnoise4_3(&b, srcs[0]); break;
+            case 4: result = nir_fnoise4_4(&b, srcs[0]); break;
             default: unreachable("not reached");
          }
          break;
@@ -1578,234 +1487,167 @@ nir_visitor::visit(ir_expression *ir)
    }
 
    case ir_binop_add:
+      result = (out_type == GLSL_TYPE_FLOAT) ? nir_fadd(&b, srcs[0], srcs[1])
+                                             : nir_iadd(&b, srcs[0], srcs[1]);
+      break;
    case ir_binop_sub:
+      result = (out_type == GLSL_TYPE_FLOAT) ? nir_fsub(&b, srcs[0], srcs[1])
+                                             : nir_isub(&b, srcs[0], srcs[1]);
+      break;
    case ir_binop_mul:
+      result = (out_type == GLSL_TYPE_FLOAT) ? nir_fmul(&b, srcs[0], srcs[1])
+                                             : nir_imul(&b, srcs[0], srcs[1]);
+      break;
    case ir_binop_div:
+      if (out_type == GLSL_TYPE_FLOAT)
+         result = nir_fdiv(&b, srcs[0], srcs[1]);
+      else if (out_type == GLSL_TYPE_INT)
+         result = nir_idiv(&b, srcs[0], srcs[1]);
+      else
+         result = nir_udiv(&b, srcs[0], srcs[1]);
+      break;
    case ir_binop_mod:
+      result = (out_type == GLSL_TYPE_FLOAT) ? nir_fmod(&b, srcs[0], srcs[1])
+                                             : nir_umod(&b, srcs[0], srcs[1]);
+      break;
    case ir_binop_min:
+      if (out_type == GLSL_TYPE_FLOAT)
+         result = nir_fmin(&b, srcs[0], srcs[1]);
+      else if (out_type == GLSL_TYPE_INT)
+         result = nir_imin(&b, srcs[0], srcs[1]);
+      else
+         result = nir_umin(&b, srcs[0], srcs[1]);
+      break;
    case ir_binop_max:
-   case ir_binop_pow:
-   case ir_binop_bit_and:
-   case ir_binop_bit_or:
-   case ir_binop_bit_xor:
+      if (out_type == GLSL_TYPE_FLOAT)
+         result = nir_fmax(&b, srcs[0], srcs[1]);
+      else if (out_type == GLSL_TYPE_INT)
+         result = nir_imax(&b, srcs[0], srcs[1]);
+      else
+         result = nir_umax(&b, srcs[0], srcs[1]);
+      break;
+   case ir_binop_pow: result = nir_fpow(&b, srcs[0], srcs[1]); break;
+   case ir_binop_bit_and: result = nir_iand(&b, srcs[0], srcs[1]); break;
+   case ir_binop_bit_or: result = nir_ior(&b, srcs[0], srcs[1]); break;
+   case ir_binop_bit_xor: result = nir_ixor(&b, srcs[0], srcs[1]); break;
    case ir_binop_logic_and:
+      result = supports_ints ? nir_iand(&b, srcs[0], srcs[1])
+                             : nir_fand(&b, srcs[0], srcs[1]);
+      break;
    case ir_binop_logic_or:
-   case ir_binop_logic_xor:
-   case ir_binop_lshift:
+      result = supports_ints ? nir_ior(&b, srcs[0], srcs[1])
+                             : nir_for(&b, srcs[0], srcs[1]);
+      break;
+   case ir_binop_logic_xor:
+      result = supports_ints ? nir_ixor(&b, srcs[0], srcs[1])
+                             : nir_fxor(&b, srcs[0], srcs[1]);
+      break;
+   case ir_binop_lshift: result = nir_ishl(&b, srcs[0], srcs[1]); break;
    case ir_binop_rshift:
-      switch (ir->operation) {
-      case ir_binop_add:
-         if (out_type == GLSL_TYPE_FLOAT)
-            op = nir_op_fadd;
-         else
-            op = nir_op_iadd;
-         break;
-      case ir_binop_sub:
-         if (out_type == GLSL_TYPE_FLOAT)
-            op = nir_op_fsub;
-         else
-            op = nir_op_isub;
-         break;
-      case ir_binop_mul:
-         if (out_type == GLSL_TYPE_FLOAT)
-            op = nir_op_fmul;
-         else
-            op = nir_op_imul;
-         break;
-      case ir_binop_div:
-         if (out_type == GLSL_TYPE_FLOAT)
-            op = nir_op_fdiv;
-         else if (out_type == GLSL_TYPE_INT)
-            op = nir_op_idiv;
-         else
-            op = nir_op_udiv;
-         break;
-      case ir_binop_mod:
-         if (out_type == GLSL_TYPE_FLOAT)
-            op = nir_op_fmod;
-         else
-            op = nir_op_umod;
-         break;
-      case ir_binop_min:
-         if (out_type == GLSL_TYPE_FLOAT)
-            op = nir_op_fmin;
-         else if (out_type == GLSL_TYPE_INT)
-            op = nir_op_imin;
-         else
-            op = nir_op_umin;
-         break;
-      case ir_binop_max:
-         if (out_type == GLSL_TYPE_FLOAT)
-            op = nir_op_fmax;
-         else if (out_type == GLSL_TYPE_INT)
-            op = nir_op_imax;
-         else
-            op = nir_op_umax;
-         break;
-      case ir_binop_bit_and:
-         op = nir_op_iand;
-         break;
-      case ir_binop_bit_or:
-         op = nir_op_ior;
-         break;
-      case ir_binop_bit_xor:
-         op = nir_op_ixor;
-         break;
-      case ir_binop_logic_and:
-         if (supports_ints)
-            op = nir_op_iand;
-         else
-            op = nir_op_fand;
-         break;
-      case ir_binop_logic_or:
-         if (supports_ints)
-            op = nir_op_ior;
-         else
-            op = nir_op_for;
-         break;
-      case ir_binop_logic_xor:
-         if (supports_ints)
-            op = nir_op_ixor;
-         else
-            op = nir_op_fxor;
-         break;
-      case ir_binop_lshift:
-         op = nir_op_ishl;
-         break;
-      case ir_binop_rshift:
-         if (out_type == GLSL_TYPE_INT)
-            op = nir_op_ishr;
-         else
-            op = nir_op_ushr;
-         break;
-      case ir_binop_pow:
-         op = nir_op_fpow;
-         break;
-
-      default:
-         unreachable("not reached");
-      }
-
-      instr = emit(op, dest_size, srcs);
-
-      if (ir->operands[0]->type->vector_elements != 1 &&
-          ir->operands[1]->type->vector_elements == 1) {
-         for (unsigned i = 0; i < ir->operands[0]->type->vector_elements;
-              i++) {
-            instr->src[1].swizzle[i] = 0;
-         }
-      }
-
-      if (ir->operands[1]->type->vector_elements != 1 &&
-          ir->operands[0]->type->vector_elements == 1) {
-         for (unsigned i = 0; i < ir->operands[1]->type->vector_elements;
-              i++) {
-            instr->src[0].swizzle[i] = 0;
-         }
-      }
-
+      result = (out_type == GLSL_TYPE_INT) ? nir_ishr(&b, srcs[0], srcs[1])
+                                           : nir_ushr(&b, srcs[0], srcs[1]);
       break;
    case ir_binop_imul_high:
-      emit(out_type == GLSL_TYPE_UINT ? nir_op_umul_high : nir_op_imul_high,
-           dest_size, srcs);
+      result = (out_type == GLSL_TYPE_INT) ? nir_imul_high(&b, srcs[0], srcs[1])
+                                           : nir_umul_high(&b, srcs[0], srcs[1]);
       break;
-   case ir_binop_carry:  emit(nir_op_uadd_carry, dest_size, srcs);  break;
-   case ir_binop_borrow: emit(nir_op_usub_borrow, dest_size, srcs); break;
+   case ir_binop_carry:  result = nir_uadd_carry(&b, srcs[0], srcs[1]);  break;
+   case ir_binop_borrow: result = nir_usub_borrow(&b, srcs[0], srcs[1]); break;
    case ir_binop_less:
       if (supports_ints) {
          if (types[0] == GLSL_TYPE_FLOAT)
-            emit(nir_op_flt, dest_size, srcs);
+            result = nir_flt(&b, srcs[0], srcs[1]);
          else if (types[0] == GLSL_TYPE_INT)
-            emit(nir_op_ilt, dest_size, srcs);
+            result = nir_ilt(&b, srcs[0], srcs[1]);
          else
-            emit(nir_op_ult, dest_size, srcs);
+            result = nir_ult(&b, srcs[0], srcs[1]);
       } else {
-         emit(nir_op_slt, dest_size, srcs);
+         result = nir_slt(&b, srcs[0], srcs[1]);
       }
       break;
    case ir_binop_greater:
       if (supports_ints) {
          if (types[0] == GLSL_TYPE_FLOAT)
-            emit(nir_op_flt, dest_size, srcs[1], srcs[0]);
+            result = nir_flt(&b, srcs[1], srcs[0]);
          else if (types[0] == GLSL_TYPE_INT)
-            emit(nir_op_ilt, dest_size, srcs[1], srcs[0]);
+            result = nir_ilt(&b, srcs[1], srcs[0]);
          else
-            emit(nir_op_ult, dest_size, srcs[1], srcs[0]);
+            result = nir_ult(&b, srcs[1], srcs[0]);
       } else {
-         emit(nir_op_slt, dest_size, srcs[1], srcs[0]);
+         result = nir_slt(&b, srcs[1], srcs[0]);
       }
       break;
    case ir_binop_lequal:
       if (supports_ints) {
          if (types[0] == GLSL_TYPE_FLOAT)
-            emit(nir_op_fge, dest_size, srcs[1], srcs[0]);
+            result = nir_fge(&b, srcs[1], srcs[0]);
          else if (types[0] == GLSL_TYPE_INT)
-            emit(nir_op_ige, dest_size, srcs[1], srcs[0]);
+            result = nir_ige(&b, srcs[1], srcs[0]);
          else
-            emit(nir_op_uge, dest_size, srcs[1], srcs[0]);
+            result = nir_uge(&b, srcs[1], srcs[0]);
       } else {
-         emit(nir_op_slt, dest_size, srcs[1], srcs[0]);
+         result = nir_sge(&b, srcs[1], srcs[0]);
       }
       break;
    case ir_binop_gequal:
       if (supports_ints) {
          if (types[0] == GLSL_TYPE_FLOAT)
-            emit(nir_op_fge, dest_size, srcs);
+            result = nir_fge(&b, srcs[0], srcs[1]);
          else if (types[0] == GLSL_TYPE_INT)
-            emit(nir_op_ige, dest_size, srcs);
+            result = nir_ige(&b, srcs[0], srcs[1]);
          else
-            emit(nir_op_uge, dest_size, srcs);
+            result = nir_uge(&b, srcs[0], srcs[1]);
       } else {
-         emit(nir_op_slt, dest_size, srcs);
+         result = nir_sge(&b, srcs[0], srcs[1]);
       }
       break;
    case ir_binop_equal:
       if (supports_ints) {
          if (types[0] == GLSL_TYPE_FLOAT)
-            emit(nir_op_feq, dest_size, srcs);
+            result = nir_feq(&b, srcs[0], srcs[1]);
          else
-            emit(nir_op_ieq, dest_size, srcs);
+            result = nir_ieq(&b, srcs[0], srcs[1]);
       } else {
-         emit(nir_op_seq, dest_size, srcs);
+         result = nir_seq(&b, srcs[0], srcs[1]);
       }
       break;
    case ir_binop_nequal:
       if (supports_ints) {
          if (types[0] == GLSL_TYPE_FLOAT)
-            emit(nir_op_fne, dest_size, srcs);
+            result = nir_fne(&b, srcs[0], srcs[1]);
          else
-            emit(nir_op_ine, dest_size, srcs);
+            result = nir_ine(&b, srcs[0], srcs[1]);
       } else {
-         emit(nir_op_sne, dest_size, srcs);
+         result = nir_sne(&b, srcs[0], srcs[1]);
       }
       break;
    case ir_binop_all_equal:
       if (supports_ints) {
          if (types[0] == GLSL_TYPE_FLOAT) {
             switch (ir->operands[0]->type->vector_elements) {
-               case 1: emit(nir_op_feq, dest_size, srcs); break;
-               case 2: emit(nir_op_ball_fequal2, dest_size, srcs); break;
-               case 3: emit(nir_op_ball_fequal3, dest_size, srcs); break;
-               case 4: emit(nir_op_ball_fequal4, dest_size, srcs); break;
+               case 1: result = nir_feq(&b, srcs[0], srcs[1]); break;
+               case 2: result = nir_ball_fequal2(&b, srcs[0], srcs[1]); break;
+               case 3: result = nir_ball_fequal3(&b, srcs[0], srcs[1]); break;
+               case 4: result = nir_ball_fequal4(&b, srcs[0], srcs[1]); break;
                default:
                   unreachable("not reached");
             }
          } else {
             switch (ir->operands[0]->type->vector_elements) {
-               case 1: emit(nir_op_ieq, dest_size, srcs); break;
-               case 2: emit(nir_op_ball_iequal2, dest_size, srcs); break;
-               case 3: emit(nir_op_ball_iequal3, dest_size, srcs); break;
-               case 4: emit(nir_op_ball_iequal4, dest_size, srcs); break;
+               case 1: result = nir_ieq(&b, srcs[0], srcs[1]); break;
+               case 2: result = nir_ball_iequal2(&b, srcs[0], srcs[1]); break;
+               case 3: result = nir_ball_iequal3(&b, srcs[0], srcs[1]); break;
+               case 4: result = nir_ball_iequal4(&b, srcs[0], srcs[1]); break;
                default:
                   unreachable("not reached");
             }
          }
       } else {
          switch (ir->operands[0]->type->vector_elements) {
-            case 1: emit(nir_op_seq, dest_size, srcs); break;
-            case 2: emit(nir_op_fall_equal2, dest_size, srcs); break;
-            case 3: emit(nir_op_fall_equal3, dest_size, srcs); break;
-            case 4: emit(nir_op_fall_equal4, dest_size, srcs); break;
+            case 1: result = nir_seq(&b, srcs[0], srcs[1]); break;
+            case 2: result = nir_fall_equal2(&b, srcs[0], srcs[1]); break;
+            case 3: result = nir_fall_equal3(&b, srcs[0], srcs[1]); break;
+            case 4: result = nir_fall_equal4(&b, srcs[0], srcs[1]); break;
             default:
                unreachable("not reached");
          }
@@ -1815,29 +1657,29 @@ nir_visitor::visit(ir_expression *ir)
       if (supports_ints) {
          if (types[0] == GLSL_TYPE_FLOAT) {
             switch (ir->operands[0]->type->vector_elements) {
-               case 1: emit(nir_op_fne, dest_size, srcs); break;
-               case 2: emit(nir_op_bany_fnequal2, dest_size, srcs); break;
-               case 3: emit(nir_op_bany_fnequal3, dest_size, srcs); break;
-               case 4: emit(nir_op_bany_fnequal4, dest_size, srcs); break;
+               case 1: result = nir_fne(&b, srcs[0], srcs[1]); break;
+               case 2: result = nir_bany_fnequal2(&b, srcs[0], srcs[1]); break;
+               case 3: result = nir_bany_fnequal3(&b, srcs[0], srcs[1]); break;
+               case 4: result = nir_bany_fnequal4(&b, srcs[0], srcs[1]); break;
                default:
                   unreachable("not reached");
             }
          } else {
             switch (ir->operands[0]->type->vector_elements) {
-               case 1: emit(nir_op_ine, dest_size, srcs); break;
-               case 2: emit(nir_op_bany_inequal2, dest_size, srcs); break;
-               case 3: emit(nir_op_bany_inequal3, dest_size, srcs); break;
-               case 4: emit(nir_op_bany_inequal4, dest_size, srcs); break;
+               case 1: result = nir_ine(&b, srcs[0], srcs[1]); break;
+               case 2: result = nir_bany_inequal2(&b, srcs[0], srcs[1]); break;
+               case 3: result = nir_bany_inequal3(&b, srcs[0], srcs[1]); break;
+               case 4: result = nir_bany_inequal4(&b, srcs[0], srcs[1]); break;
                default:
                   unreachable("not reached");
             }
          }
       } else {
          switch (ir->operands[0]->type->vector_elements) {
-            case 1: emit(nir_op_sne, dest_size, srcs); break;
-            case 2: emit(nir_op_fany_nequal2, dest_size, srcs); break;
-            case 3: emit(nir_op_fany_nequal3, dest_size, srcs); break;
-            case 4: emit(nir_op_fany_nequal4, dest_size, srcs); break;
+            case 1: result = nir_sne(&b, srcs[0], srcs[1]); break;
+            case 2: result = nir_fany_nequal2(&b, srcs[0], srcs[1]); break;
+            case 3: result = nir_fany_nequal3(&b, srcs[0], srcs[1]); break;
+            case 4: result = nir_fany_nequal4(&b, srcs[0], srcs[1]); break;
             default:
                unreachable("not reached");
          }
@@ -1845,64 +1687,44 @@ nir_visitor::visit(ir_expression *ir)
       break;
    case ir_binop_dot:
       switch (ir->operands[0]->type->vector_elements) {
-         case 2: emit(nir_op_fdot2, dest_size, srcs); break;
-         case 3: emit(nir_op_fdot3, dest_size, srcs); break;
-         case 4: emit(nir_op_fdot4, dest_size, srcs); break;
+         case 2: result = nir_fdot2(&b, srcs[0], srcs[1]); break;
+         case 3: result = nir_fdot3(&b, srcs[0], srcs[1]); break;
+         case 4: result = nir_fdot4(&b, srcs[0], srcs[1]); break;
          default:
             unreachable("not reached");
       }
       break;
 
    case ir_binop_pack_half_2x16_split:
-         emit(nir_op_pack_half_2x16_split, dest_size, srcs);
+         result = nir_pack_half_2x16_split(&b, srcs[0], srcs[1]);
          break;
-   case ir_binop_bfm:   emit(nir_op_bfm, dest_size, srcs);   break;
-   case ir_binop_ldexp: emit(nir_op_ldexp, dest_size, srcs); break;
-   case ir_triop_fma:   emit(nir_op_ffma, dest_size, srcs);  break;
+   case ir_binop_bfm:   result = nir_bfm(&b, srcs[0], srcs[1]);   break;
+   case ir_binop_ldexp: result = nir_ldexp(&b, srcs[0], srcs[1]); break;
+   case ir_triop_fma:
+      result = nir_ffma(&b, srcs[0], srcs[1], srcs[2]);
+      break;
    case ir_triop_lrp:
-      instr = emit(nir_op_flrp, dest_size, srcs);
-      if (ir->operands[0]->type->vector_elements != 1 &&
-          ir->operands[2]->type->vector_elements == 1) {
-         for (unsigned i = 0; i < ir->operands[0]->type->vector_elements;
-              i++) {
-            instr->src[2].swizzle[i] = 0;
-         }
-      }
+      result = nir_flrp(&b, srcs[0], srcs[1], srcs[2]);
       break;
    case ir_triop_csel:
       if (supports_ints)
-         emit(nir_op_bcsel, dest_size, srcs);
+         result = nir_bcsel(&b, srcs[0], srcs[1], srcs[2]);
       else
-         emit(nir_op_fcsel, dest_size, srcs);
+         result = nir_fcsel(&b, srcs[0], srcs[1], srcs[2]);
       break;
    case ir_triop_bfi:
-      instr = emit(nir_op_bfi, dest_size, srcs);
-      for (unsigned i = 0; i < ir->operands[1]->type->vector_elements; i++) {
-         instr->src[0].swizzle[i] = 0;
-      }
+      result = nir_bfi(&b, srcs[0], srcs[1], srcs[2]);
       break;
    case ir_triop_bitfield_extract:
-      instr = emit(out_type == GLSL_TYPE_INT ? nir_op_ibitfield_extract :
-                   nir_op_ubitfield_extract, dest_size, srcs);
-      for (unsigned i = 0; i < ir->operands[0]->type->vector_elements; i++) {
-         instr->src[1].swizzle[i] = 0;
-         instr->src[2].swizzle[i] = 0;
-      }
+      result = (out_type == GLSL_TYPE_INT) ?
+         nir_ibitfield_extract(&b, srcs[0], srcs[1], srcs[2]) :
+         nir_ubitfield_extract(&b, srcs[0], srcs[1], srcs[2]);
       break;
    case ir_quadop_bitfield_insert:
-      instr = emit(nir_op_bitfield_insert, dest_size, srcs);
-      for (unsigned i = 0; i < ir->operands[0]->type->vector_elements; i++) {
-         instr->src[2].swizzle[i] = 0;
-         instr->src[3].swizzle[i] = 0;
-      }
+      result = nir_bitfield_insert(&b, srcs[0], srcs[1], srcs[2], srcs[3]);
       break;
    case ir_quadop_vector:
-      switch (ir->type->vector_elements) {
-         case 2: emit(nir_op_vec2, dest_size, srcs); break;
-         case 3: emit(nir_op_vec3, dest_size, srcs); break;
-         case 4: emit(nir_op_vec4, dest_size, srcs); break;
-         default: unreachable("not reached");
-      }
+      result = nir_vec(&b, srcs, ir->type->vector_elements);
       break;
 
    default:
@@ -1913,13 +1735,9 @@ nir_visitor::visit(ir_expression *ir)
 void
 nir_visitor::visit(ir_swizzle *ir)
 {
-   nir_alu_instr *instr = emit(supports_ints ? nir_op_imov : nir_op_fmov,
-                               ir->type->vector_elements,
-                               evaluate_rvalue(ir->val));
-
    unsigned swizzle[4] = { ir->mask.x, ir->mask.y, ir->mask.z, ir->mask.w };
-   for (unsigned i = 0; i < ir->type->vector_elements; i++)
-      instr->src[0].swizzle[i] = swizzle[i];
+   result = nir_swizzle(&b, evaluate_rvalue(ir->val), swizzle,
+                        ir->type->vector_elements, !supports_ints);
 }
 
 void
@@ -2184,5 +2002,5 @@ nir_visitor::visit(ir_barrier *ir)
 {
    nir_intrinsic_instr *instr =
       nir_intrinsic_instr_create(this->shader, nir_intrinsic_barrier);
-   nir_instr_insert_after_cf_list(this->cf_node_list, &instr->instr);
+   nir_builder_instr_insert(&b, &instr->instr);
 }

From f0e95c2500fdb720d4102976f17361c294e1f3a3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kristian=20H=C3=B8gsberg=20Kristensen?= <krh@bitplanet.net>
Date: Wed, 4 Nov 2015 14:50:51 -0800
Subject: [PATCH 140/287] glsl: Drop exec_list argument to lower_ubo_reference
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We always pass in shader->ir and we already pass in the shader, so just
drop the exec_list. Most passes either take just an exec_list or a
shader, so this seems more consistent.

Reviewed-by: Timothy Arceri <timothy.arceri@collabora.com>
Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
Signed-off-by: Kristian Høgsberg Kristensen <krh@bitplanet.net>
---
 src/glsl/ir_optimization.h                 | 2 +-
 src/glsl/lower_ubo_reference.cpp           | 4 ++--
 src/mesa/drivers/dri/i965/brw_link.cpp     | 2 +-
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 2 +-
 4 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h
index ce5c4929bfb..6d19a6ca476 100644
--- a/src/glsl/ir_optimization.h
+++ b/src/glsl/ir_optimization.h
@@ -124,7 +124,7 @@ bool lower_const_arrays_to_uniforms(exec_list *instructions);
 bool lower_clip_distance(gl_shader *shader);
 void lower_output_reads(unsigned stage, exec_list *instructions);
 bool lower_packing_builtins(exec_list *instructions, int op_mask);
-void lower_ubo_reference(struct gl_shader *shader, exec_list *instructions);
+void lower_ubo_reference(struct gl_shader *shader);
 void lower_packed_varyings(void *mem_ctx,
                            unsigned locations_used, ir_variable_mode mode,
                            unsigned gs_input_vertices, gl_shader *shader);
diff --git a/src/glsl/lower_ubo_reference.cpp b/src/glsl/lower_ubo_reference.cpp
index 57a242b4074..24806ac6ce9 100644
--- a/src/glsl/lower_ubo_reference.cpp
+++ b/src/glsl/lower_ubo_reference.cpp
@@ -1270,7 +1270,7 @@ lower_ubo_reference_visitor::visit_enter(ir_call *ir)
 } /* unnamed namespace */
 
 void
-lower_ubo_reference(struct gl_shader *shader, exec_list *instructions)
+lower_ubo_reference(struct gl_shader *shader)
 {
    lower_ubo_reference_visitor v(shader);
 
@@ -1281,6 +1281,6 @@ lower_ubo_reference(struct gl_shader *shader, exec_list *instructions)
     */
    do {
       v.progress = false;
-      visit_list_elements(&v, instructions);
+      visit_list_elements(&v, shader->ir);
    } while (v.progress);
 }
diff --git a/src/mesa/drivers/dri/i965/brw_link.cpp b/src/mesa/drivers/dri/i965/brw_link.cpp
index fc9bee43d80..f1e38605935 100644
--- a/src/mesa/drivers/dri/i965/brw_link.cpp
+++ b/src/mesa/drivers/dri/i965/brw_link.cpp
@@ -157,7 +157,7 @@ process_glsl_ir(gl_shader_stage stage,
                  _mesa_shader_stage_to_abbrev(shader->Stage));
    }
 
-   lower_ubo_reference(shader, shader->ir);
+   lower_ubo_reference(shader);
 
    bool progress;
    do {
diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index f481e8902d8..ca00930e261 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -5822,7 +5822,7 @@ st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
                          (!ctx->Const.NativeIntegers ? INT_DIV_TO_MUL_RCP : 0) |
                          (options->EmitNoSat ? SAT_TO_CLAMP : 0));
 
-      lower_ubo_reference(prog->_LinkedShaders[i], ir);
+      lower_ubo_reference(prog->_LinkedShaders[i]);
       do_vec_index_to_cond_assign(ir);
       lower_vector_insert(ir, true);
       lower_quadop_vector(ir, false);

From 60dd5287ff8dbbbe0dbe76bdff6d13c7a5ea9ef0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kristian=20H=C3=B8gsberg=20Kristensen?= <krh@bitplanet.net>
Date: Wed, 4 Nov 2015 14:55:32 -0800
Subject: [PATCH 141/287] glsl: Lower UBO and SSBO access in glsl linker
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

All GLSL IR consumers run this lowering pass so we can move it to the
linker. This moves the pass up quite a bit, but that's the point: it
needs to run before we throw away information about per-component vector
access.

Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
Signed-off-by: Kristian Høgsberg Kristensen <krh@bitplanet.net>
---
 src/glsl/linker.cpp                        | 8 ++++++++
 src/mesa/drivers/dri/i965/brw_link.cpp     | 2 --
 src/mesa/drivers/dri/i965/brw_shader.cpp   | 2 ++
 src/mesa/main/mtypes.h                     | 2 ++
 src/mesa/state_tracker/st_extensions.c     | 1 +
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 1 -
 6 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp
index 26c02986be4..a8baee07f10 100644
--- a/src/glsl/linker.cpp
+++ b/src/glsl/linker.cpp
@@ -4445,6 +4445,14 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog)
 
    /* FINISHME: Assign fragment shader output locations. */
 
+   for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
+      if (prog->_LinkedShaders[i] == NULL)
+	 continue;
+
+      if (ctx->Const.ShaderCompilerOptions[i].LowerBufferInterfaceBlocks)
+         lower_ubo_reference(prog->_LinkedShaders[i]);
+   }
+
 done:
    for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
       free(shader_list[i]);
diff --git a/src/mesa/drivers/dri/i965/brw_link.cpp b/src/mesa/drivers/dri/i965/brw_link.cpp
index f1e38605935..29911732761 100644
--- a/src/mesa/drivers/dri/i965/brw_link.cpp
+++ b/src/mesa/drivers/dri/i965/brw_link.cpp
@@ -157,8 +157,6 @@ process_glsl_ir(gl_shader_stage stage,
                  _mesa_shader_stage_to_abbrev(shader->Stage));
    }
 
-   lower_ubo_reference(shader);
-
    bool progress;
    do {
       progress = false;
diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp
index 0312024ed1b..a6b57c3c3f1 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -148,6 +148,8 @@ brw_compiler_create(void *mem_ctx, const struct brw_device_info *devinfo)
          compiler->glsl_compiler_options[i].EmitNoIndirectSampler = true;
 
       compiler->glsl_compiler_options[i].NirOptions = nir_options;
+
+      compiler->glsl_compiler_options[i].LowerBufferInterfaceBlocks = true;
    }
 
    return compiler;
diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
index 05c546e00a0..777a1eecd12 100644
--- a/src/mesa/main/mtypes.h
+++ b/src/mesa/main/mtypes.h
@@ -2875,6 +2875,8 @@ struct gl_shader_compiler_options
     */
    GLboolean OptimizeForAOS;
 
+   GLboolean LowerBufferInterfaceBlocks; /**< Lower UBO and SSBO access to intrinsics. */
+
    const struct nir_shader_compiler_options *NirOptions;
 };
 
diff --git a/src/mesa/state_tracker/st_extensions.c b/src/mesa/state_tracker/st_extensions.c
index bd7cbccc20c..bbb9027f4d7 100644
--- a/src/mesa/state_tracker/st_extensions.c
+++ b/src/mesa/state_tracker/st_extensions.c
@@ -254,6 +254,7 @@ void st_init_limits(struct pipe_screen *screen,
                                       PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT);
 
       options->LowerClipDistance = true;
+      options->LowerBufferInterfaceBlocks = true;
    }
 
    c->LowerTessLevel = true;
diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index ca00930e261..9ee6f8f89be 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -5822,7 +5822,6 @@ st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
                          (!ctx->Const.NativeIntegers ? INT_DIV_TO_MUL_RCP : 0) |
                          (options->EmitNoSat ? SAT_TO_CLAMP : 0));
 
-      lower_ubo_reference(prog->_LinkedShaders[i]);
       do_vec_index_to_cond_assign(ir);
       lower_vector_insert(ir, true);
       lower_quadop_vector(ir, false);

From 96b22fb080894ba1840af2372f28a46cc0f40c76 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kristian=20H=C3=B8gsberg=20Kristensen?= <krh@bitplanet.net>
Date: Wed, 4 Nov 2015 14:58:54 -0800
Subject: [PATCH 142/287] glsl: Use array deref for access to vector components
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We've assumed that we could lower per-component vector access from

  vec[i] = scalar

to

  vec = ir_triop_vector_insert(vec, scalar, i)

but with SSBOs (and compute shader SLM and tessellation outputs) this is
no longer valid. If a vector is "externally visible", multiple threads
can write independent components simultaneously. With lowering to
ir_triop_vector_insert, each thread reads the entire vector, changes one
component, then writes out the entire vector. This is racy.

Instead of generating an ir_binop_vector_extract when we see v[i], we
generate ir_dereference_array. We then add a separate lowering pass that
lowers the ir_dereference_array to ir_binop_vector_extract for rvalues
and to ir_triop_vector_insert for lvalues.

The resulting IR is the same as before, but we now have a window between
ast->ir conversion and the lowering pass where v[i] appears in the IR as
an array deref. This lets us run lowering passes that lower the vector
access to I/O (eg for SSBO load/store) before we lower the per-component
access to full vector writes.

Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
Signed-off-by: Kristian Høgsberg Kristensen <krh@bitplanet.net>
---
 src/glsl/Makefile.sources        |   1 +
 src/glsl/ast_array_index.cpp     |   5 +-
 src/glsl/ast_function.cpp        |  24 ++-----
 src/glsl/ast_to_hir.cpp          |  43 -------------
 src/glsl/ir_optimization.h       |   1 +
 src/glsl/ir_validate.cpp         |   7 ++-
 src/glsl/linker.cpp              |   2 +
 src/glsl/lower_ubo_reference.cpp |  14 ++++-
 src/glsl/lower_vector_derefs.cpp | 104 +++++++++++++++++++++++++++++++
 src/glsl/opt_dead_code_local.cpp |   5 ++
 10 files changed, 138 insertions(+), 68 deletions(-)
 create mode 100644 src/glsl/lower_vector_derefs.cpp

diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources
index 0266f290ccb..78d295b8e91 100644
--- a/src/glsl/Makefile.sources
+++ b/src/glsl/Makefile.sources
@@ -176,6 +176,7 @@ LIBGLSL_FILES = \
 	lower_vec_index_to_cond_assign.cpp \
 	lower_vec_index_to_swizzle.cpp \
 	lower_vector.cpp \
+	lower_vector_derefs.cpp \
 	lower_vector_insert.cpp \
 	lower_vertex_id.cpp \
 	lower_output_reads.cpp \
diff --git a/src/glsl/ast_array_index.cpp b/src/glsl/ast_array_index.cpp
index 74d403fdb65..ca7a9a10c36 100644
--- a/src/glsl/ast_array_index.cpp
+++ b/src/glsl/ast_array_index.cpp
@@ -319,10 +319,9 @@ _mesa_ast_array_index_to_hir(void *mem_ctx,
     * expression.
     */
    if (array->type->is_array()
-       || array->type->is_matrix()) {
+       || array->type->is_matrix()
+       || array->type->is_vector()) {
       return new(mem_ctx) ir_dereference_array(array, idx);
-   } else if (array->type->is_vector()) {
-      return new(mem_ctx) ir_expression(ir_binop_vector_extract, array, idx);
    } else if (array->type->is_error()) {
       return array;
    } else {
diff --git a/src/glsl/ast_function.cpp b/src/glsl/ast_function.cpp
index e4e4a3fe148..55844706d35 100644
--- a/src/glsl/ast_function.cpp
+++ b/src/glsl/ast_function.cpp
@@ -256,18 +256,10 @@ verify_parameter_modes(_mesa_glsl_parse_state *state,
 			     actual->variable_referenced()->name);
 	    return false;
 	 } else if (!actual->is_lvalue()) {
-            /* Even though ir_binop_vector_extract is not an l-value, let it
-             * slop through.  generate_call will handle it correctly.
-             */
-            ir_expression *const expr = ((ir_rvalue *) actual)->as_expression();
-            if (expr == NULL
-                || expr->operation != ir_binop_vector_extract
-                || !expr->operands[0]->is_lvalue()) {
-               _mesa_glsl_error(&loc, state,
-                                "function parameter '%s %s' is not an lvalue",
-                                mode, formal->name);
-               return false;
-            }
+            _mesa_glsl_error(&loc, state,
+                             "function parameter '%s %s' is not an lvalue",
+                             mode, formal->name);
+            return false;
 	 }
       }
 
@@ -376,12 +368,8 @@ fix_parameter(void *mem_ctx, ir_rvalue *actual, const glsl_type *formal_type,
 
    ir_rvalue *lhs = actual;
    if (expr != NULL && expr->operation == ir_binop_vector_extract) {
-      rhs = new(mem_ctx) ir_expression(ir_triop_vector_insert,
-                                       expr->operands[0]->type,
-                                       expr->operands[0]->clone(mem_ctx, NULL),
-                                       rhs,
-                                       expr->operands[1]->clone(mem_ctx, NULL));
-      lhs = expr->operands[0]->clone(mem_ctx, NULL);
+      lhs = new(mem_ctx) ir_dereference_array(expr->operands[0]->clone(mem_ctx, NULL),
+                                              expr->operands[1]->clone(mem_ctx, NULL));
    }
 
    ir_assignment *const assignment_2 = new(mem_ctx) ir_assignment(lhs, rhs);
diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp
index 6f5f3c1b245..9d341e8cf92 100644
--- a/src/glsl/ast_to_hir.cpp
+++ b/src/glsl/ast_to_hir.cpp
@@ -850,43 +850,6 @@ do_assignment(exec_list *instructions, struct _mesa_glsl_parse_state *state,
 {
    void *ctx = state;
    bool error_emitted = (lhs->type->is_error() || rhs->type->is_error());
-   ir_rvalue *extract_channel = NULL;
-
-   /* If the assignment LHS comes back as an ir_binop_vector_extract
-    * expression, move it to the RHS as an ir_triop_vector_insert.
-    */
-   if (lhs->ir_type == ir_type_expression) {
-      ir_expression *const lhs_expr = lhs->as_expression();
-
-      if (unlikely(lhs_expr->operation == ir_binop_vector_extract)) {
-         ir_rvalue *new_rhs =
-            validate_assignment(state, lhs_loc, lhs,
-                                rhs, is_initializer);
-
-         if (new_rhs == NULL) {
-            return lhs;
-         } else {
-            /* This converts:
-             * - LHS: (expression float vector_extract <vec> <channel>)
-             * - RHS: <scalar>
-             * into:
-             * - LHS: <vec>
-             * - RHS: (expression vec2 vector_insert <vec> <channel> <scalar>)
-             *
-             * The LHS type is now a vector instead of a scalar.  Since GLSL
-             * allows assignments to be used as rvalues, we need to re-extract
-             * the channel from assignment_temp when returning the rvalue.
-             */
-            extract_channel = lhs_expr->operands[1];
-            rhs = new(ctx) ir_expression(ir_triop_vector_insert,
-                                         lhs_expr->operands[0]->type,
-                                         lhs_expr->operands[0],
-                                         new_rhs,
-                                         extract_channel);
-            lhs = lhs_expr->operands[0]->clone(ctx, NULL);
-         }
-      }
-   }
 
    ir_variable *lhs_var = lhs->variable_referenced();
    if (lhs_var)
@@ -984,12 +947,6 @@ do_assignment(exec_list *instructions, struct _mesa_glsl_parse_state *state,
       }
       ir_rvalue *rvalue = new(ctx) ir_dereference_variable(var);
 
-      if (extract_channel) {
-         rvalue = new(ctx) ir_expression(ir_binop_vector_extract,
-                                         rvalue,
-                                         extract_channel->clone(ctx, NULL));
-      }
-
       *out_rvalue = rvalue;
    } else {
       if (!error_emitted)
diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h
index 6d19a6ca476..2fee81c09c2 100644
--- a/src/glsl/ir_optimization.h
+++ b/src/glsl/ir_optimization.h
@@ -129,6 +129,7 @@ void lower_packed_varyings(void *mem_ctx,
                            unsigned locations_used, ir_variable_mode mode,
                            unsigned gs_input_vertices, gl_shader *shader);
 bool lower_vector_insert(exec_list *instructions, bool lower_nonconstant_index);
+bool lower_vector_derefs(gl_shader *shader);
 void lower_named_interface_blocks(void *mem_ctx, gl_shader *shader);
 bool optimize_redundant_jumps(exec_list *instructions);
 bool optimize_split_arrays(exec_list *instructions, bool linked);
diff --git a/src/glsl/ir_validate.cpp b/src/glsl/ir_validate.cpp
index 935571ae1d6..e63b5c318e3 100644
--- a/src/glsl/ir_validate.cpp
+++ b/src/glsl/ir_validate.cpp
@@ -110,9 +110,10 @@ ir_validate::visit(ir_dereference_variable *ir)
 ir_visitor_status
 ir_validate::visit_enter(class ir_dereference_array *ir)
 {
-   if (!ir->array->type->is_array() && !ir->array->type->is_matrix()) {
-      printf("ir_dereference_array @ %p does not specify an array or a "
-             "matrix\n",
+   if (!ir->array->type->is_array() && !ir->array->type->is_matrix() &&
+      !ir->array->type->is_vector()) {
+      printf("ir_dereference_array @ %p does not specify an array, a vector "
+             "or a matrix\n",
              (void *) ir);
       ir->print();
       printf("\n");
diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp
index a8baee07f10..db00f8febc6 100644
--- a/src/glsl/linker.cpp
+++ b/src/glsl/linker.cpp
@@ -4451,6 +4451,8 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog)
 
       if (ctx->Const.ShaderCompilerOptions[i].LowerBufferInterfaceBlocks)
          lower_ubo_reference(prog->_LinkedShaders[i]);
+
+      lower_vector_derefs(prog->_LinkedShaders[i]);
    }
 
 done:
diff --git a/src/glsl/lower_ubo_reference.cpp b/src/glsl/lower_ubo_reference.cpp
index 24806ac6ce9..b74aa3d0630 100644
--- a/src/glsl/lower_ubo_reference.cpp
+++ b/src/glsl/lower_ubo_reference.cpp
@@ -390,7 +390,19 @@ lower_ubo_reference_visitor::setup_for_load_or_store(ir_variable *var,
       case ir_type_dereference_array: {
          ir_dereference_array *deref_array = (ir_dereference_array *) deref;
          unsigned array_stride;
-         if (deref_array->array->type->is_matrix() && *row_major) {
+         if (deref_array->array->type->is_vector()) {
+            /* We get this when storing or loading a component out of a vector
+             * with a non-constant index. This happens for v[i] = f where v is
+             * a vector (or m[i][j] = f where m is a matrix). If we don't
+             * lower that here, it gets turned into v = vector_insert(v, i,
+             * f), which loads the entire vector, modifies one component and
+             * then writes the entire thing back.  That breaks if another
+             * thread or SIMD channel is modifying the same vector.
+             */
+            array_stride = 4;
+            if (deref_array->array->type->is_double())
+               array_stride *= 2;
+         } else if (deref_array->array->type->is_matrix() && *row_major) {
             /* When loading a vector out of a row major matrix, the
              * step between the columns (vectors) is the size of a
              * float, while the step between the rows (elements of a
diff --git a/src/glsl/lower_vector_derefs.cpp b/src/glsl/lower_vector_derefs.cpp
new file mode 100644
index 00000000000..4a5d6f0da4c
--- /dev/null
+++ b/src/glsl/lower_vector_derefs.cpp
@@ -0,0 +1,104 @@
+/*
+ * Copyright © 2013 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+#include "ir.h"
+#include "ir_builder.h"
+#include "ir_rvalue_visitor.h"
+#include "ir_optimization.h"
+
+using namespace ir_builder;
+
+namespace {
+
+class vector_deref_visitor : public ir_rvalue_enter_visitor {
+public:
+   vector_deref_visitor()
+      : progress(false)
+   {
+   }
+
+   virtual ~vector_deref_visitor()
+   {
+   }
+
+   virtual void handle_rvalue(ir_rvalue **rv);
+   virtual ir_visitor_status visit_enter(ir_assignment *ir);
+
+   bool progress;
+};
+
+} /* anonymous namespace */
+
+ir_visitor_status
+vector_deref_visitor::visit_enter(ir_assignment *ir)
+{
+   if (!ir->lhs || ir->lhs->ir_type != ir_type_dereference_array)
+      return ir_rvalue_enter_visitor::visit_enter(ir);
+
+   ir_dereference_array *const deref = (ir_dereference_array *) ir->lhs;
+   if (!deref->array->type->is_vector())
+      return ir_rvalue_enter_visitor::visit_enter(ir);
+
+   ir_dereference *const new_lhs = (ir_dereference *) deref->array;
+   ir->set_lhs(new_lhs);
+
+   ir_constant *old_index_constant = deref->array_index->constant_expression_value();
+   void *mem_ctx = ralloc_parent(ir);
+   if (!old_index_constant) {
+      ir->rhs = new(mem_ctx) ir_expression(ir_triop_vector_insert,
+                                           new_lhs->type,
+                                           new_lhs->clone(mem_ctx, NULL),
+                                           ir->rhs,
+                                           deref->array_index);
+      ir->write_mask = (1 << new_lhs->type->vector_elements) - 1;
+   } else {
+      ir->write_mask = 1 << old_index_constant->get_int_component(0);
+   }
+
+   return ir_rvalue_enter_visitor::visit_enter(ir);
+}
+
+void
+vector_deref_visitor::handle_rvalue(ir_rvalue **rv)
+{
+   if (*rv == NULL || (*rv)->ir_type != ir_type_dereference_array)
+      return;
+
+   ir_dereference_array *const deref = (ir_dereference_array *) *rv;
+   if (!deref->array->type->is_vector())
+      return;
+
+   void *mem_ctx = ralloc_parent(deref);
+   *rv = new(mem_ctx) ir_expression(ir_binop_vector_extract,
+                                    deref->array,
+                                    deref->array_index);
+}
+
+bool
+lower_vector_derefs(gl_shader *shader)
+{
+   vector_deref_visitor v;
+
+   visit_list_elements(&v, shader->ir);
+
+   return v.progress;
+}
diff --git a/src/glsl/opt_dead_code_local.cpp b/src/glsl/opt_dead_code_local.cpp
index 4770fcff2ea..ee9f22c0373 100644
--- a/src/glsl/opt_dead_code_local.cpp
+++ b/src/glsl/opt_dead_code_local.cpp
@@ -197,6 +197,11 @@ process_assignment(void *ctx, ir_assignment *ir, exec_list *assignments)
 	    if (entry->lhs != var)
 	       continue;
 
+            /* Skip if the assignment we're trying to eliminate isn't a plain
+             * variable deref. */
+            if (entry->ir->lhs->ir_type != ir_type_dereference_variable)
+               continue;
+
 	    int remove = entry->unused & ir->write_mask;
 	    if (debug) {
 	       printf("%s 0x%01x - 0x%01x = 0x%01x\n",

From 94ff35204dba0ddbd7f5c4342206c8acba22d32f Mon Sep 17 00:00:00 2001
From: Eduardo Lima Mitev <elima@igalia.com>
Date: Thu, 22 Oct 2015 15:25:23 +0200
Subject: [PATCH 143/287] nir/nir_opt_peephole_ffma: Move this lowering pass to
 the i965 driver

Because the next patch will add an optimization that is specific to i965,
we want to move this lowering pass to that driver altogether.

This is safe because i965 is the only consumer.

Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
---
 src/glsl/Makefile.sources                            |  1 -
 src/glsl/nir/nir.h                                   |  1 -
 src/mesa/drivers/dri/i965/Makefile.sources           |  1 +
 src/mesa/drivers/dri/i965/brw_nir.c                  |  2 +-
 src/mesa/drivers/dri/i965/brw_nir.h                  |  2 ++
 .../drivers/dri/i965/brw_nir_opt_peephole_ffma.c}    | 12 ++++++------
 6 files changed, 10 insertions(+), 9 deletions(-)
 rename src/{glsl/nir/nir_opt_peephole_ffma.c => mesa/drivers/dri/i965/brw_nir_opt_peephole_ffma.c} (96%)

diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources
index 78d295b8e91..d4b02c17b0d 100644
--- a/src/glsl/Makefile.sources
+++ b/src/glsl/Makefile.sources
@@ -67,7 +67,6 @@ NIR_FILES = \
 	nir/nir_opt_dead_cf.c \
 	nir/nir_opt_gcm.c \
 	nir/nir_opt_global_to_local.c \
-	nir/nir_opt_peephole_ffma.c \
 	nir/nir_opt_peephole_select.c \
 	nir/nir_opt_remove_phis.c \
 	nir/nir_opt_undef.c \
diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
index 13ebbcae564..4ed2cbd2b67 100644
--- a/src/glsl/nir/nir.h
+++ b/src/glsl/nir/nir.h
@@ -2029,7 +2029,6 @@ bool nir_opt_dead_cf(nir_shader *shader);
 void nir_opt_gcm(nir_shader *shader);
 
 bool nir_opt_peephole_select(nir_shader *shader);
-bool nir_opt_peephole_ffma(nir_shader *shader);
 
 bool nir_opt_remove_phis(nir_shader *shader);
 
diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources
index 434583defe3..f5e84cb7f65 100644
--- a/src/mesa/drivers/dri/i965/Makefile.sources
+++ b/src/mesa/drivers/dri/i965/Makefile.sources
@@ -46,6 +46,7 @@ i965_compiler_FILES = \
 	brw_nir.h \
 	brw_nir.c \
 	brw_nir_analyze_boolean_resolves.c \
+	brw_nir_opt_peephole_ffma.c \
 	brw_nir_uniforms.cpp \
 	brw_packed_float.c \
 	brw_predicated_break.cpp \
diff --git a/src/mesa/drivers/dri/i965/brw_nir.c b/src/mesa/drivers/dri/i965/brw_nir.c
index dece208233f..fe5cad4e435 100644
--- a/src/mesa/drivers/dri/i965/brw_nir.c
+++ b/src/mesa/drivers/dri/i965/brw_nir.c
@@ -293,7 +293,7 @@ brw_create_nir(struct brw_context *brw,
 
    if (brw->gen >= 6) {
       /* Try and fuse multiply-adds */
-      nir_opt_peephole_ffma(nir);
+      brw_nir_opt_peephole_ffma(nir);
       nir_validate_shader(nir);
    }
 
diff --git a/src/mesa/drivers/dri/i965/brw_nir.h b/src/mesa/drivers/dri/i965/brw_nir.h
index b4a6dc0f825..e7c93684fb3 100644
--- a/src/mesa/drivers/dri/i965/brw_nir.h
+++ b/src/mesa/drivers/dri/i965/brw_nir.h
@@ -94,6 +94,8 @@ void brw_nir_setup_glsl_uniforms(nir_shader *shader,
 void brw_nir_setup_arb_uniforms(nir_shader *shader, struct gl_program *prog,
                                 struct brw_stage_prog_data *stage_prog_data);
 
+bool brw_nir_opt_peephole_ffma(nir_shader *shader);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/glsl/nir/nir_opt_peephole_ffma.c b/src/mesa/drivers/dri/i965/brw_nir_opt_peephole_ffma.c
similarity index 96%
rename from src/glsl/nir/nir_opt_peephole_ffma.c
rename to src/mesa/drivers/dri/i965/brw_nir_opt_peephole_ffma.c
index 4f0f0dae04e..a8448e73a3f 100644
--- a/src/glsl/nir/nir_opt_peephole_ffma.c
+++ b/src/mesa/drivers/dri/i965/brw_nir_opt_peephole_ffma.c
@@ -25,7 +25,7 @@
  *
  */
 
-#include "nir.h"
+#include "brw_nir.h"
 
 /*
  * Implements a small peephole optimization that looks for a multiply that
@@ -134,7 +134,7 @@ get_mul_for_src(nir_alu_src *src, int num_components,
 }
 
 static bool
-nir_opt_peephole_ffma_block(nir_block *block, void *void_state)
+brw_nir_opt_peephole_ffma_block(nir_block *block, void *void_state)
 {
    struct peephole_ffma_state *state = void_state;
 
@@ -237,7 +237,7 @@ nir_opt_peephole_ffma_block(nir_block *block, void *void_state)
 }
 
 static bool
-nir_opt_peephole_ffma_impl(nir_function_impl *impl)
+brw_nir_opt_peephole_ffma_impl(nir_function_impl *impl)
 {
    struct peephole_ffma_state state;
 
@@ -245,7 +245,7 @@ nir_opt_peephole_ffma_impl(nir_function_impl *impl)
    state.impl = impl;
    state.progress = false;
 
-   nir_foreach_block(impl, nir_opt_peephole_ffma_block, &state);
+   nir_foreach_block(impl, brw_nir_opt_peephole_ffma_block, &state);
 
    if (state.progress)
       nir_metadata_preserve(impl, nir_metadata_block_index |
@@ -255,13 +255,13 @@ nir_opt_peephole_ffma_impl(nir_function_impl *impl)
 }
 
 bool
-nir_opt_peephole_ffma(nir_shader *shader)
+brw_nir_opt_peephole_ffma(nir_shader *shader)
 {
    bool progress = false;
 
    nir_foreach_overload(shader, overload) {
       if (overload->impl)
-         progress |= nir_opt_peephole_ffma_impl(overload->impl);
+         progress |= brw_nir_opt_peephole_ffma_impl(overload->impl);
    }
 
    return progress;

From fb3b5669cee85781b603f612228387e9a2e4120f Mon Sep 17 00:00:00 2001
From: Eduardo Lima Mitev <elima@igalia.com>
Date: Fri, 23 Oct 2015 16:31:41 +0200
Subject: [PATCH 144/287] util: Add list_is_singular() helper function

Returns whether the list has exactly one element.

Reviewed-by: Matt Turner <mattst88@gmail.com>
---
 src/util/list.h | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/src/util/list.h b/src/util/list.h
index b98ce59ff77..066f9b8dfe5 100644
--- a/src/util/list.h
+++ b/src/util/list.h
@@ -99,6 +99,14 @@ static inline bool list_empty(struct list_head *list)
    return list->next == list;
 }
 
+/**
+ * Returns whether the list has exactly one element (false if it is empty).
+ */
+static inline bool list_is_singular(const struct list_head *list)
+{
+   return list->next != NULL && list->next != list && list->next->next == list;
+}
+
 static inline unsigned list_length(struct list_head *list)
 {
    struct list_head *node;

From de51676b410ff3ccab1df765f8eee15126c9de4c Mon Sep 17 00:00:00 2001
From: Eduardo Lima Mitev <elima@igalia.com>
Date: Thu, 22 Oct 2015 15:32:13 +0200
Subject: [PATCH 145/287] i965/nir/opt_peephole_ffma: Bypass fusion if any
 operand of fadd and fmul is a const

When both fadd and fmul instructions have at least one operand that is a
constant and it is only used once, the total number of instructions can
be reduced from 3 (1 ffma + 2 load_const) to 2 (1 fmul + 1 fadd); because
the constants will be propagated as immediate operands of fmul and fadd.

This patch detects these situations and prevents fusing fmul+fadd into ffma.

Shader-db results on i965 Haswell:

total instructions in shared programs: 6235835 -> 6225895 (-0.16%)
instructions in affected programs:     1124094 -> 1114154 (-0.88%)
total loops in shared programs:        1979 -> 1979 (0.00%)
helped:                                7612
HURT:                                  843
GAINED:                                4
LOST:                                  0

Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
---
 .../dri/i965/brw_nir_opt_peephole_ffma.c      | 31 +++++++++++++++++++
 1 file changed, 31 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_nir_opt_peephole_ffma.c b/src/mesa/drivers/dri/i965/brw_nir_opt_peephole_ffma.c
index a8448e73a3f..5603129bde7 100644
--- a/src/mesa/drivers/dri/i965/brw_nir_opt_peephole_ffma.c
+++ b/src/mesa/drivers/dri/i965/brw_nir_opt_peephole_ffma.c
@@ -133,6 +133,28 @@ get_mul_for_src(nir_alu_src *src, int num_components,
    return alu;
 }
 
+/**
+ * Given a list of (at least two) nir_alu_src's, tells if any of them is a
+ * constant value and is used only once.
+ */
+static bool
+any_alu_src_is_a_constant(nir_alu_src srcs[])
+{
+   for (unsigned i = 0; i < 2; i++) {
+      if (srcs[i].src.ssa->parent_instr->type == nir_instr_type_load_const) {
+         nir_load_const_instr *load_const =
+            nir_instr_as_load_const (srcs[i].src.ssa->parent_instr);
+
+         if (list_is_singular(&load_const->def.uses) &&
+             list_empty(&load_const->def.if_uses)) {
+            return true;
+         }
+      }
+   }
+
+   return false;
+}
+
 static bool
 brw_nir_opt_peephole_ffma_block(nir_block *block, void *void_state)
 {
@@ -183,6 +205,15 @@ brw_nir_opt_peephole_ffma_block(nir_block *block, void *void_state)
       mul_src[0] = mul->src[0].src.ssa;
       mul_src[1] = mul->src[1].src.ssa;
 
+      /* If any of the operands of the fmul and any of the fadd is a constant,
+       * we bypass because it will be more efficient as the constants will be
+       * propagated as operands, potentially saving two load_const instructions.
+       */
+      if (any_alu_src_is_a_constant(mul->src) &&
+          any_alu_src_is_a_constant(add->src)) {
+         continue;
+      }
+
       if (abs) {
          for (unsigned i = 0; i < 2; i++) {
             nir_alu_instr *abs = nir_alu_instr_create(state->mem_ctx,

From a1fc78911e9a6439db94d6ae91d5672c76e5fb1c Mon Sep 17 00:00:00 2001
From: Glenn Kennard <glenn.kennard@gmail.com>
Date: Sat, 17 Oct 2015 00:52:39 +0200
Subject: [PATCH 146/287] r600g: Pass conservative depth parameters to hw
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Supported on R700 and up.

Signed-off-by: Glenn Kennard <glenn.kennard@gmail.com>
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
---
 src/gallium/drivers/r600/evergreen_state.c | 13 +++++++++++++
 src/gallium/drivers/r600/evergreend.h      |  7 +++++++
 src/gallium/drivers/r600/r600_shader.c     |  1 +
 src/gallium/drivers/r600/r600_shader.h     |  2 ++
 src/gallium/drivers/r600/r600_state.c      | 15 +++++++++++++++
 src/gallium/drivers/r600/r600d.h           |  8 ++++++++
 6 files changed, 46 insertions(+)

diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c
index c6702a9ca34..96c6b115c98 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -2940,6 +2940,19 @@ void evergreen_update_ps_state(struct pipe_context *ctx, struct r600_pipe_shader
 	db_shader_control |= S_02880C_STENCIL_EXPORT_ENABLE(stencil_export);
 	db_shader_control |= S_02880C_MASK_EXPORT_ENABLE(mask_export);
 
+	switch (rshader->ps_conservative_z) {
+	default: /* fall through */
+	case TGSI_FS_DEPTH_LAYOUT_ANY:
+		db_shader_control |= S_02880C_CONSERVATIVE_Z_EXPORT(V_02880C_EXPORT_ANY_Z);
+		break;
+	case TGSI_FS_DEPTH_LAYOUT_GREATER:
+		db_shader_control |= S_02880C_CONSERVATIVE_Z_EXPORT(V_02880C_EXPORT_GREATER_THAN_Z);
+		break;
+	case TGSI_FS_DEPTH_LAYOUT_LESS:
+		db_shader_control |= S_02880C_CONSERVATIVE_Z_EXPORT(V_02880C_EXPORT_LESS_THAN_Z);
+		break;
+	}
+
 	exports_ps = 0;
 	for (i = 0; i < rshader->noutput; i++) {
 		if (rshader->output[i].name == TGSI_SEMANTIC_POSITION ||
diff --git a/src/gallium/drivers/r600/evergreend.h b/src/gallium/drivers/r600/evergreend.h
index 937ffcbddb9..a9a65f75305 100644
--- a/src/gallium/drivers/r600/evergreend.h
+++ b/src/gallium/drivers/r600/evergreend.h
@@ -815,6 +815,13 @@
 #define     V_02880C_EXPORT_DB_FOUR16                  0x01
 #define     V_02880C_EXPORT_DB_TWO                     0x02
 #define   S_02880C_ALPHA_TO_MASK_DISABLE(x)            (((x) & 0x1) << 12)
+#define   S_02880C_CONSERVATIVE_Z_EXPORT(x)            (((x) & 0x03) << 16)
+#define   G_02880C_CONSERVATIVE_Z_EXPORT(x)            (((x) >> 16) & 0x03)
+#define   C_02880C_CONSERVATIVE_Z_EXPORT               0xFFFCFFFF
+#define     V_02880C_EXPORT_ANY_Z                      0
+#define     V_02880C_EXPORT_LESS_THAN_Z                1
+#define     V_02880C_EXPORT_GREATER_THAN_Z             2
+#define     V_02880C_EXPORT_RESERVED                   3
 
 #define R_028A00_PA_SU_POINT_SIZE                    0x028A00
 #define   S_028A00_HEIGHT(x)                           (((x) & 0xFFFF) << 0)
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index fc6335ae8bc..101acd44e54 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -2044,6 +2044,7 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
 
 	shader->fs_write_all = ctx.info.properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS];
 	shader->vs_position_window_space = ctx.info.properties[TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION];
+	shader->ps_conservative_z = ctx.info.properties[TGSI_PROPERTY_FS_DEPTH_LAYOUT];
 
 	if (shader->vs_as_gs_a)
 		vs_add_primid_output(&ctx, key.vs.prim_id_out);
diff --git a/src/gallium/drivers/r600/r600_shader.h b/src/gallium/drivers/r600/r600_shader.h
index c240e7110c1..e08526376c5 100644
--- a/src/gallium/drivers/r600/r600_shader.h
+++ b/src/gallium/drivers/r600/r600_shader.h
@@ -76,6 +76,8 @@ struct r600_shader {
 	boolean			uses_tex_buffers;
 	boolean                 gs_prim_id_input;
 
+	unsigned		ps_conservative_z;
+
 	/* Size in bytes of a data item in the ring(s) (single vertex data).
 	   Stages with only one ring items 123 will be set to 0. */
 	unsigned		ring_item_sizes[4];
diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c
index 1be3e1b4de5..09b2325a239 100644
--- a/src/gallium/drivers/r600/r600_state.c
+++ b/src/gallium/drivers/r600/r600_state.c
@@ -2533,6 +2533,21 @@ void r600_update_ps_state(struct pipe_context *ctx, struct r600_pipe_shader *sha
 	if (rshader->uses_kill)
 		db_shader_control |= S_02880C_KILL_ENABLE(1);
 
+	if (rctx->b.chip_class >= R700) {
+		switch (rshader->ps_conservative_z) {
+		default: /* fall through */
+		case TGSI_FS_DEPTH_LAYOUT_ANY:
+			db_shader_control |= S_02880C_CONSERVATIVE_Z_EXPORT(V_02880C_EXPORT_ANY_Z);
+			break;
+		case TGSI_FS_DEPTH_LAYOUT_GREATER:
+			db_shader_control |= S_02880C_CONSERVATIVE_Z_EXPORT(V_02880C_EXPORT_GREATER_THAN_Z);
+			break;
+		case TGSI_FS_DEPTH_LAYOUT_LESS:
+			db_shader_control |= S_02880C_CONSERVATIVE_Z_EXPORT(V_02880C_EXPORT_LESS_THAN_Z);
+			break;
+		}
+	}
+
 	exports_ps = 0;
 	for (i = 0; i < rshader->noutput; i++) {
 		if (rshader->output[i].name == TGSI_SEMANTIC_POSITION ||
diff --git a/src/gallium/drivers/r600/r600d.h b/src/gallium/drivers/r600/r600d.h
index 6bba88cb574..06c435605a3 100644
--- a/src/gallium/drivers/r600/r600d.h
+++ b/src/gallium/drivers/r600/r600d.h
@@ -853,6 +853,14 @@
 #define   S_02880C_DUAL_EXPORT_ENABLE(x)               (((x) & 0x1) << 9)
 #define   G_02880C_DUAL_EXPORT_ENABLE(x)               (((x) >> 9) & 0x1)
 #define   C_02880C_DUAL_EXPORT_ENABLE                  0xFFFFFDFF
+#define   S_02880C_CONSERVATIVE_Z_EXPORT(x)            (((x) & 0x03) << 13)
+#define   G_02880C_CONSERVATIVE_Z_EXPORT(x)            (((x) >> 13) & 0x03)
+#define   C_02880C_CONSERVATIVE_Z_EXPORT               0xFFFF9FFF
+#define     V_02880C_EXPORT_ANY_Z                      0
+#define     V_02880C_EXPORT_LESS_THAN_Z                1
+#define     V_02880C_EXPORT_GREATER_THAN_Z             2
+#define     V_02880C_EXPORT_RESERVED                   3
+
 #define R_028DF8_PA_SU_POLY_OFFSET_DB_FMT_CNTL       0x028DF8
 #define   S_028DF8_POLY_OFFSET_NEG_NUM_DB_BITS(x)      (((x) & 0xFF) << 0)
 #define   G_028DF8_POLY_OFFSET_NEG_NUM_DB_BITS(x)      (((x) >> 0) & 0xFF)

From c878d61124397706072e0f7f5a694a8bf644cc91 Mon Sep 17 00:00:00 2001
From: Glenn Kennard <glenn.kennard@gmail.com>
Date: Fri, 16 Oct 2015 01:53:47 +0200
Subject: [PATCH 147/287] r600g: Implement ARB_texture_view
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Glenn Kennard <glenn.kennard@gmail.com>
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 docs/GL3.txt                               |  2 +-
 docs/relnotes/11.1.0.html                  |  2 +-
 src/gallium/drivers/r600/evergreen_state.c | 23 ++++++++++++++++------
 src/gallium/drivers/r600/r600_pipe.c       |  2 +-
 4 files changed, 20 insertions(+), 9 deletions(-)

diff --git a/docs/GL3.txt b/docs/GL3.txt
index 7abdcd8dea1..845672b07d5 100644
--- a/docs/GL3.txt
+++ b/docs/GL3.txt
@@ -169,7 +169,7 @@ GL 4.3, GLSL 4.30:
   GL_ARB_texture_buffer_range                          DONE (nv50, nvc0, i965, r600, radeonsi, llvmpipe)
   GL_ARB_texture_query_levels                          DONE (all drivers that support GLSL 1.30)
   GL_ARB_texture_storage_multisample                   DONE (all drivers that support GL_ARB_texture_multisample)
-  GL_ARB_texture_view                                  DONE (i965, nv50, nvc0, radeonsi, llvmpipe, softpipe)
+  GL_ARB_texture_view                                  DONE (i965, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe)
   GL_ARB_vertex_attrib_binding                         DONE (all drivers)
 
 
diff --git a/docs/relnotes/11.1.0.html b/docs/relnotes/11.1.0.html
index 11fbdfff236..14656fd97c9 100644
--- a/docs/relnotes/11.1.0.html
+++ b/docs/relnotes/11.1.0.html
@@ -55,7 +55,7 @@ Note: some of the new features are only available with certain drivers.
 <li>GL_ARB_shader_texture_image_samples on i965, nv50, nvc0, r600, radeonsi</li>
 <li>GL_ARB_texture_barrier / GL_NV_texture_barrier on i965</li>
 <li>GL_ARB_texture_query_lod on softpipe</li>
-<li>GL_ARB_texture_view on radeonsi</li>
+<li>GL_ARB_texture_view on radeonsi and r600 (for evergreen and newer)</li>
 <li>GL_EXT_buffer_storage implemented for when ES 3.1 support is gained</li>
 <li>GL_EXT_draw_elements_base_vertex on all drivers</li>
 <li>GL_OES_draw_elements_base_vertex on all drivers</li>
diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c
index 96c6b115c98..30e902673d0 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -666,6 +666,7 @@ evergreen_create_sampler_view_custom(struct pipe_context *ctx,
 	enum pipe_format pipe_format = state->format;
 	struct radeon_surf_level *surflevel;
 	unsigned base_level, first_level, last_level;
+	unsigned dim, last_layer;
 	uint64_t va;
 
 	if (view == NULL)
@@ -679,7 +680,7 @@ evergreen_create_sampler_view_custom(struct pipe_context *ctx,
 	view->base.reference.count = 1;
 	view->base.context = ctx;
 
-	if (texture->target == PIPE_BUFFER)
+	if (state->target == PIPE_BUFFER)
 		return texture_buffer_sampler_view(rctx, view, width0, height0);
 
 	swizzle[0] = state->swizzle_r;
@@ -773,12 +774,12 @@ evergreen_create_sampler_view_custom(struct pipe_context *ctx,
 	}
 	nbanks = eg_num_banks(rscreen->b.tiling_info.num_banks);
 
-	if (texture->target == PIPE_TEXTURE_1D_ARRAY) {
+	if (state->target == PIPE_TEXTURE_1D_ARRAY) {
 	        height = 1;
 		depth = texture->array_size;
-	} else if (texture->target == PIPE_TEXTURE_2D_ARRAY) {
+	} else if (state->target == PIPE_TEXTURE_2D_ARRAY) {
 		depth = texture->array_size;
-	} else if (texture->target == PIPE_TEXTURE_CUBE_ARRAY)
+	} else if (state->target == PIPE_TEXTURE_CUBE_ARRAY)
 		depth = texture->array_size / 6;
 
 	va = tmp->resource.gpu_address;
@@ -790,7 +791,13 @@ evergreen_create_sampler_view_custom(struct pipe_context *ctx,
 		view->is_stencil_sampler = true;
 
 	view->tex_resource = &tmp->resource;
-	view->tex_resource_words[0] = (S_030000_DIM(r600_tex_dim(texture->target, texture->nr_samples)) |
+
+	/* array type views and views into array types need to use layer offset */
+	dim = state->target;
+	if (state->target != PIPE_TEXTURE_CUBE)
+		dim = MAX2(state->target, texture->target);
+
+	view->tex_resource_words[0] = (S_030000_DIM(r600_tex_dim(dim, texture->nr_samples)) |
 				       S_030000_PITCH((pitch / 8) - 1) |
 				       S_030000_TEX_WIDTH(width - 1));
 	if (rscreen->b.chip_class == CAYMAN)
@@ -818,10 +825,14 @@ evergreen_create_sampler_view_custom(struct pipe_context *ctx,
 		view->tex_resource_words[3] = (surflevel[base_level].offset + va) >> 8;
 	}
 
+	last_layer = state->u.tex.last_layer;
+	if (state->target != texture->target && depth == 1) {
+		last_layer = state->u.tex.first_layer;
+	}
 	view->tex_resource_words[4] = (word4 |
 				       S_030010_ENDIAN_SWAP(endian));
 	view->tex_resource_words[5] = S_030014_BASE_ARRAY(state->u.tex.first_layer) |
-				      S_030014_LAST_ARRAY(state->u.tex.last_layer);
+				      S_030014_LAST_ARRAY(last_layer);
 	view->tex_resource_words[6] = S_030018_TILE_SPLIT(tile_split);
 
 	if (texture->nr_samples > 1) {
diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c
index 9f4cda2c142..29682390648 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -323,6 +323,7 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
 	case PIPE_CAP_TEXTURE_GATHER_SM5:
 	case PIPE_CAP_TEXTURE_QUERY_LOD:
 	case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE:
+	case PIPE_CAP_SAMPLER_VIEW_TARGET:
 		return family >= CHIP_CEDAR ? 1 : 0;
 	case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS:
 		return family >= CHIP_CEDAR ? 4 : 0;
@@ -338,7 +339,6 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
 	case PIPE_CAP_VERTEX_COLOR_CLAMPED:
 	case PIPE_CAP_USER_VERTEX_BUFFERS:
 	case PIPE_CAP_TEXTURE_GATHER_OFFSETS:
-	case PIPE_CAP_SAMPLER_VIEW_TARGET:
 	case PIPE_CAP_VERTEXID_NOBASE:
 	case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
 	case PIPE_CAP_DEPTH_BOUNDS_TEST:

From b3e793f2db0e294c2b18e030a45c6253f20a16a6 Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@redhat.com>
Date: Wed, 11 Nov 2015 09:05:50 +1000
Subject: [PATCH 148/287] Revert "r600g: Pass conservative depth parameters to
 hw"

This reverts commit a1fc78911e9a6439db94d6ae91d5672c76e5fb1c.

I pushed the wrong patch.
---
 src/gallium/drivers/r600/evergreen_state.c | 13 -------------
 src/gallium/drivers/r600/evergreend.h      |  7 -------
 src/gallium/drivers/r600/r600_shader.c     |  1 -
 src/gallium/drivers/r600/r600_shader.h     |  2 --
 src/gallium/drivers/r600/r600_state.c      | 15 ---------------
 src/gallium/drivers/r600/r600d.h           |  8 --------
 6 files changed, 46 deletions(-)

diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c
index 30e902673d0..60747d1c89b 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -2951,19 +2951,6 @@ void evergreen_update_ps_state(struct pipe_context *ctx, struct r600_pipe_shader
 	db_shader_control |= S_02880C_STENCIL_EXPORT_ENABLE(stencil_export);
 	db_shader_control |= S_02880C_MASK_EXPORT_ENABLE(mask_export);
 
-	switch (rshader->ps_conservative_z) {
-	default: /* fall through */
-	case TGSI_FS_DEPTH_LAYOUT_ANY:
-		db_shader_control |= S_02880C_CONSERVATIVE_Z_EXPORT(V_02880C_EXPORT_ANY_Z);
-		break;
-	case TGSI_FS_DEPTH_LAYOUT_GREATER:
-		db_shader_control |= S_02880C_CONSERVATIVE_Z_EXPORT(V_02880C_EXPORT_GREATER_THAN_Z);
-		break;
-	case TGSI_FS_DEPTH_LAYOUT_LESS:
-		db_shader_control |= S_02880C_CONSERVATIVE_Z_EXPORT(V_02880C_EXPORT_LESS_THAN_Z);
-		break;
-	}
-
 	exports_ps = 0;
 	for (i = 0; i < rshader->noutput; i++) {
 		if (rshader->output[i].name == TGSI_SEMANTIC_POSITION ||
diff --git a/src/gallium/drivers/r600/evergreend.h b/src/gallium/drivers/r600/evergreend.h
index a9a65f75305..937ffcbddb9 100644
--- a/src/gallium/drivers/r600/evergreend.h
+++ b/src/gallium/drivers/r600/evergreend.h
@@ -815,13 +815,6 @@
 #define     V_02880C_EXPORT_DB_FOUR16                  0x01
 #define     V_02880C_EXPORT_DB_TWO                     0x02
 #define   S_02880C_ALPHA_TO_MASK_DISABLE(x)            (((x) & 0x1) << 12)
-#define   S_02880C_CONSERVATIVE_Z_EXPORT(x)            (((x) & 0x03) << 16)
-#define   G_02880C_CONSERVATIVE_Z_EXPORT(x)            (((x) >> 16) & 0x03)
-#define   C_02880C_CONSERVATIVE_Z_EXPORT               0xFFFCFFFF
-#define     V_02880C_EXPORT_ANY_Z                      0
-#define     V_02880C_EXPORT_LESS_THAN_Z                1
-#define     V_02880C_EXPORT_GREATER_THAN_Z             2
-#define     V_02880C_EXPORT_RESERVED                   3
 
 #define R_028A00_PA_SU_POINT_SIZE                    0x028A00
 #define   S_028A00_HEIGHT(x)                           (((x) & 0xFFFF) << 0)
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index 101acd44e54..fc6335ae8bc 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -2044,7 +2044,6 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
 
 	shader->fs_write_all = ctx.info.properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS];
 	shader->vs_position_window_space = ctx.info.properties[TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION];
-	shader->ps_conservative_z = ctx.info.properties[TGSI_PROPERTY_FS_DEPTH_LAYOUT];
 
 	if (shader->vs_as_gs_a)
 		vs_add_primid_output(&ctx, key.vs.prim_id_out);
diff --git a/src/gallium/drivers/r600/r600_shader.h b/src/gallium/drivers/r600/r600_shader.h
index e08526376c5..c240e7110c1 100644
--- a/src/gallium/drivers/r600/r600_shader.h
+++ b/src/gallium/drivers/r600/r600_shader.h
@@ -76,8 +76,6 @@ struct r600_shader {
 	boolean			uses_tex_buffers;
 	boolean                 gs_prim_id_input;
 
-	unsigned		ps_conservative_z;
-
 	/* Size in bytes of a data item in the ring(s) (single vertex data).
 	   Stages with only one ring items 123 will be set to 0. */
 	unsigned		ring_item_sizes[4];
diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c
index 09b2325a239..1be3e1b4de5 100644
--- a/src/gallium/drivers/r600/r600_state.c
+++ b/src/gallium/drivers/r600/r600_state.c
@@ -2533,21 +2533,6 @@ void r600_update_ps_state(struct pipe_context *ctx, struct r600_pipe_shader *sha
 	if (rshader->uses_kill)
 		db_shader_control |= S_02880C_KILL_ENABLE(1);
 
-	if (rctx->b.chip_class >= R700) {
-		switch (rshader->ps_conservative_z) {
-		default: /* fall through */
-		case TGSI_FS_DEPTH_LAYOUT_ANY:
-			db_shader_control |= S_02880C_CONSERVATIVE_Z_EXPORT(V_02880C_EXPORT_ANY_Z);
-			break;
-		case TGSI_FS_DEPTH_LAYOUT_GREATER:
-			db_shader_control |= S_02880C_CONSERVATIVE_Z_EXPORT(V_02880C_EXPORT_GREATER_THAN_Z);
-			break;
-		case TGSI_FS_DEPTH_LAYOUT_LESS:
-			db_shader_control |= S_02880C_CONSERVATIVE_Z_EXPORT(V_02880C_EXPORT_LESS_THAN_Z);
-			break;
-		}
-	}
-
 	exports_ps = 0;
 	for (i = 0; i < rshader->noutput; i++) {
 		if (rshader->output[i].name == TGSI_SEMANTIC_POSITION ||
diff --git a/src/gallium/drivers/r600/r600d.h b/src/gallium/drivers/r600/r600d.h
index 06c435605a3..6bba88cb574 100644
--- a/src/gallium/drivers/r600/r600d.h
+++ b/src/gallium/drivers/r600/r600d.h
@@ -853,14 +853,6 @@
 #define   S_02880C_DUAL_EXPORT_ENABLE(x)               (((x) & 0x1) << 9)
 #define   G_02880C_DUAL_EXPORT_ENABLE(x)               (((x) >> 9) & 0x1)
 #define   C_02880C_DUAL_EXPORT_ENABLE                  0xFFFFFDFF
-#define   S_02880C_CONSERVATIVE_Z_EXPORT(x)            (((x) & 0x03) << 13)
-#define   G_02880C_CONSERVATIVE_Z_EXPORT(x)            (((x) >> 13) & 0x03)
-#define   C_02880C_CONSERVATIVE_Z_EXPORT               0xFFFF9FFF
-#define     V_02880C_EXPORT_ANY_Z                      0
-#define     V_02880C_EXPORT_LESS_THAN_Z                1
-#define     V_02880C_EXPORT_GREATER_THAN_Z             2
-#define     V_02880C_EXPORT_RESERVED                   3
-
 #define R_028DF8_PA_SU_POLY_OFFSET_DB_FMT_CNTL       0x028DF8
 #define   S_028DF8_POLY_OFFSET_NEG_NUM_DB_BITS(x)      (((x) & 0xFF) << 0)
 #define   G_028DF8_POLY_OFFSET_NEG_NUM_DB_BITS(x)      (((x) >> 0) & 0xFF)

From 3f45d29fe435266b137e8007acebcda498690b27 Mon Sep 17 00:00:00 2001
From: Glenn Kennard <glenn.kennard@gmail.com>
Date: Sat, 17 Oct 2015 16:53:28 +0200
Subject: [PATCH 149/287] r600g: Pass conservative depth parameters to hw
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Supported on R700 and up.

Signed-off-by: Glenn Kennard <glenn.kennard@gmail.com>
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 src/gallium/drivers/r600/evergreen_state.c | 13 +++++++++++++
 src/gallium/drivers/r600/evergreend.h      |  7 +++++++
 src/gallium/drivers/r600/r600_pipe.h       |  1 +
 src/gallium/drivers/r600/r600_shader.c     |  1 +
 src/gallium/drivers/r600/r600_shader.h     |  2 ++
 src/gallium/drivers/r600/r600_state.c      | 22 +++++++++++++++++++++-
 src/gallium/drivers/r600/r600d.h           |  8 ++++++++
 7 files changed, 53 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c
index 60747d1c89b..30e902673d0 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -2951,6 +2951,19 @@ void evergreen_update_ps_state(struct pipe_context *ctx, struct r600_pipe_shader
 	db_shader_control |= S_02880C_STENCIL_EXPORT_ENABLE(stencil_export);
 	db_shader_control |= S_02880C_MASK_EXPORT_ENABLE(mask_export);
 
+	switch (rshader->ps_conservative_z) {
+	default: /* fall through */
+	case TGSI_FS_DEPTH_LAYOUT_ANY:
+		db_shader_control |= S_02880C_CONSERVATIVE_Z_EXPORT(V_02880C_EXPORT_ANY_Z);
+		break;
+	case TGSI_FS_DEPTH_LAYOUT_GREATER:
+		db_shader_control |= S_02880C_CONSERVATIVE_Z_EXPORT(V_02880C_EXPORT_GREATER_THAN_Z);
+		break;
+	case TGSI_FS_DEPTH_LAYOUT_LESS:
+		db_shader_control |= S_02880C_CONSERVATIVE_Z_EXPORT(V_02880C_EXPORT_LESS_THAN_Z);
+		break;
+	}
+
 	exports_ps = 0;
 	for (i = 0; i < rshader->noutput; i++) {
 		if (rshader->output[i].name == TGSI_SEMANTIC_POSITION ||
diff --git a/src/gallium/drivers/r600/evergreend.h b/src/gallium/drivers/r600/evergreend.h
index 937ffcbddb9..a9a65f75305 100644
--- a/src/gallium/drivers/r600/evergreend.h
+++ b/src/gallium/drivers/r600/evergreend.h
@@ -815,6 +815,13 @@
 #define     V_02880C_EXPORT_DB_FOUR16                  0x01
 #define     V_02880C_EXPORT_DB_TWO                     0x02
 #define   S_02880C_ALPHA_TO_MASK_DISABLE(x)            (((x) & 0x1) << 12)
+#define   S_02880C_CONSERVATIVE_Z_EXPORT(x)            (((x) & 0x03) << 16)
+#define   G_02880C_CONSERVATIVE_Z_EXPORT(x)            (((x) >> 16) & 0x03)
+#define   C_02880C_CONSERVATIVE_Z_EXPORT               0xFFFCFFFF
+#define     V_02880C_EXPORT_ANY_Z                      0
+#define     V_02880C_EXPORT_LESS_THAN_Z                1
+#define     V_02880C_EXPORT_GREATER_THAN_Z             2
+#define     V_02880C_EXPORT_RESERVED                   3
 
 #define R_028A00_PA_SU_POINT_SIZE                    0x028A00
 #define   S_028A00_HEIGHT(x)                           (((x) & 0xFFFF) << 0)
diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h
index 520b03f605d..950bb6be76c 100644
--- a/src/gallium/drivers/r600/r600_pipe.h
+++ b/src/gallium/drivers/r600/r600_pipe.h
@@ -116,6 +116,7 @@ struct r600_db_misc_state {
 	unsigned			log_samples;
 	unsigned			db_shader_control;
 	bool				htile_clear;
+	uint8_t				ps_conservative_z;
 };
 
 struct r600_cb_misc_state {
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index fc6335ae8bc..560197c82b5 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -2044,6 +2044,7 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
 
 	shader->fs_write_all = ctx.info.properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS];
 	shader->vs_position_window_space = ctx.info.properties[TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION];
+	shader->ps_conservative_z = (uint8_t)ctx.info.properties[TGSI_PROPERTY_FS_DEPTH_LAYOUT];
 
 	if (shader->vs_as_gs_a)
 		vs_add_primid_output(&ctx, key.vs.prim_id_out);
diff --git a/src/gallium/drivers/r600/r600_shader.h b/src/gallium/drivers/r600/r600_shader.h
index c240e7110c1..2040f732bf5 100644
--- a/src/gallium/drivers/r600/r600_shader.h
+++ b/src/gallium/drivers/r600/r600_shader.h
@@ -76,6 +76,8 @@ struct r600_shader {
 	boolean			uses_tex_buffers;
 	boolean                 gs_prim_id_input;
 
+	uint8_t			ps_conservative_z;
+
 	/* Size in bytes of a data item in the ring(s) (single vertex data).
 	   Stages with only one ring items 123 will be set to 0. */
 	unsigned		ring_item_sizes[4];
diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c
index 1be3e1b4de5..1305efa5660 100644
--- a/src/gallium/drivers/r600/r600_state.c
+++ b/src/gallium/drivers/r600/r600_state.c
@@ -1628,6 +1628,21 @@ static void r600_emit_db_misc_state(struct r600_context *rctx, struct r600_atom
 		S_028D10_FORCE_HIS_ENABLE0(V_028D10_FORCE_DISABLE) |
 		S_028D10_FORCE_HIS_ENABLE1(V_028D10_FORCE_DISABLE);
 
+	if (rctx->b.chip_class >= R700) {
+		switch (a->ps_conservative_z) {
+		default: /* fall through */
+		case TGSI_FS_DEPTH_LAYOUT_ANY:
+			db_render_control |= S_028D0C_CONSERVATIVE_Z_EXPORT(V_028D0C_EXPORT_ANY_Z);
+			break;
+		case TGSI_FS_DEPTH_LAYOUT_GREATER:
+			db_render_control |= S_028D0C_CONSERVATIVE_Z_EXPORT(V_028D0C_EXPORT_GREATER_THAN_Z);
+			break;
+		case TGSI_FS_DEPTH_LAYOUT_LESS:
+			db_render_control |= S_028D0C_CONSERVATIVE_Z_EXPORT(V_028D0C_EXPORT_LESS_THAN_Z);
+			break;
+		}
+	}
+
 	if (a->occlusion_query_enabled) {
 		if (rctx->b.chip_class >= R700) {
 			db_render_control |= S_028D0C_R700_PERFECT_ZPASS_COUNTS(1);
@@ -2787,6 +2802,7 @@ void r600_update_db_shader_control(struct r600_context * rctx)
 {
 	bool dual_export;
 	unsigned db_shader_control;
+	uint8_t ps_conservative_z;
 
 	if (!rctx->ps_shader) {
 		return;
@@ -2798,6 +2814,8 @@ void r600_update_db_shader_control(struct r600_context * rctx)
 	db_shader_control = rctx->ps_shader->current->db_shader_control |
 			    S_02880C_DUAL_EXPORT_ENABLE(dual_export);
 
+	ps_conservative_z = rctx->ps_shader->current->shader.ps_conservative_z;
+
 	/* When alpha test is enabled we can't trust the hw to make the proper
 	 * decision on the order in which ztest should be run related to fragment
 	 * shader execution.
@@ -2811,8 +2829,10 @@ void r600_update_db_shader_control(struct r600_context * rctx)
 		db_shader_control |= S_02880C_Z_ORDER(V_02880C_EARLY_Z_THEN_LATE_Z);
 	}
 
-	if (db_shader_control != rctx->db_misc_state.db_shader_control) {
+	if (db_shader_control != rctx->db_misc_state.db_shader_control ||
+		ps_conservative_z != rctx->db_misc_state.ps_conservative_z) {
 		rctx->db_misc_state.db_shader_control = db_shader_control;
+		rctx->db_misc_state.ps_conservative_z = ps_conservative_z;
 		r600_mark_atom_dirty(rctx, &rctx->db_misc_state.atom);
 	}
 }
diff --git a/src/gallium/drivers/r600/r600d.h b/src/gallium/drivers/r600/r600d.h
index 6bba88cb574..53f5ad6db6a 100644
--- a/src/gallium/drivers/r600/r600d.h
+++ b/src/gallium/drivers/r600/r600d.h
@@ -781,6 +781,14 @@
 #define   S_028D0C_COPY_CENTROID(x)                    (((x) & 0x1) << 7)
 #define   S_028D0C_COPY_SAMPLE(x)                      (((x) & 0x1) << 8)
 #define   S_028D0C_R700_PERFECT_ZPASS_COUNTS(x)        (((x) & 0x1) << 15)
+#define   S_028D0C_CONSERVATIVE_Z_EXPORT(x)            (((x) & 0x03) << 13)
+#define   G_028D0C_CONSERVATIVE_Z_EXPORT(x)            (((x) >> 13) & 0x03)
+#define   C_028D0C_CONSERVATIVE_Z_EXPORT               0xFFFF9FFF
+#define     V_028D0C_EXPORT_ANY_Z                      0
+#define     V_028D0C_EXPORT_LESS_THAN_Z                1
+#define     V_028D0C_EXPORT_GREATER_THAN_Z             2
+#define     V_028D0C_EXPORT_RESERVED                   3
+
 #define R_028D10_DB_RENDER_OVERRIDE                  0x028D10
 #define   V_028D10_FORCE_OFF                         0
 #define   V_028D10_FORCE_ENABLE                      1

From 07948b03fbcbe9a36c8251ed209cac7f2508ef30 Mon Sep 17 00:00:00 2001
From: Emil Velikov <emil.velikov@collabora.com>
Date: Wed, 11 Nov 2015 10:05:57 +0000
Subject: [PATCH 150/287] docs: add release notes for 11.0.5

Signed-off-by: Emil Velikov <emil.velikov@collabora.com>
(cherry picked from commit ee57c22141c42d9b511a7dfa5971c4428cd1c6e7)
---
 docs/relnotes/11.0.5.html | 173 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 173 insertions(+)
 create mode 100644 docs/relnotes/11.0.5.html

diff --git a/docs/relnotes/11.0.5.html b/docs/relnotes/11.0.5.html
new file mode 100644
index 00000000000..6a2fe55b2f4
--- /dev/null
+++ b/docs/relnotes/11.0.5.html
@@ -0,0 +1,173 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 11.0.5 Release Notes / November 11, 2015</h1>
+
+<p>
+Mesa 11.0.5 is a bug fix release which fixes bugs found since the 11.0.4 release.
+</p>
+<p>
+Mesa 11.0.5 implements the OpenGL 4.1 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 4.1.  OpenGL
+4.1 is <strong>only</strong> available if requested at context creation
+because compatibility contexts are not supported.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+TBD
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+<h2>Bug fixes</h2>
+
+<p>This list is likely incomplete.</p>
+
+<ul>
+
+
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91993">Bug 91993</a> - Graphical glitch in Astromenace (open-source game).</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92214">Bug 92214</a> - Flightgear crashes during splashboot with R600 driver, LLVM 3.7.0 and mesa 11.0.2</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92437">Bug 92437</a> - osmesa: Expose GL entry points for Windows build, via .def file</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92476">Bug 92476</a> - [cts] ES2-CTS.gtf.GL2ExtensionTests.egl_image.egl_image fails</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92623">Bug 92623</a> - Differences in prog_data ignored when caching fragment programs (causes hangs)</li>
+
+</ul>
+
+
+<h2>Changes</h2>
+
+<p>Alex Deucher (1):</p>
+<ul>
+  <li>radeon/uvd: don't expose HEVC on old UVD hw (v3)</li>
+</ul>
+
+<p>Ben Widawsky (1):</p>
+<ul>
+  <li>i965/skl: Add GT4 PCI IDs</li>
+</ul>
+
+<p>Emil Velikov (4):</p>
+<ul>
+  <li>docs: add sha256 checksums for 11.0.4</li>
+  <li>cherry-ignore: ignore a possible wrong nomination</li>
+  <li>Revert "mesa/glformats: Undo code changes from _mesa_base_tex_format() move"</li>
+  <li>Update version to 11.0.5</li>
+</ul>
+
+<p>Emmanuel Gil Peyrot (1):</p>
+<ul>
+  <li>gbm.h: Add a missing stddef.h include for size_t.</li>
+</ul>
+
+<p>Eric Anholt (1):</p>
+<ul>
+  <li>vc4: When the create ioctl fails, free our cache and try again.</li>
+</ul>
+
+<p>Ian Romanick (1):</p>
+<ul>
+  <li>i965: Fix is-renderable check in intel_image_target_renderbuffer_storage</li>
+</ul>
+
+<p>Ilia Mirkin (3):</p>
+<ul>
+  <li>nvc0: respect edgeflag attribute width</li>
+  <li>nouveau: set MaxDrawBuffers to the same value as MaxColorAttachments</li>
+  <li>nouveau: relax fence emit space assert</li>
+</ul>
+
+<p>Ivan Kalvachev (1):</p>
+<ul>
+  <li>r600g: Fix special negative immediate constants when using ABS modifier.</li>
+</ul>
+
+<p>Jason Ekstrand (2):</p>
+<ul>
+  <li>nir/lower_vec_to_movs: Pass the shader around directly</li>
+  <li>nir: Report progress from lower_vec_to_movs().</li>
+</ul>
+
+<p>Jose Fonseca (2):</p>
+<ul>
+  <li>gallivm: Translate all util_cpu_caps bits to LLVM attributes.</li>
+  <li>gallivm: Explicitly disable unsupported CPU features.</li>
+</ul>
+
+<p>Julien Isorce (4):</p>
+<ul>
+  <li>st/va: pass picture desc to begin and decode</li>
+  <li>nvc0: fix crash when nv50_miptree_from_handle fails</li>
+  <li>st/va: do not destroy old buffer when new one failed</li>
+  <li>st/va: add more errors checks in vlVaBufferSetNumElements and vlVaMapBuffer</li>
+</ul>
+
+<p>Kenneth Graunke (6):</p>
+<ul>
+  <li>i965: Fix missing BRW_NEW_*_PROG_DATA flagging caused by cache reuse.</li>
+  <li>nir: Report progress from nir_split_var_copies().</li>
+  <li>nir: Properly invalidate metadata in nir_split_var_copies().</li>
+  <li>nir: Properly invalidate metadata in nir_opt_copy_prop().</li>
+  <li>nir: Properly invalidate metadata in nir_lower_vec_to_movs().</li>
+  <li>nir: Properly invalidate metadata in nir_opt_remove_phis().</li>
+</ul>
+
+<p>Marek Olšák (1):</p>
+<ul>
+  <li>radeonsi: add register definitions for Stoney</li>
+</ul>
+
+<p>Nanley Chery (1):</p>
+<ul>
+  <li>mesa/glformats: Undo code changes from _mesa_base_tex_format() move</li>
+</ul>
+
+<p>Nicolai Hähnle (1):</p>
+<ul>
+  <li>st/mesa: fix mipmap generation for immutable textures with incomplete pyramids</li>
+</ul>
+
+<p>Nigel Stewart (1):</p>
+<ul>
+  <li>osmesa: Expose GL entry points for Windows build via DEF file.</li>
+</ul>
+
+<p>Roland Scheidegger (1):</p>
+<ul>
+  <li>gallivm: disable f16c when not using AVX</li>
+</ul>
+
+<p>Samuel Li (2):</p>
+<ul>
+  <li>radeonsi: add support for Stoney asics (v3)</li>
+  <li>radeonsi: add Stoney pci ids</li>
+</ul>
+
+
+</div>
+</body>
+</html>

From 6435d8ac5a54b6c09b4a8ba5aa83225acee132c0 Mon Sep 17 00:00:00 2001
From: Emil Velikov <emil.velikov@collabora.com>
Date: Wed, 11 Nov 2015 11:10:30 +0000
Subject: [PATCH 151/287] docs: add sha256 checksums for 11.0.5

Signed-off-by: Emil Velikov <emil.velikov@collabora.com>
(cherry picked from commit 66c949d0a19b1e601243be22b6506528b866388b)
---
 docs/relnotes/11.0.5.html | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/docs/relnotes/11.0.5.html b/docs/relnotes/11.0.5.html
index 6a2fe55b2f4..05fa0b98325 100644
--- a/docs/relnotes/11.0.5.html
+++ b/docs/relnotes/11.0.5.html
@@ -31,7 +31,8 @@ because compatibility contexts are not supported.
 
 <h2>SHA256 checksums</h2>
 <pre>
-TBD
+8495ef5c06f7f726452462b7d408a5b40048373ff908f2283a3b4d1f49b45ee6  mesa-11.0.5.tar.gz
+9c255a2a6695fcc6ef4a279e1df0aeaf417dc142f39ee59dfb533d80494bb67a  mesa-11.0.5.tar.xz
 </pre>
 
 

From cbb7d90e5784b1e44c1801f74c3088638940442d Mon Sep 17 00:00:00 2001
From: Emil Velikov <emil.velikov@collabora.com>
Date: Wed, 11 Nov 2015 11:18:27 +0000
Subject: [PATCH 152/287] docs: add news item and link release notes for 11.0.5

Signed-off-by: Emil Velikov <emil.velikov@collabora.com>
---
 docs/index.html    | 6 ++++++
 docs/relnotes.html | 1 +
 2 files changed, 7 insertions(+)

diff --git a/docs/index.html b/docs/index.html
index c8d4a5c2699..0e317fdf411 100644
--- a/docs/index.html
+++ b/docs/index.html
@@ -16,6 +16,12 @@
 
 <h1>News</h1>
 
+<h2>November 11, 2015</h2>
+<p>
+<a href="relnotes/11.0.5.html">Mesa 11.0.5</a> is released.
+This is a bug-fix release.
+</p>
+
 <h2>October 24, 2015</h2>
 <p>
 <a href="relnotes/11.0.4.html">Mesa 11.0.4</a> is released.
diff --git a/docs/relnotes.html b/docs/relnotes.html
index d1dde4fd726..588cf0b2c83 100644
--- a/docs/relnotes.html
+++ b/docs/relnotes.html
@@ -21,6 +21,7 @@ The release notes summarize what's new or changed in each Mesa release.
 </p>
 
 <ul>
+<li><a href="relnotes/11.0.5.html">11.0.5 release notes</a>
 <li><a href="relnotes/11.0.4.html">11.0.4 release notes</a>
 <li><a href="relnotes/11.0.3.html">11.0.3 release notes</a>
 <li><a href="relnotes/10.6.9.html">10.6.9 release notes</a>

From a2987ff57f08325f6e1dedae578bd6251a22b2b4 Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Fri, 24 Jul 2015 21:15:35 -0700
Subject: [PATCH 153/287] i965: Map GL_PATCHES to 3DPRIM_PATCHLIST_n.

Inspired by a patch by Fabian Bieler.

Fabian defined a _3DPRIM_PATCHLIST_0 macro (which isn't actually a valid
topology type); I instead chose to make a macro that takes an argument.
He also took the number of patch vertices from _mesa_prim (which was set
to ctx->TessCtrlProgram.patch_vertices) - I chose to use it directly to
avoid the need for the VBO patch.

v2: Change macro to 0x20 + (n - 1) instead of 0x1F + n to better match
    the documentation (suggested by Ian).

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
---
 src/mesa/drivers/dri/i965/brw_defines.h | 2 ++
 src/mesa/drivers/dri/i965/brw_draw.c    | 9 ++++++++-
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index 0396e13d0c2..99a3a2d25d8 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -78,6 +78,8 @@
 #define _3DPRIM_LINESTRIP_BF      0x13
 #define _3DPRIM_LINESTRIP_CONT_BF 0x14
 #define _3DPRIM_TRIFAN_NOSTIPPLE  0x16
+#define _3DPRIM_PATCHLIST(n) ({ assert(n > 0 && n <= 32); 0x20 + (n - 1); })
+
 
 /* We use this offset to be able to pass native primitive types in struct
  * _mesa_prim::mode.  Native primitive types are BRW_PRIM_OFFSET +
diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c
index 39a26b05201..bff484f09d8 100644
--- a/src/mesa/drivers/dri/i965/brw_draw.c
+++ b/src/mesa/drivers/dri/i965/brw_draw.c
@@ -140,9 +140,16 @@ brw_set_prim(struct brw_context *brw, const struct _mesa_prim *prim)
 static void
 gen6_set_prim(struct brw_context *brw, const struct _mesa_prim *prim)
 {
+   const struct gl_context *ctx = &brw->ctx;
+   uint32_t hw_prim;
+
    DBG("PRIM: %s\n", _mesa_enum_to_string(prim->mode));
 
-   const uint32_t hw_prim = get_hw_prim_for_gl_prim(prim->mode);
+   if (prim->mode == GL_PATCHES)
+      hw_prim = _3DPRIM_PATCHLIST(ctx->TessCtrlProgram.patch_vertices);
+   else
+      hw_prim = get_hw_prim_for_gl_prim(prim->mode);
+
    if (hw_prim != brw->primitive) {
       brw->primitive = hw_prim;
       brw->ctx.NewDriverState |= BRW_NEW_PRIMITIVE;

From ecb5e0a9861679846a5dbae2a8511b0e624f8f6e Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Tue, 25 Nov 2014 02:59:28 -0800
Subject: [PATCH 154/287] i965: Combine BRW_NEW_*_BINDING_TABLE dirty bits.

A while back, we moved to directly emitting the Gen7+ state when
constructing the binding tables.  These flags are only used on
Gen4-6, which emit all the binding table pointers at once.

We gain nothing by having separate flags, so combine them.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Topi Pohjolainen <topi.pohjolainen@intel.com>
---
 .../drivers/dri/i965/brw_binding_tables.c     | 21 +++++++------------
 src/mesa/drivers/dri/i965/brw_context.h       |  8 ++-----
 src/mesa/drivers/dri/i965/brw_state.h         |  1 -
 src/mesa/drivers/dri/i965/brw_state_upload.c  |  4 +---
 src/mesa/drivers/dri/i965/gen6_sol.c          |  6 +++---
 5 files changed, 14 insertions(+), 26 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_binding_tables.c b/src/mesa/drivers/dri/i965/brw_binding_tables.c
index 508f1f019ae..d8226e0ca05 100644
--- a/src/mesa/drivers/dri/i965/brw_binding_tables.c
+++ b/src/mesa/drivers/dri/i965/brw_binding_tables.c
@@ -88,7 +88,6 @@ reserve_hw_bt_space(struct brw_context *brw, unsigned bytes)
 void
 brw_upload_binding_table(struct brw_context *brw,
                          uint32_t packet_name,
-                         GLbitfield brw_new_binding_table,
                          const struct brw_stage_prog_data *prog_data,
                          struct brw_stage_state *stage_state)
 {
@@ -127,7 +126,7 @@ brw_upload_binding_table(struct brw_context *brw,
       }
    }
 
-   brw->ctx.NewDriverState |= brw_new_binding_table;
+   brw->ctx.NewDriverState |= BRW_NEW_BINDING_TABLE_POINTERS;
 
    if (brw->gen >= 7) {
       if (brw->use_resource_streamer) {
@@ -159,7 +158,7 @@ brw_vs_upload_binding_table(struct brw_context *brw)
    const struct brw_stage_prog_data *prog_data = brw->vs.base.prog_data;
    brw_upload_binding_table(brw,
                             _3DSTATE_BINDING_TABLE_POINTERS_VS,
-                            BRW_NEW_VS_BINDING_TABLE, prog_data,
+                            prog_data,
                             &brw->vs.base);
 }
 
@@ -183,7 +182,7 @@ brw_upload_wm_binding_table(struct brw_context *brw)
    const struct brw_stage_prog_data *prog_data = brw->wm.base.prog_data;
    brw_upload_binding_table(brw,
                             _3DSTATE_BINDING_TABLE_POINTERS_PS,
-                            BRW_NEW_PS_BINDING_TABLE, prog_data,
+                            prog_data,
                             &brw->wm.base);
 }
 
@@ -209,7 +208,7 @@ brw_gs_upload_binding_table(struct brw_context *brw)
    const struct brw_stage_prog_data *prog_data = brw->gs.base.prog_data;
    brw_upload_binding_table(brw,
                             _3DSTATE_BINDING_TABLE_POINTERS_GS,
-                            BRW_NEW_GS_BINDING_TABLE, prog_data,
+                            prog_data,
                             &brw->gs.base);
 }
 
@@ -406,10 +405,8 @@ const struct brw_tracked_state brw_binding_table_pointers = {
    .dirty = {
       .mesa = 0,
       .brw = BRW_NEW_BATCH |
-             BRW_NEW_GS_BINDING_TABLE |
-             BRW_NEW_PS_BINDING_TABLE |
-             BRW_NEW_STATE_BASE_ADDRESS |
-             BRW_NEW_VS_BINDING_TABLE,
+             BRW_NEW_BINDING_TABLE_POINTERS |
+             BRW_NEW_STATE_BASE_ADDRESS,
    },
    .emit = gen4_upload_binding_table_pointers,
 };
@@ -442,10 +439,8 @@ const struct brw_tracked_state gen6_binding_table_pointers = {
    .dirty = {
       .mesa = 0,
       .brw = BRW_NEW_BATCH |
-             BRW_NEW_GS_BINDING_TABLE |
-             BRW_NEW_PS_BINDING_TABLE |
-             BRW_NEW_STATE_BASE_ADDRESS |
-             BRW_NEW_VS_BINDING_TABLE,
+             BRW_NEW_BINDING_TABLE_POINTERS |
+             BRW_NEW_STATE_BASE_ADDRESS,
    },
    .emit = gen6_upload_binding_table_pointers,
 };
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index c83f47bdff7..4b2db61c758 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -184,9 +184,7 @@ enum brw_state_id {
    BRW_STATE_CONTEXT,
    BRW_STATE_PSP,
    BRW_STATE_SURFACES,
-   BRW_STATE_VS_BINDING_TABLE,
-   BRW_STATE_GS_BINDING_TABLE,
-   BRW_STATE_PS_BINDING_TABLE,
+   BRW_STATE_BINDING_TABLE_POINTERS,
    BRW_STATE_INDICES,
    BRW_STATE_VERTICES,
    BRW_STATE_BATCH,
@@ -261,9 +259,7 @@ enum brw_state_id {
 #define BRW_NEW_CONTEXT                 (1ull << BRW_STATE_CONTEXT)
 #define BRW_NEW_PSP                     (1ull << BRW_STATE_PSP)
 #define BRW_NEW_SURFACES                (1ull << BRW_STATE_SURFACES)
-#define BRW_NEW_VS_BINDING_TABLE        (1ull << BRW_STATE_VS_BINDING_TABLE)
-#define BRW_NEW_GS_BINDING_TABLE        (1ull << BRW_STATE_GS_BINDING_TABLE)
-#define BRW_NEW_PS_BINDING_TABLE        (1ull << BRW_STATE_PS_BINDING_TABLE)
+#define BRW_NEW_BINDING_TABLE_POINTERS  (1ull << BRW_STATE_BINDING_TABLE_POINTERS)
 #define BRW_NEW_INDICES                 (1ull << BRW_STATE_INDICES)
 #define BRW_NEW_VERTICES                (1ull << BRW_STATE_VERTICES)
 /**
diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h
index 2aa1248fea6..94734bae621 100644
--- a/src/mesa/drivers/dri/i965/brw_state.h
+++ b/src/mesa/drivers/dri/i965/brw_state.h
@@ -172,7 +172,6 @@ brw_state_dirty(struct brw_context *brw, GLuint mesa_flags, uint64_t brw_flags)
 /* brw_binding_tables.c */
 void brw_upload_binding_table(struct brw_context *brw,
                               uint32_t packet_name,
-                              GLbitfield brw_new_binding_table,
                               const struct brw_stage_prog_data *prog_data,
                               struct brw_stage_state *stage_state);
 
diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c
index 0344b8a7fb0..6f8daf6d4d2 100644
--- a/src/mesa/drivers/dri/i965/brw_state_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_state_upload.c
@@ -589,9 +589,7 @@ static struct dirty_bit_map brw_bits[] = {
    DEFINE_BIT(BRW_NEW_CONTEXT),
    DEFINE_BIT(BRW_NEW_PSP),
    DEFINE_BIT(BRW_NEW_SURFACES),
-   DEFINE_BIT(BRW_NEW_VS_BINDING_TABLE),
-   DEFINE_BIT(BRW_NEW_GS_BINDING_TABLE),
-   DEFINE_BIT(BRW_NEW_PS_BINDING_TABLE),
+   DEFINE_BIT(BRW_NEW_BINDING_TABLE_POINTERS),
    DEFINE_BIT(BRW_NEW_INDICES),
    DEFINE_BIT(BRW_NEW_VERTICES),
    DEFINE_BIT(BRW_NEW_BATCH),
diff --git a/src/mesa/drivers/dri/i965/gen6_sol.c b/src/mesa/drivers/dri/i965/gen6_sol.c
index 3899ce9451f..2f6eadffd2e 100644
--- a/src/mesa/drivers/dri/i965/gen6_sol.c
+++ b/src/mesa/drivers/dri/i965/gen6_sol.c
@@ -131,7 +131,7 @@ brw_gs_upload_binding_table(struct brw_context *brw)
       }
       if (!need_binding_table) {
          if (brw->ff_gs.bind_bo_offset != 0) {
-            brw->ctx.NewDriverState |= BRW_NEW_GS_BINDING_TABLE;
+            brw->ctx.NewDriverState |= BRW_NEW_BINDING_TABLE_POINTERS;
             brw->ff_gs.bind_bo_offset = 0;
          }
          return;
@@ -162,7 +162,7 @@ brw_gs_upload_binding_table(struct brw_context *brw)
       if (!need_binding_table) {
          if (brw->gs.base.bind_bo_offset != 0) {
             brw->gs.base.bind_bo_offset = 0;
-            brw->ctx.NewDriverState |= BRW_NEW_GS_BINDING_TABLE;
+            brw->ctx.NewDriverState |= BRW_NEW_BINDING_TABLE_POINTERS;
          }
          return;
       }
@@ -179,7 +179,7 @@ brw_gs_upload_binding_table(struct brw_context *brw)
              BRW_MAX_SURFACES * sizeof(uint32_t));
    }
 
-   brw->ctx.NewDriverState |= BRW_NEW_GS_BINDING_TABLE;
+   brw->ctx.NewDriverState |= BRW_NEW_BINDING_TABLE_POINTERS;
 }
 
 const struct brw_tracked_state gen6_gs_binding_table = {

From e42a29531ae3d5dedb72011da2947357dfa8715b Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Mon, 9 Nov 2015 23:55:58 -0800
Subject: [PATCH 155/287] i965: Print force_writemask_all in
 dump_instructions().

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Matt Turner <mattst88@gmail.com>
---
 src/mesa/drivers/dri/i965/brw_fs.cpp   | 3 +++
 src/mesa/drivers/dri/i965/brw_vec4.cpp | 3 +++
 2 files changed, 6 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index ad94fa479e2..be712e56209 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -4761,6 +4761,9 @@ fs_visitor::dump_instruction(backend_instruction *be_inst, FILE *file)
 
    fprintf(file, " ");
 
+   if (inst->force_writemask_all)
+      fprintf(file, "NoMask ");
+
    if (dispatch_width == 16 && inst->exec_size == 8) {
       if (inst->force_sechalf)
          fprintf(file, "2ndhalf ");
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index 8350a024e88..5cba3b31dc2 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -1552,6 +1552,9 @@ vec4_visitor::dump_instruction(backend_instruction *be_inst, FILE *file)
          fprintf(file, ", ");
    }
 
+   if (inst->force_writemask_all)
+      fprintf(file, " NoMask");
+
    fprintf(file, "\n");
 }
 

From eb812921ac3a9b43e2cdd5dc65db34ba3fe9189b Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Mon, 9 Nov 2015 14:51:56 -0700
Subject: [PATCH 156/287] st/wgl: fix locking issue in
 stw_st_framebuffer_present_locked()

When stw_st_framebuffer_present_locked() is called, the
stw_framebuffer's mutex will already be locked.  Normally, the
stw_framebuffer_present_locked() function calls
stw_framebuffer_release() to unlock the mutex when it's done.  But if
for some reason the 'resource' pointer in
stw_st_framebuffer_present_locked() is null, we'd return without
unlocking the stw_framebuffer.  This fixes that to avoid potential
deadlocks.

Reviewed-by: Charmaine Lee <charmainel@vmware.com>
---
 src/gallium/state_trackers/wgl/stw_st.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/gallium/state_trackers/wgl/stw_st.c b/src/gallium/state_trackers/wgl/stw_st.c
index b41171a9195..2d5d4379932 100644
--- a/src/gallium/state_trackers/wgl/stw_st.c
+++ b/src/gallium/state_trackers/wgl/stw_st.c
@@ -169,6 +169,9 @@ stw_st_framebuffer_present_locked(HDC hdc,
    if (resource) {
       stw_framebuffer_present_locked(hdc, stwfb->fb, resource);
    }
+   else {
+      stw_framebuffer_release(stwfb->fb);
+   }
 
    return TRUE;
 }

From b93cb6c1dc6e87fe2528c46ec1c30c6a2a628587 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Mon, 9 Nov 2015 17:19:35 -0700
Subject: [PATCH 157/287] st/wgl: whitespace/formatting fixes

---
 src/gallium/state_trackers/wgl/stw_context.c  | 79 +++++++++----------
 .../state_trackers/wgl/stw_framebuffer.c      | 11 +--
 .../state_trackers/wgl/stw_framebuffer.h      | 21 ++---
 3 files changed, 48 insertions(+), 63 deletions(-)

diff --git a/src/gallium/state_trackers/wgl/stw_context.c b/src/gallium/state_trackers/wgl/stw_context.c
index 93f61e76efa..0f859649217 100644
--- a/src/gallium/state_trackers/wgl/stw_context.c
+++ b/src/gallium/state_trackers/wgl/stw_context.c
@@ -59,11 +59,9 @@ stw_current_context(void)
    return (struct stw_context *) ((st) ? st->st_manager_private : NULL);
 }
 
+
 BOOL APIENTRY
-DrvCopyContext(
-   DHGLRC dhrcSource,
-   DHGLRC dhrcDest,
-   UINT fuMask )
+DrvCopyContext(DHGLRC dhrcSource, DHGLRC dhrcDest, UINT fuMask)
 {
    struct stw_context *src;
    struct stw_context *dst;
@@ -73,11 +71,11 @@ DrvCopyContext(
       return FALSE;
 
    pipe_mutex_lock( stw_dev->ctx_mutex );
-   
+
    src = stw_lookup_context_locked( dhrcSource );
    dst = stw_lookup_context_locked( dhrcDest );
 
-   if (src && dst) { 
+   if (src && dst) {
       /* FIXME */
       assert(0);
       (void) src;
@@ -86,14 +84,13 @@ DrvCopyContext(
    }
 
    pipe_mutex_unlock( stw_dev->ctx_mutex );
-   
+
    return ret;
 }
 
+
 BOOL APIENTRY
-DrvShareLists(
-   DHGLRC dhglrc1,
-   DHGLRC dhglrc2 )
+DrvShareLists(DHGLRC dhglrc1, DHGLRC dhglrc2)
 {
    struct stw_context *ctx1;
    struct stw_context *ctx2;
@@ -103,7 +100,7 @@ DrvShareLists(
       return FALSE;
 
    pipe_mutex_lock( stw_dev->ctx_mutex );
-   
+
    ctx1 = stw_lookup_context_locked( dhglrc1 );
    ctx2 = stw_lookup_context_locked( dhglrc2 );
 
@@ -111,21 +108,20 @@ DrvShareLists(
       ret = ctx2->st->share(ctx2->st, ctx1->st);
 
    pipe_mutex_unlock( stw_dev->ctx_mutex );
-   
+
    return ret;
 }
 
+
 DHGLRC APIENTRY
-DrvCreateContext(
-   HDC hdc )
+DrvCreateContext(HDC hdc)
 {
    return DrvCreateLayerContext( hdc, 0 );
 }
 
+
 DHGLRC APIENTRY
-DrvCreateLayerContext(
-   HDC hdc,
-   INT iLayerPlane )
+DrvCreateLayerContext(HDC hdc, INT iLayerPlane)
 {
    return stw_create_context_attribs(hdc, iLayerPlane, 0, 1, 0, 0,
                                      WGL_CONTEXT_COMPATIBILITY_PROFILE_BIT_ARB,
@@ -297,13 +293,13 @@ no_ctx:
    return 0;
 }
 
+
 BOOL APIENTRY
-DrvDeleteContext(
-   DHGLRC dhglrc )
+DrvDeleteContext(DHGLRC dhglrc)
 {
    struct stw_context *ctx ;
    BOOL ret = FALSE;
-   
+
    if (!stw_dev)
       return FALSE;
 
@@ -314,7 +310,7 @@ DrvDeleteContext(
 
    if (ctx) {
       struct stw_context *curctx = stw_current_context();
-      
+
       /* Unbind current if deleting current context. */
       if (curctx == ctx)
          stw_dev->stapi->make_current(stw_dev->stapi, NULL, NULL, NULL);
@@ -332,9 +328,9 @@ DrvDeleteContext(
    return ret;
 }
 
+
 BOOL APIENTRY
-DrvReleaseContext(
-   DHGLRC dhglrc )
+DrvReleaseContext(DHGLRC dhglrc)
 {
    struct stw_context *ctx;
 
@@ -347,7 +343,7 @@ DrvReleaseContext(
 
    if (!ctx)
       return FALSE;
-   
+
    /* The expectation is that ctx is the same context which is
     * current for this thread.  We should check that and return False
     * if not the case.
@@ -368,28 +364,28 @@ stw_get_current_context( void )
    struct stw_context *ctx;
 
    ctx = stw_current_context();
-   if(!ctx)
+   if (!ctx)
       return 0;
-   
+
    return ctx->dhglrc;
 }
 
+
 HDC
 stw_get_current_dc( void )
 {
    struct stw_context *ctx;
 
    ctx = stw_current_context();
-   if(!ctx)
+   if (!ctx)
       return NULL;
-   
+
    return ctx->hdc;
 }
 
+
 BOOL
-stw_make_current(
-   HDC hdc,
-   DHGLRC dhglrc )
+stw_make_current(HDC hdc, DHGLRC dhglrc)
 {
    struct stw_context *curctx = NULL;
    struct stw_context *ctx = NULL;
@@ -425,8 +421,9 @@ stw_make_current(
       }
       else {
          /* Applications should call SetPixelFormat before creating a context,
-          * but not all do, and the opengl32 runtime seems to use a default pixel
-          * format in some cases, so we must create a framebuffer for those here
+          * but not all do, and the opengl32 runtime seems to use a default
+          * pixel format in some cases, so we must create a framebuffer for
+          * those here.
           */
          int iPixelFormat = GetPixelFormat(hdc);
          if (iPixelFormat)
@@ -434,7 +431,7 @@ stw_make_current(
          if (!fb)
             goto fail;
       }
-   
+
       if (fb->iPixelFormat != ctx->iPixelFormat) {
          SetLastError(ERROR_INVALID_PIXEL_FORMAT);
          goto fail;
@@ -449,7 +446,7 @@ stw_make_current(
    } else {
       ret = stw_dev->stapi->make_current(stw_dev->stapi, NULL, NULL, NULL);
    }
-   
+
 fail:
 
    if (fb) {
@@ -457,7 +454,8 @@ fail:
    }
 
    /* On failure, make the thread's current rendering context not current
-    * before returning */
+    * before returning.
+    */
    if (!ret) {
       stw_dev->stapi->make_current(stw_dev->stapi, NULL, NULL, NULL);
       ctx = NULL;
@@ -483,6 +481,7 @@ stw_notify_current_locked( struct stw_framebuffer *fb )
    p_atomic_inc(&fb->stfb->stamp);
 }
 
+
 /**
  * Although WGL allows different dispatch entrypoints per context
  */
@@ -829,15 +828,13 @@ static const GLCLTPROCTABLE cpt =
    }
 };
 
+
 PGLCLTPROCTABLE APIENTRY
-DrvSetContext(
-   HDC hdc,
-   DHGLRC dhglrc,
-   PFN_SETPROCTABLE pfnSetProcTable )
+DrvSetContext(HDC hdc, DHGLRC dhglrc, PFN_SETPROCTABLE pfnSetProcTable)
 {
    PGLCLTPROCTABLE r = (PGLCLTPROCTABLE)&cpt;
 
-   if (!stw_make_current( hdc, dhglrc ))
+   if (!stw_make_current(hdc, dhglrc))
       r = NULL;
 
    return r;
diff --git a/src/gallium/state_trackers/wgl/stw_framebuffer.c b/src/gallium/state_trackers/wgl/stw_framebuffer.c
index 11f60b6c763..f7cfab5d5e1 100644
--- a/src/gallium/state_trackers/wgl/stw_framebuffer.c
+++ b/src/gallium/state_trackers/wgl/stw_framebuffer.c
@@ -46,8 +46,7 @@
  * stw_dev::fb_mutex global lock.
  */
 static inline struct stw_framebuffer *
-stw_framebuffer_from_hwnd_locked(
-   HWND hwnd )
+stw_framebuffer_from_hwnd_locked(HWND hwnd)
 {
    struct stw_framebuffer *fb;
 
@@ -118,7 +117,6 @@ stw_framebuffer_get_size(struct stw_framebuffer *fb)
    /*
     * Sanity checking.
     */
-
    assert(fb->hWnd);
    assert(fb->width && fb->height);
    assert(fb->client_rect.right  == fb->client_rect.left + fb->width);
@@ -127,7 +125,6 @@ stw_framebuffer_get_size(struct stw_framebuffer *fb)
    /*
     * Get the client area size.
     */
-
    if (!GetClientRect(fb->hWnd, &client_rect)) {
       return;
    }
@@ -145,7 +142,6 @@ stw_framebuffer_get_size(struct stw_framebuffer *fb)
        * preserve the current window size, until the window is restored or
        * maximized again.
        */
-
       return;
    }
 
@@ -377,8 +373,7 @@ stw_framebuffer_cleanup(void)
  * Given an hdc, return the corresponding stw_framebuffer.
  */
 static inline struct stw_framebuffer *
-stw_framebuffer_from_hdc_locked(
-   HDC hdc )
+stw_framebuffer_from_hdc_locked(HDC hdc)
 {
    HWND hwnd;
 
@@ -444,7 +439,7 @@ DrvSetPixelFormat(HDC hdc, LONG iPixelFormat)
    fb = stw_framebuffer_from_hdc_locked(hdc);
    if (fb) {
       /*
-       * SetPixelFormat must be called only once.  However ignore 
+       * SetPixelFormat must be called only once.  However ignore
        * pbuffers, for which the framebuffer object is created first.
        */
       boolean bPbuffer = fb->bPbuffer;
diff --git a/src/gallium/state_trackers/wgl/stw_framebuffer.h b/src/gallium/state_trackers/wgl/stw_framebuffer.h
index 28962c8cbd5..c7498b245cb 100644
--- a/src/gallium/state_trackers/wgl/stw_framebuffer.h
+++ b/src/gallium/state_trackers/wgl/stw_framebuffer.h
@@ -116,14 +116,11 @@ struct stw_framebuffer
  * must be called when done 
  */
 struct stw_framebuffer *
-stw_framebuffer_create(
-   HDC hdc,
-   int iPixelFormat );
+stw_framebuffer_create(HDC hdc, int iPixelFormat);
 
 void
-stw_framebuffer_reference(
-   struct stw_framebuffer **ptr,
-   struct stw_framebuffer *fb);
+stw_framebuffer_reference(struct stw_framebuffer **ptr,
+                          struct stw_framebuffer *fb);
 
 /**
  * Search a framebuffer with a matching HWND.
@@ -132,8 +129,7 @@ stw_framebuffer_reference(
  * must be called when done 
  */
 struct stw_framebuffer *
-stw_framebuffer_from_hwnd(
-   HWND hwnd );
+stw_framebuffer_from_hwnd(HWND hwnd);
 
 /**
  * Search a framebuffer with a matching HDC.
@@ -142,8 +138,7 @@ stw_framebuffer_from_hwnd(
  * must be called when done 
  */
 struct stw_framebuffer *
-stw_framebuffer_from_hdc(
-   HDC hdc );
+stw_framebuffer_from_hdc(HDC hdc);
 
 BOOL
 stw_framebuffer_present_locked(HDC hdc,
@@ -151,8 +146,7 @@ stw_framebuffer_present_locked(HDC hdc,
                                struct pipe_resource *res);
 
 void
-stw_framebuffer_update(
-   struct stw_framebuffer *fb);
+stw_framebuffer_update(struct stw_framebuffer *fb);
 
 /**
  * Release stw_framebuffer::mutex lock. This framebuffer must not be accessed
@@ -160,8 +154,7 @@ stw_framebuffer_update(
  * in the meanwhile.
  */
 void
-stw_framebuffer_release(
-   struct stw_framebuffer *fb);
+stw_framebuffer_release(struct stw_framebuffer *fb);
 
 /**
  * Cleanup any existing framebuffers when exiting application.

From 004ed6f4a9eb50d7a5e79040b036beff63a3440b Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Mon, 9 Nov 2015 17:35:55 -0700
Subject: [PATCH 158/287] st/wgl: improve some function comments
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In particular, explain when stw_framebuffer objects are
locked/unlocked/etc.

Reviewed-by: Sinclair Yeh <syeh@vmware.com>
Reviewed-by: Charmaine Lee <charmainel@vmware.com>
Reviewed-by: José Fonseca <jfonseca@vmware.com>
---
 .../state_trackers/wgl/stw_framebuffer.c      | 36 +++++++++++++++----
 1 file changed, 30 insertions(+), 6 deletions(-)

diff --git a/src/gallium/state_trackers/wgl/stw_framebuffer.c b/src/gallium/state_trackers/wgl/stw_framebuffer.c
index f7cfab5d5e1..95d892add34 100644
--- a/src/gallium/state_trackers/wgl/stw_framebuffer.c
+++ b/src/gallium/state_trackers/wgl/stw_framebuffer.c
@@ -44,6 +44,8 @@
 /**
  * Search the framebuffer with the matching HWND while holding the
  * stw_dev::fb_mutex global lock.
+ * If a stw_framebuffer is found, lock it and return the pointer.
+ * Else, return NULL.
  */
 static inline struct stw_framebuffer *
 stw_framebuffer_from_hwnd_locked(HWND hwnd)
@@ -61,9 +63,11 @@ stw_framebuffer_from_hwnd_locked(HWND hwnd)
 
 
 /**
- * Destroy this framebuffer. Both stw_dev::fb_mutex and stw_framebuffer::mutex
- * must be held, by this order.  If there are still references to the
- * framebuffer, nothing will happen.
+ * Decrement the reference count on the given stw_framebuffer object.
+ * If the reference count hits zero, destroy the object.
+ *
+ * Note: Both stw_dev::fb_mutex and stw_framebuffer::mutex must already
+ * be locked.
  */
 static void
 stw_framebuffer_destroy_locked(struct stw_framebuffer *fb)
@@ -77,6 +81,7 @@ stw_framebuffer_destroy_locked(struct stw_framebuffer *fb)
       return;
    }
 
+   /* remove this stw_framebuffer from the device's linked list */
    link = &stw_dev->fb_head;
    while (*link != fb)
       link = &(*link)->next;
@@ -98,6 +103,9 @@ stw_framebuffer_destroy_locked(struct stw_framebuffer *fb)
 }
 
 
+/**
+ * Unlock the given stw_framebuffer object.
+ */
 void
 stw_framebuffer_release(struct stw_framebuffer *fb)
 {
@@ -106,6 +114,10 @@ stw_framebuffer_release(struct stw_framebuffer *fb)
 }
 
 
+/**
+ * Query the size of the given framebuffer's on-screen window and update
+ * the stw_framebuffer's width/height.
+ */
 static void
 stw_framebuffer_get_size(struct stw_framebuffer *fb)
 {
@@ -229,6 +241,11 @@ stw_call_window_proc(int nCode, WPARAM wParam, LPARAM lParam)
 }
 
 
+/**
+ * Create a new stw_framebuffer object which corresponds to the given
+ * HDC/window.  If successful, we return the new stw_framebuffer object
+ * with its mutex locked.
+ */
 struct stw_framebuffer *
 stw_framebuffer_create(HDC hdc, int iPixelFormat)
 {
@@ -343,6 +360,9 @@ stw_framebuffer_update(struct stw_framebuffer *fb)
 }
 
 
+/**
+ * Try to free all stw_framebuffer objects associated with the device.
+ */
 void
 stw_framebuffer_cleanup(void)
 {
@@ -371,6 +391,7 @@ stw_framebuffer_cleanup(void)
 
 /**
  * Given an hdc, return the corresponding stw_framebuffer.
+ * The returned stw_framebuffer will have its mutex locked.
  */
 static inline struct stw_framebuffer *
 stw_framebuffer_from_hdc_locked(HDC hdc)
@@ -387,7 +408,8 @@ stw_framebuffer_from_hdc_locked(HDC hdc)
 
 
 /**
- * Given an hdc, return the corresponding stw_framebuffer.
+ * Given an HDC, return the corresponding stw_framebuffer.
+ * The returned stw_framebuffer will have its mutex locked.
  */
 struct stw_framebuffer *
 stw_framebuffer_from_hdc(HDC hdc)
@@ -406,7 +428,8 @@ stw_framebuffer_from_hdc(HDC hdc)
 
 
 /**
- * Given an hdc, return the corresponding stw_framebuffer.
+ * Given an HWND, return the corresponding stw_framebuffer.
+ * The returned stw_framebuffer will have its mutex locked.
  */
 struct stw_framebuffer *
 stw_framebuffer_from_hwnd(HWND hwnd)
@@ -545,7 +568,8 @@ DrvPresentBuffers(HDC hdc, PGLPRESENTBUFFERSDATA data)
 /**
  * Queue a composition.
  *
- * It will drop the lock on success.
+ * The stw_framebuffer object must have its mutex locked.  The mutex will
+ * be unlocked here before returning.
  */
 BOOL
 stw_framebuffer_present_locked(HDC hdc,

From 9870a5c6c9664d34432ba551ab9dce6ae79beaa4 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Mon, 9 Nov 2015 17:25:22 -0700
Subject: [PATCH 159/287] st/wgl: clarify code in
 stw_framebuffer_from_hwnd_locked()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Just a minor code change to make it obvious that NULL is returned when
we don't find the given HWND.

Reviewed-by: Sinclair Yeh <syeh@vmware.com>
Reviewed-by: Charmaine Lee <charmainel@vmware.com>
Reviewed-by: José Fonseca <jfonseca@vmware.com>
---
 src/gallium/state_trackers/wgl/stw_framebuffer.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/gallium/state_trackers/wgl/stw_framebuffer.c b/src/gallium/state_trackers/wgl/stw_framebuffer.c
index 95d892add34..09dede8d2a1 100644
--- a/src/gallium/state_trackers/wgl/stw_framebuffer.c
+++ b/src/gallium/state_trackers/wgl/stw_framebuffer.c
@@ -55,10 +55,10 @@ stw_framebuffer_from_hwnd_locked(HWND hwnd)
    for (fb = stw_dev->fb_head; fb != NULL; fb = fb->next)
       if (fb->hWnd == hwnd) {
          pipe_mutex_lock(fb->mutex);
-         break;
+         return fb;
       }
 
-   return fb;
+   return NULL;
 }
 
 

From 68993f77cd9db5b9cb77fe3e7d1c451cf9bd6538 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Tue, 10 Nov 2015 17:03:37 -0700
Subject: [PATCH 160/287] mesa: remove old comments in arrayobj.c

---
 src/mesa/main/arrayobj.c | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/src/mesa/main/arrayobj.c b/src/mesa/main/arrayobj.c
index 061e557a397..897dac6e5db 100644
--- a/src/mesa/main/arrayobj.c
+++ b/src/mesa/main/arrayobj.c
@@ -149,8 +149,6 @@ unbind_array_object_vbos(struct gl_context *ctx, struct gl_vertex_array_object *
 
 /**
  * Allocate and initialize a new vertex array object.
- *
- * This function is intended to be called via
  */
 struct gl_vertex_array_object *
 _mesa_new_vao(struct gl_context *ctx, GLuint name)
@@ -164,9 +162,6 @@ _mesa_new_vao(struct gl_context *ctx, GLuint name)
 
 /**
  * Delete an array object.
- *
- * This function is intended to be called via
- * \c dd_function_table::DeleteArrayObject.
  */
 void
 _mesa_delete_vao(struct gl_context *ctx, struct gl_vertex_array_object *obj)

From 80890eb0d322483fb13558afddc4feae5210f996 Mon Sep 17 00:00:00 2001
From: Jason Ekstrand <jason.ekstrand@intel.com>
Date: Wed, 19 Aug 2015 22:15:33 -0700
Subject: [PATCH 161/287] i965/brw_reg: Add a brw_VxH_indirect helper

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/mesa/drivers/dri/i965/brw_reg.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_reg.h b/src/mesa/drivers/dri/i965/brw_reg.h
index 083c46a3726..c3f77c0210a 100644
--- a/src/mesa/drivers/dri/i965/brw_reg.h
+++ b/src/mesa/drivers/dri/i965/brw_reg.h
@@ -994,6 +994,17 @@ brw_vec1_indirect(unsigned subnr, int offset)
    return reg;
 }
 
+static inline struct brw_reg
+brw_VxH_indirect(unsigned subnr, int offset)
+{
+   struct brw_reg reg = brw_vec1_grf(0, 0);
+   reg.vstride = BRW_VERTICAL_STRIDE_ONE_DIMENSIONAL;
+   reg.subnr = subnr;
+   reg.address_mode = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER;
+   reg.dw1.bits.indirect_offset = offset;
+   return reg;
+}
+
 static inline struct brw_reg
 deref_4f(struct brw_indirect ptr, int offset)
 {

From 912babba7bf1abd3caa49f6372d581ae1afe7e84 Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Sun, 8 Nov 2015 04:46:38 -0500
Subject: [PATCH 162/287] mesa/copyimage: allow width/height to not be
 multiples of block

For compressed textures, the image size is not necessarily a multiple of
the block size (e.g. the last mip levels). Section 18.3.2 (Copying
Between Images) of the OpenGL 4.5 Core Profile spec says:

    An INVALID_VALUE error is generated if the dimensions of either
    subregion exceeds the boundaries of the corresponding image
    object, or if the image format is compressed and the dimensions of
    the subregion fail to meet the alignment constraints of the
    format.

and Section 8.7 (Compressed Texture Images) says:

    An INVALID_OPERATION error is generated if any of the following
    conditions occurs:

      * width is not a multiple of four, and width + xoffset is not
        equal to the value of TEXTURE_WIDTH.
      * height is not a multiple of four, and height + yoffset is not
        equal to the value of TEXTURE_HEIGHT.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=92860
Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
Cc: mesa-stable@lists.freedesktop.org
---
 src/mesa/main/copyimage.c | 37 ++++++++++++++++++++++++++++++++++---
 1 file changed, 34 insertions(+), 3 deletions(-)

diff --git a/src/mesa/main/copyimage.c b/src/mesa/main/copyimage.c
index f02e842f34d..d571d221bce 100644
--- a/src/mesa/main/copyimage.c
+++ b/src/mesa/main/copyimage.c
@@ -62,6 +62,8 @@ prepare_target(struct gl_context *ctx, GLuint name, GLenum target,
                struct gl_renderbuffer **renderbuffer,
                mesa_format *format,
                GLenum *internalFormat,
+               GLuint *width,
+               GLuint *height,
                const char *dbg_prefix)
 {
    if (name == 0) {
@@ -126,6 +128,8 @@ prepare_target(struct gl_context *ctx, GLuint name, GLenum target,
       *renderbuffer = rb;
       *format = rb->Format;
       *internalFormat = rb->InternalFormat;
+      *width = rb->Width;
+      *height = rb->Height;
       *tex_image = NULL;
    } else {
       struct gl_texture_object *texObj = _mesa_lookup_texture(ctx, name);
@@ -194,6 +198,8 @@ prepare_target(struct gl_context *ctx, GLuint name, GLenum target,
       *renderbuffer = NULL;
       *format = (*tex_image)->TexFormat;
       *internalFormat = (*tex_image)->InternalFormat;
+      *width = (*tex_image)->Width;
+      *height = (*tex_image)->Height;
    }
 
    return true;
@@ -423,6 +429,7 @@ _mesa_CopyImageSubData(GLuint srcName, GLenum srcTarget, GLint srcLevel,
    struct gl_renderbuffer *srcRenderbuffer, *dstRenderbuffer;
    mesa_format srcFormat, dstFormat;
    GLenum srcIntFormat, dstIntFormat;
+   GLuint src_w, src_h, dst_w, dst_h;
    GLuint src_bw, src_bh, dst_bw, dst_bh;
    int dstWidth, dstHeight, dstDepth;
    int i;
@@ -445,17 +452,41 @@ _mesa_CopyImageSubData(GLuint srcName, GLenum srcTarget, GLint srcLevel,
 
    if (!prepare_target(ctx, srcName, srcTarget, srcLevel, srcZ, srcDepth,
                        &srcTexImage, &srcRenderbuffer, &srcFormat,
-                       &srcIntFormat, "src"))
+                       &srcIntFormat, &src_w, &src_h, "src"))
       return;
 
    if (!prepare_target(ctx, dstName, dstTarget, dstLevel, dstZ, srcDepth,
                        &dstTexImage, &dstRenderbuffer, &dstFormat,
-                       &dstIntFormat, "dst"))
+                       &dstIntFormat, &dst_w, &dst_h, "dst"))
       return;
 
    _mesa_get_format_block_size(srcFormat, &src_bw, &src_bh);
+
+   /* Section 18.3.2 (Copying Between Images) of the OpenGL 4.5 Core Profile
+    * spec says:
+    *
+    *    An INVALID_VALUE error is generated if the dimensions of either
+    *    subregion exceeds the boundaries of the corresponding image object,
+    *    or if the image format is compressed and the dimensions of the
+    *    subregion fail to meet the alignment constraints of the format.
+    *
+    * and Section 8.7 (Compressed Texture Images) says:
+    *
+    *    An INVALID_OPERATION error is generated if any of the following
+    *    conditions occurs:
+    *
+    *      * width is not a multiple of four, and width + xoffset is not
+    *        equal to the value of TEXTURE_WIDTH.
+    *      * height is not a multiple of four, and height + yoffset is not
+    *        equal to the value of TEXTURE_HEIGHT.
+    *
+    * so we take that to mean that you can copy the "last" block of a
+    * compressed texture image even if it's smaller than the minimum block
+    * dimensions.
+    */
    if ((srcX % src_bw != 0) || (srcY % src_bh != 0) ||
-       (srcWidth % src_bw != 0) || (srcHeight % src_bh != 0)) {
+       (srcWidth % src_bw != 0 && (srcX + srcWidth) != src_w) ||
+       (srcHeight % src_bh != 0 && (srcY + srcHeight) != src_h)) {
       _mesa_error(ctx, GL_INVALID_VALUE,
                   "glCopyImageSubData(unaligned src rectangle)");
       return;

From 918bda23dda36004c95f6441328ecc892e068886 Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Wed, 4 Nov 2015 23:05:07 -0800
Subject: [PATCH 163/287] i965: Split nir_emit_intrinsic by stage with a
 general fallback.

Many intrinsics only apply to a particular stage (such as discard).
In other cases, we may want to interpret them differently based on
the stage (such as load_primitive_id or load_input).

The current method isn't that pretty - we handle all intrinsics in
one giant function.  Sometimes we assert on stage, sometimes we forget.
Different behaviors are handled via if-ladders based on stage.

This commit introduces new nir_emit_<stage>_intrinsic() functions,
and makes nir_emit_instr() call those.  In turn, those fall back to
the generic nir_emit_intrinsic() function for cases they don't want
to handle specially.

This makes it clear which intrinsics only exist in one stage, and makes
it easy to handle inputs/outputs differently for various stages.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
---
 src/mesa/drivers/dri/i965/brw_fs.h       |   8 +
 src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 650 +++++++++++++----------
 2 files changed, 381 insertions(+), 277 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index 2dfcab1c51a..8a93b564c81 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -263,6 +263,14 @@ public:
                             nir_load_const_instr *instr);
    void nir_emit_undef(const brw::fs_builder &bld,
                        nir_ssa_undef_instr *instr);
+   void nir_emit_vs_intrinsic(const brw::fs_builder &bld,
+                              nir_intrinsic_instr *instr);
+   void nir_emit_gs_intrinsic(const brw::fs_builder &bld,
+                              nir_intrinsic_instr *instr);
+   void nir_emit_fs_intrinsic(const brw::fs_builder &bld,
+                              nir_intrinsic_instr *instr);
+   void nir_emit_cs_intrinsic(const brw::fs_builder &bld,
+                              nir_intrinsic_instr *instr);
    void nir_emit_intrinsic(const brw::fs_builder &bld,
                            nir_intrinsic_instr *instr);
    void nir_emit_ssbo_atomic(const brw::fs_builder &bld,
diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index 02b9f5bbc8a..52d5ad1f204 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -360,7 +360,22 @@ fs_visitor::nir_emit_instr(nir_instr *instr)
       break;
 
    case nir_instr_type_intrinsic:
-      nir_emit_intrinsic(abld, nir_instr_as_intrinsic(instr));
+      switch (stage) {
+      case MESA_SHADER_VERTEX:
+         nir_emit_vs_intrinsic(abld, nir_instr_as_intrinsic(instr));
+         break;
+      case MESA_SHADER_GEOMETRY:
+         nir_emit_gs_intrinsic(abld, nir_instr_as_intrinsic(instr));
+         break;
+      case MESA_SHADER_FRAGMENT:
+         nir_emit_fs_intrinsic(abld, nir_instr_as_intrinsic(instr));
+         break;
+      case MESA_SHADER_COMPUTE:
+         nir_emit_cs_intrinsic(abld, nir_instr_as_intrinsic(instr));
+         break;
+      default:
+         unreachable("unsupported shader stage");
+      }
       break;
 
    case nir_instr_type_tex:
@@ -1568,15 +1583,128 @@ fs_visitor::emit_gs_input_load(const fs_reg &dst,
 }
 
 void
-fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr)
+fs_visitor::nir_emit_vs_intrinsic(const fs_builder &bld,
+                                  nir_intrinsic_instr *instr)
 {
+   assert(stage == MESA_SHADER_VERTEX);
+
    fs_reg dest;
    if (nir_intrinsic_infos[instr->intrinsic].has_dest)
       dest = get_nir_dest(instr->dest);
 
-   bool has_indirect = false;
+   switch (instr->intrinsic) {
+   case nir_intrinsic_load_vertex_id:
+      unreachable("should be lowered by lower_vertex_id()");
+
+   case nir_intrinsic_load_vertex_id_zero_base:
+   case nir_intrinsic_load_base_vertex:
+   case nir_intrinsic_load_instance_id: {
+      gl_system_value sv = nir_system_value_from_intrinsic(instr->intrinsic);
+      fs_reg val = nir_system_values[sv];
+      assert(val.file != BAD_FILE);
+      dest.type = val.type;
+      bld.MOV(dest, val);
+      break;
+   }
+
+   default:
+      nir_emit_intrinsic(bld, instr);
+      break;
+   }
+}
+
+void
+fs_visitor::nir_emit_gs_intrinsic(const fs_builder &bld,
+                                  nir_intrinsic_instr *instr)
+{
+   assert(stage == MESA_SHADER_GEOMETRY);
+
+   fs_reg dest;
+   if (nir_intrinsic_infos[instr->intrinsic].has_dest)
+      dest = get_nir_dest(instr->dest);
 
    switch (instr->intrinsic) {
+   case nir_intrinsic_load_primitive_id:
+      assert(stage == MESA_SHADER_GEOMETRY);
+      assert(((struct brw_gs_prog_data *)prog_data)->include_primitive_id);
+      bld.MOV(retype(dest, BRW_REGISTER_TYPE_UD),
+              retype(fs_reg(brw_vec8_grf(2, 0)), BRW_REGISTER_TYPE_UD));
+      break;
+
+   case nir_intrinsic_load_input_indirect:
+   case nir_intrinsic_load_input:
+      unreachable("load_input intrinsics are invalid for the GS stage");
+
+   case nir_intrinsic_load_per_vertex_input_indirect:
+      assert(!"Not allowed");
+   case nir_intrinsic_load_per_vertex_input:
+      emit_gs_input_load(dest, instr->src[0], instr->const_index[0],
+                         instr->num_components);
+      break;
+
+   case nir_intrinsic_emit_vertex_with_counter:
+      emit_gs_vertex(instr->src[0], instr->const_index[0]);
+      break;
+
+   case nir_intrinsic_end_primitive_with_counter:
+      emit_gs_end_primitive(instr->src[0]);
+      break;
+
+   case nir_intrinsic_set_vertex_count:
+      bld.MOV(this->final_gs_vertex_count, get_nir_src(instr->src[0]));
+      break;
+
+   case nir_intrinsic_load_invocation_id: {
+      fs_reg val = nir_system_values[SYSTEM_VALUE_INVOCATION_ID];
+      assert(val.file != BAD_FILE);
+      dest.type = val.type;
+      bld.MOV(dest, val);
+      break;
+   }
+
+   default:
+      nir_emit_intrinsic(bld, instr);
+      break;
+   }
+}
+
+void
+fs_visitor::nir_emit_fs_intrinsic(const fs_builder &bld,
+                                  nir_intrinsic_instr *instr)
+{
+   assert(stage == MESA_SHADER_FRAGMENT);
+   struct brw_wm_prog_data *wm_prog_data =
+      (struct brw_wm_prog_data *) prog_data;
+
+   fs_reg dest;
+   if (nir_intrinsic_infos[instr->intrinsic].has_dest)
+      dest = get_nir_dest(instr->dest);
+
+   switch (instr->intrinsic) {
+   case nir_intrinsic_load_front_face:
+      bld.MOV(retype(dest, BRW_REGISTER_TYPE_D),
+              *emit_frontfacing_interpolation());
+      break;
+
+   case nir_intrinsic_load_sample_pos: {
+      fs_reg sample_pos = nir_system_values[SYSTEM_VALUE_SAMPLE_POS];
+      assert(sample_pos.file != BAD_FILE);
+      dest.type = sample_pos.type;
+      bld.MOV(dest, sample_pos);
+      bld.MOV(offset(dest, bld, 1), offset(sample_pos, bld, 1));
+      break;
+   }
+
+   case nir_intrinsic_load_sample_mask_in:
+   case nir_intrinsic_load_sample_id: {
+      gl_system_value sv = nir_system_value_from_intrinsic(instr->intrinsic);
+      fs_reg val = nir_system_values[sv];
+      assert(val.file != BAD_FILE);
+      dest.type = val.type;
+      bld.MOV(dest, val);
+      break;
+   }
+
    case nir_intrinsic_discard:
    case nir_intrinsic_discard_if: {
       /* We track our discarded pixels in f0.1.  By predicating on it, we can
@@ -1602,6 +1730,248 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
       break;
    }
 
+   case nir_intrinsic_interp_var_at_centroid:
+   case nir_intrinsic_interp_var_at_sample:
+   case nir_intrinsic_interp_var_at_offset: {
+      /* Handle ARB_gpu_shader5 interpolation intrinsics
+       *
+       * It's worth a quick word of explanation as to why we handle the full
+       * variable-based interpolation intrinsic rather than a lowered version
+       * like we do for other inputs.  We have to do that because the way
+       * we set up inputs doesn't allow us to use the already setup inputs for
+       * interpolation.  At the beginning of the shader, we go through all of
+       * the input variables and do the initial interpolation and put it in
+       * the nir_inputs array based on its location as determined in
+       * nir_lower_io.  If the input isn't used, dead code cleans up and
+       * everything works fine.  However, when we get to the ARB_gpu_shader5
+       * interpolation intrinsics, we need to reinterpolate the input
+       * differently.  If we used an intrinsic that just had an index it would
+       * only give us the offset into the nir_inputs array.  However, this is
+       * useless because that value is post-interpolation and we need
+       * pre-interpolation.  In order to get the actual location of the bits
+       * we get from the vertex fetching hardware, we need the variable.
+       */
+      wm_prog_data->pulls_bary = true;
+
+      fs_reg dst_xy = bld.vgrf(BRW_REGISTER_TYPE_F, 2);
+      const glsl_interp_qualifier interpolation =
+         (glsl_interp_qualifier) instr->variables[0]->var->data.interpolation;
+
+      switch (instr->intrinsic) {
+      case nir_intrinsic_interp_var_at_centroid:
+         emit_pixel_interpolater_send(bld,
+                                      FS_OPCODE_INTERPOLATE_AT_CENTROID,
+                                      dst_xy,
+                                      fs_reg(), /* src */
+                                      fs_reg(0u),
+                                      interpolation);
+         break;
+
+      case nir_intrinsic_interp_var_at_sample: {
+         nir_const_value *const_sample = nir_src_as_const_value(instr->src[0]);
+
+         if (const_sample) {
+            unsigned msg_data = const_sample->i[0] << 4;
+
+            emit_pixel_interpolater_send(bld,
+                                         FS_OPCODE_INTERPOLATE_AT_SAMPLE,
+                                         dst_xy,
+                                         fs_reg(), /* src */
+                                         fs_reg(msg_data),
+                                         interpolation);
+         } else {
+            const fs_reg sample_src = retype(get_nir_src(instr->src[0]),
+                                             BRW_REGISTER_TYPE_UD);
+
+            if (nir_src_is_dynamically_uniform(instr->src[0])) {
+               const fs_reg sample_id = bld.emit_uniformize(sample_src);
+               const fs_reg msg_data = vgrf(glsl_type::uint_type);
+               bld.exec_all().group(1, 0).SHL(msg_data, sample_id, fs_reg(4u));
+               emit_pixel_interpolater_send(bld,
+                                            FS_OPCODE_INTERPOLATE_AT_SAMPLE,
+                                            dst_xy,
+                                            fs_reg(), /* src */
+                                            msg_data,
+                                            interpolation);
+            } else {
+               /* Make a loop that sends a message to the pixel interpolater
+                * for the sample number in each live channel. If there are
+                * multiple channels with the same sample number then these
+                * will be handled simultaneously with a single iteration of
+                * the loop.
+                */
+               bld.emit(BRW_OPCODE_DO);
+
+               /* Get the next live sample number into sample_id */
+               const fs_reg sample_id = bld.emit_uniformize(sample_src);
+
+               /* Set the flag register so that we can perform the send
+                * message on all channels that have the same sample number
+                */
+               bld.CMP(bld.null_reg_ud(),
+                       sample_src, sample_id,
+                       BRW_CONDITIONAL_EQ);
+               const fs_reg msg_data = vgrf(glsl_type::uint_type);
+               bld.exec_all().group(1, 0).SHL(msg_data, sample_id, fs_reg(4u));
+               fs_inst *inst =
+                  emit_pixel_interpolater_send(bld,
+                                               FS_OPCODE_INTERPOLATE_AT_SAMPLE,
+                                               dst_xy,
+                                               fs_reg(), /* src */
+                                               msg_data,
+                                               interpolation);
+               set_predicate(BRW_PREDICATE_NORMAL, inst);
+
+               /* Continue the loop if there are any live channels left */
+               set_predicate_inv(BRW_PREDICATE_NORMAL,
+                                 true, /* inverse */
+                                 bld.emit(BRW_OPCODE_WHILE));
+            }
+         }
+
+         break;
+      }
+
+      case nir_intrinsic_interp_var_at_offset: {
+         nir_const_value *const_offset = nir_src_as_const_value(instr->src[0]);
+
+         if (const_offset) {
+            unsigned off_x = MIN2((int)(const_offset->f[0] * 16), 7) & 0xf;
+            unsigned off_y = MIN2((int)(const_offset->f[1] * 16), 7) & 0xf;
+
+            emit_pixel_interpolater_send(bld,
+                                         FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET,
+                                         dst_xy,
+                                         fs_reg(), /* src */
+                                         fs_reg(off_x | (off_y << 4)),
+                                         interpolation);
+         } else {
+            fs_reg src = vgrf(glsl_type::ivec2_type);
+            fs_reg offset_src = retype(get_nir_src(instr->src[0]),
+                                       BRW_REGISTER_TYPE_F);
+            for (int i = 0; i < 2; i++) {
+               fs_reg temp = vgrf(glsl_type::float_type);
+               bld.MUL(temp, offset(offset_src, bld, i), fs_reg(16.0f));
+               fs_reg itemp = vgrf(glsl_type::int_type);
+               bld.MOV(itemp, temp);  /* float to int */
+
+               /* Clamp the upper end of the range to +7/16.
+                * ARB_gpu_shader5 requires that we support a maximum offset
+                * of +0.5, which isn't representable in a S0.4 value -- if
+                * we didn't clamp it, we'd end up with -8/16, which is the
+                * opposite of what the shader author wanted.
+                *
+                * This is legal due to ARB_gpu_shader5's quantization
+                * rules:
+                *
+                * "Not all values of <offset> may be supported; x and y
+                * offsets may be rounded to fixed-point values with the
+                * number of fraction bits given by the
+                * implementation-dependent constant
+                * FRAGMENT_INTERPOLATION_OFFSET_BITS"
+                */
+               set_condmod(BRW_CONDITIONAL_L,
+                           bld.SEL(offset(src, bld, i), itemp, fs_reg(7)));
+            }
+
+            const enum opcode opcode = FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET;
+            emit_pixel_interpolater_send(bld,
+                                         opcode,
+                                         dst_xy,
+                                         src,
+                                         fs_reg(0u),
+                                         interpolation);
+         }
+         break;
+      }
+
+      default:
+         unreachable("Invalid intrinsic");
+      }
+
+      for (unsigned j = 0; j < instr->num_components; j++) {
+         fs_reg src = interp_reg(instr->variables[0]->var->data.location, j);
+         src.type = dest.type;
+
+         bld.emit(FS_OPCODE_LINTERP, dest, dst_xy, src);
+         dest = offset(dest, bld, 1);
+      }
+      break;
+   }
+   default:
+      nir_emit_intrinsic(bld, instr);
+      break;
+   }
+}
+
+void
+fs_visitor::nir_emit_cs_intrinsic(const fs_builder &bld,
+                                  nir_intrinsic_instr *instr)
+{
+   assert(stage == MESA_SHADER_COMPUTE);
+   struct brw_cs_prog_data *cs_prog_data =
+      (struct brw_cs_prog_data *) prog_data;
+
+   fs_reg dest;
+   if (nir_intrinsic_infos[instr->intrinsic].has_dest)
+      dest = get_nir_dest(instr->dest);
+
+   switch (instr->intrinsic) {
+   case nir_intrinsic_barrier:
+      emit_barrier();
+      cs_prog_data->uses_barrier = true;
+      break;
+
+   case nir_intrinsic_load_local_invocation_id:
+   case nir_intrinsic_load_work_group_id: {
+      gl_system_value sv = nir_system_value_from_intrinsic(instr->intrinsic);
+      fs_reg val = nir_system_values[sv];
+      assert(val.file != BAD_FILE);
+      dest.type = val.type;
+      for (unsigned i = 0; i < 3; i++)
+         bld.MOV(offset(dest, bld, i), offset(val, bld, i));
+      break;
+   }
+
+   case nir_intrinsic_load_num_work_groups: {
+      const unsigned surface =
+         cs_prog_data->binding_table.work_groups_start;
+
+      cs_prog_data->uses_num_work_groups = true;
+
+      fs_reg surf_index = fs_reg(surface);
+      brw_mark_surface_used(prog_data, surface);
+
+      /* Read the 3 GLuint components of gl_NumWorkGroups */
+      for (unsigned i = 0; i < 3; i++) {
+         fs_reg read_result =
+            emit_untyped_read(bld, surf_index,
+                              fs_reg(i << 2),
+                              1 /* dims */, 1 /* size */,
+                              BRW_PREDICATE_NONE);
+         read_result.type = dest.type;
+         bld.MOV(dest, read_result);
+         dest = offset(dest, bld, 1);
+      }
+      break;
+   }
+
+   default:
+      nir_emit_intrinsic(bld, instr);
+      break;
+   }
+}
+
+void
+fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr)
+{
+   fs_reg dest;
+   if (nir_intrinsic_infos[instr->intrinsic].has_dest)
+      dest = get_nir_dest(instr->dest);
+
+   bool has_indirect = false;
+
+   switch (instr->intrinsic) {
    case nir_intrinsic_atomic_counter_inc:
    case nir_intrinsic_atomic_counter_dec:
    case nir_intrinsic_atomic_counter_read: {
@@ -1789,44 +2159,6 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
       bld.MOV(retype(dest, BRW_REGISTER_TYPE_D), fs_reg(1));
       break;
 
-   case nir_intrinsic_load_front_face:
-      bld.MOV(retype(dest, BRW_REGISTER_TYPE_D),
-              *emit_frontfacing_interpolation());
-      break;
-
-   case nir_intrinsic_load_vertex_id:
-      unreachable("should be lowered by lower_vertex_id()");
-
-   case nir_intrinsic_load_primitive_id:
-      assert(stage == MESA_SHADER_GEOMETRY);
-      assert(((struct brw_gs_prog_data *)prog_data)->include_primitive_id);
-      bld.MOV(retype(dest, BRW_REGISTER_TYPE_UD),
-              retype(fs_reg(brw_vec8_grf(2, 0)), BRW_REGISTER_TYPE_UD));
-      break;
-
-   case nir_intrinsic_load_vertex_id_zero_base:
-   case nir_intrinsic_load_base_vertex:
-   case nir_intrinsic_load_instance_id:
-   case nir_intrinsic_load_invocation_id:
-   case nir_intrinsic_load_sample_mask_in:
-   case nir_intrinsic_load_sample_id: {
-      gl_system_value sv = nir_system_value_from_intrinsic(instr->intrinsic);
-      fs_reg val = nir_system_values[sv];
-      assert(val.file != BAD_FILE);
-      dest.type = val.type;
-      bld.MOV(dest, val);
-      break;
-   }
-
-   case nir_intrinsic_load_sample_pos: {
-      fs_reg sample_pos = nir_system_values[SYSTEM_VALUE_SAMPLE_POS];
-      assert(sample_pos.file != BAD_FILE);
-      dest.type = sample_pos.type;
-      bld.MOV(dest, sample_pos);
-      bld.MOV(offset(dest, bld, 1), offset(sample_pos, bld, 1));
-      break;
-   }
-
    case nir_intrinsic_load_uniform_indirect:
       has_indirect = true;
       /* fallthrough */
@@ -1980,185 +2312,6 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
       break;
    }
 
-   case nir_intrinsic_load_per_vertex_input_indirect:
-      assert(!"Not allowed");
-      /* fallthrough */
-   case nir_intrinsic_load_per_vertex_input:
-      emit_gs_input_load(dest, instr->src[0], instr->const_index[0],
-                         instr->num_components);
-      break;
-
-   /* Handle ARB_gpu_shader5 interpolation intrinsics
-    *
-    * It's worth a quick word of explanation as to why we handle the full
-    * variable-based interpolation intrinsic rather than a lowered version
-    * with like we do for other inputs.  We have to do that because the way
-    * we set up inputs doesn't allow us to use the already setup inputs for
-    * interpolation.  At the beginning of the shader, we go through all of
-    * the input variables and do the initial interpolation and put it in
-    * the nir_inputs array based on its location as determined in
-    * nir_lower_io.  If the input isn't used, dead code cleans up and
-    * everything works fine.  However, when we get to the ARB_gpu_shader5
-    * interpolation intrinsics, we need to reinterpolate the input
-    * differently.  If we used an intrinsic that just had an index it would
-    * only give us the offset into the nir_inputs array.  However, this is
-    * useless because that value is post-interpolation and we need
-    * pre-interpolation.  In order to get the actual location of the bits
-    * we get from the vertex fetching hardware, we need the variable.
-    */
-   case nir_intrinsic_interp_var_at_centroid:
-   case nir_intrinsic_interp_var_at_sample:
-   case nir_intrinsic_interp_var_at_offset: {
-      assert(stage == MESA_SHADER_FRAGMENT);
-
-      ((struct brw_wm_prog_data *) prog_data)->pulls_bary = true;
-
-      fs_reg dst_xy = bld.vgrf(BRW_REGISTER_TYPE_F, 2);
-      const glsl_interp_qualifier interpolation =
-         (glsl_interp_qualifier) instr->variables[0]->var->data.interpolation;
-
-      switch (instr->intrinsic) {
-      case nir_intrinsic_interp_var_at_centroid:
-         emit_pixel_interpolater_send(bld,
-                                      FS_OPCODE_INTERPOLATE_AT_CENTROID,
-                                      dst_xy,
-                                      fs_reg(), /* src */
-                                      fs_reg(0u),
-                                      interpolation);
-         break;
-
-      case nir_intrinsic_interp_var_at_sample: {
-         nir_const_value *const_sample = nir_src_as_const_value(instr->src[0]);
-
-         if (const_sample) {
-            unsigned msg_data = const_sample->i[0] << 4;
-
-            emit_pixel_interpolater_send(bld,
-                                         FS_OPCODE_INTERPOLATE_AT_SAMPLE,
-                                         dst_xy,
-                                         fs_reg(), /* src */
-                                         fs_reg(msg_data),
-                                         interpolation);
-         } else {
-            const fs_reg sample_src = retype(get_nir_src(instr->src[0]),
-                                             BRW_REGISTER_TYPE_UD);
-
-            if (nir_src_is_dynamically_uniform(instr->src[0])) {
-               const fs_reg sample_id = bld.emit_uniformize(sample_src);
-               const fs_reg msg_data = vgrf(glsl_type::uint_type);
-               bld.exec_all().group(1, 0).SHL(msg_data, sample_id, fs_reg(4u));
-               emit_pixel_interpolater_send(bld,
-                                            FS_OPCODE_INTERPOLATE_AT_SAMPLE,
-                                            dst_xy,
-                                            fs_reg(), /* src */
-                                            msg_data,
-                                            interpolation);
-            } else {
-               /* Make a loop that sends a message to the pixel interpolater
-                * for the sample number in each live channel. If there are
-                * multiple channels with the same sample number then these
-                * will be handled simultaneously with a single interation of
-                * the loop.
-                */
-               bld.emit(BRW_OPCODE_DO);
-
-               /* Get the next live sample number into sample_id_reg */
-               const fs_reg sample_id = bld.emit_uniformize(sample_src);
-
-               /* Set the flag register so that we can perform the send
-                * message on all channels that have the same sample number
-                */
-               bld.CMP(bld.null_reg_ud(),
-                       sample_src, sample_id,
-                       BRW_CONDITIONAL_EQ);
-               const fs_reg msg_data = vgrf(glsl_type::uint_type);
-               bld.exec_all().group(1, 0).SHL(msg_data, sample_id, fs_reg(4u));
-               fs_inst *inst =
-                  emit_pixel_interpolater_send(bld,
-                                               FS_OPCODE_INTERPOLATE_AT_SAMPLE,
-                                               dst_xy,
-                                               fs_reg(), /* src */
-                                               msg_data,
-                                               interpolation);
-               set_predicate(BRW_PREDICATE_NORMAL, inst);
-
-               /* Continue the loop if there are any live channels left */
-               set_predicate_inv(BRW_PREDICATE_NORMAL,
-                                 true, /* inverse */
-                                 bld.emit(BRW_OPCODE_WHILE));
-            }
-         }
-
-         break;
-      }
-
-      case nir_intrinsic_interp_var_at_offset: {
-         nir_const_value *const_offset = nir_src_as_const_value(instr->src[0]);
-
-         if (const_offset) {
-            unsigned off_x = MIN2((int)(const_offset->f[0] * 16), 7) & 0xf;
-            unsigned off_y = MIN2((int)(const_offset->f[1] * 16), 7) & 0xf;
-
-            emit_pixel_interpolater_send(bld,
-                                         FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET,
-                                         dst_xy,
-                                         fs_reg(), /* src */
-                                         fs_reg(off_x | (off_y << 4)),
-                                         interpolation);
-         } else {
-            fs_reg src = vgrf(glsl_type::ivec2_type);
-            fs_reg offset_src = retype(get_nir_src(instr->src[0]),
-                                       BRW_REGISTER_TYPE_F);
-            for (int i = 0; i < 2; i++) {
-               fs_reg temp = vgrf(glsl_type::float_type);
-               bld.MUL(temp, offset(offset_src, bld, i), fs_reg(16.0f));
-               fs_reg itemp = vgrf(glsl_type::int_type);
-               bld.MOV(itemp, temp);  /* float to int */
-
-               /* Clamp the upper end of the range to +7/16.
-                * ARB_gpu_shader5 requires that we support a maximum offset
-                * of +0.5, which isn't representable in a S0.4 value -- if
-                * we didn't clamp it, we'd end up with -8/16, which is the
-                * opposite of what the shader author wanted.
-                *
-                * This is legal due to ARB_gpu_shader5's quantization
-                * rules:
-                *
-                * "Not all values of <offset> may be supported; x and y
-                * offsets may be rounded to fixed-point values with the
-                * number of fraction bits given by the
-                * implementation-dependent constant
-                * FRAGMENT_INTERPOLATION_OFFSET_BITS"
-                */
-               set_condmod(BRW_CONDITIONAL_L,
-                           bld.SEL(offset(src, bld, i), itemp, fs_reg(7)));
-            }
-
-            const enum opcode opcode = FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET;
-            emit_pixel_interpolater_send(bld,
-                                         opcode,
-                                         dst_xy,
-                                         src,
-                                         fs_reg(0u),
-                                         interpolation);
-         }
-         break;
-      }
-
-      default:
-         unreachable("Invalid intrinsic");
-      }
-
-      for (unsigned j = 0; j < instr->num_components; j++) {
-         fs_reg src = interp_reg(instr->variables[0]->var->data.location, j);
-         src.type = dest.type;
-
-         bld.emit(FS_OPCODE_LINTERP, dest, dst_xy, src);
-         dest = offset(dest, bld, 1);
-      }
-      break;
-   }
-
    case nir_intrinsic_store_ssbo_indirect:
       has_indirect = true;
       /* fallthrough */
@@ -2240,23 +2393,6 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
       break;
    }
 
-   case nir_intrinsic_barrier:
-      emit_barrier();
-      if (stage == MESA_SHADER_COMPUTE)
-         ((struct brw_cs_prog_data *) prog_data)->uses_barrier = true;
-      break;
-
-   case nir_intrinsic_load_local_invocation_id:
-   case nir_intrinsic_load_work_group_id: {
-      gl_system_value sv = nir_system_value_from_intrinsic(instr->intrinsic);
-      fs_reg val = nir_system_values[sv];
-      assert(val.file != BAD_FILE);
-      dest.type = val.type;
-      for (unsigned i = 0; i < 3; i++)
-         bld.MOV(offset(dest, bld, i), offset(val, bld, i));
-      break;
-   }
-
    case nir_intrinsic_ssbo_atomic_add:
       nir_emit_ssbo_atomic(bld, BRW_AOP_ADD, instr);
       break;
@@ -2312,46 +2448,6 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
       break;
    }
 
-   case nir_intrinsic_load_num_work_groups: {
-      assert(devinfo->gen >= 7);
-      assert(stage == MESA_SHADER_COMPUTE);
-
-      struct brw_cs_prog_data *cs_prog_data =
-         (struct brw_cs_prog_data *) prog_data;
-      const unsigned surface =
-         cs_prog_data->binding_table.work_groups_start;
-
-      cs_prog_data->uses_num_work_groups = true;
-
-      fs_reg surf_index = fs_reg(surface);
-      brw_mark_surface_used(prog_data, surface);
-
-      /* Read the 3 GLuint components of gl_NumWorkGroups */
-      for (unsigned i = 0; i < 3; i++) {
-         fs_reg read_result =
-            emit_untyped_read(bld, surf_index,
-                              fs_reg(i << 2),
-                              1 /* dims */, 1 /* size */,
-                              BRW_PREDICATE_NONE);
-         read_result.type = dest.type;
-         bld.MOV(dest, read_result);
-         dest = offset(dest, bld, 1);
-      }
-      break;
-   }
-
-   case nir_intrinsic_emit_vertex_with_counter:
-      emit_gs_vertex(instr->src[0], instr->const_index[0]);
-      break;
-
-   case nir_intrinsic_end_primitive_with_counter:
-      emit_gs_end_primitive(instr->src[0]);
-      break;
-
-   case nir_intrinsic_set_vertex_count:
-      bld.MOV(this->final_gs_vertex_count, get_nir_src(instr->src[0]));
-      break;
-
    default:
       unreachable("unknown intrinsic");
    }

From df8af7d75155845d12d5a14a3a5ca644f07cb3b1 Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@redhat.com>
Date: Thu, 12 Nov 2015 08:34:18 +1000
Subject: [PATCH 164/287] r600: initialised PGM_RESOURCES_2 for ES/GS

This fixes the corruption on rendering that we are seeing in
certain geometry shaders.

Fixes:  https://bugs.freedesktop.org/show_bug.cgi?id=91780
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Tested / Reviewed-by: Glenn Kennard <glenn.kennard@gmail.com>
Cc: "10.6" "11.0" <mesa-stable@lists.freedesktop.org>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 src/gallium/drivers/r600/evergreen_state.c | 4 ++++
 src/gallium/drivers/r600/evergreend.h      | 2 ++
 2 files changed, 6 insertions(+)

diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c
index 30e902673d0..4b29a2296ec 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -2373,6 +2373,8 @@ static void cayman_init_atom_start_cs(struct r600_context *rctx)
 
 	r600_store_context_reg(cb, R_028848_SQ_PGM_RESOURCES_2_PS, S_028848_SINGLE_ROUND(V_SQ_ROUND_NEAREST_EVEN));
 	r600_store_context_reg(cb, R_028864_SQ_PGM_RESOURCES_2_VS, S_028864_SINGLE_ROUND(V_SQ_ROUND_NEAREST_EVEN));
+	r600_store_context_reg(cb, R_02887C_SQ_PGM_RESOURCES_2_GS, S_028848_SINGLE_ROUND(V_SQ_ROUND_NEAREST_EVEN));
+	r600_store_context_reg(cb, R_028894_SQ_PGM_RESOURCES_2_ES, S_028848_SINGLE_ROUND(V_SQ_ROUND_NEAREST_EVEN));
 	r600_store_context_reg(cb, R_0288A8_SQ_PGM_RESOURCES_FS, 0);
 
 	/* to avoid GPU doing any preloading of constant from random address */
@@ -2812,6 +2814,8 @@ void evergreen_init_atom_start_cs(struct r600_context *rctx)
 
 	r600_store_context_reg(cb, R_028848_SQ_PGM_RESOURCES_2_PS, S_028848_SINGLE_ROUND(V_SQ_ROUND_NEAREST_EVEN));
 	r600_store_context_reg(cb, R_028864_SQ_PGM_RESOURCES_2_VS, S_028864_SINGLE_ROUND(V_SQ_ROUND_NEAREST_EVEN));
+	r600_store_context_reg(cb, R_02887C_SQ_PGM_RESOURCES_2_GS, S_028848_SINGLE_ROUND(V_SQ_ROUND_NEAREST_EVEN));
+	r600_store_context_reg(cb, R_028894_SQ_PGM_RESOURCES_2_ES, S_028848_SINGLE_ROUND(V_SQ_ROUND_NEAREST_EVEN));
 	r600_store_context_reg(cb, R_0288A8_SQ_PGM_RESOURCES_FS, 0);
 
 	/* to avoid GPU doing any preloading of constant from random address */
diff --git a/src/gallium/drivers/r600/evergreend.h b/src/gallium/drivers/r600/evergreend.h
index a9a65f75305..25237c6f650 100644
--- a/src/gallium/drivers/r600/evergreend.h
+++ b/src/gallium/drivers/r600/evergreend.h
@@ -1504,6 +1504,7 @@
 #define   S_028878_UNCACHED_FIRST_INST(x)              (((x) & 0x1) << 28)
 #define   G_028878_UNCACHED_FIRST_INST(x)              (((x) >> 28) & 0x1)
 #define   C_028878_UNCACHED_FIRST_INST                 0xEFFFFFFF
+#define R_02887C_SQ_PGM_RESOURCES_2_GS                 0x02887C
 
 #define R_028890_SQ_PGM_RESOURCES_ES                 0x028890
 #define   S_028890_NUM_GPRS(x)                         (((x) & 0xFF) << 0)
@@ -1518,6 +1519,7 @@
 #define   S_028890_UNCACHED_FIRST_INST(x)              (((x) & 0x1) << 28)
 #define   G_028890_UNCACHED_FIRST_INST(x)              (((x) >> 28) & 0x1)
 #define   C_028890_UNCACHED_FIRST_INST                 0xEFFFFFFF
+#define R_028894_SQ_PGM_RESOURCES_2_ES                 0x028894
 
 #define R_028864_SQ_PGM_RESOURCES_2_VS               0x028864
 #define   S_028864_SINGLE_ROUND(x)                     (((x) & 0x3) << 0)

From 82e4f22d1ec65269328bbd472261d6a8e0689558 Mon Sep 17 00:00:00 2001
From: Timothy Arceri <timothy.arceri@collabora.com>
Date: Mon, 5 Oct 2015 00:01:45 +1100
Subject: [PATCH 165/287] mesa: add ARB_enhanced_layouts

Reviewed-by: Emil Velikov <emil.velikov@collabora.co.uk>
---
 src/glsl/glcpp/glcpp-parse.y    | 1 +
 src/glsl/glsl_parser_extras.cpp | 1 +
 src/glsl/glsl_parser_extras.h   | 2 ++
 src/mesa/main/extensions.c      | 1 +
 src/mesa/main/mtypes.h          | 1 +
 5 files changed, 6 insertions(+)

diff --git a/src/glsl/glcpp/glcpp-parse.y b/src/glsl/glcpp/glcpp-parse.y
index 4acccf74065..6aa7abec00e 100644
--- a/src/glsl/glcpp/glcpp-parse.y
+++ b/src/glsl/glcpp/glcpp-parse.y
@@ -2387,6 +2387,7 @@ _glcpp_parser_handle_version_declaration(glcpp_parser_t *parser, intmax_t versio
 	   }
 	} else {
 	   add_builtin_define(parser, "GL_ARB_draw_buffers", 1);
+           add_builtin_define(parser, "GL_ARB_enhanced_layouts", 1);
            add_builtin_define(parser, "GL_ARB_separate_shader_objects", 1);
 	   add_builtin_define(parser, "GL_ARB_texture_rectangle", 1);
            add_builtin_define(parser, "GL_AMD_shader_trinary_minmax", 1);
diff --git a/src/glsl/glsl_parser_extras.cpp b/src/glsl/glsl_parser_extras.cpp
index 14cb9fc78b7..2dba7d9f48a 100644
--- a/src/glsl/glsl_parser_extras.cpp
+++ b/src/glsl/glsl_parser_extras.cpp
@@ -594,6 +594,7 @@ static const _mesa_glsl_extension _mesa_glsl_supported_extensions[] = {
    EXT(ARB_derivative_control,           true,  false,     ARB_derivative_control),
    EXT(ARB_draw_buffers,                 true,  false,     dummy_true),
    EXT(ARB_draw_instanced,               true,  false,     ARB_draw_instanced),
+   EXT(ARB_enhanced_layouts,             true,  false,     ARB_enhanced_layouts),
    EXT(ARB_explicit_attrib_location,     true,  false,     ARB_explicit_attrib_location),
    EXT(ARB_explicit_uniform_location,    true,  false,     ARB_explicit_uniform_location),
    EXT(ARB_fragment_coord_conventions,   true,  false,     ARB_fragment_coord_conventions),
diff --git a/src/glsl/glsl_parser_extras.h b/src/glsl/glsl_parser_extras.h
index b54c5359149..684b917afa0 100644
--- a/src/glsl/glsl_parser_extras.h
+++ b/src/glsl/glsl_parser_extras.h
@@ -499,6 +499,8 @@ struct _mesa_glsl_parse_state {
    bool ARB_draw_buffers_warn;
    bool ARB_draw_instanced_enable;
    bool ARB_draw_instanced_warn;
+   bool ARB_enhanced_layouts_enable;
+   bool ARB_enhanced_layouts_warn;
    bool ARB_explicit_attrib_location_enable;
    bool ARB_explicit_attrib_location_warn;
    bool ARB_explicit_uniform_location_enable;
diff --git a/src/mesa/main/extensions.c b/src/mesa/main/extensions.c
index bdc68175bf2..1facad1fa7e 100644
--- a/src/mesa/main/extensions.c
+++ b/src/mesa/main/extensions.c
@@ -111,6 +111,7 @@ static const struct extension extension_table[] = {
    { "GL_ARB_draw_elements_base_vertex",           o(ARB_draw_elements_base_vertex),           GL,             2009 },
    { "GL_ARB_draw_indirect",                       o(ARB_draw_indirect),                       GLC,            2010 },
    { "GL_ARB_draw_instanced",                      o(ARB_draw_instanced),                      GL,             2008 },
+   { "GL_ARB_enhanced_layouts",                    o(ARB_enhanced_layouts),                    GLC,            2013 },
    { "GL_ARB_explicit_attrib_location",            o(ARB_explicit_attrib_location),            GL,             2009 },
    { "GL_ARB_explicit_uniform_location",           o(ARB_explicit_uniform_location),           GL,             2012 },
    { "GL_ARB_fragment_coord_conventions",          o(ARB_fragment_coord_conventions),          GL,             2009 },
diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
index 777a1eecd12..8de9c773b9f 100644
--- a/src/mesa/main/mtypes.h
+++ b/src/mesa/main/mtypes.h
@@ -3678,6 +3678,7 @@ struct gl_extensions
    GLboolean ARB_fragment_shader;
    GLboolean ARB_framebuffer_no_attachments;
    GLboolean ARB_framebuffer_object;
+   GLboolean ARB_enhanced_layouts;
    GLboolean ARB_explicit_attrib_location;
    GLboolean ARB_explicit_uniform_location;
    GLboolean ARB_geometry_shader4;

From 725fcdfbb11ff4c4399d58e08403e4e0064a8c5e Mon Sep 17 00:00:00 2001
From: Timothy Arceri <timothy.arceri@collabora.com>
Date: Wed, 28 Oct 2015 07:42:49 +1100
Subject: [PATCH 166/287] glsl: add helper to check for enhanced layouts
 support

Reviewed-by: Emil Velikov <emil.velikov@collabora.co.uk>
---
 src/glsl/glsl_parser_extras.h | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/src/glsl/glsl_parser_extras.h b/src/glsl/glsl_parser_extras.h
index 684b917afa0..1d8c1b8799f 100644
--- a/src/glsl/glsl_parser_extras.h
+++ b/src/glsl/glsl_parser_extras.h
@@ -209,6 +209,15 @@ struct _mesa_glsl_parse_state {
       return ARB_shader_atomic_counters_enable || is_version(420, 310);
    }
 
+   /**
+    * Whether ARB_enhanced_layouts features are available: true when the
+    * ARB_enhanced_layouts_enable flag is set or is_version(440, 0) holds.
+    */
+   bool has_enhanced_layouts() const
+   {
+      return ARB_enhanced_layouts_enable || is_version(440, 0);
+   }
+
    bool has_explicit_attrib_stream() const
    {
       return ARB_gpu_shader5_enable || is_version(400, 0);

From 3695b253f908b58290d69e2d770209abf0c6beee Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Mon, 9 Nov 2015 13:27:07 -0500
Subject: [PATCH 167/287] gallium: add PIPE_CAP_CLEAR_TEXTURE and clear_texture
 prototype
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
---
 src/gallium/docs/source/context.rst              |  4 ++++
 src/gallium/docs/source/screen.rst               |  2 ++
 src/gallium/drivers/freedreno/freedreno_screen.c |  1 +
 src/gallium/drivers/i915/i915_screen.c           |  1 +
 src/gallium/drivers/ilo/ilo_screen.c             |  1 +
 src/gallium/drivers/llvmpipe/lp_screen.c         |  1 +
 src/gallium/drivers/nouveau/nv30/nv30_screen.c   |  1 +
 src/gallium/drivers/nouveau/nv50/nv50_screen.c   |  1 +
 src/gallium/drivers/nouveau/nvc0/nvc0_screen.c   |  1 +
 src/gallium/drivers/r300/r300_screen.c           |  1 +
 src/gallium/drivers/r600/r600_pipe.c             |  1 +
 src/gallium/drivers/radeonsi/si_pipe.c           |  1 +
 src/gallium/drivers/softpipe/sp_screen.c         |  1 +
 src/gallium/drivers/svga/svga_screen.c           |  1 +
 src/gallium/drivers/vc4/vc4_screen.c             |  1 +
 src/gallium/drivers/virgl/virgl_screen.c         |  1 +
 src/gallium/include/pipe/p_context.h             | 10 ++++++++++
 src/gallium/include/pipe/p_defines.h             |  1 +
 18 files changed, 31 insertions(+)

diff --git a/src/gallium/docs/source/context.rst b/src/gallium/docs/source/context.rst
index dbc087700b5..9a32716f921 100644
--- a/src/gallium/docs/source/context.rst
+++ b/src/gallium/docs/source/context.rst
@@ -227,6 +227,10 @@ is is also possible to only clear one or the other part). While it is only
 possible to clear one surface at a time (which can include several layers),
 this surface need not be bound to the framebuffer.
 
+``clear_texture`` clears a non-PIPE_BUFFER resource's specified level
+and bounding box with a clear value provided in that resource's native
+format.
+
 ``clear_buffer`` clears a PIPE_BUFFER resource with the specified clear value
 (which may be multiple bytes in length). Logically this is a memset with a
 multi-byte element value starting at offset bytes from resource start, going
diff --git a/src/gallium/docs/source/screen.rst b/src/gallium/docs/source/screen.rst
index 91fdb43cfbb..e900283f731 100644
--- a/src/gallium/docs/source/screen.rst
+++ b/src/gallium/docs/source/screen.rst
@@ -281,6 +281,8 @@ The integer capabilities:
 * ``PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS``:
   Whether copying between compressed and plain formats is supported where
   a compressed block is copied to/from a plain pixel of the same size.
+* ``PIPE_CAP_CLEAR_TEXTURE``: Whether `clear_texture` will be
+  available in contexts.
 
 
 .. _pipe_capf:
diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c b/src/gallium/drivers/freedreno/freedreno_screen.c
index 7ee1a3fa9cf..56d1834ef9c 100644
--- a/src/gallium/drivers/freedreno/freedreno_screen.c
+++ b/src/gallium/drivers/freedreno/freedreno_screen.c
@@ -239,6 +239,7 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
 	case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
 	case PIPE_CAP_SHAREABLE_SHADERS:
 	case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
+	case PIPE_CAP_CLEAR_TEXTURE:
 		return 0;
 
 	case PIPE_CAP_MAX_VIEWPORTS:
diff --git a/src/gallium/drivers/i915/i915_screen.c b/src/gallium/drivers/i915/i915_screen.c
index 2d2fd375656..a5b161882cd 100644
--- a/src/gallium/drivers/i915/i915_screen.c
+++ b/src/gallium/drivers/i915/i915_screen.c
@@ -253,6 +253,7 @@ i915_get_param(struct pipe_screen *screen, enum pipe_cap cap)
    case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
    case PIPE_CAP_SHAREABLE_SHADERS:
    case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
+   case PIPE_CAP_CLEAR_TEXTURE:
       return 0;
 
    case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS:
diff --git a/src/gallium/drivers/ilo/ilo_screen.c b/src/gallium/drivers/ilo/ilo_screen.c
index 888f7aa6782..cfa2fb41152 100644
--- a/src/gallium/drivers/ilo/ilo_screen.c
+++ b/src/gallium/drivers/ilo/ilo_screen.c
@@ -475,6 +475,7 @@ ilo_get_param(struct pipe_screen *screen, enum pipe_cap param)
    case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
    case PIPE_CAP_SHAREABLE_SHADERS:
    case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
+   case PIPE_CAP_CLEAR_TEXTURE:
       return 0;
 
    case PIPE_CAP_VENDOR_ID:
diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c
index d1c50aefc84..9f5e7378ac7 100644
--- a/src/gallium/drivers/llvmpipe/lp_screen.c
+++ b/src/gallium/drivers/llvmpipe/lp_screen.c
@@ -300,6 +300,7 @@ llvmpipe_get_param(struct pipe_screen *screen, enum pipe_cap param)
    case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
    case PIPE_CAP_SHAREABLE_SHADERS:
    case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
+   case PIPE_CAP_CLEAR_TEXTURE:
       return 0;
    }
    /* should only get here on unhandled cases */
diff --git a/src/gallium/drivers/nouveau/nv30/nv30_screen.c b/src/gallium/drivers/nouveau/nv30/nv30_screen.c
index 44aac22010f..154c3d370f7 100644
--- a/src/gallium/drivers/nouveau/nv30/nv30_screen.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_screen.c
@@ -173,6 +173,7 @@ nv30_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
    case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
    case PIPE_CAP_SHAREABLE_SHADERS:
    case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
+   case PIPE_CAP_CLEAR_TEXTURE:
       return 0;
 
    case PIPE_CAP_VENDOR_ID:
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
index 5dda98141de..adf67b77bca 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
@@ -218,6 +218,7 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
    case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
    case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
    case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
+   case PIPE_CAP_CLEAR_TEXTURE:
       return 0;
 
    case PIPE_CAP_VENDOR_ID:
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
index 7f8ce21a348..4a4d7025fa6 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
@@ -204,6 +204,7 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
    case PIPE_CAP_VERTEXID_NOBASE:
    case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
    case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
+   case PIPE_CAP_CLEAR_TEXTURE:
       return 0;
 
    case PIPE_CAP_VENDOR_ID:
diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c
index d5981248a86..606e25f915b 100644
--- a/src/gallium/drivers/r300/r300_screen.c
+++ b/src/gallium/drivers/r300/r300_screen.c
@@ -199,6 +199,7 @@ static int r300_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
         case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
         case PIPE_CAP_SHAREABLE_SHADERS:
         case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
+        case PIPE_CAP_CLEAR_TEXTURE:
             return 0;
 
         /* SWTCL-only features. */
diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c
index 29682390648..ceae6e891a3 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -345,6 +345,7 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
 	case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
 	case PIPE_CAP_SHAREABLE_SHADERS:
 	case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
+	case PIPE_CAP_CLEAR_TEXTURE:
 		return 0;
 
 	/* Stream output. */
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index 60baad3d13c..2316cca9c2e 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -337,6 +337,7 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
 	case PIPE_CAP_FAKE_SW_MSAA:
 	case PIPE_CAP_TEXTURE_GATHER_OFFSETS:
 	case PIPE_CAP_VERTEXID_NOBASE:
+	case PIPE_CAP_CLEAR_TEXTURE:
 		return 0;
 
 	case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c
index c0fc82b2f2c..bb4cef29ec9 100644
--- a/src/gallium/drivers/softpipe/sp_screen.c
+++ b/src/gallium/drivers/softpipe/sp_screen.c
@@ -250,6 +250,7 @@ softpipe_get_param(struct pipe_screen *screen, enum pipe_cap param)
    case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
    case PIPE_CAP_SHAREABLE_SHADERS:
    case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
+   case PIPE_CAP_CLEAR_TEXTURE:
       return 0;
    }
    /* should only get here on unhandled cases */
diff --git a/src/gallium/drivers/svga/svga_screen.c b/src/gallium/drivers/svga/svga_screen.c
index 5aa7b0d86eb..a80bc9b9119 100644
--- a/src/gallium/drivers/svga/svga_screen.c
+++ b/src/gallium/drivers/svga/svga_screen.c
@@ -383,6 +383,7 @@ svga_get_param(struct pipe_screen *screen, enum pipe_cap param)
    case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
    case PIPE_CAP_SHAREABLE_SHADERS:
    case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
+   case PIPE_CAP_CLEAR_TEXTURE:
       return 0;
    }
 
diff --git a/src/gallium/drivers/vc4/vc4_screen.c b/src/gallium/drivers/vc4/vc4_screen.c
index bb867611804..88ee48c0e8f 100644
--- a/src/gallium/drivers/vc4/vc4_screen.c
+++ b/src/gallium/drivers/vc4/vc4_screen.c
@@ -184,6 +184,7 @@ vc4_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
 	case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
 	case PIPE_CAP_SHAREABLE_SHADERS:
 	case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
+	case PIPE_CAP_CLEAR_TEXTURE:
                 return 0;
 
                 /* Stream output. */
diff --git a/src/gallium/drivers/virgl/virgl_screen.c b/src/gallium/drivers/virgl/virgl_screen.c
index cca379d47ab..26a4f7736e3 100644
--- a/src/gallium/drivers/virgl/virgl_screen.c
+++ b/src/gallium/drivers/virgl/virgl_screen.c
@@ -218,6 +218,7 @@ virgl_get_param(struct pipe_screen *screen, enum pipe_cap param)
    case PIPE_CAP_TGSI_TXQS:
    case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
    case PIPE_CAP_SHAREABLE_SHADERS:
+   case PIPE_CAP_CLEAR_TEXTURE:
       return 0;
    case PIPE_CAP_VENDOR_ID:
       return 0x1af4;
diff --git a/src/gallium/include/pipe/p_context.h b/src/gallium/include/pipe/p_context.h
index 5adbd18e690..27f358f8fb9 100644
--- a/src/gallium/include/pipe/p_context.h
+++ b/src/gallium/include/pipe/p_context.h
@@ -379,6 +379,16 @@ struct pipe_context {
                                unsigned dstx, unsigned dsty,
                                unsigned width, unsigned height);
 
+   /**
+    * Clear the texture with the specified texel. The resource's format is not
+    * guaranteed to be renderable. The texel data is in the resource's format.
+    */
+   void (*clear_texture)(struct pipe_context *pipe,
+                         struct pipe_resource *res,
+                         unsigned level,
+                         const struct pipe_box *box,
+                         const void *data);
+
    /**
     * Clear a buffer. Runs a memset over the specified region with the element
     * value passed in through clear_value of size clear_value_size.
diff --git a/src/gallium/include/pipe/p_defines.h b/src/gallium/include/pipe/p_defines.h
index d6f87ccae12..7240154727e 100644
--- a/src/gallium/include/pipe/p_defines.h
+++ b/src/gallium/include/pipe/p_defines.h
@@ -634,6 +634,7 @@ enum pipe_cap
    PIPE_CAP_FORCE_PERSAMPLE_INTERP,
    PIPE_CAP_SHAREABLE_SHADERS,
    PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS,
+   PIPE_CAP_CLEAR_TEXTURE,
 };
 
 #define PIPE_QUIRK_TEXTURE_BORDER_COLOR_SWIZZLE_NV50 (1 << 0)

From ae39b0fda81ce592b8c965b715469650d35e5fef Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Tue, 4 Mar 2014 21:51:55 -0500
Subject: [PATCH 168/287] st/mesa: implement ARB_clear_texture
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
---
 src/mesa/state_tracker/st_cb_texture.c | 29 ++++++++++++++++++++++++++
 src/mesa/state_tracker/st_extensions.c |  1 +
 2 files changed, 30 insertions(+)

diff --git a/src/mesa/state_tracker/st_cb_texture.c b/src/mesa/state_tracker/st_cb_texture.c
index d4c916e8057..62f149aa0fb 100644
--- a/src/mesa/state_tracker/st_cb_texture.c
+++ b/src/mesa/state_tracker/st_cb_texture.c
@@ -1873,6 +1873,46 @@ st_TextureView(struct gl_context *ctx,
    return GL_TRUE;
 }
 
+/**
+ * ClearTexSubImage driver hook: clear a box of a texture image through the
+ * gallium clear_texture() context function.
+ *
+ * The box is built from the given offsets and sizes, with the image's Face
+ * added to the z offset. For immutable texture objects the level and z are
+ * further offset by the object's MinLevel and MinLayer.
+ *
+ * \param clearValue  texel data handed to the driver; when NULL, a 16-byte
+ *                    all-zero buffer is passed instead.
+ */
+static void
+st_ClearTexSubImage(struct gl_context *ctx,
+                    struct gl_texture_image *texImage,
+                    GLint xoffset, GLint yoffset, GLint zoffset,
+                    GLsizei width, GLsizei height, GLsizei depth,
+                    const GLvoid *clearValue)
+{
+   static const char zeros[16] = {0};
+   struct st_texture_image *stImage = st_texture_image(texImage);
+   struct pipe_resource *pt = stImage->pt;
+   struct st_context *st = st_context(ctx);
+   struct pipe_context *pipe = st->pipe;
+   unsigned level = texImage->Level;
+   struct pipe_box box;
+
+   /* Without a backing pipe_resource there is nothing to clear. */
+   if (!pt)
+      return;
+
+   u_box_3d(xoffset, yoffset, zoffset + texImage->Face,
+            width, height, depth, &box);
+   if (texImage->TexObject->Immutable) {
+      level += texImage->TexObject->MinLevel;
+      box.z += texImage->TexObject->MinLayer;
+   }
+
+   pipe->clear_texture(pipe, pt, level, &box, clearValue ? clearValue : zeros);
+}
+
 void
 st_init_texture_functions(struct dd_function_table *functions)
 {
@@ -1904,4 +1932,5 @@ st_init_texture_functions(struct dd_function_table *functions)
 
    functions->AllocTextureStorage = st_AllocTextureStorage;
    functions->TextureView = st_TextureView;
+   functions->ClearTexSubImage = st_ClearTexSubImage;
 }
diff --git a/src/mesa/state_tracker/st_extensions.c b/src/mesa/state_tracker/st_extensions.c
index bbb9027f4d7..99e96e1f3ae 100644
--- a/src/mesa/state_tracker/st_extensions.c
+++ b/src/mesa/state_tracker/st_extensions.c
@@ -439,6 +439,7 @@ void st_init_extensions(struct pipe_screen *screen,
    static const struct st_extension_cap_mapping cap_mapping[] = {
       { o(ARB_base_instance),                PIPE_CAP_START_INSTANCE                   },
       { o(ARB_buffer_storage),               PIPE_CAP_BUFFER_MAP_PERSISTENT_COHERENT   },
+      { o(ARB_clear_texture),                PIPE_CAP_CLEAR_TEXTURE                    },
       { o(ARB_color_buffer_float),           PIPE_CAP_VERTEX_COLOR_UNCLAMPED           },
       { o(ARB_copy_image),                   PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS },
       { o(ARB_depth_clamp),                  PIPE_CAP_DEPTH_CLIP_DISABLE               },

From c4182bb9b0897b4a4ac4f06b54fc7f6a2ddeb105 Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Mon, 9 Nov 2015 12:39:05 -0500
Subject: [PATCH 169/287] nv50,nvc0: add ARB_clear_texture support

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
---
 docs/GL3.txt                                  |  2 +-
 docs/relnotes/11.1.0.html                     |  1 +
 .../drivers/nouveau/nv50/nv50_resource.h      |  7 ++
 .../drivers/nouveau/nv50/nv50_screen.c        |  2 +-
 .../drivers/nouveau/nv50/nv50_surface.c       | 92 ++++++++++++++++++-
 .../drivers/nouveau/nvc0/nvc0_screen.c        |  2 +-
 .../drivers/nouveau/nvc0/nvc0_surface.c       |  5 +
 7 files changed, 103 insertions(+), 8 deletions(-)

diff --git a/docs/GL3.txt b/docs/GL3.txt
index 845672b07d5..b768eea789a 100644
--- a/docs/GL3.txt
+++ b/docs/GL3.txt
@@ -177,7 +177,7 @@ GL 4.4, GLSL 4.40:
 
   GL_MAX_VERTEX_ATTRIB_STRIDE                          DONE (all drivers)
   GL_ARB_buffer_storage                                DONE (i965, nv50, nvc0, r600, radeonsi)
-  GL_ARB_clear_texture                                 DONE (i965) (gallium - in progress, VMware)
+  GL_ARB_clear_texture                                 DONE (i965, nv50, nvc0)
   GL_ARB_enhanced_layouts                              in progress (Timothy)
   - compile-time constant expressions                  in progress
   - explicit byte offsets for blocks                   in progress
diff --git a/docs/relnotes/11.1.0.html b/docs/relnotes/11.1.0.html
index 14656fd97c9..82ee3c4037b 100644
--- a/docs/relnotes/11.1.0.html
+++ b/docs/relnotes/11.1.0.html
@@ -46,6 +46,7 @@ Note: some of the new features are only available with certain drivers.
 <ul>
 <li>GL_ARB_arrays_of_arrays on i965</li>
 <li>GL_ARB_blend_func_extended on freedreno (a3xx)</li>
+<li>GL_ARB_clear_texture on nv50, nvc0</li>
 <li>GL_ARB_copy_image on nv50, nvc0, radeonsi</li>
 <li>GL_ARB_gpu_shader_fp64 on r600 for Cypress/Cayman/Aruba chips</li>
 <li>GL_ARB_gpu_shader5 on r600 for Evergreen and later chips</li>
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_resource.h b/src/gallium/drivers/nouveau/nv50/nv50_resource.h
index a46e622c597..b40370a1d78 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_resource.h
+++ b/src/gallium/drivers/nouveau/nv50/nv50_resource.h
@@ -151,4 +151,11 @@ nv50_surface_from_buffer(struct pipe_context *pipe,
 void
 nv50_surface_destroy(struct pipe_context *, struct pipe_surface *);
 
+void
+nv50_clear_texture(struct pipe_context *pipe,
+                   struct pipe_resource *res,
+                   unsigned level,
+                   const struct pipe_box *box,
+                   const void *data);
+
 #endif /* __NV50_RESOURCE_H__ */
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
index adf67b77bca..f47e998ab1e 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
@@ -182,6 +182,7 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
    case PIPE_CAP_TGSI_TXQS:
    case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
    case PIPE_CAP_SHAREABLE_SHADERS:
+   case PIPE_CAP_CLEAR_TEXTURE:
       return 1;
    case PIPE_CAP_SEAMLESS_CUBE_MAP:
       return 1; /* class_3d >= NVA0_3D_CLASS; */
@@ -218,7 +219,6 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
    case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
    case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
    case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
-   case PIPE_CAP_CLEAR_TEXTURE:
       return 0;
 
    case PIPE_CAP_VENDOR_ID:
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_surface.c b/src/gallium/drivers/nouveau/nv50/nv50_surface.c
index 237d76d6adb..916a7d44a31 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_surface.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_surface.c
@@ -27,6 +27,7 @@
 #include "util/u_inlines.h"
 #include "util/u_pack_color.h"
 #include "util/u_format.h"
+#include "util/u_math.h"
 #include "util/u_surface.h"
 
 #include "tgsi/tgsi_ureg.h"
@@ -324,6 +325,9 @@ nv50_clear_render_target(struct pipe_context *pipe,
    else
       PUSH_DATA(push, 512);
 
+   BEGIN_NV04(push, NV50_3D(MULTISAMPLE_MODE), 1);
+   PUSH_DATA (push, mt->ms_mode);
+
    if (!nouveau_bo_memtype(bo)) {
       BEGIN_NV04(push, NV50_3D(ZETA_ENABLE), 1);
       PUSH_DATA (push, 0);
@@ -404,6 +408,9 @@ nv50_clear_depth_stencil(struct pipe_context *pipe,
    BEGIN_NV04(push, NV50_3D(RT_ARRAY_MODE), 1);
    PUSH_DATA (push, 512);
 
+   BEGIN_NV04(push, NV50_3D(MULTISAMPLE_MODE), 1);
+   PUSH_DATA (push, mt->ms_mode);
+
    BEGIN_NV04(push, NV50_3D(VIEWPORT_HORIZ(0)), 2);
    PUSH_DATA (push, (width << 16) | dstx);
    PUSH_DATA (push, (height << 16) | dsty);
@@ -417,6 +424,96 @@ nv50_clear_depth_stencil(struct pipe_context *pipe,
    nv50->dirty |= NV50_NEW_FRAMEBUFFER | NV50_NEW_SCISSOR;
 }
 
+/**
+ * Clear a box of one mip level of a texture with a single texel value.
+ *
+ * A temporary surface spanning layers box->z .. box->z + box->depth - 1 of
+ * \p level is created, cleared, and destroyed again before returning.
+ *
+ * Depth/stencil formats are cleared through clear_depth_stencil() with the
+ * depth and/or stencil value unpacked from \p data. All other formats are
+ * cleared through clear_render_target() after switching the surface to a
+ * UINT format of the same block size, with \p data loaded into the low
+ * channels of the clear color and the remaining channels zeroed.
+ *
+ * \param data  one texel in the resource's format.
+ */
+void
+nv50_clear_texture(struct pipe_context *pipe,
+                   struct pipe_resource *res,
+                   unsigned level,
+                   const struct pipe_box *box,
+                   const void *data)
+{
+   struct pipe_surface tmpl = {{0}}, *sf;
+
+   tmpl.format = res->format;
+   tmpl.u.tex.first_layer = box->z;
+   tmpl.u.tex.last_layer = box->z + box->depth - 1;
+   tmpl.u.tex.level = level;
+   sf = pipe->create_surface(pipe, res, &tmpl);
+   if (!sf)
+      return;
+
+   if (util_format_is_depth_or_stencil(res->format)) {
+      float depth = 0;
+      uint8_t stencil = 0;
+      unsigned clear = 0;
+      const struct util_format_description *desc =
+         util_format_description(res->format);
+
+      if (util_format_has_depth(desc)) {
+         clear |= PIPE_CLEAR_DEPTH;
+         desc->unpack_z_float(&depth, 0, data, 0, 1, 1);
+      }
+      if (util_format_has_stencil(desc)) {
+         clear |= PIPE_CLEAR_STENCIL;
+         desc->unpack_s_8uint(&stencil, 0, data, 0, 1, 1);
+      }
+      pipe->clear_depth_stencil(pipe, sf, clear, depth, stencil,
+                                box->x, box->y, box->width, box->height);
+   } else {
+      union pipe_color_union color;
+
+      switch (util_format_get_blocksizebits(res->format)) {
+      case 128:
+         sf->format = PIPE_FORMAT_R32G32B32A32_UINT;
+         memcpy(&color.ui, data, 128 / 8);
+         break;
+      case 64:
+         sf->format = PIPE_FORMAT_R32G32_UINT;
+         memcpy(&color.ui, data, 64 / 8);
+         memset(&color.ui[2], 0, 64 / 8);
+         break;
+      case 32:
+         sf->format = PIPE_FORMAT_R32_UINT;
+         memcpy(&color.ui, data, 32 / 8);
+         memset(&color.ui[1], 0, 96 / 8);
+         break;
+      case 16:
+         sf->format = PIPE_FORMAT_R16_UINT;
+         color.ui[0] = util_cpu_to_le32(
+            util_le16_to_cpu(*(unsigned short *)data));
+         memset(&color.ui[1], 0, 96 / 8);
+         break;
+      case 8:
+         sf->format = PIPE_FORMAT_R8_UINT;
+         color.ui[0] = util_cpu_to_le32(*(unsigned char *)data);
+         memset(&color.ui[1], 0, 96 / 8);
+         break;
+      default:
+         assert(!"Unknown texel element size");
+         /* Don't leak the temporary surface when asserts are compiled out. */
+         pipe->surface_destroy(pipe, sf);
+         return;
+      }
+
+      pipe->clear_render_target(pipe, sf, &color,
+                                box->x, box->y, box->width, box->height);
+   }
+   pipe->surface_destroy(pipe, sf);
+}
+
 void
 nv50_clear(struct pipe_context *pipe, unsigned buffers,
            const union pipe_color_union *color,
@@ -464,11 +545,9 @@ nv50_clear(struct pipe_context *pipe, unsigned buffers,
    if (mode) {
       int zs_layers = 0, color0_layers = 0;
       if (fb->cbufs[0] && (mode & 0x3c))
-         color0_layers = fb->cbufs[0]->u.tex.last_layer -
-            fb->cbufs[0]->u.tex.first_layer + 1;
+         color0_layers = nv50_surface(fb->cbufs[0])->depth;
       if (fb->zsbuf && (mode & ~0x3c))
-         zs_layers = fb->zsbuf->u.tex.last_layer -
-            fb->zsbuf->u.tex.first_layer + 1;
+         zs_layers = nv50_surface(fb->zsbuf)->depth;
 
       for (j = 0; j < MIN2(zs_layers, color0_layers); j++) {
          BEGIN_NV04(push, NV50_3D(CLEAR_BUFFERS), 1);
@@ -488,7 +567,7 @@ nv50_clear(struct pipe_context *pipe, unsigned buffers,
       struct pipe_surface *sf = fb->cbufs[i];
       if (!sf || !(buffers & (PIPE_CLEAR_COLOR0 << i)))
          continue;
-      for (j = 0; j <= sf->u.tex.last_layer - sf->u.tex.first_layer; j++) {
+      for (j = 0; j < nv50_surface(sf)->depth; j++) {
          BEGIN_NV04(push, NV50_3D(CLEAR_BUFFERS), 1);
          PUSH_DATA (push, (i << 6) | 0x3c |
                     (j << NV50_3D_CLEAR_BUFFERS_LAYER__SHIFT));
@@ -585,6 +664,8 @@ nv50_clear_buffer(struct pipe_context *pipe,
    PUSH_DATA (push, height);
    BEGIN_NV04(push, NV50_3D(ZETA_ENABLE), 1);
    PUSH_DATA (push, 0);
+   BEGIN_NV04(push, NV50_3D(MULTISAMPLE_MODE), 1);
+   PUSH_DATA (push, 0);
 
    /* NOTE: only works with D3D clear flag (5097/0x143c bit 4) */
 
@@ -1593,6 +1674,7 @@ nv50_init_surface_functions(struct nv50_context *nv50)
    pipe->resource_copy_region = nv50_resource_copy_region;
    pipe->blit = nv50_blit;
    pipe->flush_resource = nv50_flush_resource;
+   pipe->clear_texture = nv50_clear_texture;
    pipe->clear_render_target = nv50_clear_render_target;
    pipe->clear_depth_stencil = nv50_clear_depth_stencil;
    pipe->clear_buffer = nv50_clear_buffer;
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
index 4a4d7025fa6..461fcaaf677 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
@@ -182,6 +182,7 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
    case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
    case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
    case PIPE_CAP_SHAREABLE_SHADERS:
+   case PIPE_CAP_CLEAR_TEXTURE:
       return 1;
    case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
       return (class_3d >= NVE4_3D_CLASS) ? 1 : 0;
@@ -204,7 +205,6 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
    case PIPE_CAP_VERTEXID_NOBASE:
    case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
    case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
-   case PIPE_CAP_CLEAR_TEXTURE:
       return 0;
 
    case PIPE_CAP_VENDOR_ID:
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
index 5f47bad22f3..cdb1fc1145f 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
@@ -319,6 +319,7 @@ nvc0_clear_render_target(struct pipe_context *pipe,
       PUSH_DATA(push, dst->u.tex.first_layer + sf->depth);
       PUSH_DATA(push, mt->layer_stride >> 2);
       PUSH_DATA(push, dst->u.tex.first_layer);
+      IMMED_NVC0(push, NVC0_3D(MULTISAMPLE_MODE), mt->ms_mode);
    } else {
       if (res->base.target == PIPE_BUFFER) {
          PUSH_DATA(push, 262144);
@@ -334,6 +335,7 @@ nvc0_clear_render_target(struct pipe_context *pipe,
       PUSH_DATA(push, 0);
 
       IMMED_NVC0(push, NVC0_3D(ZETA_ENABLE), 0);
+      IMMED_NVC0(push, NVC0_3D(MULTISAMPLE_MODE), 0);
 
       /* tiled textures don't have to be fenced, they're not mapped directly */
       nvc0_resource_fence(res, NOUVEAU_BO_WR);
@@ -466,6 +468,7 @@ nvc0_clear_buffer(struct pipe_context *pipe,
    PUSH_DATA (push, 0);
 
    IMMED_NVC0(push, NVC0_3D(ZETA_ENABLE), 0);
+   IMMED_NVC0(push, NVC0_3D(MULTISAMPLE_MODE), 0);
 
    IMMED_NVC0(push, NVC0_3D(CLEAR_BUFFERS), 0x3c);
 
@@ -540,6 +543,7 @@ nvc0_clear_depth_stencil(struct pipe_context *pipe,
    PUSH_DATA (push, (unk << 16) | (dst->u.tex.first_layer + sf->depth));
    BEGIN_NVC0(push, NVC0_3D(ZETA_BASE_LAYER), 1);
    PUSH_DATA (push, dst->u.tex.first_layer);
+   IMMED_NVC0(push, NVC0_3D(MULTISAMPLE_MODE), mt->ms_mode);
 
    BEGIN_NIC0(push, NVC0_3D(CLEAR_BUFFERS), sf->depth);
    for (z = 0; z < sf->depth; ++z) {
@@ -1541,5 +1545,6 @@ nvc0_init_surface_functions(struct nvc0_context *nvc0)
    pipe->flush_resource = nvc0_flush_resource;
    pipe->clear_render_target = nvc0_clear_render_target;
    pipe->clear_depth_stencil = nvc0_clear_depth_stencil;
+   pipe->clear_texture = nv50_clear_texture;
    pipe->clear_buffer = nvc0_clear_buffer;
 }

From 55314c5be4cbf933ab7fbd20f6aa49207e04c946 Mon Sep 17 00:00:00 2001
From: Ben Widawsky <benjamin.widawsky@intel.com>
Date: Fri, 6 Nov 2015 18:12:27 -0800
Subject: [PATCH 170/287] i965/skl/gt4: Fix URB programming restriction.

The comment in the code details the restriction. Thanks to Ken for having a very
helpful conversation with me, and spotting the blurb in the link I sent him :P.

There are still stability problems for me on GT4, but this definitely helps with
some of the failures.

v2: Comment fixes

Cc: mesa-stable@lists.freedesktop.org
Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/mesa/drivers/dri/i965/brw_device_info.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_device_info.c b/src/mesa/drivers/dri/i965/brw_device_info.c
index 2ebc0845300..45505507be7 100644
--- a/src/mesa/drivers/dri/i965/brw_device_info.c
+++ b/src/mesa/drivers/dri/i965/brw_device_info.c
@@ -337,6 +337,15 @@ static const struct brw_device_info brw_device_info_skl_gt3 = {
 
 static const struct brw_device_info brw_device_info_skl_gt4 = {
    GEN9_FEATURES, .gt = 4,
+   /* From the "L3 Allocation and Programming" documentation:
+    *
+    * "URB is limited to 1008KB due to programming restrictions.  This is not a
+    * restriction of the L3 implementation, but of the FF and other clients.
+    * Therefore, in a GT4 implementation it is possible for the programmed
+    * allocation of the L3 data array to provide 3*384KB=1152KB for URB, but
+    * only 1008KB of this will be used."
+    */
+   .urb.size = 1008 / 3,
 };
 
 static const struct brw_device_info brw_device_info_bxt = {

From d4fdb84f80dd3dbad2b71ea6e877f24dc625aa2a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Samuel=20Iglesias=20Gons=C3=A1lvez?= <siglesias@igalia.com>
Date: Tue, 10 Nov 2015 13:45:21 +0100
Subject: [PATCH 171/287] i965/fs/nir: fix the number of registers written by
 FS_OPCODE_GET_BUFFER_SIZE
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

FS_OPCODE_GET_BUFFER_SIZE is calculated with a resinfo's sampler message.

This patch adjusts the number of registers written by the opcode
following what the PRM spec says about the number of registers written
by the SIMD8 and SIMD16's writeback messages for sampler messages.

Signed-off-by: Samuel Iglesias Gonsálvez <siglesias@igalia.com>
Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
---
 src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index 52d5ad1f204..73b09f5d03d 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -2433,16 +2433,28 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
       fs_reg source = fs_reg(0);
 
       int mlen = 1 * reg_width;
+
+      /* A resinfo's sampler message is used to get the buffer size.
+       * The SIMD8's writeback message consists of four registers and
+       * SIMD16's writeback message consists of 8 destination registers
+       * (two per component), although we are only interested in the
+       * first component, where resinfo returns the buffer size for
+       * SURFTYPE_BUFFER.
+       */
+      int regs_written = 4 * mlen;
       fs_reg src_payload = fs_reg(GRF, alloc.allocate(mlen),
                                   BRW_REGISTER_TYPE_UD);
       bld.LOAD_PAYLOAD(src_payload, &source, 1, 0);
-
+      fs_reg buffer_size = fs_reg(GRF, alloc.allocate(regs_written),
+                                  BRW_REGISTER_TYPE_UD);
       const unsigned index = prog_data->binding_table.ssbo_start + ssbo_index;
-      fs_inst *inst = bld.emit(FS_OPCODE_GET_BUFFER_SIZE, dest,
+      fs_inst *inst = bld.emit(FS_OPCODE_GET_BUFFER_SIZE, buffer_size,
                                src_payload, fs_reg(index));
       inst->header_size = 0;
       inst->mlen = mlen;
+      inst->regs_written = regs_written;
       bld.emit(inst);
+      bld.MOV(retype(dest, buffer_size.type), buffer_size);
 
       brw_mark_surface_used(prog_data, index);
       break;

From d6a61673543deeebac773801cbd717b7a342626e Mon Sep 17 00:00:00 2001
From: Iago Toral Quiroga <itoral@igalia.com>
Date: Thu, 26 Feb 2015 12:15:16 +0100
Subject: [PATCH 172/287] glsl: Add API to put default precision qualifiers in
 the symbol table
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

These have scoping rules that match the ones defined for other things such
as variables, so we want them in the symbol table.

Reviewed-by: Tapani Pälli <tapani.palli@intel.com>
Reviewed-by: Samuel Iglesias Gonsálvez <siglesias@igalia.com>
---
 src/glsl/glsl_symbol_table.cpp | 24 ++++++++++++++++++++++++
 src/glsl/glsl_symbol_table.h   |  2 ++
 2 files changed, 26 insertions(+)

diff --git a/src/glsl/glsl_symbol_table.cpp b/src/glsl/glsl_symbol_table.cpp
index 536f0a3a8c2..6c682acf560 100644
--- a/src/glsl/glsl_symbol_table.cpp
+++ b/src/glsl/glsl_symbol_table.cpp
@@ -23,6 +23,7 @@
  */
 
 #include "glsl_symbol_table.h"
+#include "ast.h"
 
 class symbol_table_entry {
 public:
@@ -201,6 +202,20 @@ bool glsl_symbol_table::add_function(ir_function *f)
    return _mesa_symbol_table_add_symbol(table, -1, f->name, entry) == 0;
 }
 
+bool glsl_symbol_table::add_default_precision_qualifier(const char *type_name,
+                                                        int precision)
+{
+   char *name = ralloc_asprintf(mem_ctx, "#default_precision_%s", type_name);
+
+   ast_type_specifier *default_specifier = new(mem_ctx) ast_type_specifier(name);
+   default_specifier->default_precision = precision;
+
+   symbol_table_entry *entry =
+      new(mem_ctx) symbol_table_entry(default_specifier);
+
+   return _mesa_symbol_table_add_symbol(table, -1, name, entry) == 0;
+}
+
 void glsl_symbol_table::add_global_function(ir_function *f)
 {
    symbol_table_entry *entry = new(mem_ctx) symbol_table_entry(f);
@@ -234,6 +249,15 @@ ir_function *glsl_symbol_table::get_function(const char *name)
    return entry != NULL ? entry->f : NULL;
 }
 
+int glsl_symbol_table::get_default_precision_qualifier(const char *type_name)
+{
+   char *name = ralloc_asprintf(mem_ctx, "#default_precision_%s", type_name);
+   symbol_table_entry *entry = get_entry(name);
+   if (!entry)
+      return ast_precision_none;
+   return entry->a->default_precision;
+}
+
 symbol_table_entry *glsl_symbol_table::get_entry(const char *name)
 {
    return (symbol_table_entry *)
diff --git a/src/glsl/glsl_symbol_table.h b/src/glsl/glsl_symbol_table.h
index e32b88b8699..5d654e5e6a7 100644
--- a/src/glsl/glsl_symbol_table.h
+++ b/src/glsl/glsl_symbol_table.h
@@ -72,6 +72,7 @@ struct glsl_symbol_table {
    bool add_function(ir_function *f);
    bool add_interface(const char *name, const glsl_type *i,
                       enum ir_variable_mode mode);
+   bool add_default_precision_qualifier(const char *type_name, int precision);
    /*@}*/
 
    /**
@@ -88,6 +89,7 @@ struct glsl_symbol_table {
    ir_function *get_function(const char *name);
    const glsl_type *get_interface(const char *name,
                                   enum ir_variable_mode mode);
+   int get_default_precision_qualifier(const char *type_name);
    /*@}*/
 
    /**

From e3082fb2732c2af836875201b8d7f49e864c8e4e Mon Sep 17 00:00:00 2001
From: Iago Toral Quiroga <itoral@igalia.com>
Date: Thu, 26 Feb 2015 12:15:17 +0100
Subject: [PATCH 173/287] glsl: Add default precision qualifiers to the symbol
 table
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The GLSL ES spec specifies default precision qualifiers for certain types,
so populate the symbol table with these.

Note that the desktop GLSL spec also specifies defaults for some types,
but these are not useful in practice since precision qualifiers are completely
ignored in desktop GLSL.

v2: simplify and add samplerExternalOES, specified by
    OES_EGL_image_external (Tapani)

v3: add atomic_uint (reported missing by Marta)

Reviewed-by: Tapani Pälli <tapani.palli@intel.com>
---
 src/glsl/glsl_parser.yy | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/src/glsl/glsl_parser.yy b/src/glsl/glsl_parser.yy
index 4ac8e45b63a..31e254a4fd3 100644
--- a/src/glsl/glsl_parser.yy
+++ b/src/glsl/glsl_parser.yy
@@ -313,6 +313,18 @@ translation_unit:
    {
       delete state->symbols;
       state->symbols = new(ralloc_parent(state)) glsl_symbol_table;
+      if (state->es_shader) {
+         if (state->stage == MESA_SHADER_FRAGMENT) {
+            state->symbols->add_default_precision_qualifier("int", ast_precision_medium);
+         } else {
+            state->symbols->add_default_precision_qualifier("float", ast_precision_high);
+            state->symbols->add_default_precision_qualifier("int", ast_precision_high);
+         }
+         state->symbols->add_default_precision_qualifier("sampler2D", ast_precision_low);
+         state->symbols->add_default_precision_qualifier("samplerExternalOES", ast_precision_low);
+         state->symbols->add_default_precision_qualifier("samplerCube", ast_precision_low);
+         state->symbols->add_default_precision_qualifier("atomic_uint", ast_precision_high);
+      }
       _mesa_glsl_initialize_types(state);
    }
    ;

From e6629d814f9a860b3a5390684be06370b270be14 Mon Sep 17 00:00:00 2001
From: Iago Toral Quiroga <itoral@igalia.com>
Date: Thu, 26 Feb 2015 12:15:18 +0100
Subject: [PATCH 174/287] glsl: Add user-defined default precision qualifiers
 to the symbol table
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Note that the spec requires a default precision to have been set for every
type used by a shader that can take a precision qualifier and does not have a
predefined precision. At the moment, however, Mesa only checks this for floats
in the fragment shader. This is probably because the GLSL ES 1.0 spec mentions
this case specifically, but GLSL ES 3.0 clarifies that the same applies to
other types:

"The fragment language has no default precision qualifier for floating point
 types. Hence for float, floating point vector and matrix variable
 declarations, either the declaration must include a precision qualifier or
 the default float precision must have been previously declared. Similarly,
 there is no default precision qualifier for the following sampler types in
 either the vertex or fragment language:

 sampler3D;
 samplerCubeShadow;
 sampler2DShadow;
 sampler2DArray;
 sampler2DArrayShadow;
 isampler2D;
 isampler3D;
 isamplerCube;
 isampler2DArray;
 usampler2D;
 usampler3D;
 usamplerCube;
 usampler2DArray;"

We will fix this in a later patch.

Reviewed-by: Tapani Pälli <tapani.palli@intel.com>
Reviewed-by: Samuel Iglesias Gonsálvez <siglesias@igalia.com>
---
 src/glsl/ast_to_hir.cpp | 29 ++++++++++-------------------
 1 file changed, 10 insertions(+), 19 deletions(-)

diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp
index 9d341e8cf92..a701753d959 100644
--- a/src/glsl/ast_to_hir.cpp
+++ b/src/glsl/ast_to_hir.cpp
@@ -2141,11 +2141,15 @@ ast_fully_specified_type::glsl_type(const char **name,
    if (type == NULL)
       return NULL;
 
+   /* The fragment language does not define a default precision value
+    * for float types, so check that one is defined if the type declaration
+    * isn't providing one explictly.
+    */
    if (type->base_type == GLSL_TYPE_FLOAT
        && state->es_shader
        && state->stage == MESA_SHADER_FRAGMENT
        && this->qualifier.precision == ast_precision_none
-       && state->symbols->get_variable("#default precision") == NULL) {
+       && state->symbols->get_default_precision_qualifier("float") == ast_precision_none) {
       YYLTYPE loc = this->get_location();
       _mesa_glsl_error(&loc, state,
                        "no precision specified this scope for type `%s'",
@@ -5714,20 +5718,10 @@ ast_type_specifier::hir(exec_list *instructions,
          return NULL;
       }
 
-      if (type->base_type == GLSL_TYPE_FLOAT
-          && state->es_shader
-          && state->stage == MESA_SHADER_FRAGMENT) {
+      if (state->es_shader) {
          /* Section 4.5.3 (Default Precision Qualifiers) of the GLSL ES 1.00
           * spec says:
           *
-          *     "The fragment language has no default precision qualifier for
-          *     floating point types."
-          *
-          * As a result, we have to track whether or not default precision has
-          * been specified for float in GLSL ES fragment shaders.
-          *
-          * Earlier in that same section, the spec says:
-          *
           *     "Non-precision qualified declarations will use the precision
           *     qualifier specified in the most recent precision statement
           *     that is still in scope. The precision statement has the same
@@ -5740,16 +5734,13 @@ ast_type_specifier::hir(exec_list *instructions,
           *     overriding earlier statements within that scope."
           *
           * Default precision specifications follow the same scope rules as
-          * variables.  So, we can track the state of the default float
-          * precision in the symbol table, and the rules will just work.  This
+          * variables.  So, we can track the state of the default precision
+          * qualifiers in the symbol table, and the rules will just work.  This
           * is a slight abuse of the symbol table, but it has the semantics
           * that we want.
           */
-         ir_variable *const junk =
-            new(state) ir_variable(type, "#default precision",
-                                   ir_var_auto);
-
-         state->symbols->add_variable(junk);
+         state->symbols->add_default_precision_qualifier(this->type_name,
+                                                         this->default_precision);
       }
 
       /* FINISHME: Translate precision statements into IR. */

From 9a00e1a69deba6ffc4c21fdaa77de4a3d74717ba Mon Sep 17 00:00:00 2001
From: Iago Toral Quiroga <itoral@igalia.com>
Date: Thu, 5 Nov 2015 08:18:46 +0200
Subject: [PATCH 175/287] glsl: Move the definition of
 precision_qualifier_allowed
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Move it above its future callers; later patches in this series need it
there in order to build. No functional change.

Reviewed-by: Tapani Pälli <tapani.palli@intel.com>
Reviewed-by: Samuel Iglesias Gonsálvez <siglesias@igalia.com>
---
 src/glsl/ast_to_hir.cpp | 71 ++++++++++++++++++++---------------------
 1 file changed, 35 insertions(+), 36 deletions(-)

diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp
index a701753d959..7206f1bd913 100644
--- a/src/glsl/ast_to_hir.cpp
+++ b/src/glsl/ast_to_hir.cpp
@@ -2116,6 +2116,41 @@ process_array_type(YYLTYPE *loc, const glsl_type *base,
    return array_type;
 }
 
+static bool
+precision_qualifier_allowed(const glsl_type *type)
+{
+   /* Precision qualifiers apply to floating point, integer and opaque
+    * types.
+    *
+    * Section 4.5.2 (Precision Qualifiers) of the GLSL 1.30 spec says:
+    *    "Any floating point or any integer declaration can have the type
+    *    preceded by one of these precision qualifiers [...] Literal
+    *    constants do not have precision qualifiers. Neither do Boolean
+    *    variables.
+    *
+    * Section 4.5 (Precision and Precision Qualifiers) of the GLSL 1.30
+    * spec also says:
+    *
+    *     "Precision qualifiers are added for code portability with OpenGL
+    *     ES, not for functionality. They have the same syntax as in OpenGL
+    *     ES."
+    *
+    * Section 8 (Built-In Functions) of the GLSL ES 1.00 spec says:
+    *
+    *     "uniform lowp sampler2D sampler;
+    *     highp vec2 coord;
+    *     ...
+    *     lowp vec4 col = texture2D (sampler, coord);
+    *                                            // texture2D returns lowp"
+    *
+    * From this, we infer that GLSL 1.30 (and later) should allow precision
+    * qualifiers on sampler types just like float and integer types.
+    */
+   return type->is_float()
+       || type->is_integer()
+       || type->is_record()
+       || type->contains_opaque();
+}
 
 const glsl_type *
 ast_type_specifier::glsl_type(const char **name,
@@ -3610,42 +3645,6 @@ validate_identifier(const char *identifier, YYLTYPE loc,
    }
 }
 
-static bool
-precision_qualifier_allowed(const glsl_type *type)
-{
-   /* Precision qualifiers apply to floating point, integer and opaque
-    * types.
-    *
-    * Section 4.5.2 (Precision Qualifiers) of the GLSL 1.30 spec says:
-    *    "Any floating point or any integer declaration can have the type
-    *    preceded by one of these precision qualifiers [...] Literal
-    *    constants do not have precision qualifiers. Neither do Boolean
-    *    variables.
-    *
-    * Section 4.5 (Precision and Precision Qualifiers) of the GLSL 1.30
-    * spec also says:
-    *
-    *     "Precision qualifiers are added for code portability with OpenGL
-    *     ES, not for functionality. They have the same syntax as in OpenGL
-    *     ES."
-    *
-    * Section 8 (Built-In Functions) of the GLSL ES 1.00 spec says:
-    *
-    *     "uniform lowp sampler2D sampler;
-    *     highp vec2 coord;
-    *     ...
-    *     lowp vec4 col = texture2D (sampler, coord);
-    *                                            // texture2D returns lowp"
-    *
-    * From this, we infer that GLSL 1.30 (and later) should allow precision
-    * qualifiers on sampler types just like float and integer types.
-    */
-   return type->is_float()
-       || type->is_integer()
-       || type->is_record()
-       || type->contains_opaque();
-}
-
 ir_rvalue *
 ast_declarator_list::hir(exec_list *instructions,
                          struct _mesa_glsl_parse_state *state)

From f84bc57d7dc02fceb805803131426c791eadeff9 Mon Sep 17 00:00:00 2001
From: Iago Toral Quiroga <itoral@igalia.com>
Date: Tue, 10 Nov 2015 08:22:07 +0200
Subject: [PATCH 176/287] glsl: Add precision information to ir_variable
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We will need this later on when we implement proper support for
precision qualifiers in the drivers and also to do link time checks for
uniforms as indicated by the spec.

This patch also adds compile-time checks for variables without precision
information (currently, Mesa only checks that a default precision is set
for floats in fragment shaders).

As indicated by Ian, the addition of the precision information to
ir_variable has been done using a bitfield and pahole to identify an
available hole so that memory requirements for ir_variable stay the
same.

v2 (Ian):
  - Avoid if-ladders by defining arrays of supported sampler names and
    indexing into them with type->sampler_array + 2 * type->sampler_shadow
  - Factor the code that selects the precision qualifier to use into a utility
    function
  - Fix a typo

v3 (Tapani):
  - rebased
  - squashed in "Precision qualifiers are not allowed on structs"
  - fixed select_gles_precision for sampler arrays
  - fixed precision_qualifier_allowed for arrays of structs

v4 (Tapani):
  - add atomic_uint handling
  - do not allow precision qualifier on images
  (issues reported by Marta)

v5 (Tapani):
  - support precision qualifier on image types

v6 (Tapani):
  - set precision qualifier on interface block members

Reviewed-by: Samuel Iglesias Gonsálvez <siglesias@igalia.com>
---
 src/glsl/ast_to_hir.cpp     | 299 +++++++++++++++++++++++++++++++++---
 src/glsl/ir.h               |  13 ++
 src/glsl/nir/glsl_types.cpp |   4 +
 src/glsl/nir/glsl_types.h   |  13 +-
 4 files changed, 304 insertions(+), 25 deletions(-)

diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp
index 7206f1bd913..51ea183147d 100644
--- a/src/glsl/ast_to_hir.cpp
+++ b/src/glsl/ast_to_hir.cpp
@@ -2146,10 +2146,10 @@ precision_qualifier_allowed(const glsl_type *type)
     * From this, we infer that GLSL 1.30 (and later) should allow precision
     * qualifiers on sampler types just like float and integer types.
     */
-   return type->is_float()
+   return (type->is_float()
        || type->is_integer()
-       || type->is_record()
-       || type->contains_opaque();
+       || type->contains_opaque())
+       && !type->without_array()->is_record();
 }
 
 const glsl_type *
@@ -2167,31 +2167,268 @@ ast_type_specifier::glsl_type(const char **name,
    return type;
 }
 
+/**
+ * From the OpenGL ES 3.0 spec, 4.5.4 Default Precision Qualifiers:
+ *
+ * "The precision statement
+ *
+ *    precision precision-qualifier type;
+ *
+ *  can be used to establish a default precision qualifier. The type field can
+ *  be either int or float or any of the sampler types, (...) If type is float,
+ *  the directive applies to non-precision-qualified floating point type
+ *  (scalar, vector, and matrix) declarations. If type is int, the directive
+ *  applies to all non-precision-qualified integer type (scalar, vector, signed,
+ *  and unsigned) declarations."
+ *
+ * We use the symbol table to keep the values of the default precisions for
+ * each 'type' in each scope and we use the 'type' string from the precision
+ * statement as key in the symbol table. When we want to retrieve the default
+ * precision associated with a given glsl_type we need to know the type string
+ * associated with it. This is what this function returns.
+ */
+static const char *
+get_type_name_for_precision_qualifier(const glsl_type *type)
+{
+   switch (type->base_type) {
+   case GLSL_TYPE_FLOAT:
+      return "float";
+   case GLSL_TYPE_UINT:
+   case GLSL_TYPE_INT:
+      return "int";
+   case GLSL_TYPE_ATOMIC_UINT:
+      return "atomic_uint";
+   case GLSL_TYPE_IMAGE:
+   /* fallthrough */
+   case GLSL_TYPE_SAMPLER: {
+      const unsigned type_idx =
+         type->sampler_array + 2 * type->sampler_shadow;
+      const unsigned offset = type->base_type == GLSL_TYPE_SAMPLER ? 0 : 4;
+      assert(type_idx < 4);
+      switch (type->sampler_type) {
+      case GLSL_TYPE_FLOAT:
+         switch (type->sampler_dimensionality) {
+         case GLSL_SAMPLER_DIM_1D: {
+            assert(type->base_type == GLSL_TYPE_SAMPLER);
+            static const char *const names[4] = {
+              "sampler1D", "sampler1DArray",
+              "sampler1DShadow", "sampler1DArrayShadow"
+            };
+            return names[type_idx];
+         }
+         case GLSL_SAMPLER_DIM_2D: {
+            static const char *const names[8] = {
+              "sampler2D", "sampler2DArray",
+              "sampler2DShadow", "sampler2DArrayShadow",
+              "image2D", "image2DArray", NULL, NULL
+            };
+            return names[offset + type_idx];
+         }
+         case GLSL_SAMPLER_DIM_3D: {
+            static const char *const names[8] = {
+              "sampler3D", NULL, NULL, NULL,
+              "image3D", NULL, NULL, NULL
+            };
+            return names[offset + type_idx];
+         }
+         case GLSL_SAMPLER_DIM_CUBE: {
+            static const char *const names[8] = {
+              "samplerCube", "samplerCubeArray",
+              "samplerCubeShadow", "samplerCubeArrayShadow",
+              "imageCube", NULL, NULL, NULL
+            };
+            return names[offset + type_idx];
+         }
+         case GLSL_SAMPLER_DIM_MS: {
+            assert(type->base_type == GLSL_TYPE_SAMPLER);
+            static const char *const names[4] = {
+              "sampler2DMS", "sampler2DMSArray", NULL, NULL
+            };
+            return names[type_idx];
+         }
+         case GLSL_SAMPLER_DIM_RECT: {
+            assert(type->base_type == GLSL_TYPE_SAMPLER);
+            static const char *const names[4] = {
+              "samplerRect", NULL, "samplerRectShadow", NULL
+            };
+            return names[type_idx];
+         }
+         case GLSL_SAMPLER_DIM_BUF: {
+            assert(type->base_type == GLSL_TYPE_SAMPLER);
+            static const char *const names[4] = {
+              "samplerBuffer", NULL, NULL, NULL
+            };
+            return names[type_idx];
+         }
+         case GLSL_SAMPLER_DIM_EXTERNAL: {
+            assert(type->base_type == GLSL_TYPE_SAMPLER);
+            static const char *const names[4] = {
+              "samplerExternalOES", NULL, NULL, NULL
+            };
+            return names[type_idx];
+         }
+         default:
+            unreachable("Unsupported sampler/image dimensionality");
+         } /* sampler/image float dimensionality */
+         break;
+      case GLSL_TYPE_INT:
+         switch (type->sampler_dimensionality) {
+         case GLSL_SAMPLER_DIM_1D: {
+            assert(type->base_type == GLSL_TYPE_SAMPLER);
+            static const char *const names[4] = {
+              "isampler1D", "isampler1DArray", NULL, NULL
+            };
+            return names[type_idx];
+         }
+         case GLSL_SAMPLER_DIM_2D: {
+            static const char *const names[8] = {
+              "isampler2D", "isampler2DArray", NULL, NULL,
+              "iimage2D", "iimage2DArray", NULL, NULL
+            };
+            return names[offset + type_idx];
+         }
+         case GLSL_SAMPLER_DIM_3D: {
+            static const char *const names[8] = {
+              "isampler3D", NULL, NULL, NULL,
+              "iimage3D", NULL, NULL, NULL
+            };
+            return names[offset + type_idx];
+         }
+         case GLSL_SAMPLER_DIM_CUBE: {
+            static const char *const names[8] = {
+              "isamplerCube", "isamplerCubeArray", NULL, NULL,
+              "iimageCube", NULL, NULL, NULL
+            };
+            return names[offset + type_idx];
+         }
+         case GLSL_SAMPLER_DIM_MS: {
+            assert(type->base_type == GLSL_TYPE_SAMPLER);
+            static const char *const names[4] = {
+              "isampler2DMS", "isampler2DMSArray", NULL, NULL
+            };
+            return names[type_idx];
+         }
+         case GLSL_SAMPLER_DIM_RECT: {
+            assert(type->base_type == GLSL_TYPE_SAMPLER);
+            static const char *const names[4] = {
+              "isamplerRect", NULL, "isamplerRectShadow", NULL
+            };
+            return names[type_idx];
+         }
+         case GLSL_SAMPLER_DIM_BUF: {
+            assert(type->base_type == GLSL_TYPE_SAMPLER);
+            static const char *const names[4] = {
+              "isamplerBuffer", NULL, NULL, NULL
+            };
+            return names[type_idx];
+         }
+         default:
+            unreachable("Unsupported isampler/iimage dimensionality");
+         } /* sampler/image int dimensionality */
+         break;
+      case GLSL_TYPE_UINT:
+         switch (type->sampler_dimensionality) {
+         case GLSL_SAMPLER_DIM_1D: {
+            assert(type->base_type == GLSL_TYPE_SAMPLER);
+            static const char *const names[4] = {
+              "usampler1D", "usampler1DArray", NULL, NULL
+            };
+            return names[type_idx];
+         }
+         case GLSL_SAMPLER_DIM_2D: {
+            static const char *const names[8] = {
+              "usampler2D", "usampler2DArray", NULL, NULL,
+              "uimage2D", "uimage2DArray", NULL, NULL
+            };
+            return names[offset + type_idx];
+         }
+         case GLSL_SAMPLER_DIM_3D: {
+            static const char *const names[8] = {
+              "usampler3D", NULL, NULL, NULL,
+              "uimage3D", NULL, NULL, NULL
+            };
+            return names[offset + type_idx];
+         }
+         case GLSL_SAMPLER_DIM_CUBE: {
+            static const char *const names[8] = {
+              "usamplerCube", "usamplerCubeArray", NULL, NULL,
+              "uimageCube", NULL, NULL, NULL
+            };
+            return names[offset + type_idx];
+         }
+         case GLSL_SAMPLER_DIM_MS: {
+            assert(type->base_type == GLSL_TYPE_SAMPLER);
+            static const char *const names[4] = {
+              "usampler2DMS", "usampler2DMSArray", NULL, NULL
+            };
+            return names[type_idx];
+         }
+         case GLSL_SAMPLER_DIM_RECT: {
+            assert(type->base_type == GLSL_TYPE_SAMPLER);
+            static const char *const names[4] = {
+              "usamplerRect", NULL, "usamplerRectShadow", NULL
+            };
+            return names[type_idx];
+         }
+         case GLSL_SAMPLER_DIM_BUF: {
+            assert(type->base_type == GLSL_TYPE_SAMPLER);
+            static const char *const names[4] = {
+              "usamplerBuffer", NULL, NULL, NULL
+            };
+            return names[type_idx];
+         }
+         default:
+            unreachable("Unsupported usampler/uimage dimensionality");
+         } /* sampler/image uint dimensionality */
+         break;
+      default:
+         unreachable("Unsupported sampler/image type");
+      } /* sampler/image type */
+      break;
+   } /* GLSL_TYPE_SAMPLER/GLSL_TYPE_IMAGE */
+   break;
+   default:
+      unreachable("Unsupported type");
+   } /* base type */
+}
+
+static unsigned
+select_gles_precision(unsigned qual_precision,
+                      const glsl_type *type,
+                      struct _mesa_glsl_parse_state *state, YYLTYPE *loc)
+{
+   /* Precision qualifiers do not have any meaning in Desktop GLSL.
+    * In GLES we take the precision from the type qualifier if present,
+    * otherwise, if the type of the variable allows precision qualifiers at
+    * all, we look for the default precision qualifier for that type in the
+    * current scope.
+    */
+   assert(state->es_shader);
+
+   unsigned precision = GLSL_PRECISION_NONE;
+   if (qual_precision) {
+      precision = qual_precision;
+   } else if (precision_qualifier_allowed(type)) {
+      const char *type_name =
+         get_type_name_for_precision_qualifier(type->without_array());
+      assert(type_name != NULL);
+
+      precision =
+         state->symbols->get_default_precision_qualifier(type_name);
+      if (precision == ast_precision_none) {
+         _mesa_glsl_error(loc, state,
+                          "No precision specified in this scope for type `%s'",
+                          type->name);
+      }
+   }
+   return precision;
+}
+
 const glsl_type *
 ast_fully_specified_type::glsl_type(const char **name,
                                     struct _mesa_glsl_parse_state *state) const
 {
-   const struct glsl_type *type = this->specifier->glsl_type(name, state);
-
-   if (type == NULL)
-      return NULL;
-
-   /* The fragment language does not define a default precision value
-    * for float types, so check that one is defined if the type declaration
-    * isn't providing one explictly.
-    */
-   if (type->base_type == GLSL_TYPE_FLOAT
-       && state->es_shader
-       && state->stage == MESA_SHADER_FRAGMENT
-       && this->qualifier.precision == ast_precision_none
-       && state->symbols->get_default_precision_qualifier("float") == ast_precision_none) {
-      YYLTYPE loc = this->get_location();
-      _mesa_glsl_error(&loc, state,
-                       "no precision specified this scope for type `%s'",
-                       type->name);
-   }
-
-   return type;
+   return this->specifier->glsl_type(name, state);
 }
 
 /**
@@ -2729,6 +2966,12 @@ apply_type_qualifier_to_variable(const struct ast_type_qualifier *qual,
    if (qual->flags.q.sample)
       var->data.sample = 1;
 
+   /* Precision qualifiers do not hold any meaning in Desktop GLSL */
+   if (state->es_shader) {
+      var->data.precision =
+         select_gles_precision(qual->precision, var->type, state, loc);
+   }
+
    if (state->stage == MESA_SHADER_GEOMETRY &&
        qual->flags.q.out && qual->flags.q.stream) {
       var->data.stream = qual->stream;
@@ -5918,6 +6161,7 @@ ast_process_structure_or_interface_block(exec_list *instructions,
          fields[i].centroid = qual->flags.q.centroid ? 1 : 0;
          fields[i].sample = qual->flags.q.sample ? 1 : 0;
          fields[i].patch = qual->flags.q.patch ? 1 : 0;
+         fields[i].precision = qual->precision;
 
          /* From Section 4.4.2.3 (Geometry Outputs) of the GLSL 4.50 spec:
           *
@@ -6599,6 +6843,13 @@ ast_interface_block::hir(exec_list *instructions,
          if (var_mode == ir_var_shader_in || var_mode == ir_var_uniform)
             var->data.read_only = true;
 
+         /* Precision qualifiers do not have any meaning in Desktop GLSL */
+         if (state->es_shader) {
+            var->data.precision =
+               select_gles_precision(fields[i].precision, fields[i].type,
+                                     state, &loc);
+         }
+
          if (fields[i].matrix_layout == GLSL_MATRIX_LAYOUT_INHERITED) {
             var->data.matrix_layout = matrix_layout == GLSL_MATRIX_LAYOUT_INHERITED
                ? GLSL_MATRIX_LAYOUT_COLUMN_MAJOR : matrix_layout;
diff --git a/src/glsl/ir.h b/src/glsl/ir.h
index 32a766ef0f0..d59dee1e369 100644
--- a/src/glsl/ir.h
+++ b/src/glsl/ir.h
@@ -770,6 +770,19 @@ public:
        */
       unsigned index:1;
 
+      /**
+       * Precision qualifier.
+       *
+       * In desktop GLSL we do not care about precision qualifiers at all, in
+       * fact, the spec says that precision qualifiers are ignored.
+       *
+       * To make things easy, we make it so that this field is always
+       * GLSL_PRECISION_NONE on desktop shaders. This way all the variables
+       * have the same precision value and the checks we add in the compiler
+       * for this field will never break a desktop shader compile.
+       */
+      unsigned precision:2;
+
       /**
        * \brief Layout qualifier for gl_FragDepth.
        *
diff --git a/src/glsl/nir/glsl_types.cpp b/src/glsl/nir/glsl_types.cpp
index 1c66dce85c4..975b815b0cc 100644
--- a/src/glsl/nir/glsl_types.cpp
+++ b/src/glsl/nir/glsl_types.cpp
@@ -162,6 +162,7 @@ glsl_type::glsl_type(const glsl_struct_field *fields, unsigned num_fields,
       this->fields.structure[i].sample = fields[i].sample;
       this->fields.structure[i].matrix_layout = fields[i].matrix_layout;
       this->fields.structure[i].patch = fields[i].patch;
+      this->fields.structure[i].precision = fields[i].precision;
    }
 
    mtx_unlock(&glsl_type::mutex);
@@ -779,6 +780,9 @@ glsl_type::record_compare(const glsl_type *b) const
       if (this->fields.structure[i].image_restrict
           != b->fields.structure[i].image_restrict)
          return false;
+      if (this->fields.structure[i].precision
+          != b->fields.structure[i].precision)
+         return false;
    }
 
    return true;
diff --git a/src/glsl/nir/glsl_types.h b/src/glsl/nir/glsl_types.h
index 1f17ad5c5b0..d841a3277db 100644
--- a/src/glsl/nir/glsl_types.h
+++ b/src/glsl/nir/glsl_types.h
@@ -102,6 +102,13 @@ enum glsl_matrix_layout {
    GLSL_MATRIX_LAYOUT_ROW_MAJOR
 };
 
+enum {
+   GLSL_PRECISION_NONE = 0,
+   GLSL_PRECISION_HIGH,
+   GLSL_PRECISION_MEDIUM,
+   GLSL_PRECISION_LOW
+};
+
 #ifdef __cplusplus
 #include "GL/gl.h"
 #include "util/ralloc.h"
@@ -320,7 +327,6 @@ struct glsl_type {
     */
    unsigned count_attribute_slots() const;
 
-
    /**
     * Alignment in bytes of the start of this type in a std140 uniform
     * block.
@@ -828,6 +834,11 @@ struct glsl_struct_field {
     */
    unsigned patch:1;
 
+   /**
+    * Precision qualifier
+    */
+   unsigned precision;
+
    /**
     * Image qualifiers, applicable to buffer variables defined in shader
     * storage buffer objects (SSBOs)

From 5bd122cad9d16596f89260f3b115cd0fb72cb886 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tapani=20P=C3=A4lli?= <tapani.palli@intel.com>
Date: Thu, 5 Nov 2015 12:23:17 +0200
Subject: [PATCH 177/287] glsl: do not lose precision information when packing
 varyings
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This information will be used by cross stage validation of varyings
for pipeline objects.

Signed-off-by: Tapani Pälli <tapani.palli@intel.com>
Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
---
 src/glsl/lower_packed_varyings.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/glsl/lower_packed_varyings.cpp b/src/glsl/lower_packed_varyings.cpp
index 5d66ca931cf..037c27d88ab 100644
--- a/src/glsl/lower_packed_varyings.cpp
+++ b/src/glsl/lower_packed_varyings.cpp
@@ -621,6 +621,7 @@ lower_packed_varyings_visitor::get_packed_varying_deref(
       packed_var->data.patch = unpacked_var->data.patch;
       packed_var->data.interpolation = unpacked_var->data.interpolation;
       packed_var->data.location = location;
+      packed_var->data.precision = unpacked_var->data.precision;
       unpacked_var->insert_before(packed_var);
       this->packed_varyings[slot] = packed_var;
    } else {

From 7e6dac11866d264c21a108b9623114943d6e88ec Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tapani=20P=C3=A4lli?= <tapani.palli@intel.com>
Date: Thu, 5 Nov 2015 12:52:26 +0200
Subject: [PATCH 178/287] mesa: validate precision of varyings during
 ValidateProgramPipeline
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fixes the following failing ES3.1 CTS tests:

   ES31-CTS.sepshaderobjs.InterfacePrecisionMatchingFloat
   ES31-CTS.sepshaderobjs.InterfacePrecisionMatchingInt
   ES31-CTS.sepshaderobjs.InterfacePrecisionMatchingUInt

Signed-off-by: Tapani Pälli <tapani.palli@intel.com>
Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
---
 src/mesa/main/pipelineobj.c    | 15 ++++++++
 src/mesa/main/shader_query.cpp | 62 ++++++++++++++++++++++++++++++++++
 src/mesa/main/shaderobj.h      |  3 ++
 3 files changed, 80 insertions(+)

diff --git a/src/mesa/main/pipelineobj.c b/src/mesa/main/pipelineobj.c
index 699a2ae47eb..90dff13485b 100644
--- a/src/mesa/main/pipelineobj.c
+++ b/src/mesa/main/pipelineobj.c
@@ -907,6 +907,21 @@ _mesa_ValidateProgramPipeline(GLuint pipeline)
 
    _mesa_validate_program_pipeline(ctx, pipe,
                                    (ctx->_Shader->Name == pipe->Name));
+
+   /* Validate inputs against outputs; this cannot be done during linking
+    * since programs have been linked separately from each other.
+    *
+    * From OpenGL 4.5 Core spec:
+    *     "Separable program objects may have validation failures that cannot be
+    *     detected without the complete program pipeline. Mismatched interfaces,
+    *     improper usage of program objects together, and the same
+    *     state-dependent failures can result in validation errors for such
+    *     program objects."
+    *
+    * OpenGL ES 3.1 specification has the same text.
+    */
+   if (!_mesa_validate_pipeline_io(pipe))
+      pipe->Validated = GL_FALSE;
 }
 
 void GLAPIENTRY
diff --git a/src/mesa/main/shader_query.cpp b/src/mesa/main/shader_query.cpp
index 5cb877b0104..58ba04153e6 100644
--- a/src/mesa/main/shader_query.cpp
+++ b/src/mesa/main/shader_query.cpp
@@ -1359,3 +1359,65 @@ _mesa_get_program_resourceiv(struct gl_shader_program *shProg,
    if (length)
       *length = amount;
 }
+
+static bool
+validate_io(const struct gl_shader *input_stage,
+            const struct gl_shader *output_stage)
+{
+   assert(input_stage && output_stage);
+
+   /* For each output of input_stage, check the matching output_stage input. */
+   foreach_in_list(ir_instruction, out, input_stage->ir) {
+      ir_variable *out_var = out->as_variable();
+      if (!out_var || out_var->data.mode != ir_var_shader_out)
+         continue;
+
+      foreach_in_list(ir_instruction, in, output_stage->ir) {
+         ir_variable *in_var = in->as_variable();
+         if (!in_var || in_var->data.mode != ir_var_shader_in)
+            continue;
+
+         if (strcmp(in_var->name, out_var->name) == 0) {
+            /* From OpenGL ES 3.1 spec:
+             *     "When both shaders are in separate programs, mismatched
+             *     precision qualifiers will result in a program interface
+             *     mismatch that will result in program pipeline validation
+             *     failures, as described in section 7.4.1 (“Shader Interface
+             *     Matching”) of the OpenGL ES 3.1 Specification."
+             */
+            if (in_var->data.precision != out_var->data.precision)
+               return false;
+         }
+      }
+   }
+   return true;
+}
+
+/**
+ * Validate inputs against outputs in a program pipeline.
+ */
+extern "C" bool
+_mesa_validate_pipeline_io(struct gl_pipeline_object *pipeline)
+{
+   struct gl_shader_program **shProg =
+      (struct gl_shader_program **) pipeline->CurrentProgram;
+
+   /* Find first active stage in pipeline. */
+   unsigned idx, prev = 0;
+   for (idx = 0; idx < ARRAY_SIZE(pipeline->CurrentProgram); idx++) {
+      if (shProg[idx]) {
+         prev = idx;
+         break;
+      }
+   }
+
+   for (idx = prev + 1; idx < ARRAY_SIZE(pipeline->CurrentProgram); idx++) {
+      if (shProg[idx]) {
+         if (!validate_io(shProg[prev]->_LinkedShaders[prev],
+                          shProg[idx]->_LinkedShaders[idx]))
+            return false;
+         prev = idx;
+      }
+   }
+   return true;
+}
diff --git a/src/mesa/main/shaderobj.h b/src/mesa/main/shaderobj.h
index 796de470735..be80752d7f2 100644
--- a/src/mesa/main/shaderobj.h
+++ b/src/mesa/main/shaderobj.h
@@ -234,6 +234,9 @@ _mesa_shader_stage_to_subroutine_uniform(gl_shader_stage stage)
    }
 }
 
+extern bool
+_mesa_validate_pipeline_io(struct gl_pipeline_object *);
+
 #ifdef __cplusplus
 }
 #endif

From f2fe6072617fd77f1abe213ff8fd2c233908b63d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tapani=20P=C3=A4lli?= <tapani.palli@intel.com>
Date: Mon, 2 Nov 2015 13:36:19 +0200
Subject: [PATCH 179/287] glsl: set matrix_stride for non matrices with atomic
 counter buffers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch sets matrix_stride to 0 for non-matrix uniforms that are in
an atomic counter buffer. Matrix stride calculation for actual matrix
uniforms is done during link_assign_uniform_locations.

From ARB_program_interface_query specification:

GL_MATRIX_STRIDE:

   "For active variables not declared as a matrix or array of matrices,
   zero is written to <params>.  For active variables not backed by a
   buffer object, -1 is written to <params>, regardless of the variable
   type."

Signed-off-by: Tapani Pälli <tapani.palli@intel.com>
Reviewed-by: Marta Lofstedt <marta.lofstedt@intel.com>
---
 src/glsl/link_atomics.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/glsl/link_atomics.cpp b/src/glsl/link_atomics.cpp
index cdcc06d53e2..3aa52dbd18a 100644
--- a/src/glsl/link_atomics.cpp
+++ b/src/glsl/link_atomics.cpp
@@ -240,6 +240,8 @@ link_assign_atomic_counter_resources(struct gl_context *ctx,
          storage->offset = var->data.atomic.offset;
          storage->array_stride = (var->type->is_array() ?
                                   var->type->without_array()->atomic_size() : 0);
+         if (!var->type->is_matrix())
+            storage->matrix_stride = 0;
       }
 
       /* Assign stage-specific fields. */

From a02385cd691df9dd35844a727350db72b17f586b Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Thu, 12 Nov 2015 09:06:25 -0700
Subject: [PATCH 180/287] gallium/hud: add cpu graph support for Windows

We support "cpu" but not "cpu#" because there's no good way of querying
per-cpu usage.  Also, the cpu usage is for the process, not the whole
system.

Original code cobbled together by Brian and then fixed/polished by Jose.

Signed-off-by: Brian Paul <brianp@vmware.com>
---
 src/gallium/auxiliary/hud/hud_cpu.c | 54 +++++++++++++++++++++++++++++
 1 file changed, 54 insertions(+)

diff --git a/src/gallium/auxiliary/hud/hud_cpu.c b/src/gallium/auxiliary/hud/hud_cpu.c
index cd20deec9bd..c06e7770d63 100644
--- a/src/gallium/auxiliary/hud/hud_cpu.c
+++ b/src/gallium/auxiliary/hud/hud_cpu.c
@@ -33,6 +33,58 @@
 #include "util/u_memory.h"
 #include <stdio.h>
 #include <inttypes.h>
+#ifdef PIPE_OS_WINDOWS
+#include <windows.h>
+#endif
+
+
+#ifdef PIPE_OS_WINDOWS
+
+static inline uint64_t
+filetime_to_scalar(FILETIME ft)
+{
+   ULARGE_INTEGER uli;
+   uli.LowPart = ft.dwLowDateTime;
+   uli.HighPart = ft.dwHighDateTime;
+   return uli.QuadPart;
+}
+
+static boolean
+get_cpu_stats(unsigned cpu_index, uint64_t *busy_time, uint64_t *total_time)
+{
+   SYSTEM_INFO sysInfo;
+   FILETIME ftNow, ftCreation, ftExit, ftKernel, ftUser;
+
+   GetSystemInfo(&sysInfo);
+   assert(sysInfo.dwNumberOfProcessors >= 1);
+   if (cpu_index != ALL_CPUS && cpu_index >= sysInfo.dwNumberOfProcessors) {
+      /* Tell hud_get_num_cpus there are only this many CPUs. */
+      return FALSE;
+   }
+
+   /* Get accumulated user and sys time for all threads */
+   if (!GetProcessTimes(GetCurrentProcess(), &ftCreation, &ftExit,
+                        &ftKernel, &ftUser))
+      return FALSE;
+
+   GetSystemTimeAsFileTime(&ftNow);
+
+   *busy_time = filetime_to_scalar(ftUser) + filetime_to_scalar(ftKernel);
+   *total_time = filetime_to_scalar(ftNow) - filetime_to_scalar(ftCreation);
+
+   /* busy_time already has the time across all cpus.
+    * XXX: if we want 100% to mean one CPU, 200% two cpus, eliminate the
+    * following line.
+    */
+   *total_time *= sysInfo.dwNumberOfProcessors;
+
+   /* XXX: we ignore cpu_index, i.e., we assume that the individual CPU usage
+    * and the system usage are one and the same.
+    */
+   return TRUE;
+}
+
+#else
 
 static boolean
 get_cpu_stats(unsigned cpu_index, uint64_t *busy_time, uint64_t *total_time)
@@ -81,6 +133,8 @@ get_cpu_stats(unsigned cpu_index, uint64_t *busy_time, uint64_t *total_time)
    fclose(f);
    return FALSE;
 }
+#endif
+
 
 struct cpu_info {
    unsigned cpu_index;

From fa30de7643ca6c70ac2661684b22f7b220a40b0b Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Tue, 10 Nov 2015 14:10:45 -0700
Subject: [PATCH 181/287] st/wgl: re-implement stw_device::ctx_mutex with
 CRITICAL_SECTION

This is Windows-only code so we can use the native Win32 functions for
critical sections.  This will also allow us to (cleanly) add some mutex
check/debug code in subsequent patches.

Reviewed-by: Sinclair Yeh <syeh@vmware.com>
Reviewed-by: Charmaine Lee <charmainel@vmware.com>
---
 src/gallium/state_trackers/wgl/stw_context.c | 29 ++++++++++----------
 src/gallium/state_trackers/wgl/stw_device.c  |  8 +++---
 src/gallium/state_trackers/wgl/stw_device.h  | 16 ++++++++++-
 3 files changed, 34 insertions(+), 19 deletions(-)

diff --git a/src/gallium/state_trackers/wgl/stw_context.c b/src/gallium/state_trackers/wgl/stw_context.c
index 0f859649217..229fdfd1625 100644
--- a/src/gallium/state_trackers/wgl/stw_context.c
+++ b/src/gallium/state_trackers/wgl/stw_context.c
@@ -70,7 +70,7 @@ DrvCopyContext(DHGLRC dhrcSource, DHGLRC dhrcDest, UINT fuMask)
    if (!stw_dev)
       return FALSE;
 
-   pipe_mutex_lock( stw_dev->ctx_mutex );
+   stw_lock_contexts(stw_dev);
 
    src = stw_lookup_context_locked( dhrcSource );
    dst = stw_lookup_context_locked( dhrcDest );
@@ -83,7 +83,7 @@ DrvCopyContext(DHGLRC dhrcSource, DHGLRC dhrcDest, UINT fuMask)
       (void) fuMask;
    }
 
-   pipe_mutex_unlock( stw_dev->ctx_mutex );
+   stw_unlock_contexts(stw_dev);
 
    return ret;
 }
@@ -99,7 +99,7 @@ DrvShareLists(DHGLRC dhglrc1, DHGLRC dhglrc2)
    if (!stw_dev)
       return FALSE;
 
-   pipe_mutex_lock( stw_dev->ctx_mutex );
+   stw_lock_contexts(stw_dev);
 
    ctx1 = stw_lookup_context_locked( dhglrc1 );
    ctx2 = stw_lookup_context_locked( dhglrc2 );
@@ -107,7 +107,7 @@ DrvShareLists(DHGLRC dhglrc1, DHGLRC dhglrc2)
    if (ctx1 && ctx2 && ctx2->st->share)
       ret = ctx2->st->share(ctx2->st, ctx1->st);
 
-   pipe_mutex_unlock( stw_dev->ctx_mutex );
+   stw_unlock_contexts(stw_dev);
 
    return ret;
 }
@@ -173,9 +173,9 @@ stw_create_context_attribs(HDC hdc, INT iLayerPlane, DHGLRC hShareContext,
    pfi = stw_pixelformat_get_info( iPixelFormat );
 
    if (hShareContext != 0) {
-      pipe_mutex_lock( stw_dev->ctx_mutex );
+      stw_lock_contexts(stw_dev);
       shareCtx = stw_lookup_context_locked( hShareContext );
-      pipe_mutex_unlock( stw_dev->ctx_mutex );
+      stw_unlock_contexts(stw_dev);
    }
 
    ctx = CALLOC_STRUCT( stw_context );
@@ -250,7 +250,7 @@ stw_create_context_attribs(HDC hdc, INT iLayerPlane, DHGLRC hShareContext,
       ctx->hud = hud_create(ctx->st->pipe, ctx->st->cso_context);
    }
 
-   pipe_mutex_lock( stw_dev->ctx_mutex );
+   stw_lock_contexts(stw_dev);
    if (handle) {
       /* We're replacing the context data for this handle. See the
        * wglCreateContextAttribsARB() function.
@@ -276,7 +276,8 @@ stw_create_context_attribs(HDC hdc, INT iLayerPlane, DHGLRC hShareContext,
 
    ctx->dhglrc = handle;
 
-   pipe_mutex_unlock( stw_dev->ctx_mutex );
+   stw_unlock_contexts(stw_dev);
+
    if (!ctx->dhglrc)
       goto no_hglrc;
 
@@ -303,10 +304,10 @@ DrvDeleteContext(DHGLRC dhglrc)
    if (!stw_dev)
       return FALSE;
 
-   pipe_mutex_lock( stw_dev->ctx_mutex );
+   stw_lock_contexts(stw_dev);
    ctx = stw_lookup_context_locked(dhglrc);
    handle_table_remove(stw_dev->ctx_table, dhglrc);
-   pipe_mutex_unlock( stw_dev->ctx_mutex );
+   stw_unlock_contexts(stw_dev);
 
    if (ctx) {
       struct stw_context *curctx = stw_current_context();
@@ -337,9 +338,9 @@ DrvReleaseContext(DHGLRC dhglrc)
    if (!stw_dev)
       return FALSE;
 
-   pipe_mutex_lock( stw_dev->ctx_mutex );
+   stw_lock_contexts(stw_dev);
    ctx = stw_lookup_context_locked( dhglrc );
-   pipe_mutex_unlock( stw_dev->ctx_mutex );
+   stw_unlock_contexts(stw_dev);
 
    if (!ctx)
       return FALSE;
@@ -408,9 +409,9 @@ stw_make_current(HDC hdc, DHGLRC dhglrc)
    }
 
    if (dhglrc) {
-      pipe_mutex_lock( stw_dev->ctx_mutex );
+      stw_lock_contexts(stw_dev);
       ctx = stw_lookup_context_locked( dhglrc );
-      pipe_mutex_unlock( stw_dev->ctx_mutex );
+      stw_unlock_contexts(stw_dev);
       if (!ctx) {
          goto fail;
       }
diff --git a/src/gallium/state_trackers/wgl/stw_device.c b/src/gallium/state_trackers/wgl/stw_device.c
index 25b6341ecad..2f51fdb1cbf 100644
--- a/src/gallium/state_trackers/wgl/stw_device.c
+++ b/src/gallium/state_trackers/wgl/stw_device.c
@@ -106,7 +106,7 @@ stw_init(const struct stw_winsys *stw_winsys)
          screen->get_param(screen, PIPE_CAP_MAX_TEXTURE_2D_LEVELS);
    stw_dev->max_2d_length = 1 << (stw_dev->max_2d_levels - 1);
 
-   pipe_mutex_init( stw_dev->ctx_mutex );
+   InitializeCriticalSection(&stw_dev->ctx_mutex);
    pipe_mutex_init( stw_dev->fb_mutex );
 
    stw_dev->ctx_table = handle_table_create();
@@ -156,9 +156,9 @@ stw_cleanup(void)
     * Abort cleanup if there are still active contexts. In some situations
     * this DLL may be unloaded before the DLL that is using GL contexts is.
     */
-   pipe_mutex_lock( stw_dev->ctx_mutex );
+   stw_lock_contexts(stw_dev);
    dhglrc = handle_table_get_first_handle(stw_dev->ctx_table);
-   pipe_mutex_unlock( stw_dev->ctx_mutex );
+   stw_unlock_contexts(stw_dev);
    if (dhglrc) {
       debug_printf("%s: contexts still active -- cleanup aborted\n", __FUNCTION__);
       stw_dev = NULL;
@@ -170,7 +170,7 @@ stw_cleanup(void)
    stw_framebuffer_cleanup();
    
    pipe_mutex_destroy( stw_dev->fb_mutex );
-   pipe_mutex_destroy( stw_dev->ctx_mutex );
+   DeleteCriticalSection(&stw_dev->ctx_mutex);
    
    FREE(stw_dev->smapi);
    stw_dev->stapi->destroy(stw_dev->stapi);
diff --git a/src/gallium/state_trackers/wgl/stw_device.h b/src/gallium/state_trackers/wgl/stw_device.h
index e35a4b94036..f271762f6b1 100644
--- a/src/gallium/state_trackers/wgl/stw_device.h
+++ b/src/gallium/state_trackers/wgl/stw_device.h
@@ -65,7 +65,7 @@ struct stw_device
 
    GLCALLBACKTABLE callbacks;
 
-   pipe_mutex ctx_mutex;
+   CRITICAL_SECTION ctx_mutex;
    struct handle_table *ctx_table;
    
    pipe_mutex fb_mutex;
@@ -89,4 +89,18 @@ stw_lookup_context_locked( DHGLRC dhglrc )
 }
 
 
+static inline void
+stw_lock_contexts(struct stw_device *stw_dev)
+{
+   EnterCriticalSection(&stw_dev->ctx_mutex);
+}
+
+
+static inline void
+stw_unlock_contexts(struct stw_device *stw_dev)
+{
+   LeaveCriticalSection(&stw_dev->ctx_mutex);
+}
+
+
 #endif /* STW_DEVICE_H_ */

From fce68832c5e06a046b69fbad1e1424aaec6fc0b2 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Tue, 10 Nov 2015 14:24:18 -0700
Subject: [PATCH 182/287] st/wgl: reimplement stw_device::fb_mutex with
 CRITICAL_SECTION

Reviewed-by: Sinclair Yeh <syeh@vmware.com>
Reviewed-by: Charmaine Lee <charmainel@vmware.com>
---
 src/gallium/state_trackers/wgl/stw_device.c   |  4 ++--
 src/gallium/state_trackers/wgl/stw_device.h   | 16 ++++++++++++-
 .../state_trackers/wgl/stw_framebuffer.c      | 24 +++++++++----------
 3 files changed, 29 insertions(+), 15 deletions(-)

diff --git a/src/gallium/state_trackers/wgl/stw_device.c b/src/gallium/state_trackers/wgl/stw_device.c
index 2f51fdb1cbf..287b937afe5 100644
--- a/src/gallium/state_trackers/wgl/stw_device.c
+++ b/src/gallium/state_trackers/wgl/stw_device.c
@@ -107,7 +107,7 @@ stw_init(const struct stw_winsys *stw_winsys)
    stw_dev->max_2d_length = 1 << (stw_dev->max_2d_levels - 1);
 
    InitializeCriticalSection(&stw_dev->ctx_mutex);
-   pipe_mutex_init( stw_dev->fb_mutex );
+   InitializeCriticalSection(&stw_dev->fb_mutex);
 
    stw_dev->ctx_table = handle_table_create();
    if (!stw_dev->ctx_table) {
@@ -169,7 +169,7 @@ stw_cleanup(void)
 
    stw_framebuffer_cleanup();
    
-   pipe_mutex_destroy( stw_dev->fb_mutex );
+   DeleteCriticalSection(&stw_dev->fb_mutex);
    DeleteCriticalSection(&stw_dev->ctx_mutex);
    
    FREE(stw_dev->smapi);
diff --git a/src/gallium/state_trackers/wgl/stw_device.h b/src/gallium/state_trackers/wgl/stw_device.h
index f271762f6b1..d695e8f4e12 100644
--- a/src/gallium/state_trackers/wgl/stw_device.h
+++ b/src/gallium/state_trackers/wgl/stw_device.h
@@ -68,7 +68,7 @@ struct stw_device
    CRITICAL_SECTION ctx_mutex;
    struct handle_table *ctx_table;
    
-   pipe_mutex fb_mutex;
+   CRITICAL_SECTION fb_mutex;
    struct stw_framebuffer *fb_head;
    
 #ifdef DEBUG
@@ -103,4 +103,18 @@ stw_unlock_contexts(struct stw_device *stw_dev)
 }
 
 
+static inline void
+stw_lock_framebuffers(struct stw_device *stw_dev)
+{
+   EnterCriticalSection(&stw_dev->fb_mutex);
+}
+
+
+static inline void
+stw_unlock_framebuffers(struct stw_device *stw_dev)
+{
+   LeaveCriticalSection(&stw_dev->fb_mutex);
+}
+
+
 #endif /* STW_DEVICE_H_ */
diff --git a/src/gallium/state_trackers/wgl/stw_framebuffer.c b/src/gallium/state_trackers/wgl/stw_framebuffer.c
index 09dede8d2a1..ce5b2c3e0b8 100644
--- a/src/gallium/state_trackers/wgl/stw_framebuffer.c
+++ b/src/gallium/state_trackers/wgl/stw_framebuffer.c
@@ -230,11 +230,11 @@ stw_call_window_proc(int nCode, WPARAM wParam, LPARAM lParam)
       }
    }
    else if (pParams->message == WM_DESTROY) {
-      pipe_mutex_lock( stw_dev->fb_mutex );
+      stw_lock_framebuffers(stw_dev);
       fb = stw_framebuffer_from_hwnd_locked( pParams->hwnd );
       if (fb)
          stw_framebuffer_destroy_locked(fb);
-      pipe_mutex_unlock( stw_dev->fb_mutex );
+      stw_unlock_framebuffers(stw_dev);
    }
 
    return CallNextHookEx(tls_data->hCallWndProcHook, nCode, wParam, lParam);
@@ -304,10 +304,10 @@ stw_framebuffer_create(HDC hdc, int iPixelFormat)
     */
    pipe_mutex_lock( fb->mutex );
 
-   pipe_mutex_lock( stw_dev->fb_mutex );
+   stw_lock_framebuffers(stw_dev);
    fb->next = stw_dev->fb_head;
    stw_dev->fb_head = fb;
-   pipe_mutex_unlock( stw_dev->fb_mutex );
+   stw_unlock_framebuffers(stw_dev);
 
    return fb;
 }
@@ -328,12 +328,12 @@ stw_framebuffer_reference(struct stw_framebuffer **ptr,
    if (fb)
       fb->refcnt++;
    if (old_fb) {
-      pipe_mutex_lock(stw_dev->fb_mutex);
+      stw_lock_framebuffers(stw_dev);
 
       pipe_mutex_lock(old_fb->mutex);
       stw_framebuffer_destroy_locked(old_fb);
 
-      pipe_mutex_unlock(stw_dev->fb_mutex);
+      stw_unlock_framebuffers(stw_dev);
    }
 
    *ptr = fb;
@@ -372,7 +372,7 @@ stw_framebuffer_cleanup(void)
    if (!stw_dev)
       return;
 
-   pipe_mutex_lock( stw_dev->fb_mutex );
+   stw_lock_framebuffers(stw_dev);
 
    fb = stw_dev->fb_head;
    while (fb) {
@@ -385,7 +385,7 @@ stw_framebuffer_cleanup(void)
    }
    stw_dev->fb_head = NULL;
 
-   pipe_mutex_unlock( stw_dev->fb_mutex );
+   stw_unlock_framebuffers(stw_dev);
 }
 
 
@@ -419,9 +419,9 @@ stw_framebuffer_from_hdc(HDC hdc)
    if (!stw_dev)
       return NULL;
 
-   pipe_mutex_lock( stw_dev->fb_mutex );
+   stw_lock_framebuffers(stw_dev);
    fb = stw_framebuffer_from_hdc_locked(hdc);
-   pipe_mutex_unlock( stw_dev->fb_mutex );
+   stw_unlock_framebuffers(stw_dev);
 
    return fb;
 }
@@ -436,9 +436,9 @@ stw_framebuffer_from_hwnd(HWND hwnd)
 {
    struct stw_framebuffer *fb;
 
-   pipe_mutex_lock( stw_dev->fb_mutex );
+   stw_lock_framebuffers(stw_dev);
    fb = stw_framebuffer_from_hwnd_locked(hwnd);
-   pipe_mutex_unlock( stw_dev->fb_mutex );
+   stw_unlock_framebuffers(stw_dev);
 
    return fb;
 }

From f71508ae793aaea999d3aa5bdd5f4cf157f016c6 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Tue, 10 Nov 2015 14:34:51 -0700
Subject: [PATCH 183/287] st/wgl: include u_debug.h

To get declaration for debug_printf() directly instead of getting it
indirectly through os_thread.h

Reviewed-by: Sinclair Yeh <syeh@vmware.com>
Reviewed-by: Charmaine Lee <charmainel@vmware.com>
---
 src/gallium/state_trackers/wgl/stw_ext_context.c    | 2 ++
 src/gallium/state_trackers/wgl/stw_ext_pbuffer.c    | 2 ++
 src/gallium/state_trackers/wgl/stw_getprocaddress.c | 2 ++
 3 files changed, 6 insertions(+)

diff --git a/src/gallium/state_trackers/wgl/stw_ext_context.c b/src/gallium/state_trackers/wgl/stw_ext_context.c
index 6af20627398..4c58316a0e1 100644
--- a/src/gallium/state_trackers/wgl/stw_ext_context.c
+++ b/src/gallium/state_trackers/wgl/stw_ext_context.c
@@ -35,6 +35,8 @@
 #include "stw_device.h"
 #include "stw_ext_context.h"
 
+#include "util/u_debug.h"
+
 
 wglCreateContext_t wglCreateContext_func = 0;
 wglDeleteContext_t wglDeleteContext_func = 0;
diff --git a/src/gallium/state_trackers/wgl/stw_ext_pbuffer.c b/src/gallium/state_trackers/wgl/stw_ext_pbuffer.c
index 0bd60c064d7..e7887b6ac0c 100644
--- a/src/gallium/state_trackers/wgl/stw_ext_pbuffer.c
+++ b/src/gallium/state_trackers/wgl/stw_ext_pbuffer.c
@@ -35,6 +35,8 @@
 #include "pipe/p_defines.h"
 #include "pipe/p_screen.h"
 
+#include "util/u_debug.h"
+
 #include "stw_device.h"
 #include "stw_pixelformat.h"
 #include "stw_framebuffer.h"
diff --git a/src/gallium/state_trackers/wgl/stw_getprocaddress.c b/src/gallium/state_trackers/wgl/stw_getprocaddress.c
index 33949b6606f..28d10d2e312 100644
--- a/src/gallium/state_trackers/wgl/stw_getprocaddress.c
+++ b/src/gallium/state_trackers/wgl/stw_getprocaddress.c
@@ -37,6 +37,8 @@
 #include "stw_icd.h"
 #include "stw_nopfuncs.h"
 
+#include "util/u_debug.h"
+
 struct stw_extension_entry
 {
    const char *name;

From dabc423ed0f946bc32268a42dc8fee12a1cd0b0a Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Tue, 10 Nov 2015 14:38:25 -0700
Subject: [PATCH 184/287] st/wgl: reimplement stw_framebuffer::mutex with
 CRITICAL_SECTION

v2: update comments on the stw_framebuffer::mutex field regarding locking
order.

Reviewed-by: Sinclair Yeh <syeh@vmware.com>
Reviewed-by: Charmaine Lee <charmainel@vmware.com>
---
 src/gallium/state_trackers/wgl/stw_device.h   |  1 -
 .../state_trackers/wgl/stw_framebuffer.c      | 27 +++++------------
 .../state_trackers/wgl/stw_framebuffer.h      | 29 ++++++++++++++-----
 src/gallium/state_trackers/wgl/stw_st.c       |  4 +--
 4 files changed, 32 insertions(+), 29 deletions(-)

diff --git a/src/gallium/state_trackers/wgl/stw_device.h b/src/gallium/state_trackers/wgl/stw_device.h
index d695e8f4e12..3f0dffe408b 100644
--- a/src/gallium/state_trackers/wgl/stw_device.h
+++ b/src/gallium/state_trackers/wgl/stw_device.h
@@ -30,7 +30,6 @@
 
 
 #include "pipe/p_compiler.h"
-#include "os/os_thread.h"
 #include "util/u_handle_table.h"
 #include "stw_icd.h"
 #include "stw_pixelformat.h"
diff --git a/src/gallium/state_trackers/wgl/stw_framebuffer.c b/src/gallium/state_trackers/wgl/stw_framebuffer.c
index ce5b2c3e0b8..a3342ab2562 100644
--- a/src/gallium/state_trackers/wgl/stw_framebuffer.c
+++ b/src/gallium/state_trackers/wgl/stw_framebuffer.c
@@ -54,7 +54,7 @@ stw_framebuffer_from_hwnd_locked(HWND hwnd)
 
    for (fb = stw_dev->fb_head; fb != NULL; fb = fb->next)
       if (fb->hWnd == hwnd) {
-         pipe_mutex_lock(fb->mutex);
+         stw_framebuffer_lock(fb);
          return fb;
       }
 
@@ -77,7 +77,7 @@ stw_framebuffer_destroy_locked(struct stw_framebuffer *fb)
    /* check the reference count */
    fb->refcnt--;
    if (fb->refcnt) {
-      pipe_mutex_unlock( fb->mutex );
+      stw_framebuffer_release(fb);
       return;
    }
 
@@ -95,25 +95,14 @@ stw_framebuffer_destroy_locked(struct stw_framebuffer *fb)
 
    stw_st_destroy_framebuffer_locked(fb->stfb);
 
-   pipe_mutex_unlock( fb->mutex );
+   stw_framebuffer_release(fb);
 
-   pipe_mutex_destroy( fb->mutex );
+   DeleteCriticalSection(&fb->mutex);
 
    FREE( fb );
 }
 
 
-/**
- * Unlock the given stw_framebuffer object.
- */
-void
-stw_framebuffer_release(struct stw_framebuffer *fb)
-{
-   assert(fb);
-   pipe_mutex_unlock( fb->mutex );
-}
-
-
 /**
  * Query the size of the given framebuffer's on-screen window and update
  * the stw_framebuffer's width/height.
@@ -296,13 +285,13 @@ stw_framebuffer_create(HDC hdc, int iPixelFormat)
 
    stw_framebuffer_get_size(fb);
 
-   pipe_mutex_init( fb->mutex );
+   InitializeCriticalSection(&fb->mutex);
 
    /* This is the only case where we lock the stw_framebuffer::mutex before
     * stw_dev::fb_mutex, since no other thread can know about this framebuffer
     * and we must prevent any other thread from destroying it before we return.
     */
-   pipe_mutex_lock( fb->mutex );
+   stw_framebuffer_lock(fb);
 
    stw_lock_framebuffers(stw_dev);
    fb->next = stw_dev->fb_head;
@@ -330,7 +319,7 @@ stw_framebuffer_reference(struct stw_framebuffer **ptr,
    if (old_fb) {
       stw_lock_framebuffers(stw_dev);
 
-      pipe_mutex_lock(old_fb->mutex);
+      stw_framebuffer_lock(old_fb);
       stw_framebuffer_destroy_locked(old_fb);
 
       stw_unlock_framebuffers(stw_dev);
@@ -378,7 +367,7 @@ stw_framebuffer_cleanup(void)
    while (fb) {
       next = fb->next;
 
-      pipe_mutex_lock(fb->mutex);
+      stw_framebuffer_lock(fb);
       stw_framebuffer_destroy_locked(fb);
 
       fb = next;
diff --git a/src/gallium/state_trackers/wgl/stw_framebuffer.h b/src/gallium/state_trackers/wgl/stw_framebuffer.h
index c7498b245cb..ce9aace7aa0 100644
--- a/src/gallium/state_trackers/wgl/stw_framebuffer.h
+++ b/src/gallium/state_trackers/wgl/stw_framebuffer.h
@@ -30,7 +30,8 @@
 
 #include <windows.h>
 
-#include "os/os_thread.h"
+#include "util/u_debug.h"
+
 
 struct pipe_resource;
 struct st_framebuffer_iface;
@@ -45,11 +46,11 @@ struct stw_framebuffer
     * This mutex has two purposes:
     * - protect the access to the mutable data members below
     * - prevent the framebuffer from being deleted while being accessed.
-    * 
-    * It is OK to lock this mutex while holding the stw_device::fb_mutex lock, 
-    * but the opposite must never happen.
+    *
+    * Note: if both this mutex and the stw_device::fb_mutex need to be locked,
+    * the stw_device::fb_mutex needs to be locked first.
     */
-   pipe_mutex mutex;
+   CRITICAL_SECTION mutex;
    
    /*
     * Immutable members.
@@ -148,13 +149,27 @@ stw_framebuffer_present_locked(HDC hdc,
 void
 stw_framebuffer_update(struct stw_framebuffer *fb);
 
+
+static inline void
+stw_framebuffer_lock(struct stw_framebuffer *fb)
+{
+   assert(fb);
+   EnterCriticalSection(&fb->mutex);
+}
+
+
 /**
  * Release stw_framebuffer::mutex lock. This framebuffer must not be accessed
  * after calling this function, as it may have been deleted by another thread
  * in the meanwhile.
  */
-void
-stw_framebuffer_release(struct stw_framebuffer *fb);
+static inline void
+stw_framebuffer_release(struct stw_framebuffer *fb)
+{
+   assert(fb);
+   LeaveCriticalSection(&fb->mutex);
+}
+
 
 /**
  * Cleanup any existing framebuffers when exiting application.
diff --git a/src/gallium/state_trackers/wgl/stw_st.c b/src/gallium/state_trackers/wgl/stw_st.c
index 2d5d4379932..6e5ccbb8d5c 100644
--- a/src/gallium/state_trackers/wgl/stw_st.c
+++ b/src/gallium/state_trackers/wgl/stw_st.c
@@ -136,7 +136,7 @@ stw_st_framebuffer_validate(struct st_context_iface *stctx,
    for (i = 0; i < count; i++)
       statt_mask |= 1 << statts[i];
 
-   pipe_mutex_lock(stwfb->fb->mutex);
+   stw_framebuffer_lock(stwfb->fb);
 
    if (stwfb->fb->must_resize || (statt_mask & ~stwfb->texture_mask)) {
       stw_st_framebuffer_validate_locked(&stwfb->base,
@@ -185,7 +185,7 @@ stw_st_framebuffer_flush_front(struct st_context_iface *stctx,
    boolean ret;
    HDC hDC;
 
-   pipe_mutex_lock(stwfb->fb->mutex);
+   stw_framebuffer_lock(stwfb->fb);
 
    /* We must not cache HDCs anywhere, as they can be invalidated by the
     * application, or screen resolution changes. */

From 166769fe4bf4042ecb2a54ee5c7b23e3b0cd471d Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Tue, 10 Nov 2015 14:51:26 -0700
Subject: [PATCH 185/287] st/wgl: rename stw_framebuffer_release() to
 stw_framebuffer_unlock()

To match the new stw_framebuffer_lock() function.

Reviewed-by: Sinclair Yeh <syeh@vmware.com>
Reviewed-by: Charmaine Lee <charmainel@vmware.com>
---
 src/gallium/state_trackers/wgl/stw_context.c  |  4 ++--
 .../state_trackers/wgl/stw_ext_pbuffer.c      |  2 +-
 .../state_trackers/wgl/stw_framebuffer.c      | 20 +++++++++----------
 .../state_trackers/wgl/stw_framebuffer.h      |  8 ++++----
 src/gallium/state_trackers/wgl/stw_st.c       |  4 ++--
 5 files changed, 19 insertions(+), 19 deletions(-)

diff --git a/src/gallium/state_trackers/wgl/stw_context.c b/src/gallium/state_trackers/wgl/stw_context.c
index 229fdfd1625..63877235408 100644
--- a/src/gallium/state_trackers/wgl/stw_context.c
+++ b/src/gallium/state_trackers/wgl/stw_context.c
@@ -165,7 +165,7 @@ stw_create_context_attribs(HDC hdc, INT iLayerPlane, DHGLRC hShareContext,
    fb = stw_framebuffer_from_hdc( hdc );
    if (fb) {
       iPixelFormat = fb->iPixelFormat;
-      stw_framebuffer_release(fb);
+      stw_framebuffer_unlock(fb);
    } else {
       return 0;
    }
@@ -451,7 +451,7 @@ stw_make_current(HDC hdc, DHGLRC dhglrc)
 fail:
 
    if (fb) {
-      stw_framebuffer_release(fb);
+      stw_framebuffer_unlock(fb);
    }
 
    /* On failure, make the thread's current rendering context not current
diff --git a/src/gallium/state_trackers/wgl/stw_ext_pbuffer.c b/src/gallium/state_trackers/wgl/stw_ext_pbuffer.c
index e7887b6ac0c..c99fa3e513d 100644
--- a/src/gallium/state_trackers/wgl/stw_ext_pbuffer.c
+++ b/src/gallium/state_trackers/wgl/stw_ext_pbuffer.c
@@ -222,7 +222,7 @@ wglCreatePbufferARB(HDC hCurrentDC,
    fb->bPbuffer = TRUE;
    iDisplayablePixelFormat = fb->iDisplayablePixelFormat;
 
-   stw_framebuffer_release(fb);
+   stw_framebuffer_unlock(fb);
 
    /*
     * We need to set a displayable pixel format on the hidden window DC
diff --git a/src/gallium/state_trackers/wgl/stw_framebuffer.c b/src/gallium/state_trackers/wgl/stw_framebuffer.c
index a3342ab2562..7d7248e0030 100644
--- a/src/gallium/state_trackers/wgl/stw_framebuffer.c
+++ b/src/gallium/state_trackers/wgl/stw_framebuffer.c
@@ -77,7 +77,7 @@ stw_framebuffer_destroy_locked(struct stw_framebuffer *fb)
    /* check the reference count */
    fb->refcnt--;
    if (fb->refcnt) {
-      stw_framebuffer_release(fb);
+      stw_framebuffer_unlock(fb);
       return;
    }
 
@@ -95,7 +95,7 @@ stw_framebuffer_destroy_locked(struct stw_framebuffer *fb)
 
    stw_st_destroy_framebuffer_locked(fb->stfb);
 
-   stw_framebuffer_release(fb);
+   stw_framebuffer_unlock(fb);
 
    DeleteCriticalSection(&fb->mutex);
 
@@ -214,7 +214,7 @@ stw_call_window_proc(int nCode, WPARAM wParam, LPARAM lParam)
              * of the client area via GetClientRect.
              */
             stw_framebuffer_get_size(fb);
-            stw_framebuffer_release(fb);
+            stw_framebuffer_unlock(fb);
          }
       }
    }
@@ -456,7 +456,7 @@ DrvSetPixelFormat(HDC hdc, LONG iPixelFormat)
        */
       boolean bPbuffer = fb->bPbuffer;
 
-      stw_framebuffer_release( fb );
+      stw_framebuffer_unlock( fb );
 
       return bPbuffer;
    }
@@ -466,7 +466,7 @@ DrvSetPixelFormat(HDC hdc, LONG iPixelFormat)
       return FALSE;
    }
 
-   stw_framebuffer_release( fb );
+   stw_framebuffer_unlock( fb );
 
    /* Some applications mistakenly use the undocumented wglSetPixelFormat
     * function instead of SetPixelFormat, so we call SetPixelFormat here to
@@ -491,7 +491,7 @@ stw_pixelformat_get(HDC hdc)
    fb = stw_framebuffer_from_hdc(hdc);
    if (fb) {
       iPixelFormat = fb->iPixelFormat;
-      stw_framebuffer_release(fb);
+      stw_framebuffer_unlock(fb);
    }
 
    return iPixelFormat;
@@ -548,7 +548,7 @@ DrvPresentBuffers(HDC hdc, PGLPRESENTBUFFERSDATA data)
    stw_framebuffer_update(fb);
    stw_notify_current_locked(fb);
 
-   stw_framebuffer_release(fb);
+   stw_framebuffer_unlock(fb);
 
    return TRUE;
 }
@@ -577,7 +577,7 @@ stw_framebuffer_present_locked(HDC hdc,
       data.pPrivateData = (void *)res;
 
       stw_notify_current_locked(fb);
-      stw_framebuffer_release(fb);
+      stw_framebuffer_unlock(fb);
 
       return stw_dev->callbacks.wglCbPresentBuffers(hdc, &data);
    }
@@ -588,7 +588,7 @@ stw_framebuffer_present_locked(HDC hdc,
 
       stw_framebuffer_update(fb);
       stw_notify_current_locked(fb);
-      stw_framebuffer_release(fb);
+      stw_framebuffer_unlock(fb);
 
       return TRUE;
    }
@@ -609,7 +609,7 @@ DrvSwapBuffers(HDC hdc)
       return FALSE;
 
    if (!(fb->pfi->pfd.dwFlags & PFD_DOUBLEBUFFER)) {
-      stw_framebuffer_release(fb);
+      stw_framebuffer_unlock(fb);
       return TRUE;
    }
 
diff --git a/src/gallium/state_trackers/wgl/stw_framebuffer.h b/src/gallium/state_trackers/wgl/stw_framebuffer.h
index ce9aace7aa0..109c79dd002 100644
--- a/src/gallium/state_trackers/wgl/stw_framebuffer.h
+++ b/src/gallium/state_trackers/wgl/stw_framebuffer.h
@@ -113,7 +113,7 @@ struct stw_framebuffer
 /**
  * Create a new framebuffer object which will correspond to the given HDC.
  * 
- * This function will acquire stw_framebuffer::mutex. stw_framebuffer_release
+ * This function will acquire stw_framebuffer::mutex. stw_framebuffer_unlock
  * must be called when done 
  */
 struct stw_framebuffer *
@@ -126,7 +126,7 @@ stw_framebuffer_reference(struct stw_framebuffer **ptr,
 /**
  * Search a framebuffer with a matching HWND.
  * 
- * This function will acquire stw_framebuffer::mutex. stw_framebuffer_release
+ * This function will acquire stw_framebuffer::mutex. stw_framebuffer_unlock
  * must be called when done 
  */
 struct stw_framebuffer *
@@ -135,7 +135,7 @@ stw_framebuffer_from_hwnd(HWND hwnd);
 /**
  * Search a framebuffer with a matching HDC.
  * 
- * This function will acquire stw_framebuffer::mutex. stw_framebuffer_release
+ * This function will acquire stw_framebuffer::mutex. stw_framebuffer_unlock
  * must be called when done 
  */
 struct stw_framebuffer *
@@ -164,7 +164,7 @@ stw_framebuffer_lock(struct stw_framebuffer *fb)
  * in the meanwhile.
  */
 static inline void
-stw_framebuffer_release(struct stw_framebuffer *fb)
+stw_framebuffer_unlock(struct stw_framebuffer *fb)
 {
    assert(fb);
    LeaveCriticalSection(&fb->mutex);
diff --git a/src/gallium/state_trackers/wgl/stw_st.c b/src/gallium/state_trackers/wgl/stw_st.c
index 6e5ccbb8d5c..ecf4cfcc843 100644
--- a/src/gallium/state_trackers/wgl/stw_st.c
+++ b/src/gallium/state_trackers/wgl/stw_st.c
@@ -149,7 +149,7 @@ stw_st_framebuffer_validate(struct st_context_iface *stctx,
       pipe_resource_reference(&out[i], stwfb->textures[statts[i]]);
    }
 
-   stw_framebuffer_release(stwfb->fb);
+   stw_framebuffer_unlock(stwfb->fb);
 
    return TRUE;
 }
@@ -170,7 +170,7 @@ stw_st_framebuffer_present_locked(HDC hdc,
       stw_framebuffer_present_locked(hdc, stwfb->fb, resource);
    }
    else {
-      stw_framebuffer_release(stwfb->fb);
+      stw_framebuffer_unlock(stwfb->fb);
    }
 
    return TRUE;

From a1c9feafd592d13f8215e7a535c68a03fd84849e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jos=C3=A9=20Fonseca?= <jfonseca@vmware.com>
Date: Tue, 10 Nov 2015 14:41:30 -0700
Subject: [PATCH 186/287] st/wgl: add some mutex checking code

This would have caught the locking bug that was fixed in the earlier
"st/wgl: fix locking issue in stw_st_framebuffer_present_locked()"
patch.

v2: minor coding style changes by Brian.

Reviewed-by: Sinclair Yeh <syeh@vmware.com>
Reviewed-by: Charmaine Lee <charmainel@vmware.com>
---
 src/gallium/state_trackers/wgl/stw_st.c | 26 +++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

diff --git a/src/gallium/state_trackers/wgl/stw_st.c b/src/gallium/state_trackers/wgl/stw_st.c
index ecf4cfcc843..78586db1969 100644
--- a/src/gallium/state_trackers/wgl/stw_st.c
+++ b/src/gallium/state_trackers/wgl/stw_st.c
@@ -52,6 +52,28 @@ stw_st_framebuffer(struct st_framebuffer_iface *stfb)
    return (struct stw_st_framebuffer *) stfb;
 }
 
+
+/**
+ * Is the given mutex held by the calling thread?
+ */
+static bool
+own_mutex(const CRITICAL_SECTION *cs)
+{
+   // We can't compare OwningThread with our thread handle/id (see
+   // http://stackoverflow.com/a/12675635 ) but we can compare with the
+   // OwningThread member of a critical section we know we own.
+   CRITICAL_SECTION dummy;
+   InitializeCriticalSection(&dummy);
+   EnterCriticalSection(&dummy);
+   if (0)
+      _debug_printf("%p %p\n", cs->OwningThread, dummy.OwningThread);
+   bool ret = cs->OwningThread == dummy.OwningThread;
+   LeaveCriticalSection(&dummy);
+   DeleteCriticalSection(&dummy);
+   return ret;
+}
+
+
 /**
  * Remove outdated textures and create the requested ones.
  */
@@ -165,6 +187,8 @@ stw_st_framebuffer_present_locked(HDC hdc,
    struct stw_st_framebuffer *stwfb = stw_st_framebuffer(stfb);
    struct pipe_resource *resource;
 
+   assert(own_mutex(&stwfb->fb->mutex));
+
    resource = stwfb->textures[statt];
    if (resource) {
       stw_framebuffer_present_locked(hdc, stwfb->fb, resource);
@@ -173,6 +197,8 @@ stw_st_framebuffer_present_locked(HDC hdc,
       stw_framebuffer_unlock(stwfb->fb);
    }
 
+   assert(!own_mutex(&stwfb->fb->mutex));
+
    return TRUE;
 }
 

From f45b644e11127e07fe909f418d1c0e2ca3285743 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Tue, 10 Nov 2015 14:49:01 -0700
Subject: [PATCH 187/287] st/wgl: add a lock assertion in
 stw_framebuffer_from_hwnd_locked()

Reviewed-by: Sinclair Yeh <syeh@vmware.com>
Reviewed-by: Charmaine Lee <charmainel@vmware.com>
---
 src/gallium/state_trackers/wgl/stw_framebuffer.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/gallium/state_trackers/wgl/stw_framebuffer.c b/src/gallium/state_trackers/wgl/stw_framebuffer.c
index 7d7248e0030..b49bc22e21f 100644
--- a/src/gallium/state_trackers/wgl/stw_framebuffer.c
+++ b/src/gallium/state_trackers/wgl/stw_framebuffer.c
@@ -55,6 +55,7 @@ stw_framebuffer_from_hwnd_locked(HWND hwnd)
    for (fb = stw_dev->fb_head; fb != NULL; fb = fb->next)
       if (fb->hWnd == hwnd) {
          stw_framebuffer_lock(fb);
+         assert(fb->mutex.RecursionCount == 1);
          return fb;
       }
 

From 3e74038280319cf02c55f3879d95d7c1aec210fe Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Tue, 10 Nov 2015 14:49:17 -0700
Subject: [PATCH 188/287] st/wgl: add a comment about recursive locking in
 stw_make_current()

Reviewed-by: Sinclair Yeh <syeh@vmware.com>
Reviewed-by: Charmaine Lee <charmainel@vmware.com>
---
 src/gallium/state_trackers/wgl/stw_context.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/gallium/state_trackers/wgl/stw_context.c b/src/gallium/state_trackers/wgl/stw_context.c
index 63877235408..5978ca63677 100644
--- a/src/gallium/state_trackers/wgl/stw_context.c
+++ b/src/gallium/state_trackers/wgl/stw_context.c
@@ -441,6 +441,10 @@ stw_make_current(HDC hdc, DHGLRC dhglrc)
       /* Bind the new framebuffer */
       ctx->hdc = hdc;
 
+      /* Note: when we call this function we will wind up in the
+       * stw_st_framebuffer_validate_locked() function which will incur
+       * a recursive fb->mutex lock.
+       */
       ret = stw_dev->stapi->make_current(stw_dev->stapi, ctx->st,
                                          fb->stfb, fb->stfb);
       stw_framebuffer_reference(&ctx->current_framebuffer, fb);

From 903050694bd42b658145b96e501c803c3c21127e Mon Sep 17 00:00:00 2001
From: Matt Turner <mattst88@gmail.com>
Date: Thu, 8 Oct 2015 14:19:10 -0700
Subject: [PATCH 189/287] i965: Consolidate is_3src() functions.

Otherwise I'll have to add another later in this series.
---
 src/mesa/drivers/dri/i965/brw_eu.h         | 6 ++++++
 src/mesa/drivers/dri/i965/brw_eu_compact.c | 7 -------
 src/mesa/drivers/dri/i965/brw_shader.cpp   | 2 +-
 3 files changed, 7 insertions(+), 8 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h
index 0ac1ad9378b..1345db77c80 100644
--- a/src/mesa/drivers/dri/i965/brw_eu.h
+++ b/src/mesa/drivers/dri/i965/brw_eu.h
@@ -533,6 +533,12 @@ next_offset(const struct brw_device_info *devinfo, void *store, int offset)
       return offset + 16;
 }
 
+static inline bool
+is_3src(enum opcode opcode)
+{
+   return opcode_descs[opcode].nsrc == 3;
+}
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/mesa/drivers/dri/i965/brw_eu_compact.c b/src/mesa/drivers/dri/i965/brw_eu_compact.c
index 07ace6bfbcb..bca8a84154f 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_compact.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_compact.c
@@ -954,13 +954,6 @@ is_compactable_immediate(unsigned imm)
    return imm == 0 || imm == 0xfffff000;
 }
 
-/* Returns whether an opcode takes three sources. */
-static bool
-is_3src(uint32_t op)
-{
-   return opcode_descs[op].nsrc == 3;
-}
-
 /**
  * Tries to compact instruction src into dst.
  *
diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp
index a6b57c3c3f1..481b69c6e18 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -782,7 +782,7 @@ backend_instruction::is_commutative() const
 bool
 backend_instruction::is_3src() const
 {
-   return opcode < ARRAY_SIZE(opcode_descs) && opcode_descs[opcode].nsrc == 3;
+   return ::is_3src(opcode);
 }
 
 bool

From 238877207eda2d7513cb0d2b415f8fc7ac821d0e Mon Sep 17 00:00:00 2001
From: Matt Turner <mattst88@gmail.com>
Date: Mon, 29 Jun 2015 15:05:19 -0700
Subject: [PATCH 190/287] ralloc: Set *start in ralloc_vasprintf_rewrite_tail()
 if str is NULL.

We were leaving it undefined, even though we were writing a string to
*str.

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/util/ralloc.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/util/ralloc.c b/src/util/ralloc.c
index e07fce74f23..bb4cf9612eb 100644
--- a/src/util/ralloc.c
+++ b/src/util/ralloc.c
@@ -499,6 +499,7 @@ ralloc_vasprintf_rewrite_tail(char **str, size_t *start, const char *fmt,
    if (unlikely(*str == NULL)) {
       // Assuming a NULL context is probably bad, but it's expected behavior.
       *str = ralloc_vasprintf(NULL, fmt, args);
+      *start = strlen(*str);
       return true;
    }
 

From 0865e743c18cb7ba65962d794be8387d6edc0b8a Mon Sep 17 00:00:00 2001
From: Matt Turner <mattst88@gmail.com>
Date: Mon, 29 Jun 2015 14:03:55 -0700
Subject: [PATCH 191/287] i965: Fill out instruction list.

Add some instructions: illegal, movi, sends, sendsc.

Remove some instructions with reused opcodes: msave, mrestore, push,
pop, goto. I did have some gross code for disassembling opcodes
per-generation, but there's very little meaningful overlap so it's
probably not needed.

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/mesa/drivers/dri/i965/brw_defines.h  | 38 +++++++++++++++++++-----
 src/mesa/drivers/dri/i965/brw_disasm.c   | 16 ++++++----
 src/mesa/drivers/dri/i965/brw_shader.cpp |  2 +-
 3 files changed, 42 insertions(+), 14 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index 99a3a2d25d8..62bdb1fbb67 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -840,43 +840,62 @@ enum PACKED brw_horizontal_stride {
 
 enum opcode {
    /* These are the actual hardware opcodes. */
+   BRW_OPCODE_ILLEGAL = 0,
    BRW_OPCODE_MOV =	1,
    BRW_OPCODE_SEL =	2,
+   BRW_OPCODE_MOVI =	3,   /**< G45+ */
    BRW_OPCODE_NOT =	4,
    BRW_OPCODE_AND =	5,
    BRW_OPCODE_OR =	6,
    BRW_OPCODE_XOR =	7,
    BRW_OPCODE_SHR =	8,
    BRW_OPCODE_SHL =	9,
+   // BRW_OPCODE_DIM =	10,  /**< Gen7.5 only */ /* Reused */
+   // BRW_OPCODE_SMOV =	10,  /**< Gen8+       */ /* Reused */
+   /* Reserved - 11 */
    BRW_OPCODE_ASR =	12,
+   /* Reserved - 13-15 */
    BRW_OPCODE_CMP =	16,
    BRW_OPCODE_CMPN =	17,
    BRW_OPCODE_CSEL =	18,  /**< Gen8+ */
    BRW_OPCODE_F32TO16 = 19,  /**< Gen7 only */
    BRW_OPCODE_F16TO32 = 20,  /**< Gen7 only */
+   /* Reserved - 21-22 */
    BRW_OPCODE_BFREV =	23,  /**< Gen7+ */
    BRW_OPCODE_BFE =	24,  /**< Gen7+ */
    BRW_OPCODE_BFI1 =	25,  /**< Gen7+ */
    BRW_OPCODE_BFI2 =	26,  /**< Gen7+ */
+   /* Reserved - 27-31 */
    BRW_OPCODE_JMPI =	32,
+   // BRW_OPCODE_BRD =	33,  /**< Gen7+ */
    BRW_OPCODE_IF =	34,
-   BRW_OPCODE_IFF =	35,  /**< Pre-Gen6 */
+   BRW_OPCODE_IFF =	35,  /**< Pre-Gen6    */ /* Reused */
+   // BRW_OPCODE_BRC =	35,  /**< Gen7+       */ /* Reused */
    BRW_OPCODE_ELSE =	36,
    BRW_OPCODE_ENDIF =	37,
-   BRW_OPCODE_DO =	38,
+   BRW_OPCODE_DO =	38,  /**< Pre-Gen6    */ /* Reused */
+   // BRW_OPCODE_CASE =	38,  /**< Gen6 only   */ /* Reused */
    BRW_OPCODE_WHILE =	39,
    BRW_OPCODE_BREAK =	40,
    BRW_OPCODE_CONTINUE = 41,
    BRW_OPCODE_HALT =	42,
-   BRW_OPCODE_MSAVE =	44,  /**< Pre-Gen6 */
-   BRW_OPCODE_MRESTORE = 45, /**< Pre-Gen6 */
-   BRW_OPCODE_PUSH =	46,  /**< Pre-Gen6 */
-   BRW_OPCODE_GOTO =	46,  /**< Gen8+    */
-   BRW_OPCODE_POP =	47,  /**< Pre-Gen6 */
+   // BRW_OPCODE_CALLA =	43,  /**< Gen7.5+     */
+   // BRW_OPCODE_MSAVE =	44,  /**< Pre-Gen6    */ /* Reused */
+   // BRW_OPCODE_CALL =	44,  /**< Gen6+       */ /* Reused */
+   // BRW_OPCODE_MREST =	45,  /**< Pre-Gen6    */ /* Reused */
+   // BRW_OPCODE_RET =	45,  /**< Gen6+       */ /* Reused */
+   // BRW_OPCODE_PUSH =	46,  /**< Pre-Gen6    */ /* Reused */
+   // BRW_OPCODE_FORK =	46,  /**< Gen6 only   */ /* Reused */
+   // BRW_OPCODE_GOTO =	46,  /**< Gen8+       */ /* Reused */
+   // BRW_OPCODE_POP =	47,  /**< Pre-Gen6    */
    BRW_OPCODE_WAIT =	48,
    BRW_OPCODE_SEND =	49,
    BRW_OPCODE_SENDC =	50,
+   BRW_OPCODE_SENDS =	51,  /**< Gen9+ */
+   BRW_OPCODE_SENDSC =	52,  /**< Gen9+ */
+   /* Reserved 53-55 */
    BRW_OPCODE_MATH =	56,  /**< Gen6+ */
+   /* Reserved 57-63 */
    BRW_OPCODE_ADD =	64,
    BRW_OPCODE_MUL =	65,
    BRW_OPCODE_AVG =	66,
@@ -895,16 +914,21 @@ enum opcode {
    BRW_OPCODE_SUBB =	79,  /**< Gen7+ */
    BRW_OPCODE_SAD2 =	80,
    BRW_OPCODE_SADA2 =	81,
+   /* Reserved 82-83 */
    BRW_OPCODE_DP4 =	84,
    BRW_OPCODE_DPH =	85,
    BRW_OPCODE_DP3 =	86,
    BRW_OPCODE_DP2 =	87,
+   /* Reserved 88 */
    BRW_OPCODE_LINE =	89,
    BRW_OPCODE_PLN =	90,  /**< G45+ */
    BRW_OPCODE_MAD =	91,  /**< Gen6+ */
    BRW_OPCODE_LRP =	92,  /**< Gen6+ */
+   // BRW_OPCODE_MADM =	93,  /**< Gen8+ */
+   /* Reserved 94-124 */
    BRW_OPCODE_NENOP =	125, /**< G45 only */
    BRW_OPCODE_NOP =	126,
+   /* Reserved 127 */
 
    /* These are compiler backend opcodes that get translated into other
     * instructions.
diff --git a/src/mesa/drivers/dri/i965/brw_disasm.c b/src/mesa/drivers/dri/i965/brw_disasm.c
index fd93beaec19..01f1871701b 100644
--- a/src/mesa/drivers/dri/i965/brw_disasm.c
+++ b/src/mesa/drivers/dri/i965/brw_disasm.c
@@ -34,6 +34,7 @@
 
 const struct opcode_desc opcode_descs[128] = {
    [BRW_OPCODE_MOV]      = { .name = "mov",     .nsrc = 1, .ndst = 1 },
+   [BRW_OPCODE_MOVI]     = { .name = "movi",    .nsrc = 2, .ndst = 1 },
    [BRW_OPCODE_FRC]      = { .name = "frc",     .nsrc = 1, .ndst = 1 },
    [BRW_OPCODE_RNDU]     = { .name = "rndu",    .nsrc = 1, .ndst = 1 },
    [BRW_OPCODE_RNDD]     = { .name = "rndd",    .nsrc = 1, .ndst = 1 },
@@ -83,6 +84,9 @@ const struct opcode_desc opcode_descs[128] = {
 
    [BRW_OPCODE_SEND]     = { .name = "send",    .nsrc = 1, .ndst = 1 },
    [BRW_OPCODE_SENDC]    = { .name = "sendc",   .nsrc = 1, .ndst = 1 },
+   [BRW_OPCODE_SENDS]    = { .name = "sends",   .nsrc = 2, .ndst = 1 },
+   [BRW_OPCODE_SENDSC]   = { .name = "sendsc",  .nsrc = 2, .ndst = 1 },
+   [BRW_OPCODE_ILLEGAL]  = { .name = "illegal", .nsrc = 0, .ndst = 0 },
    [BRW_OPCODE_NOP]      = { .name = "nop",     .nsrc = 0, .ndst = 0 },
    [BRW_OPCODE_NENOP]    = { .name = "nenop",   .nsrc = 0, .ndst = 0 },
    [BRW_OPCODE_JMPI]     = { .name = "jmpi",    .nsrc = 0, .ndst = 0 },
@@ -93,10 +97,10 @@ const struct opcode_desc opcode_descs[128] = {
    [BRW_OPCODE_BREAK]    = { .name = "break",   .nsrc = 2, .ndst = 0 },
    [BRW_OPCODE_CONTINUE] = { .name = "cont",    .nsrc = 1, .ndst = 0 },
    [BRW_OPCODE_HALT]     = { .name = "halt",    .nsrc = 1, .ndst = 0 },
-   [BRW_OPCODE_MSAVE]    = { .name = "msave",   .nsrc = 1, .ndst = 1 },
-   [BRW_OPCODE_PUSH]     = { .name = "push",    .nsrc = 1, .ndst = 1 },
-   [BRW_OPCODE_MRESTORE] = { .name = "mrest",   .nsrc = 1, .ndst = 1 },
-   [BRW_OPCODE_POP]      = { .name = "pop",     .nsrc = 2, .ndst = 0 },
+   // [BRW_OPCODE_MSAVE]    = { .name = "msave",   .nsrc = 1, .ndst = 1 },
+   // [BRW_OPCODE_PUSH]     = { .name = "push",    .nsrc = 1, .ndst = 1 },
+   // [BRW_OPCODE_MREST]    = { .name = "mrest",   .nsrc = 1, .ndst = 1 },
+   // [BRW_OPCODE_POP]      = { .name = "pop",     .nsrc = 2, .ndst = 0 },
    [BRW_OPCODE_WAIT]     = { .name = "wait",    .nsrc = 1, .ndst = 0 },
    [BRW_OPCODE_DO]       = { .name = "do",      .nsrc = 0, .ndst = 0 },
    [BRW_OPCODE_ENDIF]    = { .name = "endif",   .nsrc = 2, .ndst = 0 },
@@ -137,8 +141,8 @@ has_branch_ctrl(const struct brw_device_info *devinfo, enum opcode opcode)
       return false;
 
    return opcode == BRW_OPCODE_IF ||
-          opcode == BRW_OPCODE_ELSE ||
-          opcode == BRW_OPCODE_GOTO;
+          opcode == BRW_OPCODE_ELSE;
+          /* opcode == BRW_OPCODE_GOTO; */
 }
 
 static bool
diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp
index 481b69c6e18..de21e769871 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -290,7 +290,7 @@ const char *
 brw_instruction_name(enum opcode op)
 {
    switch (op) {
-   case BRW_OPCODE_MOV ... BRW_OPCODE_NOP:
+   case BRW_OPCODE_ILLEGAL ... BRW_OPCODE_NOP:
       assert(opcode_descs[op].name);
       return opcode_descs[op].name;
    case FS_OPCODE_FB_WRITE:

From 9ab45b4df91fadcbbec62828265644d7463b78bb Mon Sep 17 00:00:00 2001
From: Matt Turner <mattst88@gmail.com>
Date: Thu, 15 Oct 2015 11:38:43 -0700
Subject: [PATCH 192/287] i965: Don't consider control flow instructions to
 have sources.

And why did IFF have a destination?

I suspect that once upon a time the disassembler used this information
to know which fields to find the jump targets in. The jump targets have
moved, so the disassembler has to know how to handle these
per-generation anyway.

Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/mesa/drivers/dri/i965/brw_disasm.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_disasm.c b/src/mesa/drivers/dri/i965/brw_disasm.c
index 01f1871701b..76b9bed8548 100644
--- a/src/mesa/drivers/dri/i965/brw_disasm.c
+++ b/src/mesa/drivers/dri/i965/brw_disasm.c
@@ -90,20 +90,20 @@ const struct opcode_desc opcode_descs[128] = {
    [BRW_OPCODE_NOP]      = { .name = "nop",     .nsrc = 0, .ndst = 0 },
    [BRW_OPCODE_NENOP]    = { .name = "nenop",   .nsrc = 0, .ndst = 0 },
    [BRW_OPCODE_JMPI]     = { .name = "jmpi",    .nsrc = 0, .ndst = 0 },
-   [BRW_OPCODE_IF]       = { .name = "if",      .nsrc = 2, .ndst = 0 },
-   [BRW_OPCODE_IFF]      = { .name = "iff",     .nsrc = 2, .ndst = 1 },
-   [BRW_OPCODE_WHILE]    = { .name = "while",   .nsrc = 2, .ndst = 0 },
-   [BRW_OPCODE_ELSE]     = { .name = "else",    .nsrc = 2, .ndst = 0 },
-   [BRW_OPCODE_BREAK]    = { .name = "break",   .nsrc = 2, .ndst = 0 },
-   [BRW_OPCODE_CONTINUE] = { .name = "cont",    .nsrc = 1, .ndst = 0 },
-   [BRW_OPCODE_HALT]     = { .name = "halt",    .nsrc = 1, .ndst = 0 },
+   [BRW_OPCODE_IF]       = { .name = "if",      .nsrc = 0, .ndst = 0 },
+   [BRW_OPCODE_IFF]      = { .name = "iff",     .nsrc = 0, .ndst = 0 },
+   [BRW_OPCODE_WHILE]    = { .name = "while",   .nsrc = 0, .ndst = 0 },
+   [BRW_OPCODE_ELSE]     = { .name = "else",    .nsrc = 0, .ndst = 0 },
+   [BRW_OPCODE_BREAK]    = { .name = "break",   .nsrc = 0, .ndst = 0 },
+   [BRW_OPCODE_CONTINUE] = { .name = "cont",    .nsrc = 0, .ndst = 0 },
+   [BRW_OPCODE_HALT]     = { .name = "halt",    .nsrc = 0, .ndst = 0 },
    // [BRW_OPCODE_MSAVE]    = { .name = "msave",   .nsrc = 1, .ndst = 1 },
    // [BRW_OPCODE_PUSH]     = { .name = "push",    .nsrc = 1, .ndst = 1 },
    // [BRW_OPCODE_MREST]    = { .name = "mrest",   .nsrc = 1, .ndst = 1 },
    // [BRW_OPCODE_POP]      = { .name = "pop",     .nsrc = 2, .ndst = 0 },
    [BRW_OPCODE_WAIT]     = { .name = "wait",    .nsrc = 1, .ndst = 0 },
    [BRW_OPCODE_DO]       = { .name = "do",      .nsrc = 0, .ndst = 0 },
-   [BRW_OPCODE_ENDIF]    = { .name = "endif",   .nsrc = 2, .ndst = 0 },
+   [BRW_OPCODE_ENDIF]    = { .name = "endif",   .nsrc = 0, .ndst = 0 },
 };
 
 static bool

From 93e371c140cb1aa438ce3c1a9946811d92032897 Mon Sep 17 00:00:00 2001
From: Matt Turner <mattst88@gmail.com>
Date: Mon, 29 Jun 2015 14:05:27 -0700
Subject: [PATCH 193/287] i965: Set annotation_info's mem_ctx.

It was being memset to 0 previously.

Reviewed-by: Topi Pohjolainen <topi.pohjolainen@intel.com>
---
 src/mesa/drivers/dri/i965/brw_fs_generator.cpp   | 2 +-
 src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 2 +-
 src/mesa/drivers/dri/i965/intel_asm_annotation.c | 3 +++
 3 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
index 974219f3ece..34fdc16cc66 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -2267,7 +2267,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
 
       dump_assembly(p->store, annotation.ann_count, annotation.ann,
                     p->devinfo);
-      ralloc_free(annotation.ann);
+      ralloc_free(annotation.mem_ctx);
    }
 
    compiler->shader_debug_log(log_data,
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
index 693f5835412..f5e493efc8f 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
@@ -1560,7 +1560,7 @@ generate_code(struct brw_codegen *p,
 
       dump_assembly(p->store, annotation.ann_count, annotation.ann,
                     p->devinfo);
-      ralloc_free(annotation.ann);
+      ralloc_free(annotation.mem_ctx);
    }
 
    compiler->shader_debug_log(log_data,
diff --git a/src/mesa/drivers/dri/i965/intel_asm_annotation.c b/src/mesa/drivers/dri/i965/intel_asm_annotation.c
index b3d6324a5fe..f87a9bbe967 100644
--- a/src/mesa/drivers/dri/i965/intel_asm_annotation.c
+++ b/src/mesa/drivers/dri/i965/intel_asm_annotation.c
@@ -86,6 +86,9 @@ void annotate(const struct brw_device_info *devinfo,
               struct annotation_info *annotation, const struct cfg_t *cfg,
               struct backend_instruction *inst, unsigned offset)
 {
+   if (annotation->mem_ctx == NULL)
+      annotation->mem_ctx = ralloc_context(NULL);
+
    if (annotation->ann_size <= annotation->ann_count) {
       int old_size = annotation->ann_size;
       annotation->ann_size = MAX2(1024, annotation->ann_size * 2);

From a280e83d71bb046098ed5380cb053318f9e8cf8e Mon Sep 17 00:00:00 2001
From: Matt Turner <mattst88@gmail.com>
Date: Wed, 7 Oct 2015 21:04:48 -0700
Subject: [PATCH 194/287] i965: Combine assembly annotations if possible.

Often annotations are identical between sets of consecutive
instructions. We can perhaps avoid some memory allocations by reusing
the previous annotation.

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
---
 .../drivers/dri/i965/intel_asm_annotation.c   | 23 +++++++++++++++----
 1 file changed, 18 insertions(+), 5 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/intel_asm_annotation.c b/src/mesa/drivers/dri/i965/intel_asm_annotation.c
index f87a9bbe967..fe9d80a5e67 100644
--- a/src/mesa/drivers/dri/i965/intel_asm_annotation.c
+++ b/src/mesa/drivers/dri/i965/intel_asm_annotation.c
@@ -112,6 +112,24 @@ void annotate(const struct brw_device_info *devinfo,
       ann->block_start = cfg->blocks[annotation->cur_block];
    }
 
+   if (bblock_end(cfg->blocks[annotation->cur_block]) == inst) {
+      ann->block_end = cfg->blocks[annotation->cur_block];
+      annotation->cur_block++;
+   }
+
+   /* Merge this annotation with the previous if possible. */
+   struct annotation *prev = annotation->ann_count > 1 ?
+         &annotation->ann[annotation->ann_count - 2] : NULL;
+   if (prev != NULL &&
+       ann->ir == prev->ir &&
+       ann->annotation == prev->annotation &&
+       ann->block_start == NULL &&
+       prev->block_end == NULL) {
+      if (ann->block_end == NULL)
+         annotation->ann_count--;
+      return;
+   }
+
    /* There is no hardware DO instruction on Gen6+, so since DO always
     * starts a basic block, we need to set the .block_start of the next
     * instruction's annotation with a pointer to the bblock started by
@@ -123,11 +141,6 @@ void annotate(const struct brw_device_info *devinfo,
    if (devinfo->gen >= 6 && inst->opcode == BRW_OPCODE_DO) {
       annotation->ann_count--;
    }
-
-   if (bblock_end(cfg->blocks[annotation->cur_block]) == inst) {
-      ann->block_end = cfg->blocks[annotation->cur_block];
-      annotation->cur_block++;
-   }
 }
 
 void

From 34ed45557e9b8a834af2816e774165a0ee7acdd2 Mon Sep 17 00:00:00 2001
From: Matt Turner <mattst88@gmail.com>
Date: Wed, 21 Oct 2015 15:23:10 -0700
Subject: [PATCH 195/287] i965: Add annotation_insert_error() and support for
 printing errors.

Will allow annotations to contain error messages (indicating an
instruction violates a rule for instance) that are printed after the
disassembly of the block.

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
---
 .../drivers/dri/i965/intel_asm_annotation.c   | 79 ++++++++++++++++---
 .../drivers/dri/i965/intel_asm_annotation.h   |  7 ++
 2 files changed, 75 insertions(+), 11 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/intel_asm_annotation.c b/src/mesa/drivers/dri/i965/intel_asm_annotation.c
index fe9d80a5e67..52878fde43e 100644
--- a/src/mesa/drivers/dri/i965/intel_asm_annotation.c
+++ b/src/mesa/drivers/dri/i965/intel_asm_annotation.c
@@ -69,6 +69,10 @@ dump_assembly(void *assembly, int num_annotations, struct annotation *annotation
 
       brw_disassemble(devinfo, assembly, start_offset, end_offset, stderr);
 
+      if (annotation[i].error) {
+         fputs(annotation[i].error, stderr);
+      }
+
       if (annotation[i].block_end) {
          fprintf(stderr, "   END B%d", annotation[i].block_end->num);
          foreach_list_typed(struct bblock_link, successor_link, link,
@@ -82,6 +86,24 @@ dump_assembly(void *assembly, int num_annotations, struct annotation *annotation
    fprintf(stderr, "\n");
 }
 
+static bool
+annotation_array_ensure_space(struct annotation_info *annotation)
+{
+   if (annotation->ann_size <= annotation->ann_count) {
+      int old_size = annotation->ann_size;
+      annotation->ann_size = MAX2(1024, annotation->ann_size * 2);
+      annotation->ann = reralloc(annotation->mem_ctx, annotation->ann,
+                                 struct annotation, annotation->ann_size);
+      if (!annotation->ann)
+         return false;
+
+      memset(annotation->ann + old_size, 0,
+             (annotation->ann_size - old_size) * sizeof(struct annotation));
+   }
+
+   return true;
+}
+
 void annotate(const struct brw_device_info *devinfo,
               struct annotation_info *annotation, const struct cfg_t *cfg,
               struct backend_instruction *inst, unsigned offset)
@@ -89,17 +111,8 @@ void annotate(const struct brw_device_info *devinfo,
    if (annotation->mem_ctx == NULL)
       annotation->mem_ctx = ralloc_context(NULL);
 
-   if (annotation->ann_size <= annotation->ann_count) {
-      int old_size = annotation->ann_size;
-      annotation->ann_size = MAX2(1024, annotation->ann_size * 2);
-      annotation->ann = reralloc(annotation->mem_ctx, annotation->ann,
-                                 struct annotation, annotation->ann_size);
-      if (!annotation->ann)
-         return;
-
-      memset(annotation->ann + old_size, 0,
-             (annotation->ann_size - old_size) * sizeof(struct annotation));
-   }
+   if (!annotation_array_ensure_space(annotation))
+      return;
 
    struct annotation *ann = &annotation->ann[annotation->ann_count++];
    ann->offset = offset;
@@ -156,3 +169,47 @@ annotation_finalize(struct annotation_info *annotation,
    }
    annotation->ann[annotation->ann_count].offset = next_inst_offset;
 }
+
+void
+annotation_insert_error(struct annotation_info *annotation, unsigned offset,
+                        const char *error)
+{
+   struct annotation *ann;
+
+   if (!annotation->ann_count)
+      return;
+
+   /* We may have to split an annotation, so ensure we have enough space
+    * allocated for that case up front.
+    */
+   if (!annotation_array_ensure_space(annotation))
+      return;
+
+   for (int i = 0; i < annotation->ann_count; i++) {
+      struct annotation *cur = &annotation->ann[i];
+      struct annotation *next = &annotation->ann[i + 1];
+      ann = cur;
+
+      if (next->offset <= offset)
+         continue;
+
+      if (offset + sizeof(brw_inst) != next->offset) {
+         memmove(next, cur,
+                 (annotation->ann_count - i + 2) * sizeof(struct annotation));
+         cur->error = NULL;
+         cur->error_length = 0;
+         cur->block_end = NULL;
+         next->offset = offset + sizeof(brw_inst);
+         next->block_start = NULL;
+         annotation->ann_count++;
+      }
+      break;
+   }
+
+   assume(ann != NULL);
+
+   if (ann->error)
+      ralloc_strcat(&ann->error, error);
+   else
+      ann->error = ralloc_strdup(annotation->mem_ctx, error);
+}
diff --git a/src/mesa/drivers/dri/i965/intel_asm_annotation.h b/src/mesa/drivers/dri/i965/intel_asm_annotation.h
index 6c72326f058..662a4b4e0f7 100644
--- a/src/mesa/drivers/dri/i965/intel_asm_annotation.h
+++ b/src/mesa/drivers/dri/i965/intel_asm_annotation.h
@@ -37,6 +37,9 @@ struct cfg_t;
 struct annotation {
    int offset;
 
+   size_t error_length;
+   char *error;
+
    /* Pointers to the basic block in the CFG if the instruction group starts
     * or ends a basic block.
     */
@@ -69,6 +72,10 @@ annotate(const struct brw_device_info *devinfo,
 void
 annotation_finalize(struct annotation_info *annotation, unsigned offset);
 
+void
+annotation_insert_error(struct annotation_info *annotation, unsigned offset,
+                        const char *error);
+
 #ifdef __cplusplus
 } /* extern "C" */
 #endif

From 0b45d47f71f57f685ce1a12a3dcd4fdb63c160b4 Mon Sep 17 00:00:00 2001
From: Matt Turner <mattst88@gmail.com>
Date: Mon, 29 Jun 2015 14:08:51 -0700
Subject: [PATCH 196/287] i965: Add initial assembly validation pass.

Initially just checks that sources are non-NULL, which would have
alerted us to the problem fixed by commit 6c846dc5.

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/mesa/drivers/dri/i965/Makefile.sources    |   1 +
 src/mesa/drivers/dri/i965/brw_eu.h            |   4 +
 src/mesa/drivers/dri/i965/brw_eu_validate.c   | 153 ++++++++++++++++++
 .../drivers/dri/i965/brw_fs_generator.cpp     |   8 +
 .../drivers/dri/i965/brw_vec4_generator.cpp   |   8 +
 5 files changed, 174 insertions(+)
 create mode 100644 src/mesa/drivers/dri/i965/brw_eu_validate.c

diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources
index f5e84cb7f65..5a88d66a31b 100644
--- a/src/mesa/drivers/dri/i965/Makefile.sources
+++ b/src/mesa/drivers/dri/i965/Makefile.sources
@@ -14,6 +14,7 @@ i965_compiler_FILES = \
 	brw_eu_emit.c \
 	brw_eu.h \
 	brw_eu_util.c \
+	brw_eu_validate.c \
 	brw_fs_builder.h \
 	brw_fs_channel_expressions.cpp \
 	brw_fs_cmod_propagation.cpp \
diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h
index 1345db77c80..829e39330f2 100644
--- a/src/mesa/drivers/dri/i965/brw_eu.h
+++ b/src/mesa/drivers/dri/i965/brw_eu.h
@@ -522,6 +522,10 @@ bool brw_try_compact_instruction(const struct brw_device_info *devinfo,
 void brw_debug_compact_uncompact(const struct brw_device_info *devinfo,
                                  brw_inst *orig, brw_inst *uncompacted);
 
+/* brw_eu_validate.c */
+bool brw_validate_instructions(const struct brw_codegen *p, int start_offset,
+                               struct annotation_info *annotation);
+
 static inline int
 next_offset(const struct brw_device_info *devinfo, void *store, int offset)
 {
diff --git a/src/mesa/drivers/dri/i965/brw_eu_validate.c b/src/mesa/drivers/dri/i965/brw_eu_validate.c
new file mode 100644
index 00000000000..ed536bfff2b
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_eu_validate.c
@@ -0,0 +1,153 @@
+/*
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/** @file brw_eu_validate.c
+ *
+ * This file implements a pass that validates shader assembly.
+ */
+
+#include "brw_eu.h"
+
+/* We're going to do lots of string concatenation, so this should help. */
+struct string {
+   char *str;
+   size_t len;
+};
+
+static void /* append src to dest in place; dest->len never counts the NUL */
+cat(struct string *dest, const struct string src)
+{
+   dest->str = realloc(dest->str, dest->len + src.len + 1);
+   memcpy(dest->str + dest->len, src.str, src.len);
+   dest->str[dest->len + src.len] = '\0'; /* last allocated byte, right after the copied text */
+   dest->len = dest->len + src.len;
+}
+#define CAT(dest, src) cat(&dest, (struct string){src, strlen(src)})
+
+#define error(str) "\tERROR: " str "\n"
+
+#define ERROR_IF(cond, msg)          \
+   do {                              \
+      if (cond) {                    \
+         CAT(error_msg, error(msg)); \
+         valid = false;              \
+      }                              \
+   } while(0)
+
+static bool
+src0_is_null(const struct brw_device_info *devinfo, const brw_inst *inst)
+{
+   return brw_inst_src0_reg_file(devinfo, inst) == BRW_ARCHITECTURE_REGISTER_FILE &&
+          brw_inst_src0_da_reg_nr(devinfo, inst) == BRW_ARF_NULL;
+}
+
+static bool
+src1_is_null(const struct brw_device_info *devinfo, const brw_inst *inst)
+{
+   return brw_inst_src1_reg_file(devinfo, inst) == BRW_ARCHITECTURE_REGISTER_FILE &&
+          brw_inst_src1_da_reg_nr(devinfo, inst) == BRW_ARF_NULL;
+}
+
+static unsigned
+num_sources_from_inst(const struct brw_device_info *devinfo,
+                      const brw_inst *inst)
+{
+   unsigned math_function;
+
+   if (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_MATH) {
+      math_function = brw_inst_math_function(devinfo, inst);
+   } else if (devinfo->gen < 6 &&
+              brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SEND) {
+      if (brw_inst_sfid(devinfo, inst) == BRW_SFID_MATH) {
+         math_function = brw_inst_math_msg_function(devinfo, inst);
+      } else {
+         /* Send instructions are allowed to have null sources since they use
+          * the base_mrf field to specify which message register source.
+          */
+         return 0;
+      }
+   } else {
+      return opcode_descs[brw_inst_opcode(devinfo, inst)].nsrc;
+   }
+
+   switch (math_function) {
+   case BRW_MATH_FUNCTION_INV:
+   case BRW_MATH_FUNCTION_LOG:
+   case BRW_MATH_FUNCTION_EXP:
+   case BRW_MATH_FUNCTION_SQRT:
+   case BRW_MATH_FUNCTION_RSQ:
+   case BRW_MATH_FUNCTION_SIN:
+   case BRW_MATH_FUNCTION_COS:
+   case BRW_MATH_FUNCTION_SINCOS:
+   case GEN8_MATH_FUNCTION_INVM:
+   case GEN8_MATH_FUNCTION_RSQRTM:
+      return 1;
+   case BRW_MATH_FUNCTION_FDIV:
+   case BRW_MATH_FUNCTION_POW:
+   case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER:
+   case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT:
+   case BRW_MATH_FUNCTION_INT_DIV_REMAINDER:
+      return 2;
+   default:
+      unreachable("not reached");
+   }
+}
+
+bool
+brw_validate_instructions(const struct brw_codegen *p, int start_offset,
+                          struct annotation_info *annotation)
+{
+   const struct brw_device_info *devinfo = p->devinfo;
+   const void *store = p->store + start_offset / 16;
+   bool valid = true;
+
+   for (int src_offset = 0; src_offset < p->next_insn_offset - start_offset;
+        src_offset += sizeof(brw_inst)) {
+      struct string error_msg = { .str = NULL, .len = 0 };
+      const brw_inst *inst = store + src_offset;
+
+      switch (num_sources_from_inst(devinfo, inst)) {
+      case 3:
+         /* Nothing to test. 3-src instructions can only have GRF sources, and
+          * there's no bit to control the file.
+          */
+         break;
+      case 2:
+         ERROR_IF(src1_is_null(devinfo, inst), "src1 is null");
+         /* fallthrough */
+      case 1:
+         ERROR_IF(src0_is_null(devinfo, inst), "src0 is null");
+         break;
+      case 0:
+      default:
+         break;
+      }
+
+      if (error_msg.str && annotation) {
+         annotation_insert_error(annotation, src_offset, error_msg.str);
+      }
+      free(error_msg.str);
+   }
+
+   return valid;
+}
diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
index 34fdc16cc66..48775047e9d 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -2252,6 +2252,13 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
    brw_set_uip_jip(p);
    annotation_finalize(&annotation, p->next_insn_offset);
 
+#ifndef NDEBUG
+   bool validated = brw_validate_instructions(p, start_offset, &annotation);
+#else
+   if (unlikely(debug_flag))
+      brw_validate_instructions(p, start_offset, &annotation);
+#endif
+
    int before_size = p->next_insn_offset - start_offset;
    brw_compact_instructions(p, start_offset, annotation.ann_count,
                             annotation.ann);
@@ -2269,6 +2276,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
                     p->devinfo);
       ralloc_free(annotation.mem_ctx);
    }
+   assert(validated);
 
    compiler->shader_debug_log(log_data,
                               "%s SIMD%d shader: %d inst, %d loops, %u cycles, "
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
index f5e493efc8f..531acb37b6a 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
@@ -1543,6 +1543,13 @@ generate_code(struct brw_codegen *p,
    brw_set_uip_jip(p);
    annotation_finalize(&annotation, p->next_insn_offset);
 
+#ifndef NDEBUG
+   bool validated = brw_validate_instructions(p, 0, &annotation);
+#else
+   if (unlikely(debug_flag))
+      brw_validate_instructions(p, 0, &annotation);
+#endif
+
    int before_size = p->next_insn_offset;
    brw_compact_instructions(p, 0, annotation.ann_count, annotation.ann);
    int after_size = p->next_insn_offset;
@@ -1562,6 +1569,7 @@ generate_code(struct brw_codegen *p,
                     p->devinfo);
       ralloc_free(annotation.mem_ctx);
    }
+   assert(validated);
 
    compiler->shader_debug_log(log_data,
                               "%s vec4 shader: %d inst, %d loops, %u cycles, "

From 74e48e95441e5735a03934243abb1051875e053a Mon Sep 17 00:00:00 2001
From: Matt Turner <mattst88@gmail.com>
Date: Mon, 29 Jun 2015 15:59:37 -0700
Subject: [PATCH 197/287] i965: Check instructions appear only on supported
 hardware.

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/mesa/drivers/dri/i965/brw_eu_validate.c | 254 ++++++++++++++++++++
 1 file changed, 254 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_eu_validate.c b/src/mesa/drivers/dri/i965/brw_eu_validate.c
index ed536bfff2b..eb57962bea3 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_validate.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_validate.c
@@ -68,6 +68,234 @@ src1_is_null(const struct brw_device_info *devinfo, const brw_inst *inst)
           brw_inst_src1_da_reg_nr(devinfo, inst) == BRW_ARF_NULL;
 }
 
+enum gen {
+   GEN4  = (1 << 0),
+   GEN45 = (1 << 1),
+   GEN5  = (1 << 2),
+   GEN6  = (1 << 3),
+   GEN7  = (1 << 4),
+   GEN75 = (1 << 5),
+   GEN8  = (1 << 6),
+   GEN9  = (1 << 7),
+   GEN_ALL = ~0
+};
+
+#define GEN_GE(gen) (~((gen) - 1) | (gen)) /* mask of this gen bit and every higher bit */
+#define GEN_LE(gen) (((gen) - 1) | (gen))  /* mask of this gen bit and every lower bit */
+
+struct inst_info {
+   enum gen gen;
+};
+
+static const struct inst_info inst_info[128] = {
+   [BRW_OPCODE_ILLEGAL] = {
+      .gen = GEN_ALL,
+   },
+   [BRW_OPCODE_MOV] = {
+      .gen = GEN_ALL,
+   },
+   [BRW_OPCODE_SEL] = {
+      .gen = GEN_ALL,
+   },
+   [BRW_OPCODE_MOVI] = {
+      .gen = GEN_GE(GEN45),
+   },
+   [BRW_OPCODE_NOT] = {
+      .gen = GEN_ALL,
+   },
+   [BRW_OPCODE_AND] = {
+      .gen = GEN_ALL,
+   },
+   [BRW_OPCODE_OR] = {
+      .gen = GEN_ALL,
+   },
+   [BRW_OPCODE_XOR] = {
+      .gen = GEN_ALL,
+   },
+   [BRW_OPCODE_SHR] = {
+      .gen = GEN_ALL,
+   },
+   [BRW_OPCODE_SHL] = {
+      .gen = GEN_ALL,
+   },
+   /* BRW_OPCODE_DIM / BRW_OPCODE_SMOV */
+   /* Reserved - 11 */
+   [BRW_OPCODE_ASR] = {
+      .gen = GEN_ALL,
+   },
+   /* Reserved - 13-15 */
+   [BRW_OPCODE_CMP] = {
+      .gen = GEN_ALL,
+   },
+   [BRW_OPCODE_CMPN] = {
+      .gen = GEN_ALL,
+   },
+   [BRW_OPCODE_CSEL] = {
+      .gen = GEN_GE(GEN8),
+   },
+   [BRW_OPCODE_F32TO16] = {
+      .gen = GEN7 | GEN75,
+   },
+   [BRW_OPCODE_F16TO32] = {
+      .gen = GEN7 | GEN75,
+   },
+   /* Reserved - 21-22 */
+   [BRW_OPCODE_BFREV] = {
+      .gen = GEN_GE(GEN7),
+   },
+   [BRW_OPCODE_BFE] = {
+      .gen = GEN_GE(GEN7),
+   },
+   [BRW_OPCODE_BFI1] = {
+      .gen = GEN_GE(GEN7),
+   },
+   [BRW_OPCODE_BFI2] = {
+      .gen = GEN_GE(GEN7),
+   },
+   /* Reserved - 27-31 */
+   [BRW_OPCODE_JMPI] = {
+      .gen = GEN_ALL,
+   },
+   /* BRW_OPCODE_BRD */
+   [BRW_OPCODE_IF] = {
+      .gen = GEN_ALL,
+   },
+   [BRW_OPCODE_IFF] = { /* also BRW_OPCODE_BRC */
+      .gen = GEN_LE(GEN5),
+   },
+   [BRW_OPCODE_ELSE] = {
+      .gen = GEN_ALL,
+   },
+   [BRW_OPCODE_ENDIF] = {
+      .gen = GEN_ALL,
+   },
+   [BRW_OPCODE_DO] = { /* also BRW_OPCODE_CASE */
+      .gen = GEN_LE(GEN5),
+   },
+   [BRW_OPCODE_WHILE] = {
+      .gen = GEN_ALL,
+   },
+   [BRW_OPCODE_BREAK] = {
+      .gen = GEN_ALL,
+   },
+   [BRW_OPCODE_CONTINUE] = {
+      .gen = GEN_ALL,
+   },
+   [BRW_OPCODE_HALT] = {
+      .gen = GEN_ALL,
+   },
+   /* BRW_OPCODE_CALLA */
+   /* BRW_OPCODE_MSAVE / BRW_OPCODE_CALL */
+   /* BRW_OPCODE_MREST / BRW_OPCODE_RET */
+   /* BRW_OPCODE_PUSH / BRW_OPCODE_FORK / BRW_OPCODE_GOTO */
+   /* BRW_OPCODE_POP */
+   [BRW_OPCODE_WAIT] = {
+      .gen = GEN_ALL,
+   },
+   [BRW_OPCODE_SEND] = {
+      .gen = GEN_ALL,
+   },
+   [BRW_OPCODE_SENDC] = {
+      .gen = GEN_ALL,
+   },
+   [BRW_OPCODE_SENDS] = {
+      .gen = GEN_GE(GEN9),
+   },
+   [BRW_OPCODE_SENDSC] = {
+      .gen = GEN_GE(GEN9),
+   },
+   /* Reserved 53-55 */
+   [BRW_OPCODE_MATH] = {
+      .gen = GEN_GE(GEN6),
+   },
+   /* Reserved 57-63 */
+   [BRW_OPCODE_ADD] = {
+      .gen = GEN_ALL,
+   },
+   [BRW_OPCODE_MUL] = {
+      .gen = GEN_ALL,
+   },
+   [BRW_OPCODE_AVG] = {
+      .gen = GEN_ALL,
+   },
+   [BRW_OPCODE_FRC] = {
+      .gen = GEN_ALL,
+   },
+   [BRW_OPCODE_RNDU] = {
+      .gen = GEN_ALL,
+   },
+   [BRW_OPCODE_RNDD] = {
+      .gen = GEN_ALL,
+   },
+   [BRW_OPCODE_RNDE] = {
+      .gen = GEN_ALL,
+   },
+   [BRW_OPCODE_RNDZ] = {
+      .gen = GEN_ALL,
+   },
+   [BRW_OPCODE_MAC] = {
+      .gen = GEN_ALL,
+   },
+   [BRW_OPCODE_MACH] = {
+      .gen = GEN_ALL,
+   },
+   [BRW_OPCODE_LZD] = {
+      .gen = GEN_ALL,
+   },
+   [BRW_OPCODE_FBH] = {
+      .gen = GEN_GE(GEN7),
+   },
+   [BRW_OPCODE_FBL] = {
+      .gen = GEN_GE(GEN7),
+   },
+   [BRW_OPCODE_CBIT] = {
+      .gen = GEN_GE(GEN7),
+   },
+   [BRW_OPCODE_ADDC] = {
+      .gen = GEN_GE(GEN7),
+   },
+   [BRW_OPCODE_SUBB] = {
+      .gen = GEN_GE(GEN7),
+   },
+   [BRW_OPCODE_SAD2] = {
+      .gen = GEN_ALL,
+   },
+   [BRW_OPCODE_SADA2] = {
+      .gen = GEN_ALL,
+   },
+   /* Reserved 82-83 */
+   [BRW_OPCODE_DP4] = {
+      .gen = GEN_ALL,
+   },
+   [BRW_OPCODE_DPH] = {
+      .gen = GEN_ALL,
+   },
+   [BRW_OPCODE_DP3] = {
+      .gen = GEN_ALL,
+   },
+   [BRW_OPCODE_DP2] = {
+      .gen = GEN_ALL,
+   },
+   /* Reserved 88 */
+   [BRW_OPCODE_LINE] = {
+      .gen = GEN_ALL,
+   },
+   [BRW_OPCODE_PLN] = {
+      .gen = GEN_GE(GEN45),
+   },
+   [BRW_OPCODE_MAD] = {
+      .gen = GEN_GE(GEN6),
+   },
+   [BRW_OPCODE_LRP] = {
+      .gen = GEN_GE(GEN6),
+   },
+   /* Reserved 93-124 */
+   /* BRW_OPCODE_NENOP */
+   [BRW_OPCODE_NOP] = {
+      .gen = GEN_ALL,
+   },
+};
+
 static unsigned
 num_sources_from_inst(const struct brw_device_info *devinfo,
                       const brw_inst *inst)
@@ -113,6 +341,29 @@ num_sources_from_inst(const struct brw_device_info *devinfo,
    }
 }
 
+static enum gen
+gen_from_devinfo(const struct brw_device_info *devinfo)
+{
+   switch (devinfo->gen) {
+   case 4: return devinfo->is_g4x ? GEN45 : GEN4;
+   case 5: return GEN5;
+   case 6: return GEN6;
+   case 7: return devinfo->is_haswell ? GEN75 : GEN7;
+   case 8: return GEN8;
+   case 9: return GEN9;
+   default:
+      unreachable("not reached");
+   }
+}
+
+static bool
+is_unsupported_inst(const struct brw_device_info *devinfo,
+                    const brw_inst *inst)
+{
+   enum gen gen = gen_from_devinfo(devinfo);
+   return (inst_info[brw_inst_opcode(devinfo, inst)].gen & gen) == 0;
+}
+
 bool
 brw_validate_instructions(const struct brw_codegen *p, int start_offset,
                           struct annotation_info *annotation)
@@ -143,6 +394,9 @@ brw_validate_instructions(const struct brw_codegen *p, int start_offset,
          break;
       }
 
+      ERROR_IF(is_unsupported_inst(devinfo, inst),
+               "Instruction not supported on this Gen");
+
       if (error_msg.str && annotation) {
          annotation_insert_error(annotation, src_offset, error_msg.str);
       }

From e5af09f9ba4a2bca256d7cd1087187fd8efe7f00 Mon Sep 17 00:00:00 2001
From: Nanley Chery <nanley.g.chery@intel.com>
Date: Fri, 11 Sep 2015 09:59:32 -0700
Subject: [PATCH 198/287] mesa/extensions: Remove array sentinel

Simplify future updates to the extension struct array by removing
the sentinel.

Signed-off-by: Nanley Chery <nanley.g.chery@intel.com>
Reviewed-by: Chad Versace <chad.versace@intel.com>
---
 src/mesa/main/extensions.c | 43 ++++++++++++++++++++++----------------
 1 file changed, 25 insertions(+), 18 deletions(-)

diff --git a/src/mesa/main/extensions.c b/src/mesa/main/extensions.c
index 1facad1fa7e..5bf6d20aeae 100644
--- a/src/mesa/main/extensions.c
+++ b/src/mesa/main/extensions.c
@@ -410,8 +410,6 @@ static const struct extension extension_table[] = {
    { "GL_SGIS_texture_edge_clamp",                 o(dummy_true),                              GLL,            1997 },
    { "GL_SGIS_texture_lod",                        o(dummy_true),                              GLL,            1997 },
    { "GL_SUN_multi_draw_arrays",                   o(dummy_true),                              GLL,            1999 },
-
-   { 0, 0, 0, 0 },
 };
 
 
@@ -426,14 +424,14 @@ static const struct extension extension_table[] = {
 static size_t
 name_to_offset(const char* name)
 {
-   const struct extension *i;
+   unsigned i;
 
    if (name == 0)
       return 0;
 
-   for (i = extension_table; i->name != 0; ++i) {
-      if (strcmp(name, i->name) == 0)
-	 return i->offset;
+   for (i = 0; i < ARRAY_SIZE(extension_table); ++i) {
+      if (strcmp(name, extension_table[i].name) == 0)
+	 return extension_table[i].offset;
    }
 
    return 0;
@@ -446,15 +444,16 @@ name_to_offset(const char* name)
 static void
 override_extensions_in_context(struct gl_context *ctx)
 {
-   const struct extension *i;
+   unsigned i;
    const GLboolean *enables =
       (GLboolean*) &_mesa_extension_override_enables;
    const GLboolean *disables =
       (GLboolean*) &_mesa_extension_override_disables;
    GLboolean *ctx_ext = (GLboolean*)&ctx->Extensions;
 
-   for (i = extension_table; i->name != 0; ++i) {
-      size_t offset = i->offset;
+   for (i = 0; i < ARRAY_SIZE(extension_table); ++i) {
+      size_t offset = extension_table[i].offset;
+
       assert(!enables[offset] || !disables[offset]);
       if (enables[offset]) {
          ctx_ext[offset] = 1;
@@ -778,7 +777,7 @@ _mesa_make_extension_string(struct gl_context *ctx)
    /* String of extra extensions. */
    char *extra_extensions = get_extension_override(ctx);
    GLboolean *base = (GLboolean *) &ctx->Extensions;
-   const struct extension *i;
+   unsigned k;
    unsigned j;
    unsigned maxYear = ~0;
    unsigned api_set = (1 << ctx->API);
@@ -799,7 +798,9 @@ _mesa_make_extension_string(struct gl_context *ctx)
 
    /* Compute length of the extension string. */
    count = 0;
-   for (i = extension_table; i->name != 0; ++i) {
+   for (k = 0; k < ARRAY_SIZE(extension_table); ++k) {
+      const struct extension *i = extension_table + k;
+
       if (base[i->offset] &&
           i->year <= maxYear &&
           (i->api_set & api_set)) {
@@ -829,11 +830,13 @@ _mesa_make_extension_string(struct gl_context *ctx)
     * expect will fit into that buffer.
     */
    j = 0;
-   for (i = extension_table; i->name != 0; ++i) {
+   for (k = 0; k < ARRAY_SIZE(extension_table); ++k) {
+      const struct extension *i = extension_table + k;
+
       if (base[i->offset] &&
           i->year <= maxYear &&
           (i->api_set & api_set)) {
-         extension_indices[j++] = i - extension_table;
+         extension_indices[j++] = k;
       }
    }
    assert(j == count);
@@ -842,7 +845,7 @@ _mesa_make_extension_string(struct gl_context *ctx)
 
    /* Build the extension string.*/
    for (j = 0; j < count; ++j) {
-      i = &extension_table[extension_indices[j]];
+      const struct extension *i = &extension_table[extension_indices[j]];
       assert(base[i->offset] && (i->api_set & api_set));
       strcat(exts, i->name);
       strcat(exts, " ");
@@ -863,7 +866,7 @@ GLuint
 _mesa_get_extension_count(struct gl_context *ctx)
 {
    GLboolean *base;
-   const struct extension *i;
+   unsigned k;
    unsigned api_set = (1 << ctx->API);
    if (_mesa_is_gles3(ctx))
       api_set |= ES3;
@@ -875,7 +878,9 @@ _mesa_get_extension_count(struct gl_context *ctx)
       return ctx->Extensions.Count;
 
    base = (GLboolean *) &ctx->Extensions;
-   for (i = extension_table; i->name != 0; ++i) {
+   for (k = 0; k < ARRAY_SIZE(extension_table); ++k) {
+      const struct extension *i = extension_table + k;
+
       if (base[i->offset] && (i->api_set & api_set)) {
 	 ctx->Extensions.Count++;
       }
@@ -891,7 +896,7 @@ _mesa_get_enabled_extension(struct gl_context *ctx, GLuint index)
 {
    const GLboolean *base;
    size_t n;
-   const struct extension *i;
+   unsigned k;
    unsigned api_set = (1 << ctx->API);
    if (_mesa_is_gles3(ctx))
       api_set |= ES3;
@@ -900,7 +905,9 @@ _mesa_get_enabled_extension(struct gl_context *ctx, GLuint index)
 
    base = (GLboolean*) &ctx->Extensions;
    n = 0;
-   for (i = extension_table; i->name != 0; ++i) {
+   for (k = 0; k < ARRAY_SIZE(extension_table); ++k) {
+      const struct extension *i = extension_table + k;
+
       if (base[i->offset] && (i->api_set & api_set)) {
          if (n == index)
             return (const GLubyte*) i->name;

From c0b568f3db7968bde2b6c6e887522f2c208188cf Mon Sep 17 00:00:00 2001
From: Nanley Chery <nanley.g.chery@intel.com>
Date: Wed, 2 Sep 2015 11:26:57 -0700
Subject: [PATCH 199/287] mesa/extensions: Wrap array entries in macros

Now that we're using macros, remove the redundant text from each entry.

Remove comments between the entries to make editing easier and separate
the sections with blank lines. Structure the EXT macros in a way that
helps reviewers verify that no meaning has been altered.

v2: Indent the entries (Chad)

Signed-off-by: Nanley Chery <nanley.g.chery@intel.com>
Reviewed-by: Chad Versace <chad.versace@intel.com>
---
 src/mesa/main/extensions.c | 649 +++++++++++++++++++------------------
 1 file changed, 325 insertions(+), 324 deletions(-)

diff --git a/src/mesa/main/extensions.c b/src/mesa/main/extensions.c
index 5bf6d20aeae..4ddf750dd6c 100644
--- a/src/mesa/main/extensions.c
+++ b/src/mesa/main/extensions.c
@@ -83,333 +83,334 @@ struct extension {
  * \brief Table of supported OpenGL extensions for all API's.
  */
 static const struct extension extension_table[] = {
-   /* ARB Extensions */
-   { "GL_ARB_ES2_compatibility",                   o(ARB_ES2_compatibility),                   GL,             2009 },
-   { "GL_ARB_ES3_compatibility",                   o(ARB_ES3_compatibility),                   GL,             2012 },
-   { "GL_ARB_arrays_of_arrays",                    o(ARB_arrays_of_arrays),                    GL,             2012 },
-   { "GL_ARB_base_instance",                       o(ARB_base_instance),                       GL,             2011 },
-   { "GL_ARB_blend_func_extended",                 o(ARB_blend_func_extended),                 GL,             2009 },
-   { "GL_ARB_buffer_storage",                      o(ARB_buffer_storage),                      GL,             2013 },
-   { "GL_ARB_clear_buffer_object",                 o(dummy_true),                              GL,             2012 },
-   { "GL_ARB_clear_texture",                       o(ARB_clear_texture),                       GL,             2013 },
-   { "GL_ARB_clip_control",                        o(ARB_clip_control),                        GL,             2014 },
-   { "GL_ARB_color_buffer_float",                  o(ARB_color_buffer_float),                  GL,             2004 },
-   { "GL_ARB_compressed_texture_pixel_storage",    o(dummy_true),                              GL,             2011 },
-   { "GL_ARB_compute_shader",                      o(ARB_compute_shader),                      GL,             2012 },
-   { "GL_ARB_conditional_render_inverted",         o(ARB_conditional_render_inverted),         GL,             2014 },
-   { "GL_ARB_copy_buffer",                         o(dummy_true),                              GL,             2008 },
-   { "GL_ARB_copy_image",                          o(ARB_copy_image),                          GL,             2012 },
-   { "GL_ARB_conservative_depth",                  o(ARB_conservative_depth),                  GL,             2011 },
-   { "GL_ARB_debug_output",                        o(dummy_true),                              GL,             2009 },
-   { "GL_ARB_depth_buffer_float",                  o(ARB_depth_buffer_float),                  GL,             2008 },
-   { "GL_ARB_depth_clamp",                         o(ARB_depth_clamp),                         GL,             2003 },
-   { "GL_ARB_depth_texture",                       o(ARB_depth_texture),                       GLL,            2001 },
-   { "GL_ARB_derivative_control",                  o(ARB_derivative_control),                  GL,             2014 },
-   { "GL_ARB_direct_state_access",                 o(dummy_true),                              GLC,            2014 },
-   { "GL_ARB_draw_buffers",                        o(dummy_true),                              GL,             2002 },
-   { "GL_ARB_draw_buffers_blend",                  o(ARB_draw_buffers_blend),                  GL,             2009 },
-   { "GL_ARB_draw_elements_base_vertex",           o(ARB_draw_elements_base_vertex),           GL,             2009 },
-   { "GL_ARB_draw_indirect",                       o(ARB_draw_indirect),                       GLC,            2010 },
-   { "GL_ARB_draw_instanced",                      o(ARB_draw_instanced),                      GL,             2008 },
-   { "GL_ARB_enhanced_layouts",                    o(ARB_enhanced_layouts),                    GLC,            2013 },
-   { "GL_ARB_explicit_attrib_location",            o(ARB_explicit_attrib_location),            GL,             2009 },
-   { "GL_ARB_explicit_uniform_location",           o(ARB_explicit_uniform_location),           GL,             2012 },
-   { "GL_ARB_fragment_coord_conventions",          o(ARB_fragment_coord_conventions),          GL,             2009 },
-   { "GL_ARB_fragment_layer_viewport",             o(ARB_fragment_layer_viewport),             GLC,            2012 },
-   { "GL_ARB_fragment_program",                    o(ARB_fragment_program),                    GLL,            2002 },
-   { "GL_ARB_fragment_program_shadow",             o(ARB_fragment_program_shadow),             GLL,            2003 },
-   { "GL_ARB_fragment_shader",                     o(ARB_fragment_shader),                     GL,             2002 },
-   { "GL_ARB_framebuffer_no_attachments",          o(ARB_framebuffer_no_attachments),          GL,             2012 },
-   { "GL_ARB_framebuffer_object",                  o(ARB_framebuffer_object),                  GL,             2005 },
-   { "GL_ARB_framebuffer_sRGB",                    o(EXT_framebuffer_sRGB),                    GL,             1998 },
-   { "GL_ARB_get_program_binary",                  o(dummy_true),                              GL,             2010 },
-   { "GL_ARB_get_texture_sub_image",               o(dummy_true),                              GL,             2014 },
-   { "GL_ARB_gpu_shader5",                         o(ARB_gpu_shader5),                         GLC,            2010 },
-   { "GL_ARB_gpu_shader_fp64",                     o(ARB_gpu_shader_fp64),                     GLC,            2010 },
-   { "GL_ARB_half_float_pixel",                    o(dummy_true),                              GL,             2003 },
-   { "GL_ARB_half_float_vertex",                   o(ARB_half_float_vertex),                   GL,             2008 },
-   { "GL_ARB_instanced_arrays",                    o(ARB_instanced_arrays),                    GL,             2008 },
-   { "GL_ARB_internalformat_query",                o(ARB_internalformat_query),                GL,             2011 },
-   { "GL_ARB_invalidate_subdata",                  o(dummy_true),                              GL,             2012 },
-   { "GL_ARB_map_buffer_alignment",                o(dummy_true),                              GL,             2011 },
-   { "GL_ARB_map_buffer_range",                    o(ARB_map_buffer_range),                    GL,             2008 },
-   { "GL_ARB_multi_bind",                          o(dummy_true),                              GL,             2013 },
-   { "GL_ARB_multi_draw_indirect",                 o(ARB_draw_indirect),                       GLC,            2012 },
-   { "GL_ARB_multisample",                         o(dummy_true),                              GLL,            1994 },
-   { "GL_ARB_multitexture",                        o(dummy_true),                              GLL,            1998 },
-   { "GL_ARB_occlusion_query2",                    o(ARB_occlusion_query2),                    GL,             2003 },
-   { "GL_ARB_occlusion_query",                     o(ARB_occlusion_query),                     GLL,            2001 },
-   { "GL_ARB_pipeline_statistics_query",           o(ARB_pipeline_statistics_query),           GL,             2014 },
-   { "GL_ARB_pixel_buffer_object",                 o(EXT_pixel_buffer_object),                 GL,             2004 },
-   { "GL_ARB_point_parameters",                    o(EXT_point_parameters),                    GLL,            1997 },
-   { "GL_ARB_point_sprite",                        o(ARB_point_sprite),                        GL,             2003 },
-   { "GL_ARB_program_interface_query",             o(dummy_true),                              GL,             2012 },
-   { "GL_ARB_provoking_vertex",                    o(EXT_provoking_vertex),                    GL,             2009 },
-   { "GL_ARB_robustness",                          o(dummy_true),                              GL,             2010 },
-   { "GL_ARB_sample_shading",                      o(ARB_sample_shading),                      GL,             2009 },
-   { "GL_ARB_sampler_objects",                     o(dummy_true),                              GL,             2009 },
-   { "GL_ARB_seamless_cube_map",                   o(ARB_seamless_cube_map),                   GL,             2009 },
-   { "GL_ARB_seamless_cubemap_per_texture",        o(AMD_seamless_cubemap_per_texture),        GL,             2013 },
-   { "GL_ARB_separate_shader_objects",             o(dummy_true),                              GL,             2010 },
-   { "GL_ARB_shader_atomic_counters",              o(ARB_shader_atomic_counters),              GL,             2011 },
-   { "GL_ARB_shader_bit_encoding",                 o(ARB_shader_bit_encoding),                 GL,             2010 },
-   { "GL_ARB_shader_clock",                        o(ARB_shader_clock),                        GL,             2015 },
-   { "GL_ARB_shader_image_load_store",             o(ARB_shader_image_load_store),             GL,             2011 },
-   { "GL_ARB_shader_image_size",                   o(ARB_shader_image_size),                   GL,             2012 },
-   { "GL_ARB_shader_objects",                      o(dummy_true),                              GL,             2002 },
-   { "GL_ARB_shader_precision",                    o(ARB_shader_precision),                    GL,             2010 },
-   { "GL_ARB_shader_stencil_export",               o(ARB_shader_stencil_export),               GL,             2009 },
-   { "GL_ARB_shader_storage_buffer_object",        o(ARB_shader_storage_buffer_object),        GL,             2012 },
-   { "GL_ARB_shader_subroutine",                   o(ARB_shader_subroutine),                   GLC,            2010 },
-   { "GL_ARB_shader_texture_image_samples",        o(ARB_shader_texture_image_samples),        GL,             2014 },
-   { "GL_ARB_shader_texture_lod",                  o(ARB_shader_texture_lod),                  GL,             2009 },
-   { "GL_ARB_shading_language_100",                o(dummy_true),                              GLL,            2003 },
-   { "GL_ARB_shading_language_packing",            o(ARB_shading_language_packing),            GL,             2011 },
-   { "GL_ARB_shading_language_420pack",            o(ARB_shading_language_420pack),            GL,             2011 },
-   { "GL_ARB_shadow",                              o(ARB_shadow),                              GLL,            2001 },
-   { "GL_ARB_stencil_texturing",                   o(ARB_stencil_texturing),                   GL,             2012 },
-   { "GL_ARB_sync",                                o(ARB_sync),                                GL,             2003 },
-   { "GL_ARB_texture_barrier",                     o(NV_texture_barrier),                      GL,             2014 },
-   { "GL_ARB_tessellation_shader",                 o(ARB_tessellation_shader),                 GLC,            2009 },
-   { "GL_ARB_texture_border_clamp",                o(ARB_texture_border_clamp),                GLL,            2000 },
-   { "GL_ARB_texture_buffer_object",               o(ARB_texture_buffer_object),               GLC,            2008 },
-   { "GL_ARB_texture_buffer_object_rgb32",         o(ARB_texture_buffer_object_rgb32),         GLC,            2009 },
-   { "GL_ARB_texture_buffer_range",                o(ARB_texture_buffer_range),                GLC,            2012 },
-   { "GL_ARB_texture_compression",                 o(dummy_true),                              GLL,            2000 },
-   { "GL_ARB_texture_compression_bptc",            o(ARB_texture_compression_bptc),            GL,             2010 },
-   { "GL_ARB_texture_compression_rgtc",            o(ARB_texture_compression_rgtc),            GL,             2004 },
-   { "GL_ARB_texture_cube_map",                    o(ARB_texture_cube_map),                    GLL,            1999 },
-   { "GL_ARB_texture_cube_map_array",              o(ARB_texture_cube_map_array),              GL,             2009 },
-   { "GL_ARB_texture_env_add",                     o(dummy_true),                              GLL,            1999 },
-   { "GL_ARB_texture_env_combine",                 o(ARB_texture_env_combine),                 GLL,            2001 },
-   { "GL_ARB_texture_env_crossbar",                o(ARB_texture_env_crossbar),                GLL,            2001 },
-   { "GL_ARB_texture_env_dot3",                    o(ARB_texture_env_dot3),                    GLL,            2001 },
-   { "GL_ARB_texture_float",                       o(ARB_texture_float),                       GL,             2004 },
-   { "GL_ARB_texture_gather",                      o(ARB_texture_gather),                      GL,             2009 },
-   { "GL_ARB_texture_mirrored_repeat",             o(dummy_true),                              GLL,            2001 },
-   { "GL_ARB_texture_mirror_clamp_to_edge",        o(ARB_texture_mirror_clamp_to_edge),        GL,             2013 },
-   { "GL_ARB_texture_multisample",                 o(ARB_texture_multisample),                 GL,             2009 },
-   { "GL_ARB_texture_non_power_of_two",            o(ARB_texture_non_power_of_two),            GL,             2003 },
-   { "GL_ARB_texture_query_levels",                o(ARB_texture_query_levels),                GL,             2012 },
-   { "GL_ARB_texture_query_lod",                   o(ARB_texture_query_lod),                   GL,             2009 },
-   { "GL_ARB_texture_rectangle",                   o(NV_texture_rectangle),                    GL,             2004 },
-   { "GL_ARB_texture_rgb10_a2ui",                  o(ARB_texture_rgb10_a2ui),                  GL,             2009 },
-   { "GL_ARB_texture_rg",                          o(ARB_texture_rg),                          GL,             2008 },
-   { "GL_ARB_texture_stencil8",                    o(ARB_texture_stencil8),                    GL,             2013 },
-   { "GL_ARB_texture_storage",                     o(dummy_true),                              GL,             2011 },
-   { "GL_ARB_texture_storage_multisample",         o(ARB_texture_multisample),                 GL,             2012 },
-   { "GL_ARB_texture_view",                        o(ARB_texture_view),                        GL,             2012 },
-   { "GL_ARB_texture_swizzle",                     o(EXT_texture_swizzle),                     GL,             2008 },
-   { "GL_ARB_timer_query",                         o(ARB_timer_query),                         GL,             2010 },
-   { "GL_ARB_transform_feedback2",                 o(ARB_transform_feedback2),                 GL,             2010 },
-   { "GL_ARB_transform_feedback3",                 o(ARB_transform_feedback3),                 GL,             2010 },
-   { "GL_ARB_transform_feedback_instanced",        o(ARB_transform_feedback_instanced),        GL,             2011 },
-   { "GL_ARB_transpose_matrix",                    o(dummy_true),                              GLL,            1999 },
-   { "GL_ARB_uniform_buffer_object",               o(ARB_uniform_buffer_object),               GL,             2009 },
-   { "GL_ARB_vertex_array_bgra",                   o(EXT_vertex_array_bgra),                   GL,             2008 },
-   { "GL_ARB_vertex_array_object",                 o(dummy_true),                              GL,             2006 },
-   { "GL_ARB_vertex_attrib_binding",               o(dummy_true),                              GL,             2012 },
-   { "GL_ARB_vertex_buffer_object",                o(dummy_true),                              GLL,            2003 },
-   { "GL_ARB_vertex_program",                      o(ARB_vertex_program),                      GLL,            2002 },
-   { "GL_ARB_vertex_shader",                       o(ARB_vertex_shader),                       GL,             2002 },
-   { "GL_ARB_vertex_attrib_64bit",                 o(ARB_vertex_attrib_64bit),                 GLC,            2010 },
-   { "GL_ARB_vertex_type_10f_11f_11f_rev",         o(ARB_vertex_type_10f_11f_11f_rev),         GL,             2013 },
-   { "GL_ARB_vertex_type_2_10_10_10_rev",          o(ARB_vertex_type_2_10_10_10_rev),          GL,             2009 },
-   { "GL_ARB_viewport_array",                      o(ARB_viewport_array),                      GLC,            2010 },
-   { "GL_ARB_window_pos",                          o(dummy_true),                              GLL,            2001 },
-   /* EXT extensions */
-   { "GL_EXT_abgr",                                o(dummy_true),                              GL,             1995 },
-   { "GL_EXT_bgra",                                o(dummy_true),                              GLL,            1995 },
-   { "GL_EXT_blend_color",                         o(EXT_blend_color),                         GLL,            1995 },
-   { "GL_EXT_blend_equation_separate",             o(EXT_blend_equation_separate),             GL,             2003 },
-   { "GL_EXT_blend_func_separate",                 o(EXT_blend_func_separate),                 GLL,            1999 },
-   { "GL_EXT_buffer_storage",                      o(ARB_buffer_storage),                                 ES31, 2015 },
-   { "GL_EXT_discard_framebuffer",                 o(dummy_true),                                    ES1 | ES2, 2009 },
-   { "GL_EXT_blend_minmax",                        o(EXT_blend_minmax),                        GLL | ES1 | ES2, 1995 },
-   { "GL_EXT_blend_subtract",                      o(dummy_true),                              GLL,            1995 },
-   { "GL_EXT_compiled_vertex_array",               o(dummy_true),                              GLL,            1996 },
-   { "GL_EXT_copy_texture",                        o(dummy_true),                              GLL,            1995 },
-   { "GL_EXT_depth_bounds_test",                   o(EXT_depth_bounds_test),                   GL,             2002 },
-   { "GL_EXT_draw_buffers",                        o(dummy_true),                                         ES2, 2012 },
-   { "GL_EXT_draw_buffers2",                       o(EXT_draw_buffers2),                       GL,             2006 },
-   { "GL_EXT_draw_elements_base_vertex",           o(ARB_draw_elements_base_vertex),                      ES2, 2014 },
-   { "GL_EXT_draw_instanced",                      o(ARB_draw_instanced),                      GL,             2006 },
-   { "GL_EXT_draw_range_elements",                 o(dummy_true),                              GLL,            1997 },
-   { "GL_EXT_fog_coord",                           o(dummy_true),                              GLL,            1999 },
-   { "GL_EXT_framebuffer_blit",                    o(dummy_true),                              GL,             2005 },
-   { "GL_EXT_framebuffer_multisample",             o(EXT_framebuffer_multisample),             GL,             2005 },
-   { "GL_EXT_framebuffer_multisample_blit_scaled", o(EXT_framebuffer_multisample_blit_scaled), GL,             2011 },
-   { "GL_EXT_framebuffer_object",                  o(dummy_true),                              GLL,            2000 },
-   { "GL_EXT_framebuffer_sRGB",                    o(EXT_framebuffer_sRGB),                    GL,             1998 },
-   { "GL_EXT_gpu_program_parameters",              o(EXT_gpu_program_parameters),              GLL,            2006 },
-   { "GL_EXT_gpu_shader4",                         o(EXT_gpu_shader4),                         GL,             2006 },
-   { "GL_EXT_map_buffer_range",                    o(ARB_map_buffer_range),                          ES1 | ES2, 2012 },
-   { "GL_EXT_multi_draw_arrays",                   o(dummy_true),                              GLL | ES1 | ES2, 1999 },
-   { "GL_EXT_packed_depth_stencil",                o(dummy_true),                              GL,             2005 },
-   { "GL_EXT_packed_float",                        o(EXT_packed_float),                        GL,             2004 },
-   { "GL_EXT_packed_pixels",                       o(dummy_true),                              GLL,            1997 },
-   { "GL_EXT_pixel_buffer_object",                 o(EXT_pixel_buffer_object),                 GL,             2004 },
-   { "GL_EXT_point_parameters",                    o(EXT_point_parameters),                    GLL,            1997 },
-   { "GL_EXT_polygon_offset",                      o(dummy_true),                              GLL,            1995 },
-   { "GL_EXT_polygon_offset_clamp",                o(EXT_polygon_offset_clamp),                GL,             2014 },
-   { "GL_EXT_provoking_vertex",                    o(EXT_provoking_vertex),                    GL,             2009 },
-   { "GL_EXT_rescale_normal",                      o(dummy_true),                              GLL,            1997 },
-   { "GL_EXT_secondary_color",                     o(dummy_true),                              GLL,            1999 },
-   { "GL_EXT_separate_shader_objects",             o(dummy_true),                                         ES2, 2013 },
-   { "GL_EXT_separate_specular_color",             o(dummy_true),                              GLL,            1997 },
-   { "GL_EXT_shader_integer_mix",                  o(EXT_shader_integer_mix),                  GL       | ES3, 2013 },
-   { "GL_EXT_shadow_funcs",                        o(ARB_shadow),                              GLL,            2002 },
-   { "GL_EXT_stencil_two_side",                    o(EXT_stencil_two_side),                    GLL,            2001 },
-   { "GL_EXT_stencil_wrap",                        o(dummy_true),                              GLL,            2002 },
-   { "GL_EXT_subtexture",                          o(dummy_true),                              GLL,            1995 },
-   { "GL_EXT_texture3D",                           o(EXT_texture3D),                           GLL,            1996 },
-   { "GL_EXT_texture_array",                       o(EXT_texture_array),                       GL,             2006 },
-   { "GL_EXT_texture_compression_dxt1",            o(ANGLE_texture_compression_dxt),           GL | ES1 | ES2, 2004 },
-   { "GL_ANGLE_texture_compression_dxt3",          o(ANGLE_texture_compression_dxt),           GL | ES1 | ES2, 2011 },
-   { "GL_ANGLE_texture_compression_dxt5",          o(ANGLE_texture_compression_dxt),           GL | ES1 | ES2, 2011 },
-   { "GL_EXT_texture_compression_latc",            o(EXT_texture_compression_latc),            GLL,            2006 },
-   { "GL_EXT_texture_compression_rgtc",            o(ARB_texture_compression_rgtc),            GL,             2004 },
-   { "GL_EXT_texture_compression_s3tc",            o(EXT_texture_compression_s3tc),            GL,             2000 },
-   { "GL_EXT_texture_cube_map",                    o(ARB_texture_cube_map),                    GLL,            2001 },
-   { "GL_EXT_texture_edge_clamp",                  o(dummy_true),                              GLL,            1997 },
-   { "GL_EXT_texture_env_add",                     o(dummy_true),                              GLL,            1999 },
-   { "GL_EXT_texture_env_combine",                 o(dummy_true),                              GLL,            2000 },
-   { "GL_EXT_texture_env_dot3",                    o(EXT_texture_env_dot3),                    GLL,            2000 },
-   { "GL_EXT_texture_filter_anisotropic",          o(EXT_texture_filter_anisotropic),          GL | ES1 | ES2, 1999 },
-   { "GL_EXT_texture_format_BGRA8888",             o(dummy_true),                                   ES1 | ES2, 2005 },
-   { "GL_EXT_texture_rg",                          o(ARB_texture_rg),                                     ES2, 2011 },
-   { "GL_EXT_read_format_bgra",                    o(dummy_true),                                   ES1 | ES2, 2009 },
-   { "GL_EXT_texture_integer",                     o(EXT_texture_integer),                     GL,             2006 },
-   { "GL_EXT_texture_lod_bias",                    o(dummy_true),                              GLL | ES1,      1999 },
-   { "GL_EXT_texture_mirror_clamp",                o(EXT_texture_mirror_clamp),                GL,             2004 },
-   { "GL_EXT_texture_object",                      o(dummy_true),                              GLL,            1995 },
-   { "GL_EXT_texture",                             o(dummy_true),                              GLL,            1996 },
-   { "GL_EXT_texture_rectangle",                   o(NV_texture_rectangle),                    GLL,            2004 },
-   { "GL_EXT_texture_shared_exponent",             o(EXT_texture_shared_exponent),             GL,             2004 },
-   { "GL_EXT_texture_snorm",                       o(EXT_texture_snorm),                       GL,             2009 },
-   { "GL_EXT_texture_sRGB",                        o(EXT_texture_sRGB),                        GL,             2004 },
-   { "GL_EXT_texture_sRGB_decode",                 o(EXT_texture_sRGB_decode),                        GL,      2006 },
-   { "GL_EXT_texture_swizzle",                     o(EXT_texture_swizzle),                     GL,             2008 },
-   { "GL_EXT_texture_type_2_10_10_10_REV",         o(dummy_true),                                         ES2, 2008 },
-   { "GL_EXT_timer_query",                         o(EXT_timer_query),                         GL,             2006 },
-   { "GL_EXT_transform_feedback",                  o(EXT_transform_feedback),                  GL,             2011 },
-   { "GL_EXT_unpack_subimage",                     o(dummy_true),                                         ES2, 2011 },
-   { "GL_EXT_vertex_array_bgra",                   o(EXT_vertex_array_bgra),                   GL,             2008 },
-   { "GL_EXT_vertex_array",                        o(dummy_true),                              GLL,            1995 },
-   { "GL_EXT_color_buffer_float",                  o(dummy_true),                                         ES3, 2013 },
+#define EXT(name_str, driver_cap, api_flags, yyyy) \
+        { .name = "GL_" #name_str, .offset = o(driver_cap), .api_set = api_flags, .year = yyyy},
+   EXT(ARB_ES2_compatibility                   , ARB_ES2_compatibility                  , GL                   , 2009)
+   EXT(ARB_ES3_compatibility                   , ARB_ES3_compatibility                  , GL                   , 2012)
+   EXT(ARB_arrays_of_arrays                    , ARB_arrays_of_arrays                   , GL                   , 2012)
+   EXT(ARB_base_instance                       , ARB_base_instance                      , GL                   , 2011)
+   EXT(ARB_blend_func_extended                 , ARB_blend_func_extended                , GL                   , 2009)
+   EXT(ARB_buffer_storage                      , ARB_buffer_storage                     , GL                   , 2013)
+   EXT(ARB_clear_buffer_object                 , dummy_true                             , GL                   , 2012)
+   EXT(ARB_clear_texture                       , ARB_clear_texture                      , GL                   , 2013)
+   EXT(ARB_clip_control                        , ARB_clip_control                       , GL                   , 2014)
+   EXT(ARB_color_buffer_float                  , ARB_color_buffer_float                 , GL                   , 2004)
+   EXT(ARB_compressed_texture_pixel_storage    , dummy_true                             , GL                   , 2011)
+   EXT(ARB_compute_shader                      , ARB_compute_shader                     , GL                   , 2012)
+   EXT(ARB_conditional_render_inverted         , ARB_conditional_render_inverted        , GL                   , 2014)
+   EXT(ARB_copy_buffer                         , dummy_true                             , GL                   , 2008)
+   EXT(ARB_copy_image                          , ARB_copy_image                         , GL                   , 2012)
+   EXT(ARB_conservative_depth                  , ARB_conservative_depth                 , GL                   , 2011)
+   EXT(ARB_debug_output                        , dummy_true                             , GL                   , 2009)
+   EXT(ARB_depth_buffer_float                  , ARB_depth_buffer_float                 , GL                   , 2008)
+   EXT(ARB_depth_clamp                         , ARB_depth_clamp                        , GL                   , 2003)
+   EXT(ARB_depth_texture                       , ARB_depth_texture                      , GLL                  , 2001)
+   EXT(ARB_derivative_control                  , ARB_derivative_control                 , GL                   , 2014)
+   EXT(ARB_direct_state_access                 , dummy_true                             , GLC                  , 2014)
+   EXT(ARB_draw_buffers                        , dummy_true                             , GL                   , 2002)
+   EXT(ARB_draw_buffers_blend                  , ARB_draw_buffers_blend                 , GL                   , 2009)
+   EXT(ARB_draw_elements_base_vertex           , ARB_draw_elements_base_vertex          , GL                   , 2009)
+   EXT(ARB_draw_indirect                       , ARB_draw_indirect                      , GLC                  , 2010)
+   EXT(ARB_draw_instanced                      , ARB_draw_instanced                     , GL                   , 2008)
+   EXT(ARB_enhanced_layouts                    , ARB_enhanced_layouts                   , GLC                  , 2013)
+   EXT(ARB_explicit_attrib_location            , ARB_explicit_attrib_location           , GL                   , 2009)
+   EXT(ARB_explicit_uniform_location           , ARB_explicit_uniform_location          , GL                   , 2012)
+   EXT(ARB_fragment_coord_conventions          , ARB_fragment_coord_conventions         , GL                   , 2009)
+   EXT(ARB_fragment_layer_viewport             , ARB_fragment_layer_viewport            , GLC                  , 2012)
+   EXT(ARB_fragment_program                    , ARB_fragment_program                   , GLL                  , 2002)
+   EXT(ARB_fragment_program_shadow             , ARB_fragment_program_shadow            , GLL                  , 2003)
+   EXT(ARB_fragment_shader                     , ARB_fragment_shader                    , GL                   , 2002)
+   EXT(ARB_framebuffer_no_attachments          , ARB_framebuffer_no_attachments         , GL                   , 2012)
+   EXT(ARB_framebuffer_object                  , ARB_framebuffer_object                 , GL                   , 2005)
+   EXT(ARB_framebuffer_sRGB                    , EXT_framebuffer_sRGB                   , GL                   , 1998)
+   EXT(ARB_get_program_binary                  , dummy_true                             , GL                   , 2010)
+   EXT(ARB_get_texture_sub_image               , dummy_true                             , GL                   , 2014)
+   EXT(ARB_gpu_shader5                         , ARB_gpu_shader5                        , GLC                  , 2010)
+   EXT(ARB_gpu_shader_fp64                     , ARB_gpu_shader_fp64                    , GLC                  , 2010)
+   EXT(ARB_half_float_pixel                    , dummy_true                             , GL                   , 2003)
+   EXT(ARB_half_float_vertex                   , ARB_half_float_vertex                  , GL                   , 2008)
+   EXT(ARB_instanced_arrays                    , ARB_instanced_arrays                   , GL                   , 2008)
+   EXT(ARB_internalformat_query                , ARB_internalformat_query               , GL                   , 2011)
+   EXT(ARB_invalidate_subdata                  , dummy_true                             , GL                   , 2012)
+   EXT(ARB_map_buffer_alignment                , dummy_true                             , GL                   , 2011)
+   EXT(ARB_map_buffer_range                    , ARB_map_buffer_range                   , GL                   , 2008)
+   EXT(ARB_multi_bind                          , dummy_true                             , GL                   , 2013)
+   EXT(ARB_multi_draw_indirect                 , ARB_draw_indirect                      , GLC                  , 2012)
+   EXT(ARB_multisample                         , dummy_true                             , GLL                  , 1994)
+   EXT(ARB_multitexture                        , dummy_true                             , GLL                  , 1998)
+   EXT(ARB_occlusion_query2                    , ARB_occlusion_query2                   , GL                   , 2003)
+   EXT(ARB_occlusion_query                     , ARB_occlusion_query                    , GLL                  , 2001)
+   EXT(ARB_pipeline_statistics_query           , ARB_pipeline_statistics_query          , GL                   , 2014)
+   EXT(ARB_pixel_buffer_object                 , EXT_pixel_buffer_object                , GL                   , 2004)
+   EXT(ARB_point_parameters                    , EXT_point_parameters                   , GLL                  , 1997)
+   EXT(ARB_point_sprite                        , ARB_point_sprite                       , GL                   , 2003)
+   EXT(ARB_program_interface_query             , dummy_true                             , GL                   , 2012)
+   EXT(ARB_provoking_vertex                    , EXT_provoking_vertex                   , GL                   , 2009)
+   EXT(ARB_robustness                          , dummy_true                             , GL                   , 2010)
+   EXT(ARB_sample_shading                      , ARB_sample_shading                     , GL                   , 2009)
+   EXT(ARB_sampler_objects                     , dummy_true                             , GL                   , 2009)
+   EXT(ARB_seamless_cube_map                   , ARB_seamless_cube_map                  , GL                   , 2009)
+   EXT(ARB_seamless_cubemap_per_texture        , AMD_seamless_cubemap_per_texture       , GL                   , 2013)
+   EXT(ARB_separate_shader_objects             , dummy_true                             , GL                   , 2010)
+   EXT(ARB_shader_atomic_counters              , ARB_shader_atomic_counters             , GL                   , 2011)
+   EXT(ARB_shader_bit_encoding                 , ARB_shader_bit_encoding                , GL                   , 2010)
+   EXT(ARB_shader_clock                        , ARB_shader_clock                       , GL                   , 2015)
+   EXT(ARB_shader_image_load_store             , ARB_shader_image_load_store            , GL                   , 2011)
+   EXT(ARB_shader_image_size                   , ARB_shader_image_size                  , GL                   , 2012)
+   EXT(ARB_shader_objects                      , dummy_true                             , GL                   , 2002)
+   EXT(ARB_shader_precision                    , ARB_shader_precision                   , GL                   , 2010)
+   EXT(ARB_shader_stencil_export               , ARB_shader_stencil_export              , GL                   , 2009)
+   EXT(ARB_shader_storage_buffer_object        , ARB_shader_storage_buffer_object       , GL                   , 2012)
+   EXT(ARB_shader_subroutine                   , ARB_shader_subroutine                  , GLC                  , 2010)
+   EXT(ARB_shader_texture_image_samples        , ARB_shader_texture_image_samples       , GL                   , 2014)
+   EXT(ARB_shader_texture_lod                  , ARB_shader_texture_lod                 , GL                   , 2009)
+   EXT(ARB_shading_language_100                , dummy_true                             , GLL                  , 2003)
+   EXT(ARB_shading_language_packing            , ARB_shading_language_packing           , GL                   , 2011)
+   EXT(ARB_shading_language_420pack            , ARB_shading_language_420pack           , GL                   , 2011)
+   EXT(ARB_shadow                              , ARB_shadow                             , GLL                  , 2001)
+   EXT(ARB_stencil_texturing                   , ARB_stencil_texturing                  , GL                   , 2012)
+   EXT(ARB_sync                                , ARB_sync                               , GL                   , 2003)
+   EXT(ARB_texture_barrier                     , NV_texture_barrier                     , GL                   , 2014)
+   EXT(ARB_tessellation_shader                 , ARB_tessellation_shader                , GLC                  , 2009)
+   EXT(ARB_texture_border_clamp                , ARB_texture_border_clamp               , GLL                  , 2000)
+   EXT(ARB_texture_buffer_object               , ARB_texture_buffer_object              , GLC                  , 2008)
+   EXT(ARB_texture_buffer_object_rgb32         , ARB_texture_buffer_object_rgb32        , GLC                  , 2009)
+   EXT(ARB_texture_buffer_range                , ARB_texture_buffer_range               , GLC                  , 2012)
+   EXT(ARB_texture_compression                 , dummy_true                             , GLL                  , 2000)
+   EXT(ARB_texture_compression_bptc            , ARB_texture_compression_bptc           , GL                   , 2010)
+   EXT(ARB_texture_compression_rgtc            , ARB_texture_compression_rgtc           , GL                   , 2004)
+   EXT(ARB_texture_cube_map                    , ARB_texture_cube_map                   , GLL                  , 1999)
+   EXT(ARB_texture_cube_map_array              , ARB_texture_cube_map_array             , GL                   , 2009)
+   EXT(ARB_texture_env_add                     , dummy_true                             , GLL                  , 1999)
+   EXT(ARB_texture_env_combine                 , ARB_texture_env_combine                , GLL                  , 2001)
+   EXT(ARB_texture_env_crossbar                , ARB_texture_env_crossbar               , GLL                  , 2001)
+   EXT(ARB_texture_env_dot3                    , ARB_texture_env_dot3                   , GLL                  , 2001)
+   EXT(ARB_texture_float                       , ARB_texture_float                      , GL                   , 2004)
+   EXT(ARB_texture_gather                      , ARB_texture_gather                     , GL                   , 2009)
+   EXT(ARB_texture_mirrored_repeat             , dummy_true                             , GLL                  , 2001)
+   EXT(ARB_texture_mirror_clamp_to_edge        , ARB_texture_mirror_clamp_to_edge       , GL                   , 2013)
+   EXT(ARB_texture_multisample                 , ARB_texture_multisample                , GL                   , 2009)
+   EXT(ARB_texture_non_power_of_two            , ARB_texture_non_power_of_two           , GL                   , 2003)
+   EXT(ARB_texture_query_levels                , ARB_texture_query_levels               , GL                   , 2012)
+   EXT(ARB_texture_query_lod                   , ARB_texture_query_lod                  , GL                   , 2009)
+   EXT(ARB_texture_rectangle                   , NV_texture_rectangle                   , GL                   , 2004)
+   EXT(ARB_texture_rgb10_a2ui                  , ARB_texture_rgb10_a2ui                 , GL                   , 2009)
+   EXT(ARB_texture_rg                          , ARB_texture_rg                         , GL                   , 2008)
+   EXT(ARB_texture_stencil8                    , ARB_texture_stencil8                   , GL                   , 2013)
+   EXT(ARB_texture_storage                     , dummy_true                             , GL                   , 2011)
+   EXT(ARB_texture_storage_multisample         , ARB_texture_multisample                , GL                   , 2012)
+   EXT(ARB_texture_view                        , ARB_texture_view                       , GL                   , 2012)
+   EXT(ARB_texture_swizzle                     , EXT_texture_swizzle                    , GL                   , 2008)
+   EXT(ARB_timer_query                         , ARB_timer_query                        , GL                   , 2010)
+   EXT(ARB_transform_feedback2                 , ARB_transform_feedback2                , GL                   , 2010)
+   EXT(ARB_transform_feedback3                 , ARB_transform_feedback3                , GL                   , 2010)
+   EXT(ARB_transform_feedback_instanced        , ARB_transform_feedback_instanced       , GL                   , 2011)
+   EXT(ARB_transpose_matrix                    , dummy_true                             , GLL                  , 1999)
+   EXT(ARB_uniform_buffer_object               , ARB_uniform_buffer_object              , GL                   , 2009)
+   EXT(ARB_vertex_array_bgra                   , EXT_vertex_array_bgra                  , GL                   , 2008)
+   EXT(ARB_vertex_array_object                 , dummy_true                             , GL                   , 2006)
+   EXT(ARB_vertex_attrib_binding               , dummy_true                             , GL                   , 2012)
+   EXT(ARB_vertex_buffer_object                , dummy_true                             , GLL                  , 2003)
+   EXT(ARB_vertex_program                      , ARB_vertex_program                     , GLL                  , 2002)
+   EXT(ARB_vertex_shader                       , ARB_vertex_shader                      , GL                   , 2002)
+   EXT(ARB_vertex_attrib_64bit                 , ARB_vertex_attrib_64bit                , GLC                  , 2010)
+   EXT(ARB_vertex_type_10f_11f_11f_rev         , ARB_vertex_type_10f_11f_11f_rev        , GL                   , 2013)
+   EXT(ARB_vertex_type_2_10_10_10_rev          , ARB_vertex_type_2_10_10_10_rev         , GL                   , 2009)
+   EXT(ARB_viewport_array                      , ARB_viewport_array                     , GLC                  , 2010)
+   EXT(ARB_window_pos                          , dummy_true                             , GLL                  , 2001)
 
-   /* OES extensions */
-   { "GL_OES_blend_equation_separate",             o(EXT_blend_equation_separate),                  ES1,       2009 },
-   { "GL_OES_blend_func_separate",                 o(EXT_blend_func_separate),                      ES1,       2009 },
-   { "GL_OES_blend_subtract",                      o(dummy_true),                                   ES1,       2009 },
-   { "GL_OES_byte_coordinates",                    o(dummy_true),                                   ES1,       2002 },
-   { "GL_OES_compressed_ETC1_RGB8_texture",        o(OES_compressed_ETC1_RGB8_texture),             ES1 | ES2, 2005 },
-   { "GL_OES_compressed_paletted_texture",         o(dummy_true),                                   ES1,       2003 },
-   { "GL_OES_depth24",                             o(dummy_true),                                   ES1 | ES2, 2005 },
-   { "GL_OES_depth32",                             o(dummy_false),                     DISABLE,                2005 },
-   { "GL_OES_depth_texture",                       o(ARB_depth_texture),                                  ES2, 2006 },
-   { "GL_OES_depth_texture_cube_map",              o(OES_depth_texture_cube_map),                         ES2, 2012 },
-   { "GL_OES_draw_elements_base_vertex",           o(ARB_draw_elements_base_vertex),                      ES2, 2014 },
-   { "GL_OES_draw_texture",                        o(OES_draw_texture),                             ES1,       2004 },
-   { "GL_OES_EGL_sync",                            o(dummy_true),                                   ES1 | ES2, 2010 },
-   /*  FIXME: Mesa expects GL_OES_EGL_image to be available in OpenGL contexts. */
-   { "GL_OES_EGL_image",                           o(OES_EGL_image),                           GL | ES1 | ES2, 2006 },
-   { "GL_OES_EGL_image_external",                  o(OES_EGL_image_external),                       ES1 | ES2, 2010 },
-   { "GL_OES_element_index_uint",                  o(dummy_true),                                   ES1 | ES2, 2005 },
-   { "GL_OES_fbo_render_mipmap",                   o(dummy_true),                                   ES1 | ES2, 2005 },
-   { "GL_OES_fixed_point",                         o(dummy_true),                                   ES1,       2002 },
-   { "GL_OES_framebuffer_object",                  o(dummy_true),                                   ES1,       2005 },
-   { "GL_OES_get_program_binary",                  o(dummy_true),                                         ES2, 2008 },
-   { "GL_OES_mapbuffer",                           o(dummy_true),                                   ES1 | ES2, 2005 },
-   { "GL_OES_packed_depth_stencil",                o(dummy_true),                                   ES1 | ES2, 2007 },
-   { "GL_OES_point_size_array",                    o(dummy_true),                                   ES1,       2004 },
-   { "GL_OES_point_sprite",                        o(ARB_point_sprite),                             ES1,       2004 },
-   { "GL_OES_query_matrix",                        o(dummy_true),                                   ES1,       2003 },
-   { "GL_OES_read_format",                         o(dummy_true),                              GL | ES1,       2003 },
-   { "GL_OES_rgb8_rgba8",                          o(dummy_true),                                   ES1 | ES2, 2005 },
-   { "GL_OES_single_precision",                    o(dummy_true),                                   ES1,       2003 },
-   { "GL_OES_standard_derivatives",                o(OES_standard_derivatives),                           ES2, 2005 },
-   { "GL_OES_stencil1",                            o(dummy_false),                     DISABLE,                2005 },
-   { "GL_OES_stencil4",                            o(dummy_false),                     DISABLE,                2005 },
-   { "GL_OES_stencil8",                            o(dummy_true),                                   ES1 | ES2, 2005 },
-   { "GL_OES_stencil_wrap",                        o(dummy_true),                                   ES1,       2002 },
-   { "GL_OES_surfaceless_context",                 o(dummy_true),                                   ES1 | ES2, 2012 },
-   { "GL_OES_texture_3D",                          o(EXT_texture3D),                                      ES2, 2005 },
-   { "GL_OES_texture_cube_map",                    o(ARB_texture_cube_map),                         ES1,       2007 },
-   { "GL_OES_texture_env_crossbar",                o(ARB_texture_env_crossbar),                     ES1,       2005 },
-   { "GL_OES_texture_float",                       o(OES_texture_float),                                  ES2, 2005 },
-   { "GL_OES_texture_float_linear",                o(OES_texture_float_linear),                           ES2, 2005 },
-   { "GL_OES_texture_half_float",                  o(OES_texture_half_float),                             ES2, 2005 },
-   { "GL_OES_texture_half_float_linear",           o(OES_texture_half_float_linear),                      ES2, 2005 },
-   { "GL_OES_texture_mirrored_repeat",             o(dummy_true),                                   ES1,       2005 },
-   { "GL_OES_texture_storage_multisample_2d_array",o(ARB_texture_multisample),                           ES31, 2014 },
-   { "GL_OES_texture_npot",                        o(ARB_texture_non_power_of_two),                 ES1 | ES2, 2005 },
-   { "GL_OES_vertex_array_object",                 o(dummy_true),                                   ES1 | ES2, 2010 },
+   EXT(EXT_abgr                                , dummy_true                             , GL                   , 1995)
+   EXT(EXT_bgra                                , dummy_true                             , GLL                  , 1995)
+   EXT(EXT_blend_color                         , EXT_blend_color                        , GLL                  , 1995)
+   EXT(EXT_blend_equation_separate             , EXT_blend_equation_separate            , GL                   , 2003)
+   EXT(EXT_blend_func_separate                 , EXT_blend_func_separate                , GLL                  , 1999)
+   EXT(EXT_buffer_storage                      , ARB_buffer_storage                     , ES31                 , 2015)
+   EXT(EXT_discard_framebuffer                 , dummy_true                             ,       ES1 | ES2      , 2009)
+   EXT(EXT_blend_minmax                        , EXT_blend_minmax                       , GLL | ES1 | ES2      , 1995)
+   EXT(EXT_blend_subtract                      , dummy_true                             , GLL                  , 1995)
+   EXT(EXT_compiled_vertex_array               , dummy_true                             , GLL                  , 1996)
+   EXT(EXT_copy_texture                        , dummy_true                             , GLL                  , 1995)
+   EXT(EXT_depth_bounds_test                   , EXT_depth_bounds_test                  , GL                   , 2002)
+   EXT(EXT_draw_buffers                        , dummy_true                             ,             ES2      , 2012)
+   EXT(EXT_draw_buffers2                       , EXT_draw_buffers2                      , GL                   , 2006)
+   EXT(EXT_draw_elements_base_vertex           , ARB_draw_elements_base_vertex          , ES2                  , 2014)
+   EXT(EXT_draw_instanced                      , ARB_draw_instanced                     , GL                   , 2006)
+   EXT(EXT_draw_range_elements                 , dummy_true                             , GLL                  , 1997)
+   EXT(EXT_fog_coord                           , dummy_true                             , GLL                  , 1999)
+   EXT(EXT_framebuffer_blit                    , dummy_true                             , GL                   , 2005)
+   EXT(EXT_framebuffer_multisample             , EXT_framebuffer_multisample            , GL                   , 2005)
+   EXT(EXT_framebuffer_multisample_blit_scaled , EXT_framebuffer_multisample_blit_scaled, GL                   , 2011)
+   EXT(EXT_framebuffer_object                  , dummy_true                             , GLL                  , 2000)
+   EXT(EXT_framebuffer_sRGB                    , EXT_framebuffer_sRGB                   , GL                   , 1998)
+   EXT(EXT_gpu_program_parameters              , EXT_gpu_program_parameters             , GLL                  , 2006)
+   EXT(EXT_gpu_shader4                         , EXT_gpu_shader4                        , GL                   , 2006)
+   EXT(EXT_map_buffer_range                    , ARB_map_buffer_range                   ,       ES1 | ES2      , 2012)
+   EXT(EXT_multi_draw_arrays                   , dummy_true                             , GLL | ES1 | ES2      , 1999)
+   EXT(EXT_packed_depth_stencil                , dummy_true                             , GL                   , 2005)
+   EXT(EXT_packed_float                        , EXT_packed_float                       , GL                   , 2004)
+   EXT(EXT_packed_pixels                       , dummy_true                             , GLL                  , 1997)
+   EXT(EXT_pixel_buffer_object                 , EXT_pixel_buffer_object                , GL                   , 2004)
+   EXT(EXT_point_parameters                    , EXT_point_parameters                   , GLL                  , 1997)
+   EXT(EXT_polygon_offset                      , dummy_true                             , GLL                  , 1995)
+   EXT(EXT_polygon_offset_clamp                , EXT_polygon_offset_clamp               , GL                   , 2014)
+   EXT(EXT_provoking_vertex                    , EXT_provoking_vertex                   , GL                   , 2009)
+   EXT(EXT_rescale_normal                      , dummy_true                             , GLL                  , 1997)
+   EXT(EXT_secondary_color                     , dummy_true                             , GLL                  , 1999)
+   EXT(EXT_separate_shader_objects             , dummy_true                             ,            ES2       , 2013)
+   EXT(EXT_separate_specular_color             , dummy_true                             , GLL                  , 1997)
+   EXT(EXT_shader_integer_mix                  , EXT_shader_integer_mix                 , GL       | ES3       , 2013)
+   EXT(EXT_shadow_funcs                        , ARB_shadow                             , GLL                  , 2002)
+   EXT(EXT_stencil_two_side                    , EXT_stencil_two_side                   , GLL                  , 2001)
+   EXT(EXT_stencil_wrap                        , dummy_true                             , GLL                  , 2002)
+   EXT(EXT_subtexture                          , dummy_true                             , GLL                  , 1995)
+   EXT(EXT_texture3D                           , EXT_texture3D                          , GLL                  , 1996)
+   EXT(EXT_texture_array                       , EXT_texture_array                      , GL                   , 2006)
+   EXT(EXT_texture_compression_dxt1            , ANGLE_texture_compression_dxt          , GL | ES1 | ES2       , 2004)
+   EXT(ANGLE_texture_compression_dxt3          , ANGLE_texture_compression_dxt          , GL | ES1 | ES2       , 2011)
+   EXT(ANGLE_texture_compression_dxt5          , ANGLE_texture_compression_dxt          , GL | ES1 | ES2       , 2011)
+   EXT(EXT_texture_compression_latc            , EXT_texture_compression_latc           , GLL                  , 2006)
+   EXT(EXT_texture_compression_rgtc            , ARB_texture_compression_rgtc           , GL                   , 2004)
+   EXT(EXT_texture_compression_s3tc            , EXT_texture_compression_s3tc           , GL                   , 2000)
+   EXT(EXT_texture_cube_map                    , ARB_texture_cube_map                   , GLL                  , 2001)
+   EXT(EXT_texture_edge_clamp                  , dummy_true                             , GLL                  , 1997)
+   EXT(EXT_texture_env_add                     , dummy_true                             , GLL                  , 1999)
+   EXT(EXT_texture_env_combine                 , dummy_true                             , GLL                  , 2000)
+   EXT(EXT_texture_env_dot3                    , EXT_texture_env_dot3                   , GLL                  , 2000)
+   EXT(EXT_texture_filter_anisotropic          , EXT_texture_filter_anisotropic         , GL | ES1 | ES2       , 1999)
+   EXT(EXT_texture_format_BGRA8888             , dummy_true                             ,      ES1 | ES2       , 2005)
+   EXT(EXT_texture_rg                          , ARB_texture_rg                         ,            ES2       , 2011)
+   EXT(EXT_read_format_bgra                    , dummy_true                             ,      ES1 | ES2       , 2009)
+   EXT(EXT_texture_integer                     , EXT_texture_integer                    , GL                   , 2006)
+   EXT(EXT_texture_lod_bias                    , dummy_true                             , GLL | ES1            , 1999)
+   EXT(EXT_texture_mirror_clamp                , EXT_texture_mirror_clamp               , GL                   , 2004)
+   EXT(EXT_texture_object                      , dummy_true                             , GLL                  , 1995)
+   EXT(EXT_texture                             , dummy_true                             , GLL                  , 1996)
+   EXT(EXT_texture_rectangle                   , NV_texture_rectangle                   , GLL                  , 2004)
+   EXT(EXT_texture_shared_exponent             , EXT_texture_shared_exponent            , GL                   , 2004)
+   EXT(EXT_texture_snorm                       , EXT_texture_snorm                      , GL                   , 2009)
+   EXT(EXT_texture_sRGB                        , EXT_texture_sRGB                       , GL                   , 2004)
+   EXT(EXT_texture_sRGB_decode                 , EXT_texture_sRGB_decode                , GL                   , 2006)
+   EXT(EXT_texture_swizzle                     , EXT_texture_swizzle                    , GL                   , 2008)
+   EXT(EXT_texture_type_2_10_10_10_REV         , dummy_true                             ,            ES2       , 2008)
+   EXT(EXT_timer_query                         , EXT_timer_query                        , GL                   , 2006)
+   EXT(EXT_transform_feedback                  , EXT_transform_feedback                 , GL                   , 2011)
+   EXT(EXT_unpack_subimage                     , dummy_true                             ,            ES2       , 2011)
+   EXT(EXT_vertex_array_bgra                   , EXT_vertex_array_bgra                  , GL                   , 2008)
+   EXT(EXT_vertex_array                        , dummy_true                             , GLL                  , 1995)
+   EXT(EXT_color_buffer_float                  , dummy_true                             ,                  ES3 , 2013)
 
-   /* KHR extensions */
-   { "GL_KHR_debug",                               o(dummy_true),                              GL,             2012 },
-   { "GL_KHR_context_flush_control",               o(dummy_true),                              GL       | ES2, 2014 },
-   { "GL_KHR_texture_compression_astc_hdr",        o(KHR_texture_compression_astc_hdr),        GL       | ES2, 2012 },
-   { "GL_KHR_texture_compression_astc_ldr",        o(KHR_texture_compression_astc_ldr),        GL       | ES2, 2012 },
 
-   /* Vendor extensions */
-   { "GL_3DFX_texture_compression_FXT1",           o(TDFX_texture_compression_FXT1),           GL,             1999 },
-   { "GL_AMD_conservative_depth",                  o(ARB_conservative_depth),                  GL,             2009 },
-   { "GL_AMD_draw_buffers_blend",                  o(ARB_draw_buffers_blend),                  GL,             2009 },
-   { "GL_AMD_performance_monitor",                 o(AMD_performance_monitor),                 GL,             2007 },
-   { "GL_AMD_pinned_memory",                       o(AMD_pinned_memory),                       GL,             2013 },
-   { "GL_AMD_seamless_cubemap_per_texture",        o(AMD_seamless_cubemap_per_texture),        GL,             2009 },
-   { "GL_AMD_shader_stencil_export",               o(ARB_shader_stencil_export),               GL,             2009 },
-   { "GL_AMD_shader_trinary_minmax",               o(dummy_true),                              GL,             2012 },
-   { "GL_AMD_vertex_shader_layer",                 o(AMD_vertex_shader_layer),                 GLC,            2012 },
-   { "GL_AMD_vertex_shader_viewport_index",        o(AMD_vertex_shader_viewport_index),        GLC,            2012 },
-   { "GL_APPLE_object_purgeable",                  o(APPLE_object_purgeable),                  GL,             2006 },
-   { "GL_APPLE_packed_pixels",                     o(dummy_true),                              GLL,            2002 },
-   { "GL_APPLE_texture_max_level",                 o(dummy_true),                                   ES1 | ES2, 2009 },
-   { "GL_APPLE_vertex_array_object",               o(dummy_true),                              GLL,            2002 },
-   { "GL_ATI_blend_equation_separate",             o(EXT_blend_equation_separate),             GL,             2003 },
-   { "GL_ATI_draw_buffers",                        o(dummy_true),                              GLL,            2002 },
-   { "GL_ATI_fragment_shader",                     o(ATI_fragment_shader),                     GLL,            2001 },
-   { "GL_ATI_separate_stencil",                    o(ATI_separate_stencil),                    GLL,            2006 },
-   { "GL_ATI_texture_compression_3dc",             o(ATI_texture_compression_3dc),             GLL,            2004 },
-   { "GL_ATI_texture_env_combine3",                o(ATI_texture_env_combine3),                GLL,            2002 },
-   { "GL_ATI_texture_float",                       o(ARB_texture_float),                       GL,             2002 },
-   { "GL_ATI_texture_mirror_once",                 o(ATI_texture_mirror_once),                 GL,             2006 },
-   { "GL_IBM_multimode_draw_arrays",               o(dummy_true),                              GL,             1998 },
-   { "GL_IBM_rasterpos_clip",                      o(dummy_true),                              GLL,            1996 },
-   { "GL_IBM_texture_mirrored_repeat",             o(dummy_true),                              GLL,            1998 },
-   { "GL_INGR_blend_func_separate",                o(EXT_blend_func_separate),                 GLL,            1999 },
-   { "GL_INTEL_performance_query",                 o(INTEL_performance_query),                       GL | ES2, 2013 },
-   { "GL_MESA_pack_invert",                        o(MESA_pack_invert),                        GL,             2002 },
-   { "GL_MESA_texture_signed_rgba",                o(EXT_texture_snorm),                       GL,             2009 },
-   { "GL_MESA_window_pos",                         o(dummy_true),                              GLL,            2000 },
-   { "GL_MESA_ycbcr_texture",                      o(MESA_ycbcr_texture),                      GL,             2002 },
-   { "GL_NV_blend_square",                         o(dummy_true),                              GLL,            1999 },
-   { "GL_NV_conditional_render",                   o(NV_conditional_render),                   GL,             2008 },
-   { "GL_NV_depth_clamp",                          o(ARB_depth_clamp),                         GL,             2001 },
-   { "GL_NV_draw_buffers",                         o(dummy_true),                                         ES2, 2011 },
-   { "GL_NV_fbo_color_attachments",                o(dummy_true),                                         ES2, 2010 },
-   { "GL_NV_fog_distance",                         o(NV_fog_distance),                         GLL,            2001 },
-   { "GL_NV_fragment_program_option",              o(NV_fragment_program_option),              GLL,            2005 },
-   { "GL_NV_light_max_exponent",                   o(dummy_true),                              GLL,            1999 },
-   { "GL_NV_packed_depth_stencil",                 o(dummy_true),                              GL,             2000 },
-   { "GL_NV_point_sprite",                         o(NV_point_sprite),                         GL,             2001 },
-   { "GL_NV_primitive_restart",                    o(NV_primitive_restart),                    GLL,            2002 },
-   { "GL_NV_read_buffer",                          o(dummy_true),                              ES2,            2011 },
-   { "GL_NV_read_depth",                           o(dummy_true),                              ES2,            2011 },
-   { "GL_NV_read_depth_stencil",                   o(dummy_true),                              ES2,            2011 },
-   { "GL_NV_read_stencil",                         o(dummy_true),                              ES2,            2011 },
-   { "GL_NV_texgen_reflection",                    o(dummy_true),                              GLL,            1999 },
-   { "GL_NV_texture_barrier",                      o(NV_texture_barrier),                      GL,             2009 },
-   { "GL_NV_texture_env_combine4",                 o(NV_texture_env_combine4),                 GLL,            1999 },
-   { "GL_NV_texture_rectangle",                    o(NV_texture_rectangle),                    GLL,            2000 },
-   { "GL_NV_vdpau_interop",                        o(NV_vdpau_interop),                        GL,             2010 },
-   { "GL_S3_s3tc",                                 o(ANGLE_texture_compression_dxt),           GL,             1999 },
-   { "GL_SGIS_generate_mipmap",                    o(dummy_true),                              GLL,            1997 },
-   { "GL_SGIS_texture_border_clamp",               o(ARB_texture_border_clamp),                GLL,            1997 },
-   { "GL_SGIS_texture_edge_clamp",                 o(dummy_true),                              GLL,            1997 },
-   { "GL_SGIS_texture_lod",                        o(dummy_true),                              GLL,            1997 },
-   { "GL_SUN_multi_draw_arrays",                   o(dummy_true),                              GLL,            1999 },
+   EXT(OES_blend_equation_separate             , EXT_blend_equation_separate            ,      ES1             , 2009)
+   EXT(OES_blend_func_separate                 , EXT_blend_func_separate                ,      ES1             , 2009)
+   EXT(OES_blend_subtract                      , dummy_true                             ,      ES1             , 2009)
+   EXT(OES_byte_coordinates                    , dummy_true                             ,      ES1             , 2002)
+   EXT(OES_compressed_ETC1_RGB8_texture        , OES_compressed_ETC1_RGB8_texture       ,      ES1 | ES2       , 2005)
+   EXT(OES_compressed_paletted_texture         , dummy_true                             ,      ES1             , 2003)
+   EXT(OES_depth24                             , dummy_true                             ,      ES1 | ES2       , 2005)
+   EXT(OES_depth32                             , dummy_false                            ,       DISABLE        , 2005)
+   EXT(OES_depth_texture                       , ARB_depth_texture                      ,            ES2       , 2006)
+   EXT(OES_depth_texture_cube_map              , OES_depth_texture_cube_map             ,            ES2       , 2012)
+   EXT(OES_draw_elements_base_vertex           , ARB_draw_elements_base_vertex          , ES2                  , 2014)
+   EXT(OES_draw_texture                        , OES_draw_texture                       ,      ES1             , 2004)
+   EXT(OES_EGL_sync                            , dummy_true                             ,      ES1 | ES2       , 2010)
+   EXT(OES_EGL_image                           , OES_EGL_image                          , GL | ES1 | ES2       , 2006) /* FIXME: Mesa expects GL_OES_EGL_image to be available in OpenGL contexts. */
+   EXT(OES_EGL_image_external                  , OES_EGL_image_external                 ,      ES1 | ES2       , 2010)
+   EXT(OES_element_index_uint                  , dummy_true                             ,      ES1 | ES2       , 2005)
+   EXT(OES_fbo_render_mipmap                   , dummy_true                             ,      ES1 | ES2       , 2005)
+   EXT(OES_fixed_point                         , dummy_true                             ,      ES1             , 2002)
+   EXT(OES_framebuffer_object                  , dummy_true                             ,      ES1             , 2005)
+   EXT(OES_get_program_binary                  , dummy_true                             ,            ES2       , 2008)
+   EXT(OES_mapbuffer                           , dummy_true                             ,      ES1 | ES2       , 2005)
+   EXT(OES_packed_depth_stencil                , dummy_true                             ,      ES1 | ES2       , 2007)
+   EXT(OES_point_size_array                    , dummy_true                             ,      ES1             , 2004)
+   EXT(OES_point_sprite                        , ARB_point_sprite                       ,      ES1             , 2004)
+   EXT(OES_query_matrix                        , dummy_true                             ,      ES1             , 2003)
+   EXT(OES_read_format                         , dummy_true                             , GL | ES1             , 2003)
+   EXT(OES_rgb8_rgba8                          , dummy_true                             ,      ES1 | ES2       , 2005)
+   EXT(OES_single_precision                    , dummy_true                             ,      ES1             , 2003)
+   EXT(OES_standard_derivatives                , OES_standard_derivatives               ,            ES2       , 2005)
+   EXT(OES_stencil1                            , dummy_false                            ,       DISABLE        , 2005)
+   EXT(OES_stencil4                            , dummy_false                            ,       DISABLE        , 2005)
+   EXT(OES_stencil8                            , dummy_true                             ,      ES1 | ES2       , 2005)
+   EXT(OES_stencil_wrap                        , dummy_true                             ,      ES1             , 2002)
+   EXT(OES_surfaceless_context                 , dummy_true                             ,      ES1 | ES2       , 2012)
+   EXT(OES_texture_3D                          , EXT_texture3D                          ,            ES2       , 2005)
+   EXT(OES_texture_cube_map                    , ARB_texture_cube_map                   ,      ES1             , 2007)
+   EXT(OES_texture_env_crossbar                , ARB_texture_env_crossbar               ,      ES1             , 2005)
+   EXT(OES_texture_float                       , OES_texture_float                      ,            ES2       , 2005)
+   EXT(OES_texture_float_linear                , OES_texture_float_linear               ,            ES2       , 2005)
+   EXT(OES_texture_half_float                  , OES_texture_half_float                 ,            ES2       , 2005)
+   EXT(OES_texture_half_float_linear           , OES_texture_half_float_linear          ,            ES2       , 2005)
+   EXT(OES_texture_mirrored_repeat             , dummy_true                             ,      ES1             , 2005)
+   EXT(OES_texture_storage_multisample_2d_array, ARB_texture_multisample                ,           ES31       , 2014)
+   EXT(OES_texture_npot                        , ARB_texture_non_power_of_two           ,      ES1 | ES2       , 2005)
+   EXT(OES_vertex_array_object                 , dummy_true                             ,      ES1 | ES2       , 2010)
+
+
+   EXT(KHR_debug                               , dummy_true                             , GL                   , 2012)
+   EXT(KHR_context_flush_control               , dummy_true                             , GL       | ES2       , 2014)
+   EXT(KHR_texture_compression_astc_hdr        , KHR_texture_compression_astc_hdr       , GL       | ES2       , 2012)
+   EXT(KHR_texture_compression_astc_ldr        , KHR_texture_compression_astc_ldr       , GL       | ES2       , 2012)
+
+
+   EXT(3DFX_texture_compression_FXT1           , TDFX_texture_compression_FXT1          , GL                   , 1999)
+   EXT(AMD_conservative_depth                  , ARB_conservative_depth                 , GL                   , 2009)
+   EXT(AMD_draw_buffers_blend                  , ARB_draw_buffers_blend                 , GL                   , 2009)
+   EXT(AMD_performance_monitor                 , AMD_performance_monitor                , GL                   , 2007)
+   EXT(AMD_pinned_memory                       , AMD_pinned_memory                      , GL                   , 2013)
+   EXT(AMD_seamless_cubemap_per_texture        , AMD_seamless_cubemap_per_texture       , GL                   , 2009)
+   EXT(AMD_shader_stencil_export               , ARB_shader_stencil_export              , GL                   , 2009)
+   EXT(AMD_shader_trinary_minmax               , dummy_true                             , GL                   , 2012)
+   EXT(AMD_vertex_shader_layer                 , AMD_vertex_shader_layer                , GLC                  , 2012)
+   EXT(AMD_vertex_shader_viewport_index        , AMD_vertex_shader_viewport_index       , GLC                  , 2012)
+   EXT(APPLE_object_purgeable                  , APPLE_object_purgeable                 , GL                   , 2006)
+   EXT(APPLE_packed_pixels                     , dummy_true                             , GLL                  , 2002)
+   EXT(APPLE_texture_max_level                 , dummy_true                             ,      ES1 | ES2       , 2009)
+   EXT(APPLE_vertex_array_object               , dummy_true                             , GLL                  , 2002)
+   EXT(ATI_blend_equation_separate             , EXT_blend_equation_separate            , GL                   , 2003)
+   EXT(ATI_draw_buffers                        , dummy_true                             , GLL                  , 2002)
+   EXT(ATI_fragment_shader                     , ATI_fragment_shader                    , GLL                  , 2001)
+   EXT(ATI_separate_stencil                    , ATI_separate_stencil                   , GLL                  , 2006)
+   EXT(ATI_texture_compression_3dc             , ATI_texture_compression_3dc            , GLL                  , 2004)
+   EXT(ATI_texture_env_combine3                , ATI_texture_env_combine3               , GLL                  , 2002)
+   EXT(ATI_texture_float                       , ARB_texture_float                      , GL                   , 2002)
+   EXT(ATI_texture_mirror_once                 , ATI_texture_mirror_once                , GL                   , 2006)
+   EXT(IBM_multimode_draw_arrays               , dummy_true                             , GL                   , 1998)
+   EXT(IBM_rasterpos_clip                      , dummy_true                             , GLL                  , 1996)
+   EXT(IBM_texture_mirrored_repeat             , dummy_true                             , GLL                  , 1998)
+   EXT(INGR_blend_func_separate                , EXT_blend_func_separate                , GLL                  , 1999)
+   EXT(INTEL_performance_query                 , INTEL_performance_query                , GL       | ES2       , 2013)
+   EXT(MESA_pack_invert                        , MESA_pack_invert                       , GL                   , 2002)
+   EXT(MESA_texture_signed_rgba                , EXT_texture_snorm                      , GL                   , 2009)
+   EXT(MESA_window_pos                         , dummy_true                             , GLL                  , 2000)
+   EXT(MESA_ycbcr_texture                      , MESA_ycbcr_texture                     , GL                   , 2002)
+   EXT(NV_blend_square                         , dummy_true                             , GLL                  , 1999)
+   EXT(NV_conditional_render                   , NV_conditional_render                  , GL                   , 2008)
+   EXT(NV_depth_clamp                          , ARB_depth_clamp                        , GL                   , 2001)
+   EXT(NV_draw_buffers                         , dummy_true                             ,            ES2       , 2011)
+   EXT(NV_fbo_color_attachments                , dummy_true                             ,            ES2       , 2010)
+   EXT(NV_fog_distance                         , NV_fog_distance                        , GLL                  , 2001)
+   EXT(NV_fragment_program_option              , NV_fragment_program_option             , GLL                  , 2005)
+   EXT(NV_light_max_exponent                   , dummy_true                             , GLL                  , 1999)
+   EXT(NV_packed_depth_stencil                 , dummy_true                             , GL                   , 2000)
+   EXT(NV_point_sprite                         , NV_point_sprite                        , GL                   , 2001)
+   EXT(NV_primitive_restart                    , NV_primitive_restart                   , GLL                  , 2002)
+   EXT(NV_read_buffer                          , dummy_true                             ,            ES2       , 2011)
+   EXT(NV_read_depth                           , dummy_true                             ,            ES2       , 2011)
+   EXT(NV_read_depth_stencil                   , dummy_true                             ,            ES2       , 2011)
+   EXT(NV_read_stencil                         , dummy_true                             ,            ES2       , 2011)
+   EXT(NV_texgen_reflection                    , dummy_true                             , GLL                  , 1999)
+   EXT(NV_texture_barrier                      , NV_texture_barrier                     , GL                   , 2009)
+   EXT(NV_texture_env_combine4                 , NV_texture_env_combine4                , GLL                  , 1999)
+   EXT(NV_texture_rectangle                    , NV_texture_rectangle                   , GLL                  , 2000)
+   EXT(NV_vdpau_interop                        , NV_vdpau_interop                       , GL                   , 2010)
+   EXT(S3_s3tc                                 , ANGLE_texture_compression_dxt          , GL                   , 1999)
+   EXT(SGIS_generate_mipmap                    , dummy_true                             , GLL                  , 1997)
+   EXT(SGIS_texture_border_clamp               , ARB_texture_border_clamp               , GLL                  , 1997)
+   EXT(SGIS_texture_edge_clamp                 , dummy_true                             , GLL                  , 1997)
+   EXT(SGIS_texture_lod                        , dummy_true                             , GLL                  , 1997)
+   EXT(SUN_multi_draw_arrays                   , dummy_true                             , GLL                  , 1999)
+#undef EXT
 };
 
 

From 8bd82a91c05804260041b572b1a5d812cb58ae96 Mon Sep 17 00:00:00 2001
From: Nanley Chery <nanley.g.chery@intel.com>
Date: Wed, 16 Sep 2015 11:27:38 -0700
Subject: [PATCH 200/287] mesa/extensions: Move entries to separate
 file

With this infrastructure set in place, we can now reuse the entries to
generate useful code.

v2: Add the new file into Makefile.sources (Emil)

Signed-off-by: Nanley Chery <nanley.g.chery@intel.com>
Reviewed-by: Chad Versace <chad.versace@intel.com>
---
 src/mesa/Makefile.sources        |   1 +
 src/mesa/main/extensions.c       | 326 +------------------------------
 src/mesa/main/extensions_table.h | 325 ++++++++++++++++++++++++++++++
 3 files changed, 327 insertions(+), 325 deletions(-)
 create mode 100644 src/mesa/main/extensions_table.h

diff --git a/src/mesa/Makefile.sources b/src/mesa/Makefile.sources
index de0e330b7d1..778b92d9892 100644
--- a/src/mesa/Makefile.sources
+++ b/src/mesa/Makefile.sources
@@ -77,6 +77,7 @@ MAIN_FILES = \
 	main/execmem.c \
 	main/extensions.c \
 	main/extensions.h \
+	main/extensions_table.h \
 	main/fbobject.c \
 	main/fbobject.h \
 	main/feedback.c \
diff --git a/src/mesa/main/extensions.c b/src/mesa/main/extensions.c
index 4ddf750dd6c..30f5b9868d3 100644
--- a/src/mesa/main/extensions.c
+++ b/src/mesa/main/extensions.c
@@ -85,331 +85,7 @@ struct extension {
 static const struct extension extension_table[] = {
 #define EXT(name_str, driver_cap, api_flags, yyyy) \
         { .name = "GL_" #name_str, .offset = o(driver_cap), .api_set = api_flags, .year = yyyy},
-   EXT(ARB_ES2_compatibility                   , ARB_ES2_compatibility                  , GL                   , 2009)
-   EXT(ARB_ES3_compatibility                   , ARB_ES3_compatibility                  , GL                   , 2012)
-   EXT(ARB_arrays_of_arrays                    , ARB_arrays_of_arrays                   , GL                   , 2012)
-   EXT(ARB_base_instance                       , ARB_base_instance                      , GL                   , 2011)
-   EXT(ARB_blend_func_extended                 , ARB_blend_func_extended                , GL                   , 2009)
-   EXT(ARB_buffer_storage                      , ARB_buffer_storage                     , GL                   , 2013)
-   EXT(ARB_clear_buffer_object                 , dummy_true                             , GL                   , 2012)
-   EXT(ARB_clear_texture                       , ARB_clear_texture                      , GL                   , 2013)
-   EXT(ARB_clip_control                        , ARB_clip_control                       , GL                   , 2014)
-   EXT(ARB_color_buffer_float                  , ARB_color_buffer_float                 , GL                   , 2004)
-   EXT(ARB_compressed_texture_pixel_storage    , dummy_true                             , GL                   , 2011)
-   EXT(ARB_compute_shader                      , ARB_compute_shader                     , GL                   , 2012)
-   EXT(ARB_conditional_render_inverted         , ARB_conditional_render_inverted        , GL                   , 2014)
-   EXT(ARB_copy_buffer                         , dummy_true                             , GL                   , 2008)
-   EXT(ARB_copy_image                          , ARB_copy_image                         , GL                   , 2012)
-   EXT(ARB_conservative_depth                  , ARB_conservative_depth                 , GL                   , 2011)
-   EXT(ARB_debug_output                        , dummy_true                             , GL                   , 2009)
-   EXT(ARB_depth_buffer_float                  , ARB_depth_buffer_float                 , GL                   , 2008)
-   EXT(ARB_depth_clamp                         , ARB_depth_clamp                        , GL                   , 2003)
-   EXT(ARB_depth_texture                       , ARB_depth_texture                      , GLL                  , 2001)
-   EXT(ARB_derivative_control                  , ARB_derivative_control                 , GL                   , 2014)
-   EXT(ARB_direct_state_access                 , dummy_true                             , GLC                  , 2014)
-   EXT(ARB_draw_buffers                        , dummy_true                             , GL                   , 2002)
-   EXT(ARB_draw_buffers_blend                  , ARB_draw_buffers_blend                 , GL                   , 2009)
-   EXT(ARB_draw_elements_base_vertex           , ARB_draw_elements_base_vertex          , GL                   , 2009)
-   EXT(ARB_draw_indirect                       , ARB_draw_indirect                      , GLC                  , 2010)
-   EXT(ARB_draw_instanced                      , ARB_draw_instanced                     , GL                   , 2008)
-   EXT(ARB_enhanced_layouts                    , ARB_enhanced_layouts                   , GLC                  , 2013)
-   EXT(ARB_explicit_attrib_location            , ARB_explicit_attrib_location           , GL                   , 2009)
-   EXT(ARB_explicit_uniform_location           , ARB_explicit_uniform_location          , GL                   , 2012)
-   EXT(ARB_fragment_coord_conventions          , ARB_fragment_coord_conventions         , GL                   , 2009)
-   EXT(ARB_fragment_layer_viewport             , ARB_fragment_layer_viewport            , GLC                  , 2012)
-   EXT(ARB_fragment_program                    , ARB_fragment_program                   , GLL                  , 2002)
-   EXT(ARB_fragment_program_shadow             , ARB_fragment_program_shadow            , GLL                  , 2003)
-   EXT(ARB_fragment_shader                     , ARB_fragment_shader                    , GL                   , 2002)
-   EXT(ARB_framebuffer_no_attachments          , ARB_framebuffer_no_attachments         , GL                   , 2012)
-   EXT(ARB_framebuffer_object                  , ARB_framebuffer_object                 , GL                   , 2005)
-   EXT(ARB_framebuffer_sRGB                    , EXT_framebuffer_sRGB                   , GL                   , 1998)
-   EXT(ARB_get_program_binary                  , dummy_true                             , GL                   , 2010)
-   EXT(ARB_get_texture_sub_image               , dummy_true                             , GL                   , 2014)
-   EXT(ARB_gpu_shader5                         , ARB_gpu_shader5                        , GLC                  , 2010)
-   EXT(ARB_gpu_shader_fp64                     , ARB_gpu_shader_fp64                    , GLC                  , 2010)
-   EXT(ARB_half_float_pixel                    , dummy_true                             , GL                   , 2003)
-   EXT(ARB_half_float_vertex                   , ARB_half_float_vertex                  , GL                   , 2008)
-   EXT(ARB_instanced_arrays                    , ARB_instanced_arrays                   , GL                   , 2008)
-   EXT(ARB_internalformat_query                , ARB_internalformat_query               , GL                   , 2011)
-   EXT(ARB_invalidate_subdata                  , dummy_true                             , GL                   , 2012)
-   EXT(ARB_map_buffer_alignment                , dummy_true                             , GL                   , 2011)
-   EXT(ARB_map_buffer_range                    , ARB_map_buffer_range                   , GL                   , 2008)
-   EXT(ARB_multi_bind                          , dummy_true                             , GL                   , 2013)
-   EXT(ARB_multi_draw_indirect                 , ARB_draw_indirect                      , GLC                  , 2012)
-   EXT(ARB_multisample                         , dummy_true                             , GLL                  , 1994)
-   EXT(ARB_multitexture                        , dummy_true                             , GLL                  , 1998)
-   EXT(ARB_occlusion_query2                    , ARB_occlusion_query2                   , GL                   , 2003)
-   EXT(ARB_occlusion_query                     , ARB_occlusion_query                    , GLL                  , 2001)
-   EXT(ARB_pipeline_statistics_query           , ARB_pipeline_statistics_query          , GL                   , 2014)
-   EXT(ARB_pixel_buffer_object                 , EXT_pixel_buffer_object                , GL                   , 2004)
-   EXT(ARB_point_parameters                    , EXT_point_parameters                   , GLL                  , 1997)
-   EXT(ARB_point_sprite                        , ARB_point_sprite                       , GL                   , 2003)
-   EXT(ARB_program_interface_query             , dummy_true                             , GL                   , 2012)
-   EXT(ARB_provoking_vertex                    , EXT_provoking_vertex                   , GL                   , 2009)
-   EXT(ARB_robustness                          , dummy_true                             , GL                   , 2010)
-   EXT(ARB_sample_shading                      , ARB_sample_shading                     , GL                   , 2009)
-   EXT(ARB_sampler_objects                     , dummy_true                             , GL                   , 2009)
-   EXT(ARB_seamless_cube_map                   , ARB_seamless_cube_map                  , GL                   , 2009)
-   EXT(ARB_seamless_cubemap_per_texture        , AMD_seamless_cubemap_per_texture       , GL                   , 2013)
-   EXT(ARB_separate_shader_objects             , dummy_true                             , GL                   , 2010)
-   EXT(ARB_shader_atomic_counters              , ARB_shader_atomic_counters             , GL                   , 2011)
-   EXT(ARB_shader_bit_encoding                 , ARB_shader_bit_encoding                , GL                   , 2010)
-   EXT(ARB_shader_clock                        , ARB_shader_clock                       , GL                   , 2015)
-   EXT(ARB_shader_image_load_store             , ARB_shader_image_load_store            , GL                   , 2011)
-   EXT(ARB_shader_image_size                   , ARB_shader_image_size                  , GL                   , 2012)
-   EXT(ARB_shader_objects                      , dummy_true                             , GL                   , 2002)
-   EXT(ARB_shader_precision                    , ARB_shader_precision                   , GL                   , 2010)
-   EXT(ARB_shader_stencil_export               , ARB_shader_stencil_export              , GL                   , 2009)
-   EXT(ARB_shader_storage_buffer_object        , ARB_shader_storage_buffer_object       , GL                   , 2012)
-   EXT(ARB_shader_subroutine                   , ARB_shader_subroutine                  , GLC                  , 2010)
-   EXT(ARB_shader_texture_image_samples        , ARB_shader_texture_image_samples       , GL                   , 2014)
-   EXT(ARB_shader_texture_lod                  , ARB_shader_texture_lod                 , GL                   , 2009)
-   EXT(ARB_shading_language_100                , dummy_true                             , GLL                  , 2003)
-   EXT(ARB_shading_language_packing            , ARB_shading_language_packing           , GL                   , 2011)
-   EXT(ARB_shading_language_420pack            , ARB_shading_language_420pack           , GL                   , 2011)
-   EXT(ARB_shadow                              , ARB_shadow                             , GLL                  , 2001)
-   EXT(ARB_stencil_texturing                   , ARB_stencil_texturing                  , GL                   , 2012)
-   EXT(ARB_sync                                , ARB_sync                               , GL                   , 2003)
-   EXT(ARB_texture_barrier                     , NV_texture_barrier                     , GL                   , 2014)
-   EXT(ARB_tessellation_shader                 , ARB_tessellation_shader                , GLC                  , 2009)
-   EXT(ARB_texture_border_clamp                , ARB_texture_border_clamp               , GLL                  , 2000)
-   EXT(ARB_texture_buffer_object               , ARB_texture_buffer_object              , GLC                  , 2008)
-   EXT(ARB_texture_buffer_object_rgb32         , ARB_texture_buffer_object_rgb32        , GLC                  , 2009)
-   EXT(ARB_texture_buffer_range                , ARB_texture_buffer_range               , GLC                  , 2012)
-   EXT(ARB_texture_compression                 , dummy_true                             , GLL                  , 2000)
-   EXT(ARB_texture_compression_bptc            , ARB_texture_compression_bptc           , GL                   , 2010)
-   EXT(ARB_texture_compression_rgtc            , ARB_texture_compression_rgtc           , GL                   , 2004)
-   EXT(ARB_texture_cube_map                    , ARB_texture_cube_map                   , GLL                  , 1999)
-   EXT(ARB_texture_cube_map_array              , ARB_texture_cube_map_array             , GL                   , 2009)
-   EXT(ARB_texture_env_add                     , dummy_true                             , GLL                  , 1999)
-   EXT(ARB_texture_env_combine                 , ARB_texture_env_combine                , GLL                  , 2001)
-   EXT(ARB_texture_env_crossbar                , ARB_texture_env_crossbar               , GLL                  , 2001)
-   EXT(ARB_texture_env_dot3                    , ARB_texture_env_dot3                   , GLL                  , 2001)
-   EXT(ARB_texture_float                       , ARB_texture_float                      , GL                   , 2004)
-   EXT(ARB_texture_gather                      , ARB_texture_gather                     , GL                   , 2009)
-   EXT(ARB_texture_mirrored_repeat             , dummy_true                             , GLL                  , 2001)
-   EXT(ARB_texture_mirror_clamp_to_edge        , ARB_texture_mirror_clamp_to_edge       , GL                   , 2013)
-   EXT(ARB_texture_multisample                 , ARB_texture_multisample                , GL                   , 2009)
-   EXT(ARB_texture_non_power_of_two            , ARB_texture_non_power_of_two           , GL                   , 2003)
-   EXT(ARB_texture_query_levels                , ARB_texture_query_levels               , GL                   , 2012)
-   EXT(ARB_texture_query_lod                   , ARB_texture_query_lod                  , GL                   , 2009)
-   EXT(ARB_texture_rectangle                   , NV_texture_rectangle                   , GL                   , 2004)
-   EXT(ARB_texture_rgb10_a2ui                  , ARB_texture_rgb10_a2ui                 , GL                   , 2009)
-   EXT(ARB_texture_rg                          , ARB_texture_rg                         , GL                   , 2008)
-   EXT(ARB_texture_stencil8                    , ARB_texture_stencil8                   , GL                   , 2013)
-   EXT(ARB_texture_storage                     , dummy_true                             , GL                   , 2011)
-   EXT(ARB_texture_storage_multisample         , ARB_texture_multisample                , GL                   , 2012)
-   EXT(ARB_texture_view                        , ARB_texture_view                       , GL                   , 2012)
-   EXT(ARB_texture_swizzle                     , EXT_texture_swizzle                    , GL                   , 2008)
-   EXT(ARB_timer_query                         , ARB_timer_query                        , GL                   , 2010)
-   EXT(ARB_transform_feedback2                 , ARB_transform_feedback2                , GL                   , 2010)
-   EXT(ARB_transform_feedback3                 , ARB_transform_feedback3                , GL                   , 2010)
-   EXT(ARB_transform_feedback_instanced        , ARB_transform_feedback_instanced       , GL                   , 2011)
-   EXT(ARB_transpose_matrix                    , dummy_true                             , GLL                  , 1999)
-   EXT(ARB_uniform_buffer_object               , ARB_uniform_buffer_object              , GL                   , 2009)
-   EXT(ARB_vertex_array_bgra                   , EXT_vertex_array_bgra                  , GL                   , 2008)
-   EXT(ARB_vertex_array_object                 , dummy_true                             , GL                   , 2006)
-   EXT(ARB_vertex_attrib_binding               , dummy_true                             , GL                   , 2012)
-   EXT(ARB_vertex_buffer_object                , dummy_true                             , GLL                  , 2003)
-   EXT(ARB_vertex_program                      , ARB_vertex_program                     , GLL                  , 2002)
-   EXT(ARB_vertex_shader                       , ARB_vertex_shader                      , GL                   , 2002)
-   EXT(ARB_vertex_attrib_64bit                 , ARB_vertex_attrib_64bit                , GLC                  , 2010)
-   EXT(ARB_vertex_type_10f_11f_11f_rev         , ARB_vertex_type_10f_11f_11f_rev        , GL                   , 2013)
-   EXT(ARB_vertex_type_2_10_10_10_rev          , ARB_vertex_type_2_10_10_10_rev         , GL                   , 2009)
-   EXT(ARB_viewport_array                      , ARB_viewport_array                     , GLC                  , 2010)
-   EXT(ARB_window_pos                          , dummy_true                             , GLL                  , 2001)
-
-   EXT(EXT_abgr                                , dummy_true                             , GL                   , 1995)
-   EXT(EXT_bgra                                , dummy_true                             , GLL                  , 1995)
-   EXT(EXT_blend_color                         , EXT_blend_color                        , GLL                  , 1995)
-   EXT(EXT_blend_equation_separate             , EXT_blend_equation_separate            , GL                   , 2003)
-   EXT(EXT_blend_func_separate                 , EXT_blend_func_separate                , GLL                  , 1999)
-   EXT(EXT_buffer_storage                      , ARB_buffer_storage                     , ES31                 , 2015)
-   EXT(EXT_discard_framebuffer                 , dummy_true                             ,       ES1 | ES2      , 2009)
-   EXT(EXT_blend_minmax                        , EXT_blend_minmax                       , GLL | ES1 | ES2      , 1995)
-   EXT(EXT_blend_subtract                      , dummy_true                             , GLL                  , 1995)
-   EXT(EXT_compiled_vertex_array               , dummy_true                             , GLL                  , 1996)
-   EXT(EXT_copy_texture                        , dummy_true                             , GLL                  , 1995)
-   EXT(EXT_depth_bounds_test                   , EXT_depth_bounds_test                  , GL                   , 2002)
-   EXT(EXT_draw_buffers                        , dummy_true                             ,             ES2      , 2012)
-   EXT(EXT_draw_buffers2                       , EXT_draw_buffers2                      , GL                   , 2006)
-   EXT(EXT_draw_elements_base_vertex           , ARB_draw_elements_base_vertex          , ES2                  , 2014)
-   EXT(EXT_draw_instanced                      , ARB_draw_instanced                     , GL                   , 2006)
-   EXT(EXT_draw_range_elements                 , dummy_true                             , GLL                  , 1997)
-   EXT(EXT_fog_coord                           , dummy_true                             , GLL                  , 1999)
-   EXT(EXT_framebuffer_blit                    , dummy_true                             , GL                   , 2005)
-   EXT(EXT_framebuffer_multisample             , EXT_framebuffer_multisample            , GL                   , 2005)
-   EXT(EXT_framebuffer_multisample_blit_scaled , EXT_framebuffer_multisample_blit_scaled, GL                   , 2011)
-   EXT(EXT_framebuffer_object                  , dummy_true                             , GLL                  , 2000)
-   EXT(EXT_framebuffer_sRGB                    , EXT_framebuffer_sRGB                   , GL                   , 1998)
-   EXT(EXT_gpu_program_parameters              , EXT_gpu_program_parameters             , GLL                  , 2006)
-   EXT(EXT_gpu_shader4                         , EXT_gpu_shader4                        , GL                   , 2006)
-   EXT(EXT_map_buffer_range                    , ARB_map_buffer_range                   ,       ES1 | ES2      , 2012)
-   EXT(EXT_multi_draw_arrays                   , dummy_true                             , GLL | ES1 | ES2      , 1999)
-   EXT(EXT_packed_depth_stencil                , dummy_true                             , GL                   , 2005)
-   EXT(EXT_packed_float                        , EXT_packed_float                       , GL                   , 2004)
-   EXT(EXT_packed_pixels                       , dummy_true                             , GLL                  , 1997)
-   EXT(EXT_pixel_buffer_object                 , EXT_pixel_buffer_object                , GL                   , 2004)
-   EXT(EXT_point_parameters                    , EXT_point_parameters                   , GLL                  , 1997)
-   EXT(EXT_polygon_offset                      , dummy_true                             , GLL                  , 1995)
-   EXT(EXT_polygon_offset_clamp                , EXT_polygon_offset_clamp               , GL                   , 2014)
-   EXT(EXT_provoking_vertex                    , EXT_provoking_vertex                   , GL                   , 2009)
-   EXT(EXT_rescale_normal                      , dummy_true                             , GLL                  , 1997)
-   EXT(EXT_secondary_color                     , dummy_true                             , GLL                  , 1999)
-   EXT(EXT_separate_shader_objects             , dummy_true                             ,            ES2       , 2013)
-   EXT(EXT_separate_specular_color             , dummy_true                             , GLL                  , 1997)
-   EXT(EXT_shader_integer_mix                  , EXT_shader_integer_mix                 , GL       | ES3       , 2013)
-   EXT(EXT_shadow_funcs                        , ARB_shadow                             , GLL                  , 2002)
-   EXT(EXT_stencil_two_side                    , EXT_stencil_two_side                   , GLL                  , 2001)
-   EXT(EXT_stencil_wrap                        , dummy_true                             , GLL                  , 2002)
-   EXT(EXT_subtexture                          , dummy_true                             , GLL                  , 1995)
-   EXT(EXT_texture3D                           , EXT_texture3D                          , GLL                  , 1996)
-   EXT(EXT_texture_array                       , EXT_texture_array                      , GL                   , 2006)
-   EXT(EXT_texture_compression_dxt1            , ANGLE_texture_compression_dxt          , GL | ES1 | ES2       , 2004)
-   EXT(ANGLE_texture_compression_dxt3          , ANGLE_texture_compression_dxt          , GL | ES1 | ES2       , 2011)
-   EXT(ANGLE_texture_compression_dxt5          , ANGLE_texture_compression_dxt          , GL | ES1 | ES2       , 2011)
-   EXT(EXT_texture_compression_latc            , EXT_texture_compression_latc           , GLL                  , 2006)
-   EXT(EXT_texture_compression_rgtc            , ARB_texture_compression_rgtc           , GL                   , 2004)
-   EXT(EXT_texture_compression_s3tc            , EXT_texture_compression_s3tc           , GL                   , 2000)
-   EXT(EXT_texture_cube_map                    , ARB_texture_cube_map                   , GLL                  , 2001)
-   EXT(EXT_texture_edge_clamp                  , dummy_true                             , GLL                  , 1997)
-   EXT(EXT_texture_env_add                     , dummy_true                             , GLL                  , 1999)
-   EXT(EXT_texture_env_combine                 , dummy_true                             , GLL                  , 2000)
-   EXT(EXT_texture_env_dot3                    , EXT_texture_env_dot3                   , GLL                  , 2000)
-   EXT(EXT_texture_filter_anisotropic          , EXT_texture_filter_anisotropic         , GL | ES1 | ES2       , 1999)
-   EXT(EXT_texture_format_BGRA8888             , dummy_true                             ,      ES1 | ES2       , 2005)
-   EXT(EXT_texture_rg                          , ARB_texture_rg                         ,            ES2       , 2011)
-   EXT(EXT_read_format_bgra                    , dummy_true                             ,      ES1 | ES2       , 2009)
-   EXT(EXT_texture_integer                     , EXT_texture_integer                    , GL                   , 2006)
-   EXT(EXT_texture_lod_bias                    , dummy_true                             , GLL | ES1            , 1999)
-   EXT(EXT_texture_mirror_clamp                , EXT_texture_mirror_clamp               , GL                   , 2004)
-   EXT(EXT_texture_object                      , dummy_true                             , GLL                  , 1995)
-   EXT(EXT_texture                             , dummy_true                             , GLL                  , 1996)
-   EXT(EXT_texture_rectangle                   , NV_texture_rectangle                   , GLL                  , 2004)
-   EXT(EXT_texture_shared_exponent             , EXT_texture_shared_exponent            , GL                   , 2004)
-   EXT(EXT_texture_snorm                       , EXT_texture_snorm                      , GL                   , 2009)
-   EXT(EXT_texture_sRGB                        , EXT_texture_sRGB                       , GL                   , 2004)
-   EXT(EXT_texture_sRGB_decode                 , EXT_texture_sRGB_decode                , GL                   , 2006)
-   EXT(EXT_texture_swizzle                     , EXT_texture_swizzle                    , GL                   , 2008)
-   EXT(EXT_texture_type_2_10_10_10_REV         , dummy_true                             ,            ES2       , 2008)
-   EXT(EXT_timer_query                         , EXT_timer_query                        , GL                   , 2006)
-   EXT(EXT_transform_feedback                  , EXT_transform_feedback                 , GL                   , 2011)
-   EXT(EXT_unpack_subimage                     , dummy_true                             ,            ES2       , 2011)
-   EXT(EXT_vertex_array_bgra                   , EXT_vertex_array_bgra                  , GL                   , 2008)
-   EXT(EXT_vertex_array                        , dummy_true                             , GLL                  , 1995)
-   EXT(EXT_color_buffer_float                  , dummy_true                             ,                  ES3 , 2013)
-
-
-   EXT(OES_blend_equation_separate             , EXT_blend_equation_separate            ,      ES1             , 2009)
-   EXT(OES_blend_func_separate                 , EXT_blend_func_separate                ,      ES1             , 2009)
-   EXT(OES_blend_subtract                      , dummy_true                             ,      ES1             , 2009)
-   EXT(OES_byte_coordinates                    , dummy_true                             ,      ES1             , 2002)
-   EXT(OES_compressed_ETC1_RGB8_texture        , OES_compressed_ETC1_RGB8_texture       ,      ES1 | ES2       , 2005)
-   EXT(OES_compressed_paletted_texture         , dummy_true                             ,      ES1             , 2003)
-   EXT(OES_depth24                             , dummy_true                             ,      ES1 | ES2       , 2005)
-   EXT(OES_depth32                             , dummy_false                            ,       DISABLE        , 2005)
-   EXT(OES_depth_texture                       , ARB_depth_texture                      ,            ES2       , 2006)
-   EXT(OES_depth_texture_cube_map              , OES_depth_texture_cube_map             ,            ES2       , 2012)
-   EXT(OES_draw_elements_base_vertex           , ARB_draw_elements_base_vertex          , ES2                  , 2014)
-   EXT(OES_draw_texture                        , OES_draw_texture                       ,      ES1             , 2004)
-   EXT(OES_EGL_sync                            , dummy_true                             ,      ES1 | ES2       , 2010)
-   EXT(OES_EGL_image                           , OES_EGL_image                          , GL | ES1 | ES2       , 2006) /* FIXME: Mesa expects GL_OES_EGL_image to be available in OpenGL contexts. */
-   EXT(OES_EGL_image_external                  , OES_EGL_image_external                 ,      ES1 | ES2       , 2010)
-   EXT(OES_element_index_uint                  , dummy_true                             ,      ES1 | ES2       , 2005)
-   EXT(OES_fbo_render_mipmap                   , dummy_true                             ,      ES1 | ES2       , 2005)
-   EXT(OES_fixed_point                         , dummy_true                             ,      ES1             , 2002)
-   EXT(OES_framebuffer_object                  , dummy_true                             ,      ES1             , 2005)
-   EXT(OES_get_program_binary                  , dummy_true                             ,            ES2       , 2008)
-   EXT(OES_mapbuffer                           , dummy_true                             ,      ES1 | ES2       , 2005)
-   EXT(OES_packed_depth_stencil                , dummy_true                             ,      ES1 | ES2       , 2007)
-   EXT(OES_point_size_array                    , dummy_true                             ,      ES1             , 2004)
-   EXT(OES_point_sprite                        , ARB_point_sprite                       ,      ES1             , 2004)
-   EXT(OES_query_matrix                        , dummy_true                             ,      ES1             , 2003)
-   EXT(OES_read_format                         , dummy_true                             , GL | ES1             , 2003)
-   EXT(OES_rgb8_rgba8                          , dummy_true                             ,      ES1 | ES2       , 2005)
-   EXT(OES_single_precision                    , dummy_true                             ,      ES1             , 2003)
-   EXT(OES_standard_derivatives                , OES_standard_derivatives               ,            ES2       , 2005)
-   EXT(OES_stencil1                            , dummy_false                            ,       DISABLE        , 2005)
-   EXT(OES_stencil4                            , dummy_false                            ,       DISABLE        , 2005)
-   EXT(OES_stencil8                            , dummy_true                             ,      ES1 | ES2       , 2005)
-   EXT(OES_stencil_wrap                        , dummy_true                             ,      ES1             , 2002)
-   EXT(OES_surfaceless_context                 , dummy_true                             ,      ES1 | ES2       , 2012)
-   EXT(OES_texture_3D                          , EXT_texture3D                          ,            ES2       , 2005)
-   EXT(OES_texture_cube_map                    , ARB_texture_cube_map                   ,      ES1             , 2007)
-   EXT(OES_texture_env_crossbar                , ARB_texture_env_crossbar               ,      ES1             , 2005)
-   EXT(OES_texture_float                       , OES_texture_float                      ,            ES2       , 2005)
-   EXT(OES_texture_float_linear                , OES_texture_float_linear               ,            ES2       , 2005)
-   EXT(OES_texture_half_float                  , OES_texture_half_float                 ,            ES2       , 2005)
-   EXT(OES_texture_half_float_linear           , OES_texture_half_float_linear          ,            ES2       , 2005)
-   EXT(OES_texture_mirrored_repeat             , dummy_true                             ,      ES1             , 2005)
-   EXT(OES_texture_storage_multisample_2d_array, ARB_texture_multisample                ,           ES31       , 2014)
-   EXT(OES_texture_npot                        , ARB_texture_non_power_of_two           ,      ES1 | ES2       , 2005)
-   EXT(OES_vertex_array_object                 , dummy_true                             ,      ES1 | ES2       , 2010)
-
-
-   EXT(KHR_debug                               , dummy_true                             , GL                   , 2012)
-   EXT(KHR_context_flush_control               , dummy_true                             , GL       | ES2       , 2014)
-   EXT(KHR_texture_compression_astc_hdr        , KHR_texture_compression_astc_hdr       , GL       | ES2       , 2012)
-   EXT(KHR_texture_compression_astc_ldr        , KHR_texture_compression_astc_ldr       , GL       | ES2       , 2012)
-
-
-   EXT(3DFX_texture_compression_FXT1           , TDFX_texture_compression_FXT1          , GL                   , 1999)
-   EXT(AMD_conservative_depth                  , ARB_conservative_depth                 , GL                   , 2009)
-   EXT(AMD_draw_buffers_blend                  , ARB_draw_buffers_blend                 , GL                   , 2009)
-   EXT(AMD_performance_monitor                 , AMD_performance_monitor                , GL                   , 2007)
-   EXT(AMD_pinned_memory                       , AMD_pinned_memory                      , GL                   , 2013)
-   EXT(AMD_seamless_cubemap_per_texture        , AMD_seamless_cubemap_per_texture       , GL                   , 2009)
-   EXT(AMD_shader_stencil_export               , ARB_shader_stencil_export              , GL                   , 2009)
-   EXT(AMD_shader_trinary_minmax               , dummy_true                             , GL                   , 2012)
-   EXT(AMD_vertex_shader_layer                 , AMD_vertex_shader_layer                , GLC                  , 2012)
-   EXT(AMD_vertex_shader_viewport_index        , AMD_vertex_shader_viewport_index       , GLC                  , 2012)
-   EXT(APPLE_object_purgeable                  , APPLE_object_purgeable                 , GL                   , 2006)
-   EXT(APPLE_packed_pixels                     , dummy_true                             , GLL                  , 2002)
-   EXT(APPLE_texture_max_level                 , dummy_true                             ,      ES1 | ES2       , 2009)
-   EXT(APPLE_vertex_array_object               , dummy_true                             , GLL                  , 2002)
-   EXT(ATI_blend_equation_separate             , EXT_blend_equation_separate            , GL                   , 2003)
-   EXT(ATI_draw_buffers                        , dummy_true                             , GLL                  , 2002)
-   EXT(ATI_fragment_shader                     , ATI_fragment_shader                    , GLL                  , 2001)
-   EXT(ATI_separate_stencil                    , ATI_separate_stencil                   , GLL                  , 2006)
-   EXT(ATI_texture_compression_3dc             , ATI_texture_compression_3dc            , GLL                  , 2004)
-   EXT(ATI_texture_env_combine3                , ATI_texture_env_combine3               , GLL                  , 2002)
-   EXT(ATI_texture_float                       , ARB_texture_float                      , GL                   , 2002)
-   EXT(ATI_texture_mirror_once                 , ATI_texture_mirror_once                , GL                   , 2006)
-   EXT(IBM_multimode_draw_arrays               , dummy_true                             , GL                   , 1998)
-   EXT(IBM_rasterpos_clip                      , dummy_true                             , GLL                  , 1996)
-   EXT(IBM_texture_mirrored_repeat             , dummy_true                             , GLL                  , 1998)
-   EXT(INGR_blend_func_separate                , EXT_blend_func_separate                , GLL                  , 1999)
-   EXT(INTEL_performance_query                 , INTEL_performance_query                , GL       | ES2       , 2013)
-   EXT(MESA_pack_invert                        , MESA_pack_invert                       , GL                   , 2002)
-   EXT(MESA_texture_signed_rgba                , EXT_texture_snorm                      , GL                   , 2009)
-   EXT(MESA_window_pos                         , dummy_true                             , GLL                  , 2000)
-   EXT(MESA_ycbcr_texture                      , MESA_ycbcr_texture                     , GL                   , 2002)
-   EXT(NV_blend_square                         , dummy_true                             , GLL                  , 1999)
-   EXT(NV_conditional_render                   , NV_conditional_render                  , GL                   , 2008)
-   EXT(NV_depth_clamp                          , ARB_depth_clamp                        , GL                   , 2001)
-   EXT(NV_draw_buffers                         , dummy_true                             ,            ES2       , 2011)
-   EXT(NV_fbo_color_attachments                , dummy_true                             ,            ES2       , 2010)
-   EXT(NV_fog_distance                         , NV_fog_distance                        , GLL                  , 2001)
-   EXT(NV_fragment_program_option              , NV_fragment_program_option             , GLL                  , 2005)
-   EXT(NV_light_max_exponent                   , dummy_true                             , GLL                  , 1999)
-   EXT(NV_packed_depth_stencil                 , dummy_true                             , GL                   , 2000)
-   EXT(NV_point_sprite                         , NV_point_sprite                        , GL                   , 2001)
-   EXT(NV_primitive_restart                    , NV_primitive_restart                   , GLL                  , 2002)
-   EXT(NV_read_buffer                          , dummy_true                             ,            ES2       , 2011)
-   EXT(NV_read_depth                           , dummy_true                             ,            ES2       , 2011)
-   EXT(NV_read_depth_stencil                   , dummy_true                             ,            ES2       , 2011)
-   EXT(NV_read_stencil                         , dummy_true                             ,            ES2       , 2011)
-   EXT(NV_texgen_reflection                    , dummy_true                             , GLL                  , 1999)
-   EXT(NV_texture_barrier                      , NV_texture_barrier                     , GL                   , 2009)
-   EXT(NV_texture_env_combine4                 , NV_texture_env_combine4                , GLL                  , 1999)
-   EXT(NV_texture_rectangle                    , NV_texture_rectangle                   , GLL                  , 2000)
-   EXT(NV_vdpau_interop                        , NV_vdpau_interop                       , GL                   , 2010)
-   EXT(S3_s3tc                                 , ANGLE_texture_compression_dxt          , GL                   , 1999)
-   EXT(SGIS_generate_mipmap                    , dummy_true                             , GLL                  , 1997)
-   EXT(SGIS_texture_border_clamp               , ARB_texture_border_clamp               , GLL                  , 1997)
-   EXT(SGIS_texture_edge_clamp                 , dummy_true                             , GLL                  , 1997)
-   EXT(SGIS_texture_lod                        , dummy_true                             , GLL                  , 1997)
-   EXT(SUN_multi_draw_arrays                   , dummy_true                             , GLL                  , 1999)
+#include "extensions_table.h"
 #undef EXT
 };
 
diff --git a/src/mesa/main/extensions_table.h b/src/mesa/main/extensions_table.h
new file mode 100644
index 00000000000..084a04693d3
--- /dev/null
+++ b/src/mesa/main/extensions_table.h
@@ -0,0 +1,325 @@
+EXT(ARB_ES2_compatibility                   , ARB_ES2_compatibility                  , GL                   , 2009)
+EXT(ARB_ES3_compatibility                   , ARB_ES3_compatibility                  , GL                   , 2012)
+EXT(ARB_arrays_of_arrays                    , ARB_arrays_of_arrays                   , GL                   , 2012)
+EXT(ARB_base_instance                       , ARB_base_instance                      , GL                   , 2011)
+EXT(ARB_blend_func_extended                 , ARB_blend_func_extended                , GL                   , 2009)
+EXT(ARB_buffer_storage                      , ARB_buffer_storage                     , GL                   , 2013)
+EXT(ARB_clear_buffer_object                 , dummy_true                             , GL                   , 2012)
+EXT(ARB_clear_texture                       , ARB_clear_texture                      , GL                   , 2013)
+EXT(ARB_clip_control                        , ARB_clip_control                       , GL                   , 2014)
+EXT(ARB_color_buffer_float                  , ARB_color_buffer_float                 , GL                   , 2004)
+EXT(ARB_compressed_texture_pixel_storage    , dummy_true                             , GL                   , 2011)
+EXT(ARB_compute_shader                      , ARB_compute_shader                     , GL                   , 2012)
+EXT(ARB_conditional_render_inverted         , ARB_conditional_render_inverted        , GL                   , 2014)
+EXT(ARB_copy_buffer                         , dummy_true                             , GL                   , 2008)
+EXT(ARB_copy_image                          , ARB_copy_image                         , GL                   , 2012)
+EXT(ARB_conservative_depth                  , ARB_conservative_depth                 , GL                   , 2011)
+EXT(ARB_debug_output                        , dummy_true                             , GL                   , 2009)
+EXT(ARB_depth_buffer_float                  , ARB_depth_buffer_float                 , GL                   , 2008)
+EXT(ARB_depth_clamp                         , ARB_depth_clamp                        , GL                   , 2003)
+EXT(ARB_depth_texture                       , ARB_depth_texture                      , GLL                  , 2001)
+EXT(ARB_derivative_control                  , ARB_derivative_control                 , GL                   , 2014)
+EXT(ARB_direct_state_access                 , dummy_true                             , GLC                  , 2014)
+EXT(ARB_draw_buffers                        , dummy_true                             , GL                   , 2002)
+EXT(ARB_draw_buffers_blend                  , ARB_draw_buffers_blend                 , GL                   , 2009)
+EXT(ARB_draw_elements_base_vertex           , ARB_draw_elements_base_vertex          , GL                   , 2009)
+EXT(ARB_draw_indirect                       , ARB_draw_indirect                      , GLC                  , 2010)
+EXT(ARB_draw_instanced                      , ARB_draw_instanced                     , GL                   , 2008)
+EXT(ARB_enhanced_layouts                    , ARB_enhanced_layouts                   , GLC                  , 2013)
+EXT(ARB_explicit_attrib_location            , ARB_explicit_attrib_location           , GL                   , 2009)
+EXT(ARB_explicit_uniform_location           , ARB_explicit_uniform_location          , GL                   , 2012)
+EXT(ARB_fragment_coord_conventions          , ARB_fragment_coord_conventions         , GL                   , 2009)
+EXT(ARB_fragment_layer_viewport             , ARB_fragment_layer_viewport            , GLC                  , 2012)
+EXT(ARB_fragment_program                    , ARB_fragment_program                   , GLL                  , 2002)
+EXT(ARB_fragment_program_shadow             , ARB_fragment_program_shadow            , GLL                  , 2003)
+EXT(ARB_fragment_shader                     , ARB_fragment_shader                    , GL                   , 2002)
+EXT(ARB_framebuffer_no_attachments          , ARB_framebuffer_no_attachments         , GL                   , 2012)
+EXT(ARB_framebuffer_object                  , ARB_framebuffer_object                 , GL                   , 2005)
+EXT(ARB_framebuffer_sRGB                    , EXT_framebuffer_sRGB                   , GL                   , 1998)
+EXT(ARB_get_program_binary                  , dummy_true                             , GL                   , 2010)
+EXT(ARB_get_texture_sub_image               , dummy_true                             , GL                   , 2014)
+EXT(ARB_gpu_shader5                         , ARB_gpu_shader5                        , GLC                  , 2010)
+EXT(ARB_gpu_shader_fp64                     , ARB_gpu_shader_fp64                    , GLC                  , 2010)
+EXT(ARB_half_float_pixel                    , dummy_true                             , GL                   , 2003)
+EXT(ARB_half_float_vertex                   , ARB_half_float_vertex                  , GL                   , 2008)
+EXT(ARB_instanced_arrays                    , ARB_instanced_arrays                   , GL                   , 2008)
+EXT(ARB_internalformat_query                , ARB_internalformat_query               , GL                   , 2011)
+EXT(ARB_invalidate_subdata                  , dummy_true                             , GL                   , 2012)
+EXT(ARB_map_buffer_alignment                , dummy_true                             , GL                   , 2011)
+EXT(ARB_map_buffer_range                    , ARB_map_buffer_range                   , GL                   , 2008)
+EXT(ARB_multi_bind                          , dummy_true                             , GL                   , 2013)
+EXT(ARB_multi_draw_indirect                 , ARB_draw_indirect                      , GLC                  , 2012)
+EXT(ARB_multisample                         , dummy_true                             , GLL                  , 1994)
+EXT(ARB_multitexture                        , dummy_true                             , GLL                  , 1998)
+EXT(ARB_occlusion_query2                    , ARB_occlusion_query2                   , GL                   , 2003)
+EXT(ARB_occlusion_query                     , ARB_occlusion_query                    , GLL                  , 2001)
+EXT(ARB_pipeline_statistics_query           , ARB_pipeline_statistics_query          , GL                   , 2014)
+EXT(ARB_pixel_buffer_object                 , EXT_pixel_buffer_object                , GL                   , 2004)
+EXT(ARB_point_parameters                    , EXT_point_parameters                   , GLL                  , 1997)
+EXT(ARB_point_sprite                        , ARB_point_sprite                       , GL                   , 2003)
+EXT(ARB_program_interface_query             , dummy_true                             , GL                   , 2012)
+EXT(ARB_provoking_vertex                    , EXT_provoking_vertex                   , GL                   , 2009)
+EXT(ARB_robustness                          , dummy_true                             , GL                   , 2010)
+EXT(ARB_sample_shading                      , ARB_sample_shading                     , GL                   , 2009)
+EXT(ARB_sampler_objects                     , dummy_true                             , GL                   , 2009)
+EXT(ARB_seamless_cube_map                   , ARB_seamless_cube_map                  , GL                   , 2009)
+EXT(ARB_seamless_cubemap_per_texture        , AMD_seamless_cubemap_per_texture       , GL                   , 2013)
+EXT(ARB_separate_shader_objects             , dummy_true                             , GL                   , 2010)
+EXT(ARB_shader_atomic_counters              , ARB_shader_atomic_counters             , GL                   , 2011)
+EXT(ARB_shader_bit_encoding                 , ARB_shader_bit_encoding                , GL                   , 2010)
+EXT(ARB_shader_clock                        , ARB_shader_clock                       , GL                   , 2015)
+EXT(ARB_shader_image_load_store             , ARB_shader_image_load_store            , GL                   , 2011)
+EXT(ARB_shader_image_size                   , ARB_shader_image_size                  , GL                   , 2012)
+EXT(ARB_shader_objects                      , dummy_true                             , GL                   , 2002)
+EXT(ARB_shader_precision                    , ARB_shader_precision                   , GL                   , 2010)
+EXT(ARB_shader_stencil_export               , ARB_shader_stencil_export              , GL                   , 2009)
+EXT(ARB_shader_storage_buffer_object        , ARB_shader_storage_buffer_object       , GL                   , 2012)
+EXT(ARB_shader_subroutine                   , ARB_shader_subroutine                  , GLC                  , 2010)
+EXT(ARB_shader_texture_image_samples        , ARB_shader_texture_image_samples       , GL                   , 2014)
+EXT(ARB_shader_texture_lod                  , ARB_shader_texture_lod                 , GL                   , 2009)
+EXT(ARB_shading_language_100                , dummy_true                             , GLL                  , 2003)
+EXT(ARB_shading_language_packing            , ARB_shading_language_packing           , GL                   , 2011)
+EXT(ARB_shading_language_420pack            , ARB_shading_language_420pack           , GL                   , 2011)
+EXT(ARB_shadow                              , ARB_shadow                             , GLL                  , 2001)
+EXT(ARB_stencil_texturing                   , ARB_stencil_texturing                  , GL                   , 2012)
+EXT(ARB_sync                                , ARB_sync                               , GL                   , 2003)
+EXT(ARB_texture_barrier                     , NV_texture_barrier                     , GL                   , 2014)
+EXT(ARB_tessellation_shader                 , ARB_tessellation_shader                , GLC                  , 2009)
+EXT(ARB_texture_border_clamp                , ARB_texture_border_clamp               , GLL                  , 2000)
+EXT(ARB_texture_buffer_object               , ARB_texture_buffer_object              , GLC                  , 2008)
+EXT(ARB_texture_buffer_object_rgb32         , ARB_texture_buffer_object_rgb32        , GLC                  , 2009)
+EXT(ARB_texture_buffer_range                , ARB_texture_buffer_range               , GLC                  , 2012)
+EXT(ARB_texture_compression                 , dummy_true                             , GLL                  , 2000)
+EXT(ARB_texture_compression_bptc            , ARB_texture_compression_bptc           , GL                   , 2010)
+EXT(ARB_texture_compression_rgtc            , ARB_texture_compression_rgtc           , GL                   , 2004)
+EXT(ARB_texture_cube_map                    , ARB_texture_cube_map                   , GLL                  , 1999)
+EXT(ARB_texture_cube_map_array              , ARB_texture_cube_map_array             , GL                   , 2009)
+EXT(ARB_texture_env_add                     , dummy_true                             , GLL                  , 1999)
+EXT(ARB_texture_env_combine                 , ARB_texture_env_combine                , GLL                  , 2001)
+EXT(ARB_texture_env_crossbar                , ARB_texture_env_crossbar               , GLL                  , 2001)
+EXT(ARB_texture_env_dot3                    , ARB_texture_env_dot3                   , GLL                  , 2001)
+EXT(ARB_texture_float                       , ARB_texture_float                      , GL                   , 2004)
+EXT(ARB_texture_gather                      , ARB_texture_gather                     , GL                   , 2009)
+EXT(ARB_texture_mirrored_repeat             , dummy_true                             , GLL                  , 2001)
+EXT(ARB_texture_mirror_clamp_to_edge        , ARB_texture_mirror_clamp_to_edge       , GL                   , 2013)
+EXT(ARB_texture_multisample                 , ARB_texture_multisample                , GL                   , 2009)
+EXT(ARB_texture_non_power_of_two            , ARB_texture_non_power_of_two           , GL                   , 2003)
+EXT(ARB_texture_query_levels                , ARB_texture_query_levels               , GL                   , 2012)
+EXT(ARB_texture_query_lod                   , ARB_texture_query_lod                  , GL                   , 2009)
+EXT(ARB_texture_rectangle                   , NV_texture_rectangle                   , GL                   , 2004)
+EXT(ARB_texture_rgb10_a2ui                  , ARB_texture_rgb10_a2ui                 , GL                   , 2009)
+EXT(ARB_texture_rg                          , ARB_texture_rg                         , GL                   , 2008)
+EXT(ARB_texture_stencil8                    , ARB_texture_stencil8                   , GL                   , 2013)
+EXT(ARB_texture_storage                     , dummy_true                             , GL                   , 2011)
+EXT(ARB_texture_storage_multisample         , ARB_texture_multisample                , GL                   , 2012)
+EXT(ARB_texture_view                        , ARB_texture_view                       , GL                   , 2012)
+EXT(ARB_texture_swizzle                     , EXT_texture_swizzle                    , GL                   , 2008)
+EXT(ARB_timer_query                         , ARB_timer_query                        , GL                   , 2010)
+EXT(ARB_transform_feedback2                 , ARB_transform_feedback2                , GL                   , 2010)
+EXT(ARB_transform_feedback3                 , ARB_transform_feedback3                , GL                   , 2010)
+EXT(ARB_transform_feedback_instanced        , ARB_transform_feedback_instanced       , GL                   , 2011)
+EXT(ARB_transpose_matrix                    , dummy_true                             , GLL                  , 1999)
+EXT(ARB_uniform_buffer_object               , ARB_uniform_buffer_object              , GL                   , 2009)
+EXT(ARB_vertex_array_bgra                   , EXT_vertex_array_bgra                  , GL                   , 2008)
+EXT(ARB_vertex_array_object                 , dummy_true                             , GL                   , 2006)
+EXT(ARB_vertex_attrib_binding               , dummy_true                             , GL                   , 2012)
+EXT(ARB_vertex_buffer_object                , dummy_true                             , GLL                  , 2003)
+EXT(ARB_vertex_program                      , ARB_vertex_program                     , GLL                  , 2002)
+EXT(ARB_vertex_shader                       , ARB_vertex_shader                      , GL                   , 2002)
+EXT(ARB_vertex_attrib_64bit                 , ARB_vertex_attrib_64bit                , GLC                  , 2010)
+EXT(ARB_vertex_type_10f_11f_11f_rev         , ARB_vertex_type_10f_11f_11f_rev        , GL                   , 2013)
+EXT(ARB_vertex_type_2_10_10_10_rev          , ARB_vertex_type_2_10_10_10_rev         , GL                   , 2009)
+EXT(ARB_viewport_array                      , ARB_viewport_array                     , GLC                  , 2010)
+EXT(ARB_window_pos                          , dummy_true                             , GLL                  , 2001)
+
+EXT(EXT_abgr                                , dummy_true                             , GL                   , 1995)
+EXT(EXT_bgra                                , dummy_true                             , GLL                  , 1995)
+EXT(EXT_blend_color                         , EXT_blend_color                        , GLL                  , 1995)
+EXT(EXT_blend_equation_separate             , EXT_blend_equation_separate            , GL                   , 2003)
+EXT(EXT_blend_func_separate                 , EXT_blend_func_separate                , GLL                  , 1999)
+EXT(EXT_buffer_storage                      , ARB_buffer_storage                     , ES31                 , 2015)
+EXT(EXT_discard_framebuffer                 , dummy_true                             ,       ES1 | ES2      , 2009)
+EXT(EXT_blend_minmax                        , EXT_blend_minmax                       , GLL | ES1 | ES2      , 1995)
+EXT(EXT_blend_subtract                      , dummy_true                             , GLL                  , 1995)
+EXT(EXT_compiled_vertex_array               , dummy_true                             , GLL                  , 1996)
+EXT(EXT_copy_texture                        , dummy_true                             , GLL                  , 1995)
+EXT(EXT_depth_bounds_test                   , EXT_depth_bounds_test                  , GL                   , 2002)
+EXT(EXT_draw_buffers                        , dummy_true                             ,             ES2      , 2012)
+EXT(EXT_draw_buffers2                       , EXT_draw_buffers2                      , GL                   , 2006)
+EXT(EXT_draw_elements_base_vertex           , ARB_draw_elements_base_vertex          , ES2                  , 2014)
+EXT(EXT_draw_instanced                      , ARB_draw_instanced                     , GL                   , 2006)
+EXT(EXT_draw_range_elements                 , dummy_true                             , GLL                  , 1997)
+EXT(EXT_fog_coord                           , dummy_true                             , GLL                  , 1999)
+EXT(EXT_framebuffer_blit                    , dummy_true                             , GL                   , 2005)
+EXT(EXT_framebuffer_multisample             , EXT_framebuffer_multisample            , GL                   , 2005)
+EXT(EXT_framebuffer_multisample_blit_scaled , EXT_framebuffer_multisample_blit_scaled, GL                   , 2011)
+EXT(EXT_framebuffer_object                  , dummy_true                             , GLL                  , 2000)
+EXT(EXT_framebuffer_sRGB                    , EXT_framebuffer_sRGB                   , GL                   , 1998)
+EXT(EXT_gpu_program_parameters              , EXT_gpu_program_parameters             , GLL                  , 2006)
+EXT(EXT_gpu_shader4                         , EXT_gpu_shader4                        , GL                   , 2006)
+EXT(EXT_map_buffer_range                    , ARB_map_buffer_range                   ,       ES1 | ES2      , 2012)
+EXT(EXT_multi_draw_arrays                   , dummy_true                             , GLL | ES1 | ES2      , 1999)
+EXT(EXT_packed_depth_stencil                , dummy_true                             , GL                   , 2005)
+EXT(EXT_packed_float                        , EXT_packed_float                       , GL                   , 2004)
+EXT(EXT_packed_pixels                       , dummy_true                             , GLL                  , 1997)
+EXT(EXT_pixel_buffer_object                 , EXT_pixel_buffer_object                , GL                   , 2004)
+EXT(EXT_point_parameters                    , EXT_point_parameters                   , GLL                  , 1997)
+EXT(EXT_polygon_offset                      , dummy_true                             , GLL                  , 1995)
+EXT(EXT_polygon_offset_clamp                , EXT_polygon_offset_clamp               , GL                   , 2014)
+EXT(EXT_provoking_vertex                    , EXT_provoking_vertex                   , GL                   , 2009)
+EXT(EXT_rescale_normal                      , dummy_true                             , GLL                  , 1997)
+EXT(EXT_secondary_color                     , dummy_true                             , GLL                  , 1999)
+EXT(EXT_separate_shader_objects             , dummy_true                             ,            ES2       , 2013)
+EXT(EXT_separate_specular_color             , dummy_true                             , GLL                  , 1997)
+EXT(EXT_shader_integer_mix                  , EXT_shader_integer_mix                 , GL       | ES3       , 2013)
+EXT(EXT_shadow_funcs                        , ARB_shadow                             , GLL                  , 2002)
+EXT(EXT_stencil_two_side                    , EXT_stencil_two_side                   , GLL                  , 2001)
+EXT(EXT_stencil_wrap                        , dummy_true                             , GLL                  , 2002)
+EXT(EXT_subtexture                          , dummy_true                             , GLL                  , 1995)
+EXT(EXT_texture3D                           , EXT_texture3D                          , GLL                  , 1996)
+EXT(EXT_texture_array                       , EXT_texture_array                      , GL                   , 2006)
+EXT(EXT_texture_compression_dxt1            , ANGLE_texture_compression_dxt          , GL | ES1 | ES2       , 2004)
+EXT(ANGLE_texture_compression_dxt3          , ANGLE_texture_compression_dxt          , GL | ES1 | ES2       , 2011)
+EXT(ANGLE_texture_compression_dxt5          , ANGLE_texture_compression_dxt          , GL | ES1 | ES2       , 2011)
+EXT(EXT_texture_compression_latc            , EXT_texture_compression_latc           , GLL                  , 2006)
+EXT(EXT_texture_compression_rgtc            , ARB_texture_compression_rgtc           , GL                   , 2004)
+EXT(EXT_texture_compression_s3tc            , EXT_texture_compression_s3tc           , GL                   , 2000)
+EXT(EXT_texture_cube_map                    , ARB_texture_cube_map                   , GLL                  , 2001)
+EXT(EXT_texture_edge_clamp                  , dummy_true                             , GLL                  , 1997)
+EXT(EXT_texture_env_add                     , dummy_true                             , GLL                  , 1999)
+EXT(EXT_texture_env_combine                 , dummy_true                             , GLL                  , 2000)
+EXT(EXT_texture_env_dot3                    , EXT_texture_env_dot3                   , GLL                  , 2000)
+EXT(EXT_texture_filter_anisotropic          , EXT_texture_filter_anisotropic         , GL | ES1 | ES2       , 1999)
+EXT(EXT_texture_format_BGRA8888             , dummy_true                             ,      ES1 | ES2       , 2005)
+EXT(EXT_texture_rg                          , ARB_texture_rg                         ,            ES2       , 2011)
+EXT(EXT_read_format_bgra                    , dummy_true                             ,      ES1 | ES2       , 2009)
+EXT(EXT_texture_integer                     , EXT_texture_integer                    , GL                   , 2006)
+EXT(EXT_texture_lod_bias                    , dummy_true                             , GLL | ES1            , 1999)
+EXT(EXT_texture_mirror_clamp                , EXT_texture_mirror_clamp               , GL                   , 2004)
+EXT(EXT_texture_object                      , dummy_true                             , GLL                  , 1995)
+EXT(EXT_texture                             , dummy_true                             , GLL                  , 1996)
+EXT(EXT_texture_rectangle                   , NV_texture_rectangle                   , GLL                  , 2004)
+EXT(EXT_texture_shared_exponent             , EXT_texture_shared_exponent            , GL                   , 2004)
+EXT(EXT_texture_snorm                       , EXT_texture_snorm                      , GL                   , 2009)
+EXT(EXT_texture_sRGB                        , EXT_texture_sRGB                       , GL                   , 2004)
+EXT(EXT_texture_sRGB_decode                 , EXT_texture_sRGB_decode                , GL                   , 2006)
+EXT(EXT_texture_swizzle                     , EXT_texture_swizzle                    , GL                   , 2008)
+EXT(EXT_texture_type_2_10_10_10_REV         , dummy_true                             ,            ES2       , 2008)
+EXT(EXT_timer_query                         , EXT_timer_query                        , GL                   , 2006)
+EXT(EXT_transform_feedback                  , EXT_transform_feedback                 , GL                   , 2011)
+EXT(EXT_unpack_subimage                     , dummy_true                             ,            ES2       , 2011)
+EXT(EXT_vertex_array_bgra                   , EXT_vertex_array_bgra                  , GL                   , 2008)
+EXT(EXT_vertex_array                        , dummy_true                             , GLL                  , 1995)
+EXT(EXT_color_buffer_float                  , dummy_true                             ,                  ES3 , 2013)
+
+
+EXT(OES_blend_equation_separate             , EXT_blend_equation_separate            ,      ES1             , 2009)
+EXT(OES_blend_func_separate                 , EXT_blend_func_separate                ,      ES1             , 2009)
+EXT(OES_blend_subtract                      , dummy_true                             ,      ES1             , 2009)
+EXT(OES_byte_coordinates                    , dummy_true                             ,      ES1             , 2002)
+EXT(OES_compressed_ETC1_RGB8_texture        , OES_compressed_ETC1_RGB8_texture       ,      ES1 | ES2       , 2005)
+EXT(OES_compressed_paletted_texture         , dummy_true                             ,      ES1             , 2003)
+EXT(OES_depth24                             , dummy_true                             ,      ES1 | ES2       , 2005)
+EXT(OES_depth32                             , dummy_false                            ,       DISABLE        , 2005)
+EXT(OES_depth_texture                       , ARB_depth_texture                      ,            ES2       , 2006)
+EXT(OES_depth_texture_cube_map              , OES_depth_texture_cube_map             ,            ES2       , 2012)
+EXT(OES_draw_elements_base_vertex           , ARB_draw_elements_base_vertex          , ES2                  , 2014)
+EXT(OES_draw_texture                        , OES_draw_texture                       ,      ES1             , 2004)
+EXT(OES_EGL_sync                            , dummy_true                             ,      ES1 | ES2       , 2010)
+EXT(OES_EGL_image                           , OES_EGL_image                          , GL | ES1 | ES2       , 2006) /* FIXME: Mesa expects GL_OES_EGL_image to be available in OpenGL contexts. */
+EXT(OES_EGL_image_external                  , OES_EGL_image_external                 ,      ES1 | ES2       , 2010)
+EXT(OES_element_index_uint                  , dummy_true                             ,      ES1 | ES2       , 2005)
+EXT(OES_fbo_render_mipmap                   , dummy_true                             ,      ES1 | ES2       , 2005)
+EXT(OES_fixed_point                         , dummy_true                             ,      ES1             , 2002)
+EXT(OES_framebuffer_object                  , dummy_true                             ,      ES1             , 2005)
+EXT(OES_get_program_binary                  , dummy_true                             ,            ES2       , 2008)
+EXT(OES_mapbuffer                           , dummy_true                             ,      ES1 | ES2       , 2005)
+EXT(OES_packed_depth_stencil                , dummy_true                             ,      ES1 | ES2       , 2007)
+EXT(OES_point_size_array                    , dummy_true                             ,      ES1             , 2004)
+EXT(OES_point_sprite                        , ARB_point_sprite                       ,      ES1             , 2004)
+EXT(OES_query_matrix                        , dummy_true                             ,      ES1             , 2003)
+EXT(OES_read_format                         , dummy_true                             , GL | ES1             , 2003)
+EXT(OES_rgb8_rgba8                          , dummy_true                             ,      ES1 | ES2       , 2005)
+EXT(OES_single_precision                    , dummy_true                             ,      ES1             , 2003)
+EXT(OES_standard_derivatives                , OES_standard_derivatives               ,            ES2       , 2005)
+EXT(OES_stencil1                            , dummy_false                            ,       DISABLE        , 2005)
+EXT(OES_stencil4                            , dummy_false                            ,       DISABLE        , 2005)
+EXT(OES_stencil8                            , dummy_true                             ,      ES1 | ES2       , 2005)
+EXT(OES_stencil_wrap                        , dummy_true                             ,      ES1             , 2002)
+EXT(OES_surfaceless_context                 , dummy_true                             ,      ES1 | ES2       , 2012)
+EXT(OES_texture_3D                          , EXT_texture3D                          ,            ES2       , 2005)
+EXT(OES_texture_cube_map                    , ARB_texture_cube_map                   ,      ES1             , 2007)
+EXT(OES_texture_env_crossbar                , ARB_texture_env_crossbar               ,      ES1             , 2005)
+EXT(OES_texture_float                       , OES_texture_float                      ,            ES2       , 2005)
+EXT(OES_texture_float_linear                , OES_texture_float_linear               ,            ES2       , 2005)
+EXT(OES_texture_half_float                  , OES_texture_half_float                 ,            ES2       , 2005)
+EXT(OES_texture_half_float_linear           , OES_texture_half_float_linear          ,            ES2       , 2005)
+EXT(OES_texture_mirrored_repeat             , dummy_true                             ,      ES1             , 2005)
+EXT(OES_texture_storage_multisample_2d_array, ARB_texture_multisample                ,           ES31       , 2014)
+EXT(OES_texture_npot                        , ARB_texture_non_power_of_two           ,      ES1 | ES2       , 2005)
+EXT(OES_vertex_array_object                 , dummy_true                             ,      ES1 | ES2       , 2010)
+
+
+EXT(KHR_debug                               , dummy_true                             , GL                   , 2012)
+EXT(KHR_context_flush_control               , dummy_true                             , GL       | ES2       , 2014)
+EXT(KHR_texture_compression_astc_hdr        , KHR_texture_compression_astc_hdr       , GL       | ES2       , 2012)
+EXT(KHR_texture_compression_astc_ldr        , KHR_texture_compression_astc_ldr       , GL       | ES2       , 2012)
+
+
+EXT(3DFX_texture_compression_FXT1           , TDFX_texture_compression_FXT1          , GL                   , 1999)
+EXT(AMD_conservative_depth                  , ARB_conservative_depth                 , GL                   , 2009)
+EXT(AMD_draw_buffers_blend                  , ARB_draw_buffers_blend                 , GL                   , 2009)
+EXT(AMD_performance_monitor                 , AMD_performance_monitor                , GL                   , 2007)
+EXT(AMD_pinned_memory                       , AMD_pinned_memory                      , GL                   , 2013)
+EXT(AMD_seamless_cubemap_per_texture        , AMD_seamless_cubemap_per_texture       , GL                   , 2009)
+EXT(AMD_shader_stencil_export               , ARB_shader_stencil_export              , GL                   , 2009)
+EXT(AMD_shader_trinary_minmax               , dummy_true                             , GL                   , 2012)
+EXT(AMD_vertex_shader_layer                 , AMD_vertex_shader_layer                , GLC                  , 2012)
+EXT(AMD_vertex_shader_viewport_index        , AMD_vertex_shader_viewport_index       , GLC                  , 2012)
+EXT(APPLE_object_purgeable                  , APPLE_object_purgeable                 , GL                   , 2006)
+EXT(APPLE_packed_pixels                     , dummy_true                             , GLL                  , 2002)
+EXT(APPLE_texture_max_level                 , dummy_true                             ,      ES1 | ES2       , 2009)
+EXT(APPLE_vertex_array_object               , dummy_true                             , GLL                  , 2002)
+EXT(ATI_blend_equation_separate             , EXT_blend_equation_separate            , GL                   , 2003)
+EXT(ATI_draw_buffers                        , dummy_true                             , GLL                  , 2002)
+EXT(ATI_fragment_shader                     , ATI_fragment_shader                    , GLL                  , 2001)
+EXT(ATI_separate_stencil                    , ATI_separate_stencil                   , GLL                  , 2006)
+EXT(ATI_texture_compression_3dc             , ATI_texture_compression_3dc            , GLL                  , 2004)
+EXT(ATI_texture_env_combine3                , ATI_texture_env_combine3               , GLL                  , 2002)
+EXT(ATI_texture_float                       , ARB_texture_float                      , GL                   , 2002)
+EXT(ATI_texture_mirror_once                 , ATI_texture_mirror_once                , GL                   , 2006)
+EXT(IBM_multimode_draw_arrays               , dummy_true                             , GL                   , 1998)
+EXT(IBM_rasterpos_clip                      , dummy_true                             , GLL                  , 1996)
+EXT(IBM_texture_mirrored_repeat             , dummy_true                             , GLL                  , 1998)
+EXT(INGR_blend_func_separate                , EXT_blend_func_separate                , GLL                  , 1999)
+EXT(INTEL_performance_query                 , INTEL_performance_query                , GL       | ES2       , 2013)
+EXT(MESA_pack_invert                        , MESA_pack_invert                       , GL                   , 2002)
+EXT(MESA_texture_signed_rgba                , EXT_texture_snorm                      , GL                   , 2009)
+EXT(MESA_window_pos                         , dummy_true                             , GLL                  , 2000)
+EXT(MESA_ycbcr_texture                      , MESA_ycbcr_texture                     , GL                   , 2002)
+EXT(NV_blend_square                         , dummy_true                             , GLL                  , 1999)
+EXT(NV_conditional_render                   , NV_conditional_render                  , GL                   , 2008)
+EXT(NV_depth_clamp                          , ARB_depth_clamp                        , GL                   , 2001)
+EXT(NV_draw_buffers                         , dummy_true                             ,            ES2       , 2011)
+EXT(NV_fbo_color_attachments                , dummy_true                             ,            ES2       , 2010)
+EXT(NV_fog_distance                         , NV_fog_distance                        , GLL                  , 2001)
+EXT(NV_fragment_program_option              , NV_fragment_program_option             , GLL                  , 2005)
+EXT(NV_light_max_exponent                   , dummy_true                             , GLL                  , 1999)
+EXT(NV_packed_depth_stencil                 , dummy_true                             , GL                   , 2000)
+EXT(NV_point_sprite                         , NV_point_sprite                        , GL                   , 2001)
+EXT(NV_primitive_restart                    , NV_primitive_restart                   , GLL                  , 2002)
+EXT(NV_read_buffer                          , dummy_true                             ,            ES2       , 2011)
+EXT(NV_read_depth                           , dummy_true                             ,            ES2       , 2011)
+EXT(NV_read_depth_stencil                   , dummy_true                             ,            ES2       , 2011)
+EXT(NV_read_stencil                         , dummy_true                             ,            ES2       , 2011)
+EXT(NV_texgen_reflection                    , dummy_true                             , GLL                  , 1999)
+EXT(NV_texture_barrier                      , NV_texture_barrier                     , GL                   , 2009)
+EXT(NV_texture_env_combine4                 , NV_texture_env_combine4                , GLL                  , 1999)
+EXT(NV_texture_rectangle                    , NV_texture_rectangle                   , GLL                  , 2000)
+EXT(NV_vdpau_interop                        , NV_vdpau_interop                       , GL                   , 2010)
+EXT(S3_s3tc                                 , ANGLE_texture_compression_dxt          , GL                   , 1999)
+EXT(SGIS_generate_mipmap                    , dummy_true                             , GLL                  , 1997)
+EXT(SGIS_texture_border_clamp               , ARB_texture_border_clamp               , GLL                  , 1997)
+EXT(SGIS_texture_edge_clamp                 , dummy_true                             , GLL                  , 1997)
+EXT(SGIS_texture_lod                        , dummy_true                             , GLL                  , 1997)
+EXT(SUN_multi_draw_arrays                   , dummy_true                             , GLL                  , 1999)

From f47df8f729abfda177120a1437197372fd19b83f Mon Sep 17 00:00:00 2001
From: Nanley Chery <nanley.g.chery@intel.com>
Date: Tue, 8 Sep 2015 12:25:56 -0700
Subject: [PATCH 201/287] mesa/extensions: Add extension::version

Enable limiting advertised extension support by context version with
finer granularity. This new field is currently unused and is set to
0 everywhere. When it is used, a value of 0 will indicate that the
extension is supported for any version of a context.

v2: Use uint*_t type for version and note the expected values (Emil)
    Use an 8-bit data type
    Reformat macro for better readability (Chad)

v3: Note preparatory nature of commit (Chad)

Signed-off-by: Nanley Chery <nanley.g.chery@intel.com>
Reviewed-by: Chad Versace <chad.versace@intel.com>
---
 src/mesa/main/extensions.c       |  18 +-
 src/mesa/main/extensions_table.h | 636 +++++++++++++++----------------
 2 files changed, 334 insertions(+), 320 deletions(-)

diff --git a/src/mesa/main/extensions.c b/src/mesa/main/extensions.c
index 30f5b9868d3..4fd7487e7e2 100644
--- a/src/mesa/main/extensions.c
+++ b/src/mesa/main/extensions.c
@@ -66,6 +66,12 @@ struct extension {
    /** Set of API's in which the extension exists, as a bitset. */
    uint8_t api_set;
 
+   /** Minimum version the extension requires for the given API
+    * (see gl_api defined in mtypes.h). The value is equal to:
+    * 10 * major_version + minor_version
+    */
+   uint8_t version[API_OPENGL_LAST + 1];
+
    /** Year the extension was proposed or approved.  Used to sort the 
     * extension string chronologically. */
    uint16_t year;
@@ -83,8 +89,16 @@ struct extension {
  * \brief Table of supported OpenGL extensions for all API's.
  */
 static const struct extension extension_table[] = {
-#define EXT(name_str, driver_cap, api_flags, yyyy) \
-        { .name = "GL_" #name_str, .offset = o(driver_cap), .api_set = api_flags, .year = yyyy},
+#define EXT(name_str, driver_cap, api_flags, gll_ver, glc_ver, gles_ver, gles2_ver, yyyy) \
+        { .name = "GL_" #name_str, .offset = o(driver_cap), .api_set = api_flags, \
+          .version = { \
+            [API_OPENGL_COMPAT] = gll_ver, \
+            [API_OPENGL_CORE]   = glc_ver, \
+            [API_OPENGLES]      = gles_ver, \
+            [API_OPENGLES2]     = gles2_ver, \
+           }, \
+           .year = yyyy \
+        },
 #include "extensions_table.h"
 #undef EXT
 };
diff --git a/src/mesa/main/extensions_table.h b/src/mesa/main/extensions_table.h
index 084a04693d3..50c0d428725 100644
--- a/src/mesa/main/extensions_table.h
+++ b/src/mesa/main/extensions_table.h
@@ -1,325 +1,325 @@
-EXT(ARB_ES2_compatibility                   , ARB_ES2_compatibility                  , GL                   , 2009)
-EXT(ARB_ES3_compatibility                   , ARB_ES3_compatibility                  , GL                   , 2012)
-EXT(ARB_arrays_of_arrays                    , ARB_arrays_of_arrays                   , GL                   , 2012)
-EXT(ARB_base_instance                       , ARB_base_instance                      , GL                   , 2011)
-EXT(ARB_blend_func_extended                 , ARB_blend_func_extended                , GL                   , 2009)
-EXT(ARB_buffer_storage                      , ARB_buffer_storage                     , GL                   , 2013)
-EXT(ARB_clear_buffer_object                 , dummy_true                             , GL                   , 2012)
-EXT(ARB_clear_texture                       , ARB_clear_texture                      , GL                   , 2013)
-EXT(ARB_clip_control                        , ARB_clip_control                       , GL                   , 2014)
-EXT(ARB_color_buffer_float                  , ARB_color_buffer_float                 , GL                   , 2004)
-EXT(ARB_compressed_texture_pixel_storage    , dummy_true                             , GL                   , 2011)
-EXT(ARB_compute_shader                      , ARB_compute_shader                     , GL                   , 2012)
-EXT(ARB_conditional_render_inverted         , ARB_conditional_render_inverted        , GL                   , 2014)
-EXT(ARB_copy_buffer                         , dummy_true                             , GL                   , 2008)
-EXT(ARB_copy_image                          , ARB_copy_image                         , GL                   , 2012)
-EXT(ARB_conservative_depth                  , ARB_conservative_depth                 , GL                   , 2011)
-EXT(ARB_debug_output                        , dummy_true                             , GL                   , 2009)
-EXT(ARB_depth_buffer_float                  , ARB_depth_buffer_float                 , GL                   , 2008)
-EXT(ARB_depth_clamp                         , ARB_depth_clamp                        , GL                   , 2003)
-EXT(ARB_depth_texture                       , ARB_depth_texture                      , GLL                  , 2001)
-EXT(ARB_derivative_control                  , ARB_derivative_control                 , GL                   , 2014)
-EXT(ARB_direct_state_access                 , dummy_true                             , GLC                  , 2014)
-EXT(ARB_draw_buffers                        , dummy_true                             , GL                   , 2002)
-EXT(ARB_draw_buffers_blend                  , ARB_draw_buffers_blend                 , GL                   , 2009)
-EXT(ARB_draw_elements_base_vertex           , ARB_draw_elements_base_vertex          , GL                   , 2009)
-EXT(ARB_draw_indirect                       , ARB_draw_indirect                      , GLC                  , 2010)
-EXT(ARB_draw_instanced                      , ARB_draw_instanced                     , GL                   , 2008)
-EXT(ARB_enhanced_layouts                    , ARB_enhanced_layouts                   , GLC                  , 2013)
-EXT(ARB_explicit_attrib_location            , ARB_explicit_attrib_location           , GL                   , 2009)
-EXT(ARB_explicit_uniform_location           , ARB_explicit_uniform_location          , GL                   , 2012)
-EXT(ARB_fragment_coord_conventions          , ARB_fragment_coord_conventions         , GL                   , 2009)
-EXT(ARB_fragment_layer_viewport             , ARB_fragment_layer_viewport            , GLC                  , 2012)
-EXT(ARB_fragment_program                    , ARB_fragment_program                   , GLL                  , 2002)
-EXT(ARB_fragment_program_shadow             , ARB_fragment_program_shadow            , GLL                  , 2003)
-EXT(ARB_fragment_shader                     , ARB_fragment_shader                    , GL                   , 2002)
-EXT(ARB_framebuffer_no_attachments          , ARB_framebuffer_no_attachments         , GL                   , 2012)
-EXT(ARB_framebuffer_object                  , ARB_framebuffer_object                 , GL                   , 2005)
-EXT(ARB_framebuffer_sRGB                    , EXT_framebuffer_sRGB                   , GL                   , 1998)
-EXT(ARB_get_program_binary                  , dummy_true                             , GL                   , 2010)
-EXT(ARB_get_texture_sub_image               , dummy_true                             , GL                   , 2014)
-EXT(ARB_gpu_shader5                         , ARB_gpu_shader5                        , GLC                  , 2010)
-EXT(ARB_gpu_shader_fp64                     , ARB_gpu_shader_fp64                    , GLC                  , 2010)
-EXT(ARB_half_float_pixel                    , dummy_true                             , GL                   , 2003)
-EXT(ARB_half_float_vertex                   , ARB_half_float_vertex                  , GL                   , 2008)
-EXT(ARB_instanced_arrays                    , ARB_instanced_arrays                   , GL                   , 2008)
-EXT(ARB_internalformat_query                , ARB_internalformat_query               , GL                   , 2011)
-EXT(ARB_invalidate_subdata                  , dummy_true                             , GL                   , 2012)
-EXT(ARB_map_buffer_alignment                , dummy_true                             , GL                   , 2011)
-EXT(ARB_map_buffer_range                    , ARB_map_buffer_range                   , GL                   , 2008)
-EXT(ARB_multi_bind                          , dummy_true                             , GL                   , 2013)
-EXT(ARB_multi_draw_indirect                 , ARB_draw_indirect                      , GLC                  , 2012)
-EXT(ARB_multisample                         , dummy_true                             , GLL                  , 1994)
-EXT(ARB_multitexture                        , dummy_true                             , GLL                  , 1998)
-EXT(ARB_occlusion_query2                    , ARB_occlusion_query2                   , GL                   , 2003)
-EXT(ARB_occlusion_query                     , ARB_occlusion_query                    , GLL                  , 2001)
-EXT(ARB_pipeline_statistics_query           , ARB_pipeline_statistics_query          , GL                   , 2014)
-EXT(ARB_pixel_buffer_object                 , EXT_pixel_buffer_object                , GL                   , 2004)
-EXT(ARB_point_parameters                    , EXT_point_parameters                   , GLL                  , 1997)
-EXT(ARB_point_sprite                        , ARB_point_sprite                       , GL                   , 2003)
-EXT(ARB_program_interface_query             , dummy_true                             , GL                   , 2012)
-EXT(ARB_provoking_vertex                    , EXT_provoking_vertex                   , GL                   , 2009)
-EXT(ARB_robustness                          , dummy_true                             , GL                   , 2010)
-EXT(ARB_sample_shading                      , ARB_sample_shading                     , GL                   , 2009)
-EXT(ARB_sampler_objects                     , dummy_true                             , GL                   , 2009)
-EXT(ARB_seamless_cube_map                   , ARB_seamless_cube_map                  , GL                   , 2009)
-EXT(ARB_seamless_cubemap_per_texture        , AMD_seamless_cubemap_per_texture       , GL                   , 2013)
-EXT(ARB_separate_shader_objects             , dummy_true                             , GL                   , 2010)
-EXT(ARB_shader_atomic_counters              , ARB_shader_atomic_counters             , GL                   , 2011)
-EXT(ARB_shader_bit_encoding                 , ARB_shader_bit_encoding                , GL                   , 2010)
-EXT(ARB_shader_clock                        , ARB_shader_clock                       , GL                   , 2015)
-EXT(ARB_shader_image_load_store             , ARB_shader_image_load_store            , GL                   , 2011)
-EXT(ARB_shader_image_size                   , ARB_shader_image_size                  , GL                   , 2012)
-EXT(ARB_shader_objects                      , dummy_true                             , GL                   , 2002)
-EXT(ARB_shader_precision                    , ARB_shader_precision                   , GL                   , 2010)
-EXT(ARB_shader_stencil_export               , ARB_shader_stencil_export              , GL                   , 2009)
-EXT(ARB_shader_storage_buffer_object        , ARB_shader_storage_buffer_object       , GL                   , 2012)
-EXT(ARB_shader_subroutine                   , ARB_shader_subroutine                  , GLC                  , 2010)
-EXT(ARB_shader_texture_image_samples        , ARB_shader_texture_image_samples       , GL                   , 2014)
-EXT(ARB_shader_texture_lod                  , ARB_shader_texture_lod                 , GL                   , 2009)
-EXT(ARB_shading_language_100                , dummy_true                             , GLL                  , 2003)
-EXT(ARB_shading_language_packing            , ARB_shading_language_packing           , GL                   , 2011)
-EXT(ARB_shading_language_420pack            , ARB_shading_language_420pack           , GL                   , 2011)
-EXT(ARB_shadow                              , ARB_shadow                             , GLL                  , 2001)
-EXT(ARB_stencil_texturing                   , ARB_stencil_texturing                  , GL                   , 2012)
-EXT(ARB_sync                                , ARB_sync                               , GL                   , 2003)
-EXT(ARB_texture_barrier                     , NV_texture_barrier                     , GL                   , 2014)
-EXT(ARB_tessellation_shader                 , ARB_tessellation_shader                , GLC                  , 2009)
-EXT(ARB_texture_border_clamp                , ARB_texture_border_clamp               , GLL                  , 2000)
-EXT(ARB_texture_buffer_object               , ARB_texture_buffer_object              , GLC                  , 2008)
-EXT(ARB_texture_buffer_object_rgb32         , ARB_texture_buffer_object_rgb32        , GLC                  , 2009)
-EXT(ARB_texture_buffer_range                , ARB_texture_buffer_range               , GLC                  , 2012)
-EXT(ARB_texture_compression                 , dummy_true                             , GLL                  , 2000)
-EXT(ARB_texture_compression_bptc            , ARB_texture_compression_bptc           , GL                   , 2010)
-EXT(ARB_texture_compression_rgtc            , ARB_texture_compression_rgtc           , GL                   , 2004)
-EXT(ARB_texture_cube_map                    , ARB_texture_cube_map                   , GLL                  , 1999)
-EXT(ARB_texture_cube_map_array              , ARB_texture_cube_map_array             , GL                   , 2009)
-EXT(ARB_texture_env_add                     , dummy_true                             , GLL                  , 1999)
-EXT(ARB_texture_env_combine                 , ARB_texture_env_combine                , GLL                  , 2001)
-EXT(ARB_texture_env_crossbar                , ARB_texture_env_crossbar               , GLL                  , 2001)
-EXT(ARB_texture_env_dot3                    , ARB_texture_env_dot3                   , GLL                  , 2001)
-EXT(ARB_texture_float                       , ARB_texture_float                      , GL                   , 2004)
-EXT(ARB_texture_gather                      , ARB_texture_gather                     , GL                   , 2009)
-EXT(ARB_texture_mirrored_repeat             , dummy_true                             , GLL                  , 2001)
-EXT(ARB_texture_mirror_clamp_to_edge        , ARB_texture_mirror_clamp_to_edge       , GL                   , 2013)
-EXT(ARB_texture_multisample                 , ARB_texture_multisample                , GL                   , 2009)
-EXT(ARB_texture_non_power_of_two            , ARB_texture_non_power_of_two           , GL                   , 2003)
-EXT(ARB_texture_query_levels                , ARB_texture_query_levels               , GL                   , 2012)
-EXT(ARB_texture_query_lod                   , ARB_texture_query_lod                  , GL                   , 2009)
-EXT(ARB_texture_rectangle                   , NV_texture_rectangle                   , GL                   , 2004)
-EXT(ARB_texture_rgb10_a2ui                  , ARB_texture_rgb10_a2ui                 , GL                   , 2009)
-EXT(ARB_texture_rg                          , ARB_texture_rg                         , GL                   , 2008)
-EXT(ARB_texture_stencil8                    , ARB_texture_stencil8                   , GL                   , 2013)
-EXT(ARB_texture_storage                     , dummy_true                             , GL                   , 2011)
-EXT(ARB_texture_storage_multisample         , ARB_texture_multisample                , GL                   , 2012)
-EXT(ARB_texture_view                        , ARB_texture_view                       , GL                   , 2012)
-EXT(ARB_texture_swizzle                     , EXT_texture_swizzle                    , GL                   , 2008)
-EXT(ARB_timer_query                         , ARB_timer_query                        , GL                   , 2010)
-EXT(ARB_transform_feedback2                 , ARB_transform_feedback2                , GL                   , 2010)
-EXT(ARB_transform_feedback3                 , ARB_transform_feedback3                , GL                   , 2010)
-EXT(ARB_transform_feedback_instanced        , ARB_transform_feedback_instanced       , GL                   , 2011)
-EXT(ARB_transpose_matrix                    , dummy_true                             , GLL                  , 1999)
-EXT(ARB_uniform_buffer_object               , ARB_uniform_buffer_object              , GL                   , 2009)
-EXT(ARB_vertex_array_bgra                   , EXT_vertex_array_bgra                  , GL                   , 2008)
-EXT(ARB_vertex_array_object                 , dummy_true                             , GL                   , 2006)
-EXT(ARB_vertex_attrib_binding               , dummy_true                             , GL                   , 2012)
-EXT(ARB_vertex_buffer_object                , dummy_true                             , GLL                  , 2003)
-EXT(ARB_vertex_program                      , ARB_vertex_program                     , GLL                  , 2002)
-EXT(ARB_vertex_shader                       , ARB_vertex_shader                      , GL                   , 2002)
-EXT(ARB_vertex_attrib_64bit                 , ARB_vertex_attrib_64bit                , GLC                  , 2010)
-EXT(ARB_vertex_type_10f_11f_11f_rev         , ARB_vertex_type_10f_11f_11f_rev        , GL                   , 2013)
-EXT(ARB_vertex_type_2_10_10_10_rev          , ARB_vertex_type_2_10_10_10_rev         , GL                   , 2009)
-EXT(ARB_viewport_array                      , ARB_viewport_array                     , GLC                  , 2010)
-EXT(ARB_window_pos                          , dummy_true                             , GLL                  , 2001)
+EXT(ARB_ES2_compatibility                   , ARB_ES2_compatibility                  , GL             ,  0,  0,  0,  0, 2009)
+EXT(ARB_ES3_compatibility                   , ARB_ES3_compatibility                  , GL             ,  0,  0,  0,  0, 2012)
+EXT(ARB_arrays_of_arrays                    , ARB_arrays_of_arrays                   , GL             ,  0,  0,  0,  0, 2012)
+EXT(ARB_base_instance                       , ARB_base_instance                      , GL             ,  0,  0,  0,  0, 2011)
+EXT(ARB_blend_func_extended                 , ARB_blend_func_extended                , GL             ,  0,  0,  0,  0, 2009)
+EXT(ARB_buffer_storage                      , ARB_buffer_storage                     , GL             ,  0,  0,  0,  0, 2013)
+EXT(ARB_clear_buffer_object                 , dummy_true                             , GL             ,  0,  0,  0,  0, 2012)
+EXT(ARB_clear_texture                       , ARB_clear_texture                      , GL             ,  0,  0,  0,  0, 2013)
+EXT(ARB_clip_control                        , ARB_clip_control                       , GL             ,  0,  0,  0,  0, 2014)
+EXT(ARB_color_buffer_float                  , ARB_color_buffer_float                 , GL             ,  0,  0,  0,  0, 2004)
+EXT(ARB_compressed_texture_pixel_storage    , dummy_true                             , GL             ,  0,  0,  0,  0, 2011)
+EXT(ARB_compute_shader                      , ARB_compute_shader                     , GL             ,  0,  0,  0,  0, 2012)
+EXT(ARB_conditional_render_inverted         , ARB_conditional_render_inverted        , GL             ,  0,  0,  0,  0, 2014)
+EXT(ARB_copy_buffer                         , dummy_true                             , GL             ,  0,  0,  0,  0, 2008)
+EXT(ARB_copy_image                          , ARB_copy_image                         , GL             ,  0,  0,  0,  0, 2012)
+EXT(ARB_conservative_depth                  , ARB_conservative_depth                 , GL             ,  0,  0,  0,  0, 2011)
+EXT(ARB_debug_output                        , dummy_true                             , GL             ,  0,  0,  0,  0, 2009)
+EXT(ARB_depth_buffer_float                  , ARB_depth_buffer_float                 , GL             ,  0,  0,  0,  0, 2008)
+EXT(ARB_depth_clamp                         , ARB_depth_clamp                        , GL             ,  0,  0,  0,  0, 2003)
+EXT(ARB_depth_texture                       , ARB_depth_texture                      , GLL            ,  0,  0,  0,  0, 2001)
+EXT(ARB_derivative_control                  , ARB_derivative_control                 , GL             ,  0,  0,  0,  0, 2014)
+EXT(ARB_direct_state_access                 , dummy_true                             , GLC            ,  0,  0,  0,  0, 2014)
+EXT(ARB_draw_buffers                        , dummy_true                             , GL             ,  0,  0,  0,  0, 2002)
+EXT(ARB_draw_buffers_blend                  , ARB_draw_buffers_blend                 , GL             ,  0,  0,  0,  0, 2009)
+EXT(ARB_draw_elements_base_vertex           , ARB_draw_elements_base_vertex          , GL             ,  0,  0,  0,  0, 2009)
+EXT(ARB_draw_indirect                       , ARB_draw_indirect                      , GLC            ,  0,  0,  0,  0, 2010)
+EXT(ARB_draw_instanced                      , ARB_draw_instanced                     , GL             ,  0,  0,  0,  0, 2008)
+EXT(ARB_enhanced_layouts                    , ARB_enhanced_layouts                   , GLC            ,  0,  0,  0,  0, 2013)
+EXT(ARB_explicit_attrib_location            , ARB_explicit_attrib_location           , GL             ,  0,  0,  0,  0, 2009)
+EXT(ARB_explicit_uniform_location           , ARB_explicit_uniform_location          , GL             ,  0,  0,  0,  0, 2012)
+EXT(ARB_fragment_coord_conventions          , ARB_fragment_coord_conventions         , GL             ,  0,  0,  0,  0, 2009)
+EXT(ARB_fragment_layer_viewport             , ARB_fragment_layer_viewport            , GLC            ,  0,  0,  0,  0, 2012)
+EXT(ARB_fragment_program                    , ARB_fragment_program                   , GLL            ,  0,  0,  0,  0, 2002)
+EXT(ARB_fragment_program_shadow             , ARB_fragment_program_shadow            , GLL            ,  0,  0,  0,  0, 2003)
+EXT(ARB_fragment_shader                     , ARB_fragment_shader                    , GL             ,  0,  0,  0,  0, 2002)
+EXT(ARB_framebuffer_no_attachments          , ARB_framebuffer_no_attachments         , GL             ,  0,  0,  0,  0, 2012)
+EXT(ARB_framebuffer_object                  , ARB_framebuffer_object                 , GL             ,  0,  0,  0,  0, 2005)
+EXT(ARB_framebuffer_sRGB                    , EXT_framebuffer_sRGB                   , GL             ,  0,  0,  0,  0, 1998)
+EXT(ARB_get_program_binary                  , dummy_true                             , GL             ,  0,  0,  0,  0, 2010)
+EXT(ARB_get_texture_sub_image               , dummy_true                             , GL             ,  0,  0,  0,  0, 2014)
+EXT(ARB_gpu_shader5                         , ARB_gpu_shader5                        , GLC            ,  0,  0,  0,  0, 2010)
+EXT(ARB_gpu_shader_fp64                     , ARB_gpu_shader_fp64                    , GLC            ,  0,  0,  0,  0, 2010)
+EXT(ARB_half_float_pixel                    , dummy_true                             , GL             ,  0,  0,  0,  0, 2003)
+EXT(ARB_half_float_vertex                   , ARB_half_float_vertex                  , GL             ,  0,  0,  0,  0, 2008)
+EXT(ARB_instanced_arrays                    , ARB_instanced_arrays                   , GL             ,  0,  0,  0,  0, 2008)
+EXT(ARB_internalformat_query                , ARB_internalformat_query               , GL             ,  0,  0,  0,  0, 2011)
+EXT(ARB_invalidate_subdata                  , dummy_true                             , GL             ,  0,  0,  0,  0, 2012)
+EXT(ARB_map_buffer_alignment                , dummy_true                             , GL             ,  0,  0,  0,  0, 2011)
+EXT(ARB_map_buffer_range                    , ARB_map_buffer_range                   , GL             ,  0,  0,  0,  0, 2008)
+EXT(ARB_multi_bind                          , dummy_true                             , GL             ,  0,  0,  0,  0, 2013)
+EXT(ARB_multi_draw_indirect                 , ARB_draw_indirect                      , GLC            ,  0,  0,  0,  0, 2012)
+EXT(ARB_multisample                         , dummy_true                             , GLL            ,  0,  0,  0,  0, 1994)
+EXT(ARB_multitexture                        , dummy_true                             , GLL            ,  0,  0,  0,  0, 1998)
+EXT(ARB_occlusion_query2                    , ARB_occlusion_query2                   , GL             ,  0,  0,  0,  0, 2003)
+EXT(ARB_occlusion_query                     , ARB_occlusion_query                    , GLL            ,  0,  0,  0,  0, 2001)
+EXT(ARB_pipeline_statistics_query           , ARB_pipeline_statistics_query          , GL             ,  0,  0,  0,  0, 2014)
+EXT(ARB_pixel_buffer_object                 , EXT_pixel_buffer_object                , GL             ,  0,  0,  0,  0, 2004)
+EXT(ARB_point_parameters                    , EXT_point_parameters                   , GLL            ,  0,  0,  0,  0, 1997)
+EXT(ARB_point_sprite                        , ARB_point_sprite                       , GL             ,  0,  0,  0,  0, 2003)
+EXT(ARB_program_interface_query             , dummy_true                             , GL             ,  0,  0,  0,  0, 2012)
+EXT(ARB_provoking_vertex                    , EXT_provoking_vertex                   , GL             ,  0,  0,  0,  0, 2009)
+EXT(ARB_robustness                          , dummy_true                             , GL             ,  0,  0,  0,  0, 2010)
+EXT(ARB_sample_shading                      , ARB_sample_shading                     , GL             ,  0,  0,  0,  0, 2009)
+EXT(ARB_sampler_objects                     , dummy_true                             , GL             ,  0,  0,  0,  0, 2009)
+EXT(ARB_seamless_cube_map                   , ARB_seamless_cube_map                  , GL             ,  0,  0,  0,  0, 2009)
+EXT(ARB_seamless_cubemap_per_texture        , AMD_seamless_cubemap_per_texture       , GL             ,  0,  0,  0,  0, 2013)
+EXT(ARB_separate_shader_objects             , dummy_true                             , GL             ,  0,  0,  0,  0, 2010)
+EXT(ARB_shader_atomic_counters              , ARB_shader_atomic_counters             , GL             ,  0,  0,  0,  0, 2011)
+EXT(ARB_shader_bit_encoding                 , ARB_shader_bit_encoding                , GL             ,  0,  0,  0,  0, 2010)
+EXT(ARB_shader_clock                        , ARB_shader_clock                       , GL             ,  0,  0,  0,  0, 2015)
+EXT(ARB_shader_image_load_store             , ARB_shader_image_load_store            , GL             ,  0,  0,  0,  0, 2011)
+EXT(ARB_shader_image_size                   , ARB_shader_image_size                  , GL             ,  0,  0,  0,  0, 2012)
+EXT(ARB_shader_objects                      , dummy_true                             , GL             ,  0,  0,  0,  0, 2002)
+EXT(ARB_shader_precision                    , ARB_shader_precision                   , GL             ,  0,  0,  0,  0, 2010)
+EXT(ARB_shader_stencil_export               , ARB_shader_stencil_export              , GL             ,  0,  0,  0,  0, 2009)
+EXT(ARB_shader_storage_buffer_object        , ARB_shader_storage_buffer_object       , GL             ,  0,  0,  0,  0, 2012)
+EXT(ARB_shader_subroutine                   , ARB_shader_subroutine                  , GLC            ,  0,  0,  0,  0, 2010)
+EXT(ARB_shader_texture_image_samples        , ARB_shader_texture_image_samples       , GL             ,  0,  0,  0,  0, 2014)
+EXT(ARB_shader_texture_lod                  , ARB_shader_texture_lod                 , GL             ,  0,  0,  0,  0, 2009)
+EXT(ARB_shading_language_100                , dummy_true                             , GLL            ,  0,  0,  0,  0, 2003)
+EXT(ARB_shading_language_packing            , ARB_shading_language_packing           , GL             ,  0,  0,  0,  0, 2011)
+EXT(ARB_shading_language_420pack            , ARB_shading_language_420pack           , GL             ,  0,  0,  0,  0, 2011)
+EXT(ARB_shadow                              , ARB_shadow                             , GLL            ,  0,  0,  0,  0, 2001)
+EXT(ARB_stencil_texturing                   , ARB_stencil_texturing                  , GL             ,  0,  0,  0,  0, 2012)
+EXT(ARB_sync                                , ARB_sync                               , GL             ,  0,  0,  0,  0, 2003)
+EXT(ARB_texture_barrier                     , NV_texture_barrier                     , GL             ,  0,  0,  0,  0, 2014)
+EXT(ARB_tessellation_shader                 , ARB_tessellation_shader                , GLC            ,  0,  0,  0,  0, 2009)
+EXT(ARB_texture_border_clamp                , ARB_texture_border_clamp               , GLL            ,  0,  0,  0,  0, 2000)
+EXT(ARB_texture_buffer_object               , ARB_texture_buffer_object              , GLC            ,  0,  0,  0,  0, 2008)
+EXT(ARB_texture_buffer_object_rgb32         , ARB_texture_buffer_object_rgb32        , GLC            ,  0,  0,  0,  0, 2009)
+EXT(ARB_texture_buffer_range                , ARB_texture_buffer_range               , GLC            ,  0,  0,  0,  0, 2012)
+EXT(ARB_texture_compression                 , dummy_true                             , GLL            ,  0,  0,  0,  0, 2000)
+EXT(ARB_texture_compression_bptc            , ARB_texture_compression_bptc           , GL             ,  0,  0,  0,  0, 2010)
+EXT(ARB_texture_compression_rgtc            , ARB_texture_compression_rgtc           , GL             ,  0,  0,  0,  0, 2004)
+EXT(ARB_texture_cube_map                    , ARB_texture_cube_map                   , GLL            ,  0,  0,  0,  0, 1999)
+EXT(ARB_texture_cube_map_array              , ARB_texture_cube_map_array             , GL             ,  0,  0,  0,  0, 2009)
+EXT(ARB_texture_env_add                     , dummy_true                             , GLL            ,  0,  0,  0,  0, 1999)
+EXT(ARB_texture_env_combine                 , ARB_texture_env_combine                , GLL            ,  0,  0,  0,  0, 2001)
+EXT(ARB_texture_env_crossbar                , ARB_texture_env_crossbar               , GLL            ,  0,  0,  0,  0, 2001)
+EXT(ARB_texture_env_dot3                    , ARB_texture_env_dot3                   , GLL            ,  0,  0,  0,  0, 2001)
+EXT(ARB_texture_float                       , ARB_texture_float                      , GL             ,  0,  0,  0,  0, 2004)
+EXT(ARB_texture_gather                      , ARB_texture_gather                     , GL             ,  0,  0,  0,  0, 2009)
+EXT(ARB_texture_mirrored_repeat             , dummy_true                             , GLL            ,  0,  0,  0,  0, 2001)
+EXT(ARB_texture_mirror_clamp_to_edge        , ARB_texture_mirror_clamp_to_edge       , GL             ,  0,  0,  0,  0, 2013)
+EXT(ARB_texture_multisample                 , ARB_texture_multisample                , GL             ,  0,  0,  0,  0, 2009)
+EXT(ARB_texture_non_power_of_two            , ARB_texture_non_power_of_two           , GL             ,  0,  0,  0,  0, 2003)
+EXT(ARB_texture_query_levels                , ARB_texture_query_levels               , GL             ,  0,  0,  0,  0, 2012)
+EXT(ARB_texture_query_lod                   , ARB_texture_query_lod                  , GL             ,  0,  0,  0,  0, 2009)
+EXT(ARB_texture_rectangle                   , NV_texture_rectangle                   , GL             ,  0,  0,  0,  0, 2004)
+EXT(ARB_texture_rgb10_a2ui                  , ARB_texture_rgb10_a2ui                 , GL             ,  0,  0,  0,  0, 2009)
+EXT(ARB_texture_rg                          , ARB_texture_rg                         , GL             ,  0,  0,  0,  0, 2008)
+EXT(ARB_texture_stencil8                    , ARB_texture_stencil8                   , GL             ,  0,  0,  0,  0, 2013)
+EXT(ARB_texture_storage                     , dummy_true                             , GL             ,  0,  0,  0,  0, 2011)
+EXT(ARB_texture_storage_multisample         , ARB_texture_multisample                , GL             ,  0,  0,  0,  0, 2012)
+EXT(ARB_texture_view                        , ARB_texture_view                       , GL             ,  0,  0,  0,  0, 2012)
+EXT(ARB_texture_swizzle                     , EXT_texture_swizzle                    , GL             ,  0,  0,  0,  0, 2008)
+EXT(ARB_timer_query                         , ARB_timer_query                        , GL             ,  0,  0,  0,  0, 2010)
+EXT(ARB_transform_feedback2                 , ARB_transform_feedback2                , GL             ,  0,  0,  0,  0, 2010)
+EXT(ARB_transform_feedback3                 , ARB_transform_feedback3                , GL             ,  0,  0,  0,  0, 2010)
+EXT(ARB_transform_feedback_instanced        , ARB_transform_feedback_instanced       , GL             ,  0,  0,  0,  0, 2011)
+EXT(ARB_transpose_matrix                    , dummy_true                             , GLL            ,  0,  0,  0,  0, 1999)
+EXT(ARB_uniform_buffer_object               , ARB_uniform_buffer_object              , GL             ,  0,  0,  0,  0, 2009)
+EXT(ARB_vertex_array_bgra                   , EXT_vertex_array_bgra                  , GL             ,  0,  0,  0,  0, 2008)
+EXT(ARB_vertex_array_object                 , dummy_true                             , GL             ,  0,  0,  0,  0, 2006)
+EXT(ARB_vertex_attrib_binding               , dummy_true                             , GL             ,  0,  0,  0,  0, 2012)
+EXT(ARB_vertex_buffer_object                , dummy_true                             , GLL            ,  0,  0,  0,  0, 2003)
+EXT(ARB_vertex_program                      , ARB_vertex_program                     , GLL            ,  0,  0,  0,  0, 2002)
+EXT(ARB_vertex_shader                       , ARB_vertex_shader                      , GL             ,  0,  0,  0,  0, 2002)
+EXT(ARB_vertex_attrib_64bit                 , ARB_vertex_attrib_64bit                , GLC            ,  0,  0,  0,  0, 2010)
+EXT(ARB_vertex_type_10f_11f_11f_rev         , ARB_vertex_type_10f_11f_11f_rev        , GL             ,  0,  0,  0,  0, 2013)
+EXT(ARB_vertex_type_2_10_10_10_rev          , ARB_vertex_type_2_10_10_10_rev         , GL             ,  0,  0,  0,  0, 2009)
+EXT(ARB_viewport_array                      , ARB_viewport_array                     , GLC            ,  0,  0,  0,  0, 2010)
+EXT(ARB_window_pos                          , dummy_true                             , GLL            ,  0,  0,  0,  0, 2001)
 
-EXT(EXT_abgr                                , dummy_true                             , GL                   , 1995)
-EXT(EXT_bgra                                , dummy_true                             , GLL                  , 1995)
-EXT(EXT_blend_color                         , EXT_blend_color                        , GLL                  , 1995)
-EXT(EXT_blend_equation_separate             , EXT_blend_equation_separate            , GL                   , 2003)
-EXT(EXT_blend_func_separate                 , EXT_blend_func_separate                , GLL                  , 1999)
-EXT(EXT_buffer_storage                      , ARB_buffer_storage                     , ES31                 , 2015)
-EXT(EXT_discard_framebuffer                 , dummy_true                             ,       ES1 | ES2      , 2009)
-EXT(EXT_blend_minmax                        , EXT_blend_minmax                       , GLL | ES1 | ES2      , 1995)
-EXT(EXT_blend_subtract                      , dummy_true                             , GLL                  , 1995)
-EXT(EXT_compiled_vertex_array               , dummy_true                             , GLL                  , 1996)
-EXT(EXT_copy_texture                        , dummy_true                             , GLL                  , 1995)
-EXT(EXT_depth_bounds_test                   , EXT_depth_bounds_test                  , GL                   , 2002)
-EXT(EXT_draw_buffers                        , dummy_true                             ,             ES2      , 2012)
-EXT(EXT_draw_buffers2                       , EXT_draw_buffers2                      , GL                   , 2006)
-EXT(EXT_draw_elements_base_vertex           , ARB_draw_elements_base_vertex          , ES2                  , 2014)
-EXT(EXT_draw_instanced                      , ARB_draw_instanced                     , GL                   , 2006)
-EXT(EXT_draw_range_elements                 , dummy_true                             , GLL                  , 1997)
-EXT(EXT_fog_coord                           , dummy_true                             , GLL                  , 1999)
-EXT(EXT_framebuffer_blit                    , dummy_true                             , GL                   , 2005)
-EXT(EXT_framebuffer_multisample             , EXT_framebuffer_multisample            , GL                   , 2005)
-EXT(EXT_framebuffer_multisample_blit_scaled , EXT_framebuffer_multisample_blit_scaled, GL                   , 2011)
-EXT(EXT_framebuffer_object                  , dummy_true                             , GLL                  , 2000)
-EXT(EXT_framebuffer_sRGB                    , EXT_framebuffer_sRGB                   , GL                   , 1998)
-EXT(EXT_gpu_program_parameters              , EXT_gpu_program_parameters             , GLL                  , 2006)
-EXT(EXT_gpu_shader4                         , EXT_gpu_shader4                        , GL                   , 2006)
-EXT(EXT_map_buffer_range                    , ARB_map_buffer_range                   ,       ES1 | ES2      , 2012)
-EXT(EXT_multi_draw_arrays                   , dummy_true                             , GLL | ES1 | ES2      , 1999)
-EXT(EXT_packed_depth_stencil                , dummy_true                             , GL                   , 2005)
-EXT(EXT_packed_float                        , EXT_packed_float                       , GL                   , 2004)
-EXT(EXT_packed_pixels                       , dummy_true                             , GLL                  , 1997)
-EXT(EXT_pixel_buffer_object                 , EXT_pixel_buffer_object                , GL                   , 2004)
-EXT(EXT_point_parameters                    , EXT_point_parameters                   , GLL                  , 1997)
-EXT(EXT_polygon_offset                      , dummy_true                             , GLL                  , 1995)
-EXT(EXT_polygon_offset_clamp                , EXT_polygon_offset_clamp               , GL                   , 2014)
-EXT(EXT_provoking_vertex                    , EXT_provoking_vertex                   , GL                   , 2009)
-EXT(EXT_rescale_normal                      , dummy_true                             , GLL                  , 1997)
-EXT(EXT_secondary_color                     , dummy_true                             , GLL                  , 1999)
-EXT(EXT_separate_shader_objects             , dummy_true                             ,            ES2       , 2013)
-EXT(EXT_separate_specular_color             , dummy_true                             , GLL                  , 1997)
-EXT(EXT_shader_integer_mix                  , EXT_shader_integer_mix                 , GL       | ES3       , 2013)
-EXT(EXT_shadow_funcs                        , ARB_shadow                             , GLL                  , 2002)
-EXT(EXT_stencil_two_side                    , EXT_stencil_two_side                   , GLL                  , 2001)
-EXT(EXT_stencil_wrap                        , dummy_true                             , GLL                  , 2002)
-EXT(EXT_subtexture                          , dummy_true                             , GLL                  , 1995)
-EXT(EXT_texture3D                           , EXT_texture3D                          , GLL                  , 1996)
-EXT(EXT_texture_array                       , EXT_texture_array                      , GL                   , 2006)
-EXT(EXT_texture_compression_dxt1            , ANGLE_texture_compression_dxt          , GL | ES1 | ES2       , 2004)
-EXT(ANGLE_texture_compression_dxt3          , ANGLE_texture_compression_dxt          , GL | ES1 | ES2       , 2011)
-EXT(ANGLE_texture_compression_dxt5          , ANGLE_texture_compression_dxt          , GL | ES1 | ES2       , 2011)
-EXT(EXT_texture_compression_latc            , EXT_texture_compression_latc           , GLL                  , 2006)
-EXT(EXT_texture_compression_rgtc            , ARB_texture_compression_rgtc           , GL                   , 2004)
-EXT(EXT_texture_compression_s3tc            , EXT_texture_compression_s3tc           , GL                   , 2000)
-EXT(EXT_texture_cube_map                    , ARB_texture_cube_map                   , GLL                  , 2001)
-EXT(EXT_texture_edge_clamp                  , dummy_true                             , GLL                  , 1997)
-EXT(EXT_texture_env_add                     , dummy_true                             , GLL                  , 1999)
-EXT(EXT_texture_env_combine                 , dummy_true                             , GLL                  , 2000)
-EXT(EXT_texture_env_dot3                    , EXT_texture_env_dot3                   , GLL                  , 2000)
-EXT(EXT_texture_filter_anisotropic          , EXT_texture_filter_anisotropic         , GL | ES1 | ES2       , 1999)
-EXT(EXT_texture_format_BGRA8888             , dummy_true                             ,      ES1 | ES2       , 2005)
-EXT(EXT_texture_rg                          , ARB_texture_rg                         ,            ES2       , 2011)
-EXT(EXT_read_format_bgra                    , dummy_true                             ,      ES1 | ES2       , 2009)
-EXT(EXT_texture_integer                     , EXT_texture_integer                    , GL                   , 2006)
-EXT(EXT_texture_lod_bias                    , dummy_true                             , GLL | ES1            , 1999)
-EXT(EXT_texture_mirror_clamp                , EXT_texture_mirror_clamp               , GL                   , 2004)
-EXT(EXT_texture_object                      , dummy_true                             , GLL                  , 1995)
-EXT(EXT_texture                             , dummy_true                             , GLL                  , 1996)
-EXT(EXT_texture_rectangle                   , NV_texture_rectangle                   , GLL                  , 2004)
-EXT(EXT_texture_shared_exponent             , EXT_texture_shared_exponent            , GL                   , 2004)
-EXT(EXT_texture_snorm                       , EXT_texture_snorm                      , GL                   , 2009)
-EXT(EXT_texture_sRGB                        , EXT_texture_sRGB                       , GL                   , 2004)
-EXT(EXT_texture_sRGB_decode                 , EXT_texture_sRGB_decode                , GL                   , 2006)
-EXT(EXT_texture_swizzle                     , EXT_texture_swizzle                    , GL                   , 2008)
-EXT(EXT_texture_type_2_10_10_10_REV         , dummy_true                             ,            ES2       , 2008)
-EXT(EXT_timer_query                         , EXT_timer_query                        , GL                   , 2006)
-EXT(EXT_transform_feedback                  , EXT_transform_feedback                 , GL                   , 2011)
-EXT(EXT_unpack_subimage                     , dummy_true                             ,            ES2       , 2011)
-EXT(EXT_vertex_array_bgra                   , EXT_vertex_array_bgra                  , GL                   , 2008)
-EXT(EXT_vertex_array                        , dummy_true                             , GLL                  , 1995)
-EXT(EXT_color_buffer_float                  , dummy_true                             ,                  ES3 , 2013)
+EXT(EXT_abgr                                , dummy_true                             , GL             ,  0,  0,  0,  0, 1995)
+EXT(EXT_bgra                                , dummy_true                             , GLL            ,  0,  0,  0,  0, 1995)
+EXT(EXT_blend_color                         , EXT_blend_color                        , GLL            ,  0,  0,  0,  0, 1995)
+EXT(EXT_blend_equation_separate             , EXT_blend_equation_separate            , GL             ,  0,  0,  0,  0, 2003)
+EXT(EXT_blend_func_separate                 , EXT_blend_func_separate                , GLL            ,  0,  0,  0,  0, 1999)
+EXT(EXT_discard_framebuffer                 , dummy_true                             ,       ES1 | ES2,  0,  0,  0,  0, 2009)
+EXT(EXT_blend_minmax                        , EXT_blend_minmax                       , GLL | ES1 | ES2,  0,  0,  0,  0, 1995)
+EXT(EXT_blend_subtract                      , dummy_true                             , GLL            ,  0,  0,  0,  0, 1995)
+EXT(EXT_buffer_storage                      , ARB_buffer_storage                     , ES2            ,  0,  0,  0, 31, 2015)
+EXT(EXT_compiled_vertex_array               , dummy_true                             , GLL            ,  0,  0,  0,  0, 1996)
+EXT(EXT_copy_texture                        , dummy_true                             , GLL            ,  0,  0,  0,  0, 1995)
+EXT(EXT_depth_bounds_test                   , EXT_depth_bounds_test                  , GL             ,  0,  0,  0,  0, 2002)
+EXT(EXT_draw_buffers                        , dummy_true                             ,             ES2,  0,  0,  0,  0, 2012)
+EXT(EXT_draw_buffers2                       , EXT_draw_buffers2                      , GL             ,  0,  0,  0,  0, 2006)
+EXT(EXT_draw_elements_base_vertex           , ARB_draw_elements_base_vertex          , ES2            ,  0,  0,  0,  0, 2014)
+EXT(EXT_draw_instanced                      , ARB_draw_instanced                     , GL             ,  0,  0,  0,  0, 2006)
+EXT(EXT_draw_range_elements                 , dummy_true                             , GLL            ,  0,  0,  0,  0, 1997)
+EXT(EXT_fog_coord                           , dummy_true                             , GLL            ,  0,  0,  0,  0, 1999)
+EXT(EXT_framebuffer_blit                    , dummy_true                             , GL             ,  0,  0,  0,  0, 2005)
+EXT(EXT_framebuffer_multisample             , EXT_framebuffer_multisample            , GL             ,  0,  0,  0,  0, 2005)
+EXT(EXT_framebuffer_multisample_blit_scaled , EXT_framebuffer_multisample_blit_scaled, GL             ,  0,  0,  0,  0, 2011)
+EXT(EXT_framebuffer_object                  , dummy_true                             , GLL            ,  0,  0,  0,  0, 2000)
+EXT(EXT_framebuffer_sRGB                    , EXT_framebuffer_sRGB                   , GL             ,  0,  0,  0,  0, 1998)
+EXT(EXT_gpu_program_parameters              , EXT_gpu_program_parameters             , GLL            ,  0,  0,  0,  0, 2006)
+EXT(EXT_gpu_shader4                         , EXT_gpu_shader4                        , GL             ,  0,  0,  0,  0, 2006)
+EXT(EXT_map_buffer_range                    , ARB_map_buffer_range                   ,       ES1 | ES2,  0,  0,  0,  0, 2012)
+EXT(EXT_multi_draw_arrays                   , dummy_true                             , GLL | ES1 | ES2,  0,  0,  0,  0, 1999)
+EXT(EXT_packed_depth_stencil                , dummy_true                             , GL             ,  0,  0,  0,  0, 2005)
+EXT(EXT_packed_float                        , EXT_packed_float                       , GL             ,  0,  0,  0,  0, 2004)
+EXT(EXT_packed_pixels                       , dummy_true                             , GLL            ,  0,  0,  0,  0, 1997)
+EXT(EXT_pixel_buffer_object                 , EXT_pixel_buffer_object                , GL             ,  0,  0,  0,  0, 2004)
+EXT(EXT_point_parameters                    , EXT_point_parameters                   , GLL            ,  0,  0,  0,  0, 1997)
+EXT(EXT_polygon_offset                      , dummy_true                             , GLL            ,  0,  0,  0,  0, 1995)
+EXT(EXT_polygon_offset_clamp                , EXT_polygon_offset_clamp               , GL             ,  0,  0,  0,  0, 2014)
+EXT(EXT_provoking_vertex                    , EXT_provoking_vertex                   , GL             ,  0,  0,  0,  0, 2009)
+EXT(EXT_rescale_normal                      , dummy_true                             , GLL            ,  0,  0,  0,  0, 1997)
+EXT(EXT_secondary_color                     , dummy_true                             , GLL            ,  0,  0,  0,  0, 1999)
+EXT(EXT_separate_shader_objects             , dummy_true                             ,            ES2 ,  0,  0,  0,  0, 2013)
+EXT(EXT_separate_specular_color             , dummy_true                             , GLL            ,  0,  0,  0,  0, 1997)
+EXT(EXT_shader_integer_mix                  , EXT_shader_integer_mix                 , GL       | ES3 ,  0,  0,  0,  0, 2013)
+EXT(EXT_shadow_funcs                        , ARB_shadow                             , GLL            ,  0,  0,  0,  0, 2002)
+EXT(EXT_stencil_two_side                    , EXT_stencil_two_side                   , GLL            ,  0,  0,  0,  0, 2001)
+EXT(EXT_stencil_wrap                        , dummy_true                             , GLL            ,  0,  0,  0,  0, 2002)
+EXT(EXT_subtexture                          , dummy_true                             , GLL            ,  0,  0,  0,  0, 1995)
+EXT(EXT_texture3D                           , EXT_texture3D                          , GLL            ,  0,  0,  0,  0, 1996)
+EXT(EXT_texture_array                       , EXT_texture_array                      , GL             ,  0,  0,  0,  0, 2006)
+EXT(EXT_texture_compression_dxt1            , ANGLE_texture_compression_dxt          , GL | ES1 | ES2 ,  0,  0,  0,  0, 2004)
+EXT(ANGLE_texture_compression_dxt3          , ANGLE_texture_compression_dxt          , GL | ES1 | ES2 ,  0,  0,  0,  0, 2011)
+EXT(ANGLE_texture_compression_dxt5          , ANGLE_texture_compression_dxt          , GL | ES1 | ES2 ,  0,  0,  0,  0, 2011)
+EXT(EXT_texture_compression_latc            , EXT_texture_compression_latc           , GLL            ,  0,  0,  0,  0, 2006)
+EXT(EXT_texture_compression_rgtc            , ARB_texture_compression_rgtc           , GL             ,  0,  0,  0,  0, 2004)
+EXT(EXT_texture_compression_s3tc            , EXT_texture_compression_s3tc           , GL             ,  0,  0,  0,  0, 2000)
+EXT(EXT_texture_cube_map                    , ARB_texture_cube_map                   , GLL            ,  0,  0,  0,  0, 2001)
+EXT(EXT_texture_edge_clamp                  , dummy_true                             , GLL            ,  0,  0,  0,  0, 1997)
+EXT(EXT_texture_env_add                     , dummy_true                             , GLL            ,  0,  0,  0,  0, 1999)
+EXT(EXT_texture_env_combine                 , dummy_true                             , GLL            ,  0,  0,  0,  0, 2000)
+EXT(EXT_texture_env_dot3                    , EXT_texture_env_dot3                   , GLL            ,  0,  0,  0,  0, 2000)
+EXT(EXT_texture_filter_anisotropic          , EXT_texture_filter_anisotropic         , GL | ES1 | ES2 ,  0,  0,  0,  0, 1999)
+EXT(EXT_texture_format_BGRA8888             , dummy_true                             ,      ES1 | ES2 ,  0,  0,  0,  0, 2005)
+EXT(EXT_texture_rg                          , ARB_texture_rg                         ,            ES2 ,  0,  0,  0,  0, 2011)
+EXT(EXT_read_format_bgra                    , dummy_true                             ,      ES1 | ES2 ,  0,  0,  0,  0, 2009)
+EXT(EXT_texture_integer                     , EXT_texture_integer                    , GL             ,  0,  0,  0,  0, 2006)
+EXT(EXT_texture_lod_bias                    , dummy_true                             , GLL | ES1      ,  0,  0,  0,  0, 1999)
+EXT(EXT_texture_mirror_clamp                , EXT_texture_mirror_clamp               , GL             ,  0,  0,  0,  0, 2004)
+EXT(EXT_texture_object                      , dummy_true                             , GLL            ,  0,  0,  0,  0, 1995)
+EXT(EXT_texture                             , dummy_true                             , GLL            ,  0,  0,  0,  0, 1996)
+EXT(EXT_texture_rectangle                   , NV_texture_rectangle                   , GLL            ,  0,  0,  0,  0, 2004)
+EXT(EXT_texture_shared_exponent             , EXT_texture_shared_exponent            , GL             ,  0,  0,  0,  0, 2004)
+EXT(EXT_texture_snorm                       , EXT_texture_snorm                      , GL             ,  0,  0,  0,  0, 2009)
+EXT(EXT_texture_sRGB                        , EXT_texture_sRGB                       , GL             ,  0,  0,  0,  0, 2004)
+EXT(EXT_texture_sRGB_decode                 , EXT_texture_sRGB_decode                , GL             ,  0,  0,  0,  0, 2006)
+EXT(EXT_texture_swizzle                     , EXT_texture_swizzle                    , GL             ,  0,  0,  0,  0, 2008)
+EXT(EXT_texture_type_2_10_10_10_REV         , dummy_true                             ,            ES2 ,  0,  0,  0,  0, 2008)
+EXT(EXT_timer_query                         , EXT_timer_query                        , GL             ,  0,  0,  0,  0, 2006)
+EXT(EXT_transform_feedback                  , EXT_transform_feedback                 , GL             ,  0,  0,  0,  0, 2011)
+EXT(EXT_unpack_subimage                     , dummy_true                             ,            ES2 ,  0,  0,  0,  0, 2011)
+EXT(EXT_vertex_array_bgra                   , EXT_vertex_array_bgra                  , GL             ,  0,  0,  0,  0, 2008)
+EXT(EXT_vertex_array                        , dummy_true                             , GLL            ,  0,  0,  0,  0, 1995)
+EXT(EXT_color_buffer_float                  , dummy_true                             ,            ES3 ,  0,  0,  0,  0, 2013)
 
 
-EXT(OES_blend_equation_separate             , EXT_blend_equation_separate            ,      ES1             , 2009)
-EXT(OES_blend_func_separate                 , EXT_blend_func_separate                ,      ES1             , 2009)
-EXT(OES_blend_subtract                      , dummy_true                             ,      ES1             , 2009)
-EXT(OES_byte_coordinates                    , dummy_true                             ,      ES1             , 2002)
-EXT(OES_compressed_ETC1_RGB8_texture        , OES_compressed_ETC1_RGB8_texture       ,      ES1 | ES2       , 2005)
-EXT(OES_compressed_paletted_texture         , dummy_true                             ,      ES1             , 2003)
-EXT(OES_depth24                             , dummy_true                             ,      ES1 | ES2       , 2005)
-EXT(OES_depth32                             , dummy_false                            ,       DISABLE        , 2005)
-EXT(OES_depth_texture                       , ARB_depth_texture                      ,            ES2       , 2006)
-EXT(OES_depth_texture_cube_map              , OES_depth_texture_cube_map             ,            ES2       , 2012)
-EXT(OES_draw_elements_base_vertex           , ARB_draw_elements_base_vertex          , ES2                  , 2014)
-EXT(OES_draw_texture                        , OES_draw_texture                       ,      ES1             , 2004)
-EXT(OES_EGL_sync                            , dummy_true                             ,      ES1 | ES2       , 2010)
-EXT(OES_EGL_image                           , OES_EGL_image                          , GL | ES1 | ES2       , 2006) /* FIXME: Mesa expects GL_OES_EGL_image to be available in OpenGL contexts. */
-EXT(OES_EGL_image_external                  , OES_EGL_image_external                 ,      ES1 | ES2       , 2010)
-EXT(OES_element_index_uint                  , dummy_true                             ,      ES1 | ES2       , 2005)
-EXT(OES_fbo_render_mipmap                   , dummy_true                             ,      ES1 | ES2       , 2005)
-EXT(OES_fixed_point                         , dummy_true                             ,      ES1             , 2002)
-EXT(OES_framebuffer_object                  , dummy_true                             ,      ES1             , 2005)
-EXT(OES_get_program_binary                  , dummy_true                             ,            ES2       , 2008)
-EXT(OES_mapbuffer                           , dummy_true                             ,      ES1 | ES2       , 2005)
-EXT(OES_packed_depth_stencil                , dummy_true                             ,      ES1 | ES2       , 2007)
-EXT(OES_point_size_array                    , dummy_true                             ,      ES1             , 2004)
-EXT(OES_point_sprite                        , ARB_point_sprite                       ,      ES1             , 2004)
-EXT(OES_query_matrix                        , dummy_true                             ,      ES1             , 2003)
-EXT(OES_read_format                         , dummy_true                             , GL | ES1             , 2003)
-EXT(OES_rgb8_rgba8                          , dummy_true                             ,      ES1 | ES2       , 2005)
-EXT(OES_single_precision                    , dummy_true                             ,      ES1             , 2003)
-EXT(OES_standard_derivatives                , OES_standard_derivatives               ,            ES2       , 2005)
-EXT(OES_stencil1                            , dummy_false                            ,       DISABLE        , 2005)
-EXT(OES_stencil4                            , dummy_false                            ,       DISABLE        , 2005)
-EXT(OES_stencil8                            , dummy_true                             ,      ES1 | ES2       , 2005)
-EXT(OES_stencil_wrap                        , dummy_true                             ,      ES1             , 2002)
-EXT(OES_surfaceless_context                 , dummy_true                             ,      ES1 | ES2       , 2012)
-EXT(OES_texture_3D                          , EXT_texture3D                          ,            ES2       , 2005)
-EXT(OES_texture_cube_map                    , ARB_texture_cube_map                   ,      ES1             , 2007)
-EXT(OES_texture_env_crossbar                , ARB_texture_env_crossbar               ,      ES1             , 2005)
-EXT(OES_texture_float                       , OES_texture_float                      ,            ES2       , 2005)
-EXT(OES_texture_float_linear                , OES_texture_float_linear               ,            ES2       , 2005)
-EXT(OES_texture_half_float                  , OES_texture_half_float                 ,            ES2       , 2005)
-EXT(OES_texture_half_float_linear           , OES_texture_half_float_linear          ,            ES2       , 2005)
-EXT(OES_texture_mirrored_repeat             , dummy_true                             ,      ES1             , 2005)
-EXT(OES_texture_storage_multisample_2d_array, ARB_texture_multisample                ,           ES31       , 2014)
-EXT(OES_texture_npot                        , ARB_texture_non_power_of_two           ,      ES1 | ES2       , 2005)
-EXT(OES_vertex_array_object                 , dummy_true                             ,      ES1 | ES2       , 2010)
+EXT(OES_blend_equation_separate             , EXT_blend_equation_separate            ,      ES1       ,  0,  0,  0,  0, 2009)
+EXT(OES_blend_func_separate                 , EXT_blend_func_separate                ,      ES1       ,  0,  0,  0,  0, 2009)
+EXT(OES_blend_subtract                      , dummy_true                             ,      ES1       ,  0,  0,  0,  0, 2009)
+EXT(OES_byte_coordinates                    , dummy_true                             ,      ES1       ,  0,  0,  0,  0, 2002)
+EXT(OES_compressed_ETC1_RGB8_texture        , OES_compressed_ETC1_RGB8_texture       ,      ES1 | ES2 ,  0,  0,  0,  0, 2005)
+EXT(OES_compressed_paletted_texture         , dummy_true                             ,      ES1       ,  0,  0,  0,  0, 2003)
+EXT(OES_depth24                             , dummy_true                             ,      ES1 | ES2 ,  0,  0,  0,  0, 2005)
+EXT(OES_depth32                             , dummy_false                            ,       DISABLE  ,  0,  0,  0,  0, 2005)
+EXT(OES_depth_texture                       , ARB_depth_texture                      ,            ES2 ,  0,  0,  0,  0, 2006)
+EXT(OES_depth_texture_cube_map              , OES_depth_texture_cube_map             ,            ES2 ,  0,  0,  0,  0, 2012)
+EXT(OES_draw_elements_base_vertex           , ARB_draw_elements_base_vertex          , ES2            ,  0,  0,  0,  0, 2014)
+EXT(OES_draw_texture                        , OES_draw_texture                       ,      ES1       ,  0,  0,  0,  0, 2004)
+EXT(OES_EGL_sync                            , dummy_true                             ,      ES1 | ES2 ,  0,  0,  0,  0, 2010)
+EXT(OES_EGL_image                           , OES_EGL_image                          , GL | ES1 | ES2 ,  0,  0,  0,  0, 2006) /* FIXME: Mesa expects GL_OES_EGL_image to be available in OpenGL contexts. */
+EXT(OES_EGL_image_external                  , OES_EGL_image_external                 ,      ES1 | ES2 ,  0,  0,  0,  0, 2010)
+EXT(OES_element_index_uint                  , dummy_true                             ,      ES1 | ES2 ,  0,  0,  0,  0, 2005)
+EXT(OES_fbo_render_mipmap                   , dummy_true                             ,      ES1 | ES2 ,  0,  0,  0,  0, 2005)
+EXT(OES_fixed_point                         , dummy_true                             ,      ES1       ,  0,  0,  0,  0, 2002)
+EXT(OES_framebuffer_object                  , dummy_true                             ,      ES1       ,  0,  0,  0,  0, 2005)
+EXT(OES_get_program_binary                  , dummy_true                             ,            ES2 ,  0,  0,  0,  0, 2008)
+EXT(OES_mapbuffer                           , dummy_true                             ,      ES1 | ES2 ,  0,  0,  0,  0, 2005)
+EXT(OES_packed_depth_stencil                , dummy_true                             ,      ES1 | ES2 ,  0,  0,  0,  0, 2007)
+EXT(OES_point_size_array                    , dummy_true                             ,      ES1       ,  0,  0,  0,  0, 2004)
+EXT(OES_point_sprite                        , ARB_point_sprite                       ,      ES1       ,  0,  0,  0,  0, 2004)
+EXT(OES_query_matrix                        , dummy_true                             ,      ES1       ,  0,  0,  0,  0, 2003)
+EXT(OES_read_format                         , dummy_true                             , GL | ES1       ,  0,  0,  0,  0, 2003)
+EXT(OES_rgb8_rgba8                          , dummy_true                             ,      ES1 | ES2 ,  0,  0,  0,  0, 2005)
+EXT(OES_single_precision                    , dummy_true                             ,      ES1       ,  0,  0,  0,  0, 2003)
+EXT(OES_standard_derivatives                , OES_standard_derivatives               ,            ES2 ,  0,  0,  0,  0, 2005)
+EXT(OES_stencil1                            , dummy_false                            ,       DISABLE  ,  0,  0,  0,  0, 2005)
+EXT(OES_stencil4                            , dummy_false                            ,       DISABLE  ,  0,  0,  0,  0, 2005)
+EXT(OES_stencil8                            , dummy_true                             ,      ES1 | ES2 ,  0,  0,  0,  0, 2005)
+EXT(OES_stencil_wrap                        , dummy_true                             ,      ES1       ,  0,  0,  0,  0, 2002)
+EXT(OES_surfaceless_context                 , dummy_true                             ,      ES1 | ES2 ,  0,  0,  0,  0, 2012)
+EXT(OES_texture_3D                          , EXT_texture3D                          ,            ES2 ,  0,  0,  0,  0, 2005)
+EXT(OES_texture_cube_map                    , ARB_texture_cube_map                   ,      ES1       ,  0,  0,  0,  0, 2007)
+EXT(OES_texture_env_crossbar                , ARB_texture_env_crossbar               ,      ES1       ,  0,  0,  0,  0, 2005)
+EXT(OES_texture_float                       , OES_texture_float                      ,            ES2 ,  0,  0,  0,  0, 2005)
+EXT(OES_texture_float_linear                , OES_texture_float_linear               ,            ES2 ,  0,  0,  0,  0, 2005)
+EXT(OES_texture_half_float                  , OES_texture_half_float                 ,            ES2 ,  0,  0,  0,  0, 2005)
+EXT(OES_texture_half_float_linear           , OES_texture_half_float_linear          ,            ES2 ,  0,  0,  0,  0, 2005)
+EXT(OES_texture_mirrored_repeat             , dummy_true                             ,      ES1       ,  0,  0,  0,  0, 2005)
+EXT(OES_texture_storage_multisample_2d_array, ARB_texture_multisample                ,           ES31 ,  0,  0,  0,  0, 2014)
+EXT(OES_texture_npot                        , ARB_texture_non_power_of_two           ,      ES1 | ES2 ,  0,  0,  0,  0, 2005)
+EXT(OES_vertex_array_object                 , dummy_true                             ,      ES1 | ES2 ,  0,  0,  0,  0, 2010)
 
 
-EXT(KHR_debug                               , dummy_true                             , GL                   , 2012)
-EXT(KHR_context_flush_control               , dummy_true                             , GL       | ES2       , 2014)
-EXT(KHR_texture_compression_astc_hdr        , KHR_texture_compression_astc_hdr       , GL       | ES2       , 2012)
-EXT(KHR_texture_compression_astc_ldr        , KHR_texture_compression_astc_ldr       , GL       | ES2       , 2012)
+EXT(KHR_debug                               , dummy_true                             , GL             ,  0,  0,  0,  0, 2012)
+EXT(KHR_context_flush_control               , dummy_true                             , GL       | ES2 ,  0,  0,  0,  0, 2014)
+EXT(KHR_texture_compression_astc_hdr        , KHR_texture_compression_astc_hdr       , GL       | ES2 ,  0,  0,  0,  0, 2012)
+EXT(KHR_texture_compression_astc_ldr        , KHR_texture_compression_astc_ldr       , GL       | ES2 ,  0,  0,  0,  0, 2012)
 
 
-EXT(3DFX_texture_compression_FXT1           , TDFX_texture_compression_FXT1          , GL                   , 1999)
-EXT(AMD_conservative_depth                  , ARB_conservative_depth                 , GL                   , 2009)
-EXT(AMD_draw_buffers_blend                  , ARB_draw_buffers_blend                 , GL                   , 2009)
-EXT(AMD_performance_monitor                 , AMD_performance_monitor                , GL                   , 2007)
-EXT(AMD_pinned_memory                       , AMD_pinned_memory                      , GL                   , 2013)
-EXT(AMD_seamless_cubemap_per_texture        , AMD_seamless_cubemap_per_texture       , GL                   , 2009)
-EXT(AMD_shader_stencil_export               , ARB_shader_stencil_export              , GL                   , 2009)
-EXT(AMD_shader_trinary_minmax               , dummy_true                             , GL                   , 2012)
-EXT(AMD_vertex_shader_layer                 , AMD_vertex_shader_layer                , GLC                  , 2012)
-EXT(AMD_vertex_shader_viewport_index        , AMD_vertex_shader_viewport_index       , GLC                  , 2012)
-EXT(APPLE_object_purgeable                  , APPLE_object_purgeable                 , GL                   , 2006)
-EXT(APPLE_packed_pixels                     , dummy_true                             , GLL                  , 2002)
-EXT(APPLE_texture_max_level                 , dummy_true                             ,      ES1 | ES2       , 2009)
-EXT(APPLE_vertex_array_object               , dummy_true                             , GLL                  , 2002)
-EXT(ATI_blend_equation_separate             , EXT_blend_equation_separate            , GL                   , 2003)
-EXT(ATI_draw_buffers                        , dummy_true                             , GLL                  , 2002)
-EXT(ATI_fragment_shader                     , ATI_fragment_shader                    , GLL                  , 2001)
-EXT(ATI_separate_stencil                    , ATI_separate_stencil                   , GLL                  , 2006)
-EXT(ATI_texture_compression_3dc             , ATI_texture_compression_3dc            , GLL                  , 2004)
-EXT(ATI_texture_env_combine3                , ATI_texture_env_combine3               , GLL                  , 2002)
-EXT(ATI_texture_float                       , ARB_texture_float                      , GL                   , 2002)
-EXT(ATI_texture_mirror_once                 , ATI_texture_mirror_once                , GL                   , 2006)
-EXT(IBM_multimode_draw_arrays               , dummy_true                             , GL                   , 1998)
-EXT(IBM_rasterpos_clip                      , dummy_true                             , GLL                  , 1996)
-EXT(IBM_texture_mirrored_repeat             , dummy_true                             , GLL                  , 1998)
-EXT(INGR_blend_func_separate                , EXT_blend_func_separate                , GLL                  , 1999)
-EXT(INTEL_performance_query                 , INTEL_performance_query                , GL       | ES2       , 2013)
-EXT(MESA_pack_invert                        , MESA_pack_invert                       , GL                   , 2002)
-EXT(MESA_texture_signed_rgba                , EXT_texture_snorm                      , GL                   , 2009)
-EXT(MESA_window_pos                         , dummy_true                             , GLL                  , 2000)
-EXT(MESA_ycbcr_texture                      , MESA_ycbcr_texture                     , GL                   , 2002)
-EXT(NV_blend_square                         , dummy_true                             , GLL                  , 1999)
-EXT(NV_conditional_render                   , NV_conditional_render                  , GL                   , 2008)
-EXT(NV_depth_clamp                          , ARB_depth_clamp                        , GL                   , 2001)
-EXT(NV_draw_buffers                         , dummy_true                             ,            ES2       , 2011)
-EXT(NV_fbo_color_attachments                , dummy_true                             ,            ES2       , 2010)
-EXT(NV_fog_distance                         , NV_fog_distance                        , GLL                  , 2001)
-EXT(NV_fragment_program_option              , NV_fragment_program_option             , GLL                  , 2005)
-EXT(NV_light_max_exponent                   , dummy_true                             , GLL                  , 1999)
-EXT(NV_packed_depth_stencil                 , dummy_true                             , GL                   , 2000)
-EXT(NV_point_sprite                         , NV_point_sprite                        , GL                   , 2001)
-EXT(NV_primitive_restart                    , NV_primitive_restart                   , GLL                  , 2002)
-EXT(NV_read_buffer                          , dummy_true                             ,            ES2       , 2011)
-EXT(NV_read_depth                           , dummy_true                             ,            ES2       , 2011)
-EXT(NV_read_depth_stencil                   , dummy_true                             ,            ES2       , 2011)
-EXT(NV_read_stencil                         , dummy_true                             ,            ES2       , 2011)
-EXT(NV_texgen_reflection                    , dummy_true                             , GLL                  , 1999)
-EXT(NV_texture_barrier                      , NV_texture_barrier                     , GL                   , 2009)
-EXT(NV_texture_env_combine4                 , NV_texture_env_combine4                , GLL                  , 1999)
-EXT(NV_texture_rectangle                    , NV_texture_rectangle                   , GLL                  , 2000)
-EXT(NV_vdpau_interop                        , NV_vdpau_interop                       , GL                   , 2010)
-EXT(S3_s3tc                                 , ANGLE_texture_compression_dxt          , GL                   , 1999)
-EXT(SGIS_generate_mipmap                    , dummy_true                             , GLL                  , 1997)
-EXT(SGIS_texture_border_clamp               , ARB_texture_border_clamp               , GLL                  , 1997)
-EXT(SGIS_texture_edge_clamp                 , dummy_true                             , GLL                  , 1997)
-EXT(SGIS_texture_lod                        , dummy_true                             , GLL                  , 1997)
-EXT(SUN_multi_draw_arrays                   , dummy_true                             , GLL                  , 1999)
+EXT(3DFX_texture_compression_FXT1           , TDFX_texture_compression_FXT1          , GL             ,  0,  0,  0,  0, 1999)
+EXT(AMD_conservative_depth                  , ARB_conservative_depth                 , GL             ,  0,  0,  0,  0, 2009)
+EXT(AMD_draw_buffers_blend                  , ARB_draw_buffers_blend                 , GL             ,  0,  0,  0,  0, 2009)
+EXT(AMD_performance_monitor                 , AMD_performance_monitor                , GL             ,  0,  0,  0,  0, 2007)
+EXT(AMD_pinned_memory                       , AMD_pinned_memory                      , GL             ,  0,  0,  0,  0, 2013)
+EXT(AMD_seamless_cubemap_per_texture        , AMD_seamless_cubemap_per_texture       , GL             ,  0,  0,  0,  0, 2009)
+EXT(AMD_shader_stencil_export               , ARB_shader_stencil_export              , GL             ,  0,  0,  0,  0, 2009)
+EXT(AMD_shader_trinary_minmax               , dummy_true                             , GL             ,  0,  0,  0,  0, 2012)
+EXT(AMD_vertex_shader_layer                 , AMD_vertex_shader_layer                , GLC            ,  0,  0,  0,  0, 2012)
+EXT(AMD_vertex_shader_viewport_index        , AMD_vertex_shader_viewport_index       , GLC            ,  0,  0,  0,  0, 2012)
+EXT(APPLE_object_purgeable                  , APPLE_object_purgeable                 , GL             ,  0,  0,  0,  0, 2006)
+EXT(APPLE_packed_pixels                     , dummy_true                             , GLL            ,  0,  0,  0,  0, 2002)
+EXT(APPLE_texture_max_level                 , dummy_true                             ,      ES1 | ES2 ,  0,  0,  0,  0, 2009)
+EXT(APPLE_vertex_array_object               , dummy_true                             , GLL            ,  0,  0,  0,  0, 2002)
+EXT(ATI_blend_equation_separate             , EXT_blend_equation_separate            , GL             ,  0,  0,  0,  0, 2003)
+EXT(ATI_draw_buffers                        , dummy_true                             , GLL            ,  0,  0,  0,  0, 2002)
+EXT(ATI_fragment_shader                     , ATI_fragment_shader                    , GLL            ,  0,  0,  0,  0, 2001)
+EXT(ATI_separate_stencil                    , ATI_separate_stencil                   , GLL            ,  0,  0,  0,  0, 2006)
+EXT(ATI_texture_compression_3dc             , ATI_texture_compression_3dc            , GLL            ,  0,  0,  0,  0, 2004)
+EXT(ATI_texture_env_combine3                , ATI_texture_env_combine3               , GLL            ,  0,  0,  0,  0, 2002)
+EXT(ATI_texture_float                       , ARB_texture_float                      , GL             ,  0,  0,  0,  0, 2002)
+EXT(ATI_texture_mirror_once                 , ATI_texture_mirror_once                , GL             ,  0,  0,  0,  0, 2006)
+EXT(IBM_multimode_draw_arrays               , dummy_true                             , GL             ,  0,  0,  0,  0, 1998)
+EXT(IBM_rasterpos_clip                      , dummy_true                             , GLL            ,  0,  0,  0,  0, 1996)
+EXT(IBM_texture_mirrored_repeat             , dummy_true                             , GLL            ,  0,  0,  0,  0, 1998)
+EXT(INGR_blend_func_separate                , EXT_blend_func_separate                , GLL            ,  0,  0,  0,  0, 1999)
+EXT(INTEL_performance_query                 , INTEL_performance_query                , GL       | ES2 ,  0,  0,  0,  0, 2013)
+EXT(MESA_pack_invert                        , MESA_pack_invert                       , GL             ,  0,  0,  0,  0, 2002)
+EXT(MESA_texture_signed_rgba                , EXT_texture_snorm                      , GL             ,  0,  0,  0,  0, 2009)
+EXT(MESA_window_pos                         , dummy_true                             , GLL            ,  0,  0,  0,  0, 2000)
+EXT(MESA_ycbcr_texture                      , MESA_ycbcr_texture                     , GL             ,  0,  0,  0,  0, 2002)
+EXT(NV_blend_square                         , dummy_true                             , GLL            ,  0,  0,  0,  0, 1999)
+EXT(NV_conditional_render                   , NV_conditional_render                  , GL             ,  0,  0,  0,  0, 2008)
+EXT(NV_depth_clamp                          , ARB_depth_clamp                        , GL             ,  0,  0,  0,  0, 2001)
+EXT(NV_draw_buffers                         , dummy_true                             ,            ES2 ,  0,  0,  0,  0, 2011)
+EXT(NV_fbo_color_attachments                , dummy_true                             ,            ES2 ,  0,  0,  0,  0, 2010)
+EXT(NV_fog_distance                         , NV_fog_distance                        , GLL            ,  0,  0,  0,  0, 2001)
+EXT(NV_fragment_program_option              , NV_fragment_program_option             , GLL            ,  0,  0,  0,  0, 2005)
+EXT(NV_light_max_exponent                   , dummy_true                             , GLL            ,  0,  0,  0,  0, 1999)
+EXT(NV_packed_depth_stencil                 , dummy_true                             , GL             ,  0,  0,  0,  0, 2000)
+EXT(NV_point_sprite                         , NV_point_sprite                        , GL             ,  0,  0,  0,  0, 2001)
+EXT(NV_primitive_restart                    , NV_primitive_restart                   , GLL            ,  0,  0,  0,  0, 2002)
+EXT(NV_read_buffer                          , dummy_true                             ,            ES2 ,  0,  0,  0,  0, 2011)
+EXT(NV_read_depth                           , dummy_true                             ,            ES2 ,  0,  0,  0,  0, 2011)
+EXT(NV_read_depth_stencil                   , dummy_true                             ,            ES2 ,  0,  0,  0,  0, 2011)
+EXT(NV_read_stencil                         , dummy_true                             ,            ES2 ,  0,  0,  0,  0, 2011)
+EXT(NV_texgen_reflection                    , dummy_true                             , GLL            ,  0,  0,  0,  0, 1999)
+EXT(NV_texture_barrier                      , NV_texture_barrier                     , GL             ,  0,  0,  0,  0, 2009)
+EXT(NV_texture_env_combine4                 , NV_texture_env_combine4                , GLL            ,  0,  0,  0,  0, 1999)
+EXT(NV_texture_rectangle                    , NV_texture_rectangle                   , GLL            ,  0,  0,  0,  0, 2000)
+EXT(NV_vdpau_interop                        , NV_vdpau_interop                       , GL             ,  0,  0,  0,  0, 2010)
+EXT(S3_s3tc                                 , ANGLE_texture_compression_dxt          , GL             ,  0,  0,  0,  0, 1999)
+EXT(SGIS_generate_mipmap                    , dummy_true                             , GLL            ,  0,  0,  0,  0, 1997)
+EXT(SGIS_texture_border_clamp               , ARB_texture_border_clamp               , GLL            ,  0,  0,  0,  0, 1997)
+EXT(SGIS_texture_edge_clamp                 , dummy_true                             , GLL            ,  0,  0,  0,  0, 1997)
+EXT(SGIS_texture_lod                        , dummy_true                             , GLL            ,  0,  0,  0,  0, 1997)
+EXT(SUN_multi_draw_arrays                   , dummy_true                             , GLL            ,  0,  0,  0,  0, 1999)

From f6a818e76d09633c37057703ba1796ecd5678317 Mon Sep 17 00:00:00 2001
From: Nanley Chery <nanley.g.chery@intel.com>
Date: Wed, 2 Sep 2015 11:53:16 -0700
Subject: [PATCH 202/287] mesa/extensions: Create _mesa_extension_supported()

Create a function which determines if an extension is supported in the
current context.

v2: Use common variable names (Emil)
    Insert new line between variables and return statement (Chad)
    Rename api_set variable to api_bit (Chad)

Signed-off-by: Nanley Chery <nanley.g.chery@intel.com>
Reviewed-by: Chad Versace <chad.versace@intel.com>
---
 src/mesa/main/extensions.c | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/src/mesa/main/extensions.c b/src/mesa/main/extensions.c
index 4fd7487e7e2..83c492130f2 100644
--- a/src/mesa/main/extensions.c
+++ b/src/mesa/main/extensions.c
@@ -425,6 +425,24 @@ _mesa_init_extensions(struct gl_extensions *extensions)
 typedef unsigned short extension_index;
 
 
+/**
+ * Given an extension enum, return whether or not the extension is supported
+ * dependent on the following factors:
+ * There's driver support and the OpenGL/ES version is at least that
+ * specified in the extension_table.
+ */
+static inline bool
+_mesa_extension_supported(const struct gl_context *ctx, extension_index i)
+{
+   const bool *base = (bool *) &ctx->Extensions;
+   const struct extension *ext = extension_table + i;
+   const uint8_t api_bit = 1 << ctx->API;
+
+   return (ext->api_set & api_bit) &&
+          (ctx->Version >= ext->version[ctx->API]) &&
+          base[ext->offset];
+}
+
 /**
  * Compare two entries of the extensions table.  Sorts first by year,
  * then by name.

From a82bc779af37334ebc874d38951324f5f0b651cd Mon Sep 17 00:00:00 2001
From: Nanley Chery <nanley.g.chery@intel.com>
Date: Tue, 8 Sep 2015 12:41:18 -0700
Subject: [PATCH 203/287] mesa/extensions: Use _mesa_extension_supported()

Replace open-coded checks for extension support with
_mesa_extension_supported().

Signed-off-by: Nanley Chery <nanley.g.chery@intel.com>
Reviewed-by: Chad Versace <chad.versace@intel.com>
---
 src/mesa/main/extensions.c       | 54 +++++++-------------------------
 src/mesa/main/extensions_table.h |  6 ++--
 2 files changed, 14 insertions(+), 46 deletions(-)

diff --git a/src/mesa/main/extensions.c b/src/mesa/main/extensions.c
index 83c492130f2..1ce73f3bab0 100644
--- a/src/mesa/main/extensions.c
+++ b/src/mesa/main/extensions.c
@@ -49,8 +49,6 @@ enum {
    GL  = (1 << API_OPENGL_COMPAT) | (1 << API_OPENGL_CORE),
    ES1 = 1 << API_OPENGLES,
    ES2 = 1 << API_OPENGLES2,
-   ES3 = 1 << (API_OPENGL_LAST + 1),
-   ES31 = 1 << (API_OPENGL_LAST + 2),
 };
 
 /**
@@ -485,15 +483,9 @@ _mesa_make_extension_string(struct gl_context *ctx)
    extension_index *extension_indices;
    /* String of extra extensions. */
    char *extra_extensions = get_extension_override(ctx);
-   GLboolean *base = (GLboolean *) &ctx->Extensions;
    unsigned k;
    unsigned j;
    unsigned maxYear = ~0;
-   unsigned api_set = (1 << ctx->API);
-   if (_mesa_is_gles3(ctx))
-      api_set |= ES3;
-   if (_mesa_is_gles31(ctx))
-      api_set |= ES31;
 
    /* Check if the MESA_EXTENSION_MAX_YEAR env var is set */
    {
@@ -510,9 +502,8 @@ _mesa_make_extension_string(struct gl_context *ctx)
    for (k = 0; k < ARRAY_SIZE(extension_table); ++k) {
       const struct extension *i = extension_table + k;
 
-      if (base[i->offset] &&
-          i->year <= maxYear &&
-          (i->api_set & api_set)) {
+      if (i->year <= maxYear &&
+          _mesa_extension_supported(ctx, k)) {
 	 length += strlen(i->name) + 1; /* +1 for space */
 	 ++count;
       }
@@ -540,11 +531,8 @@ _mesa_make_extension_string(struct gl_context *ctx)
     */
    j = 0;
    for (k = 0; k < ARRAY_SIZE(extension_table); ++k) {
-      const struct extension *i = extension_table + k;
-
-      if (base[i->offset] &&
-          i->year <= maxYear &&
-          (i->api_set & api_set)) {
+      if (extension_table[k].year <= maxYear &&
+         _mesa_extension_supported(ctx, k)) {
          extension_indices[j++] = k;
       }
    }
@@ -555,7 +543,7 @@ _mesa_make_extension_string(struct gl_context *ctx)
    /* Build the extension string.*/
    for (j = 0; j < count; ++j) {
       const struct extension *i = &extension_table[extension_indices[j]];
-      assert(base[i->offset] && (i->api_set & api_set));
+      assert(_mesa_extension_supported(ctx, extension_indices[j]));
       strcat(exts, i->name);
       strcat(exts, " ");
    }
@@ -574,25 +562,15 @@ _mesa_make_extension_string(struct gl_context *ctx)
 GLuint
 _mesa_get_extension_count(struct gl_context *ctx)
 {
-   GLboolean *base;
    unsigned k;
-   unsigned api_set = (1 << ctx->API);
-   if (_mesa_is_gles3(ctx))
-      api_set |= ES3;
-   if (_mesa_is_gles31(ctx))
-      api_set |= ES31;
 
    /* only count once */
    if (ctx->Extensions.Count != 0)
       return ctx->Extensions.Count;
 
-   base = (GLboolean *) &ctx->Extensions;
    for (k = 0; k < ARRAY_SIZE(extension_table); ++k) {
-      const struct extension *i = extension_table + k;
-
-      if (base[i->offset] && (i->api_set & api_set)) {
+      if (_mesa_extension_supported(ctx, k))
 	 ctx->Extensions.Count++;
-      }
    }
    return ctx->Extensions.Count;
 }
@@ -603,23 +581,13 @@ _mesa_get_extension_count(struct gl_context *ctx)
 const GLubyte *
 _mesa_get_enabled_extension(struct gl_context *ctx, GLuint index)
 {
-   const GLboolean *base;
-   size_t n;
-   unsigned k;
-   unsigned api_set = (1 << ctx->API);
-   if (_mesa_is_gles3(ctx))
-      api_set |= ES3;
-   if (_mesa_is_gles31(ctx))
-      api_set |= ES31;
+   size_t n = 0;
+   unsigned i;
 
-   base = (GLboolean*) &ctx->Extensions;
-   n = 0;
-   for (k = 0; k < ARRAY_SIZE(extension_table); ++k) {
-      const struct extension *i = extension_table + k;
-
-      if (base[i->offset] && (i->api_set & api_set)) {
+   for (i = 0; i < ARRAY_SIZE(extension_table); ++i) {
+      if (_mesa_extension_supported(ctx, i)) {
          if (n == index)
-            return (const GLubyte*) i->name;
+            return (const GLubyte*) extension_table[i].name;
          else
             ++n;
       }
diff --git a/src/mesa/main/extensions_table.h b/src/mesa/main/extensions_table.h
index 50c0d428725..e5b8aa0ec72 100644
--- a/src/mesa/main/extensions_table.h
+++ b/src/mesa/main/extensions_table.h
@@ -171,7 +171,7 @@ EXT(EXT_rescale_normal                      , dummy_true
 EXT(EXT_secondary_color                     , dummy_true                             , GLL            ,  0,  0,  0,  0, 1999)
 EXT(EXT_separate_shader_objects             , dummy_true                             ,            ES2 ,  0,  0,  0,  0, 2013)
 EXT(EXT_separate_specular_color             , dummy_true                             , GLL            ,  0,  0,  0,  0, 1997)
-EXT(EXT_shader_integer_mix                  , EXT_shader_integer_mix                 , GL       | ES3 ,  0,  0,  0,  0, 2013)
+EXT(EXT_shader_integer_mix                  , EXT_shader_integer_mix                 , GL       | ES2 ,  0,  0,  0, 30, 2013)
 EXT(EXT_shadow_funcs                        , ARB_shadow                             , GLL            ,  0,  0,  0,  0, 2002)
 EXT(EXT_stencil_two_side                    , EXT_stencil_two_side                   , GLL            ,  0,  0,  0,  0, 2001)
 EXT(EXT_stencil_wrap                        , dummy_true                             , GLL            ,  0,  0,  0,  0, 2002)
@@ -210,7 +210,7 @@ EXT(EXT_transform_feedback                  , EXT_transform_feedback
 EXT(EXT_unpack_subimage                     , dummy_true                             ,            ES2 ,  0,  0,  0,  0, 2011)
 EXT(EXT_vertex_array_bgra                   , EXT_vertex_array_bgra                  , GL             ,  0,  0,  0,  0, 2008)
 EXT(EXT_vertex_array                        , dummy_true                             , GLL            ,  0,  0,  0,  0, 1995)
-EXT(EXT_color_buffer_float                  , dummy_true                             ,            ES3 ,  0,  0,  0,  0, 2013)
+EXT(EXT_color_buffer_float                  , dummy_true                             ,            ES2 ,  0,  0,  0, 30, 2013)
 
 
 EXT(OES_blend_equation_separate             , EXT_blend_equation_separate            ,      ES1       ,  0,  0,  0,  0, 2009)
@@ -255,7 +255,7 @@ EXT(OES_texture_float_linear                , OES_texture_float_linear
 EXT(OES_texture_half_float                  , OES_texture_half_float                 ,            ES2 ,  0,  0,  0,  0, 2005)
 EXT(OES_texture_half_float_linear           , OES_texture_half_float_linear          ,            ES2 ,  0,  0,  0,  0, 2005)
 EXT(OES_texture_mirrored_repeat             , dummy_true                             ,      ES1       ,  0,  0,  0,  0, 2005)
-EXT(OES_texture_storage_multisample_2d_array, ARB_texture_multisample                ,           ES31 ,  0,  0,  0,  0, 2014)
+EXT(OES_texture_storage_multisample_2d_array, ARB_texture_multisample                ,            ES2 ,  0,  0,  0, 31, 2014)
 EXT(OES_texture_npot                        , ARB_texture_non_power_of_two           ,      ES1 | ES2 ,  0,  0,  0,  0, 2005)
 EXT(OES_vertex_array_object                 , dummy_true                             ,      ES1 | ES2 ,  0,  0,  0,  0, 2010)
 

From eda15abd84af575d3bde432e2163e30d743a7c87 Mon Sep 17 00:00:00 2001
From: Nanley Chery <nanley.g.chery@intel.com>
Date: Mon, 21 Sep 2015 11:23:33 -0700
Subject: [PATCH 204/287] mesa/extensions: Replace extension::api_set with
 ::version

The api_set field has no users outside of _mesa_extension_supported().
Remove it and allow the version field to take its place.

The bulk of the transformation was performed with the following vim commands:
s/\(GL [^,]\+\),\s*\d*,\s*\d*\(,\s*\d*\)\(,\s*\d*\)/\1, GLL, GLC\2\3/g
s/\(GLL [^,]\+\)\,\s*\d*/\1, GLL/g
s/\(GLC [^,]\+\)\(,\s*\d*\),\s*\d*\(,\s*\d*\)\(,\s*\d*\)/\1\2, GLC\3\4/g
s/\( ES1[^,]*\)\(,\s*\(\w\|\d\)\+\)\(,\s*\(\w\|\d\)\+\),\s*\d*/\1\2\4, ES1/g
s/\( ES2[^,]*\)\(,\s*\(\w\|\d\)\+\)\(,\s*\(\w\|\d\)\+\)\(,\s*\(\w\|\d\)\+\),\s*\d*/\1\2\4\6, ES2/g

Signed-off-by: Nanley Chery <nanley.g.chery@intel.com>
Reviewed-by: Chad Versace <chad.versace@intel.com>
---
 src/mesa/main/extensions.c       |  21 +-
 src/mesa/main/extensions_table.h | 646 ++++++++++++++++---------------
 2 files changed, 331 insertions(+), 336 deletions(-)

diff --git a/src/mesa/main/extensions.c b/src/mesa/main/extensions.c
index 1ce73f3bab0..c7609bea0f0 100644
--- a/src/mesa/main/extensions.c
+++ b/src/mesa/main/extensions.c
@@ -42,15 +42,6 @@ struct gl_extensions _mesa_extension_override_disables;
 static char *extra_extensions = NULL;
 static char *cant_disable_extensions = NULL;
 
-enum {
-   DISABLE = 0,
-   GLL = 1 << API_OPENGL_COMPAT,       /* GL Legacy / Compatibility */
-   GLC = 1 << API_OPENGL_CORE,  /* GL Core */
-   GL  = (1 << API_OPENGL_COMPAT) | (1 << API_OPENGL_CORE),
-   ES1 = 1 << API_OPENGLES,
-   ES2 = 1 << API_OPENGLES2,
-};
-
 /**
  * \brief An element of the \c extension_table.
  */
@@ -61,9 +52,6 @@ struct extension {
    /** Offset (in bytes) of the corresponding member in struct gl_extensions. */
    size_t offset;
 
-   /** Set of API's in which the extension exists, as a bitset. */
-   uint8_t api_set;
-
    /** Minimum version the extension requires for the given API
     * (see gl_api defined in mtypes.h). The value is equal to:
     * 10 * major_version + minor_version
@@ -87,8 +75,8 @@ struct extension {
  * \brief Table of supported OpenGL extensions for all API's.
  */
 static const struct extension extension_table[] = {
-#define EXT(name_str, driver_cap, api_flags, gll_ver, glc_ver, gles_ver, gles2_ver, yyyy) \
-        { .name = "GL_" #name_str, .offset = o(driver_cap), .api_set = api_flags, \
+#define EXT(name_str, driver_cap, gll_ver, glc_ver, gles_ver, gles2_ver, yyyy) \
+        { .name = "GL_" #name_str, .offset = o(driver_cap), \
           .version = { \
             [API_OPENGL_COMPAT] = gll_ver, \
             [API_OPENGL_CORE]   = glc_ver, \
@@ -434,11 +422,8 @@ _mesa_extension_supported(const struct gl_context *ctx, extension_index i)
 {
    const bool *base = (bool *) &ctx->Extensions;
    const struct extension *ext = extension_table + i;
-   const uint8_t api_bit = 1 << ctx->API;
 
-   return (ext->api_set & api_bit) &&
-          (ctx->Version >= ext->version[ctx->API]) &&
-          base[ext->offset];
+   return (ctx->Version >= ext->version[ctx->API]) && base[ext->offset];
 }
 
 /**
diff --git a/src/mesa/main/extensions_table.h b/src/mesa/main/extensions_table.h
index e5b8aa0ec72..d983562dd86 100644
--- a/src/mesa/main/extensions_table.h
+++ b/src/mesa/main/extensions_table.h
@@ -1,325 +1,335 @@
-EXT(ARB_ES2_compatibility                   , ARB_ES2_compatibility                  , GL             ,  0,  0,  0,  0, 2009)
-EXT(ARB_ES3_compatibility                   , ARB_ES3_compatibility                  , GL             ,  0,  0,  0,  0, 2012)
-EXT(ARB_arrays_of_arrays                    , ARB_arrays_of_arrays                   , GL             ,  0,  0,  0,  0, 2012)
-EXT(ARB_base_instance                       , ARB_base_instance                      , GL             ,  0,  0,  0,  0, 2011)
-EXT(ARB_blend_func_extended                 , ARB_blend_func_extended                , GL             ,  0,  0,  0,  0, 2009)
-EXT(ARB_buffer_storage                      , ARB_buffer_storage                     , GL             ,  0,  0,  0,  0, 2013)
-EXT(ARB_clear_buffer_object                 , dummy_true                             , GL             ,  0,  0,  0,  0, 2012)
-EXT(ARB_clear_texture                       , ARB_clear_texture                      , GL             ,  0,  0,  0,  0, 2013)
-EXT(ARB_clip_control                        , ARB_clip_control                       , GL             ,  0,  0,  0,  0, 2014)
-EXT(ARB_color_buffer_float                  , ARB_color_buffer_float                 , GL             ,  0,  0,  0,  0, 2004)
-EXT(ARB_compressed_texture_pixel_storage    , dummy_true                             , GL             ,  0,  0,  0,  0, 2011)
-EXT(ARB_compute_shader                      , ARB_compute_shader                     , GL             ,  0,  0,  0,  0, 2012)
-EXT(ARB_conditional_render_inverted         , ARB_conditional_render_inverted        , GL             ,  0,  0,  0,  0, 2014)
-EXT(ARB_copy_buffer                         , dummy_true                             , GL             ,  0,  0,  0,  0, 2008)
-EXT(ARB_copy_image                          , ARB_copy_image                         , GL             ,  0,  0,  0,  0, 2012)
-EXT(ARB_conservative_depth                  , ARB_conservative_depth                 , GL             ,  0,  0,  0,  0, 2011)
-EXT(ARB_debug_output                        , dummy_true                             , GL             ,  0,  0,  0,  0, 2009)
-EXT(ARB_depth_buffer_float                  , ARB_depth_buffer_float                 , GL             ,  0,  0,  0,  0, 2008)
-EXT(ARB_depth_clamp                         , ARB_depth_clamp                        , GL             ,  0,  0,  0,  0, 2003)
-EXT(ARB_depth_texture                       , ARB_depth_texture                      , GLL            ,  0,  0,  0,  0, 2001)
-EXT(ARB_derivative_control                  , ARB_derivative_control                 , GL             ,  0,  0,  0,  0, 2014)
-EXT(ARB_direct_state_access                 , dummy_true                             , GLC            ,  0,  0,  0,  0, 2014)
-EXT(ARB_draw_buffers                        , dummy_true                             , GL             ,  0,  0,  0,  0, 2002)
-EXT(ARB_draw_buffers_blend                  , ARB_draw_buffers_blend                 , GL             ,  0,  0,  0,  0, 2009)
-EXT(ARB_draw_elements_base_vertex           , ARB_draw_elements_base_vertex          , GL             ,  0,  0,  0,  0, 2009)
-EXT(ARB_draw_indirect                       , ARB_draw_indirect                      , GLC            ,  0,  0,  0,  0, 2010)
-EXT(ARB_draw_instanced                      , ARB_draw_instanced                     , GL             ,  0,  0,  0,  0, 2008)
-EXT(ARB_enhanced_layouts                    , ARB_enhanced_layouts                   , GLC            ,  0,  0,  0,  0, 2013)
-EXT(ARB_explicit_attrib_location            , ARB_explicit_attrib_location           , GL             ,  0,  0,  0,  0, 2009)
-EXT(ARB_explicit_uniform_location           , ARB_explicit_uniform_location          , GL             ,  0,  0,  0,  0, 2012)
-EXT(ARB_fragment_coord_conventions          , ARB_fragment_coord_conventions         , GL             ,  0,  0,  0,  0, 2009)
-EXT(ARB_fragment_layer_viewport             , ARB_fragment_layer_viewport            , GLC            ,  0,  0,  0,  0, 2012)
-EXT(ARB_fragment_program                    , ARB_fragment_program                   , GLL            ,  0,  0,  0,  0, 2002)
-EXT(ARB_fragment_program_shadow             , ARB_fragment_program_shadow            , GLL            ,  0,  0,  0,  0, 2003)
-EXT(ARB_fragment_shader                     , ARB_fragment_shader                    , GL             ,  0,  0,  0,  0, 2002)
-EXT(ARB_framebuffer_no_attachments          , ARB_framebuffer_no_attachments         , GL             ,  0,  0,  0,  0, 2012)
-EXT(ARB_framebuffer_object                  , ARB_framebuffer_object                 , GL             ,  0,  0,  0,  0, 2005)
-EXT(ARB_framebuffer_sRGB                    , EXT_framebuffer_sRGB                   , GL             ,  0,  0,  0,  0, 1998)
-EXT(ARB_get_program_binary                  , dummy_true                             , GL             ,  0,  0,  0,  0, 2010)
-EXT(ARB_get_texture_sub_image               , dummy_true                             , GL             ,  0,  0,  0,  0, 2014)
-EXT(ARB_gpu_shader5                         , ARB_gpu_shader5                        , GLC            ,  0,  0,  0,  0, 2010)
-EXT(ARB_gpu_shader_fp64                     , ARB_gpu_shader_fp64                    , GLC            ,  0,  0,  0,  0, 2010)
-EXT(ARB_half_float_pixel                    , dummy_true                             , GL             ,  0,  0,  0,  0, 2003)
-EXT(ARB_half_float_vertex                   , ARB_half_float_vertex                  , GL             ,  0,  0,  0,  0, 2008)
-EXT(ARB_instanced_arrays                    , ARB_instanced_arrays                   , GL             ,  0,  0,  0,  0, 2008)
-EXT(ARB_internalformat_query                , ARB_internalformat_query               , GL             ,  0,  0,  0,  0, 2011)
-EXT(ARB_invalidate_subdata                  , dummy_true                             , GL             ,  0,  0,  0,  0, 2012)
-EXT(ARB_map_buffer_alignment                , dummy_true                             , GL             ,  0,  0,  0,  0, 2011)
-EXT(ARB_map_buffer_range                    , ARB_map_buffer_range                   , GL             ,  0,  0,  0,  0, 2008)
-EXT(ARB_multi_bind                          , dummy_true                             , GL             ,  0,  0,  0,  0, 2013)
-EXT(ARB_multi_draw_indirect                 , ARB_draw_indirect                      , GLC            ,  0,  0,  0,  0, 2012)
-EXT(ARB_multisample                         , dummy_true                             , GLL            ,  0,  0,  0,  0, 1994)
-EXT(ARB_multitexture                        , dummy_true                             , GLL            ,  0,  0,  0,  0, 1998)
-EXT(ARB_occlusion_query2                    , ARB_occlusion_query2                   , GL             ,  0,  0,  0,  0, 2003)
-EXT(ARB_occlusion_query                     , ARB_occlusion_query                    , GLL            ,  0,  0,  0,  0, 2001)
-EXT(ARB_pipeline_statistics_query           , ARB_pipeline_statistics_query          , GL             ,  0,  0,  0,  0, 2014)
-EXT(ARB_pixel_buffer_object                 , EXT_pixel_buffer_object                , GL             ,  0,  0,  0,  0, 2004)
-EXT(ARB_point_parameters                    , EXT_point_parameters                   , GLL            ,  0,  0,  0,  0, 1997)
-EXT(ARB_point_sprite                        , ARB_point_sprite                       , GL             ,  0,  0,  0,  0, 2003)
-EXT(ARB_program_interface_query             , dummy_true                             , GL             ,  0,  0,  0,  0, 2012)
-EXT(ARB_provoking_vertex                    , EXT_provoking_vertex                   , GL             ,  0,  0,  0,  0, 2009)
-EXT(ARB_robustness                          , dummy_true                             , GL             ,  0,  0,  0,  0, 2010)
-EXT(ARB_sample_shading                      , ARB_sample_shading                     , GL             ,  0,  0,  0,  0, 2009)
-EXT(ARB_sampler_objects                     , dummy_true                             , GL             ,  0,  0,  0,  0, 2009)
-EXT(ARB_seamless_cube_map                   , ARB_seamless_cube_map                  , GL             ,  0,  0,  0,  0, 2009)
-EXT(ARB_seamless_cubemap_per_texture        , AMD_seamless_cubemap_per_texture       , GL             ,  0,  0,  0,  0, 2013)
-EXT(ARB_separate_shader_objects             , dummy_true                             , GL             ,  0,  0,  0,  0, 2010)
-EXT(ARB_shader_atomic_counters              , ARB_shader_atomic_counters             , GL             ,  0,  0,  0,  0, 2011)
-EXT(ARB_shader_bit_encoding                 , ARB_shader_bit_encoding                , GL             ,  0,  0,  0,  0, 2010)
-EXT(ARB_shader_clock                        , ARB_shader_clock                       , GL             ,  0,  0,  0,  0, 2015)
-EXT(ARB_shader_image_load_store             , ARB_shader_image_load_store            , GL             ,  0,  0,  0,  0, 2011)
-EXT(ARB_shader_image_size                   , ARB_shader_image_size                  , GL             ,  0,  0,  0,  0, 2012)
-EXT(ARB_shader_objects                      , dummy_true                             , GL             ,  0,  0,  0,  0, 2002)
-EXT(ARB_shader_precision                    , ARB_shader_precision                   , GL             ,  0,  0,  0,  0, 2010)
-EXT(ARB_shader_stencil_export               , ARB_shader_stencil_export              , GL             ,  0,  0,  0,  0, 2009)
-EXT(ARB_shader_storage_buffer_object        , ARB_shader_storage_buffer_object       , GL             ,  0,  0,  0,  0, 2012)
-EXT(ARB_shader_subroutine                   , ARB_shader_subroutine                  , GLC            ,  0,  0,  0,  0, 2010)
-EXT(ARB_shader_texture_image_samples        , ARB_shader_texture_image_samples       , GL             ,  0,  0,  0,  0, 2014)
-EXT(ARB_shader_texture_lod                  , ARB_shader_texture_lod                 , GL             ,  0,  0,  0,  0, 2009)
-EXT(ARB_shading_language_100                , dummy_true                             , GLL            ,  0,  0,  0,  0, 2003)
-EXT(ARB_shading_language_packing            , ARB_shading_language_packing           , GL             ,  0,  0,  0,  0, 2011)
-EXT(ARB_shading_language_420pack            , ARB_shading_language_420pack           , GL             ,  0,  0,  0,  0, 2011)
-EXT(ARB_shadow                              , ARB_shadow                             , GLL            ,  0,  0,  0,  0, 2001)
-EXT(ARB_stencil_texturing                   , ARB_stencil_texturing                  , GL             ,  0,  0,  0,  0, 2012)
-EXT(ARB_sync                                , ARB_sync                               , GL             ,  0,  0,  0,  0, 2003)
-EXT(ARB_texture_barrier                     , NV_texture_barrier                     , GL             ,  0,  0,  0,  0, 2014)
-EXT(ARB_tessellation_shader                 , ARB_tessellation_shader                , GLC            ,  0,  0,  0,  0, 2009)
-EXT(ARB_texture_border_clamp                , ARB_texture_border_clamp               , GLL            ,  0,  0,  0,  0, 2000)
-EXT(ARB_texture_buffer_object               , ARB_texture_buffer_object              , GLC            ,  0,  0,  0,  0, 2008)
-EXT(ARB_texture_buffer_object_rgb32         , ARB_texture_buffer_object_rgb32        , GLC            ,  0,  0,  0,  0, 2009)
-EXT(ARB_texture_buffer_range                , ARB_texture_buffer_range               , GLC            ,  0,  0,  0,  0, 2012)
-EXT(ARB_texture_compression                 , dummy_true                             , GLL            ,  0,  0,  0,  0, 2000)
-EXT(ARB_texture_compression_bptc            , ARB_texture_compression_bptc           , GL             ,  0,  0,  0,  0, 2010)
-EXT(ARB_texture_compression_rgtc            , ARB_texture_compression_rgtc           , GL             ,  0,  0,  0,  0, 2004)
-EXT(ARB_texture_cube_map                    , ARB_texture_cube_map                   , GLL            ,  0,  0,  0,  0, 1999)
-EXT(ARB_texture_cube_map_array              , ARB_texture_cube_map_array             , GL             ,  0,  0,  0,  0, 2009)
-EXT(ARB_texture_env_add                     , dummy_true                             , GLL            ,  0,  0,  0,  0, 1999)
-EXT(ARB_texture_env_combine                 , ARB_texture_env_combine                , GLL            ,  0,  0,  0,  0, 2001)
-EXT(ARB_texture_env_crossbar                , ARB_texture_env_crossbar               , GLL            ,  0,  0,  0,  0, 2001)
-EXT(ARB_texture_env_dot3                    , ARB_texture_env_dot3                   , GLL            ,  0,  0,  0,  0, 2001)
-EXT(ARB_texture_float                       , ARB_texture_float                      , GL             ,  0,  0,  0,  0, 2004)
-EXT(ARB_texture_gather                      , ARB_texture_gather                     , GL             ,  0,  0,  0,  0, 2009)
-EXT(ARB_texture_mirrored_repeat             , dummy_true                             , GLL            ,  0,  0,  0,  0, 2001)
-EXT(ARB_texture_mirror_clamp_to_edge        , ARB_texture_mirror_clamp_to_edge       , GL             ,  0,  0,  0,  0, 2013)
-EXT(ARB_texture_multisample                 , ARB_texture_multisample                , GL             ,  0,  0,  0,  0, 2009)
-EXT(ARB_texture_non_power_of_two            , ARB_texture_non_power_of_two           , GL             ,  0,  0,  0,  0, 2003)
-EXT(ARB_texture_query_levels                , ARB_texture_query_levels               , GL             ,  0,  0,  0,  0, 2012)
-EXT(ARB_texture_query_lod                   , ARB_texture_query_lod                  , GL             ,  0,  0,  0,  0, 2009)
-EXT(ARB_texture_rectangle                   , NV_texture_rectangle                   , GL             ,  0,  0,  0,  0, 2004)
-EXT(ARB_texture_rgb10_a2ui                  , ARB_texture_rgb10_a2ui                 , GL             ,  0,  0,  0,  0, 2009)
-EXT(ARB_texture_rg                          , ARB_texture_rg                         , GL             ,  0,  0,  0,  0, 2008)
-EXT(ARB_texture_stencil8                    , ARB_texture_stencil8                   , GL             ,  0,  0,  0,  0, 2013)
-EXT(ARB_texture_storage                     , dummy_true                             , GL             ,  0,  0,  0,  0, 2011)
-EXT(ARB_texture_storage_multisample         , ARB_texture_multisample                , GL             ,  0,  0,  0,  0, 2012)
-EXT(ARB_texture_view                        , ARB_texture_view                       , GL             ,  0,  0,  0,  0, 2012)
-EXT(ARB_texture_swizzle                     , EXT_texture_swizzle                    , GL             ,  0,  0,  0,  0, 2008)
-EXT(ARB_timer_query                         , ARB_timer_query                        , GL             ,  0,  0,  0,  0, 2010)
-EXT(ARB_transform_feedback2                 , ARB_transform_feedback2                , GL             ,  0,  0,  0,  0, 2010)
-EXT(ARB_transform_feedback3                 , ARB_transform_feedback3                , GL             ,  0,  0,  0,  0, 2010)
-EXT(ARB_transform_feedback_instanced        , ARB_transform_feedback_instanced       , GL             ,  0,  0,  0,  0, 2011)
-EXT(ARB_transpose_matrix                    , dummy_true                             , GLL            ,  0,  0,  0,  0, 1999)
-EXT(ARB_uniform_buffer_object               , ARB_uniform_buffer_object              , GL             ,  0,  0,  0,  0, 2009)
-EXT(ARB_vertex_array_bgra                   , EXT_vertex_array_bgra                  , GL             ,  0,  0,  0,  0, 2008)
-EXT(ARB_vertex_array_object                 , dummy_true                             , GL             ,  0,  0,  0,  0, 2006)
-EXT(ARB_vertex_attrib_binding               , dummy_true                             , GL             ,  0,  0,  0,  0, 2012)
-EXT(ARB_vertex_buffer_object                , dummy_true                             , GLL            ,  0,  0,  0,  0, 2003)
-EXT(ARB_vertex_program                      , ARB_vertex_program                     , GLL            ,  0,  0,  0,  0, 2002)
-EXT(ARB_vertex_shader                       , ARB_vertex_shader                      , GL             ,  0,  0,  0,  0, 2002)
-EXT(ARB_vertex_attrib_64bit                 , ARB_vertex_attrib_64bit                , GLC            ,  0,  0,  0,  0, 2010)
-EXT(ARB_vertex_type_10f_11f_11f_rev         , ARB_vertex_type_10f_11f_11f_rev        , GL             ,  0,  0,  0,  0, 2013)
-EXT(ARB_vertex_type_2_10_10_10_rev          , ARB_vertex_type_2_10_10_10_rev         , GL             ,  0,  0,  0,  0, 2009)
-EXT(ARB_viewport_array                      , ARB_viewport_array                     , GLC            ,  0,  0,  0,  0, 2010)
-EXT(ARB_window_pos                          , dummy_true                             , GLL            ,  0,  0,  0,  0, 2001)
+#define GLL 0
+#define GLC 0
+#define ES1 0
+#define ES2 0
+#define  x ~0
+EXT(ARB_ES2_compatibility                   , ARB_ES2_compatibility                  , GLL, GLC,  x ,  x , 2009)
+EXT(ARB_ES3_compatibility                   , ARB_ES3_compatibility                  , GLL, GLC,  x ,  x , 2012)
+EXT(ARB_arrays_of_arrays                    , ARB_arrays_of_arrays                   , GLL, GLC,  x ,  x , 2012)
+EXT(ARB_base_instance                       , ARB_base_instance                      , GLL, GLC,  x ,  x , 2011)
+EXT(ARB_blend_func_extended                 , ARB_blend_func_extended                , GLL, GLC,  x ,  x , 2009)
+EXT(ARB_buffer_storage                      , ARB_buffer_storage                     , GLL, GLC,  x ,  x , 2013)
+EXT(ARB_clear_buffer_object                 , dummy_true                             , GLL, GLC,  x ,  x , 2012)
+EXT(ARB_clear_texture                       , ARB_clear_texture                      , GLL, GLC,  x ,  x , 2013)
+EXT(ARB_clip_control                        , ARB_clip_control                       , GLL, GLC,  x ,  x , 2014)
+EXT(ARB_color_buffer_float                  , ARB_color_buffer_float                 , GLL, GLC,  x ,  x , 2004)
+EXT(ARB_compressed_texture_pixel_storage    , dummy_true                             , GLL, GLC,  x ,  x , 2011)
+EXT(ARB_compute_shader                      , ARB_compute_shader                     , GLL, GLC,  x ,  x , 2012)
+EXT(ARB_conditional_render_inverted         , ARB_conditional_render_inverted        , GLL, GLC,  x ,  x , 2014)
+EXT(ARB_copy_buffer                         , dummy_true                             , GLL, GLC,  x ,  x , 2008)
+EXT(ARB_copy_image                          , ARB_copy_image                         , GLL, GLC,  x ,  x , 2012)
+EXT(ARB_conservative_depth                  , ARB_conservative_depth                 , GLL, GLC,  x ,  x , 2011)
+EXT(ARB_debug_output                        , dummy_true                             , GLL, GLC,  x ,  x , 2009)
+EXT(ARB_depth_buffer_float                  , ARB_depth_buffer_float                 , GLL, GLC,  x ,  x , 2008)
+EXT(ARB_depth_clamp                         , ARB_depth_clamp                        , GLL, GLC,  x ,  x , 2003)
+EXT(ARB_depth_texture                       , ARB_depth_texture                      , GLL,  x ,  x ,  x , 2001)
+EXT(ARB_derivative_control                  , ARB_derivative_control                 , GLL, GLC,  x ,  x , 2014)
+EXT(ARB_direct_state_access                 , dummy_true                             ,  x , GLC,  x ,  x , 2014)
+EXT(ARB_draw_buffers                        , dummy_true                             , GLL, GLC,  x ,  x , 2002)
+EXT(ARB_draw_buffers_blend                  , ARB_draw_buffers_blend                 , GLL, GLC,  x ,  x , 2009)
+EXT(ARB_draw_elements_base_vertex           , ARB_draw_elements_base_vertex          , GLL, GLC,  x ,  x , 2009)
+EXT(ARB_draw_indirect                       , ARB_draw_indirect                      ,  x , GLC,  x ,  x , 2010)
+EXT(ARB_draw_instanced                      , ARB_draw_instanced                     , GLL, GLC,  x ,  x , 2008)
+EXT(ARB_enhanced_layouts                    , ARB_enhanced_layouts                   ,  x , GLC,  x ,  x , 2013)
+EXT(ARB_explicit_attrib_location            , ARB_explicit_attrib_location           , GLL, GLC,  x ,  x , 2009)
+EXT(ARB_explicit_uniform_location           , ARB_explicit_uniform_location          , GLL, GLC,  x ,  x , 2012)
+EXT(ARB_fragment_coord_conventions          , ARB_fragment_coord_conventions         , GLL, GLC,  x ,  x , 2009)
+EXT(ARB_fragment_layer_viewport             , ARB_fragment_layer_viewport            ,  x , GLC,  x ,  x , 2012)
+EXT(ARB_fragment_program                    , ARB_fragment_program                   , GLL,  x ,  x ,  x , 2002)
+EXT(ARB_fragment_program_shadow             , ARB_fragment_program_shadow            , GLL,  x ,  x ,  x , 2003)
+EXT(ARB_fragment_shader                     , ARB_fragment_shader                    , GLL, GLC,  x ,  x , 2002)
+EXT(ARB_framebuffer_no_attachments          , ARB_framebuffer_no_attachments         , GLL, GLC,  x ,  x , 2012)
+EXT(ARB_framebuffer_object                  , ARB_framebuffer_object                 , GLL, GLC,  x ,  x , 2005)
+EXT(ARB_framebuffer_sRGB                    , EXT_framebuffer_sRGB                   , GLL, GLC,  x ,  x , 1998)
+EXT(ARB_get_program_binary                  , dummy_true                             , GLL, GLC,  x ,  x , 2010)
+EXT(ARB_get_texture_sub_image               , dummy_true                             , GLL, GLC,  x ,  x , 2014)
+EXT(ARB_gpu_shader5                         , ARB_gpu_shader5                        ,  x , GLC,  x ,  x , 2010)
+EXT(ARB_gpu_shader_fp64                     , ARB_gpu_shader_fp64                    ,  x , GLC,  x ,  x , 2010)
+EXT(ARB_half_float_pixel                    , dummy_true                             , GLL, GLC,  x ,  x , 2003)
+EXT(ARB_half_float_vertex                   , ARB_half_float_vertex                  , GLL, GLC,  x ,  x , 2008)
+EXT(ARB_instanced_arrays                    , ARB_instanced_arrays                   , GLL, GLC,  x ,  x , 2008)
+EXT(ARB_internalformat_query                , ARB_internalformat_query               , GLL, GLC,  x ,  x , 2011)
+EXT(ARB_invalidate_subdata                  , dummy_true                             , GLL, GLC,  x ,  x , 2012)
+EXT(ARB_map_buffer_alignment                , dummy_true                             , GLL, GLC,  x ,  x , 2011)
+EXT(ARB_map_buffer_range                    , ARB_map_buffer_range                   , GLL, GLC,  x ,  x , 2008)
+EXT(ARB_multi_bind                          , dummy_true                             , GLL, GLC,  x ,  x , 2013)
+EXT(ARB_multi_draw_indirect                 , ARB_draw_indirect                      ,  x , GLC,  x ,  x , 2012)
+EXT(ARB_multisample                         , dummy_true                             , GLL,  x ,  x ,  x , 1994)
+EXT(ARB_multitexture                        , dummy_true                             , GLL,  x ,  x ,  x , 1998)
+EXT(ARB_occlusion_query2                    , ARB_occlusion_query2                   , GLL, GLC,  x ,  x , 2003)
+EXT(ARB_occlusion_query                     , ARB_occlusion_query                    , GLL,  x ,  x ,  x , 2001)
+EXT(ARB_pipeline_statistics_query           , ARB_pipeline_statistics_query          , GLL, GLC,  x ,  x , 2014)
+EXT(ARB_pixel_buffer_object                 , EXT_pixel_buffer_object                , GLL, GLC,  x ,  x , 2004)
+EXT(ARB_point_parameters                    , EXT_point_parameters                   , GLL,  x ,  x ,  x , 1997)
+EXT(ARB_point_sprite                        , ARB_point_sprite                       , GLL, GLC,  x ,  x , 2003)
+EXT(ARB_program_interface_query             , dummy_true                             , GLL, GLC,  x ,  x , 2012)
+EXT(ARB_provoking_vertex                    , EXT_provoking_vertex                   , GLL, GLC,  x ,  x , 2009)
+EXT(ARB_robustness                          , dummy_true                             , GLL, GLC,  x ,  x , 2010)
+EXT(ARB_sample_shading                      , ARB_sample_shading                     , GLL, GLC,  x ,  x , 2009)
+EXT(ARB_sampler_objects                     , dummy_true                             , GLL, GLC,  x ,  x , 2009)
+EXT(ARB_seamless_cube_map                   , ARB_seamless_cube_map                  , GLL, GLC,  x ,  x , 2009)
+EXT(ARB_seamless_cubemap_per_texture        , AMD_seamless_cubemap_per_texture       , GLL, GLC,  x ,  x , 2013)
+EXT(ARB_separate_shader_objects             , dummy_true                             , GLL, GLC,  x ,  x , 2010)
+EXT(ARB_shader_atomic_counters              , ARB_shader_atomic_counters             , GLL, GLC,  x ,  x , 2011)
+EXT(ARB_shader_bit_encoding                 , ARB_shader_bit_encoding                , GLL, GLC,  x ,  x , 2010)
+EXT(ARB_shader_clock                        , ARB_shader_clock                       , GLL, GLC,  x ,  x , 2015)
+EXT(ARB_shader_image_load_store             , ARB_shader_image_load_store            , GLL, GLC,  x ,  x , 2011)
+EXT(ARB_shader_image_size                   , ARB_shader_image_size                  , GLL, GLC,  x ,  x , 2012)
+EXT(ARB_shader_objects                      , dummy_true                             , GLL, GLC,  x ,  x , 2002)
+EXT(ARB_shader_precision                    , ARB_shader_precision                   , GLL, GLC,  x ,  x , 2010)
+EXT(ARB_shader_stencil_export               , ARB_shader_stencil_export              , GLL, GLC,  x ,  x , 2009)
+EXT(ARB_shader_storage_buffer_object        , ARB_shader_storage_buffer_object       , GLL, GLC,  x ,  x , 2012)
+EXT(ARB_shader_subroutine                   , ARB_shader_subroutine                  ,  x , GLC,  x ,  x , 2010)
+EXT(ARB_shader_texture_image_samples        , ARB_shader_texture_image_samples       , GLL, GLC,  x ,  x , 2014)
+EXT(ARB_shader_texture_lod                  , ARB_shader_texture_lod                 , GLL, GLC,  x ,  x , 2009)
+EXT(ARB_shading_language_100                , dummy_true                             , GLL,  x ,  x ,  x , 2003)
+EXT(ARB_shading_language_packing            , ARB_shading_language_packing           , GLL, GLC,  x ,  x , 2011)
+EXT(ARB_shading_language_420pack            , ARB_shading_language_420pack           , GLL, GLC,  x ,  x , 2011)
+EXT(ARB_shadow                              , ARB_shadow                             , GLL,  x ,  x ,  x , 2001)
+EXT(ARB_stencil_texturing                   , ARB_stencil_texturing                  , GLL, GLC,  x ,  x , 2012)
+EXT(ARB_sync                                , ARB_sync                               , GLL, GLC,  x ,  x , 2003)
+EXT(ARB_texture_barrier                     , NV_texture_barrier                     , GLL, GLC,  x ,  x , 2014)
+EXT(ARB_tessellation_shader                 , ARB_tessellation_shader                ,  x , GLC,  x ,  x , 2009)
+EXT(ARB_texture_border_clamp                , ARB_texture_border_clamp               , GLL,  x ,  x ,  x , 2000)
+EXT(ARB_texture_buffer_object               , ARB_texture_buffer_object              ,  x , GLC,  x ,  x , 2008)
+EXT(ARB_texture_buffer_object_rgb32         , ARB_texture_buffer_object_rgb32        ,  x , GLC,  x ,  x , 2009)
+EXT(ARB_texture_buffer_range                , ARB_texture_buffer_range               ,  x , GLC,  x ,  x , 2012)
+EXT(ARB_texture_compression                 , dummy_true                             , GLL,  x ,  x ,  x , 2000)
+EXT(ARB_texture_compression_bptc            , ARB_texture_compression_bptc           , GLL, GLC,  x ,  x , 2010)
+EXT(ARB_texture_compression_rgtc            , ARB_texture_compression_rgtc           , GLL, GLC,  x ,  x , 2004)
+EXT(ARB_texture_cube_map                    , ARB_texture_cube_map                   , GLL,  x ,  x ,  x , 1999)
+EXT(ARB_texture_cube_map_array              , ARB_texture_cube_map_array             , GLL, GLC,  x ,  x , 2009)
+EXT(ARB_texture_env_add                     , dummy_true                             , GLL,  x ,  x ,  x , 1999)
+EXT(ARB_texture_env_combine                 , ARB_texture_env_combine                , GLL,  x ,  x ,  x , 2001)
+EXT(ARB_texture_env_crossbar                , ARB_texture_env_crossbar               , GLL,  x ,  x ,  x , 2001)
+EXT(ARB_texture_env_dot3                    , ARB_texture_env_dot3                   , GLL,  x ,  x ,  x , 2001)
+EXT(ARB_texture_float                       , ARB_texture_float                      , GLL, GLC,  x ,  x , 2004)
+EXT(ARB_texture_gather                      , ARB_texture_gather                     , GLL, GLC,  x ,  x , 2009)
+EXT(ARB_texture_mirrored_repeat             , dummy_true                             , GLL,  x ,  x ,  x , 2001)
+EXT(ARB_texture_mirror_clamp_to_edge        , ARB_texture_mirror_clamp_to_edge       , GLL, GLC,  x ,  x , 2013)
+EXT(ARB_texture_multisample                 , ARB_texture_multisample                , GLL, GLC,  x ,  x , 2009)
+EXT(ARB_texture_non_power_of_two            , ARB_texture_non_power_of_two           , GLL, GLC,  x ,  x , 2003)
+EXT(ARB_texture_query_levels                , ARB_texture_query_levels               , GLL, GLC,  x ,  x , 2012)
+EXT(ARB_texture_query_lod                   , ARB_texture_query_lod                  , GLL, GLC,  x ,  x , 2009)
+EXT(ARB_texture_rectangle                   , NV_texture_rectangle                   , GLL, GLC,  x ,  x , 2004)
+EXT(ARB_texture_rgb10_a2ui                  , ARB_texture_rgb10_a2ui                 , GLL, GLC,  x ,  x , 2009)
+EXT(ARB_texture_rg                          , ARB_texture_rg                         , GLL, GLC,  x ,  x , 2008)
+EXT(ARB_texture_stencil8                    , ARB_texture_stencil8                   , GLL, GLC,  x ,  x , 2013)
+EXT(ARB_texture_storage                     , dummy_true                             , GLL, GLC,  x ,  x , 2011)
+EXT(ARB_texture_storage_multisample         , ARB_texture_multisample                , GLL, GLC,  x ,  x , 2012)
+EXT(ARB_texture_view                        , ARB_texture_view                       , GLL, GLC,  x ,  x , 2012)
+EXT(ARB_texture_swizzle                     , EXT_texture_swizzle                    , GLL, GLC,  x ,  x , 2008)
+EXT(ARB_timer_query                         , ARB_timer_query                        , GLL, GLC,  x ,  x , 2010)
+EXT(ARB_transform_feedback2                 , ARB_transform_feedback2                , GLL, GLC,  x ,  x , 2010)
+EXT(ARB_transform_feedback3                 , ARB_transform_feedback3                , GLL, GLC,  x ,  x , 2010)
+EXT(ARB_transform_feedback_instanced        , ARB_transform_feedback_instanced       , GLL, GLC,  x ,  x , 2011)
+EXT(ARB_transpose_matrix                    , dummy_true                             , GLL,  x ,  x ,  x , 1999)
+EXT(ARB_uniform_buffer_object               , ARB_uniform_buffer_object              , GLL, GLC,  x ,  x , 2009)
+EXT(ARB_vertex_array_bgra                   , EXT_vertex_array_bgra                  , GLL, GLC,  x ,  x , 2008)
+EXT(ARB_vertex_array_object                 , dummy_true                             , GLL, GLC,  x ,  x , 2006)
+EXT(ARB_vertex_attrib_binding               , dummy_true                             , GLL, GLC,  x ,  x , 2012)
+EXT(ARB_vertex_buffer_object                , dummy_true                             , GLL,  x ,  x ,  x , 2003)
+EXT(ARB_vertex_program                      , ARB_vertex_program                     , GLL,  x ,  x ,  x , 2002)
+EXT(ARB_vertex_shader                       , ARB_vertex_shader                      , GLL, GLC,  x ,  x , 2002)
+EXT(ARB_vertex_attrib_64bit                 , ARB_vertex_attrib_64bit                ,  x , GLC,  x ,  x , 2010)
+EXT(ARB_vertex_type_10f_11f_11f_rev         , ARB_vertex_type_10f_11f_11f_rev        , GLL, GLC,  x ,  x , 2013)
+EXT(ARB_vertex_type_2_10_10_10_rev          , ARB_vertex_type_2_10_10_10_rev         , GLL, GLC,  x ,  x , 2009)
+EXT(ARB_viewport_array                      , ARB_viewport_array                     ,  x , GLC,  x ,  x , 2010)
+EXT(ARB_window_pos                          , dummy_true                             , GLL,  x ,  x ,  x , 2001)
 
-EXT(EXT_abgr                                , dummy_true                             , GL             ,  0,  0,  0,  0, 1995)
-EXT(EXT_bgra                                , dummy_true                             , GLL            ,  0,  0,  0,  0, 1995)
-EXT(EXT_blend_color                         , EXT_blend_color                        , GLL            ,  0,  0,  0,  0, 1995)
-EXT(EXT_blend_equation_separate             , EXT_blend_equation_separate            , GL             ,  0,  0,  0,  0, 2003)
-EXT(EXT_blend_func_separate                 , EXT_blend_func_separate                , GLL            ,  0,  0,  0,  0, 1999)
-EXT(EXT_discard_framebuffer                 , dummy_true                             ,       ES1 | ES2,  0,  0,  0,  0, 2009)
-EXT(EXT_blend_minmax                        , EXT_blend_minmax                       , GLL | ES1 | ES2,  0,  0,  0,  0, 1995)
-EXT(EXT_blend_subtract                      , dummy_true                             , GLL            ,  0,  0,  0,  0, 1995)
-EXT(EXT_buffer_storage                      , ARB_buffer_storage                     , ES2            ,  0,  0,  0, 31, 2015)
-EXT(EXT_compiled_vertex_array               , dummy_true                             , GLL            ,  0,  0,  0,  0, 1996)
-EXT(EXT_copy_texture                        , dummy_true                             , GLL            ,  0,  0,  0,  0, 1995)
-EXT(EXT_depth_bounds_test                   , EXT_depth_bounds_test                  , GL             ,  0,  0,  0,  0, 2002)
-EXT(EXT_draw_buffers                        , dummy_true                             ,             ES2,  0,  0,  0,  0, 2012)
-EXT(EXT_draw_buffers2                       , EXT_draw_buffers2                      , GL             ,  0,  0,  0,  0, 2006)
-EXT(EXT_draw_elements_base_vertex           , ARB_draw_elements_base_vertex          , ES2            ,  0,  0,  0,  0, 2014)
-EXT(EXT_draw_instanced                      , ARB_draw_instanced                     , GL             ,  0,  0,  0,  0, 2006)
-EXT(EXT_draw_range_elements                 , dummy_true                             , GLL            ,  0,  0,  0,  0, 1997)
-EXT(EXT_fog_coord                           , dummy_true                             , GLL            ,  0,  0,  0,  0, 1999)
-EXT(EXT_framebuffer_blit                    , dummy_true                             , GL             ,  0,  0,  0,  0, 2005)
-EXT(EXT_framebuffer_multisample             , EXT_framebuffer_multisample            , GL             ,  0,  0,  0,  0, 2005)
-EXT(EXT_framebuffer_multisample_blit_scaled , EXT_framebuffer_multisample_blit_scaled, GL             ,  0,  0,  0,  0, 2011)
-EXT(EXT_framebuffer_object                  , dummy_true                             , GLL            ,  0,  0,  0,  0, 2000)
-EXT(EXT_framebuffer_sRGB                    , EXT_framebuffer_sRGB                   , GL             ,  0,  0,  0,  0, 1998)
-EXT(EXT_gpu_program_parameters              , EXT_gpu_program_parameters             , GLL            ,  0,  0,  0,  0, 2006)
-EXT(EXT_gpu_shader4                         , EXT_gpu_shader4                        , GL             ,  0,  0,  0,  0, 2006)
-EXT(EXT_map_buffer_range                    , ARB_map_buffer_range                   ,       ES1 | ES2,  0,  0,  0,  0, 2012)
-EXT(EXT_multi_draw_arrays                   , dummy_true                             , GLL | ES1 | ES2,  0,  0,  0,  0, 1999)
-EXT(EXT_packed_depth_stencil                , dummy_true                             , GL             ,  0,  0,  0,  0, 2005)
-EXT(EXT_packed_float                        , EXT_packed_float                       , GL             ,  0,  0,  0,  0, 2004)
-EXT(EXT_packed_pixels                       , dummy_true                             , GLL            ,  0,  0,  0,  0, 1997)
-EXT(EXT_pixel_buffer_object                 , EXT_pixel_buffer_object                , GL             ,  0,  0,  0,  0, 2004)
-EXT(EXT_point_parameters                    , EXT_point_parameters                   , GLL            ,  0,  0,  0,  0, 1997)
-EXT(EXT_polygon_offset                      , dummy_true                             , GLL            ,  0,  0,  0,  0, 1995)
-EXT(EXT_polygon_offset_clamp                , EXT_polygon_offset_clamp               , GL             ,  0,  0,  0,  0, 2014)
-EXT(EXT_provoking_vertex                    , EXT_provoking_vertex                   , GL             ,  0,  0,  0,  0, 2009)
-EXT(EXT_rescale_normal                      , dummy_true                             , GLL            ,  0,  0,  0,  0, 1997)
-EXT(EXT_secondary_color                     , dummy_true                             , GLL            ,  0,  0,  0,  0, 1999)
-EXT(EXT_separate_shader_objects             , dummy_true                             ,            ES2 ,  0,  0,  0,  0, 2013)
-EXT(EXT_separate_specular_color             , dummy_true                             , GLL            ,  0,  0,  0,  0, 1997)
-EXT(EXT_shader_integer_mix                  , EXT_shader_integer_mix                 , GL       | ES2 ,  0,  0,  0, 30, 2013)
-EXT(EXT_shadow_funcs                        , ARB_shadow                             , GLL            ,  0,  0,  0,  0, 2002)
-EXT(EXT_stencil_two_side                    , EXT_stencil_two_side                   , GLL            ,  0,  0,  0,  0, 2001)
-EXT(EXT_stencil_wrap                        , dummy_true                             , GLL            ,  0,  0,  0,  0, 2002)
-EXT(EXT_subtexture                          , dummy_true                             , GLL            ,  0,  0,  0,  0, 1995)
-EXT(EXT_texture3D                           , EXT_texture3D                          , GLL            ,  0,  0,  0,  0, 1996)
-EXT(EXT_texture_array                       , EXT_texture_array                      , GL             ,  0,  0,  0,  0, 2006)
-EXT(EXT_texture_compression_dxt1            , ANGLE_texture_compression_dxt          , GL | ES1 | ES2 ,  0,  0,  0,  0, 2004)
-EXT(ANGLE_texture_compression_dxt3          , ANGLE_texture_compression_dxt          , GL | ES1 | ES2 ,  0,  0,  0,  0, 2011)
-EXT(ANGLE_texture_compression_dxt5          , ANGLE_texture_compression_dxt          , GL | ES1 | ES2 ,  0,  0,  0,  0, 2011)
-EXT(EXT_texture_compression_latc            , EXT_texture_compression_latc           , GLL            ,  0,  0,  0,  0, 2006)
-EXT(EXT_texture_compression_rgtc            , ARB_texture_compression_rgtc           , GL             ,  0,  0,  0,  0, 2004)
-EXT(EXT_texture_compression_s3tc            , EXT_texture_compression_s3tc           , GL             ,  0,  0,  0,  0, 2000)
-EXT(EXT_texture_cube_map                    , ARB_texture_cube_map                   , GLL            ,  0,  0,  0,  0, 2001)
-EXT(EXT_texture_edge_clamp                  , dummy_true                             , GLL            ,  0,  0,  0,  0, 1997)
-EXT(EXT_texture_env_add                     , dummy_true                             , GLL            ,  0,  0,  0,  0, 1999)
-EXT(EXT_texture_env_combine                 , dummy_true                             , GLL            ,  0,  0,  0,  0, 2000)
-EXT(EXT_texture_env_dot3                    , EXT_texture_env_dot3                   , GLL            ,  0,  0,  0,  0, 2000)
-EXT(EXT_texture_filter_anisotropic          , EXT_texture_filter_anisotropic         , GL | ES1 | ES2 ,  0,  0,  0,  0, 1999)
-EXT(EXT_texture_format_BGRA8888             , dummy_true                             ,      ES1 | ES2 ,  0,  0,  0,  0, 2005)
-EXT(EXT_texture_rg                          , ARB_texture_rg                         ,            ES2 ,  0,  0,  0,  0, 2011)
-EXT(EXT_read_format_bgra                    , dummy_true                             ,      ES1 | ES2 ,  0,  0,  0,  0, 2009)
-EXT(EXT_texture_integer                     , EXT_texture_integer                    , GL             ,  0,  0,  0,  0, 2006)
-EXT(EXT_texture_lod_bias                    , dummy_true                             , GLL | ES1      ,  0,  0,  0,  0, 1999)
-EXT(EXT_texture_mirror_clamp                , EXT_texture_mirror_clamp               , GL             ,  0,  0,  0,  0, 2004)
-EXT(EXT_texture_object                      , dummy_true                             , GLL            ,  0,  0,  0,  0, 1995)
-EXT(EXT_texture                             , dummy_true                             , GLL            ,  0,  0,  0,  0, 1996)
-EXT(EXT_texture_rectangle                   , NV_texture_rectangle                   , GLL            ,  0,  0,  0,  0, 2004)
-EXT(EXT_texture_shared_exponent             , EXT_texture_shared_exponent            , GL             ,  0,  0,  0,  0, 2004)
-EXT(EXT_texture_snorm                       , EXT_texture_snorm                      , GL             ,  0,  0,  0,  0, 2009)
-EXT(EXT_texture_sRGB                        , EXT_texture_sRGB                       , GL             ,  0,  0,  0,  0, 2004)
-EXT(EXT_texture_sRGB_decode                 , EXT_texture_sRGB_decode                , GL             ,  0,  0,  0,  0, 2006)
-EXT(EXT_texture_swizzle                     , EXT_texture_swizzle                    , GL             ,  0,  0,  0,  0, 2008)
-EXT(EXT_texture_type_2_10_10_10_REV         , dummy_true                             ,            ES2 ,  0,  0,  0,  0, 2008)
-EXT(EXT_timer_query                         , EXT_timer_query                        , GL             ,  0,  0,  0,  0, 2006)
-EXT(EXT_transform_feedback                  , EXT_transform_feedback                 , GL             ,  0,  0,  0,  0, 2011)
-EXT(EXT_unpack_subimage                     , dummy_true                             ,            ES2 ,  0,  0,  0,  0, 2011)
-EXT(EXT_vertex_array_bgra                   , EXT_vertex_array_bgra                  , GL             ,  0,  0,  0,  0, 2008)
-EXT(EXT_vertex_array                        , dummy_true                             , GLL            ,  0,  0,  0,  0, 1995)
-EXT(EXT_color_buffer_float                  , dummy_true                             ,            ES2 ,  0,  0,  0, 30, 2013)
+EXT(EXT_abgr                                , dummy_true                             , GLL, GLC,  x ,  x , 1995)
+EXT(EXT_bgra                                , dummy_true                             , GLL,  x ,  x ,  x , 1995)
+EXT(EXT_blend_color                         , EXT_blend_color                        , GLL,  x ,  x ,  x , 1995)
+EXT(EXT_blend_equation_separate             , EXT_blend_equation_separate            , GLL, GLC,  x ,  x , 2003)
+EXT(EXT_blend_func_separate                 , EXT_blend_func_separate                , GLL,  x ,  x ,  x , 1999)
+EXT(EXT_buffer_storage                      , ARB_buffer_storage                     ,  x ,  x ,  x ,  31, 2015)
+EXT(EXT_discard_framebuffer                 , dummy_true                             ,  x ,  x , ES1, ES2, 2009)
+EXT(EXT_blend_minmax                        , EXT_blend_minmax                       , GLL,  x , ES1, ES2, 1995)
+EXT(EXT_blend_subtract                      , dummy_true                             , GLL,  x ,  x ,  x , 1995)
+EXT(EXT_compiled_vertex_array               , dummy_true                             , GLL,  x ,  x ,  x , 1996)
+EXT(EXT_copy_texture                        , dummy_true                             , GLL,  x ,  x ,  x , 1995)
+EXT(EXT_depth_bounds_test                   , EXT_depth_bounds_test                  , GLL, GLC,  x ,  x , 2002)
+EXT(EXT_draw_buffers                        , dummy_true                             ,  x ,  x ,  x , ES2, 2012)
+EXT(EXT_draw_buffers2                       , EXT_draw_buffers2                      , GLL, GLC,  x ,  x , 2006)
+EXT(EXT_draw_elements_base_vertex           , ARB_draw_elements_base_vertex          ,  x ,  x ,  x , ES2, 2014)
+EXT(EXT_draw_instanced                      , ARB_draw_instanced                     , GLL, GLC,  x ,  x , 2006)
+EXT(EXT_draw_range_elements                 , dummy_true                             , GLL,  x ,  x ,  x , 1997)
+EXT(EXT_fog_coord                           , dummy_true                             , GLL,  x ,  x ,  x , 1999)
+EXT(EXT_framebuffer_blit                    , dummy_true                             , GLL, GLC,  x ,  x , 2005)
+EXT(EXT_framebuffer_multisample             , EXT_framebuffer_multisample            , GLL, GLC,  x ,  x , 2005)
+EXT(EXT_framebuffer_multisample_blit_scaled , EXT_framebuffer_multisample_blit_scaled, GLL, GLC,  x ,  x , 2011)
+EXT(EXT_framebuffer_object                  , dummy_true                             , GLL,  x ,  x ,  x , 2000)
+EXT(EXT_framebuffer_sRGB                    , EXT_framebuffer_sRGB                   , GLL, GLC,  x ,  x , 1998)
+EXT(EXT_gpu_program_parameters              , EXT_gpu_program_parameters             , GLL,  x ,  x ,  x , 2006)
+EXT(EXT_gpu_shader4                         , EXT_gpu_shader4                        , GLL, GLC,  x ,  x , 2006)
+EXT(EXT_map_buffer_range                    , ARB_map_buffer_range                   ,  x ,  x , ES1, ES2, 2012)
+EXT(EXT_multi_draw_arrays                   , dummy_true                             , GLL,  x , ES1, ES2, 1999)
+EXT(EXT_packed_depth_stencil                , dummy_true                             , GLL, GLC,  x ,  x , 2005)
+EXT(EXT_packed_float                        , EXT_packed_float                       , GLL, GLC,  x ,  x , 2004)
+EXT(EXT_packed_pixels                       , dummy_true                             , GLL,  x ,  x ,  x , 1997)
+EXT(EXT_pixel_buffer_object                 , EXT_pixel_buffer_object                , GLL, GLC,  x ,  x , 2004)
+EXT(EXT_point_parameters                    , EXT_point_parameters                   , GLL,  x ,  x ,  x , 1997)
+EXT(EXT_polygon_offset                      , dummy_true                             , GLL,  x ,  x ,  x , 1995)
+EXT(EXT_polygon_offset_clamp                , EXT_polygon_offset_clamp               , GLL, GLC,  x ,  x , 2014)
+EXT(EXT_provoking_vertex                    , EXT_provoking_vertex                   , GLL, GLC,  x ,  x , 2009)
+EXT(EXT_rescale_normal                      , dummy_true                             , GLL,  x ,  x ,  x , 1997)
+EXT(EXT_secondary_color                     , dummy_true                             , GLL,  x ,  x ,  x , 1999)
+EXT(EXT_separate_shader_objects             , dummy_true                             ,  x ,  x ,  x , ES2, 2013)
+EXT(EXT_separate_specular_color             , dummy_true                             , GLL,  x ,  x ,  x , 1997)
+EXT(EXT_shader_integer_mix                  , EXT_shader_integer_mix                 , GLL, GLC, ES1,  30, 2013)
+EXT(EXT_shadow_funcs                        , ARB_shadow                             , GLL,  x ,  x ,  x , 2002)
+EXT(EXT_stencil_two_side                    , EXT_stencil_two_side                   , GLL,  x ,  x ,  x , 2001)
+EXT(EXT_stencil_wrap                        , dummy_true                             , GLL,  x ,  x ,  x , 2002)
+EXT(EXT_subtexture                          , dummy_true                             , GLL,  x ,  x ,  x , 1995)
+EXT(EXT_texture3D                           , EXT_texture3D                          , GLL,  x ,  x ,  x , 1996)
+EXT(EXT_texture_array                       , EXT_texture_array                      , GLL, GLC,  x ,  x , 2006)
+EXT(EXT_texture_compression_dxt1            , ANGLE_texture_compression_dxt          , GLL, GLC, ES1, ES2, 2004)
+EXT(ANGLE_texture_compression_dxt3          , ANGLE_texture_compression_dxt          , GLL, GLC, ES1, ES2, 2011)
+EXT(ANGLE_texture_compression_dxt5          , ANGLE_texture_compression_dxt          , GLL, GLC, ES1, ES2, 2011)
+EXT(EXT_texture_compression_latc            , EXT_texture_compression_latc           , GLL,  x ,  x ,  x , 2006)
+EXT(EXT_texture_compression_rgtc            , ARB_texture_compression_rgtc           , GLL, GLC,  x ,  x , 2004)
+EXT(EXT_texture_compression_s3tc            , EXT_texture_compression_s3tc           , GLL, GLC,  x ,  x , 2000)
+EXT(EXT_texture_cube_map                    , ARB_texture_cube_map                   , GLL,  x ,  x ,  x , 2001)
+EXT(EXT_texture_edge_clamp                  , dummy_true                             , GLL,  x ,  x ,  x , 1997)
+EXT(EXT_texture_env_add                     , dummy_true                             , GLL,  x ,  x ,  x , 1999)
+EXT(EXT_texture_env_combine                 , dummy_true                             , GLL,  x ,  x ,  x , 2000)
+EXT(EXT_texture_env_dot3                    , EXT_texture_env_dot3                   , GLL,  x ,  x ,  x , 2000)
+EXT(EXT_texture_filter_anisotropic          , EXT_texture_filter_anisotropic         , GLL, GLC, ES1, ES2, 1999)
+EXT(EXT_texture_format_BGRA8888             , dummy_true                             ,  x ,  x , ES1, ES2, 2005)
+EXT(EXT_texture_rg                          , ARB_texture_rg                         ,  x ,  x ,  x , ES2, 2011)
+EXT(EXT_read_format_bgra                    , dummy_true                             ,  x ,  x , ES1, ES2, 2009)
+EXT(EXT_texture_integer                     , EXT_texture_integer                    , GLL, GLC,  x ,  x , 2006)
+EXT(EXT_texture_lod_bias                    , dummy_true                             , GLL,  x , ES1,  x , 1999)
+EXT(EXT_texture_mirror_clamp                , EXT_texture_mirror_clamp               , GLL, GLC,  x ,  x , 2004)
+EXT(EXT_texture_object                      , dummy_true                             , GLL,  x ,  x ,  x , 1995)
+EXT(EXT_texture                             , dummy_true                             , GLL,  x ,  x ,  x , 1996)
+EXT(EXT_texture_rectangle                   , NV_texture_rectangle                   , GLL,  x ,  x ,  x , 2004)
+EXT(EXT_texture_shared_exponent             , EXT_texture_shared_exponent            , GLL, GLC,  x ,  x , 2004)
+EXT(EXT_texture_snorm                       , EXT_texture_snorm                      , GLL, GLC,  x ,  x , 2009)
+EXT(EXT_texture_sRGB                        , EXT_texture_sRGB                       , GLL, GLC,  x ,  x , 2004)
+EXT(EXT_texture_sRGB_decode                 , EXT_texture_sRGB_decode                , GLL, GLC,  x ,  x , 2006)
+EXT(EXT_texture_swizzle                     , EXT_texture_swizzle                    , GLL, GLC,  x ,  x , 2008)
+EXT(EXT_texture_type_2_10_10_10_REV         , dummy_true                             ,  x ,  x ,  x , ES2, 2008)
+EXT(EXT_timer_query                         , EXT_timer_query                        , GLL, GLC,  x ,  x , 2006)
+EXT(EXT_transform_feedback                  , EXT_transform_feedback                 , GLL, GLC,  x ,  x , 2011)
+EXT(EXT_unpack_subimage                     , dummy_true                             ,  x ,  x ,  x , ES2, 2011)
+EXT(EXT_vertex_array_bgra                   , EXT_vertex_array_bgra                  , GLL, GLC,  x ,  x , 2008)
+EXT(EXT_vertex_array                        , dummy_true                             , GLL,  x ,  x ,  x , 1995)
+EXT(EXT_color_buffer_float                  , dummy_true                             ,  x ,  x , ES1,  30, 2013)
 
 
-EXT(OES_blend_equation_separate             , EXT_blend_equation_separate            ,      ES1       ,  0,  0,  0,  0, 2009)
-EXT(OES_blend_func_separate                 , EXT_blend_func_separate                ,      ES1       ,  0,  0,  0,  0, 2009)
-EXT(OES_blend_subtract                      , dummy_true                             ,      ES1       ,  0,  0,  0,  0, 2009)
-EXT(OES_byte_coordinates                    , dummy_true                             ,      ES1       ,  0,  0,  0,  0, 2002)
-EXT(OES_compressed_ETC1_RGB8_texture        , OES_compressed_ETC1_RGB8_texture       ,      ES1 | ES2 ,  0,  0,  0,  0, 2005)
-EXT(OES_compressed_paletted_texture         , dummy_true                             ,      ES1       ,  0,  0,  0,  0, 2003)
-EXT(OES_depth24                             , dummy_true                             ,      ES1 | ES2 ,  0,  0,  0,  0, 2005)
-EXT(OES_depth32                             , dummy_false                            ,       DISABLE  ,  0,  0,  0,  0, 2005)
-EXT(OES_depth_texture                       , ARB_depth_texture                      ,            ES2 ,  0,  0,  0,  0, 2006)
-EXT(OES_depth_texture_cube_map              , OES_depth_texture_cube_map             ,            ES2 ,  0,  0,  0,  0, 2012)
-EXT(OES_draw_elements_base_vertex           , ARB_draw_elements_base_vertex          , ES2            ,  0,  0,  0,  0, 2014)
-EXT(OES_draw_texture                        , OES_draw_texture                       ,      ES1       ,  0,  0,  0,  0, 2004)
-EXT(OES_EGL_sync                            , dummy_true                             ,      ES1 | ES2 ,  0,  0,  0,  0, 2010)
-EXT(OES_EGL_image                           , OES_EGL_image                          , GL | ES1 | ES2 ,  0,  0,  0,  0, 2006) /* FIXME: Mesa expects GL_OES_EGL_image to be available in OpenGL contexts. */
-EXT(OES_EGL_image_external                  , OES_EGL_image_external                 ,      ES1 | ES2 ,  0,  0,  0,  0, 2010)
-EXT(OES_element_index_uint                  , dummy_true                             ,      ES1 | ES2 ,  0,  0,  0,  0, 2005)
-EXT(OES_fbo_render_mipmap                   , dummy_true                             ,      ES1 | ES2 ,  0,  0,  0,  0, 2005)
-EXT(OES_fixed_point                         , dummy_true                             ,      ES1       ,  0,  0,  0,  0, 2002)
-EXT(OES_framebuffer_object                  , dummy_true                             ,      ES1       ,  0,  0,  0,  0, 2005)
-EXT(OES_get_program_binary                  , dummy_true                             ,            ES2 ,  0,  0,  0,  0, 2008)
-EXT(OES_mapbuffer                           , dummy_true                             ,      ES1 | ES2 ,  0,  0,  0,  0, 2005)
-EXT(OES_packed_depth_stencil                , dummy_true                             ,      ES1 | ES2 ,  0,  0,  0,  0, 2007)
-EXT(OES_point_size_array                    , dummy_true                             ,      ES1       ,  0,  0,  0,  0, 2004)
-EXT(OES_point_sprite                        , ARB_point_sprite                       ,      ES1       ,  0,  0,  0,  0, 2004)
-EXT(OES_query_matrix                        , dummy_true                             ,      ES1       ,  0,  0,  0,  0, 2003)
-EXT(OES_read_format                         , dummy_true                             , GL | ES1       ,  0,  0,  0,  0, 2003)
-EXT(OES_rgb8_rgba8                          , dummy_true                             ,      ES1 | ES2 ,  0,  0,  0,  0, 2005)
-EXT(OES_single_precision                    , dummy_true                             ,      ES1       ,  0,  0,  0,  0, 2003)
-EXT(OES_standard_derivatives                , OES_standard_derivatives               ,            ES2 ,  0,  0,  0,  0, 2005)
-EXT(OES_stencil1                            , dummy_false                            ,       DISABLE  ,  0,  0,  0,  0, 2005)
-EXT(OES_stencil4                            , dummy_false                            ,       DISABLE  ,  0,  0,  0,  0, 2005)
-EXT(OES_stencil8                            , dummy_true                             ,      ES1 | ES2 ,  0,  0,  0,  0, 2005)
-EXT(OES_stencil_wrap                        , dummy_true                             ,      ES1       ,  0,  0,  0,  0, 2002)
-EXT(OES_surfaceless_context                 , dummy_true                             ,      ES1 | ES2 ,  0,  0,  0,  0, 2012)
-EXT(OES_texture_3D                          , EXT_texture3D                          ,            ES2 ,  0,  0,  0,  0, 2005)
-EXT(OES_texture_cube_map                    , ARB_texture_cube_map                   ,      ES1       ,  0,  0,  0,  0, 2007)
-EXT(OES_texture_env_crossbar                , ARB_texture_env_crossbar               ,      ES1       ,  0,  0,  0,  0, 2005)
-EXT(OES_texture_float                       , OES_texture_float                      ,            ES2 ,  0,  0,  0,  0, 2005)
-EXT(OES_texture_float_linear                , OES_texture_float_linear               ,            ES2 ,  0,  0,  0,  0, 2005)
-EXT(OES_texture_half_float                  , OES_texture_half_float                 ,            ES2 ,  0,  0,  0,  0, 2005)
-EXT(OES_texture_half_float_linear           , OES_texture_half_float_linear          ,            ES2 ,  0,  0,  0,  0, 2005)
-EXT(OES_texture_mirrored_repeat             , dummy_true                             ,      ES1       ,  0,  0,  0,  0, 2005)
-EXT(OES_texture_storage_multisample_2d_array, ARB_texture_multisample                ,            ES2 ,  0,  0,  0, 31, 2014)
-EXT(OES_texture_npot                        , ARB_texture_non_power_of_two           ,      ES1 | ES2 ,  0,  0,  0,  0, 2005)
-EXT(OES_vertex_array_object                 , dummy_true                             ,      ES1 | ES2 ,  0,  0,  0,  0, 2010)
+EXT(OES_blend_equation_separate             , EXT_blend_equation_separate            ,  x ,  x , ES1,  x , 2009)
+EXT(OES_blend_func_separate                 , EXT_blend_func_separate                ,  x ,  x , ES1,  x , 2009)
+EXT(OES_blend_subtract                      , dummy_true                             ,  x ,  x , ES1,  x , 2009)
+EXT(OES_byte_coordinates                    , dummy_true                             ,  x ,  x , ES1,  x , 2002)
+EXT(OES_compressed_ETC1_RGB8_texture        , OES_compressed_ETC1_RGB8_texture       ,  x ,  x , ES1, ES2, 2005)
+EXT(OES_compressed_paletted_texture         , dummy_true                             ,  x ,  x , ES1,  x , 2003)
+EXT(OES_depth24                             , dummy_true                             ,  x ,  x , ES1, ES2, 2005)
+EXT(OES_depth32                             , dummy_false                            ,  x ,  x ,  x ,  x , 2005)
+EXT(OES_depth_texture                       , ARB_depth_texture                      ,  x ,  x ,  x , ES2, 2006)
+EXT(OES_depth_texture_cube_map              , OES_depth_texture_cube_map             ,  x ,  x ,  x , ES2, 2012)
+EXT(OES_draw_elements_base_vertex           , ARB_draw_elements_base_vertex          ,  x ,  x ,  x , ES2, 2014)
+EXT(OES_draw_texture                        , OES_draw_texture                       ,  x ,  x , ES1,  x , 2004)
+EXT(OES_EGL_sync                            , dummy_true                             ,  x ,  x , ES1, ES2, 2010)
+EXT(OES_EGL_image                           , OES_EGL_image                          , GLL, GLC, ES1, ES2, 2006) /* FIXME: Mesa expects GL_OES_EGL_image to be available in OpenGL contexts. */
+EXT(OES_EGL_image_external                  , OES_EGL_image_external                 ,  x ,  x , ES1, ES2, 2010)
+EXT(OES_element_index_uint                  , dummy_true                             ,  x ,  x , ES1, ES2, 2005)
+EXT(OES_fbo_render_mipmap                   , dummy_true                             ,  x ,  x , ES1, ES2, 2005)
+EXT(OES_fixed_point                         , dummy_true                             ,  x ,  x , ES1,  x , 2002)
+EXT(OES_framebuffer_object                  , dummy_true                             ,  x ,  x , ES1,  x , 2005)
+EXT(OES_get_program_binary                  , dummy_true                             ,  x ,  x ,  x , ES2, 2008)
+EXT(OES_mapbuffer                           , dummy_true                             ,  x ,  x , ES1, ES2, 2005)
+EXT(OES_packed_depth_stencil                , dummy_true                             ,  x ,  x , ES1, ES2, 2007)
+EXT(OES_point_size_array                    , dummy_true                             ,  x ,  x , ES1,  x , 2004)
+EXT(OES_point_sprite                        , ARB_point_sprite                       ,  x ,  x , ES1,  x , 2004)
+EXT(OES_query_matrix                        , dummy_true                             ,  x ,  x , ES1,  x , 2003)
+EXT(OES_read_format                         , dummy_true                             , GLL, GLC, ES1,  x , 2003)
+EXT(OES_rgb8_rgba8                          , dummy_true                             ,  x ,  x , ES1, ES2, 2005)
+EXT(OES_single_precision                    , dummy_true                             ,  x ,  x , ES1,  x , 2003)
+EXT(OES_standard_derivatives                , OES_standard_derivatives               ,  x ,  x ,  x , ES2, 2005)
+EXT(OES_stencil1                            , dummy_false                            ,  x ,  x ,  x ,  x , 2005)
+EXT(OES_stencil4                            , dummy_false                            ,  x ,  x ,  x ,  x , 2005)
+EXT(OES_stencil8                            , dummy_true                             ,  x ,  x , ES1, ES2, 2005)
+EXT(OES_stencil_wrap                        , dummy_true                             ,  x ,  x , ES1,  x , 2002)
+EXT(OES_surfaceless_context                 , dummy_true                             ,  x ,  x , ES1, ES2, 2012)
+EXT(OES_texture_3D                          , EXT_texture3D                          ,  x ,  x ,  x , ES2, 2005)
+EXT(OES_texture_cube_map                    , ARB_texture_cube_map                   ,  x ,  x , ES1,  x , 2007)
+EXT(OES_texture_env_crossbar                , ARB_texture_env_crossbar               ,  x ,  x , ES1,  x , 2005)
+EXT(OES_texture_float                       , OES_texture_float                      ,  x ,  x ,  x , ES2, 2005)
+EXT(OES_texture_float_linear                , OES_texture_float_linear               ,  x ,  x ,  x , ES2, 2005)
+EXT(OES_texture_half_float                  , OES_texture_half_float                 ,  x ,  x ,  x , ES2, 2005)
+EXT(OES_texture_half_float_linear           , OES_texture_half_float_linear          ,  x ,  x ,  x , ES2, 2005)
+EXT(OES_texture_mirrored_repeat             , dummy_true                             ,  x ,  x , ES1,  x , 2005)
+EXT(OES_texture_storage_multisample_2d_array, ARB_texture_multisample                ,  x ,  x , ES1,  31, 2014)
+EXT(OES_texture_npot                        , ARB_texture_non_power_of_two           ,  x ,  x , ES1, ES2, 2005)
+EXT(OES_vertex_array_object                 , dummy_true                             ,  x ,  x , ES1, ES2, 2010)
 
 
-EXT(KHR_debug                               , dummy_true                             , GL             ,  0,  0,  0,  0, 2012)
-EXT(KHR_context_flush_control               , dummy_true                             , GL       | ES2 ,  0,  0,  0,  0, 2014)
-EXT(KHR_texture_compression_astc_hdr        , KHR_texture_compression_astc_hdr       , GL       | ES2 ,  0,  0,  0,  0, 2012)
-EXT(KHR_texture_compression_astc_ldr        , KHR_texture_compression_astc_ldr       , GL       | ES2 ,  0,  0,  0,  0, 2012)
+EXT(KHR_debug                               , dummy_true                             , GLL, GLC,  x ,  x , 2012)
+EXT(KHR_context_flush_control               , dummy_true                             , GLL, GLC,  x , ES2, 2014)
+EXT(KHR_texture_compression_astc_hdr        , KHR_texture_compression_astc_hdr       , GLL, GLC,  x , ES2, 2012)
+EXT(KHR_texture_compression_astc_ldr        , KHR_texture_compression_astc_ldr       , GLL, GLC,  x , ES2, 2012)
 
 
-EXT(3DFX_texture_compression_FXT1           , TDFX_texture_compression_FXT1          , GL             ,  0,  0,  0,  0, 1999)
-EXT(AMD_conservative_depth                  , ARB_conservative_depth                 , GL             ,  0,  0,  0,  0, 2009)
-EXT(AMD_draw_buffers_blend                  , ARB_draw_buffers_blend                 , GL             ,  0,  0,  0,  0, 2009)
-EXT(AMD_performance_monitor                 , AMD_performance_monitor                , GL             ,  0,  0,  0,  0, 2007)
-EXT(AMD_pinned_memory                       , AMD_pinned_memory                      , GL             ,  0,  0,  0,  0, 2013)
-EXT(AMD_seamless_cubemap_per_texture        , AMD_seamless_cubemap_per_texture       , GL             ,  0,  0,  0,  0, 2009)
-EXT(AMD_shader_stencil_export               , ARB_shader_stencil_export              , GL             ,  0,  0,  0,  0, 2009)
-EXT(AMD_shader_trinary_minmax               , dummy_true                             , GL             ,  0,  0,  0,  0, 2012)
-EXT(AMD_vertex_shader_layer                 , AMD_vertex_shader_layer                , GLC            ,  0,  0,  0,  0, 2012)
-EXT(AMD_vertex_shader_viewport_index        , AMD_vertex_shader_viewport_index       , GLC            ,  0,  0,  0,  0, 2012)
-EXT(APPLE_object_purgeable                  , APPLE_object_purgeable                 , GL             ,  0,  0,  0,  0, 2006)
-EXT(APPLE_packed_pixels                     , dummy_true                             , GLL            ,  0,  0,  0,  0, 2002)
-EXT(APPLE_texture_max_level                 , dummy_true                             ,      ES1 | ES2 ,  0,  0,  0,  0, 2009)
-EXT(APPLE_vertex_array_object               , dummy_true                             , GLL            ,  0,  0,  0,  0, 2002)
-EXT(ATI_blend_equation_separate             , EXT_blend_equation_separate            , GL             ,  0,  0,  0,  0, 2003)
-EXT(ATI_draw_buffers                        , dummy_true                             , GLL            ,  0,  0,  0,  0, 2002)
-EXT(ATI_fragment_shader                     , ATI_fragment_shader                    , GLL            ,  0,  0,  0,  0, 2001)
-EXT(ATI_separate_stencil                    , ATI_separate_stencil                   , GLL            ,  0,  0,  0,  0, 2006)
-EXT(ATI_texture_compression_3dc             , ATI_texture_compression_3dc            , GLL            ,  0,  0,  0,  0, 2004)
-EXT(ATI_texture_env_combine3                , ATI_texture_env_combine3               , GLL            ,  0,  0,  0,  0, 2002)
-EXT(ATI_texture_float                       , ARB_texture_float                      , GL             ,  0,  0,  0,  0, 2002)
-EXT(ATI_texture_mirror_once                 , ATI_texture_mirror_once                , GL             ,  0,  0,  0,  0, 2006)
-EXT(IBM_multimode_draw_arrays               , dummy_true                             , GL             ,  0,  0,  0,  0, 1998)
-EXT(IBM_rasterpos_clip                      , dummy_true                             , GLL            ,  0,  0,  0,  0, 1996)
-EXT(IBM_texture_mirrored_repeat             , dummy_true                             , GLL            ,  0,  0,  0,  0, 1998)
-EXT(INGR_blend_func_separate                , EXT_blend_func_separate                , GLL            ,  0,  0,  0,  0, 1999)
-EXT(INTEL_performance_query                 , INTEL_performance_query                , GL       | ES2 ,  0,  0,  0,  0, 2013)
-EXT(MESA_pack_invert                        , MESA_pack_invert                       , GL             ,  0,  0,  0,  0, 2002)
-EXT(MESA_texture_signed_rgba                , EXT_texture_snorm                      , GL             ,  0,  0,  0,  0, 2009)
-EXT(MESA_window_pos                         , dummy_true                             , GLL            ,  0,  0,  0,  0, 2000)
-EXT(MESA_ycbcr_texture                      , MESA_ycbcr_texture                     , GL             ,  0,  0,  0,  0, 2002)
-EXT(NV_blend_square                         , dummy_true                             , GLL            ,  0,  0,  0,  0, 1999)
-EXT(NV_conditional_render                   , NV_conditional_render                  , GL             ,  0,  0,  0,  0, 2008)
-EXT(NV_depth_clamp                          , ARB_depth_clamp                        , GL             ,  0,  0,  0,  0, 2001)
-EXT(NV_draw_buffers                         , dummy_true                             ,            ES2 ,  0,  0,  0,  0, 2011)
-EXT(NV_fbo_color_attachments                , dummy_true                             ,            ES2 ,  0,  0,  0,  0, 2010)
-EXT(NV_fog_distance                         , NV_fog_distance                        , GLL            ,  0,  0,  0,  0, 2001)
-EXT(NV_fragment_program_option              , NV_fragment_program_option             , GLL            ,  0,  0,  0,  0, 2005)
-EXT(NV_light_max_exponent                   , dummy_true                             , GLL            ,  0,  0,  0,  0, 1999)
-EXT(NV_packed_depth_stencil                 , dummy_true                             , GL             ,  0,  0,  0,  0, 2000)
-EXT(NV_point_sprite                         , NV_point_sprite                        , GL             ,  0,  0,  0,  0, 2001)
-EXT(NV_primitive_restart                    , NV_primitive_restart                   , GLL            ,  0,  0,  0,  0, 2002)
-EXT(NV_read_buffer                          , dummy_true                             ,            ES2 ,  0,  0,  0,  0, 2011)
-EXT(NV_read_depth                           , dummy_true                             ,            ES2 ,  0,  0,  0,  0, 2011)
-EXT(NV_read_depth_stencil                   , dummy_true                             ,            ES2 ,  0,  0,  0,  0, 2011)
-EXT(NV_read_stencil                         , dummy_true                             ,            ES2 ,  0,  0,  0,  0, 2011)
-EXT(NV_texgen_reflection                    , dummy_true                             , GLL            ,  0,  0,  0,  0, 1999)
-EXT(NV_texture_barrier                      , NV_texture_barrier                     , GL             ,  0,  0,  0,  0, 2009)
-EXT(NV_texture_env_combine4                 , NV_texture_env_combine4                , GLL            ,  0,  0,  0,  0, 1999)
-EXT(NV_texture_rectangle                    , NV_texture_rectangle                   , GLL            ,  0,  0,  0,  0, 2000)
-EXT(NV_vdpau_interop                        , NV_vdpau_interop                       , GL             ,  0,  0,  0,  0, 2010)
-EXT(S3_s3tc                                 , ANGLE_texture_compression_dxt          , GL             ,  0,  0,  0,  0, 1999)
-EXT(SGIS_generate_mipmap                    , dummy_true                             , GLL            ,  0,  0,  0,  0, 1997)
-EXT(SGIS_texture_border_clamp               , ARB_texture_border_clamp               , GLL            ,  0,  0,  0,  0, 1997)
-EXT(SGIS_texture_edge_clamp                 , dummy_true                             , GLL            ,  0,  0,  0,  0, 1997)
-EXT(SGIS_texture_lod                        , dummy_true                             , GLL            ,  0,  0,  0,  0, 1997)
-EXT(SUN_multi_draw_arrays                   , dummy_true                             , GLL            ,  0,  0,  0,  0, 1999)
+EXT(3DFX_texture_compression_FXT1           , TDFX_texture_compression_FXT1          , GLL, GLC,  x ,  x , 1999)
+EXT(AMD_conservative_depth                  , ARB_conservative_depth                 , GLL, GLC,  x ,  x , 2009)
+EXT(AMD_draw_buffers_blend                  , ARB_draw_buffers_blend                 , GLL, GLC,  x ,  x , 2009)
+EXT(AMD_performance_monitor                 , AMD_performance_monitor                , GLL, GLC,  x ,  x , 2007)
+EXT(AMD_pinned_memory                       , AMD_pinned_memory                      , GLL, GLC,  x ,  x , 2013)
+EXT(AMD_seamless_cubemap_per_texture        , AMD_seamless_cubemap_per_texture       , GLL, GLC,  x ,  x , 2009)
+EXT(AMD_shader_stencil_export               , ARB_shader_stencil_export              , GLL, GLC,  x ,  x , 2009)
+EXT(AMD_shader_trinary_minmax               , dummy_true                             , GLL, GLC,  x ,  x , 2012)
+EXT(AMD_vertex_shader_layer                 , AMD_vertex_shader_layer                ,  x , GLC,  x ,  x , 2012)
+EXT(AMD_vertex_shader_viewport_index        , AMD_vertex_shader_viewport_index       ,  x , GLC,  x ,  x , 2012)
+EXT(APPLE_object_purgeable                  , APPLE_object_purgeable                 , GLL, GLC,  x ,  x , 2006)
+EXT(APPLE_packed_pixels                     , dummy_true                             , GLL,  x ,  x ,  x , 2002)
+EXT(APPLE_texture_max_level                 , dummy_true                             ,  x ,  x , ES1, ES2, 2009)
+EXT(APPLE_vertex_array_object               , dummy_true                             , GLL,  x ,  x ,  x , 2002)
+EXT(ATI_blend_equation_separate             , EXT_blend_equation_separate            , GLL, GLC,  x ,  x , 2003)
+EXT(ATI_draw_buffers                        , dummy_true                             , GLL,  x ,  x ,  x , 2002)
+EXT(ATI_fragment_shader                     , ATI_fragment_shader                    , GLL,  x ,  x ,  x , 2001)
+EXT(ATI_separate_stencil                    , ATI_separate_stencil                   , GLL,  x ,  x ,  x , 2006)
+EXT(ATI_texture_compression_3dc             , ATI_texture_compression_3dc            , GLL,  x ,  x ,  x , 2004)
+EXT(ATI_texture_env_combine3                , ATI_texture_env_combine3               , GLL,  x ,  x ,  x , 2002)
+EXT(ATI_texture_float                       , ARB_texture_float                      , GLL, GLC,  x ,  x , 2002)
+EXT(ATI_texture_mirror_once                 , ATI_texture_mirror_once                , GLL, GLC,  x ,  x , 2006)
+EXT(IBM_multimode_draw_arrays               , dummy_true                             , GLL, GLC,  x ,  x , 1998)
+EXT(IBM_rasterpos_clip                      , dummy_true                             , GLL,  x ,  x ,  x , 1996)
+EXT(IBM_texture_mirrored_repeat             , dummy_true                             , GLL,  x ,  x ,  x , 1998)
+EXT(INGR_blend_func_separate                , EXT_blend_func_separate                , GLL,  x ,  x ,  x , 1999)
+EXT(INTEL_performance_query                 , INTEL_performance_query                , GLL, GLC,  x , ES2, 2013)
+EXT(MESA_pack_invert                        , MESA_pack_invert                       , GLL, GLC,  x ,  x , 2002)
+EXT(MESA_texture_signed_rgba                , EXT_texture_snorm                      , GLL, GLC,  x ,  x , 2009)
+EXT(MESA_window_pos                         , dummy_true                             , GLL,  x ,  x ,  x , 2000)
+EXT(MESA_ycbcr_texture                      , MESA_ycbcr_texture                     , GLL, GLC,  x ,  x , 2002)
+EXT(NV_blend_square                         , dummy_true                             , GLL,  x ,  x ,  x , 1999)
+EXT(NV_conditional_render                   , NV_conditional_render                  , GLL, GLC,  x ,  x , 2008)
+EXT(NV_depth_clamp                          , ARB_depth_clamp                        , GLL, GLC,  x ,  x , 2001)
+EXT(NV_draw_buffers                         , dummy_true                             ,  x ,  x ,  x , ES2, 2011)
+EXT(NV_fbo_color_attachments                , dummy_true                             ,  x ,  x ,  x , ES2, 2010)
+EXT(NV_fog_distance                         , NV_fog_distance                        , GLL,  x ,  x ,  x , 2001)
+EXT(NV_fragment_program_option              , NV_fragment_program_option             , GLL,  x ,  x ,  x , 2005)
+EXT(NV_light_max_exponent                   , dummy_true                             , GLL,  x ,  x ,  x , 1999)
+EXT(NV_packed_depth_stencil                 , dummy_true                             , GLL, GLC,  x ,  x , 2000)
+EXT(NV_point_sprite                         , NV_point_sprite                        , GLL, GLC,  x ,  x , 2001)
+EXT(NV_primitive_restart                    , NV_primitive_restart                   , GLL,  x ,  x ,  x , 2002)
+EXT(NV_read_buffer                          , dummy_true                             ,  x ,  x ,  x , ES2, 2011)
+EXT(NV_read_depth                           , dummy_true                             ,  x ,  x ,  x , ES2, 2011)
+EXT(NV_read_depth_stencil                   , dummy_true                             ,  x ,  x ,  x , ES2, 2011)
+EXT(NV_read_stencil                         , dummy_true                             ,  x ,  x ,  x , ES2, 2011)
+EXT(NV_texgen_reflection                    , dummy_true                             , GLL,  x ,  x ,  x , 1999)
+EXT(NV_texture_barrier                      , NV_texture_barrier                     , GLL, GLC,  x ,  x , 2009)
+EXT(NV_texture_env_combine4                 , NV_texture_env_combine4                , GLL,  x ,  x ,  x , 1999)
+EXT(NV_texture_rectangle                    , NV_texture_rectangle                   , GLL,  x ,  x ,  x , 2000)
+EXT(NV_vdpau_interop                        , NV_vdpau_interop                       , GLL, GLC,  x ,  x , 2010)
+EXT(S3_s3tc                                 , ANGLE_texture_compression_dxt          , GLL, GLC,  x ,  x , 1999)
+EXT(SGIS_generate_mipmap                    , dummy_true                             , GLL,  x ,  x ,  x , 1997)
+EXT(SGIS_texture_border_clamp               , ARB_texture_border_clamp               , GLL,  x ,  x ,  x , 1997)
+EXT(SGIS_texture_edge_clamp                 , dummy_true                             , GLL,  x ,  x ,  x , 1997)
+EXT(SGIS_texture_lod                        , dummy_true                             , GLL,  x ,  x ,  x , 1997)
+EXT(SUN_multi_draw_arrays                   , dummy_true                             , GLL,  x ,  x ,  x , 1999)
+#undef GLL
+#undef GLC
+#undef ES1
+#undef ES2
+#undef  x

From ab129a44ae3a7515eb4642c068299673c890fb73 Mon Sep 17 00:00:00 2001
From: Nanley Chery <nanley.g.chery@intel.com>
Date: Thu, 17 Sep 2015 15:49:40 -0700
Subject: [PATCH 205/287] mesa: Generate a helper function for each extension
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Generate functions which determine if an extension is supported in the
current context. Initially, enums were going to be explicitly used with
_mesa_extension_supported(). The idea to embed the function and enums
into generated helper functions was suggested by Kristian Høgsberg.

For performance, the function body no longer uses
_mesa_extension_supported() and, as suggested by Chad Versace, the
functions are also declared static inline.

v2: Place function qualifiers on separate line (Chad)
v3: Move function curly brace to new line (Chad)

Signed-off-by: Nanley Chery <nanley.g.chery@intel.com>
Reviewed-by: Chad Versace <chad.versace@intel.com>
---
 src/mesa/main/context.h    |  1 +
 src/mesa/main/extensions.c | 23 +--------------------
 src/mesa/main/extensions.h | 42 ++++++++++++++++++++++++++++++++++++++
 3 files changed, 44 insertions(+), 22 deletions(-)

diff --git a/src/mesa/main/context.h b/src/mesa/main/context.h
index 1e7a12c8a84..4798b1f9b43 100644
--- a/src/mesa/main/context.h
+++ b/src/mesa/main/context.h
@@ -50,6 +50,7 @@
 
 
 #include "imports.h"
+#include "extensions.h"
 #include "mtypes.h"
 #include "vbo/vbo.h"
 
diff --git a/src/mesa/main/extensions.c b/src/mesa/main/extensions.c
index c7609bea0f0..7ef79e58963 100644
--- a/src/mesa/main/extensions.c
+++ b/src/mesa/main/extensions.c
@@ -42,27 +42,6 @@ struct gl_extensions _mesa_extension_override_disables;
 static char *extra_extensions = NULL;
 static char *cant_disable_extensions = NULL;
 
-/**
- * \brief An element of the \c extension_table.
- */
-struct extension {
-   /** Name of extension, such as "GL_ARB_depth_clamp". */
-   const char *name;
-
-   /** Offset (in bytes) of the corresponding member in struct gl_extensions. */
-   size_t offset;
-
-   /** Minimum version the extension requires for the given API
-    * (see gl_api defined in mtypes.h). The value is equal to:
-    * 10 * major_version + minor_version
-    */
-   uint8_t version[API_OPENGL_LAST + 1];
-
-   /** Year the extension was proposed or approved.  Used to sort the 
-    * extension string chronologically. */
-   uint16_t year;
-};
-
 
 /**
  * Given a member \c x of struct gl_extensions, return offset of
@@ -74,7 +53,7 @@ struct extension {
 /**
  * \brief Table of supported OpenGL extensions for all API's.
  */
-static const struct extension extension_table[] = {
+const struct extension extension_table[] = {
 #define EXT(name_str, driver_cap, gll_ver, glc_ver, gles_ver, gles2_ver, yyyy) \
         { .name = "GL_" #name_str, .offset = o(driver_cap), \
           .version = { \
diff --git a/src/mesa/main/extensions.h b/src/mesa/main/extensions.h
index 595512a5d5c..6092eca36e7 100644
--- a/src/mesa/main/extensions.h
+++ b/src/mesa/main/extensions.h
@@ -55,6 +55,48 @@ _mesa_get_extension_count(struct gl_context *ctx);
 extern const GLubyte *
 _mesa_get_enabled_extension(struct gl_context *ctx, GLuint index);
 
+
+/**
+ * \brief An element of the \c extension_table.
+ */
+struct extension {
+   /** Name of extension, such as "GL_ARB_depth_clamp". */
+   const char *name;
+
+   /** Offset (in bytes) of the corresponding member in struct gl_extensions. */
+   size_t offset;
+
+   /** Minimum version the extension requires for the given API
+    * (see gl_api defined in mtypes.h). The value is equal to:
+    * 10 * major_version + minor_version
+    */
+   uint8_t version[API_OPENGL_LAST + 1];
+
+   /** Year the extension was proposed or approved.  Used to sort the 
+    * extension string chronologically. */
+   uint16_t year;
+} extern const extension_table[];
+
+
+/* Generate enums for the functions below */
+enum {
+#define EXT(name_str, ...) MESA_EXTENSION_##name_str,
+#include "extensions_table.h"
+#undef EXT
+};
+
+
+/** Checks if the context supports a user-facing extension */
+#define EXT(name_str, driver_cap, ...) \
+static inline bool \
+_mesa_has_##name_str(const struct gl_context *ctx) \
+{ \
+   return ctx->Extensions.driver_cap && (ctx->Version >= \
+          extension_table[MESA_EXTENSION_##name_str].version[ctx->API]); \
+}
+#include "extensions_table.h"
+#undef EXT
+
 extern struct gl_extensions _mesa_extension_override_enables;
 extern struct gl_extensions _mesa_extension_override_disables;
 

From 5645770742d21b0a2ae0fe3ecce135933cd9b4fc Mon Sep 17 00:00:00 2001
From: Nanley Chery <nanley.g.chery@intel.com>
Date: Mon, 26 Oct 2015 15:22:24 -0700
Subject: [PATCH 206/287] mesa/extensions: Prefix global struct and extension
 type
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Rename the following types and variables:
* struct extension -> struct mesa_extension,
  like the mesa_format type.
* extension_table -> _mesa_extension_table,
  like the _mesa_extension_override_{enables,disables} structs.

Suggested-by: Marek Olšák <marek.olsak@amd.com>
Suggested-by: Chad Versace <chad.versace@intel.com>
Signed-off-by: Nanley Chery <nanley.g.chery@intel.com>
Reviewed-by: Chad Versace <chad.versace@intel.com>
---
 src/mesa/main/extensions.c | 40 +++++++++++++++++++-------------------
 src/mesa/main/extensions.h |  6 +++---
 2 files changed, 23 insertions(+), 23 deletions(-)

diff --git a/src/mesa/main/extensions.c b/src/mesa/main/extensions.c
index 7ef79e58963..97f23bfd88a 100644
--- a/src/mesa/main/extensions.c
+++ b/src/mesa/main/extensions.c
@@ -53,7 +53,7 @@ static char *cant_disable_extensions = NULL;
 /**
  * \brief Table of supported OpenGL extensions for all API's.
  */
-const struct extension extension_table[] = {
+const struct mesa_extension _mesa_extension_table[] = {
 #define EXT(name_str, driver_cap, gll_ver, glc_ver, gles_ver, gles2_ver, yyyy) \
         { .name = "GL_" #name_str, .offset = o(driver_cap), \
           .version = { \
@@ -72,7 +72,7 @@ const struct extension extension_table[] = {
 /**
  * Given an extension name, lookup up the corresponding member of struct
  * gl_extensions and return that member's offset (in bytes).  If the name is
- * not found in the \c extension_table, return 0.
+ * not found in the \c _mesa_extension_table, return 0.
  *
  * \param name Name of extension.
  * \return Offset of member in struct gl_extensions.
@@ -85,9 +85,9 @@ name_to_offset(const char* name)
    if (name == 0)
       return 0;
 
-   for (i = 0; i < ARRAY_SIZE(extension_table); ++i) {
-      if (strcmp(name, extension_table[i].name) == 0)
-	 return extension_table[i].offset;
+   for (i = 0; i < ARRAY_SIZE(_mesa_extension_table); ++i) {
+      if (strcmp(name, _mesa_extension_table[i].name) == 0)
+	 return _mesa_extension_table[i].offset;
    }
 
    return 0;
@@ -107,8 +107,8 @@ override_extensions_in_context(struct gl_context *ctx)
       (GLboolean*) &_mesa_extension_override_disables;
    GLboolean *ctx_ext = (GLboolean*)&ctx->Extensions;
 
-   for (i = 0; i < ARRAY_SIZE(extension_table); ++i) {
-      size_t offset = extension_table[i].offset;
+   for (i = 0; i < ARRAY_SIZE(_mesa_extension_table); ++i) {
+      size_t offset = _mesa_extension_table[i].offset;
 
       assert(!enables[offset] || !disables[offset]);
       if (enables[offset]) {
@@ -394,13 +394,13 @@ typedef unsigned short extension_index;
  * Given an extension enum, return whether or not the extension is supported
  * dependent on the following factors:
  * There's driver support and the OpenGL/ES version is at least that
- * specified in the extension_table.
+ * specified in the _mesa_extension_table.
  */
 static inline bool
 _mesa_extension_supported(const struct gl_context *ctx, extension_index i)
 {
    const bool *base = (bool *) &ctx->Extensions;
-   const struct extension *ext = extension_table + i;
+   const struct mesa_extension *ext = _mesa_extension_table + i;
 
    return (ctx->Version >= ext->version[ctx->API]) && base[ext->offset];
 }
@@ -409,15 +409,15 @@ _mesa_extension_supported(const struct gl_context *ctx, extension_index i)
  * Compare two entries of the extensions table.  Sorts first by year,
  * then by name.
  *
- * Arguments are indices into extension_table.
+ * Arguments are indices into _mesa_extension_table.
  */
 static int
 extension_compare(const void *p1, const void *p2)
 {
    extension_index i1 = * (const extension_index *) p1;
    extension_index i2 = * (const extension_index *) p2;
-   const struct extension *e1 = &extension_table[i1];
-   const struct extension *e2 = &extension_table[i2];
+   const struct mesa_extension *e1 = &_mesa_extension_table[i1];
+   const struct mesa_extension *e2 = &_mesa_extension_table[i2];
    int res;
 
    res = (int)e1->year - (int)e2->year;
@@ -463,8 +463,8 @@ _mesa_make_extension_string(struct gl_context *ctx)
 
    /* Compute length of the extension string. */
    count = 0;
-   for (k = 0; k < ARRAY_SIZE(extension_table); ++k) {
-      const struct extension *i = extension_table + k;
+   for (k = 0; k < ARRAY_SIZE(_mesa_extension_table); ++k) {
+      const struct mesa_extension *i = _mesa_extension_table + k;
 
       if (i->year <= maxYear &&
           _mesa_extension_supported(ctx, k)) {
@@ -494,8 +494,8 @@ _mesa_make_extension_string(struct gl_context *ctx)
     * expect will fit into that buffer.
     */
    j = 0;
-   for (k = 0; k < ARRAY_SIZE(extension_table); ++k) {
-      if (extension_table[k].year <= maxYear &&
+   for (k = 0; k < ARRAY_SIZE(_mesa_extension_table); ++k) {
+      if (_mesa_extension_table[k].year <= maxYear &&
          _mesa_extension_supported(ctx, k)) {
          extension_indices[j++] = k;
       }
@@ -506,7 +506,7 @@ _mesa_make_extension_string(struct gl_context *ctx)
 
    /* Build the extension string.*/
    for (j = 0; j < count; ++j) {
-      const struct extension *i = &extension_table[extension_indices[j]];
+      const struct mesa_extension *i = &_mesa_extension_table[extension_indices[j]];
       assert(_mesa_extension_supported(ctx, extension_indices[j]));
       strcat(exts, i->name);
       strcat(exts, " ");
@@ -532,7 +532,7 @@ _mesa_get_extension_count(struct gl_context *ctx)
    if (ctx->Extensions.Count != 0)
       return ctx->Extensions.Count;
 
-   for (k = 0; k < ARRAY_SIZE(extension_table); ++k) {
+   for (k = 0; k < ARRAY_SIZE(_mesa_extension_table); ++k) {
       if (_mesa_extension_supported(ctx, k))
 	 ctx->Extensions.Count++;
    }
@@ -548,10 +548,10 @@ _mesa_get_enabled_extension(struct gl_context *ctx, GLuint index)
    size_t n = 0;
    unsigned i;
 
-   for (i = 0; i < ARRAY_SIZE(extension_table); ++i) {
+   for (i = 0; i < ARRAY_SIZE(_mesa_extension_table); ++i) {
       if (_mesa_extension_supported(ctx, i)) {
          if (n == index)
-            return (const GLubyte*) extension_table[i].name;
+            return (const GLubyte*) _mesa_extension_table[i].name;
          else
             ++n;
       }
diff --git a/src/mesa/main/extensions.h b/src/mesa/main/extensions.h
index 6092eca36e7..4ea2af13efc 100644
--- a/src/mesa/main/extensions.h
+++ b/src/mesa/main/extensions.h
@@ -59,7 +59,7 @@ _mesa_get_enabled_extension(struct gl_context *ctx, GLuint index);
 /**
  * \brief An element of the \c extension_table.
  */
-struct extension {
+struct mesa_extension {
    /** Name of extension, such as "GL_ARB_depth_clamp". */
    const char *name;
 
@@ -75,7 +75,7 @@ struct extension {
    /** Year the extension was proposed or approved.  Used to sort the 
     * extension string chronologically. */
    uint16_t year;
-} extern const extension_table[];
+} extern const _mesa_extension_table[];
 
 
 /* Generate enums for the functions below */
@@ -92,7 +92,7 @@ static inline bool \
 _mesa_has_##name_str(const struct gl_context *ctx) \
 { \
    return ctx->Extensions.driver_cap && (ctx->Version >= \
-          extension_table[MESA_EXTENSION_##name_str].version[ctx->API]); \
+          _mesa_extension_table[MESA_EXTENSION_##name_str].version[ctx->API]); \
 }
 #include "extensions_table.h"
 #undef EXT

From a16ffb743ced9fde80b2485dfc2d86ae74e86f25 Mon Sep 17 00:00:00 2001
From: Nanley Chery <nanley.g.chery@intel.com>
Date: Thu, 15 Oct 2015 12:34:43 -0700
Subject: [PATCH 207/287] mesa: In helpers, only check driver capability for
 meta

Make API context and version checks done by the helper functions pass
unconditionally while meta is in progress. This transparently makes
extension checks solely dependent on struct gl_extensions while in meta.

v2: Use an 8-bit data type instead of a GLuint

Signed-off-by: Nanley Chery <nanley.g.chery@intel.com>
Reviewed-by: Chad Versace <chad.versace@intel.com>
---
 src/mesa/drivers/common/meta.c | 11 +++++++++++
 src/mesa/drivers/common/meta.h |  1 +
 src/mesa/main/extensions.h     |  2 +-
 src/mesa/main/mtypes.h         |  6 ++++++
 src/mesa/main/version.c        |  1 +
 5 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/src/mesa/drivers/common/meta.c b/src/mesa/drivers/common/meta.c
index e27489d6195..0ffcd9c2c3f 100644
--- a/src/mesa/drivers/common/meta.c
+++ b/src/mesa/drivers/common/meta.c
@@ -449,6 +449,16 @@ _mesa_meta_begin(struct gl_context *ctx, GLbitfield state)
    save->API = ctx->API;
    ctx->API = API_OPENGL_COMPAT;
 
+   /* Mesa's extension helper functions use the current context's API to look up
+    * the version required by an extension as a step in determining whether or
+    * not it has been advertised. Since meta aims to only be restricted by the
+    * driver capability (and not by whether or not an extension has been
+    * advertised), set the helper functions' Version variable to a value that
+    * will make the checks on the context API and version unconditionally pass.
+    */
+   save->ExtensionsVersion = ctx->Extensions.Version;
+   ctx->Extensions.Version = ~0;
+
    /* Pausing transform feedback needs to be done early, or else we won't be
     * able to change other state.
     */
@@ -1222,6 +1232,7 @@ _mesa_meta_end(struct gl_context *ctx)
    ctx->Meta->SaveStackDepth--;
 
    ctx->API = save->API;
+   ctx->Extensions.Version = save->ExtensionsVersion;
 }
 
 
diff --git a/src/mesa/drivers/common/meta.h b/src/mesa/drivers/common/meta.h
index d742eaa9f67..d4bf0b65524 100644
--- a/src/mesa/drivers/common/meta.h
+++ b/src/mesa/drivers/common/meta.h
@@ -72,6 +72,7 @@ struct save_state
 
    /* Always saved/restored with meta. */
    gl_api API;
+   uint8_t ExtensionsVersion;
 
    /** MESA_META_CLEAR (and others?) */
    struct gl_query_object *CurrentOcclusionObject;
diff --git a/src/mesa/main/extensions.h b/src/mesa/main/extensions.h
index 4ea2af13efc..e4671be2cf6 100644
--- a/src/mesa/main/extensions.h
+++ b/src/mesa/main/extensions.h
@@ -91,7 +91,7 @@ enum {
 static inline bool \
 _mesa_has_##name_str(const struct gl_context *ctx) \
 { \
-   return ctx->Extensions.driver_cap && (ctx->Version >= \
+   return ctx->Extensions.driver_cap && (ctx->Extensions.Version >= \
           _mesa_extension_table[MESA_EXTENSION_##name_str].version[ctx->API]); \
 }
 #include "extensions_table.h"
diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
index 8de9c773b9f..c4d8c6a8cb4 100644
--- a/src/mesa/main/mtypes.h
+++ b/src/mesa/main/mtypes.h
@@ -3820,6 +3820,12 @@ struct gl_extensions
    const GLubyte *String;
    /** Number of supported extensions */
    GLuint Count;
+   /**
+    * The context version which extension helper functions compare against.
+    * By default, the value is equal to ctx->Version. This changes to ~0
+    * while meta is in progress.
+    */
+   GLubyte Version;
 };
 
 
diff --git a/src/mesa/main/version.c b/src/mesa/main/version.c
index 5635a643200..314b26dc74f 100644
--- a/src/mesa/main/version.c
+++ b/src/mesa/main/version.c
@@ -524,6 +524,7 @@ _mesa_compute_version(struct gl_context *ctx)
       return;
 
    ctx->Version = _mesa_get_version(&ctx->Extensions, &ctx->Const, ctx->API);
+   ctx->Extensions.Version = ctx->Version;
 
    /* Make sure that the GLSL version lines up with the GL version. In some
     * cases it can be too high, e.g. if an extension is missing.

From b52cb9ec6ab3b74f754ccf8472b9db2b6d8c3333 Mon Sep 17 00:00:00 2001
From: Jordan Justen <jordan.l.justen@intel.com>
Date: Wed, 11 Nov 2015 22:02:06 -0800
Subject: [PATCH 208/287] glsl: Correctly handle vector extract on function
 parameter
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The commit referenced below accidentally used '==' where '=' was intended:

commit 96b22fb080894ba1840af2372f28a46cc0f40c76
Author: Kristian Høgsberg Kristensen <krh@bitplanet.net>
Date:   Wed Nov 4 14:58:54 2015 -0800

    glsl: Use array deref for access to vector components

Signed-off-by: Jordan Justen <jordan.l.justen@intel.com>
Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
Reviewed-by: Kristian Høgsberg <krh@bitplanet.net>
---
 src/glsl/ast_function.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/glsl/ast_function.cpp b/src/glsl/ast_function.cpp
index 55844706d35..466ece67424 100644
--- a/src/glsl/ast_function.cpp
+++ b/src/glsl/ast_function.cpp
@@ -368,8 +368,8 @@ fix_parameter(void *mem_ctx, ir_rvalue *actual, const glsl_type *formal_type,
 
    ir_rvalue *lhs = actual;
    if (expr != NULL && expr->operation == ir_binop_vector_extract) {
-      lhs == new(mem_ctx) ir_dereference_array(expr->operands[0]->clone(mem_ctx, NULL),
-                                               expr->operands[1]->clone(mem_ctx, NULL));
+      lhs = new(mem_ctx) ir_dereference_array(expr->operands[0]->clone(mem_ctx, NULL),
+                                              expr->operands[1]->clone(mem_ctx, NULL));
    }
 
    ir_assignment *const assignment_2 = new(mem_ctx) ir_assignment(lhs, rhs);

From 20748318c5b870fc27f699637041e67a67321f5f Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Mon, 14 Sep 2015 16:13:43 -0400
Subject: [PATCH 209/287] glsl: add gl_HelperInvocation system value

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
Reviewed-by: Matt Turner <mattst88@gmail.com>
---
 src/glsl/builtin_variables.cpp             | 3 +++
 src/glsl/nir/shader_enums.h                | 1 +
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 1 +
 3 files changed, 5 insertions(+)

diff --git a/src/glsl/builtin_variables.cpp b/src/glsl/builtin_variables.cpp
index c30fb9226e5..b06c1bc5c12 100644
--- a/src/glsl/builtin_variables.cpp
+++ b/src/glsl/builtin_variables.cpp
@@ -1059,6 +1059,9 @@ builtin_variable_generator::generate_fs_special_vars()
       var = add_input(VARYING_SLOT_VIEWPORT, int_t, "gl_ViewportIndex");
       var->data.interpolation = INTERP_QUALIFIER_FLAT;
    }
+
+   if (state->is_version(450, 310)/* || state->ARB_ES3_1_compatibility_enable*/)
+      add_system_value(SYSTEM_VALUE_HELPER_INVOCATION, bool_t, "gl_HelperInvocation");
 }
 
 
diff --git a/src/glsl/nir/shader_enums.h b/src/glsl/nir/shader_enums.h
index d1cf7ca04cc..dd0e0bad806 100644
--- a/src/glsl/nir/shader_enums.h
+++ b/src/glsl/nir/shader_enums.h
@@ -396,6 +396,7 @@ typedef enum
    SYSTEM_VALUE_SAMPLE_ID,
    SYSTEM_VALUE_SAMPLE_POS,
    SYSTEM_VALUE_SAMPLE_MASK_IN,
+   SYSTEM_VALUE_HELPER_INVOCATION,
    /*@}*/
 
    /**
diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 9ee6f8f89be..b56512706b0 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -4408,6 +4408,7 @@ const unsigned _mesa_sysval_to_semantic[SYSTEM_VALUE_MAX] = {
    TGSI_SEMANTIC_SAMPLEID,
    TGSI_SEMANTIC_SAMPLEPOS,
    TGSI_SEMANTIC_SAMPLEMASK,
+   0, /* gl_HelperInvocation */
 
    /* Tessellation shaders
     */

From e3d9dbe3046c4fd10c6b4c66d8016e908ea7c140 Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Mon, 14 Sep 2015 16:23:04 -0400
Subject: [PATCH 210/287] gallium: add support for gl_HelperInvocation semantic

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
Reviewed-by: Glenn Kennard <glenn.kennard@gmail.com>
---
 src/gallium/auxiliary/tgsi/tgsi_strings.c  | 1 +
 src/gallium/docs/source/tgsi.rst           | 8 ++++++++
 src/gallium/include/pipe/p_shader_tokens.h | 3 ++-
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 4 +++-
 4 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/src/gallium/auxiliary/tgsi/tgsi_strings.c b/src/gallium/auxiliary/tgsi/tgsi_strings.c
index 89369d60f4e..fc29a2398aa 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_strings.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_strings.c
@@ -95,6 +95,7 @@ const char *tgsi_semantic_names[TGSI_SEMANTIC_COUNT] =
    "TESSOUTER",
    "TESSINNER",
    "VERTICESIN",
+   "HELPER_INVOCATION",
 };
 
 const char *tgsi_texture_names[TGSI_TEXTURE_COUNT] =
diff --git a/src/gallium/docs/source/tgsi.rst b/src/gallium/docs/source/tgsi.rst
index 01e18f3084e..e7b0c2f6377 100644
--- a/src/gallium/docs/source/tgsi.rst
+++ b/src/gallium/docs/source/tgsi.rst
@@ -2941,6 +2941,14 @@ TGSI_SEMANTIC_VERTICESIN
 For tessellation evaluation/control shaders, this semantic label indicates the
 number of vertices provided in the input patch. Only the X value is defined.
 
+TGSI_SEMANTIC_HELPER_INVOCATION
+"""""""""""""""""""""""""""""""
+
+For fragment shaders, this semantic indicates whether the current
+invocation is covered or not. Helper invocations are created in order
+to properly compute derivatives, however it may be desirable to skip
+some of the logic in those cases. See ``gl_HelperInvocation`` documentation.
+
 
 Declaration Interpolate
 ^^^^^^^^^^^^^^^^^^^^^^^
diff --git a/src/gallium/include/pipe/p_shader_tokens.h b/src/gallium/include/pipe/p_shader_tokens.h
index e0ab9013dd5..a3137aec8db 100644
--- a/src/gallium/include/pipe/p_shader_tokens.h
+++ b/src/gallium/include/pipe/p_shader_tokens.h
@@ -185,7 +185,8 @@ struct tgsi_declaration_interp
 #define TGSI_SEMANTIC_TESSOUTER  32 /**< outer tessellation levels */
 #define TGSI_SEMANTIC_TESSINNER  33 /**< inner tessellation levels */
 #define TGSI_SEMANTIC_VERTICESIN 34 /**< number of input vertices */
-#define TGSI_SEMANTIC_COUNT      35 /**< number of semantic values */
+#define TGSI_SEMANTIC_HELPER_INVOCATION 35 /**< current invocation is helper */
+#define TGSI_SEMANTIC_COUNT      36 /**< number of semantic values */
 
 struct tgsi_declaration_semantic
 {
diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index b56512706b0..3ad1afdecda 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -4408,7 +4408,7 @@ const unsigned _mesa_sysval_to_semantic[SYSTEM_VALUE_MAX] = {
    TGSI_SEMANTIC_SAMPLEID,
    TGSI_SEMANTIC_SAMPLEPOS,
    TGSI_SEMANTIC_SAMPLEMASK,
-   0, /* gl_HelperInvocation */
+   TGSI_SEMANTIC_HELPER_INVOCATION,
 
    /* Tessellation shaders
     */
@@ -5139,6 +5139,8 @@ st_translate_program(
           TGSI_SEMANTIC_BASEVERTEX);
    assert(_mesa_sysval_to_semantic[SYSTEM_VALUE_TESS_COORD] ==
           TGSI_SEMANTIC_TESSCOORD);
+   assert(_mesa_sysval_to_semantic[SYSTEM_VALUE_HELPER_INVOCATION] ==
+          TGSI_SEMANTIC_HELPER_INVOCATION);
 
    t = CALLOC_STRUCT(st_translate);
    if (!t) {

From 39f51ec96f00f601b9c4d4e321dacb3af9dc866f Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Mon, 14 Sep 2015 16:23:29 -0400
Subject: [PATCH 211/287] nvc0/ir: add support for
 TGSI_SEMANTIC_HELPER_INVOCATION

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
---
 src/gallium/drivers/nouveau/codegen/nv50_ir.h              | 1 +
 src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp | 1 +
 src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp | 1 +
 src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp  | 1 +
 src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp  | 1 +
 src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp      | 1 +
 6 files changed, 6 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.h b/src/gallium/drivers/nouveau/codegen/nv50_ir.h
index f6e93081e76..d09a0ab0610 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.h
@@ -389,6 +389,7 @@ enum SVSemantic
    SV_SBASE,
    SV_VERTEX_STRIDE,
    SV_INVOCATION_INFO,
+   SV_THREAD_KILL,
    SV_UNDEFINED,
    SV_LAST
 };
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp
index d712c9c300a..b163cd2db4a 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp
@@ -1644,6 +1644,7 @@ CodeEmitterGK110::getSRegEncoding(const ValueRef& ref)
    case SV_VERTEX_COUNT:  return 0x10;
    case SV_INVOCATION_ID: return 0x11;
    case SV_YDIR:          return 0x12;
+   case SV_THREAD_KILL:   return 0x13;
    case SV_TID:           return 0x21 + SDATA(ref).sv.index;
    case SV_CTAID:         return 0x25 + SDATA(ref).sv.index;
    case SV_NTID:          return 0x29 + SDATA(ref).sv.index;
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
index 7e6ed842d54..e9ddd366391 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
@@ -244,6 +244,7 @@ CodeEmitterGM107::emitSYS(int pos, const Value *val)
    case SV_LANEID         : id = 0x00; break;
    case SV_VERTEX_COUNT   : id = 0x10; break;
    case SV_INVOCATION_ID  : id = 0x11; break;
+   case SV_THREAD_KILL    : id = 0x13; break;
    case SV_INVOCATION_INFO: id = 0x1d; break;
    default:
       assert(!"invalid system value");
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
index 8784f3b0a21..2a13e1086a0 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
@@ -1839,6 +1839,7 @@ CodeEmitterNVC0::getSRegEncoding(const ValueRef& ref)
    case SV_VERTEX_COUNT:  return 0x10;
    case SV_INVOCATION_ID: return 0x11;
    case SV_YDIR:          return 0x12;
+   case SV_THREAD_KILL:   return 0x13;
    case SV_TID:           return 0x21 + SDATA(ref).sv.index;
    case SV_CTAID:         return 0x25 + SDATA(ref).sv.index;
    case SV_NTID:          return 0x29 + SDATA(ref).sv.index;
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
index 6a7cb4224f4..08a73d79781 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
@@ -376,6 +376,7 @@ static nv50_ir::SVSemantic translateSysVal(uint sysval)
    case TGSI_SEMANTIC_TESSOUTER:  return nv50_ir::SV_TESS_OUTER;
    case TGSI_SEMANTIC_TESSINNER:  return nv50_ir::SV_TESS_INNER;
    case TGSI_SEMANTIC_VERTICESIN: return nv50_ir::SV_VERTEX_COUNT;
+   case TGSI_SEMANTIC_HELPER_INVOCATION: return nv50_ir::SV_THREAD_KILL;
    default:
       assert(0);
       return nv50_ir::SV_CLOCK;
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp
index 5f30f3d354b..0b02599dbdd 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp
@@ -275,6 +275,7 @@ static const char *SemanticStr[SV_LAST + 1] =
    "SBASE",
    "VERTEX_STRIDE",
    "INVOCATION_INFO",
+   "THREAD_KILL",
    "?",
    "(INVALID)"
 };

From 2de2e1702b4fab73b1f577fa4a21b7bd0a7040dd Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Thu, 12 Nov 2015 15:59:21 -0700
Subject: [PATCH 212/287] mesa: fix MSVC build break in extensions.h

Reviewed-by: Emil Velikov <emil.l.velikov@gmail.com>
---
 src/mesa/main/extensions.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/mesa/main/extensions.h b/src/mesa/main/extensions.h
index e4671be2cf6..1615e1cc0a4 100644
--- a/src/mesa/main/extensions.h
+++ b/src/mesa/main/extensions.h
@@ -75,7 +75,9 @@ struct mesa_extension {
    /** Year the extension was proposed or approved.  Used to sort the 
     * extension string chronologically. */
    uint16_t year;
-} extern const _mesa_extension_table[];
+};
+
+extern const struct mesa_extension _mesa_extension_table[];
 
 
 /* Generate enums for the functions below */

From 79f68306d23bfb48ef645c4bded8ee7b74d2d661 Mon Sep 17 00:00:00 2001
From: Nanley Chery <nanley.g.chery@intel.com>
Date: Fri, 16 Oct 2015 10:14:39 -0700
Subject: [PATCH 213/287] mesa: Replace gl_extensions::EXT_texture3D with
 ::dummy_true

Mesa unconditionally sets this driver flag to true in
_mesa_init_extensions(). There is therefore no need for
the driver to communicate support for this extension.
Replace the driver capability flag with ::dummy_true.

Signed-off-by: Nanley Chery <nanley.g.chery@intel.com>
Reviewed-by: Chad Versace <chad.versace@intel.com>
---
 src/glsl/glsl_parser_extras.cpp     | 2 +-
 src/glsl/standalone_scaffolding.cpp | 1 -
 src/mesa/main/extensions.c          | 1 -
 src/mesa/main/extensions_table.h    | 4 ++--
 src/mesa/main/mtypes.h              | 1 -
 5 files changed, 3 insertions(+), 6 deletions(-)

diff --git a/src/glsl/glsl_parser_extras.cpp b/src/glsl/glsl_parser_extras.cpp
index 2dba7d9f48a..3ed11683062 100644
--- a/src/glsl/glsl_parser_extras.cpp
+++ b/src/glsl/glsl_parser_extras.cpp
@@ -634,7 +634,7 @@ static const _mesa_glsl_extension _mesa_glsl_supported_extensions[] = {
     */
    EXT(OES_EGL_image_external,         false, true,      OES_EGL_image_external),
    EXT(OES_standard_derivatives,       false, true,      OES_standard_derivatives),
-   EXT(OES_texture_3D,                 false, true,      EXT_texture3D),
+   EXT(OES_texture_3D,                 false, true,      dummy_true),
    EXT(OES_texture_storage_multisample_2d_array, false, true, ARB_texture_multisample),
 
    /* All other extensions go here, sorted alphabetically.
diff --git a/src/glsl/standalone_scaffolding.cpp b/src/glsl/standalone_scaffolding.cpp
index fe1d820f2ea..f3e34c6ff05 100644
--- a/src/glsl/standalone_scaffolding.cpp
+++ b/src/glsl/standalone_scaffolding.cpp
@@ -167,7 +167,6 @@ void initialize_context_to_defaults(struct gl_context *ctx, gl_api api)
    ctx->Extensions.OES_standard_derivatives = true;
 
    ctx->Extensions.EXT_shader_integer_mix = true;
-   ctx->Extensions.EXT_texture3D = true;
    ctx->Extensions.EXT_texture_array = true;
 
    ctx->Extensions.NV_texture_rectangle = true;
diff --git a/src/mesa/main/extensions.c b/src/mesa/main/extensions.c
index 97f23bfd88a..e94d2b74749 100644
--- a/src/mesa/main/extensions.c
+++ b/src/mesa/main/extensions.c
@@ -383,7 +383,6 @@ _mesa_init_extensions(struct gl_extensions *extensions)
 
    /* Then, selectively turn default extensions on. */
    extensions->dummy_true = GL_TRUE;
-   extensions->EXT_texture3D = GL_TRUE;
 }
 
 
diff --git a/src/mesa/main/extensions_table.h b/src/mesa/main/extensions_table.h
index d983562dd86..d12fd9f1c8d 100644
--- a/src/mesa/main/extensions_table.h
+++ b/src/mesa/main/extensions_table.h
@@ -181,7 +181,7 @@ EXT(EXT_shadow_funcs                        , ARB_shadow
 EXT(EXT_stencil_two_side                    , EXT_stencil_two_side                   , GLL,  x ,  x ,  x , 2001)
 EXT(EXT_stencil_wrap                        , dummy_true                             , GLL,  x ,  x ,  x , 2002)
 EXT(EXT_subtexture                          , dummy_true                             , GLL,  x ,  x ,  x , 1995)
-EXT(EXT_texture3D                           , EXT_texture3D                          , GLL,  x ,  x ,  x , 1996)
+EXT(EXT_texture3D                           , dummy_true                             , GLL,  x ,  x ,  x , 1996)
 EXT(EXT_texture_array                       , EXT_texture_array                      , GLL, GLC,  x ,  x , 2006)
 EXT(EXT_texture_compression_dxt1            , ANGLE_texture_compression_dxt          , GLL, GLC, ES1, ES2, 2004)
 EXT(ANGLE_texture_compression_dxt3          , ANGLE_texture_compression_dxt          , GLL, GLC, ES1, ES2, 2011)
@@ -252,7 +252,7 @@ EXT(OES_stencil4                            , dummy_false
 EXT(OES_stencil8                            , dummy_true                             ,  x ,  x , ES1, ES2, 2005)
 EXT(OES_stencil_wrap                        , dummy_true                             ,  x ,  x , ES1,  x , 2002)
 EXT(OES_surfaceless_context                 , dummy_true                             ,  x ,  x , ES1, ES2, 2012)
-EXT(OES_texture_3D                          , EXT_texture3D                          ,  x ,  x ,  x , ES2, 2005)
+EXT(OES_texture_3D                          , dummy_true                             ,  x ,  x ,  x , ES2, 2005)
 EXT(OES_texture_cube_map                    , ARB_texture_cube_map                   ,  x ,  x , ES1,  x , 2007)
 EXT(OES_texture_env_crossbar                , ARB_texture_env_crossbar               ,  x ,  x , ES1,  x , 2005)
 EXT(OES_texture_float                       , OES_texture_float                      ,  x ,  x ,  x , ES2, 2005)
diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
index c4d8c6a8cb4..4efdf1ee8c1 100644
--- a/src/mesa/main/mtypes.h
+++ b/src/mesa/main/mtypes.h
@@ -3762,7 +3762,6 @@ struct gl_extensions
    GLboolean EXT_provoking_vertex;
    GLboolean EXT_shader_integer_mix;
    GLboolean EXT_stencil_two_side;
-   GLboolean EXT_texture3D;
    GLboolean EXT_texture_array;
    GLboolean EXT_texture_compression_latc;
    GLboolean EXT_texture_compression_s3tc;

From 4bbf2ac06e71102a65b4d0427a0ba630a3c2477c Mon Sep 17 00:00:00 2001
From: Jason Ekstrand <jason.ekstrand@intel.com>
Date: Tue, 10 Nov 2015 14:13:47 -0800
Subject: [PATCH 214/287] nir/validate: Allow subroutine types for the tails of
 derefs

The shader-subroutine code creates uniforms of type SUBROUTINE for
subroutines that are then read as integers in the backends.  If we ever
want to do any optimizations on these, we'll need to come up with a better
plan where they are actual scalars or something, but this works for now.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=92859
Reviewed-by: Connor Abbott <cwabbott0@gmail.com>
---
 src/glsl/nir/nir_validate.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/src/glsl/nir/nir_validate.c b/src/glsl/nir/nir_validate.c
index a42e830fd72..51c2529dc38 100644
--- a/src/glsl/nir/nir_validate.c
+++ b/src/glsl/nir/nir_validate.c
@@ -401,7 +401,9 @@ validate_intrinsic_instr(nir_intrinsic_instr *instr, validate_state *state)
    case nir_intrinsic_load_var: {
       const struct glsl_type *type =
          nir_deref_tail(&instr->variables[0]->deref)->type;
-      assert(glsl_type_is_vector_or_scalar(type));
+      assert(glsl_type_is_vector_or_scalar(type) ||
+             (instr->variables[0]->var->data.mode == nir_var_uniform &&
+              glsl_get_base_type(type) == GLSL_TYPE_SUBROUTINE));
       assert(instr->num_components == glsl_get_vector_elements(type));
       assert(instr->variables[0]->var->data.mode != nir_var_shader_out);
       break;
@@ -409,7 +411,9 @@ validate_intrinsic_instr(nir_intrinsic_instr *instr, validate_state *state)
    case nir_intrinsic_store_var: {
       const struct glsl_type *type =
          nir_deref_tail(&instr->variables[0]->deref)->type;
-      assert(glsl_type_is_vector_or_scalar(type));
+      assert(glsl_type_is_vector_or_scalar(type) ||
+             (instr->variables[0]->var->data.mode == nir_var_uniform &&
+              glsl_get_base_type(type) == GLSL_TYPE_SUBROUTINE));
       assert(instr->num_components == glsl_get_vector_elements(type));
       assert(instr->variables[0]->var->data.mode != nir_var_shader_in &&
              instr->variables[0]->var->data.mode != nir_var_uniform &&

From 226ba889a0f820b9f4b1132e379620d2688c96e7 Mon Sep 17 00:00:00 2001
From: Jason Ekstrand <jason.ekstrand@intel.com>
Date: Thu, 12 Nov 2015 18:10:22 -0800
Subject: [PATCH 215/287] nir/vars_to_ssa: Rework copy set handling in
 lower_copies_to_load_store

Previously, we walked through a given deref_node's copies and, after
lowering the copy away, removed it from both the source and destination
copy sets.  This commit changes this to only remove it from the other
node's copy set (not the one we're lowering).  At the end of the loop, we
just throw away the copy set for the node we're lowering since that node no
longer has any copies.  This has two advantages:

 1) It's more efficient because we're doing potentially half as many set
    search operations.

 2) It now properly handles copies from a node to itself.  Previously, it
    would delete the copy from the set when processing the destination and
    then assert-fail when we couldn't find it for the source.

Cc: "11.0" <mesa-stable@lists.freedesktop.org>
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=92588
Reviewed-by: Timothy Arceri <timothy.arceri@collabora.com>
Reviewed-by: Connor Abbott <cwabbott0@gmail.com>
---
 src/glsl/nir/nir_lower_vars_to_ssa.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/glsl/nir/nir_lower_vars_to_ssa.c b/src/glsl/nir/nir_lower_vars_to_ssa.c
index 59715072c15..df24510b3a5 100644
--- a/src/glsl/nir/nir_lower_vars_to_ssa.c
+++ b/src/glsl/nir/nir_lower_vars_to_ssa.c
@@ -455,7 +455,8 @@ lower_copies_to_load_store(struct deref_node *node,
          struct deref_node *arg_node =
             get_deref_node(copy->variables[i], state);
 
-         if (arg_node == NULL)
+         /* Only bother removing copy entries for other nodes */
+         if (arg_node == NULL || arg_node == node)
             continue;
 
          struct set_entry *arg_entry = _mesa_set_search(arg_node->copies, copy);
@@ -466,6 +467,8 @@ lower_copies_to_load_store(struct deref_node *node,
       nir_instr_remove(&copy->instr);
    }
 
+   node->copies = NULL;
+
    return true;
 }
 

From 5f43e074d4a4e74b1238a5687da5d38e53ad4596 Mon Sep 17 00:00:00 2001
From: Jason Ekstrand <jason.ekstrand@intel.com>
Date: Thu, 12 Nov 2015 21:52:37 -0800
Subject: [PATCH 216/287] nir/vars_to_ssa: Delete dead output set code

This was a remnant of an early attempt to handle output reads in
vars_to_ssa.  That attempt was abandoned a long time ago but these few lines
were apparently left in the pass and managed to evade review.

Reviewed-by: Connor Abbott <cwabbott0@gmail.com>
---
 src/glsl/nir/nir_lower_vars_to_ssa.c | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/src/glsl/nir/nir_lower_vars_to_ssa.c b/src/glsl/nir/nir_lower_vars_to_ssa.c
index df24510b3a5..e670dbdc7e7 100644
--- a/src/glsl/nir/nir_lower_vars_to_ssa.c
+++ b/src/glsl/nir/nir_lower_vars_to_ssa.c
@@ -879,10 +879,6 @@ nir_lower_vars_to_ssa_impl(nir_function_impl *impl)
    state.add_to_direct_deref_nodes = true;
    nir_foreach_block(impl, register_variable_uses_block, &state);
 
-   struct set *outputs = _mesa_set_create(state.dead_ctx,
-                                          _mesa_hash_pointer,
-                                          _mesa_key_pointer_equal);
-
    bool progress = false;
 
    nir_metadata_require(impl, nir_metadata_block_index);
@@ -916,9 +912,6 @@ nir_lower_vars_to_ssa_impl(nir_function_impl *impl)
          def_stack_push(node, &load->def, &state);
       }
 
-      if (deref->var->data.mode == nir_var_shader_out)
-         _mesa_set_add(outputs, node);
-
       foreach_deref_node_match(deref, lower_copies_to_load_store, &state);
    }
 

From 8610cd6b8c71b92a8e68603c030961e36faa0ef4 Mon Sep 17 00:00:00 2001
From: Iago Toral Quiroga <itoral@igalia.com>
Date: Fri, 6 Nov 2015 12:08:49 +0100
Subject: [PATCH 217/287] nir/copy_propagate: do not copy-propagate MOV srcs
 with source modifiers

If a source operand in a MOV has source modifiers, then we cannot
copy-propagate it from the parent instruction and remove the MOV.

v2: remove the check for source modifiers from is_move() (Jason)

Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
---
 src/glsl/nir/nir_opt_copy_propagate.c | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/src/glsl/nir/nir_opt_copy_propagate.c b/src/glsl/nir/nir_opt_copy_propagate.c
index 7d8bdd7f2ca..2611069dd5c 100644
--- a/src/glsl/nir/nir_opt_copy_propagate.c
+++ b/src/glsl/nir/nir_opt_copy_propagate.c
@@ -41,11 +41,6 @@ static bool is_move(nir_alu_instr *instr)
    if (instr->dest.saturate)
       return false;
 
-   /* we handle modifiers in a separate pass */
-
-   if (instr->src[0].abs || instr->src[0].negate)
-      return false;
-
    if (!instr->src[0].src.is_ssa)
       return false;
 
@@ -65,9 +60,13 @@ static bool is_vec(nir_alu_instr *instr)
 }
 
 static bool
-is_swizzleless_move(nir_alu_instr *instr)
+is_simple_move(nir_alu_instr *instr)
 {
    if (is_move(instr)) {
+      /* We handle modifiers in a separate pass */
+      if (instr->src[0].negate || instr->src[0].abs)
+         return false;
+
       for (unsigned i = 0; i < 4; i++) {
          if (!((instr->dest.write_mask >> i) & 1))
             break;
@@ -81,6 +80,10 @@ is_swizzleless_move(nir_alu_instr *instr)
          if (instr->src[i].swizzle[0] != i)
             return false;
 
+         /* We handle modifiers in a separate pass */
+         if (instr->src[i].negate || instr->src[i].abs)
+            return false;
+
          if (def == NULL) {
             def = instr->src[i].src.ssa;
          } else if (instr->src[i].src.ssa != def) {
@@ -107,7 +110,7 @@ copy_prop_src(nir_src *src, nir_instr *parent_instr, nir_if *parent_if)
       return false;
 
    nir_alu_instr *alu_instr = nir_instr_as_alu(src_instr);
-   if (!is_swizzleless_move(alu_instr))
+   if (!is_simple_move(alu_instr))
       return false;
 
    /* Don't let copy propagation land us with a phi that has more

From 5f004fd1970338c9a1db51b421ae0f6f687661d9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Samuel=20Iglesias=20Gons=C3=A1lvez?= <siglesias@igalia.com>
Date: Thu, 12 Nov 2015 16:14:07 +0100
Subject: [PATCH 218/287] glsl: fix 'shared' layout qualifier related
 regressions
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Commit 8b28b35 added 'shared' as a keyword for compute shaders
but it broke the existing 'shared' layout qualifier support for
uniform and shader storage blocks.

This patch fixes 578 dEQP-GLES31.functional.ssbo.* tests.

v2:
- Move SHARED to interface_block_layout_qualifier (Timothy)
- Don't remove "shared" case insensitive check (Timothy)
- Remove the clearing of shared_storage flag (Timothy)

Signed-off-by: Samuel Iglesias Gonsálvez <siglesias@igalia.com>
Reviewed-by: Timothy Arceri <timothy.arceri@collabora.com>
---
 src/glsl/glsl_parser.yy | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/src/glsl/glsl_parser.yy b/src/glsl/glsl_parser.yy
index 31e254a4fd3..adf6a05acce 100644
--- a/src/glsl/glsl_parser.yy
+++ b/src/glsl/glsl_parser.yy
@@ -1652,6 +1652,11 @@ interface_block_layout_qualifier:
       memset(& $$, 0, sizeof($$));
       $$.flags.q.packed = 1;
    }
+   | SHARED
+   {
+      memset(& $$, 0, sizeof($$));
+      $$.flags.q.shared = 1;
+   }
    ;
 
 subroutine_qualifier:

From a29d922c1a19ecebb7c274f31248b00086cb4733 Mon Sep 17 00:00:00 2001
From: Iago Toral Quiroga <itoral@igalia.com>
Date: Fri, 13 Nov 2015 08:51:06 +0100
Subject: [PATCH 219/287] Revert "nir/copy_propagate: do not copy-propagate MOV
 srcs with source modifiers"

The change proposed in the review leads to piglit regressions because
is_move() is used in other places and relies on the checks for source
modifiers to be there.

Revert this until we agree on a better solution.
---
 src/glsl/nir/nir_opt_copy_propagate.c | 17 +++++++----------
 1 file changed, 7 insertions(+), 10 deletions(-)

diff --git a/src/glsl/nir/nir_opt_copy_propagate.c b/src/glsl/nir/nir_opt_copy_propagate.c
index 2611069dd5c..7d8bdd7f2ca 100644
--- a/src/glsl/nir/nir_opt_copy_propagate.c
+++ b/src/glsl/nir/nir_opt_copy_propagate.c
@@ -41,6 +41,11 @@ static bool is_move(nir_alu_instr *instr)
    if (instr->dest.saturate)
       return false;
 
+   /* we handle modifiers in a separate pass */
+
+   if (instr->src[0].abs || instr->src[0].negate)
+      return false;
+
    if (!instr->src[0].src.is_ssa)
       return false;
 
@@ -60,13 +65,9 @@ static bool is_vec(nir_alu_instr *instr)
 }
 
 static bool
-is_simple_move(nir_alu_instr *instr)
+is_swizzleless_move(nir_alu_instr *instr)
 {
    if (is_move(instr)) {
-      /* We handle modifiers in a separate pass */
-      if (instr->src[0].negate || instr->src[0].abs)
-         return false;
-
       for (unsigned i = 0; i < 4; i++) {
          if (!((instr->dest.write_mask >> i) & 1))
             break;
@@ -80,10 +81,6 @@ is_simple_move(nir_alu_instr *instr)
          if (instr->src[i].swizzle[0] != i)
             return false;
 
-         /* We handle modifiers in a separate pass */
-         if (instr->src[i].negate || instr->src[i].abs)
-            return false;
-
          if (def == NULL) {
             def = instr->src[i].src.ssa;
          } else if (instr->src[i].src.ssa != def) {
@@ -110,7 +107,7 @@ copy_prop_src(nir_src *src, nir_instr *parent_instr, nir_if *parent_if)
       return false;
 
    nir_alu_instr *alu_instr = nir_instr_as_alu(src_instr);
-   if (!is_simple_move(alu_instr))
+   if (!is_swizzleless_move(alu_instr))
       return false;
 
    /* Don't let copy propagation land us with a phi that has more

From d1314de293e9e4a63c35f094c3893aaaed8580b4 Mon Sep 17 00:00:00 2001
From: Daniel Stone <daniels@collabora.com>
Date: Sat, 7 Nov 2015 18:25:31 +0000
Subject: [PATCH 220/287] egl/wayland: Ignore rects from SwapBuffersWithDamage

eglSwapBuffersWithDamage accepts damage-region rectangles to hint the
compositor that it only needs to redraw certain areas, which were passed
through the wl_surface_damage request, as designed.

Wayland also offers a buffer transformation interface, e.g. to allow
users to render pre-rotated buffers. Unfortunately, there is no way to
query buffer transforms, and the damage region was provided in surface,
rather than buffer, co-ordinate space.

Users could in theory account for this themselves, but EGL also requires
co-ordinates to be passed in GL/mathematical co-ordinate space, with an
inversion to Wayland's natural/scanout co-ordinate space, so
transformations other than a 180-degree rotation will fail as EGL
attempts to subtract the region from (its view of the) surface height.

Pending creation and acceptance of a wl_surface.buffer_damage request,
which will accept co-ordinates in buffer co-ordinate space, pessimise to
always sending full-surface damage.

bce64c6c provides the explanation for why we send maximum-range damage,
rather than the full size of the surface: in the presence of buffer
transformations, full-surface damage may not actually cover the entire
surface.

Signed-off-by: Daniel Stone <daniels@collabora.com>
Reviewed-by: Pekka Paalanen <pekka.paalanen@collabora.co.uk>
Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
---
 src/egl/drivers/dri2/platform_wayland.c | 16 ++++------------
 1 file changed, 4 insertions(+), 12 deletions(-)

diff --git a/src/egl/drivers/dri2/platform_wayland.c b/src/egl/drivers/dri2/platform_wayland.c
index 0d161f617a1..a635c758da1 100644
--- a/src/egl/drivers/dri2/platform_wayland.c
+++ b/src/egl/drivers/dri2/platform_wayland.c
@@ -703,18 +703,10 @@ dri2_wl_swap_buffers_with_damage(_EGLDriver *drv,
    dri2_surf->dx = 0;
    dri2_surf->dy = 0;
 
-   if (n_rects == 0) {
-      wl_surface_damage(dri2_surf->wl_win->surface,
-                        0, 0, INT32_MAX, INT32_MAX);
-   } else {
-      for (i = 0; i < n_rects; i++) {
-         const int *rect = &rects[i * 4];
-         wl_surface_damage(dri2_surf->wl_win->surface,
-                           rect[0],
-                           dri2_surf->base.Height - rect[1] - rect[3],
-                           rect[2], rect[3]);
-      }
-   }
+   /* We deliberately ignore the damage region and post maximum damage, due to
+    * https://bugs.freedesktop.org/78190 */
+   wl_surface_damage(dri2_surf->wl_win->surface,
+                     0, 0, INT32_MAX, INT32_MAX);
 
    if (dri2_dpy->is_different_gpu) {
       _EGLContext *ctx = _eglGetCurrentContext();

From 00046393f80f2a13b21fce647f35e1a979f41d7d Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Fri, 13 Nov 2015 07:59:42 -0700
Subject: [PATCH 221/287] docs: update VMware driver instructions

Use a LIBDIR variable, set per-platform.
Update the Mesa configuration flags.
Run update-initramfs or dracut, update /etc/modules

Signed-off-by: Brian Paul <brianp@vmware.com>
---
 docs/vmware-guest.html | 64 ++++++++++++++++++++++++++++++++----------
 1 file changed, 49 insertions(+), 15 deletions(-)

diff --git a/docs/vmware-guest.html b/docs/vmware-guest.html
index 284c6c261d2..b495bc255ce 100644
--- a/docs/vmware-guest.html
+++ b/docs/vmware-guest.html
@@ -148,10 +148,33 @@ To get the latest code from git:
 <h2>Building the Code</h2>
 
 <ul>
-<li>Build libdrm: If you're on a 32-bit system, you should skip the --libdir configure option. Note also the comment about toolchain libdrm above. 
+<li>
+Determine where the GL-related libraries reside on your system and set
+the LIBDIR environment variable accordingly.
+<br><br>
+For 32-bit Ubuntu systems:
+<pre>
+  export LIBDIR=/usr/lib/i386-linux-gnu
+</pre>
+For 64-bit Ubuntu systems:
+<pre>
+  export LIBDIR=/usr/lib/x86_64-linux-gnu
+</pre>
+For 32-bit Fedora systems:
+<pre>
+  export LIBDIR=/usr/lib
+</pre>
+For 64-bit Fedora systems:
+<pre>
+  export LIBDIR=/usr/lib64
+</pre>
+
+</li>
+
+<li>Build libdrm:
   <pre>
   cd $TOP/drm
-  ./autogen.sh --prefix=/usr --libdir=/usr/lib64
+  ./autogen.sh --prefix=/usr --libdir=${LIBDIR}
   make
   sudo make install
   </pre>
@@ -162,12 +185,9 @@ The libxatracker library is used exclusively by the X server to do render,
 copy and video acceleration:
 <br>
 The following configure options doesn't build the EGL system.
-<br>
-As before, if you're on a 32-bit system, you should skip the --libdir
-configure option.
   <pre>
   cd $TOP/mesa
-  ./autogen.sh --prefix=/usr --libdir=/usr/lib64 --with-gallium-drivers=svga --with-dri-drivers= --enable-xa --disable-dri3
+  ./autogen.sh --prefix=/usr --libdir=${LIBDIR} --with-gallium-drivers=svga --with-dri-drivers=swrast --enable-xa --disable-dri3 --enable-glx-tls
   make
   sudo make install
   </pre>
@@ -177,25 +197,39 @@ if they're not installed in your system.  You should be told what's missing.
 <br>
 <br>
 
-<li>xf86-video-vmware: Now, once libxatracker is installed, we proceed with building and replacing the current Xorg driver. First check if your system is 32- or 64-bit. If you're building for a 32-bit system, you will not be needing the --libdir=/usr/lib64 option to autogen. 
+<li>xf86-video-vmware: Now, once libxatracker is installed, we proceed with
+building and replacing the current Xorg driver.
+First check if your system is 32- or 64-bit.
   <pre>
   cd $TOP/xf86-video-vmware
-  ./autogen.sh --prefix=/usr --libdir=/usr/lib64
+  ./autogen.sh --prefix=/usr --libdir=${LIBDIR}
   make
   sudo make install
   </pre>
+
 <li>vmwgfx kernel module. First make sure that any old version of this kernel module is removed from the system by issuing
-  <pre>
+<pre>
   sudo rm /lib/modules/`uname -r`/kernel/drivers/gpu/drm/vmwgfx.ko*
-  </pre>
-Then 
-  <pre>
+</pre>
+Build and install:
+<pre>
   cd $TOP/vmwgfx
   make
   sudo make install
-  sudo cp 00-vmwgfx.rules /etc/udev/rules.d
-  sudo depmod -ae
-  </pre>
+  sudo depmod -a
+</pre>
+If you're using an Ubuntu OS:
+<pre>
+  sudo update-initramfs -u
+</pre>
+If you're using a Fedora OS:
+<pre>
+  sudo dracut --force
+</pre>
+Add 'vmwgfx' to the /etc/modules file:
+<pre>
+  echo vmwgfx | sudo tee -a /etc/modules
+</pre>
 
 Note: some distros put DRM kernel drivers in different directories.
 For example, sometimes vmwgfx.ko might be found in

From 5a5efbf804eb848553b85f498bf4c4340d748c3d Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Fri, 13 Nov 2015 08:01:29 -0700
Subject: [PATCH 222/287] docs: add link to Coverity on developer utilities
 page

Signed-off-by: Brian Paul <brianp@vmware.com>
---
 docs/utilities.html | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/docs/utilities.html b/docs/utilities.html
index 9541d3082f7..5c0a4fdcd2e 100644
--- a/docs/utilities.html
+++ b/docs/utilities.html
@@ -30,6 +30,10 @@
   <dt><a href="http://www.valgrind.org">Valgrind</a></dt>
   <dd>is a very useful tool for tracking down
   memory-related problems in your code.</dd>
+
+  <dt><a href="http://scan.coverity.com/projects/mesa">Coverity</a></dt>
+  <dd>provides static code analysis of Mesa.  If you create an account
+  you can see the results and try to fix outstanding issues.</dd>
 </dl>
 
 </div>

From 40663864d2ee46afe246c15f5c4e6e380bb81720 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Fri, 13 Nov 2015 08:02:05 -0700
Subject: [PATCH 223/287] mesa: minor comment fix in blend.c

---
 src/mesa/main/blend.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mesa/main/blend.c b/src/mesa/main/blend.c
index 20aa4980935..ddf7f497f1e 100644
--- a/src/mesa/main/blend.c
+++ b/src/mesa/main/blend.c
@@ -639,7 +639,7 @@ _mesa_AlphaFunc( GLenum func, GLclampf ref )
  * \param opcode operation.
  *
  * Verifies that \p opcode is a valid enum and updates
-gl_colorbuffer_attrib::LogicOp.
+ * gl_colorbuffer_attrib::LogicOp.
  * On a change, flushes the vertices and notifies the driver via the
  * dd_function_table::LogicOpcode callback.
  */

From 01526136badbc611b7dafa5a99934e7e8cf527e8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <marek.olsak@amd.com>
Date: Sat, 7 Nov 2015 19:31:55 +0100
Subject: [PATCH 224/287] gallium/radeon: fix PIPE_QUERY_GPU_FINISHED
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Broken by the addition of r600_multi_fence
in 3b37155a68acc351cba86a1fa142bd0de2192d4c

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=89014

Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
---
 src/gallium/drivers/radeon/r600_query.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/drivers/radeon/r600_query.c b/src/gallium/drivers/radeon/r600_query.c
index 9a5402583f4..2bb5732262f 100644
--- a/src/gallium/drivers/radeon/r600_query.c
+++ b/src/gallium/drivers/radeon/r600_query.c
@@ -532,7 +532,7 @@ static void r600_end_query(struct pipe_context *ctx, struct pipe_query *query)
 	case PIPE_QUERY_TIMESTAMP_DISJOINT:
 		return;
 	case PIPE_QUERY_GPU_FINISHED:
-		rctx->rings.gfx.flush(rctx, RADEON_FLUSH_ASYNC, &rquery->fence);
+		ctx->flush(ctx, &rquery->fence, 0);
 		return;
 	case R600_QUERY_DRAW_CALLS:
 		rquery->end_result = rctx->num_draw_calls;

From 7f1e34e6c80b4cc59822eaf0c1731cab46befa46 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <marek.olsak@amd.com>
Date: Thu, 5 Nov 2015 12:24:20 +0100
Subject: [PATCH 225/287] r600g: fix clear_buffer fallback with offset != 0
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Discovered by luck. This code path hasn't been exercised since transform
feedback was implemented.

Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
---
 src/gallium/drivers/r600/r600_blit.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/gallium/drivers/r600/r600_blit.c b/src/gallium/drivers/r600/r600_blit.c
index aede8408446..90a1453ef69 100644
--- a/src/gallium/drivers/r600/r600_blit.c
+++ b/src/gallium/drivers/r600/r600_blit.c
@@ -604,6 +604,7 @@ static void r600_clear_buffer(struct pipe_context *ctx, struct pipe_resource *ds
 	} else {
 		uint32_t *map = r600_buffer_map_sync_with_rings(&rctx->b, r600_resource(dst),
 								 PIPE_TRANSFER_WRITE);
+		map += offset / 4;
 		size /= 4;
 		for (unsigned i = 0; i < size; i++)
 			*map++ = value;

From 65d0c558d5dfa0d10f3f169dd5af65ccc69e1899 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <marek.olsak@amd.com>
Date: Fri, 6 Nov 2015 23:06:47 +0100
Subject: [PATCH 226/287] radeonsi: fix unaligned clear_buffer fallback
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This is unreachable currently, but it will be used by unaligned 8-bit and
16-bit fills.

Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
---
 src/gallium/drivers/radeonsi/si_cp_dma.c | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_cp_dma.c b/src/gallium/drivers/radeonsi/si_cp_dma.c
index 418b2cf65c5..7b8c6d00395 100644
--- a/src/gallium/drivers/radeonsi/si_cp_dma.c
+++ b/src/gallium/drivers/radeonsi/si_cp_dma.c
@@ -176,12 +176,14 @@ static void si_clear_buffer(struct pipe_context *ctx, struct pipe_resource *dst,
 
 	/* Fallback for unaligned clears. */
 	if (offset % 4 != 0 || size % 4 != 0) {
-		uint32_t *map = sctx->b.ws->buffer_map(r600_resource(dst)->cs_buf,
-						       sctx->b.rings.gfx.cs,
-						       PIPE_TRANSFER_WRITE);
-		size /= 4;
-		for (unsigned i = 0; i < size; i++)
-			*map++ = value;
+		uint8_t *map = sctx->b.ws->buffer_map(r600_resource(dst)->cs_buf,
+						      sctx->b.rings.gfx.cs,
+						      PIPE_TRANSFER_WRITE);
+		map += offset;
+		for (unsigned i = 0; i < size; i++) {
+			unsigned byte_within_dword = (offset + i) % 4;
+			*map++ = (value >> (byte_within_dword * 8)) & 0xff;
+		}
 		return;
 	}
 

From e15c5c7a069a8dd8e0ad97f0f405a85f0f52d9f4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <marek.olsak@amd.com>
Date: Fri, 6 Nov 2015 23:42:49 +0100
Subject: [PATCH 227/287] radeonsi: fix a future crash in emit_cb_target_mask
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This can't crash currently, but it would crash if clear_buffer
from u_blitter were used with a clean context.

Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
---
 src/gallium/drivers/radeonsi/si_state.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index 18b64056bc7..eba9c61ff26 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -265,7 +265,7 @@ static void si_emit_cb_target_mask(struct si_context *sctx, struct r600_atom *at
 	 *
 	 * Reproducible with Unigine Heaven 4.0 and drirc missing.
 	 */
-	if (blend->dual_src_blend &&
+	if (blend && blend->dual_src_blend &&
 	    sctx->ps_shader.cso &&
 	    (sctx->ps_shader.cso->ps_colors_written & 0x3) != 0x3)
 		mask = 0;

From 19a9c1ecc7e2aee621b784cf452674373ec8499f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <marek.olsak@amd.com>
Date: Fri, 6 Nov 2015 23:41:15 +0100
Subject: [PATCH 228/287] gallium/u_blitter: add support for multi-dword clear
 values in clear_buffer
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
---
 src/gallium/auxiliary/util/u_blitter.c | 25 ++++++++++++++-----------
 1 file changed, 14 insertions(+), 11 deletions(-)

diff --git a/src/gallium/auxiliary/util/u_blitter.c b/src/gallium/auxiliary/util/u_blitter.c
index b7b1ece389b..fccc92c06c4 100644
--- a/src/gallium/auxiliary/util/u_blitter.c
+++ b/src/gallium/auxiliary/util/u_blitter.c
@@ -70,7 +70,7 @@ struct blitter_context_priv
    /* Constant state objects. */
    /* Vertex shaders. */
    void *vs; /**< Vertex shader which passes {pos, generic} to the output.*/
-   void *vs_pos_only; /**< Vertex shader which passes pos to the output.*/
+   void *vs_pos_only[4]; /**< Vertex shader which passes pos to the output.*/
    void *vs_layered; /**< Vertex shader which sets LAYER = INSTANCEID. */
 
    /* Fragment shaders. */
@@ -325,27 +325,29 @@ struct blitter_context *util_blitter_create(struct pipe_context *pipe)
    return &ctx->base;
 }
 
-static void bind_vs_pos_only(struct blitter_context_priv *ctx)
+static void bind_vs_pos_only(struct blitter_context_priv *ctx,
+                             unsigned num_so_channels)
 {
    struct pipe_context *pipe = ctx->base.pipe;
+   int index = num_so_channels ? num_so_channels - 1 : 0;
 
-   if (!ctx->vs_pos_only) {
+   if (!ctx->vs_pos_only[index]) {
       struct pipe_stream_output_info so;
       const uint semantic_names[] = { TGSI_SEMANTIC_POSITION };
       const uint semantic_indices[] = { 0 };
 
       memset(&so, 0, sizeof(so));
       so.num_outputs = 1;
-      so.output[0].num_components = 1;
-      so.stride[0] = 1;
+      so.output[0].num_components = num_so_channels;
+      so.stride[0] = num_so_channels;
 
-      ctx->vs_pos_only =
+      ctx->vs_pos_only[index] =
          util_make_vertex_passthrough_shader_with_so(pipe, 1, semantic_names,
                                                      semantic_indices, FALSE,
                                                      &so);
    }
 
-   pipe->bind_vs_state(pipe, ctx->vs_pos_only);
+   pipe->bind_vs_state(pipe, ctx->vs_pos_only[index]);
 }
 
 static void bind_vs_passthrough(struct blitter_context_priv *ctx)
@@ -441,8 +443,9 @@ void util_blitter_destroy(struct blitter_context *blitter)
       pipe->delete_rasterizer_state(pipe, ctx->rs_discard_state);
    if (ctx->vs)
       pipe->delete_vs_state(pipe, ctx->vs);
-   if (ctx->vs_pos_only)
-      pipe->delete_vs_state(pipe, ctx->vs_pos_only);
+   for (i = 0; i < 4; i++)
+      if (ctx->vs_pos_only[i])
+         pipe->delete_vs_state(pipe, ctx->vs_pos_only[i]);
    if (ctx->vs_layered)
       pipe->delete_vs_state(pipe, ctx->vs_layered);
    pipe->delete_vertex_elements_state(pipe, ctx->velem_state);
@@ -2036,7 +2039,7 @@ void util_blitter_copy_buffer(struct blitter_context *blitter,
 
    pipe->set_vertex_buffers(pipe, ctx->base.vb_slot, 1, &vb);
    pipe->bind_vertex_elements_state(pipe, ctx->velem_state_readbuf[0]);
-   bind_vs_pos_only(ctx);
+   bind_vs_pos_only(ctx, 1);
    if (ctx->has_geometry_shader)
       pipe->bind_gs_state(pipe, NULL);
    if (ctx->has_tessellation) {
@@ -2103,7 +2106,7 @@ void util_blitter_clear_buffer(struct blitter_context *blitter,
    pipe->set_vertex_buffers(pipe, ctx->base.vb_slot, 1, &vb);
    pipe->bind_vertex_elements_state(pipe,
                                     ctx->velem_state_readbuf[num_channels-1]);
-   bind_vs_pos_only(ctx);
+   bind_vs_pos_only(ctx, num_channels);
    if (ctx->has_geometry_shader)
       pipe->bind_gs_state(pipe, NULL);
    if (ctx->has_tessellation) {

From 19773f98055ec6976b5f5c2d0d83245f96206ec4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <marek.olsak@amd.com>
Date: Fri, 6 Nov 2015 23:16:11 +0100
Subject: [PATCH 229/287] radeonsi: add SI_SAVE_FRAGMENT_STATE blitter flag
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Buffer clears via transform feedback won't set this.

Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
---
 src/gallium/drivers/radeonsi/si_blit.c | 46 +++++++++++++++-----------
 1 file changed, 26 insertions(+), 20 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c
index fce014a1e6b..d320ac42651 100644
--- a/src/gallium/drivers/radeonsi/si_blit.c
+++ b/src/gallium/drivers/radeonsi/si_blit.c
@@ -29,20 +29,23 @@ enum si_blitter_op /* bitmask */
 {
 	SI_SAVE_TEXTURES      = 1,
 	SI_SAVE_FRAMEBUFFER   = 2,
-	SI_DISABLE_RENDER_COND = 4,
+	SI_SAVE_FRAGMENT_STATE = 4,
+	SI_DISABLE_RENDER_COND = 8,
 
-	SI_CLEAR         = 0,
+	SI_CLEAR         = SI_SAVE_FRAGMENT_STATE,
 
-	SI_CLEAR_SURFACE = SI_SAVE_FRAMEBUFFER,
+	SI_CLEAR_SURFACE = SI_SAVE_FRAMEBUFFER | SI_SAVE_FRAGMENT_STATE,
 
 	SI_COPY          = SI_SAVE_FRAMEBUFFER | SI_SAVE_TEXTURES |
+			   SI_SAVE_FRAGMENT_STATE | SI_DISABLE_RENDER_COND,
+
+	SI_BLIT          = SI_SAVE_FRAMEBUFFER | SI_SAVE_TEXTURES |
+			   SI_SAVE_FRAGMENT_STATE,
+
+	SI_DECOMPRESS    = SI_SAVE_FRAMEBUFFER | SI_SAVE_FRAGMENT_STATE |
 			   SI_DISABLE_RENDER_COND,
 
-	SI_BLIT          = SI_SAVE_FRAMEBUFFER | SI_SAVE_TEXTURES,
-
-	SI_DECOMPRESS    = SI_SAVE_FRAMEBUFFER | SI_DISABLE_RENDER_COND,
-
-	SI_COLOR_RESOLVE = SI_SAVE_FRAMEBUFFER
+	SI_COLOR_RESOLVE = SI_SAVE_FRAMEBUFFER | SI_SAVE_FRAGMENT_STATE
 };
 
 static void si_blitter_begin(struct pipe_context *ctx, enum si_blitter_op op)
@@ -51,22 +54,25 @@ static void si_blitter_begin(struct pipe_context *ctx, enum si_blitter_op op)
 
 	r600_suspend_nontimer_queries(&sctx->b);
 
-	util_blitter_save_blend(sctx->blitter, sctx->queued.named.blend);
-	util_blitter_save_depth_stencil_alpha(sctx->blitter, sctx->queued.named.dsa);
-	util_blitter_save_stencil_ref(sctx->blitter, &sctx->stencil_ref.state);
-	util_blitter_save_rasterizer(sctx->blitter, sctx->queued.named.rasterizer);
-	util_blitter_save_fragment_shader(sctx->blitter, sctx->ps_shader.cso);
-	util_blitter_save_geometry_shader(sctx->blitter, sctx->gs_shader.cso);
+	util_blitter_save_vertex_buffer_slot(sctx->blitter, sctx->vertex_buffer);
+	util_blitter_save_vertex_elements(sctx->blitter, sctx->vertex_elements);
+	util_blitter_save_vertex_shader(sctx->blitter, sctx->vs_shader.cso);
 	util_blitter_save_tessctrl_shader(sctx->blitter, sctx->tcs_shader.cso);
 	util_blitter_save_tesseval_shader(sctx->blitter, sctx->tes_shader.cso);
-	util_blitter_save_vertex_shader(sctx->blitter, sctx->vs_shader.cso);
-	util_blitter_save_vertex_elements(sctx->blitter, sctx->vertex_elements);
-	util_blitter_save_sample_mask(sctx->blitter, sctx->sample_mask.sample_mask);
-	util_blitter_save_viewport(sctx->blitter, &sctx->viewports.states[0]);
-	util_blitter_save_scissor(sctx->blitter, &sctx->scissors.states[0]);
-	util_blitter_save_vertex_buffer_slot(sctx->blitter, sctx->vertex_buffer);
+	util_blitter_save_geometry_shader(sctx->blitter, sctx->gs_shader.cso);
 	util_blitter_save_so_targets(sctx->blitter, sctx->b.streamout.num_targets,
 				     (struct pipe_stream_output_target**)sctx->b.streamout.targets);
+	util_blitter_save_rasterizer(sctx->blitter, sctx->queued.named.rasterizer);
+
+	if (op & SI_SAVE_FRAGMENT_STATE) {
+		util_blitter_save_blend(sctx->blitter, sctx->queued.named.blend);
+		util_blitter_save_depth_stencil_alpha(sctx->blitter, sctx->queued.named.dsa);
+		util_blitter_save_stencil_ref(sctx->blitter, &sctx->stencil_ref.state);
+		util_blitter_save_fragment_shader(sctx->blitter, sctx->ps_shader.cso);
+		util_blitter_save_sample_mask(sctx->blitter, sctx->sample_mask.sample_mask);
+		util_blitter_save_viewport(sctx->blitter, &sctx->viewports.states[0]);
+		util_blitter_save_scissor(sctx->blitter, &sctx->scissors.states[0]);
+	}
 
 	if (op & SI_SAVE_FRAMEBUFFER)
 		util_blitter_save_framebuffer(sctx->blitter, &sctx->framebuffer.state);

From f7757100f22f0d22eb8c9f232915b9d9a28cc781 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <marek.olsak@amd.com>
Date: Tue, 3 Nov 2015 19:35:46 +0100
Subject: [PATCH 230/287] radeonsi: add glClearBufferSubData acceleration
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

8-bit and 16-bit clears which are not aligned to dwords are done in software.

Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
---
 src/gallium/drivers/radeonsi/si_blit.c | 60 ++++++++++++++++++++++++++
 1 file changed, 60 insertions(+)

diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c
index d320ac42651..31f22c4acf7 100644
--- a/src/gallium/drivers/radeonsi/si_blit.c
+++ b/src/gallium/drivers/radeonsi/si_blit.c
@@ -737,9 +737,69 @@ static void si_flush_resource(struct pipe_context *ctx,
 	}
 }
 
+static void si_pipe_clear_buffer(struct pipe_context *ctx,
+				 struct pipe_resource *dst,
+				 unsigned offset, unsigned size,
+				 const void *clear_value_ptr,
+				 int clear_value_size)
+{
+	struct si_context *sctx = (struct si_context*)ctx;
+	uint32_t dword_value;
+	unsigned i;
+
+	assert(offset % clear_value_size == 0);
+	assert(size % clear_value_size == 0);
+
+	if (clear_value_size > 4) {
+		const uint32_t *u32 = clear_value_ptr;
+		bool clear_dword_duplicated = true;
+
+		/* See if we can lower large fills to dword fills. */
+		for (i = 1; i < clear_value_size / 4; i++)
+			if (u32[0] != u32[i]) {
+				clear_dword_duplicated = false;
+				break;
+			}
+
+		if (!clear_dword_duplicated) {
+			/* Use transform feedback for 64-bit, 96-bit, and
+			 * 128-bit fills.
+			 */
+			union pipe_color_union clear_value;
+
+			memcpy(&clear_value, clear_value_ptr, clear_value_size);
+			si_blitter_begin(ctx, SI_DISABLE_RENDER_COND);
+			util_blitter_clear_buffer(sctx->blitter, dst, offset,
+						  size, clear_value_size / 4,
+						  &clear_value);
+			si_blitter_end(ctx);
+			return;
+		}
+	}
+
+	/* Expand the clear value to a dword. */
+	switch (clear_value_size) {
+	case 1:
+		dword_value = *(uint8_t*)clear_value_ptr;
+		dword_value |= (dword_value << 8) |
+			       (dword_value << 16) |
+			       (dword_value << 24);
+		break;
+	case 2:
+		dword_value = *(uint16_t*)clear_value_ptr;
+		dword_value |= dword_value << 16;
+		break;
+	default:
+		dword_value = *(uint32_t*)clear_value_ptr;
+	}
+
+	sctx->b.clear_buffer(ctx, dst, offset, size, dword_value, false);
+}
+
 void si_init_blit_functions(struct si_context *sctx)
 {
 	sctx->b.b.clear = si_clear;
+	sctx->b.b.clear_buffer = si_pipe_clear_buffer;
 	sctx->b.b.clear_render_target = si_clear_render_target;
 	sctx->b.b.clear_depth_stencil = si_clear_depth_stencil;
 	sctx->b.b.resource_copy_region = si_resource_copy_region;

From 40912dd91e96376517fb41bb4dc228b45fd1a01c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <marek.olsak@amd.com>
Date: Thu, 5 Nov 2015 23:56:38 +0100
Subject: [PATCH 231/287] radeonsi: initialize SX_PS_DOWNCONVERT to 0 on Stoney
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

otherwise the SX or CB blocks can go bananas

Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
Cc: mesa-stable@lists.freedesktop.org
---
 src/gallium/drivers/radeonsi/si_state.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index eba9c61ff26..6d97049c0f3 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -3444,6 +3444,9 @@ static void si_init_config(struct si_context *sctx)
 		si_pm4_set_reg(pm4, R_028C5C_VGT_OUT_DEALLOC_CNTL, 32);
 	}
 
+	if (sctx->b.family == CHIP_STONEY)
+		si_pm4_set_reg(pm4, R_028754_SX_PS_DOWNCONVERT, 0);
+
 	si_pm4_set_reg(pm4, R_028080_TA_BC_BASE_ADDR, border_color_va >> 8);
 	if (sctx->b.chip_class >= CIK)
 		si_pm4_set_reg(pm4, R_028084_TA_BC_BASE_ADDR_HI, border_color_va >> 40);

From 10130ccd8ca9e28e417268f4e5681b5c25398516 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <marek.olsak@amd.com>
Date: Sat, 7 Nov 2015 12:07:31 +0100
Subject: [PATCH 232/287] radeonsi: set the DISABLE_WR_CONFIRM flag on CI-VI as
 well
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

I missed this in commit c3e527f93d4281ad6e2ca165eaf6ff588e4faefa
    radeonsi: only enable write confirmation on the last CP DMA packet

Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
---
 src/gallium/drivers/radeonsi/si_cp_dma.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_cp_dma.c b/src/gallium/drivers/radeonsi/si_cp_dma.c
index 7b8c6d00395..3f657ff96ed 100644
--- a/src/gallium/drivers/radeonsi/si_cp_dma.c
+++ b/src/gallium/drivers/radeonsi/si_cp_dma.c
@@ -64,7 +64,7 @@ static void si_emit_cp_dma_copy_buffer(struct si_context *sctx,
 		radeon_emit(cs, src_va >> 32);		/* SRC_ADDR_HI [31:0] */
 		radeon_emit(cs, dst_va);		/* DST_ADDR_LO [31:0] */
 		radeon_emit(cs, dst_va >> 32);		/* DST_ADDR_HI [31:0] */
-		radeon_emit(cs, size | raw_wait);	/* COMMAND [29:22] | BYTE_COUNT [20:0] */
+		radeon_emit(cs, size | wr_confirm | raw_wait);	/* COMMAND [29:22] | BYTE_COUNT [20:0] */
 	} else {
 		radeon_emit(cs, PKT3(PKT3_CP_DMA, 4, 0));
 		radeon_emit(cs, src_va);			/* SRC_ADDR_LO [31:0] */
@@ -96,7 +96,7 @@ static void si_emit_cp_dma_clear_buffer(struct si_context *sctx,
 		radeon_emit(cs, 0);
 		radeon_emit(cs, dst_va);		/* DST_ADDR_LO [31:0] */
 		radeon_emit(cs, dst_va >> 32);		/* DST_ADDR_HI [15:0] */
-		radeon_emit(cs, size | raw_wait);	/* COMMAND [29:22] | BYTE_COUNT [20:0] */
+		radeon_emit(cs, size | wr_confirm | raw_wait);	/* COMMAND [29:22] | BYTE_COUNT [20:0] */
 	} else {
 		radeon_emit(cs, PKT3(PKT3_CP_DMA, 4, 0));
 		radeon_emit(cs, clear_value);		/* DATA [31:0] */

From c6012a6650c894e57dba51f8e336f134aad13d61 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <marek.olsak@amd.com>
Date: Fri, 6 Nov 2015 21:11:16 +0100
Subject: [PATCH 233/287] radeonsi: rename cache flushing flags once more
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

KCACHE, TC L1 and TC L2 are renamed to:
- SMEM L1
- VMEM L1
- GLOBAL L2

You can easily tell what they are used for now.
Shaders must deal with coherency issues between both L1s manually,
e.g. by setting GLC=1 or by using s_dcache_*.

BOTH_ICACHE_KCACHE was an unused definition.

Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
---
 src/gallium/drivers/radeonsi/si_compute.c     | 12 ++++++------
 src/gallium/drivers/radeonsi/si_cp_dma.c      |  6 +++---
 src/gallium/drivers/radeonsi/si_descriptors.c |  4 ++--
 src/gallium/drivers/radeonsi/si_hw_context.c  | 10 +++++-----
 src/gallium/drivers/radeonsi/si_pipe.h        | 15 ++++++---------
 src/gallium/drivers/radeonsi/si_state.c       |  8 ++++----
 src/gallium/drivers/radeonsi/si_state_draw.c  | 10 ++++------
 7 files changed, 30 insertions(+), 35 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c
index 697e60a50d9..c008f8b402b 100644
--- a/src/gallium/drivers/radeonsi/si_compute.c
+++ b/src/gallium/drivers/radeonsi/si_compute.c
@@ -253,10 +253,10 @@ static void si_launch_grid(
 	radeon_emit(cs, 0x80000000);
 	radeon_emit(cs, 0x80000000);
 
-	sctx->b.flags |= SI_CONTEXT_INV_TC_L1 |
-			 SI_CONTEXT_INV_TC_L2 |
+	sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1 |
+			 SI_CONTEXT_INV_GLOBAL_L2 |
 			 SI_CONTEXT_INV_ICACHE |
-			 SI_CONTEXT_INV_KCACHE |
+			 SI_CONTEXT_INV_SMEM_L1 |
 			 SI_CONTEXT_FLUSH_WITH_INV_L2 |
 			 SI_CONTEXT_FLAG_COMPUTE;
 	si_emit_cache_flush(sctx, NULL);
@@ -449,10 +449,10 @@ static void si_launch_grid(
 	si_pm4_free_state(sctx, pm4, ~0);
 
 	sctx->b.flags |= SI_CONTEXT_CS_PARTIAL_FLUSH |
-			 SI_CONTEXT_INV_TC_L1 |
-			 SI_CONTEXT_INV_TC_L2 |
+			 SI_CONTEXT_INV_VMEM_L1 |
+			 SI_CONTEXT_INV_GLOBAL_L2 |
 			 SI_CONTEXT_INV_ICACHE |
-			 SI_CONTEXT_INV_KCACHE |
+			 SI_CONTEXT_INV_SMEM_L1 |
 			 SI_CONTEXT_FLAG_COMPUTE;
 	si_emit_cache_flush(sctx, NULL);
 }
diff --git a/src/gallium/drivers/radeonsi/si_cp_dma.c b/src/gallium/drivers/radeonsi/si_cp_dma.c
index 3f657ff96ed..ac35a54ce65 100644
--- a/src/gallium/drivers/radeonsi/si_cp_dma.c
+++ b/src/gallium/drivers/radeonsi/si_cp_dma.c
@@ -112,9 +112,9 @@ static unsigned get_flush_flags(struct si_context *sctx, bool is_framebuffer)
 	if (is_framebuffer)
 		return SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER;
 
-	return SI_CONTEXT_INV_TC_L1 |
-	       (sctx->b.chip_class == SI ? SI_CONTEXT_INV_TC_L2 : 0) |
-	       SI_CONTEXT_INV_KCACHE;
+	return SI_CONTEXT_INV_SMEM_L1 |
+	       SI_CONTEXT_INV_VMEM_L1 |
+	       (sctx->b.chip_class == SI ? SI_CONTEXT_INV_GLOBAL_L2 : 0);
 }
 
 static unsigned get_tc_l2_flag(struct si_context *sctx, bool is_framebuffer)
diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c
index a8ff6f27319..b4dc3cb2347 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -670,8 +670,8 @@ static void si_set_streamout_targets(struct pipe_context *ctx,
 		 * VS_PARTIAL_FLUSH is required if the buffers are going to be
 		 * used as an input immediately.
 		 */
-		sctx->b.flags |= SI_CONTEXT_INV_KCACHE |
-				 SI_CONTEXT_INV_TC_L1 |
+		sctx->b.flags |= SI_CONTEXT_INV_SMEM_L1 |
+				 SI_CONTEXT_INV_VMEM_L1 |
 				 SI_CONTEXT_VS_PARTIAL_FLUSH;
 	}
 
diff --git a/src/gallium/drivers/radeonsi/si_hw_context.c b/src/gallium/drivers/radeonsi/si_hw_context.c
index 7c147e2e44c..9b8bdf5d901 100644
--- a/src/gallium/drivers/radeonsi/si_hw_context.c
+++ b/src/gallium/drivers/radeonsi/si_hw_context.c
@@ -73,8 +73,8 @@ void si_context_gfx_flush(void *context, unsigned flags,
 	r600_preflush_suspend_features(&ctx->b);
 
 	ctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER |
-			SI_CONTEXT_INV_TC_L1 |
-			SI_CONTEXT_INV_TC_L2 |
+			SI_CONTEXT_INV_VMEM_L1 |
+			SI_CONTEXT_INV_GLOBAL_L2 |
 			/* this is probably not needed anymore */
 			SI_CONTEXT_PS_PARTIAL_FLUSH;
 	si_emit_cache_flush(ctx, NULL);
@@ -144,9 +144,9 @@ void si_begin_new_cs(struct si_context *ctx)
 
 	/* Flush read caches at the beginning of CS. */
 	ctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER |
-			SI_CONTEXT_INV_TC_L1 |
-			SI_CONTEXT_INV_TC_L2 |
-			SI_CONTEXT_INV_KCACHE |
+			SI_CONTEXT_INV_VMEM_L1 |
+			SI_CONTEXT_INV_GLOBAL_L2 |
+			SI_CONTEXT_INV_SMEM_L1 |
 			SI_CONTEXT_INV_ICACHE;
 
 	/* set all valid group as dirty so they get reemited on
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index 42cd8803c36..20fd6952d8c 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -46,15 +46,12 @@
 
 /* Instruction cache. */
 #define SI_CONTEXT_INV_ICACHE		(R600_CONTEXT_PRIVATE_FLAG << 0)
-/* Cache used by scalar memory (SMEM) instructions. They also use TC
- * as a second level cache, which isn't flushed by this.
- * Other names: constant cache, data cache, DCACHE */
-#define SI_CONTEXT_INV_KCACHE		(R600_CONTEXT_PRIVATE_FLAG << 1)
-/* Caches used by vector memory (VMEM) instructions.
- * L1 can optionally be bypassed (GLC=1) and can only be used by shaders.
- * L2 is used by shaders and can be used by other blocks (CP, sDMA). */
-#define SI_CONTEXT_INV_TC_L1		(R600_CONTEXT_PRIVATE_FLAG << 2)
-#define SI_CONTEXT_INV_TC_L2		(R600_CONTEXT_PRIVATE_FLAG << 3)
+/* SMEM L1, other names: KCACHE, constant cache, DCACHE, data cache */
+#define SI_CONTEXT_INV_SMEM_L1		(R600_CONTEXT_PRIVATE_FLAG << 1)
+/* VMEM L1 can optionally be bypassed (GLC=1). Other names: TC L1 */
+#define SI_CONTEXT_INV_VMEM_L1		(R600_CONTEXT_PRIVATE_FLAG << 2)
+/* Used by everything except CB/DB, can be bypassed (SLC=1). Other names: TC L2 */
+#define SI_CONTEXT_INV_GLOBAL_L2	(R600_CONTEXT_PRIVATE_FLAG << 3)
 /* Framebuffer caches. */
 #define SI_CONTEXT_FLUSH_AND_INV_CB_META (R600_CONTEXT_PRIVATE_FLAG << 4)
 #define SI_CONTEXT_FLUSH_AND_INV_DB_META (R600_CONTEXT_PRIVATE_FLAG << 5)
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index 6d97049c0f3..2c35c0a8e0a 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -2125,8 +2125,8 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
 	 * Flush all CB and DB caches here because all buffers can be used
 	 * for write by both TC (with shader image stores) and CB/DB.
 	 */
-	sctx->b.flags |= SI_CONTEXT_INV_TC_L1 |
-			 SI_CONTEXT_INV_TC_L2 |
+	sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1 |
+			 SI_CONTEXT_INV_GLOBAL_L2 |
 			 SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER;
 
 	/* Take the maximum of the old and new count. If the new count is lower,
@@ -3044,8 +3044,8 @@ static void si_texture_barrier(struct pipe_context *ctx)
 {
 	struct si_context *sctx = (struct si_context *)ctx;
 
-	sctx->b.flags |= SI_CONTEXT_INV_TC_L1 |
-			 SI_CONTEXT_INV_TC_L2 |
+	sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1 |
+			 SI_CONTEXT_INV_GLOBAL_L2 |
 			 SI_CONTEXT_FLUSH_AND_INV_CB;
 }
 
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index cf0891a2ab7..6c7fff9735c 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -604,8 +604,6 @@ static void si_emit_draw_packets(struct si_context *sctx,
 	}
 }
 
-#define BOTH_ICACHE_KCACHE (SI_CONTEXT_INV_ICACHE | SI_CONTEXT_INV_KCACHE)
-
 void si_emit_cache_flush(struct si_context *si_ctx, struct r600_atom *atom)
 {
 	struct r600_common_context *sctx = &si_ctx->b;
@@ -624,12 +622,12 @@ void si_emit_cache_flush(struct si_context *si_ctx, struct r600_atom *atom)
 
 	if (sctx->flags & SI_CONTEXT_INV_ICACHE)
 		cp_coher_cntl |= S_0085F0_SH_ICACHE_ACTION_ENA(1);
-	if (sctx->flags & SI_CONTEXT_INV_KCACHE)
+	if (sctx->flags & SI_CONTEXT_INV_SMEM_L1)
 		cp_coher_cntl |= S_0085F0_SH_KCACHE_ACTION_ENA(1);
 
-	if (sctx->flags & SI_CONTEXT_INV_TC_L1)
+	if (sctx->flags & SI_CONTEXT_INV_VMEM_L1)
 		cp_coher_cntl |= S_0085F0_TCL1_ACTION_ENA(1);
-	if (sctx->flags & SI_CONTEXT_INV_TC_L2) {
+	if (sctx->flags & SI_CONTEXT_INV_GLOBAL_L2) {
 		cp_coher_cntl |= S_0085F0_TC_ACTION_ENA(1);
 
 		/* TODO: this might not be needed. */
@@ -843,7 +841,7 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
 	/* VI reads index buffers through TC L2. */
 	if (info->indexed && sctx->b.chip_class <= CIK &&
 	    r600_resource(ib.buffer)->TC_L2_dirty) {
-		sctx->b.flags |= SI_CONTEXT_INV_TC_L2;
+		sctx->b.flags |= SI_CONTEXT_INV_GLOBAL_L2;
 		r600_resource(ib.buffer)->TC_L2_dirty = false;
 	}
 

From 81d412e02ce7db644774202b175f1f24b1f262c7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <marek.olsak@amd.com>
Date: Sat, 7 Nov 2015 13:31:03 +0100
Subject: [PATCH 234/287] gallium/radeon: move GFX/DMA flushing from
 add_to_buffer_list to need_cs_space
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

need_cs_space is invoked less often than radeon_add_to_buffer_list and is
still called before any commands are emitted, so flush the other IB there
instead. This is a lot cleaner. The code in radeon_add_to_buffer_list
always seemed dodgy to me.

Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
---
 src/gallium/drivers/r600/r600_hw_context.c    |  5 +++++
 src/gallium/drivers/radeon/r600_cs.h          | 15 ---------------
 src/gallium/drivers/radeon/r600_pipe_common.c |  4 ++++
 src/gallium/drivers/radeonsi/si_hw_context.c  |  5 +++++
 4 files changed, 14 insertions(+), 15 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_hw_context.c b/src/gallium/drivers/r600/r600_hw_context.c
index 6f11366e606..cf8a07f5a1a 100644
--- a/src/gallium/drivers/r600/r600_hw_context.c
+++ b/src/gallium/drivers/r600/r600_hw_context.c
@@ -33,6 +33,11 @@
 void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw,
 			boolean count_draw_in)
 {
+	struct radeon_winsys_cs *dma = ctx->b.rings.dma.cs;
+
+	/* Flush the DMA IB if it's not empty. */
+	if (dma && dma->cdw)
+		ctx->b.rings.dma.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
 
 	if (!ctx->b.ws->cs_memory_below_limit(ctx->b.rings.gfx.cs, ctx->b.vram, ctx->b.gtt)) {
 		ctx->b.gtt = 0;
diff --git a/src/gallium/drivers/radeon/r600_cs.h b/src/gallium/drivers/radeon/r600_cs.h
index b5a1dafb273..ad067ce4e76 100644
--- a/src/gallium/drivers/radeon/r600_cs.h
+++ b/src/gallium/drivers/radeon/r600_cs.h
@@ -50,21 +50,6 @@ static inline unsigned radeon_add_to_buffer_list(struct r600_common_context *rct
 						 enum radeon_bo_priority priority)
 {
 	assert(usage);
-
-	/* Make sure that all previous rings are flushed so that everything
-	 * looks serialized from the driver point of view.
-	 */
-	if (!ring->flushing) {
-		if (ring == &rctx->rings.gfx) {
-			if (rctx->rings.dma.cs) {
-				/* flush dma ring */
-				rctx->rings.dma.flush(rctx, RADEON_FLUSH_ASYNC, NULL);
-			}
-		} else {
-			/* flush gfx ring */
-			rctx->rings.gfx.flush(rctx, RADEON_FLUSH_ASYNC, NULL);
-		}
-	}
 	return rctx->ws->cs_add_buffer(ring->cs, rbo->cs_buf, usage,
 				      rbo->domains, priority) * 4;
 }
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c b/src/gallium/drivers/radeon/r600_pipe_common.c
index 79e624ea12b..e7179dc0a46 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.c
+++ b/src/gallium/drivers/radeon/r600_pipe_common.c
@@ -117,6 +117,10 @@ void r600_draw_rectangle(struct blitter_context *blitter,
 
 void r600_need_dma_space(struct r600_common_context *ctx, unsigned num_dw)
 {
+	/* Flush the GFX IB if it's not empty. */
+	if (ctx->rings.gfx.cs->cdw > ctx->initial_gfx_cs_size)
+		ctx->rings.gfx.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
+
 	/* Flush if there's not enough space. */
 	if ((num_dw + ctx->rings.dma.cs->cdw) > ctx->rings.dma.cs->max_dw) {
 		ctx->rings.dma.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
diff --git a/src/gallium/drivers/radeonsi/si_hw_context.c b/src/gallium/drivers/radeonsi/si_hw_context.c
index 9b8bdf5d901..7d0e6d4dd45 100644
--- a/src/gallium/drivers/radeonsi/si_hw_context.c
+++ b/src/gallium/drivers/radeonsi/si_hw_context.c
@@ -30,6 +30,11 @@
 void si_need_cs_space(struct si_context *ctx)
 {
 	struct radeon_winsys_cs *cs = ctx->b.rings.gfx.cs;
+	struct radeon_winsys_cs *dma = ctx->b.rings.dma.cs;
+
+	/* Flush the DMA IB if it's not empty. */
+	if (dma && dma->cdw)
+		ctx->b.rings.dma.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
 
 	/* There are two memory usage counters in the winsys for all buffers
 	 * that have been added (cs_add_buffer) and two counters in the pipe

From 8569f9a87ec8d1bea3946476d5cc0be2a58ea149 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <marek.olsak@amd.com>
Date: Sat, 7 Nov 2015 13:43:18 +0100
Subject: [PATCH 235/287] gallium/radeon: remove the IB flushing flag
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The per-ring "flushing" flag is not needed anymore. A similar flag, private
to radeonsi, will be introduced in the next commit.

Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
---
 src/gallium/drivers/r600/r600_hw_context.c    | 3 ---
 src/gallium/drivers/radeon/r600_pipe_common.c | 9 ++-------
 src/gallium/drivers/radeon/r600_pipe_common.h | 1 -
 src/gallium/drivers/radeonsi/si_hw_context.c  | 3 ---
 4 files changed, 2 insertions(+), 14 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_hw_context.c b/src/gallium/drivers/r600/r600_hw_context.c
index cf8a07f5a1a..1cffc34a5a0 100644
--- a/src/gallium/drivers/r600/r600_hw_context.c
+++ b/src/gallium/drivers/r600/r600_hw_context.c
@@ -256,8 +256,6 @@ void r600_context_gfx_flush(void *context, unsigned flags,
 	if (cs->cdw == ctx->b.initial_gfx_cs_size && !fence)
 		return;
 
-	ctx->b.rings.gfx.flushing = true;
-
 	r600_preflush_suspend_features(&ctx->b);
 
 	/* flush the framebuffer cache */
@@ -283,7 +281,6 @@ void r600_context_gfx_flush(void *context, unsigned flags,
 
 	/* Flush the CS. */
 	ctx->b.ws->cs_flush(cs, flags, fence, ctx->screen->b.cs_count++);
-	ctx->b.rings.gfx.flushing = false;
 
 	r600_begin_new_cs(ctx);
 }
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c b/src/gallium/drivers/radeon/r600_pipe_common.c
index e7179dc0a46..daa325dae04 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.c
+++ b/src/gallium/drivers/radeon/r600_pipe_common.c
@@ -221,13 +221,8 @@ static void r600_flush_dma_ring(void *ctx, unsigned flags,
 	struct r600_common_context *rctx = (struct r600_common_context *)ctx;
 	struct radeon_winsys_cs *cs = rctx->rings.dma.cs;
 
-	if (!cs->cdw)
-		goto done;
-
-	rctx->rings.dma.flushing = true;
-	rctx->ws->cs_flush(cs, flags, &rctx->last_sdma_fence, 0);
-	rctx->rings.dma.flushing = false;
-done:
+	if (cs->cdw)
+		rctx->ws->cs_flush(cs, flags, &rctx->last_sdma_fence, 0);
 	if (fence)
 		rctx->ws->fence_reference(fence, rctx->last_sdma_fence);
 }
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h
index b7f1a234baf..9fae5c8da26 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -365,7 +365,6 @@ struct r600_streamout {
 
 struct r600_ring {
 	struct radeon_winsys_cs		*cs;
-	bool				flushing;
 	void (*flush)(void *ctx, unsigned flags,
 		      struct pipe_fence_handle **fence);
 };
diff --git a/src/gallium/drivers/radeonsi/si_hw_context.c b/src/gallium/drivers/radeonsi/si_hw_context.c
index 7d0e6d4dd45..8eade11078c 100644
--- a/src/gallium/drivers/radeonsi/si_hw_context.c
+++ b/src/gallium/drivers/radeonsi/si_hw_context.c
@@ -73,8 +73,6 @@ void si_context_gfx_flush(void *context, unsigned flags,
 		return;
 	}
 
-	ctx->b.rings.gfx.flushing = true;
-
 	r600_preflush_suspend_features(&ctx->b);
 
 	ctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER |
@@ -116,7 +114,6 @@ void si_context_gfx_flush(void *context, unsigned flags,
 	/* Flush the CS. */
 	ws->cs_flush(cs, flags, &ctx->last_gfx_fence,
 		     ctx->screen->b.cs_count++);
-	ctx->b.rings.gfx.flushing = false;
 
 	if (fence)
 		ws->fence_reference(fence, ctx->last_gfx_fence);

From 3d963abc81789870d86257956a8fc24f7c6b661b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <marek.olsak@amd.com>
Date: Sat, 7 Nov 2015 12:22:56 +0100
Subject: [PATCH 236/287] radeonsi: prevent recursion in si_context_gfx_flush
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The recursion can only occur if you modify need_cs_space to always flush.

Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
---
 src/gallium/drivers/radeonsi/si_hw_context.c | 7 +++++++
 src/gallium/drivers/radeonsi/si_pipe.h       | 1 +
 2 files changed, 8 insertions(+)

diff --git a/src/gallium/drivers/radeonsi/si_hw_context.c b/src/gallium/drivers/radeonsi/si_hw_context.c
index 8eade11078c..e5f1c8462d2 100644
--- a/src/gallium/drivers/radeonsi/si_hw_context.c
+++ b/src/gallium/drivers/radeonsi/si_hw_context.c
@@ -64,12 +64,18 @@ void si_context_gfx_flush(void *context, unsigned flags,
 	struct radeon_winsys_cs *cs = ctx->b.rings.gfx.cs;
 	struct radeon_winsys *ws = ctx->b.ws;
 
+	if (ctx->gfx_flush_in_progress)
+		return;
+
+	ctx->gfx_flush_in_progress = true;
+
 	if (cs->cdw == ctx->b.initial_gfx_cs_size &&
 	    (!fence || ctx->last_gfx_fence)) {
 		if (fence)
 			ws->fence_reference(fence, ctx->last_gfx_fence);
 		if (!(flags & RADEON_FLUSH_ASYNC))
 			ws->cs_sync_flush(cs);
+		ctx->gfx_flush_in_progress = false;
 		return;
 	}
 
@@ -123,6 +129,7 @@ void si_context_gfx_flush(void *context, unsigned flags,
 		si_check_vm_faults(ctx);
 
 	si_begin_new_cs(ctx);
+	ctx->gfx_flush_in_progress = false;
 }
 
 void si_begin_new_cs(struct si_context *ctx)
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index 20fd6952d8c..6e742fc1342 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -173,6 +173,7 @@ struct si_context {
 	struct pipe_fence_handle	*last_gfx_fence;
 	struct si_shader_ctx_state	fixed_func_tcs_shader;
 	LLVMTargetMachineRef		tm;
+	bool				gfx_flush_in_progress;
 
 	/* Atoms (direct states). */
 	union si_state_atoms		atoms;

From 6cc8f6c6a72b1aab7bb506deb220e04ae50d8c2b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <marek.olsak@amd.com>
Date: Sat, 7 Nov 2015 14:00:30 +0100
Subject: [PATCH 237/287] gallium/radeon: inline the r600_rings structure
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
---
 src/gallium/drivers/r600/evergreen_compute.c  | 14 ++--
 .../drivers/r600/evergreen_hw_context.c       | 10 +--
 src/gallium/drivers/r600/evergreen_state.c    | 66 +++++++++---------
 src/gallium/drivers/r600/r600_blit.c          |  2 +-
 src/gallium/drivers/r600/r600_hw_context.c    | 34 +++++-----
 src/gallium/drivers/r600/r600_pipe.c          | 10 +--
 src/gallium/drivers/r600/r600_state.c         | 68 +++++++++----------
 src/gallium/drivers/r600/r600_state_common.c  | 36 +++++-----
 .../drivers/radeon/r600_buffer_common.c       | 32 ++++-----
 src/gallium/drivers/radeon/r600_pipe_common.c | 34 +++++-----
 src/gallium/drivers/radeon/r600_pipe_common.h |  8 +--
 src/gallium/drivers/radeon/r600_query.c       | 16 ++---
 src/gallium/drivers/radeon/r600_streamout.c   | 18 ++---
 src/gallium/drivers/radeonsi/cik_sdma.c       | 14 ++--
 src/gallium/drivers/radeonsi/si_compute.c     | 12 ++--
 src/gallium/drivers/radeonsi/si_cp_dma.c      | 10 +--
 src/gallium/drivers/radeonsi/si_descriptors.c | 38 +++++------
 src/gallium/drivers/radeonsi/si_dma.c         | 14 ++--
 src/gallium/drivers/radeonsi/si_hw_context.c  | 16 ++---
 src/gallium/drivers/radeonsi/si_pipe.c        |  8 +--
 src/gallium/drivers/radeonsi/si_pm4.c         |  6 +-
 src/gallium/drivers/radeonsi/si_state.c       | 34 +++++-----
 src/gallium/drivers/radeonsi/si_state_draw.c  | 24 +++----
 .../drivers/radeonsi/si_state_shaders.c       |  4 +-
 24 files changed, 262 insertions(+), 266 deletions(-)

diff --git a/src/gallium/drivers/r600/evergreen_compute.c b/src/gallium/drivers/r600/evergreen_compute.c
index 6f2b7ba0db3..5743e3fe538 100644
--- a/src/gallium/drivers/r600/evergreen_compute.c
+++ b/src/gallium/drivers/r600/evergreen_compute.c
@@ -346,7 +346,7 @@ static void evergreen_emit_direct_dispatch(
 		const uint *block_layout, const uint *grid_layout)
 {
 	int i;
-	struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
+	struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
 	struct r600_pipe_compute *shader = rctx->cs_shader_state.shader;
 	unsigned num_waves;
 	unsigned num_pipes = rctx->screen->b.info.r600_max_pipes;
@@ -417,12 +417,12 @@ static void evergreen_emit_direct_dispatch(
 static void compute_emit_cs(struct r600_context *ctx, const uint *block_layout,
 		const uint *grid_layout)
 {
-	struct radeon_winsys_cs *cs = ctx->b.rings.gfx.cs;
+	struct radeon_winsys_cs *cs = ctx->b.gfx.cs;
 	unsigned i;
 
 	/* make sure that the gfx ring is only one active */
-	if (ctx->b.rings.dma.cs && ctx->b.rings.dma.cs->cdw) {
-		ctx->b.rings.dma.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
+	if (ctx->b.dma.cs && ctx->b.dma.cs->cdw) {
+		ctx->b.dma.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
 	}
 
 	/* Initialize all the compute-related registers.
@@ -439,7 +439,7 @@ static void compute_emit_cs(struct r600_context *ctx, const uint *block_layout,
 	/* XXX support more than 8 colorbuffers (the offsets are not a multiple of 0x3C for CB8-11) */
 	for (i = 0; i < 8 && i < ctx->framebuffer.state.nr_cbufs; i++) {
 		struct r600_surface *cb = (struct r600_surface*)ctx->framebuffer.state.cbufs[i];
-		unsigned reloc = radeon_add_to_buffer_list(&ctx->b, &ctx->b.rings.gfx,
+		unsigned reloc = radeon_add_to_buffer_list(&ctx->b, &ctx->b.gfx,
 						       (struct r600_resource*)cb->base.texture,
 						       RADEON_USAGE_READWRITE,
 						       RADEON_PRIO_SHADER_RW_BUFFER);
@@ -538,7 +538,7 @@ void evergreen_emit_cs_shader(
 	struct r600_cs_shader_state *state =
 					(struct r600_cs_shader_state*)atom;
 	struct r600_pipe_compute *shader = state->shader;
-	struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
+	struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
 	uint64_t va;
 	struct r600_resource *code_bo;
 	unsigned ngpr, nstack;
@@ -564,7 +564,7 @@ void evergreen_emit_cs_shader(
 	radeon_emit(cs, 0);	/* R_0288D8_SQ_PGM_RESOURCES_LS_2 */
 
 	radeon_emit(cs, PKT3C(PKT3_NOP, 0, 0));
-	radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx,
+	radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx,
 					      code_bo, RADEON_USAGE_READ,
 					      RADEON_PRIO_USER_SHADER));
 }
diff --git a/src/gallium/drivers/r600/evergreen_hw_context.c b/src/gallium/drivers/r600/evergreen_hw_context.c
index 89abe92cbb4..a0f46800403 100644
--- a/src/gallium/drivers/r600/evergreen_hw_context.c
+++ b/src/gallium/drivers/r600/evergreen_hw_context.c
@@ -35,7 +35,7 @@ void evergreen_dma_copy_buffer(struct r600_context *rctx,
 			       uint64_t src_offset,
 			       uint64_t size)
 {
-	struct radeon_winsys_cs *cs = rctx->b.rings.dma.cs;
+	struct radeon_winsys_cs *cs = rctx->b.dma.cs;
 	unsigned i, ncopy, csize, sub_cmd, shift;
 	struct r600_resource *rdst = (struct r600_resource*)dst;
 	struct r600_resource *rsrc = (struct r600_resource*)src;
@@ -64,9 +64,9 @@ void evergreen_dma_copy_buffer(struct r600_context *rctx,
 	for (i = 0; i < ncopy; i++) {
 		csize = size < EG_DMA_COPY_MAX_SIZE ? size : EG_DMA_COPY_MAX_SIZE;
 		/* emit reloc before writing cs so that cs is always in consistent state */
-		radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.dma, rsrc, RADEON_USAGE_READ,
+		radeon_add_to_buffer_list(&rctx->b, &rctx->b.dma, rsrc, RADEON_USAGE_READ,
 				      RADEON_PRIO_SDMA_BUFFER);
-		radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.dma, rdst, RADEON_USAGE_WRITE,
+		radeon_add_to_buffer_list(&rctx->b, &rctx->b.dma, rdst, RADEON_USAGE_WRITE,
 				      RADEON_PRIO_SDMA_BUFFER);
 		cs->buf[cs->cdw++] = DMA_PACKET(DMA_PACKET_COPY, sub_cmd, csize);
 		cs->buf[cs->cdw++] = dst_offset & 0xffffffff;
@@ -86,7 +86,7 @@ void evergreen_cp_dma_clear_buffer(struct r600_context *rctx,
 				   struct pipe_resource *dst, uint64_t offset,
 				   unsigned size, uint32_t clear_value)
 {
-	struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
+	struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
 
 	assert(size);
 	assert(rctx->screen->b.has_cp_dma);
@@ -129,7 +129,7 @@ void evergreen_cp_dma_clear_buffer(struct r600_context *rctx,
 		}
 
 		/* This must be done after r600_need_cs_space. */
-		reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx,
+		reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx,
 					      (struct r600_resource*)dst, RADEON_USAGE_WRITE,
 					      RADEON_PRIO_CP_DMA);
 
diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c
index 4b29a2296ec..2fcc49bf7e2 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -871,7 +871,7 @@ evergreen_create_sampler_view(struct pipe_context *ctx,
 
 static void evergreen_emit_clip_state(struct r600_context *rctx, struct r600_atom *atom)
 {
-	struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
+	struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
 	struct pipe_clip_state *state = &rctx->clip_state.state;
 
 	radeon_set_context_reg_seq(cs, R_0285BC_PA_CL_UCP0_X, 6*4);
@@ -921,7 +921,7 @@ static void evergreen_set_scissor_states(struct pipe_context *ctx,
 
 static void evergreen_emit_scissor_state(struct r600_context *rctx, struct r600_atom *atom)
 {
-	struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
+	struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
 	struct r600_scissor_state *rstate = &rctx->scissor;
 	struct pipe_scissor_state *state;
 	uint32_t dirty_mask;
@@ -1525,7 +1525,7 @@ static void evergreen_get_sample_position(struct pipe_context *ctx,
 static void evergreen_emit_msaa_state(struct r600_context *rctx, int nr_samples, int ps_iter_samples)
 {
 
-	struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
+	struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
 	unsigned max_dist = 0;
 
 	switch (nr_samples) {
@@ -1566,7 +1566,7 @@ static void evergreen_emit_msaa_state(struct r600_context *rctx, int nr_samples,
 
 static void evergreen_emit_framebuffer_state(struct r600_context *rctx, struct r600_atom *atom)
 {
-	struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
+	struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
 	struct pipe_framebuffer_state *state = &rctx->framebuffer.state;
 	unsigned nr_cbufs = state->nr_cbufs;
 	unsigned i, tl, br;
@@ -1591,7 +1591,7 @@ static void evergreen_emit_framebuffer_state(struct r600_context *rctx, struct r
 
 		tex = (struct r600_texture *)cb->base.texture;
 		reloc = radeon_add_to_buffer_list(&rctx->b,
-					      &rctx->b.rings.gfx,
+					      &rctx->b.gfx,
 					      (struct r600_resource*)cb->base.texture,
 					      RADEON_USAGE_READWRITE,
 					      tex->surface.nsamples > 1 ?
@@ -1599,7 +1599,7 @@ static void evergreen_emit_framebuffer_state(struct r600_context *rctx, struct r
 						      RADEON_PRIO_COLOR_BUFFER);
 
 		if (tex->cmask_buffer && tex->cmask_buffer != &tex->resource) {
-			cmask_reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx,
+			cmask_reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx,
 				tex->cmask_buffer, RADEON_USAGE_READWRITE,
 				RADEON_PRIO_CMASK);
 		} else {
@@ -1645,7 +1645,7 @@ static void evergreen_emit_framebuffer_state(struct r600_context *rctx, struct r
 
 		if (!rctx->keep_tiling_flags) {
 			unsigned reloc = radeon_add_to_buffer_list(&rctx->b,
-							       &rctx->b.rings.gfx,
+							       &rctx->b.gfx,
 							       (struct r600_resource*)state->cbufs[0]->texture,
 							       RADEON_USAGE_READWRITE,
 							       RADEON_PRIO_COLOR_BUFFER);
@@ -1668,7 +1668,7 @@ static void evergreen_emit_framebuffer_state(struct r600_context *rctx, struct r
 	if (state->zsbuf) {
 		struct r600_surface *zb = (struct r600_surface*)state->zsbuf;
 		unsigned reloc = radeon_add_to_buffer_list(&rctx->b,
-						       &rctx->b.rings.gfx,
+						       &rctx->b.gfx,
 						       (struct r600_resource*)state->zsbuf->texture,
 						       RADEON_USAGE_READWRITE,
 						       zb->base.texture->nr_samples > 1 ?
@@ -1730,7 +1730,7 @@ static void evergreen_emit_framebuffer_state(struct r600_context *rctx, struct r
 
 static void evergreen_emit_polygon_offset(struct r600_context *rctx, struct r600_atom *a)
 {
-	struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
+	struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
 	struct r600_poly_offset_state *state = (struct r600_poly_offset_state*)a;
 	float offset_units = state->offset_units;
 	float offset_scale = state->offset_scale;
@@ -1757,7 +1757,7 @@ static void evergreen_emit_polygon_offset(struct r600_context *rctx, struct r600
 
 static void evergreen_emit_cb_misc_state(struct r600_context *rctx, struct r600_atom *atom)
 {
-	struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
+	struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
 	struct r600_cb_misc_state *a = (struct r600_cb_misc_state*)atom;
 	unsigned fb_colormask = (1ULL << ((unsigned)a->nr_cbufs * 4)) - 1;
 	unsigned ps_colormask = (1ULL << ((unsigned)a->nr_ps_color_outputs * 4)) - 1;
@@ -1772,7 +1772,7 @@ static void evergreen_emit_cb_misc_state(struct r600_context *rctx, struct r600_
 
 static void evergreen_emit_db_state(struct r600_context *rctx, struct r600_atom *atom)
 {
-	struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
+	struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
 	struct r600_db_state *a = (struct r600_db_state*)atom;
 
 	if (a->rsurf && a->rsurf->db_htile_surface) {
@@ -1783,7 +1783,7 @@ static void evergreen_emit_db_state(struct r600_context *rctx, struct r600_atom
 		radeon_set_context_reg(cs, R_028ABC_DB_HTILE_SURFACE, a->rsurf->db_htile_surface);
 		radeon_set_context_reg(cs, R_028AC8_DB_PRELOAD_CONTROL, a->rsurf->db_preload_control);
 		radeon_set_context_reg(cs, R_028014_DB_HTILE_DATA_BASE, a->rsurf->db_htile_data_base);
-		reloc_idx = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rtex->htile_buffer,
+		reloc_idx = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, rtex->htile_buffer,
 						  RADEON_USAGE_READWRITE, RADEON_PRIO_HTILE);
 		cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
 		cs->buf[cs->cdw++] = reloc_idx;
@@ -1795,7 +1795,7 @@ static void evergreen_emit_db_state(struct r600_context *rctx, struct r600_atom
 
 static void evergreen_emit_db_misc_state(struct r600_context *rctx, struct r600_atom *atom)
 {
-	struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
+	struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
 	struct r600_db_misc_state *a = (struct r600_db_misc_state*)atom;
 	unsigned db_render_control = 0;
 	unsigned db_count_control = 0;
@@ -1862,7 +1862,7 @@ static void evergreen_emit_vertex_buffers(struct r600_context *rctx,
 					  unsigned resource_offset,
 					  unsigned pkt_flags)
 {
-	struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
+	struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
 	uint32_t dirty_mask = state->dirty_mask;
 
 	while (dirty_mask) {
@@ -1897,7 +1897,7 @@ static void evergreen_emit_vertex_buffers(struct r600_context *rctx,
 		radeon_emit(cs, 0xc0000000); /* RESOURCEi_WORD7 */
 
 		radeon_emit(cs, PKT3(PKT3_NOP, 0, 0) | pkt_flags);
-		radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rbuffer,
+		radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, rbuffer,
 						      RADEON_USAGE_READ, RADEON_PRIO_VERTEX_BUFFER));
 	}
 	state->dirty_mask = 0;
@@ -1921,7 +1921,7 @@ static void evergreen_emit_constant_buffers(struct r600_context *rctx,
 					    unsigned reg_alu_const_cache,
 					    unsigned pkt_flags)
 {
-	struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
+	struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
 	uint32_t dirty_mask = state->dirty_mask;
 
 	while (dirty_mask) {
@@ -1945,7 +1945,7 @@ static void evergreen_emit_constant_buffers(struct r600_context *rctx,
 		}
 
 		radeon_emit(cs, PKT3(PKT3_NOP, 0, 0) | pkt_flags);
-		radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rbuffer,
+		radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, rbuffer,
 						      RADEON_USAGE_READ, RADEON_PRIO_CONST_BUFFER));
 
 		radeon_emit(cs, PKT3(PKT3_SET_RESOURCE, 8, 0) | pkt_flags);
@@ -1970,7 +1970,7 @@ static void evergreen_emit_constant_buffers(struct r600_context *rctx,
 			    S_03001C_TYPE(V_03001C_SQ_TEX_VTX_VALID_BUFFER));
 
 		radeon_emit(cs, PKT3(PKT3_NOP, 0, 0) | pkt_flags);
-		radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rbuffer,
+		radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, rbuffer,
 						      RADEON_USAGE_READ, RADEON_PRIO_CONST_BUFFER));
 
 		dirty_mask &= ~(1 << buffer_index);
@@ -2018,7 +2018,7 @@ static void evergreen_emit_sampler_views(struct r600_context *rctx,
 					 struct r600_samplerview_state *state,
 					 unsigned resource_id_base, unsigned pkt_flags)
 {
-	struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
+	struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
 	uint32_t dirty_mask = state->dirty_mask;
 
 	while (dirty_mask) {
@@ -2033,7 +2033,7 @@ static void evergreen_emit_sampler_views(struct r600_context *rctx,
 		radeon_emit(cs, (resource_id_base + resource_index) * 8);
 		radeon_emit_array(cs, rview->tex_resource_words, 8);
 
-		reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rview->tex_resource,
+		reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, rview->tex_resource,
 					      RADEON_USAGE_READ,
 					      r600_get_sampler_view_priority(rview->tex_resource));
 		radeon_emit(cs, PKT3(PKT3_NOP, 0, 0) | pkt_flags);
@@ -2077,7 +2077,7 @@ static void evergreen_emit_sampler_states(struct r600_context *rctx,
 				unsigned border_index_reg,
 				unsigned pkt_flags)
 {
-	struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
+	struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
 	uint32_t dirty_mask = texinfo->states.dirty_mask;
 
 	while (dirty_mask) {
@@ -2130,14 +2130,14 @@ static void evergreen_emit_sample_mask(struct r600_context *rctx, struct r600_at
 	struct r600_sample_mask *s = (struct r600_sample_mask*)a;
 	uint8_t mask = s->sample_mask;
 
-	radeon_set_context_reg(rctx->b.rings.gfx.cs, R_028C3C_PA_SC_AA_MASK,
+	radeon_set_context_reg(rctx->b.gfx.cs, R_028C3C_PA_SC_AA_MASK,
 			       mask | (mask << 8) | (mask << 16) | (mask << 24));
 }
 
 static void cayman_emit_sample_mask(struct r600_context *rctx, struct r600_atom *a)
 {
 	struct r600_sample_mask *s = (struct r600_sample_mask*)a;
-	struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
+	struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
 	uint16_t mask = s->sample_mask;
 
 	radeon_set_context_reg_seq(cs, CM_R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0, 2);
@@ -2147,21 +2147,21 @@ static void cayman_emit_sample_mask(struct r600_context *rctx, struct r600_atom
 
 static void evergreen_emit_vertex_fetch_shader(struct r600_context *rctx, struct r600_atom *a)
 {
-	struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
+	struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
 	struct r600_cso_state *state = (struct r600_cso_state*)a;
 	struct r600_fetch_shader *shader = (struct r600_fetch_shader*)state->cso;
 
 	radeon_set_context_reg(cs, R_0288A4_SQ_PGM_START_FS,
 			       (shader->buffer->gpu_address + shader->offset) >> 8);
 	radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
-	radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, shader->buffer,
+	radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, shader->buffer,
                                                   RADEON_USAGE_READ,
                                                   RADEON_PRIO_INTERNAL_SHADER));
 }
 
 static void evergreen_emit_shader_stages(struct r600_context *rctx, struct r600_atom *a)
 {
-	struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
+	struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
 	struct r600_shader_stages_state *state = (struct r600_shader_stages_state*)a;
 
 	uint32_t v = 0, v2 = 0, primid = 0;
@@ -2200,7 +2200,7 @@ static void evergreen_emit_shader_stages(struct r600_context *rctx, struct r600_
 
 static void evergreen_emit_gs_rings(struct r600_context *rctx, struct r600_atom *a)
 {
-	struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
+	struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
 	struct r600_gs_rings_state *state = (struct r600_gs_rings_state*)a;
 	struct r600_resource *rbuffer;
 
@@ -2213,7 +2213,7 @@ static void evergreen_emit_gs_rings(struct r600_context *rctx, struct r600_atom
 		radeon_set_config_reg(cs, R_008C40_SQ_ESGS_RING_BASE,
 				rbuffer->gpu_address >> 8);
 		radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
-		radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rbuffer,
+		radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, rbuffer,
 						      RADEON_USAGE_READWRITE,
 						      RADEON_PRIO_RINGS_STREAMOUT));
 		radeon_set_config_reg(cs, R_008C44_SQ_ESGS_RING_SIZE,
@@ -2223,7 +2223,7 @@ static void evergreen_emit_gs_rings(struct r600_context *rctx, struct r600_atom
 		radeon_set_config_reg(cs, R_008C48_SQ_GSVS_RING_BASE,
 				rbuffer->gpu_address >> 8);
 		radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
-		radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rbuffer,
+		radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, rbuffer,
 						      RADEON_USAGE_READWRITE,
 						      RADEON_PRIO_RINGS_STREAMOUT));
 		radeon_set_config_reg(cs, R_008C4C_SQ_GSVS_RING_SIZE,
@@ -3274,7 +3274,7 @@ static void evergreen_dma_copy_tile(struct r600_context *rctx,
 				unsigned pitch,
 				unsigned bpp)
 {
-	struct radeon_winsys_cs *cs = rctx->b.rings.dma.cs;
+	struct radeon_winsys_cs *cs = rctx->b.dma.cs;
 	struct r600_texture *rsrc = (struct r600_texture*)src;
 	struct r600_texture *rdst = (struct r600_texture*)dst;
 	unsigned array_mode, lbpp, pitch_tile_max, slice_tile_max, size;
@@ -3362,9 +3362,9 @@ static void evergreen_dma_copy_tile(struct r600_context *rctx,
 		}
 		size = (cheight * pitch) / 4;
 		/* emit reloc before writing cs so that cs is always in consistent state */
-		radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.dma, &rsrc->resource,
+		radeon_add_to_buffer_list(&rctx->b, &rctx->b.dma, &rsrc->resource,
 				      RADEON_USAGE_READ, RADEON_PRIO_SDMA_TEXTURE);
-		radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.dma, &rdst->resource,
+		radeon_add_to_buffer_list(&rctx->b, &rctx->b.dma, &rdst->resource,
 				      RADEON_USAGE_WRITE, RADEON_PRIO_SDMA_TEXTURE);
 		cs->buf[cs->cdw++] = DMA_PACKET(DMA_PACKET_COPY, sub_cmd, size);
 		cs->buf[cs->cdw++] = base >> 8;
@@ -3399,7 +3399,7 @@ static void evergreen_dma_copy(struct pipe_context *ctx,
 	unsigned src_x, src_y;
 	unsigned dst_x = dstx, dst_y = dsty, dst_z = dstz;
 
-	if (rctx->b.rings.dma.cs == NULL) {
+	if (rctx->b.dma.cs == NULL) {
 		goto fallback;
 	}
 
diff --git a/src/gallium/drivers/r600/r600_blit.c b/src/gallium/drivers/r600/r600_blit.c
index 90a1453ef69..fff841c0ded 100644
--- a/src/gallium/drivers/r600/r600_blit.c
+++ b/src/gallium/drivers/r600/r600_blit.c
@@ -527,7 +527,7 @@ static void r600_copy_buffer(struct pipe_context *ctx, struct pipe_resource *dst
 	 * Can we somehow flush the index buffer cache? Starting a new IB seems
 	 * to do the trick. */
 	if (rctx->b.chip_class <= R700)
-		rctx->b.rings.gfx.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
+		rctx->b.gfx.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
 }
 
 /**
diff --git a/src/gallium/drivers/r600/r600_hw_context.c b/src/gallium/drivers/r600/r600_hw_context.c
index 1cffc34a5a0..0fc58df157d 100644
--- a/src/gallium/drivers/r600/r600_hw_context.c
+++ b/src/gallium/drivers/r600/r600_hw_context.c
@@ -33,16 +33,16 @@
 void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw,
 			boolean count_draw_in)
 {
-	struct radeon_winsys_cs *dma = ctx->b.rings.dma.cs;
+	struct radeon_winsys_cs *dma = ctx->b.dma.cs;
 
 	/* Flush the DMA IB if it's not empty. */
 	if (dma && dma->cdw)
-		ctx->b.rings.dma.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
+		ctx->b.dma.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
 
-	if (!ctx->b.ws->cs_memory_below_limit(ctx->b.rings.gfx.cs, ctx->b.vram, ctx->b.gtt)) {
+	if (!ctx->b.ws->cs_memory_below_limit(ctx->b.gfx.cs, ctx->b.vram, ctx->b.gtt)) {
 		ctx->b.gtt = 0;
 		ctx->b.vram = 0;
-		ctx->b.rings.gfx.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
+		ctx->b.gfx.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
 		return;
 	}
 	/* all will be accounted once relocation are emited */
@@ -50,7 +50,7 @@ void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw,
 	ctx->b.vram = 0;
 
 	/* The number of dwords we already used in the CS so far. */
-	num_dw += ctx->b.rings.gfx.cs->cdw;
+	num_dw += ctx->b.gfx.cs->cdw;
 
 	if (count_draw_in) {
 		uint64_t mask;
@@ -97,14 +97,14 @@ void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw,
 	num_dw += 10;
 
 	/* Flush if there's not enough space. */
-	if (num_dw > ctx->b.rings.gfx.cs->max_dw) {
-		ctx->b.rings.gfx.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
+	if (num_dw > ctx->b.gfx.cs->max_dw) {
+		ctx->b.gfx.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
 	}
 }
 
 void r600_flush_emit(struct r600_context *rctx)
 {
-	struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
+	struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
 	unsigned cp_coher_cntl = 0;
 	unsigned wait_until = 0;
 
@@ -251,7 +251,7 @@ void r600_context_gfx_flush(void *context, unsigned flags,
 			    struct pipe_fence_handle **fence)
 {
 	struct r600_context *ctx = context;
-	struct radeon_winsys_cs *cs = ctx->b.rings.gfx.cs;
+	struct radeon_winsys_cs *cs = ctx->b.gfx.cs;
 
 	if (cs->cdw == ctx->b.initial_gfx_cs_size && !fence)
 		return;
@@ -294,7 +294,7 @@ void r600_begin_new_cs(struct r600_context *ctx)
 	ctx->b.vram = 0;
 
 	/* Begin a new CS. */
-	r600_emit_command_buffer(ctx->b.rings.gfx.cs, &ctx->start_cs_cmd);
+	r600_emit_command_buffer(ctx->b.gfx.cs, &ctx->start_cs_cmd);
 
 	/* Re-emit states. */
 	r600_mark_atom_dirty(ctx, &ctx->alphatest_state.atom);
@@ -363,7 +363,7 @@ void r600_begin_new_cs(struct r600_context *ctx)
 	ctx->last_primitive_type = -1;
 	ctx->last_start_instance = -1;
 
-	ctx->b.initial_gfx_cs_size = ctx->b.rings.gfx.cs->cdw;
+	ctx->b.initial_gfx_cs_size = ctx->b.gfx.cs->cdw;
 }
 
 /* The max number of bytes to copy per packet. */
@@ -374,7 +374,7 @@ void r600_cp_dma_copy_buffer(struct r600_context *rctx,
 			     struct pipe_resource *src, uint64_t src_offset,
 			     unsigned size)
 {
-	struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
+	struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
 
 	assert(size);
 	assert(rctx->screen->b.has_cp_dma);
@@ -420,9 +420,9 @@ void r600_cp_dma_copy_buffer(struct r600_context *rctx,
 		}
 
 		/* This must be done after r600_need_cs_space. */
-		src_reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, (struct r600_resource*)src,
+		src_reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, (struct r600_resource*)src,
 						  RADEON_USAGE_READ, RADEON_PRIO_CP_DMA);
-		dst_reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, (struct r600_resource*)dst,
+		dst_reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, (struct r600_resource*)dst,
 						  RADEON_USAGE_WRITE, RADEON_PRIO_CP_DMA);
 
 		radeon_emit(cs, PKT3(PKT3_CP_DMA, 4, 0));
@@ -455,7 +455,7 @@ void r600_dma_copy_buffer(struct r600_context *rctx,
 			  uint64_t src_offset,
 			  uint64_t size)
 {
-	struct radeon_winsys_cs *cs = rctx->b.rings.dma.cs;
+	struct radeon_winsys_cs *cs = rctx->b.dma.cs;
 	unsigned i, ncopy, csize;
 	struct r600_resource *rdst = (struct r600_resource*)dst;
 	struct r600_resource *rsrc = (struct r600_resource*)src;
@@ -473,9 +473,9 @@ void r600_dma_copy_buffer(struct r600_context *rctx,
 	for (i = 0; i < ncopy; i++) {
 		csize = size < R600_DMA_COPY_MAX_SIZE_DW ? size : R600_DMA_COPY_MAX_SIZE_DW;
 		/* emit reloc before writing cs so that cs is always in consistent state */
-		radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.dma, rsrc, RADEON_USAGE_READ,
+		radeon_add_to_buffer_list(&rctx->b, &rctx->b.dma, rsrc, RADEON_USAGE_READ,
 				      RADEON_PRIO_SDMA_BUFFER);
-		radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.dma, rdst, RADEON_USAGE_WRITE,
+		radeon_add_to_buffer_list(&rctx->b, &rctx->b.dma, rdst, RADEON_USAGE_WRITE,
 				      RADEON_PRIO_SDMA_BUFFER);
 		cs->buf[cs->cdw++] = DMA_PACKET(DMA_PACKET_COPY, 0, 0, csize);
 		cs->buf[cs->cdw++] = dst_offset & 0xfffffffc;
diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c
index ceae6e891a3..bd00dcb642c 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -178,11 +178,11 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen,
 		goto fail;
 	}
 
-	rctx->b.rings.gfx.cs = ws->cs_create(rctx->b.ctx, RING_GFX,
-					     r600_context_gfx_flush, rctx,
-					     rscreen->b.trace_bo ?
-						     rscreen->b.trace_bo->cs_buf : NULL);
-	rctx->b.rings.gfx.flush = r600_context_gfx_flush;
+	rctx->b.gfx.cs = ws->cs_create(rctx->b.ctx, RING_GFX,
+				       r600_context_gfx_flush, rctx,
+				       rscreen->b.trace_bo ?
+					       rscreen->b.trace_bo->cs_buf : NULL);
+	rctx->b.gfx.flush = r600_context_gfx_flush;
 
 	rctx->allocator_fetch_shader = u_suballocator_create(&rctx->b.b, 64 * 1024, 256,
 							     0, PIPE_USAGE_DEFAULT, FALSE);
diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c
index 1305efa5660..ebb15ee9789 100644
--- a/src/gallium/drivers/r600/r600_state.c
+++ b/src/gallium/drivers/r600/r600_state.c
@@ -244,7 +244,7 @@ boolean r600_is_format_supported(struct pipe_screen *screen,
 
 static void r600_emit_polygon_offset(struct r600_context *rctx, struct r600_atom *a)
 {
-	struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
+	struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
 	struct r600_poly_offset_state *state = (struct r600_poly_offset_state*)a;
 	float offset_units = state->offset_units;
 	float offset_scale = state->offset_scale;
@@ -760,7 +760,7 @@ r600_create_sampler_view(struct pipe_context *ctx,
 
 static void r600_emit_clip_state(struct r600_context *rctx, struct r600_atom *atom)
 {
-	struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
+	struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
 	struct pipe_clip_state *state = &rctx->clip_state.state;
 
 	radeon_set_context_reg_seq(cs, R_028E20_PA_CL_UCP0_X, 6*4);
@@ -774,7 +774,7 @@ static void r600_set_polygon_stipple(struct pipe_context *ctx,
 
 static void r600_emit_scissor_state(struct r600_context *rctx, struct r600_atom *atom)
 {
-	struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
+	struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
 	struct r600_scissor_state *rstate = &rctx->scissor;
 	struct pipe_scissor_state *state;
 	bool do_disable_workaround = false;
@@ -1334,7 +1334,7 @@ static void r600_get_sample_position(struct pipe_context *ctx,
 
 static void r600_emit_msaa_state(struct r600_context *rctx, int nr_samples)
 {
-	struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
+	struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
 	unsigned max_dist = 0;
 
 	if (rctx->b.family == CHIP_R600) {
@@ -1401,7 +1401,7 @@ static void r600_emit_msaa_state(struct r600_context *rctx, int nr_samples)
 
 static void r600_emit_framebuffer_state(struct r600_context *rctx, struct r600_atom *atom)
 {
-	struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
+	struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
 	struct pipe_framebuffer_state *state = &rctx->framebuffer.state;
 	unsigned nr_cbufs = state->nr_cbufs;
 	struct r600_surface **cb = (struct r600_surface**)&state->cbufs[0];
@@ -1432,7 +1432,7 @@ static void r600_emit_framebuffer_state(struct r600_context *rctx, struct r600_a
 			radeon_set_context_reg(cs, R_028040_CB_COLOR0_BASE + i*4, cb[i]->cb_color_base);
 
 			reloc = radeon_add_to_buffer_list(&rctx->b,
-						      &rctx->b.rings.gfx,
+						      &rctx->b.gfx,
 						      (struct r600_resource*)cb[i]->base.texture,
 						      RADEON_USAGE_READWRITE,
 						      cb[i]->base.texture->nr_samples > 1 ?
@@ -1445,7 +1445,7 @@ static void r600_emit_framebuffer_state(struct r600_context *rctx, struct r600_a
 			radeon_set_context_reg(cs, R_0280E0_CB_COLOR0_FRAG + i*4, cb[i]->cb_color_fmask);
 
 			reloc = radeon_add_to_buffer_list(&rctx->b,
-						      &rctx->b.rings.gfx,
+						      &rctx->b.gfx,
 						      cb[i]->cb_buffer_fmask,
 						      RADEON_USAGE_READWRITE,
 						      cb[i]->base.texture->nr_samples > 1 ?
@@ -1458,7 +1458,7 @@ static void r600_emit_framebuffer_state(struct r600_context *rctx, struct r600_a
 			radeon_set_context_reg(cs, R_0280C0_CB_COLOR0_TILE + i*4, cb[i]->cb_color_cmask);
 
 			reloc = radeon_add_to_buffer_list(&rctx->b,
-						      &rctx->b.rings.gfx,
+						      &rctx->b.gfx,
 						      cb[i]->cb_buffer_cmask,
 						      RADEON_USAGE_READWRITE,
 						      cb[i]->base.texture->nr_samples > 1 ?
@@ -1497,7 +1497,7 @@ static void r600_emit_framebuffer_state(struct r600_context *rctx, struct r600_a
 	if (state->zsbuf) {
 		struct r600_surface *surf = (struct r600_surface*)state->zsbuf;
 		unsigned reloc = radeon_add_to_buffer_list(&rctx->b,
-						       &rctx->b.rings.gfx,
+						       &rctx->b.gfx,
 						       (struct r600_resource*)state->zsbuf->texture,
 						       RADEON_USAGE_READWRITE,
 						       surf->base.texture->nr_samples > 1 ?
@@ -1570,7 +1570,7 @@ static void r600_set_min_samples(struct pipe_context *ctx, unsigned min_samples)
 
 static void r600_emit_cb_misc_state(struct r600_context *rctx, struct r600_atom *atom)
 {
-	struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
+	struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
 	struct r600_cb_misc_state *a = (struct r600_cb_misc_state*)atom;
 
 	if (G_028808_SPECIAL_OP(a->cb_color_control) == V_028808_SPECIAL_RESOLVE_BOX) {
@@ -1600,7 +1600,7 @@ static void r600_emit_cb_misc_state(struct r600_context *rctx, struct r600_atom
 
 static void r600_emit_db_state(struct r600_context *rctx, struct r600_atom *atom)
 {
-	struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
+	struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
 	struct r600_db_state *a = (struct r600_db_state*)atom;
 
 	if (a->rsurf && a->rsurf->db_htile_surface) {
@@ -1610,7 +1610,7 @@ static void r600_emit_db_state(struct r600_context *rctx, struct r600_atom *atom
 		radeon_set_context_reg(cs, R_02802C_DB_DEPTH_CLEAR, fui(rtex->depth_clear_value));
 		radeon_set_context_reg(cs, R_028D24_DB_HTILE_SURFACE, a->rsurf->db_htile_surface);
 		radeon_set_context_reg(cs, R_028014_DB_HTILE_DATA_BASE, a->rsurf->db_htile_data_base);
-		reloc_idx = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rtex->htile_buffer,
+		reloc_idx = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, rtex->htile_buffer,
 						  RADEON_USAGE_READWRITE, RADEON_PRIO_HTILE);
 		cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
 		cs->buf[cs->cdw++] = reloc_idx;
@@ -1621,7 +1621,7 @@ static void r600_emit_db_state(struct r600_context *rctx, struct r600_atom *atom
 
 static void r600_emit_db_misc_state(struct r600_context *rctx, struct r600_atom *atom)
 {
-	struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
+	struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
 	struct r600_db_misc_state *a = (struct r600_db_misc_state*)atom;
 	unsigned db_render_control = 0;
 	unsigned db_render_override =
@@ -1702,7 +1702,7 @@ static void r600_emit_db_misc_state(struct r600_context *rctx, struct r600_atom
 
 static void r600_emit_config_state(struct r600_context *rctx, struct r600_atom *atom)
 {
-	struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
+	struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
 	struct r600_config_state *a = (struct r600_config_state*)atom;
 
 	radeon_set_config_reg(cs, R_008C04_SQ_GPR_RESOURCE_MGMT_1, a->sq_gpr_resource_mgmt_1);
@@ -1711,7 +1711,7 @@ static void r600_emit_config_state(struct r600_context *rctx, struct r600_atom *
 
 static void r600_emit_vertex_buffers(struct r600_context *rctx, struct r600_atom *atom)
 {
-	struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
+	struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
 	uint32_t dirty_mask = rctx->vertex_buffer_state.dirty_mask;
 
 	while (dirty_mask) {
@@ -1740,7 +1740,7 @@ static void r600_emit_vertex_buffers(struct r600_context *rctx, struct r600_atom
 		radeon_emit(cs, 0xc0000000); /* RESOURCEi_WORD6 */
 
 		radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
-		radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rbuffer,
+		radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, rbuffer,
 						      RADEON_USAGE_READ, RADEON_PRIO_VERTEX_BUFFER));
 	}
 }
@@ -1751,7 +1751,7 @@ static void r600_emit_constant_buffers(struct r600_context *rctx,
 				       unsigned reg_alu_constbuf_size,
 				       unsigned reg_alu_const_cache)
 {
-	struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
+	struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
 	uint32_t dirty_mask = state->dirty_mask;
 
 	while (dirty_mask) {
@@ -1773,7 +1773,7 @@ static void r600_emit_constant_buffers(struct r600_context *rctx,
 		}
 
 		radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
-		radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rbuffer,
+		radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, rbuffer,
 						      RADEON_USAGE_READ, RADEON_PRIO_CONST_BUFFER));
 
 		radeon_emit(cs, PKT3(PKT3_SET_RESOURCE, 7, 0));
@@ -1789,7 +1789,7 @@ static void r600_emit_constant_buffers(struct r600_context *rctx,
 		radeon_emit(cs, 0xc0000000); /* RESOURCEi_WORD6 */
 
 		radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
-		radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rbuffer,
+		radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, rbuffer,
 						      RADEON_USAGE_READ, RADEON_PRIO_CONST_BUFFER));
 
 		dirty_mask &= ~(1 << buffer_index);
@@ -1825,7 +1825,7 @@ static void r600_emit_sampler_views(struct r600_context *rctx,
 				    struct r600_samplerview_state *state,
 				    unsigned resource_id_base)
 {
-	struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
+	struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
 	uint32_t dirty_mask = state->dirty_mask;
 
 	while (dirty_mask) {
@@ -1840,7 +1840,7 @@ static void r600_emit_sampler_views(struct r600_context *rctx,
 		radeon_emit(cs, (resource_id_base + resource_index) * 7);
 		radeon_emit_array(cs, rview->tex_resource_words, 7);
 
-		reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rview->tex_resource,
+		reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, rview->tex_resource,
 					      RADEON_USAGE_READ,
 					      r600_get_sampler_view_priority(rview->tex_resource));
 		radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
@@ -1872,7 +1872,7 @@ static void r600_emit_sampler_states(struct r600_context *rctx,
 				unsigned resource_id_base,
 				unsigned border_color_reg)
 {
-	struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
+	struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
 	uint32_t dirty_mask = texinfo->states.dirty_mask;
 
 	while (dirty_mask) {
@@ -1933,7 +1933,7 @@ static void r600_emit_ps_sampler_states(struct r600_context *rctx, struct r600_a
 
 static void r600_emit_seamless_cube_map(struct r600_context *rctx, struct r600_atom *atom)
 {
-	struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
+	struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
 	unsigned tmp;
 
 	tmp = S_009508_DISABLE_CUBE_ANISO(1) |
@@ -1951,26 +1951,26 @@ static void r600_emit_sample_mask(struct r600_context *rctx, struct r600_atom *a
 	struct r600_sample_mask *s = (struct r600_sample_mask*)a;
 	uint8_t mask = s->sample_mask;
 
-	radeon_set_context_reg(rctx->b.rings.gfx.cs, R_028C48_PA_SC_AA_MASK,
+	radeon_set_context_reg(rctx->b.gfx.cs, R_028C48_PA_SC_AA_MASK,
 			       mask | (mask << 8) | (mask << 16) | (mask << 24));
 }
 
 static void r600_emit_vertex_fetch_shader(struct r600_context *rctx, struct r600_atom *a)
 {
-	struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
+	struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
 	struct r600_cso_state *state = (struct r600_cso_state*)a;
 	struct r600_fetch_shader *shader = (struct r600_fetch_shader*)state->cso;
 
 	radeon_set_context_reg(cs, R_028894_SQ_PGM_START_FS, shader->offset >> 8);
 	radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
-	radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, shader->buffer,
+	radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, shader->buffer,
                                                   RADEON_USAGE_READ,
                                                   RADEON_PRIO_INTERNAL_SHADER));
 }
 
 static void r600_emit_shader_stages(struct r600_context *rctx, struct r600_atom *a)
 {
-	struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
+	struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
 	struct r600_shader_stages_state *state = (struct r600_shader_stages_state*)a;
 
 	uint32_t v2 = 0, primid = 0;
@@ -2005,7 +2005,7 @@ static void r600_emit_shader_stages(struct r600_context *rctx, struct r600_atom
 
 static void r600_emit_gs_rings(struct r600_context *rctx, struct r600_atom *a)
 {
-	struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
+	struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
 	struct r600_gs_rings_state *state = (struct r600_gs_rings_state*)a;
 	struct r600_resource *rbuffer;
 
@@ -2017,7 +2017,7 @@ static void r600_emit_gs_rings(struct r600_context *rctx, struct r600_atom *a)
 		rbuffer =(struct r600_resource*)state->esgs_ring.buffer;
 		radeon_set_config_reg(cs, R_008C40_SQ_ESGS_RING_BASE, 0);
 		radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
-		radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rbuffer,
+		radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, rbuffer,
 						      RADEON_USAGE_READWRITE,
 						      RADEON_PRIO_RINGS_STREAMOUT));
 		radeon_set_config_reg(cs, R_008C44_SQ_ESGS_RING_SIZE,
@@ -2026,7 +2026,7 @@ static void r600_emit_gs_rings(struct r600_context *rctx, struct r600_atom *a)
 		rbuffer =(struct r600_resource*)state->gsvs_ring.buffer;
 		radeon_set_config_reg(cs, R_008C48_SQ_GSVS_RING_BASE, 0);
 		radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
-		radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rbuffer,
+		radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, rbuffer,
 						      RADEON_USAGE_READWRITE,
 						      RADEON_PRIO_RINGS_STREAMOUT));
 		radeon_set_config_reg(cs, R_008C4C_SQ_GSVS_RING_SIZE,
@@ -2865,7 +2865,7 @@ static boolean r600_dma_copy_tile(struct r600_context *rctx,
 				unsigned pitch,
 				unsigned bpp)
 {
-	struct radeon_winsys_cs *cs = rctx->b.rings.dma.cs;
+	struct radeon_winsys_cs *cs = rctx->b.dma.cs;
 	struct r600_texture *rsrc = (struct r600_texture*)src;
 	struct r600_texture *rdst = (struct r600_texture*)dst;
 	unsigned array_mode, lbpp, pitch_tile_max, slice_tile_max, size;
@@ -2938,9 +2938,9 @@ static boolean r600_dma_copy_tile(struct r600_context *rctx,
 		cheight = cheight > copy_height ? copy_height : cheight;
 		size = (cheight * pitch) / 4;
 		/* emit reloc before writing cs so that cs is always in consistent state */
-		radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.dma, &rsrc->resource, RADEON_USAGE_READ,
+		radeon_add_to_buffer_list(&rctx->b, &rctx->b.dma, &rsrc->resource, RADEON_USAGE_READ,
 				      RADEON_PRIO_SDMA_TEXTURE);
-		radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.dma, &rdst->resource, RADEON_USAGE_WRITE,
+		radeon_add_to_buffer_list(&rctx->b, &rctx->b.dma, &rdst->resource, RADEON_USAGE_WRITE,
 				      RADEON_PRIO_SDMA_TEXTURE);
 		cs->buf[cs->cdw++] = DMA_PACKET(DMA_PACKET_COPY, 1, 0, size);
 		cs->buf[cs->cdw++] = base >> 8;
@@ -2974,7 +2974,7 @@ static void r600_dma_copy(struct pipe_context *ctx,
 	unsigned src_x, src_y;
 	unsigned dst_x = dstx, dst_y = dsty, dst_z = dstz;
 
-	if (rctx->b.rings.dma.cs == NULL) {
+	if (rctx->b.dma.cs == NULL) {
 		goto fallback;
 	}
 
diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c
index 178005a8574..e1608575103 100644
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -71,12 +71,12 @@ void r600_init_atom(struct r600_context *rctx,
 
 void r600_emit_cso_state(struct r600_context *rctx, struct r600_atom *atom)
 {
-	r600_emit_command_buffer(rctx->b.rings.gfx.cs, ((struct r600_cso_state*)atom)->cb);
+	r600_emit_command_buffer(rctx->b.gfx.cs, ((struct r600_cso_state*)atom)->cb);
 }
 
 void r600_emit_alphatest_state(struct r600_context *rctx, struct r600_atom *atom)
 {
-	struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
+	struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
 	struct r600_alphatest_state *a = (struct r600_alphatest_state*)atom;
 	unsigned alpha_ref = a->sx_alpha_ref;
 
@@ -211,7 +211,7 @@ static void r600_set_blend_color(struct pipe_context *ctx,
 
 void r600_emit_blend_color(struct r600_context *rctx, struct r600_atom *atom)
 {
-	struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
+	struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
 	struct pipe_blend_color *state = &rctx->blend_color.state;
 
 	radeon_set_context_reg_seq(cs, R_028414_CB_BLEND_RED, 4);
@@ -223,7 +223,7 @@ void r600_emit_blend_color(struct r600_context *rctx, struct r600_atom *atom)
 
 void r600_emit_vgt_state(struct r600_context *rctx, struct r600_atom *atom)
 {
-	struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
+	struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
 	struct r600_vgt_state *a = (struct r600_vgt_state *)atom;
 
 	radeon_set_context_reg(cs, R_028A94_VGT_MULTI_PRIM_IB_RESET_EN, a->vgt_multi_prim_ib_reset_en);
@@ -257,7 +257,7 @@ static void r600_set_stencil_ref(struct pipe_context *ctx,
 
 void r600_emit_stencil_ref(struct r600_context *rctx, struct r600_atom *atom)
 {
-	struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
+	struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
 	struct r600_stencil_ref_state *a = (struct r600_stencil_ref_state*)atom;
 
 	radeon_set_context_reg_seq(cs, R_028430_DB_STENCILREFMASK, 2);
@@ -709,7 +709,7 @@ static void r600_set_viewport_states(struct pipe_context *ctx,
 
 void r600_emit_viewport_state(struct r600_context *rctx, struct r600_atom *atom)
 {
-	struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
+	struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
 	struct r600_viewport_state *rstate = &rctx->viewport;
 	struct pipe_viewport_state *state;
 	uint32_t dirty_mask;
@@ -1460,7 +1460,7 @@ static bool r600_update_derived_state(struct r600_context *rctx)
 
 void r600_emit_clip_misc_state(struct r600_context *rctx, struct r600_atom *atom)
 {
-	struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
+	struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
 	struct r600_clip_misc_state *state = &rctx->clip_misc_state;
 
 	radeon_set_context_reg(cs, R_028810_PA_CL_CLIP_CNTL,
@@ -1477,7 +1477,7 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
 	struct r600_context *rctx = (struct r600_context *)ctx;
 	struct pipe_draw_info info = *dinfo;
 	struct pipe_index_buffer ib = {};
-	struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
+	struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
 	uint64_t mask;
 
 	if (!info.indirect && !info.count && (info.indexed || !info.count_from_stream_output)) {
@@ -1490,8 +1490,8 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
 	}
 
 	/* make sure that the gfx ring is only one active */
-	if (rctx->b.rings.dma.cs && rctx->b.rings.dma.cs->cdw) {
-		rctx->b.rings.dma.flush(rctx, RADEON_FLUSH_ASYNC, NULL);
+	if (rctx->b.dma.cs && rctx->b.dma.cs->cdw) {
+		rctx->b.dma.flush(rctx, RADEON_FLUSH_ASYNC, NULL);
 	}
 
 	if (!r600_update_derived_state(rctx)) {
@@ -1681,7 +1681,7 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
 		cs->buf[cs->cdw++] = (va >> 32UL) & 0xFF;
 
 		cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, rctx->b.predicate_drawing);
-		cs->buf[cs->cdw++] = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx,
+		cs->buf[cs->cdw++] = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx,
 							   (struct r600_resource*)info.indirect,
 							   RADEON_USAGE_READ,
                                                            RADEON_PRIO_DRAW_INDIRECT);
@@ -1711,7 +1711,7 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
 				cs->buf[cs->cdw++] = info.count;
 				cs->buf[cs->cdw++] = V_0287F0_DI_SRC_SEL_DMA;
 				cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, rctx->b.predicate_drawing);
-				cs->buf[cs->cdw++] = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx,
+				cs->buf[cs->cdw++] = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx,
 									   (struct r600_resource*)ib.buffer,
 									   RADEON_USAGE_READ,
                                                                            RADEON_PRIO_INDEX_BUFFER);
@@ -1724,7 +1724,7 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
 				cs->buf[cs->cdw++] = (va >> 32UL) & 0xFF;
 
 				cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, rctx->b.predicate_drawing);
-				cs->buf[cs->cdw++] = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx,
+				cs->buf[cs->cdw++] = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx,
 									   (struct r600_resource*)ib.buffer,
 									   RADEON_USAGE_READ,
                                                                            RADEON_PRIO_INDEX_BUFFER);
@@ -1752,7 +1752,7 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
 			cs->buf[cs->cdw++] = 0; /* unused */
 
 			cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
-			cs->buf[cs->cdw++] = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx,
+			cs->buf[cs->cdw++] = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx,
 								   t->buf_filled_size, RADEON_USAGE_READ,
 								   RADEON_PRIO_SO_FILLED_SIZE);
 		}
@@ -1938,7 +1938,7 @@ bool sampler_state_needs_border_color(const struct pipe_sampler_state *state)
 void r600_emit_shader(struct r600_context *rctx, struct r600_atom *a)
 {
 
-	struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
+	struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
 	struct r600_pipe_shader *shader = ((struct r600_shader_state*)a)->shader;
 
 	if (!shader)
@@ -1946,7 +1946,7 @@ void r600_emit_shader(struct r600_context *rctx, struct r600_atom *a)
 
 	r600_emit_command_buffer(cs, &shader->command_buffer);
 	radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
-	radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, shader->bo,
+	radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, shader->bo,
 					      RADEON_USAGE_READ, RADEON_PRIO_USER_SHADER));
 }
 
@@ -2669,12 +2669,12 @@ void r600_init_common_state_functions(struct r600_context *rctx)
 void r600_trace_emit(struct r600_context *rctx)
 {
 	struct r600_screen *rscreen = rctx->screen;
-	struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
+	struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
 	uint64_t va;
 	uint32_t reloc;
 
 	va = rscreen->b.trace_bo->gpu_address;
-	reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx, rscreen->b.trace_bo,
+	reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, rscreen->b.trace_bo,
 				      RADEON_USAGE_READWRITE, RADEON_PRIO_TRACE);
 	radeon_emit(cs, PKT3(PKT3_MEM_WRITE, 3, 0));
 	radeon_emit(cs, va & 0xFFFFFFFFUL);
diff --git a/src/gallium/drivers/radeon/r600_buffer_common.c b/src/gallium/drivers/radeon/r600_buffer_common.c
index 0dc6c918331..c294e516408 100644
--- a/src/gallium/drivers/radeon/r600_buffer_common.c
+++ b/src/gallium/drivers/radeon/r600_buffer_common.c
@@ -34,11 +34,11 @@ boolean r600_rings_is_buffer_referenced(struct r600_common_context *ctx,
 					struct radeon_winsys_cs_handle *buf,
 					enum radeon_bo_usage usage)
 {
-	if (ctx->ws->cs_is_buffer_referenced(ctx->rings.gfx.cs, buf, usage)) {
+	if (ctx->ws->cs_is_buffer_referenced(ctx->gfx.cs, buf, usage)) {
 		return TRUE;
 	}
-	if (ctx->rings.dma.cs && ctx->rings.dma.cs->cdw &&
-	    ctx->ws->cs_is_buffer_referenced(ctx->rings.dma.cs, buf, usage)) {
+	if (ctx->dma.cs && ctx->dma.cs->cdw &&
+	    ctx->ws->cs_is_buffer_referenced(ctx->dma.cs, buf, usage)) {
 		return TRUE;
 	}
 	return FALSE;
@@ -60,26 +60,26 @@ void *r600_buffer_map_sync_with_rings(struct r600_common_context *ctx,
 		rusage = RADEON_USAGE_WRITE;
 	}
 
-	if (ctx->rings.gfx.cs->cdw != ctx->initial_gfx_cs_size &&
-	    ctx->ws->cs_is_buffer_referenced(ctx->rings.gfx.cs,
+	if (ctx->gfx.cs->cdw != ctx->initial_gfx_cs_size &&
+	    ctx->ws->cs_is_buffer_referenced(ctx->gfx.cs,
 					     resource->cs_buf, rusage)) {
 		if (usage & PIPE_TRANSFER_DONTBLOCK) {
-			ctx->rings.gfx.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
+			ctx->gfx.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
 			return NULL;
 		} else {
-			ctx->rings.gfx.flush(ctx, 0, NULL);
+			ctx->gfx.flush(ctx, 0, NULL);
 			busy = true;
 		}
 	}
-	if (ctx->rings.dma.cs &&
-	    ctx->rings.dma.cs->cdw &&
-	    ctx->ws->cs_is_buffer_referenced(ctx->rings.dma.cs,
+	if (ctx->dma.cs &&
+	    ctx->dma.cs->cdw &&
+	    ctx->ws->cs_is_buffer_referenced(ctx->dma.cs,
 					     resource->cs_buf, rusage)) {
 		if (usage & PIPE_TRANSFER_DONTBLOCK) {
-			ctx->rings.dma.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
+			ctx->dma.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
 			return NULL;
 		} else {
-			ctx->rings.dma.flush(ctx, 0, NULL);
+			ctx->dma.flush(ctx, 0, NULL);
 			busy = true;
 		}
 	}
@@ -90,9 +90,9 @@ void *r600_buffer_map_sync_with_rings(struct r600_common_context *ctx,
 		} else {
 			/* We will be wait for the GPU. Wait for any offloaded
 			 * CS flush to complete to avoid busy-waiting in the winsys. */
-			ctx->ws->cs_sync_flush(ctx->rings.gfx.cs);
-			if (ctx->rings.dma.cs)
-				ctx->ws->cs_sync_flush(ctx->rings.dma.cs);
+			ctx->ws->cs_sync_flush(ctx->gfx.cs);
+			if (ctx->dma.cs)
+				ctx->ws->cs_sync_flush(ctx->dma.cs);
 		}
 	}
 
@@ -240,7 +240,7 @@ static bool r600_can_dma_copy_buffer(struct r600_common_context *rctx,
 	bool dword_aligned = !(dstx % 4) && !(srcx % 4) && !(size % 4);
 
 	return rctx->screen->has_cp_dma ||
-	       (dword_aligned && (rctx->rings.dma.cs ||
+	       (dword_aligned && (rctx->dma.cs ||
 				  rctx->screen->has_streamout));
 
 }
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c b/src/gallium/drivers/radeon/r600_pipe_common.c
index daa325dae04..87399145be7 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.c
+++ b/src/gallium/drivers/radeon/r600_pipe_common.c
@@ -118,13 +118,13 @@ void r600_draw_rectangle(struct blitter_context *blitter,
 void r600_need_dma_space(struct r600_common_context *ctx, unsigned num_dw)
 {
 	/* Flush the GFX IB if it's not empty. */
-	if (ctx->rings.gfx.cs->cdw > ctx->initial_gfx_cs_size)
-		ctx->rings.gfx.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
+	if (ctx->gfx.cs->cdw > ctx->initial_gfx_cs_size)
+		ctx->gfx.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
 
 	/* Flush if there's not enough space. */
-	if ((num_dw + ctx->rings.dma.cs->cdw) > ctx->rings.dma.cs->max_dw) {
-		ctx->rings.dma.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
-		assert((num_dw + ctx->rings.dma.cs->cdw) <= ctx->rings.dma.cs->max_dw);
+	if ((num_dw + ctx->dma.cs->cdw) > ctx->dma.cs->max_dw) {
+		ctx->dma.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
+		assert((num_dw + ctx->dma.cs->cdw) <= ctx->dma.cs->max_dw);
 	}
 }
 
@@ -194,10 +194,10 @@ static void r600_flush_from_st(struct pipe_context *ctx,
 	if (flags & PIPE_FLUSH_END_OF_FRAME)
 		rflags |= RADEON_FLUSH_END_OF_FRAME;
 
-	if (rctx->rings.dma.cs) {
-		rctx->rings.dma.flush(rctx, rflags, fence ? &sdma_fence : NULL);
+	if (rctx->dma.cs) {
+		rctx->dma.flush(rctx, rflags, fence ? &sdma_fence : NULL);
 	}
-	rctx->rings.gfx.flush(rctx, rflags, fence ? &gfx_fence : NULL);
+	rctx->gfx.flush(rctx, rflags, fence ? &gfx_fence : NULL);
 
 	/* Both engines can signal out of order, so we need to keep both fences. */
 	if (gfx_fence || sdma_fence) {
@@ -219,7 +219,7 @@ static void r600_flush_dma_ring(void *ctx, unsigned flags,
 				struct pipe_fence_handle **fence)
 {
 	struct r600_common_context *rctx = (struct r600_common_context *)ctx;
-	struct radeon_winsys_cs *cs = rctx->rings.dma.cs;
+	struct radeon_winsys_cs *cs = rctx->dma.cs;
 
 	if (cs->cdw)
 		rctx->ws->cs_flush(cs, flags, &rctx->last_sdma_fence, 0);
@@ -296,10 +296,10 @@ bool r600_common_context_init(struct r600_common_context *rctx,
 		return false;
 
 	if (rscreen->info.r600_has_dma && !(rscreen->debug_flags & DBG_NO_ASYNC_DMA)) {
-		rctx->rings.dma.cs = rctx->ws->cs_create(rctx->ctx, RING_DMA,
-							 r600_flush_dma_ring,
-							 rctx, NULL);
-		rctx->rings.dma.flush = r600_flush_dma_ring;
+		rctx->dma.cs = rctx->ws->cs_create(rctx->ctx, RING_DMA,
+						   r600_flush_dma_ring,
+						   rctx, NULL);
+		rctx->dma.flush = r600_flush_dma_ring;
 	}
 
 	return true;
@@ -307,10 +307,10 @@ bool r600_common_context_init(struct r600_common_context *rctx,
 
 void r600_common_context_cleanup(struct r600_common_context *rctx)
 {
-	if (rctx->rings.gfx.cs)
-		rctx->ws->cs_destroy(rctx->rings.gfx.cs);
-	if (rctx->rings.dma.cs)
-		rctx->ws->cs_destroy(rctx->rings.dma.cs);
+	if (rctx->gfx.cs)
+		rctx->ws->cs_destroy(rctx->gfx.cs);
+	if (rctx->dma.cs)
+		rctx->ws->cs_destroy(rctx->dma.cs);
 	if (rctx->ctx)
 		rctx->ws->ctx_destroy(rctx->ctx);
 
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h
index 9fae5c8da26..2e3f1547e45 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -369,11 +369,6 @@ struct r600_ring {
 		      struct pipe_fence_handle **fence);
 };
 
-struct r600_rings {
-	struct r600_ring		gfx;
-	struct r600_ring		dma;
-};
-
 struct r600_common_context {
 	struct pipe_context b; /* base class */
 
@@ -382,7 +377,8 @@ struct r600_common_context {
 	struct radeon_winsys_ctx	*ctx;
 	enum radeon_family		family;
 	enum chip_class			chip_class;
-	struct r600_rings		rings;
+	struct r600_ring		gfx;
+	struct r600_ring		dma;
 	struct pipe_fence_handle	*last_sdma_fence;
 	unsigned			initial_gfx_cs_size;
 	unsigned			gpu_reset_counter;
diff --git a/src/gallium/drivers/radeon/r600_query.c b/src/gallium/drivers/radeon/r600_query.c
index 2bb5732262f..ce0d7e77d43 100644
--- a/src/gallium/drivers/radeon/r600_query.c
+++ b/src/gallium/drivers/radeon/r600_query.c
@@ -172,7 +172,7 @@ static unsigned event_type_for_stream(struct r600_query *query)
 
 static void r600_emit_query_begin(struct r600_common_context *ctx, struct r600_query *query)
 {
-	struct radeon_winsys_cs *cs = ctx->rings.gfx.cs;
+	struct radeon_winsys_cs *cs = ctx->gfx.cs;
 	uint64_t va;
 
 	r600_update_occlusion_query_state(ctx, query->type, 1);
@@ -225,7 +225,7 @@ static void r600_emit_query_begin(struct r600_common_context *ctx, struct r600_q
 	default:
 		assert(0);
 	}
-	r600_emit_reloc(ctx, &ctx->rings.gfx, query->buffer.buf, RADEON_USAGE_WRITE,
+	r600_emit_reloc(ctx, &ctx->gfx, query->buffer.buf, RADEON_USAGE_WRITE,
 			RADEON_PRIO_QUERY);
 
 	if (r600_is_timer_query(query->type))
@@ -236,7 +236,7 @@ static void r600_emit_query_begin(struct r600_common_context *ctx, struct r600_q
 
 static void r600_emit_query_end(struct r600_common_context *ctx, struct r600_query *query)
 {
-	struct radeon_winsys_cs *cs = ctx->rings.gfx.cs;
+	struct radeon_winsys_cs *cs = ctx->gfx.cs;
 	uint64_t va;
 
 	/* The queries which need begin already called this in begin_query. */
@@ -287,7 +287,7 @@ static void r600_emit_query_end(struct r600_common_context *ctx, struct r600_que
 	default:
 		assert(0);
 	}
-	r600_emit_reloc(ctx, &ctx->rings.gfx, query->buffer.buf, RADEON_USAGE_WRITE,
+	r600_emit_reloc(ctx, &ctx->gfx, query->buffer.buf, RADEON_USAGE_WRITE,
 			RADEON_PRIO_QUERY);
 
 	query->buffer.results_end += query->result_size;
@@ -306,7 +306,7 @@ static void r600_emit_query_end(struct r600_common_context *ctx, struct r600_que
 static void r600_emit_query_predication(struct r600_common_context *ctx, struct r600_query *query,
 					int operation, bool flag_wait)
 {
-	struct radeon_winsys_cs *cs = ctx->rings.gfx.cs;
+	struct radeon_winsys_cs *cs = ctx->gfx.cs;
 	uint32_t op = PRED_OP(operation);
 
 	/* if true then invert, see GL_ARB_conditional_render_inverted */
@@ -343,7 +343,7 @@ static void r600_emit_query_predication(struct r600_common_context *ctx, struct
 				radeon_emit(cs, PKT3(PKT3_SET_PREDICATION, 1, 0));
 				radeon_emit(cs, va + results_base);
 				radeon_emit(cs, op | (((va + results_base) >> 32) & 0xFF));
-				r600_emit_reloc(ctx, &ctx->rings.gfx, qbuf->buf, RADEON_USAGE_READ,
+				r600_emit_reloc(ctx, &ctx->gfx, qbuf->buf, RADEON_USAGE_READ,
 						RADEON_PRIO_QUERY);
 				results_base += query->result_size;
 	
@@ -939,7 +939,7 @@ void r600_resume_timer_queries(struct r600_common_context *ctx)
 /* Get backends mask */
 void r600_query_init_backend_mask(struct r600_common_context *ctx)
 {
-	struct radeon_winsys_cs *cs = ctx->rings.gfx.cs;
+	struct radeon_winsys_cs *cs = ctx->gfx.cs;
 	struct r600_resource *buffer;
 	uint32_t *results;
 	unsigned num_backends = ctx->screen->info.r600_num_backends;
@@ -990,7 +990,7 @@ void r600_query_init_backend_mask(struct r600_common_context *ctx)
 		radeon_emit(cs, buffer->gpu_address);
 		radeon_emit(cs, buffer->gpu_address >> 32);
 
-		r600_emit_reloc(ctx, &ctx->rings.gfx, buffer,
+		r600_emit_reloc(ctx, &ctx->gfx, buffer,
                                 RADEON_USAGE_WRITE, RADEON_PRIO_QUERY);
 
 		/* analyze results */
diff --git a/src/gallium/drivers/radeon/r600_streamout.c b/src/gallium/drivers/radeon/r600_streamout.c
index 33403b572af..e977ed9fa10 100644
--- a/src/gallium/drivers/radeon/r600_streamout.c
+++ b/src/gallium/drivers/radeon/r600_streamout.c
@@ -152,7 +152,7 @@ void r600_set_streamout_targets(struct pipe_context *ctx,
 
 static void r600_flush_vgt_streamout(struct r600_common_context *rctx)
 {
-	struct radeon_winsys_cs *cs = rctx->rings.gfx.cs;
+	struct radeon_winsys_cs *cs = rctx->gfx.cs;
 	unsigned reg_strmout_cntl;
 
 	/* The register is at different places on different ASICs. */
@@ -184,7 +184,7 @@ static void r600_flush_vgt_streamout(struct r600_common_context *rctx)
 
 static void r600_emit_streamout_begin(struct r600_common_context *rctx, struct r600_atom *atom)
 {
-	struct radeon_winsys_cs *cs = rctx->rings.gfx.cs;
+	struct radeon_winsys_cs *cs = rctx->gfx.cs;
 	struct r600_so_target **t = rctx->streamout.targets;
 	unsigned *stride_in_dw = rctx->streamout.stride_in_dw;
 	unsigned i, update_flags = 0;
@@ -216,7 +216,7 @@ static void r600_emit_streamout_begin(struct r600_common_context *rctx, struct r
 			radeon_emit(cs, stride_in_dw[i]);		/* VTX_STRIDE (in DW) */
 			radeon_emit(cs, va >> 8);			/* BUFFER_BASE */
 
-			r600_emit_reloc(rctx, &rctx->rings.gfx, r600_resource(t[i]->b.buffer),
+			r600_emit_reloc(rctx, &rctx->gfx, r600_resource(t[i]->b.buffer),
 					RADEON_USAGE_WRITE, RADEON_PRIO_RINGS_STREAMOUT);
 
 			/* R7xx requires this packet after updating BUFFER_BASE.
@@ -226,7 +226,7 @@ static void r600_emit_streamout_begin(struct r600_common_context *rctx, struct r
 				radeon_emit(cs, i);
 				radeon_emit(cs, va >> 8);
 
-				r600_emit_reloc(rctx, &rctx->rings.gfx, r600_resource(t[i]->b.buffer),
+				r600_emit_reloc(rctx, &rctx->gfx, r600_resource(t[i]->b.buffer),
 						RADEON_USAGE_WRITE, RADEON_PRIO_RINGS_STREAMOUT);
 			}
 		}
@@ -244,7 +244,7 @@ static void r600_emit_streamout_begin(struct r600_common_context *rctx, struct r
 			radeon_emit(cs, va); /* src address lo */
 			radeon_emit(cs, va >> 32); /* src address hi */
 
-			r600_emit_reloc(rctx,  &rctx->rings.gfx, t[i]->buf_filled_size,
+			r600_emit_reloc(rctx,  &rctx->gfx, t[i]->buf_filled_size,
 					RADEON_USAGE_READ, RADEON_PRIO_SO_FILLED_SIZE);
 		} else {
 			/* Start from the beginning. */
@@ -267,7 +267,7 @@ static void r600_emit_streamout_begin(struct r600_common_context *rctx, struct r
 
 void r600_emit_streamout_end(struct r600_common_context *rctx)
 {
-	struct radeon_winsys_cs *cs = rctx->rings.gfx.cs;
+	struct radeon_winsys_cs *cs = rctx->gfx.cs;
 	struct r600_so_target **t = rctx->streamout.targets;
 	unsigned i;
 	uint64_t va;
@@ -288,7 +288,7 @@ void r600_emit_streamout_end(struct r600_common_context *rctx)
 		radeon_emit(cs, 0); /* unused */
 		radeon_emit(cs, 0); /* unused */
 
-		r600_emit_reloc(rctx,  &rctx->rings.gfx, t[i]->buf_filled_size,
+		r600_emit_reloc(rctx,  &rctx->gfx, t[i]->buf_filled_size,
 				RADEON_USAGE_WRITE, RADEON_PRIO_SO_FILLED_SIZE);
 
 		/* Zero the buffer size. The counters (primitives generated,
@@ -336,8 +336,8 @@ static void r600_emit_streamout_enable(struct r600_common_context *rctx,
 			S_028B94_STREAMOUT_2_EN(r600_get_strmout_en(rctx)) |
 			S_028B94_STREAMOUT_3_EN(r600_get_strmout_en(rctx));
 	}
-	radeon_set_context_reg(rctx->rings.gfx.cs, strmout_buffer_reg, strmout_buffer_val);
-	radeon_set_context_reg(rctx->rings.gfx.cs, strmout_config_reg, strmout_config_val);
+	radeon_set_context_reg(rctx->gfx.cs, strmout_buffer_reg, strmout_buffer_val);
+	radeon_set_context_reg(rctx->gfx.cs, strmout_config_reg, strmout_config_val);
 }
 
 static void r600_set_streamout_enable(struct r600_common_context *rctx, bool enable)
diff --git a/src/gallium/drivers/radeonsi/cik_sdma.c b/src/gallium/drivers/radeonsi/cik_sdma.c
index e53af1dd6b5..2de237b4716 100644
--- a/src/gallium/drivers/radeonsi/cik_sdma.c
+++ b/src/gallium/drivers/radeonsi/cik_sdma.c
@@ -50,7 +50,7 @@ static void cik_sdma_do_copy_buffer(struct si_context *ctx,
 				    uint64_t src_offset,
 				    uint64_t size)
 {
-	struct radeon_winsys_cs *cs = ctx->b.rings.dma.cs;
+	struct radeon_winsys_cs *cs = ctx->b.dma.cs;
 	unsigned i, ncopy, csize;
 	struct r600_resource *rdst = (struct r600_resource*)dst;
 	struct r600_resource *rsrc = (struct r600_resource*)src;
@@ -61,9 +61,9 @@ static void cik_sdma_do_copy_buffer(struct si_context *ctx,
 	ncopy = (size + CIK_SDMA_COPY_MAX_SIZE - 1) / CIK_SDMA_COPY_MAX_SIZE;
 	r600_need_dma_space(&ctx->b, ncopy * 7);
 
-	radeon_add_to_buffer_list(&ctx->b, &ctx->b.rings.dma, rsrc, RADEON_USAGE_READ,
+	radeon_add_to_buffer_list(&ctx->b, &ctx->b.dma, rsrc, RADEON_USAGE_READ,
 			      RADEON_PRIO_SDMA_BUFFER);
-	radeon_add_to_buffer_list(&ctx->b, &ctx->b.rings.dma, rdst, RADEON_USAGE_WRITE,
+	radeon_add_to_buffer_list(&ctx->b, &ctx->b.dma, rdst, RADEON_USAGE_WRITE,
 			      RADEON_PRIO_SDMA_BUFFER);
 
 	for (i = 0; i < ncopy; i++) {
@@ -112,7 +112,7 @@ static void cik_sdma_copy_tile(struct si_context *ctx,
 			       unsigned pitch,
 			       unsigned bpe)
 {
-	struct radeon_winsys_cs *cs = ctx->b.rings.dma.cs;
+	struct radeon_winsys_cs *cs = ctx->b.dma.cs;
 	struct si_screen *sscreen = ctx->screen;
 	struct r600_texture *rsrc = (struct r600_texture*)src;
 	struct r600_texture *rdst = (struct r600_texture*)dst;
@@ -171,9 +171,9 @@ static void cik_sdma_copy_tile(struct si_context *ctx,
 	ncopy = (copy_height + cheight - 1) / cheight;
 	r600_need_dma_space(&ctx->b, ncopy * 12);
 
-	radeon_add_to_buffer_list(&ctx->b, &ctx->b.rings.dma, &rsrc->resource,
+	radeon_add_to_buffer_list(&ctx->b, &ctx->b.dma, &rsrc->resource,
 			      RADEON_USAGE_READ, RADEON_PRIO_SDMA_TEXTURE);
-	radeon_add_to_buffer_list(&ctx->b, &ctx->b.rings.dma, &rdst->resource,
+	radeon_add_to_buffer_list(&ctx->b, &ctx->b.dma, &rdst->resource,
 			      RADEON_USAGE_WRITE, RADEON_PRIO_SDMA_TEXTURE);
 
 	copy_height = size * 4 / pitch;
@@ -224,7 +224,7 @@ void cik_sdma_copy(struct pipe_context *ctx,
 	unsigned copy_height, y_align;
 	unsigned dst_x = dstx, dst_y = dsty, dst_z = dstz;
 
-	if (sctx->b.rings.dma.cs == NULL) {
+	if (sctx->b.dma.cs == NULL) {
 		goto fallback;
 	}
 
diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c
index c008f8b402b..2d551dd0e6b 100644
--- a/src/gallium/drivers/radeonsi/si_compute.c
+++ b/src/gallium/drivers/radeonsi/si_compute.c
@@ -227,7 +227,7 @@ static void si_launch_grid(
 		uint32_t pc, const void *input)
 {
 	struct si_context *sctx = (struct si_context*)ctx;
-	struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
+	struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
 	struct si_compute *program = sctx->cs_shader_state.program;
 	struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state);
 	struct r600_resource *input_buffer = program->input_buffer;
@@ -274,7 +274,7 @@ static void si_launch_grid(
 	kernel_args_size = program->input_size + num_work_size_bytes + 8 /* For scratch va */;
 
 	kernel_args = sctx->b.ws->buffer_map(input_buffer->cs_buf,
-			sctx->b.rings.gfx.cs, PIPE_TRANSFER_WRITE);
+			sctx->b.gfx.cs, PIPE_TRANSFER_WRITE);
 	for (i = 0; i < 3; i++) {
 		kernel_args[i] = grid_layout[i];
 		kernel_args[i + 3] = grid_layout[i] * block_layout[i];
@@ -294,7 +294,7 @@ static void si_launch_grid(
 			    shader->scratch_bytes_per_wave *
 			    num_waves_for_scratch);
 
-		radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
+		radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
 					  shader->scratch_bo,
 					  RADEON_USAGE_READWRITE,
 					  RADEON_PRIO_SCRATCH_BUFFER);
@@ -310,7 +310,7 @@ static void si_launch_grid(
 	kernel_args_va = input_buffer->gpu_address;
 	kernel_args_va += kernel_args_offset;
 
-	radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, input_buffer,
+	radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, input_buffer,
 				  RADEON_USAGE_READ, RADEON_PRIO_CONST_BUFFER);
 
 	si_pm4_set_reg(pm4, R_00B900_COMPUTE_USER_DATA_0, kernel_args_va);
@@ -338,7 +338,7 @@ static void si_launch_grid(
 		if (!buffer) {
 			continue;
 		}
-		radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, buffer,
+		radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, buffer,
 					  RADEON_USAGE_READWRITE,
 					  RADEON_PRIO_COMPUTE_GLOBAL);
 	}
@@ -361,7 +361,7 @@ static void si_launch_grid(
 #if HAVE_LLVM >= 0x0306
 	shader_va += pc;
 #endif
-	radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, shader->bo,
+	radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, shader->bo,
 				  RADEON_USAGE_READ, RADEON_PRIO_USER_SHADER);
 	si_pm4_set_reg(pm4, R_00B830_COMPUTE_PGM_LO, shader_va >> 8);
 	si_pm4_set_reg(pm4, R_00B834_COMPUTE_PGM_HI, shader_va >> 40);
diff --git a/src/gallium/drivers/radeonsi/si_cp_dma.c b/src/gallium/drivers/radeonsi/si_cp_dma.c
index ac35a54ce65..0bf85a04db7 100644
--- a/src/gallium/drivers/radeonsi/si_cp_dma.c
+++ b/src/gallium/drivers/radeonsi/si_cp_dma.c
@@ -46,7 +46,7 @@ static void si_emit_cp_dma_copy_buffer(struct si_context *sctx,
 				       uint64_t dst_va, uint64_t src_va,
 				       unsigned size, unsigned flags)
 {
-	struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
+	struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
 	uint32_t sync_flag = flags & R600_CP_DMA_SYNC ? S_411_CP_SYNC(1) : 0;
 	uint32_t wr_confirm = !(flags & R600_CP_DMA_SYNC) ? S_414_DISABLE_WR_CONFIRM(1) : 0;
 	uint32_t raw_wait = flags & SI_CP_DMA_RAW_WAIT ? S_414_RAW_WAIT(1) : 0;
@@ -80,7 +80,7 @@ static void si_emit_cp_dma_clear_buffer(struct si_context *sctx,
 					uint64_t dst_va, unsigned size,
 					uint32_t clear_value, unsigned flags)
 {
-	struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
+	struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
 	uint32_t sync_flag = flags & R600_CP_DMA_SYNC ? S_411_CP_SYNC(1) : 0;
 	uint32_t wr_confirm = !(flags & R600_CP_DMA_SYNC) ? S_414_DISABLE_WR_CONFIRM(1) : 0;
 	uint32_t raw_wait = flags & SI_CP_DMA_RAW_WAIT ? S_414_RAW_WAIT(1) : 0;
@@ -129,11 +129,11 @@ static void si_cp_dma_prepare(struct si_context *sctx, struct pipe_resource *dst
 	si_need_cs_space(sctx);
 
 	/* This must be done after need_cs_space. */
-	radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
+	radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
 				  (struct r600_resource*)dst,
 				  RADEON_USAGE_WRITE, RADEON_PRIO_CP_DMA);
 	if (src)
-		radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
+		radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
 					  (struct r600_resource*)src,
 					  RADEON_USAGE_READ, RADEON_PRIO_CP_DMA);
 
@@ -177,7 +177,7 @@ static void si_clear_buffer(struct pipe_context *ctx, struct pipe_resource *dst,
 	/* Fallback for unaligned clears. */
 	if (offset % 4 != 0 || size % 4 != 0) {
 		uint8_t *map = sctx->b.ws->buffer_map(r600_resource(dst)->cs_buf,
-						      sctx->b.rings.gfx.cs,
+						      sctx->b.gfx.cs,
 						      PIPE_TRANSFER_WRITE);
 		map += offset;
 		for (unsigned i = 0; i < size; i++) {
diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c
index b4dc3cb2347..3fa3a9bbd6e 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -117,7 +117,7 @@ static bool si_upload_descriptors(struct si_context *sctx,
 
 	util_memcpy_cpu_to_le32(ptr, desc->list, list_size);
 
-	radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, desc->buffer,
+	radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, desc->buffer,
 			      RADEON_USAGE_READ, RADEON_PRIO_DESCRIPTORS);
 
 	desc->list_dirty = false;
@@ -152,14 +152,14 @@ static void si_sampler_views_begin_new_cs(struct si_context *sctx,
 		if (!rview->resource)
 			continue;
 
-		radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
+		radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
 				      rview->resource, RADEON_USAGE_READ,
 				      r600_get_sampler_view_priority(rview->resource));
 	}
 
 	if (!views->desc.buffer)
 		return;
-	radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, views->desc.buffer,
+	radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, views->desc.buffer,
 			      RADEON_USAGE_READWRITE, RADEON_PRIO_DESCRIPTORS);
 }
 
@@ -177,12 +177,12 @@ static void si_set_sampler_view(struct si_context *sctx, unsigned shader,
 			(struct si_sampler_view*)view;
 
 		if (rview->resource)
-			radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
+			radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
 				rview->resource, RADEON_USAGE_READ,
 				r600_get_sampler_view_priority(rview->resource));
 
 		if (rview->dcc_buffer && rview->dcc_buffer != rview->resource)
-			radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
+			radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
 				rview->dcc_buffer, RADEON_USAGE_READ,
 				RADEON_PRIO_DCC);
 
@@ -264,7 +264,7 @@ static void si_sampler_states_begin_new_cs(struct si_context *sctx,
 {
 	if (!states->desc.buffer)
 		return;
-	radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, states->desc.buffer,
+	radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, states->desc.buffer,
 			      RADEON_USAGE_READWRITE, RADEON_PRIO_DESCRIPTORS);
 }
 
@@ -334,14 +334,14 @@ static void si_buffer_resources_begin_new_cs(struct si_context *sctx,
 	while (mask) {
 		int i = u_bit_scan64(&mask);
 
-		radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
+		radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
 				      (struct r600_resource*)buffers->buffers[i],
 				      buffers->shader_usage, buffers->priority);
 	}
 
 	if (!buffers->desc.buffer)
 		return;
-	radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
+	radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
 			      buffers->desc.buffer, RADEON_USAGE_READWRITE,
 			      RADEON_PRIO_DESCRIPTORS);
 }
@@ -362,14 +362,14 @@ static void si_vertex_buffers_begin_new_cs(struct si_context *sctx)
 		if (!sctx->vertex_buffer[vb].buffer)
 			continue;
 
-		radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
+		radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
 				      (struct r600_resource*)sctx->vertex_buffer[vb].buffer,
 				      RADEON_USAGE_READ, RADEON_PRIO_VERTEX_BUFFER);
 	}
 
 	if (!desc->buffer)
 		return;
-	radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
+	radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
 			      desc->buffer, RADEON_USAGE_READ,
 			      RADEON_PRIO_DESCRIPTORS);
 }
@@ -396,7 +396,7 @@ static bool si_upload_vertex_buffer_descriptors(struct si_context *sctx)
 	if (!desc->buffer)
 		return false;
 
-	radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
+	radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
 			      desc->buffer, RADEON_USAGE_READ,
 			      RADEON_PRIO_DESCRIPTORS);
 
@@ -440,7 +440,7 @@ static bool si_upload_vertex_buffer_descriptors(struct si_context *sctx)
 		desc[3] = sctx->vertex_elements->rsrc_word3[i];
 
 		if (!bound[ve->vertex_buffer_index]) {
-			radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
+			radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
 					      (struct r600_resource*)vb->buffer,
 					      RADEON_USAGE_READ, RADEON_PRIO_VERTEX_BUFFER);
 			bound[ve->vertex_buffer_index] = true;
@@ -525,7 +525,7 @@ static void si_set_constant_buffer(struct pipe_context *ctx, uint shader, uint s
 			  S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
 
 		buffers->buffers[slot] = buffer;
-		radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
+		radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
 				      (struct r600_resource*)buffer,
 				      buffers->shader_usage, buffers->priority);
 		buffers->desc.enabled_mask |= 1llu << slot;
@@ -620,7 +620,7 @@ void si_set_ring_buffer(struct pipe_context *ctx, uint shader, uint slot,
 			  S_008F0C_ADD_TID_ENABLE(add_tid);
 
 		pipe_resource_reference(&buffers->buffers[slot], buffer);
-		radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
+		radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
 				      (struct r600_resource*)buffer,
 				      buffers->shader_usage, buffers->priority);
 		buffers->desc.enabled_mask |= 1llu << slot;
@@ -710,7 +710,7 @@ static void si_set_streamout_targets(struct pipe_context *ctx,
 			/* Set the resource. */
 			pipe_resource_reference(&buffers->buffers[bufidx],
 						buffer);
-			radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
+			radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
 					      (struct r600_resource*)buffer,
 					      buffers->shader_usage, buffers->priority);
 			buffers->desc.enabled_mask |= 1llu << bufidx;
@@ -809,7 +809,7 @@ static void si_invalidate_buffer(struct pipe_context *ctx, struct pipe_resource
 							    old_va, buf);
 				buffers->desc.list_dirty = true;
 
-				radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
+				radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
 						      rbuffer, buffers->shader_usage,
 						      buffers->priority);
 
@@ -838,7 +838,7 @@ static void si_invalidate_buffer(struct pipe_context *ctx, struct pipe_resource
 							    old_va, buf);
 				buffers->desc.list_dirty = true;
 
-				radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
+				radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
 						      rbuffer, buffers->shader_usage,
 						      buffers->priority);
 			}
@@ -863,7 +863,7 @@ static void si_invalidate_buffer(struct pipe_context *ctx, struct pipe_resource
 							    old_va, buf);
 				views->desc.list_dirty = true;
 
-				radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
+				radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
 						      rbuffer, RADEON_USAGE_READ,
 						      RADEON_PRIO_SAMPLER_BUFFER);
 			}
@@ -948,7 +948,7 @@ static void si_emit_shader_pointer(struct si_context *sctx,
 				   struct si_descriptors *desc,
 				   unsigned sh_base, bool keep_dirty)
 {
-	struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
+	struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
 	uint64_t va;
 
 	if (!desc->pointer_dirty || !desc->buffer)
diff --git a/src/gallium/drivers/radeonsi/si_dma.c b/src/gallium/drivers/radeonsi/si_dma.c
index 581e89f42d8..240d96190a9 100644
--- a/src/gallium/drivers/radeonsi/si_dma.c
+++ b/src/gallium/drivers/radeonsi/si_dma.c
@@ -49,7 +49,7 @@ static void si_dma_copy_buffer(struct si_context *ctx,
 				uint64_t src_offset,
 				uint64_t size)
 {
-	struct radeon_winsys_cs *cs = ctx->b.rings.dma.cs;
+	struct radeon_winsys_cs *cs = ctx->b.dma.cs;
 	unsigned i, ncopy, csize, max_csize, sub_cmd, shift;
 	struct r600_resource *rdst = (struct r600_resource*)dst;
 	struct r600_resource *rsrc = (struct r600_resource*)src;
@@ -78,9 +78,9 @@ static void si_dma_copy_buffer(struct si_context *ctx,
 
 	r600_need_dma_space(&ctx->b, ncopy * 5);
 
-	radeon_add_to_buffer_list(&ctx->b, &ctx->b.rings.dma, rsrc, RADEON_USAGE_READ,
+	radeon_add_to_buffer_list(&ctx->b, &ctx->b.dma, rsrc, RADEON_USAGE_READ,
 			      RADEON_PRIO_SDMA_BUFFER);
-	radeon_add_to_buffer_list(&ctx->b, &ctx->b.rings.dma, rdst, RADEON_USAGE_WRITE,
+	radeon_add_to_buffer_list(&ctx->b, &ctx->b.dma, rdst, RADEON_USAGE_WRITE,
 			      RADEON_PRIO_SDMA_BUFFER);
 
 	for (i = 0; i < ncopy; i++) {
@@ -111,7 +111,7 @@ static void si_dma_copy_tile(struct si_context *ctx,
 			     unsigned pitch,
 			     unsigned bpp)
 {
-	struct radeon_winsys_cs *cs = ctx->b.rings.dma.cs;
+	struct radeon_winsys_cs *cs = ctx->b.dma.cs;
 	struct si_screen *sscreen = ctx->screen;
 	struct r600_texture *rsrc = (struct r600_texture*)src;
 	struct r600_texture *rdst = (struct r600_texture*)dst;
@@ -177,9 +177,9 @@ static void si_dma_copy_tile(struct si_context *ctx,
 	ncopy = (size / SI_DMA_COPY_MAX_SIZE_DW) + !!(size % SI_DMA_COPY_MAX_SIZE_DW);
 	r600_need_dma_space(&ctx->b, ncopy * 9);
 
-	radeon_add_to_buffer_list(&ctx->b, &ctx->b.rings.dma, &rsrc->resource,
+	radeon_add_to_buffer_list(&ctx->b, &ctx->b.dma, &rsrc->resource,
 			      RADEON_USAGE_READ, RADEON_PRIO_SDMA_TEXTURE);
-	radeon_add_to_buffer_list(&ctx->b, &ctx->b.rings.dma, &rdst->resource,
+	radeon_add_to_buffer_list(&ctx->b, &ctx->b.dma, &rdst->resource,
 			      RADEON_USAGE_WRITE, RADEON_PRIO_SDMA_TEXTURE);
 
 	for (i = 0; i < ncopy; i++) {
@@ -221,7 +221,7 @@ void si_dma_copy(struct pipe_context *ctx,
 	unsigned src_x, src_y;
 	unsigned dst_x = dstx, dst_y = dsty, dst_z = dstz;
 
-	if (sctx->b.rings.dma.cs == NULL) {
+	if (sctx->b.dma.cs == NULL) {
 		goto fallback;
 	}
 
diff --git a/src/gallium/drivers/radeonsi/si_hw_context.c b/src/gallium/drivers/radeonsi/si_hw_context.c
index e5f1c8462d2..2f4f1eec04b 100644
--- a/src/gallium/drivers/radeonsi/si_hw_context.c
+++ b/src/gallium/drivers/radeonsi/si_hw_context.c
@@ -29,22 +29,22 @@
 /* initialize */
 void si_need_cs_space(struct si_context *ctx)
 {
-	struct radeon_winsys_cs *cs = ctx->b.rings.gfx.cs;
-	struct radeon_winsys_cs *dma = ctx->b.rings.dma.cs;
+	struct radeon_winsys_cs *cs = ctx->b.gfx.cs;
+	struct radeon_winsys_cs *dma = ctx->b.dma.cs;
 
 	/* Flush the DMA IB if it's not empty. */
 	if (dma && dma->cdw)
-		ctx->b.rings.dma.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
+		ctx->b.dma.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
 
 	/* There are two memory usage counters in the winsys for all buffers
 	 * that have been added (cs_add_buffer) and two counters in the pipe
 	 * driver for those that haven't been added yet.
 	 */
-	if (unlikely(!ctx->b.ws->cs_memory_below_limit(ctx->b.rings.gfx.cs,
+	if (unlikely(!ctx->b.ws->cs_memory_below_limit(ctx->b.gfx.cs,
 						       ctx->b.vram, ctx->b.gtt))) {
 		ctx->b.gtt = 0;
 		ctx->b.vram = 0;
-		ctx->b.rings.gfx.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
+		ctx->b.gfx.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
 		return;
 	}
 	ctx->b.gtt = 0;
@@ -54,14 +54,14 @@ void si_need_cs_space(struct si_context *ctx)
 	 * and just flush if there is not enough space left.
 	 */
 	if (unlikely(cs->cdw > cs->max_dw - 2048))
-		ctx->b.rings.gfx.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
+		ctx->b.gfx.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
 }
 
 void si_context_gfx_flush(void *context, unsigned flags,
 			  struct pipe_fence_handle **fence)
 {
 	struct si_context *ctx = context;
-	struct radeon_winsys_cs *cs = ctx->b.rings.gfx.cs;
+	struct radeon_winsys_cs *cs = ctx->b.gfx.cs;
 	struct radeon_winsys *ws = ctx->b.ws;
 
 	if (ctx->gfx_flush_in_progress)
@@ -191,7 +191,7 @@ void si_begin_new_cs(struct si_context *ctx)
 
 	r600_postflush_resume_features(&ctx->b);
 
-	ctx->b.initial_gfx_cs_size = ctx->b.rings.gfx.cs->cdw;
+	ctx->b.initial_gfx_cs_size = ctx->b.gfx.cs->cdw;
 
 	/* Invalidate various draw states so that they are emitted before
 	 * the first draw call. */
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index 2316cca9c2e..6c13fcdf5d7 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -139,10 +139,10 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen,
 		sctx->b.b.create_video_buffer = vl_video_buffer_create;
 	}
 
-	sctx->b.rings.gfx.cs = ws->cs_create(sctx->b.ctx, RING_GFX, si_context_gfx_flush,
-					     sctx, sscreen->b.trace_bo ?
-						sscreen->b.trace_bo->cs_buf : NULL);
-	sctx->b.rings.gfx.flush = si_context_gfx_flush;
+	sctx->b.gfx.cs = ws->cs_create(sctx->b.ctx, RING_GFX, si_context_gfx_flush,
+				       sctx, sscreen->b.trace_bo ?
+					       sscreen->b.trace_bo->cs_buf : NULL);
+	sctx->b.gfx.flush = si_context_gfx_flush;
 
 	/* Border colors. */
 	sctx->border_color_table = malloc(SI_MAX_BORDER_COLORS *
diff --git a/src/gallium/drivers/radeonsi/si_pm4.c b/src/gallium/drivers/radeonsi/si_pm4.c
index f16933c5f98..c4ef2e78c50 100644
--- a/src/gallium/drivers/radeonsi/si_pm4.c
+++ b/src/gallium/drivers/radeonsi/si_pm4.c
@@ -127,10 +127,10 @@ void si_pm4_free_state(struct si_context *sctx,
 
 void si_pm4_emit(struct si_context *sctx, struct si_pm4_state *state)
 {
-	struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
+	struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
 
 	for (int i = 0; i < state->nbo; ++i) {
-		radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, state->bo[i],
+		radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, state->bo[i],
 				      state->bo_usage[i], state->bo_priority[i]);
 	}
 
@@ -139,7 +139,7 @@ void si_pm4_emit(struct si_context *sctx, struct si_pm4_state *state)
 	} else {
 		struct r600_resource *ib = state->indirect_buffer;
 
-		radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, ib,
+		radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, ib,
 					  RADEON_USAGE_READ,
                                           RADEON_PRIO_IB2);
 
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index 2c35c0a8e0a..6c411c11a77 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -248,7 +248,7 @@ static unsigned si_pack_float_12p4(float x)
  */
 static void si_emit_cb_target_mask(struct si_context *sctx, struct r600_atom *atom)
 {
-	struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
+	struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
 	struct si_state_blend *blend = sctx->queued.named.blend;
 	uint32_t mask = 0, i;
 
@@ -454,7 +454,7 @@ static void si_set_blend_color(struct pipe_context *ctx,
 
 static void si_emit_blend_color(struct si_context *sctx, struct r600_atom *atom)
 {
-	struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
+	struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
 
 	radeon_set_context_reg_seq(cs, R_028414_CB_BLEND_RED, 4);
 	radeon_emit_array(cs, (uint32_t*)sctx->blend_color.state.color, 4);
@@ -486,7 +486,7 @@ static void si_set_clip_state(struct pipe_context *ctx,
 
 static void si_emit_clip_state(struct si_context *sctx, struct r600_atom *atom)
 {
-	struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
+	struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
 
 	radeon_set_context_reg_seq(cs, R_0285BC_PA_CL_UCP_0_X, 6*4);
 	radeon_emit_array(cs, (uint32_t*)sctx->clip_state.state.ucp, 6*4);
@@ -496,7 +496,7 @@ static void si_emit_clip_state(struct si_context *sctx, struct r600_atom *atom)
 
 static void si_emit_clip_regs(struct si_context *sctx, struct r600_atom *atom)
 {
-	struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
+	struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
 	struct tgsi_shader_info *info = si_get_vs_info(sctx);
 	unsigned window_space =
 	   info->properties[TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION];
@@ -541,7 +541,7 @@ static void si_set_scissor_states(struct pipe_context *ctx,
 
 static void si_emit_scissors(struct si_context *sctx, struct r600_atom *atom)
 {
-	struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
+	struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
 	struct pipe_scissor_state *states = sctx->scissors.states;
 	unsigned mask = sctx->scissors.dirty_mask;
 
@@ -593,7 +593,7 @@ static void si_set_viewport_states(struct pipe_context *ctx,
 
 static void si_emit_viewports(struct si_context *sctx, struct r600_atom *atom)
 {
-	struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
+	struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
 	struct pipe_viewport_state *states = sctx->viewports.states;
 	unsigned mask = sctx->viewports.dirty_mask;
 
@@ -830,7 +830,7 @@ static void si_delete_rs_state(struct pipe_context *ctx, void *state)
  */
 static void si_emit_stencil_ref(struct si_context *sctx, struct r600_atom *atom)
 {
-	struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
+	struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
 	struct pipe_stencil_ref *ref = &sctx->stencil_ref.state;
 	struct si_dsa_stencil_ref_part *dsa = &sctx->stencil_ref.dsa_part;
 
@@ -989,7 +989,7 @@ static void si_set_occlusion_query_state(struct pipe_context *ctx, bool enable)
 
 static void si_emit_db_render_state(struct si_context *sctx, struct r600_atom *state)
 {
-	struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
+	struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
 	struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
 	unsigned db_shader_control;
 
@@ -2233,7 +2233,7 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
 
 static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom *atom)
 {
-	struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
+	struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
 	struct pipe_framebuffer_state *state = &sctx->framebuffer.state;
 	unsigned i, nr_cbufs = state->nr_cbufs;
 	struct r600_texture *tex = NULL;
@@ -2252,20 +2252,20 @@ static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom
 		}
 
 		tex = (struct r600_texture *)cb->base.texture;
-		radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
+		radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
 				      &tex->resource, RADEON_USAGE_READWRITE,
 				      tex->surface.nsamples > 1 ?
 					      RADEON_PRIO_COLOR_BUFFER_MSAA :
 					      RADEON_PRIO_COLOR_BUFFER);
 
 		if (tex->cmask_buffer && tex->cmask_buffer != &tex->resource) {
-			radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
+			radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
 				tex->cmask_buffer, RADEON_USAGE_READWRITE,
 				RADEON_PRIO_CMASK);
 		}
 
 		if (tex->dcc_buffer && tex->dcc_buffer != &tex->resource) {
-			radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
+			radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
 				tex->dcc_buffer, RADEON_USAGE_READWRITE,
 				RADEON_PRIO_DCC);
 		}
@@ -2305,14 +2305,14 @@ static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom
 		struct r600_surface *zb = (struct r600_surface*)state->zsbuf;
 		struct r600_texture *rtex = (struct r600_texture*)zb->base.texture;
 
-		radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
+		radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
 				      &rtex->resource, RADEON_USAGE_READWRITE,
 				      zb->base.texture->nr_samples > 1 ?
 					      RADEON_PRIO_DEPTH_BUFFER_MSAA :
 					      RADEON_PRIO_DEPTH_BUFFER);
 
 		if (zb->db_htile_data_base) {
-			radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
+			radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
 					      rtex->htile_buffer, RADEON_USAGE_READWRITE,
 					      RADEON_PRIO_HTILE);
 		}
@@ -2354,7 +2354,7 @@ static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom
 static void si_emit_msaa_sample_locs(struct si_context *sctx,
 				     struct r600_atom *atom)
 {
-	struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
+	struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
 	unsigned nr_samples = sctx->framebuffer.nr_samples;
 
 	cayman_emit_msaa_sample_locs(cs, nr_samples > 1 ? nr_samples :
@@ -2363,7 +2363,7 @@ static void si_emit_msaa_sample_locs(struct si_context *sctx,
 
 static void si_emit_msaa_config(struct si_context *sctx, struct r600_atom *atom)
 {
-	struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
+	struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
 
 	cayman_emit_msaa_config(cs, sctx->framebuffer.nr_samples,
 				sctx->ps_iter_samples,
@@ -2846,7 +2846,7 @@ static void si_set_sample_mask(struct pipe_context *ctx, unsigned sample_mask)
 
 static void si_emit_sample_mask(struct si_context *sctx, struct r600_atom *atom)
 {
-	struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
+	struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
 	unsigned mask = sctx->sample_mask.sample_mask;
 
 	radeon_set_context_reg_seq(cs, R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0, 2);
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index 6c7fff9735c..3015374c648 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -108,7 +108,7 @@ static void si_emit_derived_tess_state(struct si_context *sctx,
 				       const struct pipe_draw_info *info,
 				       unsigned *num_patches)
 {
-	struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
+	struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
 	struct si_shader_ctx_state *ls = &sctx->vs_shader;
 	/* The TES pointer will only be used for sctx->last_tcs.
 	 * It would be wrong to think that TCS = TES. */
@@ -353,7 +353,7 @@ static unsigned si_get_ls_hs_config(struct si_context *sctx,
 
 static void si_emit_scratch_reloc(struct si_context *sctx)
 {
-	struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
+	struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
 
 	if (!sctx->emit_scratch_reloc)
 		return;
@@ -362,7 +362,7 @@ static void si_emit_scratch_reloc(struct si_context *sctx)
 			       sctx->spi_tmpring_size);
 
 	if (sctx->scratch_buffer) {
-		radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
+		radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
 				      sctx->scratch_buffer, RADEON_USAGE_READWRITE,
 				      RADEON_PRIO_SCRATCH_BUFFER);
 
@@ -373,7 +373,7 @@ static void si_emit_scratch_reloc(struct si_context *sctx)
 /* rast_prim is the primitive type after GS. */
 static void si_emit_rasterizer_prim_state(struct si_context *sctx)
 {
-	struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
+	struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
 	unsigned rast_prim = sctx->current_rast_prim;
 	struct si_state_rasterizer *rs = sctx->emitted.named.rasterizer;
 
@@ -401,7 +401,7 @@ static void si_emit_rasterizer_prim_state(struct si_context *sctx)
 static void si_emit_draw_registers(struct si_context *sctx,
 				   const struct pipe_draw_info *info)
 {
-	struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
+	struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
 	unsigned prim = si_conv_pipe_prim(info->mode);
 	unsigned gs_out_prim = si_conv_prim_to_gs_out(sctx->current_rast_prim);
 	unsigned ia_multi_vgt_param, ls_hs_config, num_patches = 0;
@@ -455,7 +455,7 @@ static void si_emit_draw_packets(struct si_context *sctx,
 				 const struct pipe_draw_info *info,
 				 const struct pipe_index_buffer *ib)
 {
-	struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
+	struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
 	unsigned sh_base_reg = sctx->shader_userdata.sh_base[PIPE_SHADER_VERTEX];
 
 	if (info->count_from_stream_output) {
@@ -476,7 +476,7 @@ static void si_emit_draw_packets(struct si_context *sctx,
 		radeon_emit(cs, R_028B2C_VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE >> 2);
 		radeon_emit(cs, 0); /* unused */
 
-		radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
+		radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
 				      t->buf_filled_size, RADEON_USAGE_READ,
 				      RADEON_PRIO_SO_FILLED_SIZE);
 	}
@@ -530,7 +530,7 @@ static void si_emit_draw_packets(struct si_context *sctx,
 	} else {
 		si_invalidate_draw_sh_constants(sctx);
 
-		radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
+		radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
 				      (struct r600_resource *)info->indirect,
 				      RADEON_USAGE_READ, RADEON_PRIO_DRAW_INDIRECT);
 	}
@@ -540,7 +540,7 @@ static void si_emit_draw_packets(struct si_context *sctx,
 					  ib->index_size;
 		uint64_t index_va = r600_resource(ib->buffer)->gpu_address + ib->offset;
 
-		radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx,
+		radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
 				      (struct r600_resource *)ib->buffer,
 				      RADEON_USAGE_READ, RADEON_PRIO_INDEX_BUFFER);
 
@@ -607,7 +607,7 @@ static void si_emit_draw_packets(struct si_context *sctx,
 void si_emit_cache_flush(struct si_context *si_ctx, struct r600_atom *atom)
 {
 	struct r600_common_context *sctx = &si_ctx->b;
-	struct radeon_winsys_cs *cs = sctx->rings.gfx.cs;
+	struct radeon_winsys_cs *cs = sctx->gfx.cs;
 	uint32_t cp_coher_cntl = 0;
 	uint32_t compute =
 		PKT3_SHADER_TYPE_S(!!(sctx->flags & SI_CONTEXT_FLAG_COMPUTE));
@@ -907,10 +907,10 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
 
 void si_trace_emit(struct si_context *sctx)
 {
-	struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
+	struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
 
 	sctx->trace_id++;
-	radeon_add_to_buffer_list(&sctx->b, &sctx->b.rings.gfx, sctx->trace_buf,
+	radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, sctx->trace_buf,
 			      RADEON_USAGE_READWRITE, RADEON_PRIO_TRACE);
 	radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0));
 	radeon_emit(cs, S_370_DST_SEL(V_370_MEMORY_SYNC) |
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 4a3a04caa52..93a689d9a07 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -937,7 +937,7 @@ static void si_delete_shader_selector(struct pipe_context *ctx, void *state)
 
 static void si_emit_spi_map(struct si_context *sctx, struct r600_atom *atom)
 {
-	struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
+	struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
 	struct si_shader *ps = sctx->ps_shader.current;
 	struct si_shader *vs = si_get_vs_state(sctx);
 	struct tgsi_shader_info *psinfo;
@@ -1009,7 +1009,7 @@ bcolor:
 
 static void si_emit_spi_ps_input(struct si_context *sctx, struct r600_atom *atom)
 {
-	struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
+	struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
 	struct si_shader *ps = sctx->ps_shader.current;
 	unsigned input_ena;
 

From 8dd1ee6ff30fd481dd33de93e5d613d11331c1f6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <marek.olsak@amd.com>
Date: Sat, 7 Nov 2015 14:36:38 +0100
Subject: [PATCH 238/287] r600g: don't set predication on non-draw packets
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

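Setting the predication bit on non-draw packets has no effect, so pass 0
for it there and keep it only on the draw packets themselves.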

Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
---
 src/gallium/drivers/r600/r600_state_common.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c
index e1608575103..eb5436197c6 100644
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -1663,7 +1663,7 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
 
 	/* Draw packets. */
 	if (!info.indirect) {
-		cs->buf[cs->cdw++] = PKT3(PKT3_NUM_INSTANCES, 0, rctx->b.predicate_drawing);
+		cs->buf[cs->cdw++] = PKT3(PKT3_NUM_INSTANCES, 0, 0);
 		cs->buf[cs->cdw++] = info.instance_count;
 	}
 
@@ -1675,12 +1675,12 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
 		rctx->vgt_state.last_draw_was_indirect = true;
 		rctx->last_start_instance = -1;
 
-		cs->buf[cs->cdw++] = PKT3(EG_PKT3_SET_BASE, 2, rctx->b.predicate_drawing);
+		cs->buf[cs->cdw++] = PKT3(EG_PKT3_SET_BASE, 2, 0);
 		cs->buf[cs->cdw++] = EG_DRAW_INDEX_INDIRECT_PATCH_TABLE_BASE;
 		cs->buf[cs->cdw++] = va;
 		cs->buf[cs->cdw++] = (va >> 32UL) & 0xFF;
 
-		cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, rctx->b.predicate_drawing);
+		cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
 		cs->buf[cs->cdw++] = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx,
 							   (struct r600_resource*)info.indirect,
 							   RADEON_USAGE_READ,
@@ -1688,7 +1688,7 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
 	}
 
 	if (info.indexed) {
-		cs->buf[cs->cdw++] = PKT3(PKT3_INDEX_TYPE, 0, rctx->b.predicate_drawing);
+		cs->buf[cs->cdw++] = PKT3(PKT3_INDEX_TYPE, 0, 0);
 		cs->buf[cs->cdw++] = ib.index_size == 4 ?
 					(VGT_INDEX_32 | (R600_BIG_ENDIAN ? VGT_DMA_SWAP_32_BIT : 0)) :
 					(VGT_INDEX_16 | (R600_BIG_ENDIAN ? VGT_DMA_SWAP_16_BIT : 0));
@@ -1710,7 +1710,7 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
 				cs->buf[cs->cdw++] = (va >> 32UL) & 0xFF;
 				cs->buf[cs->cdw++] = info.count;
 				cs->buf[cs->cdw++] = V_0287F0_DI_SRC_SEL_DMA;
-				cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, rctx->b.predicate_drawing);
+				cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
 				cs->buf[cs->cdw++] = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx,
 									   (struct r600_resource*)ib.buffer,
 									   RADEON_USAGE_READ,
@@ -1719,17 +1719,17 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
 			else {
 				uint32_t max_size = (ib.buffer->width0 - ib.offset) / ib.index_size;
 
-				cs->buf[cs->cdw++] = PKT3(EG_PKT3_INDEX_BASE, 1, rctx->b.predicate_drawing);
+				cs->buf[cs->cdw++] = PKT3(EG_PKT3_INDEX_BASE, 1, 0);
 				cs->buf[cs->cdw++] = va;
 				cs->buf[cs->cdw++] = (va >> 32UL) & 0xFF;
 
-				cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, rctx->b.predicate_drawing);
+				cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
 				cs->buf[cs->cdw++] = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx,
 									   (struct r600_resource*)ib.buffer,
 									   RADEON_USAGE_READ,
                                                                            RADEON_PRIO_INDEX_BUFFER);
 
-				cs->buf[cs->cdw++] = PKT3(EG_PKT3_INDEX_BUFFER_SIZE, 0, rctx->b.predicate_drawing);
+				cs->buf[cs->cdw++] = PKT3(EG_PKT3_INDEX_BUFFER_SIZE, 0, 0);
 				cs->buf[cs->cdw++] = max_size;
 
 				cs->buf[cs->cdw++] = PKT3(EG_PKT3_DRAW_INDEX_INDIRECT, 1, rctx->b.predicate_drawing);

From 6eff5415e46fb43619b543368fa427334d267a71 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <marek.olsak@amd.com>
Date: Sat, 7 Nov 2015 14:45:58 +0100
Subject: [PATCH 239/287] gallium/radeon: simplify disabling render condition
 for u_blitter
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

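Instead of saving and restoring the render condition through u_blitter,
just disable it by not setting the predication bit on draw packets while
a blitter operation is in progress.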

Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
---
 src/gallium/drivers/r600/r600_blit.c          | 12 +++++-------
 src/gallium/drivers/r600/r600_state_common.c  | 11 ++++++-----
 src/gallium/drivers/radeon/r600_pipe_common.h |  3 ++-
 src/gallium/drivers/radeonsi/si_blit.c        | 10 ++++------
 src/gallium/drivers/radeonsi/si_state_draw.c  |  9 +++++----
 5 files changed, 22 insertions(+), 23 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_blit.c b/src/gallium/drivers/r600/r600_blit.c
index fff841c0ded..8a90489318e 100644
--- a/src/gallium/drivers/r600/r600_blit.c
+++ b/src/gallium/drivers/r600/r600_blit.c
@@ -87,18 +87,16 @@ static void r600_blitter_begin(struct pipe_context *ctx, enum r600_blitter_op op
 			(struct pipe_sampler_view**)rctx->samplers[PIPE_SHADER_FRAGMENT].views.views);
 	}
 
-	if ((op & R600_DISABLE_RENDER_COND) && rctx->b.current_render_cond) {
-           util_blitter_save_render_condition(rctx->blitter,
-                                              rctx->b.current_render_cond,
-                                              rctx->b.current_render_cond_cond,
-                                              rctx->b.current_render_cond_mode);
-        }
+	if (op & R600_DISABLE_RENDER_COND)
+		rctx->b.render_cond_force_off = true;
 }
 
 static void r600_blitter_end(struct pipe_context *ctx)
 {
 	struct r600_context *rctx = (struct r600_context *)ctx;
-        r600_resume_nontimer_queries(&rctx->b);
+
+	rctx->b.render_cond_force_off = false;
+	r600_resume_nontimer_queries(&rctx->b);
 }
 
 static unsigned u_max_sample(struct pipe_resource *r)
diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c
index eb5436197c6..28aedffc42d 100644
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -1478,6 +1478,7 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
 	struct pipe_draw_info info = *dinfo;
 	struct pipe_index_buffer ib = {};
 	struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
+	bool render_cond_bit = rctx->b.predicate_drawing && !rctx->b.render_cond_force_off;
 	uint64_t mask;
 
 	if (!info.indirect && !info.count && (info.indexed || !info.count_from_stream_output)) {
@@ -1696,7 +1697,7 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
 		if (ib.user_buffer) {
 			unsigned size_bytes = info.count*ib.index_size;
 			unsigned size_dw = align(size_bytes, 4) / 4;
-			cs->buf[cs->cdw++] = PKT3(PKT3_DRAW_INDEX_IMMD, 1 + size_dw, rctx->b.predicate_drawing);
+			cs->buf[cs->cdw++] = PKT3(PKT3_DRAW_INDEX_IMMD, 1 + size_dw, render_cond_bit);
 			cs->buf[cs->cdw++] = info.count;
 			cs->buf[cs->cdw++] = V_0287F0_DI_SRC_SEL_IMMEDIATE;
 			memcpy(cs->buf+cs->cdw, ib.user_buffer, size_bytes);
@@ -1705,7 +1706,7 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
 			uint64_t va = r600_resource(ib.buffer)->gpu_address + ib.offset;
 
 			if (likely(!info.indirect)) {
-				cs->buf[cs->cdw++] = PKT3(PKT3_DRAW_INDEX, 3, rctx->b.predicate_drawing);
+				cs->buf[cs->cdw++] = PKT3(PKT3_DRAW_INDEX, 3, render_cond_bit);
 				cs->buf[cs->cdw++] = va;
 				cs->buf[cs->cdw++] = (va >> 32UL) & 0xFF;
 				cs->buf[cs->cdw++] = info.count;
@@ -1732,7 +1733,7 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
 				cs->buf[cs->cdw++] = PKT3(EG_PKT3_INDEX_BUFFER_SIZE, 0, 0);
 				cs->buf[cs->cdw++] = max_size;
 
-				cs->buf[cs->cdw++] = PKT3(EG_PKT3_DRAW_INDEX_INDIRECT, 1, rctx->b.predicate_drawing);
+				cs->buf[cs->cdw++] = PKT3(EG_PKT3_DRAW_INDEX_INDIRECT, 1, render_cond_bit);
 				cs->buf[cs->cdw++] = info.indirect_offset;
 				cs->buf[cs->cdw++] = V_0287F0_DI_SRC_SEL_DMA;
 			}
@@ -1758,11 +1759,11 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
 		}
 
 		if (likely(!info.indirect)) {
-			cs->buf[cs->cdw++] = PKT3(PKT3_DRAW_INDEX_AUTO, 1, rctx->b.predicate_drawing);
+			cs->buf[cs->cdw++] = PKT3(PKT3_DRAW_INDEX_AUTO, 1, render_cond_bit);
 			cs->buf[cs->cdw++] = info.count;
 		}
 		else {
-			cs->buf[cs->cdw++] = PKT3(EG_PKT3_DRAW_INDIRECT, 1, rctx->b.predicate_drawing);
+			cs->buf[cs->cdw++] = PKT3(EG_PKT3_DRAW_INDIRECT, 1, render_cond_bit);
 			cs->buf[cs->cdw++] = info.indirect_offset;
 		}
 		cs->buf[cs->cdw++] = V_0287F0_DI_SRC_SEL_AUTO_INDEX |
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h
index 2e3f1547e45..139c377cd6e 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -420,7 +420,8 @@ struct r600_common_context {
 	struct pipe_query		*current_render_cond;
 	unsigned			current_render_cond_mode;
 	boolean				current_render_cond_cond;
-	boolean				predicate_drawing;
+	bool				predicate_drawing;
+	bool				render_cond_force_off; /* for u_blitter */
 	/* For context flushing. */
 	struct pipe_query		*saved_render_cond;
 	boolean				saved_render_cond_cond;
diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c
index 31f22c4acf7..13d8e6f2a5f 100644
--- a/src/gallium/drivers/radeonsi/si_blit.c
+++ b/src/gallium/drivers/radeonsi/si_blit.c
@@ -86,17 +86,15 @@ static void si_blitter_begin(struct pipe_context *ctx, enum si_blitter_op op)
 			sctx->samplers[PIPE_SHADER_FRAGMENT].views.views);
 	}
 
-	if ((op & SI_DISABLE_RENDER_COND) && sctx->b.current_render_cond) {
-		util_blitter_save_render_condition(sctx->blitter,
-                                                   sctx->b.current_render_cond,
-                                                   sctx->b.current_render_cond_cond,
-                                                   sctx->b.current_render_cond_mode);
-	}
+	if (op & SI_DISABLE_RENDER_COND)
+		sctx->b.render_cond_force_off = true;
 }
 
 static void si_blitter_end(struct pipe_context *ctx)
 {
 	struct si_context *sctx = (struct si_context *)ctx;
+
+	sctx->b.render_cond_force_off = false;
 	r600_resume_nontimer_queries(&sctx->b);
 }
 
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index 3015374c648..ebc01e8e4ae 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -457,6 +457,7 @@ static void si_emit_draw_packets(struct si_context *sctx,
 {
 	struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
 	unsigned sh_base_reg = sctx->shader_userdata.sh_base[PIPE_SHADER_VERTEX];
+	bool render_cond_bit = sctx->b.predicate_drawing && !sctx->b.render_cond_force_off;
 
 	if (info->count_from_stream_output) {
 		struct r600_so_target *t =
@@ -563,7 +564,7 @@ static void si_emit_draw_packets(struct si_context *sctx,
 			radeon_emit(cs, PKT3(PKT3_INDEX_BUFFER_SIZE, 0, 0));
 			radeon_emit(cs, index_max_size);
 
-			radeon_emit(cs, PKT3(PKT3_DRAW_INDEX_INDIRECT, 3, sctx->b.predicate_drawing));
+			radeon_emit(cs, PKT3(PKT3_DRAW_INDEX_INDIRECT, 3, render_cond_bit));
 			radeon_emit(cs, info->indirect_offset);
 			radeon_emit(cs, (sh_base_reg + SI_SGPR_BASE_VERTEX * 4 - SI_SH_REG_OFFSET) >> 2);
 			radeon_emit(cs, (sh_base_reg + SI_SGPR_START_INSTANCE * 4 - SI_SH_REG_OFFSET) >> 2);
@@ -571,7 +572,7 @@ static void si_emit_draw_packets(struct si_context *sctx,
 		} else {
 			index_va += info->start * ib->index_size;
 
-			radeon_emit(cs, PKT3(PKT3_DRAW_INDEX_2, 4, sctx->b.predicate_drawing));
+			radeon_emit(cs, PKT3(PKT3_DRAW_INDEX_2, 4, render_cond_bit));
 			radeon_emit(cs, index_max_size);
 			radeon_emit(cs, index_va);
 			radeon_emit(cs, (index_va >> 32UL) & 0xFF);
@@ -590,13 +591,13 @@ static void si_emit_draw_packets(struct si_context *sctx,
 			radeon_emit(cs, indirect_va);
 			radeon_emit(cs, indirect_va >> 32);
 
-			radeon_emit(cs, PKT3(PKT3_DRAW_INDIRECT, 3, sctx->b.predicate_drawing));
+			radeon_emit(cs, PKT3(PKT3_DRAW_INDIRECT, 3, render_cond_bit));
 			radeon_emit(cs, info->indirect_offset);
 			radeon_emit(cs, (sh_base_reg + SI_SGPR_BASE_VERTEX * 4 - SI_SH_REG_OFFSET) >> 2);
 			radeon_emit(cs, (sh_base_reg + SI_SGPR_START_INSTANCE * 4 - SI_SH_REG_OFFSET) >> 2);
 			radeon_emit(cs, V_0287F0_DI_SRC_SEL_AUTO_INDEX);
 		} else {
-			radeon_emit(cs, PKT3(PKT3_DRAW_INDEX_AUTO, 1, sctx->b.predicate_drawing));
+			radeon_emit(cs, PKT3(PKT3_DRAW_INDEX_AUTO, 1, render_cond_bit));
 			radeon_emit(cs, info->count);
 			radeon_emit(cs, V_0287F0_DI_SRC_SEL_AUTO_INDEX |
 				    S_0287F0_USE_OPAQUE(!!info->count_from_stream_output));

From 600e212d87017db613b8068decfeab3e4bf86deb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <marek.olsak@amd.com>
Date: Sat, 7 Nov 2015 14:55:23 +0100
Subject: [PATCH 240/287] gallium/radeon: don't use PREDICATION_OP_CLEAR
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

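Not setting the predication bit on draw packets is sufficient to turn the
render condition off, so the PREDICATION_OP_CLEAR packet is no longer
emitted.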

Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
---
 src/gallium/drivers/radeon/r600_query.c | 64 ++++++++++---------------
 1 file changed, 26 insertions(+), 38 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_query.c b/src/gallium/drivers/radeon/r600_query.c
index ce0d7e77d43..18383148a3f 100644
--- a/src/gallium/drivers/radeon/r600_query.c
+++ b/src/gallium/drivers/radeon/r600_query.c
@@ -307,6 +307,8 @@ static void r600_emit_query_predication(struct r600_common_context *ctx, struct
 					int operation, bool flag_wait)
 {
 	struct radeon_winsys_cs *cs = ctx->gfx.cs;
+	struct r600_query_buffer *qbuf;
+	unsigned count;
 	uint32_t op = PRED_OP(operation);
 
 	/* if true then invert, see GL_ARB_conditional_render_inverted */
@@ -315,41 +317,30 @@ static void r600_emit_query_predication(struct r600_common_context *ctx, struct
 	else
 		op |= PREDICATION_DRAW_VISIBLE; /* Draw if visable/overflow */
 
-	if (operation == PREDICATION_OP_CLEAR) {
-		ctx->need_gfx_cs_space(&ctx->b, 3, FALSE);
+	/* Find how many results there are. */
+	count = 0;
+	for (qbuf = &query->buffer; qbuf; qbuf = qbuf->previous)
+		count += qbuf->results_end / query->result_size;
+	
+	ctx->need_gfx_cs_space(&ctx->b, 5 * count, TRUE);
+	
+	op |= flag_wait ? PREDICATION_HINT_WAIT : PREDICATION_HINT_NOWAIT_DRAW;
+	
+	/* emit predicate packets for all data blocks */
+	for (qbuf = &query->buffer; qbuf; qbuf = qbuf->previous) {
+		unsigned results_base = 0;
+		uint64_t va = qbuf->buf->gpu_address;
 
-		radeon_emit(cs, PKT3(PKT3_SET_PREDICATION, 1, 0));
-		radeon_emit(cs, 0);
-		radeon_emit(cs, PRED_OP(PREDICATION_OP_CLEAR));
-	} else {
-		struct r600_query_buffer *qbuf;
-		unsigned count;
-		/* Find how many results there are. */
-		count = 0;
-		for (qbuf = &query->buffer; qbuf; qbuf = qbuf->previous) {
-			count += qbuf->results_end / query->result_size;
-		}
-	
-		ctx->need_gfx_cs_space(&ctx->b, 5 * count, TRUE);
-	
-		op |= flag_wait ? PREDICATION_HINT_WAIT : PREDICATION_HINT_NOWAIT_DRAW;
-	
-		/* emit predicate packets for all data blocks */
-		for (qbuf = &query->buffer; qbuf; qbuf = qbuf->previous) {
-			unsigned results_base = 0;
-			uint64_t va = qbuf->buf->gpu_address;
-	
-			while (results_base < qbuf->results_end) {
-				radeon_emit(cs, PKT3(PKT3_SET_PREDICATION, 1, 0));
-				radeon_emit(cs, va + results_base);
-				radeon_emit(cs, op | (((va + results_base) >> 32) & 0xFF));
-				r600_emit_reloc(ctx, &ctx->gfx, qbuf->buf, RADEON_USAGE_READ,
-						RADEON_PRIO_QUERY);
-				results_base += query->result_size;
-	
-				/* set CONTINUE bit for all packets except the first */
-				op |= PREDICATION_CONTINUE;
-			}
+		while (results_base < qbuf->results_end) {
+			radeon_emit(cs, PKT3(PKT3_SET_PREDICATION, 1, 0));
+			radeon_emit(cs, va + results_base);
+			radeon_emit(cs, op | (((va + results_base) >> 32) & 0xFF));
+			r600_emit_reloc(ctx, &ctx->gfx, qbuf->buf, RADEON_USAGE_READ,
+					RADEON_PRIO_QUERY);
+			results_base += query->result_size;
+
+			/* set CONTINUE bit for all packets except the first */
+			op |= PREDICATION_CONTINUE;
 		}
 	}
 }
@@ -828,10 +819,7 @@ static void r600_render_condition(struct pipe_context *ctx,
 	rctx->current_render_cond_mode = mode;
 
 	if (query == NULL) {
-		if (rctx->predicate_drawing) {
-			rctx->predicate_drawing = false;
-			r600_emit_query_predication(rctx, NULL, PREDICATION_OP_CLEAR, false);
-		}
+		rctx->predicate_drawing = false;
 		return;
 	}
 

From 35219076227e83ad2a406942c8b009337a4746d8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <marek.olsak@amd.com>
Date: Sat, 7 Nov 2015 15:00:55 +0100
Subject: [PATCH 241/287] gallium/radeon: simplify restoring render condition
 after flush
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
---
 src/gallium/drivers/r600/r600_hw_context.c    |  5 -----
 src/gallium/drivers/radeon/r600_pipe_common.c | 22 +++++--------------
 src/gallium/drivers/radeon/r600_pipe_common.h |  4 ----
 3 files changed, 5 insertions(+), 26 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_hw_context.c b/src/gallium/drivers/r600/r600_hw_context.c
index 0fc58df157d..f810b7150ea 100644
--- a/src/gallium/drivers/r600/r600_hw_context.c
+++ b/src/gallium/drivers/r600/r600_hw_context.c
@@ -80,11 +80,6 @@ void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw,
 		num_dw += ctx->b.streamout.num_dw_for_end;
 	}
 
-	/* Count in render_condition(NULL) at the end of CS. */
-	if (ctx->b.predicate_drawing) {
-		num_dw += 3;
-	}
-
 	/* SX_MISC */
 	if (ctx->b.chip_class == R600) {
 		num_dw += 3;
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c b/src/gallium/drivers/radeon/r600_pipe_common.c
index 87399145be7..b6f6c92c5cf 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.c
+++ b/src/gallium/drivers/radeon/r600_pipe_common.c
@@ -134,17 +134,6 @@ static void r600_memory_barrier(struct pipe_context *ctx, unsigned flags)
 
 void r600_preflush_suspend_features(struct r600_common_context *ctx)
 {
-	/* Disable render condition. */
-	ctx->saved_render_cond = NULL;
-	ctx->saved_render_cond_cond = FALSE;
-	ctx->saved_render_cond_mode = 0;
-	if (ctx->current_render_cond) {
-		ctx->saved_render_cond = ctx->current_render_cond;
-		ctx->saved_render_cond_cond = ctx->current_render_cond_cond;
-		ctx->saved_render_cond_mode = ctx->current_render_cond_mode;
-		ctx->b.render_condition(&ctx->b, NULL, FALSE, 0);
-	}
-
 	/* suspend queries */
 	ctx->queries_suspended_for_flush = false;
 	if (ctx->num_cs_dw_nontimer_queries_suspend) {
@@ -173,12 +162,11 @@ void r600_postflush_resume_features(struct r600_common_context *ctx)
 		r600_resume_timer_queries(ctx);
 	}
 
-	/* Re-enable render condition. */
-	if (ctx->saved_render_cond) {
-		ctx->b.render_condition(&ctx->b, ctx->saved_render_cond,
-					  ctx->saved_render_cond_cond,
-					  ctx->saved_render_cond_mode);
-	}
+	/* Re-emit PKT3_SET_PREDICATION. */
+	if (ctx->current_render_cond)
+		ctx->b.render_condition(&ctx->b, ctx->current_render_cond,
+					ctx->current_render_cond_cond,
+					ctx->current_render_cond_mode);
 }
 
 static void r600_flush_from_st(struct pipe_context *ctx,
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h
index 139c377cd6e..2a3a3a707e3 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -422,10 +422,6 @@ struct r600_common_context {
 	boolean				current_render_cond_cond;
 	bool				predicate_drawing;
 	bool				render_cond_force_off; /* for u_blitter */
-	/* For context flushing. */
-	struct pipe_query		*saved_render_cond;
-	boolean				saved_render_cond_cond;
-	unsigned			saved_render_cond_mode;
 
 	/* MSAA sample locations.
 	 * The first index is the sample index.

From 12596cfd4cea4cff2bc067876d5ff25c54cdc874 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <marek.olsak@amd.com>
Date: Sat, 7 Nov 2015 15:39:39 +0100
Subject: [PATCH 242/287] gallium/radeon: atomize render condition
 (SET_PREDICATION)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
---
 src/gallium/drivers/r600/evergreen_state.c    |  1 +
 src/gallium/drivers/r600/r600_hw_context.c    |  1 +
 src/gallium/drivers/r600/r600_pipe.h          |  2 +-
 src/gallium/drivers/r600/r600_state.c         |  1 +
 src/gallium/drivers/radeon/r600_pipe_common.c |  6 --
 src/gallium/drivers/radeon/r600_pipe_common.h |  1 +
 src/gallium/drivers/radeon/r600_query.c       | 75 +++++++++----------
 src/gallium/drivers/radeonsi/si_hw_context.c  |  1 +
 src/gallium/drivers/radeonsi/si_state.c       |  1 +
 src/gallium/drivers/radeonsi/si_state.h       |  1 +
 10 files changed, 45 insertions(+), 45 deletions(-)

diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c
index 2fcc49bf7e2..684eee7a355 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -3543,6 +3543,7 @@ void evergreen_init_state_functions(struct r600_context *rctx)
 	r600_init_atom(rctx, &rctx->viewport.atom, id++, r600_emit_viewport_state, 0);
 	r600_init_atom(rctx, &rctx->stencil_ref.atom, id++, r600_emit_stencil_ref, 4);
 	r600_init_atom(rctx, &rctx->vertex_fetch_shader.atom, id++, evergreen_emit_vertex_fetch_shader, 5);
+	r600_add_atom(rctx, &rctx->b.render_cond_atom, id++);
 	r600_add_atom(rctx, &rctx->b.streamout.begin_atom, id++);
 	r600_add_atom(rctx, &rctx->b.streamout.enable_atom, id++);
 	r600_init_atom(rctx, &rctx->vertex_shader.atom, id++, r600_emit_shader, 23);
diff --git a/src/gallium/drivers/r600/r600_hw_context.c b/src/gallium/drivers/r600/r600_hw_context.c
index f810b7150ea..6409f0bd9f7 100644
--- a/src/gallium/drivers/r600/r600_hw_context.c
+++ b/src/gallium/drivers/r600/r600_hw_context.c
@@ -323,6 +323,7 @@ void r600_begin_new_cs(struct r600_context *ctx)
 	}
 	r600_mark_atom_dirty(ctx, &ctx->vertex_shader.atom);
 	r600_mark_atom_dirty(ctx, &ctx->b.streamout.enable_atom);
+	r600_mark_atom_dirty(ctx, &ctx->b.render_cond_atom);
 
 	if (ctx->blend_state.cso)
 		r600_mark_atom_dirty(ctx, &ctx->blend_state.atom);
diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h
index 950bb6be76c..bbb55adef82 100644
--- a/src/gallium/drivers/r600/r600_pipe.h
+++ b/src/gallium/drivers/r600/r600_pipe.h
@@ -38,7 +38,7 @@
 
 #include "tgsi/tgsi_scan.h"
 
-#define R600_NUM_ATOMS 42
+#define R600_NUM_ATOMS 43
 
 #define R600_MAX_VIEWPORTS 16
 
diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c
index ebb15ee9789..c2d4abc5ea1 100644
--- a/src/gallium/drivers/r600/r600_state.c
+++ b/src/gallium/drivers/r600/r600_state.c
@@ -3106,6 +3106,7 @@ void r600_init_state_functions(struct r600_context *rctx)
 	r600_init_atom(rctx, &rctx->config_state.atom, id++, r600_emit_config_state, 3);
 	r600_init_atom(rctx, &rctx->stencil_ref.atom, id++, r600_emit_stencil_ref, 4);
 	r600_init_atom(rctx, &rctx->vertex_fetch_shader.atom, id++, r600_emit_vertex_fetch_shader, 5);
+	r600_add_atom(rctx, &rctx->b.render_cond_atom, id++);
 	r600_add_atom(rctx, &rctx->b.streamout.begin_atom, id++);
 	r600_add_atom(rctx, &rctx->b.streamout.enable_atom, id++);
 	r600_init_atom(rctx, &rctx->vertex_shader.atom, id++, r600_emit_shader, 23);
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c b/src/gallium/drivers/radeon/r600_pipe_common.c
index b6f6c92c5cf..3599692a857 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.c
+++ b/src/gallium/drivers/radeon/r600_pipe_common.c
@@ -161,12 +161,6 @@ void r600_postflush_resume_features(struct r600_common_context *ctx)
 		r600_resume_nontimer_queries(ctx);
 		r600_resume_timer_queries(ctx);
 	}
-
-	/* Re-emit PKT3_SET_PREDICATION. */
-	if (ctx->current_render_cond)
-		ctx->b.render_condition(&ctx->b, ctx->current_render_cond,
-					ctx->current_render_cond_cond,
-					ctx->current_render_cond_mode);
 }
 
 static void r600_flush_from_st(struct pipe_context *ctx,
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h
index 2a3a3a707e3..09465ae0596 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -417,6 +417,7 @@ struct r600_common_context {
 	unsigned			num_draw_calls;
 
 	/* Render condition. */
+	struct r600_atom		render_cond_atom;
 	struct pipe_query		*current_render_cond;
 	unsigned			current_render_cond_mode;
 	boolean				current_render_cond_cond;
diff --git a/src/gallium/drivers/radeon/r600_query.c b/src/gallium/drivers/radeon/r600_query.c
index 18383148a3f..145b629deb1 100644
--- a/src/gallium/drivers/radeon/r600_query.c
+++ b/src/gallium/drivers/radeon/r600_query.c
@@ -303,13 +303,36 @@ static void r600_emit_query_end(struct r600_common_context *ctx, struct r600_que
 	r600_update_prims_generated_query_state(ctx, query->type, -1);
 }
 
-static void r600_emit_query_predication(struct r600_common_context *ctx, struct r600_query *query,
-					int operation, bool flag_wait)
+static void r600_emit_query_predication(struct r600_common_context *ctx,
+					struct r600_atom *atom)
 {
 	struct radeon_winsys_cs *cs = ctx->gfx.cs;
+	struct r600_query *query = (struct r600_query*)ctx->current_render_cond;
 	struct r600_query_buffer *qbuf;
-	unsigned count;
-	uint32_t op = PRED_OP(operation);
+	uint32_t op;
+	bool flag_wait;
+
+	if (!query)
+		return;
+
+	flag_wait = ctx->current_render_cond_mode == PIPE_RENDER_COND_WAIT ||
+		    ctx->current_render_cond_mode == PIPE_RENDER_COND_BY_REGION_WAIT;
+
+	switch (query->type) {
+	case PIPE_QUERY_OCCLUSION_COUNTER:
+	case PIPE_QUERY_OCCLUSION_PREDICATE:
+		op = PRED_OP(PREDICATION_OP_ZPASS);
+		break;
+	case PIPE_QUERY_PRIMITIVES_EMITTED:
+	case PIPE_QUERY_PRIMITIVES_GENERATED:
+	case PIPE_QUERY_SO_STATISTICS:
+	case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
+		op = PRED_OP(PREDICATION_OP_PRIMCOUNT);
+		break;
+	default:
+		assert(0);
+		return;
+	}
 
 	/* if true then invert, see GL_ARB_conditional_render_inverted */
 	if (ctx->current_render_cond_cond)
@@ -317,13 +340,6 @@ static void r600_emit_query_predication(struct r600_common_context *ctx, struct
 	else
 		op |= PREDICATION_DRAW_VISIBLE; /* Draw if visable/overflow */
 
-	/* Find how many results there are. */
-	count = 0;
-	for (qbuf = &query->buffer; qbuf; qbuf = qbuf->previous)
-		count += qbuf->results_end / query->result_size;
-	
-	ctx->need_gfx_cs_space(&ctx->b, 5 * count, TRUE);
-	
 	op |= flag_wait ? PREDICATION_HINT_WAIT : PREDICATION_HINT_NOWAIT_DRAW;
 	
 	/* emit predicate packets for all data blocks */
@@ -811,39 +827,21 @@ static void r600_render_condition(struct pipe_context *ctx,
 				  uint mode)
 {
 	struct r600_common_context *rctx = (struct r600_common_context *)ctx;
-	struct r600_query *rquery = (struct r600_query *)query;
-	bool wait_flag = false;
+	struct r600_query *rquery = (struct r600_query*)query;
+	struct r600_query_buffer *qbuf;
+	struct r600_atom *atom = &rctx->render_cond_atom;
 
 	rctx->current_render_cond = query;
 	rctx->current_render_cond_cond = condition;
 	rctx->current_render_cond_mode = mode;
+	rctx->predicate_drawing = query != NULL;
 
-	if (query == NULL) {
-		rctx->predicate_drawing = false;
-		return;
-	}
+	/* Compute the size of SET_PREDICATION packets. */
+	atom->num_dw = 0;
+	for (qbuf = &rquery->buffer; qbuf; qbuf = qbuf->previous)
+		atom->num_dw += (qbuf->results_end / rquery->result_size) * 5;
 
-	if (mode == PIPE_RENDER_COND_WAIT ||
-	    mode == PIPE_RENDER_COND_BY_REGION_WAIT) {
-		wait_flag = true;
-	}
-
-	rctx->predicate_drawing = true;
-
-	switch (rquery->type) {
-	case PIPE_QUERY_OCCLUSION_COUNTER:
-	case PIPE_QUERY_OCCLUSION_PREDICATE:
-		r600_emit_query_predication(rctx, rquery, PREDICATION_OP_ZPASS, wait_flag);
-		break;
-	case PIPE_QUERY_PRIMITIVES_EMITTED:
-	case PIPE_QUERY_PRIMITIVES_GENERATED:
-	case PIPE_QUERY_SO_STATISTICS:
-	case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
-		r600_emit_query_predication(rctx, rquery, PREDICATION_OP_PRIMCOUNT, wait_flag);
-		break;
-	default:
-		assert(0);
-	}
+	rctx->set_atom_dirty(rctx, atom, query != NULL);
 }
 
 static void r600_suspend_queries(struct r600_common_context *ctx,
@@ -1012,6 +1010,7 @@ void r600_query_init(struct r600_common_context *rctx)
 	rctx->b.begin_query = r600_begin_query;
 	rctx->b.end_query = r600_end_query;
 	rctx->b.get_query_result = r600_get_query_result;
+	rctx->render_cond_atom.emit = r600_emit_query_predication;
 
 	if (((struct r600_common_screen*)rctx->b.screen)->info.r600_num_backends > 0)
 	    rctx->b.render_condition = r600_render_condition;
diff --git a/src/gallium/drivers/radeonsi/si_hw_context.c b/src/gallium/drivers/radeonsi/si_hw_context.c
index 2f4f1eec04b..f28c11cb1d2 100644
--- a/src/gallium/drivers/radeonsi/si_hw_context.c
+++ b/src/gallium/drivers/radeonsi/si_hw_context.c
@@ -182,6 +182,7 @@ void si_begin_new_cs(struct si_context *ctx)
 	si_mark_atom_dirty(ctx, &ctx->spi_map);
 	si_mark_atom_dirty(ctx, &ctx->spi_ps_input);
 	si_mark_atom_dirty(ctx, &ctx->b.streamout.enable_atom);
+	si_mark_atom_dirty(ctx, &ctx->b.render_cond_atom);
 	si_all_descriptors_begin_new_cs(ctx);
 
 	ctx->scissors.dirty_mask = (1 << SI_MAX_VIEWPORTS) - 1;
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index 6c411c11a77..93847d5ec2f 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -3069,6 +3069,7 @@ static void si_init_config(struct si_context *sctx);
 
 void si_init_state_functions(struct si_context *sctx)
 {
+	si_init_external_atom(sctx, &sctx->b.render_cond_atom, &sctx->atoms.s.render_cond);
 	si_init_external_atom(sctx, &sctx->b.streamout.begin_atom, &sctx->atoms.s.streamout_begin);
 	si_init_external_atom(sctx, &sctx->b.streamout.enable_atom, &sctx->atoms.s.streamout_enable);
 
diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
index 8b9a311cd3f..f5ca661f8d7 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -110,6 +110,7 @@ union si_state_atoms {
 	struct {
 		/* The order matters. */
 		struct r600_atom *cache_flush;
+		struct r600_atom *render_cond;
 		struct r600_atom *streamout_begin;
 		struct r600_atom *streamout_enable; /* must be after streamout_begin */
 		struct r600_atom *framebuffer;

From 70c40cc9899c1e895004c3e25c4f763af44cd17d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <marek.olsak@amd.com>
Date: Sat, 7 Nov 2015 16:24:47 +0100
Subject: [PATCH 243/287] gallium/radeon: remove predicate_drawing flag
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
---
 src/gallium/drivers/r600/r600_state_common.c  | 2 +-
 src/gallium/drivers/radeon/r600_pipe_common.h | 1 -
 src/gallium/drivers/radeon/r600_query.c       | 1 -
 src/gallium/drivers/radeonsi/si_state_draw.c  | 2 +-
 4 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c
index 28aedffc42d..5cf520899cf 100644
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -1478,7 +1478,7 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
 	struct pipe_draw_info info = *dinfo;
 	struct pipe_index_buffer ib = {};
 	struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
-	bool render_cond_bit = rctx->b.predicate_drawing && !rctx->b.render_cond_force_off;
+	bool render_cond_bit = rctx->b.current_render_cond && !rctx->b.render_cond_force_off;
 	uint64_t mask;
 
 	if (!info.indirect && !info.count && (info.indexed || !info.count_from_stream_output)) {
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h
index 09465ae0596..ba9000f74ec 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -421,7 +421,6 @@ struct r600_common_context {
 	struct pipe_query		*current_render_cond;
 	unsigned			current_render_cond_mode;
 	boolean				current_render_cond_cond;
-	bool				predicate_drawing;
 	bool				render_cond_force_off; /* for u_blitter */
 
 	/* MSAA sample locations.
diff --git a/src/gallium/drivers/radeon/r600_query.c b/src/gallium/drivers/radeon/r600_query.c
index 145b629deb1..9f92587a54b 100644
--- a/src/gallium/drivers/radeon/r600_query.c
+++ b/src/gallium/drivers/radeon/r600_query.c
@@ -834,7 +834,6 @@ static void r600_render_condition(struct pipe_context *ctx,
 	rctx->current_render_cond = query;
 	rctx->current_render_cond_cond = condition;
 	rctx->current_render_cond_mode = mode;
-	rctx->predicate_drawing = query != NULL;
 
 	/* Compute the size of SET_PREDICATION packets. */
 	atom->num_dw = 0;
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index ebc01e8e4ae..79e88765d04 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -457,7 +457,7 @@ static void si_emit_draw_packets(struct si_context *sctx,
 {
 	struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
 	unsigned sh_base_reg = sctx->shader_userdata.sh_base[PIPE_SHADER_VERTEX];
-	bool render_cond_bit = sctx->b.predicate_drawing && !sctx->b.render_cond_force_off;
+	bool render_cond_bit = sctx->b.current_render_cond && !sctx->b.render_cond_force_off;
 
 	if (info->count_from_stream_output) {
 		struct r600_so_target *t =

From eb0d3e8a90df3f6a39a3ffc911a335554fc8cd98 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <marek.olsak@amd.com>
Date: Sat, 7 Nov 2015 16:30:01 +0100
Subject: [PATCH 244/287] gallium/radeon: shorten render_cond variable names
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Drop the "current_" prefix (current_render_cond* -> render_cond*)
and rename ..._cond_cond -> ..._invert.

Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
---
 src/gallium/drivers/r600/r600_state_common.c  |  2 +-
 src/gallium/drivers/radeon/r600_pipe_common.h |  6 +++---
 src/gallium/drivers/radeon/r600_query.c       | 14 +++++++-------
 src/gallium/drivers/radeon/r600_texture.c     |  2 +-
 src/gallium/drivers/radeonsi/si_state_draw.c  |  2 +-
 5 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c
index 5cf520899cf..d629194ca6e 100644
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -1478,7 +1478,7 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
 	struct pipe_draw_info info = *dinfo;
 	struct pipe_index_buffer ib = {};
 	struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
-	bool render_cond_bit = rctx->b.current_render_cond && !rctx->b.render_cond_force_off;
+	bool render_cond_bit = rctx->b.render_cond && !rctx->b.render_cond_force_off;
 	uint64_t mask;
 
 	if (!info.indirect && !info.count && (info.indexed || !info.count_from_stream_output)) {
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h
index ba9000f74ec..ebe633b9125 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -418,9 +418,9 @@ struct r600_common_context {
 
 	/* Render condition. */
 	struct r600_atom		render_cond_atom;
-	struct pipe_query		*current_render_cond;
-	unsigned			current_render_cond_mode;
-	boolean				current_render_cond_cond;
+	struct pipe_query		*render_cond;
+	unsigned			render_cond_mode;
+	boolean				render_cond_invert;
 	bool				render_cond_force_off; /* for u_blitter */
 
 	/* MSAA sample locations.
diff --git a/src/gallium/drivers/radeon/r600_query.c b/src/gallium/drivers/radeon/r600_query.c
index 9f92587a54b..8c2b601a96c 100644
--- a/src/gallium/drivers/radeon/r600_query.c
+++ b/src/gallium/drivers/radeon/r600_query.c
@@ -307,7 +307,7 @@ static void r600_emit_query_predication(struct r600_common_context *ctx,
 					struct r600_atom *atom)
 {
 	struct radeon_winsys_cs *cs = ctx->gfx.cs;
-	struct r600_query *query = (struct r600_query*)ctx->current_render_cond;
+	struct r600_query *query = (struct r600_query*)ctx->render_cond;
 	struct r600_query_buffer *qbuf;
 	uint32_t op;
 	bool flag_wait;
@@ -315,8 +315,8 @@ static void r600_emit_query_predication(struct r600_common_context *ctx,
 	if (!query)
 		return;
 
-	flag_wait = ctx->current_render_cond_mode == PIPE_RENDER_COND_WAIT ||
-		    ctx->current_render_cond_mode == PIPE_RENDER_COND_BY_REGION_WAIT;
+	flag_wait = ctx->render_cond_mode == PIPE_RENDER_COND_WAIT ||
+		    ctx->render_cond_mode == PIPE_RENDER_COND_BY_REGION_WAIT;
 
 	switch (query->type) {
 	case PIPE_QUERY_OCCLUSION_COUNTER:
@@ -335,7 +335,7 @@ static void r600_emit_query_predication(struct r600_common_context *ctx,
 	}
 
 	/* if true then invert, see GL_ARB_conditional_render_inverted */
-	if (ctx->current_render_cond_cond)
+	if (ctx->render_cond_invert)
 		op |= PREDICATION_DRAW_NOT_VISIBLE; /* Draw if not visable/overflow */
 	else
 		op |= PREDICATION_DRAW_VISIBLE; /* Draw if visable/overflow */
@@ -831,9 +831,9 @@ static void r600_render_condition(struct pipe_context *ctx,
 	struct r600_query_buffer *qbuf;
 	struct r600_atom *atom = &rctx->render_cond_atom;
 
-	rctx->current_render_cond = query;
-	rctx->current_render_cond_cond = condition;
-	rctx->current_render_cond_mode = mode;
+	rctx->render_cond = query;
+	rctx->render_cond_invert = condition;
+	rctx->render_cond_mode = mode;
 
 	/* Compute the size of SET_PREDICATION packets. */
 	atom->num_dw = 0;
diff --git a/src/gallium/drivers/radeon/r600_texture.c b/src/gallium/drivers/radeon/r600_texture.c
index edfdfe33187..3126cce8c22 100644
--- a/src/gallium/drivers/radeon/r600_texture.c
+++ b/src/gallium/drivers/radeon/r600_texture.c
@@ -1324,7 +1324,7 @@ void evergreen_do_fast_color_clear(struct r600_common_context *rctx,
 {
 	int i;
 
-	if (rctx->current_render_cond)
+	if (rctx->render_cond)
 		return;
 
 	for (i = 0; i < fb->nr_cbufs; i++) {
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index 79e88765d04..753abc8c103 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -457,7 +457,7 @@ static void si_emit_draw_packets(struct si_context *sctx,
 {
 	struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
 	unsigned sh_base_reg = sctx->shader_userdata.sh_base[PIPE_SHADER_VERTEX];
-	bool render_cond_bit = sctx->b.current_render_cond && !sctx->b.render_cond_force_off;
+	bool render_cond_bit = sctx->b.render_cond && !sctx->b.render_cond_force_off;
 
 	if (info->count_from_stream_output) {
 		struct r600_so_target *t =

From 3ab0c49f04e5039655ddc8b81cac325709b154fe Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <marek.olsak@amd.com>
Date: Sun, 8 Nov 2015 11:49:33 +0100
Subject: [PATCH 245/287] radeonsi: clean up small duplication in si_shader_gs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
---
 src/gallium/drivers/radeonsi/si_shader.h        |  3 ++-
 src/gallium/drivers/radeonsi/si_state_shaders.c | 11 ++++++-----
 2 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index fd5500c1ab3..bffffad9573 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -205,7 +205,8 @@ struct si_shader_selector {
 	unsigned	gs_output_prim;
 	unsigned	gs_max_out_vertices;
 	unsigned	gs_num_invocations;
-	unsigned	gsvs_itemsize;
+	unsigned	gsvs_vertex_size;
+	unsigned	max_gsvs_emit_size;
 
 	/* masks of "get_unique_index" bits */
 	uint64_t	inputs_read;
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 93a689d9a07..7a2cafeb9dc 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -226,9 +226,9 @@ static unsigned si_gs_get_max_stream(struct si_shader *shader)
 
 static void si_shader_gs(struct si_shader *shader)
 {
-	unsigned gs_vert_itemsize = shader->selector->info.num_outputs * 16;
+	unsigned gs_vert_itemsize = shader->selector->gsvs_vertex_size;
 	unsigned gs_max_vert_out = shader->selector->gs_max_out_vertices;
-	unsigned gsvs_itemsize = (gs_vert_itemsize * gs_max_vert_out) >> 2;
+	unsigned gsvs_itemsize = shader->selector->max_gsvs_emit_size >> 2;
 	unsigned gs_num_invocations = shader->selector->gs_num_invocations;
 	unsigned cut_mode;
 	struct si_pm4_state *pm4;
@@ -713,8 +713,9 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
 			sel->info.properties[TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES];
 		sel->gs_num_invocations =
 			sel->info.properties[TGSI_PROPERTY_GS_INVOCATIONS];
-		sel->gsvs_itemsize = sel->info.num_outputs * 16 *
-				     sel->gs_max_out_vertices;
+		sel->gsvs_vertex_size = sel->info.num_outputs * 16;
+		sel->max_gsvs_emit_size = sel->gsvs_vertex_size *
+					  sel->gs_max_out_vertices;
 
 		for (i = 0; i < sel->info.num_inputs; i++) {
 			unsigned name = sel->info.input_semantic_name[i];
@@ -1144,7 +1145,7 @@ static void si_init_gs_rings(struct si_context *sctx)
 
 static void si_update_gs_rings(struct si_context *sctx)
 {
-	unsigned gsvs_itemsize = sctx->gs_shader.cso->gsvs_itemsize;
+	unsigned gsvs_itemsize = sctx->gs_shader.cso->max_gsvs_emit_size;
 	uint64_t offset;
 
 	if (gsvs_itemsize == sctx->last_gsvs_itemsize)

From a0cf58996197d99cc7d743b76be977cc2359dca9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <marek.olsak@amd.com>
Date: Sun, 8 Nov 2015 12:05:39 +0100
Subject: [PATCH 246/287] radeonsi: move maximum gs stream calculation into
 create_shader
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
---
 src/gallium/drivers/radeonsi/si_shader.h      |  1 +
 .../drivers/radeonsi/si_state_shaders.c       | 22 +++++--------------
 2 files changed, 7 insertions(+), 16 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index bffffad9573..1dd25227e9d 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -205,6 +205,7 @@ struct si_shader_selector {
 	unsigned	gs_output_prim;
 	unsigned	gs_max_out_vertices;
 	unsigned	gs_num_invocations;
+	unsigned	max_gs_stream; /* count - 1 */
 	unsigned	gsvs_vertex_size;
 	unsigned	max_gsvs_emit_size;
 
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 7a2cafeb9dc..0e403c492f5 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -209,21 +209,6 @@ static void si_shader_es(struct si_shader *shader)
 		si_set_tesseval_regs(shader, pm4);
 }
 
-static unsigned si_gs_get_max_stream(struct si_shader *shader)
-{
-	struct pipe_stream_output_info *so = &shader->selector->so;
-	unsigned max_stream = 0, i;
-
-	if (so->num_outputs == 0)
-		return 0;
-
-	for (i = 0; i < so->num_outputs; i++) {
-		if (so->output[i].stream > max_stream)
-			max_stream = so->output[i].stream;
-	}
-	return max_stream;
-}
-
 static void si_shader_gs(struct si_shader *shader)
 {
 	unsigned gs_vert_itemsize = shader->selector->gsvs_vertex_size;
@@ -234,7 +219,7 @@ static void si_shader_gs(struct si_shader *shader)
 	struct si_pm4_state *pm4;
 	unsigned num_sgprs, num_user_sgprs;
 	uint64_t va;
-	unsigned max_stream = si_gs_get_max_stream(shader);
+	unsigned max_stream = shader->selector->max_gs_stream;
 
 	/* The GSVS_RING_ITEMSIZE register takes 15 bits */
 	assert(gsvs_itemsize < (1 << 15));
@@ -717,6 +702,11 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
 		sel->max_gsvs_emit_size = sel->gsvs_vertex_size *
 					  sel->gs_max_out_vertices;
 
+		sel->max_gs_stream = 0;
+		for (i = 0; i < sel->so.num_outputs; i++)
+			sel->max_gs_stream = MAX2(sel->max_gs_stream,
+						  sel->so.output[i].stream);
+
 		for (i = 0; i < sel->info.num_inputs; i++) {
 			unsigned name = sel->info.input_semantic_name[i];
 			unsigned index = sel->info.input_semantic_index[i];

From 4acd856088c31ab7ebbfbe5010db1fbcca72845c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <marek.olsak@amd.com>
Date: Sun, 8 Nov 2015 12:12:46 +0100
Subject: [PATCH 247/287] radeonsi: calculate ESGS_RING_ITEMSIZE in
 create_shader
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
---
 src/gallium/drivers/radeonsi/si_shader.h        | 1 +
 src/gallium/drivers/radeonsi/si_state_shaders.c | 3 ++-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index 1dd25227e9d..b87fb715aac 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -202,6 +202,7 @@ struct si_shader_selector {
 	bool		forces_persample_interp_for_persp;
 	bool		forces_persample_interp_for_linear;
 
+	unsigned	esgs_itemsize;
 	unsigned	gs_output_prim;
 	unsigned	gs_max_out_vertices;
 	unsigned	gs_num_invocations;
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 0e403c492f5..04754a7e0ea 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -251,7 +251,7 @@ static void si_shader_gs(struct si_shader *shader)
 	si_pm4_set_reg(pm4, R_028A68_VGT_GSVS_RING_OFFSET_3, gsvs_itemsize * ((max_stream >= 3) ? 3 : 1));
 
 	si_pm4_set_reg(pm4, R_028AAC_VGT_ESGS_RING_ITEMSIZE,
-		       util_bitcount64(shader->selector->inputs_read) * (16 >> 2));
+		       shader->selector->esgs_itemsize / 4);
 	si_pm4_set_reg(pm4, R_028AB0_VGT_GSVS_RING_ITEMSIZE, gsvs_itemsize * (max_stream + 1));
 
 	si_pm4_set_reg(pm4, R_028B38_VGT_GS_MAX_VERT_OUT, gs_max_vert_out);
@@ -739,6 +739,7 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
 					1llu << si_shader_io_get_unique_index(name, index);
 			}
 		}
+		sel->esgs_itemsize = util_last_bit64(sel->outputs_written) * 16;
 		break;
 	case PIPE_SHADER_FRAGMENT:
 		for (i = 0; i < sel->info.num_outputs; i++) {

From 2f5d911ba2b0d477bce80e4dd3ae4d9748c6f784 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <marek.olsak@amd.com>
Date: Sun, 8 Nov 2015 12:15:54 +0100
Subject: [PATCH 248/287] radeonsi: rename si_update_gs_rings
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
---
 src/gallium/drivers/radeonsi/si_state_shaders.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 04754a7e0ea..6206dc62c1b 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -1134,7 +1134,7 @@ static void si_init_gs_rings(struct si_context *sctx)
 			   false, false, 0, 0, 0);
 }
 
-static void si_update_gs_rings(struct si_context *sctx)
+static void si_update_gsvs_ring_bindings(struct si_context *sctx)
 {
 	unsigned gsvs_itemsize = sctx->gs_shader.cso->max_gsvs_emit_size;
 	uint64_t offset;
@@ -1506,7 +1506,7 @@ bool si_update_shaders(struct si_context *sctx)
 				return false;
 		}
 
-		si_update_gs_rings(sctx);
+		si_update_gsvs_ring_bindings(sctx);
 	} else {
 		si_pm4_bind_state(sctx, gs, NULL);
 		si_pm4_bind_state(sctx, es, NULL);

From b1c5f3faa9d7a227150b677469df1a5832236541 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <marek.olsak@amd.com>
Date: Sun, 8 Nov 2015 13:34:44 +0100
Subject: [PATCH 249/287] radeonsi: calculate optimal GS ring sizes to fix GS
 hangs on Tonga
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

I discovered that increasing the ESGS ring size fixes GS hangs on Tonga,
so let's do it properly.

There is now a separate init_config_gs_rings state that is not immutable,
because GS rings are resized when needed.

This also saves some memory. Most apps won't need more than 1MB
per ring per shader engine.

Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
---
 src/gallium/drivers/radeonsi/si_hw_context.c  |   2 +
 src/gallium/drivers/radeonsi/si_pipe.c        |   2 +
 src/gallium/drivers/radeonsi/si_pipe.h        |   1 +
 src/gallium/drivers/radeonsi/si_shader.h      |   1 +
 .../drivers/radeonsi/si_state_shaders.c       | 162 ++++++++++++------
 5 files changed, 117 insertions(+), 51 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_hw_context.c b/src/gallium/drivers/radeonsi/si_hw_context.c
index f28c11cb1d2..baa02293c41 100644
--- a/src/gallium/drivers/radeonsi/si_hw_context.c
+++ b/src/gallium/drivers/radeonsi/si_hw_context.c
@@ -165,6 +165,8 @@ void si_begin_new_cs(struct si_context *ctx)
 
 	/* The CS initialization should be emitted before everything else. */
 	si_pm4_emit(ctx, ctx->init_config);
+	if (ctx->init_config_gs_rings)
+		si_pm4_emit(ctx, ctx->init_config_gs_rings);
 
 	ctx->framebuffer.dirty_cbufs = (1 << 8) - 1;
 	ctx->framebuffer.dirty_zsbuf = true;
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index 6c13fcdf5d7..9a0fe808ee3 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -50,6 +50,8 @@ static void si_destroy_context(struct pipe_context *context)
 	sctx->b.ws->fence_reference(&sctx->last_gfx_fence, NULL);
 
 	si_pm4_free_state(sctx, sctx->init_config, ~0);
+	if (sctx->init_config_gs_rings)
+		si_pm4_free_state(sctx, sctx->init_config_gs_rings, ~0);
 	for (i = 0; i < Elements(sctx->vgt_shader_config); i++)
 		si_pm4_delete_state(sctx, vgt_shader_config, sctx->vgt_shader_config[i]);
 
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index 6e742fc1342..05d52fe19dc 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -202,6 +202,7 @@ struct si_context {
 
 	/* Precomputed states. */
 	struct si_pm4_state		*init_config;
+	struct si_pm4_state		*init_config_gs_rings;
 	bool				init_config_has_vgt_flush;
 	struct si_pm4_state		*vgt_shader_config[4];
 
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index b87fb715aac..d815ce27e6a 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -203,6 +203,7 @@ struct si_shader_selector {
 	bool		forces_persample_interp_for_linear;
 
 	unsigned	esgs_itemsize;
+	unsigned	gs_input_verts_per_prim;
 	unsigned	gs_output_prim;
 	unsigned	gs_max_out_vertices;
 	unsigned	gs_num_invocations;
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 6206dc62c1b..007a0ecb476 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -33,6 +33,7 @@
 #include "tgsi/tgsi_parse.h"
 #include "tgsi/tgsi_ureg.h"
 #include "util/u_memory.h"
+#include "util/u_prim.h"
 #include "util/u_simple_shaders.h"
 
 static void si_set_tesseval_regs(struct si_shader *shader,
@@ -707,6 +708,9 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
 			sel->max_gs_stream = MAX2(sel->max_gs_stream,
 						  sel->so.output[i].stream);
 
+		sel->gs_input_verts_per_prim =
+			u_vertices_per_prim(sel->info.properties[TGSI_PROPERTY_GS_INPUT_PRIM]);
+
 		for (i = 0; i < sel->info.num_inputs; i++) {
 			unsigned name = sel->info.input_semantic_name[i];
 			unsigned index = sel->info.input_semantic_index[i];
@@ -723,6 +727,7 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
 
 	case PIPE_SHADER_VERTEX:
 	case PIPE_SHADER_TESS_CTRL:
+	case PIPE_SHADER_TESS_EVAL:
 		for (i = 0; i < sel->info.num_outputs; i++) {
 			unsigned name = sel->info.output_semantic_name[i];
 			unsigned index = sel->info.output_semantic_index[i];
@@ -1069,6 +1074,7 @@ static void si_init_config_add_vgt_flush(struct si_context *sctx)
 	if (sctx->init_config_has_vgt_flush)
 		return;
 
+	/* VGT_FLUSH is required even if VGT is idle. It resets VGT pointers. */
 	si_pm4_cmd_begin(sctx->init_config, PKT3_EVENT_WRITE);
 	si_pm4_cmd_add(sctx->init_config, EVENT_TYPE(V_028A90_VGT_FLUSH) | EVENT_INDEX(0));
 	si_pm4_cmd_end(sctx->init_config, false);
@@ -1076,62 +1082,119 @@ static void si_init_config_add_vgt_flush(struct si_context *sctx)
 }
 
 /* Initialize state related to ESGS / GSVS ring buffers */
-static void si_init_gs_rings(struct si_context *sctx)
+static bool si_update_gs_ring_buffers(struct si_context *sctx)
 {
-	unsigned esgs_ring_size = 128 * 1024;
-	unsigned gsvs_ring_size = 60 * 1024 * 1024;
+	struct si_shader_selector *es =
+		sctx->tes_shader.cso ? sctx->tes_shader.cso : sctx->vs_shader.cso;
+	struct si_shader_selector *gs = sctx->gs_shader.cso;
+	struct si_pm4_state *pm4;
 
-	assert(!sctx->esgs_ring && !sctx->gsvs_ring);
+	/* Chip constants. */
+	unsigned num_se = sctx->screen->b.info.max_se;
+	unsigned wave_size = 64;
+	unsigned max_gs_waves = 32 * num_se; /* max 32 per SE on GCN */
+	unsigned gs_vertex_reuse = 16 * num_se; /* GS_VERTEX_REUSE register (per SE) */
+	unsigned alignment = 256 * num_se;
+	/* The maximum size is 63.999 MB per SE. */
+	unsigned max_size = ((unsigned)(63.999 * 1024 * 1024) & ~255) * num_se;
 
-	sctx->esgs_ring = pipe_buffer_create(sctx->b.b.screen, PIPE_BIND_CUSTOM,
-				       PIPE_USAGE_DEFAULT, esgs_ring_size);
-	if (!sctx->esgs_ring)
-		return;
+	/* Calculate the minimum size. */
+	unsigned min_esgs_ring_size = align(es->esgs_itemsize * gs_vertex_reuse *
+					    wave_size, alignment);
 
-	sctx->gsvs_ring = pipe_buffer_create(sctx->b.b.screen, PIPE_BIND_CUSTOM,
-					     PIPE_USAGE_DEFAULT, gsvs_ring_size);
-	if (!sctx->gsvs_ring) {
-		pipe_resource_reference(&sctx->esgs_ring, NULL);
-		return;
-	}
+	/* These are recommended sizes, not minimum sizes. */
+	unsigned esgs_ring_size = max_gs_waves * 2 * wave_size *
+				  es->esgs_itemsize * gs->gs_input_verts_per_prim;
+	unsigned gsvs_ring_size = max_gs_waves * 2 * wave_size *
+				  gs->max_gsvs_emit_size * (gs->max_gs_stream + 1);
 
-	si_init_config_add_vgt_flush(sctx);
+	min_esgs_ring_size = align(min_esgs_ring_size, alignment);
+	esgs_ring_size = align(esgs_ring_size, alignment);
+	gsvs_ring_size = align(gsvs_ring_size, alignment);
 
-	/* Append these registers to the init config state. */
-	if (sctx->b.chip_class >= CIK) {
-		if (sctx->b.chip_class >= VI) {
-			/* The maximum sizes are 63.999 MB on VI, because
-			 * the register fields only have 18 bits. */
-			assert(esgs_ring_size / 256 < (1 << 18));
-			assert(gsvs_ring_size / 256 < (1 << 18));
-		}
-		si_pm4_set_reg(sctx->init_config, R_030900_VGT_ESGS_RING_SIZE,
-			       esgs_ring_size / 256);
-		si_pm4_set_reg(sctx->init_config, R_030904_VGT_GSVS_RING_SIZE,
-			       gsvs_ring_size / 256);
-	} else {
-		si_pm4_set_reg(sctx->init_config, R_0088C8_VGT_ESGS_RING_SIZE,
-			       esgs_ring_size / 256);
-		si_pm4_set_reg(sctx->init_config, R_0088CC_VGT_GSVS_RING_SIZE,
-			       gsvs_ring_size / 256);
-	}
+	esgs_ring_size = CLAMP(esgs_ring_size, min_esgs_ring_size, max_size);
+	gsvs_ring_size = MIN2(gsvs_ring_size, max_size);
 
-	/* Flush the context to re-emit the init_config state.
-	 * This is done only once in a lifetime of a context.
+	/* Some rings don't have to be allocated if shaders don't use them.
+	 * (e.g. no varyings between ES and GS or GS and VS)
 	 */
-	si_pm4_upload_indirect_buffer(sctx, sctx->init_config);
+	bool update_esgs = esgs_ring_size &&
+			   (!sctx->esgs_ring ||
+			    sctx->esgs_ring->width0 < esgs_ring_size);
+	bool update_gsvs = gsvs_ring_size &&
+			   (!sctx->gsvs_ring ||
+			    sctx->gsvs_ring->width0 < gsvs_ring_size);
+
+	if (!update_esgs && !update_gsvs)
+		return true;
+
+	if (update_esgs) {
+		pipe_resource_reference(&sctx->esgs_ring, NULL);
+		sctx->esgs_ring = pipe_buffer_create(sctx->b.b.screen, PIPE_BIND_CUSTOM,
+						     PIPE_USAGE_DEFAULT,
+						     esgs_ring_size);
+		if (!sctx->esgs_ring)
+			return false;
+	}
+
+	if (update_gsvs) {
+		pipe_resource_reference(&sctx->gsvs_ring, NULL);
+		sctx->gsvs_ring = pipe_buffer_create(sctx->b.b.screen, PIPE_BIND_CUSTOM,
+						     PIPE_USAGE_DEFAULT,
+						     gsvs_ring_size);
+		if (!sctx->gsvs_ring)
+			return false;
+	}
+
+	/* Create the "init_config_gs_rings" state. */
+	pm4 = CALLOC_STRUCT(si_pm4_state);
+	if (!pm4)
+		return false;
+
+	if (sctx->b.chip_class >= CIK) {
+		if (sctx->esgs_ring)
+			si_pm4_set_reg(pm4, R_030900_VGT_ESGS_RING_SIZE,
+				       sctx->esgs_ring->width0 / 256);
+		if (sctx->gsvs_ring)
+			si_pm4_set_reg(pm4, R_030904_VGT_GSVS_RING_SIZE,
+				       sctx->gsvs_ring->width0 / 256);
+	} else {
+		if (sctx->esgs_ring)
+			si_pm4_set_reg(pm4, R_0088C8_VGT_ESGS_RING_SIZE,
+				       sctx->esgs_ring->width0 / 256);
+		if (sctx->gsvs_ring)
+			si_pm4_set_reg(pm4, R_0088CC_VGT_GSVS_RING_SIZE,
+				       sctx->gsvs_ring->width0 / 256);
+	}
+
+	/* Set the state. */
+	if (sctx->init_config_gs_rings)
+		si_pm4_free_state(sctx, sctx->init_config_gs_rings, ~0);
+	sctx->init_config_gs_rings = pm4;
+
+	if (!sctx->init_config_has_vgt_flush) {
+		si_init_config_add_vgt_flush(sctx);
+		si_pm4_upload_indirect_buffer(sctx, sctx->init_config);
+	}
+
+	/* Flush the context to re-emit both init_config states. */
 	sctx->b.initial_gfx_cs_size = 0; /* force flush */
 	si_context_gfx_flush(sctx, RADEON_FLUSH_ASYNC, NULL);
 
-	si_set_ring_buffer(&sctx->b.b, PIPE_SHADER_VERTEX, SI_RING_ESGS,
-			   sctx->esgs_ring, 0, esgs_ring_size,
-			   true, true, 4, 64, 0);
-	si_set_ring_buffer(&sctx->b.b, PIPE_SHADER_GEOMETRY, SI_RING_ESGS,
-			   sctx->esgs_ring, 0, esgs_ring_size,
-			   false, false, 0, 0, 0);
-	si_set_ring_buffer(&sctx->b.b, PIPE_SHADER_VERTEX, SI_RING_GSVS,
-			   sctx->gsvs_ring, 0, gsvs_ring_size,
-			   false, false, 0, 0, 0);
+	/* Set ring bindings. */
+	if (sctx->esgs_ring) {
+		si_set_ring_buffer(&sctx->b.b, PIPE_SHADER_VERTEX, SI_RING_ESGS,
+				   sctx->esgs_ring, 0, sctx->esgs_ring->width0,
+				   true, true, 4, 64, 0);
+		si_set_ring_buffer(&sctx->b.b, PIPE_SHADER_GEOMETRY, SI_RING_ESGS,
+				   sctx->esgs_ring, 0, sctx->esgs_ring->width0,
+				   false, false, 0, 0, 0);
+	}
+	if (sctx->gsvs_ring)
+		si_set_ring_buffer(&sctx->b.b, PIPE_SHADER_VERTEX, SI_RING_GSVS,
+				   sctx->gsvs_ring, 0, sctx->gsvs_ring->width0,
+				   false, false, 0, 0, 0);
+	return true;
 }
 
 static void si_update_gsvs_ring_bindings(struct si_context *sctx)
@@ -1139,7 +1202,7 @@ static void si_update_gsvs_ring_bindings(struct si_context *sctx)
 	unsigned gsvs_itemsize = sctx->gs_shader.cso->max_gsvs_emit_size;
 	uint64_t offset;
 
-	if (gsvs_itemsize == sctx->last_gsvs_itemsize)
+	if (!sctx->gsvs_ring || gsvs_itemsize == sctx->last_gsvs_itemsize)
 		return;
 
 	sctx->last_gsvs_itemsize = gsvs_itemsize;
@@ -1500,11 +1563,8 @@ bool si_update_shaders(struct si_context *sctx)
 		si_pm4_bind_state(sctx, vs, sctx->gs_shader.current->gs_copy_shader->pm4);
 		si_update_so(sctx, sctx->gs_shader.cso);
 
-		if (!sctx->gsvs_ring) {
-			si_init_gs_rings(sctx);
-			if (!sctx->gsvs_ring)
-				return false;
-		}
+		if (!si_update_gs_ring_buffers(sctx))
+			return false;
 
 		si_update_gsvs_ring_bindings(sctx);
 	} else {

From d79a3449a70b35a7fd38e7b4e17cafcbc28dda0d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <marek.olsak@amd.com>
Date: Thu, 15 Oct 2015 23:29:00 +0200
Subject: [PATCH 250/287] radeonsi: link ES-GS just like LS-HS
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This reduces the shader key for ES.

Use a fixed attrib location based on (semantic name, index).

The ESGS item size is determined by the physical index of the highest ES
output, so it's almost always larger than before, but I think that
shouldn't matter as long as the ESGS ring buffer is large enough.

Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
---
 src/gallium/drivers/radeonsi/si_shader.c      | 24 ++++---------------
 src/gallium/drivers/radeonsi/si_shader.h      | 21 +++++++---------
 .../drivers/radeonsi/si_state_shaders.c       | 13 ++++------
 3 files changed, 19 insertions(+), 39 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index a119cbdc16c..56b05cef1a3 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -775,6 +775,7 @@ static LLVMValueRef fetch_input_gs(
 	struct tgsi_shader_info *info = &shader->selector->info;
 	unsigned semantic_name = info->input_semantic_name[reg->Register.Index];
 	unsigned semantic_index = info->input_semantic_index[reg->Register.Index];
+	unsigned param;
 
 	if (swizzle != ~0 && semantic_name == TGSI_SEMANTIC_PRIMID)
 		return get_primitive_id(bld_base, swizzle);
@@ -805,12 +806,10 @@ static LLVMValueRef fetch_input_gs(
 						   vtx_offset_param),
 				      4);
 
+	param = si_shader_io_get_unique_index(semantic_name, semantic_index);
 	args[0] = si_shader_ctx->esgs_ring;
 	args[1] = vtx_offset;
-	args[2] = lp_build_const_int32(gallivm,
-				       (get_param_index(semantic_name, semantic_index,
-							shader->selector->inputs_read) * 4 +
-					swizzle) * 256);
+	args[2] = lp_build_const_int32(gallivm, (param * 4 + swizzle) * 256);
 	args[3] = uint->zero;
 	args[4] = uint->one;  /* OFFEN */
 	args[5] = uint->zero; /* IDXEN */
@@ -2016,9 +2015,6 @@ static void si_llvm_emit_es_epilogue(struct lp_build_tgsi_context * bld_base)
 	LLVMTypeRef i32 = LLVMInt32TypeInContext(gallivm->context);
 	LLVMValueRef soffset = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
 					    si_shader_ctx->param_es2gs_offset);
-	uint64_t enabled_outputs = si_shader_ctx->type == TGSI_PROCESSOR_TESS_EVAL ?
-					   es->key.tes.es_enabled_outputs :
-					   es->key.vs.es_enabled_outputs;
 	unsigned chan;
 	int i;
 
@@ -2031,11 +2027,8 @@ static void si_llvm_emit_es_epilogue(struct lp_build_tgsi_context * bld_base)
 		    info->output_semantic_name[i] == TGSI_SEMANTIC_LAYER)
 			continue;
 
-		param_index = get_param_index(info->output_semantic_name[i],
-					      info->output_semantic_index[i],
-					      enabled_outputs);
-		if (param_index < 0)
-			continue;
+		param_index = si_shader_io_get_unique_index(info->output_semantic_name[i],
+							    info->output_semantic_index[i]);
 
 		for (chan = 0; chan < 4; chan++) {
 			LLVMValueRef out_val = LLVMBuildLoad(gallivm->builder, out_ptr[chan], "");
@@ -4023,10 +4016,6 @@ void si_dump_shader_key(unsigned shader, union si_shader_key *key, FILE *f)
 			fprintf(f, !i ? "%u" : ", %u",
 				key->vs.instance_divisors[i]);
 		fprintf(f, "}\n");
-
-		if (key->vs.as_es)
-			fprintf(f, "  es_enabled_outputs = 0x%"PRIx64"\n",
-				key->vs.es_enabled_outputs);
 		fprintf(f, "  as_es = %u\n", key->vs.as_es);
 		fprintf(f, "  as_ls = %u\n", key->vs.as_ls);
 		fprintf(f, "  export_prim_id = %u\n", key->vs.export_prim_id);
@@ -4037,9 +4026,6 @@ void si_dump_shader_key(unsigned shader, union si_shader_key *key, FILE *f)
 		break;
 
 	case PIPE_SHADER_TESS_EVAL:
-		if (key->tes.as_es)
-			fprintf(f, "  es_enabled_outputs = 0x%"PRIx64"\n",
-				key->tes.es_enabled_outputs);
 		fprintf(f, "  as_es = %u\n", key->tes.as_es);
 		fprintf(f, "  export_prim_id = %u\n", key->tes.export_prim_id);
 		break;
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index d815ce27e6a..de69a273ab4 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -26,14 +26,15 @@
  *      Christian König <christian.koenig@amd.com>
  */
 
-/* How linking tessellation shader inputs and outputs works.
+/* How linking shader inputs and outputs between vertex, tessellation, and
+ * geometry shaders works.
  *
  * Inputs and outputs between shaders are stored in a buffer. This buffer
  * lives in LDS (typical case for tessellation), but it can also live
- * in memory. Each input or output has a fixed location within a vertex.
+ * in memory (ESGS). Each input or output has a fixed location within a vertex.
  * The highest used input or output determines the stride between vertices.
  *
- * Since tessellation is only enabled in the OpenGL core profile,
+ * Since GS and tessellation are only possible in the OpenGL core profile,
  * only these semantics are valid for per-vertex data:
  *
  *   Name             Location
@@ -57,13 +58,11 @@
  * That's how independent shaders agree on input and output locations.
  * The si_shader_io_get_unique_index function assigns the locations.
  *
- * Other required information for calculating the input and output addresses
- * like the vertex stride, the patch stride, and the offsets where per-vertex
- * and per-patch data start, is passed to the shader via user data SGPRs.
- * The offsets and strides are calculated at draw time and aren't available
- * at compile time.
- *
- * The same approach should be used for linking ES->GS in the future.
+ * For tessellation, other required information for calculating the input and
+ * output addresses like the vertex stride, the patch stride, and the offsets
+ * where per-vertex and per-patch data start, is passed to the shader via
+ * user data SGPRs. The offsets and strides are calculated at draw time and
+ * aren't available at compile time.
  */
 
 #ifndef SI_SHADER_H
@@ -245,7 +244,6 @@ union si_shader_key {
 		/* Mask of "get_unique_index" bits - which outputs are read
 		 * by the next stage (needed by ES).
 		 * This describes how outputs are laid out in memory. */
-		uint64_t	es_enabled_outputs;
 		unsigned	as_es:1; /* export shader */
 		unsigned	as_ls:1; /* local shader */
 		unsigned	export_prim_id:1; /* when PS needs it and GS is disabled */
@@ -257,7 +255,6 @@ union si_shader_key {
 		/* Mask of "get_unique_index" bits - which outputs are read
 		 * by the next stage (needed by ES).
 		 * This describes how outputs are laid out in memory. */
-		uint64_t	es_enabled_outputs;
 		unsigned	as_es:1; /* export shader */
 		unsigned	export_prim_id:1; /* when PS needs it and GS is disabled */
 	} tes; /* tessellation evaluation shader */
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 007a0ecb476..3bf130d8a36 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -195,6 +195,8 @@ static void si_shader_es(struct si_shader *shader)
 	}
 	assert(num_sgprs <= 104);
 
+	si_pm4_set_reg(pm4, R_028AAC_VGT_ESGS_RING_ITEMSIZE,
+		       shader->selector->esgs_itemsize / 4);
 	si_pm4_set_reg(pm4, R_00B320_SPI_SHADER_PGM_LO_ES, va >> 8);
 	si_pm4_set_reg(pm4, R_00B324_SPI_SHADER_PGM_HI_ES, va >> 40);
 	si_pm4_set_reg(pm4, R_00B328_SPI_SHADER_PGM_RSRC1_ES,
@@ -251,8 +253,6 @@ static void si_shader_gs(struct si_shader *shader)
 	si_pm4_set_reg(pm4, R_028A64_VGT_GSVS_RING_OFFSET_2, gsvs_itemsize * ((max_stream >= 2) ? 2 : 1));
 	si_pm4_set_reg(pm4, R_028A68_VGT_GSVS_RING_OFFSET_3, gsvs_itemsize * ((max_stream >= 3) ? 3 : 1));
 
-	si_pm4_set_reg(pm4, R_028AAC_VGT_ESGS_RING_ITEMSIZE,
-		       shader->selector->esgs_itemsize / 4);
 	si_pm4_set_reg(pm4, R_028AB0_VGT_GSVS_RING_ITEMSIZE, gsvs_itemsize * (max_stream + 1));
 
 	si_pm4_set_reg(pm4, R_028B38_VGT_GS_MAX_VERT_OUT, gs_max_vert_out);
@@ -515,10 +515,8 @@ static inline void si_shader_selector_key(struct pipe_context *ctx,
 
 		if (sctx->tes_shader.cso)
 			key->vs.as_ls = 1;
-		else if (sctx->gs_shader.cso) {
+		else if (sctx->gs_shader.cso)
 			key->vs.as_es = 1;
-			key->vs.es_enabled_outputs = sctx->gs_shader.cso->inputs_read;
-		}
 
 		if (!sctx->gs_shader.cso && sctx->ps_shader.cso &&
 		    sctx->ps_shader.cso->info.uses_primid)
@@ -529,10 +527,9 @@ static inline void si_shader_selector_key(struct pipe_context *ctx,
 			sctx->tes_shader.cso->info.properties[TGSI_PROPERTY_TES_PRIM_MODE];
 		break;
 	case PIPE_SHADER_TESS_EVAL:
-		if (sctx->gs_shader.cso) {
+		if (sctx->gs_shader.cso)
 			key->tes.as_es = 1;
-			key->tes.es_enabled_outputs = sctx->gs_shader.cso->inputs_read;
-		} else if (sctx->ps_shader.cso && sctx->ps_shader.cso->info.uses_primid)
+		else if (sctx->ps_shader.cso && sctx->ps_shader.cso->info.uses_primid)
 			key->tes.export_prim_id = 1;
 		break;
 	case PIPE_SHADER_GEOMETRY:

From 3694d58e6c4a39bd84e8aef0d8e67c3ae9447f33 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <marek.olsak@amd.com>
Date: Thu, 15 Oct 2015 23:41:35 +0200
Subject: [PATCH 251/287] radeonsi: remove dead code after ES-GS linkage change
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
---
 src/gallium/drivers/radeonsi/si_shader.c      | 43 -------------------
 src/gallium/drivers/radeonsi/si_shader.h      |  1 -
 .../drivers/radeonsi/si_state_shaders.c       | 13 ------
 3 files changed, 57 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 56b05cef1a3..354d0646b99 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -163,49 +163,6 @@ unsigned si_shader_io_get_unique_index(unsigned semantic_name, unsigned index)
 	}
 }
 
-/**
- * Given a semantic name and index of a parameter and a mask of used parameters
- * (inputs or outputs), return the index of the parameter in the list of all
- * used parameters.
- *
- * For example, assume this list of parameters:
- *   POSITION, PSIZE, GENERIC0, GENERIC2
- * which has the mask:
- *   11000000000101
- * Then:
- *   querying POSITION returns 0,
- *   querying PSIZE returns 1,
- *   querying GENERIC0 returns 2,
- *   querying GENERIC2 returns 3.
- *
- * Which can be used as an offset to a parameter buffer in units of vec4s.
- */
-static int get_param_index(unsigned semantic_name, unsigned index,
-			   uint64_t mask)
-{
-	unsigned unique_index = si_shader_io_get_unique_index(semantic_name, index);
-	int i, param_index = 0;
-
-	/* If not present... */
-	if (!((1llu << unique_index) & mask))
-		return -1;
-
-	for (i = 0; mask; i++) {
-		uint64_t bit = 1llu << i;
-
-		if (bit & mask) {
-			if (i == unique_index)
-				return param_index;
-
-			mask &= ~bit;
-			param_index++;
-		}
-	}
-
-	assert(!"unreachable");
-	return -1;
-}
-
 /**
  * Get the value of a shader input parameter and extract a bitfield.
  */
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index de69a273ab4..3400a03d7bb 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -211,7 +211,6 @@ struct si_shader_selector {
 	unsigned	max_gsvs_emit_size;
 
 	/* masks of "get_unique_index" bits */
-	uint64_t	inputs_read;
 	uint64_t	outputs_written;
 	uint32_t	patch_outputs_written;
 	uint32_t	ps_colors_written;
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 3bf130d8a36..7f6511cf01b 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -707,19 +707,6 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
 
 		sel->gs_input_verts_per_prim =
 			u_vertices_per_prim(sel->info.properties[TGSI_PROPERTY_GS_INPUT_PRIM]);
-
-		for (i = 0; i < sel->info.num_inputs; i++) {
-			unsigned name = sel->info.input_semantic_name[i];
-			unsigned index = sel->info.input_semantic_index[i];
-
-			switch (name) {
-			case TGSI_SEMANTIC_PRIMID:
-				break;
-			default:
-				sel->inputs_read |=
-					1llu << si_shader_io_get_unique_index(name, index);
-			}
-		}
 		break;
 
 	case PIPE_SHADER_VERTEX:

From 88f349c4e100acd5dd3e7137496444907a175c39 Mon Sep 17 00:00:00 2001
From: Matt Turner <mattst88@gmail.com>
Date: Thu, 12 Nov 2015 16:02:22 -0800
Subject: [PATCH 252/287] i965/fs: Replace nested ternary with if ladder.

Since the types of the expression were

   bool ? src_reg : (bool ? brw_reg : brw_reg)

the result of the second (nested) ternary would be implicitly
converted to a src_reg by the src_reg(struct brw_reg) constructor. I.e.,

   bool ? src_reg : src_reg(bool ? brw_reg : brw_reg)

In the next patch, I make backend_reg (the parent of src_reg) inherit
from brw_reg, which changes this expression to return brw_reg, which
throws away any fields that exist in the classes derived from brw_reg.
I.e.,

   src_reg(bool ? brw_reg(src_reg) : bool ? brw_reg : brw_reg)

Generally this code was gross, and wasn't actually shorter or easier to
read than an if ladder.

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Chris Forbes <chrisf@ijw.co.nz>
---
 src/mesa/drivers/dri/i965/brw_fs_builder.h | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs_builder.h b/src/mesa/drivers/dri/i965/brw_fs_builder.h
index f121f3463d3..d5763f699d2 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_builder.h
+++ b/src/mesa/drivers/dri/i965/brw_fs_builder.h
@@ -224,12 +224,13 @@ namespace brw {
       src_reg
       sample_mask_reg() const
       {
-         const bool uses_kill =
-            (shader->stage == MESA_SHADER_FRAGMENT &&
-             ((brw_wm_prog_data *)shader->stage_prog_data)->uses_kill);
-         return (shader->stage != MESA_SHADER_FRAGMENT ? src_reg(0xffff) :
-                 uses_kill ? brw_flag_reg(0, 1) :
-                 retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UD));
+         if (shader->stage != MESA_SHADER_FRAGMENT) {
+            return src_reg(0xffff);
+         } else if (((brw_wm_prog_data *)shader->stage_prog_data)->uses_kill) {
+            return brw_flag_reg(0, 1);
+         } else {
+            return retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UD);
+         }
       }
 
       /**

From c7ed5d1d1ca5d0e537cd5eb2cc8d4cae7ae73564 Mon Sep 17 00:00:00 2001
From: Matt Turner <mattst88@gmail.com>
Date: Sat, 24 Oct 2015 14:32:03 -0700
Subject: [PATCH 253/287] i965: Make backend_reg inherit from brw_reg.

Some fields (file, type, abs, negate) in brw_reg are shadowed by
backend_reg.

Reviewed-by: Emil Velikov <emil.velikov@collabora.co.uk>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/mesa/drivers/dri/i965/brw_shader.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_shader.h b/src/mesa/drivers/dri/i965/brw_shader.h
index 29baebf0cc1..e1e89dd3212 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.h
+++ b/src/mesa/drivers/dri/i965/brw_shader.h
@@ -48,16 +48,15 @@ enum PACKED register_file {
    UNIFORM, /* prog_data->params[reg] */
 };
 
-struct backend_reg
-{
 #ifdef __cplusplus
+struct backend_reg : public brw_reg
+{
    bool is_zero() const;
    bool is_one() const;
    bool is_negative_one() const;
    bool is_null() const;
    bool is_accumulator() const;
    bool in_range(const backend_reg &r, unsigned n) const;
-#endif
 
    enum register_file file; /**< Register file: GRF, MRF, IMM. */
    enum brw_reg_type type;  /**< Register type: BRW_REGISTER_TYPE_* */
@@ -87,6 +86,7 @@ struct backend_reg
    bool negate;
    bool abs;
 };
+#endif
 
 struct cfg_t;
 struct bblock_t;

From 433df2e03c9a066bb2975bed28b57d6e2edf0aa9 Mon Sep 17 00:00:00 2001
From: Matt Turner <mattst88@gmail.com>
Date: Sat, 24 Oct 2015 14:35:33 -0700
Subject: [PATCH 254/287] i965: Delete abs/negate fields from backend_reg.

Instead use the ones provided by brw_reg. Also allows us to handle
HW_REGs in the negate() functions.

Reviewed-by: Emil Velikov <emil.velikov@collabora.co.uk>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/mesa/drivers/dri/i965/brw_ir_fs.h   | 2 +-
 src/mesa/drivers/dri/i965/brw_ir_vec4.h | 2 +-
 src/mesa/drivers/dri/i965/brw_shader.h  | 3 ---
 3 files changed, 2 insertions(+), 5 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_ir_fs.h b/src/mesa/drivers/dri/i965/brw_ir_fs.h
index 4417555f18e..c0e486e5edc 100644
--- a/src/mesa/drivers/dri/i965/brw_ir_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_ir_fs.h
@@ -72,7 +72,7 @@ public:
 static inline fs_reg
 negate(fs_reg reg)
 {
-   assert(reg.file != HW_REG && reg.file != IMM);
+   assert(reg.file != IMM);
    reg.negate = !reg.negate;
    return reg;
 }
diff --git a/src/mesa/drivers/dri/i965/brw_ir_vec4.h b/src/mesa/drivers/dri/i965/brw_ir_vec4.h
index 29642c6d2a4..2fbb043f244 100644
--- a/src/mesa/drivers/dri/i965/brw_ir_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_ir_vec4.h
@@ -90,7 +90,7 @@ swizzle(src_reg reg, unsigned swizzle)
 static inline src_reg
 negate(src_reg reg)
 {
-   assert(reg.file != HW_REG && reg.file != IMM);
+   assert(reg.file != IMM);
    reg.negate = !reg.negate;
    return reg;
 }
diff --git a/src/mesa/drivers/dri/i965/brw_shader.h b/src/mesa/drivers/dri/i965/brw_shader.h
index e1e89dd3212..73b57f4f8d1 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.h
+++ b/src/mesa/drivers/dri/i965/brw_shader.h
@@ -82,9 +82,6 @@ struct backend_reg : public brw_reg
    uint16_t reg_offset;
 
    struct brw_reg fixed_hw_reg;
-
-   bool negate;
-   bool abs;
 };
 #endif
 

From 182f137521f9c81f89a473ca5a411e6a7c531e19 Mon Sep 17 00:00:00 2001
From: Matt Turner <mattst88@gmail.com>
Date: Sat, 24 Oct 2015 15:04:23 -0700
Subject: [PATCH 255/287] i965: Delete type field from backend_reg.

Switching from an implicitly-sized type field to a field with an explicit
bit width is safe because we have fewer than 2^4 types, and gcc will
warn if you attempt to set a value that will not fit.

Reviewed-by: Emil Velikov <emil.velikov@collabora.co.uk>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/mesa/drivers/dri/i965/brw_shader.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/brw_shader.h b/src/mesa/drivers/dri/i965/brw_shader.h
index 73b57f4f8d1..3f435e2b728 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.h
+++ b/src/mesa/drivers/dri/i965/brw_shader.h
@@ -59,7 +59,6 @@ struct backend_reg : public brw_reg
    bool in_range(const backend_reg &r, unsigned n) const;
 
    enum register_file file; /**< Register file: GRF, MRF, IMM. */
-   enum brw_reg_type type;  /**< Register type: BRW_REGISTER_TYPE_* */
 
    /**
     * Register number.

From e42fb0c2a687cdcd6af2a590f6f5e24f64cfff3b Mon Sep 17 00:00:00 2001
From: Matt Turner <mattst88@gmail.com>
Date: Thu, 22 Oct 2015 19:41:30 -0700
Subject: [PATCH 256/287] i965: Make 'dw1' and 'bits' unnamed structures in
 brw_reg.

Generated by

   sed -i -e 's/\.bits\././g' *.c *.h *.cpp
   sed -i -e 's/dw1\.//g' *.c *.h *.cpp

and then reverting changes to comments in gen7_blorp.cpp and
brw_fs_generator.cpp.

There wasn't any utility offered by forcing the programmer to list these
to access their fields. Removing them will reduce churn in future
commits.

This is C11 (and gcc has apparently supported it for some time for
"compatibility with other compilers").

See https://gcc.gnu.org/onlinedocs/gcc/Unnamed-Fields.html

Reviewed-by: Emil Velikov <emil.velikov@collabora.co.uk>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/mesa/drivers/dri/i965/brw_eu_emit.c       | 52 ++++++-------
 src/mesa/drivers/dri/i965/brw_ff_gs_emit.c    |  2 +-
 src/mesa/drivers/dri/i965/brw_fs.cpp          | 74 +++++++++----------
 .../dri/i965/brw_fs_combine_constants.cpp     |  6 +-
 .../dri/i965/brw_fs_copy_propagation.cpp      |  8 +-
 src/mesa/drivers/dri/i965/brw_fs_cse.cpp      | 16 ++--
 .../drivers/dri/i965/brw_fs_generator.cpp     | 42 ++++++-----
 src/mesa/drivers/dri/i965/brw_fs_visitor.cpp  |  4 +-
 src/mesa/drivers/dri/i965/brw_reg.h           | 40 +++++-----
 src/mesa/drivers/dri/i965/brw_shader.cpp      | 30 ++++----
 src/mesa/drivers/dri/i965/brw_vec4.cpp        | 40 +++++-----
 .../dri/i965/brw_vec4_copy_propagation.cpp    |  6 +-
 .../drivers/dri/i965/brw_vec4_generator.cpp   | 41 +++++-----
 .../drivers/dri/i965/brw_vec4_visitor.cpp     |  2 +-
 14 files changed, 185 insertions(+), 178 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c
index a6fbb542919..775027d9e3a 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
@@ -169,10 +169,10 @@ brw_set_dest(struct brw_codegen *p, brw_inst *inst, struct brw_reg dest)
          brw_inst_set_dst_hstride(devinfo, inst, dest.hstride);
       } else {
          brw_inst_set_dst_da16_subreg_nr(devinfo, inst, dest.subnr / 16);
-         brw_inst_set_da16_writemask(devinfo, inst, dest.dw1.bits.writemask);
+         brw_inst_set_da16_writemask(devinfo, inst, dest.writemask);
          if (dest.file == BRW_GENERAL_REGISTER_FILE ||
              dest.file == BRW_MESSAGE_REGISTER_FILE) {
-            assert(dest.dw1.bits.writemask != 0);
+            assert(dest.writemask != 0);
          }
 	 /* From the Ivybridge PRM, Vol 4, Part 3, Section 5.2.4.1:
 	  *    Although Dst.HorzStride is a don't care for Align16, HW needs
@@ -187,13 +187,13 @@ brw_set_dest(struct brw_codegen *p, brw_inst *inst, struct brw_reg dest)
        */
       if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
          brw_inst_set_dst_ia1_addr_imm(devinfo, inst,
-                                       dest.dw1.bits.indirect_offset);
+                                       dest.indirect_offset);
 	 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
 	    dest.hstride = BRW_HORIZONTAL_STRIDE_1;
          brw_inst_set_dst_hstride(devinfo, inst, dest.hstride);
       } else {
          brw_inst_set_dst_ia16_addr_imm(devinfo, inst,
-                                        dest.dw1.bits.indirect_offset);
+                                        dest.indirect_offset);
 	 /* even ignored in da16, still need to set as '01' */
          brw_inst_set_dst_hstride(devinfo, inst, 1);
       }
@@ -243,7 +243,7 @@ validate_reg(const struct brw_device_info *devinfo,
     */
    if (reg.file == BRW_ARCHITECTURE_REGISTER_FILE &&
        reg.nr == BRW_ARF_ACCUMULATOR)
-      assert(reg.dw1.bits.swizzle == BRW_SWIZZLE_XYZW);
+      assert(reg.swizzle == BRW_SWIZZLE_XYZW);
 
    assert(reg.hstride >= 0 && reg.hstride < ARRAY_SIZE(hstride_for_reg));
    hstride = hstride_for_reg[reg.hstride];
@@ -338,7 +338,7 @@ brw_set_src0(struct brw_codegen *p, brw_inst *inst, struct brw_reg reg)
    brw_inst_set_src0_address_mode(devinfo, inst, reg.address_mode);
 
    if (reg.file == BRW_IMMEDIATE_VALUE) {
-      brw_inst_set_imm_ud(devinfo, inst, reg.dw1.ud);
+      brw_inst_set_imm_ud(devinfo, inst, reg.ud);
 
       /* The Bspec's section titled "Non-present Operands" claims that if src0
        * is an immediate that src1's type must be the same as that of src0.
@@ -408,9 +408,9 @@ brw_set_src0(struct brw_codegen *p, brw_inst *inst, struct brw_reg reg)
          brw_inst_set_src0_ia_subreg_nr(devinfo, inst, reg.subnr);
 
          if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
-            brw_inst_set_src0_ia1_addr_imm(devinfo, inst, reg.dw1.bits.indirect_offset);
+            brw_inst_set_src0_ia1_addr_imm(devinfo, inst, reg.indirect_offset);
 	 } else {
-            brw_inst_set_src0_ia16_addr_imm(devinfo, inst, reg.dw1.bits.indirect_offset);
+            brw_inst_set_src0_ia16_addr_imm(devinfo, inst, reg.indirect_offset);
 	 }
       }
 
@@ -427,13 +427,13 @@ brw_set_src0(struct brw_codegen *p, brw_inst *inst, struct brw_reg reg)
 	 }
       } else {
          brw_inst_set_src0_da16_swiz_x(devinfo, inst,
-            BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X));
+            BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_X));
          brw_inst_set_src0_da16_swiz_y(devinfo, inst,
-            BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y));
+            BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_Y));
          brw_inst_set_src0_da16_swiz_z(devinfo, inst,
-            BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z));
+            BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_Z));
          brw_inst_set_src0_da16_swiz_w(devinfo, inst,
-            BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W));
+            BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_W));
 
 	 /* This is an oddity of the fact we're using the same
 	  * descriptions for registers in align_16 as align_1:
@@ -479,7 +479,7 @@ brw_set_src1(struct brw_codegen *p, brw_inst *inst, struct brw_reg reg)
    assert(brw_inst_src0_reg_file(devinfo, inst) != BRW_IMMEDIATE_VALUE);
 
    if (reg.file == BRW_IMMEDIATE_VALUE) {
-      brw_inst_set_imm_ud(devinfo, inst, reg.dw1.ud);
+      brw_inst_set_imm_ud(devinfo, inst, reg.ud);
    } else {
       /* This is a hardware restriction, which may or may not be lifted
        * in the future:
@@ -507,13 +507,13 @@ brw_set_src1(struct brw_codegen *p, brw_inst *inst, struct brw_reg reg)
 	 }
       } else {
          brw_inst_set_src1_da16_swiz_x(devinfo, inst,
-            BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X));
+            BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_X));
          brw_inst_set_src1_da16_swiz_y(devinfo, inst,
-            BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y));
+            BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_Y));
          brw_inst_set_src1_da16_swiz_z(devinfo, inst,
-            BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z));
+            BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_Z));
          brw_inst_set_src1_da16_swiz_w(devinfo, inst,
-            BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W));
+            BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_W));
 
 	 /* This is an oddity of the fact we're using the same
 	  * descriptions for registers in align_16 as align_1:
@@ -848,8 +848,8 @@ static int
 get_3src_subreg_nr(struct brw_reg reg)
 {
    if (reg.vstride == BRW_VERTICAL_STRIDE_0) {
-      assert(brw_is_single_value_swizzle(reg.dw1.bits.swizzle));
-      return reg.subnr / 4 + BRW_GET_SWZ(reg.dw1.bits.swizzle, 0);
+      assert(brw_is_single_value_swizzle(reg.swizzle));
+      return reg.subnr / 4 + BRW_GET_SWZ(reg.swizzle, 0);
    } else {
       return reg.subnr / 4;
    }
@@ -879,12 +879,12 @@ brw_alu3(struct brw_codegen *p, unsigned opcode, struct brw_reg dest,
    }
    brw_inst_set_3src_dst_reg_nr(devinfo, inst, dest.nr);
    brw_inst_set_3src_dst_subreg_nr(devinfo, inst, dest.subnr / 16);
-   brw_inst_set_3src_dst_writemask(devinfo, inst, dest.dw1.bits.writemask);
+   brw_inst_set_3src_dst_writemask(devinfo, inst, dest.writemask);
 
    assert(src0.file == BRW_GENERAL_REGISTER_FILE);
    assert(src0.address_mode == BRW_ADDRESS_DIRECT);
    assert(src0.nr < 128);
-   brw_inst_set_3src_src0_swizzle(devinfo, inst, src0.dw1.bits.swizzle);
+   brw_inst_set_3src_src0_swizzle(devinfo, inst, src0.swizzle);
    brw_inst_set_3src_src0_subreg_nr(devinfo, inst, get_3src_subreg_nr(src0));
    brw_inst_set_3src_src0_reg_nr(devinfo, inst, src0.nr);
    brw_inst_set_3src_src0_abs(devinfo, inst, src0.abs);
@@ -895,7 +895,7 @@ brw_alu3(struct brw_codegen *p, unsigned opcode, struct brw_reg dest,
    assert(src1.file == BRW_GENERAL_REGISTER_FILE);
    assert(src1.address_mode == BRW_ADDRESS_DIRECT);
    assert(src1.nr < 128);
-   brw_inst_set_3src_src1_swizzle(devinfo, inst, src1.dw1.bits.swizzle);
+   brw_inst_set_3src_src1_swizzle(devinfo, inst, src1.swizzle);
    brw_inst_set_3src_src1_subreg_nr(devinfo, inst, get_3src_subreg_nr(src1));
    brw_inst_set_3src_src1_reg_nr(devinfo, inst, src1.nr);
    brw_inst_set_3src_src1_abs(devinfo, inst, src1.abs);
@@ -906,7 +906,7 @@ brw_alu3(struct brw_codegen *p, unsigned opcode, struct brw_reg dest,
    assert(src2.file == BRW_GENERAL_REGISTER_FILE);
    assert(src2.address_mode == BRW_ADDRESS_DIRECT);
    assert(src2.nr < 128);
-   brw_inst_set_3src_src2_swizzle(devinfo, inst, src2.dw1.bits.swizzle);
+   brw_inst_set_3src_src2_swizzle(devinfo, inst, src2.swizzle);
    brw_inst_set_3src_src2_subreg_nr(devinfo, inst, get_3src_subreg_nr(src2));
    brw_inst_set_3src_src2_reg_nr(devinfo, inst, src2.nr);
    brw_inst_set_3src_src2_abs(devinfo, inst, src2.abs);
@@ -2426,7 +2426,7 @@ void brw_adjust_sampler_state_pointer(struct brw_codegen *p,
 
    if (sampler_index.file == BRW_IMMEDIATE_VALUE) {
       const int sampler_state_size = 16; /* 16 bytes */
-      uint32_t sampler = sampler_index.dw1.ud;
+      uint32_t sampler = sampler_index.ud;
 
       if (sampler >= 16) {
          assert(devinfo->is_haswell || devinfo->gen >= 8);
@@ -2581,7 +2581,7 @@ brw_send_indirect_surface_message(struct brw_codegen *p,
        */
       insn = brw_AND(p, addr,
                      suboffset(vec1(retype(surface, BRW_REGISTER_TYPE_UD)),
-                               BRW_GET_SWZ(surface.dw1.bits.swizzle, 0)),
+                               BRW_GET_SWZ(surface.swizzle, 0)),
                      brw_imm_ud(0xff));
 
       brw_pop_insn_state(p);
@@ -3336,7 +3336,7 @@ brw_broadcast(struct brw_codegen *p,
        * We will typically not get here if the optimizer is doing its job, but
        * asserting would be mean.
        */
-      const unsigned i = idx.file == BRW_IMMEDIATE_VALUE ? idx.dw1.ud : 0;
+      const unsigned i = idx.file == BRW_IMMEDIATE_VALUE ? idx.ud : 0;
       brw_MOV(p, dst,
               (align1 ? stride(suboffset(src, i), 0, 1, 0) :
                stride(suboffset(src, 4 * i), 0, 4, 1)));
diff --git a/src/mesa/drivers/dri/i965/brw_ff_gs_emit.c b/src/mesa/drivers/dri/i965/brw_ff_gs_emit.c
index 50bda619f55..830fc6e41df 100644
--- a/src/mesa/drivers/dri/i965/brw_ff_gs_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_ff_gs_emit.c
@@ -436,7 +436,7 @@ gen6_sol_program(struct brw_ff_gs_compile *c, struct brw_ff_gs_prog_key *key,
             vertex_slot.nr += slot / 2;
             vertex_slot.subnr = (slot % 2) * 16;
             /* gl_PointSize is stored in VARYING_SLOT_PSIZ.w. */
-            vertex_slot.dw1.bits.swizzle = varying == VARYING_SLOT_PSIZ
+            vertex_slot.swizzle = varying == VARYING_SLOT_PSIZ
                ? BRW_SWIZZLE_WWWW : key->transform_feedback_swizzles[binding];
             brw_set_default_access_mode(p, BRW_ALIGN_16);
             brw_MOV(p, stride(c->reg.header, 4, 4, 1),
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index be712e56209..f40d05d3f43 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -379,7 +379,7 @@ fs_reg::fs_reg(float f)
    this->file = IMM;
    this->type = BRW_REGISTER_TYPE_F;
    this->stride = 0;
-   this->fixed_hw_reg.dw1.f = f;
+   this->fixed_hw_reg.f = f;
 }
 
 /** Immediate value constructor. */
@@ -389,7 +389,7 @@ fs_reg::fs_reg(int32_t i)
    this->file = IMM;
    this->type = BRW_REGISTER_TYPE_D;
    this->stride = 0;
-   this->fixed_hw_reg.dw1.d = i;
+   this->fixed_hw_reg.d = i;
 }
 
 /** Immediate value constructor. */
@@ -399,7 +399,7 @@ fs_reg::fs_reg(uint32_t u)
    this->file = IMM;
    this->type = BRW_REGISTER_TYPE_UD;
    this->stride = 0;
-   this->fixed_hw_reg.dw1.ud = u;
+   this->fixed_hw_reg.ud = u;
 }
 
 /** Vector float immediate value constructor. */
@@ -408,7 +408,7 @@ fs_reg::fs_reg(uint8_t vf[4])
    init();
    this->file = IMM;
    this->type = BRW_REGISTER_TYPE_VF;
-   memcpy(&this->fixed_hw_reg.dw1.ud, vf, sizeof(unsigned));
+   memcpy(&this->fixed_hw_reg.ud, vf, sizeof(unsigned));
 }
 
 /** Vector float immediate value constructor. */
@@ -417,7 +417,7 @@ fs_reg::fs_reg(uint8_t vf0, uint8_t vf1, uint8_t vf2, uint8_t vf3)
    init();
    this->file = IMM;
    this->type = BRW_REGISTER_TYPE_VF;
-   this->fixed_hw_reg.dw1.ud = (vf0 <<  0) |
+   this->fixed_hw_reg.ud = (vf0 <<  0) |
                                (vf1 <<  8) |
                                (vf2 << 16) |
                                (vf3 << 24);
@@ -719,7 +719,7 @@ fs_inst::components_read(unsigned i) const
       assert(src[FB_WRITE_LOGICAL_SRC_COMPONENTS].file == IMM);
       /* First/second FB write color. */
       if (i < 2)
-         return src[FB_WRITE_LOGICAL_SRC_COMPONENTS].fixed_hw_reg.dw1.ud;
+         return src[FB_WRITE_LOGICAL_SRC_COMPONENTS].fixed_hw_reg.ud;
       else
          return 1;
 
@@ -739,10 +739,10 @@ fs_inst::components_read(unsigned i) const
       assert(src[8].file == IMM && src[9].file == IMM);
       /* Texture coordinates. */
       if (i == 0)
-         return src[8].fixed_hw_reg.dw1.ud;
+         return src[8].fixed_hw_reg.ud;
       /* Texture derivatives. */
       else if ((i == 2 || i == 3) && opcode == SHADER_OPCODE_TXD_LOGICAL)
-         return src[9].fixed_hw_reg.dw1.ud;
+         return src[9].fixed_hw_reg.ud;
       /* Texture offset. */
       else if (i == 7)
          return 2;
@@ -757,7 +757,7 @@ fs_inst::components_read(unsigned i) const
       assert(src[3].file == IMM);
       /* Surface coordinates. */
       if (i == 0)
-         return src[3].fixed_hw_reg.dw1.ud;
+         return src[3].fixed_hw_reg.ud;
       /* Surface operation source (ignored for reads). */
       else if (i == 1)
          return 0;
@@ -770,10 +770,10 @@ fs_inst::components_read(unsigned i) const
              src[4].file == IMM);
       /* Surface coordinates. */
       if (i == 0)
-         return src[3].fixed_hw_reg.dw1.ud;
+         return src[3].fixed_hw_reg.ud;
       /* Surface operation source. */
       else if (i == 1)
-         return src[4].fixed_hw_reg.dw1.ud;
+         return src[4].fixed_hw_reg.ud;
       else
          return 1;
 
@@ -781,10 +781,10 @@ fs_inst::components_read(unsigned i) const
    case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL: {
       assert(src[3].file == IMM &&
              src[4].file == IMM);
-      const unsigned op = src[4].fixed_hw_reg.dw1.ud;
+      const unsigned op = src[4].fixed_hw_reg.ud;
       /* Surface coordinates. */
       if (i == 0)
-         return src[3].fixed_hw_reg.dw1.ud;
+         return src[3].fixed_hw_reg.ud;
       /* Surface operation source. */
       else if (i == 1 && op == BRW_AOP_CMPWR)
          return 2;
@@ -1666,11 +1666,11 @@ fs_visitor::assign_gs_urb_setup()
       if (inst->opcode == SHADER_OPCODE_URB_READ_SIMD8) {
          assert(inst->src[0].file == IMM);
          inst->src[0] = retype(brw_vec8_grf(first_icp_handle +
-                                            inst->src[0].fixed_hw_reg.dw1.ud,
+                                            inst->src[0].fixed_hw_reg.ud,
                                             0), BRW_REGISTER_TYPE_UD);
          /* for now, assume constant - we can do per-slot offsets later */
          assert(inst->src[1].file == IMM);
-         inst->offset = inst->src[1].fixed_hw_reg.dw1.ud;
+         inst->offset = inst->src[1].fixed_hw_reg.ud;
          inst->src[1] = fs_reg();
          inst->mlen = 1;
          inst->base_mrf = -1;
@@ -2112,7 +2112,7 @@ fs_visitor::opt_algebraic()
          if (inst->src[0].file == IMM) {
             assert(inst->src[0].type == BRW_REGISTER_TYPE_F);
             inst->opcode = BRW_OPCODE_MOV;
-            inst->src[0].fixed_hw_reg.dw1.f *= inst->src[1].fixed_hw_reg.dw1.f;
+            inst->src[0].fixed_hw_reg.f *= inst->src[1].fixed_hw_reg.f;
             inst->src[1] = reg_undef;
             progress = true;
             break;
@@ -2133,7 +2133,7 @@ fs_visitor::opt_algebraic()
          if (inst->src[0].file == IMM) {
             assert(inst->src[0].type == BRW_REGISTER_TYPE_F);
             inst->opcode = BRW_OPCODE_MOV;
-            inst->src[0].fixed_hw_reg.dw1.f += inst->src[1].fixed_hw_reg.dw1.f;
+            inst->src[0].fixed_hw_reg.f += inst->src[1].fixed_hw_reg.f;
             inst->src[1] = reg_undef;
             progress = true;
             break;
@@ -2182,7 +2182,7 @@ fs_visitor::opt_algebraic()
             case BRW_CONDITIONAL_L:
                switch (inst->src[1].type) {
                case BRW_REGISTER_TYPE_F:
-                  if (inst->src[1].fixed_hw_reg.dw1.f >= 1.0f) {
+                  if (inst->src[1].fixed_hw_reg.f >= 1.0f) {
                      inst->opcode = BRW_OPCODE_MOV;
                      inst->src[1] = reg_undef;
                      inst->conditional_mod = BRW_CONDITIONAL_NONE;
@@ -2197,7 +2197,7 @@ fs_visitor::opt_algebraic()
             case BRW_CONDITIONAL_G:
                switch (inst->src[1].type) {
                case BRW_REGISTER_TYPE_F:
-                  if (inst->src[1].fixed_hw_reg.dw1.f <= 0.0f) {
+                  if (inst->src[1].fixed_hw_reg.f <= 0.0f) {
                      inst->opcode = BRW_OPCODE_MOV;
                      inst->src[1] = reg_undef;
                      inst->conditional_mod = BRW_CONDITIONAL_NONE;
@@ -2234,7 +2234,7 @@ fs_visitor::opt_algebraic()
             progress = true;
          } else if (inst->src[1].file == IMM && inst->src[2].file == IMM) {
             inst->opcode = BRW_OPCODE_ADD;
-            inst->src[1].fixed_hw_reg.dw1.f *= inst->src[2].fixed_hw_reg.dw1.f;
+            inst->src[1].fixed_hw_reg.f *= inst->src[2].fixed_hw_reg.f;
             inst->src[2] = reg_undef;
             progress = true;
          }
@@ -2259,7 +2259,7 @@ fs_visitor::opt_algebraic()
          } else if (inst->src[1].file == IMM) {
             inst->opcode = BRW_OPCODE_MOV;
             inst->src[0] = component(inst->src[0],
-                                     inst->src[1].fixed_hw_reg.dw1.ud);
+                                     inst->src[1].fixed_hw_reg.ud);
             inst->sources = 1;
             inst->force_writemask_all = true;
             progress = true;
@@ -3081,7 +3081,7 @@ fs_visitor::lower_uniform_pull_constant_loads()
          fs_reg const_offset_reg = inst->src[1];
          assert(const_offset_reg.file == IMM &&
                 const_offset_reg.type == BRW_REGISTER_TYPE_UD);
-         const_offset_reg.fixed_hw_reg.dw1.ud /= 4;
+         const_offset_reg.fixed_hw_reg.ud /= 4;
 
          fs_reg payload, offset;
          if (devinfo->gen >= 9) {
@@ -3250,7 +3250,7 @@ fs_visitor::lower_integer_multiplication()
             continue;
 
          if (inst->src[1].file == IMM &&
-             inst->src[1].fixed_hw_reg.dw1.ud < (1 << 16)) {
+             inst->src[1].fixed_hw_reg.ud < (1 << 16)) {
             /* The MUL instruction isn't commutative. On Gen <= 6, only the low
              * 16-bits of src0 are read, and on Gen >= 7 only the low 16-bits of
              * src1 are used.
@@ -3326,8 +3326,8 @@ fs_visitor::lower_integer_multiplication()
                fs_reg src1_1_w = inst->src[1];
 
                if (inst->src[1].file == IMM) {
-                  src1_0_w.fixed_hw_reg.dw1.ud &= 0xffff;
-                  src1_1_w.fixed_hw_reg.dw1.ud >>= 16;
+                  src1_0_w.fixed_hw_reg.ud &= 0xffff;
+                  src1_1_w.fixed_hw_reg.ud >>= 16;
                } else {
                   src1_0_w.type = BRW_REGISTER_TYPE_UW;
                   if (src1_0_w.stride != 0) {
@@ -3482,7 +3482,7 @@ lower_fb_write_logical_send(const fs_builder &bld, fs_inst *inst,
    const fs_reg &src_stencil = inst->src[FB_WRITE_LOGICAL_SRC_SRC_STENCIL];
    fs_reg sample_mask = inst->src[FB_WRITE_LOGICAL_SRC_OMASK];
    const unsigned components =
-      inst->src[FB_WRITE_LOGICAL_SRC_COMPONENTS].fixed_hw_reg.dw1.ud;
+      inst->src[FB_WRITE_LOGICAL_SRC_COMPONENTS].fixed_hw_reg.ud;
 
    /* We can potentially have a message length of up to 15, so we have to set
     * base_mrf to either 0 or 1 in order to fit in m0..m15.
@@ -3822,7 +3822,7 @@ is_high_sampler(const struct brw_device_info *devinfo, const fs_reg &sampler)
    if (devinfo->gen < 8 && !devinfo->is_haswell)
       return false;
 
-   return sampler.file != IMM || sampler.fixed_hw_reg.dw1.ud >= 16;
+   return sampler.file != IMM || sampler.fixed_hw_reg.ud >= 16;
 }
 
 static void
@@ -4057,8 +4057,8 @@ lower_sampler_logical_send(const fs_builder &bld, fs_inst *inst, opcode op)
    const fs_reg &sampler = inst->src[6];
    const fs_reg &offset_value = inst->src[7];
    assert(inst->src[8].file == IMM && inst->src[9].file == IMM);
-   const unsigned coord_components = inst->src[8].fixed_hw_reg.dw1.ud;
-   const unsigned grad_components = inst->src[9].fixed_hw_reg.dw1.ud;
+   const unsigned coord_components = inst->src[8].fixed_hw_reg.ud;
+   const unsigned grad_components = inst->src[9].fixed_hw_reg.ud;
 
    if (devinfo->gen >= 7) {
       lower_sampler_logical_send_gen7(bld, inst, op, coordinate,
@@ -4384,7 +4384,7 @@ get_lowered_simd_width(const struct brw_device_info *devinfo,
        * circumstances it can end up with a message that is too long in SIMD16
        * mode.
        */
-      const unsigned coord_components = inst->src[8].fixed_hw_reg.dw1.ud;
+      const unsigned coord_components = inst->src[8].fixed_hw_reg.ud;
       /* First three arguments are the sample index and the two arguments for
        * the MCS data.
        */
@@ -4692,22 +4692,22 @@ fs_visitor::dump_instruction(backend_instruction *be_inst, FILE *file)
       case IMM:
          switch (inst->src[i].type) {
          case BRW_REGISTER_TYPE_F:
-            fprintf(file, "%ff", inst->src[i].fixed_hw_reg.dw1.f);
+            fprintf(file, "%ff", inst->src[i].fixed_hw_reg.f);
             break;
          case BRW_REGISTER_TYPE_W:
          case BRW_REGISTER_TYPE_D:
-            fprintf(file, "%dd", inst->src[i].fixed_hw_reg.dw1.d);
+            fprintf(file, "%dd", inst->src[i].fixed_hw_reg.d);
             break;
          case BRW_REGISTER_TYPE_UW:
          case BRW_REGISTER_TYPE_UD:
-            fprintf(file, "%uu", inst->src[i].fixed_hw_reg.dw1.ud);
+            fprintf(file, "%uu", inst->src[i].fixed_hw_reg.ud);
             break;
          case BRW_REGISTER_TYPE_VF:
             fprintf(file, "[%-gF, %-gF, %-gF, %-gF]",
-                    brw_vf_to_float((inst->src[i].fixed_hw_reg.dw1.ud >>  0) & 0xff),
-                    brw_vf_to_float((inst->src[i].fixed_hw_reg.dw1.ud >>  8) & 0xff),
-                    brw_vf_to_float((inst->src[i].fixed_hw_reg.dw1.ud >> 16) & 0xff),
-                    brw_vf_to_float((inst->src[i].fixed_hw_reg.dw1.ud >> 24) & 0xff));
+                    brw_vf_to_float((inst->src[i].fixed_hw_reg.ud >>  0) & 0xff),
+                    brw_vf_to_float((inst->src[i].fixed_hw_reg.ud >>  8) & 0xff),
+                    brw_vf_to_float((inst->src[i].fixed_hw_reg.ud >> 16) & 0xff),
+                    brw_vf_to_float((inst->src[i].fixed_hw_reg.ud >> 24) & 0xff));
             break;
          default:
             fprintf(file, "???");
diff --git a/src/mesa/drivers/dri/i965/brw_fs_combine_constants.cpp b/src/mesa/drivers/dri/i965/brw_fs_combine_constants.cpp
index c182232285e..504c4b6171c 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_combine_constants.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_combine_constants.cpp
@@ -219,7 +219,7 @@ fs_visitor::opt_combine_constants()
              inst->src[i].type != BRW_REGISTER_TYPE_F)
             continue;
 
-         float val = fabsf(inst->src[i].fixed_hw_reg.dw1.f);
+         float val = fabsf(inst->src[i].fixed_hw_reg.f);
          struct imm *imm = find_imm(&table, val);
 
          if (imm) {
@@ -299,9 +299,9 @@ fs_visitor::opt_combine_constants()
          reg->reg = table.imm[i].reg;
          reg->subreg_offset = table.imm[i].subreg_offset;
          reg->stride = 0;
-         reg->negate = signbit(reg->fixed_hw_reg.dw1.f) !=
+         reg->negate = signbit(reg->fixed_hw_reg.f) !=
                                signbit(table.imm[i].val);
-         assert(fabsf(reg->fixed_hw_reg.dw1.f) == table.imm[i].val);
+         assert(fabsf(reg->fixed_hw_reg.f) == table.imm[i].val);
       }
    }
 
diff --git a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
index 26204827156..17989e3bfd0 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
@@ -369,8 +369,8 @@ fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry)
       switch(inst->opcode) {
       case BRW_OPCODE_SEL:
          if (inst->src[1].file != IMM ||
-             inst->src[1].fixed_hw_reg.dw1.f < 0.0 ||
-             inst->src[1].fixed_hw_reg.dw1.f > 1.0) {
+             inst->src[1].fixed_hw_reg.f < 0.0 ||
+             inst->src[1].fixed_hw_reg.f > 1.0) {
             return false;
          }
          break;
@@ -605,10 +605,10 @@ fs_visitor::try_constant_propagate(fs_inst *inst, acp_entry *entry)
           * anyway.
           */
          assert(i == 0);
-         if (inst->src[0].fixed_hw_reg.dw1.f != 0.0f) {
+         if (inst->src[0].fixed_hw_reg.f != 0.0f) {
             inst->opcode = BRW_OPCODE_MOV;
             inst->src[0] = val;
-            inst->src[0].fixed_hw_reg.dw1.f = 1.0f / inst->src[0].fixed_hw_reg.dw1.f;
+            inst->src[0].fixed_hw_reg.f = 1.0f / inst->src[0].fixed_hw_reg.f;
             progress = true;
          }
          break;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
index 3a28c8d591d..05d64deab79 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
@@ -110,20 +110,20 @@ operands_match(const fs_inst *a, const fs_inst *b, bool *negate)
               (xs[2].equals(ys[1]) && xs[1].equals(ys[2])));
    } else if (a->opcode == BRW_OPCODE_MUL && a->dst.type == BRW_REGISTER_TYPE_F) {
       bool xs0_negate = xs[0].negate;
-      bool xs1_negate = xs[1].file == IMM ? xs[1].fixed_hw_reg.dw1.f < 0.0f
+      bool xs1_negate = xs[1].file == IMM ? xs[1].fixed_hw_reg.f < 0.0f
                                           : xs[1].negate;
       bool ys0_negate = ys[0].negate;
-      bool ys1_negate = ys[1].file == IMM ? ys[1].fixed_hw_reg.dw1.f < 0.0f
+      bool ys1_negate = ys[1].file == IMM ? ys[1].fixed_hw_reg.f < 0.0f
                                           : ys[1].negate;
-      float xs1_imm = xs[1].fixed_hw_reg.dw1.f;
-      float ys1_imm = ys[1].fixed_hw_reg.dw1.f;
+      float xs1_imm = xs[1].fixed_hw_reg.f;
+      float ys1_imm = ys[1].fixed_hw_reg.f;
 
       xs[0].negate = false;
       xs[1].negate = false;
       ys[0].negate = false;
       ys[1].negate = false;
-      xs[1].fixed_hw_reg.dw1.f = fabsf(xs[1].fixed_hw_reg.dw1.f);
-      ys[1].fixed_hw_reg.dw1.f = fabsf(ys[1].fixed_hw_reg.dw1.f);
+      xs[1].fixed_hw_reg.f = fabsf(xs[1].fixed_hw_reg.f);
+      ys[1].fixed_hw_reg.f = fabsf(ys[1].fixed_hw_reg.f);
 
       bool ret = (xs[0].equals(ys[0]) && xs[1].equals(ys[1])) ||
                  (xs[1].equals(ys[0]) && xs[0].equals(ys[1]));
@@ -132,8 +132,8 @@ operands_match(const fs_inst *a, const fs_inst *b, bool *negate)
       xs[1].negate = xs[1].file == IMM ? false : xs1_negate;
       ys[0].negate = ys0_negate;
       ys[1].negate = ys[1].file == IMM ? false : ys1_negate;
-      xs[1].fixed_hw_reg.dw1.f = xs1_imm;
-      ys[1].fixed_hw_reg.dw1.f = ys1_imm;
+      xs[1].fixed_hw_reg.f = xs1_imm;
+      ys[1].fixed_hw_reg.f = ys1_imm;
 
       *negate = (xs0_negate != xs1_negate) != (ys0_negate != ys1_negate);
       return ret;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
index 48775047e9d..5fd104a64e8 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -91,22 +91,22 @@ brw_reg_from_fs_reg(fs_inst *inst, fs_reg *reg, unsigned gen)
 
       switch (reg->type) {
       case BRW_REGISTER_TYPE_F:
-	 brw_reg = brw_imm_f(reg->fixed_hw_reg.dw1.f);
+	 brw_reg = brw_imm_f(reg->fixed_hw_reg.f);
 	 break;
       case BRW_REGISTER_TYPE_D:
-	 brw_reg = brw_imm_d(reg->fixed_hw_reg.dw1.d);
+	 brw_reg = brw_imm_d(reg->fixed_hw_reg.d);
 	 break;
       case BRW_REGISTER_TYPE_UD:
-	 brw_reg = brw_imm_ud(reg->fixed_hw_reg.dw1.ud);
+	 brw_reg = brw_imm_ud(reg->fixed_hw_reg.ud);
 	 break;
       case BRW_REGISTER_TYPE_W:
-	 brw_reg = brw_imm_w(reg->fixed_hw_reg.dw1.d);
+	 brw_reg = brw_imm_w(reg->fixed_hw_reg.d);
 	 break;
       case BRW_REGISTER_TYPE_UW:
-	 brw_reg = brw_imm_uw(reg->fixed_hw_reg.dw1.ud);
+	 brw_reg = brw_imm_uw(reg->fixed_hw_reg.ud);
 	 break;
       case BRW_REGISTER_TYPE_VF:
-         brw_reg = brw_imm_vf(reg->fixed_hw_reg.dw1.ud);
+         brw_reg = brw_imm_vf(reg->fixed_hw_reg.ud);
          break;
       default:
 	 unreachable("not reached");
@@ -658,7 +658,7 @@ fs_generator::generate_get_buffer_size(fs_inst *inst,
               retype(dst, BRW_REGISTER_TYPE_UW),
               inst->base_mrf,
               src,
-              surf_index.dw1.ud,
+              surf_index.ud,
               0,
               GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO,
               rlen, /* response length */
@@ -666,6 +666,8 @@ fs_generator::generate_get_buffer_size(fs_inst *inst,
               inst->header_size > 0,
               simd_mode,
               BRW_SAMPLER_RETURN_FORMAT_SINT32);
+
+   brw_mark_surface_used(prog_data, surf_index.ud);
 }
 
 void
@@ -907,7 +909,7 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src
          : prog_data->binding_table.texture_start;
 
    if (sampler_index.file == BRW_IMMEDIATE_VALUE) {
-      uint32_t sampler = sampler_index.dw1.ud;
+      uint32_t sampler = sampler_index.ud;
 
       brw_SAMPLE(p,
                  retype(dst, BRW_REGISTER_TYPE_UW),
@@ -1174,11 +1176,11 @@ fs_generator::generate_uniform_pull_constant_load(fs_inst *inst,
 
    assert(index.file == BRW_IMMEDIATE_VALUE &&
 	  index.type == BRW_REGISTER_TYPE_UD);
-   uint32_t surf_index = index.dw1.ud;
+   uint32_t surf_index = index.ud;
 
    assert(offset.file == BRW_IMMEDIATE_VALUE &&
 	  offset.type == BRW_REGISTER_TYPE_UD);
-   uint32_t read_offset = offset.dw1.ud;
+   uint32_t read_offset = offset.ud;
 
    brw_oword_block_read(p, dst, brw_message_reg(inst->base_mrf),
 			read_offset, surf_index);
@@ -1223,7 +1225,7 @@ fs_generator::generate_uniform_pull_constant_load_gen7(fs_inst *inst,
 
    if (index.file == BRW_IMMEDIATE_VALUE) {
 
-      uint32_t surf_index = index.dw1.ud;
+      uint32_t surf_index = index.ud;
 
       brw_push_insn_state(p);
       brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
@@ -1286,7 +1288,7 @@ fs_generator::generate_varying_pull_constant_load(fs_inst *inst,
 
    assert(index.file == BRW_IMMEDIATE_VALUE &&
 	  index.type == BRW_REGISTER_TYPE_UD);
-   uint32_t surf_index = index.dw1.ud;
+   uint32_t surf_index = index.ud;
 
    uint32_t simd_mode, rlen, msg_type;
    if (dispatch_width == 16) {
@@ -1366,7 +1368,7 @@ fs_generator::generate_varying_pull_constant_load_gen7(fs_inst *inst,
 
    if (index.file == BRW_IMMEDIATE_VALUE) {
 
-      uint32_t surf_index = index.dw1.ud;
+      uint32_t surf_index = index.ud;
 
       brw_inst *send = brw_next_insn(p, BRW_OPCODE_SEND);
       brw_set_dest(p, send, retype(dst, BRW_REGISTER_TYPE_UW));
@@ -2052,7 +2054,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
       case FS_OPCODE_DDY_COARSE:
       case FS_OPCODE_DDY_FINE:
          assert(src[1].file == BRW_IMMEDIATE_VALUE);
-         generate_ddy(inst->opcode, dst, src[0], src[1].dw1.ud);
+         generate_ddy(inst->opcode, dst, src[0], src[1].ud);
 	 break;
 
       case SHADER_OPCODE_GEN4_SCRATCH_WRITE:
@@ -2120,37 +2122,37 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
 
       case SHADER_OPCODE_UNTYPED_ATOMIC:
          assert(src[2].file == BRW_IMMEDIATE_VALUE);
-         brw_untyped_atomic(p, dst, src[0], src[1], src[2].dw1.ud,
+         brw_untyped_atomic(p, dst, src[0], src[1], src[2].ud,
                             inst->mlen, !inst->dst.is_null());
          break;
 
       case SHADER_OPCODE_UNTYPED_SURFACE_READ:
          assert(src[2].file == BRW_IMMEDIATE_VALUE);
          brw_untyped_surface_read(p, dst, src[0], src[1],
-                                  inst->mlen, src[2].dw1.ud);
+                                  inst->mlen, src[2].ud);
          break;
 
       case SHADER_OPCODE_UNTYPED_SURFACE_WRITE:
          assert(src[2].file == BRW_IMMEDIATE_VALUE);
          brw_untyped_surface_write(p, src[0], src[1],
-                                   inst->mlen, src[2].dw1.ud);
+                                   inst->mlen, src[2].ud);
          break;
 
       case SHADER_OPCODE_TYPED_ATOMIC:
          assert(src[2].file == BRW_IMMEDIATE_VALUE);
          brw_typed_atomic(p, dst, src[0], src[1],
-                          src[2].dw1.ud, inst->mlen, !inst->dst.is_null());
+                          src[2].ud, inst->mlen, !inst->dst.is_null());
          break;
 
       case SHADER_OPCODE_TYPED_SURFACE_READ:
          assert(src[2].file == BRW_IMMEDIATE_VALUE);
          brw_typed_surface_read(p, dst, src[0], src[1],
-                                inst->mlen, src[2].dw1.ud);
+                                inst->mlen, src[2].ud);
          break;
 
       case SHADER_OPCODE_TYPED_SURFACE_WRITE:
          assert(src[2].file == BRW_IMMEDIATE_VALUE);
-         brw_typed_surface_write(p, src[0], src[1], inst->mlen, src[2].dw1.ud);
+         brw_typed_surface_write(p, src[0], src[1], inst->mlen, src[2].ud);
          break;
 
       case SHADER_OPCODE_MEMORY_FENCE:
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index 213c9120b50..d6be2d598be 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -322,7 +322,7 @@ fs_visitor::emit_texture(ir_texture_opcode op,
       inst->shadow_compare = true;
 
    if (offset_value.file == IMM)
-      inst->offset = offset_value.fixed_hw_reg.dw1.ud;
+      inst->offset = offset_value.fixed_hw_reg.ud;
 
    if (op == ir_tg4) {
       inst->offset |=
@@ -949,7 +949,7 @@ fs_visitor::emit_urb_writes(const fs_reg &gs_vertex_count)
       fs_reg offset;
       if (gs_vertex_count.file == IMM) {
          per_slot_offsets = fs_reg(output_vertex_size_owords *
-                                   gs_vertex_count.fixed_hw_reg.dw1.ud);
+                                   gs_vertex_count.fixed_hw_reg.ud);
       } else {
          per_slot_offsets = vgrf(glsl_type::int_type);
          bld.MUL(per_slot_offsets, gs_vertex_count,
diff --git a/src/mesa/drivers/dri/i965/brw_reg.h b/src/mesa/drivers/dri/i965/brw_reg.h
index c3f77c0210a..af2a49b3097 100644
--- a/src/mesa/drivers/dri/i965/brw_reg.h
+++ b/src/mesa/drivers/dri/i965/brw_reg.h
@@ -249,12 +249,12 @@ struct brw_reg {
          unsigned writemask:4;    /* dest only, align16 only */
          int  indirect_offset:10; /* relative addressing offset */
          unsigned pad1:10;        /* two dwords total */
-      } bits;
+      };
 
       float f;
       int   d;
       unsigned ud;
-   } dw1;
+   };
 };
 
 
@@ -369,10 +369,10 @@ brw_reg(unsigned file,
     * keep track of as you'd want it adjusted by suboffset(), etc.
     * Perhaps fix up when converting to align16?
     */
-   reg.dw1.bits.swizzle = swizzle;
-   reg.dw1.bits.writemask = writemask;
-   reg.dw1.bits.indirect_offset = 0;
-   reg.dw1.bits.pad1 = 0;
+   reg.swizzle = swizzle;
+   reg.writemask = writemask;
+   reg.indirect_offset = 0;
+   reg.pad1 = 0;
    return reg;
 }
 
@@ -569,7 +569,7 @@ static inline struct brw_reg
 brw_imm_f(float f)
 {
    struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_F);
-   imm.dw1.f = f;
+   imm.f = f;
    return imm;
 }
 
@@ -578,7 +578,7 @@ static inline struct brw_reg
 brw_imm_d(int d)
 {
    struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_D);
-   imm.dw1.d = d;
+   imm.d = d;
    return imm;
 }
 
@@ -587,7 +587,7 @@ static inline struct brw_reg
 brw_imm_ud(unsigned ud)
 {
    struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_UD);
-   imm.dw1.ud = ud;
+   imm.ud = ud;
    return imm;
 }
 
@@ -596,7 +596,7 @@ static inline struct brw_reg
 brw_imm_uw(uint16_t uw)
 {
    struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_UW);
-   imm.dw1.ud = uw | (uw << 16);
+   imm.ud = uw | (uw << 16);
    return imm;
 }
 
@@ -605,7 +605,7 @@ static inline struct brw_reg
 brw_imm_w(int16_t w)
 {
    struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_W);
-   imm.dw1.d = w | (w << 16);
+   imm.d = w | (w << 16);
    return imm;
 }
 
@@ -621,7 +621,7 @@ brw_imm_v(unsigned v)
    imm.vstride = BRW_VERTICAL_STRIDE_0;
    imm.width = BRW_WIDTH_8;
    imm.hstride = BRW_HORIZONTAL_STRIDE_1;
-   imm.dw1.ud = v;
+   imm.ud = v;
    return imm;
 }
 
@@ -633,7 +633,7 @@ brw_imm_vf(unsigned v)
    imm.vstride = BRW_VERTICAL_STRIDE_0;
    imm.width = BRW_WIDTH_4;
    imm.hstride = BRW_HORIZONTAL_STRIDE_1;
-   imm.dw1.ud = v;
+   imm.ud = v;
    return imm;
 }
 
@@ -923,8 +923,8 @@ brw_swizzle(struct brw_reg reg, unsigned x, unsigned y, unsigned z, unsigned w)
 {
    assert(reg.file != BRW_IMMEDIATE_VALUE);
 
-   reg.dw1.bits.swizzle = brw_compose_swizzle(BRW_SWIZZLE4(x, y, z, w),
-                                              reg.dw1.bits.swizzle);
+   reg.swizzle = brw_compose_swizzle(BRW_SWIZZLE4(x, y, z, w),
+                                              reg.swizzle);
    return reg;
 }
 
@@ -939,7 +939,7 @@ static inline struct brw_reg
 brw_writemask(struct brw_reg reg, unsigned mask)
 {
    assert(reg.file != BRW_IMMEDIATE_VALUE);
-   reg.dw1.bits.writemask &= mask;
+   reg.writemask &= mask;
    return reg;
 }
 
@@ -947,7 +947,7 @@ static inline struct brw_reg
 brw_set_writemask(struct brw_reg reg, unsigned mask)
 {
    assert(reg.file != BRW_IMMEDIATE_VALUE);
-   reg.dw1.bits.writemask = mask;
+   reg.writemask = mask;
    return reg;
 }
 
@@ -980,7 +980,7 @@ brw_vec4_indirect(unsigned subnr, int offset)
    struct brw_reg reg =  brw_vec4_grf(0, 0);
    reg.subnr = subnr;
    reg.address_mode = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER;
-   reg.dw1.bits.indirect_offset = offset;
+   reg.indirect_offset = offset;
    return reg;
 }
 
@@ -990,7 +990,7 @@ brw_vec1_indirect(unsigned subnr, int offset)
    struct brw_reg reg =  brw_vec1_grf(0, 0);
    reg.subnr = subnr;
    reg.address_mode = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER;
-   reg.dw1.bits.indirect_offset = offset;
+   reg.indirect_offset = offset;
    return reg;
 }
 
@@ -1001,7 +1001,7 @@ brw_VxH_indirect(unsigned subnr, int offset)
    reg.vstride = BRW_VERTICAL_STRIDE_ONE_DIMENSIONAL;
    reg.subnr = subnr;
    reg.address_mode = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER;
-   reg.dw1.bits.indirect_offset = offset;
+   reg.indirect_offset = offset;
    return reg;
 }
 
diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp
index de21e769871..8391a2cb9a0 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -564,7 +564,7 @@ brw_saturate_immediate(enum brw_reg_type type, struct brw_reg *reg)
       unsigned ud;
       int d;
       float f;
-   } imm = { reg->dw1.ud }, sat_imm = { 0 };
+   } imm = { reg->ud }, sat_imm = { 0 };
 
    switch (type) {
    case BRW_REGISTER_TYPE_UD:
@@ -595,7 +595,7 @@ brw_saturate_immediate(enum brw_reg_type type, struct brw_reg *reg)
    }
 
    if (imm.ud != sat_imm.ud) {
-      reg->dw1.ud = sat_imm.ud;
+      reg->ud = sat_imm.ud;
       return true;
    }
    return false;
@@ -607,17 +607,17 @@ brw_negate_immediate(enum brw_reg_type type, struct brw_reg *reg)
    switch (type) {
    case BRW_REGISTER_TYPE_D:
    case BRW_REGISTER_TYPE_UD:
-      reg->dw1.d = -reg->dw1.d;
+      reg->d = -reg->d;
       return true;
    case BRW_REGISTER_TYPE_W:
    case BRW_REGISTER_TYPE_UW:
-      reg->dw1.d = -(int16_t)reg->dw1.ud;
+      reg->d = -(int16_t)reg->ud;
       return true;
    case BRW_REGISTER_TYPE_F:
-      reg->dw1.f = -reg->dw1.f;
+      reg->f = -reg->f;
       return true;
    case BRW_REGISTER_TYPE_VF:
-      reg->dw1.ud ^= 0x80808080;
+      reg->ud ^= 0x80808080;
       return true;
    case BRW_REGISTER_TYPE_UB:
    case BRW_REGISTER_TYPE_B:
@@ -641,16 +641,16 @@ brw_abs_immediate(enum brw_reg_type type, struct brw_reg *reg)
 {
    switch (type) {
    case BRW_REGISTER_TYPE_D:
-      reg->dw1.d = abs(reg->dw1.d);
+      reg->d = abs(reg->d);
       return true;
    case BRW_REGISTER_TYPE_W:
-      reg->dw1.d = abs((int16_t)reg->dw1.ud);
+      reg->d = abs((int16_t)reg->ud);
       return true;
    case BRW_REGISTER_TYPE_F:
-      reg->dw1.f = fabsf(reg->dw1.f);
+      reg->f = fabsf(reg->f);
       return true;
    case BRW_REGISTER_TYPE_VF:
-      reg->dw1.ud &= ~0x80808080;
+      reg->ud &= ~0x80808080;
       return true;
    case BRW_REGISTER_TYPE_UB:
    case BRW_REGISTER_TYPE_B:
@@ -700,7 +700,7 @@ backend_reg::is_zero() const
    if (file != IMM)
       return false;
 
-   return fixed_hw_reg.dw1.d == 0;
+   return fixed_hw_reg.d == 0;
 }
 
 bool
@@ -710,8 +710,8 @@ backend_reg::is_one() const
       return false;
 
    return type == BRW_REGISTER_TYPE_F
-          ? fixed_hw_reg.dw1.f == 1.0
-          : fixed_hw_reg.dw1.d == 1;
+          ? fixed_hw_reg.f == 1.0
+          : fixed_hw_reg.d == 1;
 }
 
 bool
@@ -722,9 +722,9 @@ backend_reg::is_negative_one() const
 
    switch (type) {
    case BRW_REGISTER_TYPE_F:
-      return fixed_hw_reg.dw1.f == -1.0;
+      return fixed_hw_reg.f == -1.0;
    case BRW_REGISTER_TYPE_D:
-      return fixed_hw_reg.dw1.d == -1;
+      return fixed_hw_reg.d == -1;
    default:
       return false;
    }
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index 5cba3b31dc2..fb5e2016006 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -77,7 +77,7 @@ src_reg::src_reg(float f)
 
    this->file = IMM;
    this->type = BRW_REGISTER_TYPE_F;
-   this->fixed_hw_reg.dw1.f = f;
+   this->fixed_hw_reg.f = f;
 }
 
 src_reg::src_reg(uint32_t u)
@@ -86,7 +86,7 @@ src_reg::src_reg(uint32_t u)
 
    this->file = IMM;
    this->type = BRW_REGISTER_TYPE_UD;
-   this->fixed_hw_reg.dw1.ud = u;
+   this->fixed_hw_reg.ud = u;
 }
 
 src_reg::src_reg(int32_t i)
@@ -95,7 +95,7 @@ src_reg::src_reg(int32_t i)
 
    this->file = IMM;
    this->type = BRW_REGISTER_TYPE_D;
-   this->fixed_hw_reg.dw1.d = i;
+   this->fixed_hw_reg.d = i;
 }
 
 src_reg::src_reg(uint8_t vf[4])
@@ -104,7 +104,7 @@ src_reg::src_reg(uint8_t vf[4])
 
    this->file = IMM;
    this->type = BRW_REGISTER_TYPE_VF;
-   memcpy(&this->fixed_hw_reg.dw1.ud, vf, sizeof(unsigned));
+   memcpy(&this->fixed_hw_reg.ud, vf, sizeof(unsigned));
 }
 
 src_reg::src_reg(uint8_t vf0, uint8_t vf1, uint8_t vf2, uint8_t vf3)
@@ -113,7 +113,7 @@ src_reg::src_reg(uint8_t vf0, uint8_t vf1, uint8_t vf2, uint8_t vf3)
 
    this->file = IMM;
    this->type = BRW_REGISTER_TYPE_VF;
-   this->fixed_hw_reg.dw1.ud = (vf0 <<  0) |
+   this->fixed_hw_reg.ud = (vf0 <<  0) |
                                (vf1 <<  8) |
                                (vf2 << 16) |
                                (vf3 << 24);
@@ -397,7 +397,7 @@ vec4_visitor::opt_vector_float()
           inst->src[0].file != IMM)
          continue;
 
-      int vf = brw_float_to_vf(inst->src[0].fixed_hw_reg.dw1.f);
+      int vf = brw_float_to_vf(inst->src[0].fixed_hw_reg.f);
       if (vf == -1)
          continue;
 
@@ -1467,20 +1467,20 @@ vec4_visitor::dump_instruction(backend_instruction *be_inst, FILE *file)
       case IMM:
          switch (inst->src[i].type) {
          case BRW_REGISTER_TYPE_F:
-            fprintf(file, "%fF", inst->src[i].fixed_hw_reg.dw1.f);
+            fprintf(file, "%fF", inst->src[i].fixed_hw_reg.f);
             break;
          case BRW_REGISTER_TYPE_D:
-            fprintf(file, "%dD", inst->src[i].fixed_hw_reg.dw1.d);
+            fprintf(file, "%dD", inst->src[i].fixed_hw_reg.d);
             break;
          case BRW_REGISTER_TYPE_UD:
-            fprintf(file, "%uU", inst->src[i].fixed_hw_reg.dw1.ud);
+            fprintf(file, "%uU", inst->src[i].fixed_hw_reg.ud);
             break;
          case BRW_REGISTER_TYPE_VF:
             fprintf(file, "[%-gF, %-gF, %-gF, %-gF]",
-                    brw_vf_to_float((inst->src[i].fixed_hw_reg.dw1.ud >>  0) & 0xff),
-                    brw_vf_to_float((inst->src[i].fixed_hw_reg.dw1.ud >>  8) & 0xff),
-                    brw_vf_to_float((inst->src[i].fixed_hw_reg.dw1.ud >> 16) & 0xff),
-                    brw_vf_to_float((inst->src[i].fixed_hw_reg.dw1.ud >> 24) & 0xff));
+                    brw_vf_to_float((inst->src[i].fixed_hw_reg.ud >>  0) & 0xff),
+                    brw_vf_to_float((inst->src[i].fixed_hw_reg.ud >>  8) & 0xff),
+                    brw_vf_to_float((inst->src[i].fixed_hw_reg.ud >> 16) & 0xff),
+                    brw_vf_to_float((inst->src[i].fixed_hw_reg.ud >> 24) & 0xff));
             break;
          default:
             fprintf(file, "???");
@@ -1597,7 +1597,7 @@ vec4_visitor::lower_attributes_to_hw_regs(const int *attribute_map,
 
 	 struct brw_reg reg = attribute_to_hw_reg(grf, interleaved);
 	 reg.type = inst->dst.type;
-	 reg.dw1.bits.writemask = inst->dst.writemask;
+	 reg.writemask = inst->dst.writemask;
 
 	 inst->dst.file = HW_REG;
 	 inst->dst.fixed_hw_reg = reg;
@@ -1615,7 +1615,7 @@ vec4_visitor::lower_attributes_to_hw_regs(const int *attribute_map,
          assert(grf != 0);
 
 	 struct brw_reg reg = attribute_to_hw_reg(grf, interleaved);
-	 reg.dw1.bits.swizzle = inst->src[i].swizzle;
+	 reg.swizzle = inst->src[i].swizzle;
          reg.type = inst->src[i].type;
 	 if (inst->src[i].abs)
 	    reg = brw_abs(reg);
@@ -1810,14 +1810,14 @@ vec4_visitor::convert_to_hw_regs()
          case GRF:
             reg = brw_vec8_grf(src.reg + src.reg_offset, 0);
             reg.type = src.type;
-            reg.dw1.bits.swizzle = src.swizzle;
+            reg.swizzle = src.swizzle;
             reg.abs = src.abs;
             reg.negate = src.negate;
             break;
 
          case IMM:
             reg = brw_imm_reg(src.type);
-            reg.dw1.ud = src.fixed_hw_reg.dw1.ud;
+            reg.ud = src.fixed_hw_reg.ud;
             break;
 
          case UNIFORM:
@@ -1826,7 +1826,7 @@ vec4_visitor::convert_to_hw_regs()
                                       ((src.reg + src.reg_offset) % 2) * 4),
                          0, 4, 1);
             reg.type = src.type;
-            reg.dw1.bits.swizzle = src.swizzle;
+            reg.swizzle = src.swizzle;
             reg.abs = src.abs;
             reg.negate = src.negate;
 
@@ -1857,14 +1857,14 @@ vec4_visitor::convert_to_hw_regs()
       case GRF:
          reg = brw_vec8_grf(dst.reg + dst.reg_offset, 0);
          reg.type = dst.type;
-         reg.dw1.bits.writemask = dst.writemask;
+         reg.writemask = dst.writemask;
          break;
 
       case MRF:
          assert(((dst.reg + dst.reg_offset) & ~(1 << 7)) < BRW_MAX_MRF(devinfo->gen));
          reg = brw_message_reg(dst.reg + dst.reg_offset);
          reg.type = dst.type;
-         reg.dw1.bits.writemask = dst.writemask;
+         reg.writemask = dst.writemask;
          break;
 
       case HW_REG:
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp b/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp
index db99ecba35a..f37f6084af6 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp
@@ -147,7 +147,7 @@ try_constant_propagate(const struct brw_device_info *devinfo,
    }
 
    if (value.type == BRW_REGISTER_TYPE_VF)
-      value.fixed_hw_reg.dw1.ud = swizzle_vf_imm(value.fixed_hw_reg.dw1.ud,
+      value.fixed_hw_reg.ud = swizzle_vf_imm(value.fixed_hw_reg.ud,
                                                  inst->src[arg].swizzle);
 
    switch (inst->opcode) {
@@ -359,8 +359,8 @@ try_copy_propagate(const struct brw_device_info *devinfo,
              inst->src[0].type != BRW_REGISTER_TYPE_F ||
              inst->src[1].file != IMM ||
              inst->src[1].type != BRW_REGISTER_TYPE_F ||
-             inst->src[1].fixed_hw_reg.dw1.f < 0.0 ||
-             inst->src[1].fixed_hw_reg.dw1.f > 1.0) {
+             inst->src[1].fixed_hw_reg.f < 0.0 ||
+             inst->src[1].fixed_hw_reg.f > 1.0) {
             return false;
          }
          if (!inst->saturate)
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
index 531acb37b6a..74d67cb0820 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
@@ -46,7 +46,7 @@ check_gen6_math_src_arg(struct brw_reg src)
    /* Source swizzles are ignored. */
    assert(!src.abs);
    assert(!src.negate);
-   assert(src.dw1.bits.swizzle == BRW_SWIZZLE_XYZW);
+   assert(src.swizzle == BRW_SWIZZLE_XYZW);
 }
 
 static void
@@ -57,7 +57,7 @@ generate_math_gen6(struct brw_codegen *p,
                    struct brw_reg src1)
 {
    /* Can't do writemask because math can't be align16. */
-   assert(dst.dw1.bits.writemask == WRITEMASK_XYZW);
+   assert(dst.writemask == WRITEMASK_XYZW);
    /* Source swizzles are ignored. */
    check_gen6_math_src_arg(src0);
    if (src1.file == BRW_GENERAL_REGISTER_FILE)
@@ -264,7 +264,7 @@ generate_tex(struct brw_codegen *p,
          : prog_data->base.binding_table.texture_start;
 
    if (sampler_index.file == BRW_IMMEDIATE_VALUE) {
-      uint32_t sampler = sampler_index.dw1.ud;
+      uint32_t sampler = sampler_index.ud;
 
       brw_SAMPLE(p,
                  dst,
@@ -419,10 +419,10 @@ generate_gs_set_write_offset(struct brw_codegen *p,
    assert(p->devinfo->gen >= 7 &&
           src1.file == BRW_IMMEDIATE_VALUE &&
           src1.type == BRW_REGISTER_TYPE_UD &&
-          src1.dw1.ud <= USHRT_MAX);
+          src1.ud <= USHRT_MAX);
    if (src0.file == BRW_IMMEDIATE_VALUE) {
       brw_MOV(p, suboffset(stride(dst, 2, 2, 1), 3),
-              brw_imm_ud(src0.dw1.ud * src1.dw1.ud));
+              brw_imm_ud(src0.ud * src1.ud));
    } else {
       brw_MUL(p, suboffset(stride(dst, 2, 2, 1), 3), stride(src0, 8, 2, 4),
               retype(src1, BRW_REGISTER_TYPE_UW));
@@ -740,7 +740,7 @@ generate_oword_dual_block_offsets(struct brw_codegen *p,
    brw_MOV(p, m1_0, index_0);
 
    if (index.file == BRW_IMMEDIATE_VALUE) {
-      index_4.dw1.ud += second_vertex_offset;
+      index_4.ud += second_vertex_offset;
       brw_MOV(p, m1_4, index_4);
    } else {
       brw_ADD(p, m1_4, index_4, brw_imm_d(second_vertex_offset));
@@ -895,7 +895,7 @@ generate_pull_constant_load(struct brw_codegen *p,
    const struct brw_device_info *devinfo = p->devinfo;
    assert(index.file == BRW_IMMEDIATE_VALUE &&
 	  index.type == BRW_REGISTER_TYPE_UD);
-   uint32_t surf_index = index.dw1.ud;
+   uint32_t surf_index = index.ud;
 
    struct brw_reg header = brw_vec8_grf(0, 0);
 
@@ -947,7 +947,7 @@ generate_get_buffer_size(struct brw_codegen *p,
               dst,
               inst->base_mrf,
               src,
-              surf_index.dw1.ud,
+              surf_index.ud,
               0,
               GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO,
               1, /* response length */
@@ -955,6 +955,8 @@ generate_get_buffer_size(struct brw_codegen *p,
               inst->header_size > 0,
               BRW_SAMPLER_SIMD_MODE_SIMD4X2,
               BRW_SAMPLER_RETURN_FORMAT_SINT32);
+
+   brw_mark_surface_used(&prog_data->base, surf_index.ud);
 }
 
 static void
@@ -973,7 +975,7 @@ generate_pull_constant_load_gen7(struct brw_codegen *p,
       brw_set_dest(p, insn, dst);
       brw_set_src0(p, insn, offset);
       brw_set_sampler_message(p, insn,
-                              surf_index.dw1.ud,
+                              surf_index.ud,
                               0, /* LD message ignores sampler unit */
                               GEN5_SAMPLER_MESSAGE_SAMPLE_LD,
                               1, /* rlen */
@@ -981,6 +983,9 @@ generate_pull_constant_load_gen7(struct brw_codegen *p,
                               inst->header_size != 0,
                               BRW_SAMPLER_SIMD_MODE_SIMD4X2,
                               0);
+
+      brw_mark_surface_used(&prog_data->base, surf_index.ud);
+
    } else {
 
       struct brw_reg addr = vec1(retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD));
@@ -1410,38 +1415,38 @@ generate_code(struct brw_codegen *p,
 
       case SHADER_OPCODE_UNTYPED_ATOMIC:
          assert(src[2].file == BRW_IMMEDIATE_VALUE);
-         brw_untyped_atomic(p, dst, src[0], src[1], src[2].dw1.ud, inst->mlen,
+         brw_untyped_atomic(p, dst, src[0], src[1], src[2].ud, inst->mlen,
                             !inst->dst.is_null());
          break;
 
       case SHADER_OPCODE_UNTYPED_SURFACE_READ:
          assert(src[2].file == BRW_IMMEDIATE_VALUE);
          brw_untyped_surface_read(p, dst, src[0], src[1], inst->mlen,
-                                  src[2].dw1.ud);
+                                  src[2].ud);
          break;
 
       case SHADER_OPCODE_UNTYPED_SURFACE_WRITE:
          assert(src[2].file == BRW_IMMEDIATE_VALUE);
          brw_untyped_surface_write(p, src[0], src[1], inst->mlen,
-                                   src[2].dw1.ud);
+                                   src[2].ud);
          break;
 
       case SHADER_OPCODE_TYPED_ATOMIC:
          assert(src[2].file == BRW_IMMEDIATE_VALUE);
-         brw_typed_atomic(p, dst, src[0], src[1], src[2].dw1.ud, inst->mlen,
+         brw_typed_atomic(p, dst, src[0], src[1], src[2].ud, inst->mlen,
                           !inst->dst.is_null());
          break;
 
       case SHADER_OPCODE_TYPED_SURFACE_READ:
          assert(src[2].file == BRW_IMMEDIATE_VALUE);
          brw_typed_surface_read(p, dst, src[0], src[1], inst->mlen,
-                                src[2].dw1.ud);
+                                src[2].ud);
          break;
 
       case SHADER_OPCODE_TYPED_SURFACE_WRITE:
          assert(src[2].file == BRW_IMMEDIATE_VALUE);
          brw_typed_surface_write(p, src[0], src[1], inst->mlen,
-                                 src[2].dw1.ud);
+                                 src[2].ud);
          break;
 
       case SHADER_OPCODE_MEMORY_FENCE:
@@ -1489,9 +1494,9 @@ generate_code(struct brw_codegen *p,
           *
           * where they pack the four bytes from the low and high four DW.
           */
-         assert(_mesa_is_pow_two(dst.dw1.bits.writemask) &&
-                dst.dw1.bits.writemask != 0);
-         unsigned offset = __builtin_ctz(dst.dw1.bits.writemask);
+         assert(_mesa_is_pow_two(dst.writemask) &&
+                dst.writemask != 0);
+         unsigned offset = __builtin_ctz(dst.writemask);
 
          dst.type = BRW_REGISTER_TYPE_UB;
 
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index 94759afd166..9b04acb8ed6 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -863,7 +863,7 @@ vec4_visitor::is_high_sampler(src_reg sampler)
    if (devinfo->gen < 8 && !devinfo->is_haswell)
       return false;
 
-   return sampler.file != IMM || sampler.fixed_hw_reg.dw1.ud >= 16;
+   return sampler.file != IMM || sampler.fixed_hw_reg.ud >= 16;
 }
 
 void

From 977df90d6538ae35a5463a6b098ba974d3f0143e Mon Sep 17 00:00:00 2001
From: Matt Turner <mattst88@gmail.com>
Date: Fri, 23 Oct 2015 12:17:03 -0700
Subject: [PATCH 257/287] i965: Reorganize brw_reg fields.

Put fields that are meaningless for an immediate in the same storage
as the immediate. This leaves the fields type, file, nr, subnr, negate,
abs and address_mode in the first dword, where there is now extra room
(pad0 grows from 1 to 10 bits) for expansion.

Reviewed-by: Emil Velikov <emil.velikov@collabora.co.uk>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/mesa/drivers/dri/i965/brw_reg.h | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_reg.h b/src/mesa/drivers/dri/i965/brw_reg.h
index af2a49b3097..d43438315d5 100644
--- a/src/mesa/drivers/dri/i965/brw_reg.h
+++ b/src/mesa/drivers/dri/i965/brw_reg.h
@@ -237,18 +237,18 @@ struct brw_reg {
    unsigned subnr:5;              /* :1 in align16 */
    unsigned negate:1;             /* source only */
    unsigned abs:1;                /* source only */
-   unsigned vstride:4;            /* source only */
-   unsigned width:3;              /* src only, align1 only */
-   unsigned hstride:2;            /* align1 only */
    unsigned address_mode:1;       /* relative addressing, hopefully! */
-   unsigned pad0:1;
+   unsigned pad0:10;
 
    union {
       struct {
          unsigned swizzle:8;      /* src only, align16 only */
          unsigned writemask:4;    /* dest only, align16 only */
          int  indirect_offset:10; /* relative addressing offset */
-         unsigned pad1:10;        /* two dwords total */
+         unsigned vstride:4;      /* source only */
+         unsigned width:3;        /* src only, align1 only */
+         unsigned hstride:2;      /* align1 only */
+         unsigned pad1:1;
       };
 
       float f;
@@ -357,9 +357,6 @@ brw_reg(unsigned file,
    reg.subnr = subnr * type_sz(type);
    reg.negate = negate;
    reg.abs = abs;
-   reg.vstride = vstride;
-   reg.width = width;
-   reg.hstride = hstride;
    reg.address_mode = BRW_ADDRESS_DIRECT;
    reg.pad0 = 0;
 
@@ -372,6 +369,9 @@ brw_reg(unsigned file,
    reg.swizzle = swizzle;
    reg.writemask = writemask;
    reg.indirect_offset = 0;
+   reg.vstride = vstride;
+   reg.width = width;
+   reg.hstride = hstride;
    reg.pad1 = 0;
    return reg;
 }

From d74dd703f80ff40047ad8360e66ffd70b80f7230 Mon Sep 17 00:00:00 2001
From: Matt Turner <mattst88@gmail.com>
Date: Fri, 23 Oct 2015 13:11:44 -0700
Subject: [PATCH 258/287] i965: Add and use enum brw_reg_file.

Reviewed-by: Emil Velikov <emil.velikov@collabora.co.uk>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/mesa/drivers/dri/i965/brw_defines.h       | 10 +++++---
 src/mesa/drivers/dri/i965/brw_eu_emit.c       |  2 +-
 .../drivers/dri/i965/brw_fs_generator.cpp     |  5 ++--
 src/mesa/drivers/dri/i965/brw_reg.h           | 25 ++++++++++---------
 4 files changed, 23 insertions(+), 19 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index 62bdb1fbb67..47350301a14 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -1399,10 +1399,12 @@ enum PACKED brw_predicate {
    BRW_PREDICATE_ALIGN16_ALL4H       =  7,
 };
 
-#define BRW_ARCHITECTURE_REGISTER_FILE    0
-#define BRW_GENERAL_REGISTER_FILE         1
-#define BRW_MESSAGE_REGISTER_FILE         2
-#define BRW_IMMEDIATE_VALUE               3
+enum PACKED brw_reg_file {
+   BRW_ARCHITECTURE_REGISTER_FILE = 0,
+   BRW_GENERAL_REGISTER_FILE      = 1,
+   BRW_MESSAGE_REGISTER_FILE      = 2,
+   BRW_IMMEDIATE_VALUE            = 3,
+};
 
 #define BRW_HW_REG_TYPE_UD  0
 #define BRW_HW_REG_TYPE_D   1
diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c
index 775027d9e3a..ec04d7de0e0 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
@@ -92,7 +92,7 @@ gen7_convert_mrf_to_grf(struct brw_codegen *p, struct brw_reg *reg)
  */
 unsigned
 brw_reg_type_to_hw_type(const struct brw_device_info *devinfo,
-                        enum brw_reg_type type, unsigned file)
+                        enum brw_reg_type type, enum brw_reg_file file)
 {
    if (file == BRW_IMMEDIATE_VALUE) {
       static const int imm_hw_types[] = {
diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
index 5fd104a64e8..7de1669fbad 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -33,7 +33,8 @@
 #include "brw_fs.h"
 #include "brw_cfg.h"
 
-static uint32_t brw_file_from_reg(fs_reg *reg)
+static enum brw_reg_file
+brw_file_from_reg(fs_reg *reg)
 {
    switch (reg->file) {
    case GRF:
@@ -48,7 +49,7 @@ static uint32_t brw_file_from_reg(fs_reg *reg)
    case UNIFORM:
       unreachable("not reached");
    }
-   return 0;
+   return BRW_ARCHITECTURE_REGISTER_FILE;
 }
 
 static struct brw_reg
diff --git a/src/mesa/drivers/dri/i965/brw_reg.h b/src/mesa/drivers/dri/i965/brw_reg.h
index d43438315d5..8fc2fee94ca 100644
--- a/src/mesa/drivers/dri/i965/brw_reg.h
+++ b/src/mesa/drivers/dri/i965/brw_reg.h
@@ -219,7 +219,7 @@ enum PACKED brw_reg_type {
 };
 
 unsigned brw_reg_type_to_hw_type(const struct brw_device_info *devinfo,
-                                 enum brw_reg_type type, unsigned file);
+                                 enum brw_reg_type type, enum brw_reg_file file);
 const char *brw_reg_type_letters(unsigned brw_reg_type);
 
 #define REG_SIZE (8*4)
@@ -232,7 +232,7 @@ const char *brw_reg_type_letters(unsigned brw_reg_type);
  */
 struct brw_reg {
    enum brw_reg_type type:4;
-   unsigned file:2;
+   enum brw_reg_file file:2;
    unsigned nr:8;
    unsigned subnr:5;              /* :1 in align16 */
    unsigned negate:1;             /* source only */
@@ -329,7 +329,7 @@ type_is_signed(unsigned type)
  * \param writemask WRITEMASK_X/Y/Z/W bitfield
  */
 static inline struct brw_reg
-brw_reg(unsigned file,
+brw_reg(enum brw_reg_file file,
         unsigned nr,
         unsigned subnr,
         unsigned negate,
@@ -378,7 +378,7 @@ brw_reg(unsigned file,
 
 /** Construct float[16] register */
 static inline struct brw_reg
-brw_vec16_reg(unsigned file, unsigned nr, unsigned subnr)
+brw_vec16_reg(enum brw_reg_file file, unsigned nr, unsigned subnr)
 {
    return brw_reg(file,
                   nr,
@@ -395,7 +395,7 @@ brw_vec16_reg(unsigned file, unsigned nr, unsigned subnr)
 
 /** Construct float[8] register */
 static inline struct brw_reg
-brw_vec8_reg(unsigned file, unsigned nr, unsigned subnr)
+brw_vec8_reg(enum brw_reg_file file, unsigned nr, unsigned subnr)
 {
    return brw_reg(file,
                   nr,
@@ -412,7 +412,7 @@ brw_vec8_reg(unsigned file, unsigned nr, unsigned subnr)
 
 /** Construct float[4] register */
 static inline struct brw_reg
-brw_vec4_reg(unsigned file, unsigned nr, unsigned subnr)
+brw_vec4_reg(enum brw_reg_file file, unsigned nr, unsigned subnr)
 {
    return brw_reg(file,
                   nr,
@@ -429,7 +429,7 @@ brw_vec4_reg(unsigned file, unsigned nr, unsigned subnr)
 
 /** Construct float[2] register */
 static inline struct brw_reg
-brw_vec2_reg(unsigned file, unsigned nr, unsigned subnr)
+brw_vec2_reg(enum brw_reg_file file, unsigned nr, unsigned subnr)
 {
    return brw_reg(file,
                   nr,
@@ -446,7 +446,7 @@ brw_vec2_reg(unsigned file, unsigned nr, unsigned subnr)
 
 /** Construct float[1] register */
 static inline struct brw_reg
-brw_vec1_reg(unsigned file, unsigned nr, unsigned subnr)
+brw_vec1_reg(enum brw_reg_file file, unsigned nr, unsigned subnr)
 {
    return brw_reg(file,
                   nr,
@@ -462,7 +462,8 @@ brw_vec1_reg(unsigned file, unsigned nr, unsigned subnr)
 }
 
 static inline struct brw_reg
-brw_vecn_reg(unsigned width, unsigned file, unsigned nr, unsigned subnr)
+brw_vecn_reg(unsigned width, enum brw_reg_file file,
+             unsigned nr, unsigned subnr)
 {
    switch (width) {
    case 1:
@@ -529,21 +530,21 @@ byte_offset(struct brw_reg reg, unsigned bytes)
 
 /** Construct unsigned word[16] register */
 static inline struct brw_reg
-brw_uw16_reg(unsigned file, unsigned nr, unsigned subnr)
+brw_uw16_reg(enum brw_reg_file file, unsigned nr, unsigned subnr)
 {
    return suboffset(retype(brw_vec16_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr);
 }
 
 /** Construct unsigned word[8] register */
 static inline struct brw_reg
-brw_uw8_reg(unsigned file, unsigned nr, unsigned subnr)
+brw_uw8_reg(enum brw_reg_file file, unsigned nr, unsigned subnr)
 {
    return suboffset(retype(brw_vec8_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr);
 }
 
 /** Construct unsigned word[1] register */
 static inline struct brw_reg
-brw_uw1_reg(unsigned file, unsigned nr, unsigned subnr)
+brw_uw1_reg(enum brw_reg_file file, unsigned nr, unsigned subnr)
 {
    return suboffset(retype(brw_vec1_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr);
 }

From 1392e45bfb396ccbfa5bb0c6063522e0550988d3 Mon Sep 17 00:00:00 2001
From: Matt Turner <mattst88@gmail.com>
Date: Sat, 24 Oct 2015 14:55:57 -0700
Subject: [PATCH 259/287] i965: Use immediate storage in inherited brw_reg.

Reviewed-by: Emil Velikov <emil.velikov@collabora.co.uk>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/mesa/drivers/dri/i965/brw_fs.cpp          | 80 +++++++++----------
 .../dri/i965/brw_fs_combine_constants.cpp     |  6 +-
 .../dri/i965/brw_fs_copy_propagation.cpp      | 12 +--
 src/mesa/drivers/dri/i965/brw_fs_cse.cpp      | 16 ++--
 .../drivers/dri/i965/brw_fs_generator.cpp     | 12 +--
 src/mesa/drivers/dri/i965/brw_fs_visitor.cpp  |  4 +-
 src/mesa/drivers/dri/i965/brw_shader.cpp      | 10 +--
 src/mesa/drivers/dri/i965/brw_vec4.cpp        | 39 ++++-----
 .../dri/i965/brw_vec4_copy_propagation.cpp    | 10 +--
 .../drivers/dri/i965/brw_vec4_visitor.cpp     |  2 +-
 10 files changed, 96 insertions(+), 95 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index f40d05d3f43..931a8fdbae2 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -379,7 +379,7 @@ fs_reg::fs_reg(float f)
    this->file = IMM;
    this->type = BRW_REGISTER_TYPE_F;
    this->stride = 0;
-   this->fixed_hw_reg.f = f;
+   this->f = f;
 }
 
 /** Immediate value constructor. */
@@ -389,7 +389,7 @@ fs_reg::fs_reg(int32_t i)
    this->file = IMM;
    this->type = BRW_REGISTER_TYPE_D;
    this->stride = 0;
-   this->fixed_hw_reg.d = i;
+   this->d = i;
 }
 
 /** Immediate value constructor. */
@@ -399,7 +399,7 @@ fs_reg::fs_reg(uint32_t u)
    this->file = IMM;
    this->type = BRW_REGISTER_TYPE_UD;
    this->stride = 0;
-   this->fixed_hw_reg.ud = u;
+   this->ud = u;
 }
 
 /** Vector float immediate value constructor. */
@@ -408,7 +408,7 @@ fs_reg::fs_reg(uint8_t vf[4])
    init();
    this->file = IMM;
    this->type = BRW_REGISTER_TYPE_VF;
-   memcpy(&this->fixed_hw_reg.ud, vf, sizeof(unsigned));
+   memcpy(&this->ud, vf, sizeof(unsigned));
 }
 
 /** Vector float immediate value constructor. */
@@ -417,7 +417,7 @@ fs_reg::fs_reg(uint8_t vf0, uint8_t vf1, uint8_t vf2, uint8_t vf3)
    init();
    this->file = IMM;
    this->type = BRW_REGISTER_TYPE_VF;
-   this->fixed_hw_reg.ud = (vf0 <<  0) |
+   this->ud = (vf0 <<  0) |
                                (vf1 <<  8) |
                                (vf2 << 16) |
                                (vf3 << 24);
@@ -443,9 +443,10 @@ fs_reg::equals(const fs_reg &r) const
            negate == r.negate &&
            abs == r.abs &&
            !reladdr && !r.reladdr &&
-           ((file != HW_REG && file != IMM) ||
+           (file != HW_REG ||
             memcmp(&fixed_hw_reg, &r.fixed_hw_reg,
                    sizeof(fixed_hw_reg)) == 0) &&
+           (file != IMM || d == r.d) &&
            stride == r.stride);
 }
 
@@ -719,7 +720,7 @@ fs_inst::components_read(unsigned i) const
       assert(src[FB_WRITE_LOGICAL_SRC_COMPONENTS].file == IMM);
       /* First/second FB write color. */
       if (i < 2)
-         return src[FB_WRITE_LOGICAL_SRC_COMPONENTS].fixed_hw_reg.ud;
+         return src[FB_WRITE_LOGICAL_SRC_COMPONENTS].ud;
       else
          return 1;
 
@@ -739,10 +740,10 @@ fs_inst::components_read(unsigned i) const
       assert(src[8].file == IMM && src[9].file == IMM);
       /* Texture coordinates. */
       if (i == 0)
-         return src[8].fixed_hw_reg.ud;
+         return src[8].ud;
       /* Texture derivatives. */
       else if ((i == 2 || i == 3) && opcode == SHADER_OPCODE_TXD_LOGICAL)
-         return src[9].fixed_hw_reg.ud;
+         return src[9].ud;
       /* Texture offset. */
       else if (i == 7)
          return 2;
@@ -757,7 +758,7 @@ fs_inst::components_read(unsigned i) const
       assert(src[3].file == IMM);
       /* Surface coordinates. */
       if (i == 0)
-         return src[3].fixed_hw_reg.ud;
+         return src[3].ud;
       /* Surface operation source (ignored for reads). */
       else if (i == 1)
          return 0;
@@ -770,10 +771,10 @@ fs_inst::components_read(unsigned i) const
              src[4].file == IMM);
       /* Surface coordinates. */
       if (i == 0)
-         return src[3].fixed_hw_reg.ud;
+         return src[3].ud;
       /* Surface operation source. */
       else if (i == 1)
-         return src[4].fixed_hw_reg.ud;
+         return src[4].ud;
       else
          return 1;
 
@@ -781,10 +782,10 @@ fs_inst::components_read(unsigned i) const
    case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL: {
       assert(src[3].file == IMM &&
              src[4].file == IMM);
-      const unsigned op = src[4].fixed_hw_reg.ud;
+      const unsigned op = src[4].ud;
       /* Surface coordinates. */
       if (i == 0)
-         return src[3].fixed_hw_reg.ud;
+         return src[3].ud;
       /* Surface operation source. */
       else if (i == 1 && op == BRW_AOP_CMPWR)
          return 2;
@@ -1666,11 +1667,11 @@ fs_visitor::assign_gs_urb_setup()
       if (inst->opcode == SHADER_OPCODE_URB_READ_SIMD8) {
          assert(inst->src[0].file == IMM);
          inst->src[0] = retype(brw_vec8_grf(first_icp_handle +
-                                            inst->src[0].fixed_hw_reg.ud,
+                                            inst->src[0].ud,
                                             0), BRW_REGISTER_TYPE_UD);
          /* for now, assume constant - we can do per-slot offsets later */
          assert(inst->src[1].file == IMM);
-         inst->offset = inst->src[1].fixed_hw_reg.ud;
+         inst->offset = inst->src[1].ud;
          inst->src[1] = fs_reg();
          inst->mlen = 1;
          inst->base_mrf = -1;
@@ -2071,8 +2072,7 @@ fs_visitor::opt_algebraic()
             if (inst->dst.type != inst->src[0].type)
                assert(!"unimplemented: saturate mixed types");
 
-            if (brw_saturate_immediate(inst->dst.type,
-                                       &inst->src[0].fixed_hw_reg)) {
+            if (brw_saturate_immediate(inst->dst.type, &inst->src[0])) {
                inst->saturate = false;
                progress = true;
             }
@@ -2112,7 +2112,7 @@ fs_visitor::opt_algebraic()
          if (inst->src[0].file == IMM) {
             assert(inst->src[0].type == BRW_REGISTER_TYPE_F);
             inst->opcode = BRW_OPCODE_MOV;
-            inst->src[0].fixed_hw_reg.f *= inst->src[1].fixed_hw_reg.f;
+            inst->src[0].f *= inst->src[1].f;
             inst->src[1] = reg_undef;
             progress = true;
             break;
@@ -2133,7 +2133,7 @@ fs_visitor::opt_algebraic()
          if (inst->src[0].file == IMM) {
             assert(inst->src[0].type == BRW_REGISTER_TYPE_F);
             inst->opcode = BRW_OPCODE_MOV;
-            inst->src[0].fixed_hw_reg.f += inst->src[1].fixed_hw_reg.f;
+            inst->src[0].f += inst->src[1].f;
             inst->src[1] = reg_undef;
             progress = true;
             break;
@@ -2182,7 +2182,7 @@ fs_visitor::opt_algebraic()
             case BRW_CONDITIONAL_L:
                switch (inst->src[1].type) {
                case BRW_REGISTER_TYPE_F:
-                  if (inst->src[1].fixed_hw_reg.f >= 1.0f) {
+                  if (inst->src[1].f >= 1.0f) {
                      inst->opcode = BRW_OPCODE_MOV;
                      inst->src[1] = reg_undef;
                      inst->conditional_mod = BRW_CONDITIONAL_NONE;
@@ -2197,7 +2197,7 @@ fs_visitor::opt_algebraic()
             case BRW_CONDITIONAL_G:
                switch (inst->src[1].type) {
                case BRW_REGISTER_TYPE_F:
-                  if (inst->src[1].fixed_hw_reg.f <= 0.0f) {
+                  if (inst->src[1].f <= 0.0f) {
                      inst->opcode = BRW_OPCODE_MOV;
                      inst->src[1] = reg_undef;
                      inst->conditional_mod = BRW_CONDITIONAL_NONE;
@@ -2234,7 +2234,7 @@ fs_visitor::opt_algebraic()
             progress = true;
          } else if (inst->src[1].file == IMM && inst->src[2].file == IMM) {
             inst->opcode = BRW_OPCODE_ADD;
-            inst->src[1].fixed_hw_reg.f *= inst->src[2].fixed_hw_reg.f;
+            inst->src[1].f *= inst->src[2].f;
             inst->src[2] = reg_undef;
             progress = true;
          }
@@ -2259,7 +2259,7 @@ fs_visitor::opt_algebraic()
          } else if (inst->src[1].file == IMM) {
             inst->opcode = BRW_OPCODE_MOV;
             inst->src[0] = component(inst->src[0],
-                                     inst->src[1].fixed_hw_reg.ud);
+                                     inst->src[1].ud);
             inst->sources = 1;
             inst->force_writemask_all = true;
             progress = true;
@@ -3081,7 +3081,7 @@ fs_visitor::lower_uniform_pull_constant_loads()
          fs_reg const_offset_reg = inst->src[1];
          assert(const_offset_reg.file == IMM &&
                 const_offset_reg.type == BRW_REGISTER_TYPE_UD);
-         const_offset_reg.fixed_hw_reg.ud /= 4;
+         const_offset_reg.ud /= 4;
 
          fs_reg payload, offset;
          if (devinfo->gen >= 9) {
@@ -3250,7 +3250,7 @@ fs_visitor::lower_integer_multiplication()
             continue;
 
          if (inst->src[1].file == IMM &&
-             inst->src[1].fixed_hw_reg.ud < (1 << 16)) {
+             inst->src[1].ud < (1 << 16)) {
             /* The MUL instruction isn't commutative. On Gen <= 6, only the low
              * 16-bits of src0 are read, and on Gen >= 7 only the low 16-bits of
              * src1 are used.
@@ -3326,8 +3326,8 @@ fs_visitor::lower_integer_multiplication()
                fs_reg src1_1_w = inst->src[1];
 
                if (inst->src[1].file == IMM) {
-                  src1_0_w.fixed_hw_reg.ud &= 0xffff;
-                  src1_1_w.fixed_hw_reg.ud >>= 16;
+                  src1_0_w.ud &= 0xffff;
+                  src1_1_w.ud >>= 16;
                } else {
                   src1_0_w.type = BRW_REGISTER_TYPE_UW;
                   if (src1_0_w.stride != 0) {
@@ -3482,7 +3482,7 @@ lower_fb_write_logical_send(const fs_builder &bld, fs_inst *inst,
    const fs_reg &src_stencil = inst->src[FB_WRITE_LOGICAL_SRC_SRC_STENCIL];
    fs_reg sample_mask = inst->src[FB_WRITE_LOGICAL_SRC_OMASK];
    const unsigned components =
-      inst->src[FB_WRITE_LOGICAL_SRC_COMPONENTS].fixed_hw_reg.ud;
+      inst->src[FB_WRITE_LOGICAL_SRC_COMPONENTS].ud;
 
    /* We can potentially have a message length of up to 15, so we have to set
     * base_mrf to either 0 or 1 in order to fit in m0..m15.
@@ -3822,7 +3822,7 @@ is_high_sampler(const struct brw_device_info *devinfo, const fs_reg &sampler)
    if (devinfo->gen < 8 && !devinfo->is_haswell)
       return false;
 
-   return sampler.file != IMM || sampler.fixed_hw_reg.ud >= 16;
+   return sampler.file != IMM || sampler.ud >= 16;
 }
 
 static void
@@ -4057,8 +4057,8 @@ lower_sampler_logical_send(const fs_builder &bld, fs_inst *inst, opcode op)
    const fs_reg &sampler = inst->src[6];
    const fs_reg &offset_value = inst->src[7];
    assert(inst->src[8].file == IMM && inst->src[9].file == IMM);
-   const unsigned coord_components = inst->src[8].fixed_hw_reg.ud;
-   const unsigned grad_components = inst->src[9].fixed_hw_reg.ud;
+   const unsigned coord_components = inst->src[8].ud;
+   const unsigned grad_components = inst->src[9].ud;
 
    if (devinfo->gen >= 7) {
       lower_sampler_logical_send_gen7(bld, inst, op, coordinate,
@@ -4384,7 +4384,7 @@ get_lowered_simd_width(const struct brw_device_info *devinfo,
        * circumstances it can end up with a message that is too long in SIMD16
        * mode.
        */
-      const unsigned coord_components = inst->src[8].fixed_hw_reg.ud;
+      const unsigned coord_components = inst->src[8].ud;
       /* First three arguments are the sample index and the two arguments for
        * the MCS data.
        */
@@ -4692,22 +4692,22 @@ fs_visitor::dump_instruction(backend_instruction *be_inst, FILE *file)
       case IMM:
          switch (inst->src[i].type) {
          case BRW_REGISTER_TYPE_F:
-            fprintf(file, "%ff", inst->src[i].fixed_hw_reg.f);
+            fprintf(file, "%ff", inst->src[i].f);
             break;
          case BRW_REGISTER_TYPE_W:
          case BRW_REGISTER_TYPE_D:
-            fprintf(file, "%dd", inst->src[i].fixed_hw_reg.d);
+            fprintf(file, "%dd", inst->src[i].d);
             break;
          case BRW_REGISTER_TYPE_UW:
          case BRW_REGISTER_TYPE_UD:
-            fprintf(file, "%uu", inst->src[i].fixed_hw_reg.ud);
+            fprintf(file, "%uu", inst->src[i].ud);
             break;
          case BRW_REGISTER_TYPE_VF:
             fprintf(file, "[%-gF, %-gF, %-gF, %-gF]",
-                    brw_vf_to_float((inst->src[i].fixed_hw_reg.ud >>  0) & 0xff),
-                    brw_vf_to_float((inst->src[i].fixed_hw_reg.ud >>  8) & 0xff),
-                    brw_vf_to_float((inst->src[i].fixed_hw_reg.ud >> 16) & 0xff),
-                    brw_vf_to_float((inst->src[i].fixed_hw_reg.ud >> 24) & 0xff));
+                    brw_vf_to_float((inst->src[i].ud >>  0) & 0xff),
+                    brw_vf_to_float((inst->src[i].ud >>  8) & 0xff),
+                    brw_vf_to_float((inst->src[i].ud >> 16) & 0xff),
+                    brw_vf_to_float((inst->src[i].ud >> 24) & 0xff));
             break;
          default:
             fprintf(file, "???");
diff --git a/src/mesa/drivers/dri/i965/brw_fs_combine_constants.cpp b/src/mesa/drivers/dri/i965/brw_fs_combine_constants.cpp
index 504c4b6171c..c9564597b2c 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_combine_constants.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_combine_constants.cpp
@@ -219,7 +219,7 @@ fs_visitor::opt_combine_constants()
              inst->src[i].type != BRW_REGISTER_TYPE_F)
             continue;
 
-         float val = fabsf(inst->src[i].fixed_hw_reg.f);
+         float val = fabsf(inst->src[i].f);
          struct imm *imm = find_imm(&table, val);
 
          if (imm) {
@@ -299,9 +299,9 @@ fs_visitor::opt_combine_constants()
          reg->reg = table.imm[i].reg;
          reg->subreg_offset = table.imm[i].subreg_offset;
          reg->stride = 0;
-         reg->negate = signbit(reg->fixed_hw_reg.f) !=
+         reg->negate = signbit(reg->f) !=
                                signbit(table.imm[i].val);
-         assert(fabsf(reg->fixed_hw_reg.f) == table.imm[i].val);
+         assert(fabsf(reg->f) == table.imm[i].val);
       }
    }
 
diff --git a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
index 17989e3bfd0..2c966d173c6 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
@@ -369,8 +369,8 @@ fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry)
       switch(inst->opcode) {
       case BRW_OPCODE_SEL:
          if (inst->src[1].file != IMM ||
-             inst->src[1].fixed_hw_reg.f < 0.0 ||
-             inst->src[1].fixed_hw_reg.f > 1.0) {
+             inst->src[1].f < 0.0 ||
+             inst->src[1].f > 1.0) {
             return false;
          }
          break;
@@ -477,14 +477,14 @@ fs_visitor::try_constant_propagate(fs_inst *inst, acp_entry *entry)
 
       if (inst->src[i].abs) {
          if ((devinfo->gen >= 8 && is_logic_op(inst->opcode)) ||
-             !brw_abs_immediate(val.type, &val.fixed_hw_reg)) {
+             !brw_abs_immediate(val.type, &val)) {
             continue;
          }
       }
 
       if (inst->src[i].negate) {
          if ((devinfo->gen >= 8 && is_logic_op(inst->opcode)) ||
-             !brw_negate_immediate(val.type, &val.fixed_hw_reg)) {
+             !brw_negate_immediate(val.type, &val)) {
             continue;
          }
       }
@@ -605,10 +605,10 @@ fs_visitor::try_constant_propagate(fs_inst *inst, acp_entry *entry)
           * anyway.
           */
          assert(i == 0);
-         if (inst->src[0].fixed_hw_reg.f != 0.0f) {
+         if (inst->src[0].f != 0.0f) {
             inst->opcode = BRW_OPCODE_MOV;
             inst->src[0] = val;
-            inst->src[0].fixed_hw_reg.f = 1.0f / inst->src[0].fixed_hw_reg.f;
+            inst->src[0].f = 1.0f / inst->src[0].f;
             progress = true;
          }
          break;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
index 05d64deab79..fbf00d1bfd0 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
@@ -110,20 +110,20 @@ operands_match(const fs_inst *a, const fs_inst *b, bool *negate)
               (xs[2].equals(ys[1]) && xs[1].equals(ys[2])));
    } else if (a->opcode == BRW_OPCODE_MUL && a->dst.type == BRW_REGISTER_TYPE_F) {
       bool xs0_negate = xs[0].negate;
-      bool xs1_negate = xs[1].file == IMM ? xs[1].fixed_hw_reg.f < 0.0f
+      bool xs1_negate = xs[1].file == IMM ? xs[1].f < 0.0f
                                           : xs[1].negate;
       bool ys0_negate = ys[0].negate;
-      bool ys1_negate = ys[1].file == IMM ? ys[1].fixed_hw_reg.f < 0.0f
+      bool ys1_negate = ys[1].file == IMM ? ys[1].f < 0.0f
                                           : ys[1].negate;
-      float xs1_imm = xs[1].fixed_hw_reg.f;
-      float ys1_imm = ys[1].fixed_hw_reg.f;
+      float xs1_imm = xs[1].f;
+      float ys1_imm = ys[1].f;
 
       xs[0].negate = false;
       xs[1].negate = false;
       ys[0].negate = false;
       ys[1].negate = false;
-      xs[1].fixed_hw_reg.f = fabsf(xs[1].fixed_hw_reg.f);
-      ys[1].fixed_hw_reg.f = fabsf(ys[1].fixed_hw_reg.f);
+      xs[1].f = fabsf(xs[1].f);
+      ys[1].f = fabsf(ys[1].f);
 
       bool ret = (xs[0].equals(ys[0]) && xs[1].equals(ys[1])) ||
                  (xs[1].equals(ys[0]) && xs[0].equals(ys[1]));
@@ -132,8 +132,8 @@ operands_match(const fs_inst *a, const fs_inst *b, bool *negate)
       xs[1].negate = xs[1].file == IMM ? false : xs1_negate;
       ys[0].negate = ys0_negate;
       ys[1].negate = ys[1].file == IMM ? false : ys1_negate;
-      xs[1].fixed_hw_reg.f = xs1_imm;
-      ys[1].fixed_hw_reg.f = ys1_imm;
+      xs[1].f = xs1_imm;
+      ys[1].f = ys1_imm;
 
       *negate = (xs0_negate != xs1_negate) != (ys0_negate != ys1_negate);
       return ret;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
index 7de1669fbad..9dd574cd14f 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -92,22 +92,22 @@ brw_reg_from_fs_reg(fs_inst *inst, fs_reg *reg, unsigned gen)
 
       switch (reg->type) {
       case BRW_REGISTER_TYPE_F:
-	 brw_reg = brw_imm_f(reg->fixed_hw_reg.f);
+	 brw_reg = brw_imm_f(reg->f);
 	 break;
       case BRW_REGISTER_TYPE_D:
-	 brw_reg = brw_imm_d(reg->fixed_hw_reg.d);
+	 brw_reg = brw_imm_d(reg->d);
 	 break;
       case BRW_REGISTER_TYPE_UD:
-	 brw_reg = brw_imm_ud(reg->fixed_hw_reg.ud);
+	 brw_reg = brw_imm_ud(reg->ud);
 	 break;
       case BRW_REGISTER_TYPE_W:
-	 brw_reg = brw_imm_w(reg->fixed_hw_reg.d);
+	 brw_reg = brw_imm_w(reg->d);
 	 break;
       case BRW_REGISTER_TYPE_UW:
-	 brw_reg = brw_imm_uw(reg->fixed_hw_reg.ud);
+	 brw_reg = brw_imm_uw(reg->ud);
 	 break;
       case BRW_REGISTER_TYPE_VF:
-         brw_reg = brw_imm_vf(reg->fixed_hw_reg.ud);
+         brw_reg = brw_imm_vf(reg->ud);
          break;
       default:
 	 unreachable("not reached");
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index d6be2d598be..66a0c90c40d 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -322,7 +322,7 @@ fs_visitor::emit_texture(ir_texture_opcode op,
       inst->shadow_compare = true;
 
    if (offset_value.file == IMM)
-      inst->offset = offset_value.fixed_hw_reg.ud;
+      inst->offset = offset_value.ud;
 
    if (op == ir_tg4) {
       inst->offset |=
@@ -949,7 +949,7 @@ fs_visitor::emit_urb_writes(const fs_reg &gs_vertex_count)
       fs_reg offset;
       if (gs_vertex_count.file == IMM) {
          per_slot_offsets = fs_reg(output_vertex_size_owords *
-                                   gs_vertex_count.fixed_hw_reg.ud);
+                                   gs_vertex_count.ud);
       } else {
          per_slot_offsets = vgrf(glsl_type::int_type);
          bld.MUL(per_slot_offsets, gs_vertex_count,
diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp
index 8391a2cb9a0..ce0019ff6f9 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -700,7 +700,7 @@ backend_reg::is_zero() const
    if (file != IMM)
       return false;
 
-   return fixed_hw_reg.d == 0;
+   return d == 0;
 }
 
 bool
@@ -710,8 +710,8 @@ backend_reg::is_one() const
       return false;
 
    return type == BRW_REGISTER_TYPE_F
-          ? fixed_hw_reg.f == 1.0
-          : fixed_hw_reg.d == 1;
+          ? f == 1.0
+          : d == 1;
 }
 
 bool
@@ -722,9 +722,9 @@ backend_reg::is_negative_one() const
 
    switch (type) {
    case BRW_REGISTER_TYPE_F:
-      return fixed_hw_reg.f == -1.0;
+      return f == -1.0;
    case BRW_REGISTER_TYPE_D:
-      return fixed_hw_reg.d == -1;
+      return d == -1;
    default:
       return false;
    }
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index fb5e2016006..1cb43c3c1f2 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -77,7 +77,7 @@ src_reg::src_reg(float f)
 
    this->file = IMM;
    this->type = BRW_REGISTER_TYPE_F;
-   this->fixed_hw_reg.f = f;
+   this->f = f;
 }
 
 src_reg::src_reg(uint32_t u)
@@ -86,7 +86,7 @@ src_reg::src_reg(uint32_t u)
 
    this->file = IMM;
    this->type = BRW_REGISTER_TYPE_UD;
-   this->fixed_hw_reg.ud = u;
+   this->ud = u;
 }
 
 src_reg::src_reg(int32_t i)
@@ -95,7 +95,7 @@ src_reg::src_reg(int32_t i)
 
    this->file = IMM;
    this->type = BRW_REGISTER_TYPE_D;
-   this->fixed_hw_reg.d = i;
+   this->d = i;
 }
 
 src_reg::src_reg(uint8_t vf[4])
@@ -104,7 +104,7 @@ src_reg::src_reg(uint8_t vf[4])
 
    this->file = IMM;
    this->type = BRW_REGISTER_TYPE_VF;
-   memcpy(&this->fixed_hw_reg.ud, vf, sizeof(unsigned));
+   memcpy(&this->ud, vf, sizeof(unsigned));
 }
 
 src_reg::src_reg(uint8_t vf0, uint8_t vf1, uint8_t vf2, uint8_t vf3)
@@ -113,7 +113,7 @@ src_reg::src_reg(uint8_t vf0, uint8_t vf1, uint8_t vf2, uint8_t vf3)
 
    this->file = IMM;
    this->type = BRW_REGISTER_TYPE_VF;
-   this->fixed_hw_reg.ud = (vf0 <<  0) |
+   this->ud = (vf0 <<  0) |
                                (vf1 <<  8) |
                                (vf2 << 16) |
                                (vf3 << 24);
@@ -218,7 +218,7 @@ dst_reg::equals(const dst_reg &r) const
            writemask == r.writemask &&
            (reladdr == r.reladdr ||
             (reladdr && r.reladdr && reladdr->equals(*r.reladdr))) &&
-           ((file != HW_REG && file != IMM) ||
+           (file != HW_REG ||
             memcmp(&fixed_hw_reg, &r.fixed_hw_reg,
                    sizeof(fixed_hw_reg)) == 0));
 }
@@ -363,8 +363,10 @@ src_reg::equals(const src_reg &r) const
 	   abs == r.abs &&
 	   swizzle == r.swizzle &&
 	   !reladdr && !r.reladdr &&
-	   memcmp(&fixed_hw_reg, &r.fixed_hw_reg,
-		  sizeof(fixed_hw_reg)) == 0);
+           (file != HW_REG ||
+            memcmp(&fixed_hw_reg, &r.fixed_hw_reg,
+                   sizeof(fixed_hw_reg)) == 0) &&
+           (file != IMM || d == r.d));
 }
 
 bool
@@ -397,7 +399,7 @@ vec4_visitor::opt_vector_float()
           inst->src[0].file != IMM)
          continue;
 
-      int vf = brw_float_to_vf(inst->src[0].fixed_hw_reg.f);
+      int vf = brw_float_to_vf(inst->src[0].f);
       if (vf == -1)
          continue;
 
@@ -660,8 +662,7 @@ vec4_visitor::opt_algebraic()
             if (inst->dst.type != inst->src[0].type)
                assert(!"unimplemented: saturate mixed types");
 
-            if (brw_saturate_immediate(inst->dst.type,
-                                       &inst->src[0].fixed_hw_reg)) {
+            if (brw_saturate_immediate(inst->dst.type, &inst->src[0])) {
                inst->saturate = false;
                progress = true;
             }
@@ -1467,20 +1468,20 @@ vec4_visitor::dump_instruction(backend_instruction *be_inst, FILE *file)
       case IMM:
          switch (inst->src[i].type) {
          case BRW_REGISTER_TYPE_F:
-            fprintf(file, "%fF", inst->src[i].fixed_hw_reg.f);
+            fprintf(file, "%fF", inst->src[i].f);
             break;
          case BRW_REGISTER_TYPE_D:
-            fprintf(file, "%dD", inst->src[i].fixed_hw_reg.d);
+            fprintf(file, "%dD", inst->src[i].d);
             break;
          case BRW_REGISTER_TYPE_UD:
-            fprintf(file, "%uU", inst->src[i].fixed_hw_reg.ud);
+            fprintf(file, "%uU", inst->src[i].ud);
             break;
          case BRW_REGISTER_TYPE_VF:
             fprintf(file, "[%-gF, %-gF, %-gF, %-gF]",
-                    brw_vf_to_float((inst->src[i].fixed_hw_reg.ud >>  0) & 0xff),
-                    brw_vf_to_float((inst->src[i].fixed_hw_reg.ud >>  8) & 0xff),
-                    brw_vf_to_float((inst->src[i].fixed_hw_reg.ud >> 16) & 0xff),
-                    brw_vf_to_float((inst->src[i].fixed_hw_reg.ud >> 24) & 0xff));
+                    brw_vf_to_float((inst->src[i].ud >>  0) & 0xff),
+                    brw_vf_to_float((inst->src[i].ud >>  8) & 0xff),
+                    brw_vf_to_float((inst->src[i].ud >> 16) & 0xff),
+                    brw_vf_to_float((inst->src[i].ud >> 24) & 0xff));
             break;
          default:
             fprintf(file, "???");
@@ -1817,7 +1818,7 @@ vec4_visitor::convert_to_hw_regs()
 
          case IMM:
             reg = brw_imm_reg(src.type);
-            reg.ud = src.fixed_hw_reg.ud;
+            reg.ud = src.ud;
             break;
 
          case UNIFORM:
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp b/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp
index f37f6084af6..523866efe19 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp
@@ -134,20 +134,20 @@ try_constant_propagate(const struct brw_device_info *devinfo,
 
    if (inst->src[arg].abs) {
       if ((devinfo->gen >= 8 && is_logic_op(inst->opcode)) ||
-          !brw_abs_immediate(value.type, &value.fixed_hw_reg)) {
+          !brw_abs_immediate(value.type, &value)) {
          return false;
       }
    }
 
    if (inst->src[arg].negate) {
       if ((devinfo->gen >= 8 && is_logic_op(inst->opcode)) ||
-          !brw_negate_immediate(value.type, &value.fixed_hw_reg)) {
+          !brw_negate_immediate(value.type, &value)) {
          return false;
       }
    }
 
    if (value.type == BRW_REGISTER_TYPE_VF)
-      value.fixed_hw_reg.ud = swizzle_vf_imm(value.fixed_hw_reg.ud,
+      value.ud = swizzle_vf_imm(value.ud,
                                                  inst->src[arg].swizzle);
 
    switch (inst->opcode) {
@@ -359,8 +359,8 @@ try_copy_propagate(const struct brw_device_info *devinfo,
              inst->src[0].type != BRW_REGISTER_TYPE_F ||
              inst->src[1].file != IMM ||
              inst->src[1].type != BRW_REGISTER_TYPE_F ||
-             inst->src[1].fixed_hw_reg.f < 0.0 ||
-             inst->src[1].fixed_hw_reg.f > 1.0) {
+             inst->src[1].f < 0.0 ||
+             inst->src[1].f > 1.0) {
             return false;
          }
          if (!inst->saturate)
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index 9b04acb8ed6..ecabedfe614 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -863,7 +863,7 @@ vec4_visitor::is_high_sampler(src_reg sampler)
    if (devinfo->gen < 8 && !devinfo->is_haswell)
       return false;
 
-   return sampler.file != IMM || sampler.fixed_hw_reg.ud >= 16;
+   return sampler.file != IMM || sampler.ud >= 16;
 }
 
 void

From 94b1031703b1b5759436fe215323727cffce5f86 Mon Sep 17 00:00:00 2001
From: Matt Turner <mattst88@gmail.com>
Date: Sat, 24 Oct 2015 15:29:03 -0700
Subject: [PATCH 260/287] i965: Remove fixed_hw_reg field from backend_reg.

Since backend_reg now inherits from brw_reg, we can use its inherited
brw_reg members directly in place of the fixed_hw_reg field.

Reviewed-by: Emil Velikov <emil.velikov@collabora.co.uk>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/mesa/drivers/dri/i965/brw_fs.cpp          |  93 +++++++--------
 .../drivers/dri/i965/brw_fs_generator.cpp     |   9 +-
 .../drivers/dri/i965/brw_fs_reg_allocate.cpp  |   4 +-
 src/mesa/drivers/dri/i965/brw_ir_fs.h         |   4 +-
 src/mesa/drivers/dri/i965/brw_ir_vec4.h       |   4 +-
 .../dri/i965/brw_schedule_instructions.cpp    |  50 ++++----
 src/mesa/drivers/dri/i965/brw_shader.cpp      |   8 +-
 src/mesa/drivers/dri/i965/brw_shader.h        |   5 +-
 src/mesa/drivers/dri/i965/brw_vec4.cpp        | 110 ++++++++----------
 .../drivers/dri/i965/brw_vec4_generator.cpp   |  12 +-
 .../drivers/dri/i965/brw_vec4_visitor.cpp     |   2 -
 11 files changed, 139 insertions(+), 162 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 931a8fdbae2..c2d04d970e5 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -423,13 +423,15 @@ fs_reg::fs_reg(uint8_t vf0, uint8_t vf1, uint8_t vf2, uint8_t vf3)
                                (vf3 << 24);
 }
 
-/** Fixed brw_reg. */
-fs_reg::fs_reg(struct brw_reg fixed_hw_reg)
+fs_reg::fs_reg(struct brw_reg reg) :
+   backend_reg(reg)
 {
-   init();
    this->file = HW_REG;
-   this->fixed_hw_reg = fixed_hw_reg;
-   this->type = fixed_hw_reg.type;
+   this->reg = 0;
+   this->reg_offset = 0;
+   this->subreg_offset = 0;
+   this->reladdr = NULL;
+   this->stride = 1;
 }
 
 bool
@@ -444,8 +446,7 @@ fs_reg::equals(const fs_reg &r) const
            abs == r.abs &&
            !reladdr && !r.reladdr &&
            (file != HW_REG ||
-            memcmp(&fixed_hw_reg, &r.fixed_hw_reg,
-                   sizeof(fixed_hw_reg)) == 0) &&
+            memcmp((brw_reg *)this, (brw_reg *)&r, sizeof(brw_reg)) == 0) &&
            (file != IMM || d == r.d) &&
            stride == r.stride);
 }
@@ -469,8 +470,8 @@ unsigned
 fs_reg::component_size(unsigned width) const
 {
    const unsigned stride = (file != HW_REG ? this->stride :
-                            fixed_hw_reg.hstride == 0 ? 0 :
-                            1 << (fixed_hw_reg.hstride - 1));
+                            hstride == 0 ? 0 :
+                            1 << (hstride - 1));
    return MAX2(width * stride, 1) * type_sz(type);
 }
 
@@ -961,7 +962,6 @@ fs_visitor::vgrf(const glsl_type *const type)
                  brw_type_for_base_type(type));
 }
 
-/** Fixed HW reg constructor. */
 fs_reg::fs_reg(enum register_file file, int reg)
 {
    init();
@@ -971,7 +971,6 @@ fs_reg::fs_reg(enum register_file file, int reg)
    this->stride = (file == UNIFORM ? 0 : 1);
 }
 
-/** Fixed HW reg constructor. */
 fs_reg::fs_reg(enum register_file file, int reg, enum brw_reg_type type)
 {
    init();
@@ -1476,10 +1475,11 @@ fs_visitor::assign_curb_setup()
 	    struct brw_reg brw_reg = brw_vec1_grf(payload.num_regs +
 						  constant_nr / 8,
 						  constant_nr % 8);
+            brw_reg.abs = inst->src[i].abs;
+            brw_reg.negate = inst->src[i].negate;
 
             assert(inst->src[i].stride == 0);
-	    inst->src[i].file = HW_REG;
-	    inst->src[i].fixed_hw_reg = byte_offset(
+            inst->src[i] = byte_offset(
                retype(brw_reg, inst->src[i].type),
                inst->src[i].subreg_offset);
 	 }
@@ -1595,12 +1595,12 @@ fs_visitor::assign_urb_setup()
    foreach_block_and_inst(block, fs_inst, inst, cfg) {
       if (inst->opcode == FS_OPCODE_LINTERP) {
 	 assert(inst->src[1].file == HW_REG);
-	 inst->src[1].fixed_hw_reg.nr += urb_start;
+         inst->src[1].nr += urb_start;
       }
 
       if (inst->opcode == FS_OPCODE_CINTERP) {
 	 assert(inst->src[0].file == HW_REG);
-	 inst->src[0].fixed_hw_reg.nr += urb_start;
+         inst->src[0].nr += urb_start;
       }
    }
 
@@ -1618,12 +1618,15 @@ fs_visitor::convert_attr_sources_to_hw_regs(fs_inst *inst)
                    inst->src[i].reg +
                    inst->src[i].reg_offset;
 
-         inst->src[i].file = HW_REG;
-         inst->src[i].fixed_hw_reg =
+         struct brw_reg reg =
             stride(byte_offset(retype(brw_vec8_grf(grf, 0), inst->src[i].type),
                                inst->src[i].subreg_offset),
                    inst->exec_size * inst->src[i].stride,
                    inst->exec_size, inst->src[i].stride);
+         reg.abs = inst->src[i].abs;
+         reg.negate = inst->src[i].negate;
+
+         inst->src[i] = reg;
       }
    }
 }
@@ -2793,7 +2796,7 @@ fs_visitor::emit_repclear_shader()
 
    /* Now that we have the uniform assigned, go ahead and force it to a vec4. */
    assert(mov->src[0].file == HW_REG);
-   mov->src[0] = brw_vec4_grf(mov->src[0].fixed_hw_reg.nr, 0);
+   mov->src[0] = brw_vec4_grf(mov->src[0].nr, 0);
 }
 
 /**
@@ -2874,8 +2877,8 @@ clear_deps_for_inst_src(fs_inst *inst, bool *deps, int first_grf, int grf_len)
       if (inst->src[i].file == GRF) {
          grf = inst->src[i].reg;
       } else if (inst->src[i].file == HW_REG &&
-                 inst->src[i].fixed_hw_reg.file == BRW_GENERAL_REGISTER_FILE) {
-         grf = inst->src[i].fixed_hw_reg.nr;
+                 inst->src[i].brw_reg::file == BRW_GENERAL_REGISTER_FILE) {
+         grf = inst->src[i].nr;
       } else {
          continue;
       }
@@ -4627,31 +4630,31 @@ fs_visitor::dump_instruction(backend_instruction *be_inst, FILE *file)
       fprintf(file, "***attr%d***", inst->dst.reg + inst->dst.reg_offset);
       break;
    case HW_REG:
-      if (inst->dst.fixed_hw_reg.file == BRW_ARCHITECTURE_REGISTER_FILE) {
-         switch (inst->dst.fixed_hw_reg.nr) {
+      if (inst->dst.brw_reg::file == BRW_ARCHITECTURE_REGISTER_FILE) {
+         switch (inst->dst.nr) {
          case BRW_ARF_NULL:
             fprintf(file, "null");
             break;
          case BRW_ARF_ADDRESS:
-            fprintf(file, "a0.%d", inst->dst.fixed_hw_reg.subnr);
+            fprintf(file, "a0.%d", inst->dst.subnr);
             break;
          case BRW_ARF_ACCUMULATOR:
-            fprintf(file, "acc%d", inst->dst.fixed_hw_reg.subnr);
+            fprintf(file, "acc%d", inst->dst.subnr);
             break;
          case BRW_ARF_FLAG:
-            fprintf(file, "f%d.%d", inst->dst.fixed_hw_reg.nr & 0xf,
-                             inst->dst.fixed_hw_reg.subnr);
+            fprintf(file, "f%d.%d", inst->dst.nr & 0xf,
+                             inst->dst.subnr);
             break;
          default:
-            fprintf(file, "arf%d.%d", inst->dst.fixed_hw_reg.nr & 0xf,
-                               inst->dst.fixed_hw_reg.subnr);
+            fprintf(file, "arf%d.%d", inst->dst.nr & 0xf,
+                               inst->dst.subnr);
             break;
          }
       } else {
-         fprintf(file, "hw_reg%d", inst->dst.fixed_hw_reg.nr);
+         fprintf(file, "hw_reg%d", inst->dst.nr);
       }
-      if (inst->dst.fixed_hw_reg.subnr)
-         fprintf(file, "+%d", inst->dst.fixed_hw_reg.subnr);
+      if (inst->dst.subnr)
+         fprintf(file, "+%d", inst->dst.subnr);
       break;
    case IMM:
       unreachable("not reached");
@@ -4715,37 +4718,31 @@ fs_visitor::dump_instruction(backend_instruction *be_inst, FILE *file)
          }
          break;
       case HW_REG:
-         if (inst->src[i].fixed_hw_reg.negate)
-            fprintf(file, "-");
-         if (inst->src[i].fixed_hw_reg.abs)
-            fprintf(file, "|");
-         if (inst->src[i].fixed_hw_reg.file == BRW_ARCHITECTURE_REGISTER_FILE) {
-            switch (inst->src[i].fixed_hw_reg.nr) {
+         if (inst->src[i].brw_reg::file == BRW_ARCHITECTURE_REGISTER_FILE) {
+            switch (inst->src[i].nr) {
             case BRW_ARF_NULL:
                fprintf(file, "null");
                break;
             case BRW_ARF_ADDRESS:
-               fprintf(file, "a0.%d", inst->src[i].fixed_hw_reg.subnr);
+               fprintf(file, "a0.%d", inst->src[i].subnr);
                break;
             case BRW_ARF_ACCUMULATOR:
-               fprintf(file, "acc%d", inst->src[i].fixed_hw_reg.subnr);
+               fprintf(file, "acc%d", inst->src[i].subnr);
                break;
             case BRW_ARF_FLAG:
-               fprintf(file, "f%d.%d", inst->src[i].fixed_hw_reg.nr & 0xf,
-                                inst->src[i].fixed_hw_reg.subnr);
+               fprintf(file, "f%d.%d", inst->src[i].nr & 0xf,
+                                inst->src[i].subnr);
                break;
             default:
-               fprintf(file, "arf%d.%d", inst->src[i].fixed_hw_reg.nr & 0xf,
-                                  inst->src[i].fixed_hw_reg.subnr);
+               fprintf(file, "arf%d.%d", inst->src[i].nr & 0xf,
+                                  inst->src[i].subnr);
                break;
             }
          } else {
-            fprintf(file, "hw_reg%d", inst->src[i].fixed_hw_reg.nr);
+            fprintf(file, "hw_reg%d", inst->src[i].nr);
          }
-         if (inst->src[i].fixed_hw_reg.subnr)
-            fprintf(file, "+%d", inst->src[i].fixed_hw_reg.subnr);
-         if (inst->src[i].fixed_hw_reg.abs)
-            fprintf(file, "|");
+         if (inst->src[i].subnr)
+            fprintf(file, "+%d", inst->src[i].subnr);
          break;
       }
       if (inst->src[i].abs)
diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
index 9dd574cd14f..6b6b5771298 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -84,6 +84,8 @@ brw_reg_from_fs_reg(fs_inst *inst, fs_reg *reg, unsigned gen)
 
       brw_reg = retype(brw_reg, reg->type);
       brw_reg = byte_offset(brw_reg, reg->subreg_offset);
+      brw_reg.abs = reg->abs;
+      brw_reg.negate = reg->negate;
       break;
    case IMM:
       assert(reg->stride == ((reg->type == BRW_REGISTER_TYPE_V ||
@@ -114,8 +116,7 @@ brw_reg_from_fs_reg(fs_inst *inst, fs_reg *reg, unsigned gen)
       }
       break;
    case HW_REG:
-      assert(reg->type == reg->fixed_hw_reg.type);
-      brw_reg = reg->fixed_hw_reg;
+      brw_reg = *static_cast<struct brw_reg *>(reg);
       break;
    case BAD_FILE:
       /* Probably unused. */
@@ -125,10 +126,6 @@ brw_reg_from_fs_reg(fs_inst *inst, fs_reg *reg, unsigned gen)
    case UNIFORM:
       unreachable("not reached");
    }
-   if (reg->abs)
-      brw_reg = brw_abs(brw_reg);
-   if (reg->negate)
-      brw_reg = negate(brw_reg);
 
    return brw_reg;
 }
diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
index 9251d9552a5..3e0e0e9586b 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
@@ -372,8 +372,8 @@ void fs_visitor::calculate_payload_ranges(int payload_node_count,
        */
       for (int i = 0; i < inst->sources; i++) {
          if (inst->src[i].file == HW_REG &&
-             inst->src[i].fixed_hw_reg.file == BRW_GENERAL_REGISTER_FILE) {
-            int node_nr = inst->src[i].fixed_hw_reg.nr;
+             inst->src[i].brw_reg::file == BRW_GENERAL_REGISTER_FILE) {
+            int node_nr = inst->src[i].nr;
             if (node_nr >= payload_node_count)
                continue;
 
diff --git a/src/mesa/drivers/dri/i965/brw_ir_fs.h b/src/mesa/drivers/dri/i965/brw_ir_fs.h
index c0e486e5edc..1f2931af179 100644
--- a/src/mesa/drivers/dri/i965/brw_ir_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_ir_fs.h
@@ -41,7 +41,7 @@ public:
    explicit fs_reg(uint32_t u);
    explicit fs_reg(uint8_t vf[4]);
    explicit fs_reg(uint8_t vf0, uint8_t vf1, uint8_t vf2, uint8_t vf3);
-   fs_reg(struct brw_reg fixed_hw_reg);
+   fs_reg(struct brw_reg reg);
    fs_reg(enum register_file file, int reg);
    fs_reg(enum register_file file, int reg, enum brw_reg_type type);
 
@@ -80,7 +80,7 @@ negate(fs_reg reg)
 static inline fs_reg
 retype(fs_reg reg, enum brw_reg_type type)
 {
-   reg.fixed_hw_reg.type = reg.type = type;
+   reg.type = type;
    return reg;
 }
 
diff --git a/src/mesa/drivers/dri/i965/brw_ir_vec4.h b/src/mesa/drivers/dri/i965/brw_ir_vec4.h
index 2fbb043f244..0b2a9258f25 100644
--- a/src/mesa/drivers/dri/i965/brw_ir_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_ir_vec4.h
@@ -63,7 +63,7 @@ public:
 static inline src_reg
 retype(src_reg reg, enum brw_reg_type type)
 {
-   reg.fixed_hw_reg.type = reg.type = type;
+   reg.type = type;
    return reg;
 }
 
@@ -130,7 +130,7 @@ public:
 static inline dst_reg
 retype(dst_reg reg, enum brw_reg_type type)
 {
-   reg.fixed_hw_reg.type = reg.type = type;
+   reg.type = type;
    return reg;
 }
 
diff --git a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
index d21bc677c82..521d04ec17e 100644
--- a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
+++ b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
@@ -586,12 +586,12 @@ fs_instruction_scheduler::count_reads_remaining(backend_instruction *be)
       if (inst->src[i].file == GRF) {
          reads_remaining[inst->src[i].reg]++;
       } else if (inst->src[i].file == HW_REG &&
-               inst->src[i].fixed_hw_reg.file == BRW_GENERAL_REGISTER_FILE) {
-         if (inst->src[i].fixed_hw_reg.nr >= hw_reg_count)
+                 inst->src[i].brw_reg::file == BRW_GENERAL_REGISTER_FILE) {
+         if (inst->src[i].nr >= hw_reg_count)
             continue;
 
          for (int j = 0; j < inst->regs_read(i); j++)
-            hw_reads_remaining[inst->src[i].fixed_hw_reg.nr + j]++;
+            hw_reads_remaining[inst->src[i].nr + j]++;
       }
    }
 }
@@ -671,10 +671,10 @@ fs_instruction_scheduler::update_register_pressure(backend_instruction *be)
       if (inst->src[i].file == GRF) {
          reads_remaining[inst->src[i].reg]--;
       } else if (inst->src[i].file == HW_REG &&
-                 inst->src[i].fixed_hw_reg.file == BRW_GENERAL_REGISTER_FILE &&
-                 inst->src[i].fixed_hw_reg.nr < hw_reg_count) {
+                 inst->src[i].brw_reg::file == BRW_GENERAL_REGISTER_FILE &&
+                 inst->src[i].nr < hw_reg_count) {
          for (int off = 0; off < inst->regs_read(i); off++)
-            hw_reads_remaining[inst->src[i].fixed_hw_reg.nr + off]--;
+            hw_reads_remaining[inst->src[i].nr + off]--;
       }
    }
 }
@@ -701,10 +701,10 @@ fs_instruction_scheduler::get_register_pressure_benefit(backend_instruction *be)
          benefit += v->alloc.sizes[inst->src[i].reg];
 
       if (inst->src[i].file == HW_REG &&
-          inst->src[i].fixed_hw_reg.file == BRW_GENERAL_REGISTER_FILE &&
-          inst->src[i].fixed_hw_reg.nr < hw_reg_count) {
+          inst->src[i].brw_reg::file == BRW_GENERAL_REGISTER_FILE &&
+          inst->src[i].nr < hw_reg_count) {
          for (int off = 0; off < inst->regs_read(i); off++) {
-            int reg = inst->src[i].fixed_hw_reg.nr + off;
+            int reg = inst->src[i].nr + off;
             if (!BITSET_TEST(hw_liveout[block_idx], reg) &&
                 hw_reads_remaining[reg] == 1) {
                benefit++;
@@ -960,11 +960,11 @@ fs_instruction_scheduler::calculate_deps()
                }
             }
          } else if (inst->src[i].file == HW_REG &&
-                    (inst->src[i].fixed_hw_reg.file ==
+                    (inst->src[i].brw_reg::file ==
                      BRW_GENERAL_REGISTER_FILE)) {
             if (post_reg_alloc) {
                for (int r = 0; r < inst->regs_read(i); r++)
-                  add_dep(last_grf_write[inst->src[i].fixed_hw_reg.nr + r], n);
+                  add_dep(last_grf_write[inst->src[i].nr + r], n);
             } else {
                add_dep(last_fixed_grf_write, n);
             }
@@ -974,7 +974,7 @@ fs_instruction_scheduler::calculate_deps()
                     inst->src[i].file != IMM &&
                     inst->src[i].file != UNIFORM &&
                     (inst->src[i].file != HW_REG ||
-                     inst->src[i].fixed_hw_reg.file != BRW_IMMEDIATE_VALUE)) {
+                     inst->src[i].brw_reg::file != BRW_IMMEDIATE_VALUE)) {
             assert(inst->src[i].file != MRF);
             add_barrier_deps(n);
          }
@@ -1025,10 +1025,10 @@ fs_instruction_scheduler::calculate_deps()
             last_mrf_write[reg] = n;
          }
       } else if (inst->dst.file == HW_REG &&
-                 inst->dst.fixed_hw_reg.file == BRW_GENERAL_REGISTER_FILE) {
+                 inst->dst.brw_reg::file == BRW_GENERAL_REGISTER_FILE) {
          if (post_reg_alloc) {
             for (int r = 0; r < inst->regs_written; r++)
-               last_grf_write[inst->dst.fixed_hw_reg.nr + r] = n;
+               last_grf_write[inst->dst.nr + r] = n;
          } else {
             last_fixed_grf_write = n;
          }
@@ -1086,11 +1086,11 @@ fs_instruction_scheduler::calculate_deps()
                }
             }
          } else if (inst->src[i].file == HW_REG &&
-                    (inst->src[i].fixed_hw_reg.file ==
+                    (inst->src[i].brw_reg::file ==
                      BRW_GENERAL_REGISTER_FILE)) {
             if (post_reg_alloc) {
                for (int r = 0; r < inst->regs_read(i); r++)
-                  add_dep(n, last_grf_write[inst->src[i].fixed_hw_reg.nr + r], 0);
+                  add_dep(n, last_grf_write[inst->src[i].nr + r], 0);
             } else {
                add_dep(n, last_fixed_grf_write, 0);
             }
@@ -1100,7 +1100,7 @@ fs_instruction_scheduler::calculate_deps()
                     inst->src[i].file != IMM &&
                     inst->src[i].file != UNIFORM &&
                     (inst->src[i].file != HW_REG ||
-                     inst->src[i].fixed_hw_reg.file != BRW_IMMEDIATE_VALUE)) {
+                     inst->src[i].brw_reg::file != BRW_IMMEDIATE_VALUE)) {
             assert(inst->src[i].file != MRF);
             add_barrier_deps(n);
          }
@@ -1150,10 +1150,10 @@ fs_instruction_scheduler::calculate_deps()
             last_mrf_write[reg] = n;
          }
       } else if (inst->dst.file == HW_REG &&
-                 inst->dst.fixed_hw_reg.file == BRW_GENERAL_REGISTER_FILE) {
+                 inst->dst.brw_reg::file == BRW_GENERAL_REGISTER_FILE) {
          if (post_reg_alloc) {
             for (int r = 0; r < inst->regs_written; r++)
-               last_grf_write[inst->dst.fixed_hw_reg.nr + r] = n;
+               last_grf_write[inst->dst.nr + r] = n;
          } else {
             last_fixed_grf_write = n;
          }
@@ -1219,7 +1219,7 @@ vec4_instruction_scheduler::calculate_deps()
             for (unsigned j = 0; j < inst->regs_read(i); ++j)
                add_dep(last_grf_write[inst->src[i].reg + j], n);
          } else if (inst->src[i].file == HW_REG &&
-                    (inst->src[i].fixed_hw_reg.file ==
+                    (inst->src[i].brw_reg::file ==
                      BRW_GENERAL_REGISTER_FILE)) {
             add_dep(last_fixed_grf_write, n);
          } else if (inst->src[i].is_accumulator()) {
@@ -1229,7 +1229,7 @@ vec4_instruction_scheduler::calculate_deps()
                     inst->src[i].file != IMM &&
                     inst->src[i].file != UNIFORM &&
                     (inst->src[i].file != HW_REG ||
-                     inst->src[i].fixed_hw_reg.file != BRW_IMMEDIATE_VALUE)) {
+                     inst->src[i].brw_reg::file != BRW_IMMEDIATE_VALUE)) {
             /* No reads from MRF, and ATTR is already translated away */
             assert(inst->src[i].file != MRF &&
                    inst->src[i].file != ATTR);
@@ -1267,7 +1267,7 @@ vec4_instruction_scheduler::calculate_deps()
          add_dep(last_mrf_write[inst->dst.reg], n);
          last_mrf_write[inst->dst.reg] = n;
      } else if (inst->dst.file == HW_REG &&
-                 inst->dst.fixed_hw_reg.file == BRW_GENERAL_REGISTER_FILE) {
+                 inst->dst.brw_reg::file == BRW_GENERAL_REGISTER_FILE) {
          last_fixed_grf_write = n;
       } else if (inst->dst.is_accumulator()) {
          add_dep(last_accumulator_write, n);
@@ -1317,7 +1317,7 @@ vec4_instruction_scheduler::calculate_deps()
             for (unsigned j = 0; j < inst->regs_read(i); ++j)
                add_dep(n, last_grf_write[inst->src[i].reg + j]);
          } else if (inst->src[i].file == HW_REG &&
-                    (inst->src[i].fixed_hw_reg.file ==
+                    (inst->src[i].brw_reg::file ==
                      BRW_GENERAL_REGISTER_FILE)) {
             add_dep(n, last_fixed_grf_write);
          } else if (inst->src[i].is_accumulator()) {
@@ -1326,7 +1326,7 @@ vec4_instruction_scheduler::calculate_deps()
                     inst->src[i].file != IMM &&
                     inst->src[i].file != UNIFORM &&
                     (inst->src[i].file != HW_REG ||
-                     inst->src[i].fixed_hw_reg.file != BRW_IMMEDIATE_VALUE)) {
+                     inst->src[i].brw_reg::file != BRW_IMMEDIATE_VALUE)) {
             assert(inst->src[i].file != MRF &&
                    inst->src[i].file != ATTR);
             add_barrier_deps(n);
@@ -1360,7 +1360,7 @@ vec4_instruction_scheduler::calculate_deps()
       } else if (inst->dst.file == MRF) {
          last_mrf_write[inst->dst.reg] = n;
       } else if (inst->dst.file == HW_REG &&
-                 inst->dst.fixed_hw_reg.file == BRW_GENERAL_REGISTER_FILE) {
+                 inst->dst.brw_reg::file == BRW_GENERAL_REGISTER_FILE) {
          last_fixed_grf_write = n;
       } else if (inst->dst.is_accumulator()) {
          last_accumulator_write = n;
diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp
index ce0019ff6f9..ca7db9a73ac 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -734,8 +734,8 @@ bool
 backend_reg::is_null() const
 {
    return file == HW_REG &&
-          fixed_hw_reg.file == BRW_ARCHITECTURE_REGISTER_FILE &&
-          fixed_hw_reg.nr == BRW_ARF_NULL;
+          brw_reg::file == BRW_ARCHITECTURE_REGISTER_FILE &&
+          nr == BRW_ARF_NULL;
 }
 
 
@@ -743,8 +743,8 @@ bool
 backend_reg::is_accumulator() const
 {
    return file == HW_REG &&
-          fixed_hw_reg.file == BRW_ARCHITECTURE_REGISTER_FILE &&
-          fixed_hw_reg.nr == BRW_ARF_ACCUMULATOR;
+          brw_reg::file == BRW_ARCHITECTURE_REGISTER_FILE &&
+          nr == BRW_ARF_ACCUMULATOR;
 }
 
 bool
diff --git a/src/mesa/drivers/dri/i965/brw_shader.h b/src/mesa/drivers/dri/i965/brw_shader.h
index 3f435e2b728..086ab607c52 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.h
+++ b/src/mesa/drivers/dri/i965/brw_shader.h
@@ -51,6 +51,9 @@ enum PACKED register_file {
 #ifdef __cplusplus
 struct backend_reg : public brw_reg
 {
+   backend_reg() {}
+   backend_reg(struct brw_reg reg) : brw_reg(reg) {}
+
    bool is_zero() const;
    bool is_one() const;
    bool is_negative_one() const;
@@ -79,8 +82,6 @@ struct backend_reg : public brw_reg
     * For uniforms, this is in units of 1 float.
     */
    uint16_t reg_offset;
-
-   struct brw_reg fixed_hw_reg;
 };
 #endif
 
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index 1cb43c3c1f2..9155c2e811d 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -119,25 +119,23 @@ src_reg::src_reg(uint8_t vf0, uint8_t vf1, uint8_t vf2, uint8_t vf3)
                                (vf3 << 24);
 }
 
-src_reg::src_reg(struct brw_reg reg)
+src_reg::src_reg(struct brw_reg reg) :
+   backend_reg(reg)
 {
-   init();
-
    this->file = HW_REG;
-   this->fixed_hw_reg = reg;
-   this->type = reg.type;
+   this->reg = 0;
+   this->reg_offset = 0;
+   this->swizzle = BRW_SWIZZLE_XXXX;
+   this->reladdr = NULL;
 }
 
-src_reg::src_reg(const dst_reg &reg)
+src_reg::src_reg(const dst_reg &reg) :
+   backend_reg(static_cast<struct brw_reg>(reg))
 {
-   init();
-
    this->file = reg.file;
    this->reg = reg.reg;
    this->reg_offset = reg.reg_offset;
-   this->type = reg.type;
    this->reladdr = reg.reladdr;
-   this->fixed_hw_reg = reg.fixed_hw_reg;
    this->swizzle = brw_swizzle_for_mask(reg.writemask);
 }
 
@@ -184,26 +182,24 @@ dst_reg::dst_reg(register_file file, int reg, brw_reg_type type,
    this->writemask = writemask;
 }
 
-dst_reg::dst_reg(struct brw_reg reg)
+dst_reg::dst_reg(struct brw_reg reg) :
+   backend_reg(reg)
 {
-   init();
-
    this->file = HW_REG;
-   this->fixed_hw_reg = reg;
-   this->type = reg.type;
+   this->reg = 0;
+   this->reg_offset = 0;
+   this->writemask = WRITEMASK_XYZW;
+   this->reladdr = NULL;
 }
 
-dst_reg::dst_reg(const src_reg &reg)
+dst_reg::dst_reg(const src_reg &reg) :
+   backend_reg(static_cast<struct brw_reg>(reg))
 {
-   init();
-
    this->file = reg.file;
    this->reg = reg.reg;
    this->reg_offset = reg.reg_offset;
-   this->type = reg.type;
    this->writemask = brw_mask_for_swizzle(reg.swizzle);
    this->reladdr = reg.reladdr;
-   this->fixed_hw_reg = reg.fixed_hw_reg;
 }
 
 bool
@@ -219,8 +215,7 @@ dst_reg::equals(const dst_reg &r) const
            (reladdr == r.reladdr ||
             (reladdr && r.reladdr && reladdr->equals(*r.reladdr))) &&
            (file != HW_REG ||
-            memcmp(&fixed_hw_reg, &r.fixed_hw_reg,
-                   sizeof(fixed_hw_reg)) == 0));
+            memcmp((brw_reg *)this, (brw_reg *)&r, sizeof(brw_reg)) == 0));
 }
 
 bool
@@ -364,8 +359,7 @@ src_reg::equals(const src_reg &r) const
 	   swizzle == r.swizzle &&
 	   !reladdr && !r.reladdr &&
            (file != HW_REG ||
-            memcmp(&fixed_hw_reg, &r.fixed_hw_reg,
-                   sizeof(fixed_hw_reg)) == 0) &&
+            memcmp((brw_reg *)this, (brw_reg *)&r, sizeof(brw_reg)) == 0) &&
            (file != IMM || d == r.d));
 }
 
@@ -969,9 +963,9 @@ vec4_visitor::opt_set_dependency_control()
             last_mrf_write[reg] = inst;
             mrf_channels_written[reg] |= inst->dst.writemask;
          } else if (inst->dst.reg == HW_REG) {
-            if (inst->dst.fixed_hw_reg.file == BRW_GENERAL_REGISTER_FILE)
+            if (inst->dst.brw_reg::file == BRW_GENERAL_REGISTER_FILE)
                memset(last_grf_write, 0, sizeof(last_grf_write));
-            if (inst->dst.fixed_hw_reg.file == BRW_MESSAGE_REGISTER_FILE)
+            if (inst->dst.brw_reg::file == BRW_MESSAGE_REGISTER_FILE)
                memset(last_mrf_write, 0, sizeof(last_mrf_write));
          }
       }
@@ -1400,31 +1394,31 @@ vec4_visitor::dump_instruction(backend_instruction *be_inst, FILE *file)
       fprintf(file, "m%d", inst->dst.reg);
       break;
    case HW_REG:
-      if (inst->dst.fixed_hw_reg.file == BRW_ARCHITECTURE_REGISTER_FILE) {
-         switch (inst->dst.fixed_hw_reg.nr) {
+      if (inst->dst.brw_reg::file == BRW_ARCHITECTURE_REGISTER_FILE) {
+         switch (inst->dst.nr) {
          case BRW_ARF_NULL:
             fprintf(file, "null");
             break;
          case BRW_ARF_ADDRESS:
-            fprintf(file, "a0.%d", inst->dst.fixed_hw_reg.subnr);
+            fprintf(file, "a0.%d", inst->dst.subnr);
             break;
          case BRW_ARF_ACCUMULATOR:
-            fprintf(file, "acc%d", inst->dst.fixed_hw_reg.subnr);
+            fprintf(file, "acc%d", inst->dst.subnr);
             break;
          case BRW_ARF_FLAG:
-            fprintf(file, "f%d.%d", inst->dst.fixed_hw_reg.nr & 0xf,
-                             inst->dst.fixed_hw_reg.subnr);
+            fprintf(file, "f%d.%d", inst->dst.nr & 0xf,
+                             inst->dst.subnr);
             break;
          default:
-            fprintf(file, "arf%d.%d", inst->dst.fixed_hw_reg.nr & 0xf,
-                               inst->dst.fixed_hw_reg.subnr);
+            fprintf(file, "arf%d.%d", inst->dst.nr & 0xf,
+                               inst->dst.subnr);
             break;
          }
       } else {
-         fprintf(file, "hw_reg%d", inst->dst.fixed_hw_reg.nr);
+         fprintf(file, "hw_reg%d", inst->dst.nr);
       }
-      if (inst->dst.fixed_hw_reg.subnr)
-         fprintf(file, "+%d", inst->dst.fixed_hw_reg.subnr);
+      if (inst->dst.subnr)
+         fprintf(file, "+%d", inst->dst.subnr);
       break;
    case BAD_FILE:
       fprintf(file, "(null)");
@@ -1489,37 +1483,31 @@ vec4_visitor::dump_instruction(backend_instruction *be_inst, FILE *file)
          }
          break;
       case HW_REG:
-         if (inst->src[i].fixed_hw_reg.negate)
-            fprintf(file, "-");
-         if (inst->src[i].fixed_hw_reg.abs)
-            fprintf(file, "|");
-         if (inst->src[i].fixed_hw_reg.file == BRW_ARCHITECTURE_REGISTER_FILE) {
-            switch (inst->src[i].fixed_hw_reg.nr) {
+         if (inst->src[i].brw_reg::file == BRW_ARCHITECTURE_REGISTER_FILE) {
+            switch (inst->src[i].nr) {
             case BRW_ARF_NULL:
                fprintf(file, "null");
                break;
             case BRW_ARF_ADDRESS:
-               fprintf(file, "a0.%d", inst->src[i].fixed_hw_reg.subnr);
+               fprintf(file, "a0.%d", inst->src[i].subnr);
                break;
             case BRW_ARF_ACCUMULATOR:
-               fprintf(file, "acc%d", inst->src[i].fixed_hw_reg.subnr);
+               fprintf(file, "acc%d", inst->src[i].subnr);
                break;
             case BRW_ARF_FLAG:
-               fprintf(file, "f%d.%d", inst->src[i].fixed_hw_reg.nr & 0xf,
-                                inst->src[i].fixed_hw_reg.subnr);
+               fprintf(file, "f%d.%d", inst->src[i].nr & 0xf,
+                                inst->src[i].subnr);
                break;
             default:
-               fprintf(file, "arf%d.%d", inst->src[i].fixed_hw_reg.nr & 0xf,
-                                  inst->src[i].fixed_hw_reg.subnr);
+               fprintf(file, "arf%d.%d", inst->src[i].nr & 0xf,
+                                  inst->src[i].subnr);
                break;
             }
          } else {
-            fprintf(file, "hw_reg%d", inst->src[i].fixed_hw_reg.nr);
+            fprintf(file, "hw_reg%d", inst->src[i].nr);
          }
-         if (inst->src[i].fixed_hw_reg.subnr)
-            fprintf(file, "+%d", inst->src[i].fixed_hw_reg.subnr);
-         if (inst->src[i].fixed_hw_reg.abs)
-            fprintf(file, "|");
+         if (inst->src[i].subnr)
+            fprintf(file, "+%d", inst->src[i].subnr);
          break;
       case BAD_FILE:
          fprintf(file, "(null)");
@@ -1600,8 +1588,7 @@ vec4_visitor::lower_attributes_to_hw_regs(const int *attribute_map,
 	 reg.type = inst->dst.type;
 	 reg.writemask = inst->dst.writemask;
 
-	 inst->dst.file = HW_REG;
-	 inst->dst.fixed_hw_reg = reg;
+         inst->dst = reg;
       }
 
       for (int i = 0; i < 3; i++) {
@@ -1623,8 +1610,7 @@ vec4_visitor::lower_attributes_to_hw_regs(const int *attribute_map,
 	 if (inst->src[i].negate)
 	    reg = negate(reg);
 
-	 inst->src[i].file = HW_REG;
-	 inst->src[i].fixed_hw_reg = reg;
+         inst->src[i] = reg;
       }
    }
 }
@@ -1836,7 +1822,6 @@ vec4_visitor::convert_to_hw_regs()
             break;
 
          case HW_REG:
-            assert(src.type == src.fixed_hw_reg.type);
             continue;
 
          case BAD_FILE:
@@ -1848,7 +1833,7 @@ vec4_visitor::convert_to_hw_regs()
          case ATTR:
             unreachable("not reached");
          }
-         src.fixed_hw_reg = reg;
+         src = reg;
       }
 
       dst_reg &dst = inst->dst;
@@ -1869,8 +1854,7 @@ vec4_visitor::convert_to_hw_regs()
          break;
 
       case HW_REG:
-         assert(dst.type == dst.fixed_hw_reg.type);
-         reg = dst.fixed_hw_reg;
+         reg = dst;
          break;
 
       case BAD_FILE:
@@ -1883,7 +1867,7 @@ vec4_visitor::convert_to_hw_regs()
          unreachable("not reached");
       }
 
-      dst.fixed_hw_reg = reg;
+      dst = reg;
    }
 }
 
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
index 74d67cb0820..20107ac2054 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
@@ -356,7 +356,7 @@ generate_gs_urb_write_allocate(struct brw_codegen *p, vec4_instruction *inst)
 
    /* We pass the temporary passed in src0 as the writeback register */
    brw_urb_WRITE(p,
-                 inst->src[0].fixed_hw_reg, /* dest */
+                 inst->src[0], /* dest */
                  inst->base_mrf, /* starting mrf reg nr */
                  src,
                  BRW_URB_WRITE_ALLOCATE_COMPLETE,
@@ -369,8 +369,8 @@ generate_gs_urb_write_allocate(struct brw_codegen *p, vec4_instruction *inst)
    brw_push_insn_state(p);
    brw_set_default_access_mode(p, BRW_ALIGN_1);
    brw_set_default_mask_control(p, BRW_MASK_DISABLE);
-   brw_MOV(p, get_element_ud(inst->dst.fixed_hw_reg, 0),
-           get_element_ud(inst->src[0].fixed_hw_reg, 0));
+   brw_MOV(p, get_element_ud(inst->dst, 0),
+           get_element_ud(inst->src[0], 0));
    brw_pop_insn_state(p);
 }
 
@@ -1059,9 +1059,9 @@ generate_code(struct brw_codegen *p,
          annotate(p->devinfo, &annotation, cfg, inst, p->next_insn_offset);
 
       for (unsigned int i = 0; i < 3; i++) {
-	 src[i] = inst->src[i].fixed_hw_reg;
+         src[i] = inst->src[i];
       }
-      dst = inst->dst.fixed_hw_reg;
+      dst = inst->dst;
 
       brw_set_default_predicate_control(p, inst->predicate);
       brw_set_default_predicate_inverse(p, inst->predicate_inverse);
@@ -1241,7 +1241,7 @@ generate_code(struct brw_codegen *p,
          break;
 
       case BRW_OPCODE_IF:
-         if (inst->src[0].file != BAD_FILE) {
+         if (!inst->src[0].is_null()) {
             /* The instruction has an embedded compare (only allowed on gen6) */
             assert(devinfo->gen == 6);
             gen6_IF(p, inst->conditional_mod, src[0], src[1]);
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index ecabedfe614..7b11ac1675d 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -237,8 +237,6 @@ vec4_visitor::CMP(dst_reg dst, src_reg src0, src_reg src1,
     * type to match src0 so we can compact the instruction.
     */
    dst.type = src0.type;
-   if (dst.file == HW_REG)
-      dst.fixed_hw_reg.type = dst.type;
 
    resolve_ud_negate(&src0);
    resolve_ud_negate(&src1);

From 58fa9d47b536403c4e3ca5d6a2495691338388fd Mon Sep 17 00:00:00 2001
From: Matt Turner <mattst88@gmail.com>
Date: Sun, 25 Oct 2015 21:14:56 -0700
Subject: [PATCH 261/287] i965/vec4: Remove swizzle/writemask fields from
 src/dst_reg.

Also allows us to handle HW_REGs in the swizzle() and writemask()
functions.

Reviewed-by: Emil Velikov <emil.velikov@collabora.co.uk>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/mesa/drivers/dri/i965/brw_ir_vec4.h | 7 +------
 src/mesa/drivers/dri/i965/brw_vec4.cpp  | 2 --
 2 files changed, 1 insertion(+), 8 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_ir_vec4.h b/src/mesa/drivers/dri/i965/brw_ir_vec4.h
index 0b2a9258f25..a19a262506d 100644
--- a/src/mesa/drivers/dri/i965/brw_ir_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_ir_vec4.h
@@ -55,8 +55,6 @@ public:
 
    explicit src_reg(const dst_reg &reg);
 
-   unsigned swizzle; /**< BRW_SWIZZLE_XYZW macros from brw_reg.h. */
-
    src_reg *reladdr;
 };
 
@@ -82,7 +80,6 @@ offset(src_reg reg, unsigned delta)
 static inline src_reg
 swizzle(src_reg reg, unsigned swizzle)
 {
-   assert(reg.file != HW_REG);
    reg.swizzle = brw_compose_swizzle(swizzle, reg.swizzle);
    return reg;
 }
@@ -122,8 +119,6 @@ public:
 
    bool equals(const dst_reg &r) const;
 
-   unsigned writemask; /**< Bitfield of WRITEMASK_[XYZW] */
-
    src_reg *reladdr;
 };
 
@@ -145,7 +140,7 @@ offset(dst_reg reg, unsigned delta)
 static inline dst_reg
 writemask(dst_reg reg, unsigned mask)
 {
-   assert(reg.file != HW_REG && reg.file != IMM);
+   assert(reg.file != IMM);
    assert((reg.writemask & mask) != 0);
    reg.writemask &= mask;
    return reg;
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index 9155c2e811d..37170e7fc57 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -125,7 +125,6 @@ src_reg::src_reg(struct brw_reg reg) :
    this->file = HW_REG;
    this->reg = 0;
    this->reg_offset = 0;
-   this->swizzle = BRW_SWIZZLE_XXXX;
    this->reladdr = NULL;
 }
 
@@ -188,7 +187,6 @@ dst_reg::dst_reg(struct brw_reg reg) :
    this->file = HW_REG;
    this->reg = 0;
    this->reg_offset = 0;
-   this->writemask = WRITEMASK_XYZW;
    this->reladdr = NULL;
 }
 

From 3048053908310eaf082058e5be34ae902e1fc02c Mon Sep 17 00:00:00 2001
From: Matt Turner <mattst88@gmail.com>
Date: Mon, 26 Oct 2015 04:04:16 -0700
Subject: [PATCH 262/287] i965: Unwrap some lines.

Reviewed-by: Emil Velikov <emil.velikov@collabora.co.uk>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/mesa/drivers/dri/i965/brw_fs.cpp                    | 5 +----
 src/mesa/drivers/dri/i965/brw_fs_combine_constants.cpp  | 3 +--
 src/mesa/drivers/dri/i965/brw_vec4.cpp                  | 5 +----
 src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp | 3 +--
 4 files changed, 4 insertions(+), 12 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index c2d04d970e5..f589e6e5631 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -417,10 +417,7 @@ fs_reg::fs_reg(uint8_t vf0, uint8_t vf1, uint8_t vf2, uint8_t vf3)
    init();
    this->file = IMM;
    this->type = BRW_REGISTER_TYPE_VF;
-   this->ud = (vf0 <<  0) |
-                               (vf1 <<  8) |
-                               (vf2 << 16) |
-                               (vf3 << 24);
+   this->ud = (vf0 <<  0) | (vf1 <<  8) | (vf2 << 16) | (vf3 << 24);
 }
 
 fs_reg::fs_reg(struct brw_reg reg) :
diff --git a/src/mesa/drivers/dri/i965/brw_fs_combine_constants.cpp b/src/mesa/drivers/dri/i965/brw_fs_combine_constants.cpp
index c9564597b2c..234bbec0b6b 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_combine_constants.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_combine_constants.cpp
@@ -299,8 +299,7 @@ fs_visitor::opt_combine_constants()
          reg->reg = table.imm[i].reg;
          reg->subreg_offset = table.imm[i].subreg_offset;
          reg->stride = 0;
-         reg->negate = signbit(reg->f) !=
-                               signbit(table.imm[i].val);
+         reg->negate = signbit(reg->f) != signbit(table.imm[i].val);
          assert(fabsf(reg->f) == table.imm[i].val);
       }
    }
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index 37170e7fc57..8c2056b767e 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -113,10 +113,7 @@ src_reg::src_reg(uint8_t vf0, uint8_t vf1, uint8_t vf2, uint8_t vf3)
 
    this->file = IMM;
    this->type = BRW_REGISTER_TYPE_VF;
-   this->ud = (vf0 <<  0) |
-                               (vf1 <<  8) |
-                               (vf2 << 16) |
-                               (vf3 << 24);
+   this->ud = (vf0 <<  0) | (vf1 <<  8) | (vf2 << 16) | (vf3 << 24);
 }
 
 src_reg::src_reg(struct brw_reg reg) :
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp b/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp
index 523866efe19..2be7b14ee70 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp
@@ -147,8 +147,7 @@ try_constant_propagate(const struct brw_device_info *devinfo,
    }
 
    if (value.type == BRW_REGISTER_TYPE_VF)
-      value.ud = swizzle_vf_imm(value.ud,
-                                                 inst->src[arg].swizzle);
+      value.ud = swizzle_vf_imm(value.ud, inst->src[arg].swizzle);
 
    switch (inst->opcode) {
    case BRW_OPCODE_MOV:

From 7638e75cf99263c1ee8e31c6cc5a319feec2c943 Mon Sep 17 00:00:00 2001
From: Matt Turner <mattst88@gmail.com>
Date: Mon, 26 Oct 2015 04:35:14 -0700
Subject: [PATCH 263/287] i965: Use brw_reg's nr field to store register
 number.

In addition to combining another field, we get to replace silliness like
"reg.reg" with something that actually makes sense, "reg.nr"; and no one
will ever wonder again why dst.reg isn't a dst_reg.

Moving the now 16-bit nr field to a 16-bit boundary decreases code size
by about 3k.

Reviewed-by: Emil Velikov <emil.velikov@collabora.co.uk>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/mesa/drivers/dri/i965/brw_fs.cpp          | 163 +++++++++---------
 .../dri/i965/brw_fs_combine_constants.cpp     |   8 +-
 .../dri/i965/brw_fs_copy_propagation.cpp      |  18 +-
 src/mesa/drivers/dri/i965/brw_fs_cse.cpp      |   2 +-
 .../drivers/dri/i965/brw_fs_generator.cpp     |   8 +-
 .../drivers/dri/i965/brw_fs_live_variables.h  |   2 +-
 .../drivers/dri/i965/brw_fs_reg_allocate.cpp  |  34 ++--
 .../dri/i965/brw_fs_register_coalesce.cpp     |  22 +--
 .../dri/i965/brw_fs_saturate_propagation.cpp  |   2 +-
 src/mesa/drivers/dri/i965/brw_fs_validate.cpp |   4 +-
 src/mesa/drivers/dri/i965/brw_fs_visitor.cpp  |   2 +-
 src/mesa/drivers/dri/i965/brw_ir_fs.h         |   6 +-
 src/mesa/drivers/dri/i965/brw_ir_vec4.h       |   8 +-
 src/mesa/drivers/dri/i965/brw_reg.h           |  10 +-
 .../dri/i965/brw_schedule_instructions.cpp    |  62 +++----
 src/mesa/drivers/dri/i965/brw_shader.cpp      |   2 +-
 src/mesa/drivers/dri/i965/brw_shader.h        |   9 -
 src/mesa/drivers/dri/i965/brw_vec4.cpp        | 100 ++++++-----
 .../dri/i965/brw_vec4_copy_propagation.cpp    |   6 +-
 src/mesa/drivers/dri/i965/brw_vec4_cse.cpp    |   2 +-
 .../dri/i965/brw_vec4_live_variables.h        |  12 +-
 .../dri/i965/brw_vec4_reg_allocate.cpp        |  36 ++--
 .../drivers/dri/i965/brw_vec4_visitor.cpp     |  40 ++---
 .../dri/i965/test_vec4_copy_propagation.cpp   |   4 +-
 .../dri/i965/test_vec4_register_coalesce.cpp  |   4 +-
 25 files changed, 276 insertions(+), 290 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index f589e6e5631..3ea97f22e97 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -307,7 +307,7 @@ fs_inst::is_copy_payload(const brw::simple_allocator &grf_alloc) const
    if (reg.file != GRF || reg.reg_offset != 0 || reg.stride == 0)
       return false;
 
-   if (grf_alloc.sizes[reg.reg] != this->regs_written)
+   if (grf_alloc.sizes[reg.nr] != this->regs_written)
       return false;
 
    for (int i = 0; i < this->sources; i++) {
@@ -424,7 +424,6 @@ fs_reg::fs_reg(struct brw_reg reg) :
    backend_reg(reg)
 {
    this->file = HW_REG;
-   this->reg = 0;
    this->reg_offset = 0;
    this->subreg_offset = 0;
    this->reladdr = NULL;
@@ -435,7 +434,7 @@ bool
 fs_reg::equals(const fs_reg &r) const
 {
    return (file == r.file &&
-           reg == r.reg &&
+           nr == r.nr &&
            reg_offset == r.reg_offset &&
            subreg_offset == r.subreg_offset &&
            type == r.type &&
@@ -959,20 +958,20 @@ fs_visitor::vgrf(const glsl_type *const type)
                  brw_type_for_base_type(type));
 }
 
-fs_reg::fs_reg(enum register_file file, int reg)
+fs_reg::fs_reg(enum register_file file, int nr)
 {
    init();
    this->file = file;
-   this->reg = reg;
+   this->nr = nr;
    this->type = BRW_REGISTER_TYPE_F;
    this->stride = (file == UNIFORM ? 0 : 1);
 }
 
-fs_reg::fs_reg(enum register_file file, int reg, enum brw_reg_type type)
+fs_reg::fs_reg(enum register_file file, int nr, enum brw_reg_type type)
 {
    init();
    this->file = file;
-   this->reg = reg;
+   this->nr = nr;
    this->type = type;
    this->stride = (file == UNIFORM ? 0 : 1);
 }
@@ -1456,7 +1455,7 @@ fs_visitor::assign_curb_setup()
    foreach_block_and_inst(block, fs_inst, inst, cfg) {
       for (unsigned int i = 0; i < inst->sources; i++) {
 	 if (inst->src[i].file == UNIFORM) {
-            int uniform_nr = inst->src[i].reg + inst->src[i].reg_offset;
+            int uniform_nr = inst->src[i].nr + inst->src[i].reg_offset;
             int constant_nr;
             if (uniform_nr >= 0 && uniform_nr < (int) uniforms) {
                constant_nr = push_constant_loc[uniform_nr];
@@ -1612,7 +1611,7 @@ fs_visitor::convert_attr_sources_to_hw_regs(fs_inst *inst)
       if (inst->src[i].file == ATTR) {
          int grf = payload.num_regs +
                    prog_data->curb_read_length +
-                   inst->src[i].reg +
+                   inst->src[i].nr +
                    inst->src[i].reg_offset;
 
          struct brw_reg reg =
@@ -1726,15 +1725,15 @@ fs_visitor::split_virtual_grfs()
    /* Mark all used registers as fully splittable */
    foreach_block_and_inst(block, fs_inst, inst, cfg) {
       if (inst->dst.file == GRF) {
-         int reg = vgrf_to_reg[inst->dst.reg];
-         for (unsigned j = 1; j < this->alloc.sizes[inst->dst.reg]; j++)
+         int reg = vgrf_to_reg[inst->dst.nr];
+         for (unsigned j = 1; j < this->alloc.sizes[inst->dst.nr]; j++)
             split_points[reg + j] = true;
       }
 
       for (int i = 0; i < inst->sources; i++) {
          if (inst->src[i].file == GRF) {
-            int reg = vgrf_to_reg[inst->src[i].reg];
-            for (unsigned j = 1; j < this->alloc.sizes[inst->src[i].reg]; j++)
+            int reg = vgrf_to_reg[inst->src[i].nr];
+            for (unsigned j = 1; j < this->alloc.sizes[inst->src[i].nr]; j++)
                split_points[reg + j] = true;
          }
       }
@@ -1742,13 +1741,13 @@ fs_visitor::split_virtual_grfs()
 
    foreach_block_and_inst(block, fs_inst, inst, cfg) {
       if (inst->dst.file == GRF) {
-         int reg = vgrf_to_reg[inst->dst.reg] + inst->dst.reg_offset;
+         int reg = vgrf_to_reg[inst->dst.nr] + inst->dst.reg_offset;
          for (int j = 1; j < inst->regs_written; j++)
             split_points[reg + j] = false;
       }
       for (int i = 0; i < inst->sources; i++) {
          if (inst->src[i].file == GRF) {
-            int reg = vgrf_to_reg[inst->src[i].reg] + inst->src[i].reg_offset;
+            int reg = vgrf_to_reg[inst->src[i].nr] + inst->src[i].reg_offset;
             for (int j = 1; j < inst->regs_read(i); j++)
                split_points[reg + j] = false;
          }
@@ -1795,15 +1794,15 @@ fs_visitor::split_virtual_grfs()
 
    foreach_block_and_inst(block, fs_inst, inst, cfg) {
       if (inst->dst.file == GRF) {
-         reg = vgrf_to_reg[inst->dst.reg] + inst->dst.reg_offset;
-         inst->dst.reg = new_virtual_grf[reg];
+         reg = vgrf_to_reg[inst->dst.nr] + inst->dst.reg_offset;
+         inst->dst.nr = new_virtual_grf[reg];
          inst->dst.reg_offset = new_reg_offset[reg];
          assert((unsigned)new_reg_offset[reg] < alloc.sizes[new_virtual_grf[reg]]);
       }
       for (int i = 0; i < inst->sources; i++) {
 	 if (inst->src[i].file == GRF) {
-            reg = vgrf_to_reg[inst->src[i].reg] + inst->src[i].reg_offset;
-            inst->src[i].reg = new_virtual_grf[reg];
+            reg = vgrf_to_reg[inst->src[i].nr] + inst->src[i].reg_offset;
+            inst->src[i].nr = new_virtual_grf[reg];
             inst->src[i].reg_offset = new_reg_offset[reg];
             assert((unsigned)new_reg_offset[reg] < alloc.sizes[new_virtual_grf[reg]]);
          }
@@ -1831,11 +1830,11 @@ fs_visitor::compact_virtual_grfs()
    /* Mark which virtual GRFs are used. */
    foreach_block_and_inst(block, const fs_inst, inst, cfg) {
       if (inst->dst.file == GRF)
-         remap_table[inst->dst.reg] = 0;
+         remap_table[inst->dst.nr] = 0;
 
       for (int i = 0; i < inst->sources; i++) {
          if (inst->src[i].file == GRF)
-            remap_table[inst->src[i].reg] = 0;
+            remap_table[inst->src[i].nr] = 0;
       }
    }
 
@@ -1860,11 +1859,11 @@ fs_visitor::compact_virtual_grfs()
    /* Patch all the instructions to use the newly renumbered registers */
    foreach_block_and_inst(block, fs_inst, inst, cfg) {
       if (inst->dst.file == GRF)
-         inst->dst.reg = remap_table[inst->dst.reg];
+         inst->dst.nr = remap_table[inst->dst.nr];
 
       for (int i = 0; i < inst->sources; i++) {
          if (inst->src[i].file == GRF)
-            inst->src[i].reg = remap_table[inst->src[i].reg];
+            inst->src[i].nr = remap_table[inst->src[i].nr];
       }
    }
 
@@ -1874,8 +1873,8 @@ fs_visitor::compact_virtual_grfs()
     */
    for (unsigned i = 0; i < ARRAY_SIZE(delta_xy); i++) {
       if (delta_xy[i].file == GRF) {
-         if (remap_table[delta_xy[i].reg] != -1) {
-            delta_xy[i].reg = remap_table[delta_xy[i].reg];
+         if (remap_table[delta_xy[i].nr] != -1) {
+            delta_xy[i].nr = remap_table[delta_xy[i].nr];
          } else {
             delta_xy[i].file = BAD_FILE;
          }
@@ -1927,7 +1926,7 @@ fs_visitor::assign_constant_locations()
             continue;
 
          if (inst->src[i].reladdr) {
-            int uniform = inst->src[i].reg;
+            int uniform = inst->src[i].nr;
 
             /* If this array isn't already present in the pull constant buffer,
              * add it.
@@ -1939,7 +1938,7 @@ fs_visitor::assign_constant_locations()
             }
          } else {
             /* Mark the the one accessed uniform as live */
-            int constant_nr = inst->src[i].reg + inst->src[i].reg_offset;
+            int constant_nr = inst->src[i].nr + inst->src[i].reg_offset;
             if (constant_nr >= 0 && constant_nr < (int) uniforms)
                is_live[constant_nr] = true;
          }
@@ -2015,7 +2014,7 @@ fs_visitor::demote_pull_constants()
 	    continue;
 
          int pull_index;
-         unsigned location = inst->src[i].reg + inst->src[i].reg_offset;
+         unsigned location = inst->src[i].nr + inst->src[i].reg_offset;
          if (location >= uniforms) /* Out of bounds access */
             pull_index = -1;
          else
@@ -2050,7 +2049,7 @@ fs_visitor::demote_pull_constants()
 
          /* Rewrite the instruction to use the temporary VGRF. */
          inst->src[i].file = GRF;
-         inst->src[i].reg = dst.reg;
+         inst->src[i].nr = dst.nr;
          inst->src[i].reg_offset = 0;
       }
    }
@@ -2461,30 +2460,30 @@ fs_visitor::opt_register_renaming()
       /* Rewrite instruction sources. */
       for (int i = 0; i < inst->sources; i++) {
          if (inst->src[i].file == GRF &&
-             remap[inst->src[i].reg] != -1 &&
-             remap[inst->src[i].reg] != inst->src[i].reg) {
-            inst->src[i].reg = remap[inst->src[i].reg];
+             remap[inst->src[i].nr] != -1 &&
+             remap[inst->src[i].nr] != inst->src[i].nr) {
+            inst->src[i].nr = remap[inst->src[i].nr];
             progress = true;
          }
       }
 
-      const int dst = inst->dst.reg;
+      const int dst = inst->dst.nr;
 
       if (depth == 0 &&
           inst->dst.file == GRF &&
-          alloc.sizes[inst->dst.reg] == inst->exec_size / 8 &&
+          alloc.sizes[inst->dst.nr] == inst->exec_size / 8 &&
           !inst->is_partial_write()) {
          if (remap[dst] == -1) {
             remap[dst] = dst;
          } else {
             remap[dst] = alloc.allocate(inst->exec_size / 8);
-            inst->dst.reg = remap[dst];
+            inst->dst.nr = remap[dst];
             progress = true;
          }
       } else if (inst->dst.file == GRF &&
                  remap[dst] != -1 &&
                  remap[dst] != dst) {
-         inst->dst.reg = remap[dst];
+         inst->dst.nr = remap[dst];
          progress = true;
       }
    }
@@ -2493,8 +2492,8 @@ fs_visitor::opt_register_renaming()
       invalidate_live_intervals();
 
       for (unsigned i = 0; i < ARRAY_SIZE(delta_xy); i++) {
-         if (delta_xy[i].file == GRF && remap[delta_xy[i].reg] != -1) {
-            delta_xy[i].reg = remap[delta_xy[i].reg];
+         if (delta_xy[i].file == GRF && remap[delta_xy[i].nr] != -1) {
+            delta_xy[i].nr = remap[delta_xy[i].nr];
          }
       }
    }
@@ -2571,9 +2570,9 @@ fs_visitor::compute_to_mrf()
       /* Work out which hardware MRF registers are written by this
        * instruction.
        */
-      int mrf_low = inst->dst.reg & ~BRW_MRF_COMPR4;
+      int mrf_low = inst->dst.nr & ~BRW_MRF_COMPR4;
       int mrf_high;
-      if (inst->dst.reg & BRW_MRF_COMPR4) {
+      if (inst->dst.nr & BRW_MRF_COMPR4) {
 	 mrf_high = mrf_low + 4;
       } else if (inst->exec_size == 16) {
 	 mrf_high = mrf_low + 1;
@@ -2584,7 +2583,7 @@ fs_visitor::compute_to_mrf()
       /* Can't compute-to-MRF this GRF if someone else was going to
        * read it later.
        */
-      if (this->virtual_grf_end[inst->src[0].reg] > ip)
+      if (this->virtual_grf_end[inst->src[0].nr] > ip)
 	 continue;
 
       /* Found a move of a GRF to a MRF.  Let's see if we can go
@@ -2592,7 +2591,7 @@ fs_visitor::compute_to_mrf()
        */
       foreach_inst_in_block_reverse_starting_from(fs_inst, scan_inst, inst) {
 	 if (scan_inst->dst.file == GRF &&
-	     scan_inst->dst.reg == inst->src[0].reg) {
+            scan_inst->dst.nr == inst->src[0].nr) {
 	    /* Found the last thing to write our reg we want to turn
 	     * into a compute-to-MRF.
 	     */
@@ -2627,7 +2626,7 @@ fs_visitor::compute_to_mrf()
 	    if (scan_inst->dst.reg_offset == inst->src[0].reg_offset) {
 	       /* Found the creator of our MRF's source value. */
 	       scan_inst->dst.file = MRF;
-	       scan_inst->dst.reg = inst->dst.reg;
+               scan_inst->dst.nr = inst->dst.nr;
 	       scan_inst->saturate |= inst->saturate;
 	       inst->remove(block);
 	       progress = true;
@@ -2648,7 +2647,7 @@ fs_visitor::compute_to_mrf()
 	 bool interfered = false;
 	 for (int i = 0; i < scan_inst->sources; i++) {
 	    if (scan_inst->src[i].file == GRF &&
-		scan_inst->src[i].reg == inst->src[0].reg &&
+                scan_inst->src[i].nr == inst->src[0].nr &&
 		scan_inst->src[i].reg_offset == inst->src[0].reg_offset) {
 	       interfered = true;
 	    }
@@ -2660,10 +2659,10 @@ fs_visitor::compute_to_mrf()
 	    /* If somebody else writes our MRF here, we can't
 	     * compute-to-MRF before that.
 	     */
-	    int scan_mrf_low = scan_inst->dst.reg & ~BRW_MRF_COMPR4;
+            int scan_mrf_low = scan_inst->dst.nr & ~BRW_MRF_COMPR4;
 	    int scan_mrf_high;
 
-	    if (scan_inst->dst.reg & BRW_MRF_COMPR4) {
+            if (scan_inst->dst.nr & BRW_MRF_COMPR4) {
 	       scan_mrf_high = scan_mrf_low + 4;
 	    } else if (scan_inst->exec_size == 16) {
 	       scan_mrf_high = scan_mrf_low + 1;
@@ -2819,7 +2818,7 @@ fs_visitor::remove_duplicate_mrf_writes()
 
       if (inst->opcode == BRW_OPCODE_MOV &&
 	  inst->dst.file == MRF) {
-	 fs_inst *prev_inst = last_mrf_move[inst->dst.reg];
+         fs_inst *prev_inst = last_mrf_move[inst->dst.nr];
 	 if (prev_inst && inst->equals(prev_inst)) {
 	    inst->remove(block);
 	    progress = true;
@@ -2829,7 +2828,7 @@ fs_visitor::remove_duplicate_mrf_writes()
 
       /* Clear out the last-write records for MRFs that were overwritten. */
       if (inst->dst.file == MRF) {
-	 last_mrf_move[inst->dst.reg] = NULL;
+         last_mrf_move[inst->dst.nr] = NULL;
       }
 
       if (inst->mlen > 0 && inst->base_mrf != -1) {
@@ -2845,7 +2844,7 @@ fs_visitor::remove_duplicate_mrf_writes()
       if (inst->dst.file == GRF) {
 	 for (unsigned int i = 0; i < ARRAY_SIZE(last_mrf_move); i++) {
 	    if (last_mrf_move[i] &&
-		last_mrf_move[i]->src[0].reg == inst->dst.reg) {
+                last_mrf_move[i]->src[0].nr == inst->dst.nr) {
 	       last_mrf_move[i] = NULL;
 	    }
 	 }
@@ -2855,7 +2854,7 @@ fs_visitor::remove_duplicate_mrf_writes()
 	  inst->dst.file == MRF &&
 	  inst->src[0].file == GRF &&
 	  !inst->is_partial_write()) {
-	 last_mrf_move[inst->dst.reg] = inst;
+         last_mrf_move[inst->dst.nr] = inst;
       }
    }
 
@@ -2872,7 +2871,7 @@ clear_deps_for_inst_src(fs_inst *inst, bool *deps, int first_grf, int grf_len)
    for (int i = 0; i < inst->sources; i++) {
       int grf;
       if (inst->src[i].file == GRF) {
-         grf = inst->src[i].reg;
+         grf = inst->src[i].nr;
       } else if (inst->src[i].file == HW_REG &&
                  inst->src[i].brw_reg::file == BRW_GENERAL_REGISTER_FILE) {
          grf = inst->src[i].nr;
@@ -2910,7 +2909,7 @@ fs_visitor::insert_gen4_pre_send_dependency_workarounds(bblock_t *block,
                                                         fs_inst *inst)
 {
    int write_len = inst->regs_written;
-   int first_write_grf = inst->dst.reg;
+   int first_write_grf = inst->dst.nr;
    bool needs_dep[BRW_MAX_MRF(devinfo->gen)];
    assert(write_len < (int)sizeof(needs_dep) - 1);
 
@@ -2943,7 +2942,7 @@ fs_visitor::insert_gen4_pre_send_dependency_workarounds(bblock_t *block,
        */
       if (scan_inst->dst.file == GRF) {
          for (int i = 0; i < scan_inst->regs_written; i++) {
-            int reg = scan_inst->dst.reg + i;
+            int reg = scan_inst->dst.nr + i;
 
             if (reg >= first_write_grf &&
                 reg < first_write_grf + write_len &&
@@ -2981,7 +2980,7 @@ void
 fs_visitor::insert_gen4_post_send_dependency_workarounds(bblock_t *block, fs_inst *inst)
 {
    int write_len = inst->regs_written;
-   int first_write_grf = inst->dst.reg;
+   int first_write_grf = inst->dst.nr;
    bool needs_dep[BRW_MAX_MRF(devinfo->gen)];
    assert(write_len < (int)sizeof(needs_dep) - 1);
 
@@ -3008,12 +3007,12 @@ fs_visitor::insert_gen4_post_send_dependency_workarounds(bblock_t *block, fs_ins
        * result of a SEND, which has massive latency.
        */
       if (scan_inst->dst.file == GRF &&
-          scan_inst->dst.reg >= first_write_grf &&
-          scan_inst->dst.reg < first_write_grf + write_len &&
-          needs_dep[scan_inst->dst.reg - first_write_grf]) {
+          scan_inst->dst.nr >= first_write_grf &&
+          scan_inst->dst.nr < first_write_grf + write_len &&
+          needs_dep[scan_inst->dst.nr - first_write_grf]) {
          DEP_RESOLVE_MOV(fs_builder(this, block, scan_inst),
-                         scan_inst->dst.reg);
-         needs_dep[scan_inst->dst.reg - first_write_grf] = false;
+                         scan_inst->dst.nr);
+         needs_dep[scan_inst->dst.nr - first_write_grf] = false;
       }
 
       /* Continue the loop only if we haven't resolved all the dependencies */
@@ -3145,7 +3144,7 @@ fs_visitor::lower_load_payload()
 
       /* Get rid of COMPR4.  We'll add it back in if we need it */
       if (dst.file == MRF)
-         dst.reg = dst.reg & ~BRW_MRF_COMPR4;
+         dst.nr = dst.nr & ~BRW_MRF_COMPR4;
 
       const fs_builder ibld(this, block, inst);
       const fs_builder hbld = ibld.exec_all().group(8, 0);
@@ -3159,7 +3158,7 @@ fs_visitor::lower_load_payload()
          dst = offset(dst, hbld, 1);
       }
 
-      if (inst->dst.file == MRF && (inst->dst.reg & BRW_MRF_COMPR4) &&
+      if (inst->dst.file == MRF && (inst->dst.nr & BRW_MRF_COMPR4) &&
           inst->exec_size > 8) {
          /* In this case, the payload portion of the LOAD_PAYLOAD isn't
           * a straightforward copy.  Instead, the result of the
@@ -3183,18 +3182,18 @@ fs_visitor::lower_load_payload()
             if (inst->src[i].file != BAD_FILE) {
                if (devinfo->has_compr4) {
                   fs_reg compr4_dst = retype(dst, inst->src[i].type);
-                  compr4_dst.reg |= BRW_MRF_COMPR4;
+                  compr4_dst.nr |= BRW_MRF_COMPR4;
                   ibld.MOV(compr4_dst, inst->src[i]);
                } else {
                   /* Platform doesn't have COMPR4.  We have to fake it */
                   fs_reg mov_dst = retype(dst, inst->src[i].type);
                   ibld.half(0).MOV(mov_dst, half(inst->src[i], 0));
-                  mov_dst.reg += 4;
+                  mov_dst.nr += 4;
                   ibld.half(1).MOV(mov_dst, half(inst->src[i], 1));
                }
             }
 
-            dst.reg++;
+            dst.nr++;
          }
 
          /* The loop above only ever incremented us through the first set
@@ -3202,7 +3201,7 @@ fs_visitor::lower_load_payload()
           * actually wrote to the first 8 registers, so we need to take
           * that into account now.
           */
-         dst.reg += 4;
+         dst.nr += 4;
 
          /* The COMPR4 code took care of the first 4 sources.  We'll let
           * the regular path handle any remaining sources.  Yes, we are
@@ -3588,7 +3587,7 @@ lower_fb_write_logical_send(const fs_builder &bld, fs_inst *inst,
       /* Send from the GRF */
       fs_reg payload = fs_reg(GRF, -1, BRW_REGISTER_TYPE_F);
       load = bld.LOAD_PAYLOAD(payload, sources, length, payload_header_size);
-      payload.reg = bld.shader->alloc.allocate(load->regs_written);
+      payload.nr = bld.shader->alloc.allocate(load->regs_written);
       load->dst = payload;
 
       inst->src[0] = payload;
@@ -3603,7 +3602,7 @@ lower_fb_write_logical_send(const fs_builder &bld, fs_inst *inst,
        * will do this for us if we just give it a COMPR4 destination.
        */
       if (devinfo->gen < 6 && bld.dispatch_width() == 16)
-         load->dst.reg |= BRW_MRF_COMPR4;
+         load->dst.nr |= BRW_MRF_COMPR4;
 
       inst->resize_sources(0);
       inst->base_mrf = 1;
@@ -3713,8 +3712,8 @@ lower_sampler_logical_send_gen4(const fs_builder &bld, fs_inst *inst, opcode op,
    inst->src[0] = reg_undef;
    inst->src[1] = sampler;
    inst->resize_sources(2);
-   inst->base_mrf = msg_begin.reg;
-   inst->mlen = msg_end.reg - msg_begin.reg;
+   inst->base_mrf = msg_begin.nr;
+   inst->mlen = msg_end.nr - msg_begin.nr;
    inst->header_size = 1;
 }
 
@@ -3738,7 +3737,7 @@ lower_sampler_logical_send_gen5(const fs_builder &bld, fs_inst *inst, opcode op,
        * go headerless.
        */
       header_size = 1;
-      message.reg--;
+      message.nr--;
    }
 
    for (unsigned i = 0; i < coord_components; i++) {
@@ -3808,8 +3807,8 @@ lower_sampler_logical_send_gen5(const fs_builder &bld, fs_inst *inst, opcode op,
    inst->src[0] = reg_undef;
    inst->src[1] = sampler;
    inst->resize_sources(2);
-   inst->base_mrf = message.reg;
-   inst->mlen = msg_end.reg - message.reg;
+   inst->base_mrf = message.nr;
+   inst->mlen = msg_end.nr - message.nr;
    inst->header_size = header_size;
 
    /* Message length > MAX_SAMPLER_MESSAGE_SIZE disallowed by hardware. */
@@ -4608,23 +4607,23 @@ fs_visitor::dump_instruction(backend_instruction *be_inst, FILE *file)
 
    switch (inst->dst.file) {
    case GRF:
-      fprintf(file, "vgrf%d", inst->dst.reg);
-      if (alloc.sizes[inst->dst.reg] != inst->regs_written ||
+      fprintf(file, "vgrf%d", inst->dst.nr);
+      if (alloc.sizes[inst->dst.nr] != inst->regs_written ||
           inst->dst.subreg_offset)
          fprintf(file, "+%d.%d",
                  inst->dst.reg_offset, inst->dst.subreg_offset);
       break;
    case MRF:
-      fprintf(file, "m%d", inst->dst.reg);
+      fprintf(file, "m%d", inst->dst.nr);
       break;
    case BAD_FILE:
       fprintf(file, "(null)");
       break;
    case UNIFORM:
-      fprintf(file, "***u%d***", inst->dst.reg + inst->dst.reg_offset);
+      fprintf(file, "***u%d***", inst->dst.nr + inst->dst.reg_offset);
       break;
    case ATTR:
-      fprintf(file, "***attr%d***", inst->dst.reg + inst->dst.reg_offset);
+      fprintf(file, "***attr%d***", inst->dst.nr + inst->dst.reg_offset);
       break;
    case HW_REG:
       if (inst->dst.brw_reg::file == BRW_ARCHITECTURE_REGISTER_FILE) {
@@ -4665,20 +4664,20 @@ fs_visitor::dump_instruction(backend_instruction *be_inst, FILE *file)
          fprintf(file, "|");
       switch (inst->src[i].file) {
       case GRF:
-         fprintf(file, "vgrf%d", inst->src[i].reg);
-         if (alloc.sizes[inst->src[i].reg] != (unsigned)inst->regs_read(i) ||
+         fprintf(file, "vgrf%d", inst->src[i].nr);
+         if (alloc.sizes[inst->src[i].nr] != (unsigned)inst->regs_read(i) ||
              inst->src[i].subreg_offset)
             fprintf(file, "+%d.%d", inst->src[i].reg_offset,
                     inst->src[i].subreg_offset);
          break;
       case MRF:
-         fprintf(file, "***m%d***", inst->src[i].reg);
+         fprintf(file, "***m%d***", inst->src[i].nr);
          break;
       case ATTR:
-         fprintf(file, "attr%d+%d", inst->src[i].reg, inst->src[i].reg_offset);
+         fprintf(file, "attr%d+%d", inst->src[i].nr, inst->src[i].reg_offset);
          break;
       case UNIFORM:
-         fprintf(file, "u%d", inst->src[i].reg + inst->src[i].reg_offset);
+         fprintf(file, "u%d", inst->src[i].nr + inst->src[i].reg_offset);
          if (inst->src[i].reladdr) {
             fprintf(file, "+reladdr");
          } else if (inst->src[i].subreg_offset) {
diff --git a/src/mesa/drivers/dri/i965/brw_fs_combine_constants.cpp b/src/mesa/drivers/dri/i965/brw_fs_combine_constants.cpp
index 234bbec0b6b..127cee4f1e9 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_combine_constants.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_combine_constants.cpp
@@ -121,7 +121,7 @@ struct imm {
     * constant value.
     */
    uint8_t subreg_offset;
-   uint16_t reg;
+   uint16_t nr;
 
    /** The number of coissuable instructions using this immediate. */
    uint16_t uses_by_coissue;
@@ -280,12 +280,12 @@ fs_visitor::opt_combine_constants()
       const fs_builder ibld = bld.at(imm->block, n).exec_all().group(1, 0);
 
       ibld.MOV(reg, fs_reg(imm->val));
-      imm->reg = reg.reg;
+      imm->nr = reg.nr;
       imm->subreg_offset = reg.subreg_offset;
 
       reg.subreg_offset += sizeof(float);
       if ((unsigned)reg.subreg_offset == dispatch_width * sizeof(float)) {
-         reg.reg = alloc.allocate(dispatch_width / 8);
+         reg.nr = alloc.allocate(dispatch_width / 8);
          reg.subreg_offset = 0;
       }
    }
@@ -296,7 +296,7 @@ fs_visitor::opt_combine_constants()
       foreach_list_typed(reg_link, link, link, table.imm[i].uses) {
          fs_reg *reg = link->reg;
          reg->file = GRF;
-         reg->reg = table.imm[i].reg;
+         reg->nr = table.imm[i].nr;
          reg->subreg_offset = table.imm[i].subreg_offset;
          reg->stride = 0;
          reg->negate = signbit(reg->f) != signbit(table.imm[i].val);
diff --git a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
index 2c966d173c6..79594130526 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
@@ -291,7 +291,7 @@ fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry)
       return false;
 
    assert(entry->dst.file == GRF);
-   if (inst->src[arg].reg != entry->dst.reg)
+   if (inst->src[arg].nr != entry->dst.nr)
       return false;
 
    /* Bail if inst is reading a range that isn't contained in the range
@@ -380,7 +380,7 @@ fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry)
    }
 
    inst->src[arg].file = entry->src.file;
-   inst->src[arg].reg = entry->src.reg;
+   inst->src[arg].nr = entry->src.nr;
    inst->src[arg].stride *= entry->src.stride;
    inst->saturate = inst->saturate || entry->saturate;
 
@@ -460,7 +460,7 @@ fs_visitor::try_constant_propagate(fs_inst *inst, acp_entry *entry)
          continue;
 
       assert(entry->dst.file == GRF);
-      if (inst->src[i].reg != entry->dst.reg)
+      if (inst->src[i].nr != entry->dst.nr)
          continue;
 
       /* Bail if inst is reading a range that isn't contained in the range
@@ -654,7 +654,7 @@ can_propagate_from(fs_inst *inst)
    return (inst->opcode == BRW_OPCODE_MOV &&
            inst->dst.file == GRF &&
            ((inst->src[0].file == GRF &&
-             (inst->src[0].reg != inst->dst.reg ||
+             (inst->src[0].nr != inst->dst.nr ||
               inst->src[0].reg_offset != inst->dst.reg_offset)) ||
             inst->src[0].file == ATTR ||
             inst->src[0].file == UNIFORM ||
@@ -678,7 +678,7 @@ fs_visitor::opt_copy_propagate_local(void *copy_prop_ctx, bblock_t *block,
          if (inst->src[i].file != GRF)
             continue;
 
-         foreach_in_list(acp_entry, entry, &acp[inst->src[i].reg % ACP_HASH_SIZE]) {
+         foreach_in_list(acp_entry, entry, &acp[inst->src[i].nr % ACP_HASH_SIZE]) {
             if (try_constant_propagate(inst, entry))
                progress = true;
 
@@ -689,7 +689,7 @@ fs_visitor::opt_copy_propagate_local(void *copy_prop_ctx, bblock_t *block,
 
       /* kill the destination from the ACP */
       if (inst->dst.file == GRF) {
-	 foreach_in_list_safe(acp_entry, entry, &acp[inst->dst.reg % ACP_HASH_SIZE]) {
+         foreach_in_list_safe(acp_entry, entry, &acp[inst->dst.nr % ACP_HASH_SIZE]) {
 	    if (inst->overwrites_reg(entry->dst)) {
 	       entry->remove();
 	    }
@@ -716,7 +716,7 @@ fs_visitor::opt_copy_propagate_local(void *copy_prop_ctx, bblock_t *block,
          entry->regs_written = inst->regs_written;
          entry->opcode = inst->opcode;
          entry->saturate = inst->saturate;
-	 acp[entry->dst.reg % ACP_HASH_SIZE].push_tail(entry);
+         acp[entry->dst.nr % ACP_HASH_SIZE].push_tail(entry);
       } else if (inst->opcode == SHADER_OPCODE_LOAD_PAYLOAD &&
                  inst->dst.file == GRF) {
          int offset = 0;
@@ -731,7 +731,7 @@ fs_visitor::opt_copy_propagate_local(void *copy_prop_ctx, bblock_t *block,
                entry->regs_written = regs_written;
                entry->opcode = inst->opcode;
                if (!entry->dst.equals(inst->src[i])) {
-                  acp[entry->dst.reg % ACP_HASH_SIZE].push_tail(entry);
+                  acp[entry->dst.nr % ACP_HASH_SIZE].push_tail(entry);
                } else {
                   ralloc_free(entry);
                }
@@ -774,7 +774,7 @@ fs_visitor::opt_copy_propagate()
       for (int i = 0; i < dataflow.num_acp; i++) {
          if (BITSET_TEST(dataflow.bd[block->num].livein, i)) {
             struct acp_entry *entry = dataflow.acp[i];
-            in_acp[entry->dst.reg % ACP_HASH_SIZE].push_tail(entry);
+            in_acp[entry->dst.nr % ACP_HASH_SIZE].push_tail(entry);
          }
       }
 
diff --git a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
index fbf00d1bfd0..08f89d54601 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
@@ -320,7 +320,7 @@ fs_visitor::opt_cse_local(bblock_t *block)
             /* Kill any AEB entries using registers that don't get reused any
              * more -- a sure sign they'll fail operands_match().
              */
-            if (src_reg->file == GRF && virtual_grf_end[src_reg->reg] < ip) {
+            if (src_reg->file == GRF && virtual_grf_end[src_reg->nr] < ip) {
                entry->remove();
                ralloc_free(entry);
                break;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
index 6b6b5771298..95649d870ee 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -59,13 +59,13 @@ brw_reg_from_fs_reg(fs_inst *inst, fs_reg *reg, unsigned gen)
 
    switch (reg->file) {
    case MRF:
-      assert((reg->reg & ~(1 << 7)) < BRW_MAX_MRF(gen));
+      assert((reg->nr & ~(1 << 7)) < BRW_MAX_MRF(gen));
       /* Fallthrough */
    case GRF:
       if (reg->stride == 0) {
-         brw_reg = brw_vec1_reg(brw_file_from_reg(reg), reg->reg, 0);
+         brw_reg = brw_vec1_reg(brw_file_from_reg(reg), reg->nr, 0);
       } else if (inst->exec_size < 8) {
-         brw_reg = brw_vec8_reg(brw_file_from_reg(reg), reg->reg, 0);
+         brw_reg = brw_vec8_reg(brw_file_from_reg(reg), reg->nr, 0);
          brw_reg = stride(brw_reg, inst->exec_size * reg->stride,
                           inst->exec_size, reg->stride);
       } else {
@@ -78,7 +78,7 @@ brw_reg_from_fs_reg(fs_inst *inst, fs_reg *reg, unsigned gen)
           * So, for registers with width > 8, we have to use a width of 8
           * and trust the compression state to sort out the exec size.
           */
-         brw_reg = brw_vec8_reg(brw_file_from_reg(reg), reg->reg, 0);
+         brw_reg = brw_vec8_reg(brw_file_from_reg(reg), reg->nr, 0);
          brw_reg = stride(brw_reg, 8 * reg->stride, 8, reg->stride);
       }
 
diff --git a/src/mesa/drivers/dri/i965/brw_fs_live_variables.h b/src/mesa/drivers/dri/i965/brw_fs_live_variables.h
index c7457069ede..96cadea96aa 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_live_variables.h
+++ b/src/mesa/drivers/dri/i965/brw_fs_live_variables.h
@@ -68,7 +68,7 @@ public:
    bool vars_interfere(int a, int b);
    int var_from_reg(const fs_reg &reg) const
    {
-      return var_from_vgrf[reg.reg] + reg.reg_offset;
+      return var_from_vgrf[reg.nr] + reg.reg_offset;
    }
 
    /** Map from virtual GRF number to index in block_data arrays. */
diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
index 3e0e0e9586b..99ccdb15e6f 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
@@ -36,7 +36,7 @@ static void
 assign_reg(unsigned *reg_hw_locations, fs_reg *reg)
 {
    if (reg->file == GRF) {
-      reg->reg = reg_hw_locations[reg->reg] + reg->reg_offset;
+      reg->nr = reg_hw_locations[reg->nr] + reg->reg_offset;
       reg->reg_offset = 0;
    }
 }
@@ -489,10 +489,10 @@ get_used_mrfs(fs_visitor *v, bool *mrf_used)
 
    foreach_block_and_inst(block, fs_inst, inst, v->cfg) {
       if (inst->dst.file == MRF) {
-         int reg = inst->dst.reg & ~BRW_MRF_COMPR4;
+         int reg = inst->dst.nr & ~BRW_MRF_COMPR4;
          mrf_used[reg] = true;
          if (reg_width == 2) {
-            if (inst->dst.reg & BRW_MRF_COMPR4) {
+            if (inst->dst.nr & BRW_MRF_COMPR4) {
                mrf_used[reg + 4] = true;
             } else {
                mrf_used[reg + 1] = true;
@@ -585,7 +585,7 @@ fs_visitor::assign_regs(bool allow_spilling)
        */
       if (compiler->fs_reg_sets[rsi].aligned_pairs_class >= 0 &&
           this->delta_xy[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC].file == GRF &&
-          this->delta_xy[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC].reg == i) {
+          this->delta_xy[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC].nr == i) {
          c = compiler->fs_reg_sets[rsi].aligned_pairs_class;
       }
 
@@ -616,7 +616,7 @@ fs_visitor::assign_regs(bool allow_spilling)
           * highest register that works.
           */
          if (inst->eot) {
-            int size = alloc.sizes[inst->src[0].reg];
+            int size = alloc.sizes[inst->src[0].nr];
             int reg = compiler->fs_reg_sets[rsi].class_to_ra_reg_range[size] - 1;
 
             /* If something happened to spill, we want to push the EOT send
@@ -625,7 +625,7 @@ fs_visitor::assign_regs(bool allow_spilling)
              */
             reg -= BRW_MAX_MRF(devinfo->gen) - first_used_mrf;
 
-            ra_set_node_reg(g, inst->src[0].reg, reg);
+            ra_set_node_reg(g, inst->src[0].nr, reg);
             break;
          }
       }
@@ -649,7 +649,7 @@ fs_visitor::assign_regs(bool allow_spilling)
 
          for (int i = 0; i < inst->sources; ++i) {
             if (inst->src[i].file == GRF) {
-               ra_add_node_interference(g, inst->dst.reg, inst->src[i].reg);
+               ra_add_node_interference(g, inst->dst.nr, inst->src[i].nr);
             }
          }
       }
@@ -787,7 +787,7 @@ fs_visitor::choose_spill_reg(struct ra_graph *g)
    foreach_block_and_inst(block, fs_inst, inst, cfg) {
       for (unsigned int i = 0; i < inst->sources; i++) {
 	 if (inst->src[i].file == GRF) {
-	    spill_costs[inst->src[i].reg] += loop_scale;
+            spill_costs[inst->src[i].nr] += loop_scale;
 
             /* Register spilling logic assumes full-width registers; smeared
              * registers have a width of 1 so if we try to spill them we'll
@@ -797,16 +797,16 @@ fs_visitor::choose_spill_reg(struct ra_graph *g)
              * register pressure anyhow.
              */
             if (!inst->src[i].is_contiguous()) {
-               no_spill[inst->src[i].reg] = true;
+               no_spill[inst->src[i].nr] = true;
             }
 	 }
       }
 
       if (inst->dst.file == GRF) {
-	 spill_costs[inst->dst.reg] += inst->regs_written * loop_scale;
+         spill_costs[inst->dst.nr] += inst->regs_written * loop_scale;
 
          if (!inst->dst.is_contiguous()) {
-            no_spill[inst->dst.reg] = true;
+            no_spill[inst->dst.nr] = true;
          }
       }
 
@@ -822,13 +822,13 @@ fs_visitor::choose_spill_reg(struct ra_graph *g)
 
       case SHADER_OPCODE_GEN4_SCRATCH_WRITE:
 	 if (inst->src[0].file == GRF)
-	    no_spill[inst->src[0].reg] = true;
+            no_spill[inst->src[0].nr] = true;
 	 break;
 
       case SHADER_OPCODE_GEN4_SCRATCH_READ:
       case SHADER_OPCODE_GEN7_SCRATCH_READ:
 	 if (inst->dst.file == GRF)
-	    no_spill[inst->dst.reg] = true;
+            no_spill[inst->dst.nr] = true;
 	 break;
 
       default:
@@ -884,13 +884,13 @@ fs_visitor::spill_reg(int spill_reg)
    foreach_block_and_inst (block, fs_inst, inst, cfg) {
       for (unsigned int i = 0; i < inst->sources; i++) {
 	 if (inst->src[i].file == GRF &&
-	     inst->src[i].reg == spill_reg) {
+             inst->src[i].nr == spill_reg) {
             int regs_read = inst->regs_read(i);
             int subset_spill_offset = (spill_offset +
                                        REG_SIZE * inst->src[i].reg_offset);
             fs_reg unspill_dst(GRF, alloc.allocate(regs_read));
 
-            inst->src[i].reg = unspill_dst.reg;
+            inst->src[i].nr = unspill_dst.nr;
             inst->src[i].reg_offset = 0;
 
             emit_unspill(block, inst, unspill_dst, subset_spill_offset,
@@ -899,12 +899,12 @@ fs_visitor::spill_reg(int spill_reg)
       }
 
       if (inst->dst.file == GRF &&
-	  inst->dst.reg == spill_reg) {
+          inst->dst.nr == spill_reg) {
          int subset_spill_offset = (spill_offset +
                                     REG_SIZE * inst->dst.reg_offset);
          fs_reg spill_src(GRF, alloc.allocate(inst->regs_written));
 
-         inst->dst.reg = spill_src.reg;
+         inst->dst.nr = spill_src.nr;
          inst->dst.reg_offset = 0;
 
          /* If we're immediately spilling the register, we should not use
diff --git a/src/mesa/drivers/dri/i965/brw_fs_register_coalesce.cpp b/src/mesa/drivers/dri/i965/brw_fs_register_coalesce.cpp
index 34f8715eeb9..ce1d66e7ed6 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_register_coalesce.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_register_coalesce.cpp
@@ -79,8 +79,8 @@ is_coalesce_candidate(const fs_visitor *v, const fs_inst *inst)
       return false;
    }
 
-   if (v->alloc.sizes[inst->src[0].reg] >
-       v->alloc.sizes[inst->dst.reg])
+   if (v->alloc.sizes[inst->src[0].nr] >
+       v->alloc.sizes[inst->dst.nr])
       return false;
 
    if (inst->opcode == SHADER_OPCODE_LOAD_PAYLOAD) {
@@ -170,19 +170,19 @@ fs_visitor::register_coalesce()
          continue;
       }
 
-      if (src_reg != inst->src[0].reg) {
-         src_reg = inst->src[0].reg;
+      if (src_reg != inst->src[0].nr) {
+         src_reg = inst->src[0].nr;
 
-         src_size = alloc.sizes[inst->src[0].reg];
+         src_size = alloc.sizes[inst->src[0].nr];
          assert(src_size <= MAX_VGRF_SIZE);
 
          channels_remaining = src_size;
          memset(mov, 0, sizeof(mov));
 
-         dst_reg = inst->dst.reg;
+         dst_reg = inst->dst.nr;
       }
 
-      if (dst_reg != inst->dst.reg)
+      if (dst_reg != inst->dst.nr)
          continue;
 
       if (inst->opcode == SHADER_OPCODE_LOAD_PAYLOAD) {
@@ -251,16 +251,16 @@ fs_visitor::register_coalesce()
 
       foreach_block_and_inst(block, fs_inst, scan_inst, cfg) {
          if (scan_inst->dst.file == GRF &&
-             scan_inst->dst.reg == src_reg) {
-            scan_inst->dst.reg = dst_reg;
+             scan_inst->dst.nr == src_reg) {
+            scan_inst->dst.nr = dst_reg;
             scan_inst->dst.reg_offset =
                dst_reg_offset[scan_inst->dst.reg_offset];
          }
 
          for (int j = 0; j < scan_inst->sources; j++) {
             if (scan_inst->src[j].file == GRF &&
-                scan_inst->src[j].reg == src_reg) {
-               scan_inst->src[j].reg = dst_reg;
+                scan_inst->src[j].nr == src_reg) {
+               scan_inst->src[j].nr = dst_reg;
                scan_inst->src[j].reg_offset =
                   dst_reg_offset[scan_inst->src[j].reg_offset];
             }
diff --git a/src/mesa/drivers/dri/i965/brw_fs_saturate_propagation.cpp b/src/mesa/drivers/dri/i965/brw_fs_saturate_propagation.cpp
index 862e3245d43..0c48dcd180c 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_saturate_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_saturate_propagation.cpp
@@ -91,7 +91,7 @@ opt_saturate_propagation_local(fs_visitor *v, bblock_t *block)
          }
          for (int i = 0; i < scan_inst->sources; i++) {
             if (scan_inst->src[i].file == GRF &&
-                scan_inst->src[i].reg == inst->src[0].reg &&
+                scan_inst->src[i].nr == inst->src[0].nr &&
                 scan_inst->src[i].reg_offset == inst->src[0].reg_offset) {
                if (scan_inst->opcode != BRW_OPCODE_MOV ||
                    !scan_inst->saturate ||
diff --git a/src/mesa/drivers/dri/i965/brw_fs_validate.cpp b/src/mesa/drivers/dri/i965/brw_fs_validate.cpp
index 814c551f1be..a79c343ce02 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_validate.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_validate.cpp
@@ -44,13 +44,13 @@ fs_visitor::validate()
    foreach_block_and_inst (block, fs_inst, inst, cfg) {
       if (inst->dst.file == GRF) {
          fsv_assert(inst->dst.reg_offset + inst->regs_written <=
-                    alloc.sizes[inst->dst.reg]);
+                    alloc.sizes[inst->dst.nr]);
       }
 
       for (unsigned i = 0; i < inst->sources; i++) {
          if (inst->src[i].file == GRF) {
             fsv_assert(inst->src[i].reg_offset + inst->regs_read(i) <=
-                       (int)alloc.sizes[inst->src[i].reg]);
+                       (int)alloc.sizes[inst->src[i].nr]);
          }
       }
    }
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index 66a0c90c40d..da7e9ca67ef 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -876,7 +876,7 @@ void fs_visitor::compute_clip_distance(gl_clip_plane *clip_planes)
 
       abld.MUL(output, outputs[clip_vertex], u);
       for (int j = 1; j < 4; j++) {
-         u.reg = userplane[i].reg + j;
+         u.nr = userplane[i].nr + j;
          abld.MAD(output, output, offset(outputs[clip_vertex], bld, j), u);
       }
    }
diff --git a/src/mesa/drivers/dri/i965/brw_ir_fs.h b/src/mesa/drivers/dri/i965/brw_ir_fs.h
index 1f2931af179..9309ba58e68 100644
--- a/src/mesa/drivers/dri/i965/brw_ir_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_ir_fs.h
@@ -42,8 +42,8 @@ public:
    explicit fs_reg(uint8_t vf[4]);
    explicit fs_reg(uint8_t vf0, uint8_t vf1, uint8_t vf2, uint8_t vf3);
    fs_reg(struct brw_reg reg);
-   fs_reg(enum register_file file, int reg);
-   fs_reg(enum register_file file, int reg, enum brw_reg_type type);
+   fs_reg(enum register_file file, int nr);
+   fs_reg(enum register_file file, int nr, enum brw_reg_type type);
 
    bool equals(const fs_reg &r) const;
    bool is_contiguous() const;
@@ -95,7 +95,7 @@ byte_offset(fs_reg reg, unsigned delta)
       reg.reg_offset += delta / 32;
       break;
    case MRF:
-      reg.reg += delta / 32;
+      reg.nr += delta / 32;
       break;
    case IMM:
    case HW_REG:
diff --git a/src/mesa/drivers/dri/i965/brw_ir_vec4.h b/src/mesa/drivers/dri/i965/brw_ir_vec4.h
index a19a262506d..d3f0d61b55b 100644
--- a/src/mesa/drivers/dri/i965/brw_ir_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_ir_vec4.h
@@ -39,7 +39,7 @@ public:
 
    void init();
 
-   src_reg(register_file file, int reg, const glsl_type *type);
+   src_reg(register_file file, int nr, const glsl_type *type);
    src_reg();
    src_reg(float f);
    src_reg(uint32_t u);
@@ -107,10 +107,10 @@ public:
    void init();
 
    dst_reg();
-   dst_reg(register_file file, int reg);
-   dst_reg(register_file file, int reg, const glsl_type *type,
+   dst_reg(register_file file, int nr);
+   dst_reg(register_file file, int nr, const glsl_type *type,
            unsigned writemask);
-   dst_reg(register_file file, int reg, brw_reg_type type,
+   dst_reg(register_file file, int nr, brw_reg_type type,
            unsigned writemask);
    dst_reg(struct brw_reg reg);
    dst_reg(class vec4_visitor *v, const struct glsl_type *type);
diff --git a/src/mesa/drivers/dri/i965/brw_reg.h b/src/mesa/drivers/dri/i965/brw_reg.h
index 8fc2fee94ca..a2f41559503 100644
--- a/src/mesa/drivers/dri/i965/brw_reg.h
+++ b/src/mesa/drivers/dri/i965/brw_reg.h
@@ -233,12 +233,12 @@ const char *brw_reg_type_letters(unsigned brw_reg_type);
 struct brw_reg {
    enum brw_reg_type type:4;
    enum brw_reg_file file:2;
-   unsigned nr:8;
-   unsigned subnr:5;              /* :1 in align16 */
    unsigned negate:1;             /* source only */
    unsigned abs:1;                /* source only */
    unsigned address_mode:1;       /* relative addressing, hopefully! */
-   unsigned pad0:10;
+   unsigned pad0:2;
+   unsigned subnr:5;              /* :1 in align16 */
+   unsigned nr:16;
 
    union {
       struct {
@@ -353,12 +353,12 @@ brw_reg(enum brw_reg_file file,
 
    reg.type = type;
    reg.file = file;
-   reg.nr = nr;
-   reg.subnr = subnr * type_sz(type);
    reg.negate = negate;
    reg.abs = abs;
    reg.address_mode = BRW_ADDRESS_DIRECT;
    reg.pad0 = 0;
+   reg.subnr = subnr * type_sz(type);
+   reg.nr = nr;
 
    /* Could do better: If the reg is r5.3<0;1,0>, we probably want to
     * set swizzle and writemask to W, as the lower bits of subnr will
diff --git a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
index 521d04ec17e..2f92595e215 100644
--- a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
+++ b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
@@ -584,7 +584,7 @@ fs_instruction_scheduler::count_reads_remaining(backend_instruction *be)
          continue;
 
       if (inst->src[i].file == GRF) {
-         reads_remaining[inst->src[i].reg]++;
+         reads_remaining[inst->src[i].nr]++;
       } else if (inst->src[i].file == HW_REG &&
                  inst->src[i].brw_reg::file == BRW_GENERAL_REGISTER_FILE) {
          if (inst->src[i].nr >= hw_reg_count)
@@ -661,7 +661,7 @@ fs_instruction_scheduler::update_register_pressure(backend_instruction *be)
       return;
 
    if (inst->dst.file == GRF) {
-      written[inst->dst.reg] = true;
+      written[inst->dst.nr] = true;
    }
 
    for (int i = 0; i < inst->sources; i++) {
@@ -669,7 +669,7 @@ fs_instruction_scheduler::update_register_pressure(backend_instruction *be)
           continue;
 
       if (inst->src[i].file == GRF) {
-         reads_remaining[inst->src[i].reg]--;
+         reads_remaining[inst->src[i].nr]--;
       } else if (inst->src[i].file == HW_REG &&
                  inst->src[i].brw_reg::file == BRW_GENERAL_REGISTER_FILE &&
                  inst->src[i].nr < hw_reg_count) {
@@ -686,9 +686,9 @@ fs_instruction_scheduler::get_register_pressure_benefit(backend_instruction *be)
    int benefit = 0;
 
    if (inst->dst.file == GRF) {
-      if (!BITSET_TEST(livein[block_idx], inst->dst.reg) &&
-          !written[inst->dst.reg])
-         benefit -= v->alloc.sizes[inst->dst.reg];
+      if (!BITSET_TEST(livein[block_idx], inst->dst.nr) &&
+          !written[inst->dst.nr])
+         benefit -= v->alloc.sizes[inst->dst.nr];
    }
 
    for (int i = 0; i < inst->sources; i++) {
@@ -696,9 +696,9 @@ fs_instruction_scheduler::get_register_pressure_benefit(backend_instruction *be)
          continue;
 
       if (inst->src[i].file == GRF &&
-          !BITSET_TEST(liveout[block_idx], inst->src[i].reg) &&
-          reads_remaining[inst->src[i].reg] == 1)
-         benefit += v->alloc.sizes[inst->src[i].reg];
+          !BITSET_TEST(liveout[block_idx], inst->src[i].nr) &&
+          reads_remaining[inst->src[i].nr] == 1)
+         benefit += v->alloc.sizes[inst->src[i].nr];
 
       if (inst->src[i].file == HW_REG &&
           inst->src[i].brw_reg::file == BRW_GENERAL_REGISTER_FILE &&
@@ -953,10 +953,10 @@ fs_instruction_scheduler::calculate_deps()
          if (inst->src[i].file == GRF) {
             if (post_reg_alloc) {
                for (int r = 0; r < inst->regs_read(i); r++)
-                  add_dep(last_grf_write[inst->src[i].reg + r], n);
+                  add_dep(last_grf_write[inst->src[i].nr + r], n);
             } else {
                for (int r = 0; r < inst->regs_read(i); r++) {
-                  add_dep(last_grf_write[inst->src[i].reg * 16 + inst->src[i].reg_offset + r], n);
+                  add_dep(last_grf_write[inst->src[i].nr * 16 + inst->src[i].reg_offset + r], n);
                }
             }
          } else if (inst->src[i].file == HW_REG &&
@@ -1002,22 +1002,22 @@ fs_instruction_scheduler::calculate_deps()
       if (inst->dst.file == GRF) {
          if (post_reg_alloc) {
             for (int r = 0; r < inst->regs_written; r++) {
-               add_dep(last_grf_write[inst->dst.reg + r], n);
-               last_grf_write[inst->dst.reg + r] = n;
+               add_dep(last_grf_write[inst->dst.nr + r], n);
+               last_grf_write[inst->dst.nr + r] = n;
             }
          } else {
             for (int r = 0; r < inst->regs_written; r++) {
-               add_dep(last_grf_write[inst->dst.reg * 16 + inst->dst.reg_offset + r], n);
-               last_grf_write[inst->dst.reg * 16 + inst->dst.reg_offset + r] = n;
+               add_dep(last_grf_write[inst->dst.nr * 16 + inst->dst.reg_offset + r], n);
+               last_grf_write[inst->dst.nr * 16 + inst->dst.reg_offset + r] = n;
             }
          }
       } else if (inst->dst.file == MRF) {
-         int reg = inst->dst.reg & ~BRW_MRF_COMPR4;
+         int reg = inst->dst.nr & ~BRW_MRF_COMPR4;
 
          add_dep(last_mrf_write[reg], n);
          last_mrf_write[reg] = n;
          if (is_compressed(inst)) {
-            if (inst->dst.reg & BRW_MRF_COMPR4)
+            if (inst->dst.nr & BRW_MRF_COMPR4)
                reg += 4;
             else
                reg++;
@@ -1079,10 +1079,10 @@ fs_instruction_scheduler::calculate_deps()
          if (inst->src[i].file == GRF) {
             if (post_reg_alloc) {
                for (int r = 0; r < inst->regs_read(i); r++)
-                  add_dep(n, last_grf_write[inst->src[i].reg + r], 0);
+                  add_dep(n, last_grf_write[inst->src[i].nr + r], 0);
             } else {
                for (int r = 0; r < inst->regs_read(i); r++) {
-                  add_dep(n, last_grf_write[inst->src[i].reg * 16 + inst->src[i].reg_offset + r], 0);
+                  add_dep(n, last_grf_write[inst->src[i].nr * 16 + inst->src[i].reg_offset + r], 0);
                }
             }
          } else if (inst->src[i].file == HW_REG &&
@@ -1130,19 +1130,19 @@ fs_instruction_scheduler::calculate_deps()
       if (inst->dst.file == GRF) {
          if (post_reg_alloc) {
             for (int r = 0; r < inst->regs_written; r++)
-               last_grf_write[inst->dst.reg + r] = n;
+               last_grf_write[inst->dst.nr + r] = n;
          } else {
             for (int r = 0; r < inst->regs_written; r++) {
-               last_grf_write[inst->dst.reg * 16 + inst->dst.reg_offset + r] = n;
+               last_grf_write[inst->dst.nr * 16 + inst->dst.reg_offset + r] = n;
             }
          }
       } else if (inst->dst.file == MRF) {
-         int reg = inst->dst.reg & ~BRW_MRF_COMPR4;
+         int reg = inst->dst.nr & ~BRW_MRF_COMPR4;
 
          last_mrf_write[reg] = n;
 
          if (is_compressed(inst)) {
-            if (inst->dst.reg & BRW_MRF_COMPR4)
+            if (inst->dst.nr & BRW_MRF_COMPR4)
                reg += 4;
             else
                reg++;
@@ -1217,7 +1217,7 @@ vec4_instruction_scheduler::calculate_deps()
       for (int i = 0; i < 3; i++) {
          if (inst->src[i].file == GRF) {
             for (unsigned j = 0; j < inst->regs_read(i); ++j)
-               add_dep(last_grf_write[inst->src[i].reg + j], n);
+               add_dep(last_grf_write[inst->src[i].nr + j], n);
          } else if (inst->src[i].file == HW_REG &&
                     (inst->src[i].brw_reg::file ==
                      BRW_GENERAL_REGISTER_FILE)) {
@@ -1260,12 +1260,12 @@ vec4_instruction_scheduler::calculate_deps()
       /* write-after-write deps. */
       if (inst->dst.file == GRF) {
          for (unsigned j = 0; j < inst->regs_written; ++j) {
-            add_dep(last_grf_write[inst->dst.reg + j], n);
-            last_grf_write[inst->dst.reg + j] = n;
+            add_dep(last_grf_write[inst->dst.nr + j], n);
+            last_grf_write[inst->dst.nr + j] = n;
          }
       } else if (inst->dst.file == MRF) {
-         add_dep(last_mrf_write[inst->dst.reg], n);
-         last_mrf_write[inst->dst.reg] = n;
+         add_dep(last_mrf_write[inst->dst.nr], n);
+         last_mrf_write[inst->dst.nr] = n;
      } else if (inst->dst.file == HW_REG &&
                  inst->dst.brw_reg::file == BRW_GENERAL_REGISTER_FILE) {
          last_fixed_grf_write = n;
@@ -1315,7 +1315,7 @@ vec4_instruction_scheduler::calculate_deps()
       for (int i = 0; i < 3; i++) {
          if (inst->src[i].file == GRF) {
             for (unsigned j = 0; j < inst->regs_read(i); ++j)
-               add_dep(n, last_grf_write[inst->src[i].reg + j]);
+               add_dep(n, last_grf_write[inst->src[i].nr + j]);
          } else if (inst->src[i].file == HW_REG &&
                     (inst->src[i].brw_reg::file ==
                      BRW_GENERAL_REGISTER_FILE)) {
@@ -1356,9 +1356,9 @@ vec4_instruction_scheduler::calculate_deps()
        */
       if (inst->dst.file == GRF) {
          for (unsigned j = 0; j < inst->regs_written; ++j)
-            last_grf_write[inst->dst.reg + j] = n;
+            last_grf_write[inst->dst.nr + j] = n;
       } else if (inst->dst.file == MRF) {
-         last_mrf_write[inst->dst.reg] = n;
+         last_mrf_write[inst->dst.nr] = n;
       } else if (inst->dst.file == HW_REG &&
                  inst->dst.brw_reg::file == BRW_GENERAL_REGISTER_FILE) {
          last_fixed_grf_write = n;
diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp
index ca7db9a73ac..d736d0e46ac 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -751,7 +751,7 @@ bool
 backend_reg::in_range(const backend_reg &r, unsigned n) const
 {
    return (file == r.file &&
-           reg == r.reg &&
+           nr == r.nr &&
            reg_offset >= r.reg_offset &&
            reg_offset < r.reg_offset + n);
 }
diff --git a/src/mesa/drivers/dri/i965/brw_shader.h b/src/mesa/drivers/dri/i965/brw_shader.h
index 086ab607c52..67d623cd35a 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.h
+++ b/src/mesa/drivers/dri/i965/brw_shader.h
@@ -63,15 +63,6 @@ struct backend_reg : public brw_reg
 
    enum register_file file; /**< Register file: GRF, MRF, IMM. */
 
-   /**
-    * Register number.
-    *
-    * For GRF, it's a virtual register number until register allocation.
-    *
-    * For MRF, it's the hardware register.
-    */
-   uint16_t reg;
-
    /**
     * Offset within the virtual register.
     *
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index 8c2056b767e..0570b00fadd 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -51,12 +51,12 @@ src_reg::init()
    this->file = BAD_FILE;
 }
 
-src_reg::src_reg(register_file file, int reg, const glsl_type *type)
+src_reg::src_reg(register_file file, int nr, const glsl_type *type)
 {
    init();
 
    this->file = file;
-   this->reg = reg;
+   this->nr = nr;
    if (type && (type->is_scalar() || type->is_vector() || type->is_matrix()))
       this->swizzle = brw_swizzle_for_size(type->vector_elements);
    else
@@ -120,7 +120,6 @@ src_reg::src_reg(struct brw_reg reg) :
    backend_reg(reg)
 {
    this->file = HW_REG;
-   this->reg = 0;
    this->reg_offset = 0;
    this->reladdr = NULL;
 }
@@ -129,7 +128,6 @@ src_reg::src_reg(const dst_reg &reg) :
    backend_reg(static_cast<struct brw_reg>(reg))
 {
    this->file = reg.file;
-   this->reg = reg.reg;
    this->reg_offset = reg.reg_offset;
    this->reladdr = reg.reladdr;
    this->swizzle = brw_swizzle_for_mask(reg.writemask);
@@ -148,32 +146,32 @@ dst_reg::dst_reg()
    init();
 }
 
-dst_reg::dst_reg(register_file file, int reg)
+dst_reg::dst_reg(register_file file, int nr)
 {
    init();
 
    this->file = file;
-   this->reg = reg;
+   this->nr = nr;
 }
 
-dst_reg::dst_reg(register_file file, int reg, const glsl_type *type,
+dst_reg::dst_reg(register_file file, int nr, const glsl_type *type,
                  unsigned writemask)
 {
    init();
 
    this->file = file;
-   this->reg = reg;
+   this->nr = nr;
    this->type = brw_type_for_base_type(type);
    this->writemask = writemask;
 }
 
-dst_reg::dst_reg(register_file file, int reg, brw_reg_type type,
+dst_reg::dst_reg(register_file file, int nr, brw_reg_type type,
                  unsigned writemask)
 {
    init();
 
    this->file = file;
-   this->reg = reg;
+   this->nr = nr;
    this->type = type;
    this->writemask = writemask;
 }
@@ -182,7 +180,6 @@ dst_reg::dst_reg(struct brw_reg reg) :
    backend_reg(reg)
 {
    this->file = HW_REG;
-   this->reg = 0;
    this->reg_offset = 0;
    this->reladdr = NULL;
 }
@@ -191,7 +188,6 @@ dst_reg::dst_reg(const src_reg &reg) :
    backend_reg(static_cast<struct brw_reg>(reg))
 {
    this->file = reg.file;
-   this->reg = reg.reg;
    this->reg_offset = reg.reg_offset;
    this->writemask = brw_mask_for_swizzle(reg.swizzle);
    this->reladdr = reg.reladdr;
@@ -201,7 +197,7 @@ bool
 dst_reg::equals(const dst_reg &r) const
 {
    return (file == r.file &&
-           reg == r.reg &&
+           nr == r.nr &&
            reg_offset == r.reg_offset &&
            type == r.type &&
            negate == r.negate &&
@@ -346,7 +342,7 @@ bool
 src_reg::equals(const src_reg &r) const
 {
    return (file == r.file &&
-	   reg == r.reg &&
+           nr == r.nr &&
 	   reg_offset == r.reg_offset &&
 	   type == r.type &&
 	   negate == r.negate &&
@@ -372,10 +368,10 @@ vec4_visitor::opt_vector_float()
    vec4_instruction *imm_inst[4];
 
    foreach_block_and_inst_safe(block, vec4_instruction, inst, cfg) {
-      if (last_reg != inst->dst.reg ||
+      if (last_reg != inst->dst.nr ||
           last_reg_offset != inst->dst.reg_offset ||
           last_reg_file != inst->dst.file) {
-         last_reg = inst->dst.reg;
+         last_reg = inst->dst.nr;
          last_reg_offset = inst->dst.reg_offset;
          last_reg_file = inst->dst.file;
          remaining_channels = WRITEMASK_XYZW;
@@ -497,7 +493,7 @@ vec4_visitor::split_uniform_registers()
    /* Prior to this, uniforms have been in an array sized according to
     * the number of vector uniforms present, sparsely filled (so an
     * aggregate results in reg indices being skipped over).  Now we're
-    * going to cut those aggregates up so each .reg index is one
+    * going to cut those aggregates up so each .nr index is one
     * vector.  The goal is to make elimination of unused uniform
     * components easier later.
     */
@@ -508,7 +504,7 @@ vec4_visitor::split_uniform_registers()
 
 	 assert(!inst->src[i].reladdr);
 
-	 inst->src[i].reg += inst->src[i].reg_offset;
+         inst->src[i].nr += inst->src[i].reg_offset;
 	 inst->src[i].reg_offset = 0;
       }
    }
@@ -557,7 +553,7 @@ vec4_visitor::pack_uniform_registers()
          if (inst->src[i].file != UNIFORM)
             continue;
 
-         int reg = inst->src[i].reg;
+         int reg = inst->src[i].nr;
          for (int c = 0; c < 4; c++) {
             if (!(readmask & (1 << c)))
                continue;
@@ -612,12 +608,12 @@ vec4_visitor::pack_uniform_registers()
    /* Now, update the instructions for our repacked uniforms. */
    foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
       for (int i = 0 ; i < 3; i++) {
-	 int src = inst->src[i].reg;
+         int src = inst->src[i].nr;
 
 	 if (inst->src[i].file != UNIFORM)
 	    continue;
 
-	 inst->src[i].reg = new_loc[src];
+         inst->src[i].nr = new_loc[src];
          inst->src[i].swizzle += BRW_SWIZZLE4(new_chan[src], new_chan[src],
                                               new_chan[src], new_chan[src]);
       }
@@ -812,10 +808,10 @@ vec4_visitor::move_push_constants_to_pull_constants()
    foreach_block_and_inst_safe(block, vec4_instruction, inst, cfg) {
       for (int i = 0 ; i < 3; i++) {
 	 if (inst->src[i].file != UNIFORM ||
-	     pull_constant_loc[inst->src[i].reg] == -1)
+             pull_constant_loc[inst->src[i].nr] == -1)
 	    continue;
 
-	 int uniform = inst->src[i].reg;
+         int uniform = inst->src[i].nr;
 
 	 dst_reg temp = dst_reg(this, glsl_type::vec4_type);
 
@@ -823,7 +819,7 @@ vec4_visitor::move_push_constants_to_pull_constants()
 				 pull_constant_loc[uniform]);
 
 	 inst->src[i].file = temp.file;
-	 inst->src[i].reg = temp.reg;
+         inst->src[i].nr = temp.nr;
 	 inst->src[i].reg_offset = temp.reg_offset;
 	 inst->src[i].reladdr = NULL;
       }
@@ -915,7 +911,7 @@ vec4_visitor::opt_set_dependency_control()
           * on, don't do dependency control across the read.
           */
          for (int i = 0; i < 3; i++) {
-            int reg = inst->src[i].reg + inst->src[i].reg_offset;
+            int reg = inst->src[i].nr + inst->src[i].reg_offset;
             if (inst->src[i].file == GRF) {
                last_grf_write[reg] = NULL;
             } else if (inst->src[i].file == HW_REG) {
@@ -934,7 +930,7 @@ vec4_visitor::opt_set_dependency_control()
          /* Now, see if we can do dependency control for this instruction
           * against a previous one writing to its destination.
           */
-         int reg = inst->dst.reg + inst->dst.reg_offset;
+         int reg = inst->dst.nr + inst->dst.reg_offset;
          if (inst->dst.file == GRF) {
             if (last_grf_write[reg] &&
                 !(inst->dst.writemask & grf_channels_written[reg])) {
@@ -957,7 +953,7 @@ vec4_visitor::opt_set_dependency_control()
 
             last_mrf_write[reg] = inst;
             mrf_channels_written[reg] |= inst->dst.writemask;
-         } else if (inst->dst.reg == HW_REG) {
+         } else if (inst->dst.nr == HW_REG) {
             if (inst->dst.brw_reg::file == BRW_GENERAL_REGISTER_FILE)
                memset(last_grf_write, 0, sizeof(last_grf_write));
             if (inst->dst.brw_reg::file == BRW_MESSAGE_REGISTER_FILE)
@@ -1058,7 +1054,7 @@ vec4_visitor::opt_register_coalesce()
 
       /* Remove no-op MOVs */
       if (inst->dst.file == inst->src[0].file &&
-          inst->dst.reg == inst->src[0].reg &&
+          inst->dst.nr == inst->src[0].nr &&
           inst->dst.reg_offset == inst->src[0].reg_offset) {
          bool is_nop_mov = true;
 
@@ -1179,8 +1175,8 @@ vec4_visitor::opt_register_coalesce()
           * in the register instead.
           */
          if (to_mrf && scan_inst->mlen > 0) {
-            if (inst->dst.reg >= scan_inst->base_mrf &&
-                inst->dst.reg < scan_inst->base_mrf + scan_inst->mlen) {
+            if (inst->dst.nr >= scan_inst->base_mrf &&
+                inst->dst.nr < scan_inst->base_mrf + scan_inst->mlen) {
                break;
             }
          } else {
@@ -1203,12 +1199,12 @@ vec4_visitor::opt_register_coalesce()
          vec4_instruction *scan_inst = _scan_inst;
 	 while (scan_inst != inst) {
 	    if (scan_inst->dst.file == GRF &&
-		scan_inst->dst.reg == inst->src[0].reg &&
+                scan_inst->dst.nr == inst->src[0].nr &&
 		scan_inst->dst.reg_offset == inst->src[0].reg_offset) {
                scan_inst->reswizzle(inst->dst.writemask,
                                     inst->src[0].swizzle);
 	       scan_inst->dst.file = inst->dst.file;
-	       scan_inst->dst.reg = inst->dst.reg;
+               scan_inst->dst.nr = inst->dst.nr;
 	       scan_inst->dst.reg_offset = inst->dst.reg_offset;
                if (inst->saturate &&
                    inst->dst.type != scan_inst->dst.type) {
@@ -1306,11 +1302,11 @@ vec4_visitor::split_virtual_grfs()
     */
    foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
       if (inst->dst.file == GRF && inst->regs_written > 1)
-         split_grf[inst->dst.reg] = false;
+         split_grf[inst->dst.nr] = false;
 
       for (int i = 0; i < 3; i++) {
          if (inst->src[i].file == GRF && inst->regs_read(i) > 1)
-            split_grf[inst->src[i].reg] = false;
+            split_grf[inst->src[i].nr] = false;
       }
    }
 
@@ -1331,16 +1327,16 @@ vec4_visitor::split_virtual_grfs()
    }
 
    foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
-      if (inst->dst.file == GRF && split_grf[inst->dst.reg] &&
+      if (inst->dst.file == GRF && split_grf[inst->dst.nr] &&
           inst->dst.reg_offset != 0) {
-         inst->dst.reg = (new_virtual_grf[inst->dst.reg] +
+         inst->dst.nr = (new_virtual_grf[inst->dst.nr] +
                           inst->dst.reg_offset - 1);
          inst->dst.reg_offset = 0;
       }
       for (int i = 0; i < 3; i++) {
-         if (inst->src[i].file == GRF && split_grf[inst->src[i].reg] &&
+         if (inst->src[i].file == GRF && split_grf[inst->src[i].nr] &&
              inst->src[i].reg_offset != 0) {
-            inst->src[i].reg = (new_virtual_grf[inst->src[i].reg] +
+            inst->src[i].nr = (new_virtual_grf[inst->src[i].nr] +
                                 inst->src[i].reg_offset - 1);
             inst->src[i].reg_offset = 0;
          }
@@ -1383,10 +1379,10 @@ vec4_visitor::dump_instruction(backend_instruction *be_inst, FILE *file)
 
    switch (inst->dst.file) {
    case GRF:
-      fprintf(file, "vgrf%d.%d", inst->dst.reg, inst->dst.reg_offset);
+      fprintf(file, "vgrf%d.%d", inst->dst.nr, inst->dst.reg_offset);
       break;
    case MRF:
-      fprintf(file, "m%d", inst->dst.reg);
+      fprintf(file, "m%d", inst->dst.nr);
       break;
    case HW_REG:
       if (inst->dst.brw_reg::file == BRW_ARCHITECTURE_REGISTER_FILE) {
@@ -1446,13 +1442,13 @@ vec4_visitor::dump_instruction(backend_instruction *be_inst, FILE *file)
          fprintf(file, "|");
       switch (inst->src[i].file) {
       case GRF:
-         fprintf(file, "vgrf%d", inst->src[i].reg);
+         fprintf(file, "vgrf%d", inst->src[i].nr);
          break;
       case ATTR:
-         fprintf(file, "attr%d", inst->src[i].reg);
+         fprintf(file, "attr%d", inst->src[i].nr);
          break;
       case UNIFORM:
-         fprintf(file, "u%d", inst->src[i].reg);
+         fprintf(file, "u%d", inst->src[i].nr);
          break;
       case IMM:
          switch (inst->src[i].type) {
@@ -1514,7 +1510,7 @@ vec4_visitor::dump_instruction(backend_instruction *be_inst, FILE *file)
       /* Don't print .0; and only VGRFs have reg_offsets and sizes */
       if (inst->src[i].reg_offset != 0 &&
           inst->src[i].file == GRF &&
-          alloc.sizes[inst->src[i].reg] != 1)
+          alloc.sizes[inst->src[i].nr] != 1)
          fprintf(file, ".%d", inst->src[i].reg_offset);
 
       if (inst->src[i].file != IMM) {
@@ -1572,7 +1568,7 @@ vec4_visitor::lower_attributes_to_hw_regs(const int *attribute_map,
    foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
       /* We have to support ATTR as a destination for GL_FIXED fixup. */
       if (inst->dst.file == ATTR) {
-	 int grf = attribute_map[inst->dst.reg + inst->dst.reg_offset];
+         int grf = attribute_map[inst->dst.nr + inst->dst.reg_offset];
 
          /* All attributes used in the shader need to have been assigned a
           * hardware register by the caller
@@ -1590,7 +1586,7 @@ vec4_visitor::lower_attributes_to_hw_regs(const int *attribute_map,
 	 if (inst->src[i].file != ATTR)
 	    continue;
 
-	 int grf = attribute_map[inst->src[i].reg + inst->src[i].reg_offset];
+         int grf = attribute_map[inst->src[i].nr + inst->src[i].reg_offset];
 
          /* All attributes used in the shader need to have been assigned a
           * hardware register by the caller
@@ -1790,7 +1786,7 @@ vec4_visitor::convert_to_hw_regs()
          struct brw_reg reg;
          switch (src.file) {
          case GRF:
-            reg = brw_vec8_grf(src.reg + src.reg_offset, 0);
+            reg = brw_vec8_grf(src.nr + src.reg_offset, 0);
             reg.type = src.type;
             reg.swizzle = src.swizzle;
             reg.abs = src.abs;
@@ -1804,8 +1800,8 @@ vec4_visitor::convert_to_hw_regs()
 
          case UNIFORM:
             reg = stride(brw_vec4_grf(prog_data->base.dispatch_grf_start_reg +
-                                      (src.reg + src.reg_offset) / 2,
-                                      ((src.reg + src.reg_offset) % 2) * 4),
+                                      (src.nr + src.reg_offset) / 2,
+                                      ((src.nr + src.reg_offset) % 2) * 4),
                          0, 4, 1);
             reg.type = src.type;
             reg.swizzle = src.swizzle;
@@ -1836,14 +1832,14 @@ vec4_visitor::convert_to_hw_regs()
 
       switch (inst->dst.file) {
       case GRF:
-         reg = brw_vec8_grf(dst.reg + dst.reg_offset, 0);
+         reg = brw_vec8_grf(dst.nr + dst.reg_offset, 0);
          reg.type = dst.type;
          reg.writemask = dst.writemask;
          break;
 
       case MRF:
-         assert(((dst.reg + dst.reg_offset) & ~(1 << 7)) < BRW_MAX_MRF(devinfo->gen));
-         reg = brw_message_reg(dst.reg + dst.reg_offset);
+         assert(((dst.nr + dst.reg_offset) & ~(1 << 7)) < BRW_MAX_MRF(devinfo->gen));
+         reg = brw_message_reg(dst.nr + dst.reg_offset);
          reg.type = dst.type;
          reg.writemask = dst.writemask;
          break;
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp b/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp
index 2be7b14ee70..b986c12eeb0 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp
@@ -271,7 +271,7 @@ try_copy_propagate(const struct brw_device_info *devinfo,
    for (int i = 1; i < 4; i++) {
       /* This is equals() except we don't care about the swizzle. */
       if (value.file != entry->value[i]->file ||
-	  value.reg != entry->value[i]->reg ||
+          value.nr != entry->value[i]->nr ||
 	  value.reg_offset != entry->value[i]->reg_offset ||
 	  value.type != entry->value[i]->type ||
 	  value.negate != entry->value[i]->negate ||
@@ -431,7 +431,7 @@ vec4_visitor::opt_copy_propagation(bool do_constant_prop)
          if (inst->regs_read(i) != 1)
             continue;
 
-	 int reg = (alloc.offsets[inst->src[i].reg] +
+         int reg = (alloc.offsets[inst->src[i].nr] +
 		    inst->src[i].reg_offset);
 
 	 /* Find the regs that each swizzle component came from.
@@ -474,7 +474,7 @@ vec4_visitor::opt_copy_propagation(bool do_constant_prop)
       /* Track available source registers. */
       if (inst->dst.file == GRF) {
 	 const int reg =
-	    alloc.offsets[inst->dst.reg] + inst->dst.reg_offset;
+            alloc.offsets[inst->dst.nr] + inst->dst.reg_offset;
 
 	 /* Update our destination's current channel values.  For a direct copy,
 	  * the value is the newly propagated source.  Otherwise, we don't know
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp b/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp
index 5a277f74c44..259f6042d19 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp
@@ -233,7 +233,7 @@ vec4_visitor::opt_cse_local(bblock_t *block)
              * overwrote.
              */
             if (inst->dst.file == entry->generator->src[i].file &&
-                inst->dst.reg == entry->generator->src[i].reg) {
+                inst->dst.nr == entry->generator->src[i].nr) {
                entry->remove();
                ralloc_free(entry);
                break;
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_live_variables.h b/src/mesa/drivers/dri/i965/brw_vec4_live_variables.h
index e7929ec2189..4aa98d72e75 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_live_variables.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4_live_variables.h
@@ -82,9 +82,9 @@ inline unsigned
 var_from_reg(const simple_allocator &alloc, const src_reg &reg,
              unsigned c = 0)
 {
-   assert(reg.file == GRF && reg.reg < alloc.count &&
-          reg.reg_offset < alloc.sizes[reg.reg] && c < 4);
-   return (4 * (alloc.offsets[reg.reg] + reg.reg_offset) +
+   assert(reg.file == GRF && reg.nr < alloc.count &&
+          reg.reg_offset < alloc.sizes[reg.nr] && c < 4);
+   return (4 * (alloc.offsets[reg.nr] + reg.reg_offset) +
            BRW_GET_SWZ(reg.swizzle, c));
 }
 
@@ -92,9 +92,9 @@ inline unsigned
 var_from_reg(const simple_allocator &alloc, const dst_reg &reg,
              unsigned c = 0)
 {
-   assert(reg.file == GRF && reg.reg < alloc.count &&
-          reg.reg_offset < alloc.sizes[reg.reg] && c < 4);
-   return 4 * (alloc.offsets[reg.reg] + reg.reg_offset) + c;
+   assert(reg.file == GRF && reg.nr < alloc.count &&
+          reg.reg_offset < alloc.sizes[reg.nr] && c < 4);
+   return 4 * (alloc.offsets[reg.nr] + reg.reg_offset) + c;
 }
 
 } /* namespace brw */
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp
index a49eca56118..adad4e514d8 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp
@@ -35,7 +35,7 @@ static void
 assign(unsigned int *reg_hw_locations, backend_reg *reg)
 {
    if (reg->file == GRF) {
-      reg->reg = reg_hw_locations[reg->reg] + reg->reg_offset;
+      reg->nr = reg_hw_locations[reg->nr] + reg->reg_offset;
       reg->reg_offset = 0;
    }
 }
@@ -56,11 +56,11 @@ vec4_visitor::reg_allocate_trivial()
 
    foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
       if (inst->dst.file == GRF)
-	 virtual_grf_used[inst->dst.reg] = true;
+         virtual_grf_used[inst->dst.nr] = true;
 
       for (unsigned i = 0; i < 3; i++) {
 	 if (inst->src[i].file == GRF)
-	    virtual_grf_used[inst->src[i].reg] = true;
+            virtual_grf_used[inst->src[i].nr] = true;
       }
    }
 
@@ -297,7 +297,7 @@ can_use_scratch_for_source(const vec4_instruction *inst, unsigned i,
 
    /* See if any previous source in the same instructions reads scratch_reg */
    for (unsigned n = 0; n < i; n++) {
-      if (inst->src[n].file == GRF && inst->src[n].reg == scratch_reg)
+      if (inst->src[n].file == GRF && inst->src[n].nr == scratch_reg)
          prev_inst_read_scratch_reg = true;
    }
 
@@ -310,7 +310,7 @@ can_use_scratch_for_source(const vec4_instruction *inst, unsigned i,
        * it if the write is not conditional and the channels we write are
        * compatible with our read mask
        */
-      if (prev_inst->dst.file == GRF && prev_inst->dst.reg == scratch_reg) {
+      if (prev_inst->dst.file == GRF && prev_inst->dst.nr == scratch_reg) {
          return (!prev_inst->predicate || prev_inst->opcode == BRW_OPCODE_SEL) &&
                 (brw_mask_for_swizzle(inst->src[i].swizzle) &
                  ~prev_inst->dst.writemask) == 0;
@@ -330,7 +330,7 @@ can_use_scratch_for_source(const vec4_instruction *inst, unsigned i,
       int n;
       for (n = 0; n < 3; n++) {
          if (prev_inst->src[n].file == GRF &&
-             prev_inst->src[n].reg == scratch_reg) {
+             prev_inst->src[n].nr == scratch_reg) {
             prev_inst_read_scratch_reg = true;
             break;
          }
@@ -379,18 +379,18 @@ vec4_visitor::evaluate_spill_costs(float *spill_costs, bool *no_spill)
              * previous instruction, in which case we'll just reuse the scratch
              * reg for this instruction.
              */
-            if (!can_use_scratch_for_source(inst, i, inst->src[i].reg)) {
-               spill_costs[inst->src[i].reg] += loop_scale;
+            if (!can_use_scratch_for_source(inst, i, inst->src[i].nr)) {
+               spill_costs[inst->src[i].nr] += loop_scale;
                if (inst->src[i].reladdr)
-                  no_spill[inst->src[i].reg] = true;
+                  no_spill[inst->src[i].nr] = true;
             }
          }
       }
 
       if (inst->dst.file == GRF) {
-         spill_costs[inst->dst.reg] += loop_scale;
+         spill_costs[inst->dst.nr] += loop_scale;
          if (inst->dst.reladdr)
-            no_spill[inst->dst.reg] = true;
+            no_spill[inst->dst.nr] = true;
       }
 
       switch (inst->opcode) {
@@ -407,10 +407,10 @@ vec4_visitor::evaluate_spill_costs(float *spill_costs, bool *no_spill)
       case SHADER_OPCODE_GEN4_SCRATCH_WRITE:
          for (int i = 0; i < 3; i++) {
             if (inst->src[i].file == GRF)
-               no_spill[inst->src[i].reg] = true;
+               no_spill[inst->src[i].nr] = true;
          }
          if (inst->dst.file == GRF)
-            no_spill[inst->dst.reg] = true;
+            no_spill[inst->dst.nr] = true;
          break;
 
       default:
@@ -445,7 +445,7 @@ vec4_visitor::spill_reg(int spill_reg_nr)
    int scratch_reg = -1;
    foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
       for (unsigned int i = 0; i < 3; i++) {
-         if (inst->src[i].file == GRF && inst->src[i].reg == spill_reg_nr) {
+         if (inst->src[i].file == GRF && inst->src[i].nr == spill_reg_nr) {
             if (scratch_reg == -1 ||
                 !can_use_scratch_for_source(inst, i, scratch_reg)) {
                /* We need to unspill anyway so make sure we read the full vec4
@@ -455,19 +455,19 @@ vec4_visitor::spill_reg(int spill_reg_nr)
                 */
                scratch_reg = alloc.allocate(1);
                src_reg temp = inst->src[i];
-               temp.reg = scratch_reg;
+               temp.nr = scratch_reg;
                temp.swizzle = BRW_SWIZZLE_XYZW;
                emit_scratch_read(block, inst,
                                  dst_reg(temp), inst->src[i], spill_offset);
             }
             assert(scratch_reg != -1);
-            inst->src[i].reg = scratch_reg;
+            inst->src[i].nr = scratch_reg;
          }
       }
 
-      if (inst->dst.file == GRF && inst->dst.reg == spill_reg_nr) {
+      if (inst->dst.file == GRF && inst->dst.nr == spill_reg_nr) {
          emit_scratch_write(block, inst, spill_offset);
-         scratch_reg = inst->dst.reg;
+         scratch_reg = inst->dst.nr;
       }
    }
 
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index 7b11ac1675d..6038d90b433 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -633,7 +633,7 @@ src_reg::src_reg(class vec4_visitor *v, const struct glsl_type *type)
    init();
 
    this->file = GRF;
-   this->reg = v->alloc.allocate(type_size_vec4(type));
+   this->nr = v->alloc.allocate(type_size_vec4(type));
 
    if (type->is_array() || type->is_record()) {
       this->swizzle = BRW_SWIZZLE_NOOP;
@@ -651,7 +651,7 @@ src_reg::src_reg(class vec4_visitor *v, const struct glsl_type *type, int size)
    init();
 
    this->file = GRF;
-   this->reg = v->alloc.allocate(type_size_vec4(type) * size);
+   this->nr = v->alloc.allocate(type_size_vec4(type) * size);
 
    this->swizzle = BRW_SWIZZLE_NOOP;
 
@@ -663,7 +663,7 @@ dst_reg::dst_reg(class vec4_visitor *v, const struct glsl_type *type)
    init();
 
    this->file = GRF;
-   this->reg = v->alloc.allocate(type_size_vec4(type));
+   this->nr = v->alloc.allocate(type_size_vec4(type));
 
    if (type->is_array() || type->is_record()) {
       this->writemask = WRITEMASK_XYZW;
@@ -1615,7 +1615,7 @@ vec4_visitor::emit_scratch_write(bblock_t *block, vec4_instruction *inst,
    inst->insert_after(block, write);
 
    inst->dst.file = temp.file;
-   inst->dst.reg = temp.reg;
+   inst->dst.nr = temp.nr;
    inst->dst.reg_offset = temp.reg_offset;
    inst->dst.reladdr = NULL;
 }
@@ -1642,10 +1642,10 @@ vec4_visitor::emit_resolve_reladdr(int scratch_loc[], bblock_t *block,
                                           *src.reladdr);
 
    /* Now handle scratch access on src */
-   if (src.file == GRF && scratch_loc[src.reg] != -1) {
+   if (src.file == GRF && scratch_loc[src.nr] != -1) {
       dst_reg temp = dst_reg(this, glsl_type::vec4_type);
-      emit_scratch_read(block, inst, temp, src, scratch_loc[src.reg]);
-      src.reg = temp.reg;
+      emit_scratch_read(block, inst, temp, src, scratch_loc[src.nr]);
+      src.nr = temp.nr;
       src.reg_offset = temp.reg_offset;
       src.reladdr = NULL;
    }
@@ -1671,17 +1671,17 @@ vec4_visitor::move_grf_array_access_to_scratch()
     */
    foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
       if (inst->dst.file == GRF && inst->dst.reladdr) {
-         if (scratch_loc[inst->dst.reg] == -1) {
-            scratch_loc[inst->dst.reg] = last_scratch;
-            last_scratch += this->alloc.sizes[inst->dst.reg];
+         if (scratch_loc[inst->dst.nr] == -1) {
+            scratch_loc[inst->dst.nr] = last_scratch;
+            last_scratch += this->alloc.sizes[inst->dst.nr];
          }
 
          for (src_reg *iter = inst->dst.reladdr;
               iter->reladdr;
               iter = iter->reladdr) {
-            if (iter->file == GRF && scratch_loc[iter->reg] == -1) {
-               scratch_loc[iter->reg] = last_scratch;
-               last_scratch += this->alloc.sizes[iter->reg];
+            if (iter->file == GRF && scratch_loc[iter->nr] == -1) {
+               scratch_loc[iter->nr] = last_scratch;
+               last_scratch += this->alloc.sizes[iter->nr];
             }
          }
       }
@@ -1690,9 +1690,9 @@ vec4_visitor::move_grf_array_access_to_scratch()
          for (src_reg *iter = &inst->src[i];
               iter->reladdr;
               iter = iter->reladdr) {
-            if (iter->file == GRF && scratch_loc[iter->reg] == -1) {
-               scratch_loc[iter->reg] = last_scratch;
-               last_scratch += this->alloc.sizes[iter->reg];
+            if (iter->file == GRF && scratch_loc[iter->nr] == -1) {
+               scratch_loc[iter->nr] = last_scratch;
+               last_scratch += this->alloc.sizes[iter->nr];
             }
          }
       }
@@ -1718,8 +1718,8 @@ vec4_visitor::move_grf_array_access_to_scratch()
       /* Now that we have handled any (possibly recursive) reladdr scratch
        * accesses for dst we can safely do the scratch write for dst itself
        */
-      if (inst->dst.file == GRF && scratch_loc[inst->dst.reg] != -1)
-         emit_scratch_write(block, inst, scratch_loc[inst->dst.reg]);
+      if (inst->dst.file == GRF && scratch_loc[inst->dst.nr] != -1)
+         emit_scratch_write(block, inst, scratch_loc[inst->dst.nr]);
 
       /* Now handle scratch access on any src. In this case, since inst->src[i]
        * already is a src_reg, we can just call emit_resolve_reladdr with
@@ -1788,7 +1788,7 @@ vec4_visitor::move_uniform_array_access_to_pull_constants()
             if (inst->src[i].file != UNIFORM || !inst->src[i].reladdr)
                continue;
 
-            int uniform = inst->src[i].reg;
+            int uniform = inst->src[i].nr;
 
             if (inst->src[i].reladdr->reladdr)
                nested_reladdr = true;  /* will need another pass */
@@ -1819,7 +1819,7 @@ vec4_visitor::move_uniform_array_access_to_pull_constants()
                                     pull_constant_loc[uniform]);
 
             inst->src[i].file = temp.file;
-            inst->src[i].reg = temp.reg;
+            inst->src[i].nr = temp.nr;
             inst->src[i].reg_offset = temp.reg_offset;
             inst->src[i].reladdr = NULL;
          }
diff --git a/src/mesa/drivers/dri/i965/test_vec4_copy_propagation.cpp b/src/mesa/drivers/dri/i965/test_vec4_copy_propagation.cpp
index e80b71b558d..a1f91d9c56a 100644
--- a/src/mesa/drivers/dri/i965/test_vec4_copy_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/test_vec4_copy_propagation.cpp
@@ -144,7 +144,7 @@ TEST_F(copy_propagation_test, test_swizzle_swizzle)
 
    copy_propagation(v);
 
-   EXPECT_EQ(test_mov->src[0].reg, a.reg);
+   EXPECT_EQ(test_mov->src[0].nr, a.nr);
    EXPECT_EQ(test_mov->src[0].swizzle, BRW_SWIZZLE4(SWIZZLE_Z,
                                                     SWIZZLE_W,
                                                     SWIZZLE_X,
@@ -174,7 +174,7 @@ TEST_F(copy_propagation_test, test_swizzle_writemask)
    copy_propagation(v);
 
    /* should not copy propagate */
-   EXPECT_EQ(test_mov->src[0].reg, b.reg);
+   EXPECT_EQ(test_mov->src[0].nr, b.nr);
    EXPECT_EQ(test_mov->src[0].swizzle, BRW_SWIZZLE4(SWIZZLE_W,
                                                     SWIZZLE_W,
                                                     SWIZZLE_W,
diff --git a/src/mesa/drivers/dri/i965/test_vec4_register_coalesce.cpp b/src/mesa/drivers/dri/i965/test_vec4_register_coalesce.cpp
index 2f824617454..d84e2e98ec0 100644
--- a/src/mesa/drivers/dri/i965/test_vec4_register_coalesce.cpp
+++ b/src/mesa/drivers/dri/i965/test_vec4_register_coalesce.cpp
@@ -213,7 +213,7 @@ TEST_F(register_coalesce_test, test_dp4_grf)
 
    register_coalesce(v);
 
-   EXPECT_EQ(dp4->dst.reg, to.reg);
+   EXPECT_EQ(dp4->dst.nr, to.nr);
    EXPECT_EQ(dp4->dst.writemask, WRITEMASK_Y);
 }
 
@@ -239,5 +239,5 @@ TEST_F(register_coalesce_test, test_channel_mul_grf)
 
    register_coalesce(v);
 
-   EXPECT_EQ(mul->dst.reg, to.reg);
+   EXPECT_EQ(mul->dst.nr, to.nr);
 }

From dba309fc14d1ca99251c8f8115d2a26ac86f14f6 Mon Sep 17 00:00:00 2001
From: Matt Turner <mattst88@gmail.com>
Date: Fri, 30 Oct 2015 13:53:38 -0700
Subject: [PATCH 264/287] i965: Initialize registers.

The test (file == BAD_FILE) works on registers for which the constructor
has not run because BAD_FILE is zero.  The next commit will move
BAD_FILE in the enum so that it's no longer zero.

In the case of this->outputs, the constructor was being run implicitly,
and we were unnecessarily memsetting it to zero.

Reviewed-by: Emil Velikov <emil.velikov@collabora.co.uk>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/mesa/drivers/dri/i965/brw_fs_nir.cpp     | 10 +++++++++-
 src/mesa/drivers/dri/i965/brw_fs_visitor.cpp |  1 -
 src/mesa/drivers/dri/i965/brw_vec4_nir.cpp   |  9 +++++++++
 3 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index 73b09f5d03d..7a919857a97 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -262,6 +262,10 @@ void
 fs_visitor::nir_emit_system_values()
 {
    nir_system_values = ralloc_array(mem_ctx, fs_reg, SYSTEM_VALUE_MAX);
+   for (unsigned i = 0; i < SYSTEM_VALUE_MAX; i++) {
+      nir_system_values[i] = fs_reg();
+   }
+
    nir_foreach_overload(nir, overload) {
       assert(strcmp(overload->function->name, "main") == 0);
       assert(overload->impl);
@@ -272,7 +276,11 @@ fs_visitor::nir_emit_system_values()
 void
 fs_visitor::nir_emit_impl(nir_function_impl *impl)
 {
-   nir_locals = reralloc(mem_ctx, nir_locals, fs_reg, impl->reg_alloc);
+   nir_locals = ralloc_array(mem_ctx, fs_reg, impl->reg_alloc);
+   for (unsigned i = 0; i < impl->reg_alloc; i++) {
+      nir_locals[i] = fs_reg();
+   }
+
    foreach_list_typed(nir_register, reg, node, &impl->registers) {
       unsigned array_elems =
          reg->num_array_elems == 0 ? 1 : reg->num_array_elems;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index da7e9ca67ef..4b9f9751f80 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -1190,7 +1190,6 @@ fs_visitor::init()
    this->nir_ssa_values = NULL;
 
    memset(&this->payload, 0, sizeof(this->payload));
-   memset(this->outputs, 0, sizeof(this->outputs));
    memset(this->output_components, 0, sizeof(this->output_components));
    this->source_depth_to_render_target = false;
    this->runtime_check_aads_emit = false;
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
index e0d5a14981a..8b6912e45f2 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
@@ -106,6 +106,9 @@ void
 vec4_visitor::nir_setup_system_values()
 {
    nir_system_values = ralloc_array(mem_ctx, dst_reg, SYSTEM_VALUE_MAX);
+   for (unsigned i = 0; i < SYSTEM_VALUE_MAX; i++) {
+      nir_system_values[i] = dst_reg();
+   }
 
    nir_foreach_overload(nir, overload) {
       assert(strcmp(overload->function->name, "main") == 0);
@@ -118,6 +121,9 @@ void
 vec4_visitor::nir_setup_inputs()
 {
    nir_inputs = ralloc_array(mem_ctx, src_reg, nir->num_inputs);
+   for (unsigned i = 0; i < nir->num_inputs; i++) {
+      nir_inputs[i] = dst_reg();
+   }
 
    nir_foreach_variable(var, &nir->inputs) {
       int offset = var->data.driver_location;
@@ -148,6 +154,9 @@ void
 vec4_visitor::nir_emit_impl(nir_function_impl *impl)
 {
    nir_locals = ralloc_array(mem_ctx, dst_reg, impl->reg_alloc);
+   for (unsigned i = 0; i < impl->reg_alloc; i++) {
+      nir_locals[i] = dst_reg();
+   }
 
    foreach_list_typed(nir_register, reg, node, &impl->registers) {
       unsigned array_elems =

From 5a23b31c75556fa0fe9ca53db481bbec18c2baba Mon Sep 17 00:00:00 2001
From: Matt Turner <mattst88@gmail.com>
Date: Thu, 29 Oct 2015 22:04:22 -0700
Subject: [PATCH 265/287] i965: Move BAD_FILE from the beginning of enum
 register_file.

I'm going to begin using brw_reg's file field in backend_reg and its
derivatives, and in order to keep the hardware value for ARF as 0, we
have to do something different.

Reviewed-by: Emil Velikov <emil.velikov@collabora.co.uk>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/mesa/drivers/dri/i965/brw_shader.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/brw_shader.h b/src/mesa/drivers/dri/i965/brw_shader.h
index 67d623cd35a..5632378eed7 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.h
+++ b/src/mesa/drivers/dri/i965/brw_shader.h
@@ -39,13 +39,13 @@
 #define MAX_VGRF_SIZE 16
 
 enum PACKED register_file {
-   BAD_FILE,
    GRF,
    MRF,
    IMM,
    HW_REG, /* a struct brw_reg */
    ATTR,
    UNIFORM, /* prog_data->params[reg] */
+   BAD_FILE,
 };
 
 #ifdef __cplusplus

From b163aa01487ab5f9b22c48b7badc5d65999c4985 Mon Sep 17 00:00:00 2001
From: Matt Turner <mattst88@gmail.com>
Date: Mon, 26 Oct 2015 17:09:25 -0700
Subject: [PATCH 266/287] i965: Rename GRF to VGRF.

The 2-bit hardware register file field is ARF, GRF, MRF, IMM.

Rename GRF to VGRF (virtual GRF) so that we can reuse the GRF name to
mean an assigned general purpose register.

Reviewed-by: Emil Velikov <emil.velikov@collabora.co.uk>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/mesa/drivers/dri/i965/brw_fs.cpp          | 104 +++++++++---------
 src/mesa/drivers/dri/i965/brw_fs.h            |   2 +-
 src/mesa/drivers/dri/i965/brw_fs_builder.h    |   4 +-
 .../dri/i965/brw_fs_cmod_propagation.cpp      |   2 +-
 .../dri/i965/brw_fs_combine_constants.cpp     |   4 +-
 .../dri/i965/brw_fs_copy_propagation.cpp      |  26 ++---
 src/mesa/drivers/dri/i965/brw_fs_cse.cpp      |   6 +-
 .../dri/i965/brw_fs_dead_code_eliminate.cpp   |   6 +-
 .../drivers/dri/i965/brw_fs_generator.cpp     |   4 +-
 .../dri/i965/brw_fs_live_variables.cpp        |   6 +-
 src/mesa/drivers/dri/i965/brw_fs_nir.cpp      |   6 +-
 .../drivers/dri/i965/brw_fs_reg_allocate.cpp  |  24 ++--
 .../dri/i965/brw_fs_register_coalesce.cpp     |   8 +-
 .../dri/i965/brw_fs_saturate_propagation.cpp  |   6 +-
 src/mesa/drivers/dri/i965/brw_fs_validate.cpp |   4 +-
 src/mesa/drivers/dri/i965/brw_fs_visitor.cpp  |  16 +--
 src/mesa/drivers/dri/i965/brw_ir_fs.h         |   6 +-
 .../dri/i965/brw_schedule_instructions.cpp    |  26 ++---
 src/mesa/drivers/dri/i965/brw_shader.h        |   4 +-
 src/mesa/drivers/dri/i965/brw_vec4.cpp        |  32 +++---
 src/mesa/drivers/dri/i965/brw_vec4_builder.h  |   2 +-
 .../dri/i965/brw_vec4_cmod_propagation.cpp    |   2 +-
 .../dri/i965/brw_vec4_copy_propagation.cpp    |  14 +--
 src/mesa/drivers/dri/i965/brw_vec4_cse.cpp    |   4 +-
 .../dri/i965/brw_vec4_dead_code_eliminate.cpp |   8 +-
 .../dri/i965/brw_vec4_live_variables.cpp      |   8 +-
 .../dri/i965/brw_vec4_live_variables.h        |   4 +-
 src/mesa/drivers/dri/i965/brw_vec4_nir.cpp    |   8 +-
 .../dri/i965/brw_vec4_reg_allocate.cpp        |  26 ++---
 .../drivers/dri/i965/brw_vec4_visitor.cpp     |  16 +--
 30 files changed, 194 insertions(+), 194 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 3ea97f22e97..e8ac1c2eda3 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -76,7 +76,7 @@ fs_inst::init(enum opcode opcode, uint8_t exec_size, const fs_reg &dst,
 
    /* This will be the case for almost all instructions. */
    switch (dst.file) {
-   case GRF:
+   case VGRF:
    case HW_REG:
    case MRF:
    case ATTR:
@@ -204,7 +204,7 @@ fs_visitor::VARYING_PULL_CONSTANT_LOAD(const fs_builder &bld,
       op = FS_OPCODE_VARYING_PULL_CONSTANT_LOAD;
 
    int regs_written = 4 * (bld.dispatch_width() / 8) * scale;
-   fs_reg vec4_result = fs_reg(GRF, alloc.allocate(regs_written), dst.type);
+   fs_reg vec4_result = fs_reg(VGRF, alloc.allocate(regs_written), dst.type);
    fs_inst *inst = bld.emit(op, vec4_result, surf_index, vec4_offset);
    inst->regs_written = regs_written;
 
@@ -233,7 +233,7 @@ fs_visitor::DEP_RESOLVE_MOV(const fs_builder &bld, int grf)
    const fs_builder ubld = bld.annotate("send dependency resolve")
                               .half(0);
 
-   ubld.MOV(ubld.null_reg_f(), fs_reg(GRF, grf, BRW_REGISTER_TYPE_F));
+   ubld.MOV(ubld.null_reg_f(), fs_reg(VGRF, grf, BRW_REGISTER_TYPE_F));
 }
 
 bool
@@ -286,12 +286,12 @@ fs_inst::is_send_from_grf() const
    case SHADER_OPCODE_URB_READ_SIMD8:
       return true;
    case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD:
-      return src[1].file == GRF;
+      return src[1].file == VGRF;
    case FS_OPCODE_FB_WRITE:
-      return src[0].file == GRF;
+      return src[0].file == VGRF;
    default:
       if (is_tex())
-         return src[0].file == GRF;
+         return src[0].file == VGRF;
 
       return false;
    }
@@ -304,7 +304,7 @@ fs_inst::is_copy_payload(const brw::simple_allocator &grf_alloc) const
       return false;
 
    fs_reg reg = this->src[0];
-   if (reg.file != GRF || reg.reg_offset != 0 || reg.stride == 0)
+   if (reg.file != VGRF || reg.reg_offset != 0 || reg.stride == 0)
       return false;
 
    if (grf_alloc.sizes[reg.nr] != this->regs_written)
@@ -540,7 +540,7 @@ fs_visitor::get_timestamp(const fs_builder &bld)
                                           0),
                              BRW_REGISTER_TYPE_UD));
 
-   fs_reg dst = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD);
+   fs_reg dst = fs_reg(VGRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD);
 
    /* We want to read the 3 fields we care about even if it's not enabled in
     * the dispatch.
@@ -595,7 +595,7 @@ fs_visitor::emit_shader_time_end()
 
    fs_reg start = shader_start_time;
    start.negate = true;
-   fs_reg diff = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD);
+   fs_reg diff = fs_reg(VGRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD);
    diff.set_smear(0);
 
    const fs_builder cbld = ibld.group(1, 0);
@@ -840,7 +840,7 @@ fs_inst::regs_read(int arg) const
       return 1;
 
    default:
-      if (is_tex() && arg == 0 && src[0].file == GRF)
+      if (is_tex() && arg == 0 && src[0].file == VGRF)
          return mlen;
       break;
    }
@@ -851,7 +851,7 @@ fs_inst::regs_read(int arg) const
    case UNIFORM:
    case IMM:
       return 1;
-   case GRF:
+   case VGRF:
    case ATTR:
    case HW_REG:
       return DIV_ROUND_UP(components_read(arg) *
@@ -954,7 +954,7 @@ fs_reg
 fs_visitor::vgrf(const glsl_type *const type)
 {
    int reg_width = dispatch_width / 8;
-   return fs_reg(GRF, alloc.allocate(type_size_scalar(type) * reg_width),
+   return fs_reg(VGRF, alloc.allocate(type_size_scalar(type) * reg_width),
                  brw_type_for_base_type(type));
 }
 
@@ -1299,9 +1299,9 @@ fs_visitor::emit_sampleid_setup()
    fs_reg *reg = new(this->mem_ctx) fs_reg(vgrf(glsl_type::int_type));
 
    if (key->compute_sample_id) {
-      fs_reg t1(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_D);
+      fs_reg t1(VGRF, alloc.allocate(1), BRW_REGISTER_TYPE_D);
       t1.set_smear(0);
-      fs_reg t2(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_W);
+      fs_reg t2(VGRF, alloc.allocate(1), BRW_REGISTER_TYPE_W);
 
       /* The PS will be run in MSDISPMODE_PERSAMPLE. For example with
        * 8x multisampling, subspan 0 will represent sample N (where N
@@ -1724,14 +1724,14 @@ fs_visitor::split_virtual_grfs()
 
    /* Mark all used registers as fully splittable */
    foreach_block_and_inst(block, fs_inst, inst, cfg) {
-      if (inst->dst.file == GRF) {
+      if (inst->dst.file == VGRF) {
          int reg = vgrf_to_reg[inst->dst.nr];
          for (unsigned j = 1; j < this->alloc.sizes[inst->dst.nr]; j++)
             split_points[reg + j] = true;
       }
 
       for (int i = 0; i < inst->sources; i++) {
-         if (inst->src[i].file == GRF) {
+         if (inst->src[i].file == VGRF) {
             int reg = vgrf_to_reg[inst->src[i].nr];
             for (unsigned j = 1; j < this->alloc.sizes[inst->src[i].nr]; j++)
                split_points[reg + j] = true;
@@ -1740,13 +1740,13 @@ fs_visitor::split_virtual_grfs()
    }
 
    foreach_block_and_inst(block, fs_inst, inst, cfg) {
-      if (inst->dst.file == GRF) {
+      if (inst->dst.file == VGRF) {
          int reg = vgrf_to_reg[inst->dst.nr] + inst->dst.reg_offset;
          for (int j = 1; j < inst->regs_written; j++)
             split_points[reg + j] = false;
       }
       for (int i = 0; i < inst->sources; i++) {
-         if (inst->src[i].file == GRF) {
+         if (inst->src[i].file == VGRF) {
             int reg = vgrf_to_reg[inst->src[i].nr] + inst->src[i].reg_offset;
             for (int j = 1; j < inst->regs_read(i); j++)
                split_points[reg + j] = false;
@@ -1793,14 +1793,14 @@ fs_visitor::split_virtual_grfs()
    assert(reg == reg_count);
 
    foreach_block_and_inst(block, fs_inst, inst, cfg) {
-      if (inst->dst.file == GRF) {
+      if (inst->dst.file == VGRF) {
          reg = vgrf_to_reg[inst->dst.nr] + inst->dst.reg_offset;
          inst->dst.nr = new_virtual_grf[reg];
          inst->dst.reg_offset = new_reg_offset[reg];
          assert((unsigned)new_reg_offset[reg] < alloc.sizes[new_virtual_grf[reg]]);
       }
       for (int i = 0; i < inst->sources; i++) {
-	 if (inst->src[i].file == GRF) {
+	 if (inst->src[i].file == VGRF) {
             reg = vgrf_to_reg[inst->src[i].nr] + inst->src[i].reg_offset;
             inst->src[i].nr = new_virtual_grf[reg];
             inst->src[i].reg_offset = new_reg_offset[reg];
@@ -1829,11 +1829,11 @@ fs_visitor::compact_virtual_grfs()
 
    /* Mark which virtual GRFs are used. */
    foreach_block_and_inst(block, const fs_inst, inst, cfg) {
-      if (inst->dst.file == GRF)
+      if (inst->dst.file == VGRF)
          remap_table[inst->dst.nr] = 0;
 
       for (int i = 0; i < inst->sources; i++) {
-         if (inst->src[i].file == GRF)
+         if (inst->src[i].file == VGRF)
             remap_table[inst->src[i].nr] = 0;
       }
    }
@@ -1858,11 +1858,11 @@ fs_visitor::compact_virtual_grfs()
 
    /* Patch all the instructions to use the newly renumbered registers */
    foreach_block_and_inst(block, fs_inst, inst, cfg) {
-      if (inst->dst.file == GRF)
+      if (inst->dst.file == VGRF)
          inst->dst.nr = remap_table[inst->dst.nr];
 
       for (int i = 0; i < inst->sources; i++) {
-         if (inst->src[i].file == GRF)
+         if (inst->src[i].file == VGRF)
             inst->src[i].nr = remap_table[inst->src[i].nr];
       }
    }
@@ -1872,7 +1872,7 @@ fs_visitor::compact_virtual_grfs()
     * think some random VGRF is delta_xy.
     */
    for (unsigned i = 0; i < ARRAY_SIZE(delta_xy); i++) {
-      if (delta_xy[i].file == GRF) {
+      if (delta_xy[i].file == VGRF) {
          if (remap_table[delta_xy[i].nr] != -1) {
             delta_xy[i].nr = remap_table[delta_xy[i].nr];
          } else {
@@ -2048,7 +2048,7 @@ fs_visitor::demote_pull_constants()
          brw_mark_surface_used(prog_data, index);
 
          /* Rewrite the instruction to use the temporary VGRF. */
-         inst->src[i].file = GRF;
+         inst->src[i].file = VGRF;
          inst->src[i].nr = dst.nr;
          inst->src[i].reg_offset = 0;
       }
@@ -2459,7 +2459,7 @@ fs_visitor::opt_register_renaming()
 
       /* Rewrite instruction sources. */
       for (int i = 0; i < inst->sources; i++) {
-         if (inst->src[i].file == GRF &&
+         if (inst->src[i].file == VGRF &&
              remap[inst->src[i].nr] != -1 &&
              remap[inst->src[i].nr] != inst->src[i].nr) {
             inst->src[i].nr = remap[inst->src[i].nr];
@@ -2470,7 +2470,7 @@ fs_visitor::opt_register_renaming()
       const int dst = inst->dst.nr;
 
       if (depth == 0 &&
-          inst->dst.file == GRF &&
+          inst->dst.file == VGRF &&
           alloc.sizes[inst->dst.nr] == inst->exec_size / 8 &&
           !inst->is_partial_write()) {
          if (remap[dst] == -1) {
@@ -2480,7 +2480,7 @@ fs_visitor::opt_register_renaming()
             inst->dst.nr = remap[dst];
             progress = true;
          }
-      } else if (inst->dst.file == GRF &&
+      } else if (inst->dst.file == VGRF &&
                  remap[dst] != -1 &&
                  remap[dst] != dst) {
          inst->dst.nr = remap[dst];
@@ -2492,7 +2492,7 @@ fs_visitor::opt_register_renaming()
       invalidate_live_intervals();
 
       for (unsigned i = 0; i < ARRAY_SIZE(delta_xy); i++) {
-         if (delta_xy[i].file == GRF && remap[delta_xy[i].nr] != -1) {
+         if (delta_xy[i].file == VGRF && remap[delta_xy[i].nr] != -1) {
             delta_xy[i].nr = remap[delta_xy[i].nr];
          }
       }
@@ -2560,7 +2560,7 @@ fs_visitor::compute_to_mrf()
 
       if (inst->opcode != BRW_OPCODE_MOV ||
 	  inst->is_partial_write() ||
-	  inst->dst.file != MRF || inst->src[0].file != GRF ||
+	  inst->dst.file != MRF || inst->src[0].file != VGRF ||
 	  inst->dst.type != inst->src[0].type ||
 	  inst->src[0].abs || inst->src[0].negate ||
           !inst->src[0].is_contiguous() ||
@@ -2590,7 +2590,7 @@ fs_visitor::compute_to_mrf()
        * rewrite the thing that made this GRF to write into the MRF.
        */
       foreach_inst_in_block_reverse_starting_from(fs_inst, scan_inst, inst) {
-	 if (scan_inst->dst.file == GRF &&
+	 if (scan_inst->dst.file == VGRF &&
             scan_inst->dst.nr == inst->src[0].nr) {
 	    /* Found the last thing to write our reg we want to turn
 	     * into a compute-to-MRF.
@@ -2646,7 +2646,7 @@ fs_visitor::compute_to_mrf()
 	  */
 	 bool interfered = false;
 	 for (int i = 0; i < scan_inst->sources; i++) {
-	    if (scan_inst->src[i].file == GRF &&
+	    if (scan_inst->src[i].file == VGRF &&
                 scan_inst->src[i].nr == inst->src[0].nr &&
 		scan_inst->src[i].reg_offset == inst->src[0].reg_offset) {
 	       interfered = true;
@@ -2841,7 +2841,7 @@ fs_visitor::remove_duplicate_mrf_writes()
       }
 
       /* Clear out any MRF move records whose sources got overwritten. */
-      if (inst->dst.file == GRF) {
+      if (inst->dst.file == VGRF) {
 	 for (unsigned int i = 0; i < ARRAY_SIZE(last_mrf_move); i++) {
 	    if (last_mrf_move[i] &&
                 last_mrf_move[i]->src[0].nr == inst->dst.nr) {
@@ -2852,7 +2852,7 @@ fs_visitor::remove_duplicate_mrf_writes()
 
       if (inst->opcode == BRW_OPCODE_MOV &&
 	  inst->dst.file == MRF &&
-	  inst->src[0].file == GRF &&
+	  inst->src[0].file == VGRF &&
 	  !inst->is_partial_write()) {
          last_mrf_move[inst->dst.nr] = inst;
       }
@@ -2870,7 +2870,7 @@ clear_deps_for_inst_src(fs_inst *inst, bool *deps, int first_grf, int grf_len)
    /* Clear the flag for registers that actually got read (as expected). */
    for (int i = 0; i < inst->sources; i++) {
       int grf;
-      if (inst->src[i].file == GRF) {
+      if (inst->src[i].file == VGRF) {
          grf = inst->src[i].nr;
       } else if (inst->src[i].file == HW_REG &&
                  inst->src[i].brw_reg::file == BRW_GENERAL_REGISTER_FILE) {
@@ -2940,7 +2940,7 @@ fs_visitor::insert_gen4_pre_send_dependency_workarounds(bblock_t *block,
        * instruction but a MOV that might have left us an outstanding
        * dependency has more latency than a MOV.
        */
-      if (scan_inst->dst.file == GRF) {
+      if (scan_inst->dst.file == VGRF) {
          for (int i = 0; i < scan_inst->regs_written; i++) {
             int reg = scan_inst->dst.nr + i;
 
@@ -3006,7 +3006,7 @@ fs_visitor::insert_gen4_post_send_dependency_workarounds(bblock_t *block, fs_ins
       /* We insert our reads as late as possible since they're reading the
        * result of a SEND, which has massive latency.
        */
-      if (scan_inst->dst.file == GRF &&
+      if (scan_inst->dst.file == VGRF &&
           scan_inst->dst.nr >= first_write_grf &&
           scan_inst->dst.nr < first_write_grf + write_len &&
           needs_dep[scan_inst->dst.nr - first_write_grf]) {
@@ -3039,7 +3039,7 @@ fs_visitor::insert_gen4_send_dependency_workarounds()
     */
 
    foreach_block_and_inst(block, fs_inst, inst, cfg) {
-      if (inst->mlen != 0 && inst->dst.file == GRF) {
+      if (inst->mlen != 0 && inst->dst.file == VGRF) {
          insert_gen4_pre_send_dependency_workarounds(block, inst);
          insert_gen4_post_send_dependency_workarounds(block, inst);
          progress = true;
@@ -3087,11 +3087,11 @@ fs_visitor::lower_uniform_pull_constant_loads()
             /* We have to use a message header on Skylake to get SIMD4x2
              * mode.  Reserve space for the register.
             */
-            offset = payload = fs_reg(GRF, alloc.allocate(2));
+            offset = payload = fs_reg(VGRF, alloc.allocate(2));
             offset.reg_offset++;
             inst->mlen = 2;
          } else {
-            offset = payload = fs_reg(GRF, alloc.allocate(1));
+            offset = payload = fs_reg(VGRF, alloc.allocate(1));
             inst->mlen = 1;
          }
 
@@ -3138,7 +3138,7 @@ fs_visitor::lower_load_payload()
       if (inst->opcode != SHADER_OPCODE_LOAD_PAYLOAD)
          continue;
 
-      assert(inst->dst.file == MRF || inst->dst.file == GRF);
+      assert(inst->dst.file == MRF || inst->dst.file == VGRF);
       assert(inst->saturate == false);
       fs_reg dst = inst->dst;
 
@@ -3258,7 +3258,7 @@ fs_visitor::lower_integer_multiplication()
              * single MUL instruction with that value in the proper location.
              */
             if (devinfo->gen < 7) {
-               fs_reg imm(GRF, alloc.allocate(dispatch_width / 8),
+               fs_reg imm(VGRF, alloc.allocate(dispatch_width / 8),
                           inst->dst.type);
                ibld.MOV(imm, inst->src[1]);
                ibld.MUL(inst->dst, imm, inst->src[0]);
@@ -3313,11 +3313,11 @@ fs_visitor::lower_integer_multiplication()
 
             fs_reg orig_dst = inst->dst;
             if (orig_dst.is_null() || orig_dst.file == MRF) {
-               inst->dst = fs_reg(GRF, alloc.allocate(dispatch_width / 8),
+               inst->dst = fs_reg(VGRF, alloc.allocate(dispatch_width / 8),
                                   inst->dst.type);
             }
             fs_reg low = inst->dst;
-            fs_reg high(GRF, alloc.allocate(dispatch_width / 8),
+            fs_reg high(VGRF, alloc.allocate(dispatch_width / 8),
                         inst->dst.type);
 
             if (devinfo->gen >= 7) {
@@ -3511,7 +3511,7 @@ lower_fb_write_logical_send(const fs_builder &bld, fs_inst *inst,
    }
 
    if (payload.aa_dest_stencil_reg) {
-      sources[length] = fs_reg(GRF, bld.shader->alloc.allocate(1));
+      sources[length] = fs_reg(VGRF, bld.shader->alloc.allocate(1));
       bld.group(8, 0).exec_all().annotate("FB write stencil/AA alpha")
          .MOV(sources[length],
               fs_reg(brw_vec8_grf(payload.aa_dest_stencil_reg, 0)));
@@ -3519,7 +3519,7 @@ lower_fb_write_logical_send(const fs_builder &bld, fs_inst *inst,
    }
 
    if (prog_data->uses_omask) {
-      sources[length] = fs_reg(GRF, bld.shader->alloc.allocate(1),
+      sources[length] = fs_reg(VGRF, bld.shader->alloc.allocate(1),
                                BRW_REGISTER_TYPE_UD);
 
       /* Hand over gl_SampleMask.  Only the lower 16 bits of each channel are
@@ -3585,7 +3585,7 @@ lower_fb_write_logical_send(const fs_builder &bld, fs_inst *inst,
    fs_inst *load;
    if (devinfo->gen >= 7) {
       /* Send from the GRF */
-      fs_reg payload = fs_reg(GRF, -1, BRW_REGISTER_TYPE_F);
+      fs_reg payload = fs_reg(VGRF, -1, BRW_REGISTER_TYPE_F);
       load = bld.LOAD_PAYLOAD(payload, sources, length, payload_header_size);
       payload.nr = bld.shader->alloc.allocate(load->regs_written);
       load->dst = payload;
@@ -4026,7 +4026,7 @@ lower_sampler_logical_send_gen7(const fs_builder &bld, fs_inst *inst, opcode op,
    else
       mlen = length * reg_width;
 
-   const fs_reg src_payload = fs_reg(GRF, bld.shader->alloc.allocate(mlen),
+   const fs_reg src_payload = fs_reg(VGRF, bld.shader->alloc.allocate(mlen),
                                      BRW_REGISTER_TYPE_F);
    bld.LOAD_PAYLOAD(src_payload, sources, length, header_size);
 
@@ -4606,7 +4606,7 @@ fs_visitor::dump_instruction(backend_instruction *be_inst, FILE *file)
    }
 
    switch (inst->dst.file) {
-   case GRF:
+   case VGRF:
       fprintf(file, "vgrf%d", inst->dst.nr);
       if (alloc.sizes[inst->dst.nr] != inst->regs_written ||
           inst->dst.subreg_offset)
@@ -4663,7 +4663,7 @@ fs_visitor::dump_instruction(backend_instruction *be_inst, FILE *file)
       if (inst->src[i].abs)
          fprintf(file, "|");
       switch (inst->src[i].file) {
-      case GRF:
+      case VGRF:
          fprintf(file, "vgrf%d", inst->src[i].nr);
          if (alloc.sizes[inst->src[i].nr] != (unsigned)inst->regs_read(i) ||
              inst->src[i].subreg_offset)
@@ -5094,7 +5094,7 @@ fs_visitor::fixup_3src_null_dest()
 {
    foreach_block_and_inst_safe (block, fs_inst, inst, cfg) {
       if (inst->is_3src() && inst->dst.is_null()) {
-         inst->dst = fs_reg(GRF, alloc.allocate(dispatch_width / 8),
+         inst->dst = fs_reg(VGRF, alloc.allocate(dispatch_width / 8),
                             inst->dst.type);
       }
    }
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index 8a93b564c81..55d8d03d73f 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -70,7 +70,7 @@ offset(fs_reg reg, const brw::fs_builder& bld, unsigned delta)
    switch (reg.file) {
    case BAD_FILE:
       break;
-   case GRF:
+   case VGRF:
    case MRF:
    case HW_REG:
    case ATTR:
diff --git a/src/mesa/drivers/dri/i965/brw_fs_builder.h b/src/mesa/drivers/dri/i965/brw_fs_builder.h
index d5763f699d2..22b2f22073f 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_builder.h
+++ b/src/mesa/drivers/dri/i965/brw_fs_builder.h
@@ -179,7 +179,7 @@ namespace brw {
          assert(dispatch_width() <= 32);
 
          if (n > 0)
-            return dst_reg(GRF, shader->alloc.allocate(
+            return dst_reg(VGRF, shader->alloc.allocate(
                               DIV_ROUND_UP(n * type_sz(type) * dispatch_width(),
                                            REG_SIZE)),
                            type);
@@ -596,7 +596,7 @@ namespace brw {
       src_reg
       fix_3src_operand(const src_reg &src) const
       {
-         if (src.file == GRF || src.file == UNIFORM || src.stride > 1) {
+         if (src.file == VGRF || src.file == UNIFORM || src.stride > 1) {
             return src;
          } else {
             dst_reg expanded = vgrf(src.type);
diff --git a/src/mesa/drivers/dri/i965/brw_fs_cmod_propagation.cpp b/src/mesa/drivers/dri/i965/brw_fs_cmod_propagation.cpp
index 883e8d2a49f..8fdc959f992 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_cmod_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_cmod_propagation.cpp
@@ -62,7 +62,7 @@ opt_cmod_propagation_local(bblock_t *block)
            inst->opcode != BRW_OPCODE_MOV) ||
           inst->predicate != BRW_PREDICATE_NONE ||
           !inst->dst.is_null() ||
-          inst->src[0].file != GRF ||
+          inst->src[0].file != VGRF ||
           inst->src[0].abs)
          continue;
 
diff --git a/src/mesa/drivers/dri/i965/brw_fs_combine_constants.cpp b/src/mesa/drivers/dri/i965/brw_fs_combine_constants.cpp
index 127cee4f1e9..0c115f50748 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_combine_constants.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_combine_constants.cpp
@@ -268,7 +268,7 @@ fs_visitor::opt_combine_constants()
 
 
    /* Insert MOVs to load the constant values into GRFs. */
-   fs_reg reg(GRF, alloc.allocate(dispatch_width / 8));
+   fs_reg reg(VGRF, alloc.allocate(dispatch_width / 8));
    reg.stride = 0;
    for (int i = 0; i < table.len; i++) {
       struct imm *imm = &table.imm[i];
@@ -295,7 +295,7 @@ fs_visitor::opt_combine_constants()
    for (int i = 0; i < table.len; i++) {
       foreach_list_typed(reg_link, link, link, table.imm[i].uses) {
          fs_reg *reg = link->reg;
-         reg->file = GRF;
+         reg->file = VGRF;
          reg->nr = table.imm[i].nr;
          reg->subreg_offset = table.imm[i].subreg_offset;
          reg->stride = 0;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
index 79594130526..cb018336927 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
@@ -154,7 +154,7 @@ fs_copy_prop_dataflow::setup_initial_values()
    /* Initialize the COPY and KILL sets. */
    foreach_block (block, cfg) {
       foreach_inst_in_block(fs_inst, inst, block) {
-         if (inst->dst.file != GRF)
+         if (inst->dst.file != VGRF)
             continue;
 
          /* Mark ACP entries which are killed by this instruction. */
@@ -278,19 +278,19 @@ is_logic_op(enum opcode opcode)
 bool
 fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry)
 {
-   if (inst->src[arg].file != GRF)
+   if (inst->src[arg].file != VGRF)
       return false;
 
    if (entry->src.file == IMM)
       return false;
-   assert(entry->src.file == GRF || entry->src.file == UNIFORM ||
+   assert(entry->src.file == VGRF || entry->src.file == UNIFORM ||
           entry->src.file == ATTR);
 
    if (entry->opcode == SHADER_OPCODE_LOAD_PAYLOAD &&
        inst->opcode == SHADER_OPCODE_LOAD_PAYLOAD)
       return false;
 
-   assert(entry->dst.file == GRF);
+   assert(entry->dst.file == VGRF);
    if (inst->src[arg].nr != entry->dst.nr)
       return false;
 
@@ -392,7 +392,7 @@ fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry)
       inst->src[arg].subreg_offset = entry->src.subreg_offset;
       break;
    case ATTR:
-   case GRF:
+   case VGRF:
       {
          /* In this case, we'll just leave the width alone.  The source
           * register could have different widths depending on how it is
@@ -456,10 +456,10 @@ fs_visitor::try_constant_propagate(fs_inst *inst, acp_entry *entry)
       return false;
 
    for (int i = inst->sources - 1; i >= 0; i--) {
-      if (inst->src[i].file != GRF)
+      if (inst->src[i].file != VGRF)
          continue;
 
-      assert(entry->dst.file == GRF);
+      assert(entry->dst.file == VGRF);
       if (inst->src[i].nr != entry->dst.nr)
          continue;
 
@@ -652,8 +652,8 @@ static bool
 can_propagate_from(fs_inst *inst)
 {
    return (inst->opcode == BRW_OPCODE_MOV &&
-           inst->dst.file == GRF &&
-           ((inst->src[0].file == GRF &&
+           inst->dst.file == VGRF &&
+           ((inst->src[0].file == VGRF &&
              (inst->src[0].nr != inst->dst.nr ||
               inst->src[0].reg_offset != inst->dst.reg_offset)) ||
             inst->src[0].file == ATTR ||
@@ -675,7 +675,7 @@ fs_visitor::opt_copy_propagate_local(void *copy_prop_ctx, bblock_t *block,
    foreach_inst_in_block(fs_inst, inst, block) {
       /* Try propagating into this instruction. */
       for (int i = 0; i < inst->sources; i++) {
-         if (inst->src[i].file != GRF)
+         if (inst->src[i].file != VGRF)
             continue;
 
          foreach_in_list(acp_entry, entry, &acp[inst->src[i].nr % ACP_HASH_SIZE]) {
@@ -688,7 +688,7 @@ fs_visitor::opt_copy_propagate_local(void *copy_prop_ctx, bblock_t *block,
       }
 
       /* kill the destination from the ACP */
-      if (inst->dst.file == GRF) {
+      if (inst->dst.file == VGRF) {
          foreach_in_list_safe(acp_entry, entry, &acp[inst->dst.nr % ACP_HASH_SIZE]) {
 	    if (inst->overwrites_reg(entry->dst)) {
 	       entry->remove();
@@ -718,12 +718,12 @@ fs_visitor::opt_copy_propagate_local(void *copy_prop_ctx, bblock_t *block,
          entry->saturate = inst->saturate;
          acp[entry->dst.nr % ACP_HASH_SIZE].push_tail(entry);
       } else if (inst->opcode == SHADER_OPCODE_LOAD_PAYLOAD &&
-                 inst->dst.file == GRF) {
+                 inst->dst.file == VGRF) {
          int offset = 0;
          for (int i = 0; i < inst->sources; i++) {
             int effective_width = i < inst->header_size ? 8 : inst->exec_size;
             int regs_written = effective_width / 8;
-            if (inst->src[i].file == GRF) {
+            if (inst->src[i].file == VGRF) {
                acp_entry *entry = ralloc(copy_prop_ctx, acp_entry);
                entry->dst = inst->dst;
                entry->dst.reg_offset = offset;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
index 08f89d54601..ee31cb6cd80 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
@@ -196,7 +196,7 @@ create_copy_instr(const fs_builder &bld, fs_inst *inst, fs_reg src, bool negate)
          header_size = 0;
       }
 
-      assert(src.file == GRF);
+      assert(src.file == VGRF);
       payload = ralloc_array(bld.shader->mem_ctx, fs_reg, sources);
       for (int i = 0; i < header_size; i++) {
          payload[i] = src;
@@ -262,7 +262,7 @@ fs_visitor::opt_cse_local(bblock_t *block)
                                        .at(block, entry->generator->next);
                int written = entry->generator->regs_written;
 
-               entry->tmp = fs_reg(GRF, alloc.allocate(written),
+               entry->tmp = fs_reg(VGRF, alloc.allocate(written),
                                    entry->generator->dst.type);
 
                create_copy_instr(ibld, entry->generator, entry->tmp, false);
@@ -320,7 +320,7 @@ fs_visitor::opt_cse_local(bblock_t *block)
             /* Kill any AEB entries using registers that don't get reused any
              * more -- a sure sign they'll fail operands_match().
              */
-            if (src_reg->file == GRF && virtual_grf_end[src_reg->nr] < ip) {
+            if (src_reg->file == VGRF && virtual_grf_end[src_reg->nr] < ip) {
                entry->remove();
                ralloc_free(entry);
                break;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_dead_code_eliminate.cpp b/src/mesa/drivers/dri/i965/brw_fs_dead_code_eliminate.cpp
index 1eaf1478877..a50cf6f24f1 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_dead_code_eliminate.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_dead_code_eliminate.cpp
@@ -52,7 +52,7 @@ fs_visitor::dead_code_eliminate()
              sizeof(BITSET_WORD));
 
       foreach_inst_in_block_reverse(fs_inst, inst, block) {
-         if (inst->dst.file == GRF && !inst->has_side_effects()) {
+         if (inst->dst.file == VGRF && !inst->has_side_effects()) {
             bool result_live = false;
 
             if (inst->regs_written == 1) {
@@ -96,7 +96,7 @@ fs_visitor::dead_code_eliminate()
             continue;
          }
 
-         if (inst->dst.file == GRF) {
+         if (inst->dst.file == VGRF) {
             if (!inst->is_partial_write()) {
                int var = live_intervals->var_from_reg(inst->dst);
                for (int i = 0; i < inst->regs_written; i++) {
@@ -110,7 +110,7 @@ fs_visitor::dead_code_eliminate()
          }
 
          for (int i = 0; i < inst->sources; i++) {
-            if (inst->src[i].file == GRF) {
+            if (inst->src[i].file == VGRF) {
                int var = live_intervals->var_from_reg(inst->src[i]);
 
                for (int j = 0; j < inst->regs_read(i); j++) {
diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
index 95649d870ee..16257a9fdc3 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -37,7 +37,7 @@ static enum brw_reg_file
 brw_file_from_reg(fs_reg *reg)
 {
    switch (reg->file) {
-   case GRF:
+   case VGRF:
       return BRW_GENERAL_REGISTER_FILE;
    case MRF:
       return BRW_MESSAGE_REGISTER_FILE;
@@ -61,7 +61,7 @@ brw_reg_from_fs_reg(fs_inst *inst, fs_reg *reg, unsigned gen)
    case MRF:
       assert((reg->nr & ~(1 << 7)) < BRW_MAX_MRF(gen));
       /* Fallthrough */
-   case GRF:
+   case VGRF:
       if (reg->stride == 0) {
          brw_reg = brw_vec1_reg(brw_file_from_reg(reg), reg->nr, 0);
       } else if (inst->exec_size < 8) {
diff --git a/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp b/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp
index ce066a9778e..80fb8c28f81 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp
@@ -117,7 +117,7 @@ fs_live_variables::setup_one_write(struct block_data *bd, fs_inst *inst,
    /* The def[] bitset marks when an initialization in a block completely
     * screens off previous updates of that variable (VGRF channel).
     */
-   if (inst->dst.file == GRF && !inst->is_partial_write()) {
+   if (inst->dst.file == VGRF && !inst->is_partial_write()) {
       if (!BITSET_TEST(bd->use, var))
          BITSET_SET(bd->def, var);
    }
@@ -149,7 +149,7 @@ fs_live_variables::setup_def_use()
 	 for (unsigned int i = 0; i < inst->sources; i++) {
             fs_reg reg = inst->src[i];
 
-            if (reg.file != GRF)
+            if (reg.file != VGRF)
                continue;
 
             for (int j = 0; j < inst->regs_read(i); j++) {
@@ -172,7 +172,7 @@ fs_live_variables::setup_def_use()
          }
 
          /* Set def[] for this instruction */
-         if (inst->dst.file == GRF) {
+         if (inst->dst.file == VGRF) {
             fs_reg reg = inst->dst;
             for (int j = 0; j < inst->regs_written; j++) {
                setup_one_write(bd, inst, ip, reg);
diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index 7a919857a97..c282f835cae 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -1135,7 +1135,7 @@ fs_visitor::emit_percomp(const fs_builder &bld, const fs_inst &inst,
       fs_inst *new_inst = new(mem_ctx) fs_inst(inst);
       new_inst->dst = offset(new_inst->dst, bld, i);
       for (unsigned j = 0; j < new_inst->sources; j++)
-         if (new_inst->src[j].file == GRF)
+         if (new_inst->src[j].file == VGRF)
             new_inst->src[j] = offset(new_inst->src[j], bld, i);
 
       bld.emit(new_inst);
@@ -2450,10 +2450,10 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
        * SURFTYPE_BUFFER.
        */
       int regs_written = 4 * mlen;
-      fs_reg src_payload = fs_reg(GRF, alloc.allocate(mlen),
+      fs_reg src_payload = fs_reg(VGRF, alloc.allocate(mlen),
                                   BRW_REGISTER_TYPE_UD);
       bld.LOAD_PAYLOAD(src_payload, &source, 1, 0);
-      fs_reg buffer_size = fs_reg(GRF, alloc.allocate(regs_written),
+      fs_reg buffer_size = fs_reg(VGRF, alloc.allocate(regs_written),
                                   BRW_REGISTER_TYPE_UD);
       const unsigned index = prog_data->binding_table.ssbo_start + ssbo_index;
       fs_inst *inst = bld.emit(FS_OPCODE_GET_BUFFER_SIZE, buffer_size,
diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
index 99ccdb15e6f..e2234d1324d 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
@@ -35,7 +35,7 @@ using namespace brw;
 static void
 assign_reg(unsigned *reg_hw_locations, fs_reg *reg)
 {
-   if (reg->file == GRF) {
+   if (reg->file == VGRF) {
       reg->nr = reg_hw_locations[reg->nr] + reg->reg_offset;
       reg->reg_offset = 0;
    }
@@ -584,7 +584,7 @@ fs_visitor::assign_regs(bool allow_spilling)
        * that register and set it to the appropriate class.
        */
       if (compiler->fs_reg_sets[rsi].aligned_pairs_class >= 0 &&
-          this->delta_xy[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC].file == GRF &&
+          this->delta_xy[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC].file == VGRF &&
           this->delta_xy[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC].nr == i) {
          c = compiler->fs_reg_sets[rsi].aligned_pairs_class;
       }
@@ -644,11 +644,11 @@ fs_visitor::assign_regs(bool allow_spilling)
        * destination interfere.
        */
       foreach_block_and_inst(block, fs_inst, inst, cfg) {
-         if (inst->dst.file != GRF)
+         if (inst->dst.file != VGRF)
             continue;
 
          for (int i = 0; i < inst->sources; ++i) {
-            if (inst->src[i].file == GRF) {
+            if (inst->src[i].file == VGRF) {
                ra_add_node_interference(g, inst->dst.nr, inst->src[i].nr);
             }
          }
@@ -786,7 +786,7 @@ fs_visitor::choose_spill_reg(struct ra_graph *g)
     */
    foreach_block_and_inst(block, fs_inst, inst, cfg) {
       for (unsigned int i = 0; i < inst->sources; i++) {
-	 if (inst->src[i].file == GRF) {
+	 if (inst->src[i].file == VGRF) {
             spill_costs[inst->src[i].nr] += loop_scale;
 
             /* Register spilling logic assumes full-width registers; smeared
@@ -802,7 +802,7 @@ fs_visitor::choose_spill_reg(struct ra_graph *g)
 	 }
       }
 
-      if (inst->dst.file == GRF) {
+      if (inst->dst.file == VGRF) {
          spill_costs[inst->dst.nr] += inst->regs_written * loop_scale;
 
          if (!inst->dst.is_contiguous()) {
@@ -821,13 +821,13 @@ fs_visitor::choose_spill_reg(struct ra_graph *g)
 	 break;
 
       case SHADER_OPCODE_GEN4_SCRATCH_WRITE:
-	 if (inst->src[0].file == GRF)
+	 if (inst->src[0].file == VGRF)
             no_spill[inst->src[0].nr] = true;
 	 break;
 
       case SHADER_OPCODE_GEN4_SCRATCH_READ:
       case SHADER_OPCODE_GEN7_SCRATCH_READ:
-	 if (inst->dst.file == GRF)
+	 if (inst->dst.file == VGRF)
             no_spill[inst->dst.nr] = true;
 	 break;
 
@@ -883,12 +883,12 @@ fs_visitor::spill_reg(int spill_reg)
     */
    foreach_block_and_inst (block, fs_inst, inst, cfg) {
       for (unsigned int i = 0; i < inst->sources; i++) {
-	 if (inst->src[i].file == GRF &&
+	 if (inst->src[i].file == VGRF &&
              inst->src[i].nr == spill_reg) {
             int regs_read = inst->regs_read(i);
             int subset_spill_offset = (spill_offset +
                                        REG_SIZE * inst->src[i].reg_offset);
-            fs_reg unspill_dst(GRF, alloc.allocate(regs_read));
+            fs_reg unspill_dst(VGRF, alloc.allocate(regs_read));
 
             inst->src[i].nr = unspill_dst.nr;
             inst->src[i].reg_offset = 0;
@@ -898,11 +898,11 @@ fs_visitor::spill_reg(int spill_reg)
 	 }
       }
 
-      if (inst->dst.file == GRF &&
+      if (inst->dst.file == VGRF &&
           inst->dst.nr == spill_reg) {
          int subset_spill_offset = (spill_offset +
                                     REG_SIZE * inst->dst.reg_offset);
-         fs_reg spill_src(GRF, alloc.allocate(inst->regs_written));
+         fs_reg spill_src(VGRF, alloc.allocate(inst->regs_written));
 
          inst->dst.nr = spill_src.nr;
          inst->dst.reg_offset = 0;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_register_coalesce.cpp b/src/mesa/drivers/dri/i965/brw_fs_register_coalesce.cpp
index ce1d66e7ed6..4578ad597c2 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_register_coalesce.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_register_coalesce.cpp
@@ -70,11 +70,11 @@ is_coalesce_candidate(const fs_visitor *v, const fs_inst *inst)
         inst->opcode != SHADER_OPCODE_LOAD_PAYLOAD) ||
        inst->is_partial_write() ||
        inst->saturate ||
-       inst->src[0].file != GRF ||
+       inst->src[0].file != VGRF ||
        inst->src[0].negate ||
        inst->src[0].abs ||
        !inst->src[0].is_contiguous() ||
-       inst->dst.file != GRF ||
+       inst->dst.file != VGRF ||
        inst->dst.type != inst->src[0].type) {
       return false;
    }
@@ -250,7 +250,7 @@ fs_visitor::register_coalesce()
       }
 
       foreach_block_and_inst(block, fs_inst, scan_inst, cfg) {
-         if (scan_inst->dst.file == GRF &&
+         if (scan_inst->dst.file == VGRF &&
              scan_inst->dst.nr == src_reg) {
             scan_inst->dst.nr = dst_reg;
             scan_inst->dst.reg_offset =
@@ -258,7 +258,7 @@ fs_visitor::register_coalesce()
          }
 
          for (int j = 0; j < scan_inst->sources; j++) {
-            if (scan_inst->src[j].file == GRF &&
+            if (scan_inst->src[j].file == VGRF &&
                 scan_inst->src[j].nr == src_reg) {
                scan_inst->src[j].nr = dst_reg;
                scan_inst->src[j].reg_offset =
diff --git a/src/mesa/drivers/dri/i965/brw_fs_saturate_propagation.cpp b/src/mesa/drivers/dri/i965/brw_fs_saturate_propagation.cpp
index 0c48dcd180c..52570943996 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_saturate_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_saturate_propagation.cpp
@@ -53,9 +53,9 @@ opt_saturate_propagation_local(fs_visitor *v, bblock_t *block)
 
       if (inst->opcode != BRW_OPCODE_MOV ||
           !inst->saturate ||
-          inst->dst.file != GRF ||
+          inst->dst.file != VGRF ||
           inst->dst.type != inst->src[0].type ||
-          inst->src[0].file != GRF ||
+          inst->src[0].file != VGRF ||
           inst->src[0].abs ||
           inst->src[0].negate)
          continue;
@@ -90,7 +90,7 @@ opt_saturate_propagation_local(fs_visitor *v, bblock_t *block)
             break;
          }
          for (int i = 0; i < scan_inst->sources; i++) {
-            if (scan_inst->src[i].file == GRF &&
+            if (scan_inst->src[i].file == VGRF &&
                 scan_inst->src[i].nr == inst->src[0].nr &&
                 scan_inst->src[i].reg_offset == inst->src[0].reg_offset) {
                if (scan_inst->opcode != BRW_OPCODE_MOV ||
diff --git a/src/mesa/drivers/dri/i965/brw_fs_validate.cpp b/src/mesa/drivers/dri/i965/brw_fs_validate.cpp
index a79c343ce02..90edd023b30 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_validate.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_validate.cpp
@@ -42,13 +42,13 @@ void
 fs_visitor::validate()
 {
    foreach_block_and_inst (block, fs_inst, inst, cfg) {
-      if (inst->dst.file == GRF) {
+      if (inst->dst.file == VGRF) {
          fsv_assert(inst->dst.reg_offset + inst->regs_written <=
                     alloc.sizes[inst->dst.nr]);
       }
 
       for (unsigned i = 0; i < inst->sources; i++) {
-         if (inst->src[i].file == GRF) {
+         if (inst->src[i].file == VGRF) {
             fsv_assert(inst->src[i].reg_offset + inst->regs_read(i) <=
                        (int)alloc.sizes[inst->src[i].nr]);
          }
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index 4b9f9751f80..a7bd9cea7af 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -143,7 +143,7 @@ fs_visitor::rescale_texcoord(fs_reg coordinate, int coord_components,
     * tracking to get the scaling factor.
     */
    if (devinfo->gen < 6 && is_rect) {
-      fs_reg dst = fs_reg(GRF, alloc.allocate(coord_components));
+      fs_reg dst = fs_reg(VGRF, alloc.allocate(coord_components));
       fs_reg src = coordinate;
       coordinate = dst;
 
@@ -581,7 +581,7 @@ fs_visitor::emit_interpolation_setup_gen6()
        * Thus we can do a single add(16) in SIMD8 or an add(32) in SIMD16 to
        * compute our pixel centers.
        */
-      fs_reg int_pixel_xy(GRF, alloc.allocate(dispatch_width / 8),
+      fs_reg int_pixel_xy(VGRF, alloc.allocate(dispatch_width / 8),
                           BRW_REGISTER_TYPE_UW);
 
       const fs_builder dbld = abld.exec_all().group(dispatch_width * 2, 0);
@@ -908,7 +908,7 @@ fs_visitor::emit_urb_writes(const fs_reg &gs_vertex_count)
     *    "The write data payload can be between 1 and 8 message phases long."
     */
    if (vue_map->slots_valid == 0) {
-      fs_reg payload = fs_reg(GRF, alloc.allocate(2), BRW_REGISTER_TYPE_UD);
+      fs_reg payload = fs_reg(VGRF, alloc.allocate(2), BRW_REGISTER_TYPE_UD);
       bld.exec_all().MOV(payload, fs_reg(retype(brw_vec8_grf(1, 0),
                                                 BRW_REGISTER_TYPE_UD)));
 
@@ -975,7 +975,7 @@ fs_visitor::emit_urb_writes(const fs_reg &gs_vertex_count)
             break;
          }
 
-         fs_reg zero(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD);
+         fs_reg zero(VGRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD);
          bld.MOV(zero, fs_reg(0u));
 
          sources[length++] = zero;
@@ -1027,7 +1027,7 @@ fs_visitor::emit_urb_writes(const fs_reg &gs_vertex_count)
              * temp register and use that for the payload.
              */
             for (int i = 0; i < 4; i++) {
-               fs_reg reg = fs_reg(GRF, alloc.allocate(1), outputs[varying].type);
+               fs_reg reg = fs_reg(VGRF, alloc.allocate(1), outputs[varying].type);
                fs_reg src = offset(this->outputs[varying], bld, i);
                set_saturate(true, bld.MOV(reg, src));
                sources[length++] = reg;
@@ -1053,7 +1053,7 @@ fs_visitor::emit_urb_writes(const fs_reg &gs_vertex_count)
       if (flush) {
          fs_reg *payload_sources =
             ralloc_array(mem_ctx, fs_reg, length + header_size);
-         fs_reg payload = fs_reg(GRF, alloc.allocate(length + header_size),
+         fs_reg payload = fs_reg(VGRF, alloc.allocate(length + header_size),
                                  BRW_REGISTER_TYPE_F);
          payload_sources[0] =
             fs_reg(retype(brw_vec8_grf(1, 0), BRW_REGISTER_TYPE_UD));
@@ -1091,7 +1091,7 @@ fs_visitor::emit_cs_terminate()
     * make sure it uses the appropriate register range.
     */
    struct brw_reg g0 = retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD);
-   fs_reg payload = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD);
+   fs_reg payload = fs_reg(VGRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD);
    bld.group(8, 0).exec_all().MOV(payload, g0);
 
    /* Send a message to the thread spawner to terminate the thread. */
@@ -1108,7 +1108,7 @@ fs_visitor::emit_barrier()
    /* We are getting the barrier ID from the compute shader header */
    assert(stage == MESA_SHADER_COMPUTE);
 
-   fs_reg payload = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD);
+   fs_reg payload = fs_reg(VGRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD);
 
    const fs_builder pbld = bld.exec_all().group(8, 0);
 
diff --git a/src/mesa/drivers/dri/i965/brw_ir_fs.h b/src/mesa/drivers/dri/i965/brw_ir_fs.h
index 9309ba58e68..a3b3a4fec78 100644
--- a/src/mesa/drivers/dri/i965/brw_ir_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_ir_fs.h
@@ -90,7 +90,7 @@ byte_offset(fs_reg reg, unsigned delta)
    switch (reg.file) {
    case BAD_FILE:
       break;
-   case GRF:
+   case VGRF:
    case ATTR:
       reg.reg_offset += delta / 32;
       break;
@@ -117,7 +117,7 @@ horiz_offset(fs_reg reg, unsigned delta)
        * horizontal offset should be a harmless no-op.
        */
       break;
-   case GRF:
+   case VGRF:
    case MRF:
    case ATTR:
       return byte_offset(reg, delta * reg.stride * type_sz(reg.type));
@@ -159,7 +159,7 @@ half(fs_reg reg, unsigned idx)
    case IMM:
       return reg;
 
-   case GRF:
+   case VGRF:
    case MRF:
       return horiz_offset(reg, 8 * idx);
 
diff --git a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
index 2f92595e215..fb5df2d043f 100644
--- a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
+++ b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
@@ -583,7 +583,7 @@ fs_instruction_scheduler::count_reads_remaining(backend_instruction *be)
       if (is_src_duplicate(inst, i))
          continue;
 
-      if (inst->src[i].file == GRF) {
+      if (inst->src[i].file == VGRF) {
          reads_remaining[inst->src[i].nr]++;
       } else if (inst->src[i].file == HW_REG &&
                  inst->src[i].brw_reg::file == BRW_GENERAL_REGISTER_FILE) {
@@ -660,7 +660,7 @@ fs_instruction_scheduler::update_register_pressure(backend_instruction *be)
    if (!reads_remaining)
       return;
 
-   if (inst->dst.file == GRF) {
+   if (inst->dst.file == VGRF) {
       written[inst->dst.nr] = true;
    }
 
@@ -668,7 +668,7 @@ fs_instruction_scheduler::update_register_pressure(backend_instruction *be)
       if (is_src_duplicate(inst, i))
           continue;
 
-      if (inst->src[i].file == GRF) {
+      if (inst->src[i].file == VGRF) {
          reads_remaining[inst->src[i].nr]--;
       } else if (inst->src[i].file == HW_REG &&
                  inst->src[i].brw_reg::file == BRW_GENERAL_REGISTER_FILE &&
@@ -685,7 +685,7 @@ fs_instruction_scheduler::get_register_pressure_benefit(backend_instruction *be)
    fs_inst *inst = (fs_inst *)be;
    int benefit = 0;
 
-   if (inst->dst.file == GRF) {
+   if (inst->dst.file == VGRF) {
       if (!BITSET_TEST(livein[block_idx], inst->dst.nr) &&
           !written[inst->dst.nr])
          benefit -= v->alloc.sizes[inst->dst.nr];
@@ -695,7 +695,7 @@ fs_instruction_scheduler::get_register_pressure_benefit(backend_instruction *be)
       if (is_src_duplicate(inst, i))
          continue;
 
-      if (inst->src[i].file == GRF &&
+      if (inst->src[i].file == VGRF &&
           !BITSET_TEST(liveout[block_idx], inst->src[i].nr) &&
           reads_remaining[inst->src[i].nr] == 1)
          benefit += v->alloc.sizes[inst->src[i].nr];
@@ -950,7 +950,7 @@ fs_instruction_scheduler::calculate_deps()
 
       /* read-after-write deps. */
       for (int i = 0; i < inst->sources; i++) {
-         if (inst->src[i].file == GRF) {
+         if (inst->src[i].file == VGRF) {
             if (post_reg_alloc) {
                for (int r = 0; r < inst->regs_read(i); r++)
                   add_dep(last_grf_write[inst->src[i].nr + r], n);
@@ -999,7 +999,7 @@ fs_instruction_scheduler::calculate_deps()
       }
 
       /* write-after-write deps. */
-      if (inst->dst.file == GRF) {
+      if (inst->dst.file == VGRF) {
          if (post_reg_alloc) {
             for (int r = 0; r < inst->regs_written; r++) {
                add_dep(last_grf_write[inst->dst.nr + r], n);
@@ -1076,7 +1076,7 @@ fs_instruction_scheduler::calculate_deps()
 
       /* write-after-read deps. */
       for (int i = 0; i < inst->sources; i++) {
-         if (inst->src[i].file == GRF) {
+         if (inst->src[i].file == VGRF) {
             if (post_reg_alloc) {
                for (int r = 0; r < inst->regs_read(i); r++)
                   add_dep(n, last_grf_write[inst->src[i].nr + r], 0);
@@ -1127,7 +1127,7 @@ fs_instruction_scheduler::calculate_deps()
       /* Update the things this instruction wrote, so earlier reads
        * can mark this as WAR dependency.
        */
-      if (inst->dst.file == GRF) {
+      if (inst->dst.file == VGRF) {
          if (post_reg_alloc) {
             for (int r = 0; r < inst->regs_written; r++)
                last_grf_write[inst->dst.nr + r] = n;
@@ -1215,7 +1215,7 @@ vec4_instruction_scheduler::calculate_deps()
 
       /* read-after-write deps. */
       for (int i = 0; i < 3; i++) {
-         if (inst->src[i].file == GRF) {
+         if (inst->src[i].file == VGRF) {
             for (unsigned j = 0; j < inst->regs_read(i); ++j)
                add_dep(last_grf_write[inst->src[i].nr + j], n);
          } else if (inst->src[i].file == HW_REG &&
@@ -1258,7 +1258,7 @@ vec4_instruction_scheduler::calculate_deps()
       }
 
       /* write-after-write deps. */
-      if (inst->dst.file == GRF) {
+      if (inst->dst.file == VGRF) {
          for (unsigned j = 0; j < inst->regs_written; ++j) {
             add_dep(last_grf_write[inst->dst.nr + j], n);
             last_grf_write[inst->dst.nr + j] = n;
@@ -1313,7 +1313,7 @@ vec4_instruction_scheduler::calculate_deps()
 
       /* write-after-read deps. */
       for (int i = 0; i < 3; i++) {
-         if (inst->src[i].file == GRF) {
+         if (inst->src[i].file == VGRF) {
             for (unsigned j = 0; j < inst->regs_read(i); ++j)
                add_dep(n, last_grf_write[inst->src[i].nr + j]);
          } else if (inst->src[i].file == HW_REG &&
@@ -1354,7 +1354,7 @@ vec4_instruction_scheduler::calculate_deps()
       /* Update the things this instruction wrote, so earlier reads
        * can mark this as WAR dependency.
        */
-      if (inst->dst.file == GRF) {
+      if (inst->dst.file == VGRF) {
          for (unsigned j = 0; j < inst->regs_written; ++j)
             last_grf_write[inst->dst.nr + j] = n;
       } else if (inst->dst.file == MRF) {
diff --git a/src/mesa/drivers/dri/i965/brw_shader.h b/src/mesa/drivers/dri/i965/brw_shader.h
index 5632378eed7..bc723adc317 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.h
+++ b/src/mesa/drivers/dri/i965/brw_shader.h
@@ -39,7 +39,7 @@
 #define MAX_VGRF_SIZE 16
 
 enum PACKED register_file {
-   GRF,
+   VGRF,
    MRF,
    IMM,
    HW_REG, /* a struct brw_reg */
@@ -61,7 +61,7 @@ struct backend_reg : public brw_reg
    bool is_accumulator() const;
    bool in_range(const backend_reg &r, unsigned n) const;
 
-   enum register_file file; /**< Register file: GRF, MRF, IMM. */
+   enum register_file file; /**< Register file: VGRF, MRF, IMM. */
 
    /**
     * Offset within the virtual register.
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index 0570b00fadd..334ba8395dd 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -467,7 +467,7 @@ vec4_visitor::opt_reduce_swizzle()
 
       /* Update sources' swizzles. */
       for (int i = 0; i < 3; i++) {
-         if (inst->src[i].file != GRF &&
+         if (inst->src[i].file != VGRF &&
              inst->src[i].file != ATTR &&
              inst->src[i].file != UNIFORM)
             continue;
@@ -912,7 +912,7 @@ vec4_visitor::opt_set_dependency_control()
           */
          for (int i = 0; i < 3; i++) {
             int reg = inst->src[i].nr + inst->src[i].reg_offset;
-            if (inst->src[i].file == GRF) {
+            if (inst->src[i].file == VGRF) {
                last_grf_write[reg] = NULL;
             } else if (inst->src[i].file == HW_REG) {
                memset(last_grf_write, 0, sizeof(last_grf_write));
@@ -931,7 +931,7 @@ vec4_visitor::opt_set_dependency_control()
           * against a previous one writing to its destination.
           */
          int reg = inst->dst.nr + inst->dst.reg_offset;
-         if (inst->dst.file == GRF) {
+         if (inst->dst.file == VGRF) {
             if (last_grf_write[reg] &&
                 !(inst->dst.writemask & grf_channels_written[reg])) {
                last_grf_write[reg]->no_dd_clear = true;
@@ -1045,9 +1045,9 @@ vec4_visitor::opt_register_coalesce()
       next_ip++;
 
       if (inst->opcode != BRW_OPCODE_MOV ||
-          (inst->dst.file != GRF && inst->dst.file != MRF) ||
+          (inst->dst.file != VGRF && inst->dst.file != MRF) ||
 	  inst->predicate ||
-	  inst->src[0].file != GRF ||
+	  inst->src[0].file != VGRF ||
 	  inst->dst.type != inst->src[0].type ||
 	  inst->src[0].abs || inst->src[0].negate || inst->src[0].reladdr)
 	 continue;
@@ -1110,7 +1110,7 @@ vec4_visitor::opt_register_coalesce()
 
                if (devinfo->gen == 6) {
                   /* gen6 math instructions must have the destination be
-                   * GRF, so no compute-to-MRF for them.
+                   * VGRF, so no compute-to-MRF for them.
                    */
                   if (scan_inst->is_math()) {
                      break;
@@ -1198,7 +1198,7 @@ vec4_visitor::opt_register_coalesce()
 	  */
          vec4_instruction *scan_inst = _scan_inst;
 	 while (scan_inst != inst) {
-	    if (scan_inst->dst.file == GRF &&
+	    if (scan_inst->dst.file == VGRF &&
                 scan_inst->dst.nr == inst->src[0].nr &&
 		scan_inst->dst.reg_offset == inst->src[0].reg_offset) {
                scan_inst->reswizzle(inst->dst.writemask,
@@ -1301,11 +1301,11 @@ vec4_visitor::split_virtual_grfs()
     * to split.
     */
    foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
-      if (inst->dst.file == GRF && inst->regs_written > 1)
+      if (inst->dst.file == VGRF && inst->regs_written > 1)
          split_grf[inst->dst.nr] = false;
 
       for (int i = 0; i < 3; i++) {
-         if (inst->src[i].file == GRF && inst->regs_read(i) > 1)
+         if (inst->src[i].file == VGRF && inst->regs_read(i) > 1)
             split_grf[inst->src[i].nr] = false;
       }
    }
@@ -1327,14 +1327,14 @@ vec4_visitor::split_virtual_grfs()
    }
 
    foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
-      if (inst->dst.file == GRF && split_grf[inst->dst.nr] &&
+      if (inst->dst.file == VGRF && split_grf[inst->dst.nr] &&
           inst->dst.reg_offset != 0) {
          inst->dst.nr = (new_virtual_grf[inst->dst.nr] +
                           inst->dst.reg_offset - 1);
          inst->dst.reg_offset = 0;
       }
       for (int i = 0; i < 3; i++) {
-         if (inst->src[i].file == GRF && split_grf[inst->src[i].nr] &&
+         if (inst->src[i].file == VGRF && split_grf[inst->src[i].nr] &&
              inst->src[i].reg_offset != 0) {
             inst->src[i].nr = (new_virtual_grf[inst->src[i].nr] +
                                 inst->src[i].reg_offset - 1);
@@ -1378,7 +1378,7 @@ vec4_visitor::dump_instruction(backend_instruction *be_inst, FILE *file)
    fprintf(file, " ");
 
    switch (inst->dst.file) {
-   case GRF:
+   case VGRF:
       fprintf(file, "vgrf%d.%d", inst->dst.nr, inst->dst.reg_offset);
       break;
    case MRF:
@@ -1441,7 +1441,7 @@ vec4_visitor::dump_instruction(backend_instruction *be_inst, FILE *file)
       if (inst->src[i].abs)
          fprintf(file, "|");
       switch (inst->src[i].file) {
-      case GRF:
+      case VGRF:
          fprintf(file, "vgrf%d", inst->src[i].nr);
          break;
       case ATTR:
@@ -1509,7 +1509,7 @@ vec4_visitor::dump_instruction(backend_instruction *be_inst, FILE *file)
 
       /* Don't print .0; and only VGRFs have reg_offsets and sizes */
       if (inst->src[i].reg_offset != 0 &&
-          inst->src[i].file == GRF &&
+          inst->src[i].file == VGRF &&
           alloc.sizes[inst->src[i].nr] != 1)
          fprintf(file, ".%d", inst->src[i].reg_offset);
 
@@ -1785,7 +1785,7 @@ vec4_visitor::convert_to_hw_regs()
          struct src_reg &src = inst->src[i];
          struct brw_reg reg;
          switch (src.file) {
-         case GRF:
+         case VGRF:
             reg = brw_vec8_grf(src.nr + src.reg_offset, 0);
             reg.type = src.type;
             reg.swizzle = src.swizzle;
@@ -1831,7 +1831,7 @@ vec4_visitor::convert_to_hw_regs()
       struct brw_reg reg;
 
       switch (inst->dst.file) {
-      case GRF:
+      case VGRF:
          reg = brw_vec8_grf(dst.nr + dst.reg_offset, 0);
          reg.type = dst.type;
          reg.writemask = dst.writemask;
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_builder.h b/src/mesa/drivers/dri/i965/brw_vec4_builder.h
index a90cadb77db..a76a4ce4639 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_builder.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4_builder.h
@@ -144,7 +144,7 @@ namespace brw {
          assert(dispatch_width() <= 32);
 
          if (n > 0)
-            return retype(dst_reg(GRF, shader->alloc.allocate(
+            return retype(dst_reg(VGRF, shader->alloc.allocate(
                                      n * DIV_ROUND_UP(type_sz(type), 4))),
                            type);
          else
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_cmod_propagation.cpp b/src/mesa/drivers/dri/i965/brw_vec4_cmod_propagation.cpp
index 329f24269ce..7aa8f5d9b8f 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_cmod_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_cmod_propagation.cpp
@@ -48,7 +48,7 @@ opt_cmod_propagation_local(bblock_t *block)
            inst->opcode != BRW_OPCODE_MOV) ||
           inst->predicate != BRW_PREDICATE_NONE ||
           !inst->dst.is_null() ||
-          inst->src[0].file != GRF ||
+          inst->src[0].file != VGRF ||
           inst->src[0].abs)
          continue;
 
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp b/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp
index b986c12eeb0..3b76e36a803 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp
@@ -47,7 +47,7 @@ is_direct_copy(vec4_instruction *inst)
 {
    return (inst->opcode == BRW_OPCODE_MOV &&
 	   !inst->predicate &&
-	   inst->dst.file == GRF &&
+	   inst->dst.file == VGRF &&
 	   !inst->dst.reladdr &&
 	   !inst->src[0].reladdr &&
 	   (inst->dst.type == inst->src[0].type ||
@@ -70,8 +70,8 @@ is_channel_updated(vec4_instruction *inst, src_reg *values[4], int ch)
    const src_reg *src = values[ch];
 
    /* consider GRF only */
-   assert(inst->dst.file == GRF);
-   if (!src || src->file != GRF)
+   assert(inst->dst.file == VGRF);
+   if (!src || src->file != VGRF)
       return false;
 
    return (src->in_range(inst->dst, inst->regs_written) &&
@@ -292,7 +292,7 @@ try_copy_propagate(const struct brw_device_info *devinfo,
 
    /* Check that we can propagate that value */
    if (value.file != UNIFORM &&
-       value.file != GRF &&
+       value.file != VGRF &&
        value.file != ATTR)
       return false;
 
@@ -416,14 +416,14 @@ vec4_visitor::opt_copy_propagation(bool do_constant_prop)
       }
 
       /* For each source arg, see if each component comes from a copy
-       * from the same type file (IMM, GRF, UNIFORM), and try
+       * from the same type file (IMM, VGRF, UNIFORM), and try
        * optimizing out access to the copy result
        */
       for (int i = 2; i >= 0; i--) {
 	 /* Copied values end up in GRFs, and we don't track reladdr
 	  * accesses.
 	  */
-	 if (inst->src[i].file != GRF ||
+	 if (inst->src[i].file != VGRF ||
 	     inst->src[i].reladdr)
 	    continue;
 
@@ -472,7 +472,7 @@ vec4_visitor::opt_copy_propagation(bool do_constant_prop)
       }
 
       /* Track available source registers. */
-      if (inst->dst.file == GRF) {
+      if (inst->dst.file == VGRF) {
 	 const int reg =
             alloc.offsets[inst->dst.nr] + inst->dst.reg_offset;
 
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp b/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp
index 259f6042d19..cdb76e4759d 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp
@@ -174,7 +174,7 @@ vec4_visitor::opt_cse_local(bblock_t *block)
              */
             bool no_existing_temp = entry->tmp.file == BAD_FILE;
             if (no_existing_temp && !entry->generator->dst.is_null()) {
-               entry->tmp = retype(src_reg(GRF, alloc.allocate(
+               entry->tmp = retype(src_reg(VGRF, alloc.allocate(
                                               entry->generator->regs_written),
                                            NULL), inst->dst.type);
 
@@ -242,7 +242,7 @@ vec4_visitor::opt_cse_local(bblock_t *block)
             /* Kill any AEB entries using registers that don't get reused any
              * more -- a sure sign they'll fail operands_match().
              */
-            if (src->file == GRF) {
+            if (src->file == VGRF) {
                if (var_range_end(var_from_reg(alloc, *src), 4) < ip) {
                   entry->remove();
                   ralloc_free(entry);
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_dead_code_eliminate.cpp b/src/mesa/drivers/dri/i965/brw_vec4_dead_code_eliminate.cpp
index e8a51d6e066..58aed810fcd 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_dead_code_eliminate.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_dead_code_eliminate.cpp
@@ -78,11 +78,11 @@ vec4_visitor::dead_code_eliminate()
              sizeof(BITSET_WORD));
 
       foreach_inst_in_block_reverse(vec4_instruction, inst, block) {
-         if ((inst->dst.file == GRF && !inst->has_side_effects()) ||
+         if ((inst->dst.file == VGRF && !inst->has_side_effects()) ||
              (inst->dst.is_null() && inst->writes_flag())){
             bool result_live[4] = { false };
 
-            if (inst->dst.file == GRF) {
+            if (inst->dst.file == VGRF) {
                for (unsigned i = 0; i < inst->regs_written; i++) {
                   for (int c = 0; c < 4; c++)
                      result_live[c] |= BITSET_TEST(
@@ -134,7 +134,7 @@ vec4_visitor::dead_code_eliminate()
             }
          }
 
-         if (inst->dst.file == GRF && !inst->predicate) {
+         if (inst->dst.file == VGRF && !inst->predicate) {
             for (unsigned i = 0; i < inst->regs_written; i++) {
                for (int c = 0; c < 4; c++) {
                   if (inst->dst.writemask & (1 << c)) {
@@ -151,7 +151,7 @@ vec4_visitor::dead_code_eliminate()
          }
 
          for (int i = 0; i < 3; i++) {
-            if (inst->src[i].file == GRF) {
+            if (inst->src[i].file == VGRF) {
                for (unsigned j = 0; j < inst->regs_read(i); j++) {
                   for (int c = 0; c < 4; c++) {
                      BITSET_SET(live, var_from_reg(alloc,
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp b/src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp
index aa9a6572eee..57d5fbb75dd 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp
@@ -75,7 +75,7 @@ vec4_live_variables::setup_def_use()
 
 	 /* Set use[] for this instruction */
 	 for (unsigned int i = 0; i < 3; i++) {
-	    if (inst->src[i].file == GRF) {
+	    if (inst->src[i].file == VGRF) {
                for (unsigned j = 0; j < inst->regs_read(i); j++) {
                   for (int c = 0; c < 4; c++) {
                      const unsigned v =
@@ -97,7 +97,7 @@ vec4_live_variables::setup_def_use()
 	  * are the things that screen off preceding definitions of a
 	  * variable, and thus qualify for being in def[].
 	  */
-	 if (inst->dst.file == GRF &&
+	 if (inst->dst.file == VGRF &&
 	     (!inst->predicate || inst->opcode == BRW_OPCODE_SEL)) {
             for (unsigned i = 0; i < inst->regs_written; i++) {
                for (int c = 0; c < 4; c++) {
@@ -256,7 +256,7 @@ vec4_visitor::calculate_live_intervals()
    int ip = 0;
    foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
       for (unsigned int i = 0; i < 3; i++) {
-	 if (inst->src[i].file == GRF) {
+	 if (inst->src[i].file == VGRF) {
             for (unsigned j = 0; j < inst->regs_read(i); j++) {
                for (int c = 0; c < 4; c++) {
                   const unsigned v =
@@ -268,7 +268,7 @@ vec4_visitor::calculate_live_intervals()
 	 }
       }
 
-      if (inst->dst.file == GRF) {
+      if (inst->dst.file == VGRF) {
          for (unsigned i = 0; i < inst->regs_written; i++) {
             for (int c = 0; c < 4; c++) {
                if (inst->dst.writemask & (1 << c)) {
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_live_variables.h b/src/mesa/drivers/dri/i965/brw_vec4_live_variables.h
index 4aa98d72e75..12d281eb245 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_live_variables.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4_live_variables.h
@@ -82,7 +82,7 @@ inline unsigned
 var_from_reg(const simple_allocator &alloc, const src_reg &reg,
              unsigned c = 0)
 {
-   assert(reg.file == GRF && reg.nr < alloc.count &&
+   assert(reg.file == VGRF && reg.nr < alloc.count &&
           reg.reg_offset < alloc.sizes[reg.nr] && c < 4);
    return (4 * (alloc.offsets[reg.nr] + reg.reg_offset) +
            BRW_GET_SWZ(reg.swizzle, c));
@@ -92,7 +92,7 @@ inline unsigned
 var_from_reg(const simple_allocator &alloc, const dst_reg &reg,
              unsigned c = 0)
 {
-   assert(reg.file == GRF && reg.nr < alloc.count &&
+   assert(reg.file == VGRF && reg.nr < alloc.count &&
           reg.reg_offset < alloc.sizes[reg.nr] && c < 4);
    return 4 * (alloc.offsets[reg.nr] + reg.reg_offset) + c;
 }
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
index 8b6912e45f2..27933d7d61c 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
@@ -162,7 +162,7 @@ vec4_visitor::nir_emit_impl(nir_function_impl *impl)
       unsigned array_elems =
          reg->num_array_elems == 0 ? 1 : reg->num_array_elems;
 
-      nir_locals[reg->index] = dst_reg(GRF, alloc.allocate(array_elems));
+      nir_locals[reg->index] = dst_reg(VGRF, alloc.allocate(array_elems));
    }
 
    nir_ssa_values = ralloc_array(mem_ctx, dst_reg, impl->ssa_alloc);
@@ -291,7 +291,7 @@ dst_reg
 vec4_visitor::get_nir_dest(nir_dest dest)
 {
    if (dest.is_ssa) {
-      dst_reg dst = dst_reg(GRF, alloc.allocate(1));
+      dst_reg dst = dst_reg(VGRF, alloc.allocate(1));
       nir_ssa_values[dest.ssa.index] = dst;
       return dst;
    } else {
@@ -351,7 +351,7 @@ vec4_visitor::get_nir_src(nir_src src, unsigned num_components)
 void
 vec4_visitor::nir_emit_load_const(nir_load_const_instr *instr)
 {
-   dst_reg reg = dst_reg(GRF, alloc.allocate(1));
+   dst_reg reg = dst_reg(VGRF, alloc.allocate(1));
    reg.type =  BRW_REGISTER_TYPE_D;
 
    unsigned remaining = brw_writemask_for_size(instr->def.num_components);
@@ -1731,7 +1731,7 @@ vec4_visitor::nir_emit_texture(nir_tex_instr *instr)
 void
 vec4_visitor::nir_emit_undef(nir_ssa_undef_instr *instr)
 {
-   nir_ssa_values[instr->def.index] = dst_reg(GRF, alloc.allocate(1));
+   nir_ssa_values[instr->def.index] = dst_reg(VGRF, alloc.allocate(1));
 }
 
 }
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp
index adad4e514d8..6d27a4694d3 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp
@@ -34,7 +34,7 @@ namespace brw {
 static void
 assign(unsigned int *reg_hw_locations, backend_reg *reg)
 {
-   if (reg->file == GRF) {
+   if (reg->file == VGRF) {
       reg->nr = reg_hw_locations[reg->nr] + reg->reg_offset;
       reg->reg_offset = 0;
    }
@@ -55,11 +55,11 @@ vec4_visitor::reg_allocate_trivial()
    }
 
    foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
-      if (inst->dst.file == GRF)
+      if (inst->dst.file == VGRF)
          virtual_grf_used[inst->dst.nr] = true;
 
       for (unsigned i = 0; i < 3; i++) {
-	 if (inst->src[i].file == GRF)
+	 if (inst->src[i].file == VGRF)
             virtual_grf_used[inst->src[i].nr] = true;
       }
    }
@@ -292,12 +292,12 @@ static bool
 can_use_scratch_for_source(const vec4_instruction *inst, unsigned i,
                            unsigned scratch_reg)
 {
-   assert(inst->src[i].file == GRF);
+   assert(inst->src[i].file == VGRF);
    bool prev_inst_read_scratch_reg = false;
 
    /* See if any previous source in the same instructions reads scratch_reg */
    for (unsigned n = 0; n < i; n++) {
-      if (inst->src[n].file == GRF && inst->src[n].nr == scratch_reg)
+      if (inst->src[n].file == VGRF && inst->src[n].nr == scratch_reg)
          prev_inst_read_scratch_reg = true;
    }
 
@@ -310,7 +310,7 @@ can_use_scratch_for_source(const vec4_instruction *inst, unsigned i,
        * it if the write is not conditional and the channels we write are
        * compatible with our read mask
        */
-      if (prev_inst->dst.file == GRF && prev_inst->dst.nr == scratch_reg) {
+      if (prev_inst->dst.file == VGRF && prev_inst->dst.nr == scratch_reg) {
          return (!prev_inst->predicate || prev_inst->opcode == BRW_OPCODE_SEL) &&
                 (brw_mask_for_swizzle(inst->src[i].swizzle) &
                  ~prev_inst->dst.writemask) == 0;
@@ -329,7 +329,7 @@ can_use_scratch_for_source(const vec4_instruction *inst, unsigned i,
        */
       int n;
       for (n = 0; n < 3; n++) {
-         if (prev_inst->src[n].file == GRF &&
+         if (prev_inst->src[n].file == VGRF &&
              prev_inst->src[n].nr == scratch_reg) {
             prev_inst_read_scratch_reg = true;
             break;
@@ -374,7 +374,7 @@ vec4_visitor::evaluate_spill_costs(float *spill_costs, bool *no_spill)
     */
    foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
       for (unsigned int i = 0; i < 3; i++) {
-         if (inst->src[i].file == GRF) {
+         if (inst->src[i].file == VGRF) {
             /* We will only unspill src[i] it it wasn't unspilled for the
              * previous instruction, in which case we'll just reuse the scratch
              * reg for this instruction.
@@ -387,7 +387,7 @@ vec4_visitor::evaluate_spill_costs(float *spill_costs, bool *no_spill)
          }
       }
 
-      if (inst->dst.file == GRF) {
+      if (inst->dst.file == VGRF) {
          spill_costs[inst->dst.nr] += loop_scale;
          if (inst->dst.reladdr)
             no_spill[inst->dst.nr] = true;
@@ -406,10 +406,10 @@ vec4_visitor::evaluate_spill_costs(float *spill_costs, bool *no_spill)
       case SHADER_OPCODE_GEN4_SCRATCH_READ:
       case SHADER_OPCODE_GEN4_SCRATCH_WRITE:
          for (int i = 0; i < 3; i++) {
-            if (inst->src[i].file == GRF)
+            if (inst->src[i].file == VGRF)
                no_spill[inst->src[i].nr] = true;
          }
-         if (inst->dst.file == GRF)
+         if (inst->dst.file == VGRF)
             no_spill[inst->dst.nr] = true;
          break;
 
@@ -445,7 +445,7 @@ vec4_visitor::spill_reg(int spill_reg_nr)
    int scratch_reg = -1;
    foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
       for (unsigned int i = 0; i < 3; i++) {
-         if (inst->src[i].file == GRF && inst->src[i].nr == spill_reg_nr) {
+         if (inst->src[i].file == VGRF && inst->src[i].nr == spill_reg_nr) {
             if (scratch_reg == -1 ||
                 !can_use_scratch_for_source(inst, i, scratch_reg)) {
                /* We need to unspill anyway so make sure we read the full vec4
@@ -465,7 +465,7 @@ vec4_visitor::spill_reg(int spill_reg_nr)
          }
       }
 
-      if (inst->dst.file == GRF && inst->dst.nr == spill_reg_nr) {
+      if (inst->dst.file == VGRF && inst->dst.nr == spill_reg_nr) {
          emit_scratch_write(block, inst, spill_offset);
          scratch_reg = inst->dst.nr;
       }
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index 6038d90b433..fda3d7c4427 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -632,7 +632,7 @@ src_reg::src_reg(class vec4_visitor *v, const struct glsl_type *type)
 {
    init();
 
-   this->file = GRF;
+   this->file = VGRF;
    this->nr = v->alloc.allocate(type_size_vec4(type));
 
    if (type->is_array() || type->is_record()) {
@@ -650,7 +650,7 @@ src_reg::src_reg(class vec4_visitor *v, const struct glsl_type *type, int size)
 
    init();
 
-   this->file = GRF;
+   this->file = VGRF;
    this->nr = v->alloc.allocate(type_size_vec4(type) * size);
 
    this->swizzle = BRW_SWIZZLE_NOOP;
@@ -662,7 +662,7 @@ dst_reg::dst_reg(class vec4_visitor *v, const struct glsl_type *type)
 {
    init();
 
-   this->file = GRF;
+   this->file = VGRF;
    this->nr = v->alloc.allocate(type_size_vec4(type));
 
    if (type->is_array() || type->is_record()) {
@@ -1642,7 +1642,7 @@ vec4_visitor::emit_resolve_reladdr(int scratch_loc[], bblock_t *block,
                                           *src.reladdr);
 
    /* Now handle scratch access on src */
-   if (src.file == GRF && scratch_loc[src.nr] != -1) {
+   if (src.file == VGRF && scratch_loc[src.nr] != -1) {
       dst_reg temp = dst_reg(this, glsl_type::vec4_type);
       emit_scratch_read(block, inst, temp, src, scratch_loc[src.nr]);
       src.nr = temp.nr;
@@ -1670,7 +1670,7 @@ vec4_visitor::move_grf_array_access_to_scratch()
     * scratch.
     */
    foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
-      if (inst->dst.file == GRF && inst->dst.reladdr) {
+      if (inst->dst.file == VGRF && inst->dst.reladdr) {
          if (scratch_loc[inst->dst.nr] == -1) {
             scratch_loc[inst->dst.nr] = last_scratch;
             last_scratch += this->alloc.sizes[inst->dst.nr];
@@ -1679,7 +1679,7 @@ vec4_visitor::move_grf_array_access_to_scratch()
          for (src_reg *iter = inst->dst.reladdr;
               iter->reladdr;
               iter = iter->reladdr) {
-            if (iter->file == GRF && scratch_loc[iter->nr] == -1) {
+            if (iter->file == VGRF && scratch_loc[iter->nr] == -1) {
                scratch_loc[iter->nr] = last_scratch;
                last_scratch += this->alloc.sizes[iter->nr];
             }
@@ -1690,7 +1690,7 @@ vec4_visitor::move_grf_array_access_to_scratch()
          for (src_reg *iter = &inst->src[i];
               iter->reladdr;
               iter = iter->reladdr) {
-            if (iter->file == GRF && scratch_loc[iter->nr] == -1) {
+            if (iter->file == VGRF && scratch_loc[iter->nr] == -1) {
                scratch_loc[iter->nr] = last_scratch;
                last_scratch += this->alloc.sizes[iter->nr];
             }
@@ -1718,7 +1718,7 @@ vec4_visitor::move_grf_array_access_to_scratch()
       /* Now that we have handled any (possibly recursive) reladdr scratch
        * accesses for dst we can safely do the scratch write for dst itself
        */
-      if (inst->dst.file == GRF && scratch_loc[inst->dst.nr] != -1)
+      if (inst->dst.file == VGRF && scratch_loc[inst->dst.nr] != -1)
          emit_scratch_write(block, inst, scratch_loc[inst->dst.nr]);
 
       /* Now handle scratch access on any src. In this case, since inst->src[i]

From b99e1fd547035be9a6da5ee1b78b8a853c2ef3e0 Mon Sep 17 00:00:00 2001
From: Matt Turner <mattst88@gmail.com>
Date: Mon, 2 Nov 2015 00:22:29 +0000
Subject: [PATCH 267/287] i965/fs: Handle type-V immediates in
 brw_reg_from_fs_reg().

We use brw_imm_v() to produce type-V immediates, which generates a
brw_reg that the fs_reg(brw_reg) constructor wraps with .file set to
HW_REG. The next commit will rid us of HW_REGs, so we need to handle
BRW_REGISTER_TYPE_V in the IMM case.

Reviewed-by: Emil Velikov <emil.velikov@collabora.co.uk>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
index 16257a9fdc3..6e17f23f759 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -111,6 +111,9 @@ brw_reg_from_fs_reg(fs_inst *inst, fs_reg *reg, unsigned gen)
       case BRW_REGISTER_TYPE_VF:
          brw_reg = brw_imm_vf(reg->ud);
          break;
+      case BRW_REGISTER_TYPE_V:
+         brw_reg = brw_imm_v(reg->ud);
+         break;
       default:
 	 unreachable("not reached");
       }

From 4b0fbebf024e564c195f3ce94e1ce43a3d6442ea Mon Sep 17 00:00:00 2001
From: Matt Turner <mattst88@gmail.com>
Date: Mon, 2 Nov 2015 00:25:04 +0000
Subject: [PATCH 268/287] i965/fs: Set stride correctly for immediates in
 fs_reg(brw_reg).

The fs_reg() constructors for immediates set stride to 0, except for
vector-immediates, which set stride to 1.  This patch makes the fs_reg
constructor that takes a brw_reg do likewise, so that stride is set
correctly for cases such as fs_reg(brw_imm_v(...)).

The generator asserts that this is true (and presumably it's useful in
some optimization passes?) and the VF fs_reg constructors already did
this (by virtue of the fact that they don't override what init() does).

In the next commit, calling this constructor with brw_imm_* will generate
an IMM file register rather than a HW_REG, making this change necessary
to avoid breakage with existing uses of brw_imm_v().

Reviewed-by: Emil Velikov <emil.velikov@collabora.co.uk>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/mesa/drivers/dri/i965/brw_fs.cpp | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index e8ac1c2eda3..ba42d717914 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -428,6 +428,12 @@ fs_reg::fs_reg(struct brw_reg reg) :
    this->subreg_offset = 0;
    this->reladdr = NULL;
    this->stride = 1;
+   if (this->file == IMM &&
+       (this->type != BRW_REGISTER_TYPE_V &&
+        this->type != BRW_REGISTER_TYPE_UV &&
+        this->type != BRW_REGISTER_TYPE_VF)) {
+      this->stride = 0;
+   }
 }
 
 bool

From b3315a6f56fb93f2884168cbf9358b2606641db5 Mon Sep 17 00:00:00 2001
From: Matt Turner <mattst88@gmail.com>
Date: Mon, 26 Oct 2015 17:52:57 -0700
Subject: [PATCH 269/287] i965: Replace HW_REG with ARF/FIXED_GRF.

HW_REGs are (were!) kind of awful. If the file was HW_REG, you had to
look at different fields for type, abs, negate, writemask, swizzle, and
a second file. They also caused annoying problems like immediate sources
being considered scheduling barriers (commit 6148e94e2) and other such
nonsense.

Instead use ARF/FIXED_GRF/MRF for fixed registers in those files.

After a sufficient amount of time has passed since "GRF" was used, we
can rename FIXED_GRF -> GRF, but doing so now would make rebasing awful.

Reviewed-by: Emil Velikov <emil.velikov@collabora.co.uk>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/mesa/drivers/dri/i965/brw_fs.cpp          | 120 +++++++--------
 src/mesa/drivers/dri/i965/brw_fs.h            |   5 +-
 .../dri/i965/brw_fs_copy_propagation.cpp      |   3 +-
 src/mesa/drivers/dri/i965/brw_fs_cse.cpp      |   3 +-
 .../drivers/dri/i965/brw_fs_generator.cpp     |   7 +-
 .../drivers/dri/i965/brw_fs_reg_allocate.cpp  |   5 +-
 src/mesa/drivers/dri/i965/brw_ir_fs.h         |   9 +-
 src/mesa/drivers/dri/i965/brw_ir_vec4.h       |   6 +-
 .../dri/i965/brw_schedule_instructions.cpp    |  53 ++-----
 src/mesa/drivers/dri/i965/brw_shader.cpp      |   8 +-
 src/mesa/drivers/dri/i965/brw_shader.h        |   5 +-
 src/mesa/drivers/dri/i965/brw_vec4.cpp        | 141 +++++++-----------
 src/mesa/drivers/dri/i965/brw_vec4_cse.cpp    |   3 +-
 13 files changed, 157 insertions(+), 211 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index ba42d717914..9a898726048 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -77,7 +77,8 @@ fs_inst::init(enum opcode opcode, uint8_t exec_size, const fs_reg &dst,
    /* This will be the case for almost all instructions. */
    switch (dst.file) {
    case VGRF:
-   case HW_REG:
+   case ARF:
+   case FIXED_GRF:
    case MRF:
    case ATTR:
       this->regs_written = DIV_ROUND_UP(dst.component_size(exec_size),
@@ -423,7 +424,7 @@ fs_reg::fs_reg(uint8_t vf0, uint8_t vf1, uint8_t vf2, uint8_t vf3)
 fs_reg::fs_reg(struct brw_reg reg) :
    backend_reg(reg)
 {
-   this->file = HW_REG;
+   this->file = (enum register_file)reg.file;
    this->reg_offset = 0;
    this->subreg_offset = 0;
    this->reladdr = NULL;
@@ -439,24 +440,17 @@ fs_reg::fs_reg(struct brw_reg reg) :
 bool
 fs_reg::equals(const fs_reg &r) const
 {
-   return (file == r.file &&
-           nr == r.nr &&
+   return (memcmp((brw_reg *)this, (brw_reg *)&r, sizeof(brw_reg)) == 0 &&
            reg_offset == r.reg_offset &&
            subreg_offset == r.subreg_offset &&
-           type == r.type &&
-           negate == r.negate &&
-           abs == r.abs &&
            !reladdr && !r.reladdr &&
-           (file != HW_REG ||
-            memcmp((brw_reg *)this, (brw_reg *)&r, sizeof(brw_reg)) == 0) &&
-           (file != IMM || d == r.d) &&
            stride == r.stride);
 }
 
 fs_reg &
 fs_reg::set_smear(unsigned subreg)
 {
-   assert(file != HW_REG && file != IMM);
+   assert(file != ARF && file != FIXED_GRF && file != IMM);
    subreg_offset = subreg * type_sz(type);
    stride = 0;
    return *this;
@@ -471,7 +465,7 @@ fs_reg::is_contiguous() const
 unsigned
 fs_reg::component_size(unsigned width) const
 {
-   const unsigned stride = (file != HW_REG ? this->stride :
+   const unsigned stride = ((file != ARF && file != FIXED_GRF) ? this->stride :
                             hstride == 0 ? 0 :
                             1 << (hstride - 1));
    return MAX2(width * stride, 1) * type_sz(type);
@@ -857,9 +851,10 @@ fs_inst::regs_read(int arg) const
    case UNIFORM:
    case IMM:
       return 1;
+   case ARF:
+   case FIXED_GRF:
    case VGRF:
    case ATTR:
-   case HW_REG:
       return DIV_ROUND_UP(components_read(arg) *
                           src[arg].component_size(exec_size),
                           REG_SIZE);
@@ -1596,12 +1591,12 @@ fs_visitor::assign_urb_setup()
     */
    foreach_block_and_inst(block, fs_inst, inst, cfg) {
       if (inst->opcode == FS_OPCODE_LINTERP) {
-	 assert(inst->src[1].file == HW_REG);
+	 assert(inst->src[1].file == FIXED_GRF);
          inst->src[1].nr += urb_start;
       }
 
       if (inst->opcode == FS_OPCODE_CINTERP) {
-	 assert(inst->src[0].file == HW_REG);
+	 assert(inst->src[0].file == FIXED_GRF);
          inst->src[0].nr += urb_start;
       }
    }
@@ -1682,7 +1677,7 @@ fs_visitor::assign_gs_urb_setup()
          inst->base_mrf = -1;
       }
 
-      /* Rewrite all ATTR file references to HW_REGs. */
+      /* Rewrite all ATTR file references to GRFs. */
       convert_attr_sources_to_hw_regs(inst);
    }
 }
@@ -2797,7 +2792,7 @@ fs_visitor::emit_repclear_shader()
    assign_curb_setup();
 
    /* Now that we have the uniform assigned, go ahead and force it to a vec4. */
-   assert(mov->src[0].file == HW_REG);
+   assert(mov->src[0].file == FIXED_GRF);
    mov->src[0] = brw_vec4_grf(mov->src[0].nr, 0);
 }
 
@@ -2876,10 +2871,7 @@ clear_deps_for_inst_src(fs_inst *inst, bool *deps, int first_grf, int grf_len)
    /* Clear the flag for registers that actually got read (as expected). */
    for (int i = 0; i < inst->sources; i++) {
       int grf;
-      if (inst->src[i].file == VGRF) {
-         grf = inst->src[i].nr;
-      } else if (inst->src[i].file == HW_REG &&
-                 inst->src[i].brw_reg::file == BRW_GENERAL_REGISTER_FILE) {
+      if (inst->src[i].file == VGRF || inst->src[i].file == FIXED_GRF) {
          grf = inst->src[i].nr;
       } else {
          continue;
@@ -4619,6 +4611,9 @@ fs_visitor::dump_instruction(backend_instruction *be_inst, FILE *file)
          fprintf(file, "+%d.%d",
                  inst->dst.reg_offset, inst->dst.subreg_offset);
       break;
+   case FIXED_GRF:
+      fprintf(file, "g%d", inst->dst.nr);
+      break;
    case MRF:
       fprintf(file, "m%d", inst->dst.nr);
       break;
@@ -4631,29 +4626,23 @@ fs_visitor::dump_instruction(backend_instruction *be_inst, FILE *file)
    case ATTR:
       fprintf(file, "***attr%d***", inst->dst.nr + inst->dst.reg_offset);
       break;
-   case HW_REG:
-      if (inst->dst.brw_reg::file == BRW_ARCHITECTURE_REGISTER_FILE) {
-         switch (inst->dst.nr) {
-         case BRW_ARF_NULL:
-            fprintf(file, "null");
-            break;
-         case BRW_ARF_ADDRESS:
-            fprintf(file, "a0.%d", inst->dst.subnr);
-            break;
-         case BRW_ARF_ACCUMULATOR:
-            fprintf(file, "acc%d", inst->dst.subnr);
-            break;
-         case BRW_ARF_FLAG:
-            fprintf(file, "f%d.%d", inst->dst.nr & 0xf,
-                             inst->dst.subnr);
-            break;
-         default:
-            fprintf(file, "arf%d.%d", inst->dst.nr & 0xf,
-                               inst->dst.subnr);
-            break;
-         }
-      } else {
-         fprintf(file, "hw_reg%d", inst->dst.nr);
+   case ARF:
+      switch (inst->dst.nr) {
+      case BRW_ARF_NULL:
+         fprintf(file, "null");
+         break;
+      case BRW_ARF_ADDRESS:
+         fprintf(file, "a0.%d", inst->dst.subnr);
+         break;
+      case BRW_ARF_ACCUMULATOR:
+         fprintf(file, "acc%d", inst->dst.subnr);
+         break;
+      case BRW_ARF_FLAG:
+         fprintf(file, "f%d.%d", inst->dst.nr & 0xf, inst->dst.subnr);
+         break;
+      default:
+         fprintf(file, "arf%d.%d", inst->dst.nr & 0xf, inst->dst.subnr);
+         break;
       }
       if (inst->dst.subnr)
          fprintf(file, "+%d", inst->dst.subnr);
@@ -4676,6 +4665,9 @@ fs_visitor::dump_instruction(backend_instruction *be_inst, FILE *file)
             fprintf(file, "+%d.%d", inst->src[i].reg_offset,
                     inst->src[i].subreg_offset);
          break;
+      case FIXED_GRF:
+         fprintf(file, "g%d", inst->src[i].nr);
+         break;
       case MRF:
          fprintf(file, "***m%d***", inst->src[i].nr);
          break;
@@ -4719,29 +4711,23 @@ fs_visitor::dump_instruction(backend_instruction *be_inst, FILE *file)
             break;
          }
          break;
-      case HW_REG:
-         if (inst->src[i].brw_reg::file == BRW_ARCHITECTURE_REGISTER_FILE) {
-            switch (inst->src[i].nr) {
-            case BRW_ARF_NULL:
-               fprintf(file, "null");
-               break;
-            case BRW_ARF_ADDRESS:
-               fprintf(file, "a0.%d", inst->src[i].subnr);
-               break;
-            case BRW_ARF_ACCUMULATOR:
-               fprintf(file, "acc%d", inst->src[i].subnr);
-               break;
-            case BRW_ARF_FLAG:
-               fprintf(file, "f%d.%d", inst->src[i].nr & 0xf,
-                                inst->src[i].subnr);
-               break;
-            default:
-               fprintf(file, "arf%d.%d", inst->src[i].nr & 0xf,
-                                  inst->src[i].subnr);
-               break;
-            }
-         } else {
-            fprintf(file, "hw_reg%d", inst->src[i].nr);
+      case ARF:
+         switch (inst->src[i].nr) {
+         case BRW_ARF_NULL:
+            fprintf(file, "null");
+            break;
+         case BRW_ARF_ADDRESS:
+            fprintf(file, "a0.%d", inst->src[i].subnr);
+            break;
+         case BRW_ARF_ACCUMULATOR:
+            fprintf(file, "acc%d", inst->src[i].subnr);
+            break;
+         case BRW_ARF_FLAG:
+            fprintf(file, "f%d.%d", inst->src[i].nr & 0xf, inst->src[i].subnr);
+            break;
+         default:
+            fprintf(file, "arf%d.%d", inst->src[i].nr & 0xf, inst->src[i].subnr);
+            break;
          }
          if (inst->src[i].subnr)
             fprintf(file, "+%d", inst->src[i].subnr);
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index 55d8d03d73f..f40e58b8ca0 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -70,9 +70,10 @@ offset(fs_reg reg, const brw::fs_builder& bld, unsigned delta)
    switch (reg.file) {
    case BAD_FILE:
       break;
-   case VGRF:
+   case ARF:
+   case FIXED_GRF:
    case MRF:
-   case HW_REG:
+   case VGRF:
    case ATTR:
       return byte_offset(reg,
                          delta * reg.component_size(bld.dispatch_width()));
diff --git a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
index cb018336927..426ea57d8f9 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
@@ -387,7 +387,8 @@ fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry)
    switch (entry->src.file) {
    case UNIFORM:
    case BAD_FILE:
-   case HW_REG:
+   case ARF:
+   case FIXED_GRF:
       inst->src[arg].reg_offset = entry->src.reg_offset;
       inst->src[arg].subreg_offset = entry->src.subreg_offset;
       break;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
index ee31cb6cd80..8c67caff6e0 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
@@ -226,7 +226,8 @@ fs_visitor::opt_cse_local(bblock_t *block)
    foreach_inst_in_block(fs_inst, inst, block) {
       /* Skip some cases. */
       if (is_expression(this, inst) && !inst->is_partial_write() &&
-          (inst->dst.file != HW_REG || inst->dst.is_null()))
+          ((inst->dst.file != ARF && inst->dst.file != FIXED_GRF) ||
+           inst->dst.is_null()))
       {
          bool found = false;
          bool negate = false;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
index 6e17f23f759..fa1e83497cc 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -37,6 +37,9 @@ static enum brw_reg_file
 brw_file_from_reg(fs_reg *reg)
 {
    switch (reg->file) {
+   case ARF:
+      return BRW_ARCHITECTURE_REGISTER_FILE;
+   case FIXED_GRF:
    case VGRF:
       return BRW_GENERAL_REGISTER_FILE;
    case MRF:
@@ -44,7 +47,6 @@ brw_file_from_reg(fs_reg *reg)
    case IMM:
       return BRW_IMMEDIATE_VALUE;
    case BAD_FILE:
-   case HW_REG:
    case ATTR:
    case UNIFORM:
       unreachable("not reached");
@@ -118,7 +120,8 @@ brw_reg_from_fs_reg(fs_inst *inst, fs_reg *reg, unsigned gen)
 	 unreachable("not reached");
       }
       break;
-   case HW_REG:
+   case ARF:
+   case FIXED_GRF:
       brw_reg = *static_cast<struct brw_reg *>(reg);
       break;
    case BAD_FILE:
diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
index e2234d1324d..1b61f9fe01c 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
@@ -366,13 +366,12 @@ void fs_visitor::calculate_payload_ranges(int payload_node_count,
       else
          use_ip = ip;
 
-      /* Note that UNIFORM args have been turned into FIXED_HW_REG by
+      /* Note that UNIFORM args have been turned into FIXED_GRF by
        * assign_curbe_setup(), and interpolation uses fixed hardware regs from
        * the start (see interp_reg()).
        */
       for (int i = 0; i < inst->sources; i++) {
-         if (inst->src[i].file == HW_REG &&
-             inst->src[i].brw_reg::file == BRW_GENERAL_REGISTER_FILE) {
+         if (inst->src[i].file == FIXED_GRF) {
             int node_nr = inst->src[i].nr;
             if (node_nr >= payload_node_count)
                continue;
diff --git a/src/mesa/drivers/dri/i965/brw_ir_fs.h b/src/mesa/drivers/dri/i965/brw_ir_fs.h
index a3b3a4fec78..61e72f7d536 100644
--- a/src/mesa/drivers/dri/i965/brw_ir_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_ir_fs.h
@@ -97,8 +97,9 @@ byte_offset(fs_reg reg, unsigned delta)
    case MRF:
       reg.nr += delta / 32;
       break;
+   case ARF:
+   case FIXED_GRF:
    case IMM:
-   case HW_REG:
    case UNIFORM:
       assert(delta == 0);
    }
@@ -121,7 +122,8 @@ horiz_offset(fs_reg reg, unsigned delta)
    case MRF:
    case ATTR:
       return byte_offset(reg, delta * reg.stride * type_sz(reg.type));
-   case HW_REG:
+   case ARF:
+   case FIXED_GRF:
       assert(delta == 0);
    }
    return reg;
@@ -163,8 +165,9 @@ half(fs_reg reg, unsigned idx)
    case MRF:
       return horiz_offset(reg, 8 * idx);
 
+   case ARF:
+   case FIXED_GRF:
    case ATTR:
-   case HW_REG:
       unreachable("Cannot take half of this register type");
    }
    return reg;
diff --git a/src/mesa/drivers/dri/i965/brw_ir_vec4.h b/src/mesa/drivers/dri/i965/brw_ir_vec4.h
index d3f0d61b55b..dcec5f30b06 100644
--- a/src/mesa/drivers/dri/i965/brw_ir_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_ir_vec4.h
@@ -68,7 +68,8 @@ retype(src_reg reg, enum brw_reg_type type)
 static inline src_reg
 offset(src_reg reg, unsigned delta)
 {
-   assert(delta == 0 || (reg.file != HW_REG && reg.file != IMM));
+   assert(delta == 0 ||
+          (reg.file != ARF && reg.file != FIXED_GRF && reg.file != IMM));
    reg.reg_offset += delta;
    return reg;
 }
@@ -132,7 +133,8 @@ retype(dst_reg reg, enum brw_reg_type type)
 static inline dst_reg
 offset(dst_reg reg, unsigned delta)
 {
-   assert(delta == 0 || (reg.file != HW_REG && reg.file != IMM));
+   assert(delta == 0 ||
+          (reg.file != ARF && reg.file != FIXED_GRF && reg.file != IMM));
    reg.reg_offset += delta;
    return reg;
 }
diff --git a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
index fb5df2d043f..776f75d3e58 100644
--- a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
+++ b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
@@ -585,8 +585,7 @@ fs_instruction_scheduler::count_reads_remaining(backend_instruction *be)
 
       if (inst->src[i].file == VGRF) {
          reads_remaining[inst->src[i].nr]++;
-      } else if (inst->src[i].file == HW_REG &&
-                 inst->src[i].brw_reg::file == BRW_GENERAL_REGISTER_FILE) {
+      } else if (inst->src[i].file == FIXED_GRF) {
          if (inst->src[i].nr >= hw_reg_count)
             continue;
 
@@ -670,8 +669,7 @@ fs_instruction_scheduler::update_register_pressure(backend_instruction *be)
 
       if (inst->src[i].file == VGRF) {
          reads_remaining[inst->src[i].nr]--;
-      } else if (inst->src[i].file == HW_REG &&
-                 inst->src[i].brw_reg::file == BRW_GENERAL_REGISTER_FILE &&
+      } else if (inst->src[i].file == FIXED_GRF &&
                  inst->src[i].nr < hw_reg_count) {
          for (int off = 0; off < inst->regs_read(i); off++)
             hw_reads_remaining[inst->src[i].nr + off]--;
@@ -700,8 +698,7 @@ fs_instruction_scheduler::get_register_pressure_benefit(backend_instruction *be)
           reads_remaining[inst->src[i].nr] == 1)
          benefit += v->alloc.sizes[inst->src[i].nr];
 
-      if (inst->src[i].file == HW_REG &&
-          inst->src[i].brw_reg::file == BRW_GENERAL_REGISTER_FILE &&
+      if (inst->src[i].file == FIXED_GRF &&
           inst->src[i].nr < hw_reg_count) {
          for (int off = 0; off < inst->regs_read(i); off++) {
             int reg = inst->src[i].nr + off;
@@ -959,9 +956,7 @@ fs_instruction_scheduler::calculate_deps()
                   add_dep(last_grf_write[inst->src[i].nr * 16 + inst->src[i].reg_offset + r], n);
                }
             }
-         } else if (inst->src[i].file == HW_REG &&
-                    (inst->src[i].brw_reg::file ==
-                     BRW_GENERAL_REGISTER_FILE)) {
+         } else if (inst->src[i].file == FIXED_GRF) {
             if (post_reg_alloc) {
                for (int r = 0; r < inst->regs_read(i); r++)
                   add_dep(last_grf_write[inst->src[i].nr + r], n);
@@ -972,9 +967,7 @@ fs_instruction_scheduler::calculate_deps()
             add_dep(last_accumulator_write, n);
          } else if (inst->src[i].file != BAD_FILE &&
                     inst->src[i].file != IMM &&
-                    inst->src[i].file != UNIFORM &&
-                    (inst->src[i].file != HW_REG ||
-                     inst->src[i].brw_reg::file != BRW_IMMEDIATE_VALUE)) {
+                    inst->src[i].file != UNIFORM) {
             assert(inst->src[i].file != MRF);
             add_barrier_deps(n);
          }
@@ -1024,8 +1017,7 @@ fs_instruction_scheduler::calculate_deps()
             add_dep(last_mrf_write[reg], n);
             last_mrf_write[reg] = n;
          }
-      } else if (inst->dst.file == HW_REG &&
-                 inst->dst.brw_reg::file == BRW_GENERAL_REGISTER_FILE) {
+      } else if (inst->dst.file == FIXED_GRF) {
          if (post_reg_alloc) {
             for (int r = 0; r < inst->regs_written; r++)
                last_grf_write[inst->dst.nr + r] = n;
@@ -1085,9 +1077,7 @@ fs_instruction_scheduler::calculate_deps()
                   add_dep(n, last_grf_write[inst->src[i].nr * 16 + inst->src[i].reg_offset + r], 0);
                }
             }
-         } else if (inst->src[i].file == HW_REG &&
-                    (inst->src[i].brw_reg::file ==
-                     BRW_GENERAL_REGISTER_FILE)) {
+         } else if (inst->src[i].file == FIXED_GRF) {
             if (post_reg_alloc) {
                for (int r = 0; r < inst->regs_read(i); r++)
                   add_dep(n, last_grf_write[inst->src[i].nr + r], 0);
@@ -1098,9 +1088,7 @@ fs_instruction_scheduler::calculate_deps()
             add_dep(n, last_accumulator_write, 0);
          } else if (inst->src[i].file != BAD_FILE &&
                     inst->src[i].file != IMM &&
-                    inst->src[i].file != UNIFORM &&
-                    (inst->src[i].file != HW_REG ||
-                     inst->src[i].brw_reg::file != BRW_IMMEDIATE_VALUE)) {
+                    inst->src[i].file != UNIFORM) {
             assert(inst->src[i].file != MRF);
             add_barrier_deps(n);
          }
@@ -1149,8 +1137,7 @@ fs_instruction_scheduler::calculate_deps()
 
             last_mrf_write[reg] = n;
          }
-      } else if (inst->dst.file == HW_REG &&
-                 inst->dst.brw_reg::file == BRW_GENERAL_REGISTER_FILE) {
+      } else if (inst->dst.file == FIXED_GRF) {
          if (post_reg_alloc) {
             for (int r = 0; r < inst->regs_written; r++)
                last_grf_write[inst->dst.nr + r] = n;
@@ -1218,18 +1205,14 @@ vec4_instruction_scheduler::calculate_deps()
          if (inst->src[i].file == VGRF) {
             for (unsigned j = 0; j < inst->regs_read(i); ++j)
                add_dep(last_grf_write[inst->src[i].nr + j], n);
-         } else if (inst->src[i].file == HW_REG &&
-                    (inst->src[i].brw_reg::file ==
-                     BRW_GENERAL_REGISTER_FILE)) {
+         } else if (inst->src[i].file == FIXED_GRF) {
             add_dep(last_fixed_grf_write, n);
          } else if (inst->src[i].is_accumulator()) {
             assert(last_accumulator_write);
             add_dep(last_accumulator_write, n);
          } else if (inst->src[i].file != BAD_FILE &&
                     inst->src[i].file != IMM &&
-                    inst->src[i].file != UNIFORM &&
-                    (inst->src[i].file != HW_REG ||
-                     inst->src[i].brw_reg::file != BRW_IMMEDIATE_VALUE)) {
+                    inst->src[i].file != UNIFORM) {
             /* No reads from MRF, and ATTR is already translated away */
             assert(inst->src[i].file != MRF &&
                    inst->src[i].file != ATTR);
@@ -1266,8 +1249,7 @@ vec4_instruction_scheduler::calculate_deps()
       } else if (inst->dst.file == MRF) {
          add_dep(last_mrf_write[inst->dst.nr], n);
          last_mrf_write[inst->dst.nr] = n;
-     } else if (inst->dst.file == HW_REG &&
-                 inst->dst.brw_reg::file == BRW_GENERAL_REGISTER_FILE) {
+     } else if (inst->dst.file == FIXED_GRF) {
          last_fixed_grf_write = n;
       } else if (inst->dst.is_accumulator()) {
          add_dep(last_accumulator_write, n);
@@ -1316,17 +1298,13 @@ vec4_instruction_scheduler::calculate_deps()
          if (inst->src[i].file == VGRF) {
             for (unsigned j = 0; j < inst->regs_read(i); ++j)
                add_dep(n, last_grf_write[inst->src[i].nr + j]);
-         } else if (inst->src[i].file == HW_REG &&
-                    (inst->src[i].brw_reg::file ==
-                     BRW_GENERAL_REGISTER_FILE)) {
+         } else if (inst->src[i].file == FIXED_GRF) {
             add_dep(n, last_fixed_grf_write);
          } else if (inst->src[i].is_accumulator()) {
             add_dep(n, last_accumulator_write);
          } else if (inst->src[i].file != BAD_FILE &&
                     inst->src[i].file != IMM &&
-                    inst->src[i].file != UNIFORM &&
-                    (inst->src[i].file != HW_REG ||
-                     inst->src[i].brw_reg::file != BRW_IMMEDIATE_VALUE)) {
+                    inst->src[i].file != UNIFORM) {
             assert(inst->src[i].file != MRF &&
                    inst->src[i].file != ATTR);
             add_barrier_deps(n);
@@ -1359,8 +1337,7 @@ vec4_instruction_scheduler::calculate_deps()
             last_grf_write[inst->dst.nr + j] = n;
       } else if (inst->dst.file == MRF) {
          last_mrf_write[inst->dst.nr] = n;
-      } else if (inst->dst.file == HW_REG &&
-                 inst->dst.brw_reg::file == BRW_GENERAL_REGISTER_FILE) {
+      } else if (inst->dst.file == FIXED_GRF) {
          last_fixed_grf_write = n;
       } else if (inst->dst.is_accumulator()) {
          last_accumulator_write = n;
diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp
index d736d0e46ac..a0c74a21540 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -733,18 +733,14 @@ backend_reg::is_negative_one() const
 bool
 backend_reg::is_null() const
 {
-   return file == HW_REG &&
-          brw_reg::file == BRW_ARCHITECTURE_REGISTER_FILE &&
-          nr == BRW_ARF_NULL;
+   return file == ARF && nr == BRW_ARF_NULL;
 }
 
 
 bool
 backend_reg::is_accumulator() const
 {
-   return file == HW_REG &&
-          brw_reg::file == BRW_ARCHITECTURE_REGISTER_FILE &&
-          nr == BRW_ARF_ACCUMULATOR;
+   return file == ARF && nr == BRW_ARF_ACCUMULATOR;
 }
 
 bool
diff --git a/src/mesa/drivers/dri/i965/brw_shader.h b/src/mesa/drivers/dri/i965/brw_shader.h
index bc723adc317..767752101ca 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.h
+++ b/src/mesa/drivers/dri/i965/brw_shader.h
@@ -39,10 +39,11 @@
 #define MAX_VGRF_SIZE 16
 
 enum PACKED register_file {
-   VGRF,
+   ARF,
+   FIXED_GRF,
    MRF,
    IMM,
-   HW_REG, /* a struct brw_reg */
+   VGRF,
    ATTR,
    UNIFORM, /* prog_data->params[reg] */
    BAD_FILE,
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index 334ba8395dd..60d4ed90e12 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -119,7 +119,7 @@ src_reg::src_reg(uint8_t vf0, uint8_t vf1, uint8_t vf2, uint8_t vf3)
 src_reg::src_reg(struct brw_reg reg) :
    backend_reg(reg)
 {
-   this->file = HW_REG;
+   this->file = (enum register_file)reg.file;
    this->reg_offset = 0;
    this->reladdr = NULL;
 }
@@ -127,7 +127,7 @@ src_reg::src_reg(struct brw_reg reg) :
 src_reg::src_reg(const dst_reg &reg) :
    backend_reg(static_cast<struct brw_reg>(reg))
 {
-   this->file = reg.file;
+   this->file = (enum register_file)reg.file;
    this->reg_offset = reg.reg_offset;
    this->reladdr = reg.reladdr;
    this->swizzle = brw_swizzle_for_mask(reg.writemask);
@@ -159,7 +159,7 @@ dst_reg::dst_reg(register_file file, int nr, const glsl_type *type,
 {
    init();
 
-   this->file = file;
+   this->file = (enum register_file)file;
    this->nr = nr;
    this->type = brw_type_for_base_type(type);
    this->writemask = writemask;
@@ -179,7 +179,7 @@ dst_reg::dst_reg(register_file file, int nr, brw_reg_type type,
 dst_reg::dst_reg(struct brw_reg reg) :
    backend_reg(reg)
 {
-   this->file = HW_REG;
+   this->file = (enum register_file)reg.file;
    this->reg_offset = 0;
    this->reladdr = NULL;
 }
@@ -196,17 +196,10 @@ dst_reg::dst_reg(const src_reg &reg) :
 bool
 dst_reg::equals(const dst_reg &r) const
 {
-   return (file == r.file &&
-           nr == r.nr &&
+   return (memcmp((brw_reg *)this, (brw_reg *)&r, sizeof(brw_reg)) == 0 &&
            reg_offset == r.reg_offset &&
-           type == r.type &&
-           negate == r.negate &&
-           abs == r.abs &&
-           writemask == r.writemask &&
            (reladdr == r.reladdr ||
-            (reladdr && r.reladdr && reladdr->equals(*r.reladdr))) &&
-           (file != HW_REG ||
-            memcmp((brw_reg *)this, (brw_reg *)&r, sizeof(brw_reg)) == 0));
+            (reladdr && r.reladdr && reladdr->equals(*r.reladdr))));
 }
 
 bool
@@ -341,17 +334,9 @@ vec4_visitor::implied_mrf_writes(vec4_instruction *inst)
 bool
 src_reg::equals(const src_reg &r) const
 {
-   return (file == r.file &&
-           nr == r.nr &&
+   return (memcmp((brw_reg *)this, (brw_reg *)&r, sizeof(brw_reg)) == 0 &&
 	   reg_offset == r.reg_offset &&
-	   type == r.type &&
-	   negate == r.negate &&
-	   abs == r.abs &&
-	   swizzle == r.swizzle &&
-	   !reladdr && !r.reladdr &&
-           (file != HW_REG ||
-            memcmp((brw_reg *)this, (brw_reg *)&r, sizeof(brw_reg)) == 0) &&
-           (file != IMM || d == r.d));
+	   !reladdr && !r.reladdr);
 }
 
 bool
@@ -439,7 +424,9 @@ vec4_visitor::opt_reduce_swizzle()
    bool progress = false;
 
    foreach_block_and_inst_safe(block, vec4_instruction, inst, cfg) {
-      if (inst->dst.file == BAD_FILE || inst->dst.file == HW_REG ||
+      if (inst->dst.file == BAD_FILE ||
+          inst->dst.file == ARF ||
+          inst->dst.file == FIXED_GRF ||
           inst->is_send_from_grf())
          continue;
 
@@ -914,7 +901,7 @@ vec4_visitor::opt_set_dependency_control()
             int reg = inst->src[i].nr + inst->src[i].reg_offset;
             if (inst->src[i].file == VGRF) {
                last_grf_write[reg] = NULL;
-            } else if (inst->src[i].file == HW_REG) {
+            } else if (inst->src[i].file == FIXED_GRF) {
                memset(last_grf_write, 0, sizeof(last_grf_write));
                break;
             }
@@ -931,7 +918,7 @@ vec4_visitor::opt_set_dependency_control()
           * against a previous one writing to its destination.
           */
          int reg = inst->dst.nr + inst->dst.reg_offset;
-         if (inst->dst.file == VGRF) {
+         if (inst->dst.file == VGRF || inst->dst.file == FIXED_GRF) {
             if (last_grf_write[reg] &&
                 !(inst->dst.writemask & grf_channels_written[reg])) {
                last_grf_write[reg]->no_dd_clear = true;
@@ -953,11 +940,6 @@ vec4_visitor::opt_set_dependency_control()
 
             last_mrf_write[reg] = inst;
             mrf_channels_written[reg] |= inst->dst.writemask;
-         } else if (inst->dst.nr == HW_REG) {
-            if (inst->dst.brw_reg::file == BRW_GENERAL_REGISTER_FILE)
-               memset(last_grf_write, 0, sizeof(last_grf_write));
-            if (inst->dst.brw_reg::file == BRW_MESSAGE_REGISTER_FILE)
-               memset(last_mrf_write, 0, sizeof(last_mrf_write));
          }
       }
    }
@@ -985,11 +967,8 @@ vec4_instruction::can_reswizzle(const struct brw_device_info *devinfo,
    if (mlen > 0)
       return false;
 
-   /* We can't use swizzles on the accumulator and that's really the only
-    * HW_REG we would care to reswizzle so just disallow them all.
-    */
    for (int i = 0; i < 3; i++) {
-      if (src[i].file == HW_REG)
+      if (src[i].is_accumulator())
          return false;
    }
 
@@ -1381,32 +1360,29 @@ vec4_visitor::dump_instruction(backend_instruction *be_inst, FILE *file)
    case VGRF:
       fprintf(file, "vgrf%d.%d", inst->dst.nr, inst->dst.reg_offset);
       break;
+   case FIXED_GRF:
+      fprintf(file, "g%d", inst->dst.nr);
+      break;
    case MRF:
       fprintf(file, "m%d", inst->dst.nr);
       break;
-   case HW_REG:
-      if (inst->dst.brw_reg::file == BRW_ARCHITECTURE_REGISTER_FILE) {
-         switch (inst->dst.nr) {
-         case BRW_ARF_NULL:
-            fprintf(file, "null");
-            break;
-         case BRW_ARF_ADDRESS:
-            fprintf(file, "a0.%d", inst->dst.subnr);
-            break;
-         case BRW_ARF_ACCUMULATOR:
-            fprintf(file, "acc%d", inst->dst.subnr);
-            break;
-         case BRW_ARF_FLAG:
-            fprintf(file, "f%d.%d", inst->dst.nr & 0xf,
-                             inst->dst.subnr);
-            break;
-         default:
-            fprintf(file, "arf%d.%d", inst->dst.nr & 0xf,
-                               inst->dst.subnr);
-            break;
-         }
-      } else {
-         fprintf(file, "hw_reg%d", inst->dst.nr);
+   case ARF:
+      switch (inst->dst.nr) {
+      case BRW_ARF_NULL:
+         fprintf(file, "null");
+         break;
+      case BRW_ARF_ADDRESS:
+         fprintf(file, "a0.%d", inst->dst.subnr);
+         break;
+      case BRW_ARF_ACCUMULATOR:
+         fprintf(file, "acc%d", inst->dst.subnr);
+         break;
+      case BRW_ARF_FLAG:
+         fprintf(file, "f%d.%d", inst->dst.nr & 0xf, inst->dst.subnr);
+         break;
+      default:
+         fprintf(file, "arf%d.%d", inst->dst.nr & 0xf, inst->dst.subnr);
+         break;
       }
       if (inst->dst.subnr)
          fprintf(file, "+%d", inst->dst.subnr);
@@ -1444,6 +1420,9 @@ vec4_visitor::dump_instruction(backend_instruction *be_inst, FILE *file)
       case VGRF:
          fprintf(file, "vgrf%d", inst->src[i].nr);
          break;
+      case FIXED_GRF:
+         fprintf(file, "g%d", inst->src[i].nr);
+         break;
       case ATTR:
          fprintf(file, "attr%d", inst->src[i].nr);
          break;
@@ -1473,29 +1452,23 @@ vec4_visitor::dump_instruction(backend_instruction *be_inst, FILE *file)
             break;
          }
          break;
-      case HW_REG:
-         if (inst->src[i].brw_reg::file == BRW_ARCHITECTURE_REGISTER_FILE) {
-            switch (inst->src[i].nr) {
-            case BRW_ARF_NULL:
-               fprintf(file, "null");
-               break;
-            case BRW_ARF_ADDRESS:
-               fprintf(file, "a0.%d", inst->src[i].subnr);
-               break;
-            case BRW_ARF_ACCUMULATOR:
-               fprintf(file, "acc%d", inst->src[i].subnr);
-               break;
-            case BRW_ARF_FLAG:
-               fprintf(file, "f%d.%d", inst->src[i].nr & 0xf,
-                                inst->src[i].subnr);
-               break;
-            default:
-               fprintf(file, "arf%d.%d", inst->src[i].nr & 0xf,
-                                  inst->src[i].subnr);
-               break;
-            }
-         } else {
-            fprintf(file, "hw_reg%d", inst->src[i].nr);
+      case ARF:
+         switch (inst->src[i].nr) {
+         case BRW_ARF_NULL:
+            fprintf(file, "null");
+            break;
+         case BRW_ARF_ADDRESS:
+            fprintf(file, "a0.%d", inst->src[i].subnr);
+            break;
+         case BRW_ARF_ACCUMULATOR:
+            fprintf(file, "acc%d", inst->src[i].subnr);
+            break;
+         case BRW_ARF_FLAG:
+            fprintf(file, "f%d.%d", inst->src[i].nr & 0xf, inst->src[i].subnr);
+            break;
+         default:
+            fprintf(file, "arf%d.%d", inst->src[i].nr & 0xf, inst->src[i].subnr);
+            break;
          }
          if (inst->src[i].subnr)
             fprintf(file, "+%d", inst->src[i].subnr);
@@ -1812,7 +1785,8 @@ vec4_visitor::convert_to_hw_regs()
             assert(!src.reladdr);
             break;
 
-         case HW_REG:
+         case ARF:
+         case FIXED_GRF:
             continue;
 
          case BAD_FILE:
@@ -1844,7 +1818,8 @@ vec4_visitor::convert_to_hw_regs()
          reg.writemask = dst.writemask;
          break;
 
-      case HW_REG:
+      case ARF:
+      case FIXED_GRF:
          reg = dst;
          break;
 
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp b/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp
index cdb76e4759d..85cbf24092e 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp
@@ -143,7 +143,8 @@ vec4_visitor::opt_cse_local(bblock_t *block)
    foreach_inst_in_block (vec4_instruction, inst, block) {
       /* Skip some cases. */
       if (is_expression(inst) && !inst->predicate && inst->mlen == 0 &&
-          (inst->dst.file != HW_REG || inst->dst.is_null()))
+          ((inst->dst.file != ARF && inst->dst.file != FIXED_GRF) ||
+           inst->dst.is_null()))
       {
          bool found = false;
 

From 49b3215d7076db8b9afe8998b01ef250795b5892 Mon Sep 17 00:00:00 2001
From: Matt Turner <mattst88@gmail.com>
Date: Mon, 26 Oct 2015 18:41:27 -0700
Subject: [PATCH 270/287] i965: Combine register file field.

The first four values (which fit in 2 bits) are hardware values, while
VGRF, ATTR, UNIFORM, and BAD_FILE remain values used only in the IR.

Reviewed-by: Emil Velikov <emil.velikov@collabora.co.uk>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/mesa/drivers/dri/i965/brw_defines.h | 11 +++++++++++
 src/mesa/drivers/dri/i965/brw_fs.cpp    |  5 ++---
 src/mesa/drivers/dri/i965/brw_ir_fs.h   |  4 ++--
 src/mesa/drivers/dri/i965/brw_ir_vec4.h |  8 ++++----
 src/mesa/drivers/dri/i965/brw_reg.h     |  4 ++--
 src/mesa/drivers/dri/i965/brw_shader.h  | 13 -------------
 src/mesa/drivers/dri/i965/brw_vec4.cpp  | 16 ++++++----------
 7 files changed, 27 insertions(+), 34 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index 47350301a14..5044982caec 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -1404,6 +1404,17 @@ enum PACKED brw_reg_file {
    BRW_GENERAL_REGISTER_FILE      = 1,
    BRW_MESSAGE_REGISTER_FILE      = 2,
    BRW_IMMEDIATE_VALUE            = 3,
+
+   ARF = BRW_ARCHITECTURE_REGISTER_FILE,
+   FIXED_GRF = BRW_GENERAL_REGISTER_FILE,
+   MRF = BRW_MESSAGE_REGISTER_FILE,
+   IMM = BRW_IMMEDIATE_VALUE,
+
+   /* These are not hardware values */
+   VGRF,
+   ATTR,
+   UNIFORM, /* prog_data->params[reg] */
+   BAD_FILE,
 };
 
 #define BRW_HW_REG_TYPE_UD  0
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 9a898726048..b8c88f73dfa 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -424,7 +424,6 @@ fs_reg::fs_reg(uint8_t vf0, uint8_t vf1, uint8_t vf2, uint8_t vf3)
 fs_reg::fs_reg(struct brw_reg reg) :
    backend_reg(reg)
 {
-   this->file = (enum register_file)reg.file;
    this->reg_offset = 0;
    this->subreg_offset = 0;
    this->reladdr = NULL;
@@ -959,7 +958,7 @@ fs_visitor::vgrf(const glsl_type *const type)
                  brw_type_for_base_type(type));
 }
 
-fs_reg::fs_reg(enum register_file file, int nr)
+fs_reg::fs_reg(enum brw_reg_file file, int nr)
 {
    init();
    this->file = file;
@@ -968,7 +967,7 @@ fs_reg::fs_reg(enum register_file file, int nr)
    this->stride = (file == UNIFORM ? 0 : 1);
 }
 
-fs_reg::fs_reg(enum register_file file, int nr, enum brw_reg_type type)
+fs_reg::fs_reg(enum brw_reg_file file, int nr, enum brw_reg_type type)
 {
    init();
    this->file = file;
diff --git a/src/mesa/drivers/dri/i965/brw_ir_fs.h b/src/mesa/drivers/dri/i965/brw_ir_fs.h
index 61e72f7d536..7e977e9e727 100644
--- a/src/mesa/drivers/dri/i965/brw_ir_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_ir_fs.h
@@ -42,8 +42,8 @@ public:
    explicit fs_reg(uint8_t vf[4]);
    explicit fs_reg(uint8_t vf0, uint8_t vf1, uint8_t vf2, uint8_t vf3);
    fs_reg(struct brw_reg reg);
-   fs_reg(enum register_file file, int nr);
-   fs_reg(enum register_file file, int nr, enum brw_reg_type type);
+   fs_reg(enum brw_reg_file file, int nr);
+   fs_reg(enum brw_reg_file file, int nr, enum brw_reg_type type);
 
    bool equals(const fs_reg &r) const;
    bool is_contiguous() const;
diff --git a/src/mesa/drivers/dri/i965/brw_ir_vec4.h b/src/mesa/drivers/dri/i965/brw_ir_vec4.h
index dcec5f30b06..110e64b979e 100644
--- a/src/mesa/drivers/dri/i965/brw_ir_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_ir_vec4.h
@@ -39,7 +39,7 @@ public:
 
    void init();
 
-   src_reg(register_file file, int nr, const glsl_type *type);
+   src_reg(enum brw_reg_file file, int nr, const glsl_type *type);
    src_reg();
    src_reg(float f);
    src_reg(uint32_t u);
@@ -108,10 +108,10 @@ public:
    void init();
 
    dst_reg();
-   dst_reg(register_file file, int nr);
-   dst_reg(register_file file, int nr, const glsl_type *type,
+   dst_reg(enum brw_reg_file file, int nr);
+   dst_reg(enum brw_reg_file file, int nr, const glsl_type *type,
            unsigned writemask);
-   dst_reg(register_file file, int nr, brw_reg_type type,
+   dst_reg(enum brw_reg_file file, int nr, brw_reg_type type,
            unsigned writemask);
    dst_reg(struct brw_reg reg);
    dst_reg(class vec4_visitor *v, const struct glsl_type *type);
diff --git a/src/mesa/drivers/dri/i965/brw_reg.h b/src/mesa/drivers/dri/i965/brw_reg.h
index a2f41559503..3da83b43b5d 100644
--- a/src/mesa/drivers/dri/i965/brw_reg.h
+++ b/src/mesa/drivers/dri/i965/brw_reg.h
@@ -232,11 +232,11 @@ const char *brw_reg_type_letters(unsigned brw_reg_type);
  */
 struct brw_reg {
    enum brw_reg_type type:4;
-   enum brw_reg_file file:2;
+   enum brw_reg_file file:3;      /* :2 hardware format */
    unsigned negate:1;             /* source only */
    unsigned abs:1;                /* source only */
    unsigned address_mode:1;       /* relative addressing, hopefully! */
-   unsigned pad0:2;
+   unsigned pad0:1;
    unsigned subnr:5;              /* :1 in align16 */
    unsigned nr:16;
 
diff --git a/src/mesa/drivers/dri/i965/brw_shader.h b/src/mesa/drivers/dri/i965/brw_shader.h
index 767752101ca..c851941021d 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.h
+++ b/src/mesa/drivers/dri/i965/brw_shader.h
@@ -38,17 +38,6 @@
 #define MAX_SAMPLER_MESSAGE_SIZE 11
 #define MAX_VGRF_SIZE 16
 
-enum PACKED register_file {
-   ARF,
-   FIXED_GRF,
-   MRF,
-   IMM,
-   VGRF,
-   ATTR,
-   UNIFORM, /* prog_data->params[reg] */
-   BAD_FILE,
-};
-
 #ifdef __cplusplus
 struct backend_reg : public brw_reg
 {
@@ -62,8 +51,6 @@ struct backend_reg : public brw_reg
    bool is_accumulator() const;
    bool in_range(const backend_reg &r, unsigned n) const;
 
-   enum register_file file; /**< Register file: VGRF, MRF, IMM. */
-
    /**
     * Offset within the virtual register.
     *
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index 60d4ed90e12..c39e8545c3b 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -51,7 +51,7 @@ src_reg::init()
    this->file = BAD_FILE;
 }
 
-src_reg::src_reg(register_file file, int nr, const glsl_type *type)
+src_reg::src_reg(enum brw_reg_file file, int nr, const glsl_type *type)
 {
    init();
 
@@ -119,7 +119,6 @@ src_reg::src_reg(uint8_t vf0, uint8_t vf1, uint8_t vf2, uint8_t vf3)
 src_reg::src_reg(struct brw_reg reg) :
    backend_reg(reg)
 {
-   this->file = (enum register_file)reg.file;
    this->reg_offset = 0;
    this->reladdr = NULL;
 }
@@ -127,7 +126,6 @@ src_reg::src_reg(struct brw_reg reg) :
 src_reg::src_reg(const dst_reg &reg) :
    backend_reg(static_cast<struct brw_reg>(reg))
 {
-   this->file = (enum register_file)reg.file;
    this->reg_offset = reg.reg_offset;
    this->reladdr = reg.reladdr;
    this->swizzle = brw_swizzle_for_mask(reg.writemask);
@@ -146,7 +144,7 @@ dst_reg::dst_reg()
    init();
 }
 
-dst_reg::dst_reg(register_file file, int nr)
+dst_reg::dst_reg(enum brw_reg_file file, int nr)
 {
    init();
 
@@ -154,18 +152,18 @@ dst_reg::dst_reg(register_file file, int nr)
    this->nr = nr;
 }
 
-dst_reg::dst_reg(register_file file, int nr, const glsl_type *type,
+dst_reg::dst_reg(enum brw_reg_file file, int nr, const glsl_type *type,
                  unsigned writemask)
 {
    init();
 
-   this->file = (enum register_file)file;
+   this->file = file;
    this->nr = nr;
    this->type = brw_type_for_base_type(type);
    this->writemask = writemask;
 }
 
-dst_reg::dst_reg(register_file file, int nr, brw_reg_type type,
+dst_reg::dst_reg(enum brw_reg_file file, int nr, brw_reg_type type,
                  unsigned writemask)
 {
    init();
@@ -179,7 +177,6 @@ dst_reg::dst_reg(register_file file, int nr, brw_reg_type type,
 dst_reg::dst_reg(struct brw_reg reg) :
    backend_reg(reg)
 {
-   this->file = (enum register_file)reg.file;
    this->reg_offset = 0;
    this->reladdr = NULL;
 }
@@ -187,7 +184,6 @@ dst_reg::dst_reg(struct brw_reg reg) :
 dst_reg::dst_reg(const src_reg &reg) :
    backend_reg(static_cast<struct brw_reg>(reg))
 {
-   this->file = reg.file;
    this->reg_offset = reg.reg_offset;
    this->writemask = brw_mask_for_swizzle(reg.swizzle);
    this->reladdr = reg.reladdr;
@@ -345,7 +341,7 @@ vec4_visitor::opt_vector_float()
    bool progress = false;
 
    int last_reg = -1, last_reg_offset = -1;
-   enum register_file last_reg_file = BAD_FILE;
+   enum brw_reg_file last_reg_file = BAD_FILE;
 
    int remaining_channels = 0;
    uint8_t imm[4];

From 0eb3db117b56b081ee2674cc8940c193ffc3c41b Mon Sep 17 00:00:00 2001
From: Matt Turner <mattst88@gmail.com>
Date: Mon, 2 Nov 2015 10:23:12 -0800
Subject: [PATCH 271/287] i965: Use BRW_MRF_COMPR4 macro in more places.

Reviewed-by: Emil Velikov <emil.velikov@collabora.co.uk>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/mesa/drivers/dri/i965/brw_disasm.c         | 4 ++--
 src/mesa/drivers/dri/i965/brw_eu_emit.c        | 4 ++--
 src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 2 +-
 src/mesa/drivers/dri/i965/brw_vec4.cpp         | 2 +-
 4 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_disasm.c b/src/mesa/drivers/dri/i965/brw_disasm.c
index 76b9bed8548..650bdeea344 100644
--- a/src/mesa/drivers/dri/i965/brw_disasm.c
+++ b/src/mesa/drivers/dri/i965/brw_disasm.c
@@ -725,7 +725,7 @@ reg(FILE *file, unsigned _reg_file, unsigned _reg_nr)
 
    /* Clear the Compr4 instruction compression bit. */
    if (_reg_file == BRW_MESSAGE_REGISTER_FILE)
-      _reg_nr &= ~(1 << 7);
+      _reg_nr &= ~BRW_MRF_COMPR4;
 
    if (_reg_file == BRW_ARCHITECTURE_REGISTER_FILE) {
       switch (_reg_nr & 0xf0) {
@@ -1649,7 +1649,7 @@ brw_disassemble_inst(FILE *file, const struct brw_device_info *devinfo,
          if (brw_inst_qtr_control(devinfo, inst) == BRW_COMPRESSION_COMPRESSED &&
              opcode_descs[opcode].ndst > 0 &&
              brw_inst_dst_reg_file(devinfo, inst) == BRW_MESSAGE_REGISTER_FILE &&
-             brw_inst_dst_da_reg_nr(devinfo, inst) & (1 << 7)) {
+             brw_inst_dst_da_reg_nr(devinfo, inst) & BRW_MRF_COMPR4) {
             format(file, " compr4");
          } else {
             err |= control(file, "compression control", compr_ctrl,
diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c
index ec04d7de0e0..da1ddfddb50 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
@@ -147,7 +147,7 @@ brw_set_dest(struct brw_codegen *p, brw_inst *inst, struct brw_reg dest)
    const struct brw_device_info *devinfo = p->devinfo;
 
    if (dest.file == BRW_MESSAGE_REGISTER_FILE)
-      assert((dest.nr & ~(1 << 7)) < BRW_MAX_MRF(devinfo->gen));
+      assert((dest.nr & ~BRW_MRF_COMPR4) < BRW_MAX_MRF(devinfo->gen));
    else if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE)
       assert(dest.nr < 128);
 
@@ -311,7 +311,7 @@ brw_set_src0(struct brw_codegen *p, brw_inst *inst, struct brw_reg reg)
    const struct brw_device_info *devinfo = p->devinfo;
 
    if (reg.file == BRW_MESSAGE_REGISTER_FILE)
-      assert((reg.nr & ~(1 << 7)) < BRW_MAX_MRF(devinfo->gen));
+      assert((reg.nr & ~BRW_MRF_COMPR4) < BRW_MAX_MRF(devinfo->gen));
    else if (reg.file != BRW_ARCHITECTURE_REGISTER_FILE)
       assert(reg.nr < 128);
 
diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
index fa1e83497cc..e9860212579 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -61,7 +61,7 @@ brw_reg_from_fs_reg(fs_inst *inst, fs_reg *reg, unsigned gen)
 
    switch (reg->file) {
    case MRF:
-      assert((reg->nr & ~(1 << 7)) < BRW_MAX_MRF(gen));
+      assert((reg->nr & ~BRW_MRF_COMPR4) < BRW_MAX_MRF(gen));
       /* Fallthrough */
    case VGRF:
       if (reg->stride == 0) {
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index c39e8545c3b..a086b43e11a 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -1808,7 +1808,7 @@ vec4_visitor::convert_to_hw_regs()
          break;
 
       case MRF:
-         assert(((dst.nr + dst.reg_offset) & ~(1 << 7)) < BRW_MAX_MRF(devinfo->gen));
+         assert(((dst.nr + dst.reg_offset) & ~BRW_MRF_COMPR4) < BRW_MAX_MRF(devinfo->gen));
          reg = brw_message_reg(dst.nr + dst.reg_offset);
          reg.type = dst.type;
          reg.writemask = dst.writemask;

From 8b145d6a3de381a568d8001131e48257611a542a Mon Sep 17 00:00:00 2001
From: Juha-Pekka Heikkila <juhapekka.heikkila@gmail.com>
Date: Fri, 13 Nov 2015 13:36:43 +0200
Subject: [PATCH 272/287] i965: Don't write beyond allocated memory.

Reviewed-by: Eduardo Lima Mitev <elima@igalia.com>
Reviewed-by: Matt Turner <mattst88@gmail.com>
Signed-off-by: Juha-Pekka Heikkila <juhapekka.heikkila@gmail.com>
---
 src/mesa/drivers/dri/i965/brw_eu_validate.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/brw_eu_validate.c b/src/mesa/drivers/dri/i965/brw_eu_validate.c
index eb57962bea3..2de2ea1babc 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_validate.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_validate.c
@@ -39,7 +39,7 @@ cat(struct string *dest, const struct string src)
 {
    dest->str = realloc(dest->str, dest->len + src.len + 1);
    memcpy(dest->str + dest->len, src.str, src.len);
-   dest->str[dest->len + src.len + 1] = '\0';
+   dest->str[dest->len + src.len] = '\0';
    dest->len = dest->len + src.len;
 }
 #define CAT(dest, src) cat(&dest, (struct string){src, strlen(src)})

From 386759b02dac1382072cecef4d6520a0770f995e Mon Sep 17 00:00:00 2001
From: Matt Turner <mattst88@gmail.com>
Date: Fri, 13 Nov 2015 12:13:14 -0800
Subject: [PATCH 273/287] i965: Silence warning.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

intel_asm_annotation.c: In function ‘annotation_insert_error’:
intel_asm_annotation.c:214:18:
warning: ‘ann’ may be used uninitialized in this function
[-Wmaybe-uninitialized]
       ann->error = ralloc_strdup(annotation->mem_ctx, error);
                         ^

I initially tried changing the type of ann_count to unsigned (it is
currently int), since that, together with the check at the beginning of
the function that it is non-zero, should be sufficient to prove that it
must be greater than zero. Unfortunately, that wasn't enough to silence
the warning.
---
 src/mesa/drivers/dri/i965/intel_asm_annotation.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/intel_asm_annotation.c b/src/mesa/drivers/dri/i965/intel_asm_annotation.c
index 52878fde43e..bb7786ba748 100644
--- a/src/mesa/drivers/dri/i965/intel_asm_annotation.c
+++ b/src/mesa/drivers/dri/i965/intel_asm_annotation.c
@@ -185,6 +185,8 @@ annotation_insert_error(struct annotation_info *annotation, unsigned offset,
    if (!annotation_array_ensure_space(annotation))
       return;
 
+   assume(annotation->ann_count > 0);
+
    for (int i = 0; i < annotation->ann_count; i++) {
       struct annotation *cur = &annotation->ann[i];
       struct annotation *next = &annotation->ann[i + 1];
@@ -206,8 +208,6 @@ annotation_insert_error(struct annotation_info *annotation, unsigned offset,
       break;
    }
 
-   assume(ann != NULL);
-
    if (ann->error)
       ralloc_strcat(&ann->error, error);
    else

From 7a879e422bcdaf89bde286de6c7b9db5c34f7fc3 Mon Sep 17 00:00:00 2001
From: Matt Turner <mattst88@gmail.com>
Date: Fri, 13 Nov 2015 12:16:48 -0800
Subject: [PATCH 274/287] i965: Remove unneeded #includes.

Some of these are no longer needed since all the backends switched to
NIR.
---
 src/mesa/drivers/dri/i965/intel_asm_annotation.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/intel_asm_annotation.c b/src/mesa/drivers/dri/i965/intel_asm_annotation.c
index bb7786ba748..fdd605a7db0 100644
--- a/src/mesa/drivers/dri/i965/intel_asm_annotation.c
+++ b/src/mesa/drivers/dri/i965/intel_asm_annotation.c
@@ -23,12 +23,8 @@
 
 #include "brw_cfg.h"
 #include "brw_eu.h"
-#include "brw_context.h"
 #include "intel_debug.h"
 #include "intel_asm_annotation.h"
-#include "program/prog_print.h"
-#include "program/prog_instruction.h"
-#include "main/macros.h"
 #include "glsl/nir/nir.h"
 
 void

From 758f12fd98dea9a9682becf2d496bd38ef3959e5 Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Tue, 10 Nov 2015 12:36:58 -0800
Subject: [PATCH 275/287] meta/generate_mipmap: Don't leak the sampler object

Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Cc: "10.6 11.0" <mesa-stable@lists.freedesktop.org>
Reviewed-by: Anuj Phogat <anuj.phogat@gmail.com>
---
 src/mesa/drivers/common/meta_generate_mipmap.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/mesa/drivers/common/meta_generate_mipmap.c b/src/mesa/drivers/common/meta_generate_mipmap.c
index 4800278a467..a9da0a21ba3 100644
--- a/src/mesa/drivers/common/meta_generate_mipmap.c
+++ b/src/mesa/drivers/common/meta_generate_mipmap.c
@@ -128,6 +128,8 @@ _mesa_meta_glsl_generate_mipmap_cleanup(struct gen_mipmap_state *mipmap)
    mipmap->VAO = 0;
    _mesa_DeleteBuffers(1, &mipmap->VBO);
    mipmap->VBO = 0;
+   _mesa_DeleteSamplers(1, &mipmap->Sampler);
+   mipmap->Sampler = 0;
 
    _mesa_meta_blit_shader_table_cleanup(&mipmap->shaders);
 }

From 1cb49eedb52c387caf6a0035e5baad29bb55e3ff Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Mon, 2 Nov 2015 14:29:42 -0800
Subject: [PATCH 276/287] i965: Silence unused parameter warnings in
 get_buffer_rect

brw_meta_fast_clear.c: In function 'get_buffer_rect':
brw_meta_fast_clear.c:318:37: warning: unused parameter 'brw' [-Wunused-parameter]
 get_buffer_rect(struct brw_context *brw, struct gl_framebuffer *fb,
                                     ^
brw_meta_fast_clear.c:319:44: warning: unused parameter 'irb' [-Wunused-parameter]
                 struct intel_renderbuffer *irb, struct rect *rect)
                                            ^

Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Anuj Phogat <anuj.phogat@gmail.com>
---
 src/mesa/drivers/dri/i965/brw_meta_fast_clear.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c b/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c
index 69fe7b4aa5b..12e7c32e424 100644
--- a/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c
+++ b/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c
@@ -314,8 +314,7 @@ get_fast_clear_rect(struct gl_framebuffer *fb,
 }
 
 static void
-get_buffer_rect(struct brw_context *brw, struct gl_framebuffer *fb,
-                struct intel_renderbuffer *irb, struct rect *rect)
+get_buffer_rect(const struct gl_framebuffer *fb, struct rect *rect)
 {
    rect->x0 = fb->_Xmin;
    rect->x1 = fb->_Xmax;
@@ -526,12 +525,12 @@ brw_meta_fast_clear(struct brw_context *brw, struct gl_framebuffer *fb,
 
       case REP_CLEAR:
          rep_clear_buffers |= 1 << index;
-         get_buffer_rect(brw, fb, irb, &clear_rect);
+         get_buffer_rect(fb, &clear_rect);
          break;
 
       case PLAIN_CLEAR:
          plain_clear_buffers |= 1 << index;
-         get_buffer_rect(brw, fb, irb, &clear_rect);
+         get_buffer_rect(fb, &clear_rect);
          continue;
       }
    }

From 0df452cd0d9da031d2ef29853d39112fdf8e1d46 Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Wed, 30 Sep 2015 17:17:35 -0700
Subject: [PATCH 277/287] nir/lower_io: Use load_per_vertex_input intrinsics
 for TCS and TES.

Tessellation control shader inputs are an array indexed by the vertex
number, like geometry shader inputs.  There aren't per-patch TCS inputs.

Tessellation evaluation shaders have both per-vertex and per-patch
inputs.  Per-vertex inputs get the new intrinsics; per-patch inputs
continue to use the ordinary load_input intrinsics, as they already
work like we want them to.

v2: Change stage_uses_per_vertex_inputs into is_per_vertex_input(),
    which takes a variable (requested by Jason Ekstrand).

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
---
 src/glsl/nir/nir_lower_io.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/src/glsl/nir/nir_lower_io.c b/src/glsl/nir/nir_lower_io.c
index 688b48f4675..16ba1a10644 100644
--- a/src/glsl/nir/nir_lower_io.c
+++ b/src/glsl/nir/nir_lower_io.c
@@ -68,10 +68,14 @@ nir_assign_var_locations(struct exec_list *var_list, unsigned *size,
  * by a vertex number (such as geometry shader inputs).
  */
 static bool
-stage_uses_per_vertex_inputs(struct lower_io_state *state)
+is_per_vertex_input(struct lower_io_state *state, nir_variable *var)
 {
    gl_shader_stage stage = state->builder.shader->stage;
-   return stage == MESA_SHADER_GEOMETRY;
+
+   return var->data.mode == nir_var_shader_in && !var->data.patch &&
+          (stage == MESA_SHADER_TESS_CTRL ||
+           stage == MESA_SHADER_TESS_EVAL ||
+           stage == MESA_SHADER_GEOMETRY);
 }
 
 static unsigned
@@ -184,8 +188,8 @@ nir_lower_io_block(nir_block *block, void *void_state)
          if (mode != nir_var_shader_in && mode != nir_var_uniform)
             continue;
 
-         bool per_vertex = stage_uses_per_vertex_inputs(state) &&
-                           mode == nir_var_shader_in;
+         bool per_vertex =
+            is_per_vertex_input(state, intrin->variables[0]->var);
 
          nir_ssa_def *indirect;
          nir_ssa_def *vertex_index;

From c51d7d5fe3425b0b1cb551f47979a1e41f1f73d8 Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Fri, 2 Oct 2015 00:11:01 -0700
Subject: [PATCH 278/287] nir/lower_io: Introduce nir_store_per_vertex_output
 intrinsics.

Similar to nir_load_per_vertex_input, but for outputs.  This is not
useful in geometry shaders, but will be useful in tessellation shaders.

v2: Change stage_uses_per_vertex_outputs() to is_per_vertex_output(),
    taking a nir_variable (requested by Jason Ekstrand).

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
---
 src/glsl/nir/nir_intrinsics.h |  1 +
 src/glsl/nir/nir_lower_io.c   | 28 +++++++++++++++++++++++-----
 src/glsl/nir/nir_print.c      |  2 ++
 3 files changed, 26 insertions(+), 5 deletions(-)

diff --git a/src/glsl/nir/nir_intrinsics.h b/src/glsl/nir/nir_intrinsics.h
index 36fb2861c16..26ac7ce9cd7 100644
--- a/src/glsl/nir/nir_intrinsics.h
+++ b/src/glsl/nir/nir_intrinsics.h
@@ -272,6 +272,7 @@ LOAD(ssbo, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE)
              false, 0, 0, 1 + extra_indices, flags)
 
 STORE(output, 0, 0, 0, 0)
+STORE(per_vertex_output, 1, 1, 0, 0)
 STORE(ssbo, 1, 1, 1, 0)
 
 LAST_INTRINSIC(store_ssbo_indirect)
diff --git a/src/glsl/nir/nir_lower_io.c b/src/glsl/nir/nir_lower_io.c
index 16ba1a10644..b7b599da6d4 100644
--- a/src/glsl/nir/nir_lower_io.c
+++ b/src/glsl/nir/nir_lower_io.c
@@ -78,6 +78,14 @@ is_per_vertex_input(struct lower_io_state *state, nir_variable *var)
            stage == MESA_SHADER_GEOMETRY);
 }
 
+static bool
+is_per_vertex_output(struct lower_io_state *state, nir_variable *var)
+{
+   gl_shader_stage stage = state->builder.shader->stage;
+   return var->data.mode == nir_var_shader_out && !var->data.patch &&
+          stage == MESA_SHADER_TESS_CTRL;
+}
+
 static unsigned
 get_io_offset(nir_deref_var *deref, nir_instr *instr,
               nir_ssa_def **vertex_index,
@@ -237,16 +245,23 @@ nir_lower_io_block(nir_block *block, void *void_state)
             continue;
 
          nir_ssa_def *indirect;
+         nir_ssa_def *vertex_index;
+
+         bool per_vertex =
+            is_per_vertex_output(state, intrin->variables[0]->var);
 
          unsigned offset = get_io_offset(intrin->variables[0], &intrin->instr,
-                                         NULL, &indirect, state);
+                                         per_vertex ? &vertex_index : NULL,
+                                         &indirect, state);
          offset += intrin->variables[0]->var->data.driver_location;
 
          nir_intrinsic_op store_op;
-         if (indirect) {
-            store_op = nir_intrinsic_store_output_indirect;
+         if (per_vertex) {
+            store_op = indirect ? nir_intrinsic_store_per_vertex_output_indirect
+                                : nir_intrinsic_store_per_vertex_output;
          } else {
-            store_op = nir_intrinsic_store_output;
+            store_op = indirect ? nir_intrinsic_store_output_indirect
+                                : nir_intrinsic_store_output;
          }
 
          nir_intrinsic_instr *store = nir_intrinsic_instr_create(state->mem_ctx,
@@ -256,8 +271,11 @@ nir_lower_io_block(nir_block *block, void *void_state)
 
          nir_src_copy(&store->src[0], &intrin->src[0], store);
 
+         if (per_vertex)
+            store->src[1] = nir_src_for_ssa(vertex_index);
+
          if (indirect)
-            store->src[1] = nir_src_for_ssa(indirect);
+            store->src[per_vertex ? 2 : 1] = nir_src_for_ssa(indirect);
 
          nir_instr_insert_before(&intrin->instr, &store->instr);
          nir_instr_remove(&intrin->instr);
diff --git a/src/glsl/nir/nir_print.c b/src/glsl/nir/nir_print.c
index 30220c5e48d..23fcafeb7e8 100644
--- a/src/glsl/nir/nir_print.c
+++ b/src/glsl/nir/nir_print.c
@@ -450,6 +450,8 @@ print_intrinsic_instr(nir_intrinsic_instr *instr, print_state *state)
       break;
    case nir_intrinsic_store_output:
    case nir_intrinsic_store_output_indirect:
+   case nir_intrinsic_store_per_vertex_output:
+   case nir_intrinsic_store_per_vertex_output_indirect:
       var_list = &state->shader->outputs;
       break;
    default:

From 134728fdaef9d2a5d072d25b31437ac0fecd9076 Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Mon, 19 Oct 2015 11:44:28 -0700
Subject: [PATCH 279/287] nir: Allow output reads and add the relevant
 intrinsics.

Normally, we rely on nir_lower_outputs_to_temporaries to create shadow
variables for outputs, buffering the results and writing them all out
at the end of the program.  However, this is infeasible for tessellation
control shader outputs.

Tessellation control shaders can generate multiple output vertices, and
write per-vertex outputs.  These are arrays indexed by the vertex
number; each thread only writes one element, but can read any other
element - including those being concurrently written by other threads.
The barrier() intrinsic synchronizes between threads.

Even if we tried to shadow every output element (which is of dubious
value), we'd have to read updated values in at barrier() time, which
means we need to allow output reads.

Most stages should continue using nir_lower_outputs_to_temporaries(),
but in theory drivers could choose not to if they really wanted.

v2: Rebase to accommodate Jason's review feedback.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
---
 src/glsl/nir/nir_intrinsics.h |  2 ++
 src/glsl/nir/nir_lower_io.c   | 23 +++++++++++++++++------
 src/glsl/nir/nir_print.c      |  2 ++
 src/glsl/nir/nir_validate.c   |  2 --
 4 files changed, 21 insertions(+), 8 deletions(-)

diff --git a/src/glsl/nir/nir_intrinsics.h b/src/glsl/nir/nir_intrinsics.h
index 26ac7ce9cd7..b8d7d6c68cb 100644
--- a/src/glsl/nir/nir_intrinsics.h
+++ b/src/glsl/nir/nir_intrinsics.h
@@ -255,6 +255,8 @@ LOAD(ubo, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
 LOAD(input, 0, 1, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
 LOAD(per_vertex_input, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
 LOAD(ssbo, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE)
+LOAD(output, 0, 1, NIR_INTRINSIC_CAN_ELIMINATE)
+LOAD(per_vertex_output, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE)
 
 /*
  * Stores work the same way as loads, except now the first register input is
diff --git a/src/glsl/nir/nir_lower_io.c b/src/glsl/nir/nir_lower_io.c
index b7b599da6d4..8a4177fb9f0 100644
--- a/src/glsl/nir/nir_lower_io.c
+++ b/src/glsl/nir/nir_lower_io.c
@@ -161,6 +161,15 @@ load_op(struct lower_io_state *state,
                              nir_intrinsic_load_input;
       }
       break;
+   case nir_var_shader_out:
+      if (per_vertex) {
+         op = has_indirect ? nir_intrinsic_load_per_vertex_output_indirect :
+                             nir_intrinsic_load_per_vertex_output;
+      } else {
+         op = has_indirect ? nir_intrinsic_load_output_indirect :
+                             nir_intrinsic_load_output;
+      }
+      break;
    case nir_var_uniform:
       op = has_indirect ? nir_intrinsic_load_uniform_indirect :
                           nir_intrinsic_load_uniform;
@@ -191,13 +200,16 @@ nir_lower_io_block(nir_block *block, void *void_state)
       if (state->mode != -1 && state->mode != mode)
          continue;
 
+      if (mode != nir_var_shader_in &&
+          mode != nir_var_shader_out &&
+          mode != nir_var_uniform)
+         continue;
+
       switch (intrin->intrinsic) {
       case nir_intrinsic_load_var: {
-         if (mode != nir_var_shader_in && mode != nir_var_uniform)
-            continue;
-
          bool per_vertex =
-            is_per_vertex_input(state, intrin->variables[0]->var);
+            is_per_vertex_input(state, intrin->variables[0]->var) ||
+            is_per_vertex_output(state, intrin->variables[0]->var);
 
          nir_ssa_def *indirect;
          nir_ssa_def *vertex_index;
@@ -241,8 +253,7 @@ nir_lower_io_block(nir_block *block, void *void_state)
       }
 
       case nir_intrinsic_store_var: {
-         if (intrin->variables[0]->var->data.mode != nir_var_shader_out)
-            continue;
+         assert(mode == nir_var_shader_out);
 
          nir_ssa_def *indirect;
          nir_ssa_def *vertex_index;
diff --git a/src/glsl/nir/nir_print.c b/src/glsl/nir/nir_print.c
index 23fcafeb7e8..f7f5fdf3181 100644
--- a/src/glsl/nir/nir_print.c
+++ b/src/glsl/nir/nir_print.c
@@ -448,6 +448,8 @@ print_intrinsic_instr(nir_intrinsic_instr *instr, print_state *state)
    case nir_intrinsic_load_per_vertex_input_indirect:
       var_list = &state->shader->inputs;
       break;
+   case nir_intrinsic_load_output:
+   case nir_intrinsic_load_output_indirect:
    case nir_intrinsic_store_output:
    case nir_intrinsic_store_output_indirect:
    case nir_intrinsic_store_per_vertex_output:
diff --git a/src/glsl/nir/nir_validate.c b/src/glsl/nir/nir_validate.c
index 51c2529dc38..ed374b921fa 100644
--- a/src/glsl/nir/nir_validate.c
+++ b/src/glsl/nir/nir_validate.c
@@ -405,7 +405,6 @@ validate_intrinsic_instr(nir_intrinsic_instr *instr, validate_state *state)
              (instr->variables[0]->var->data.mode == nir_var_uniform &&
               glsl_get_base_type(type) == GLSL_TYPE_SUBROUTINE));
       assert(instr->num_components == glsl_get_vector_elements(type));
-      assert(instr->variables[0]->var->data.mode != nir_var_shader_out);
       break;
    }
    case nir_intrinsic_store_var: {
@@ -426,7 +425,6 @@ validate_intrinsic_instr(nir_intrinsic_instr *instr, validate_state *state)
       assert(instr->variables[0]->var->data.mode != nir_var_shader_in &&
              instr->variables[0]->var->data.mode != nir_var_uniform &&
              instr->variables[0]->var->data.mode != nir_var_shader_storage);
-      assert(instr->variables[1]->var->data.mode != nir_var_shader_out);
       break;
    default:
       break;

From d12bde0944d1d69401ef1d854aa0ab92b5a6af54 Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Mon, 19 Oct 2015 11:28:15 -0700
Subject: [PATCH 280/287] nir: Don't lower TCS outputs to temporaries.

We'd like to shadow these when possible, but the current code doesn't
work properly for TCS outputs.  For now, disable it.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
---
 src/glsl/nir/nir_lower_outputs_to_temporaries.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/glsl/nir/nir_lower_outputs_to_temporaries.c b/src/glsl/nir/nir_lower_outputs_to_temporaries.c
index 80f43951b5c..9441f4762b6 100644
--- a/src/glsl/nir/nir_lower_outputs_to_temporaries.c
+++ b/src/glsl/nir/nir_lower_outputs_to_temporaries.c
@@ -78,6 +78,9 @@ nir_lower_outputs_to_temporaries(nir_shader *shader)
 {
    struct lower_outputs_state state;
 
+   if (shader->stage == MESA_SHADER_TESS_CTRL)
+      return;
+
    state.shader = shader;
    exec_list_move_nodes_to(&shader->outputs, &state.old_outputs);
 

From 26f9469a46585f64b24fb1037aaae7c757a5e6e1 Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Sat, 7 Nov 2015 22:35:33 -0800
Subject: [PATCH 281/287] nir: Add helpers for getting input/output intrinsic
 sources.

With the many variants of IO intrinsics, particular sources are often in
different locations.  It's convenient to say "give me the indirect
offset" or "give me the vertex index" and have it just work, without
having to think about exactly which kind of intrinsic you have.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
---
 src/glsl/nir/nir.h          |  3 +++
 src/glsl/nir/nir_lower_io.c | 42 +++++++++++++++++++++++++++++++++++++
 2 files changed, 45 insertions(+)

diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
index 4ed2cbd2b67..beabcafef4e 100644
--- a/src/glsl/nir/nir.h
+++ b/src/glsl/nir/nir.h
@@ -1933,6 +1933,9 @@ void nir_assign_var_locations(struct exec_list *var_list,
 void nir_lower_io(nir_shader *shader,
                   nir_variable_mode mode,
                   int (*type_size)(const struct glsl_type *));
+nir_src *nir_get_io_indirect_src(nir_intrinsic_instr *instr);
+nir_src *nir_get_io_vertex_index_src(nir_intrinsic_instr *instr);
+
 void nir_lower_vars_to_ssa(nir_shader *shader);
 
 bool nir_remove_dead_variables(nir_shader *shader);
diff --git a/src/glsl/nir/nir_lower_io.c b/src/glsl/nir/nir_lower_io.c
index 8a4177fb9f0..00a31458310 100644
--- a/src/glsl/nir/nir_lower_io.c
+++ b/src/glsl/nir/nir_lower_io.c
@@ -328,3 +328,45 @@ nir_lower_io(nir_shader *shader, nir_variable_mode mode,
          nir_lower_io_impl(overload->impl, mode, type_size);
    }
 }
+
+/**
+ * Return the indirect source for a load/store indirect intrinsic.
+ */
+nir_src *
+nir_get_io_indirect_src(nir_intrinsic_instr *instr)
+{
+   switch (instr->intrinsic) {
+   case nir_intrinsic_load_input_indirect:
+   case nir_intrinsic_load_output_indirect:
+   case nir_intrinsic_load_uniform_indirect:
+      return &instr->src[0];
+   case nir_intrinsic_load_per_vertex_input_indirect:
+   case nir_intrinsic_load_per_vertex_output_indirect:
+   case nir_intrinsic_store_output_indirect:
+      return &instr->src[1];
+   case nir_intrinsic_store_per_vertex_output_indirect:
+      return &instr->src[2];
+   default:
+      return NULL;
+   }
+}
+
+/**
+ * Return the vertex index source for a load/store per_vertex intrinsic.
+ */
+nir_src *
+nir_get_io_vertex_index_src(nir_intrinsic_instr *instr)
+{
+   switch (instr->intrinsic) {
+   case nir_intrinsic_load_per_vertex_input:
+   case nir_intrinsic_load_per_vertex_output:
+   case nir_intrinsic_load_per_vertex_input_indirect:
+   case nir_intrinsic_load_per_vertex_output_indirect:
+      return &instr->src[0];
+   case nir_intrinsic_store_per_vertex_output:
+   case nir_intrinsic_store_per_vertex_output_indirect:
+      return &instr->src[1];
+   default:
+      return NULL;
+   }
+}

From f88c175a29bb287d41ef90343eb6670525475a06 Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Wed, 11 Nov 2015 22:37:53 -0800
Subject: [PATCH 282/287] i965: Make convert_attr_sources_to_hw_regs handle
 stride == 0.

This makes expressions like component(fs_reg(ATTR, n), 7) get a proper
<0,1,0> region instead of the invalid <0,8,0>.

Nobody uses this today, but I plan to.

v2: Rebase on Matt's changes; simplify.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Matt Turner <mattst88@gmail.com> [v1]
---
 src/mesa/drivers/dri/i965/brw_fs.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index b8c88f73dfa..b8d48dadde9 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -1614,11 +1614,12 @@ fs_visitor::convert_attr_sources_to_hw_regs(fs_inst *inst)
                    inst->src[i].nr +
                    inst->src[i].reg_offset;
 
+         unsigned width = inst->src[i].stride == 0 ? 1 : inst->exec_size;
          struct brw_reg reg =
             stride(byte_offset(retype(brw_vec8_grf(grf, 0), inst->src[i].type),
                                inst->src[i].subreg_offset),
                    inst->exec_size * inst->src[i].stride,
-                   inst->exec_size, inst->src[i].stride);
+                   width, inst->src[i].stride);
          reg.abs = inst->src[i].abs;
          reg.negate = inst->src[i].negate;
 

From a4ba476c30ebcb99694c6167ac9b8af9414cb656 Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Tue, 10 Nov 2015 00:48:33 -0800
Subject: [PATCH 283/287] i965: Print input/output VUE maps on INTEL_DEBUG=vs,
 gs.

I've been carrying around a patch to do this for the last few months,
and it's been exceedingly useful for debugging GS and tessellation
problems.  I've caught lots of bugs by inspecting the interface
expectations of two adjacent stages.

It's not that much spam, so I figure we may as well just print it.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Acked-by: Matt Turner <mattst88@gmail.com>
---
 src/mesa/drivers/dri/i965/brw_compiler.h      |  2 ++
 .../drivers/dri/i965/brw_vec4_gs_visitor.cpp  |  6 +++++
 src/mesa/drivers/dri/i965/brw_vs.c            |  6 ++++-
 src/mesa/drivers/dri/i965/brw_vue_map.c       | 27 +++++++++++++++++++
 4 files changed, 40 insertions(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/brw_compiler.h b/src/mesa/drivers/dri/i965/brw_compiler.h
index f022f3829be..e3a26d6a353 100644
--- a/src/mesa/drivers/dri/i965/brw_compiler.h
+++ b/src/mesa/drivers/dri/i965/brw_compiler.h
@@ -458,6 +458,8 @@ struct brw_vue_map {
    int num_slots;
 };
 
+void brw_print_vue_map(FILE *fp, const struct brw_vue_map *vue_map);
+
 /**
  * Convert a VUE slot number into a byte offset within the VUE.
  */
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
index 49c10837334..1a09f76a20c 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
@@ -812,6 +812,12 @@ brw_compile_gs(const struct brw_compiler *compiler, void *log_data,
    /* Now that prog_data setup is done, we are ready to actually compile the
     * program.
     */
+   if (unlikely(INTEL_DEBUG & DEBUG_GS)) {
+      fprintf(stderr, "GS Input ");
+      brw_print_vue_map(stderr, &c.input_vue_map);
+      fprintf(stderr, "GS Output ");
+      brw_print_vue_map(stderr, &prog_data->base.vue_map);
+   }
 
    if (compiler->scalar_gs) {
       /* TODO: Support instanced GS.  We have basically no tests... */
diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c
index 0b805b1c0c4..967448e0e41 100644
--- a/src/mesa/drivers/dri/i965/brw_vs.c
+++ b/src/mesa/drivers/dri/i965/brw_vs.c
@@ -159,9 +159,13 @@ brw_codegen_vs_prog(struct brw_context *brw,
       start_time = get_time();
    }
 
-   if (unlikely(INTEL_DEBUG & DEBUG_VS))
+   if (unlikely(INTEL_DEBUG & DEBUG_VS)) {
       brw_dump_ir("vertex", prog, vs ? &vs->base : NULL, &vp->program.Base);
 
+      fprintf(stderr, "VS Output ");
+      brw_print_vue_map(stderr, &prog_data.base.vue_map);
+   }
+
    int st_index = -1;
    if (INTEL_DEBUG & DEBUG_SHADER_TIME)
       st_index = brw_get_shader_time_index(brw, prog, &vp->program.Base, ST_VS);
diff --git a/src/mesa/drivers/dri/i965/brw_vue_map.c b/src/mesa/drivers/dri/i965/brw_vue_map.c
index 45662bd5afc..edb16087410 100644
--- a/src/mesa/drivers/dri/i965/brw_vue_map.c
+++ b/src/mesa/drivers/dri/i965/brw_vue_map.c
@@ -178,3 +178,30 @@ brw_compute_vue_map(const struct brw_device_info *devinfo,
 
    vue_map->num_slots = separate ? slot + 1 : slot;
 }
+
+static const char *
+varying_name(brw_varying_slot slot)
+{
+   if (slot < VARYING_SLOT_MAX)
+      return gl_varying_slot_name(slot);
+
+   static const char *brw_names[] = {
+      [BRW_VARYING_SLOT_NDC - VARYING_SLOT_MAX] = "BRW_VARYING_SLOT_NDC",
+      [BRW_VARYING_SLOT_PAD - VARYING_SLOT_MAX] = "BRW_VARYING_SLOT_PAD",
+      [BRW_VARYING_SLOT_PNTC - VARYING_SLOT_MAX] = "BRW_VARYING_SLOT_PNTC",
+   };
+
+   return brw_names[slot - VARYING_SLOT_MAX];
+}
+
+void
+brw_print_vue_map(FILE *fp, const struct brw_vue_map *vue_map)
+{
+   fprintf(fp, "VUE map (%d slots, %s)\n",
+           vue_map->num_slots, vue_map->separate ? "SSO" : "non-SSO");
+   for (int i = 0; i < vue_map->num_slots; i++) {
+      fprintf(fp, "  [%d] %s\n", i,
+              varying_name(vue_map->slot_to_varying[i]));
+   }
+   fprintf(fp, "\n");
+}

From 511de1a80cedc0add386dad79cce56dd68d2f611 Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Thu, 12 Nov 2015 13:02:05 -0800
Subject: [PATCH 284/287] glsl: Allow implicit int -> uint conversions for the
 % operator.

GLSL 4.00 and GL_ARB_gpu_shader5 introduced a new int -> uint implicit
conversion rule and updated the rules for modulus to use them.  (In
earlier languages, none of the implicit conversion rules did anything
relevant, so there was no point in applying them.)

This allows expressions such as:

   int foo;
   uint bar;
   uint mod = foo % bar;

Cc: mesa-stable@lists.freedesktop.org
Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
---
 src/glsl/ast_to_hir.cpp | 37 ++++++++++++++++++++++++++++---------
 1 file changed, 28 insertions(+), 9 deletions(-)

diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp
index 51ea183147d..f5292435058 100644
--- a/src/glsl/ast_to_hir.cpp
+++ b/src/glsl/ast_to_hir.cpp
@@ -538,18 +538,20 @@ bit_logic_result_type(const struct glsl_type *type_a,
 }
 
 static const struct glsl_type *
-modulus_result_type(const struct glsl_type *type_a,
-                    const struct glsl_type *type_b,
+modulus_result_type(ir_rvalue * &value_a, ir_rvalue * &value_b,
                     struct _mesa_glsl_parse_state *state, YYLTYPE *loc)
 {
+   const glsl_type *type_a = value_a->type;
+   const glsl_type *type_b = value_b->type;
+
    if (!state->check_version(130, 300, loc, "operator '%%' is reserved")) {
       return glsl_type::error_type;
    }
 
-   /* From GLSL 1.50 spec, page 56:
+   /* Section 5.9 (Expressions) of the GLSL 4.00 specification says:
+    *
     *    "The operator modulus (%) operates on signed or unsigned integers or
-    *    integer vectors. The operand types must both be signed or both be
-    *    unsigned."
+    *    integer vectors."
     */
    if (!type_a->is_integer()) {
       _mesa_glsl_error(loc, state, "LHS of operator %% must be an integer");
@@ -559,11 +561,28 @@ modulus_result_type(const struct glsl_type *type_a,
       _mesa_glsl_error(loc, state, "RHS of operator %% must be an integer");
       return glsl_type::error_type;
    }
-   if (type_a->base_type != type_b->base_type) {
+
+   /*    "If the fundamental types in the operands do not match, then the
+    *    conversions from section 4.1.10 "Implicit Conversions" are applied
+    *    to create matching types."
+    *
+    * Note that GLSL 4.00 (and GL_ARB_gpu_shader5) introduced implicit
+    * int -> uint conversion rules.  Prior to that, there were no implicit
+    * conversions.  So it's harmless to apply them universally - no implicit
+    * conversions will exist.  If the types don't match, we'll receive false,
+    * and raise an error, satisfying the GLSL 1.50 spec, page 56:
+    *
+    *    "The operand types must both be signed or unsigned."
+    */
+   if (!apply_implicit_conversion(type_a, value_b, state) &&
+       !apply_implicit_conversion(type_b, value_a, state)) {
       _mesa_glsl_error(loc, state,
-                       "operands of %% must have the same base type");
+                       "could not implicitly convert operands to "
+                       "modulus (%%) operator");
       return glsl_type::error_type;
    }
+   type_a = value_a->type;
+   type_b = value_b->type;
 
    /*    "The operands cannot be vectors of differing size. If one operand is
     *    a scalar and the other vector, then the scalar is applied component-
@@ -1311,7 +1330,7 @@ ast_expression::do_hir(exec_list *instructions,
       op[0] = this->subexpressions[0]->hir(instructions, state);
       op[1] = this->subexpressions[1]->hir(instructions, state);
 
-      type = modulus_result_type(op[0]->type, op[1]->type, state, & loc);
+      type = modulus_result_type(op[0], op[1], state, &loc);
 
       assert(operations[this->oper] == ir_binop_mod);
 
@@ -1558,7 +1577,7 @@ ast_expression::do_hir(exec_list *instructions,
       op[0] = this->subexpressions[0]->hir(instructions, state);
       op[1] = this->subexpressions[1]->hir(instructions, state);
 
-      type = modulus_result_type(op[0]->type, op[1]->type, state, & loc);
+      type = modulus_result_type(op[0], op[1], state, &loc);
 
       assert(operations[this->oper] == ir_binop_mod);
 

From 5480bbd90ea288877b6e56d4860feb8f97bcba80 Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Sat, 7 Nov 2015 01:37:33 -0800
Subject: [PATCH 285/287] i965: Add a SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT
 opcode.

We need to use per-slot offsets when there's non-uniform indexing,
as each SIMD channel could have a different index.  We want to use
them for any non-constant index (even if uniform), as it lives in
the message header instead of the descriptor, allowing us to set
offsets in GRFs rather than immediates.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Abdiel Janulgue <abdiel.janulgue@linux.intel.com>
---
 src/mesa/drivers/dri/i965/brw_defines.h        | 7 ++-----
 src/mesa/drivers/dri/i965/brw_fs.cpp           | 2 ++
 src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 4 ++++
 src/mesa/drivers/dri/i965/brw_shader.cpp       | 2 ++
 4 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index 5044982caec..6484484ed34 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -1055,13 +1055,10 @@ enum opcode {
    SHADER_OPCODE_GEN7_SCRATCH_READ,
 
    /**
-    * Gen8+ SIMD8 URB Read message.
-    *
-    * Source 0: The header register, containing URB handles (g1).
-    *
-    * Currently only supports constant offsets, in inst->offset.
+    * Gen8+ SIMD8 URB Read messages.
     */
    SHADER_OPCODE_URB_READ_SIMD8,
+   SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT,
 
    SHADER_OPCODE_URB_WRITE_SIMD8,
    SHADER_OPCODE_URB_WRITE_SIMD8_PER_SLOT,
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index b8d48dadde9..80b8c8e1207 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -285,6 +285,7 @@ fs_inst::is_send_from_grf() const
    case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED:
    case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED_PER_SLOT:
    case SHADER_OPCODE_URB_READ_SIMD8:
+   case SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT:
       return true;
    case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD:
       return src[1].file == VGRF;
@@ -807,6 +808,7 @@ fs_inst::regs_read(int arg) const
    case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED:
    case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED_PER_SLOT:
    case SHADER_OPCODE_URB_READ_SIMD8:
+   case SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT:
    case SHADER_OPCODE_UNTYPED_ATOMIC:
    case SHADER_OPCODE_UNTYPED_SURFACE_READ:
    case SHADER_OPCODE_UNTYPED_SURFACE_WRITE:
diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
index e9860212579..139cda3ca59 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -387,6 +387,9 @@ fs_generator::generate_urb_read(fs_inst *inst,
    brw_inst_set_sfid(p->devinfo, send, BRW_SFID_URB);
    brw_inst_set_urb_opcode(p->devinfo, send, GEN8_URB_OPCODE_SIMD8_READ);
 
+   if (inst->opcode == SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT)
+      brw_inst_set_urb_per_slot_offset(p->devinfo, send, true);
+
    brw_inst_set_mlen(p->devinfo, send, inst->mlen);
    brw_inst_set_rlen(p->devinfo, send, inst->regs_written);
    brw_inst_set_header_present(p->devinfo, send, true);
@@ -2077,6 +2080,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
 	 break;
 
       case SHADER_OPCODE_URB_READ_SIMD8:
+      case SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT:
          generate_urb_read(inst, dst, src[0]);
          break;
 
diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp
index a0c74a21540..a438e1881d5 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -429,6 +429,8 @@ brw_instruction_name(enum opcode op)
       return "gen8_urb_write_simd8_masked_per_slot";
    case SHADER_OPCODE_URB_READ_SIMD8:
       return "urb_read_simd8";
+   case SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT:
+      return "urb_read_simd8_per_slot";
 
    case SHADER_OPCODE_FIND_LIVE_CHANNEL:
       return "find_live_channel";

From 3a0fef0005eca63c6f8067d55145b8e884221cfa Mon Sep 17 00:00:00 2001
From: Vinson Lee <vlee@freedesktop.org>
Date: Mon, 2 Nov 2015 01:23:59 -0800
Subject: [PATCH 286/287] nir: Silence GCC maybe-uninitialized warnings.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

nir/nir_control_flow.c: In function ‘split_block_cursor.isra.11’:
nir/nir_control_flow.c:460:15: warning: ‘after’ may be used uninitialized in this function [-Wmaybe-uninitialized]
       *_after = after;
               ^
nir/nir_control_flow.c:458:16: warning: ‘before’ may be used uninitialized in this function [-Wmaybe-uninitialized]
       *_before = before;
                ^

Signed-off-by: Vinson Lee <vlee@freedesktop.org>
Reviewed-by: Connor Abbott <cwabbott0@gmail.com>
---
 src/glsl/nir/nir_control_flow.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/glsl/nir/nir_control_flow.c b/src/glsl/nir/nir_control_flow.c
index 7f51c4faf49..96395a41615 100644
--- a/src/glsl/nir/nir_control_flow.c
+++ b/src/glsl/nir/nir_control_flow.c
@@ -452,6 +452,9 @@ split_block_cursor(nir_cursor cursor,
          before = split_block_before_instr(nir_instr_next(cursor.instr));
       }
       break;
+
+   default:
+      unreachable("not reached");
    }
 
    if (_before)

From f94e1d97381ec787c2abbbcd5265252596217e33 Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Sat, 14 Nov 2015 10:28:55 -0500
Subject: [PATCH 287/287] nouveau: don't expose HEVC decoding support

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
Cc: mesa-stable@lists.freedesktop.org
---
 src/gallium/drivers/nouveau/nouveau_vp3_video.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/gallium/drivers/nouveau/nouveau_vp3_video.c b/src/gallium/drivers/nouveau/nouveau_vp3_video.c
index f3a64b22c57..4652e56c49a 100644
--- a/src/gallium/drivers/nouveau/nouveau_vp3_video.c
+++ b/src/gallium/drivers/nouveau/nouveau_vp3_video.c
@@ -437,6 +437,7 @@ nouveau_vp3_screen_get_video_param(struct pipe_screen *pscreen,
       /* VP3 does not support MPEG4, VP4+ do. */
       return entrypoint == PIPE_VIDEO_ENTRYPOINT_BITSTREAM &&
          profile >= PIPE_VIDEO_PROFILE_MPEG1 &&
+         profile < PIPE_VIDEO_PROFILE_HEVC_MAIN &&
          (!vp3 || codec != PIPE_VIDEO_FORMAT_MPEG4) &&
          firmware_present(pscreen, profile);
    case PIPE_VIDEO_CAP_NPOT_TEXTURES: