anv: Rework push constant handling

This substantially reworks both the state setup side of push constant
handling and the pipeline compile side.  The fundamental change here is
that we're no longer respecting the prog_data::param array and instead
are just instructing the back-end compiler to leave the array alone.
This makes the state setup side substantially simpler because we can now
just memcpy the whole block of push constants and don't have to
upload one DWORD at a time.

This also means that we can compute the full push constant layout
up-front and just trust the back-end compiler to not mess with it.
Maybe one day we'll decide that the back-end compiler can do useful
things there again but for now, this is functionally no different from
what we had before this commit and makes the NIR handling cleaner.

Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Jason Ekstrand 2019-11-07 17:16:14 -06:00
parent ca91ab8015
commit 9baa33cef0
11 changed files with 176 additions and 228 deletions
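To make the first paragraph of the message concrete, here is a minimal standalone sketch (not the driver code; the param table and block size are simplified stand-ins) of the difference between the old per-DWORD upload and the new whole-block copy:

#include <stdint.h>
#include <string.h>

/* Old style, simplified: every push DWORD was resolved through a per-shader
 * param[] table before being written into the upload buffer. */
static void
upload_per_dword(uint32_t *map, const uint32_t *push_data,
                 const uint32_t *param, unsigned nr_params)
{
   for (unsigned i = 0; i < nr_params; i++)
      map[i] = push_data[param[i]];
}

/* New style: the layout is decided up front, so state setup just copies the
 * whole anv_push_constants block in one go. */
static void
upload_whole_block(void *map, const void *push_data, size_t size)
{
   memcpy(map, push_data, size);
}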


@@ -260,7 +260,6 @@ VULKAN_FILES := \
vulkan/anv_nir_apply_pipeline_layout.c \
vulkan/anv_nir_compute_push_layout.c \
vulkan/anv_nir_lower_multiview.c \
vulkan/anv_nir_lower_push_constants.c \
vulkan/anv_nir_lower_ycbcr_textures.c \
vulkan/anv_pass.c \
vulkan/anv_perf.c \


@@ -745,71 +745,18 @@ anv_cmd_buffer_merge_dynamic(struct anv_cmd_buffer *cmd_buffer,
return state;
}
static uint32_t
anv_push_constant_value(const struct anv_cmd_pipeline_state *state,
const struct anv_push_constants *data, uint32_t param)
{
if (BRW_PARAM_IS_BUILTIN(param)) {
switch (param) {
case BRW_PARAM_BUILTIN_ZERO:
return 0;
case BRW_PARAM_BUILTIN_BASE_WORK_GROUP_ID_X:
return data->cs.base_work_group_id[0];
case BRW_PARAM_BUILTIN_BASE_WORK_GROUP_ID_Y:
return data->cs.base_work_group_id[1];
case BRW_PARAM_BUILTIN_BASE_WORK_GROUP_ID_Z:
return data->cs.base_work_group_id[2];
default:
unreachable("Invalid param builtin");
}
} else if (ANV_PARAM_IS_PUSH(param)) {
uint32_t offset = ANV_PARAM_PUSH_OFFSET(param);
assert(offset % sizeof(uint32_t) == 0);
if (offset < sizeof(data->client_data))
return *(uint32_t *)((uint8_t *)data + offset);
else
return 0;
} else if (ANV_PARAM_IS_DYN_OFFSET(param)) {
unsigned idx = ANV_PARAM_DYN_OFFSET_IDX(param);
assert(idx < MAX_DYNAMIC_BUFFERS);
return data->dynamic_offsets[idx];
}
assert(!"Invalid param");
return 0;
}
struct anv_state
anv_cmd_buffer_push_constants(struct anv_cmd_buffer *cmd_buffer,
gl_shader_stage stage)
{
struct anv_cmd_pipeline_state *pipeline_state = &cmd_buffer->state.gfx.base;
struct anv_pipeline *pipeline = cmd_buffer->state.gfx.base.pipeline;
/* If we don't have this stage, bail. */
if (!anv_pipeline_has_stage(pipeline, stage))
return (struct anv_state) { .offset = 0 };
struct anv_push_constants *data =
&cmd_buffer->state.push_constants[stage];
const struct brw_stage_prog_data *prog_data =
pipeline->shaders[stage]->prog_data;
/* If we don't actually have any push constants, bail. */
if (prog_data == NULL || prog_data->nr_params == 0)
return (struct anv_state) { .offset = 0 };
struct anv_state state =
anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
prog_data->nr_params * sizeof(float),
sizeof(struct anv_push_constants),
32 /* bottom 5 bits MBZ */);
/* Walk through the param array and fill the buffer with data */
uint32_t *u32_map = state.map;
for (unsigned i = 0; i < prog_data->nr_params; i++) {
u32_map[i] = anv_push_constant_value(pipeline_state, data,
prog_data->param[i]);
}
memcpy(state.map, data, sizeof(struct anv_push_constants));
return state;
}
@@ -817,14 +764,13 @@ anv_cmd_buffer_push_constants(struct anv_cmd_buffer *cmd_buffer,
struct anv_state
anv_cmd_buffer_cs_push_constants(struct anv_cmd_buffer *cmd_buffer)
{
struct anv_cmd_pipeline_state *pipeline_state = &cmd_buffer->state.compute.base;
struct anv_push_constants *data =
&cmd_buffer->state.push_constants[MESA_SHADER_COMPUTE];
struct anv_pipeline *pipeline = cmd_buffer->state.compute.base.pipeline;
const struct brw_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline);
const struct brw_stage_prog_data *prog_data = &cs_prog_data->base;
const struct anv_push_range *range =
&pipeline->shaders[MESA_SHADER_COMPUTE]->bind_map.push_ranges[0];
/* If we don't actually have any push constants, bail. */
if (cs_prog_data->push.total.size == 0)
return (struct anv_state) { .offset = 0 };
@@ -837,33 +783,25 @@ anv_cmd_buffer_cs_push_constants(struct anv_cmd_buffer *cmd_buffer)
aligned_total_push_constants_size,
push_constant_alignment);
/* Walk through the param array and fill the buffer with data */
uint32_t *u32_map = state.map;
void *dst = state.map;
const void *src = (char *)data + (range->start * 32);
if (cs_prog_data->push.cross_thread.size > 0) {
for (unsigned i = 0;
i < cs_prog_data->push.cross_thread.dwords;
i++) {
assert(prog_data->param[i] != BRW_PARAM_BUILTIN_SUBGROUP_ID);
u32_map[i] = anv_push_constant_value(pipeline_state, data,
prog_data->param[i]);
}
memcpy(dst, src, cs_prog_data->push.cross_thread.size);
dst += cs_prog_data->push.cross_thread.size;
src += cs_prog_data->push.cross_thread.size;
}
if (cs_prog_data->push.per_thread.size > 0) {
for (unsigned t = 0; t < cs_prog_data->threads; t++) {
unsigned dst =
8 * (cs_prog_data->push.per_thread.regs * t +
cs_prog_data->push.cross_thread.regs);
unsigned src = cs_prog_data->push.cross_thread.dwords;
for ( ; src < prog_data->nr_params; src++, dst++) {
if (prog_data->param[src] == BRW_PARAM_BUILTIN_SUBGROUP_ID) {
u32_map[dst] = t;
} else {
u32_map[dst] = anv_push_constant_value(pipeline_state, data,
prog_data->param[src]);
}
}
memcpy(dst, src, cs_prog_data->push.per_thread.size);
uint32_t *subgroup_id = dst +
offsetof(struct anv_push_constants, cs.subgroup_id) -
(range->start * 32 + cs_prog_data->push.cross_thread.size);
*subgroup_id = t;
dst += cs_prog_data->push.per_thread.size;
}
}
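As an aside, a standalone sketch of the buffer layout this new compute path produces; the sizes and the subgroup-ID offset below are invented stand-ins for the brw_cs_prog_data::push fields and the offsetof() arithmetic above:

#include <stdint.h>
#include <string.h>

#define CROSS_THREAD_SIZE  64  /* bytes copied once, shared by all threads (assumed) */
#define PER_THREAD_SIZE    32  /* bytes repeated once per thread (assumed) */
#define SUBGROUP_ID_OFFSET 16  /* offset of cs.subgroup_id within the per-thread slice (assumed) */

static void
fill_cs_push_buffer(void *map, const void *push_data, unsigned threads)
{
   char *dst = map;
   const char *src = push_data;

   /* The cross-thread portion is copied exactly once. */
   memcpy(dst, src, CROSS_THREAD_SIZE);
   dst += CROSS_THREAD_SIZE;
   src += CROSS_THREAD_SIZE;

   /* The per-thread portion is repeated, with each copy getting its own
    * subgroup ID patched in. */
   for (unsigned t = 0; t < threads; t++) {
      memcpy(dst, src, PER_THREAD_SIZE);
      *(uint32_t *)(dst + SUBGROUP_ID_OFFSET) = t;
      dst += PER_THREAD_SIZE;
   }
}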


@@ -560,7 +560,7 @@ anv_physical_device_init(struct anv_physical_device *device,
device->compiler->constant_buffer_0_is_relative =
device->info.gen < 8 || !device->has_context_isolation;
device->compiler->supports_shader_constants = true;
device->compiler->compact_params = true;
device->compiler->compact_params = false;
/* Broadwell PRM says:
*


@@ -31,8 +31,6 @@
extern "C" {
#endif
void anv_nir_lower_push_constants(nir_shader *shader);
bool anv_nir_lower_multiview(nir_shader *shader, uint32_t view_mask);
bool anv_nir_lower_ycbcr_textures(nir_shader *shader,
@@ -59,12 +57,16 @@ void anv_nir_apply_pipeline_layout(const struct anv_physical_device *pdevice,
struct brw_stage_prog_data *prog_data,
struct anv_pipeline_bind_map *map);
void anv_compute_push_layout(const struct anv_physical_device *pdevice,
struct brw_stage_prog_data *prog_data,
struct anv_pipeline_bind_map *map);
void anv_nir_compute_push_layout(const struct anv_physical_device *pdevice,
nir_shader *nir,
struct brw_stage_prog_data *prog_data,
struct anv_pipeline_bind_map *map,
void *mem_ctx);
bool anv_nir_add_base_work_group_id(nir_shader *shader,
struct brw_cs_prog_data *prog_data);
void anv_nir_validate_push_layout(struct brw_stage_prog_data *prog_data,
struct anv_pipeline_bind_map *map);
bool anv_nir_add_base_work_group_id(nir_shader *shader);
#ifdef __cplusplus
}


@@ -26,13 +26,11 @@
#include "compiler/brw_compiler.h"
bool
anv_nir_add_base_work_group_id(nir_shader *shader,
struct brw_cs_prog_data *prog_data)
anv_nir_add_base_work_group_id(nir_shader *shader)
{
assert(shader->info.stage == MESA_SHADER_COMPUTE);
nir_builder b;
int base_id_offset = -1;
bool progress = false;
nir_foreach_function(function, shader) {
if (!function->impl)
@@ -51,27 +49,14 @@ anv_nir_add_base_work_group_id(nir_shader *shader,
b.cursor = nir_after_instr(&load_id->instr);
if (base_id_offset < 0) {
/* If we don't have a set of BASE_WORK_GROUP_ID params,
* add them.
*/
assert(shader->num_uniforms == prog_data->base.nr_params * 4);
uint32_t *param =
brw_stage_prog_data_add_params(&prog_data->base, 3);
param[0] = BRW_PARAM_BUILTIN_BASE_WORK_GROUP_ID_X;
param[1] = BRW_PARAM_BUILTIN_BASE_WORK_GROUP_ID_Y;
param[2] = BRW_PARAM_BUILTIN_BASE_WORK_GROUP_ID_Z;
base_id_offset = shader->num_uniforms;
shader->num_uniforms += 12;
}
nir_intrinsic_instr *load_base =
nir_intrinsic_instr_create(shader, nir_intrinsic_load_uniform);
nir_intrinsic_instr_create(shader, nir_intrinsic_load_push_constant);
load_base->num_components = 3;
load_base->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
nir_ssa_dest_init(&load_base->instr, &load_base->dest, 3, 32, NULL);
nir_intrinsic_set_base(load_base, base_id_offset);
nir_intrinsic_set_base(load_base,
offsetof(struct anv_push_constants,
cs.base_work_group_id));
nir_intrinsic_set_range(load_base, 3 * sizeof(uint32_t));
nir_builder_instr_insert(&b, &load_base->instr);
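Functionally, the lowered shader ends up computing its workgroup ID as the hardware-generated ID plus the base pushed into cs.base_work_group_id (used for vkCmdDispatchBase). A one-line sketch of that semantics, not the NIR itself:

#include <stdint.h>

/* gl_WorkGroupID as seen by the shader = hardware ID + pushed base. */
static inline void
add_base_work_group_id_example(const uint32_t hw_id[3], const uint32_t base[3],
                               uint32_t out[3])
{
   for (int i = 0; i < 3; i++)
      out[i] = hw_id[i] + base[i];
}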


@@ -46,9 +46,8 @@ struct apply_pipeline_layout_state {
/* Place to flag lowered instructions so we don't lower them twice */
struct set *lowered_instrs;
int dynamic_offset_uniform_start;
bool uses_constants;
bool has_dynamic_buffers;
uint8_t constants_offset;
struct {
bool desc_buffer_used;
@@ -564,7 +563,7 @@ lower_load_vulkan_descriptor(nir_intrinsic_instr *intrin,
if (!state->add_bounds_checks)
desc = nir_pack_64_2x32(b, nir_channels(b, desc, 0x3));
if (state->dynamic_offset_uniform_start >= 0) {
if (state->has_dynamic_buffers) {
/* This shader has dynamic offsets and we have no way of knowing
* (save from the dynamic offset base index) if this buffer has a
* dynamic offset.
@@ -598,8 +597,10 @@ lower_load_vulkan_descriptor(nir_intrinsic_instr *intrin,
}
nir_intrinsic_instr *dyn_load =
nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_uniform);
nir_intrinsic_set_base(dyn_load, state->dynamic_offset_uniform_start);
nir_intrinsic_instr_create(b->shader,
nir_intrinsic_load_push_constant);
nir_intrinsic_set_base(dyn_load, offsetof(struct anv_push_constants,
dynamic_offsets));
nir_intrinsic_set_range(dyn_load, MAX_DYNAMIC_BUFFERS * 4);
dyn_load->src[0] = nir_src_for_ssa(nir_imul_imm(b, dyn_offset_idx, 4));
dyn_load->num_components = 1;
@@ -1119,7 +1120,6 @@ anv_nir_apply_pipeline_layout(const struct anv_physical_device *pdevice,
.add_bounds_checks = robust_buffer_access,
.ssbo_addr_format = anv_nir_ssbo_addr_format(pdevice, robust_buffer_access),
.lowered_instrs = _mesa_pointer_set_create(mem_ctx),
.dynamic_offset_uniform_start = -1,
};
for (unsigned s = 0; s < layout->num_sets; s++) {
@@ -1209,18 +1209,16 @@ anv_nir_apply_pipeline_layout(const struct anv_physical_device *pdevice,
qsort(infos, used_binding_count, sizeof(struct binding_info),
compare_binding_infos);
bool have_dynamic_buffers = false;
for (unsigned i = 0; i < used_binding_count; i++) {
unsigned set = infos[i].set, b = infos[i].binding;
struct anv_descriptor_set_binding_layout *binding =
&layout->set[set].layout->binding[b];
if (binding->dynamic_offset_index >= 0)
have_dynamic_buffers = true;
const uint32_t array_size = binding->array_size;
if (binding->dynamic_offset_index >= 0)
state.has_dynamic_buffers = true;
if (binding->data & ANV_DESCRIPTOR_SURFACE_STATE) {
if (map->surface_count + array_size > MAX_BINDING_TABLE_SIZE ||
anv_descriptor_requires_bindless(pdevice, binding, false)) {
@@ -1290,16 +1288,6 @@ anv_nir_apply_pipeline_layout(const struct anv_physical_device *pdevice,
}
}
if (have_dynamic_buffers) {
state.dynamic_offset_uniform_start = shader->num_uniforms;
uint32_t *param = brw_stage_prog_data_add_params(prog_data,
MAX_DYNAMIC_BUFFERS);
for (unsigned i = 0; i < MAX_DYNAMIC_BUFFERS; i++)
param[i] = ANV_PARAM_DYN_OFFSET(i);
shader->num_uniforms += MAX_DYNAMIC_BUFFERS * 4;
assert(shader->num_uniforms == prog_data->nr_params * 4);
}
nir_foreach_variable(var, &shader->uniforms) {
const struct glsl_type *glsl_type = glsl_without_array(var->type);
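For reference, the dynamic-offset load above amounts to indexing the dynamic_offsets array inside the pushed block: the intrinsic's base is offsetof(struct anv_push_constants, dynamic_offsets) and the source adds dyn_offset_idx * 4. A small sketch, with an assumed block shape and buffer count:

#include <stddef.h>
#include <stdint.h>

#define EXAMPLE_MAX_DYNAMIC_BUFFERS 16  /* assumed; not the driver's macro */

struct example_push_block {
   uint8_t  client_data[128];  /* assumed client push-constant size */
   uint32_t dynamic_offsets[EXAMPLE_MAX_DYNAMIC_BUFFERS];
};

/* Byte offset within the push block that the lowered load reads. */
static size_t
dynamic_offset_location(uint32_t dyn_offset_idx)
{
   return offsetof(struct example_push_block, dynamic_offsets) +
          dyn_offset_idx * sizeof(uint32_t);
}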


@@ -25,16 +25,111 @@
#include "compiler/brw_nir.h"
void
anv_compute_push_layout(const struct anv_physical_device *pdevice,
struct brw_stage_prog_data *prog_data,
struct anv_pipeline_bind_map *map)
anv_nir_compute_push_layout(const struct anv_physical_device *pdevice,
nir_shader *nir,
struct brw_stage_prog_data *prog_data,
struct anv_pipeline_bind_map *map,
void *mem_ctx)
{
memset(map->push_ranges, 0, sizeof(map->push_ranges));
unsigned push_start = UINT_MAX, push_end = 0;
nir_foreach_function(function, nir) {
if (!function->impl)
continue;
nir_foreach_block(block, function->impl) {
nir_foreach_instr(instr, block) {
if (instr->type != nir_instr_type_intrinsic)
continue;
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
if (intrin->intrinsic != nir_intrinsic_load_push_constant)
continue;
unsigned base = nir_intrinsic_base(intrin);
unsigned range = nir_intrinsic_range(intrin);
push_start = MIN2(push_start, base);
push_end = MAX2(push_end, base + range);
}
}
}
const bool has_push_intrinsic = push_start <= push_end;
if (nir->info.stage == MESA_SHADER_COMPUTE) {
/* For compute shaders, we always have to have the subgroup ID. The
* back-end compiler will "helpfully" add it for us in the last push
* constant slot. Yes, there is an off-by-one error here but that's
* because the back-end will add it so we want to claim the number of
* push constants one dword less than the full amount including
* gl_SubgroupId.
*/
assert(push_end <= offsetof(struct anv_push_constants, cs.subgroup_id));
push_end = offsetof(struct anv_push_constants, cs.subgroup_id);
}
/* Align push_start down to a 32B boundary and make it no larger than
* push_end (no push constants is indicated by push_start = UINT_MAX).
*/
push_start = MIN2(push_start, push_end);
push_start &= ~31u;
if (has_push_intrinsic) {
nir_foreach_function(function, nir) {
if (!function->impl)
continue;
nir_foreach_block(block, function->impl) {
nir_foreach_instr(instr, block) {
if (instr->type != nir_instr_type_intrinsic)
continue;
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
if (intrin->intrinsic != nir_intrinsic_load_push_constant)
continue;
intrin->intrinsic = nir_intrinsic_load_uniform;
nir_intrinsic_set_base(intrin,
nir_intrinsic_base(intrin) -
push_start);
}
}
}
}
/* For vec4 our push data size needs to be aligned to a vec4 and for
* scalar, it needs to be aligned to a DWORD.
*/
const unsigned align =
pdevice->compiler->scalar_stage[nir->info.stage] ? 4 : 16;
nir->num_uniforms = ALIGN(push_end - push_start, align);
prog_data->nr_params = nir->num_uniforms / 4;
prog_data->param = ralloc_array(mem_ctx, uint32_t, prog_data->nr_params);
struct anv_push_range push_constant_range = {
.set = ANV_DESCRIPTOR_SET_PUSH_CONSTANTS,
.length = DIV_ROUND_UP(prog_data->nr_params, 8),
.start = push_start / 32,
.length = DIV_ROUND_UP(push_end - push_start, 32),
};
if (pdevice->info.gen >= 8 || pdevice->info.is_haswell) {
if ((pdevice->info.gen >= 8 || pdevice->info.is_haswell) &&
nir->info.stage != MESA_SHADER_COMPUTE) {
brw_nir_analyze_ubo_ranges(pdevice->compiler, nir, NULL,
prog_data->ubo_ranges);
/* We can push at most 64 registers worth of data. The back-end
* compiler would do this fixup for us but we'd like to calculate
* the push constant layout ourselves.
*/
unsigned total_push_regs = push_constant_range.length;
for (unsigned i = 0; i < 4; i++) {
if (total_push_regs + prog_data->ubo_ranges[i].length > 64)
prog_data->ubo_ranges[i].length = 64 - total_push_regs;
total_push_regs += prog_data->ubo_ranges[i].length;
}
assert(total_push_regs <= 64);
/* The Skylake PRM contains the following restriction:
*
* "The driver must ensure The following case does not occur
@@ -72,7 +167,31 @@ anv_compute_push_layout(const struct anv_physical_device *pdevice,
* rule that would require us to iterate in the other direction
* and possibly mess around with dynamic state base address.
* Don't bother; just emit regular push constants at n = 0.
*
* In the compute case, we don't have multiple push ranges so it's
* better to just provide one in push_ranges[0].
*/
map->push_ranges[0] = push_constant_range;
}
}
void
anv_nir_validate_push_layout(struct brw_stage_prog_data *prog_data,
struct anv_pipeline_bind_map *map)
{
#ifndef NDEBUG
unsigned prog_data_push_size = DIV_ROUND_UP(prog_data->nr_params, 8);
for (unsigned i = 0; i < 4; i++)
prog_data_push_size += prog_data->ubo_ranges[i].length;
unsigned bind_map_push_size = 0;
for (unsigned i = 0; i < 4; i++)
bind_map_push_size += map->push_ranges[i].length;
/* We could go through everything again but it should be enough to assert
* that they push the same number of registers. This should alert us if
* the back-end compiler decides to re-arrange stuff or shrink a range.
*/
assert(prog_data_push_size == bind_map_push_size);
#endif
}
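A worked example of the range math above, using assumed numbers: if a shader's load_push_constant intrinsics touch bytes 36 through 71 of anv_push_constants, push_start aligns down to 32, the range starts at register 1 and spans DIV_ROUND_UP(72 - 32, 32) = 2 registers, and each intrinsic's base is rebased by subtracting 32 when it becomes load_uniform:

#include <assert.h>

static void
example_push_range_math(void)
{
   unsigned push_start = 36, push_end = 72;   /* assumed intrinsic coverage */

   push_start &= ~31u;                        /* align down to 32B: 32 */

   unsigned start_reg = push_start / 32;                 /* 1 */
   unsigned length = (push_end - push_start + 31) / 32;  /* DIV_ROUND_UP -> 2 */

   assert(start_reg == 1 && length == 2);
   (void)start_reg; (void)length;
}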


@@ -1,49 +0,0 @@
/*
* Copyright © 2015 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include "anv_nir.h"
void
anv_nir_lower_push_constants(nir_shader *shader)
{
nir_foreach_function(function, shader) {
if (!function->impl)
continue;
nir_foreach_block(block, function->impl) {
nir_foreach_instr(instr, block) {
if (instr->type != nir_instr_type_intrinsic)
continue;
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
/* TODO: Handle indirect push constants */
if (intrin->intrinsic != nir_intrinsic_load_push_constant)
continue;
/* We just turn them into uniform loads */
intrin->intrinsic = nir_intrinsic_load_uniform;
}
}
}
}


@@ -672,37 +672,11 @@ anv_pipeline_lower_nir(struct anv_pipeline *pipeline,
NIR_PASS_V(nir, anv_nir_lower_ycbcr_textures, layout);
NIR_PASS_V(nir, anv_nir_lower_push_constants);
if (nir->info.stage != MESA_SHADER_COMPUTE)
NIR_PASS_V(nir, anv_nir_lower_multiview, pipeline->subpass->view_mask);
nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
if (nir->num_uniforms > 0) {
assert(prog_data->nr_params == 0);
/* If the shader uses any push constants at all, we'll just give
* them the maximum possible number
*/
assert(nir->num_uniforms <= MAX_PUSH_CONSTANTS_SIZE);
nir->num_uniforms = MAX_PUSH_CONSTANTS_SIZE;
prog_data->nr_params += MAX_PUSH_CONSTANTS_SIZE / sizeof(float);
prog_data->param = ralloc_array(mem_ctx, uint32_t, prog_data->nr_params);
/* We now set the param values to be offsets into a
* anv_push_constant_data structure. Since the compiler doesn't
* actually dereference any of the gl_constant_value pointers in the
* params array, it doesn't really matter what we put here.
*/
struct anv_push_constants *null_data = NULL;
/* Fill out the push constants section of the param array */
for (unsigned i = 0; i < MAX_PUSH_CONSTANTS_SIZE / sizeof(float); i++) {
prog_data->param[i] = ANV_PARAM_PUSH(
(uintptr_t)&null_data->client_data[i * sizeof(float)]);
}
}
if (nir->info.num_ssbos > 0 || nir->info.num_images > 0)
pipeline->needs_data_cache = true;
@@ -732,10 +706,8 @@ anv_pipeline_lower_nir(struct anv_pipeline *pipeline,
nir_lower_non_uniform_texture_access |
nir_lower_non_uniform_image_access);
if (nir->info.stage != MESA_SHADER_COMPUTE)
brw_nir_analyze_ubo_ranges(compiler, nir, NULL, prog_data->ubo_ranges);
assert(nir->num_uniforms == prog_data->nr_params * 4);
anv_nir_compute_push_layout(pdevice, nir, prog_data,
&stage->bind_map, mem_ctx);
stage->nir = nir;
}
@@ -1394,9 +1366,8 @@ anv_pipeline_compile_graphics(struct anv_pipeline *pipeline,
goto fail;
}
anv_compute_push_layout(&pipeline->device->instance->physicalDevice,
&stages[s].prog_data.base,
&stages[s].bind_map);
anv_nir_validate_push_layout(&stages[s].prog_data.base,
&stages[s].bind_map);
struct anv_shader_bin *bin =
anv_device_upload_kernel(pipeline->device, cache,
@@ -1559,10 +1530,9 @@ anv_pipeline_compile_cs(struct anv_pipeline *pipeline,
return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
}
anv_pipeline_lower_nir(pipeline, mem_ctx, &stage, layout);
NIR_PASS_V(stage.nir, anv_nir_add_base_work_group_id);
NIR_PASS_V(stage.nir, anv_nir_add_base_work_group_id,
&stage.prog_data.cs);
anv_pipeline_lower_nir(pipeline, mem_ctx, &stage, layout);
NIR_PASS_V(stage.nir, nir_lower_vars_to_explicit_types,
nir_var_mem_shared, shared_type_info);
@@ -1578,6 +1548,8 @@ anv_pipeline_compile_cs(struct anv_pipeline *pipeline,
return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
}
anv_nir_validate_push_layout(&stage.prog_data.base, &stage.bind_map);
const unsigned code_size = stage.prog_data.base.program_size;
bin = anv_device_upload_kernel(pipeline->device, cache,
&stage.cache_key, sizeof(stage.cache_key),
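Taken together, the anv_pipeline.c hunks boil down to a fixed ordering: the push layout is computed during NIR lowering, the back-end compiles against it, and a debug-only validation runs afterwards. A stubbed sketch of that ordering (only the anv_nir_* names in the comments are real; everything else is a placeholder):

/* Placeholders standing in for the real calls. */
static void compute_push_layout(void)  { /* anv_nir_compute_push_layout() */ }
static void backend_compile(void)      { /* brw_compile_vs/fs/cs/... */ }
static void validate_push_layout(void) { /* anv_nir_validate_push_layout() */ }

static void
compile_stage_example(void)
{
   compute_push_layout();    /* layout fixed up front, during lowering */
   backend_compile();        /* back-end is told not to touch param[] */
   validate_push_layout();   /* assert prog_data still matches the bind map */
}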


@@ -2362,14 +2362,6 @@ struct anv_xfb_binding {
VkDeviceSize size;
};
#define ANV_PARAM_PUSH(offset) ((1 << 16) | (uint32_t)(offset))
#define ANV_PARAM_IS_PUSH(param) ((uint32_t)(param) >> 16 == 1)
#define ANV_PARAM_PUSH_OFFSET(param) ((param) & 0xffff)
#define ANV_PARAM_DYN_OFFSET(offset) ((2 << 16) | (uint32_t)(offset))
#define ANV_PARAM_IS_DYN_OFFSET(param) ((uint32_t)(param) >> 16 == 2)
#define ANV_PARAM_DYN_OFFSET_IDX(param) ((param) & 0xffff)
struct anv_push_constants {
/** Push constant data provided by the client through vkPushConstants */
uint8_t client_data[MAX_PUSH_CONSTANTS_SIZE];
@@ -2390,6 +2382,9 @@ struct anv_push_constants {
* uploading the push constants for compute shaders.
*/
uint32_t subgroup_id;
/** Pad out to a multiple of 32 bytes */
uint32_t pad[4];
} cs;
};
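The pad exists so the structure stays a whole number of 32-byte push-constant registers, which is what the memcpy-based upload and the register-based ranges rely on. A guess at the overall shape with a compile-time check (field sizes here are assumptions, not the driver's real limits):

#include <stdint.h>

struct example_anv_push_constants {
   uint8_t  client_data[128];          /* assumed MAX_PUSH_CONSTANTS_SIZE */
   uint32_t dynamic_offsets[16];       /* assumed MAX_DYNAMIC_BUFFERS */
   struct {
      uint32_t base_work_group_id[3];
      uint32_t subgroup_id;
      uint32_t pad[4];                 /* 12 + 4 + 16 = 32 bytes for cs */
   } cs;
};

/* Push ranges are measured in 32-byte registers, so the block must be a
 * multiple of 32 bytes. */
_Static_assert(sizeof(struct example_anv_push_constants) % 32 == 0,
               "push constant block must be a whole number of registers");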


@@ -116,7 +116,6 @@ libanv_files = files(
'anv_nir_apply_pipeline_layout.c',
'anv_nir_compute_push_layout.c',
'anv_nir_lower_multiview.c',
'anv_nir_lower_push_constants.c',
'anv_nir_lower_ycbcr_textures.c',
'anv_pass.c',
'anv_perf.c',