ir3: Assert that we cannot have enough concurrent waves for CS with barrier

If we have a compute shader that has a big workgroup, a barrier, and a branchstack which limits max_waves - this may result in a situation when we cannot run concurrently all waves of the workgroup, which would lead to a hang. Blob just explodes in such case. Signed-off-by: Danylo Piliaiev <dpiliaiev@igalia.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14110>
2021-12-07 16:43:21 +02:00 · 2021-12-07 16:43:21 +02:00 · 3792fbfcf6
parent 9ed4d49c97
commit 3792fbfcf6
1 changed files with 36 additions and 18 deletions
--- a/src/freedreno/ir3/ir3.c
+++ b/src/freedreno/ir3/ir3.c
@ -177,24 +177,6 @@ ir3_get_reg_independent_max_waves(struct ir3_shader_variant *v,
   const struct ir3_compiler *compiler = v->shader->compiler;
   unsigned max_waves = compiler->max_waves;
   /* If this is a compute shader, compute the limit based on shared size */
   if ((v->type == MESA_SHADER_COMPUTE) ||
       (v->type == MESA_SHADER_KERNEL)) {
      /* Shared is allocated in chunks of 1k */
      unsigned shared_per_wg = ALIGN_POT(v->shared_size, 1024);
      if (shared_per_wg > 0 && !v->local_size_variable) {
         unsigned wgs_per_core = compiler->local_mem_size / shared_per_wg;
         unsigned threads_per_wg =
            v->local_size[0] * v->local_size[1] * v->local_size[2];
         unsigned waves_per_wg =
            DIV_ROUND_UP(threads_per_wg, compiler->threadsize_base *
                                            (double_threadsize ? 2 : 1) *
                                            compiler->wave_granularity);
         max_waves = MIN2(max_waves, waves_per_wg * wgs_per_core *
                                        compiler->wave_granularity);
      }
   }
   /* Compute the limit based on branchstack */
   if (v->branchstack > 0) {
      unsigned branchstack_max_waves = compiler->branchstack_size /
@ -203,6 +185,42 @@ ir3_get_reg_independent_max_waves(struct ir3_shader_variant *v,
      max_waves = MIN2(max_waves, branchstack_max_waves);
   }
   /* If this is a compute shader, compute the limit based on shared size */
   if ((v->type == MESA_SHADER_COMPUTE) ||
       (v->type == MESA_SHADER_KERNEL)) {
      unsigned threads_per_wg =
         v->local_size[0] * v->local_size[1] * v->local_size[2];
      unsigned waves_per_wg =
         DIV_ROUND_UP(threads_per_wg, compiler->threadsize_base *
                                         (double_threadsize ? 2 : 1) *
                                         compiler->wave_granularity);
      /* Shared is allocated in chunks of 1k */
      unsigned shared_per_wg = ALIGN_POT(v->shared_size, 1024);
      if (shared_per_wg > 0 && !v->local_size_variable) {
         unsigned wgs_per_core = compiler->local_mem_size / shared_per_wg;
         max_waves = MIN2(max_waves, waves_per_wg * wgs_per_core *
                                        compiler->wave_granularity);
      }
      /* If we have a compute shader that has a big workgroup, a barrier, and
       * a branchstack which limits max_waves - this may result in a situation
       * when we cannot run concurrently all waves of the workgroup, which
       * would lead to a hang.
       *
       * TODO: Could we spill branchstack or is there other way around?
       * Blob just explodes in such case.
       */
      if (v->has_barrier && (max_waves < waves_per_wg)) {
         mesa_loge(
            "Compute shader (%s:%s) which has workgroup barrier cannot be used "
            "because it's impossible to have enough concurrent waves.",
            v->shader->nir->info.name, v->shader->nir->info.label);
         exit(1);
      }
   }
   return max_waves;
 }