mesa: add hw select name stack code path

The HW code path does not flush vertices whenever the name stack changes. Instead, it saves the current name stack and writes to the select buffer only when no space is left or when exiting select mode. This lets us submit multiple draws from different name stacks at once, instead of submitting the draws for a single name stack and then waiting for them to finish before submitting the next one. Reviewed-by: Marek Olšák <marek.olsak@amd.com> Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com> Signed-off-by: Qiang Yu <yuq825@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/15765>
2022-03-09 15:03:01 +08:00 · 2022-03-09 15:03:01 +08:00 · d231f95591
parent 429c7fbaa1
commit d231f95591
7 changed files with 209 additions and 4 deletions
--- a/src/mesa/main/config.h
+++ b/src/mesa/main/config.h
@ -76,6 +76,10 @@

 /** Maximum Name stack depth */
 #define MAX_NAME_STACK_DEPTH 64
+/** Size in bytes of the buffer that holds saved name stacks for HW select */
+#define NAME_STACK_BUFFER_SIZE 2048
+/** Maximum number of (hit, minz, maxz) result slots in the HW select result buffer */
+#define MAX_NAME_STACK_RESULT_NUM 256

 /** Minimum point size */
 #define MIN_POINT_SIZE 1.0
--- a/src/mesa/main/context.c
+++ b/src/mesa/main/context.c
@ -1136,6 +1136,7 @@ _mesa_free_context_data(struct gl_context *ctx, bool destroy_debug_output)

   _mesa_free_attrib_data(ctx);
   _mesa_free_eval_data( ctx );
+   _mesa_free_feedback(ctx);
   _mesa_free_texture_data( ctx );
   _mesa_free_image_textures(ctx);
   _mesa_free_matrix_data( ctx );
--- a/src/mesa/main/context.h
+++ b/src/mesa/main/context.h
@ -412,6 +412,13 @@ _mesa_has_texture_view(const struct gl_context *ctx)
          _mesa_has_OES_texture_view(ctx);
 }

+/**
+ * Tell whether the hardware accelerated GL_SELECT path is currently active.
+ *
+ * \return true only when the context is in GL_SELECT render mode and the
+ *         driver advertises Const.HardwareAcceleratedSelect.
+ */
+static inline bool
+_mesa_hw_select_enabled(const struct gl_context *ctx)
+{
+   if (ctx->RenderMode != GL_SELECT)
+      return false;
+
+   return ctx->Const.HardwareAcceleratedSelect;
+}
+
 #ifdef __cplusplus
 }
 #endif
--- a/src/mesa/main/feedback.c
+++ b/src/mesa/main/feedback.c
@ -36,6 +36,7 @@
 #include "macros.h"
 #include "mtypes.h"
 #include "api_exec_decl.h"
+#include "bufferobj.h"

 #include "state_tracker/st_cb_feedback.h"

@ -221,13 +222,170 @@ _mesa_update_hitflag(struct gl_context *ctx, GLfloat z)
   }
 }

+/**
+ * Create, on first use, the resources needed by the hardware GL_SELECT path:
+ * the CPU-side save buffer for name stacks and the result buffer object.
+ *
+ * Does nothing when Const.HardwareAcceleratedSelect is not set. Resources
+ * that already exist are kept as they are. On any allocation failure
+ * GL_OUT_OF_MEMORY is recorded and the function returns early.
+ */
+static void
+alloc_select_resource(struct gl_context *ctx)
+{
+   struct gl_selection *sel = &ctx->Select;
+
+   if (!ctx->Const.HardwareAcceleratedSelect)
+      return;
+
+   if (sel->SaveBuffer == NULL) {
+      sel->SaveBuffer = malloc(NAME_STACK_BUFFER_SIZE);
+      if (sel->SaveBuffer == NULL) {
+         _mesa_error(ctx, GL_OUT_OF_MEMORY, "Cannot allocate name stack save buffer");
+         return;
+      }
+   }
+
+   /* The result buffer already exists: nothing left to set up. */
+   if (sel->Result != NULL)
+      return;
+
+   sel->Result = _mesa_bufferobj_alloc(ctx, -1);
+   if (sel->Result == NULL) {
+      _mesa_error(ctx, GL_OUT_OF_MEMORY, "Cannot allocate select result buffer");
+      return;
+   }
+
+   /* Every slot is three words: hit flag, minimum Z and maximum Z. */
+   GLuint initial[MAX_NAME_STACK_RESULT_NUM * 3];
+   GLuint *slot = initial;
+   for (int n = 0; n < MAX_NAME_STACK_RESULT_NUM; n++, slot += 3) {
+      slot[0] = 0;          /* hit */
+      slot[1] = 0xffffffff; /* minz */
+      slot[2] = 0;          /* maxz */
+   }
+
+   if (_mesa_bufferobj_data(ctx, GL_SHADER_STORAGE_BUFFER,
+                            sizeof(initial), initial,
+                            GL_STATIC_DRAW, 0, sel->Result))
+      return;
+
+   /* Upload failed: drop the buffer object so a later call can retry. */
+   _mesa_reference_buffer_object(ctx, &sel->Result, NULL);
+   _mesa_error(ctx, GL_OUT_OF_MEMORY, "Cannot init result buffer");
+}
+
+/**
+ * Append the current name stack to the save buffer (HW select path only) if
+ * it has been used since the last save, then reset the per-stack hit state.
+ *
+ * Record layout written to SaveBuffer, in 32-bit words:
+ *   word 0      four metadata bytes {HitFlag, ResultUsed, NameStackDepth, 0}
+ *   words 1..2  HitMinZ and HitMaxZ as floats (present only if HitFlag is set)
+ *   remaining   NameStackDepth names copied from NameStack
+ *
+ * \return true when the save buffer or the result buffer has no room left
+ *         for another name stack (the callers then flush vertices and call
+ *         update_hit_record()); false otherwise, including when nothing was
+ *         saved.
+ */
+static bool
+save_used_name_stack(struct gl_context *ctx)
+{
+   struct gl_selection *s = &ctx->Select;
+
+   if (!ctx->Const.HardwareAcceleratedSelect)
+      return false;
+
+   /* We have two kinds of name stack user:
+    *   1. glRasterPos (CPU based) will set HitFlag
+    *   2. draw call for GPU will set ResultUsed
+    */
+   if (!s->ResultUsed && !s->HitFlag)
+      return false;
+
+   /* alloc_select_resource() only records GL_OUT_OF_MEMORY when the save
+    * buffer cannot be allocated, leaving SaveBuffer NULL. Without a buffer
+    * there is nowhere to store this name stack, so bail out instead of
+    * writing through a NULL-based pointer.
+    */
+   if (!s->SaveBuffer)
+      return false;
+
+   void *save = (char *)s->SaveBuffer + s->SaveBufferTail;
+
+   /* save meta data */
+   uint8_t *metadata = save;
+   metadata[0] = s->HitFlag;
+   metadata[1] = s->ResultUsed;
+   metadata[2] = s->NameStackDepth;
+   metadata[3] = 0;
+
+   /* save hit data; index counts 32-bit words, word 0 being the metadata */
+   int index = 1;
+   if (s->HitFlag) {
+      float *hit = save;
+      hit[index++] = s->HitMinZ;
+      hit[index++] = s->HitMaxZ;
+   }
+
+   /* save name stack */
+   memcpy((uint32_t *)save + index, s->NameStack, s->NameStackDepth * sizeof(GLuint));
+   index += s->NameStackDepth;
+
+   s->SaveBufferTail += index * sizeof(GLuint);
+   s->SavedStackNum++;
+
+   /* if current slot has been used, store result to next slot in result buffer */
+   if (s->ResultUsed)
+      s->ResultOffset += 3 * sizeof(GLuint);
+
+   /* reset fields */
+   s->HitFlag = GL_FALSE;
+   s->HitMinZ = 1.0;
+   s->HitMaxZ = 0;
+
+   s->ResultUsed = GL_FALSE;
+
+   /* Return true if there is not enough space for the next name stack data.
+    * The largest possible record is one metadata word, two Z words and
+    * MAX_NAME_STACK_DEPTH names.
+    */
+   return s->ResultOffset >= MAX_NAME_STACK_RESULT_NUM * 3 * sizeof(GLuint) ||
+      s->SaveBufferTail >= NAME_STACK_BUFFER_SIZE - (MAX_NAME_STACK_DEPTH + 3) * sizeof(GLuint);
+}
+
 static void
 update_hit_record(struct gl_context *ctx)
 {
   struct gl_selection *s = &ctx->Select;

   if (ctx->Const.HardwareAcceleratedSelect) {
+      if (!s->SavedStackNum)
+         return;

+      unsigned size = s->ResultOffset;
+      GLuint *result = size ? alloca(size) : NULL;
+      _mesa_bufferobj_get_subdata(ctx, 0, size, result, s->Result);
+
+      unsigned index = 0;
+      unsigned *save = s->SaveBuffer;
+      for (int i = 0; i < s->SavedStackNum; i++) {
+         uint8_t *metadata = (uint8_t *)(save++);
+
+         unsigned zmin, zmax;
+         bool cpu_hit = !!metadata[0];
+         if (cpu_hit) {
+            /* map [0, 1] to [0, UINT_MAX]*/
+            zmin = (unsigned) ((float)(~0u) * *(float *)(save++));
+            zmax = (unsigned) ((float)(~0u) * *(float *)(save++));
+         } else {
+            zmin = ~0u;
+            zmax = 0;
+         }
+
+         bool gpu_hit = false;
+         if (metadata[1]) {
+            gpu_hit = !!result[index];
+
+            if (gpu_hit) {
+               zmin = MIN2(zmin, result[index + 1]);
+               zmax = MAX2(zmax, result[index + 2]);
+
+               /* reset data */
+               result[index]     = 0;          /* hit */
+               result[index + 1] = 0xffffffff; /* minz */
+               result[index + 2] = 0;          /* maxz */
+            }
+            index += 3;
+         }
+
+         int depth = metadata[2];
+         if (cpu_hit || gpu_hit) {
+            /* hit */
+            write_record(ctx, depth);
+            write_record(ctx, zmin);
+            write_record(ctx, zmax);
+
+            for (int j = 0; j < depth; j++)
+               write_record(ctx, save[j]);
+            s->Hits++;
+         }
+         save += depth;
+      }
+
+      /* reset result buffer */
+      _mesa_bufferobj_subdata(ctx, 0, size, result, s->Result);
+
+      s->SaveBufferTail = 0;
+      s->SavedStackNum = 0;
+      s->ResultOffset = 0;
   } else {
      if (!s->HitFlag)
         return;
@ -261,6 +419,13 @@ reset_name_stack_to_empty(struct gl_context *ctx)
   s->HitFlag = GL_FALSE;
   s->HitMinZ = 1.0;
   s->HitMaxZ = 0.0;
+
+   if (ctx->Const.HardwareAcceleratedSelect) {
+      s->SaveBufferTail = 0;
+      s->SavedStackNum = 0;
+      s->ResultUsed = GL_FALSE;
+      s->ResultOffset = 0;
+   }
 }

 /**
@ -277,6 +442,7 @@ _mesa_InitNames( void )

   FLUSH_VERTICES(ctx, 0, 0);

+   save_used_name_stack(ctx);
   update_hit_record(ctx);

   reset_name_stack_to_empty(ctx);
@ -303,7 +469,7 @@ _mesa_LoadName( GLuint name )
      return;
   }

-   if (!ctx->Const.HardwareAcceleratedSelect) {
+   if (!ctx->Const.HardwareAcceleratedSelect || save_used_name_stack(ctx)) {
      FLUSH_VERTICES(ctx, 0, 0);
      update_hit_record(ctx);
   }
@ -332,7 +498,7 @@ _mesa_PushName( GLuint name )
      return;
   }

-   if (!ctx->Const.HardwareAcceleratedSelect) {
+   if (!ctx->Const.HardwareAcceleratedSelect || save_used_name_stack(ctx)) {
      FLUSH_VERTICES(ctx, 0, 0);
      update_hit_record(ctx);
   }
@ -359,7 +525,7 @@ _mesa_PopName( void )
      return;
   }

-   if (!ctx->Const.HardwareAcceleratedSelect) {
+   if (!ctx->Const.HardwareAcceleratedSelect || save_used_name_stack(ctx)) {
      FLUSH_VERTICES(ctx, 0, 0);
      update_hit_record(ctx);
   }
@ -408,6 +574,7 @@ _mesa_RenderMode( GLenum mode )
 	 result = 0;
 	 break;
      case GL_SELECT:
+	 save_used_name_stack(ctx);
 	 update_hit_record(ctx);

 	 if (ctx->Select.BufferCount > ctx->Select.BufferSize) {
@ -448,6 +615,7 @@ _mesa_RenderMode( GLenum mode )
 	    /* haven't called glSelectBuffer yet */
 	    _mesa_error( ctx, GL_INVALID_OPERATION, "glRenderMode" );
 	 }
+	 alloc_select_resource(ctx);
 	 break;
      case GL_FEEDBACK:
 	 if (ctx->Feedback.BufferSize==0) {
@ -495,4 +663,12 @@ void _mesa_init_feedback( struct gl_context * ctx )
   ctx->RenderMode = GL_RENDER;
 }

+/**
+ * Release the HW select resources held in ctx->Select: the name stack save
+ * buffer and the reference to the result buffer object.
+ *
+ * Safe to call when the resources were never allocated (free(NULL) is a
+ * no-op).
+ */
+void _mesa_free_feedback(struct gl_context * ctx)
+{
+   struct gl_selection *s = &ctx->Select;
+
+   free(s->SaveBuffer);
+   /* alloc_select_resource() treats a non-NULL SaveBuffer as a live
+    * allocation, so do not leave a dangling pointer behind.
+    */
+   s->SaveBuffer = NULL;
+
+   _mesa_reference_buffer_object(ctx, &s->Result, NULL);
+}
+
 /*@}*/
--- a/src/mesa/main/feedback.h
+++ b/src/mesa/main/feedback.h
@ -52,4 +52,7 @@ _mesa_update_hitflag( struct gl_context *ctx, GLfloat z );
 extern void
 _mesa_init_feedback( struct gl_context *ctx );

+extern void
+_mesa_free_feedback( struct gl_context *ctx );
+
 #endif /* FEEDBACK_H */
--- a/src/mesa/main/mtypes.h
+++ b/src/mesa/main/mtypes.h
@ -1787,6 +1787,15 @@ struct gl_selection
   GLboolean HitFlag;	/**< hit flag */
   GLfloat HitMinZ;	/**< minimum hit depth */
   GLfloat HitMaxZ;	/**< maximum hit depth */
+
+   /* HW GL_SELECT */
+   void *SaveBuffer;        /**< array holds multi stack data */
+   GLuint SaveBufferTail;   /**< offset to SaveBuffer's tail */
+   GLuint SavedStackNum;    /**< number of saved stacks */
+
+   GLboolean ResultUsed;    /**< whether any draw used result buffer */
+   GLuint ResultOffset;     /**< offset into result buffer */
+   struct gl_buffer_object *Result; /**< result buffer */
 };


--- a/src/mesa/vbo/vbo_exec_api.c
+++ b/src/mesa/vbo/vbo_exec_api.c
@ -922,8 +922,13 @@ _mesa_End(void)
      last_draw->count = count;
      exec->vtx.markers[last].end = 1;

-      if (count)
+      if (count) {
+         /* mark result buffer used */
+         if (_mesa_hw_select_enabled(ctx))
+            ctx->Select.ResultUsed = GL_TRUE;
+
         ctx->Driver.NeedFlush |= FLUSH_STORED_VERTICES;
+      }

      /* Special handling for GL_LINE_LOOP */
      if (exec->vtx.mode[last] == GL_LINE_LOOP &&