From 0b4be5baaadfbb1e40470f07db68c1b74b5f4f15 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Wed, 23 Dec 2020 06:21:57 -0500 Subject: [PATCH] glthread: add specialized versions of unmarshal_Draw funcs without user buffers This decreases CPU time spent in the unmarshal_DrawElements function from 0.44% to 0.26% if no user buffers are present. Instead of converting all calls to either unmarshal_DrawArraysInstanced- BaseInstance or unmarshal_DrawElementsInstancedBaseVertexBaseInstance, which both also conditionally bind uploaded user buffers if needed and call one of: - DrawArraysInstancedBaseInstance - DrawElementsInstancedBaseVertexBaseInstance - DrawRangeElementsBaseVertex, add 3 unmarshal draw variants that are specialized versions of the above that never bind uploaded user buffers. This removes all conditionals from the unmarshal functions for the common case when there are no user buffers. Unused function enums are used for the various draw variants. For example, CMD_DrawArrays is used to dispatch DrawArraysInstancedBaseInstance without user buffers, while CMD_DrawArraysInstancedBaseInstance is used to dispatch the same with user buffers. glthread isn't flexible enough to do it cleanly. Acked-by: Pierre-Eric Pelloux-Prayer Part-of: --- src/mesa/main/glthread_draw.c | 216 ++++++++++++++++++++++++++++------ 1 file changed, 178 insertions(+), 38 deletions(-) diff --git a/src/mesa/main/glthread_draw.c b/src/mesa/main/glthread_draw.c index 1fd6828e1f6..263be4fe487 100644 --- a/src/mesa/main/glthread_draw.c +++ b/src/mesa/main/glthread_draw.c @@ -253,6 +253,53 @@ upload_vertices(struct gl_context *ctx, unsigned user_buffer_mask, return true; } +/* Generic DrawArrays structure NOT supporting user buffers. Ignore the name. 
*/ +struct marshal_cmd_DrawArrays +{ + struct marshal_cmd_base cmd_base; + GLenum mode; + GLint first; + GLsizei count; + GLsizei instance_count; + GLuint baseinstance; +}; + +void +_mesa_unmarshal_DrawArrays(struct gl_context *ctx, + const struct marshal_cmd_DrawArrays *cmd) +{ + /* Ignore the function name. We use DISPATCH_CMD_DrawArrays + * for all DrawArrays variants without user buffers, and + * DISPATCH_CMD_DrawArraysInstancedBaseInstance for all DrawArrays + * variants with user buffers. + */ + const GLenum mode = cmd->mode; + const GLint first = cmd->first; + const GLsizei count = cmd->count; + const GLsizei instance_count = cmd->instance_count; + const GLuint baseinstance = cmd->baseinstance; + + CALL_DrawArraysInstancedBaseInstance(ctx->CurrentServerDispatch, + (mode, first, count, instance_count, + baseinstance)); +} + +static ALWAYS_INLINE void +draw_arrays_async(struct gl_context *ctx, GLenum mode, GLint first, + GLsizei count, GLsizei instance_count, GLuint baseinstance) +{ + int cmd_size = sizeof(struct marshal_cmd_DrawArrays); + struct marshal_cmd_DrawArrays *cmd = + _mesa_glthread_allocate_command(ctx, DISPATCH_CMD_DrawArrays, cmd_size); + + cmd->mode = mode; + cmd->first = first; + cmd->count = count; + cmd->instance_count = instance_count; + cmd->baseinstance = baseinstance; +} + +/* Generic DrawArrays structure supporting user buffers. Ignore the name. */ struct marshal_cmd_DrawArraysInstancedBaseInstance { struct marshal_cmd_base cmd_base; @@ -268,6 +315,11 @@ void _mesa_unmarshal_DrawArraysInstancedBaseInstance(struct gl_context *ctx, const struct marshal_cmd_DrawArraysInstancedBaseInstance *cmd) { + /* Ignore the function name. We use DISPATCH_CMD_DrawArrays + * for all DrawArrays variants without user buffers, and + * DISPATCH_CMD_DrawArraysInstancedBaseInstance for all DrawArrays + * variants with user buffers. 
+ */ const GLenum mode = cmd->mode; const GLint first = cmd->first; const GLsizei count = cmd->count; @@ -295,10 +347,10 @@ _mesa_unmarshal_DrawArraysInstancedBaseInstance(struct gl_context *ctx, } static ALWAYS_INLINE void -draw_arrays_async(struct gl_context *ctx, GLenum mode, GLint first, - GLsizei count, GLsizei instance_count, GLuint baseinstance, - unsigned user_buffer_mask, - const struct glthread_attrib_binding *buffers) +draw_arrays_async_user(struct gl_context *ctx, GLenum mode, GLint first, + GLsizei count, GLsizei instance_count, GLuint baseinstance, + unsigned user_buffer_mask, + const struct glthread_attrib_binding *buffers) { int buffers_size = util_bitcount(user_buffer_mask) * sizeof(buffers[0]); int cmd_size = sizeof(struct marshal_cmd_DrawArraysInstancedBaseInstance) + @@ -341,8 +393,7 @@ draw_arrays(GLenum mode, GLint first, GLsizei count, GLsizei instance_count, */ if (ctx->API == API_OPENGL_CORE || !user_buffer_mask || count <= 0 || instance_count <= 0) { - draw_arrays_async(ctx, mode, first, count, instance_count, baseinstance, - 0, NULL); + draw_arrays_async(ctx, mode, first, count, instance_count, baseinstance); return; } @@ -358,8 +409,8 @@ draw_arrays(GLenum mode, GLint first, GLsizei count, GLsizei instance_count, return; } - draw_arrays_async(ctx, mode, first, count, instance_count, baseinstance, - user_buffer_mask, buffers); + draw_arrays_async_user(ctx, mode, first, count, instance_count, baseinstance, + user_buffer_mask, buffers); } struct marshal_cmd_MultiDrawArrays @@ -496,6 +547,107 @@ sync: (mode, first, count, draw_count)); } +/* DrawElementsInstancedBaseVertexBaseInstance not supporting user buffers. + * Ignore the name. 
+ */ +struct marshal_cmd_DrawElementsInstancedARB +{ + struct marshal_cmd_base cmd_base; + GLenum mode; + GLenum type; + GLsizei count; + GLsizei instance_count; + GLint basevertex; + GLuint baseinstance; + const GLvoid *indices; +}; + +void +_mesa_unmarshal_DrawElementsInstancedARB(struct gl_context *ctx, + const struct marshal_cmd_DrawElementsInstancedARB *cmd) +{ + /* Ignore the function name. We use DISPATCH_CMD_DrawElementsInstanced- + * BaseVertexBaseInstance for all DrawElements variants with user buffers, + * and both DISPATCH_CMD_DrawElementsInstancedARB and DISPATCH_CMD_Draw- + * RangeElementsBaseVertex for all draw elements variants without user + * buffers. + */ + const GLenum mode = cmd->mode; + const GLsizei count = cmd->count; + const GLenum type = cmd->type; + const GLvoid *indices = cmd->indices; + const GLsizei instance_count = cmd->instance_count; + const GLint basevertex = cmd->basevertex; + const GLuint baseinstance = cmd->baseinstance; + + CALL_DrawElementsInstancedBaseVertexBaseInstance(ctx->CurrentServerDispatch, + (mode, count, type, indices, + instance_count, basevertex, + baseinstance)); +} + +struct marshal_cmd_DrawRangeElementsBaseVertex +{ + struct marshal_cmd_base cmd_base; + GLenum mode; + GLenum type; + GLsizei count; + GLint basevertex; + GLuint min_index; + GLuint max_index; + const GLvoid *indices; +}; + +void +_mesa_unmarshal_DrawRangeElementsBaseVertex(struct gl_context *ctx, + const struct marshal_cmd_DrawRangeElementsBaseVertex *cmd) +{ + const GLenum mode = cmd->mode; + const GLsizei count = cmd->count; + const GLenum type = cmd->type; + const GLvoid *indices = cmd->indices; + const GLint basevertex = cmd->basevertex; + const GLuint min_index = cmd->min_index; + const GLuint max_index = cmd->max_index; + + CALL_DrawRangeElementsBaseVertex(ctx->CurrentServerDispatch, + (mode, min_index, max_index, count, + type, indices, basevertex)); +} + +static ALWAYS_INLINE void +draw_elements_async(struct gl_context *ctx, GLenum mode, 
GLsizei count, + GLenum type, const GLvoid *indices, GLsizei instance_count, + GLint basevertex, GLuint baseinstance, + bool index_bounds_valid, GLuint min_index, GLuint max_index) +{ + if (index_bounds_valid) { + int cmd_size = sizeof(struct marshal_cmd_DrawRangeElementsBaseVertex); + struct marshal_cmd_DrawRangeElementsBaseVertex *cmd = + _mesa_glthread_allocate_command(ctx, DISPATCH_CMD_DrawRangeElementsBaseVertex, cmd_size); + + cmd->mode = mode; + cmd->count = count; + cmd->type = type; + cmd->indices = indices; + cmd->basevertex = basevertex; + cmd->min_index = min_index; + cmd->max_index = max_index; + } else { + int cmd_size = sizeof(struct marshal_cmd_DrawElementsInstancedARB); + struct marshal_cmd_DrawElementsInstancedARB *cmd = + _mesa_glthread_allocate_command(ctx, DISPATCH_CMD_DrawElementsInstancedARB, cmd_size); + + cmd->mode = mode; + cmd->count = count; + cmd->type = type; + cmd->indices = indices; + cmd->instance_count = instance_count; + cmd->basevertex = basevertex; + cmd->baseinstance = baseinstance; + } +} + struct marshal_cmd_DrawElementsInstancedBaseVertexBaseInstance { struct marshal_cmd_base cmd_base; @@ -517,6 +669,12 @@ void _mesa_unmarshal_DrawElementsInstancedBaseVertexBaseInstance(struct gl_context *ctx, const struct marshal_cmd_DrawElementsInstancedBaseVertexBaseInstance *cmd) { + /* Ignore the function name. We use DISPATCH_CMD_DrawElementsInstanced- + * BaseVertexBaseInstance for all DrawElements variants with user buffers, + * and both DISPATCH_CMD_DrawElementsInstancedARB and DISPATCH_CMD_Draw- + * RangeElementsBaseVertex for all draw elements variants without user + * buffers. 
+ */ const GLenum mode = cmd->mode; const GLsizei count = cmd->count; const GLenum type = cmd->type; @@ -563,13 +721,13 @@ _mesa_unmarshal_DrawElementsInstancedBaseVertexBaseInstance(struct gl_context *c } static ALWAYS_INLINE void -draw_elements_async(struct gl_context *ctx, GLenum mode, GLsizei count, - GLenum type, const GLvoid *indices, GLsizei instance_count, - GLint basevertex, GLuint baseinstance, - bool index_bounds_valid, GLuint min_index, GLuint max_index, - struct gl_buffer_object *index_buffer, - unsigned user_buffer_mask, - const struct glthread_attrib_binding *buffers) +draw_elements_async_user(struct gl_context *ctx, GLenum mode, GLsizei count, + GLenum type, const GLvoid *indices, GLsizei instance_count, + GLint basevertex, GLuint baseinstance, + bool index_bounds_valid, GLuint min_index, GLuint max_index, + struct gl_buffer_object *index_buffer, + unsigned user_buffer_mask, + const struct glthread_attrib_binding *buffers) { int buffers_size = util_bitcount(user_buffer_mask) * sizeof(buffers[0]); int cmd_size = sizeof(struct marshal_cmd_DrawElementsInstancedBaseVertexBaseInstance) + @@ -620,7 +778,7 @@ draw_elements(GLenum mode, GLsizei count, GLenum type, const GLvoid *indices, (!user_buffer_mask && !has_user_indices)) { draw_elements_async(ctx, mode, count, type, indices, instance_count, basevertex, baseinstance, index_bounds_valid, - min_index, max_index, 0, 0, NULL); + min_index, max_index); return; } @@ -670,10 +828,10 @@ draw_elements(GLenum mode, GLsizei count, GLenum type, const GLvoid *indices, index_buffer = upload_indices(ctx, count, index_size, &indices); /* Draw asynchronously. 
*/ - draw_elements_async(ctx, mode, count, type, indices, instance_count, - basevertex, baseinstance, index_bounds_valid, - min_index, max_index, index_buffer, - user_buffer_mask, buffers); + draw_elements_async_user(ctx, mode, count, type, indices, instance_count, + basevertex, baseinstance, index_bounds_valid, + min_index, max_index, index_buffer, + user_buffer_mask, buffers); return; sync: @@ -1049,12 +1207,6 @@ _mesa_marshal_MultiDrawElementsEXT(GLenum mode, const GLsizei *count, draw_count, NULL); } -void -_mesa_unmarshal_DrawArrays(struct gl_context *ctx, const struct marshal_cmd_DrawArrays *cmd) -{ - unreachable("never used - DrawArraysInstancedBaseInstance is used instead"); -} - void _mesa_unmarshal_DrawArraysInstancedARB(struct gl_context *ctx, const struct marshal_cmd_DrawArraysInstancedARB *cmd) { @@ -1073,24 +1225,12 @@ _mesa_unmarshal_DrawRangeElements(struct gl_context *ctx, const struct marshal_c unreachable("never used - DrawElementsInstancedBaseVertexBaseInstance is used instead"); } -void -_mesa_unmarshal_DrawElementsInstancedARB(struct gl_context *ctx, const struct marshal_cmd_DrawElementsInstancedARB *cmd) -{ - unreachable("never used - DrawElementsInstancedBaseVertexBaseInstance is used instead"); -} - void _mesa_unmarshal_DrawElementsBaseVertex(struct gl_context *ctx, const struct marshal_cmd_DrawElementsBaseVertex *cmd) { unreachable("never used - DrawElementsInstancedBaseVertexBaseInstance is used instead"); } -void -_mesa_unmarshal_DrawRangeElementsBaseVertex(struct gl_context *ctx, const struct marshal_cmd_DrawRangeElementsBaseVertex *cmd) -{ - unreachable("never used - DrawElementsInstancedBaseVertexBaseInstance is used instead"); -} - void _mesa_unmarshal_DrawElementsInstancedBaseVertex(struct gl_context *ctx, const struct marshal_cmd_DrawElementsInstancedBaseVertex *cmd) {