glthread: add specialized versions of unmarshal_Draw funcs without user buffers

This decreases CPU time spent in the unmarshal_DrawElements function
from 0.44% to 0.26% if no user buffers are present.

Instead of converting all calls to either unmarshal_DrawArraysInstanced-
BaseInstance or unmarshal_DrawElementsInstancedBaseVertexBaseInstance,
which both also conditionally bind uploaded user buffers if needed and
call one of:
- DrawArraysInstancedBaseInstance
- DrawElementsInstancedBaseVertexBaseInstance
- DrawRangeElementsBaseVertex,
add 3 unmarshal draw variants that are specialized versions of the above that
never bind uploaded user buffers. This removes all conditionals from
the unmarshal functions for the common case when there are no user buffers.

Unused function enums are used for the various draw variants. For example,
CMD_DrawArrays is used to dispatch DrawArraysInstancedBaseInstance without
user buffers, while CMD_DrawArraysInstancedBaseInstance is used to dispatch
the same with user buffers. glthread isn't flexible enough to do it cleanly.

Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8297>
This commit is contained in:
Marek Olšák 2020-12-23 06:21:57 -05:00 committed by Marge Bot
parent 8f53b54a3b
commit 0b4be5baaa
1 changed files with 178 additions and 38 deletions

View File

@ -253,6 +253,53 @@ upload_vertices(struct gl_context *ctx, unsigned user_buffer_mask,
return true;
}
/* Generic DrawArrays structure NOT supporting user buffers. Ignore the name.
 *
 * Filled in by draw_arrays_async() and read back by
 * _mesa_unmarshal_DrawArrays(), which forwards the five draw fields to
 * DrawArraysInstancedBaseInstance in the order
 * (mode, first, count, instance_count, baseinstance).
 */
struct marshal_cmd_DrawArrays
{
/* Never written by draw_arrays_async(); presumably initialized by
 * _mesa_glthread_allocate_command() - confirm.
 */
struct marshal_cmd_base cmd_base;
/* Draw parameters, stored exactly as the caller supplied them. */
GLenum mode;
GLint first;
GLsizei count;
GLsizei instance_count;
GLuint baseinstance;
};
/* Replay a queued DrawArrays-family command that carries no user buffers.
 *
 * Ignore the function name. DISPATCH_CMD_DrawArrays is used for all
 * DrawArrays variants without user buffers, and
 * DISPATCH_CMD_DrawArraysInstancedBaseInstance is used for all DrawArrays
 * variants with user buffers.
 *
 * ctx: context whose CurrentServerDispatch table receives the call.
 * cmd: recorded draw parameters; they are forwarded unchanged.
 */
void
_mesa_unmarshal_DrawArrays(struct gl_context *ctx,
                           const struct marshal_cmd_DrawArrays *cmd)
{
   /* Every variant is expressed through the most general entry point. */
   CALL_DrawArraysInstancedBaseInstance(ctx->CurrentServerDispatch,
                                        (cmd->mode, cmd->first, cmd->count,
                                         cmd->instance_count,
                                         cmd->baseinstance));
}
/* Queue a DrawArrays-family draw that needs no user-buffer handling.
 *
 * Allocates a fixed-size marshal_cmd_DrawArrays under the
 * DISPATCH_CMD_DrawArrays id and copies the draw parameters into it.
 */
static ALWAYS_INLINE void
draw_arrays_async(struct gl_context *ctx, GLenum mode, GLint first,
                  GLsizei count, GLsizei instance_count, GLuint baseinstance)
{
   const int bytes = sizeof(struct marshal_cmd_DrawArrays);
   struct marshal_cmd_DrawArrays *draw =
      _mesa_glthread_allocate_command(ctx, DISPATCH_CMD_DrawArrays, bytes);

   draw->mode = mode;
   draw->first = first;
   draw->count = count;
   draw->instance_count = instance_count;
   draw->baseinstance = baseinstance;
}
/* Generic DrawArrays structure supporting user buffers. Ignore the name. */
struct marshal_cmd_DrawArraysInstancedBaseInstance
{
struct marshal_cmd_base cmd_base;
@ -268,6 +315,11 @@ void
_mesa_unmarshal_DrawArraysInstancedBaseInstance(struct gl_context *ctx,
const struct marshal_cmd_DrawArraysInstancedBaseInstance *cmd)
{
/* Ignore the function name. We use DISPATCH_CMD_DrawArrays
* for all DrawArrays variants without user buffers, and
* DISPATCH_CMD_DrawArraysInstancedBaseInstance for all DrawArrays
* variants with user buffers.
*/
const GLenum mode = cmd->mode;
const GLint first = cmd->first;
const GLsizei count = cmd->count;
@ -295,10 +347,10 @@ _mesa_unmarshal_DrawArraysInstancedBaseInstance(struct gl_context *ctx,
}
static ALWAYS_INLINE void
draw_arrays_async(struct gl_context *ctx, GLenum mode, GLint first,
GLsizei count, GLsizei instance_count, GLuint baseinstance,
unsigned user_buffer_mask,
const struct glthread_attrib_binding *buffers)
draw_arrays_async_user(struct gl_context *ctx, GLenum mode, GLint first,
GLsizei count, GLsizei instance_count, GLuint baseinstance,
unsigned user_buffer_mask,
const struct glthread_attrib_binding *buffers)
{
int buffers_size = util_bitcount(user_buffer_mask) * sizeof(buffers[0]);
int cmd_size = sizeof(struct marshal_cmd_DrawArraysInstancedBaseInstance) +
@ -341,8 +393,7 @@ draw_arrays(GLenum mode, GLint first, GLsizei count, GLsizei instance_count,
*/
if (ctx->API == API_OPENGL_CORE || !user_buffer_mask ||
count <= 0 || instance_count <= 0) {
draw_arrays_async(ctx, mode, first, count, instance_count, baseinstance,
0, NULL);
draw_arrays_async(ctx, mode, first, count, instance_count, baseinstance);
return;
}
@ -358,8 +409,8 @@ draw_arrays(GLenum mode, GLint first, GLsizei count, GLsizei instance_count,
return;
}
draw_arrays_async(ctx, mode, first, count, instance_count, baseinstance,
user_buffer_mask, buffers);
draw_arrays_async_user(ctx, mode, first, count, instance_count, baseinstance,
user_buffer_mask, buffers);
}
struct marshal_cmd_MultiDrawArrays
@ -496,6 +547,107 @@ sync:
(mode, first, count, draw_count));
}
/* DrawElementsInstancedBaseVertexBaseInstance not supporting user buffers.
 * Ignore the name.
 *
 * Filled in by draw_elements_async() when the index bounds are not valid and
 * read back by _mesa_unmarshal_DrawElementsInstancedARB(), which forwards the
 * fields to DrawElementsInstancedBaseVertexBaseInstance in the order
 * (mode, count, type, indices, instance_count, basevertex, baseinstance).
 */
struct marshal_cmd_DrawElementsInstancedARB
{
/* Never written by draw_elements_async(); presumably initialized by
 * _mesa_glthread_allocate_command() - confirm.
 */
struct marshal_cmd_base cmd_base;
GLenum mode;
GLenum type;
GLsizei count;
GLsizei instance_count;
GLint basevertex;
GLuint baseinstance;
/* Stored and forwarded as-is, never dereferenced here.
 * NOTE(review): presumably an offset into a bound index buffer rather than
 * a client pointer, since this path has no user buffers - confirm.
 */
const GLvoid *indices;
};
/* Replay a queued DrawElements-family command that carries no user buffers
 * and no index bounds.
 *
 * Ignore the function name. DISPATCH_CMD_DrawElementsInstancedBaseVertex-
 * BaseInstance is used for all DrawElements variants with user buffers,
 * while both DISPATCH_CMD_DrawElementsInstancedARB and
 * DISPATCH_CMD_DrawRangeElementsBaseVertex are used for all DrawElements
 * variants without user buffers.
 *
 * ctx: context whose CurrentServerDispatch table receives the call.
 * cmd: recorded draw parameters; they are forwarded unchanged.
 */
void
_mesa_unmarshal_DrawElementsInstancedARB(struct gl_context *ctx,
                                         const struct marshal_cmd_DrawElementsInstancedARB *cmd)
{
   /* Note the argument order differs from the struct's field order. */
   CALL_DrawElementsInstancedBaseVertexBaseInstance(ctx->CurrentServerDispatch,
                                                    (cmd->mode, cmd->count,
                                                     cmd->type, cmd->indices,
                                                     cmd->instance_count,
                                                     cmd->basevertex,
                                                     cmd->baseinstance));
}
/* DrawRangeElementsBaseVertex command not supporting user buffers.
 *
 * Filled in by draw_elements_async() when the index bounds are valid and
 * read back by _mesa_unmarshal_DrawRangeElementsBaseVertex(), which forwards
 * the fields to DrawRangeElementsBaseVertex in the order
 * (mode, min_index, max_index, count, type, indices, basevertex).
 * There are no instance_count/baseinstance fields in this command.
 */
struct marshal_cmd_DrawRangeElementsBaseVertex
{
/* Never written by draw_elements_async(); presumably initialized by
 * _mesa_glthread_allocate_command() - confirm.
 */
struct marshal_cmd_base cmd_base;
GLenum mode;
GLenum type;
GLsizei count;
GLint basevertex;
/* Index bounds supplied by the caller of draw_elements_async(). */
GLuint min_index;
GLuint max_index;
/* Stored and forwarded as-is, never dereferenced here. */
const GLvoid *indices;
};
/* Replay a queued DrawElements-family command that carries valid index
 * bounds and no user buffers.
 *
 * ctx: context whose CurrentServerDispatch table receives the call.
 * cmd: recorded draw parameters; they are forwarded unchanged.
 */
void
_mesa_unmarshal_DrawRangeElementsBaseVertex(struct gl_context *ctx,
                                            const struct marshal_cmd_DrawRangeElementsBaseVertex *cmd)
{
   /* Note the argument order differs from the struct's field order. */
   CALL_DrawRangeElementsBaseVertex(ctx->CurrentServerDispatch,
                                    (cmd->mode, cmd->min_index,
                                     cmd->max_index, cmd->count,
                                     cmd->type, cmd->indices,
                                     cmd->basevertex));
}
/* Queue a DrawElements-family draw that needs no user-buffer handling.
 *
 * Two fixed-size commands are used depending on index_bounds_valid:
 *  - false: DISPATCH_CMD_DrawElementsInstancedARB, which records
 *    instance_count, basevertex and baseinstance;
 *  - true:  DISPATCH_CMD_DrawRangeElementsBaseVertex, which records
 *    basevertex, min_index and max_index.
 *
 * NOTE(review): the range command has no room for instance_count or
 * baseinstance, so those arguments are dropped on that path. Presumably
 * callers only pass index_bounds_valid for non-instanced draws - confirm.
 */
static ALWAYS_INLINE void
draw_elements_async(struct gl_context *ctx, GLenum mode, GLsizei count,
                    GLenum type, const GLvoid *indices, GLsizei instance_count,
                    GLint basevertex, GLuint baseinstance,
                    bool index_bounds_valid, GLuint min_index, GLuint max_index)
{
   if (!index_bounds_valid) {
      /* No bounds available: record the fully general instanced draw. */
      const int bytes = sizeof(struct marshal_cmd_DrawElementsInstancedARB);
      struct marshal_cmd_DrawElementsInstancedARB *draw =
         _mesa_glthread_allocate_command(ctx, DISPATCH_CMD_DrawElementsInstancedARB, bytes);

      draw->mode = mode;
      draw->count = count;
      draw->type = type;
      draw->indices = indices;
      draw->instance_count = instance_count;
      draw->basevertex = basevertex;
      draw->baseinstance = baseinstance;
      return;
   }

   /* Bounds are known: record the range draw so they reach the driver. */
   const int bytes = sizeof(struct marshal_cmd_DrawRangeElementsBaseVertex);
   struct marshal_cmd_DrawRangeElementsBaseVertex *draw =
      _mesa_glthread_allocate_command(ctx, DISPATCH_CMD_DrawRangeElementsBaseVertex, bytes);

   draw->mode = mode;
   draw->count = count;
   draw->type = type;
   draw->indices = indices;
   draw->basevertex = basevertex;
   draw->min_index = min_index;
   draw->max_index = max_index;
}
struct marshal_cmd_DrawElementsInstancedBaseVertexBaseInstance
{
struct marshal_cmd_base cmd_base;
@ -517,6 +669,12 @@ void
_mesa_unmarshal_DrawElementsInstancedBaseVertexBaseInstance(struct gl_context *ctx,
const struct marshal_cmd_DrawElementsInstancedBaseVertexBaseInstance *cmd)
{
/* Ignore the function name. We use DISPATCH_CMD_DrawElementsInstanced-
* BaseVertexBaseInstance for all DrawElements variants with user buffers,
* and both DISPATCH_CMD_DrawElementsInstancedARB and DISPATCH_CMD_Draw-
* RangeElementsBaseVertex for all draw elements variants without user
* buffers.
*/
const GLenum mode = cmd->mode;
const GLsizei count = cmd->count;
const GLenum type = cmd->type;
@ -563,13 +721,13 @@ _mesa_unmarshal_DrawElementsInstancedBaseVertexBaseInstance(struct gl_context *c
}
static ALWAYS_INLINE void
draw_elements_async(struct gl_context *ctx, GLenum mode, GLsizei count,
GLenum type, const GLvoid *indices, GLsizei instance_count,
GLint basevertex, GLuint baseinstance,
bool index_bounds_valid, GLuint min_index, GLuint max_index,
struct gl_buffer_object *index_buffer,
unsigned user_buffer_mask,
const struct glthread_attrib_binding *buffers)
draw_elements_async_user(struct gl_context *ctx, GLenum mode, GLsizei count,
GLenum type, const GLvoid *indices, GLsizei instance_count,
GLint basevertex, GLuint baseinstance,
bool index_bounds_valid, GLuint min_index, GLuint max_index,
struct gl_buffer_object *index_buffer,
unsigned user_buffer_mask,
const struct glthread_attrib_binding *buffers)
{
int buffers_size = util_bitcount(user_buffer_mask) * sizeof(buffers[0]);
int cmd_size = sizeof(struct marshal_cmd_DrawElementsInstancedBaseVertexBaseInstance) +
@ -620,7 +778,7 @@ draw_elements(GLenum mode, GLsizei count, GLenum type, const GLvoid *indices,
(!user_buffer_mask && !has_user_indices)) {
draw_elements_async(ctx, mode, count, type, indices, instance_count,
basevertex, baseinstance, index_bounds_valid,
min_index, max_index, 0, 0, NULL);
min_index, max_index);
return;
}
@ -670,10 +828,10 @@ draw_elements(GLenum mode, GLsizei count, GLenum type, const GLvoid *indices,
index_buffer = upload_indices(ctx, count, index_size, &indices);
/* Draw asynchronously. */
draw_elements_async(ctx, mode, count, type, indices, instance_count,
basevertex, baseinstance, index_bounds_valid,
min_index, max_index, index_buffer,
user_buffer_mask, buffers);
draw_elements_async_user(ctx, mode, count, type, indices, instance_count,
basevertex, baseinstance, index_bounds_valid,
min_index, max_index, index_buffer,
user_buffer_mask, buffers);
return;
sync:
@ -1049,12 +1207,6 @@ _mesa_marshal_MultiDrawElementsEXT(GLenum mode, const GLsizei *count,
draw_count, NULL);
}
/* Stub unmarshal function: the body is only an unreachable() marker whose
 * message states that DrawArraysInstancedBaseInstance is used instead.
 * NOTE(review): this listing also contains a full definition of
 * _mesa_unmarshal_DrawArrays earlier on; the two cannot coexist in one
 * translation unit, so this stub is presumably the pre-change version -
 * confirm.
 */
void
_mesa_unmarshal_DrawArrays(struct gl_context *ctx, const struct marshal_cmd_DrawArrays *cmd)
{
unreachable("never used - DrawArraysInstancedBaseInstance is used instead");
}
void
_mesa_unmarshal_DrawArraysInstancedARB(struct gl_context *ctx, const struct marshal_cmd_DrawArraysInstancedARB *cmd)
{
@ -1073,24 +1225,12 @@ _mesa_unmarshal_DrawRangeElements(struct gl_context *ctx, const struct marshal_c
unreachable("never used - DrawElementsInstancedBaseVertexBaseInstance is used instead");
}
/* Stub unmarshal function: the body is only an unreachable() marker whose
 * message states that DrawElementsInstancedBaseVertexBaseInstance is used
 * instead.
 * NOTE(review): this listing also contains a full definition of
 * _mesa_unmarshal_DrawElementsInstancedARB earlier on; the two cannot
 * coexist in one translation unit, so this stub is presumably the
 * pre-change version - confirm.
 */
void
_mesa_unmarshal_DrawElementsInstancedARB(struct gl_context *ctx, const struct marshal_cmd_DrawElementsInstancedARB *cmd)
{
unreachable("never used - DrawElementsInstancedBaseVertexBaseInstance is used instead");
}
/* Stub unmarshal function: the body is only an unreachable() marker whose
 * message states that DrawElementsInstancedBaseVertexBaseInstance is used
 * instead. Neither ctx nor cmd is read.
 */
void
_mesa_unmarshal_DrawElementsBaseVertex(struct gl_context *ctx, const struct marshal_cmd_DrawElementsBaseVertex *cmd)
{
unreachable("never used - DrawElementsInstancedBaseVertexBaseInstance is used instead");
}
/* Stub unmarshal function: the body is only an unreachable() marker whose
 * message states that DrawElementsInstancedBaseVertexBaseInstance is used
 * instead.
 * NOTE(review): this listing also contains a full definition of
 * _mesa_unmarshal_DrawRangeElementsBaseVertex earlier on; the two cannot
 * coexist in one translation unit, so this stub is presumably the
 * pre-change version - confirm.
 */
void
_mesa_unmarshal_DrawRangeElementsBaseVertex(struct gl_context *ctx, const struct marshal_cmd_DrawRangeElementsBaseVertex *cmd)
{
unreachable("never used - DrawElementsInstancedBaseVertexBaseInstance is used instead");
}
void
_mesa_unmarshal_DrawElementsInstancedBaseVertex(struct gl_context *ctx, const struct marshal_cmd_DrawElementsInstancedBaseVertex *cmd)
{