mesa,st/mesa: add a fast path for non-static VAOs

Skip most of _mesa_update_vao_derived_arrays if the VAO is not static.
Drivers need a separate codepath for this.

This increases performance by 7% with glthread and the game "torcs".

The reason is that glthread uploads vertices and sets vertex buffers
every draw call, so the overhead is very noticeable. glthread doesn't
hide the overhead, because the driver thread is the busiest thread.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4314>
This commit is contained in:
Marek Olšák 2020-03-27 05:07:02 -04:00 committed by Marge Bot
parent 2e3a9d7828
commit 42842306d3
5 changed files with 64 additions and 0 deletions

View File

@ -546,6 +546,17 @@ _mesa_update_vao_derived_arrays(struct gl_context *ctx,
vao->_EffEnabledVBO = _mesa_vao_enable_to_vp_inputs(mode, enabled & vbos);
vao->_EffEnabledNonZeroDivisor =
_mesa_vao_enable_to_vp_inputs(mode, enabled & divisor_is_nonzero);
/* Fast path when the VAO is updated too often. */
if (vao->IsDynamic)
return;
/* More than 4 updates turn the VAO to dynamic. */
if (ctx->Const.AllowDynamicVAOFastPath && ++vao->NumUpdates > 4) {
vao->IsDynamic = true;
return;
}
/* Walk those enabled arrays that have a real vbo attached */
GLbitfield mask = enabled;
while (mask) {

View File

@ -1589,6 +1589,8 @@ copy_array_object(struct gl_context *ctx,
dest->NonZeroDivisorMask = src->NonZeroDivisorMask;
dest->_AttributeMapMode = src->_AttributeMapMode;
dest->NewArrays = src->NewArrays;
dest->NumUpdates = src->NumUpdates;
dest->IsDynamic = src->IsDynamic;
}
/**

View File

@ -1539,6 +1539,14 @@ struct gl_vertex_array_object
*/
GLboolean EverBound;
/**
* Whether the VAO is changed by the application so often that some of
* the derived fields are not updated at all to decrease overhead.
* Also, interleaved arrays are not detected, because it's too expensive
* to do that before every draw call.
*/
bool IsDynamic;
/**
* Marked to true if the object is shared between contexts and immutable.
* Then reference counting is done using atomics and thread safe.
@ -1546,6 +1554,12 @@ struct gl_vertex_array_object
*/
bool SharedAndImmutable;
/**
* Number of updates that were done by the application. This is used to
* decide whether the VAO is static or dynamic.
*/
unsigned NumUpdates;
/** Vertex attribute arrays */
struct gl_array_attributes VertexAttrib[VERT_ATTRIB_MAX];
@ -4164,6 +4178,9 @@ struct gl_constants
/** Whether out-of-order draw (Begin/End) optimizations are allowed. */
bool AllowDrawOutOfOrder;
/** Whether to allow the fast path for frequently updated VAOs. */
bool AllowDynamicVAOFastPath;
/** GL_ARB_gl_spirv */
struct spirv_supported_capabilities SpirVCapabilities;

View File

@ -149,6 +149,38 @@ st_setup_arrays(struct st_context *st,
st->draw_needs_minmax_index =
(userbuf_attribs & ~_mesa_draw_nonzero_divisor_bits(ctx)) != 0;
if (vao->IsDynamic) {
while (mask) {
const gl_vert_attrib attr = u_bit_scan(&mask);
const struct gl_array_attributes *const attrib =
_mesa_draw_array_attrib(vao, attr);
const struct gl_vertex_buffer_binding *const binding =
&vao->BufferBinding[attrib->BufferBindingIndex];
const unsigned bufidx = (*num_vbuffers)++;
/* Set the vertex buffer. */
if (binding->BufferObj) {
struct st_buffer_object *stobj = st_buffer_object(binding->BufferObj);
vbuffer[bufidx].buffer.resource = stobj ? stobj->buffer : NULL;
vbuffer[bufidx].is_user_buffer = false;
vbuffer[bufidx].buffer_offset = binding->Offset +
attrib->RelativeOffset;
} else {
vbuffer[bufidx].buffer.user = attrib->Ptr;
vbuffer[bufidx].is_user_buffer = true;
vbuffer[bufidx].buffer_offset = 0;
}
vbuffer[bufidx].stride = binding->Stride; /* in bytes */
/* Set the vertex element. */
init_velement(vp, velements->velems, &attrib->Format, 0,
binding->InstanceDivisor, bufidx,
input_to_index[attr]);
}
return;
}
while (mask) {
/* The attribute index to start pulling a binding */
const gl_vert_attrib i = ffs(mask) - 1;

View File

@ -583,6 +583,8 @@ void st_init_limits(struct pipe_screen *screen,
c->MultiDrawWithUserIndices =
screen->get_param(screen, PIPE_CAP_DRAW_INFO_START_WITH_USER_INDICES);
c->AllowDynamicVAOFastPath = true;
c->glBeginEndBufferSize =
screen->get_param(screen, PIPE_CAP_GL_BEGIN_END_BUFFER_SIZE);
}