/* * Copyright © 2020 Advanced Micro Devices, Inc. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice (including the next * paragraph) shall be included in all copies or substantial portions of the * Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS * IN THE SOFTWARE. */ /* Draw function marshalling for glthread. * * The purpose of these glDraw wrappers is to upload non-VBO vertex and * index data, so that glthread doesn't have to execute synchronously. */ #include "c99_alloca.h" #include "main/glthread_marshal.h" #include "main/dispatch.h" #include "main/varray.h" static inline unsigned get_index_size(GLenum type) { /* GL_UNSIGNED_BYTE - GL_UNSIGNED_BYTE = 0 * GL_UNSIGNED_SHORT - GL_UNSIGNED_BYTE = 2 * GL_UNSIGNED_INT - GL_UNSIGNED_BYTE = 4 * * Divide by 2 to get n=0,1,2, then the index size is: 1 << n */ return 1 << ((type - GL_UNSIGNED_BYTE) >> 1); } static inline bool is_index_type_valid(GLenum type) { /* GL_UNSIGNED_BYTE = 0x1401 * GL_UNSIGNED_SHORT = 0x1403 * GL_UNSIGNED_INT = 0x1405 * * The trick is that bit 1 and bit 2 mean USHORT and UINT, respectively. * After clearing those two bits (with ~6), we should get UBYTE. * Both bits can't be set, because the enum would be greater than UINT. */ return type <= GL_UNSIGNED_INT && (type & ~6) == GL_UNSIGNED_BYTE; } static ALWAYS_INLINE struct gl_buffer_object * upload_indices(struct gl_context *ctx, unsigned count, unsigned index_size, const GLvoid **indices) { struct gl_buffer_object *upload_buffer = NULL; unsigned upload_offset = 0; assert(count); _mesa_glthread_upload(ctx, *indices, index_size * count, &upload_offset, &upload_buffer, NULL); assert(upload_buffer); *indices = (const GLvoid*)(intptr_t)upload_offset; return upload_buffer; } static ALWAYS_INLINE struct gl_buffer_object * upload_multi_indices(struct gl_context *ctx, unsigned total_count, unsigned index_size, unsigned draw_count, const GLsizei *count, const GLvoid *const *indices, const GLvoid **out_indices) { struct gl_buffer_object *upload_buffer = NULL; unsigned upload_offset = 0; uint8_t *upload_ptr = NULL; assert(total_count); _mesa_glthread_upload(ctx, NULL, index_size * total_count, &upload_offset, &upload_buffer, &upload_ptr); assert(upload_buffer); for (unsigned i = 0, offset = 0; i < draw_count; i++) { if (count[i] == 0) continue; unsigned size = count[i] * index_size; memcpy(upload_ptr + offset, indices[i], size); out_indices[i] = (const GLvoid*)(intptr_t)(upload_offset + offset); offset += size; } return upload_buffer; } static ALWAYS_INLINE bool upload_vertices(struct gl_context *ctx, unsigned user_buffer_mask, unsigned start_vertex, unsigned num_vertices, unsigned start_instance, unsigned num_instances, struct glthread_attrib_binding *buffers) { struct glthread_vao *vao = ctx->GLThread.CurrentVAO; unsigned attrib_mask_iter = vao->Enabled; unsigned num_buffers = 0; assert((num_vertices || !(user_buffer_mask & ~vao->NonZeroDivisorMask)) && (num_instances || !(user_buffer_mask & vao->NonZeroDivisorMask))); if (unlikely(vao->BufferInterleaved & user_buffer_mask)) { /* Slower upload path where some buffers reference multiple attribs, * so we have to use 2 while loops instead of 1. */ unsigned start_offset[VERT_ATTRIB_MAX]; unsigned end_offset[VERT_ATTRIB_MAX]; uint32_t buffer_mask = 0; while (attrib_mask_iter) { unsigned i = u_bit_scan(&attrib_mask_iter); unsigned binding_index = vao->Attrib[i].BufferIndex; if (!(user_buffer_mask & (1 << binding_index))) continue; unsigned stride = vao->Attrib[binding_index].Stride; unsigned instance_div = vao->Attrib[binding_index].Divisor; unsigned element_size = vao->Attrib[i].ElementSize; unsigned offset = vao->Attrib[i].RelativeOffset; unsigned size; if (instance_div) { /* Per-instance attrib. */ /* Figure out how many instances we'll render given instance_div. We * can't use the typical div_round_up() pattern because the CTS uses * instance_div = ~0 for a test, which overflows div_round_up()'s * addition. */ unsigned count = num_instances / instance_div; if (count * instance_div != num_instances) count++; offset += stride * start_instance; size = stride * (count - 1) + element_size; } else { /* Per-vertex attrib. */ offset += stride * start_vertex; size = stride * (num_vertices - 1) + element_size; } unsigned binding_index_bit = 1u << binding_index; /* Update upload offsets. */ if (!(buffer_mask & binding_index_bit)) { start_offset[binding_index] = offset; end_offset[binding_index] = offset + size; } else { if (offset < start_offset[binding_index]) start_offset[binding_index] = offset; if (offset + size > end_offset[binding_index]) end_offset[binding_index] = offset + size; } buffer_mask |= binding_index_bit; } /* Upload buffers. */ while (buffer_mask) { struct gl_buffer_object *upload_buffer = NULL; unsigned upload_offset = 0; unsigned start, end; unsigned binding_index = u_bit_scan(&buffer_mask); start = start_offset[binding_index]; end = end_offset[binding_index]; assert(start < end); const void *ptr = vao->Attrib[binding_index].Pointer; _mesa_glthread_upload(ctx, (uint8_t*)ptr + start, end - start, &upload_offset, &upload_buffer, NULL); assert(upload_buffer); buffers[num_buffers].buffer = upload_buffer; buffers[num_buffers].offset = upload_offset - start; buffers[num_buffers].original_pointer = ptr; num_buffers++; } return true; } /* Faster path where all attribs are separate. */ while (attrib_mask_iter) { unsigned i = u_bit_scan(&attrib_mask_iter); unsigned binding_index = vao->Attrib[i].BufferIndex; if (!(user_buffer_mask & (1 << binding_index))) continue; struct gl_buffer_object *upload_buffer = NULL; unsigned upload_offset = 0; unsigned stride = vao->Attrib[binding_index].Stride; unsigned instance_div = vao->Attrib[binding_index].Divisor; unsigned element_size = vao->Attrib[i].ElementSize; unsigned offset = vao->Attrib[i].RelativeOffset; unsigned size; if (instance_div) { /* Per-instance attrib. */ /* Figure out how many instances we'll render given instance_div. We * can't use the typical div_round_up() pattern because the CTS uses * instance_div = ~0 for a test, which overflows div_round_up()'s * addition. */ unsigned count = num_instances / instance_div; if (count * instance_div != num_instances) count++; offset += stride * start_instance; size = stride * (count - 1) + element_size; } else { /* Per-vertex attrib. */ offset += stride * start_vertex; size = stride * (num_vertices - 1) + element_size; } const void *ptr = vao->Attrib[binding_index].Pointer; _mesa_glthread_upload(ctx, (uint8_t*)ptr + offset, size, &upload_offset, &upload_buffer, NULL); assert(upload_buffer); buffers[num_buffers].buffer = upload_buffer; buffers[num_buffers].offset = upload_offset - offset; buffers[num_buffers].original_pointer = ptr; num_buffers++; } return true; } /* Generic DrawArrays structure NOT supporting user buffers. Ignore the name. */ struct marshal_cmd_DrawArrays { struct marshal_cmd_base cmd_base; GLenum mode; GLint first; GLsizei count; GLsizei instance_count; GLuint baseinstance; }; uint32_t _mesa_unmarshal_DrawArrays(struct gl_context *ctx, const struct marshal_cmd_DrawArrays *cmd, const uint64_t *last) { /* Ignore the function name. We use DISPATCH_CMD_DrawArrays * for all DrawArrays variants without user buffers, and * DISPATCH_CMD_DrawArraysInstancedBaseInstance for all DrawArrays * variants with user buffrs. */ const GLenum mode = cmd->mode; const GLint first = cmd->first; const GLsizei count = cmd->count; const GLsizei instance_count = cmd->instance_count; const GLuint baseinstance = cmd->baseinstance; CALL_DrawArraysInstancedBaseInstance(ctx->CurrentServerDispatch, (mode, first, count, instance_count, baseinstance)); return cmd->cmd_base.cmd_size; } static ALWAYS_INLINE void draw_arrays_async(struct gl_context *ctx, GLenum mode, GLint first, GLsizei count, GLsizei instance_count, GLuint baseinstance) { int cmd_size = sizeof(struct marshal_cmd_DrawArrays); struct marshal_cmd_DrawArrays *cmd = _mesa_glthread_allocate_command(ctx, DISPATCH_CMD_DrawArrays, cmd_size); cmd->mode = mode; cmd->first = first; cmd->count = count; cmd->instance_count = instance_count; cmd->baseinstance = baseinstance; } /* Generic DrawArrays structure supporting user buffers. Ignore the name. */ struct marshal_cmd_DrawArraysInstancedBaseInstance { struct marshal_cmd_base cmd_base; GLenum mode; GLint first; GLsizei count; GLsizei instance_count; GLuint baseinstance; GLuint user_buffer_mask; }; uint32_t _mesa_unmarshal_DrawArraysInstancedBaseInstance(struct gl_context *ctx, const struct marshal_cmd_DrawArraysInstancedBaseInstance *cmd, const uint64_t *last) { /* Ignore the function name. We use DISPATCH_CMD_DrawArrays * for all DrawArrays variants without user buffers, and * DISPATCH_CMD_DrawArraysInstancedBaseInstance for all DrawArrays * variants with user buffrs. */ const GLenum mode = cmd->mode; const GLint first = cmd->first; const GLsizei count = cmd->count; const GLsizei instance_count = cmd->instance_count; const GLuint baseinstance = cmd->baseinstance; const GLuint user_buffer_mask = cmd->user_buffer_mask; const struct glthread_attrib_binding *buffers = (const struct glthread_attrib_binding *)(cmd + 1); /* Bind uploaded buffers if needed. */ if (user_buffer_mask) { _mesa_InternalBindVertexBuffers(ctx, buffers, user_buffer_mask, false); } CALL_DrawArraysInstancedBaseInstance(ctx->CurrentServerDispatch, (mode, first, count, instance_count, baseinstance)); /* Restore states. */ if (user_buffer_mask) { _mesa_InternalBindVertexBuffers(ctx, buffers, user_buffer_mask, true); } return cmd->cmd_base.cmd_size; } static ALWAYS_INLINE void draw_arrays_async_user(struct gl_context *ctx, GLenum mode, GLint first, GLsizei count, GLsizei instance_count, GLuint baseinstance, unsigned user_buffer_mask, const struct glthread_attrib_binding *buffers) { int buffers_size = util_bitcount(user_buffer_mask) * sizeof(buffers[0]); int cmd_size = sizeof(struct marshal_cmd_DrawArraysInstancedBaseInstance) + buffers_size; struct marshal_cmd_DrawArraysInstancedBaseInstance *cmd; cmd = _mesa_glthread_allocate_command(ctx, DISPATCH_CMD_DrawArraysInstancedBaseInstance, cmd_size); cmd->mode = mode; cmd->first = first; cmd->count = count; cmd->instance_count = instance_count; cmd->baseinstance = baseinstance; cmd->user_buffer_mask = user_buffer_mask; if (user_buffer_mask) memcpy(cmd + 1, buffers, buffers_size); } static ALWAYS_INLINE void draw_arrays(GLenum mode, GLint first, GLsizei count, GLsizei instance_count, GLuint baseinstance, bool compiled_into_dlist) { GET_CURRENT_CONTEXT(ctx); struct glthread_vao *vao = ctx->GLThread.CurrentVAO; unsigned user_buffer_mask = vao->UserPointerMask & vao->BufferEnabled; if (compiled_into_dlist && ctx->GLThread.ListMode) { _mesa_glthread_finish_before(ctx, "DrawArrays"); /* Use the function that's compiled into a display list. */ CALL_DrawArrays(ctx->CurrentServerDispatch, (mode, first, count)); return; } /* Fast path when nothing needs to be done. * * This is also an error path. Zero counts should still call the driver * for possible GL errors. */ if (ctx->API == API_OPENGL_CORE || !user_buffer_mask || count <= 0 || instance_count <= 0) { draw_arrays_async(ctx, mode, first, count, instance_count, baseinstance); return; } /* Upload and draw. */ struct glthread_attrib_binding buffers[VERT_ATTRIB_MAX]; if (!ctx->GLThread.SupportsNonVBOUploads || !upload_vertices(ctx, user_buffer_mask, first, count, baseinstance, instance_count, buffers)) { _mesa_glthread_finish_before(ctx, "DrawArrays"); CALL_DrawArraysInstancedBaseInstance(ctx->CurrentServerDispatch, (mode, first, count, instance_count, baseinstance)); return; } draw_arrays_async_user(ctx, mode, first, count, instance_count, baseinstance, user_buffer_mask, buffers); } struct marshal_cmd_MultiDrawArrays { struct marshal_cmd_base cmd_base; GLenum mode; GLsizei draw_count; GLuint user_buffer_mask; }; uint32_t _mesa_unmarshal_MultiDrawArrays(struct gl_context *ctx, const struct marshal_cmd_MultiDrawArrays *cmd, const uint64_t *last) { const GLenum mode = cmd->mode; const GLsizei draw_count = cmd->draw_count; const GLuint user_buffer_mask = cmd->user_buffer_mask; const char *variable_data = (const char *)(cmd + 1); const GLint *first = (GLint *)variable_data; variable_data += sizeof(GLint) * draw_count; const GLsizei *count = (GLsizei *)variable_data; variable_data += sizeof(GLsizei) * draw_count; const struct glthread_attrib_binding *buffers = (const struct glthread_attrib_binding *)variable_data; /* Bind uploaded buffers if needed. */ if (user_buffer_mask) { _mesa_InternalBindVertexBuffers(ctx, buffers, user_buffer_mask, false); } CALL_MultiDrawArrays(ctx->CurrentServerDispatch, (mode, first, count, draw_count)); /* Restore states. */ if (user_buffer_mask) { _mesa_InternalBindVertexBuffers(ctx, buffers, user_buffer_mask, true); } return cmd->cmd_base.cmd_size; } static ALWAYS_INLINE void multi_draw_arrays_async(struct gl_context *ctx, GLenum mode, const GLint *first, const GLsizei *count, GLsizei draw_count, unsigned user_buffer_mask, const struct glthread_attrib_binding *buffers) { int first_size = sizeof(GLint) * draw_count; int count_size = sizeof(GLsizei) * draw_count; int buffers_size = util_bitcount(user_buffer_mask) * sizeof(buffers[0]); int cmd_size = sizeof(struct marshal_cmd_MultiDrawArrays) + first_size + count_size + buffers_size; struct marshal_cmd_MultiDrawArrays *cmd; cmd = _mesa_glthread_allocate_command(ctx, DISPATCH_CMD_MultiDrawArrays, cmd_size); cmd->mode = mode; cmd->draw_count = draw_count; cmd->user_buffer_mask = user_buffer_mask; char *variable_data = (char*)(cmd + 1); memcpy(variable_data, first, first_size); variable_data += first_size; memcpy(variable_data, count, count_size); if (user_buffer_mask) { variable_data += count_size; memcpy(variable_data, buffers, buffers_size); } } void GLAPIENTRY _mesa_marshal_MultiDrawArrays(GLenum mode, const GLint *first, const GLsizei *count, GLsizei draw_count) { GET_CURRENT_CONTEXT(ctx); struct glthread_vao *vao = ctx->GLThread.CurrentVAO; unsigned user_buffer_mask = vao->UserPointerMask & vao->BufferEnabled; if (ctx->GLThread.ListMode) goto sync; if (draw_count >= 0 && (ctx->API == API_OPENGL_CORE || !user_buffer_mask)) { multi_draw_arrays_async(ctx, mode, first, count, draw_count, 0, NULL); return; } /* If the draw count is too high or negative, the queue can't be used. */ if (!ctx->GLThread.SupportsNonVBOUploads || draw_count < 0 || draw_count > MARSHAL_MAX_CMD_SIZE / 16) goto sync; unsigned min_index = ~0; unsigned max_index_exclusive = 0; for (unsigned i = 0; i < draw_count; i++) { GLsizei vertex_count = count[i]; if (vertex_count < 0) { /* Just call the driver to set the error. */ multi_draw_arrays_async(ctx, mode, first, count, draw_count, 0, NULL); return; } if (vertex_count == 0) continue; min_index = MIN2(min_index, first[i]); max_index_exclusive = MAX2(max_index_exclusive, first[i] + vertex_count); } unsigned num_vertices = max_index_exclusive - min_index; if (num_vertices == 0) { /* Nothing to do, but call the driver to set possible GL errors. */ multi_draw_arrays_async(ctx, mode, first, count, draw_count, 0, NULL); return; } /* Upload and draw. */ struct glthread_attrib_binding buffers[VERT_ATTRIB_MAX]; if (!upload_vertices(ctx, user_buffer_mask, min_index, num_vertices, 0, 1, buffers)) goto sync; multi_draw_arrays_async(ctx, mode, first, count, draw_count, user_buffer_mask, buffers); return; sync: _mesa_glthread_finish_before(ctx, "MultiDrawArrays"); CALL_MultiDrawArrays(ctx->CurrentServerDispatch, (mode, first, count, draw_count)); } /* DrawElementsInstancedBaseVertexBaseInstance not supporting user buffers. * Ignore the name. */ struct marshal_cmd_DrawElementsInstancedARB { struct marshal_cmd_base cmd_base; GLenum mode; GLenum type; GLsizei count; GLsizei instance_count; GLint basevertex; GLuint baseinstance; const GLvoid *indices; }; uint32_t _mesa_unmarshal_DrawElementsInstancedARB(struct gl_context *ctx, const struct marshal_cmd_DrawElementsInstancedARB *cmd, const uint64_t *last) { /* Ignore the function name. We use DISPATCH_CMD_DrawElementsInstanced- * BaseVertexBaseInstance for all DrawElements variants with user buffers, * and both DISPATCH_CMD_DrawElementsInstancedARB and DISPATCH_CMD_Draw- * RangeElementsBaseVertex for all draw elements variants without user * buffers. */ const GLenum mode = cmd->mode; const GLsizei count = cmd->count; const GLenum type = cmd->type; const GLvoid *indices = cmd->indices; const GLsizei instance_count = cmd->instance_count; const GLint basevertex = cmd->basevertex; const GLuint baseinstance = cmd->baseinstance; CALL_DrawElementsInstancedBaseVertexBaseInstance(ctx->CurrentServerDispatch, (mode, count, type, indices, instance_count, basevertex, baseinstance)); return cmd->cmd_base.cmd_size; } struct marshal_cmd_DrawRangeElementsBaseVertex { struct marshal_cmd_base cmd_base; GLenum mode; GLenum type; GLsizei count; GLint basevertex; GLuint min_index; GLuint max_index; const GLvoid *indices; }; uint32_t _mesa_unmarshal_DrawRangeElementsBaseVertex(struct gl_context *ctx, const struct marshal_cmd_DrawRangeElementsBaseVertex *cmd, const uint64_t *last) { const GLenum mode = cmd->mode; const GLsizei count = cmd->count; const GLenum type = cmd->type; const GLvoid *indices = cmd->indices; const GLint basevertex = cmd->basevertex; const GLuint min_index = cmd->min_index; const GLuint max_index = cmd->max_index; CALL_DrawRangeElementsBaseVertex(ctx->CurrentServerDispatch, (mode, min_index, max_index, count, type, indices, basevertex)); return cmd->cmd_base.cmd_size; } static ALWAYS_INLINE void draw_elements_async(struct gl_context *ctx, GLenum mode, GLsizei count, GLenum type, const GLvoid *indices, GLsizei instance_count, GLint basevertex, GLuint baseinstance, bool index_bounds_valid, GLuint min_index, GLuint max_index) { if (index_bounds_valid) { int cmd_size = sizeof(struct marshal_cmd_DrawRangeElementsBaseVertex); struct marshal_cmd_DrawRangeElementsBaseVertex *cmd = _mesa_glthread_allocate_command(ctx, DISPATCH_CMD_DrawRangeElementsBaseVertex, cmd_size); cmd->mode = mode; cmd->count = count; cmd->type = type; cmd->indices = indices; cmd->basevertex = basevertex; cmd->min_index = min_index; cmd->max_index = max_index; } else { int cmd_size = sizeof(struct marshal_cmd_DrawElementsInstancedARB); struct marshal_cmd_DrawElementsInstancedARB *cmd = _mesa_glthread_allocate_command(ctx, DISPATCH_CMD_DrawElementsInstancedARB, cmd_size); cmd->mode = mode; cmd->count = count; cmd->type = type; cmd->indices = indices; cmd->instance_count = instance_count; cmd->basevertex = basevertex; cmd->baseinstance = baseinstance; } } struct marshal_cmd_DrawElementsInstancedBaseVertexBaseInstance { struct marshal_cmd_base cmd_base; bool index_bounds_valid; GLenum mode; GLenum type; GLsizei count; GLsizei instance_count; GLint basevertex; GLuint baseinstance; GLuint min_index; GLuint max_index; GLuint user_buffer_mask; const GLvoid *indices; struct gl_buffer_object *index_buffer; }; uint32_t _mesa_unmarshal_DrawElementsInstancedBaseVertexBaseInstance(struct gl_context *ctx, const struct marshal_cmd_DrawElementsInstancedBaseVertexBaseInstance *cmd, const uint64_t *last) { /* Ignore the function name. We use DISPATCH_CMD_DrawElementsInstanced- * BaseVertexBaseInstance for all DrawElements variants with user buffers, * and both DISPATCH_CMD_DrawElementsInstancedARB and DISPATCH_CMD_Draw- * RangeElementsBaseVertex for all draw elements variants without user * buffers. */ const GLenum mode = cmd->mode; const GLsizei count = cmd->count; const GLenum type = cmd->type; const GLvoid *indices = cmd->indices; const GLsizei instance_count = cmd->instance_count; const GLint basevertex = cmd->basevertex; const GLuint baseinstance = cmd->baseinstance; const GLuint min_index = cmd->min_index; const GLuint max_index = cmd->max_index; const GLuint user_buffer_mask = cmd->user_buffer_mask; struct gl_buffer_object *index_buffer = cmd->index_buffer; const struct glthread_attrib_binding *buffers = (const struct glthread_attrib_binding *)(cmd + 1); /* Bind uploaded buffers if needed. */ if (user_buffer_mask) { _mesa_InternalBindVertexBuffers(ctx, buffers, user_buffer_mask, false); } if (index_buffer) { _mesa_InternalBindElementBuffer(ctx, index_buffer); } /* Draw. */ if (cmd->index_bounds_valid && instance_count == 1 && baseinstance == 0) { CALL_DrawRangeElementsBaseVertex(ctx->CurrentServerDispatch, (mode, min_index, max_index, count, type, indices, basevertex)); } else { CALL_DrawElementsInstancedBaseVertexBaseInstance(ctx->CurrentServerDispatch, (mode, count, type, indices, instance_count, basevertex, baseinstance)); } /* Restore states. */ if (index_buffer) { _mesa_InternalBindElementBuffer(ctx, NULL); } if (user_buffer_mask) { _mesa_InternalBindVertexBuffers(ctx, buffers, user_buffer_mask, true); } return cmd->cmd_base.cmd_size; } static ALWAYS_INLINE void draw_elements_async_user(struct gl_context *ctx, GLenum mode, GLsizei count, GLenum type, const GLvoid *indices, GLsizei instance_count, GLint basevertex, GLuint baseinstance, bool index_bounds_valid, GLuint min_index, GLuint max_index, struct gl_buffer_object *index_buffer, unsigned user_buffer_mask, const struct glthread_attrib_binding *buffers) { int buffers_size = util_bitcount(user_buffer_mask) * sizeof(buffers[0]); int cmd_size = sizeof(struct marshal_cmd_DrawElementsInstancedBaseVertexBaseInstance) + buffers_size; struct marshal_cmd_DrawElementsInstancedBaseVertexBaseInstance *cmd; cmd = _mesa_glthread_allocate_command(ctx, DISPATCH_CMD_DrawElementsInstancedBaseVertexBaseInstance, cmd_size); cmd->mode = mode; cmd->count = count; cmd->type = type; cmd->indices = indices; cmd->instance_count = instance_count; cmd->basevertex = basevertex; cmd->baseinstance = baseinstance; cmd->min_index = min_index; cmd->max_index = max_index; cmd->user_buffer_mask = user_buffer_mask; cmd->index_bounds_valid = index_bounds_valid; cmd->index_buffer = index_buffer; if (user_buffer_mask) memcpy(cmd + 1, buffers, buffers_size); } static void draw_elements(GLenum mode, GLsizei count, GLenum type, const GLvoid *indices, GLsizei instance_count, GLint basevertex, GLuint baseinstance, bool index_bounds_valid, GLuint min_index, GLuint max_index, bool compiled_into_dlist) { GET_CURRENT_CONTEXT(ctx); struct glthread_vao *vao = ctx->GLThread.CurrentVAO; unsigned user_buffer_mask = vao->UserPointerMask & vao->BufferEnabled; bool has_user_indices = vao->CurrentElementBufferName == 0; if (compiled_into_dlist && ctx->GLThread.ListMode) goto sync; /* Fast path when nothing needs to be done. * * This is also an error path. Zero counts should still call the driver * for possible GL errors. */ if (ctx->API == API_OPENGL_CORE || count <= 0 || instance_count <= 0 || max_index < min_index || !is_index_type_valid(type) || (!user_buffer_mask && !has_user_indices)) { draw_elements_async(ctx, mode, count, type, indices, instance_count, basevertex, baseinstance, index_bounds_valid, min_index, max_index); return; } if (!ctx->GLThread.SupportsNonVBOUploads) goto sync; bool need_index_bounds = user_buffer_mask & ~vao->NonZeroDivisorMask; unsigned index_size = get_index_size(type); if (need_index_bounds && !index_bounds_valid) { /* Sync if indices come from a buffer and vertices come from memory * and index bounds are not valid. * * We would have to map the indices to compute the index bounds, and * for that we would have to sync anyway. */ if (!has_user_indices) goto sync; /* Compute the index bounds. */ min_index = ~0; max_index = 0; vbo_get_minmax_index_mapped(count, index_size, ctx->GLThread._RestartIndex[index_size - 1], ctx->GLThread._PrimitiveRestart, indices, &min_index, &max_index); index_bounds_valid = true; } unsigned start_vertex = min_index + basevertex; unsigned num_vertices = max_index + 1 - min_index; /* If there is too much data to upload, sync and let the driver unroll * indices. */ if (util_is_vbo_upload_ratio_too_large(count, num_vertices)) goto sync; struct glthread_attrib_binding buffers[VERT_ATTRIB_MAX]; if (user_buffer_mask && !upload_vertices(ctx, user_buffer_mask, start_vertex, num_vertices, baseinstance, instance_count, buffers)) goto sync; /* Upload indices. */ struct gl_buffer_object *index_buffer = NULL; if (has_user_indices) index_buffer = upload_indices(ctx, count, index_size, &indices); /* Draw asynchronously. */ draw_elements_async_user(ctx, mode, count, type, indices, instance_count, basevertex, baseinstance, index_bounds_valid, min_index, max_index, index_buffer, user_buffer_mask, buffers); return; sync: _mesa_glthread_finish_before(ctx, "DrawElements"); if (compiled_into_dlist && ctx->GLThread.ListMode) { /* Only use the ones that are compiled into display lists. */ if (basevertex) { CALL_DrawElementsBaseVertex(ctx->CurrentServerDispatch, (mode, count, type, indices, basevertex)); } else if (index_bounds_valid) { CALL_DrawRangeElements(ctx->CurrentServerDispatch, (mode, min_index, max_index, count, type, indices)); } else { CALL_DrawElements(ctx->CurrentServerDispatch, (mode, count, type, indices)); } } else if (index_bounds_valid && instance_count == 1 && baseinstance == 0) { CALL_DrawRangeElementsBaseVertex(ctx->CurrentServerDispatch, (mode, min_index, max_index, count, type, indices, basevertex)); } else { CALL_DrawElementsInstancedBaseVertexBaseInstance(ctx->CurrentServerDispatch, (mode, count, type, indices, instance_count, basevertex, baseinstance)); } } struct marshal_cmd_MultiDrawElementsBaseVertex { struct marshal_cmd_base cmd_base; bool has_base_vertex; GLenum mode; GLenum type; GLsizei draw_count; GLuint user_buffer_mask; struct gl_buffer_object *index_buffer; }; uint32_t _mesa_unmarshal_MultiDrawElementsBaseVertex(struct gl_context *ctx, const struct marshal_cmd_MultiDrawElementsBaseVertex *cmd, const uint64_t *last) { const GLenum mode = cmd->mode; const GLenum type = cmd->type; const GLsizei draw_count = cmd->draw_count; const GLuint user_buffer_mask = cmd->user_buffer_mask; struct gl_buffer_object *index_buffer = cmd->index_buffer; const bool has_base_vertex = cmd->has_base_vertex; const char *variable_data = (const char *)(cmd + 1); const GLsizei *count = (GLsizei *)variable_data; variable_data += sizeof(GLsizei) * draw_count; const GLvoid *const *indices = (const GLvoid *const *)variable_data; variable_data += sizeof(const GLvoid *const *) * draw_count; const GLsizei *basevertex = NULL; if (has_base_vertex) { basevertex = (GLsizei *)variable_data; variable_data += sizeof(GLsizei) * draw_count; } const struct glthread_attrib_binding *buffers = (const struct glthread_attrib_binding *)variable_data; /* Bind uploaded buffers if needed. */ if (user_buffer_mask) { _mesa_InternalBindVertexBuffers(ctx, buffers, user_buffer_mask, false); } if (index_buffer) { _mesa_InternalBindElementBuffer(ctx, index_buffer); } /* Draw. */ if (has_base_vertex) { CALL_MultiDrawElementsBaseVertex(ctx->CurrentServerDispatch, (mode, count, type, indices, draw_count, basevertex)); } else { CALL_MultiDrawElementsEXT(ctx->CurrentServerDispatch, (mode, count, type, indices, draw_count)); } /* Restore states. */ if (index_buffer) { _mesa_InternalBindElementBuffer(ctx, NULL); } if (user_buffer_mask) { _mesa_InternalBindVertexBuffers(ctx, buffers, user_buffer_mask, true); } return cmd->cmd_base.cmd_size; } static ALWAYS_INLINE void multi_draw_elements_async(struct gl_context *ctx, GLenum mode, const GLsizei *count, GLenum type, const GLvoid *const *indices, GLsizei draw_count, const GLsizei *basevertex, struct gl_buffer_object *index_buffer, unsigned user_buffer_mask, const struct glthread_attrib_binding *buffers) { int count_size = sizeof(GLsizei) * draw_count; int indices_size = sizeof(indices[0]) * draw_count; int basevertex_size = basevertex ? sizeof(GLsizei) * draw_count : 0; int buffers_size = util_bitcount(user_buffer_mask) * sizeof(buffers[0]); int cmd_size = sizeof(struct marshal_cmd_MultiDrawElementsBaseVertex) + count_size + indices_size + basevertex_size + buffers_size; struct marshal_cmd_MultiDrawElementsBaseVertex *cmd; cmd = _mesa_glthread_allocate_command(ctx, DISPATCH_CMD_MultiDrawElementsBaseVertex, cmd_size); cmd->mode = mode; cmd->type = type; cmd->draw_count = draw_count; cmd->user_buffer_mask = user_buffer_mask; cmd->index_buffer = index_buffer; cmd->has_base_vertex = basevertex != NULL; char *variable_data = (char*)(cmd + 1); memcpy(variable_data, count, count_size); variable_data += count_size; memcpy(variable_data, indices, indices_size); variable_data += indices_size; if (basevertex) { memcpy(variable_data, basevertex, basevertex_size); variable_data += basevertex_size; } if (user_buffer_mask) memcpy(variable_data, buffers, buffers_size); } void GLAPIENTRY _mesa_marshal_MultiDrawElementsBaseVertex(GLenum mode, const GLsizei *count, GLenum type, const GLvoid *const *indices, GLsizei draw_count, const GLsizei *basevertex) { GET_CURRENT_CONTEXT(ctx); struct glthread_vao *vao = ctx->GLThread.CurrentVAO; unsigned user_buffer_mask = vao->UserPointerMask & vao->BufferEnabled; bool has_user_indices = vao->CurrentElementBufferName == 0; if (ctx->GLThread.ListMode) goto sync; /* Fast path when nothing needs to be done. */ if (draw_count >= 0 && (ctx->API == API_OPENGL_CORE || !is_index_type_valid(type) || (!user_buffer_mask && !has_user_indices))) { multi_draw_elements_async(ctx, mode, count, type, indices, draw_count, basevertex, NULL, 0, NULL); return; } bool need_index_bounds = user_buffer_mask & ~vao->NonZeroDivisorMask; /* If the draw count is too high or negative, the queue can't be used. * * Sync if indices come from a buffer and vertices come from memory * and index bounds are not valid. We would have to map the indices * to compute the index bounds, and for that we would have to sync anyway. */ if (!ctx->GLThread.SupportsNonVBOUploads || draw_count < 0 || draw_count > MARSHAL_MAX_CMD_SIZE / 32 || (need_index_bounds && !has_user_indices)) goto sync; unsigned index_size = get_index_size(type); unsigned min_index = ~0; unsigned max_index = 0; unsigned total_count = 0; unsigned num_vertices = 0; /* This is always true if there is per-vertex data that needs to be * uploaded. */ if (need_index_bounds) { /* Compute the index bounds. */ for (unsigned i = 0; i < draw_count; i++) { GLsizei vertex_count = count[i]; if (vertex_count < 0) { /* Just call the driver to set the error. */ multi_draw_elements_async(ctx, mode, count, type, indices, draw_count, basevertex, NULL, 0, NULL); return; } if (vertex_count == 0) continue; unsigned min = ~0, max = 0; vbo_get_minmax_index_mapped(vertex_count, index_size, ctx->GLThread._RestartIndex[index_size - 1], ctx->GLThread._PrimitiveRestart, indices[i], &min, &max); if (basevertex) { min += basevertex[i]; max += basevertex[i]; } min_index = MIN2(min_index, min); max_index = MAX2(max_index, max); total_count += vertex_count; } num_vertices = max_index + 1 - min_index; if (total_count == 0 || num_vertices == 0) { /* Nothing to do, but call the driver to set possible GL errors. */ multi_draw_elements_async(ctx, mode, count, type, indices, draw_count, basevertex, NULL, 0, NULL); return; } /* If there is too much data to upload, sync and let the driver unroll * indices. */ if (util_is_vbo_upload_ratio_too_large(total_count, num_vertices)) goto sync; } else if (has_user_indices) { /* Only compute total_count for the upload of indices. */ for (unsigned i = 0; i < draw_count; i++) { GLsizei vertex_count = count[i]; if (vertex_count < 0) { /* Just call the driver to set the error. */ multi_draw_elements_async(ctx, mode, count, type, indices, draw_count, basevertex, NULL, 0, NULL); return; } if (vertex_count == 0) continue; total_count += vertex_count; } if (total_count == 0) { /* Nothing to do, but call the driver to set possible GL errors. */ multi_draw_elements_async(ctx, mode, count, type, indices, draw_count, basevertex, NULL, 0, NULL); return; } } /* Upload vertices. */ struct glthread_attrib_binding buffers[VERT_ATTRIB_MAX]; if (user_buffer_mask && !upload_vertices(ctx, user_buffer_mask, min_index, num_vertices, 0, 1, buffers)) goto sync; /* Upload indices. */ struct gl_buffer_object *index_buffer = NULL; if (has_user_indices) { const GLvoid **out_indices = alloca(sizeof(indices[0]) * draw_count); index_buffer = upload_multi_indices(ctx, total_count, index_size, draw_count, count, indices, out_indices); indices = out_indices; } /* Draw asynchronously. */ multi_draw_elements_async(ctx, mode, count, type, indices, draw_count, basevertex, index_buffer, user_buffer_mask, buffers); return; sync: _mesa_glthread_finish_before(ctx, "DrawElements"); if (basevertex) { CALL_MultiDrawElementsBaseVertex(ctx->CurrentServerDispatch, (mode, count, type, indices, draw_count, basevertex)); } else { CALL_MultiDrawElementsEXT(ctx->CurrentServerDispatch, (mode, count, type, indices, draw_count)); } } void GLAPIENTRY _mesa_marshal_DrawArrays(GLenum mode, GLint first, GLsizei count) { draw_arrays(mode, first, count, 1, 0, true); } void GLAPIENTRY _mesa_marshal_DrawArraysInstancedARB(GLenum mode, GLint first, GLsizei count, GLsizei instance_count) { draw_arrays(mode, first, count, instance_count, 0, false); } void GLAPIENTRY _mesa_marshal_DrawArraysInstancedBaseInstance(GLenum mode, GLint first, GLsizei count, GLsizei instance_count, GLuint baseinstance) { draw_arrays(mode, first, count, instance_count, baseinstance, false); } void GLAPIENTRY _mesa_marshal_DrawElements(GLenum mode, GLsizei count, GLenum type, const GLvoid *indices) { draw_elements(mode, count, type, indices, 1, 0, 0, false, 0, 0, true); } void GLAPIENTRY _mesa_marshal_DrawRangeElements(GLenum mode, GLuint start, GLuint end, GLsizei count, GLenum type, const GLvoid *indices) { draw_elements(mode, count, type, indices, 1, 0, 0, true, start, end, true); } void GLAPIENTRY _mesa_marshal_DrawElementsInstancedARB(GLenum mode, GLsizei count, GLenum type, const GLvoid *indices, GLsizei instance_count) { draw_elements(mode, count, type, indices, instance_count, 0, 0, false, 0, 0, false); } void GLAPIENTRY _mesa_marshal_DrawElementsBaseVertex(GLenum mode, GLsizei count, GLenum type, const GLvoid *indices, GLint basevertex) { draw_elements(mode, count, type, indices, 1, basevertex, 0, false, 0, 0, true); } void GLAPIENTRY _mesa_marshal_DrawRangeElementsBaseVertex(GLenum mode, GLuint start, GLuint end, GLsizei count, GLenum type, const GLvoid *indices, GLint basevertex) { draw_elements(mode, count, type, indices, 1, basevertex, 0, true, start, end, false); } void GLAPIENTRY _mesa_marshal_DrawElementsInstancedBaseVertex(GLenum mode, GLsizei count, GLenum type, const GLvoid *indices, GLsizei instance_count, GLint basevertex) { draw_elements(mode, count, type, indices, instance_count, basevertex, 0, false, 0, 0, false); } void GLAPIENTRY _mesa_marshal_DrawElementsInstancedBaseInstance(GLenum mode, GLsizei count, GLenum type, const GLvoid *indices, GLsizei instance_count, GLuint baseinstance) { draw_elements(mode, count, type, indices, instance_count, 0, baseinstance, false, 0, 0, false); } void GLAPIENTRY _mesa_marshal_DrawElementsInstancedBaseVertexBaseInstance(GLenum mode, GLsizei count, GLenum type, const GLvoid *indices, GLsizei instance_count, GLint basevertex, GLuint baseinstance) { draw_elements(mode, count, type, indices, instance_count, basevertex, baseinstance, false, 0, 0, false); } void GLAPIENTRY _mesa_marshal_MultiDrawElementsEXT(GLenum mode, const GLsizei *count, GLenum type, const GLvoid *const *indices, GLsizei draw_count) { _mesa_marshal_MultiDrawElementsBaseVertex(mode, count, type, indices, draw_count, NULL); } uint32_t _mesa_unmarshal_DrawArraysInstancedARB(struct gl_context *ctx, const struct marshal_cmd_DrawArraysInstancedARB *cmd, const uint64_t *last) { unreachable("never used - DrawArraysInstancedBaseInstance is used instead"); return 0; } uint32_t _mesa_unmarshal_DrawElements(struct gl_context *ctx, const struct marshal_cmd_DrawElements *cmd, const uint64_t *last) { unreachable("never used - DrawElementsInstancedBaseVertexBaseInstance is used instead"); return 0; } uint32_t _mesa_unmarshal_DrawRangeElements(struct gl_context *ctx, const struct marshal_cmd_DrawRangeElements *cmd, const uint64_t *last) { unreachable("never used - DrawElementsInstancedBaseVertexBaseInstance is used instead"); return 0; } uint32_t _mesa_unmarshal_DrawElementsBaseVertex(struct gl_context *ctx, const struct marshal_cmd_DrawElementsBaseVertex *cmd, const uint64_t *last) { unreachable("never used - DrawElementsInstancedBaseVertexBaseInstance is used instead"); return 0; } uint32_t _mesa_unmarshal_DrawElementsInstancedBaseVertex(struct gl_context *ctx, const struct marshal_cmd_DrawElementsInstancedBaseVertex *cmd, const uint64_t *last) { unreachable("never used - DrawElementsInstancedBaseVertexBaseInstance is used instead"); return 0; } uint32_t _mesa_unmarshal_DrawElementsInstancedBaseInstance(struct gl_context *ctx, const struct marshal_cmd_DrawElementsInstancedBaseInstance *cmd, const uint64_t *last) { unreachable("never used - DrawElementsInstancedBaseVertexBaseInstance is used instead"); return 0; } uint32_t _mesa_unmarshal_MultiDrawElementsEXT(struct gl_context *ctx, const struct marshal_cmd_MultiDrawElementsEXT *cmd, const uint64_t *last) { unreachable("never used - MultiDrawElementsBaseVertex is used instead"); return 0; }