From b83da28863cf427e329c4799ed39f1588affcca3 Mon Sep 17 00:00:00 2001 From: Pierre-Eric Pelloux-Prayer Date: Fri, 16 Jul 2021 14:16:43 +0200 Subject: [PATCH] dlist: implement vertices deduplication MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that the vertices attributes are in RAM, we can easily compute their hash and compare to earlier vertices (in the same list so they have compatible vertex_size). We can't do that for list that will be executed using loopback because the replay code ignore the index buffer. Reviewed-by: Marek Olšák Part-of: --- src/mesa/vbo/vbo_save_api.c | 144 ++++++++++++++++++++++++++++++++---- 1 file changed, 129 insertions(+), 15 deletions(-) diff --git a/src/mesa/vbo/vbo_save_api.c b/src/mesa/vbo/vbo_save_api.c index 7a56b5ee73b..f36fa413173 100644 --- a/src/mesa/vbo/vbo_save_api.c +++ b/src/mesa/vbo/vbo_save_api.c @@ -83,6 +83,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #include "main/varray.h" #include "util/bitscan.h" #include "util/u_memory.h" +#include "util/hash_table.h" #include "gallium/include/pipe/p_state.h" @@ -448,6 +449,85 @@ realloc_storage(struct gl_context *ctx, int prim_count, int vertex_count) } } +struct vertex_key { + unsigned vertex_size; + fi_type *vertex_attributes; +}; + +static uint32_t _hash_vertex_key(const void *key) +{ + struct vertex_key *k = (struct vertex_key*)key; + unsigned sz = k->vertex_size; + assert(sz); + return _mesa_hash_data(k->vertex_attributes, sz * sizeof(float)); +} + +static bool _compare_vertex_key(const void *key1, const void *key2) +{ + struct vertex_key *k1 = (struct vertex_key*)key1; + struct vertex_key *k2 = (struct vertex_key*)key2; + /* All the compared vertices are going to be drawn with the same VAO, + * so we can compare the attributes. */ + assert (k1->vertex_size == k2->vertex_size); + return memcmp(k1->vertex_attributes, + k2->vertex_attributes, + k1->vertex_size * sizeof(float)) == 0; +} + +static void _free_entry(struct hash_entry *entry) +{ + free((void*)entry->key); +} + +/* Add vertex to the vertex buffer and return its index. If this vertex is a duplicate + * of an existing vertex, return the original index instead. + */ +static uint32_t +add_vertex(struct vbo_save_context *save, struct hash_table *hash_to_index, + uint32_t index, uint32_t offset_in_bytes, uint32_t base_index, fi_type *new_buffer, + uint32_t *max_index) +{ + /* If vertex deduplication is disabled return the original index. */ + if (!hash_to_index) + return index; + + /* Apply the offset into buffer_in_ram ... */ + fi_type *vert = save->vertex_store->buffer_in_ram + offset_in_bytes / sizeof(float); + /* ... and cancel the start_offset trick. + * This way we get the correct offset in all cases (= start_offset being 0 or not). + */ + vert += save->vertex_size * (index - base_index); + + struct vertex_key *key = malloc(sizeof(struct vertex_key)); + key->vertex_size = save->vertex_size; + key->vertex_attributes = vert; + + struct hash_entry *entry = _mesa_hash_table_search(hash_to_index, key); + if (entry) { + free(key); + /* We found an existing vertex with the same hash, return its index. */ + return (uintptr_t) entry->data; + } else { + /* This is a new vertex. Determine a new index and copy its attributes to the vertex + * buffer. Note that 'new_buffer' is created at each list compilation so we write vertices + * starting at index 0. + */ + uint32_t n = _mesa_hash_table_num_entries(hash_to_index); + *max_index = MAX2(n + base_index, *max_index); + + memcpy(&new_buffer[save->vertex_size * n], + vert, + save->vertex_size * sizeof(fi_type)); + + _mesa_hash_table_insert(hash_to_index, key, (void*)(uintptr_t)(n + base_index)); + + /* The index buffer is shared between list compilations, so add the base index to get + * the final index. + */ + return n + base_index; + } +} + /** * Insert the active immediate struct onto the display list currently @@ -614,6 +694,18 @@ compile_vertex_list(struct gl_context *ctx) struct _mesa_prim *merged_prims = NULL; int idx = 0; + struct hash_table *vertex_to_index = NULL; + fi_type *temp_vertices_buffer = NULL; + + /* The loopback replay code doesn't use the index buffer, so we can't + * dedup vertices in this case. + */ + if (!ctx->ListState.Current.UseLoopback) { + vertex_to_index = _mesa_hash_table_create(NULL, _hash_vertex_key, _compare_vertex_key); + temp_vertices_buffer = malloc(save->vertex_store->buffer_in_ram_size); + } + + uint32_t max_index = 0; int last_valid_prim = -1; /* Construct indices array. */ @@ -647,13 +739,17 @@ compile_vertex_list(struct gl_context *ctx) unsigned tri_count = merged_prims[last_valid_prim].count - 2; indices[idx] = indices[idx - 1]; - indices[idx + 1] = original_prims[i].start; + indices[idx + 1] = add_vertex(save, vertex_to_index, original_prims[i].start, + original_buffer_offset, start_offset, + temp_vertices_buffer, &max_index); idx += 2; merged_prims[last_valid_prim].count += 2; if (tri_count % 2) { /* Add another index to preserve winding order */ - indices[idx++] = original_prims[i].start; + indices[idx++] = add_vertex(save, vertex_to_index, original_prims[i].start, + original_buffer_offset, start_offset, + temp_vertices_buffer, &max_index); merged_prims[last_valid_prim].count++; } } @@ -670,10 +766,14 @@ compile_vertex_list(struct gl_context *ctx) (original_prims[i + 1].mode == GL_LINE_STRIP || original_prims[i + 1].mode == GL_LINES)))) { for (unsigned j = 0; j < vertex_count; j++) { - indices[idx++] = original_prims[i].start + j; + indices[idx++] = add_vertex(save, vertex_to_index, original_prims[i].start + j, + original_buffer_offset, start_offset, + temp_vertices_buffer, &max_index); /* Repeat all but the first/last indices. */ if (j && j != vertex_count - 1) { - indices[idx++] = original_prims[i].start + j; + indices[idx++] = add_vertex(save, vertex_to_index, original_prims[i].start + j, + original_buffer_offset, start_offset, + temp_vertices_buffer, &max_index); } } } else { @@ -681,7 +781,9 @@ compile_vertex_list(struct gl_context *ctx) mode = original_prims[i].mode; for (unsigned j = 0; j < vertex_count; j++) { - indices[idx++] = original_prims[i].start + j; + indices[idx++] = add_vertex(save, vertex_to_index, original_prims[i].start + j, + original_buffer_offset, start_offset, + temp_vertices_buffer, &max_index); } } @@ -712,11 +814,11 @@ compile_vertex_list(struct gl_context *ctx) _mesa_reference_buffer_object(ctx, &save->previous_ib, NULL); save->previous_ib = ctx->Driver.NewBufferObject(ctx, VBO_BUF_ID + 1); bool success = ctx->Driver.BufferData(ctx, - GL_ELEMENT_ARRAY_BUFFER_ARB, - MAX2(VBO_SAVE_INDEX_SIZE, idx) * sizeof(uint32_t), - NULL, - GL_STATIC_DRAW_ARB, GL_MAP_WRITE_BIT, - save->previous_ib); + GL_ELEMENT_ARRAY_BUFFER_ARB, + MAX2(VBO_SAVE_INDEX_SIZE, idx) * sizeof(uint32_t), + NULL, + GL_STATIC_DRAW_ARB, GL_MAP_WRITE_BIT, + save->previous_ib); if (!success) { _mesa_reference_buffer_object(ctx, &save->previous_ib, NULL); _mesa_error(ctx, GL_OUT_OF_MEMORY, "IB allocation"); @@ -737,11 +839,23 @@ compile_vertex_list(struct gl_context *ctx) node->cold->prim_count = 0; } - ctx->Driver.BufferSubData(ctx, - original_buffer_offset, - idx * save->vertex_size * sizeof(fi_type), - &save->vertex_store->buffer_in_ram[original_buffer_offset / sizeof(float)], - save->vertex_store->bufferobj); + + if (vertex_to_index) { + ctx->Driver.BufferSubData(ctx, + original_buffer_offset, + (max_index - start_offset + 1) * save->vertex_size * sizeof(fi_type), + temp_vertices_buffer, + save->vertex_store->bufferobj); + + _mesa_hash_table_destroy(vertex_to_index, _free_entry); + free(temp_vertices_buffer); + } else { + ctx->Driver.BufferSubData(ctx, + original_buffer_offset, + idx * save->vertex_size * sizeof(fi_type), + &save->vertex_store->buffer_in_ram[original_buffer_offset / sizeof(float)], + save->vertex_store->bufferobj); + } /* Prepare for DrawGallium */ memset(&node->merged.info, 0, sizeof(struct pipe_draw_info));