diff --git a/src/gallium/drivers/swr/swr_scratch.cpp b/src/gallium/drivers/swr/swr_scratch.cpp index 83cb319b410..66f18365cc7 100644 --- a/src/gallium/drivers/swr/swr_scratch.cpp +++ b/src/gallium/drivers/swr/swr_scratch.cpp @@ -25,6 +25,7 @@ #include "swr_context.h" #include "swr_screen.h" #include "swr_scratch.h" +#include "swr_fence.h" #include "swr_fence_work.h" #include "api.h" @@ -46,8 +47,10 @@ swr_copy_to_scratch_space(struct swr_context *ctx, space->current_size = max_size_in_flight; if (space->base) { - /* defer delete, use aligned-free */ + /* defer delete, use aligned-free, fence finish enforces the defer + * delete will be on the *next* fence */ struct swr_screen *screen = swr_screen(ctx->pipe.screen); + swr_fence_finish(ctx->pipe.screen, NULL, screen->flush_fence, 0); swr_fence_work_free(screen->flush_fence, space->base, true); space->base = NULL; } diff --git a/src/gallium/drivers/swr/swr_state.cpp b/src/gallium/drivers/swr/swr_state.cpp index 07ff9b46ad2..81c70b4568d 100644 --- a/src/gallium/drivers/swr/swr_state.cpp +++ b/src/gallium/drivers/swr/swr_state.cpp @@ -1422,12 +1422,20 @@ swr_update_derived(struct pipe_context *pipe, partial_inbounds = 0; min_vertex_index = info.min_index + info.index_bias; - /* Use user memory directly. The draw will access user-buffer - * directly and then block. It's easier and usually - * faster than copying. - */ - post_update_dirty_flags |= SWR_BLOCK_CLIENT_DRAW; - p_data = (const uint8_t *) vb->buffer.user; + size = AlignUp(size, 4); + /* If size of client memory copy is too large, don't copy. The + * draw will access user-buffer directly and then block. This is + * faster than queuing many large client draws. */ + if (size >= screen->client_copy_limit) { + post_update_dirty_flags |= SWR_BLOCK_CLIENT_DRAW; + p_data = (const uint8_t *) vb->buffer.user; + } else { + /* Copy only needed vertices to scratch space */ + const void *ptr = (const uint8_t *) vb->buffer.user + base; + ptr = (uint8_t *)swr_copy_to_scratch_space( + ctx, &ctx->scratch->vertex_buffer, ptr, size); + p_data = (const uint8_t *)ptr - base; + } } else if (vb->buffer.resource) { /* VBO */ if (!pitch) { @@ -1488,12 +1496,20 @@ swr_update_derived(struct pipe_context *pipe, size = info.count * pitch; - /* Use user memory directly. The draw will access user-buffer - * directly and then block. It's easier and usually - * faster than copying. - */ - post_update_dirty_flags |= SWR_BLOCK_CLIENT_DRAW; - p_data = (const uint8_t *) info.index.user; + size = AlignUp(size, 4); + /* If size of client memory copy is too large, don't copy. The + * draw will access user-buffer directly and then block. This is + * faster than queuing many large client draws. */ + if (size >= screen->client_copy_limit) { + post_update_dirty_flags |= SWR_BLOCK_CLIENT_DRAW; + p_data = (const uint8_t *) info.index.user; + } else { + /* Copy indices to scratch space */ + const void *ptr = info.index.user; + ptr = swr_copy_to_scratch_space( + ctx, &ctx->scratch->index_buffer, ptr, size); + p_data = (const uint8_t *)ptr; + } } SWR_INDEX_BUFFER_STATE swrIndexBuffer;