vc4: Just stream out fallback IB contents.
The idea I had when I wrote the original shadow code was that you'd see a set_index_buffer call for the index buffer (IB), followed by a bunch of draws out of it. What's actually happening in openarena is that set_index_buffer occurs at every draw, so we end up allocating a new shadow BO every time, and converting more of the BO than is actually used in the draw. While I could maybe come up with a better caching scheme, for now just do the simple thing that doesn't result in a new shadow IB allocation per draw: convert only the indices each draw uses and stream them out through an upload manager. Improves performance of isosurf in drawelements mode by 58.7967% +/- 3.86152% (n=8).
This commit is contained in:
parent
f8de6277bf
commit
10aacf5ae8
|
@ -29,6 +29,7 @@
|
||||||
#include "util/u_inlines.h"
|
#include "util/u_inlines.h"
|
||||||
#include "util/u_memory.h"
|
#include "util/u_memory.h"
|
||||||
#include "util/u_blitter.h"
|
#include "util/u_blitter.h"
|
||||||
|
#include "util/u_upload_mgr.h"
|
||||||
#include "indices/u_primconvert.h"
|
#include "indices/u_primconvert.h"
|
||||||
#include "pipe/p_screen.h"
|
#include "pipe/p_screen.h"
|
||||||
|
|
||||||
|
@ -410,6 +411,9 @@ vc4_context_destroy(struct pipe_context *pctx)
|
||||||
if (vc4->primconvert)
|
if (vc4->primconvert)
|
||||||
util_primconvert_destroy(vc4->primconvert);
|
util_primconvert_destroy(vc4->primconvert);
|
||||||
|
|
||||||
|
if (vc4->uploader)
|
||||||
|
u_upload_destroy(vc4->uploader);
|
||||||
|
|
||||||
util_slab_destroy(&vc4->transfer_pool);
|
util_slab_destroy(&vc4->transfer_pool);
|
||||||
|
|
||||||
pipe_surface_reference(&vc4->framebuffer.cbufs[0], NULL);
|
pipe_surface_reference(&vc4->framebuffer.cbufs[0], NULL);
|
||||||
|
@ -466,6 +470,9 @@ vc4_context_create(struct pipe_screen *pscreen, void *priv)
|
||||||
if (!vc4->primconvert)
|
if (!vc4->primconvert)
|
||||||
goto fail;
|
goto fail;
|
||||||
|
|
||||||
|
vc4->uploader = u_upload_create(pctx, 16 * 1024, 4,
|
||||||
|
PIPE_BIND_INDEX_BUFFER);
|
||||||
|
|
||||||
vc4_debug |= saved_shaderdb_flag;
|
vc4_debug |= saved_shaderdb_flag;
|
||||||
|
|
||||||
return &vc4->base;
|
return &vc4->base;
|
||||||
|
|
|
@ -243,6 +243,8 @@ struct vc4_context {
|
||||||
/** Seqno of the last CL flush's job. */
|
/** Seqno of the last CL flush's job. */
|
||||||
uint64_t last_emit_seqno;
|
uint64_t last_emit_seqno;
|
||||||
|
|
||||||
|
struct u_upload_mgr *uploader;
|
||||||
|
|
||||||
/** @{ Current pipeline state objects */
|
/** @{ Current pipeline state objects */
|
||||||
struct pipe_scissor_state scissor;
|
struct pipe_scissor_state scissor;
|
||||||
struct pipe_blend_state *blend;
|
struct pipe_blend_state *blend;
|
||||||
|
|
|
@ -266,13 +266,17 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
|
||||||
* definitions, up to but not including QUADS.
|
* definitions, up to but not including QUADS.
|
||||||
*/
|
*/
|
||||||
if (info->indexed) {
|
if (info->indexed) {
|
||||||
struct vc4_resource *rsc = vc4_resource(vc4->indexbuf.buffer);
|
|
||||||
uint32_t offset = vc4->indexbuf.offset;
|
uint32_t offset = vc4->indexbuf.offset;
|
||||||
uint32_t index_size = vc4->indexbuf.index_size;
|
uint32_t index_size = vc4->indexbuf.index_size;
|
||||||
if (rsc->shadow_parent) {
|
struct pipe_resource *prsc;
|
||||||
vc4_update_shadow_index_buffer(pctx, &vc4->indexbuf);
|
if (vc4->indexbuf.index_size == 4) {
|
||||||
offset = 0;
|
prsc = vc4_get_shadow_index_buffer(pctx, &vc4->indexbuf,
|
||||||
|
info->count, &offset);
|
||||||
|
index_size = 2;
|
||||||
|
} else {
|
||||||
|
prsc = vc4->indexbuf.buffer;
|
||||||
}
|
}
|
||||||
|
struct vc4_resource *rsc = vc4_resource(prsc);
|
||||||
|
|
||||||
cl_start_reloc(&vc4->bcl, 1);
|
cl_start_reloc(&vc4->bcl, 1);
|
||||||
cl_u8(&vc4->bcl, VC4_PACKET_GL_INDEXED_PRIMITIVE);
|
cl_u8(&vc4->bcl, VC4_PACKET_GL_INDEXED_PRIMITIVE);
|
||||||
|
@ -284,6 +288,9 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
|
||||||
cl_u32(&vc4->bcl, info->count);
|
cl_u32(&vc4->bcl, info->count);
|
||||||
cl_reloc(vc4, &vc4->bcl, rsc->bo, offset);
|
cl_reloc(vc4, &vc4->bcl, rsc->bo, offset);
|
||||||
cl_u32(&vc4->bcl, max_index);
|
cl_u32(&vc4->bcl, max_index);
|
||||||
|
|
||||||
|
if (vc4->indexbuf.index_size == 4)
|
||||||
|
pipe_resource_reference(&prsc, NULL);
|
||||||
} else {
|
} else {
|
||||||
cl_u8(&vc4->bcl, VC4_PACKET_GL_ARRAY_PRIMITIVE);
|
cl_u8(&vc4->bcl, VC4_PACKET_GL_ARRAY_PRIMITIVE);
|
||||||
cl_u8(&vc4->bcl, info->mode);
|
cl_u8(&vc4->bcl, info->mode);
|
||||||
|
|
|
@ -26,6 +26,7 @@
|
||||||
#include "util/u_format.h"
|
#include "util/u_format.h"
|
||||||
#include "util/u_inlines.h"
|
#include "util/u_inlines.h"
|
||||||
#include "util/u_surface.h"
|
#include "util/u_surface.h"
|
||||||
|
#include "util/u_upload_mgr.h"
|
||||||
|
|
||||||
#include "vc4_screen.h"
|
#include "vc4_screen.h"
|
||||||
#include "vc4_context.h"
|
#include "vc4_context.h"
|
||||||
|
@ -638,41 +639,37 @@ vc4_update_shadow_baselevel_texture(struct pipe_context *pctx,
|
||||||
* was in user memory, it would be nice to not have uploaded it to a VBO
|
* was in user memory, it would be nice to not have uploaded it to a VBO
|
||||||
* before translating.
|
* before translating.
|
||||||
*/
|
*/
|
||||||
void
|
struct pipe_resource *
|
||||||
vc4_update_shadow_index_buffer(struct pipe_context *pctx,
|
vc4_get_shadow_index_buffer(struct pipe_context *pctx,
|
||||||
const struct pipe_index_buffer *ib)
|
const struct pipe_index_buffer *ib,
|
||||||
|
uint32_t count,
|
||||||
|
uint32_t *shadow_offset)
|
||||||
{
|
{
|
||||||
struct vc4_resource *shadow = vc4_resource(ib->buffer);
|
struct vc4_context *vc4 = vc4_context(pctx);
|
||||||
struct vc4_resource *orig = vc4_resource(shadow->shadow_parent);
|
struct vc4_resource *orig = vc4_resource(ib->buffer);
|
||||||
uint32_t count = shadow->base.b.width0 / 2;
|
|
||||||
|
|
||||||
if (shadow->writes == orig->writes)
|
|
||||||
return;
|
|
||||||
|
|
||||||
perf_debug("Fallback conversion for %d uint indices\n", count);
|
perf_debug("Fallback conversion for %d uint indices\n", count);
|
||||||
|
|
||||||
|
void *data;
|
||||||
|
struct pipe_resource *shadow_rsc = NULL;
|
||||||
|
u_upload_alloc(vc4->uploader, 0, count * 2,
|
||||||
|
shadow_offset, &shadow_rsc, &data);
|
||||||
|
uint16_t *dst = data;
|
||||||
|
|
||||||
struct pipe_transfer *src_transfer;
|
struct pipe_transfer *src_transfer;
|
||||||
uint32_t *src = pipe_buffer_map_range(pctx, &orig->base.b,
|
uint32_t *src = pipe_buffer_map_range(pctx, &orig->base.b,
|
||||||
ib->offset,
|
ib->offset,
|
||||||
count * 4,
|
count * 4,
|
||||||
PIPE_TRANSFER_READ, &src_transfer);
|
PIPE_TRANSFER_READ, &src_transfer);
|
||||||
|
|
||||||
struct pipe_transfer *dst_transfer;
|
|
||||||
uint16_t *dst = pipe_buffer_map_range(pctx, &shadow->base.b,
|
|
||||||
0,
|
|
||||||
count * 2,
|
|
||||||
PIPE_TRANSFER_WRITE, &dst_transfer);
|
|
||||||
|
|
||||||
for (int i = 0; i < count; i++) {
|
for (int i = 0; i < count; i++) {
|
||||||
uint32_t src_index = src[i];
|
uint32_t src_index = src[i];
|
||||||
assert(src_index <= 0xffff);
|
assert(src_index <= 0xffff);
|
||||||
dst[i] = src_index;
|
dst[i] = src_index;
|
||||||
}
|
}
|
||||||
|
|
||||||
pctx->transfer_unmap(pctx, dst_transfer);
|
|
||||||
pctx->transfer_unmap(pctx, src_transfer);
|
pctx->transfer_unmap(pctx, src_transfer);
|
||||||
|
|
||||||
shadow->writes = orig->writes;
|
return shadow_rsc;
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
|
|
|
@ -107,8 +107,10 @@ struct pipe_resource *vc4_resource_create(struct pipe_screen *pscreen,
|
||||||
const struct pipe_resource *tmpl);
|
const struct pipe_resource *tmpl);
|
||||||
void vc4_update_shadow_baselevel_texture(struct pipe_context *pctx,
|
void vc4_update_shadow_baselevel_texture(struct pipe_context *pctx,
|
||||||
struct pipe_sampler_view *view);
|
struct pipe_sampler_view *view);
|
||||||
void vc4_update_shadow_index_buffer(struct pipe_context *pctx,
|
struct pipe_resource *vc4_get_shadow_index_buffer(struct pipe_context *pctx,
|
||||||
const struct pipe_index_buffer *ib);
|
const struct pipe_index_buffer *ib,
|
||||||
|
uint32_t count,
|
||||||
|
uint32_t *offset);
|
||||||
void vc4_dump_surface(struct pipe_surface *psurf);
|
void vc4_dump_surface(struct pipe_surface *psurf);
|
||||||
|
|
||||||
#endif /* VC4_RESOURCE_H */
|
#endif /* VC4_RESOURCE_H */
|
||||||
|
|
|
@ -304,24 +304,8 @@ vc4_set_index_buffer(struct pipe_context *pctx,
|
||||||
|
|
||||||
if (ib) {
|
if (ib) {
|
||||||
assert(!ib->user_buffer);
|
assert(!ib->user_buffer);
|
||||||
|
|
||||||
if (ib->index_size == 4) {
|
|
||||||
struct pipe_resource tmpl = *ib->buffer;
|
|
||||||
assert(tmpl.format == PIPE_FORMAT_R8_UNORM);
|
|
||||||
assert(tmpl.height0 == 1);
|
|
||||||
tmpl.width0 = (tmpl.width0 - ib->offset) / 2;
|
|
||||||
struct pipe_resource *pshadow =
|
|
||||||
vc4_resource_create(&vc4->screen->base, &tmpl);
|
|
||||||
struct vc4_resource *shadow = vc4_resource(pshadow);
|
|
||||||
pipe_resource_reference(&shadow->shadow_parent, ib->buffer);
|
|
||||||
|
|
||||||
pipe_resource_reference(&vc4->indexbuf.buffer, NULL);
|
|
||||||
vc4->indexbuf.buffer = pshadow;
|
|
||||||
vc4->indexbuf.index_size = 2;
|
|
||||||
} else {
|
|
||||||
pipe_resource_reference(&vc4->indexbuf.buffer, ib->buffer);
|
pipe_resource_reference(&vc4->indexbuf.buffer, ib->buffer);
|
||||||
vc4->indexbuf.index_size = ib->index_size;
|
vc4->indexbuf.index_size = ib->index_size;
|
||||||
}
|
|
||||||
vc4->indexbuf.offset = ib->offset;
|
vc4->indexbuf.offset = ib->offset;
|
||||||
} else {
|
} else {
|
||||||
pipe_resource_reference(&vc4->indexbuf.buffer, NULL);
|
pipe_resource_reference(&vc4->indexbuf.buffer, NULL);
|
||||||
|
|
Loading…
Reference in New Issue