nvfx: emit bo relocations only when needed
Should improve performance, possibly significantly.
This commit is contained in:
parent
ea70969618
commit
c907b94713
|
@ -46,6 +46,9 @@ nvfx_destroy(struct pipe_context *pipe)
|
|||
if (nvfx->draw)
|
||||
draw_destroy(nvfx->draw);
|
||||
|
||||
if(nvfx->screen->cur_ctx == nvfx)
|
||||
nvfx->screen->cur_ctx = NULL;
|
||||
|
||||
FREE(nvfx);
|
||||
}
|
||||
|
||||
|
@ -72,8 +75,6 @@ nvfx_create(struct pipe_screen *pscreen, void *priv)
|
|||
nvfx->pipe.clear = nvfx_clear;
|
||||
nvfx->pipe.flush = nvfx_flush;
|
||||
|
||||
screen->base.channel->user_private = nvfx;
|
||||
|
||||
nvfx->is_nv4x = screen->is_nv4x;
|
||||
/* TODO: it seems that nv30 might have fixed function clipping usable with vertex programs
|
||||
* However, my code for that doesn't work, so use vp clipping for all cards, which works.
|
||||
|
@ -103,6 +104,7 @@ nvfx_create(struct pipe_screen *pscreen, void *priv)
|
|||
nvfx->hw_pointsprite_control = -1;
|
||||
nvfx->hw_vp_output = -1;
|
||||
nvfx->use_vertex_buffers = -1;
|
||||
nvfx->relocs_needed = NVFX_RELOCATE_ALL;
|
||||
|
||||
LIST_INITHEAD(&nvfx->render_cache);
|
||||
|
||||
|
|
|
@ -47,6 +47,13 @@
|
|||
#define NVFX_NEW_INDEX (1 << 16)
|
||||
#define NVFX_NEW_SPRITE (1 << 17)
|
||||
|
||||
/*
 * Bit flags naming the groups of buffer-object relocations that can be
 * (re-)emitted. They are combined into a mask that is stored in
 * nvfx_context::relocs_needed and passed to nvfx_state_relocate().
 * NVFX_RELOCATE_ALL is the union of the five flags below (bits 0-4) and
 * must be kept in sync if a new flag is added.
 */
#define NVFX_RELOCATE_FRAMEBUFFER (1 << 0)
|
||||
#define NVFX_RELOCATE_FRAGTEX (1 << 1)
|
||||
#define NVFX_RELOCATE_FRAGPROG (1 << 2)
|
||||
#define NVFX_RELOCATE_VTXBUF (1 << 3)
|
||||
#define NVFX_RELOCATE_IDXBUF (1 << 4)
|
||||
#define NVFX_RELOCATE_ALL 0x1f
|
||||
|
||||
struct nvfx_rasterizer_state {
|
||||
struct pipe_rasterizer_state pipe;
|
||||
unsigned sb_len;
|
||||
|
@ -199,6 +206,8 @@ struct nvfx_context {
|
|||
int hw_pointsprite_control;
|
||||
int hw_vp_output;
|
||||
struct nvfx_fragment_program* hw_fragprog;
|
||||
|
||||
unsigned relocs_needed;
|
||||
};
|
||||
|
||||
static INLINE struct nvfx_context *
|
||||
|
@ -290,10 +299,25 @@ extern void nvfx_state_sr_validate(struct nvfx_context *nvfx);
|
|||
extern void nvfx_state_zsa_validate(struct nvfx_context *nvfx);
|
||||
|
||||
/* nvfx_state_emit.c */
|
||||
extern void nvfx_state_relocate(struct nvfx_context *nvfx);
|
||||
extern void nvfx_state_relocate(struct nvfx_context *nvfx, unsigned relocs);
|
||||
extern boolean nvfx_state_validate(struct nvfx_context *nvfx);
|
||||
extern boolean nvfx_state_validate_swtnl(struct nvfx_context *nvfx);
|
||||
extern void nvfx_state_emit(struct nvfx_context *nvfx);
|
||||
|
||||
/*
 * Emit only the relocations that are both applicable to the current
 * render mode and still pending for this context.
 *
 * Framebuffer, fragment-texture and fragment-program relocations are
 * always candidates. Vertex-buffer relocations are candidates only when
 * render_mode is HW, and index-buffer relocations only when, in addition,
 * use_index_buffer is set. The candidate set is then masked with
 * nvfx->relocs_needed; nvfx_state_relocate() is called only if at least
 * one bit survives, so nothing is emitted when no relocation is pending.
 */
static inline void
|
||||
nvfx_state_emit(struct nvfx_context *nvfx)
|
||||
{
|
||||
unsigned relocs = NVFX_RELOCATE_FRAMEBUFFER | NVFX_RELOCATE_FRAGTEX | NVFX_RELOCATE_FRAGPROG;
|
||||
if (nvfx->render_mode == HW)
|
||||
{
|
||||
relocs |= NVFX_RELOCATE_VTXBUF;
|
||||
if(nvfx->use_index_buffer)
|
||||
relocs |= NVFX_RELOCATE_IDXBUF;
|
||||
}
|
||||
|
||||
/* drop every group that is not currently marked as needing relocation */
relocs &= nvfx->relocs_needed;
|
||||
if(relocs)
|
||||
nvfx_state_relocate(nvfx, relocs);
|
||||
}
|
||||
|
||||
/* nvfx_transfer.c */
|
||||
extern void nvfx_init_transfer_functions(struct pipe_context *pipe);
|
||||
|
|
|
@ -1471,6 +1471,8 @@ update:
|
|||
nvfx->hw_pointsprite_control = pointsprite_control;
|
||||
}
|
||||
}
|
||||
|
||||
nvfx->relocs_needed &=~ NVFX_RELOCATE_FRAGPROG;
|
||||
}
|
||||
|
||||
void
|
||||
|
@ -1487,6 +1489,7 @@ nvfx_fragprog_relocate(struct nvfx_context *nvfx)
|
|||
OUT_RELOC(chan, bo, offset, fp_flags | NOUVEAU_BO_LOW |
|
||||
NOUVEAU_BO_OR, NV34TCL_FP_ACTIVE_PROGRAM_DMA0,
|
||||
NV34TCL_FP_ACTIVE_PROGRAM_DMA1);
|
||||
nvfx->relocs_needed &=~ NVFX_RELOCATE_FRAGPROG;
|
||||
}
|
||||
|
||||
void
|
||||
|
|
|
@ -205,6 +205,7 @@ nvfx_fragtex_validate(struct nvfx_context *nvfx)
|
|||
}
|
||||
}
|
||||
nvfx->dirty_samplers = 0;
|
||||
nvfx->relocs_needed &=~ NVFX_RELOCATE_FRAGTEX;
|
||||
}
|
||||
|
||||
void
|
||||
|
@ -231,6 +232,7 @@ nvfx_fragtex_relocate(struct nvfx_context *nvfx)
|
|||
OUT_RELOC(chan, bo, nvfx->hw_txf[unit], tex_flags | NOUVEAU_BO_OR | NOUVEAU_BO_DUMMY,
|
||||
NV34TCL_TX_FORMAT_DMA0, NV34TCL_TX_FORMAT_DMA1);
|
||||
}
|
||||
nvfx->relocs_needed &=~ NVFX_RELOCATE_FRAGTEX;
|
||||
}
|
||||
|
||||
void
|
||||
|
|
|
@ -377,6 +377,14 @@ nvfx_screen_get_vertex_buffer_flags(struct nvfx_screen* screen)
|
|||
return vram_hack ? NOUVEAU_BO_VRAM : NOUVEAU_BO_GART;
|
||||
}
|
||||
|
||||
/*
 * Channel flush_notify hook (installed on the channel in
 * nvfx_screen_create()).
 *
 * Reads the screen from chan->user_private and, if the screen has a
 * current context (cur_ctx is non-NULL), marks every relocation group as
 * needed again on that context. Does nothing when there is no current
 * context.
 */
static void nvfx_channel_flush_notify(struct nouveau_channel* chan)
|
||||
{
|
||||
struct nvfx_screen* screen = chan->user_private;
|
||||
struct nvfx_context* nvfx = screen->cur_ctx;
|
||||
if(nvfx)
|
||||
nvfx->relocs_needed = NVFX_RELOCATE_ALL;
|
||||
}
|
||||
|
||||
struct pipe_screen *
|
||||
nvfx_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
|
||||
{
|
||||
|
@ -398,6 +406,9 @@ nvfx_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
|
|||
return NULL;
|
||||
}
|
||||
chan = screen->base.channel;
|
||||
screen->cur_ctx = NULL;
|
||||
chan->user_private = screen;
|
||||
chan->flush_notify = nvfx_channel_flush_notify;
|
||||
|
||||
pscreen->winsys = ws;
|
||||
pscreen->destroy = nvfx_screen_destroy;
|
||||
|
|
|
@ -20,6 +20,7 @@ nvfx_state_validate_common(struct nvfx_context *nvfx)
|
|||
nvfx->hw_pointsprite_control = -1;
|
||||
nvfx->hw_vp_output = -1;
|
||||
nvfx->screen->cur_ctx = nvfx;
|
||||
nvfx->relocs_needed = NVFX_RELOCATE_ALL;
|
||||
}
|
||||
|
||||
/* These can trigger the use of the 3D engine to copy temporaries.
|
||||
|
@ -244,12 +245,12 @@ nvfx_state_validate_common(struct nvfx_context *nvfx)
|
|||
return TRUE;
|
||||
}
|
||||
|
||||
void
|
||||
nvfx_state_emit(struct nvfx_context *nvfx)
|
||||
inline void
|
||||
nvfx_state_relocate(struct nvfx_context *nvfx, unsigned relocs)
|
||||
{
|
||||
struct nouveau_channel* chan = nvfx->screen->base.channel;
|
||||
/* we need to ensure there is enough space to output relocations in one go */
|
||||
unsigned max_relocs = 0
|
||||
const unsigned max_relocs = 0
|
||||
+ 16 /* vertex buffers, incl. dma flag */
|
||||
+ 2 /* index buffer plus format+dma flag */
|
||||
+ 2 * 5 /* 4 cbufs + zsbuf, plus dma objects */
|
||||
|
@ -257,22 +258,19 @@ nvfx_state_emit(struct nvfx_context *nvfx)
|
|||
+ 2 * 4 /* vertex textures plus format+dma flag */
|
||||
+ 1 /* fragprog incl dma flag */
|
||||
;
|
||||
MARK_RING(chan, max_relocs * 2, max_relocs * 2);
|
||||
nvfx_state_relocate(nvfx);
|
||||
}
|
||||
|
||||
void
|
||||
nvfx_state_relocate(struct nvfx_context *nvfx)
|
||||
{
|
||||
nvfx_framebuffer_relocate(nvfx);
|
||||
nvfx_fragtex_relocate(nvfx);
|
||||
nvfx_fragprog_relocate(nvfx);
|
||||
if (nvfx->render_mode == HW)
|
||||
{
|
||||
MARK_RING(chan, max_relocs * 2, max_relocs * 2);
|
||||
|
||||
if(relocs & NVFX_RELOCATE_FRAMEBUFFER)
|
||||
nvfx_framebuffer_relocate(nvfx);
|
||||
if(relocs & NVFX_RELOCATE_FRAGTEX)
|
||||
nvfx_fragtex_relocate(nvfx);
|
||||
if(relocs & NVFX_RELOCATE_FRAGPROG)
|
||||
nvfx_fragprog_relocate(nvfx);
|
||||
if(relocs & NVFX_RELOCATE_VTXBUF)
|
||||
nvfx_vbo_relocate(nvfx);
|
||||
if(nvfx->use_index_buffer)
|
||||
nvfx_idxbuf_relocate(nvfx);
|
||||
}
|
||||
if(relocs & NVFX_RELOCATE_IDXBUF)
|
||||
nvfx_idxbuf_relocate(nvfx);
|
||||
}
|
||||
|
||||
boolean
|
||||
|
|
|
@ -278,6 +278,7 @@ nvfx_framebuffer_validate(struct nvfx_context *nvfx, unsigned prepare_result)
|
|||
OUT_RING(chan, RING_3D(NV34TCL_VIEWPORT_TX_ORIGIN, 1));
|
||||
OUT_RING(chan, 0);
|
||||
}
|
||||
nvfx->relocs_needed &=~ NVFX_RELOCATE_FRAMEBUFFER;
|
||||
}
|
||||
|
||||
void
|
||||
|
@ -307,4 +308,5 @@ nvfx_framebuffer_relocate(struct nvfx_context *nvfx)
|
|||
DO(NV40, 3);
|
||||
|
||||
DO_(nvfx->hw_zeta, NV34, ZETA);
|
||||
nvfx->relocs_needed &=~ NVFX_RELOCATE_FRAMEBUFFER;
|
||||
}
|
||||
|
|
|
@ -334,6 +334,7 @@ nvfx_vbo_validate(struct nvfx_context *nvfx)
|
|||
OUT_RING(chan, 0);
|
||||
|
||||
nvfx->hw_vtxelt_nr = nvfx->vtxelt->num_elements;
|
||||
nvfx->relocs_needed &=~ NVFX_RELOCATE_VTXBUF;
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
|
@ -362,6 +363,7 @@ nvfx_vbo_relocate(struct nvfx_context *nvfx)
|
|||
vb_flags | NOUVEAU_BO_LOW | NOUVEAU_BO_OR,
|
||||
0, NV34TCL_VTXBUF_ADDRESS_DMA1);
|
||||
}
|
||||
nvfx->relocs_needed &=~ NVFX_RELOCATE_VTXBUF;
|
||||
}
|
||||
|
||||
static void
|
||||
|
@ -382,6 +384,7 @@ nvfx_idxbuf_emit(struct nvfx_context* nvfx, unsigned ib_flags)
|
|||
OUT_RELOC(chan, bo, nvfx->idxbuf.offset + 1, ib_flags | NOUVEAU_BO_LOW, 0, 0);
|
||||
OUT_RELOC(chan, bo, ib_format, ib_flags | NOUVEAU_BO_OR,
|
||||
0, NV34TCL_IDXBUF_FORMAT_DMA1);
|
||||
nvfx->relocs_needed &=~ NVFX_RELOCATE_IDXBUF;
|
||||
}
|
||||
|
||||
void
|
||||
|
|
Loading…
Reference in New Issue