nvc0: add support for indirect compute on Fermi
When indirect compute is used, the size of the grid (in blocks) is stored as
three integers inside a buffer. This requires a macro to set up GRIDDIM_YX
and GRIDDIM_Z.

Changes from v2:
 - do not launch the grid if the number of groups for a dimension is 0

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
This commit is contained in:
parent
fa7333a742
commit
c6293877f0
|
@ -1,5 +1,5 @@
|
|||
ENVYAS?=envyas
|
||||
TARGETS=com9097.mme.h
|
||||
TARGETS=com9097.mme.h com90c0.mme.h
|
||||
|
||||
all: $(TARGETS)
|
||||
|
||||
|
|
|
@ -0,0 +1,24 @@
|
|||
/* NVC0_COMPUTE_MACRO_LAUNCH_GRID_INDIRECT
|
||||
*
|
||||
* arg = num_groups_x
|
||||
* parm[0] = num_groups_y
|
||||
* parm[1] = num_groups_z
|
||||
*/
|
||||
.section #mme90c0_launch_grid_indirect
|
||||
parm $r2 maddr 0x108e /* GRIDDIM_YX; $r2 = parm[0] = num_groups_y */
|
||||
braz $r1 #fail /* num_groups_x == 0: do not launch the grid */
|
||||
parm $r3 /* $r3 = parm[1] = num_groups_z; NOTE(review): follows a non-annulled branch, presumably runs as its delay slot - confirm */
|
||||
braz annul $r2 #fail /* num_groups_y == 0: do not launch the grid */
|
||||
braz annul $r3 #fail /* num_groups_z == 0: do not launch the grid */
|
||||
send (extrinsrt $r1 $r2 0x0 0x10 0x10) /* num_groups_y << 16 | num_groups_x */
|
||||
send $r3 /* num_groups_z; second grid word, like the grid[2] write of the non-indirect path */
|
||||
maddrsend 0xa7 /* COMPUTE_BEGIN */
|
||||
maddrsend 0x282 /* UNKA08 */
|
||||
maddr 0xda /* LAUNCH */
|
||||
send 0x1000 /* same LAUNCH data word as the non-indirect path in nvc0_launch_grid */
|
||||
maddrsend 0x281 /* COMPUTE_END */
|
||||
exit maddr 0xd8 /* UNK360 */
|
||||
send 0x1 /* same 0x0360 data word as the non-indirect path; NOTE(review): placed after exit, presumably still executed - confirm */
|
||||
fail: /* reached when any of the three group counts is zero */
|
||||
exit
|
||||
nop
|
|
@ -0,0 +1,19 @@
|
|||
/*
 * Assembled code (16 words) of the launch-grid-indirect compute macro.
 *
 * The array is registered in nvc0_screen_create() through
 * MK_MACRO(NVC0_COMPUTE_MACRO_LAUNCH_GRID_INDIRECT, ...) and is invoked by
 * nvc0_launch_grid() when info->indirect is set, with the three group counts
 * read from the indirect buffer as macro arguments.
 *
 * NOTE(review): presumably generated by envyas from the matching .mme source
 * (see the com90c0.mme.h Makefile target) - regenerate rather than editing
 * the words by hand. The position of the "fail" label comment below is
 * presumably an assembler artifact - confirm.
 */
uint32_t mme90c0_launch_grid_indirect[] = {
|
||||
0x04238251,
|
||||
0x00034807,
|
||||
0x00000301,
|
||||
/* 0x000e: fail */
|
||||
0x0002d027,
|
||||
0x00029827,
|
||||
0x84008842,
|
||||
0x00001841,
|
||||
0x0029c071,
|
||||
0x00a08071,
|
||||
0x00368021,
|
||||
0x04000041,
|
||||
0x00a04071,
|
||||
0x003600a1,
|
||||
0x00004041,
|
||||
0x00000091,
|
||||
0x00000011,
|
||||
};
|
|
@ -360,14 +360,6 @@ nvc0_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info)
|
|||
BEGIN_NVC0(push, NVC0_COMPUTE(CP_GPR_ALLOC), 1);
|
||||
PUSH_DATA (push, cp->num_gprs);
|
||||
|
||||
/* grid/block setup */
|
||||
BEGIN_NVC0(push, NVC0_COMPUTE(GRIDDIM_YX), 2);
|
||||
PUSH_DATA (push, (info->grid[1] << 16) | info->grid[0]);
|
||||
PUSH_DATA (push, info->grid[2]);
|
||||
BEGIN_NVC0(push, NVC0_COMPUTE(BLOCKDIM_YX), 2);
|
||||
PUSH_DATA (push, (info->block[1] << 16) | info->block[0]);
|
||||
PUSH_DATA (push, info->block[2]);
|
||||
|
||||
/* launch preliminary setup */
|
||||
BEGIN_NVC0(push, NVC0_COMPUTE(GRIDID), 1);
|
||||
PUSH_DATA (push, 0x1);
|
||||
|
@ -376,17 +368,39 @@ nvc0_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info)
|
|||
BEGIN_NVC0(push, NVC0_COMPUTE(FLUSH), 1);
|
||||
PUSH_DATA (push, NVC0_COMPUTE_FLUSH_GLOBAL | NVC0_COMPUTE_FLUSH_UNK8);
|
||||
|
||||
/* kernel launching */
|
||||
BEGIN_NVC0(push, NVC0_COMPUTE(COMPUTE_BEGIN), 1);
|
||||
PUSH_DATA (push, 0);
|
||||
BEGIN_NVC0(push, SUBC_COMPUTE(0x0a08), 1);
|
||||
PUSH_DATA (push, 0);
|
||||
BEGIN_NVC0(push, NVC0_COMPUTE(LAUNCH), 1);
|
||||
PUSH_DATA (push, 0x1000);
|
||||
BEGIN_NVC0(push, NVC0_COMPUTE(COMPUTE_END), 1);
|
||||
PUSH_DATA (push, 0);
|
||||
BEGIN_NVC0(push, SUBC_COMPUTE(0x0360), 1);
|
||||
PUSH_DATA (push, 0x1);
|
||||
/* block setup */
|
||||
BEGIN_NVC0(push, NVC0_COMPUTE(BLOCKDIM_YX), 2);
|
||||
PUSH_DATA (push, (info->block[1] << 16) | info->block[0]);
|
||||
PUSH_DATA (push, info->block[2]);
|
||||
|
||||
if (unlikely(info->indirect)) {
|
||||
struct nv04_resource *res = nv04_resource(info->indirect);
|
||||
uint32_t offset = res->offset + info->indirect_offset;
|
||||
unsigned macro = NVC0_COMPUTE_MACRO_LAUNCH_GRID_INDIRECT;
|
||||
|
||||
nouveau_pushbuf_space(push, 16, 0, 1);
|
||||
PUSH_REFN(push, res->bo, NOUVEAU_BO_RD | res->domain);
|
||||
PUSH_DATA(push, NVC0_FIFO_PKHDR_1I(1, macro, 3));
|
||||
nouveau_pushbuf_data(push, res->bo, offset,
|
||||
NVC0_IB_ENTRY_1_NO_PREFETCH | 3 * 4);
|
||||
} else {
|
||||
/* grid setup */
|
||||
BEGIN_NVC0(push, NVC0_COMPUTE(GRIDDIM_YX), 2);
|
||||
PUSH_DATA (push, (info->grid[1] << 16) | info->grid[0]);
|
||||
PUSH_DATA (push, info->grid[2]);
|
||||
|
||||
/* kernel launching */
|
||||
BEGIN_NVC0(push, NVC0_COMPUTE(COMPUTE_BEGIN), 1);
|
||||
PUSH_DATA (push, 0);
|
||||
BEGIN_NVC0(push, SUBC_COMPUTE(0x0a08), 1);
|
||||
PUSH_DATA (push, 0);
|
||||
BEGIN_NVC0(push, NVC0_COMPUTE(LAUNCH), 1);
|
||||
PUSH_DATA (push, 0x1000);
|
||||
BEGIN_NVC0(push, NVC0_COMPUTE(COMPUTE_END), 1);
|
||||
PUSH_DATA (push, 0);
|
||||
BEGIN_NVC0(push, SUBC_COMPUTE(0x0360), 1);
|
||||
PUSH_DATA (push, 0x1);
|
||||
}
|
||||
|
||||
/* Invalidate all 3D constbufs because they are aliased with COMPUTE. */
|
||||
nvc0->dirty |= NVC0_NEW_CONSTBUF;
|
||||
|
|
|
@ -35,4 +35,6 @@
|
|||
|
||||
#define NVC0_3D_MACRO_QUERY_BUFFER_WRITE 0x00003858
|
||||
|
||||
#define NVC0_COMPUTE_MACRO_LAUNCH_GRID_INDIRECT 0x00003860
|
||||
|
||||
#endif /* __NVC0_MACROS_H__ */
|
||||
|
|
|
@ -36,6 +36,7 @@
|
|||
#include "nvc0/nvc0_screen.h"
|
||||
|
||||
#include "nvc0/mme/com9097.mme.h"
|
||||
#include "nvc0/mme/com90c0.mme.h"
|
||||
|
||||
static boolean
|
||||
nvc0_screen_is_format_supported(struct pipe_screen *pscreen,
|
||||
|
@ -1074,6 +1075,7 @@ nvc0_screen_create(struct nouveau_device *dev)
|
|||
MK_MACRO(NVC0_3D_MACRO_DRAW_ARRAYS_INDIRECT_COUNT, mme9097_draw_arrays_indirect_count);
|
||||
MK_MACRO(NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT_COUNT, mme9097_draw_elts_indirect_count);
|
||||
MK_MACRO(NVC0_3D_MACRO_QUERY_BUFFER_WRITE, mme9097_query_buffer_write);
|
||||
MK_MACRO(NVC0_COMPUTE_MACRO_LAUNCH_GRID_INDIRECT, mme90c0_launch_grid_indirect);
|
||||
|
||||
BEGIN_NVC0(push, NVC0_3D(RASTERIZE_ENABLE), 1);
|
||||
PUSH_DATA (push, 1);
|
||||
|
|
Loading…
Reference in New Issue