nv50: pass in third axis via user param

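This is probably not the most efficient approach for all grid geometries,
but the assumption is that kernels tend to be x/y-heavy rather than
z-heavy. Iterate over each z slice, launching once per slice and passing
the grid's z extent together with the current z index via USER_PARAM(0).
All other user params are bumped by one dword to make room (the compute
input offset moves from 0x10 to 0x14).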

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
Reviewed-by: Pierre Moreau <dev@pmoreau.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/9299>
This commit is contained in:
Ilia Mirkin 2021-02-24 19:34:05 -05:00
parent c3e9be9b5a
commit de71feccbf
3 changed files with 16 additions and 10 deletions

View File

@ -479,7 +479,7 @@ nv50_compute_upload_input(struct nv50_context *nv50, const uint32_t *input)
unsigned size = align(nv50->compprog->parm_size, 0x4);
BEGIN_NV04(push, NV50_CP(USER_PARAM_COUNT), 1);
PUSH_DATA (push, (size / 4) << 8);
PUSH_DATA (push, (1 + (size / 4)) << 8);
if (size) {
struct nouveau_mm_allocation *mm;
@ -498,7 +498,7 @@ nv50_compute_upload_input(struct nv50_context *nv50, const uint32_t *input)
nouveau_pushbuf_space(push, 0, 0, 1);
BEGIN_NV04(push, NV50_CP(USER_PARAM(0)), size / 4);
BEGIN_NV04(push, NV50_CP(USER_PARAM(1)), size / 4);
nouveau_pushbuf_data(push, bo, offset, size);
nouveau_fence_work(screen->base.fence.current, nouveau_mm_free_work, mm);
@ -545,9 +545,15 @@ nv50_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info)
BEGIN_NV04(push, NV50_CP(GRIDID), 1);
PUSH_DATA (push, 1);
/* kernel launching */
BEGIN_NV04(push, NV50_CP(LAUNCH), 1);
PUSH_DATA (push, 0);
for (int i = 0; i < info->grid[2]; i++) {
BEGIN_NV04(push, NV50_CP(USER_PARAM(0)), 1);
PUSH_DATA (push, info->grid[2] | i << 16);
/* kernel launching */
BEGIN_NV04(push, NV50_CP(LAUNCH), 1);
PUSH_DATA (push, 0);
}
BEGIN_NV04(push, SUBC_CP(NV50_GRAPH_SERIALIZE), 1);
PUSH_DATA (push, 0);

View File

@ -375,7 +375,7 @@ nv50_program_translate(struct nv50_program *prog, uint16_t chipset,
prog->gp.has_viewport = 0;
if (prog->type == PIPE_SHADER_COMPUTE)
info->prop.cp.inputOffset = 0x10;
info->prop.cp.inputOffset = 0x14;
info_out.driverPriv = prog;

View File

@ -58,8 +58,8 @@ static const uint64_t nv50_read_hw_sm_counters_code[] =
* mov $r2 $pm2
* mov $r3 $pm3
* mov $r4 $physid
* ld $r5 b32 s[0x10]
* ld $r6 b32 s[0x14]
* ld $r5 b32 s[0x14]
* ld $r6 b32 s[0x18]
* and b32 $r4 $r4 0x000f0000
* shr u32 $r4 $r4 0x10
* mul $r4 u24 $r4 0x14
@ -81,8 +81,8 @@ static const uint64_t nv50_read_hw_sm_counters_code[] =
0x6001878000000009ULL,
0x6001c7800000000dULL,
0x6000078000000011ULL,
0x4400c78010000815ULL,
0x4400c78010000a19ULL,
0x4400c78010000a15ULL,
0x4400c78010000c19ULL,
0x0000f003d0000811ULL,
0xe410078030100811ULL,
0x0000000340540811ULL,