nv50: pass in third axis via user param
This is probably not the most efficient approach for all grid geometries, but the assumption is that kernels tend to be x/y-heavy rather than z-heavy. The launch iterates over each z slice and passes the current slice index (packed together with the z grid size) in via user param 0. (All other user params are therefore shifted up by one dword.)
Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
Reviewed-by: Pierre Moreau <dev@pmoreau.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/9299>
This commit is contained in:
parent
c3e9be9b5a
commit
de71feccbf
|
@ -479,7 +479,7 @@ nv50_compute_upload_input(struct nv50_context *nv50, const uint32_t *input)
|
|||
unsigned size = align(nv50->compprog->parm_size, 0x4);
|
||||
|
||||
BEGIN_NV04(push, NV50_CP(USER_PARAM_COUNT), 1);
|
||||
PUSH_DATA (push, (size / 4) << 8);
|
||||
PUSH_DATA (push, (1 + (size / 4)) << 8);
|
||||
|
||||
if (size) {
|
||||
struct nouveau_mm_allocation *mm;
|
||||
|
@ -498,7 +498,7 @@ nv50_compute_upload_input(struct nv50_context *nv50, const uint32_t *input)
|
|||
|
||||
nouveau_pushbuf_space(push, 0, 0, 1);
|
||||
|
||||
BEGIN_NV04(push, NV50_CP(USER_PARAM(0)), size / 4);
|
||||
BEGIN_NV04(push, NV50_CP(USER_PARAM(1)), size / 4);
|
||||
nouveau_pushbuf_data(push, bo, offset, size);
|
||||
|
||||
nouveau_fence_work(screen->base.fence.current, nouveau_mm_free_work, mm);
|
||||
|
@ -545,9 +545,15 @@ nv50_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info)
|
|||
BEGIN_NV04(push, NV50_CP(GRIDID), 1);
|
||||
PUSH_DATA (push, 1);
|
||||
|
||||
for (int i = 0; i < info->grid[2]; i++) {
|
||||
BEGIN_NV04(push, NV50_CP(USER_PARAM(0)), 1);
|
||||
PUSH_DATA (push, info->grid[2] | i << 16);
|
||||
|
||||
/* kernel launching */
|
||||
BEGIN_NV04(push, NV50_CP(LAUNCH), 1);
|
||||
PUSH_DATA (push, 0);
|
||||
}
|
||||
|
||||
BEGIN_NV04(push, SUBC_CP(NV50_GRAPH_SERIALIZE), 1);
|
||||
PUSH_DATA (push, 0);
|
||||
|
||||
|
|
|
@ -375,7 +375,7 @@ nv50_program_translate(struct nv50_program *prog, uint16_t chipset,
|
|||
prog->gp.has_viewport = 0;
|
||||
|
||||
if (prog->type == PIPE_SHADER_COMPUTE)
|
||||
info->prop.cp.inputOffset = 0x10;
|
||||
info->prop.cp.inputOffset = 0x14;
|
||||
|
||||
info_out.driverPriv = prog;
|
||||
|
||||
|
|
|
@ -58,8 +58,8 @@ static const uint64_t nv50_read_hw_sm_counters_code[] =
|
|||
* mov $r2 $pm2
|
||||
* mov $r3 $pm3
|
||||
* mov $r4 $physid
|
||||
* ld $r5 b32 s[0x10]
|
||||
* ld $r6 b32 s[0x14]
|
||||
* ld $r5 b32 s[0x14]
|
||||
* ld $r6 b32 s[0x18]
|
||||
* and b32 $r4 $r4 0x000f0000
|
||||
* shr u32 $r4 $r4 0x10
|
||||
* mul $r4 u24 $r4 0x14
|
||||
|
@ -81,8 +81,8 @@ static const uint64_t nv50_read_hw_sm_counters_code[] =
|
|||
0x6001878000000009ULL,
|
||||
0x6001c7800000000dULL,
|
||||
0x6000078000000011ULL,
|
||||
0x4400c78010000815ULL,
|
||||
0x4400c78010000a19ULL,
|
||||
0x4400c78010000a15ULL,
|
||||
0x4400c78010000c19ULL,
|
||||
0x0000f003d0000811ULL,
|
||||
0xe410078030100811ULL,
|
||||
0x0000000340540811ULL,
|
||||
|
|
Loading…
Reference in New Issue