From 4e3c51cbd8a2240a1d18fedad142b9c2ed9573fb Mon Sep 17 00:00:00 2001
From: Emma Anholt
Date: Tue, 7 Jun 2022 11:31:03 -0700
Subject: [PATCH] freedreno/a5xx: Set the buffer bit appropriately in XS_CTRL_REG0.

This seems to be how the bit gets used, from grepping my blob traces.
Hopefully this helps stabilize some stuff.

Part-of:
---
 src/freedreno/registers/adreno/a5xx.xml          |  4 +++-
 src/gallium/drivers/freedreno/a5xx/fd5_compute.c |  3 ++-
 src/gallium/drivers/freedreno/a5xx/fd5_program.c | 12 ++++++++++--
 3 files changed, 15 insertions(+), 4 deletions(-)

diff --git a/src/freedreno/registers/adreno/a5xx.xml b/src/freedreno/registers/adreno/a5xx.xml
index 536fcafc94e..010f62e9d82 100644
--- a/src/freedreno/registers/adreno/a5xx.xml
+++ b/src/freedreno/registers/adreno/a5xx.xml
@@ -2463,7 +2463,9 @@ bit 7 for RECTLIST (clear) when z32s8 (used for clear of depth32? not set - + + +
diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_compute.c b/src/gallium/drivers/freedreno/a5xx/fd5_compute.c
index 6f236c1f48c..d98b173e9b1 100644
--- a/src/gallium/drivers/freedreno/a5xx/fd5_compute.c
+++ b/src/gallium/drivers/freedreno/a5xx/fd5_compute.c
@@ -60,7 +60,8 @@ cs_program_emit(struct fd_ringbuffer *ring, struct ir3_shader_variant *v) A5XX_SP_CS_CTRL_REG0_HALFREGFOOTPRINT(i->max_half_reg + 1) | A5XX_SP_CS_CTRL_REG0_FULLREGFOOTPRINT(i->max_reg + 1) | A5XX_SP_CS_CTRL_REG0_BRANCHSTACK(ir3_shader_branchstack_hw(v)) | - 0x6 /* XXX */); + COND(instrlen != 0, A5XX_SP_CS_CTRL_REG0_BUFFER) | + 0x2 /* XXX */); OUT_PKT4(ring, REG_A5XX_HLSQ_CS_CONFIG, 1); OUT_RING(ring, A5XX_HLSQ_CS_CONFIG_CONSTOBJECTOFFSET(0) |
diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_program.c b/src/gallium/drivers/freedreno/a5xx/fd5_program.c
index 2b7fb722ac2..28c310576f6 100644
--- a/src/gallium/drivers/freedreno/a5xx/fd5_program.c
+++ b/src/gallium/drivers/freedreno/a5xx/fd5_program.c
@@ -381,7 +381,14 @@ fd5_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring, ring, 
A5XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT(s[VS].i->max_half_reg + 1) | A5XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT(s[VS].i->max_reg + 1) | - 0x6 | /* XXX seems to be always set? */ + COND(s[VS].instrlen != 0, A5XX_SP_VS_CTRL_REG0_BUFFER) | + /* XXX: 0x2 is only unset in + * dEQP-GLES3.functional.ubo.single_nested_struct_array.single_buffer.packed_instance_array_vertex + * on a collection of blob traces. That shader is 1091 instrs, 0 + * half, 3 full, 108 constlen. Other >1091 instr non-VS shaders don't + * unset it, so that's not the trick. + */ + 0x2 | A5XX_SP_VS_CTRL_REG0_BRANCHSTACK(ir3_shader_branchstack_hw(s[VS].v)) | COND(s[VS].v->need_pixlod, A5XX_SP_VS_CTRL_REG0_PIXLODENABLE)); @@ -531,7 +538,8 @@ fd5_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring, OUT_RING( ring, COND(s[FS].v->total_in > 0, A5XX_SP_FS_CTRL_REG0_VARYING) | - 0x40006 | /* XXX set pretty much everywhere */ + 0x40002 | /* XXX set pretty much everywhere */ + COND(s[FS].instrlen != 0, A5XX_SP_FS_CTRL_REG0_BUFFER) | A5XX_SP_FS_CTRL_REG0_THREADSIZE(fssz) | A5XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT(s[FS].i->max_half_reg + 1) | A5XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT(s[FS].i->max_reg + 1) |