freedreno/a5xx: Set the buffer bit appropriately in XS_CTRL_REG0.

This seems to be how the bit gets used, from grepping my blob traces.
Hopefully this helps stabilize some stuff.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17004>
This commit is contained in:
Emma Anholt 2022-06-07 11:31:03 -07:00 committed by Marge Bot
parent 6cf2b24eaf
commit 4e3c51cbd8
3 changed files with 15 additions and 4 deletions

View File

@@ -2463,7 +2463,9 @@ bit 7 for RECTLIST (clear) when z32s8 (used for clear of depth32? not set
<bitfield name="INSTRLEN" low="1" high="31" type="uint"/>
</bitset>
<bitset name="a5xx_sp_xs_ctrl_reg0" inline="yes">
<!-- bit1 + bit2 set for "buffer" mode (ie. shader small enough to fit internally) -->
<!-- bit1 almost always set -->
<!-- set for "buffer mode" (ie. shader small enough to fit internally) -->
<bitfield name="BUFFER" pos="2" type="boolean"/>
<!-- 24 or more (full size) GPRS and blob uses TWO_QUADS instead of FOUR_QUADS -->
<bitfield name="THREADSIZE" pos="3" type="a3xx_threadsize"/>
<bitfield name="HALFREGFOOTPRINT" low="4" high="9" type="uint"/>

View File

@@ -60,7 +60,8 @@ cs_program_emit(struct fd_ringbuffer *ring, struct ir3_shader_variant *v)
A5XX_SP_CS_CTRL_REG0_HALFREGFOOTPRINT(i->max_half_reg + 1) |
A5XX_SP_CS_CTRL_REG0_FULLREGFOOTPRINT(i->max_reg + 1) |
A5XX_SP_CS_CTRL_REG0_BRANCHSTACK(ir3_shader_branchstack_hw(v)) |
0x6 /* XXX */);
COND(instrlen != 0, A5XX_SP_CS_CTRL_REG0_BUFFER) |
0x2 /* XXX */);
OUT_PKT4(ring, REG_A5XX_HLSQ_CS_CONFIG, 1);
OUT_RING(ring, A5XX_HLSQ_CS_CONFIG_CONSTOBJECTOFFSET(0) |

View File

@@ -381,7 +381,14 @@ fd5_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring,
ring,
A5XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT(s[VS].i->max_half_reg + 1) |
A5XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT(s[VS].i->max_reg + 1) |
0x6 | /* XXX seems to be always set? */
COND(s[VS].instrlen != 0, A5XX_SP_VS_CTRL_REG0_BUFFER) |
/* XXX: 0x2 is only unset in
* dEQP-GLES3.functional.ubo.single_nested_struct_array.single_buffer.packed_instance_array_vertex
* on a collection of blob traces. That shader is 1091 instrs, 0
* half, 3 full, 108 constlen. Other >1091 instr non-VS shaders don't
* unset it, so that's not the trick.
*/
0x2 |
A5XX_SP_VS_CTRL_REG0_BRANCHSTACK(ir3_shader_branchstack_hw(s[VS].v)) |
COND(s[VS].v->need_pixlod, A5XX_SP_VS_CTRL_REG0_PIXLODENABLE));
@@ -531,7 +538,8 @@ fd5_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring,
OUT_RING(
ring,
COND(s[FS].v->total_in > 0, A5XX_SP_FS_CTRL_REG0_VARYING) |
0x40006 | /* XXX set pretty much everywhere */
0x40002 | /* XXX set pretty much everywhere */
COND(s[FS].instrlen != 0, A5XX_SP_FS_CTRL_REG0_BUFFER) |
A5XX_SP_FS_CTRL_REG0_THREADSIZE(fssz) |
A5XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT(s[FS].i->max_half_reg + 1) |
A5XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT(s[FS].i->max_reg + 1) |