freedreno/a5xx: Set the buffer bit appropriately in XS_CTRL_REG0.
This seems to be how the bit gets used, judging from grepping my blob traces. Hopefully this helps stabilize some stuff.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17004>
This commit is contained in:
parent
6cf2b24eaf
commit
4e3c51cbd8
|
@ -2463,7 +2463,9 @@ bit 7 for RECTLIST (clear) when z32s8 (used for clear of depth32? not set
|
||||||
<bitfield name="INSTRLEN" low="1" high="31" type="uint"/>
|
<bitfield name="INSTRLEN" low="1" high="31" type="uint"/>
|
||||||
</bitset>
|
</bitset>
|
||||||
<bitset name="a5xx_sp_xs_ctrl_reg0" inline="yes">
|
<bitset name="a5xx_sp_xs_ctrl_reg0" inline="yes">
|
||||||
<!-- bit1 + bit2 set for "buffer" mode (ie. shader small enough to fit internally) -->
|
<!-- bit1 almost always set -->
|
||||||
|
<!-- set for "buffer mode" (ie. shader small enough to fit internally) -->
|
||||||
|
<bitfield name="BUFFER" pos="2" type="boolean"/>
|
||||||
<!-- 24 or more (full size) GPRS and blob uses TWO_QUADS instead of FOUR_QUADS -->
|
<!-- 24 or more (full size) GPRS and blob uses TWO_QUADS instead of FOUR_QUADS -->
|
||||||
<bitfield name="THREADSIZE" pos="3" type="a3xx_threadsize"/>
|
<bitfield name="THREADSIZE" pos="3" type="a3xx_threadsize"/>
|
||||||
<bitfield name="HALFREGFOOTPRINT" low="4" high="9" type="uint"/>
|
<bitfield name="HALFREGFOOTPRINT" low="4" high="9" type="uint"/>
|
||||||
|
|
|
@ -60,7 +60,8 @@ cs_program_emit(struct fd_ringbuffer *ring, struct ir3_shader_variant *v)
|
||||||
A5XX_SP_CS_CTRL_REG0_HALFREGFOOTPRINT(i->max_half_reg + 1) |
|
A5XX_SP_CS_CTRL_REG0_HALFREGFOOTPRINT(i->max_half_reg + 1) |
|
||||||
A5XX_SP_CS_CTRL_REG0_FULLREGFOOTPRINT(i->max_reg + 1) |
|
A5XX_SP_CS_CTRL_REG0_FULLREGFOOTPRINT(i->max_reg + 1) |
|
||||||
A5XX_SP_CS_CTRL_REG0_BRANCHSTACK(ir3_shader_branchstack_hw(v)) |
|
A5XX_SP_CS_CTRL_REG0_BRANCHSTACK(ir3_shader_branchstack_hw(v)) |
|
||||||
0x6 /* XXX */);
|
COND(instrlen != 0, A5XX_SP_CS_CTRL_REG0_BUFFER) |
|
||||||
|
0x2 /* XXX */);
|
||||||
|
|
||||||
OUT_PKT4(ring, REG_A5XX_HLSQ_CS_CONFIG, 1);
|
OUT_PKT4(ring, REG_A5XX_HLSQ_CS_CONFIG, 1);
|
||||||
OUT_RING(ring, A5XX_HLSQ_CS_CONFIG_CONSTOBJECTOFFSET(0) |
|
OUT_RING(ring, A5XX_HLSQ_CS_CONFIG_CONSTOBJECTOFFSET(0) |
|
||||||
|
|
|
@ -381,7 +381,14 @@ fd5_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring,
|
||||||
ring,
|
ring,
|
||||||
A5XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT(s[VS].i->max_half_reg + 1) |
|
A5XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT(s[VS].i->max_half_reg + 1) |
|
||||||
A5XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT(s[VS].i->max_reg + 1) |
|
A5XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT(s[VS].i->max_reg + 1) |
|
||||||
0x6 | /* XXX seems to be always set? */
|
COND(s[VS].instrlen != 0, A5XX_SP_VS_CTRL_REG0_BUFFER) |
|
||||||
|
/* XXX: 0x2 is only unset in
|
||||||
|
* dEQP-GLES3.functional.ubo.single_nested_struct_array.single_buffer.packed_instance_array_vertex
|
||||||
|
* on a collection of blob traces. That shader is 1091 instrs, 0
|
||||||
|
* half, 3 full, 108 constlen. Other >1091 instr non-VS shaders don't
|
||||||
|
* unset it, so that's not the trick.
|
||||||
|
*/
|
||||||
|
0x2 |
|
||||||
A5XX_SP_VS_CTRL_REG0_BRANCHSTACK(ir3_shader_branchstack_hw(s[VS].v)) |
|
A5XX_SP_VS_CTRL_REG0_BRANCHSTACK(ir3_shader_branchstack_hw(s[VS].v)) |
|
||||||
COND(s[VS].v->need_pixlod, A5XX_SP_VS_CTRL_REG0_PIXLODENABLE));
|
COND(s[VS].v->need_pixlod, A5XX_SP_VS_CTRL_REG0_PIXLODENABLE));
|
||||||
|
|
||||||
|
@ -531,7 +538,8 @@ fd5_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring,
|
||||||
OUT_RING(
|
OUT_RING(
|
||||||
ring,
|
ring,
|
||||||
COND(s[FS].v->total_in > 0, A5XX_SP_FS_CTRL_REG0_VARYING) |
|
COND(s[FS].v->total_in > 0, A5XX_SP_FS_CTRL_REG0_VARYING) |
|
||||||
0x40006 | /* XXX set pretty much everywhere */
|
0x40002 | /* XXX set pretty much everywhere */
|
||||||
|
COND(s[FS].instrlen != 0, A5XX_SP_FS_CTRL_REG0_BUFFER) |
|
||||||
A5XX_SP_FS_CTRL_REG0_THREADSIZE(fssz) |
|
A5XX_SP_FS_CTRL_REG0_THREADSIZE(fssz) |
|
||||||
A5XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT(s[FS].i->max_half_reg + 1) |
|
A5XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT(s[FS].i->max_half_reg + 1) |
|
||||||
A5XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT(s[FS].i->max_reg + 1) |
|
A5XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT(s[FS].i->max_reg + 1) |
|
||||||
|
|
Loading…
Reference in New Issue