turnip: enable infinities for f16 math and document the register

Once float16 is enabled, this allows a number of float16 tests
to pass.

When A6XX_SP_FLOAT_CNTL_F16_NO_INF is set, every operation that would
generate +-infinity generates +-MAX_HALF_FLOAT instead.

Fixes some tests from:
 dEQP-VK.spirv_assembly.instruction.*.float16.*
 dEQP-VK.spirv_assembly.instruction.*.float_controls.fp16.*

E.g.:
 dEQP-VK.spirv_assembly.instruction.graphics.float16.arithmetic_1.sinh_vert
 dEQP-VK.spirv_assembly.instruction.compute.float16.arithmetic_4.length
 dEQP-VK.spirv_assembly.instruction.compute.float_controls.fp16.input_args.log_denorm_flush_to_zero_nostorage
 dEQP-VK.spirv_assembly.instruction.compute.float_controls.fp16.input_args.log2_denorm_flush_to_zero_nostorage
 dEQP-VK.spirv_assembly.instruction.compute.float_controls.fp16.input_args.inv_sqrt_denorm_flush_to_zero_nostorage

Signed-off-by: Danylo Piliaiev <dpiliaiev@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/9840>
This commit is contained in:
Danylo Piliaiev 2021-03-25 14:53:20 +02:00 committed by Marge Bot
parent 14460faa64
commit 64aaa4afc3
7 changed files with 15 additions and 10 deletions

View File

@ -1631,7 +1631,7 @@ registers:
00000001 SP_ADDR_MODE_CNTL: ADDR_64B
deadbeef SP_NC_MODE_CNTL: 0xdeadbeef
deadbeef SP_UNKNOWN_AE03: 0xdeadbeef
00000004 SP_UNKNOWN_AE04: 0x4
00000004 SP_FLOAT_CNTL: { 0x4 }
deadbeef 0xae0c: deadbeef
deadbeef SP_PERFCTR_ENABLE: { VS | HS | DS | GS | CS | 0xdeadbec0 }
00000000 SP_PERFCTR_SP_SEL[0]+0: 00000000

View File

@ -18,8 +18,8 @@ t4 write RB_CCU_CNTL (8e07)
t4 write RB_UNKNOWN_8E04 (8e04)
RB_UNKNOWN_8E04: 0x100000
000000000105801c: 0000: 408e0401 00100000
t4 write SP_UNKNOWN_AE04 (ae04)
SP_UNKNOWN_AE04: 0x8
t4 write SP_FLOAT_CNTL (ae04)
SP_FLOAT_CNTL: { F16_NO_INF }
0000000001058024: 0000: 48ae0401 00000008
t4 write SP_UNKNOWN_AE00 (ae00)
SP_UNKNOWN_AE00: 0
@ -350,7 +350,7 @@ t7 opcode: CP_BLIT (2c) (2 dwords)
!+ 0000f180 SP_2D_DST_FORMAT: { COLOR_FORMAT = FMT6_8_8_8_8_UNORM | MASK = 0xf }
+ 00000000 SP_UNKNOWN_AE00: 0
!+ 00000410 SP_UNKNOWN_AE03: 0x410
!+ 00000008 SP_UNKNOWN_AE04: 0x8
!+ 00000008 SP_FLOAT_CNTL: { F16_NO_INF }
!+ 0000003f SP_PERFCTR_ENABLE: { VS | HS | DS | GS | FS | CS }
!+ 01011000 SP_PS_TP_BORDER_COLOR_BASE_ADDR: 0x1011000
+ 00000000 SP_PS_TP_BORDER_COLOR_BASE_ADDR_HI: 0

View File

@ -15,8 +15,8 @@ t7 opcode: CP_WAIT_FOR_IDLE (26) (1 dwords)
t4 write RB_UNKNOWN_8E04 (8e04)
RB_UNKNOWN_8E04: 0
0000000001d91014: 0000: 408e0401 00000000
t4 write SP_UNKNOWN_AE04 (ae04)
SP_UNKNOWN_AE04: 0x8
t4 write SP_FLOAT_CNTL (ae04)
SP_FLOAT_CNTL: { F16_NO_INF }
0000000001d9101c: 0000: 48ae0401 00000008
t4 write SP_UNKNOWN_AE00 (ae00)
SP_UNKNOWN_AE00: 0
@ -1124,7 +1124,7 @@ t7 opcode: CP_DRAW_INDX_OFFSET (38) (4 dwords)
+ 00000000 SP_IBO_COUNT: 0
+ 00000000 SP_UNKNOWN_AE00: 0
!+ 00001430 SP_UNKNOWN_AE03: 0x1430
!+ 00000008 SP_UNKNOWN_AE04: 0x8
!+ 00000008 SP_FLOAT_CNTL: { F16_NO_INF }
!+ 0000003f SP_PERFCTR_ENABLE: { VS | HS | DS | GS | FS | CS }
+ 00000000 SP_UNKNOWN_B182: 0
+ 00000000 SP_UNKNOWN_B183: 0

View File

@ -123,6 +123,9 @@ cs_program_emit(struct fd_ringbuffer *ring, struct kernel *kernel)
OUT_PKT4(ring, REG_A6XX_SP_PERFCTR_ENABLE, 1);
OUT_RING(ring, A6XX_SP_PERFCTR_ENABLE_CS);
OUT_PKT4(ring, REG_A6XX_SP_FLOAT_CNTL, 1);
OUT_RING(ring, 0);
OUT_PKT4(ring, REG_A6XX_HLSQ_INVALIDATE_CMD, 1);
OUT_RING(ring, A6XX_HLSQ_INVALIDATE_CMD_VS_STATE |
A6XX_HLSQ_INVALIDATE_CMD_HS_STATE |

View File

@ -3180,7 +3180,9 @@ to upconvert to 32b float internally?
<!-- TODO: valid bits 0x3c3f, see kernel -->
</reg32>
<reg32 offset="0xae03" name="SP_UNKNOWN_AE03"/>
<reg32 offset="0xae04" name="SP_UNKNOWN_AE04" low="0" high="3"/> <!-- 0x8 or 0 ? -->
<reg32 offset="0xae04" name="SP_FLOAT_CNTL">
<bitfield name="F16_NO_INF" pos="3" type="boolean"/>
</reg32>
<reg32 offset="0xae0f" name="SP_PERFCTR_ENABLE">
<!-- some perfcntrs are affected by a per-stage enable bit

View File

@ -743,7 +743,7 @@ tu6_init_hw(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
A6XX_RB_CCU_CNTL(.offset = phys_dev->info.a6xx.ccu_offset_bypass));
cmd->state.ccu_state = TU_CMD_CCU_SYSMEM;
tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_8E04, 0x00100000);
tu_cs_emit_write_reg(cs, REG_A6XX_SP_UNKNOWN_AE04, 0x8);
tu_cs_emit_write_reg(cs, REG_A6XX_SP_FLOAT_CNTL, 0);
tu_cs_emit_write_reg(cs, REG_A6XX_SP_UNKNOWN_AE00, 0);
tu_cs_emit_write_reg(cs, REG_A6XX_SP_PERFCTR_ENABLE, 0x3f);
tu_cs_emit_write_reg(cs, REG_A6XX_TPL1_UNKNOWN_B605, 0x44);

View File

@ -1285,7 +1285,7 @@ fd6_emit_restore(struct fd_batch *batch, struct fd_ringbuffer *ring)
OUT_WFI5(ring);
WRITE(REG_A6XX_RB_UNKNOWN_8E04, 0x0);
WRITE(REG_A6XX_SP_UNKNOWN_AE04, 0x8);
WRITE(REG_A6XX_SP_FLOAT_CNTL, A6XX_SP_FLOAT_CNTL_F16_NO_INF);
WRITE(REG_A6XX_SP_UNKNOWN_AE00, 0);
WRITE(REG_A6XX_SP_PERFCTR_ENABLE, 0x3f);
WRITE(REG_A6XX_TPL1_UNKNOWN_B605, 0x44);