From 8ece71507db9ca8c1cd01974f81a17d1f52efd0c Mon Sep 17 00:00:00 2001
From: Samuel Pitoiset
Date: Mon, 18 May 2020 17:03:21 +0200
Subject: [PATCH] aco: allocate a temp VGPR for some 8-bit/16-bit reduction ops on GFX10

Signed-off-by: Samuel Pitoiset
Reviewed-by: Rhys Perry
Part-of:
---
 src/amd/compiler/aco_reduce_assign.cpp | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/amd/compiler/aco_reduce_assign.cpp b/src/amd/compiler/aco_reduce_assign.cpp
index 3dd396e2569..708f401d5fe 100644
--- a/src/amd/compiler/aco_reduce_assign.cpp
+++ b/src/amd/compiler/aco_reduce_assign.cpp
@@ -125,10 +125,13 @@ void setup_reduce_temp(Program* program)
                           op == fmin64 || op == fmax64 || op == umin64 ||
                           op == umax64 || op == imin64 || op == imax64 ||
                           op == imul64;
+         bool gfx10_need_vtmp = op == imul8 || op == imax8 || op == imin8 || op == umin8 ||
+                                op == imul16 || op == imax16 || op == imin16 || op == umin16 ||
+                                op == iadd64;
 
          if (program->chip_class >= GFX10 && cluster_size == 64)
             need_vtmp = true;
-         if (program->chip_class >= GFX10 && op == iadd64)
+         if (program->chip_class >= GFX10 && gfx10_need_vtmp)
             need_vtmp = true;
          if (program->chip_class <= GFX7)
             need_vtmp = true;