nir: mind rounding mode on fadd, fsub, fmul and fma opcodes
According to the Vulkan spec, the new execution modes affect only correctly rounded SPIR-V instructions, which include fadd, fsub and fmul.

v2:
- Fix the fmul, fsub and fadd round-to-zero definitions: they should use auxiliary functions to calculate the proper value, because Mesa uses the round-to-nearest-even rounding mode by default (Connor).

v3:
- Do an actual fused multiply-add in ffma (Connor).

v4:
- Simplify fadd and fmul for bit sizes < 64 (Connor).
- Do not use the double ffma for 32-bit floats (Connor).

Signed-off-by: Samuel Iglesias Gonsálvez <siglesias@igalia.com>
Signed-off-by: Andres Gomez <agomez@igalia.com>
Reviewed-by: Connor Abbott <cwabbott0@gmail.com> [v3]
This commit is contained in:
parent
0ac07c7ca7
commit
7580707345
|
@ -64,6 +64,7 @@ template = """\
|
|||
#include "util/rounding.h" /* for _mesa_roundeven */
|
||||
#include "util/half_float.h"
|
||||
#include "util/double.h"
|
||||
#include "util/softfloat.h"
|
||||
#include "util/bigmath.h"
|
||||
#include "nir_constant_expressions.h"
|
||||
|
||||
|
|
|
@ -492,7 +492,16 @@ def binop_reduce(name, output_size, output_type, src_type, prereduce_expr,
|
|||
[4, 4], [src_type, src_type], False, _2src_commutative,
|
||||
final(reduce_(reduce_(src0, src1), reduce_(src2, src3))))
|
||||
|
||||
binop("fadd", tfloat, _2src_commutative + associative, "src0 + src1")
|
||||
binop("fadd", tfloat, _2src_commutative + associative,"""
|
||||
if (nir_is_rounding_mode_rtz(execution_mode, bit_size)) {
|
||||
if (bit_size == 64)
|
||||
dst = _mesa_double_add_rtz(src0, src1);
|
||||
else
|
||||
dst = _mesa_double_to_float_rtz((double)src0 + (double)src1);
|
||||
} else {
|
||||
dst = src0 + src1;
|
||||
}
|
||||
""")
|
||||
binop("iadd", tint, _2src_commutative + associative, "src0 + src1")
|
||||
binop("iadd_sat", tint, _2src_commutative, """
|
||||
src1 > 0 ?
|
||||
|
@ -508,10 +517,28 @@ binop("isub_sat", tint, "", """
|
|||
""")
|
||||
binop("usub_sat", tuint, "", "src0 < src1 ? 0 : src0 - src1")
|
||||
|
||||
binop("fsub", tfloat, "", "src0 - src1")
|
||||
binop("fsub", tfloat, "", """
|
||||
if (nir_is_rounding_mode_rtz(execution_mode, bit_size)) {
|
||||
if (bit_size == 64)
|
||||
dst = _mesa_double_sub_rtz(src0, src1);
|
||||
else
|
||||
dst = _mesa_double_to_float_rtz((double)src0 - (double)src1);
|
||||
} else {
|
||||
dst = src0 - src1;
|
||||
}
|
||||
""")
|
||||
binop("isub", tint, "", "src0 - src1")
|
||||
|
||||
binop("fmul", tfloat, _2src_commutative + associative, "src0 * src1")
|
||||
binop("fmul", tfloat, _2src_commutative + associative, """
|
||||
if (nir_is_rounding_mode_rtz(execution_mode, bit_size)) {
|
||||
if (bit_size == 64)
|
||||
dst = _mesa_double_mul_rtz(src0, src1);
|
||||
else
|
||||
dst = _mesa_double_to_float_rtz((double)src0 * (double)src1);
|
||||
} else {
|
||||
dst = src0 * src1;
|
||||
}
|
||||
""")
|
||||
# low 32-bits of signed/unsigned integer multiply
|
||||
binop("imul", tint, _2src_commutative + associative, "src0 * src1")
|
||||
|
||||
|
@ -834,7 +861,21 @@ def triop_horiz(name, output_size, src1_size, src2_size, src3_size, const_expr):
|
|||
[src1_size, src2_size, src3_size],
|
||||
[tuint, tuint, tuint], False, "", const_expr)
|
||||
|
||||
triop("ffma", tfloat, _2src_commutative, "src0 * src1 + src2")
|
||||
triop("ffma", tfloat, _2src_commutative, """
|
||||
if (nir_is_rounding_mode_rtz(execution_mode, bit_size)) {
|
||||
if (bit_size == 64)
|
||||
dst = _mesa_double_fma_rtz(src0, src1, src2);
|
||||
else if (bit_size == 32)
|
||||
dst = _mesa_float_fma_rtz(src0, src1, src2);
|
||||
else
|
||||
dst = _mesa_double_to_float_rtz(_mesa_double_fma_rtz(src0, src1, src2));
|
||||
} else {
|
||||
if (bit_size == 32)
|
||||
dst = fmaf(src0, src1, src2);
|
||||
else
|
||||
dst = fma(src0, src1, src2);
|
||||
}
|
||||
""")
|
||||
|
||||
triop("flrp", tfloat, "", "src0 * (1 - src2) + src1 * src2")
|
||||
|
||||
|
|
Loading…
Reference in New Issue