mesa: implement glUniform for packed FP16 uniforms

Let's just say that this passes dEQP.

If you think the _mesa_float_to_half conversions are costly, you can
enable FP16 uniforms only if the CPU supports F16C, which is fast.
Drivers will control whether this is used, not common code.

ARM will need something that is equivalent to F16C.

Acked-by: Eric Anholt <eric@anholt.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/9050>
This commit is contained in:
Marek Olšák 2021-02-12 08:44:31 -05:00 committed by Marge Bot
parent 6604aa2606
commit 6bf18fe036
1 changed file with 163 additions and 5 deletions

View File

@ -963,6 +963,9 @@ validate_uniform(GLint location, GLsizei count, const GLvoid *values,
case GLSL_TYPE_IMAGE:
match = (basicType == GLSL_TYPE_INT && _mesa_is_desktop_gl(ctx));
break;
case GLSL_TYPE_FLOAT16:
match = basicType == GLSL_TYPE_FLOAT;
break;
default:
match = (basicType == uni->type->base_type);
break;
@ -1071,8 +1074,9 @@ copy_uniforms_to_storage(gl_constant_value *storage,
const gl_constant_value *src = (const gl_constant_value*)values;
bool copy_as_uint64 = uni->is_bindless &&
(uni->type->is_sampler() || uni->type->is_image());
bool copy_to_float16 = uni->type->base_type == GLSL_TYPE_FLOAT16;
if (!uni->type->is_boolean() && !copy_as_uint64) {
if (!uni->type->is_boolean() && !copy_as_uint64 && !copy_to_float16) {
unsigned size = sizeof(storage[0]) * components * count * size_mul;
if (!memcmp(storage, values, size))
@ -1082,6 +1086,46 @@ copy_uniforms_to_storage(gl_constant_value *storage,
_mesa_flush_vertices_for_uniforms(ctx, uni);
memcpy(storage, values, size);
return true;
} else if (copy_to_float16) {
assert(ctx->Const.PackedDriverUniformStorage);
const unsigned dst_components = align(components, 2);
uint16_t *dst = (uint16_t*)storage;
int i = 0;
unsigned c = 0;
if (flush) {
/* Find the first element that's different. */
for (; i < count; i++) {
for (; c < components; c++) {
if (dst[c] != _mesa_float_to_half(src[c].f)) {
_mesa_flush_vertices_for_uniforms(ctx, uni);
flush = false;
goto break_loops;
}
}
c = 0;
dst += dst_components;
src += components;
}
break_loops:
if (flush)
return false; /* No change. */
}
/* Set the remaining elements. We know that at least 1 element is
* different and that we have flushed.
*/
for (; i < count; i++) {
for (; c < components; c++)
dst[c] = _mesa_float_to_half(src[c].f);
c = 0;
dst += dst_components;
src += components;
}
return true;
} else if (copy_as_uint64) {
const unsigned elems = components * count;
@ -1230,8 +1274,14 @@ _mesa_uniform(GLint location, GLsizei count, const GLvoid *values,
if (ctx->Const.PackedDriverUniformStorage &&
(uni->is_bindless || !uni->type->contains_opaque())) {
for (unsigned s = 0; s < uni->num_driver_storage; s++) {
unsigned dword_components = components;
/* 16-bit uniforms are packed. */
if (glsl_base_type_is_16bit(uni->type->base_type))
dword_components = DIV_ROUND_UP(dword_components, 2);
storage = (gl_constant_value *)
uni->driver_storage[s].data + (size_mul * offset * components);
uni->driver_storage[s].data + (size_mul * offset * dword_components);
if (copy_uniforms_to_storage(storage, uni, ctx, count, values, size_mul,
offset, components, basicType, !ctx_flushed))
@ -1362,7 +1412,106 @@ copy_uniform_matrix_to_storage(struct gl_context *ctx,
const unsigned elements = components * vectors;
const unsigned size = sizeof(storage[0]) * elements * count * size_mul;
if (!transpose) {
if (uni->type->base_type == GLSL_TYPE_FLOAT16) {
assert(ctx->Const.PackedDriverUniformStorage);
const unsigned dst_components = align(components, 2);
const unsigned dst_elements = dst_components * vectors;
if (!transpose) {
const float *src = (const float *)values;
uint16_t *dst = (uint16_t*)storage;
unsigned i = 0, r = 0, c = 0;
if (flush) {
/* Find the first element that's different. */
for (; i < count; i++) {
for (; c < cols; c++) {
for (; r < rows; r++) {
if (dst[(c * dst_components) + r] !=
_mesa_float_to_half(src[(c * components) + r])) {
_mesa_flush_vertices_for_uniforms(ctx, uni);
flush = false;
goto break_loops_16bit;
}
}
r = 0;
}
c = 0;
dst += dst_elements;
src += elements;
}
break_loops_16bit:
if (flush)
return false; /* No change. */
}
/* Set the remaining elements. We know that at least 1 element is
* different and that we have flushed.
*/
for (; i < count; i++) {
for (; c < cols; c++) {
for (; r < rows; r++) {
dst[(c * dst_components) + r] =
_mesa_float_to_half(src[(c * components) + r]);
}
r = 0;
}
c = 0;
dst += dst_elements;
src += elements;
}
return true;
} else {
/* Transpose the matrix. */
const float *src = (const float *)values;
uint16_t *dst = (uint16_t*)storage;
unsigned i = 0, r = 0, c = 0;
if (flush) {
/* Find the first element that's different. */
for (; i < count; i++) {
for (; r < rows; r++) {
for (; c < cols; c++) {
if (dst[(c * dst_components) + r] !=
_mesa_float_to_half(src[c + (r * vectors)])) {
_mesa_flush_vertices_for_uniforms(ctx, uni);
flush = false;
goto break_loops_16bit_transpose;
}
}
c = 0;
}
r = 0;
dst += elements;
src += elements;
}
break_loops_16bit_transpose:
if (flush)
return false; /* No change. */
}
/* Set the remaining elements. We know that at least 1 element is
* different and that we have flushed.
*/
for (; i < count; i++) {
for (; r < rows; r++) {
for (; c < cols; c++) {
dst[(c * dst_components) + r] =
_mesa_float_to_half(src[c + (r * vectors)]);
}
c = 0;
}
r = 0;
dst += elements;
src += elements;
}
return true;
}
} else if (!transpose) {
if (!memcmp(storage, values, size))
return false;
@ -1529,7 +1678,9 @@ _mesa_uniform_matrix(GLint location, GLsizei count,
* There are no Boolean matrix types, so we do not need to allow
* GLSL_TYPE_BOOL here (as _mesa_uniform does).
*/
if (uni->type->base_type != basicType) {
if (uni->type->base_type != basicType &&
!(uni->type->base_type == GLSL_TYPE_FLOAT16 &&
basicType == GLSL_TYPE_FLOAT)) {
_mesa_error(ctx, GL_INVALID_OPERATION,
"glUniformMatrix%ux%u(\"%s\"@%d is %s, not %s)",
cols, rows, uni->name, location,
@ -1566,8 +1717,15 @@ _mesa_uniform_matrix(GLint location, GLsizei count,
bool flushed = false;
for (unsigned s = 0; s < uni->num_driver_storage; s++) {
unsigned dword_components = components;
/* 16-bit uniforms are packed. */
if (glsl_base_type_is_16bit(uni->type->base_type))
dword_components = DIV_ROUND_UP(dword_components, 2);
storage = (gl_constant_value *)
uni->driver_storage[s].data + (size_mul * offset * elements);
uni->driver_storage[s].data +
(size_mul * offset * dword_components * vectors);
if (copy_uniform_matrix_to_storage(ctx, storage, uni, count, values,
size_mul, offset, components,