gallivm: Allow conversions to/from registers of different sizes.
Allow, for example, converting from 4 x float32 to 4 x unorm8 and vice versa. Uses code and ideas from Brian Paul.
This commit is contained in:
parent
a70ec096aa
commit
b919bb7f61
|
@ -83,6 +83,9 @@
|
||||||
*
|
*
|
||||||
* Although the result values can be scaled to an arbitrary bit width specified
|
* Although the result values can be scaled to an arbitrary bit width specified
|
||||||
* by dst_width, the actual result type will have the same width.
|
* by dst_width, the actual result type will have the same width.
|
||||||
|
*
|
||||||
|
* Ex: src = { float, float, float, float }
|
||||||
|
* return { i32, i32, i32, i32 } where each value is in [0, 2^dst_width-1].
|
||||||
*/
|
*/
|
||||||
LLVMValueRef
|
LLVMValueRef
|
||||||
lp_build_clamped_float_to_unsigned_norm(LLVMBuilderRef builder,
|
lp_build_clamped_float_to_unsigned_norm(LLVMBuilderRef builder,
|
||||||
|
@ -152,6 +155,8 @@ lp_build_clamped_float_to_unsigned_norm(LLVMBuilderRef builder,
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Inverse of lp_build_clamped_float_to_unsigned_norm above.
|
* Inverse of lp_build_clamped_float_to_unsigned_norm above.
|
||||||
|
* Ex: src = { i32, i32, i32, i32 } with values in range [0, 2^src_width-1]
|
||||||
|
* return {float, float, float, float} with values in range [0, 1].
|
||||||
*/
|
*/
|
||||||
LLVMValueRef
|
LLVMValueRef
|
||||||
lp_build_unsigned_norm_to_float(LLVMBuilderRef builder,
|
lp_build_unsigned_norm_to_float(LLVMBuilderRef builder,
|
||||||
|
@ -219,14 +224,13 @@ lp_build_conv(LLVMBuilderRef builder,
|
||||||
unsigned num_tmps;
|
unsigned num_tmps;
|
||||||
unsigned i;
|
unsigned i;
|
||||||
|
|
||||||
/* Register width must remain constant */
|
|
||||||
assert(src_type.width * src_type.length == dst_type.width * dst_type.length);
|
|
||||||
|
|
||||||
/* We must not loose or gain channels. Only precision */
|
/* We must not loose or gain channels. Only precision */
|
||||||
assert(src_type.length * num_srcs == dst_type.length * num_dsts);
|
assert(src_type.length * num_srcs == dst_type.length * num_dsts);
|
||||||
|
|
||||||
assert(src_type.length <= LP_MAX_VECTOR_LENGTH);
|
assert(src_type.length <= LP_MAX_VECTOR_LENGTH);
|
||||||
assert(dst_type.length <= LP_MAX_VECTOR_LENGTH);
|
assert(dst_type.length <= LP_MAX_VECTOR_LENGTH);
|
||||||
|
assert(num_srcs <= LP_MAX_VECTOR_LENGTH);
|
||||||
|
assert(num_dsts <= LP_MAX_VECTOR_LENGTH);
|
||||||
|
|
||||||
tmp_type = src_type;
|
tmp_type = src_type;
|
||||||
for(i = 0; i < num_srcs; ++i)
|
for(i = 0; i < num_srcs; ++i)
|
||||||
|
@ -330,25 +334,11 @@ lp_build_conv(LLVMBuilderRef builder,
|
||||||
|
|
||||||
assert(!tmp_type.floating || tmp_type.width == dst_type.width);
|
assert(!tmp_type.floating || tmp_type.width == dst_type.width);
|
||||||
|
|
||||||
if(tmp_type.width > dst_type.width) {
|
lp_build_resize(builder, tmp_type, dst_type, tmp, num_srcs, tmp, num_dsts);
|
||||||
assert(num_dsts == 1);
|
|
||||||
tmp[0] = lp_build_pack(builder, tmp_type, dst_type, TRUE, tmp, num_tmps);
|
|
||||||
tmp_type.width = dst_type.width;
|
|
||||||
tmp_type.length = dst_type.length;
|
|
||||||
num_tmps = 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
if(tmp_type.width < dst_type.width) {
|
tmp_type.width = dst_type.width;
|
||||||
assert(num_tmps == 1);
|
tmp_type.length = dst_type.length;
|
||||||
lp_build_unpack(builder, tmp_type, dst_type, tmp[0], tmp, num_dsts);
|
num_tmps = num_dsts;
|
||||||
tmp_type.width = dst_type.width;
|
|
||||||
tmp_type.length = dst_type.length;
|
|
||||||
num_tmps = num_dsts;
|
|
||||||
}
|
|
||||||
|
|
||||||
assert(tmp_type.width == dst_type.width);
|
|
||||||
assert(tmp_type.length == dst_type.length);
|
|
||||||
assert(num_tmps == num_dsts);
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Scale to the widest range
|
* Scale to the widest range
|
||||||
|
|
|
@ -427,3 +427,109 @@ lp_build_pack(LLVMBuilderRef builder,
|
||||||
|
|
||||||
return tmp[0];
|
return tmp[0];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Truncate or expand the bitwidth
|
||||||
|
*/
|
||||||
|
void
|
||||||
|
lp_build_resize(LLVMBuilderRef builder,
|
||||||
|
struct lp_type src_type,
|
||||||
|
struct lp_type dst_type,
|
||||||
|
const LLVMValueRef *src, unsigned num_srcs,
|
||||||
|
LLVMValueRef *dst, unsigned num_dsts)
|
||||||
|
{
|
||||||
|
LLVMValueRef tmp[LP_MAX_VECTOR_LENGTH];
|
||||||
|
unsigned i;
|
||||||
|
|
||||||
|
assert(!src_type.floating || src_type.width == dst_type.width);
|
||||||
|
|
||||||
|
/* We must not loose or gain channels. Only precision */
|
||||||
|
assert(src_type.length * num_srcs == dst_type.length * num_dsts);
|
||||||
|
|
||||||
|
/* We don't support M:N conversion, only 1:N, M:1, or 1:1 */
|
||||||
|
assert(num_srcs == 1 || num_dsts == 1);
|
||||||
|
|
||||||
|
assert(src_type.length <= LP_MAX_VECTOR_LENGTH);
|
||||||
|
assert(dst_type.length <= LP_MAX_VECTOR_LENGTH);
|
||||||
|
assert(num_srcs <= LP_MAX_VECTOR_LENGTH);
|
||||||
|
assert(num_dsts <= LP_MAX_VECTOR_LENGTH);
|
||||||
|
|
||||||
|
if (src_type.width > dst_type.width) {
|
||||||
|
/*
|
||||||
|
* Truncate bit width.
|
||||||
|
*/
|
||||||
|
|
||||||
|
assert(num_dsts == 1);
|
||||||
|
|
||||||
|
if (src_type.width * src_type.length == dst_type.width * dst_type.length) {
|
||||||
|
/*
|
||||||
|
* Register width remains constant -- use vector packing intrinsics
|
||||||
|
*/
|
||||||
|
|
||||||
|
tmp[0] = lp_build_pack(builder, src_type, dst_type, TRUE, src, num_srcs);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
/*
|
||||||
|
* Do it element-wise.
|
||||||
|
*/
|
||||||
|
|
||||||
|
assert(src_type.length == dst_type.length);
|
||||||
|
tmp[0] = lp_build_undef(dst_type);
|
||||||
|
for (i = 0; i < dst_type.length; ++i) {
|
||||||
|
LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
|
||||||
|
LLVMValueRef val = LLVMBuildExtractElement(builder, src[0], index, "");
|
||||||
|
val = LLVMBuildTrunc(builder, val, lp_build_elem_type(dst_type), "");
|
||||||
|
tmp[0] = LLVMBuildInsertElement(builder, tmp[0], val, index, "");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if (src_type.width < dst_type.width) {
|
||||||
|
/*
|
||||||
|
* Expand bit width.
|
||||||
|
*/
|
||||||
|
|
||||||
|
assert(num_srcs == 1);
|
||||||
|
|
||||||
|
if (src_type.width * src_type.length == dst_type.width * dst_type.length) {
|
||||||
|
/*
|
||||||
|
* Register width remains constant -- use vector unpack intrinsics
|
||||||
|
*/
|
||||||
|
lp_build_unpack(builder, src_type, dst_type, src[0], tmp, num_dsts);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
/*
|
||||||
|
* Do it element-wise.
|
||||||
|
*/
|
||||||
|
|
||||||
|
assert(src_type.length == dst_type.length);
|
||||||
|
tmp[0] = lp_build_undef(dst_type);
|
||||||
|
for (i = 0; i < dst_type.length; ++i) {
|
||||||
|
LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
|
||||||
|
LLVMValueRef val = LLVMBuildExtractElement(builder, src[0], index, "");
|
||||||
|
|
||||||
|
if (src_type.sign && dst_type.sign) {
|
||||||
|
val = LLVMBuildSExt(builder, val, lp_build_elem_type(dst_type), "");
|
||||||
|
} else {
|
||||||
|
val = LLVMBuildZExt(builder, val, lp_build_elem_type(dst_type), "");
|
||||||
|
}
|
||||||
|
tmp[0] = LLVMBuildInsertElement(builder, tmp[0], val, index, "");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
/*
|
||||||
|
* No-op
|
||||||
|
*/
|
||||||
|
|
||||||
|
assert(num_srcs == 1);
|
||||||
|
assert(num_dsts == 1);
|
||||||
|
|
||||||
|
tmp[0] = src[0];
|
||||||
|
}
|
||||||
|
|
||||||
|
for(i = 0; i < num_dsts; ++i)
|
||||||
|
dst[i] = tmp[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -92,4 +92,12 @@ lp_build_pack(LLVMBuilderRef builder,
|
||||||
const LLVMValueRef *src, unsigned num_srcs);
|
const LLVMValueRef *src, unsigned num_srcs);
|
||||||
|
|
||||||
|
|
||||||
|
void
|
||||||
|
lp_build_resize(LLVMBuilderRef builder,
|
||||||
|
struct lp_type src_type,
|
||||||
|
struct lp_type dst_type,
|
||||||
|
const LLVMValueRef *src, unsigned num_srcs,
|
||||||
|
LLVMValueRef *dst, unsigned num_dsts);
|
||||||
|
|
||||||
|
|
||||||
#endif /* !LP_BLD_PACK_H */
|
#endif /* !LP_BLD_PACK_H */
|
||||||
|
|
|
@ -167,19 +167,26 @@ test_one(unsigned verbose,
|
||||||
unsigned i, j;
|
unsigned i, j;
|
||||||
void *code;
|
void *code;
|
||||||
|
|
||||||
|
if (src_type.width * src_type.length != dst_type.width * dst_type.length ||
|
||||||
|
src_type.length != dst_type.length) {
|
||||||
|
return TRUE;
|
||||||
|
}
|
||||||
|
|
||||||
if(verbose >= 1)
|
if(verbose >= 1)
|
||||||
dump_conv_types(stdout, src_type, dst_type);
|
dump_conv_types(stdout, src_type, dst_type);
|
||||||
|
|
||||||
if(src_type.length > dst_type.length) {
|
if (src_type.length > dst_type.length) {
|
||||||
num_srcs = 1;
|
num_srcs = 1;
|
||||||
num_dsts = src_type.length/dst_type.length;
|
num_dsts = src_type.length/dst_type.length;
|
||||||
}
|
}
|
||||||
else {
|
else if (src_type.length < dst_type.length) {
|
||||||
num_dsts = 1;
|
num_dsts = 1;
|
||||||
num_srcs = dst_type.length/src_type.length;
|
num_srcs = dst_type.length/src_type.length;
|
||||||
}
|
}
|
||||||
|
else {
|
||||||
assert(src_type.width * src_type.length == dst_type.width * dst_type.length);
|
num_dsts = 1;
|
||||||
|
num_srcs = 1;
|
||||||
|
}
|
||||||
|
|
||||||
/* We must not loose or gain channels. Only precision */
|
/* We must not loose or gain channels. Only precision */
|
||||||
assert(src_type.length * num_srcs == dst_type.length * num_dsts);
|
assert(src_type.length * num_srcs == dst_type.length * num_dsts);
|
||||||
|
@ -381,6 +388,11 @@ const struct lp_type conv_types[] = {
|
||||||
{ FALSE, FALSE, TRUE, FALSE, 8, 16 },
|
{ FALSE, FALSE, TRUE, FALSE, 8, 16 },
|
||||||
{ FALSE, FALSE, FALSE, TRUE, 8, 16 },
|
{ FALSE, FALSE, FALSE, TRUE, 8, 16 },
|
||||||
{ FALSE, FALSE, FALSE, FALSE, 8, 16 },
|
{ FALSE, FALSE, FALSE, FALSE, 8, 16 },
|
||||||
|
|
||||||
|
{ FALSE, FALSE, TRUE, TRUE, 8, 4 },
|
||||||
|
{ FALSE, FALSE, TRUE, FALSE, 8, 4 },
|
||||||
|
{ FALSE, FALSE, FALSE, TRUE, 8, 4 },
|
||||||
|
{ FALSE, FALSE, FALSE, FALSE, 8, 4 },
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue