radeonsi: use pknorm_i16/u16 and pk_i16/u16 LLVM intrinsics
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
This commit is contained in:
parent
b3a1d9588e
commit
847d0a393d
|
@ -67,6 +67,7 @@ ac_llvm_context_init(struct ac_llvm_context *ctx, LLVMContextRef context,
|
|||
ctx->f16 = LLVMHalfTypeInContext(ctx->context);
|
||||
ctx->f32 = LLVMFloatTypeInContext(ctx->context);
|
||||
ctx->f64 = LLVMDoubleTypeInContext(ctx->context);
|
||||
ctx->v2i16 = LLVMVectorType(ctx->i16, 2);
|
||||
ctx->v2i32 = LLVMVectorType(ctx->i32, 2);
|
||||
ctx->v3i32 = LLVMVectorType(ctx->i32, 3);
|
||||
ctx->v4i32 = LLVMVectorType(ctx->i32, 4);
|
||||
|
@ -1281,6 +1282,20 @@ LLVMValueRef ac_build_fmax(struct ac_llvm_context *ctx, LLVMValueRef a,
|
|||
AC_FUNC_ATTR_READNONE);
|
||||
}
|
||||
|
||||
/* Signed integer minimum of a and b, emitted as an icmp + select pair. */
LLVMValueRef ac_build_imin(struct ac_llvm_context *ctx, LLVMValueRef a,
			   LLVMValueRef b)
{
	LLVMBuilderRef builder = ctx->builder;
	/* Signed "a <= b" selects a, otherwise b. */
	LLVMValueRef a_le_b = LLVMBuildICmp(builder, LLVMIntSLE, a, b, "");

	return LLVMBuildSelect(builder, a_le_b, a, b, "");
}
|
||||
|
||||
/* Signed integer maximum of a and b, emitted as an icmp + select pair. */
LLVMValueRef ac_build_imax(struct ac_llvm_context *ctx, LLVMValueRef a,
			   LLVMValueRef b)
{
	LLVMBuilderRef builder = ctx->builder;
	/* Signed "a > b" selects a, otherwise b. */
	LLVMValueRef a_gt_b = LLVMBuildICmp(builder, LLVMIntSGT, a, b, "");

	return LLVMBuildSelect(builder, a_gt_b, a, b, "");
}
|
||||
|
||||
LLVMValueRef ac_build_umin(struct ac_llvm_context *ctx, LLVMValueRef a,
|
||||
LLVMValueRef b)
|
||||
{
|
||||
|
@ -1506,6 +1521,155 @@ LLVMValueRef ac_build_cvt_pkrtz_f16(struct ac_llvm_context *ctx,
|
|||
AC_FUNC_ATTR_LEGACY);
|
||||
}
|
||||
|
||||
/* Upper 16 bits must be zero. */
|
||||
static LLVMValueRef ac_llvm_pack_two_int16(struct ac_llvm_context *ctx,
|
||||
LLVMValueRef val[2])
|
||||
{
|
||||
return LLVMBuildOr(ctx->builder, val[0],
|
||||
LLVMBuildShl(ctx->builder, val[1],
|
||||
LLVMConstInt(ctx->i32, 16, 0),
|
||||
""), "");
|
||||
}
|
||||
|
||||
/* Upper 16 bits are ignored and will be dropped. */
|
||||
static LLVMValueRef ac_llvm_pack_two_int32_as_int16(struct ac_llvm_context *ctx,
|
||||
LLVMValueRef val[2])
|
||||
{
|
||||
LLVMValueRef v[2] = {
|
||||
LLVMBuildAnd(ctx->builder, val[0],
|
||||
LLVMConstInt(ctx->i32, 0xffff, 0), ""),
|
||||
val[1],
|
||||
};
|
||||
return ac_llvm_pack_two_int16(ctx, v);
|
||||
}
|
||||
|
||||
/*
 * Convert two f32 values to signed normalized 16-bit integers and pack
 * them into one i32.
 *
 * With HAVE_LLVM >= 0x0600 this emits llvm.amdgcn.cvt.pknorm.i16 and
 * bitcasts the v2i16 result to i32. Otherwise the conversion is expanded
 * manually: clamp, scale by 32767, add +/-0.5, convert to integer, pack.
 */
LLVMValueRef ac_build_cvt_pknorm_i16(struct ac_llvm_context *ctx,
				     LLVMValueRef args[2])
{
	LLVMBuilderRef builder = ctx->builder;

	if (HAVE_LLVM < 0x0600) {
		LLVMValueRef converted[2];

		for (int i = 0; i < 2; i++) {
			/* Clamp between [-1, 1]. */
			LLVMValueRef v = ac_build_fmin(ctx, args[i], ctx->f32_1);
			v = ac_build_fmax(ctx, v, LLVMConstReal(ctx->f32, -1));

			/* Convert to a signed integer in [-32767, 32767]. */
			v = LLVMBuildFMul(builder, v,
					  LLVMConstReal(ctx->f32, 32767), "");

			/* Add 0.5 when the value is >= 0, otherwise add -0.5. */
			LLVMValueRef is_ge_zero =
				LLVMBuildFCmp(builder, LLVMRealOGE, v,
					      ctx->f32_0, "");
			LLVMValueRef bias =
				LLVMBuildSelect(builder, is_ge_zero,
						LLVMConstReal(ctx->f32, 0.5),
						LLVMConstReal(ctx->f32, -0.5), "");
			v = LLVMBuildFAdd(builder, v, bias, "");

			converted[i] = LLVMBuildFPToSI(builder, v, ctx->i32, "");
		}

		return ac_llvm_pack_two_int32_as_int16(ctx, converted);
	}

	LLVMValueRef packed =
		ac_build_intrinsic(ctx, "llvm.amdgcn.cvt.pknorm.i16",
				   ctx->v2i16, args, 2,
				   AC_FUNC_ATTR_READNONE);

	return LLVMBuildBitCast(builder, packed, ctx->i32, "");
}
|
||||
|
||||
/*
 * Convert two f32 values to unsigned normalized 16-bit integers and pack
 * them into one i32.
 *
 * With HAVE_LLVM >= 0x0600 this emits llvm.amdgcn.cvt.pknorm.u16 and
 * bitcasts the v2i16 result to i32. Otherwise the conversion is expanded
 * manually: ac_build_clamp, scale by 65535, add 0.5, convert to unsigned
 * integer, pack.
 */
LLVMValueRef ac_build_cvt_pknorm_u16(struct ac_llvm_context *ctx,
				     LLVMValueRef args[2])
{
	LLVMBuilderRef builder = ctx->builder;

	if (HAVE_LLVM < 0x0600) {
		LLVMValueRef converted[2];

		for (int i = 0; i < 2; i++) {
			LLVMValueRef v = ac_build_clamp(ctx, args[i]);

			v = LLVMBuildFMul(builder, v,
					  LLVMConstReal(ctx->f32, 65535), "");
			v = LLVMBuildFAdd(builder, v,
					  LLVMConstReal(ctx->f32, 0.5), "");
			converted[i] = LLVMBuildFPToUI(builder, v, ctx->i32, "");
		}

		return ac_llvm_pack_two_int32_as_int16(ctx, converted);
	}

	LLVMValueRef packed =
		ac_build_intrinsic(ctx, "llvm.amdgcn.cvt.pknorm.u16",
				   ctx->v2i16, args, 2,
				   AC_FUNC_ATTR_READNONE);

	return LLVMBuildBitCast(builder, packed, ctx->i32, "");
}
|
||||
|
||||
/* The 8-bit and 10-bit clamping is for HW workarounds. */
|
||||
LLVMValueRef ac_build_cvt_pk_i16(struct ac_llvm_context *ctx,
|
||||
LLVMValueRef args[2], unsigned bits, bool hi)
|
||||
{
|
||||
assert(bits == 8 || bits == 10 || bits == 16);
|
||||
|
||||
LLVMValueRef max_rgb = LLVMConstInt(ctx->i32,
|
||||
bits == 8 ? 127 : bits == 10 ? 511 : 32767, 0);
|
||||
LLVMValueRef min_rgb = LLVMConstInt(ctx->i32,
|
||||
bits == 8 ? -128 : bits == 10 ? -512 : -32768, 0);
|
||||
LLVMValueRef max_alpha =
|
||||
bits != 10 ? max_rgb : ctx->i32_1;
|
||||
LLVMValueRef min_alpha =
|
||||
bits != 10 ? min_rgb : LLVMConstInt(ctx->i32, -2, 0);
|
||||
bool has_intrinsic = HAVE_LLVM >= 0x0600;
|
||||
|
||||
/* Clamp. */
|
||||
if (!has_intrinsic || bits != 16) {
|
||||
for (int i = 0; i < 2; i++) {
|
||||
bool alpha = hi && i == 1;
|
||||
args[i] = ac_build_imin(ctx, args[i],
|
||||
alpha ? max_alpha : max_rgb);
|
||||
args[i] = ac_build_imax(ctx, args[i],
|
||||
alpha ? min_alpha : min_rgb);
|
||||
}
|
||||
}
|
||||
|
||||
if (has_intrinsic) {
|
||||
LLVMValueRef res =
|
||||
ac_build_intrinsic(ctx, "llvm.amdgcn.cvt.pk.i16",
|
||||
ctx->v2i16, args, 2,
|
||||
AC_FUNC_ATTR_READNONE);
|
||||
return LLVMBuildBitCast(ctx->builder, res, ctx->i32, "");
|
||||
}
|
||||
|
||||
return ac_llvm_pack_two_int32_as_int16(ctx, args);
|
||||
}
|
||||
|
||||
/* The 8-bit and 10-bit clamping is for HW workarounds. */
|
||||
LLVMValueRef ac_build_cvt_pk_u16(struct ac_llvm_context *ctx,
|
||||
LLVMValueRef args[2], unsigned bits, bool hi)
|
||||
{
|
||||
assert(bits == 8 || bits == 10 || bits == 16);
|
||||
|
||||
LLVMValueRef max_rgb = LLVMConstInt(ctx->i32,
|
||||
bits == 8 ? 255 : bits == 10 ? 1023 : 65535, 0);
|
||||
LLVMValueRef max_alpha =
|
||||
bits != 10 ? max_rgb : LLVMConstInt(ctx->i32, 3, 0);
|
||||
bool has_intrinsic = HAVE_LLVM >= 0x0600;
|
||||
|
||||
/* Clamp. */
|
||||
if (!has_intrinsic || bits != 16) {
|
||||
for (int i = 0; i < 2; i++) {
|
||||
bool alpha = hi && i == 1;
|
||||
args[i] = ac_build_umin(ctx, args[i],
|
||||
alpha ? max_alpha : max_rgb);
|
||||
}
|
||||
}
|
||||
|
||||
if (has_intrinsic) {
|
||||
LLVMValueRef res =
|
||||
ac_build_intrinsic(ctx, "llvm.amdgcn.cvt.pk.u16",
|
||||
ctx->v2i16, args, 2,
|
||||
AC_FUNC_ATTR_READNONE);
|
||||
return LLVMBuildBitCast(ctx->builder, res, ctx->i32, "");
|
||||
}
|
||||
|
||||
return ac_llvm_pack_two_int16(ctx, args);
|
||||
}
|
||||
|
||||
LLVMValueRef ac_build_wqm_vote(struct ac_llvm_context *ctx, LLVMValueRef i1)
|
||||
{
|
||||
assert(HAVE_LLVM >= 0x0600);
|
||||
|
|
|
@ -53,6 +53,7 @@ struct ac_llvm_context {
|
|||
LLVMTypeRef f16;
|
||||
LLVMTypeRef f32;
|
||||
LLVMTypeRef f64;
|
||||
LLVMTypeRef v2i16;
|
||||
LLVMTypeRef v2i32;
|
||||
LLVMTypeRef v3i32;
|
||||
LLVMTypeRef v4i32;
|
||||
|
@ -258,6 +259,10 @@ LLVMValueRef ac_build_fmin(struct ac_llvm_context *ctx, LLVMValueRef a,
|
|||
LLVMValueRef b);
|
||||
LLVMValueRef ac_build_fmax(struct ac_llvm_context *ctx, LLVMValueRef a,
|
||||
LLVMValueRef b);
|
||||
LLVMValueRef ac_build_imin(struct ac_llvm_context *ctx, LLVMValueRef a,
|
||||
LLVMValueRef b);
|
||||
LLVMValueRef ac_build_imax(struct ac_llvm_context *ctx, LLVMValueRef a,
|
||||
LLVMValueRef b);
|
||||
LLVMValueRef ac_build_umin(struct ac_llvm_context *ctx, LLVMValueRef a, LLVMValueRef b);
|
||||
LLVMValueRef ac_build_clamp(struct ac_llvm_context *ctx, LLVMValueRef value);
|
||||
|
||||
|
@ -302,6 +307,14 @@ LLVMValueRef ac_build_image_opcode(struct ac_llvm_context *ctx,
|
|||
struct ac_image_args *a);
|
||||
LLVMValueRef ac_build_cvt_pkrtz_f16(struct ac_llvm_context *ctx,
|
||||
LLVMValueRef args[2]);
|
||||
LLVMValueRef ac_build_cvt_pknorm_i16(struct ac_llvm_context *ctx,
|
||||
LLVMValueRef args[2]);
|
||||
LLVMValueRef ac_build_cvt_pknorm_u16(struct ac_llvm_context *ctx,
|
||||
LLVMValueRef args[2]);
|
||||
LLVMValueRef ac_build_cvt_pk_i16(struct ac_llvm_context *ctx,
|
||||
LLVMValueRef args[2], unsigned bits, bool hi);
|
||||
LLVMValueRef ac_build_cvt_pk_u16(struct ac_llvm_context *ctx,
|
||||
LLVMValueRef args[2], unsigned bits, bool hi);
|
||||
LLVMValueRef ac_build_wqm_vote(struct ac_llvm_context *ctx, LLVMValueRef i1);
|
||||
void ac_build_kill_if_false(struct ac_llvm_context *ctx, LLVMValueRef i1);
|
||||
LLVMValueRef ac_build_bfe(struct ac_llvm_context *ctx, LLVMValueRef input,
|
||||
|
|
|
@ -2388,28 +2388,6 @@ static LLVMValueRef fetch_constant(
|
|||
return bitcast(bld_base, type, buffer_load_const(ctx, bufp, addr));
|
||||
}
|
||||
|
||||
/* Upper 16 bits must be zero. */
|
||||
static LLVMValueRef si_llvm_pack_two_int16(struct si_shader_context *ctx,
|
||||
LLVMValueRef val[2])
|
||||
{
|
||||
return LLVMBuildOr(ctx->ac.builder, val[0],
|
||||
LLVMBuildShl(ctx->ac.builder, val[1],
|
||||
LLVMConstInt(ctx->i32, 16, 0),
|
||||
""), "");
|
||||
}
|
||||
|
||||
/* Upper 16 bits are ignored and will be dropped. */
|
||||
static LLVMValueRef si_llvm_pack_two_int32_as_int16(struct si_shader_context *ctx,
|
||||
LLVMValueRef val[2])
|
||||
{
|
||||
LLVMValueRef v[2] = {
|
||||
LLVMBuildAnd(ctx->ac.builder, val[0],
|
||||
LLVMConstInt(ctx->i32, 0xffff, 0), ""),
|
||||
val[1],
|
||||
};
|
||||
return si_llvm_pack_two_int16(ctx, v);
|
||||
}
|
||||
|
||||
/* Initialize arguments for the shader export intrinsic */
|
||||
static void si_llvm_init_export_args(struct si_shader_context *ctx,
|
||||
LLVMValueRef *values,
|
||||
|
@ -2417,8 +2395,6 @@ static void si_llvm_init_export_args(struct si_shader_context *ctx,
|
|||
struct ac_export_args *args)
|
||||
{
|
||||
LLVMValueRef f32undef = LLVMGetUndef(ctx->ac.f32);
|
||||
LLVMBuilderRef builder = ctx->ac.builder;
|
||||
LLVMValueRef val[4];
|
||||
unsigned spi_shader_col_format = V_028714_SPI_SHADER_32_ABGR;
|
||||
unsigned chan;
|
||||
bool is_int8, is_int10;
|
||||
|
@ -2452,6 +2428,10 @@ static void si_llvm_init_export_args(struct si_shader_context *ctx,
|
|||
args->out[2] = f32undef;
|
||||
args->out[3] = f32undef;
|
||||
|
||||
LLVMValueRef (*packf)(struct ac_llvm_context *ctx, LLVMValueRef args[2]) = NULL;
|
||||
LLVMValueRef (*packi)(struct ac_llvm_context *ctx, LLVMValueRef args[2],
|
||||
unsigned bits, bool hi) = NULL;
|
||||
|
||||
switch (spi_shader_col_format) {
|
||||
case V_028714_SPI_SHADER_ZERO:
|
||||
args->enabled_channels = 0; /* writemask */
|
||||
|
@ -2476,8 +2456,32 @@ static void si_llvm_init_export_args(struct si_shader_context *ctx,
|
|||
break;
|
||||
|
||||
case V_028714_SPI_SHADER_FP16_ABGR:
|
||||
args->compr = 1; /* COMPR flag */
|
||||
packf = ac_build_cvt_pkrtz_f16;
|
||||
break;
|
||||
|
||||
case V_028714_SPI_SHADER_UNORM16_ABGR:
|
||||
packf = ac_build_cvt_pknorm_u16;
|
||||
break;
|
||||
|
||||
case V_028714_SPI_SHADER_SNORM16_ABGR:
|
||||
packf = ac_build_cvt_pknorm_i16;
|
||||
break;
|
||||
|
||||
case V_028714_SPI_SHADER_UINT16_ABGR:
|
||||
packi = ac_build_cvt_pk_u16;
|
||||
break;
|
||||
|
||||
case V_028714_SPI_SHADER_SINT16_ABGR:
|
||||
packi = ac_build_cvt_pk_i16;
|
||||
break;
|
||||
|
||||
case V_028714_SPI_SHADER_32_ABGR:
|
||||
memcpy(&args->out[0], values, sizeof(values[0]) * 4);
|
||||
break;
|
||||
}
|
||||
|
||||
/* Pack f16 or norm_i16/u16. */
|
||||
if (packf) {
|
||||
for (chan = 0; chan < 2; chan++) {
|
||||
LLVMValueRef pack_args[2] = {
|
||||
values[2 * chan],
|
||||
|
@ -2485,104 +2489,26 @@ static void si_llvm_init_export_args(struct si_shader_context *ctx,
|
|||
};
|
||||
LLVMValueRef packed;
|
||||
|
||||
packed = ac_build_cvt_pkrtz_f16(&ctx->ac, pack_args);
|
||||
packed = packf(&ctx->ac, pack_args);
|
||||
args->out[chan] = ac_to_float(&ctx->ac, packed);
|
||||
}
|
||||
break;
|
||||
|
||||
case V_028714_SPI_SHADER_UNORM16_ABGR:
|
||||
for (chan = 0; chan < 4; chan++) {
|
||||
val[chan] = ac_build_clamp(&ctx->ac, values[chan]);
|
||||
val[chan] = LLVMBuildFMul(builder, val[chan],
|
||||
LLVMConstReal(ctx->f32, 65535), "");
|
||||
val[chan] = LLVMBuildFAdd(builder, val[chan],
|
||||
LLVMConstReal(ctx->f32, 0.5), "");
|
||||
val[chan] = LLVMBuildFPToUI(builder, val[chan],
|
||||
ctx->i32, "");
|
||||
}
|
||||
|
||||
args->compr = 1; /* COMPR flag */
|
||||
args->out[0] = ac_to_float(&ctx->ac, si_llvm_pack_two_int16(ctx, val));
|
||||
args->out[1] = ac_to_float(&ctx->ac, si_llvm_pack_two_int16(ctx, val+2));
|
||||
break;
|
||||
|
||||
case V_028714_SPI_SHADER_SNORM16_ABGR:
|
||||
for (chan = 0; chan < 4; chan++) {
|
||||
/* Clamp between [-1, 1]. */
|
||||
val[chan] = lp_build_emit_llvm_binary(&ctx->bld_base, TGSI_OPCODE_MIN,
|
||||
values[chan],
|
||||
LLVMConstReal(ctx->f32, 1));
|
||||
val[chan] = lp_build_emit_llvm_binary(&ctx->bld_base, TGSI_OPCODE_MAX,
|
||||
val[chan],
|
||||
LLVMConstReal(ctx->f32, -1));
|
||||
/* Convert to a signed integer in [-32767, 32767]. */
|
||||
val[chan] = LLVMBuildFMul(builder, val[chan],
|
||||
LLVMConstReal(ctx->f32, 32767), "");
|
||||
/* If positive, add 0.5, else add -0.5. */
|
||||
val[chan] = LLVMBuildFAdd(builder, val[chan],
|
||||
LLVMBuildSelect(builder,
|
||||
LLVMBuildFCmp(builder, LLVMRealOGE,
|
||||
val[chan], ctx->ac.f32_0, ""),
|
||||
LLVMConstReal(ctx->f32, 0.5),
|
||||
LLVMConstReal(ctx->f32, -0.5), ""), "");
|
||||
val[chan] = LLVMBuildFPToSI(builder, val[chan], ctx->i32, "");
|
||||
}
|
||||
|
||||
args->compr = 1; /* COMPR flag */
|
||||
args->out[0] = ac_to_float(&ctx->ac, si_llvm_pack_two_int32_as_int16(ctx, val));
|
||||
args->out[1] = ac_to_float(&ctx->ac, si_llvm_pack_two_int32_as_int16(ctx, val+2));
|
||||
break;
|
||||
|
||||
case V_028714_SPI_SHADER_UINT16_ABGR: {
|
||||
LLVMValueRef max_rgb = LLVMConstInt(ctx->i32,
|
||||
is_int8 ? 255 : is_int10 ? 1023 : 65535, 0);
|
||||
LLVMValueRef max_alpha =
|
||||
!is_int10 ? max_rgb : LLVMConstInt(ctx->i32, 3, 0);
|
||||
|
||||
/* Clamp. */
|
||||
for (chan = 0; chan < 4; chan++) {
|
||||
val[chan] = ac_to_integer(&ctx->ac, values[chan]);
|
||||
val[chan] = lp_build_emit_llvm_binary(&ctx->bld_base, TGSI_OPCODE_UMIN,
|
||||
val[chan],
|
||||
chan == 3 ? max_alpha : max_rgb);
|
||||
}
|
||||
|
||||
args->compr = 1; /* COMPR flag */
|
||||
args->out[0] = ac_to_float(&ctx->ac, si_llvm_pack_two_int16(ctx, val));
|
||||
args->out[1] = ac_to_float(&ctx->ac, si_llvm_pack_two_int16(ctx, val+2));
|
||||
break;
|
||||
}
|
||||
/* Pack i16/u16. */
|
||||
if (packi) {
|
||||
for (chan = 0; chan < 2; chan++) {
|
||||
LLVMValueRef pack_args[2] = {
|
||||
ac_to_integer(&ctx->ac, values[2 * chan]),
|
||||
ac_to_integer(&ctx->ac, values[2 * chan + 1])
|
||||
};
|
||||
LLVMValueRef packed;
|
||||
|
||||
case V_028714_SPI_SHADER_SINT16_ABGR: {
|
||||
LLVMValueRef max_rgb = LLVMConstInt(ctx->i32,
|
||||
is_int8 ? 127 : is_int10 ? 511 : 32767, 0);
|
||||
LLVMValueRef min_rgb = LLVMConstInt(ctx->i32,
|
||||
is_int8 ? -128 : is_int10 ? -512 : -32768, 0);
|
||||
LLVMValueRef max_alpha =
|
||||
!is_int10 ? max_rgb : ctx->i32_1;
|
||||
LLVMValueRef min_alpha =
|
||||
!is_int10 ? min_rgb : LLVMConstInt(ctx->i32, -2, 0);
|
||||
|
||||
/* Clamp. */
|
||||
for (chan = 0; chan < 4; chan++) {
|
||||
val[chan] = ac_to_integer(&ctx->ac, values[chan]);
|
||||
val[chan] = lp_build_emit_llvm_binary(&ctx->bld_base,
|
||||
TGSI_OPCODE_IMIN,
|
||||
val[chan], chan == 3 ? max_alpha : max_rgb);
|
||||
val[chan] = lp_build_emit_llvm_binary(&ctx->bld_base,
|
||||
TGSI_OPCODE_IMAX,
|
||||
val[chan], chan == 3 ? min_alpha : min_rgb);
|
||||
packed = packi(&ctx->ac, pack_args,
|
||||
is_int8 ? 8 : is_int10 ? 10 : 16,
|
||||
chan == 1);
|
||||
args->out[chan] = ac_to_float(&ctx->ac, packed);
|
||||
}
|
||||
|
||||
args->compr = 1; /* COMPR flag */
|
||||
args->out[0] = ac_to_float(&ctx->ac, si_llvm_pack_two_int32_as_int16(ctx, val));
|
||||
args->out[1] = ac_to_float(&ctx->ac, si_llvm_pack_two_int32_as_int16(ctx, val+2));
|
||||
break;
|
||||
}
|
||||
|
||||
case V_028714_SPI_SHADER_32_ABGR:
|
||||
memcpy(&args->out[0], values, sizeof(values[0]) * 4);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue