radeonsi: implement 16-bit VS inputs

Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/9051>
This commit is contained in:
Marek Olšák 2021-02-14 06:48:43 -05:00
parent 7db43960f6
commit 9fa0f5f1ba
2 changed files with 34 additions and 16 deletions

View File

@ -95,6 +95,9 @@ static void load_input_vs(struct si_shader_context *ctx, unsigned input_index, L
return;
}
unsigned bit_size = info->input_fp16_lo_hi_valid[input_index] & 0x1 ? 16 : 32;
LLVMTypeRef int_type = bit_size == 16 ? ctx->ac.i16 : ctx->ac.i32;
LLVMTypeRef float_type = bit_size == 16 ? ctx->ac.f16 : ctx->ac.f32;
unsigned num_vbos_in_user_sgprs = ctx->shader->selector->num_vbos_in_user_sgprs;
union si_vs_fix_fetch fix_fetch;
LLVMValueRef vb_desc;
@ -129,6 +132,19 @@ static void load_input_vs(struct si_shader_context *ctx, unsigned input_index, L
for (unsigned i = 0; i < 4; ++i)
out[i] =
LLVMBuildExtractElement(ctx->ac.builder, tmp, LLVMConstInt(ctx->ac.i32, i, false), "");
if (bit_size == 16) {
if (fix_fetch.u.format == AC_FETCH_FORMAT_UINT ||
fix_fetch.u.format == AC_FETCH_FORMAT_SINT) {
for (unsigned i = 0; i < 4; i++)
out[i] = LLVMBuildTrunc(ctx->ac.builder, out[i], ctx->ac.i16, "");
} else {
for (unsigned i = 0; i < 4; i++) {
out[i] = ac_to_float(&ctx->ac, out[i]);
out[i] = LLVMBuildFPTrunc(ctx->ac.builder, out[i], ctx->ac.f16, "");
}
}
}
return;
}
@ -158,7 +174,8 @@ static void load_input_vs(struct si_shader_context *ctx, unsigned input_index, L
for (unsigned i = 0; i < num_fetches; ++i) {
LLVMValueRef voffset = LLVMConstInt(ctx->ac.i32, fetch_stride * i, 0);
fetches[i] = ac_build_buffer_load_format(&ctx->ac, vb_desc, vertex_index, voffset,
channels_per_fetch, 0, true, false, false);
channels_per_fetch, 0, true,
bit_size == 16, false);
}
if (num_fetches == 1 && channels_per_fetch > 1) {
@ -172,27 +189,28 @@ static void load_input_vs(struct si_shader_context *ctx, unsigned input_index, L
}
for (unsigned i = num_fetches; i < 4; ++i)
fetches[i] = LLVMGetUndef(ctx->ac.f32);
fetches[i] = LLVMGetUndef(float_type);
if (fix_fetch.u.log_size <= 1 && fix_fetch.u.num_channels_m1 == 2 && required_channels == 4) {
if (fix_fetch.u.format == AC_FETCH_FORMAT_UINT || fix_fetch.u.format == AC_FETCH_FORMAT_SINT)
fetches[3] = ctx->ac.i32_1;
fetches[3] = LLVMConstInt(int_type, 1, 0);
else
fetches[3] = ctx->ac.f32_1;
fetches[3] = LLVMConstReal(float_type, 1);
} else if (fix_fetch.u.log_size == 3 &&
(fix_fetch.u.format == AC_FETCH_FORMAT_SNORM ||
fix_fetch.u.format == AC_FETCH_FORMAT_SSCALED ||
fix_fetch.u.format == AC_FETCH_FORMAT_SINT) &&
required_channels == 4) {
/* For 2_10_10_10, the hardware returns an unsigned value;
* convert it to a signed one.
*/
LLVMValueRef tmp = fetches[3];
LLVMValueRef c30 = LLVMConstInt(ctx->ac.i32, 30, 0);
LLVMValueRef c30 = LLVMConstInt(int_type, 30, 0);
/* First, recover the sign-extended signed integer value. */
if (fix_fetch.u.format == AC_FETCH_FORMAT_SSCALED)
tmp = LLVMBuildFPToUI(ctx->ac.builder, tmp, ctx->ac.i32, "");
tmp = LLVMBuildFPToUI(ctx->ac.builder, tmp, int_type, "");
else
tmp = ac_to_integer(&ctx->ac, tmp);
@ -204,18 +222,18 @@ static void load_input_vs(struct si_shader_context *ctx, unsigned input_index, L
*/
tmp = LLVMBuildShl(
ctx->ac.builder, tmp,
fix_fetch.u.format == AC_FETCH_FORMAT_SNORM ? LLVMConstInt(ctx->ac.i32, 7, 0) : c30, "");
fix_fetch.u.format == AC_FETCH_FORMAT_SNORM ? LLVMConstInt(int_type, 7, 0) : c30, "");
tmp = LLVMBuildAShr(ctx->ac.builder, tmp, c30, "");
/* Convert back to the right type. */
if (fix_fetch.u.format == AC_FETCH_FORMAT_SNORM) {
LLVMValueRef clamp;
LLVMValueRef neg_one = LLVMConstReal(ctx->ac.f32, -1.0);
tmp = LLVMBuildSIToFP(ctx->ac.builder, tmp, ctx->ac.f32, "");
LLVMValueRef neg_one = LLVMConstReal(float_type, -1.0);
tmp = LLVMBuildSIToFP(ctx->ac.builder, tmp, float_type, "");
clamp = LLVMBuildFCmp(ctx->ac.builder, LLVMRealULT, tmp, neg_one, "");
tmp = LLVMBuildSelect(ctx->ac.builder, clamp, neg_one, tmp, "");
} else if (fix_fetch.u.format == AC_FETCH_FORMAT_SSCALED) {
tmp = LLVMBuildSIToFP(ctx->ac.builder, tmp, ctx->ac.f32, "");
tmp = LLVMBuildSIToFP(ctx->ac.builder, tmp, float_type, "");
}
fetches[3] = tmp;
@ -234,10 +252,8 @@ void si_llvm_load_vs_inputs(struct si_shader_context *ctx, struct nir_shader *ni
load_input_vs(ctx, i, values);
for (unsigned chan = 0; chan < 4; chan++) {
ctx->inputs[i * 4 + chan] =
LLVMBuildBitCast(ctx->ac.builder, values[chan], ctx->ac.i32, "");
}
for (unsigned chan = 0; chan < 4; chan++)
ctx->inputs[i * 4 + chan] = ac_to_integer(&ctx->ac, values[chan]);
}
}

View File

@ -72,8 +72,10 @@ static void scan_io_usage(struct si_shader_info *info, nir_intrinsic_instr *intr
}
assert(bit_size != 64 && !(mask & ~0xf) && "64-bit IO should have been lowered");
/* Convert the 16-bit component mask to a 32-bit component mask. */
if (bit_size == 16) {
/* Convert the 16-bit component mask to a 32-bit component mask except for VS inputs
* where the mask is untyped.
*/
if (bit_size == 16 && !is_input) {
unsigned new_mask = 0;
for (unsigned i = 0; i < 4; i++) {
if (mask & (1 << i))