radeonsi/gfx10: use 32-bit wavemasks for Wave32

Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
This commit is contained in:
Marek Olšák 2019-07-16 00:55:46 -04:00
parent 81091a5183
commit 54e6900ede
4 changed files with 43 additions and 16 deletions

View File

@ -92,6 +92,7 @@ ac_llvm_context_init(struct ac_llvm_context *ctx,
ctx->v3f32 = LLVMVectorType(ctx->f32, 3); ctx->v3f32 = LLVMVectorType(ctx->f32, 3);
ctx->v4f32 = LLVMVectorType(ctx->f32, 4); ctx->v4f32 = LLVMVectorType(ctx->f32, 4);
ctx->v8i32 = LLVMVectorType(ctx->i32, 8); ctx->v8i32 = LLVMVectorType(ctx->i32, 8);
ctx->iN_wavemask = LLVMIntTypeInContext(ctx->context, ctx->wave_size);
ctx->i8_0 = LLVMConstInt(ctx->i8, 0, false); ctx->i8_0 = LLVMConstInt(ctx->i8, 0, false);
ctx->i8_1 = LLVMConstInt(ctx->i8, 1, false); ctx->i8_1 = LLVMConstInt(ctx->i8, 1, false);
@ -447,7 +448,16 @@ LLVMValueRef
ac_build_ballot(struct ac_llvm_context *ctx, ac_build_ballot(struct ac_llvm_context *ctx,
LLVMValueRef value) LLVMValueRef value)
{ {
const char *name = HAVE_LLVM >= 0x900 ? "llvm.amdgcn.icmp.i64.i32" : "llvm.amdgcn.icmp.i32"; const char *name;
if (HAVE_LLVM >= 0x900) {
if (ctx->wave_size == 64)
name = "llvm.amdgcn.icmp.i64.i32";
else
name = "llvm.amdgcn.icmp.i32.i32";
} else {
name = "llvm.amdgcn.icmp.i32";
}
LLVMValueRef args[3] = { LLVMValueRef args[3] = {
value, value,
ctx->i32_0, ctx->i32_0,
@ -461,8 +471,7 @@ ac_build_ballot(struct ac_llvm_context *ctx,
args[0] = ac_to_integer(ctx, args[0]); args[0] = ac_to_integer(ctx, args[0]);
return ac_build_intrinsic(ctx, name, return ac_build_intrinsic(ctx, name, ctx->iN_wavemask, args, 3,
ctx->i64, args, 3,
AC_FUNC_ATTR_NOUNWIND | AC_FUNC_ATTR_NOUNWIND |
AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_READNONE |
AC_FUNC_ATTR_CONVERGENT); AC_FUNC_ATTR_CONVERGENT);
@ -498,7 +507,7 @@ ac_build_vote_any(struct ac_llvm_context *ctx, LLVMValueRef value)
{ {
LLVMValueRef vote_set = ac_build_ballot(ctx, value); LLVMValueRef vote_set = ac_build_ballot(ctx, value);
return LLVMBuildICmp(ctx->builder, LLVMIntNE, vote_set, return LLVMBuildICmp(ctx->builder, LLVMIntNE, vote_set,
LLVMConstInt(ctx->i64, 0, 0), ""); LLVMConstInt(ctx->iN_wavemask, 0, 0), "");
} }
LLVMValueRef LLVMValueRef
@ -511,7 +520,7 @@ ac_build_vote_eq(struct ac_llvm_context *ctx, LLVMValueRef value)
vote_set, active_set, ""); vote_set, active_set, "");
LLVMValueRef none = LLVMBuildICmp(ctx->builder, LLVMIntEQ, LLVMValueRef none = LLVMBuildICmp(ctx->builder, LLVMIntEQ,
vote_set, vote_set,
LLVMConstInt(ctx->i64, 0, 0), ""); LLVMConstInt(ctx->iN_wavemask, 0, 0), "");
return LLVMBuildOr(ctx->builder, all, none, ""); return LLVMBuildOr(ctx->builder, all, none, "");
} }
@ -3848,6 +3857,11 @@ ac_build_writelane(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef v
LLVMValueRef LLVMValueRef
ac_build_mbcnt(struct ac_llvm_context *ctx, LLVMValueRef mask) ac_build_mbcnt(struct ac_llvm_context *ctx, LLVMValueRef mask)
{ {
if (ctx->wave_size == 32) {
return ac_build_intrinsic(ctx, "llvm.amdgcn.mbcnt.lo", ctx->i32,
(LLVMValueRef []) { mask, ctx->i32_0 },
2, AC_FUNC_ATTR_READNONE);
}
LLVMValueRef mask_vec = LLVMBuildBitCast(ctx->builder, mask, LLVMValueRef mask_vec = LLVMBuildBitCast(ctx->builder, mask,
LLVMVectorType(ctx->i32, 2), LLVMVectorType(ctx->i32, 2),
""); "");

View File

@ -74,6 +74,7 @@ struct ac_llvm_context {
LLVMTypeRef v3f32; LLVMTypeRef v3f32;
LLVMTypeRef v4f32; LLVMTypeRef v4f32;
LLVMTypeRef v8i32; LLVMTypeRef v8i32;
LLVMTypeRef iN_wavemask;
LLVMValueRef i8_0; LLVMValueRef i8_0;
LLVMValueRef i8_1; LLVMValueRef i8_1;

View File

@ -2809,12 +2809,12 @@ static LLVMValueRef
visit_first_invocation(struct ac_nir_context *ctx) visit_first_invocation(struct ac_nir_context *ctx)
{ {
LLVMValueRef active_set = ac_build_ballot(&ctx->ac, ctx->ac.i32_1); LLVMValueRef active_set = ac_build_ballot(&ctx->ac, ctx->ac.i32_1);
const char *intr = ctx->ac.wave_size == 32 ? "llvm.cttz.i32" : "llvm.cttz.i64";
/* The second argument (is_zero_undef) is whether cttz(0) is undefined; we pass false, but we do not care either way. */ /* The second argument (is_zero_undef) is whether cttz(0) is undefined; we pass false, but we do not care either way. */
LLVMValueRef args[] = {active_set, ctx->ac.i1false}; LLVMValueRef args[] = {active_set, ctx->ac.i1false};
LLVMValueRef result = ac_build_intrinsic(&ctx->ac, LLVMValueRef result = ac_build_intrinsic(&ctx->ac, intr,
"llvm.cttz.i64", ctx->ac.iN_wavemask, args, 2,
ctx->ac.i64, args, 2,
AC_FUNC_ATTR_NOUNWIND | AC_FUNC_ATTR_NOUNWIND |
AC_FUNC_ATTR_READNONE); AC_FUNC_ATTR_READNONE);

View File

@ -2178,8 +2178,12 @@ void si_load_system_value(struct si_shader_context *ctx,
case TGSI_SEMANTIC_SUBGROUP_EQ_MASK: case TGSI_SEMANTIC_SUBGROUP_EQ_MASK:
{ {
LLVMValueRef id = ac_get_thread_id(&ctx->ac); LLVMValueRef id = ac_get_thread_id(&ctx->ac);
id = LLVMBuildZExt(ctx->ac.builder, id, ctx->i64, ""); if (ctx->ac.wave_size == 64)
value = LLVMBuildShl(ctx->ac.builder, LLVMConstInt(ctx->i64, 1, 0), id, ""); id = LLVMBuildZExt(ctx->ac.builder, id, ctx->i64, "");
value = LLVMBuildShl(ctx->ac.builder,
LLVMConstInt(ctx->ac.iN_wavemask, 1, 0), id, "");
if (ctx->ac.wave_size == 32)
value = LLVMBuildZExt(ctx->ac.builder, value, ctx->i64, "");
value = LLVMBuildBitCast(ctx->ac.builder, value, ctx->v2i32, ""); value = LLVMBuildBitCast(ctx->ac.builder, value, ctx->v2i32, "");
break; break;
} }
@ -2193,16 +2197,19 @@ void si_load_system_value(struct si_shader_context *ctx,
if (decl->Semantic.Name == TGSI_SEMANTIC_SUBGROUP_GT_MASK || if (decl->Semantic.Name == TGSI_SEMANTIC_SUBGROUP_GT_MASK ||
decl->Semantic.Name == TGSI_SEMANTIC_SUBGROUP_LE_MASK) { decl->Semantic.Name == TGSI_SEMANTIC_SUBGROUP_LE_MASK) {
/* All bits set except LSB */ /* All bits set except LSB */
value = LLVMConstInt(ctx->i64, -2, 0); value = LLVMConstInt(ctx->ac.iN_wavemask, -2, 0);
} else { } else {
/* All bits set */ /* All bits set */
value = LLVMConstInt(ctx->i64, -1, 0); value = LLVMConstInt(ctx->ac.iN_wavemask, -1, 0);
} }
id = LLVMBuildZExt(ctx->ac.builder, id, ctx->i64, ""); if (ctx->ac.wave_size == 64)
id = LLVMBuildZExt(ctx->ac.builder, id, ctx->i64, "");
value = LLVMBuildShl(ctx->ac.builder, value, id, ""); value = LLVMBuildShl(ctx->ac.builder, value, id, "");
if (decl->Semantic.Name == TGSI_SEMANTIC_SUBGROUP_LE_MASK || if (decl->Semantic.Name == TGSI_SEMANTIC_SUBGROUP_LE_MASK ||
decl->Semantic.Name == TGSI_SEMANTIC_SUBGROUP_LT_MASK) decl->Semantic.Name == TGSI_SEMANTIC_SUBGROUP_LT_MASK)
value = LLVMBuildNot(ctx->ac.builder, value, ""); value = LLVMBuildNot(ctx->ac.builder, value, "");
if (ctx->ac.wave_size == 32)
value = LLVMBuildZExt(ctx->ac.builder, value, ctx->i64, "");
value = LLVMBuildBitCast(ctx->ac.builder, value, ctx->v2i32, ""); value = LLVMBuildBitCast(ctx->ac.builder, value, ctx->v2i32, "");
break; break;
} }
@ -4186,10 +4193,15 @@ static void ballot_emit(
tmp = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_X); tmp = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_X);
tmp = ac_build_ballot(&ctx->ac, tmp); tmp = ac_build_ballot(&ctx->ac, tmp);
tmp = LLVMBuildBitCast(builder, tmp, ctx->v2i32, "");
emit_data->output[0] = LLVMBuildExtractElement(builder, tmp, ctx->i32_0, ""); emit_data->output[0] = LLVMBuildTrunc(builder, tmp, ctx->i32, "");
emit_data->output[1] = LLVMBuildExtractElement(builder, tmp, ctx->i32_1, "");
if (ctx->ac.wave_size == 32) {
emit_data->output[1] = ctx->i32_0;
} else {
tmp = LLVMBuildLShr(builder, tmp, LLVMConstInt(ctx->i64, 32, 0), "");
emit_data->output[1] = LLVMBuildTrunc(builder, tmp, ctx->i32, "");
}
} }
static void read_lane_emit( static void read_lane_emit(