microsoft/compiler: Handle bitfield_insert

This *almost* matches what GLSL wants, except for the handling of
large widths. You can see this in the lowering algorithm:
   (('bitfield_insert', 'base', 'insert', 'offset', 'bits'),
    ('bcsel', ('ult', 31, 'bits'), 'insert',
              ('bfi', ('bfm', 'bits', 'offset'), 'insert', 'base')),
    'options->lower_bitfield_insert'),

DXIL's 'bfi' instruction is an inseparable pairing of NIR's 'bfi(bfm(...), ...)',
so we just apply the additional bcsel in the backend.

Reviewed-by: Sil Vilerino <sivileri@microsoft.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14624>
This commit is contained in:
Jesse Natalie 2022-01-17 13:20:53 -08:00 committed by Marge Bot
parent 8938c6e032
commit 80e782d5ed
2 changed files with 53 additions and 0 deletions

View File

@ -41,6 +41,7 @@ static struct predefined_func_descr predefined_funcs[] = {
{"dx.op.storeOutput", "v", "iiicO", DXIL_ATTR_KIND_NO_UNWIND},
{"dx.op.loadInput", "O", "iiici", DXIL_ATTR_KIND_READ_NONE},
{"dx.op.tertiary", "O", "iOOO", DXIL_ATTR_KIND_READ_NONE},
{"dx.op.quaternary", "O", "iOOOO", DXIL_ATTR_KIND_READ_NONE},
{"dx.op.threadId", "i", "ii", DXIL_ATTR_KIND_READ_NONE},
{"dx.op.threadIdInGroup", "i", "ii", DXIL_ATTR_KIND_READ_NONE},
{"dx.op.flattenedThreadIdInGroup", "i", "i", DXIL_ATTR_KIND_READ_NONE},

View File

@ -224,6 +224,8 @@ enum dxil_intr {
DXIL_INTR_FMA = 47,
DXIL_INTR_BFI = 53,
DXIL_INTR_CREATE_HANDLE = 57,
DXIL_INTR_CBUFFER_LOAD_LEGACY = 59,
@ -540,6 +542,33 @@ emit_tertiary_call(struct ntd_context *ctx, enum overload_type overload,
return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}
static const struct dxil_value *
emit_quaternary_call(struct ntd_context *ctx, enum overload_type overload,
enum dxil_intr intr,
const struct dxil_value *op0,
const struct dxil_value *op1,
const struct dxil_value *op2,
const struct dxil_value *op3)
{
const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.quaternary", overload);
if (!func)
return NULL;
const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, intr);
if (!opcode)
return NULL;
const struct dxil_value *args[] = {
opcode,
op0,
op1,
op2,
op3
};
return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));
}
static const struct dxil_value *
emit_threadid_call(struct ntd_context *ctx, const struct dxil_value *comp)
{
@ -1874,6 +1903,27 @@ emit_tertiary_intin(struct ntd_context *ctx, nir_alu_instr *alu,
return true;
}
static bool
emit_bitfield_insert(struct ntd_context *ctx, nir_alu_instr *alu,
const struct dxil_value *base,
const struct dxil_value *insert,
const struct dxil_value *offset,
const struct dxil_value *width)
{
/* DXIL is width, offset, insert, base, NIR is base, insert, offset, width */
const struct dxil_value *v = emit_quaternary_call(ctx, DXIL_I32, DXIL_INTR_BFI,
width, offset, insert, base);
if (!v)
return false;
/* DXIL uses the 5 LSB from width/offset. Special-case width >= 32 == copy insert. */
const struct dxil_value *compare_width = dxil_emit_cmp(&ctx->mod, DXIL_ICMP_SGE,
width, dxil_module_get_int32_const(&ctx->mod, 32));
v = dxil_emit_select(&ctx->mod, compare_width, insert, v);
store_alu_dest(ctx, alu, 0, v);
return true;
}
static bool emit_select(struct ntd_context *ctx, nir_alu_instr *alu,
const struct dxil_value *sel,
const struct dxil_value *val_true,
@ -2236,6 +2286,8 @@ emit_alu(struct ntd_context *ctx, nir_alu_instr *alu)
case nir_op_fmin: return emit_binary_intin(ctx, alu, DXIL_INTR_FMIN, src[0], src[1]);
case nir_op_ffma: return emit_tertiary_intin(ctx, alu, DXIL_INTR_FMA, src[0], src[1], src[2]);
case nir_op_bitfield_insert: return emit_bitfield_insert(ctx, alu, src[0], src[1], src[2], src[3]);
case nir_op_unpack_half_2x16_split_x: return emit_f16tof32(ctx, alu, src[0], false);
case nir_op_unpack_half_2x16_split_y: return emit_f16tof32(ctx, alu, src[0], true);
case nir_op_pack_half_2x16_split: return emit_f32tof16(ctx, alu, src[0], src[1]);