intel/compiler: Implement nir_intrinsic_last_invocation

We haven't exposed this intrinsic as it doesn't directly correspond to
anything in SPIR-V.  However, it's used internally by some NIR passes,
namely nir_opt_uniform_atomics().

We reuse most of the infrastructure in brw_find_live_channel, but with
LZD/ADD (computing 31 - LZD(mask)) instead of FBL.  A new
SHADER_OPCODE_FIND_LAST_LIVE_CHANNEL is like SHADER_OPCODE_FIND_LIVE_CHANNEL,
but returns the last enabled channel rather than the first.

Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Caio Oliveira <caio.oliveira@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/15484>
This commit is contained in:
Kenneth Graunke 2022-03-17 00:46:21 -07:00 committed by Marge Bot
parent af529b545a
commit 6fa66ac228
10 changed files with 57 additions and 11 deletions

View File

@ -1799,7 +1799,8 @@ brw_pixel_interpolator_query(struct brw_codegen *p,
void
brw_find_live_channel(struct brw_codegen *p,
struct brw_reg dst,
struct brw_reg mask);
struct brw_reg mask,
bool last);
void
brw_broadcast(struct brw_codegen *p,

View File

@ -501,13 +501,18 @@ enum opcode {
SHADER_OPCODE_URB_WRITE_SIMD8_MASKED_PER_SLOT,
/**
* Return the index of an arbitrary live channel (i.e. one of the channels
* enabled in the current execution mask) and assign it to the first
* component of the destination. Expected to be used as input for the
* BROADCAST pseudo-opcode.
* Return the index of the first enabled live channel and assign it to
* the first component of the destination. Frequently used as input
* for the BROADCAST pseudo-opcode.
*/
SHADER_OPCODE_FIND_LIVE_CHANNEL,
/**
* Return the index of the last enabled live channel and assign it to
* the first component of the destination.
*/
SHADER_OPCODE_FIND_LAST_LIVE_CHANNEL,
/**
* Return the current execution mask in the specified flag subregister.
* Can be CSE'ed more easily than a plain MOV from the ce0 ARF register.

View File

@ -3365,7 +3365,7 @@ brw_pixel_interpolator_query(struct brw_codegen *p,
void
brw_find_live_channel(struct brw_codegen *p, struct brw_reg dst,
struct brw_reg mask)
struct brw_reg mask, bool last)
{
const struct intel_device_info *devinfo = p->devinfo;
const unsigned exec_size = 1 << brw_get_default_exec_size(p);
@ -3414,10 +3414,17 @@ brw_find_live_channel(struct brw_codegen *p, struct brw_reg dst,
}
/* Quarter control has the effect of magically shifting the value of
* ce0 so you'll get the first active channel relative to the
* ce0 so you'll get the first/last active channel relative to the
* specified quarter control as result.
*/
inst = brw_FBL(p, vec1(dst), exec_mask);
if (!last) {
inst = brw_FBL(p, vec1(dst), exec_mask);
} else {
inst = brw_LZD(p, vec1(dst), exec_mask);
struct brw_reg neg = vec1(dst);
neg.negate = true;
inst = brw_ADD(p, vec1(dst), neg, brw_imm_uw(31));
}
} else {
const struct brw_reg flag = brw_flag_subreg(flag_subreg);
@ -3449,7 +3456,15 @@ brw_find_live_channel(struct brw_codegen *p, struct brw_reg dst,
*/
const enum brw_reg_type type = brw_int_type(exec_size / 8, false);
brw_set_default_exec_size(p, BRW_EXECUTE_1);
brw_FBL(p, vec1(dst), byte_offset(retype(flag, type), qtr_control));
if (!last) {
inst = brw_FBL(p, vec1(dst), byte_offset(retype(flag, type), qtr_control));
} else {
inst = brw_LZD(p, vec1(dst), byte_offset(retype(flag, type), qtr_control));
struct brw_reg neg = vec1(dst);
neg.negate = true;
inst = brw_ADD(p, vec1(dst), neg, brw_imm_uw(31));
}
}
} else {
brw_set_default_mask_control(p, BRW_MASK_DISABLE);

View File

@ -1058,6 +1058,7 @@ fs_inst::flags_written(const intel_device_info *devinfo) const
opcode == FS_OPCODE_FB_WRITE) {
return flag_mask(this, 1);
} else if (opcode == SHADER_OPCODE_FIND_LIVE_CHANNEL ||
opcode == SHADER_OPCODE_FIND_LAST_LIVE_CHANNEL ||
opcode == FS_OPCODE_LOAD_LIVE_CHANNELS) {
return flag_mask(this, 32);
} else {

View File

@ -76,6 +76,7 @@ is_expression(const fs_visitor *v, const fs_inst *const inst)
case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_LOGICAL:
case FS_OPCODE_LINTERP:
case SHADER_OPCODE_FIND_LIVE_CHANNEL:
case SHADER_OPCODE_FIND_LAST_LIVE_CHANNEL:
case FS_OPCODE_LOAD_LIVE_CHANNELS:
case SHADER_OPCODE_BROADCAST:
case SHADER_OPCODE_MOV_INDIRECT:

View File

@ -2425,9 +2425,21 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width,
prog_data) ? brw_imm_ud(~0u) :
stage == MESA_SHADER_FRAGMENT ? brw_vmask_reg() :
brw_dmask_reg();
brw_find_live_channel(p, dst, mask);
brw_find_live_channel(p, dst, mask, false);
break;
}
case SHADER_OPCODE_FIND_LAST_LIVE_CHANNEL: {
/* ce0 doesn't consider the thread dispatch mask, so if we want
* to find the true last enabled channel, we need to apply that too.
*/
const struct brw_reg mask =
stage == MESA_SHADER_FRAGMENT ? brw_vmask_reg() : brw_dmask_reg();
brw_find_live_channel(p, dst, mask, true);
break;
}
case FS_OPCODE_LOAD_LIVE_CHANNELS: {
assert(devinfo->ver >= 8);
assert(inst->force_writemask_all && inst->group == 0);

View File

@ -5429,6 +5429,14 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
break;
}
case nir_intrinsic_last_invocation: {
fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_UD);
bld.exec_all().emit(SHADER_OPCODE_FIND_LAST_LIVE_CHANNEL, tmp);
bld.MOV(retype(dest, BRW_REGISTER_TYPE_UD),
fs_reg(component(tmp, 0)));
break;
}
case nir_intrinsic_quad_broadcast: {
const fs_reg value = get_nir_src(instr->src[0]);
const unsigned index = nir_src_as_uint(instr->src[1]);

View File

@ -690,6 +690,7 @@ namespace {
abort();
case SHADER_OPCODE_FIND_LIVE_CHANNEL:
case SHADER_OPCODE_FIND_LAST_LIVE_CHANNEL:
if (devinfo->ver >= 11)
return calculate_desc(info, EU_UNIT_FPU, 2, 0, 0, 2, 0,
0, 10, 6 /* XXX */, 14 /* XXX */, 0, 0);

View File

@ -384,6 +384,8 @@ brw_instruction_name(const struct intel_device_info *devinfo, enum opcode op)
case SHADER_OPCODE_FIND_LIVE_CHANNEL:
return "find_live_channel";
case SHADER_OPCODE_FIND_LAST_LIVE_CHANNEL:
return "find_last_live_channel";
case FS_OPCODE_LOAD_LIVE_CHANNELS:
return "load_live_channels";

View File

@ -1937,7 +1937,7 @@ generate_code(struct brw_codegen *p,
brw_stage_has_packed_dispatch(devinfo, nir->info.stage,
&prog_data->base) ? brw_imm_ud(~0u) :
brw_dmask_reg();
brw_find_live_channel(p, dst, mask);
brw_find_live_channel(p, dst, mask, false);
break;
}