intel/compiler: Implement nir_intrinsic_last_invocation
We haven't exposed this intrinsic as it doesn't directly correspond to anything in SPIR-V. However, it's used internally by some NIR passes, namely nir_opt_uniform_atomics(). We reuse most of the infrastructure in brw_find_live_channel, but with LZD/ADD instead of FBL. A new SHADER_OPCODE_FIND_LAST_LIVE_CHANNEL is like SHADER_OPCODE_FIND_LIVE_CHANNEL but from the other side. Reviewed-by: Ian Romanick <ian.d.romanick@intel.com> Reviewed-by: Caio Oliveira <caio.oliveira@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/15484>
This commit is contained in:
parent
af529b545a
commit
6fa66ac228
|
@ -1799,7 +1799,8 @@ brw_pixel_interpolator_query(struct brw_codegen *p,
|
|||
void
|
||||
brw_find_live_channel(struct brw_codegen *p,
|
||||
struct brw_reg dst,
|
||||
struct brw_reg mask);
|
||||
struct brw_reg mask,
|
||||
bool last);
|
||||
|
||||
void
|
||||
brw_broadcast(struct brw_codegen *p,
|
||||
|
|
|
@ -501,13 +501,18 @@ enum opcode {
|
|||
SHADER_OPCODE_URB_WRITE_SIMD8_MASKED_PER_SLOT,
|
||||
|
||||
/**
|
||||
* Return the index of an arbitrary live channel (i.e. one of the channels
|
||||
* enabled in the current execution mask) and assign it to the first
|
||||
* component of the destination. Expected to be used as input for the
|
||||
* BROADCAST pseudo-opcode.
|
||||
* Return the index of the first enabled live channel and assign it to
|
||||
* the first component of the destination. Frequently used as input
|
||||
* for the BROADCAST pseudo-opcode.
|
||||
*/
|
||||
SHADER_OPCODE_FIND_LIVE_CHANNEL,
|
||||
|
||||
/**
|
||||
* Return the index of the last enabled live channel and assign it to
|
||||
* the first component of the destination.
|
||||
*/
|
||||
SHADER_OPCODE_FIND_LAST_LIVE_CHANNEL,
|
||||
|
||||
/**
|
||||
* Return the current execution mask in the specified flag subregister.
|
||||
* Can be CSE'ed more easily than a plain MOV from the ce0 ARF register.
|
||||
|
|
|
@ -3365,7 +3365,7 @@ brw_pixel_interpolator_query(struct brw_codegen *p,
|
|||
|
||||
void
|
||||
brw_find_live_channel(struct brw_codegen *p, struct brw_reg dst,
|
||||
struct brw_reg mask)
|
||||
struct brw_reg mask, bool last)
|
||||
{
|
||||
const struct intel_device_info *devinfo = p->devinfo;
|
||||
const unsigned exec_size = 1 << brw_get_default_exec_size(p);
|
||||
|
@ -3414,10 +3414,17 @@ brw_find_live_channel(struct brw_codegen *p, struct brw_reg dst,
|
|||
}
|
||||
|
||||
/* Quarter control has the effect of magically shifting the value of
|
||||
* ce0 so you'll get the first active channel relative to the
|
||||
* ce0 so you'll get the first/last active channel relative to the
|
||||
* specified quarter control as result.
|
||||
*/
|
||||
inst = brw_FBL(p, vec1(dst), exec_mask);
|
||||
if (!last) {
|
||||
inst = brw_FBL(p, vec1(dst), exec_mask);
|
||||
} else {
|
||||
inst = brw_LZD(p, vec1(dst), exec_mask);
|
||||
struct brw_reg neg = vec1(dst);
|
||||
neg.negate = true;
|
||||
inst = brw_ADD(p, vec1(dst), neg, brw_imm_uw(31));
|
||||
}
|
||||
} else {
|
||||
const struct brw_reg flag = brw_flag_subreg(flag_subreg);
|
||||
|
||||
|
@ -3449,7 +3456,15 @@ brw_find_live_channel(struct brw_codegen *p, struct brw_reg dst,
|
|||
*/
|
||||
const enum brw_reg_type type = brw_int_type(exec_size / 8, false);
|
||||
brw_set_default_exec_size(p, BRW_EXECUTE_1);
|
||||
brw_FBL(p, vec1(dst), byte_offset(retype(flag, type), qtr_control));
|
||||
if (!last) {
|
||||
inst = brw_FBL(p, vec1(dst), byte_offset(retype(flag, type), qtr_control));
|
||||
} else {
|
||||
inst = brw_LZD(p, vec1(dst), byte_offset(retype(flag, type), qtr_control));
|
||||
struct brw_reg neg = vec1(dst);
|
||||
neg.negate = true;
|
||||
inst = brw_ADD(p, vec1(dst), neg, brw_imm_uw(31));
|
||||
}
|
||||
|
||||
}
|
||||
} else {
|
||||
brw_set_default_mask_control(p, BRW_MASK_DISABLE);
|
||||
|
|
|
@ -1058,6 +1058,7 @@ fs_inst::flags_written(const intel_device_info *devinfo) const
|
|||
opcode == FS_OPCODE_FB_WRITE) {
|
||||
return flag_mask(this, 1);
|
||||
} else if (opcode == SHADER_OPCODE_FIND_LIVE_CHANNEL ||
|
||||
opcode == SHADER_OPCODE_FIND_LAST_LIVE_CHANNEL ||
|
||||
opcode == FS_OPCODE_LOAD_LIVE_CHANNELS) {
|
||||
return flag_mask(this, 32);
|
||||
} else {
|
||||
|
|
|
@ -76,6 +76,7 @@ is_expression(const fs_visitor *v, const fs_inst *const inst)
|
|||
case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_LOGICAL:
|
||||
case FS_OPCODE_LINTERP:
|
||||
case SHADER_OPCODE_FIND_LIVE_CHANNEL:
|
||||
case SHADER_OPCODE_FIND_LAST_LIVE_CHANNEL:
|
||||
case FS_OPCODE_LOAD_LIVE_CHANNELS:
|
||||
case SHADER_OPCODE_BROADCAST:
|
||||
case SHADER_OPCODE_MOV_INDIRECT:
|
||||
|
|
|
@ -2425,9 +2425,21 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width,
|
|||
prog_data) ? brw_imm_ud(~0u) :
|
||||
stage == MESA_SHADER_FRAGMENT ? brw_vmask_reg() :
|
||||
brw_dmask_reg();
|
||||
brw_find_live_channel(p, dst, mask);
|
||||
|
||||
brw_find_live_channel(p, dst, mask, false);
|
||||
break;
|
||||
}
|
||||
case SHADER_OPCODE_FIND_LAST_LIVE_CHANNEL: {
|
||||
/* ce0 doesn't consider the thread dispatch mask, so if we want
|
||||
* to find the true last enabled channel, we need to apply that too.
|
||||
*/
|
||||
const struct brw_reg mask =
|
||||
stage == MESA_SHADER_FRAGMENT ? brw_vmask_reg() : brw_dmask_reg();
|
||||
|
||||
brw_find_live_channel(p, dst, mask, true);
|
||||
break;
|
||||
}
|
||||
|
||||
case FS_OPCODE_LOAD_LIVE_CHANNELS: {
|
||||
assert(devinfo->ver >= 8);
|
||||
assert(inst->force_writemask_all && inst->group == 0);
|
||||
|
|
|
@ -5429,6 +5429,14 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
|
|||
break;
|
||||
}
|
||||
|
||||
case nir_intrinsic_last_invocation: {
|
||||
fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_UD);
|
||||
bld.exec_all().emit(SHADER_OPCODE_FIND_LAST_LIVE_CHANNEL, tmp);
|
||||
bld.MOV(retype(dest, BRW_REGISTER_TYPE_UD),
|
||||
fs_reg(component(tmp, 0)));
|
||||
break;
|
||||
}
|
||||
|
||||
case nir_intrinsic_quad_broadcast: {
|
||||
const fs_reg value = get_nir_src(instr->src[0]);
|
||||
const unsigned index = nir_src_as_uint(instr->src[1]);
|
||||
|
|
|
@ -690,6 +690,7 @@ namespace {
|
|||
abort();
|
||||
|
||||
case SHADER_OPCODE_FIND_LIVE_CHANNEL:
|
||||
case SHADER_OPCODE_FIND_LAST_LIVE_CHANNEL:
|
||||
if (devinfo->ver >= 11)
|
||||
return calculate_desc(info, EU_UNIT_FPU, 2, 0, 0, 2, 0,
|
||||
0, 10, 6 /* XXX */, 14 /* XXX */, 0, 0);
|
||||
|
|
|
@ -384,6 +384,8 @@ brw_instruction_name(const struct intel_device_info *devinfo, enum opcode op)
|
|||
|
||||
case SHADER_OPCODE_FIND_LIVE_CHANNEL:
|
||||
return "find_live_channel";
|
||||
case SHADER_OPCODE_FIND_LAST_LIVE_CHANNEL:
|
||||
return "find_last_live_channel";
|
||||
case FS_OPCODE_LOAD_LIVE_CHANNELS:
|
||||
return "load_live_channels";
|
||||
|
||||
|
|
|
@ -1937,7 +1937,7 @@ generate_code(struct brw_codegen *p,
|
|||
brw_stage_has_packed_dispatch(devinfo, nir->info.stage,
|
||||
&prog_data->base) ? brw_imm_ud(~0u) :
|
||||
brw_dmask_reg();
|
||||
brw_find_live_channel(p, dst, mask);
|
||||
brw_find_live_channel(p, dst, mask, false);
|
||||
break;
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue