broadcom/compiler: implement more subgroup intrinsics
Reviewed-by: Alejandro Piñeiro <apinheiro@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11620>
This commit is contained in:
parent
87fa5908b3
commit
53341e44ad
|
@ -3241,6 +3241,57 @@ ntq_emit_intrinsic(struct v3d_compile *c, nir_intrinsic_instr *instr)
|
|||
break;
|
||||
}
|
||||
|
||||
case nir_intrinsic_load_subgroup_size:
|
||||
ntq_store_dest(c, &instr->dest, 0,
|
||||
vir_uniform_ui(c, V3D_CHANNELS));
|
||||
break;
|
||||
|
||||
case nir_intrinsic_load_subgroup_invocation:
|
||||
ntq_store_dest(c, &instr->dest, 0, vir_EIDX(c));
|
||||
break;
|
||||
|
||||
case nir_intrinsic_elect: {
|
||||
/* Flafirst reads A flags so we need to make sure it reflects
|
||||
* currently active lanes.
|
||||
*
|
||||
* MSF returns 0 for disabled lanes in compute shaders so
|
||||
* PUSHZ will set A=1 for disabled lanes. We want the inverse
|
||||
* of this. We don't have any means to negate the A flags
|
||||
* directly, but we can do it by repeating the same operation
|
||||
* with NORZ (A = ~A & ~Z).
|
||||
*/
|
||||
assert(c->s->info.stage == MESA_SHADER_COMPUTE);
|
||||
vir_set_pf(c, vir_MSF_dest(c, vir_nop_reg()), V3D_QPU_PF_PUSHZ);
|
||||
vir_set_uf(c, vir_MSF_dest(c, vir_nop_reg()), V3D_QPU_UF_NORZ);
|
||||
|
||||
/* If we are under non-uniform control flow we also need to
|
||||
* AND the A flags with the current execute mask.
|
||||
*/
|
||||
if (vir_in_nonuniform_control_flow(c)) {
|
||||
const uint32_t bidx = c->cur_block->index;
|
||||
vir_set_uf(c, vir_XOR_dest(c, vir_nop_reg(),
|
||||
c->execute,
|
||||
vir_uniform_ui(c, bidx)),
|
||||
V3D_QPU_UF_ANDZ);
|
||||
}
|
||||
|
||||
struct qreg first = vir_FLAFIRST(c);
|
||||
|
||||
/* Produce a boolean result from Flafirst */
|
||||
vir_set_pf(c, vir_XOR_dest(c, vir_nop_reg(),
|
||||
first, vir_uniform_ui(c, 1)),
|
||||
V3D_QPU_PF_PUSHZ);
|
||||
struct qreg result =
|
||||
vir_MOV(c, vir_SEL(c, V3D_QPU_COND_IFA,
|
||||
vir_uniform_ui(c, ~0),
|
||||
vir_uniform_ui(c, 0)));
|
||||
c->flags_temp = result.index;
|
||||
c->flags_cond = V3D_QPU_COND_IFA;
|
||||
|
||||
ntq_store_dest(c, &instr->dest, 0, result);
|
||||
break;
|
||||
}
|
||||
|
||||
case nir_intrinsic_load_num_subgroups:
|
||||
unreachable("Should have been lowered");
|
||||
break;
|
||||
|
|
Loading…
Reference in New Issue