i965: Use LZD to implement nir_op_ufind_msb
This uses one fewer instruction. v2: Move emit_find_msb_using_lzd out of the visitor classes. Suggested by Curro. Signed-off-by: Ian Romanick <ian.d.romanick@intel.com> Reviewed-by: Matt Turner <mattst88@gmail.com>
This commit is contained in:
parent
26c7f04d4a
commit
de20086eed
|
@ -1761,6 +1761,9 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
|
|||
/* FBL only supports UD type for dst. */
|
||||
brw_FBL(p, retype(dst, BRW_REGISTER_TYPE_UD), src[0]);
|
||||
break;
|
||||
case BRW_OPCODE_LZD:
|
||||
brw_LZD(p, dst, src[0]);
|
||||
break;
|
||||
case BRW_OPCODE_CBIT:
|
||||
assert(devinfo->gen >= 7);
|
||||
/* CBIT only supports UD type for dst. */
|
||||
|
|
|
@ -614,6 +614,25 @@ fs_visitor::optimize_frontfacing_ternary(nir_alu_instr *instr,
|
|||
return true;
|
||||
}
|
||||
|
||||
static void
|
||||
emit_find_msb_using_lzd(const fs_builder &bld,
|
||||
const fs_reg &result,
|
||||
const fs_reg &src,
|
||||
bool is_signed)
|
||||
{
|
||||
fs_inst *inst;
|
||||
|
||||
bld.LZD(retype(result, BRW_REGISTER_TYPE_UD), src);
|
||||
|
||||
/* LZD counts from the MSB side, while GLSL's findMSB() wants the count
|
||||
* from the LSB side. Subtract the result from 31 to convert the MSB
|
||||
* count into an LSB count. If no bits are set, LZD will return 32.
|
||||
* 31-32 = -1, which is exactly what findMSB() is supposed to return.
|
||||
*/
|
||||
inst = bld.ADD(result, retype(result, BRW_REGISTER_TYPE_D), brw_imm_d(31));
|
||||
inst->src[0].negate = true;
|
||||
}
|
||||
|
||||
void
|
||||
fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr)
|
||||
{
|
||||
|
@ -1310,7 +1329,12 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr)
|
|||
bld.CBIT(result, op[0]);
|
||||
break;
|
||||
|
||||
case nir_op_ufind_msb:
|
||||
case nir_op_ufind_msb: {
|
||||
assert(nir_dest_bit_size(instr->dest.dest) < 64);
|
||||
emit_find_msb_using_lzd(bld, result, op[0], false);
|
||||
break;
|
||||
}
|
||||
|
||||
case nir_op_ifind_msb: {
|
||||
assert(nir_dest_bit_size(instr->dest.dest) < 64);
|
||||
bld.FBH(retype(result, BRW_REGISTER_TYPE_UD), op[0]);
|
||||
|
|
|
@ -1637,6 +1637,9 @@ generate_code(struct brw_codegen *p,
|
|||
/* FBL only supports UD type for dst. */
|
||||
brw_FBL(p, retype(dst, BRW_REGISTER_TYPE_UD), src[0]);
|
||||
break;
|
||||
case BRW_OPCODE_LZD:
|
||||
brw_LZD(p, dst, src[0]);
|
||||
break;
|
||||
case BRW_OPCODE_CBIT:
|
||||
assert(devinfo->gen >= 7);
|
||||
/* CBIT only supports UD type for dst. */
|
||||
|
|
|
@ -993,6 +993,26 @@ vec4_visitor::optimize_predicate(nir_alu_instr *instr,
|
|||
return true;
|
||||
}
|
||||
|
||||
static void
|
||||
emit_find_msb_using_lzd(const vec4_builder &bld,
|
||||
const dst_reg &dst,
|
||||
const src_reg &src,
|
||||
bool is_signed)
|
||||
{
|
||||
vec4_instruction *inst;
|
||||
|
||||
bld.LZD(retype(dst, BRW_REGISTER_TYPE_UD), src);
|
||||
|
||||
/* LZD counts from the MSB side, while GLSL's findMSB() wants the count
|
||||
* from the LSB side. Subtract the result from 31 to convert the MSB count
|
||||
* into an LSB count. If no bits are set, LZD will return 32. 31-32 = -1,
|
||||
* which is exactly what findMSB() is supposed to return.
|
||||
*/
|
||||
inst = bld.ADD(dst, retype(src_reg(dst), BRW_REGISTER_TYPE_D),
|
||||
brw_imm_d(31));
|
||||
inst->src[0].negate = true;
|
||||
}
|
||||
|
||||
void
|
||||
vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
|
||||
{
|
||||
|
@ -1461,6 +1481,9 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
|
|||
break;
|
||||
|
||||
case nir_op_ufind_msb:
|
||||
emit_find_msb_using_lzd(vec4_builder(this).at_end(), dst, op[0], false);
|
||||
break;
|
||||
|
||||
case nir_op_ifind_msb: {
|
||||
emit(FBH(retype(dst, BRW_REGISTER_TYPE_UD), op[0]));
|
||||
|
||||
|
|
Loading…
Reference in New Issue