aco: have emit_wqm() take Builder instead of isel_context
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8523>
This commit is contained in:
parent
489aa8c7cb
commit
9890dabb1b
|
@ -158,16 +158,14 @@ Temp emit_mbcnt(isel_context *ctx, Temp dst, Operand mask = Operand(), Operand b
|
|||
return bld.vop3(aco_opcode::v_mbcnt_hi_u32_b32_e64, Definition(dst), mask_hi, mbcnt_lo);
|
||||
}
|
||||
|
||||
Temp emit_wqm(isel_context *ctx, Temp src, Temp dst=Temp(0, s1), bool program_needs_wqm = false)
|
||||
Temp emit_wqm(Builder& bld, Temp src, Temp dst=Temp(0, s1), bool program_needs_wqm = false)
|
||||
{
|
||||
Builder bld(ctx->program, ctx->block);
|
||||
|
||||
if (!dst.id())
|
||||
dst = bld.tmp(src.regClass());
|
||||
|
||||
assert(src.size() == dst.size());
|
||||
|
||||
if (ctx->stage != fragment_fs) {
|
||||
if (bld.program->stage != fragment_fs) {
|
||||
if (!dst.id())
|
||||
return src;
|
||||
|
||||
|
@ -176,7 +174,7 @@ Temp emit_wqm(isel_context *ctx, Temp src, Temp dst=Temp(0, s1), bool program_ne
|
|||
}
|
||||
|
||||
bld.pseudo(aco_opcode::p_wqm, Definition(dst), src);
|
||||
ctx->program->needs_wqm |= program_needs_wqm;
|
||||
bld.program->needs_wqm |= program_needs_wqm;
|
||||
return dst;
|
||||
}
|
||||
|
||||
|
@ -554,7 +552,7 @@ Temp bool_to_scalar_condition(isel_context *ctx, Temp val, Temp dst = Temp(0, s1
|
|||
/* if we're currently in WQM mode, ensure that the source is also computed in WQM */
|
||||
Temp tmp = bld.tmp(s1);
|
||||
bld.sop2(Builder::s_and, bld.def(bld.lm), bld.scc(Definition(tmp)), val, Operand(exec, bld.lm));
|
||||
return emit_wqm(ctx, tmp, dst);
|
||||
return emit_wqm(bld, tmp, dst);
|
||||
}
|
||||
|
||||
Temp convert_int(isel_context *ctx, Builder& bld, Temp src, unsigned src_bits, unsigned dst_bits, bool is_signed, Temp dst=Temp())
|
||||
|
@ -3125,7 +3123,7 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
|
|||
Temp tr = bld.ds(aco_opcode::ds_swizzle_b32, bld.def(v1), src, (1 << 15) | dpp_ctrl2);
|
||||
tmp = bld.vop2(aco_opcode::v_sub_f32, bld.def(v1), tr, tl);
|
||||
}
|
||||
emit_wqm(ctx, tmp, dst, true);
|
||||
emit_wqm(bld, tmp, dst, true);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
|
@ -4699,7 +4697,7 @@ void emit_load_frag_coord(isel_context *ctx, Temp dst, unsigned num_components)
|
|||
/* dFdx fine */
|
||||
Temp tl = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), frag_z, dpp_quad_perm(0, 0, 2, 2));
|
||||
tmp = bld.vop2_dpp(aco_opcode::v_sub_f32, bld.def(v1), frag_z, tl, dpp_quad_perm(1, 1, 3, 3));
|
||||
emit_wqm(ctx, tmp, adjusted_frag_z, true);
|
||||
emit_wqm(bld, tmp, adjusted_frag_z, true);
|
||||
|
||||
/* adjusted_frag_z * 0.0625 + frag_z */
|
||||
adjusted_frag_z = bld.vop3(aco_opcode::v_fma_f32, bld.def(v1), adjusted_frag_z,
|
||||
|
@ -7373,7 +7371,7 @@ Temp emit_boolean_reduce(isel_context *ctx, nir_op op, unsigned cluster_size, Te
|
|||
} else if (op == nir_op_iand && cluster_size == ctx->program->wave_size) {
|
||||
//subgroupAnd(val) -> (exec & ~val) == 0
|
||||
Temp tmp = bld.sop2(Builder::s_andn2, bld.def(bld.lm), bld.def(s1, scc), Operand(exec, bld.lm), src).def(1).getTemp();
|
||||
Temp cond = bool_to_vector_condition(ctx, emit_wqm(ctx, tmp));
|
||||
Temp cond = bool_to_vector_condition(ctx, emit_wqm(bld, tmp));
|
||||
return bld.sop1(Builder::s_not, bld.def(bld.lm), bld.def(s1, scc), cond);
|
||||
} else if (op == nir_op_ior && cluster_size == ctx->program->wave_size) {
|
||||
//subgroupOr(val) -> (val & exec) != 0
|
||||
|
@ -7747,9 +7745,9 @@ void emit_interp_center(isel_context *ctx, Temp dst, Temp pos1, Temp pos2)
|
|||
tmp1 = bld.vop3(mad, bld.def(v1), ddy_1, pos2, tmp1);
|
||||
tmp2 = bld.vop3(mad, bld.def(v1), ddy_2, pos2, tmp2);
|
||||
Temp wqm1 = bld.tmp(v1);
|
||||
emit_wqm(ctx, tmp1, wqm1, true);
|
||||
emit_wqm(bld, tmp1, wqm1, true);
|
||||
Temp wqm2 = bld.tmp(v1);
|
||||
emit_wqm(ctx, tmp2, wqm2, true);
|
||||
emit_wqm(bld, tmp2, wqm2, true);
|
||||
bld.pseudo(aco_opcode::p_create_vector, Definition(dst), wqm1, wqm2);
|
||||
return;
|
||||
}
|
||||
|
@ -8148,7 +8146,7 @@ void visit_intrinsic(isel_context *ctx, nir_intrinsic_instr *instr)
|
|||
/* Wave32 with ballot size set to 64 */
|
||||
bld.pseudo(aco_opcode::p_create_vector, Definition(tmp), lanemask_tmp.getTemp(), Operand(0u));
|
||||
}
|
||||
emit_wqm(ctx, tmp.getTemp(), dst);
|
||||
emit_wqm(bld, tmp.getTemp(), dst);
|
||||
break;
|
||||
}
|
||||
case nir_intrinsic_shuffle:
|
||||
|
@ -8163,24 +8161,24 @@ void visit_intrinsic(isel_context *ctx, nir_intrinsic_instr *instr)
|
|||
Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
|
||||
if (src.regClass() == v1b || src.regClass() == v2b) {
|
||||
Temp tmp = bld.tmp(v1);
|
||||
tmp = emit_wqm(ctx, emit_bpermute(ctx, bld, tid, src), tmp);
|
||||
tmp = emit_wqm(bld, emit_bpermute(ctx, bld, tid, src), tmp);
|
||||
if (dst.type() == RegType::vgpr)
|
||||
bld.pseudo(aco_opcode::p_split_vector, Definition(dst), bld.def(src.regClass() == v1b ? v3b : v2b), tmp);
|
||||
else
|
||||
bld.pseudo(aco_opcode::p_as_uniform, Definition(dst), tmp);
|
||||
} else if (src.regClass() == v1) {
|
||||
emit_wqm(ctx, emit_bpermute(ctx, bld, tid, src), dst);
|
||||
emit_wqm(bld, emit_bpermute(ctx, bld, tid, src), dst);
|
||||
} else if (src.regClass() == v2) {
|
||||
Temp lo = bld.tmp(v1), hi = bld.tmp(v1);
|
||||
bld.pseudo(aco_opcode::p_split_vector, Definition(lo), Definition(hi), src);
|
||||
lo = emit_wqm(ctx, emit_bpermute(ctx, bld, tid, lo));
|
||||
hi = emit_wqm(ctx, emit_bpermute(ctx, bld, tid, hi));
|
||||
lo = emit_wqm(bld, emit_bpermute(ctx, bld, tid, lo));
|
||||
hi = emit_wqm(bld, emit_bpermute(ctx, bld, tid, hi));
|
||||
bld.pseudo(aco_opcode::p_create_vector, Definition(dst), lo, hi);
|
||||
emit_split_vector(ctx, dst, 2);
|
||||
} else if (instr->dest.ssa.bit_size == 1 && tid.regClass() == s1) {
|
||||
assert(src.regClass() == bld.lm);
|
||||
Temp tmp = bld.sopc(Builder::s_bitcmp1, bld.def(s1, scc), src, tid);
|
||||
bool_to_vector_condition(ctx, emit_wqm(ctx, tmp), dst);
|
||||
bool_to_vector_condition(ctx, emit_wqm(bld, tmp), dst);
|
||||
} else if (instr->dest.ssa.bit_size == 1 && tid.regClass() == v1) {
|
||||
assert(src.regClass() == bld.lm);
|
||||
Temp tmp;
|
||||
|
@ -8192,7 +8190,7 @@ void visit_intrinsic(isel_context *ctx, nir_intrinsic_instr *instr)
|
|||
tmp = bld.vop2_e64(aco_opcode::v_lshrrev_b32, bld.def(v1), tid, src);
|
||||
tmp = emit_extract_vector(ctx, tmp, 0, v1);
|
||||
tmp = bld.vop2(aco_opcode::v_and_b32, bld.def(v1), Operand(1u), tmp);
|
||||
emit_wqm(ctx, bld.vopc(aco_opcode::v_cmp_lg_u32, bld.def(bld.lm), Operand(0u), tmp), dst);
|
||||
emit_wqm(bld, bld.vopc(aco_opcode::v_cmp_lg_u32, bld.def(bld.lm), Operand(0u), tmp), dst);
|
||||
} else {
|
||||
isel_err(&instr->instr, "Unimplemented NIR instr bit size");
|
||||
}
|
||||
|
@ -8212,21 +8210,21 @@ void visit_intrinsic(isel_context *ctx, nir_intrinsic_instr *instr)
|
|||
Temp src = get_ssa_temp(ctx, instr->src[0].ssa);
|
||||
Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
|
||||
if (src.regClass() == v1b || src.regClass() == v2b || src.regClass() == v1) {
|
||||
emit_wqm(ctx,
|
||||
emit_wqm(bld,
|
||||
bld.vop1(aco_opcode::v_readfirstlane_b32, bld.def(s1), src),
|
||||
dst);
|
||||
} else if (src.regClass() == v2) {
|
||||
Temp lo = bld.tmp(v1), hi = bld.tmp(v1);
|
||||
bld.pseudo(aco_opcode::p_split_vector, Definition(lo), Definition(hi), src);
|
||||
lo = emit_wqm(ctx, bld.vop1(aco_opcode::v_readfirstlane_b32, bld.def(s1), lo));
|
||||
hi = emit_wqm(ctx, bld.vop1(aco_opcode::v_readfirstlane_b32, bld.def(s1), hi));
|
||||
lo = emit_wqm(bld, bld.vop1(aco_opcode::v_readfirstlane_b32, bld.def(s1), lo));
|
||||
hi = emit_wqm(bld, bld.vop1(aco_opcode::v_readfirstlane_b32, bld.def(s1), hi));
|
||||
bld.pseudo(aco_opcode::p_create_vector, Definition(dst), lo, hi);
|
||||
emit_split_vector(ctx, dst, 2);
|
||||
} else if (instr->dest.ssa.bit_size == 1) {
|
||||
assert(src.regClass() == bld.lm);
|
||||
Temp tmp = bld.sopc(Builder::s_bitcmp1, bld.def(s1, scc), src,
|
||||
bld.sop1(Builder::s_ff1_i32, bld.def(s1), Operand(exec, bld.lm)));
|
||||
bool_to_vector_condition(ctx, emit_wqm(ctx, tmp), dst);
|
||||
bool_to_vector_condition(ctx, emit_wqm(bld, tmp), dst);
|
||||
} else {
|
||||
bld.copy(Definition(dst), src);
|
||||
}
|
||||
|
@ -8239,7 +8237,7 @@ void visit_intrinsic(isel_context *ctx, nir_intrinsic_instr *instr)
|
|||
assert(dst.regClass() == bld.lm);
|
||||
|
||||
Temp tmp = bld.sop2(Builder::s_andn2, bld.def(bld.lm), bld.def(s1, scc), Operand(exec, bld.lm), src).def(1).getTemp();
|
||||
Temp cond = bool_to_vector_condition(ctx, emit_wqm(ctx, tmp));
|
||||
Temp cond = bool_to_vector_condition(ctx, emit_wqm(bld, tmp));
|
||||
bld.sop1(Builder::s_not, Definition(dst), bld.def(s1, scc), cond);
|
||||
break;
|
||||
}
|
||||
|
@ -8250,7 +8248,7 @@ void visit_intrinsic(isel_context *ctx, nir_intrinsic_instr *instr)
|
|||
assert(dst.regClass() == bld.lm);
|
||||
|
||||
Temp tmp = bool_to_scalar_condition(ctx, src);
|
||||
bool_to_vector_condition(ctx, emit_wqm(ctx, tmp), dst);
|
||||
bool_to_vector_condition(ctx, emit_wqm(bld, tmp), dst);
|
||||
break;
|
||||
}
|
||||
case nir_intrinsic_reduce:
|
||||
|
@ -8291,13 +8289,13 @@ void visit_intrinsic(isel_context *ctx, nir_intrinsic_instr *instr)
|
|||
|
||||
switch (instr->intrinsic) {
|
||||
case nir_intrinsic_reduce:
|
||||
emit_wqm(ctx, emit_boolean_reduce(ctx, op, cluster_size, src), dst);
|
||||
emit_wqm(bld, emit_boolean_reduce(ctx, op, cluster_size, src), dst);
|
||||
break;
|
||||
case nir_intrinsic_exclusive_scan:
|
||||
emit_wqm(ctx, emit_boolean_exclusive_scan(ctx, op, src), dst);
|
||||
emit_wqm(bld, emit_boolean_exclusive_scan(ctx, op, src), dst);
|
||||
break;
|
||||
case nir_intrinsic_inclusive_scan:
|
||||
emit_wqm(ctx, emit_boolean_inclusive_scan(ctx, op, src), dst);
|
||||
emit_wqm(bld, emit_boolean_inclusive_scan(ctx, op, src), dst);
|
||||
break;
|
||||
default:
|
||||
assert(false);
|
||||
|
@ -8321,7 +8319,7 @@ void visit_intrinsic(isel_context *ctx, nir_intrinsic_instr *instr)
|
|||
}
|
||||
|
||||
Temp tmp_dst = emit_reduction_instr(ctx, aco_op, reduce_op, cluster_size, bld.def(dst.regClass()), src);
|
||||
emit_wqm(ctx, tmp_dst, dst);
|
||||
emit_wqm(bld, tmp_dst, dst);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
@ -8343,35 +8341,35 @@ void visit_intrinsic(isel_context *ctx, nir_intrinsic_instr *instr)
|
|||
bld.sop1(Builder::s_wqm, Definition(tmp),
|
||||
bld.sop2(Builder::s_and, bld.def(bld.lm), bld.def(s1, scc), mask_tmp,
|
||||
bld.sop2(Builder::s_and, bld.def(bld.lm), bld.def(s1, scc), src, Operand(exec, bld.lm))));
|
||||
emit_wqm(ctx, tmp, dst);
|
||||
emit_wqm(bld, tmp, dst);
|
||||
} else if (instr->dest.ssa.bit_size == 8) {
|
||||
Temp tmp = bld.tmp(v1);
|
||||
if (ctx->program->chip_class >= GFX8)
|
||||
emit_wqm(ctx, bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), src, dpp_ctrl), tmp);
|
||||
emit_wqm(bld, bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), src, dpp_ctrl), tmp);
|
||||
else
|
||||
emit_wqm(ctx, bld.ds(aco_opcode::ds_swizzle_b32, bld.def(v1), src, (1 << 15) | dpp_ctrl), tmp);
|
||||
emit_wqm(bld, bld.ds(aco_opcode::ds_swizzle_b32, bld.def(v1), src, (1 << 15) | dpp_ctrl), tmp);
|
||||
bld.pseudo(aco_opcode::p_split_vector, Definition(dst), bld.def(v3b), tmp);
|
||||
} else if (instr->dest.ssa.bit_size == 16) {
|
||||
Temp tmp = bld.tmp(v1);
|
||||
if (ctx->program->chip_class >= GFX8)
|
||||
emit_wqm(ctx, bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), src, dpp_ctrl), tmp);
|
||||
emit_wqm(bld, bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), src, dpp_ctrl), tmp);
|
||||
else
|
||||
emit_wqm(ctx, bld.ds(aco_opcode::ds_swizzle_b32, bld.def(v1), src, (1 << 15) | dpp_ctrl), tmp);
|
||||
emit_wqm(bld, bld.ds(aco_opcode::ds_swizzle_b32, bld.def(v1), src, (1 << 15) | dpp_ctrl), tmp);
|
||||
bld.pseudo(aco_opcode::p_split_vector, Definition(dst), bld.def(v2b), tmp);
|
||||
} else if (instr->dest.ssa.bit_size == 32) {
|
||||
if (ctx->program->chip_class >= GFX8)
|
||||
emit_wqm(ctx, bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), src, dpp_ctrl), dst);
|
||||
emit_wqm(bld, bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), src, dpp_ctrl), dst);
|
||||
else
|
||||
emit_wqm(ctx, bld.ds(aco_opcode::ds_swizzle_b32, bld.def(v1), src, (1 << 15) | dpp_ctrl), dst);
|
||||
emit_wqm(bld, bld.ds(aco_opcode::ds_swizzle_b32, bld.def(v1), src, (1 << 15) | dpp_ctrl), dst);
|
||||
} else if (instr->dest.ssa.bit_size == 64) {
|
||||
Temp lo = bld.tmp(v1), hi = bld.tmp(v1);
|
||||
bld.pseudo(aco_opcode::p_split_vector, Definition(lo), Definition(hi), src);
|
||||
if (ctx->program->chip_class >= GFX8) {
|
||||
lo = emit_wqm(ctx, bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), lo, dpp_ctrl));
|
||||
hi = emit_wqm(ctx, bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), hi, dpp_ctrl));
|
||||
lo = emit_wqm(bld, bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), lo, dpp_ctrl));
|
||||
hi = emit_wqm(bld, bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), hi, dpp_ctrl));
|
||||
} else {
|
||||
lo = emit_wqm(ctx, bld.ds(aco_opcode::ds_swizzle_b32, bld.def(v1), lo, (1 << 15) | dpp_ctrl));
|
||||
hi = emit_wqm(ctx, bld.ds(aco_opcode::ds_swizzle_b32, bld.def(v1), hi, (1 << 15) | dpp_ctrl));
|
||||
lo = emit_wqm(bld, bld.ds(aco_opcode::ds_swizzle_b32, bld.def(v1), lo, (1 << 15) | dpp_ctrl));
|
||||
hi = emit_wqm(bld, bld.ds(aco_opcode::ds_swizzle_b32, bld.def(v1), hi, (1 << 15) | dpp_ctrl));
|
||||
}
|
||||
bld.pseudo(aco_opcode::p_create_vector, Definition(dst), lo, hi);
|
||||
emit_split_vector(ctx, dst, 2);
|
||||
|
@ -8419,20 +8417,20 @@ void visit_intrinsic(isel_context *ctx, nir_intrinsic_instr *instr)
|
|||
else
|
||||
src = bld.ds(aco_opcode::ds_swizzle_b32, bld.def(v1), src, dpp_ctrl);
|
||||
Temp tmp = bld.vopc(aco_opcode::v_cmp_lg_u32, bld.def(bld.lm), Operand(0u), src);
|
||||
emit_wqm(ctx, tmp, dst);
|
||||
emit_wqm(bld, tmp, dst);
|
||||
} else if (instr->dest.ssa.bit_size == 8) {
|
||||
Temp tmp = bld.tmp(v1);
|
||||
if (ctx->program->chip_class >= GFX8)
|
||||
emit_wqm(ctx, bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), src, dpp_ctrl), tmp);
|
||||
emit_wqm(bld, bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), src, dpp_ctrl), tmp);
|
||||
else
|
||||
emit_wqm(ctx, bld.ds(aco_opcode::ds_swizzle_b32, bld.def(v1), src, dpp_ctrl), tmp);
|
||||
emit_wqm(bld, bld.ds(aco_opcode::ds_swizzle_b32, bld.def(v1), src, dpp_ctrl), tmp);
|
||||
bld.pseudo(aco_opcode::p_split_vector, Definition(dst), bld.def(v3b), tmp);
|
||||
} else if (instr->dest.ssa.bit_size == 16) {
|
||||
Temp tmp = bld.tmp(v1);
|
||||
if (ctx->program->chip_class >= GFX8)
|
||||
emit_wqm(ctx, bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), src, dpp_ctrl), tmp);
|
||||
emit_wqm(bld, bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), src, dpp_ctrl), tmp);
|
||||
else
|
||||
emit_wqm(ctx, bld.ds(aco_opcode::ds_swizzle_b32, bld.def(v1), src, dpp_ctrl), tmp);
|
||||
emit_wqm(bld, bld.ds(aco_opcode::ds_swizzle_b32, bld.def(v1), src, dpp_ctrl), tmp);
|
||||
bld.pseudo(aco_opcode::p_split_vector, Definition(dst), bld.def(v2b), tmp);
|
||||
} else if (instr->dest.ssa.bit_size == 32) {
|
||||
Temp tmp;
|
||||
|
@ -8440,16 +8438,16 @@ void visit_intrinsic(isel_context *ctx, nir_intrinsic_instr *instr)
|
|||
tmp = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), src, dpp_ctrl);
|
||||
else
|
||||
tmp = bld.ds(aco_opcode::ds_swizzle_b32, bld.def(v1), src, dpp_ctrl);
|
||||
emit_wqm(ctx, tmp, dst);
|
||||
emit_wqm(bld, tmp, dst);
|
||||
} else if (instr->dest.ssa.bit_size == 64) {
|
||||
Temp lo = bld.tmp(v1), hi = bld.tmp(v1);
|
||||
bld.pseudo(aco_opcode::p_split_vector, Definition(lo), Definition(hi), src);
|
||||
if (ctx->program->chip_class >= GFX8) {
|
||||
lo = emit_wqm(ctx, bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), lo, dpp_ctrl));
|
||||
hi = emit_wqm(ctx, bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), hi, dpp_ctrl));
|
||||
lo = emit_wqm(bld, bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), lo, dpp_ctrl));
|
||||
hi = emit_wqm(bld, bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), hi, dpp_ctrl));
|
||||
} else {
|
||||
lo = emit_wqm(ctx, bld.ds(aco_opcode::ds_swizzle_b32, bld.def(v1), lo, dpp_ctrl));
|
||||
hi = emit_wqm(ctx, bld.ds(aco_opcode::ds_swizzle_b32, bld.def(v1), hi, dpp_ctrl));
|
||||
lo = emit_wqm(bld, bld.ds(aco_opcode::ds_swizzle_b32, bld.def(v1), lo, dpp_ctrl));
|
||||
hi = emit_wqm(bld, bld.ds(aco_opcode::ds_swizzle_b32, bld.def(v1), hi, dpp_ctrl));
|
||||
}
|
||||
bld.pseudo(aco_opcode::p_create_vector, Definition(dst), lo, hi);
|
||||
emit_split_vector(ctx, dst, 2);
|
||||
|
@ -8471,20 +8469,20 @@ void visit_intrinsic(isel_context *ctx, nir_intrinsic_instr *instr)
|
|||
src = bld.vop2_e64(aco_opcode::v_cndmask_b32, bld.def(v1), Operand(0u), Operand((uint32_t)-1), src);
|
||||
src = emit_masked_swizzle(ctx, bld, src, mask);
|
||||
Temp tmp = bld.vopc(aco_opcode::v_cmp_lg_u32, bld.def(bld.lm), Operand(0u), src);
|
||||
emit_wqm(ctx, tmp, dst);
|
||||
emit_wqm(bld, tmp, dst);
|
||||
} else if (dst.regClass() == v1b) {
|
||||
Temp tmp = emit_wqm(ctx, emit_masked_swizzle(ctx, bld, src, mask));
|
||||
Temp tmp = emit_wqm(bld, emit_masked_swizzle(ctx, bld, src, mask));
|
||||
emit_extract_vector(ctx, tmp, 0, dst);
|
||||
} else if (dst.regClass() == v2b) {
|
||||
Temp tmp = emit_wqm(ctx, emit_masked_swizzle(ctx, bld, src, mask));
|
||||
Temp tmp = emit_wqm(bld, emit_masked_swizzle(ctx, bld, src, mask));
|
||||
emit_extract_vector(ctx, tmp, 0, dst);
|
||||
} else if (dst.regClass() == v1) {
|
||||
emit_wqm(ctx, emit_masked_swizzle(ctx, bld, src, mask), dst);
|
||||
emit_wqm(bld, emit_masked_swizzle(ctx, bld, src, mask), dst);
|
||||
} else if (dst.regClass() == v2) {
|
||||
Temp lo = bld.tmp(v1), hi = bld.tmp(v1);
|
||||
bld.pseudo(aco_opcode::p_split_vector, Definition(lo), Definition(hi), src);
|
||||
lo = emit_wqm(ctx, emit_masked_swizzle(ctx, bld, lo, mask));
|
||||
hi = emit_wqm(ctx, emit_masked_swizzle(ctx, bld, hi, mask));
|
||||
lo = emit_wqm(bld, emit_masked_swizzle(ctx, bld, lo, mask));
|
||||
hi = emit_wqm(bld, emit_masked_swizzle(ctx, bld, hi, mask));
|
||||
bld.pseudo(aco_opcode::p_create_vector, Definition(dst), lo, hi);
|
||||
emit_split_vector(ctx, dst, 2);
|
||||
} else {
|
||||
|
@ -8499,14 +8497,14 @@ void visit_intrinsic(isel_context *ctx, nir_intrinsic_instr *instr)
|
|||
Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
|
||||
if (dst.regClass() == v1) {
|
||||
/* src2 is ignored for writelane. RA assigns the same reg for dst */
|
||||
emit_wqm(ctx, bld.writelane(bld.def(v1), val, lane, src), dst);
|
||||
emit_wqm(bld, bld.writelane(bld.def(v1), val, lane, src), dst);
|
||||
} else if (dst.regClass() == v2) {
|
||||
Temp src_lo = bld.tmp(v1), src_hi = bld.tmp(v1);
|
||||
Temp val_lo = bld.tmp(s1), val_hi = bld.tmp(s1);
|
||||
bld.pseudo(aco_opcode::p_split_vector, Definition(src_lo), Definition(src_hi), src);
|
||||
bld.pseudo(aco_opcode::p_split_vector, Definition(val_lo), Definition(val_hi), val);
|
||||
Temp lo = emit_wqm(ctx, bld.writelane(bld.def(v1), val_lo, lane, src_hi));
|
||||
Temp hi = emit_wqm(ctx, bld.writelane(bld.def(v1), val_hi, lane, src_hi));
|
||||
Temp lo = emit_wqm(bld, bld.writelane(bld.def(v1), val_lo, lane, src_hi));
|
||||
Temp hi = emit_wqm(bld, bld.writelane(bld.def(v1), val_hi, lane, src_hi));
|
||||
bld.pseudo(aco_opcode::p_create_vector, Definition(dst), lo, hi);
|
||||
emit_split_vector(ctx, dst, 2);
|
||||
} else {
|
||||
|
@ -8520,7 +8518,7 @@ void visit_intrinsic(isel_context *ctx, nir_intrinsic_instr *instr)
|
|||
/* Fit 64-bit mask for wave32 */
|
||||
src = emit_extract_vector(ctx, src, 0, RegClass(src.type(), bld.lm.size()));
|
||||
Temp wqm_tmp = emit_mbcnt(ctx, bld.tmp(v1), Operand(src));
|
||||
emit_wqm(ctx, wqm_tmp, dst);
|
||||
emit_wqm(bld, wqm_tmp, dst);
|
||||
break;
|
||||
}
|
||||
case nir_intrinsic_load_helper_invocation: {
|
||||
|
@ -8558,7 +8556,7 @@ void visit_intrinsic(isel_context *ctx, nir_intrinsic_instr *instr)
|
|||
break;
|
||||
}
|
||||
case nir_intrinsic_first_invocation: {
|
||||
emit_wqm(ctx, bld.sop1(Builder::s_ff1_i32, bld.def(s1), Operand(exec, bld.lm)),
|
||||
emit_wqm(bld, bld.sop1(Builder::s_ff1_i32, bld.def(s1), Operand(exec, bld.lm)),
|
||||
get_ssa_temp(ctx, &instr->dest.ssa));
|
||||
break;
|
||||
}
|
||||
|
@ -8566,12 +8564,12 @@ void visit_intrinsic(isel_context *ctx, nir_intrinsic_instr *instr)
|
|||
Temp flbit = bld.sop1(Builder::s_flbit_i32, bld.def(s1), Operand(exec, bld.lm));
|
||||
Temp last = bld.sop2(aco_opcode::s_sub_i32, bld.def(s1), bld.def(s1, scc),
|
||||
Operand(ctx->program->wave_size - 1u), flbit);
|
||||
emit_wqm(ctx, last, get_ssa_temp(ctx, &instr->dest.ssa));
|
||||
emit_wqm(bld, last, get_ssa_temp(ctx, &instr->dest.ssa));
|
||||
break;
|
||||
}
|
||||
case nir_intrinsic_elect: {
|
||||
Temp first = bld.sop1(Builder::s_ff1_i32, bld.def(s1), Operand(exec, bld.lm));
|
||||
emit_wqm(ctx, bld.sop2(Builder::s_lshl, bld.def(bld.lm), bld.def(s1, scc), Operand(1u), first),
|
||||
emit_wqm(bld, bld.sop2(Builder::s_lshl, bld.def(bld.lm), bld.def(s1, scc), Operand(1u), first),
|
||||
get_ssa_temp(ctx, &instr->dest.ssa));
|
||||
break;
|
||||
}
|
||||
|
@ -9487,7 +9485,7 @@ void visit_tex(isel_context *ctx, nir_tex_instr *instr)
|
|||
!has_derivs && !has_lod && !level_zero &&
|
||||
instr->sampler_dim != GLSL_SAMPLER_DIM_MS &&
|
||||
instr->sampler_dim != GLSL_SAMPLER_DIM_SUBPASS_MS)
|
||||
arg = emit_wqm(ctx, arg, bld.tmp(arg.regClass()), true);
|
||||
arg = emit_wqm(bld, arg, bld.tmp(arg.regClass()), true);
|
||||
|
||||
tex.reset(create_instruction<MIMG_instruction>(opcode, Format::MIMG, 3 + instr->is_sparse, 1));
|
||||
tex->operands[0] = Operand(resource);
|
||||
|
|
Loading…
Reference in New Issue