pan/bi: Emit collect and split

Rather than using offsets during instruction selection.

Signed-off-by: Alyssa Rosenzweig <alyssa@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16585>
This commit is contained in:
Alyssa Rosenzweig 2022-05-10 12:22:48 -04:00 committed by Marge Bot
parent 4731e9e55a
commit 5febeae58e
1 changed files with 212 additions and 164 deletions

View File

@ -215,7 +215,15 @@ bi_emit_cached_split_i32(bi_builder *b, bi_index vec, unsigned n)
static void static void
bi_emit_cached_split(bi_builder *b, bi_index vec, unsigned bits) bi_emit_cached_split(bi_builder *b, bi_index vec, unsigned bits)
{ {
bi_emit_cached_split(b, vec, DIV_ROUND_UP(bits, 32)); bi_emit_cached_split_i32(b, vec, DIV_ROUND_UP(bits, 32));
}
static void
bi_split_dest(bi_builder *b, nir_dest dest)
{
bi_emit_cached_split(b, bi_dest_index(&dest),
nir_dest_bit_size(dest) *
nir_dest_num_components(dest));
} }
static bi_instr * static bi_instr *
@ -373,17 +381,21 @@ static void
bi_copy_component(bi_builder *b, nir_intrinsic_instr *instr, bi_index tmp) bi_copy_component(bi_builder *b, nir_intrinsic_instr *instr, bi_index tmp)
{ {
unsigned component = nir_intrinsic_component(instr); unsigned component = nir_intrinsic_component(instr);
unsigned nr = instr->num_components;
unsigned total = nr + component;
unsigned bitsize = nir_dest_bit_size(instr->dest);
assert(total <= 4 && "should be vec4");
bi_emit_cached_split(b, tmp, total * bitsize);
if (component == 0) if (component == 0)
return; return;
bi_index srcs[] = { tmp, tmp, tmp, tmp }; bi_index srcs[] = { tmp, tmp, tmp };
unsigned channels[] = { component, component + 1, component + 2 }; unsigned channels[] = { component, component + 1, component + 2 };
bi_make_vec_to(b, bi_make_vec_to(b, bi_dest_index(&instr->dest),
bi_dest_index(&instr->dest), srcs, channels, nr, nir_dest_bit_size(instr->dest));
srcs, channels, instr->num_components,
nir_dest_bit_size(instr->dest));
} }
static void static void
@ -563,8 +575,8 @@ bi_make_vec16_to(bi_builder *b, bi_index dst, bi_index *src,
unsigned chan = channel ? channel[i] : 0; unsigned chan = channel ? channel[i] : 0;
unsigned nextc = next && channel ? channel[i + 1] : 0; unsigned nextc = next && channel ? channel[i + 1] : 0;
bi_index w0 = bi_word(src[i], chan >> 1); bi_index w0 = bi_extract(b, src[i], chan >> 1);
bi_index w1 = next ? bi_word(src[i + 1], nextc >> 1) : bi_zero(); bi_index w1 = next ? bi_extract(b, src[i + 1], nextc >> 1) : bi_zero();
bi_index h0 = bi_half(w0, chan & 1); bi_index h0 = bi_half(w0, chan & 1);
bi_index h1 = bi_half(w1, nextc & 1); bi_index h1 = bi_half(w1, nextc & 1);
@ -577,11 +589,7 @@ bi_make_vec16_to(bi_builder *b, bi_index dst, bi_index *src,
srcs[i >> 1] = bi_mkvec_v2i16(b, h0, h1); srcs[i >> 1] = bi_mkvec_v2i16(b, h0, h1);
} }
bi_instr *I = bi_collect_i32_to(b, dst); bi_emit_collect_to(b, dst, srcs, DIV_ROUND_UP(count, 2));
I->nr_srcs = DIV_ROUND_UP(count, 2);
for (unsigned i = 0; i < I->nr_srcs; ++i)
I->src[i] = srcs[i];
} }
static void static void
@ -592,19 +600,17 @@ bi_make_vec_to(bi_builder *b, bi_index dst,
unsigned bitsize) unsigned bitsize)
{ {
if (bitsize == 32) { if (bitsize == 32) {
bi_instr *I = bi_collect_i32_to(b, dst); bi_index srcs[BI_MAX_VEC];
I->nr_srcs = count;
for (unsigned i = 0; i < count; ++i) for (unsigned i = 0; i < count; ++i)
I->src[i] = bi_word(src[i], channel ? channel[i] : 0); srcs[i] = bi_extract(b, src[i], channel ? channel[i] : 0);
if (I->nr_srcs == 1) bi_emit_collect_to(b, dst, srcs, count);
I->op = BI_OPCODE_MOV_I32;
} else if (bitsize == 16) { } else if (bitsize == 16) {
bi_make_vec16_to(b, dst, src, channel, count); bi_make_vec16_to(b, dst, src, channel, count);
} else if (bitsize == 8 && count == 1) { } else if (bitsize == 8 && count == 1) {
bi_swz_v4i8_to(b, dst, bi_byte( bi_swz_v4i8_to(b, dst, bi_byte(
bi_word(src[0], channel[0] >> 2), bi_extract(b, src[0], channel[0] >> 2),
channel[0] & 3)); channel[0] & 3));
} else { } else {
unreachable("8-bit mkvec not yet supported"); unreachable("8-bit mkvec not yet supported");
@ -615,13 +621,17 @@ static inline bi_instr *
bi_load_ubo_to(bi_builder *b, unsigned bitsize, bi_index dest0, bi_index src0, bi_load_ubo_to(bi_builder *b, unsigned bitsize, bi_index dest0, bi_index src0,
bi_index src1) bi_index src1)
{ {
bi_instr *I;
if (b->shader->arch >= 9) { if (b->shader->arch >= 9) {
bi_instr *I = bi_ld_buffer_to(b, bitsize, dest0, src0, src1); I = bi_ld_buffer_to(b, bitsize, dest0, src0, src1);
I->seg = BI_SEG_UBO; I->seg = BI_SEG_UBO;
return I;
} else { } else {
return bi_load_to(b, bitsize, dest0, src0, src1, BI_SEG_UBO, 0); I = bi_load_to(b, bitsize, dest0, src0, src1, BI_SEG_UBO, 0);
} }
bi_emit_cached_split(b, dest0, bitsize);
return I;
} }
static bi_instr * static bi_instr *
@ -852,8 +862,8 @@ bi_emit_fragment_out(bi_builder *b, nir_intrinsic_instr *instr)
bi_index rgba = bi_src_index(&instr->src[0]); bi_index rgba = bi_src_index(&instr->src[0]);
bi_index alpha = bi_index alpha =
(T == nir_type_float16) ? bi_half(bi_word(rgba, 1), true) : (T == nir_type_float16) ? bi_half(bi_extract(b, rgba, 1), true) :
(T == nir_type_float32) ? bi_word(rgba, 3) : (T == nir_type_float32) ? bi_extract(b, rgba, 3) :
bi_dontcare(b); bi_dontcare(b);
/* Don't read out-of-bounds */ /* Don't read out-of-bounds */
@ -985,8 +995,35 @@ bi_emit_store_vary(bi_builder *b, nir_intrinsic_instr *instr)
assert(nr > 0 && nr <= nir_intrinsic_src_components(instr, 0)); assert(nr > 0 && nr <= nir_intrinsic_src_components(instr, 0));
bi_index data = bi_src_index(&instr->src[0]); bi_index data = bi_src_index(&instr->src[0]);
/* To keep the vector dimensions consistent, we need to drop some
* components. This should be coalesced.
*
* TODO: This is ugly and maybe inefficient. Would we rather
* introduce a TRIM.i32 pseudoinstruction?
*/
if (nr < nir_intrinsic_src_components(instr, 0)) {
assert(T_size == 32 && "todo: 16-bit trim");
bi_instr *split = bi_split_i32_to(b, bi_null(), data);
split->nr_dests = nir_intrinsic_src_components(instr, 0);
bi_index tmp = bi_temp(b->shader);
bi_instr *collect = bi_collect_i32_to(b, tmp);
collect->nr_srcs = nr;
for (unsigned w = 0; w < nr; ++w) {
split->dest[w] = bi_temp(b->shader);
collect->src[w] = split->dest[w];
}
data = tmp;
}
bool psiz = (nir_intrinsic_io_semantics(instr).location == VARYING_SLOT_PSIZ); bool psiz = (nir_intrinsic_io_semantics(instr).location == VARYING_SLOT_PSIZ);
bi_index a[4] = { bi_null() };
if (b->shader->arch <= 8 && b->shader->idvs == BI_IDVS_POSITION) { if (b->shader->arch <= 8 && b->shader->idvs == BI_IDVS_POSITION) {
/* Bifrost position shaders have a fast path */ /* Bifrost position shaders have a fast path */
assert(T == nir_type_float16 || T == nir_type_float32); assert(T == nir_type_float16 || T == nir_type_float32);
@ -1006,20 +1043,21 @@ bi_emit_store_vary(bi_builder *b, nir_intrinsic_instr *instr)
} }
bi_index address = bi_lea_buf_imm(b, index); bi_index address = bi_lea_buf_imm(b, index);
bi_emit_split_i32(b, a, address, 2);
bool varying = (b->shader->idvs == BI_IDVS_VARYING); bool varying = (b->shader->idvs == BI_IDVS_VARYING);
bi_store(b, nr * nir_src_bit_size(instr->src[0]), bi_store(b, nr * nir_src_bit_size(instr->src[0]),
bi_src_index(&instr->src[0]), data, a[0], a[1],
address, bi_word(address, 1),
varying ? BI_SEG_VARY : BI_SEG_POS, varying ? BI_SEG_VARY : BI_SEG_POS,
varying ? bi_varying_offset(b->shader, instr) : 0); varying ? bi_varying_offset(b->shader, instr) : 0);
} else if (immediate) { } else if (immediate) {
bi_index address = bi_lea_attr_imm(b, bi_index address = bi_lea_attr_imm(b,
bi_vertex_id(b), bi_instance_id(b), bi_vertex_id(b), bi_instance_id(b),
regfmt, imm_index); regfmt, imm_index);
bi_emit_split_i32(b, a, address, 3);
bi_st_cvt(b, data, address, bi_word(address, 1), bi_st_cvt(b, data, a[0], a[1], a[2], regfmt, nr - 1);
bi_word(address, 2), regfmt, nr - 1);
} else { } else {
bi_index idx = bi_index idx =
bi_iadd_u32(b, bi_iadd_u32(b,
@ -1029,9 +1067,9 @@ bi_emit_store_vary(bi_builder *b, nir_intrinsic_instr *instr)
bi_index address = bi_lea_attr(b, bi_index address = bi_lea_attr(b,
bi_vertex_id(b), bi_instance_id(b), bi_vertex_id(b), bi_instance_id(b),
idx, regfmt); idx, regfmt);
bi_emit_split_i32(b, a, address, 3);
bi_st_cvt(b, data, address, bi_word(address, 1), bi_st_cvt(b, data, a[0], a[1], a[2], regfmt, nr - 1);
bi_word(address, 2), regfmt, nr - 1);
} }
} }
@ -1052,14 +1090,14 @@ bi_emit_load_ubo(bi_builder *b, nir_intrinsic_instr *instr)
} }
static bi_index static bi_index
bi_addr_high(nir_src *src) bi_addr_high(bi_builder *b, nir_src *src)
{ {
return (nir_src_bit_size(*src) == 64) ? return (nir_src_bit_size(*src) == 64) ?
bi_word(bi_src_index(src), 1) : bi_zero(); bi_extract(b, bi_src_index(src), 1) : bi_zero();
} }
static void static void
bi_handle_segment(bi_builder *b, bi_index *addr, bi_index *addr_hi, enum bi_seg seg, int16_t *offset) bi_handle_segment(bi_builder *b, bi_index *addr_lo, bi_index *addr_hi, enum bi_seg seg, int16_t *offset)
{ {
/* Not needed on Bifrost or for global accesses */ /* Not needed on Bifrost or for global accesses */
if (b->shader->arch < 9 || seg == BI_SEG_NONE) if (b->shader->arch < 9 || seg == BI_SEG_NONE)
@ -1072,35 +1110,34 @@ bi_handle_segment(bi_builder *b, bi_index *addr, bi_index *addr_hi, enum bi_seg
bool wls = (seg == BI_SEG_WLS); bool wls = (seg == BI_SEG_WLS);
assert(wls || (seg == BI_SEG_TL)); assert(wls || (seg == BI_SEG_TL));
bi_index base = bi_fau(wls ? BIR_FAU_WLS_PTR : BIR_FAU_TLS_PTR, false); enum bir_fau fau = wls ? BIR_FAU_WLS_PTR : BIR_FAU_TLS_PTR;
if (offset && addr->type == BI_INDEX_CONSTANT) { bi_index base_lo = bi_fau(fau, false);
int value = addr->value;
if (value == (int16_t) value) { if (offset && addr_lo->type == BI_INDEX_CONSTANT && addr_lo->value == (int16_t) addr_lo->value) {
*offset = value; *offset = addr_lo->value;
*addr = base; *addr_lo = base_lo;
}
} else { } else {
*addr = bi_collect_v2i32(b, bi_iadd_u32(b, base, *addr, false), *addr_lo = bi_iadd_u32(b, base_lo, *addr_lo, false);
bi_imm_u32(0));
} }
*addr_hi = bi_word(*addr, 1); /* Do not allow overflow for WLS or TLS */
*addr_hi = bi_fau(fau, true);
} }
static void static void
bi_emit_load(bi_builder *b, nir_intrinsic_instr *instr, enum bi_seg seg) bi_emit_load(bi_builder *b, nir_intrinsic_instr *instr, enum bi_seg seg)
{ {
int16_t offset = 0; int16_t offset = 0;
bi_index addr_lo = bi_src_index(&instr->src[0]); unsigned bits = instr->num_components * nir_dest_bit_size(instr->dest);
bi_index addr_hi = bi_addr_high(&instr->src[0]); bi_index dest = bi_dest_index(&instr->dest);
bi_index addr_lo = bi_extract(b, bi_src_index(&instr->src[0]), 0);
bi_index addr_hi = bi_addr_high(b, &instr->src[0]);
bi_handle_segment(b, &addr_lo, &addr_hi, seg, &offset); bi_handle_segment(b, &addr_lo, &addr_hi, seg, &offset);
bi_load_to(b, instr->num_components * nir_dest_bit_size(instr->dest), bi_load_to(b, bits, dest, addr_lo, addr_hi, seg, offset);
bi_dest_index(&instr->dest), bi_emit_cached_split(b, dest, bits);
addr_lo, addr_hi, seg, offset);
} }
static void static void
@ -1111,8 +1148,8 @@ bi_emit_store(bi_builder *b, nir_intrinsic_instr *instr, enum bi_seg seg)
BITFIELD_MASK(instr->num_components)); BITFIELD_MASK(instr->num_components));
int16_t offset = 0; int16_t offset = 0;
bi_index addr_lo = bi_src_index(&instr->src[1]); bi_index addr_lo = bi_extract(b, bi_src_index(&instr->src[1]), 0);
bi_index addr_hi = bi_addr_high(&instr->src[1]); bi_index addr_hi = bi_addr_high(b, &instr->src[1]);
bi_handle_segment(b, &addr_lo, &addr_hi, seg, &offset); bi_handle_segment(b, &addr_lo, &addr_hi, seg, &offset);
@ -1133,29 +1170,14 @@ bi_emit_axchg_to(bi_builder *b, bi_index dst, bi_index addr, nir_src *arg, enum
bi_index data = bi_src_index(arg); bi_index data = bi_src_index(arg);
bi_index data_words[] = { bi_index addr_hi = (seg == BI_SEG_WLS) ? bi_zero() : bi_extract(b, addr, 1);
bi_word(data, 0),
bi_word(data, 1),
};
bi_index inout = bi_temp_reg(b->shader);
bi_make_vec_to(b, inout, data_words, NULL, sz / 32, 32);
bi_index addr_hi = bi_word(addr, 1);
if (b->shader->arch >= 9) if (b->shader->arch >= 9)
bi_handle_segment(b, &addr, &addr_hi, seg, NULL); bi_handle_segment(b, &addr, &addr_hi, seg, NULL);
else if (seg == BI_SEG_WLS) else if (seg == BI_SEG_WLS)
addr_hi = bi_zero(); addr_hi = bi_zero();
bi_axchg_to(b, sz, inout, inout, addr, addr_hi, seg); bi_axchg_to(b, sz, dst, data, bi_extract(b, addr, 0), addr_hi, seg);
bi_index inout_words[] = {
bi_word(inout, 0),
bi_word(inout, 1),
};
bi_make_vec_to(b, dst, inout_words, NULL, sz / 32, 32);
} }
/* Exchanges the second staging register with memory if comparison with first /* Exchanges the second staging register with memory if comparison with first
@ -1174,29 +1196,29 @@ bi_emit_acmpxchg_to(bi_builder *b, bi_index dst, bi_index addr, nir_src *arg_1,
assert(sz == 32 || sz == 64); assert(sz == 32 || sz == 64);
bi_index data_words[] = { bi_index data_words[] = {
bi_word(src0, 0), bi_extract(b, src0, 0),
sz == 32 ? bi_word(src1, 0) : bi_word(src0, 1), sz == 32 ? bi_extract(b, src1, 0) : bi_extract(b, src0, 1),
/* 64-bit */ /* 64-bit */
bi_word(src1, 0), bi_extract(b, src1, 0),
bi_word(src1, 1), sz == 32 ? bi_extract(b, src1, 0) : bi_extract(b, src1, 1),
}; };
bi_index inout = bi_temp_reg(b->shader); bi_index in = bi_temp(b->shader);
bi_make_vec_to(b, inout, data_words, NULL, 2 * (sz / 32), 32); bi_emit_collect_to(b, in, data_words, 2 * (sz / 32));
bi_index addr_hi = (seg == BI_SEG_WLS) ? bi_zero() : bi_extract(b, addr, 1);
bi_index addr_hi = bi_word(addr, 1);
if (b->shader->arch >= 9) if (b->shader->arch >= 9)
bi_handle_segment(b, &addr, &addr_hi, seg, NULL); bi_handle_segment(b, &addr, &addr_hi, seg, NULL);
else if (seg == BI_SEG_WLS) else if (seg == BI_SEG_WLS)
addr_hi = bi_zero(); addr_hi = bi_zero();
bi_acmpxchg_to(b, sz, inout, inout, addr, addr_hi, seg); bi_index out = bi_acmpxchg(b, sz, in, bi_extract(b, addr, 0), addr_hi, seg);
bi_emit_cached_split(b, out, sz);
bi_index inout_words[] = { bi_index inout_words[] = {
bi_word(inout, 0), bi_extract(b, out, 0),
bi_word(inout, 1), sz == 64 ? bi_extract(b, out, 1) : bi_null()
}; };
bi_make_vec_to(b, dst, inout_words, NULL, sz / 32, 32); bi_make_vec_to(b, dst, inout_words, NULL, sz / 32, 32);
@ -1301,19 +1323,19 @@ bi_emit_image_coord(bi_builder *b, bi_index coord, unsigned src_idx,
if (src_idx == 0) { if (src_idx == 0) {
if (coord_comps == 1 || (coord_comps == 2 && is_array)) if (coord_comps == 1 || (coord_comps == 2 && is_array))
return bi_word(coord, 0); return bi_extract(b, coord, 0);
else else
return bi_mkvec_v2i16(b, return bi_mkvec_v2i16(b,
bi_half(bi_word(coord, 0), false), bi_half(bi_extract(b, coord, 0), false),
bi_half(bi_word(coord, 1), false)); bi_half(bi_extract(b, coord, 1), false));
} else { } else {
if (coord_comps == 3 && b->shader->arch >= 9) if (coord_comps == 3 && b->shader->arch >= 9)
return bi_mkvec_v2i16(b, bi_imm_u16(0), return bi_mkvec_v2i16(b, bi_imm_u16(0),
bi_half(bi_word(coord, 2), false)); bi_half(bi_extract(b, coord, 2), false));
else if (coord_comps == 3) else if (coord_comps == 3)
return bi_word(coord, 2); return bi_extract(b, coord, 2);
else if (coord_comps == 2 && is_array) else if (coord_comps == 2 && is_array)
return bi_word(coord, 1); return bi_extract(b, coord, 1);
else else
return bi_zero(); return bi_zero();
} }
@ -1368,6 +1390,8 @@ bi_emit_image_load(bi_builder *b, nir_intrinsic_instr *instr)
bi_emit_image_index(b, instr), regfmt, bi_emit_image_index(b, instr), regfmt,
vecsize); vecsize);
} }
bi_split_dest(b, instr->dest);
} }
static bi_index static bi_index
@ -1407,16 +1431,17 @@ bi_emit_lea_image(bi_builder *b, nir_intrinsic_instr *instr)
I->table = BI_TABLE_ATTRIBUTE_1; I->table = BI_TABLE_ATTRIBUTE_1;
} }
bi_emit_cached_split(b, dest, 3 * 32);
return dest; return dest;
} }
static void static void
bi_emit_image_store(bi_builder *b, nir_intrinsic_instr *instr) bi_emit_image_store(bi_builder *b, nir_intrinsic_instr *instr)
{ {
bi_index addr = bi_emit_lea_image(b, instr); bi_index a[4] = { bi_null() };
bi_emit_split_i32(b, a, bi_emit_lea_image(b, instr), 3);
bi_st_cvt(b, bi_src_index(&instr->src[3]), bi_st_cvt(b, bi_src_index(&instr->src[3]), a[0], a[1], a[2],
addr, bi_word(addr, 1), bi_word(addr, 2),
bi_reg_fmt_for_nir(nir_intrinsic_src_type(instr)), bi_reg_fmt_for_nir(nir_intrinsic_src_type(instr)),
instr->num_components - 1); instr->num_components - 1);
} }
@ -1431,29 +1456,22 @@ bi_emit_atomic_i32_to(bi_builder *b, bi_index dst,
/* ATOM_C.i32 takes a vector with {arg, coalesced}, ATOM_C1.i32 doesn't /* ATOM_C.i32 takes a vector with {arg, coalesced}, ATOM_C1.i32 doesn't
* take any vector but can still output in RETURN mode */ * take any vector but can still output in RETURN mode */
bi_index sr = bifrost ? bi_temp_reg(b->shader) : bi_null(); bi_index tmp_dest = bifrost ? bi_temp(b->shader) : dst;
bi_index tmp_dest = bifrost ? sr : dst;
unsigned sr_count = bifrost ? 2 : 1; unsigned sr_count = bifrost ? 2 : 1;
/* Generate either ATOM or ATOM1 as required */ /* Generate either ATOM or ATOM1 as required */
if (bi_promote_atom_c1(opc, arg, &opc)) { if (bi_promote_atom_c1(opc, arg, &opc)) {
bi_atom1_return_i32_to(b, tmp_dest, bi_word(addr, 0), bi_atom1_return_i32_to(b, tmp_dest, bi_extract(b, addr, 0),
bi_word(addr, 1), opc, sr_count); bi_extract(b, addr, 1), opc, sr_count);
} else { } else {
bi_index src = arg; bi_atom_return_i32_to(b, tmp_dest, arg, bi_extract(b, addr, 0),
bi_extract(b, addr, 1), opc, sr_count);
if (bifrost) {
bi_mov_i32_to(b, sr, arg);
src = sr;
}
bi_atom_return_i32_to(b, tmp_dest, src, bi_word(addr, 0),
bi_word(addr, 1), opc, sr_count);
} }
if (bifrost) { if (bifrost) {
/* Post-process it */ /* Post-process it */
bi_atom_post_i32_to(b, dst, bi_word(sr, 0), bi_word(sr, 1), post_opc); bi_emit_cached_split_i32(b, tmp_dest, 2);
bi_atom_post_i32_to(b, dst, bi_extract(b, tmp_dest, 0), bi_extract(b, tmp_dest, 1), post_opc);
} }
} }
@ -1488,10 +1506,12 @@ bi_emit_load_frag_coord(bi_builder *b, nir_intrinsic_instr *instr)
static void static void
bi_emit_ld_tile(bi_builder *b, nir_intrinsic_instr *instr) bi_emit_ld_tile(bi_builder *b, nir_intrinsic_instr *instr)
{ {
bi_index dest = bi_dest_index(&instr->dest);
nir_alu_type T = nir_intrinsic_dest_type(instr); nir_alu_type T = nir_intrinsic_dest_type(instr);
enum bi_register_format regfmt = bi_reg_fmt_for_nir(T); enum bi_register_format regfmt = bi_reg_fmt_for_nir(T);
unsigned rt = b->shader->inputs->blend.rt; unsigned rt = b->shader->inputs->blend.rt;
unsigned size = nir_dest_bit_size(instr->dest); unsigned size = nir_dest_bit_size(instr->dest);
unsigned nr = instr->num_components;
/* Get the render target */ /* Get the render target */
if (!b->shader->inputs->is_blend) { if (!b->shader->inputs->is_blend) {
@ -1514,9 +1534,9 @@ bi_emit_ld_tile(bi_builder *b, nir_intrinsic_instr *instr)
I->flow = 0x9; /* .wait */ I->flow = 0x9; /* .wait */
} }
bi_ld_tile_to(b, bi_dest_index(&instr->dest), bi_pixel_indices(b, rt), bi_ld_tile_to(b, dest, bi_pixel_indices(b, rt), bi_register(60), desc,
bi_register(60), desc, regfmt, regfmt, nr - 1);
(instr->num_components - 1)); bi_emit_cached_split(b, dest, size * nr);
} }
static void static void
@ -1619,12 +1639,15 @@ bi_emit_intrinsic(bi_builder *b, nir_intrinsic_instr *instr)
if (b->shader->arch >= 9) { if (b->shader->arch >= 9) {
bi_handle_segment(b, &addr, &addr_hi, BI_SEG_WLS, NULL); bi_handle_segment(b, &addr, &addr_hi, BI_SEG_WLS, NULL);
addr = bi_collect_v2i32(b, addr, addr_hi);
} else { } else {
addr = bi_seg_add_i64(b, addr, bi_zero(), false, BI_SEG_WLS); addr = bi_seg_add_i64(b, addr, bi_zero(), false, BI_SEG_WLS);
bi_emit_cached_split(b, addr, 64);
} }
bi_emit_atomic_i32_to(b, dst, addr, bi_src_index(&instr->src[1]), bi_emit_atomic_i32_to(b, dst, addr, bi_src_index(&instr->src[1]),
instr->intrinsic); instr->intrinsic);
bi_split_dest(b, instr->dest);
break; break;
} }
@ -1642,6 +1665,7 @@ bi_emit_intrinsic(bi_builder *b, nir_intrinsic_instr *instr)
bi_emit_lea_image(b, instr), bi_emit_lea_image(b, instr),
bi_src_index(&instr->src[3]), bi_src_index(&instr->src[3]),
instr->intrinsic); instr->intrinsic);
bi_split_dest(b, instr->dest);
break; break;
case nir_intrinsic_global_atomic_add: case nir_intrinsic_global_atomic_add:
@ -1658,6 +1682,8 @@ bi_emit_intrinsic(bi_builder *b, nir_intrinsic_instr *instr)
bi_src_index(&instr->src[0]), bi_src_index(&instr->src[0]),
bi_src_index(&instr->src[1]), bi_src_index(&instr->src[1]),
instr->intrinsic); instr->intrinsic);
bi_split_dest(b, instr->dest);
break; break;
case nir_intrinsic_image_load: case nir_intrinsic_image_load:
@ -1671,31 +1697,37 @@ bi_emit_intrinsic(bi_builder *b, nir_intrinsic_instr *instr)
case nir_intrinsic_global_atomic_exchange: case nir_intrinsic_global_atomic_exchange:
bi_emit_axchg_to(b, dst, bi_src_index(&instr->src[0]), bi_emit_axchg_to(b, dst, bi_src_index(&instr->src[0]),
&instr->src[1], BI_SEG_NONE); &instr->src[1], BI_SEG_NONE);
bi_split_dest(b, instr->dest);
break; break;
case nir_intrinsic_image_atomic_exchange: case nir_intrinsic_image_atomic_exchange:
bi_emit_axchg_to(b, dst, bi_emit_lea_image(b, instr), bi_emit_axchg_to(b, dst, bi_emit_lea_image(b, instr),
&instr->src[3], BI_SEG_NONE); &instr->src[3], BI_SEG_NONE);
bi_split_dest(b, instr->dest);
break; break;
case nir_intrinsic_shared_atomic_exchange: case nir_intrinsic_shared_atomic_exchange:
bi_emit_axchg_to(b, dst, bi_src_index(&instr->src[0]), bi_emit_axchg_to(b, dst, bi_src_index(&instr->src[0]),
&instr->src[1], BI_SEG_WLS); &instr->src[1], BI_SEG_WLS);
bi_split_dest(b, instr->dest);
break; break;
case nir_intrinsic_global_atomic_comp_swap: case nir_intrinsic_global_atomic_comp_swap:
bi_emit_acmpxchg_to(b, dst, bi_src_index(&instr->src[0]), bi_emit_acmpxchg_to(b, dst, bi_src_index(&instr->src[0]),
&instr->src[1], &instr->src[2], BI_SEG_NONE); &instr->src[1], &instr->src[2], BI_SEG_NONE);
bi_split_dest(b, instr->dest);
break; break;
case nir_intrinsic_image_atomic_comp_swap: case nir_intrinsic_image_atomic_comp_swap:
bi_emit_acmpxchg_to(b, dst, bi_emit_lea_image(b, instr), bi_emit_acmpxchg_to(b, dst, bi_emit_lea_image(b, instr),
&instr->src[3], &instr->src[4], BI_SEG_NONE); &instr->src[3], &instr->src[4], BI_SEG_NONE);
bi_split_dest(b, instr->dest);
break; break;
case nir_intrinsic_shared_atomic_comp_swap: case nir_intrinsic_shared_atomic_comp_swap:
bi_emit_acmpxchg_to(b, dst, bi_src_index(&instr->src[0]), bi_emit_acmpxchg_to(b, dst, bi_src_index(&instr->src[0]),
&instr->src[1], &instr->src[2], BI_SEG_WLS); &instr->src[1], &instr->src[2], BI_SEG_WLS);
bi_split_dest(b, instr->dest);
break; break;
case nir_intrinsic_load_frag_coord: case nir_intrinsic_load_frag_coord:
@ -1784,6 +1816,7 @@ bi_emit_intrinsic(bi_builder *b, nir_intrinsic_instr *instr)
bi_ld_var_special_to(b, dst, bi_zero(), BI_REGISTER_FORMAT_F32, bi_ld_var_special_to(b, dst, bi_zero(), BI_REGISTER_FORMAT_F32,
BI_SAMPLE_CENTER, BI_UPDATE_CLOBBER, BI_SAMPLE_CENTER, BI_UPDATE_CLOBBER,
BI_VARYING_NAME_POINT, BI_VECSIZE_V2); BI_VARYING_NAME_POINT, BI_VECSIZE_V2);
bi_emit_cached_split_i32(b, dst, 2);
break; break;
/* It appears vertex_id is zero-based with Bifrost geometry flows, but /* It appears vertex_id is zero-based with Bifrost geometry flows, but
@ -1833,6 +1866,7 @@ bi_emit_intrinsic(bi_builder *b, nir_intrinsic_instr *instr)
case nir_intrinsic_shader_clock: case nir_intrinsic_shader_clock:
bi_ld_gclk_u64_to(b, dst, BI_SOURCE_CYCLE_COUNTER); bi_ld_gclk_u64_to(b, dst, BI_SOURCE_CYCLE_COUNTER);
bi_split_dest(b, instr->dest);
break; break;
default: default:
@ -1860,7 +1894,7 @@ bi_emit_load_const(bi_builder *b, nir_load_const_instr *instr)
} }
static bi_index static bi_index
bi_alu_src_index(nir_alu_src src, unsigned comps) bi_alu_src_index(bi_builder *b, nir_alu_src src, unsigned comps)
{ {
/* we don't lower modifiers until the backend */ /* we don't lower modifiers until the backend */
assert(!(src.negate || src.abs)); assert(!(src.negate || src.abs));
@ -1884,7 +1918,7 @@ bi_alu_src_index(nir_alu_src src, unsigned comps)
offset = new_offset; offset = new_offset;
} }
bi_index idx = bi_word(bi_src_index(&src.src), offset); bi_index idx = bi_extract(b, bi_src_index(&src.src), offset);
/* Compose the subword swizzle with existing (identity) swizzle */ /* Compose the subword swizzle with existing (identity) swizzle */
assert(idx.swizzle == BI_SWIZZLE_H01); assert(idx.swizzle == BI_SWIZZLE_H01);
@ -2240,12 +2274,15 @@ bi_emit_alu(bi_builder *b, nir_alu_instr *instr)
unreachable("should've been lowered"); unreachable("should've been lowered");
case nir_op_unpack_32_2x16: case nir_op_unpack_32_2x16:
case nir_op_unpack_64_2x32_split_x:
bi_mov_i32_to(b, dst, bi_src_index(&instr->src[0].src)); bi_mov_i32_to(b, dst, bi_src_index(&instr->src[0].src));
break;
case nir_op_unpack_64_2x32_split_x:
bi_mov_i32_to(b, dst, bi_extract(b, bi_src_index(&instr->src[0].src), 0));
return; return;
case nir_op_unpack_64_2x32_split_y: case nir_op_unpack_64_2x32_split_y:
bi_mov_i32_to(b, dst, bi_word(bi_src_index(&instr->src[0].src), 1)); bi_mov_i32_to(b, dst, bi_extract(b, bi_src_index(&instr->src[0].src), 1));
return; return;
case nir_op_pack_64_2x32_split: case nir_op_pack_64_2x32_split:
@ -2256,16 +2293,16 @@ bi_emit_alu(bi_builder *b, nir_alu_instr *instr)
case nir_op_pack_64_2x32: case nir_op_pack_64_2x32:
bi_collect_v2i32_to(b, dst, bi_collect_v2i32_to(b, dst,
bi_word(bi_src_index(&instr->src[0].src), 0), bi_extract(b, bi_src_index(&instr->src[0].src), 0),
bi_word(bi_src_index(&instr->src[0].src), 1)); bi_extract(b, bi_src_index(&instr->src[0].src), 1));
return; return;
case nir_op_pack_uvec2_to_uint: { case nir_op_pack_uvec2_to_uint: {
bi_index src = bi_src_index(&instr->src[0].src); bi_index src = bi_src_index(&instr->src[0].src);
assert(sz == 32 && src_sz == 32); assert(sz == 32 && src_sz == 32);
bi_mkvec_v2i16_to(b, dst, bi_half(bi_word(src, 0), false), bi_mkvec_v2i16_to(b, dst, bi_half(bi_extract(b, src, 0), false),
bi_half(bi_word(src, 1), false)); bi_half(bi_extract(b, src, 1), false));
return; return;
} }
@ -2273,10 +2310,10 @@ bi_emit_alu(bi_builder *b, nir_alu_instr *instr)
bi_index src = bi_src_index(&instr->src[0].src); bi_index src = bi_src_index(&instr->src[0].src);
assert(sz == 32 && src_sz == 32); assert(sz == 32 && src_sz == 32);
bi_mkvec_v4i8_to(b, dst, bi_byte(bi_word(src, 0), 0), bi_mkvec_v4i8_to(b, dst, bi_byte(bi_extract(b, src, 0), 0),
bi_byte(bi_word(src, 1), 0), bi_byte(bi_extract(b, src, 1), 0),
bi_byte(bi_word(src, 2), 0), bi_byte(bi_extract(b, src, 2), 0),
bi_byte(bi_word(src, 3), 0)); bi_byte(bi_extract(b, src, 3), 0));
return; return;
} }
@ -2314,9 +2351,9 @@ bi_emit_alu(bi_builder *b, nir_alu_instr *instr)
case nir_op_f2f16: case nir_op_f2f16:
assert(src_sz == 32); assert(src_sz == 32);
bi_index idx = bi_src_index(&instr->src[0].src); bi_index idx = bi_src_index(&instr->src[0].src);
bi_index s0 = bi_word(idx, instr->src[0].swizzle[0]); bi_index s0 = bi_extract(b, idx, instr->src[0].swizzle[0]);
bi_index s1 = comps > 1 ? bi_index s1 = comps > 1 ?
bi_word(idx, instr->src[0].swizzle[1]) : s0; bi_extract(b, idx, instr->src[0].swizzle[1]) : s0;
bi_v2f32_to_v2f16_to(b, dst, s0, s1); bi_v2f32_to_v2f16_to(b, dst, s0, s1);
return; return;
@ -2328,8 +2365,8 @@ bi_emit_alu(bi_builder *b, nir_alu_instr *instr)
break; break;
bi_index idx = bi_src_index(&instr->src[0].src); bi_index idx = bi_src_index(&instr->src[0].src);
bi_index s0 = bi_word(idx, instr->src[0].swizzle[0]); bi_index s0 = bi_extract(b, idx, instr->src[0].swizzle[0]);
bi_index s1 = bi_word(idx, instr->src[0].swizzle[1]); bi_index s1 = bi_extract(b, idx, instr->src[0].swizzle[1]);
bi_mkvec_v2i16_to(b, dst, bi_mkvec_v2i16_to(b, dst,
bi_half(s0, false), bi_half(s1, false)); bi_half(s0, false), bi_half(s1, false));
@ -2348,8 +2385,8 @@ bi_emit_alu(bi_builder *b, nir_alu_instr *instr)
nir_alu_src *src = &instr->src[0]; nir_alu_src *src = &instr->src[0];
bi_index idx = bi_src_index(&src->src); bi_index idx = bi_src_index(&src->src);
bi_index s0 = bi_word(idx, src->swizzle[0]); bi_index s0 = bi_extract(b, idx, src->swizzle[0]);
bi_index s1 = bi_word(idx, src->swizzle[1]); bi_index s1 = bi_extract(b, idx, src->swizzle[1]);
bi_index t = (src->swizzle[0] == src->swizzle[1]) ? bi_index t = (src->swizzle[0] == src->swizzle[1]) ?
bi_half(s0, false) : bi_half(s0, false) :
@ -2397,13 +2434,13 @@ bi_emit_alu(bi_builder *b, nir_alu_instr *instr)
* insert a MKVEC.v2i16 first to convert down to 16-bit. * insert a MKVEC.v2i16 first to convert down to 16-bit.
*/ */
bi_index idx = bi_src_index(&instr->src[0].src); bi_index idx = bi_src_index(&instr->src[0].src);
bi_index s0 = bi_word(idx, instr->src[0].swizzle[0]); bi_index s0 = bi_extract(b, idx, instr->src[0].swizzle[0]);
bi_index s1 = bi_alu_src_index(instr->src[1], comps); bi_index s1 = bi_alu_src_index(b, instr->src[1], comps);
bi_index s2 = bi_alu_src_index(instr->src[2], comps); bi_index s2 = bi_alu_src_index(b, instr->src[2], comps);
if (!bi_nir_is_replicated(&instr->src[0])) { if (!bi_nir_is_replicated(&instr->src[0])) {
s0 = bi_mkvec_v2i16(b, bi_half(s0, false), s0 = bi_mkvec_v2i16(b, bi_half(s0, false),
bi_half(bi_word(idx, instr->src[0].swizzle[1]), false)); bi_half(bi_extract(b, idx, instr->src[0].swizzle[1]), false));
} }
bi_mux_v2i16_to(b, dst, s2, s1, s0, BI_MUX_INT_ZERO); bi_mux_v2i16_to(b, dst, s2, s1, s0, BI_MUX_INT_ZERO);
@ -2414,9 +2451,9 @@ bi_emit_alu(bi_builder *b, nir_alu_instr *instr)
break; break;
} }
bi_index s0 = srcs > 0 ? bi_alu_src_index(instr->src[0], comps) : bi_null(); bi_index s0 = srcs > 0 ? bi_alu_src_index(b, instr->src[0], comps) : bi_null();
bi_index s1 = srcs > 1 ? bi_alu_src_index(instr->src[1], comps) : bi_null(); bi_index s1 = srcs > 1 ? bi_alu_src_index(b, instr->src[1], comps) : bi_null();
bi_index s2 = srcs > 2 ? bi_alu_src_index(instr->src[2], comps) : bi_null(); bi_index s2 = srcs > 2 ? bi_alu_src_index(b, instr->src[2], comps) : bi_null();
switch (instr->op) { switch (instr->op) {
case nir_op_ffma: case nir_op_ffma:
@ -3073,9 +3110,9 @@ bi_emit_texc_offset_ms_index(bi_builder *b, nir_tex_instr *instr)
unsigned nr = nir_src_num_components(instr->src[offs_idx].src); unsigned nr = nir_src_num_components(instr->src[offs_idx].src);
bi_index idx = bi_src_index(&instr->src[offs_idx].src); bi_index idx = bi_src_index(&instr->src[offs_idx].src);
dest = bi_mkvec_v4i8(b, dest = bi_mkvec_v4i8(b,
(nr > 0) ? bi_byte(bi_word(idx, 0), 0) : bi_imm_u8(0), (nr > 0) ? bi_byte(bi_extract(b, idx, 0), 0) : bi_imm_u8(0),
(nr > 1) ? bi_byte(bi_word(idx, 1), 0) : bi_imm_u8(0), (nr > 1) ? bi_byte(bi_extract(b, idx, 1), 0) : bi_imm_u8(0),
(nr > 2) ? bi_byte(bi_word(idx, 2), 0) : bi_imm_u8(0), (nr > 2) ? bi_byte(bi_extract(b, idx, 2), 0) : bi_imm_u8(0),
bi_imm_u8(0)); bi_imm_u8(0));
} }
@ -3117,9 +3154,9 @@ bi_emit_valhall_offsets(bi_builder *b, nir_tex_instr *instr)
assert((nr <= 2) || (ms_idx < 0)); assert((nr <= 2) || (ms_idx < 0));
dest = bi_mkvec_v4i8(b, dest = bi_mkvec_v4i8(b,
(nr > 0) ? bi_byte(bi_word(idx, 0), 0) : bi_imm_u8(0), (nr > 0) ? bi_byte(bi_extract(b, idx, 0), 0) : bi_imm_u8(0),
(nr > 1) ? bi_byte(bi_word(idx, 1), 0) : bi_imm_u8(0), (nr > 1) ? bi_byte(bi_extract(b, idx, 1), 0) : bi_imm_u8(0),
(nr > 2) ? bi_byte(bi_word(idx, 2), 0) : bi_imm_u8(0), (nr > 2) ? bi_byte(bi_extract(b, idx, 2), 0) : bi_imm_u8(0),
bi_imm_u8(0)); bi_imm_u8(0));
} }
@ -3152,7 +3189,9 @@ bi_emit_cube_coord(bi_builder *b, bi_index coord,
bi_index maxxyz = bi_temp(b->shader); bi_index maxxyz = bi_temp(b->shader);
*face = bi_temp(b->shader); *face = bi_temp(b->shader);
bi_index cx = coord, cy = bi_word(coord, 1), cz = bi_word(coord, 2); bi_index cx = bi_extract(b, coord, 0),
cy = bi_extract(b, coord, 1),
cz = bi_extract(b, coord, 2);
/* Use a pseudo op on Bifrost due to tuple restrictions */ /* Use a pseudo op on Bifrost due to tuple restrictions */
if (b->shader->arch <= 8) { if (b->shader->arch <= 8) {
@ -3163,8 +3202,8 @@ bi_emit_cube_coord(bi_builder *b, bi_index coord,
} }
/* Select coordinates */ /* Select coordinates */
bi_index ssel = bi_cube_ssel(b, bi_word(coord, 2), coord, *face); bi_index ssel = bi_cube_ssel(b, bi_extract(b, coord, 2), bi_extract(b, coord, 0), *face);
bi_index tsel = bi_cube_tsel(b, bi_word(coord, 1), bi_word(coord, 2), bi_index tsel = bi_cube_tsel(b, bi_extract(b, coord, 1), bi_extract(b, coord, 2),
*face); *face);
/* The OpenGL ES specification requires us to transform an input vector /* The OpenGL ES specification requires us to transform an input vector
@ -3314,22 +3353,22 @@ bi_emit_texc(bi_builder *b, nir_tex_instr *instr)
cx = bi_emit_texc_cube_coord(b, index, &cy); cx = bi_emit_texc_cube_coord(b, index, &cy);
} else { } else {
/* Copy XY (for 2D+) or XX (for 1D) */ /* Copy XY (for 2D+) or XX (for 1D) */
cx = index; cx = bi_extract(b, index, 0);
cy = bi_word(index, MIN2(1, components - 1)); cy = bi_extract(b, index, MIN2(1, components - 1));
assert(components >= 1 && components <= 3); assert(components >= 1 && components <= 3);
if (components == 3 && !desc.array) { if (components == 3 && !desc.array) {
/* 3D */ /* 3D */
dregs[BIFROST_TEX_DREG_Z_COORD] = dregs[BIFROST_TEX_DREG_Z_COORD] =
bi_word(index, 2); bi_extract(b, index, 2);
} }
} }
if (desc.array) { if (desc.array) {
dregs[BIFROST_TEX_DREG_ARRAY] = dregs[BIFROST_TEX_DREG_ARRAY] =
bi_emit_texc_array_index(b, bi_emit_texc_array_index(b,
bi_word(index, components - 1), T); bi_extract(b, index, components - 1), T);
} }
break; break;
@ -3445,9 +3484,7 @@ bi_emit_texc(bi_builder *b, nir_tex_instr *instr)
desc.sampler_index_or_mode = mode; desc.sampler_index_or_mode = mode;
} }
/* Allocate staging registers contiguously by compacting the array. /* Allocate staging registers contiguously by compacting the array. */
* Index is not SSA (tied operands) */
unsigned sr_count = 0; unsigned sr_count = 0;
for (unsigned i = 0; i < ARRAY_SIZE(dregs); ++i) { for (unsigned i = 0; i < ARRAY_SIZE(dregs); ++i) {
@ -3455,24 +3492,26 @@ bi_emit_texc(bi_builder *b, nir_tex_instr *instr)
dregs[sr_count++] = dregs[i]; dregs[sr_count++] = dregs[i];
} }
bi_index idx = sr_count ? bi_temp_reg(b->shader) : bi_null(); unsigned res_size = nir_dest_bit_size(instr->dest) == 16 ? 2 : 4;
bi_index sr = sr_count ? bi_temp(b->shader) : bi_null();
bi_index dst = bi_temp(b->shader);
if (sr_count) if (sr_count)
bi_make_vec_to(b, idx, dregs, NULL, sr_count, 32); bi_emit_collect_to(b, sr, dregs, sr_count);
uint32_t desc_u = 0; uint32_t desc_u = 0;
memcpy(&desc_u, &desc, sizeof(desc_u)); memcpy(&desc_u, &desc, sizeof(desc_u));
bi_texc_to(b, sr_count ? idx : bi_dest_index(&instr->dest), bi_null(), bi_instr *I =
idx, cx, cy, bi_imm_u32(desc_u), bi_texc_to(b, dst, bi_null(), sr, cx, cy,
!nir_tex_instr_has_implicit_derivative(instr), bi_imm_u32(desc_u),
sr_count, 0); !nir_tex_instr_has_implicit_derivative(instr), sr_count, 0);
I->register_format = bi_reg_fmt_for_nir(instr->dest_type);
/* Explicit copy to facilitate tied operands */ bi_index w[4] = { bi_null(), bi_null(), bi_null(), bi_null() };
if (sr_count) { bi_emit_split_i32(b, w, dst, res_size);
bi_index srcs[4] = { idx, idx, idx, idx }; bi_emit_collect_to(b, bi_dest_index(&instr->dest), w,
unsigned channels[4] = { 0, 1, 2, 3 }; DIV_ROUND_UP(nir_dest_num_components(instr->dest) * res_size, 4));
bi_make_vec_to(b, bi_dest_index(&instr->dest), srcs, channels, 4, 32);
}
} }
/* Staging registers required by texturing in the order they appear (Valhall) */ /* Staging registers required by texturing in the order they appear (Valhall) */
@ -3526,17 +3565,17 @@ bi_emit_tex_valhall(bi_builder *b, nir_tex_instr *instr)
sregs[VALHALL_TEX_SREG_X_COORD] = index; sregs[VALHALL_TEX_SREG_X_COORD] = index;
if (components >= 2) if (components >= 2)
sregs[VALHALL_TEX_SREG_Y_COORD] = bi_word(index, 1); sregs[VALHALL_TEX_SREG_Y_COORD] = bi_extract(b, index, 1);
if (components == 3 && !instr->is_array) { if (components == 3 && !instr->is_array) {
sregs[VALHALL_TEX_SREG_Z_COORD] = sregs[VALHALL_TEX_SREG_Z_COORD] =
bi_word(index, 2); bi_extract(b, index, 2);
} }
} }
if (instr->is_array) { if (instr->is_array) {
sregs[VALHALL_TEX_SREG_ARRAY] = sregs[VALHALL_TEX_SREG_ARRAY] =
bi_word(index, components - 1); bi_extract(b, index, components - 1);
} }
break; break;
@ -3614,9 +3653,10 @@ bi_emit_tex_valhall(bi_builder *b, nir_tex_instr *instr)
image_src = bi_lshift_or_i32(b, texture, image_src, bi_imm_u8(16)); image_src = bi_lshift_or_i32(b, texture, image_src, bi_imm_u8(16));
unsigned mask = BI_WRITE_MASK_RGBA; unsigned mask = BI_WRITE_MASK_RGBA;
unsigned res_size = nir_dest_bit_size(instr->dest) == 16 ? 2 : 4;
enum bi_register_format regfmt = bi_reg_fmt_for_nir(instr->dest_type); enum bi_register_format regfmt = bi_reg_fmt_for_nir(instr->dest_type);
enum bi_dimension dim = valhall_tex_dimension(instr->sampler_dim); enum bi_dimension dim = valhall_tex_dimension(instr->sampler_dim);
bi_index dest = bi_dest_index(&instr->dest); bi_index dest = bi_temp(b->shader);
switch (instr->op) { switch (instr->op) {
case nir_texop_tex: case nir_texop_tex:
@ -3641,6 +3681,11 @@ bi_emit_tex_valhall(bi_builder *b, nir_tex_instr *instr)
default: default:
unreachable("Unhandled Valhall texture op"); unreachable("Unhandled Valhall texture op");
} }
bi_index w[4] = { bi_null(), bi_null(), bi_null(), bi_null() };
bi_emit_split_i32(b, w, dest, res_size);
bi_emit_collect_to(b, bi_dest_index(&instr->dest), w,
DIV_ROUND_UP(nir_dest_num_components(instr->dest) * res_size, 4));
} }
/* Simple textures ops correspond to NIR tex or txl with LOD = 0 on 2D/cube /* Simple textures ops correspond to NIR tex or txl with LOD = 0 on 2D/cube
@ -3665,10 +3710,13 @@ bi_emit_texs(bi_builder *b, nir_tex_instr *instr)
} else { } else {
bi_texs_2d_to(b, nir_dest_bit_size(instr->dest), bi_texs_2d_to(b, nir_dest_bit_size(instr->dest),
bi_dest_index(&instr->dest), bi_dest_index(&instr->dest),
coords, bi_word(coords, 1), bi_extract(b, coords, 0),
bi_extract(b, coords, 1),
instr->op != nir_texop_tex, /* zero LOD */ instr->op != nir_texop_tex, /* zero LOD */
instr->sampler_index, instr->texture_index); instr->sampler_index, instr->texture_index);
} }
bi_split_dest(b, instr->dest);
} }
static bool static bool
@ -3726,7 +3774,7 @@ bi_emit_tex(bi_builder *b, nir_tex_instr *instr)
case nir_texop_txs: case nir_texop_txs:
bi_load_sysval_to(b, bi_dest_index(&instr->dest), bi_load_sysval_to(b, bi_dest_index(&instr->dest),
panfrost_sysval_for_instr(&instr->instr, NULL), panfrost_sysval_for_instr(&instr->instr, NULL),
4, 0); nir_dest_num_components(instr->dest), 0);
return; return;
case nir_texop_tex: case nir_texop_tex:
case nir_texop_txl: case nir_texop_txl: