diff --git a/src/panfrost/bifrost/bifrost_compile.c b/src/panfrost/bifrost/bifrost_compile.c index 6584cc45b2e..24c56222e6b 100644 --- a/src/panfrost/bifrost/bifrost_compile.c +++ b/src/panfrost/bifrost/bifrost_compile.c @@ -1529,8 +1529,8 @@ static void bi_emit_alu(bi_builder *b, nir_alu_instr *instr) { bi_index dst = bi_dest_index(&instr->dest.dest); - unsigned sz = nir_dest_bit_size(instr->dest.dest); unsigned srcs = nir_op_infos[instr->op].num_inputs; + unsigned sz = nir_dest_bit_size(instr->dest.dest); unsigned comps = nir_dest_num_components(instr->dest.dest); unsigned src_sz = srcs > 0 ? nir_src_bit_size(instr->src[0].src) : 0; @@ -1631,6 +1631,21 @@ bi_emit_alu(bi_builder *b, nir_alu_instr *instr) return; } + case nir_op_i2i8: + case nir_op_u2u8: + { + /* Acts like an 8-bit swizzle */ + bi_index idx = bi_src_index(&instr->src[0].src); + unsigned factor = src_sz / 8; + unsigned chan[4] = { 0 }; + + for (unsigned i = 0; i < comps; ++i) + chan[i] = instr->src[0].swizzle[i] * factor; + + bi_make_vec_to(b, dst, &idx, chan, comps, 8); + return; + } + default: break; } @@ -1909,15 +1924,6 @@ bi_emit_alu(bi_builder *b, nir_alu_instr *instr) bi_mov_i32_to(b, dst, s0); break; - case nir_op_i2i8: - case nir_op_u2u8: - /* No vectorization in this part of the loop, so downcasts are - * a noop. When vectorization support lands, some case - * handlingg will be needed, but for the scalar case this is - * optimal as it can be copypropped away */ - bi_mov_i32_to(b, dst, s0); - break; - case nir_op_fround_even: case nir_op_fceil: case nir_op_ffloor: