diff --git a/src/panfrost/bifrost/bifrost_compile.c b/src/panfrost/bifrost/bifrost_compile.c
index 6584cc45b2e..24c56222e6b 100644
--- a/src/panfrost/bifrost/bifrost_compile.c
+++ b/src/panfrost/bifrost/bifrost_compile.c
@@ -1529,8 +1529,8 @@ static void
 bi_emit_alu(bi_builder *b, nir_alu_instr *instr)
 {
         bi_index dst = bi_dest_index(&instr->dest.dest);
-        unsigned sz = nir_dest_bit_size(instr->dest.dest);
         unsigned srcs = nir_op_infos[instr->op].num_inputs;
+        unsigned sz = nir_dest_bit_size(instr->dest.dest);
         unsigned comps = nir_dest_num_components(instr->dest.dest);
         unsigned src_sz = srcs > 0 ? nir_src_bit_size(instr->src[0].src) : 0;
 
@@ -1631,6 +1631,21 @@ bi_emit_alu(bi_builder *b, nir_alu_instr *instr)
                 return;
         }
 
+        case nir_op_i2i8:
+        case nir_op_u2u8:
+        {
+                /* Narrowing to 8 bits acts like an 8-bit swizzle of the source */
+                bi_index idx = bi_src_index(&instr->src[0].src);
+                unsigned factor = src_sz / 8; /* source component size in bytes */
+                unsigned chan[4] = { 0 }; /* NOTE(review): assumes comps <= 4 */
+
+                for (unsigned i = 0; i < comps; ++i) /* scale NIR swizzle by factor */
+                        chan[i] = instr->src[0].swizzle[i] * factor;
+
+                bi_make_vec_to(b, dst, &idx, chan, comps, 8); /* 8 = size in bits, presumably */
+                return; /* skip the rest of bi_emit_alu for these ops */
+        }
+
         default:
                 break;
         }
@@ -1909,15 +1924,6 @@ bi_emit_alu(bi_builder *b, nir_alu_instr *instr)
                         bi_mov_i32_to(b, dst, s0);
                 break;
 
-        case nir_op_i2i8:
-        case nir_op_u2u8:
-                /* No vectorization in this part of the loop, so downcasts are
-                 * a noop. When vectorization support lands, some case
-                 * handlingg will be needed, but for the scalar case this is
-                 * optimal as it can be copypropped away */
-                bi_mov_i32_to(b, dst, s0);
-                break;
-
         case nir_op_fround_even:
         case nir_op_fceil:
         case nir_op_ffloor: