diff --git a/src/intel/compiler/brw_fs_lower_conversions.cpp b/src/intel/compiler/brw_fs_lower_conversions.cpp
index 663c9674c49..e27e2402746 100644
--- a/src/intel/compiler/brw_fs_lower_conversions.cpp
+++ b/src/intel/compiler/brw_fs_lower_conversions.cpp
@@ -43,6 +43,24 @@ supports_type_conversion(const fs_inst *inst) {
    }
 }
 
+/* From the SKL PRM Vol 2a, "Move":
+ *
+ *   "A mov with the same source and destination type, no source modifier,
+ *    and no saturation is a raw move. A packed byte destination region (B
+ *    or UB type with HorzStride == 1 and ExecSize > 1) can only be written
+ *    using raw move."
+ */
+static bool
+is_byte_raw_mov(const fs_inst *inst)
+{
+   return type_sz(inst->dst.type) == 1 &&
+          inst->opcode == BRW_OPCODE_MOV &&
+          inst->src[0].type == inst->dst.type &&
+          !inst->saturate &&
+          !inst->src[0].negate &&
+          !inst->src[0].abs;
+}
+
 bool
 fs_visitor::lower_conversions()
 {
@@ -54,7 +72,8 @@ fs_visitor::lower_conversions()
       bool saturate = inst->saturate;
 
       if (supports_type_conversion(inst)) {
-         if (get_exec_type_size(inst) == 8 && type_sz(inst->dst.type) < 8) {
+         if (type_sz(inst->dst.type) < get_exec_type_size(inst) &&
+             !is_byte_raw_mov(inst)) {
             /* From the Broadwell PRM, 3D Media GPGPU, "Double Precision Float to
              * Single Precision Float":
              *
@@ -64,6 +83,9 @@ fs_visitor::lower_conversions()
              * So we need to allocate a temporary that's two registers, and then do
              * a strided MOV to get the lower DWord of every Qword that has the
              * result.
+             *
+             * This restriction applies, in general, whenever we convert to
+             * a type with a smaller bit-size.
              */
             fs_reg temp = ibld.vgrf(get_exec_type(inst));
             fs_reg strided_temp = subscript(temp, dst.type, 0);
diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp
index c7f7bc21b8a..1ce89520bf1 100644
--- a/src/intel/compiler/brw_fs_nir.cpp
+++ b/src/intel/compiler/brw_fs_nir.cpp
@@ -755,19 +755,9 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr)
     */
 
    case nir_op_f2f16_undef:
-   case nir_op_i2i16:
-   case nir_op_u2u16: {
-      /* TODO: Fixing aligment rules for conversions from 32-bits to
-       * 16-bit types should be moved to lower_conversions
-       */
-      fs_reg tmp = bld.vgrf(op[0].type, 1);
-      tmp = subscript(tmp, result.type, 0);
-      inst = bld.MOV(tmp, op[0]);
-      inst->saturate = instr->dest.saturate;
-      inst = bld.MOV(result, tmp);
+      inst = bld.MOV(result, op[0]);
       inst->saturate = instr->dest.saturate;
       break;
-   }
 
    case nir_op_f2f64:
    case nir_op_f2i64:
@@ -807,6 +797,8 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr)
    case nir_op_f2u16:
    case nir_op_i2i32:
    case nir_op_u2u32:
+   case nir_op_i2i16:
+   case nir_op_u2u16:
    case nir_op_i2f16:
    case nir_op_u2f16:
       inst = bld.MOV(result, op[0]);
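
Note on the lowering strategy (reviewer sketch, not part of the patch): the
strided MOV in lower_conversions exists because the hardware writes each
narrowed result into the low portion of an execution-type-sized slot and
leaves the upper bits undefined, per the Broadwell PRM quote in the hunk
above. The standalone C++ sketch below models that layout with plain arrays
instead of the driver's fs_reg/subscript machinery; the lane count, the
values, and the "undefined" fill pattern are made up for illustration, and
a little-endian layout is assumed.

/* Hypothetical model of the temp + strided-MOV lowering: the temporary is
 * as wide as the execution type (QWords here), each narrowed result sits
 * in the low DWord of its slot, and a stride-2 DWord read packs the
 * results into the real destination.
 */
#include <cstdint>
#include <cstdio>
#include <cstring>

int main()
{
   const int lanes = 8;

   /* Temporary sized for the execution type, like ibld.vgrf(get_exec_type(inst)). */
   uint64_t temp[lanes];
   for (int i = 0; i < lanes; i++) {
      uint32_t converted = (uint32_t)i * 10u;       /* the narrowed result */
      uint64_t undef_high = 0xdeadbeefull << 32;    /* upper DWord: undefined */
      temp[i] = undef_high | converted;
   }

   /* Strided MOV, like subscript(temp, dst.type, 0): view the temporary as
    * DWords and take element 0 of every QWord (stride 2 in DWord units). */
   uint32_t dwords[lanes * 2];
   memcpy(dwords, temp, sizeof(temp));

   uint32_t dst[lanes];
   for (int i = 0; i < lanes; i++)
      dst[i] = dwords[i * 2];

   for (int i = 0; i < lanes; i++)
      printf("lane %d: %u\n", i, (unsigned)dst[i]);
   return 0;
}

Compiled with any C++ compiler this prints 0, 10, ..., 70: only the low
DWord of each QWord slot carries a defined result, which is why the pass
allocates a wide temporary and gathers with a stride rather than writing
the narrow destination directly.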