diff --git a/src/intel/compiler/brw_fs_lower_regioning.cpp b/src/intel/compiler/brw_fs_lower_regioning.cpp index bac58d953c3..e5a4391cbb1 100644 --- a/src/intel/compiler/brw_fs_lower_regioning.cpp +++ b/src/intel/compiler/brw_fs_lower_regioning.cpp @@ -185,6 +185,34 @@ namespace { } } + /** + * Return a non-zero value if the execution type of the instruction is + * unsupported. The destination and sources matching the returned mask + * will be bit-cast to an integer type of appropriate size, lowering any + * source or destination modifiers into separate MOV instructions. + */ + unsigned + has_invalid_exec_type(const gen_device_info *devinfo, const fs_inst *inst) + { + switch (inst->opcode) { + case SHADER_OPCODE_QUAD_SWIZZLE: + return has_dst_aligned_region_restriction(devinfo, inst) ? + 0x1 : 0; + + case SHADER_OPCODE_BROADCAST: + case SHADER_OPCODE_MOV_INDIRECT: + return (((devinfo->ver == 7 && !devinfo->is_haswell) || + devinfo->is_cherryview || gen_device_info_is_9lp(devinfo) || + devinfo->verx10 >= 125) && type_sz(inst->src[0].type) > 4) || + (devinfo->verx10 >= 125 && + brw_reg_type_is_floating_point(inst->src[0].type)) ? + 0x1 : 0; + + default: + return 0; + } + } + /* * Return whether the instruction has unsupported source modifiers * specified for the i-th source region. @@ -193,8 +221,11 @@ namespace { has_invalid_src_modifiers(const gen_device_info *devinfo, const fs_inst *inst, unsigned i) { - return !inst->can_do_source_mods(devinfo) && - (inst->src[i].negate || inst->src[i].abs); + return (!inst->can_do_source_mods(devinfo) && + (inst->src[i].negate || inst->src[i].abs)) || + ((has_invalid_exec_type(devinfo, inst) & (1u << i)) && + (inst->src[i].negate || inst->src[i].abs || + inst->src[i].type != get_exec_type(inst))); } /* @@ -209,24 +240,27 @@ namespace { return false; case BRW_OPCODE_SEL: return inst->dst.type != get_exec_type(inst); - case SHADER_OPCODE_BROADCAST: - case SHADER_OPCODE_MOV_INDIRECT: - /* The source and destination types of these may be hard-coded to - * integer at codegen time due to hardware limitations of 64-bit - * types. - */ - return ((devinfo->ver == 7 && !devinfo->is_haswell) || - devinfo->is_cherryview || gen_device_info_is_9lp(devinfo)) && - type_sz(inst->src[0].type) > 4 && - inst->dst.type != inst->src[0].type; default: - /* FIXME: We assume the opcodes don't explicitly mentioned before - * just work fine with arbitrary conversions. + /* FIXME: We assume the opcodes not explicitly mentioned before just + * work fine with arbitrary conversions, unless they need to be + * bit-cast. */ - return false; + return has_invalid_exec_type(devinfo, inst) && + inst->dst.type != get_exec_type(inst); } } + /** + * Return whether the instruction has unsupported destination modifiers. + */ + bool + has_invalid_dst_modifiers(const gen_device_info *devinfo, const fs_inst *inst) + { + return (has_invalid_exec_type(devinfo, inst) && + (inst->saturate || inst->conditional_mod)) || + has_invalid_conversion(devinfo, inst); + } + /** * Return whether the instruction has non-standard semantics for the * conditional mod which don't cause the flag register to be updated with @@ -423,6 +457,30 @@ namespace { return true; } + /** + * Bit-cast sources and destination of the instruction to an appropriate + * integer type, to be used in cases where the instruction doesn't support + * some other execution type. + */ + bool + lower_exec_type(fs_visitor *v, bblock_t *block, fs_inst *inst) + { + assert(inst->dst.type == get_exec_type(inst)); + const unsigned mask = has_invalid_exec_type(v->devinfo, inst); + const brw_reg_type raw_type = brw_int_type(type_sz(inst->dst.type), false); + + for (unsigned i = 0; i < inst->sources; i++) { + if (mask & (1u << i)) { + assert(inst->src[i].type == inst->dst.type); + inst->src[i].type = raw_type; + } + } + + inst->dst.type = raw_type; + + return true; + } + /** * Legalize the source and destination regioning controls of the specified * instruction. @@ -433,7 +491,7 @@ namespace { const gen_device_info *devinfo = v->devinfo; bool progress = false; - if (has_invalid_conversion(devinfo, inst)) + if (has_invalid_dst_modifiers(devinfo, inst)) progress |= lower_dst_modifiers(v, block, inst); if (has_invalid_dst_region(devinfo, inst)) @@ -447,6 +505,9 @@ namespace { progress |= lower_src_region(v, block, inst, i); } + if (has_invalid_exec_type(devinfo, inst)) + progress |= lower_exec_type(v, block, inst); + return progress; } } diff --git a/src/intel/compiler/brw_ir_fs.h b/src/intel/compiler/brw_ir_fs.h index 6becef748ce..d5fc48b7d00 100644 --- a/src/intel/compiler/brw_ir_fs.h +++ b/src/intel/compiler/brw_ir_fs.h @@ -570,7 +570,12 @@ has_dst_aligned_region_restriction(const gen_device_info *devinfo, if (type_sz(dst_type) > 4 || type_sz(exec_type) > 4 || (type_sz(exec_type) == 4 && is_dword_multiply)) - return devinfo->is_cherryview || gen_device_info_is_9lp(devinfo); + return devinfo->is_cherryview || gen_device_info_is_9lp(devinfo) || + devinfo->verx10 >= 125; + + else if (brw_reg_type_is_floating_point(inst->dst.type)) + return devinfo->verx10 >= 125; + else return false; }