intel/fs: Add a couple of simple helper opcodes
Acked-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
This commit is contained in:
parent
57bff0a546
commit
b0858c1cc6
|
@ -460,6 +460,19 @@ enum opcode {
|
||||||
*/
|
*/
|
||||||
SHADER_OPCODE_SHUFFLE,
|
SHADER_OPCODE_SHUFFLE,
|
||||||
|
|
||||||
|
/* Select between src0 and src1 based on channel enables.
|
||||||
|
*
|
||||||
|
* This instruction copies src0 into the enabled channels of the
|
||||||
|
* destination and copies src1 into the disabled channels.
|
||||||
|
*/
|
||||||
|
SHADER_OPCODE_SEL_EXEC,
|
||||||
|
|
||||||
|
/* Take every Nth element in src0 and broadcast it to the group of N
|
||||||
|
* channels in which it lives in the destination. The offset within the
|
||||||
|
* cluster is given by src1 and the cluster size is given by src2.
|
||||||
|
*/
|
||||||
|
SHADER_OPCODE_CLUSTER_BROADCAST,
|
||||||
|
|
||||||
SHADER_OPCODE_GET_BUFFER_SIZE,
|
SHADER_OPCODE_GET_BUFFER_SIZE,
|
||||||
|
|
||||||
VEC4_OPCODE_MOV_BYTES,
|
VEC4_OPCODE_MOV_BYTES,
|
||||||
|
|
|
@ -316,6 +316,15 @@ fs_inst::has_source_and_destination_hazard() const
|
||||||
* that one of the instructions will read from a channel corresponding
|
* that one of the instructions will read from a channel corresponding
|
||||||
* to an earlier instruction.
|
* to an earlier instruction.
|
||||||
*/
|
*/
|
||||||
|
case SHADER_OPCODE_SEL_EXEC:
|
||||||
|
/* This is implemented as
|
||||||
|
*
|
||||||
|
* mov(16) g4<1>D 0D { align1 WE_all 1H };
|
||||||
|
* mov(16) g4<1>D g5<8,8,1>D { align1 1H }
|
||||||
|
*
|
||||||
|
* Because the source is only read in the second instruction, the first
|
||||||
|
* may stomp all over it.
|
||||||
|
*/
|
||||||
return true;
|
return true;
|
||||||
default:
|
default:
|
||||||
/* The SIMD16 compressed instruction
|
/* The SIMD16 compressed instruction
|
||||||
|
@ -5038,6 +5047,8 @@ get_lowered_simd_width(const struct gen_device_info *devinfo,
|
||||||
case BRW_OPCODE_MAD:
|
case BRW_OPCODE_MAD:
|
||||||
case BRW_OPCODE_LRP:
|
case BRW_OPCODE_LRP:
|
||||||
case FS_OPCODE_PACK:
|
case FS_OPCODE_PACK:
|
||||||
|
case SHADER_OPCODE_SEL_EXEC:
|
||||||
|
case SHADER_OPCODE_CLUSTER_BROADCAST:
|
||||||
return get_fpu_lowered_simd_width(devinfo, inst);
|
return get_fpu_lowered_simd_width(devinfo, inst);
|
||||||
|
|
||||||
case BRW_OPCODE_CMP: {
|
case BRW_OPCODE_CMP: {
|
||||||
|
|
|
@ -2293,6 +2293,53 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
|
||||||
generate_shuffle(inst, dst, src[0], src[1]);
|
generate_shuffle(inst, dst, src[0], src[1]);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case SHADER_OPCODE_SEL_EXEC:
|
||||||
|
assert(inst->force_writemask_all);
|
||||||
|
brw_set_default_mask_control(p, BRW_MASK_DISABLE);
|
||||||
|
brw_MOV(p, dst, src[1]);
|
||||||
|
brw_set_default_mask_control(p, BRW_MASK_ENABLE);
|
||||||
|
brw_MOV(p, dst, src[0]);
|
||||||
|
break;
|
||||||
|
|
||||||
|
case SHADER_OPCODE_CLUSTER_BROADCAST: {
|
||||||
|
assert(src[0].type == dst.type);
|
||||||
|
assert(!src[0].negate && !src[0].abs);
|
||||||
|
assert(src[1].file == BRW_IMMEDIATE_VALUE);
|
||||||
|
assert(src[1].type == BRW_REGISTER_TYPE_UD);
|
||||||
|
assert(src[2].file == BRW_IMMEDIATE_VALUE);
|
||||||
|
assert(src[2].type == BRW_REGISTER_TYPE_UD);
|
||||||
|
const unsigned component = src[1].ud;
|
||||||
|
const unsigned cluster_size = src[2].ud;
|
||||||
|
struct brw_reg strided = stride(suboffset(src[0], component),
|
||||||
|
cluster_size, cluster_size, 0);
|
||||||
|
if (type_sz(src[0].type) > 4 &&
|
||||||
|
(devinfo->is_cherryview || gen_device_info_is_9lp(devinfo))) {
|
||||||
|
/* IVB has an issue (which we found empirically) where it reads
|
||||||
|
* two address register components per channel for indirectly
|
||||||
|
* addressed 64-bit sources.
|
||||||
|
*
|
||||||
|
* From the Cherryview PRM Vol 7. "Register Region Restrictions":
|
||||||
|
*
|
||||||
|
* "When source or destination datatype is 64b or operation is
|
||||||
|
* integer DWord multiply, indirect addressing must not be
|
||||||
|
* used."
|
||||||
|
*
|
||||||
|
* To work around both of these, we do two integer MOVs instead of
|
||||||
|
* one 64-bit MOV. Because no double value should ever cross a
|
||||||
|
* register boundary, it's safe to use the immediate offset in the
|
||||||
|
* indirect here to handle adding 4 bytes to the offset and avoid
|
||||||
|
* the extra ADD to the register file.
|
||||||
|
*/
|
||||||
|
brw_MOV(p, subscript(dst, BRW_REGISTER_TYPE_D, 0),
|
||||||
|
subscript(strided, BRW_REGISTER_TYPE_D, 0));
|
||||||
|
brw_MOV(p, subscript(dst, BRW_REGISTER_TYPE_D, 1),
|
||||||
|
subscript(strided, BRW_REGISTER_TYPE_D, 1));
|
||||||
|
} else {
|
||||||
|
brw_MOV(p, dst, strided);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
case FS_OPCODE_SET_SAMPLE_ID:
|
case FS_OPCODE_SET_SAMPLE_ID:
|
||||||
generate_set_sample_id(inst, dst, src[0], src[1]);
|
generate_set_sample_id(inst, dst, src[0], src[1]);
|
||||||
break;
|
break;
|
||||||
|
|
|
@ -332,6 +332,10 @@ brw_instruction_name(const struct gen_device_info *devinfo, enum opcode op)
|
||||||
return "broadcast";
|
return "broadcast";
|
||||||
case SHADER_OPCODE_SHUFFLE:
|
case SHADER_OPCODE_SHUFFLE:
|
||||||
return "shuffle";
|
return "shuffle";
|
||||||
|
case SHADER_OPCODE_SEL_EXEC:
|
||||||
|
return "sel_exec";
|
||||||
|
case SHADER_OPCODE_CLUSTER_BROADCAST:
|
||||||
|
return "cluster_broadcast";
|
||||||
|
|
||||||
case SHADER_OPCODE_GET_BUFFER_SIZE:
|
case SHADER_OPCODE_GET_BUFFER_SIZE:
|
||||||
return "get_buffer_size";
|
return "get_buffer_size";
|
||||||
|
@ -847,6 +851,7 @@ backend_instruction::can_do_source_mods() const
|
||||||
case BRW_OPCODE_FBL:
|
case BRW_OPCODE_FBL:
|
||||||
case BRW_OPCODE_SUBB:
|
case BRW_OPCODE_SUBB:
|
||||||
case SHADER_OPCODE_BROADCAST:
|
case SHADER_OPCODE_BROADCAST:
|
||||||
|
case SHADER_OPCODE_CLUSTER_BROADCAST:
|
||||||
case SHADER_OPCODE_MOV_INDIRECT:
|
case SHADER_OPCODE_MOV_INDIRECT:
|
||||||
return false;
|
return false;
|
||||||
default:
|
default:
|
||||||
|
|
Loading…
Reference in New Issue