i965/fs/gen7: Emit code for GLSL 3.00 pack/unpack operations (v4)
v2: Remove lewd comment. [for idr] v3: - Optimize away tmp register for packHalf2x16. [for anholt, paul] - Improve comments. [for anholt, paul] - Reduce near-duplicate code by removing vec4_visitor emit_pack/unpack methods. [for chadv] v4: Factor out UD/W register conversion into helper function. [for anholt] Reviewed-by: Eric Anholt <eric@anholt.net> Reviewed-by: Ian Romanick <ian.d.romanick@intel.com> (v2) Signed-off-by: Chad Versace <chad.versace@linux.intel.com>
This commit is contained in:
parent
203c12b18f
commit
20dfa501b3
|
@ -726,6 +726,9 @@ enum opcode {
|
|||
FS_OPCODE_MOV_DISPATCH_TO_FLAGS,
|
||||
FS_OPCODE_DISCARD_JUMP,
|
||||
FS_OPCODE_SET_GLOBAL_OFFSET,
|
||||
FS_OPCODE_PACK_HALF_2x16_SPLIT,
|
||||
FS_OPCODE_UNPACK_HALF_2x16_SPLIT_X,
|
||||
FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y,
|
||||
|
||||
VS_OPCODE_URB_WRITE,
|
||||
VS_OPCODE_SCRATCH_READ,
|
||||
|
|
|
@ -542,6 +542,14 @@ private:
|
|||
struct brw_reg offset);
|
||||
void generate_discard_jump(fs_inst *inst);
|
||||
|
||||
/* Generates native code for FS_OPCODE_PACK_HALF_2x16_SPLIT; called from
 * generate_code() with the instruction's dst, src[0] (as x) and src[1]
 * (as y).  x and y are converted to 16-bit floats and packed into each
 * 32-bit channel of dst: y in the upper word, x in the lower word.
 */
void generate_pack_half_2x16_split(fs_inst *inst,
|
||||
struct brw_reg dst,
|
||||
struct brw_reg x,
|
||||
struct brw_reg y);
|
||||
/* Generates native code for both FS_OPCODE_UNPACK_HALF_2x16_SPLIT_X and
 * FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y; called from generate_code() with the
 * instruction's dst and src[0].  inst->opcode selects which 16-bit half of
 * each src channel is converted to a 32-bit float in dst.
 */
void generate_unpack_half_2x16_split(fs_inst *inst,
|
||||
struct brw_reg dst,
|
||||
struct brw_reg src);
|
||||
|
||||
void patch_discard_jumps_to_fb_writes();
|
||||
|
||||
struct brw_context *brw;
|
||||
|
|
|
@ -342,9 +342,21 @@ ir_channel_expressions_visitor::visit_leave(ir_assignment *ir)
|
|||
assert(!"not yet supported");
|
||||
break;
|
||||
|
||||
case ir_unop_pack_snorm_2x16:
|
||||
case ir_unop_pack_unorm_2x16:
|
||||
case ir_unop_pack_half_2x16:
|
||||
case ir_unop_unpack_snorm_2x16:
|
||||
case ir_unop_unpack_unorm_2x16:
|
||||
case ir_unop_unpack_half_2x16:
|
||||
case ir_quadop_vector:
|
||||
assert(!"should have been lowered");
|
||||
break;
|
||||
|
||||
case ir_unop_unpack_half_2x16_split_x:
|
||||
case ir_unop_unpack_half_2x16_split_y:
|
||||
case ir_binop_pack_half_2x16_split:
|
||||
/* The '!' must sit outside the string literal: a bare string literal is a
 * non-null pointer, so assert("...") can never fail.
 */
assert(!"not reached: expression operates on scalars only");
|
||||
break;
|
||||
}
|
||||
|
||||
ir->remove();
|
||||
|
|
|
@ -922,6 +922,95 @@ fs_generator::generate_set_global_offset(fs_inst *inst,
|
|||
brw_pop_insn_state(p);
|
||||
}
|
||||
|
||||
/**
|
||||
* Change the register's data type from UD to W, doubling the strides in order
|
||||
* to compensate for halving the data type width.
|
||||
*/
|
||||
static struct brw_reg
|
||||
ud_reg_to_w(struct brw_reg r)
|
||||
{
|
||||
assert(r.type == BRW_REGISTER_TYPE_UD);
|
||||
r.type = BRW_REGISTER_TYPE_W;
|
||||
|
||||
/* The BRW_*_STRIDE enums are defined so that incrementing the field
|
||||
* doubles the real stride.
|
||||
*/
|
||||
if (r.hstride != 0)
|
||||
++r.hstride;
|
||||
if (r.vstride != 0)
|
||||
++r.vstride;
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
void
|
||||
fs_generator::generate_pack_half_2x16_split(fs_inst *inst,
|
||||
struct brw_reg dst,
|
||||
struct brw_reg x,
|
||||
struct brw_reg y)
|
||||
{
|
||||
assert(intel->gen >= 7);
|
||||
assert(dst.type == BRW_REGISTER_TYPE_UD);
|
||||
assert(x.type = BRW_REGISTER_TYPE_F);
|
||||
assert(y.type = BRW_REGISTER_TYPE_F);
|
||||
|
||||
/* From the Ivybridge PRM, Vol4, Part3, Section 6.27 f32to16:
|
||||
*
|
||||
* Because this instruction does not have a 16-bit floating-point type,
|
||||
* the destination data type must be Word (W).
|
||||
*
|
||||
* The destination must be DWord-aligned and specify a horizontal stride
|
||||
* (HorzStride) of 2. The 16-bit result is stored in the lower word of
|
||||
* each destination channel and the upper word is not modified.
|
||||
*/
|
||||
struct brw_reg dst_w = ud_reg_to_w(dst);
|
||||
|
||||
/* Give each 32-bit channel of dst the form below , where "." means
|
||||
* unchanged.
|
||||
* 0x....hhhh
|
||||
*/
|
||||
brw_F32TO16(p, dst_w, y);
|
||||
|
||||
/* Now the form:
|
||||
* 0xhhhh0000
|
||||
*/
|
||||
brw_SHL(p, dst, dst, brw_imm_ud(16u));
|
||||
|
||||
/* And, finally the form of packHalf2x16's output:
|
||||
* 0xhhhhllll
|
||||
*/
|
||||
brw_F32TO16(p, dst_w, x);
|
||||
}
|
||||
|
||||
void
|
||||
fs_generator::generate_unpack_half_2x16_split(fs_inst *inst,
|
||||
struct brw_reg dst,
|
||||
struct brw_reg src)
|
||||
{
|
||||
assert(intel->gen >= 7);
|
||||
assert(dst.type == BRW_REGISTER_TYPE_F);
|
||||
assert(src.type == BRW_REGISTER_TYPE_UD);
|
||||
|
||||
/* From the Ivybridge PRM, Vol4, Part3, Section 6.26 f16to32:
|
||||
*
|
||||
* Because this instruction does not have a 16-bit floating-point type,
|
||||
* the source data type must be Word (W). The destination type must be
|
||||
* F (Float).
|
||||
*/
|
||||
struct brw_reg src_w = ud_reg_to_w(src);
|
||||
|
||||
/* Each channel of src has the form of unpackHalf2x16's input: 0xhhhhllll.
|
||||
* For the Y case, we wish to access only the upper word; therefore
|
||||
* a 16-bit subregister offset is needed.
|
||||
*/
|
||||
assert(inst->opcode == FS_OPCODE_UNPACK_HALF_2x16_SPLIT_X ||
|
||||
inst->opcode == FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y);
|
||||
if (inst->opcode == FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y)
|
||||
src.subnr += 2;
|
||||
|
||||
brw_F16TO32(p, dst, src_w);
|
||||
}
|
||||
|
||||
void
|
||||
fs_generator::generate_code(exec_list *instructions)
|
||||
{
|
||||
|
@ -1082,7 +1171,12 @@ fs_generator::generate_code(exec_list *instructions)
|
|||
case BRW_OPCODE_SHL:
|
||||
brw_SHL(p, dst, src[0], src[1]);
|
||||
break;
|
||||
|
||||
case BRW_OPCODE_F32TO16:
|
||||
brw_F32TO16(p, dst, src[0]);
|
||||
break;
|
||||
case BRW_OPCODE_F16TO32:
|
||||
brw_F16TO32(p, dst, src[0]);
|
||||
break;
|
||||
case BRW_OPCODE_CMP:
|
||||
brw_CMP(p, dst, inst->conditional_mod, src[0], src[1]);
|
||||
break;
|
||||
|
@ -1229,6 +1323,15 @@ fs_generator::generate_code(exec_list *instructions)
|
|||
generate_set_global_offset(inst, dst, src[0], src[1]);
|
||||
break;
|
||||
|
||||
case FS_OPCODE_PACK_HALF_2x16_SPLIT:
|
||||
generate_pack_half_2x16_split(inst, dst, src[0], src[1]);
|
||||
break;
|
||||
|
||||
case FS_OPCODE_UNPACK_HALF_2x16_SPLIT_X:
|
||||
case FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y:
|
||||
generate_unpack_half_2x16_split(inst, dst, src[0]);
|
||||
break;
|
||||
|
||||
default:
|
||||
if (inst->opcode < (int) ARRAY_SIZE(opcode_descs)) {
|
||||
_mesa_problem(ctx, "Unsupported opcode `%s' in FS",
|
||||
|
|
|
@ -538,7 +538,20 @@ fs_visitor::visit(ir_expression *ir)
|
|||
BRW_CONDITIONAL_L : BRW_CONDITIONAL_GE,
|
||||
this->result, op[0], op[1]);
|
||||
break;
|
||||
|
||||
case ir_unop_pack_snorm_2x16:
|
||||
case ir_unop_pack_unorm_2x16:
|
||||
case ir_unop_unpack_snorm_2x16:
|
||||
case ir_unop_unpack_unorm_2x16:
|
||||
case ir_unop_unpack_half_2x16:
|
||||
case ir_unop_pack_half_2x16:
|
||||
assert(!"not reached: should be handled by lower_packing_builtins");
|
||||
break;
|
||||
case ir_unop_unpack_half_2x16_split_x:
|
||||
emit(FS_OPCODE_UNPACK_HALF_2x16_SPLIT_X, this->result, op[0]);
|
||||
break;
|
||||
case ir_unop_unpack_half_2x16_split_y:
|
||||
emit(FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y, this->result, op[0]);
|
||||
break;
|
||||
case ir_binop_pow:
|
||||
emit_math(SHADER_OPCODE_POW, this->result, op[0], op[1]);
|
||||
break;
|
||||
|
@ -566,7 +579,9 @@ fs_visitor::visit(ir_expression *ir)
|
|||
else
|
||||
inst = emit(SHR(this->result, op[0], op[1]));
|
||||
break;
|
||||
|
||||
case ir_binop_pack_half_2x16_split:
|
||||
emit(FS_OPCODE_PACK_HALF_2x16_SPLIT, this->result, op[0], op[1]);
|
||||
break;
|
||||
case ir_binop_ubo_load:
|
||||
/* This IR node takes a constant uniform block and a constant or
|
||||
* variable byte offset within the block and loads a vector from that.
|
||||
|
|
Loading…
Reference in New Issue