glsl: use bitfield_insert instead of and + shift + or for packing
It is fairly tricky to detect the proper conditions for using bitfield
insert, but easy to just use it up front. This removes a lot of
instructions on nvc0 when invoking the packing builtins.

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
Reviewed-by: Matt Turner <mattst88@gmail.com>
This commit is contained in:
parent
c676c432f3
commit
889a946a45
|
@@ -66,7 +66,9 @@ enum lower_packing_builtins_op {
|
|||
LOWER_UNPACK_SNORM_4x8 = 0x0200,
|
||||
|
||||
LOWER_PACK_UNORM_4x8 = 0x0400,
|
||||
LOWER_UNPACK_UNORM_4x8 = 0x0800
|
||||
LOWER_UNPACK_UNORM_4x8 = 0x0800,
|
||||
|
||||
LOWER_PACK_USE_BFI = 0x1000,
|
||||
};
|
||||
|
||||
bool do_common_optimization(exec_list *ir, bool linked,
|
||||
|
|
|
@@ -118,6 +118,7 @@ public:
|
|||
*rvalue = split_unpack_half_2x16(op0);
|
||||
break;
|
||||
case LOWER_PACK_UNPACK_NONE:
|
||||
case LOWER_PACK_USE_BFI:
|
||||
assert(!"not reached");
|
||||
break;
|
||||
}
|
||||
|
@@ -222,9 +223,16 @@ private:
|
|||
|
||||
/* uvec2 u = UVEC2_RVAL; */
|
||||
ir_variable *u = factory.make_temp(glsl_type::uvec2_type,
|
||||
"tmp_pack_uvec2_to_uint");
|
||||
"tmp_pack_uvec2_to_uint");
|
||||
factory.emit(assign(u, uvec2_rval));
|
||||
|
||||
if (op_mask & LOWER_PACK_USE_BFI) {
|
||||
return bitfield_insert(bit_and(swizzle_x(u), constant(0xffffu)),
|
||||
swizzle_y(u),
|
||||
constant(16),
|
||||
constant(16));
|
||||
}
|
||||
|
||||
/* return (u.y << 16) | (u.x & 0xffff); */
|
||||
return bit_or(lshift(swizzle_y(u), constant(16u)),
|
||||
bit_and(swizzle_x(u), constant(0xffffu)));
|
||||
|
@@ -242,9 +250,22 @@ private:
|
|||
{
|
||||
assert(uvec4_rval->type == glsl_type::uvec4_type);
|
||||
|
||||
/* uvec4 u = UVEC4_RVAL; */
|
||||
ir_variable *u = factory.make_temp(glsl_type::uvec4_type,
|
||||
"tmp_pack_uvec4_to_uint");
|
||||
"tmp_pack_uvec4_to_uint");
|
||||
|
||||
if (op_mask & LOWER_PACK_USE_BFI) {
|
||||
/* uvec4 u = UVEC4_RVAL; */
|
||||
factory.emit(assign(u, uvec4_rval));
|
||||
|
||||
return bitfield_insert(bitfield_insert(
|
||||
bitfield_insert(
|
||||
bit_and(swizzle_x(u), constant(0xffu)),
|
||||
swizzle_y(u), constant(8), constant(8)),
|
||||
swizzle_z(u), constant(16), constant(8)),
|
||||
swizzle_w(u), constant(24), constant(8));
|
||||
}
|
||||
|
||||
/* uvec4 u = UVEC4_RVAL & 0xff */
|
||||
factory.emit(assign(u, bit_and(uvec4_rval, constant(0xffu))));
|
||||
|
||||
/* return (u.w << 24) | (u.z << 16) | (u.y << 8) | u.x; */
|
||||
|
|
|
@@ -6019,6 +6019,9 @@ st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
|
|||
LOWER_PACK_HALF_2x16 |
|
||||
LOWER_UNPACK_HALF_2x16;
|
||||
|
||||
if (ctx->Extensions.ARB_gpu_shader5)
|
||||
lower_inst |= LOWER_PACK_USE_BFI;
|
||||
|
||||
lower_packing_builtins(ir, lower_inst);
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue