i965/gen7+: Use NIR for lowering of pack/unpack opcodes.
This commit is contained in:
parent
5deba3f00a
commit
874ede4983
|
@ -87,7 +87,15 @@ shader_perf_log_mesa(void *data, const char *fmt, ...)
|
|||
/* NIR compiler options table named for the scalar path.
 *
 * Starts from the shared COMMON_OPTIONS entries and then sets every
 * pack/unpack lowering flag listed below (half_2x16, snorm/unorm 2x16 and
 * snorm/unorm 4x8, in both the pack and unpack directions) to true, so those
 * opcodes are requested to be lowered in NIR, as the commit title states.
 *
 * NOTE(review): COMMON_OPTIONS is defined outside this view; presumably it
 * expands to further designated initializers -- confirm before reordering.
 */
static const struct nir_shader_compiler_options scalar_nir_options = {
|
||||
COMMON_OPTIONS,
|
||||
.lower_pack_half_2x16 = true,
|
||||
.lower_pack_snorm_2x16 = true,
|
||||
.lower_pack_snorm_4x8 = true,
|
||||
.lower_pack_unorm_2x16 = true,
|
||||
.lower_pack_unorm_4x8 = true,
|
||||
.lower_unpack_half_2x16 = true,
|
||||
.lower_unpack_snorm_2x16 = true,
|
||||
.lower_unpack_snorm_4x8 = true,
|
||||
.lower_unpack_unorm_2x16 = true,
|
||||
.lower_unpack_unorm_4x8 = true,
|
||||
};
|
||||
|
||||
static const struct nir_shader_compiler_options vector_nir_options = {
|
||||
|
@ -98,6 +106,13 @@ static const struct nir_shader_compiler_options vector_nir_options = {
|
|||
* instructions because it can optimize better for us.
|
||||
*/
|
||||
.fdot_replicates = true,
|
||||
|
||||
.lower_pack_snorm_2x16 = true,
|
||||
.lower_pack_unorm_2x16 = true,
|
||||
.lower_unpack_snorm_2x16 = true,
|
||||
.lower_unpack_unorm_2x16 = true,
|
||||
.lower_extract_byte = true,
|
||||
.lower_extract_word = true,
|
||||
};
|
||||
|
||||
struct brw_compiler *
|
||||
|
|
|
@ -73,6 +73,10 @@ channel_expressions_predicate(ir_instruction *ir)
|
|||
|
||||
switch (expr->operation) {
|
||||
case ir_unop_pack_half_2x16:
|
||||
case ir_unop_pack_snorm_2x16:
|
||||
case ir_unop_pack_snorm_4x8:
|
||||
case ir_unop_pack_unorm_2x16:
|
||||
case ir_unop_pack_unorm_4x8:
|
||||
return false;
|
||||
|
||||
/* these opcodes need to act on the whole vector,
|
||||
|
@ -166,6 +170,10 @@ ir_channel_expressions_visitor::visit_leave(ir_assignment *ir)
|
|||
|
||||
switch (expr->operation) {
|
||||
case ir_unop_pack_half_2x16:
|
||||
case ir_unop_pack_snorm_2x16:
|
||||
case ir_unop_pack_snorm_4x8:
|
||||
case ir_unop_pack_unorm_2x16:
|
||||
case ir_unop_pack_unorm_4x8:
|
||||
case ir_unop_interpolate_at_centroid:
|
||||
case ir_binop_interpolate_at_offset:
|
||||
case ir_binop_interpolate_at_sample:
|
||||
|
|
|
@ -73,26 +73,13 @@ brw_lower_packing_builtins(struct brw_context *brw,
|
|||
gl_shader_stage shader_type,
|
||||
exec_list *ir)
|
||||
{
|
||||
const struct brw_compiler *compiler = brw->intelScreen->compiler;
|
||||
/* Gens < 7 don't have instructions to convert to or from half-precision,
|
||||
* and Gens < 6 don't expose that functionality.
|
||||
*/
|
||||
if (brw->gen != 6)
|
||||
return;
|
||||
|
||||
int ops = LOWER_PACK_SNORM_2x16
|
||||
| LOWER_UNPACK_SNORM_2x16
|
||||
| LOWER_PACK_UNORM_2x16
|
||||
| LOWER_UNPACK_UNORM_2x16;
|
||||
|
||||
if (compiler->scalar_stage[shader_type]) {
|
||||
ops |= LOWER_UNPACK_UNORM_4x8
|
||||
| LOWER_UNPACK_SNORM_4x8
|
||||
| LOWER_PACK_UNORM_4x8
|
||||
| LOWER_PACK_SNORM_4x8;
|
||||
}
|
||||
|
||||
if (brw->gen < 7) {
|
||||
ops |= LOWER_PACK_HALF_2x16
|
||||
| LOWER_UNPACK_HALF_2x16;
|
||||
}
|
||||
|
||||
lower_packing_builtins(ir, ops);
|
||||
lower_packing_builtins(ir, LOWER_PACK_HALF_2x16 | LOWER_UNPACK_HALF_2x16);
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
Loading…
Reference in New Issue