From d6cd14f2131a5b1c41ab777ef3ea041993de1c1b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alejandro=20Pi=C3=B1eiro?= Date: Sat, 1 Jul 2017 08:12:59 +0200 Subject: [PATCH] i965/fs: Define new shader opcode to set rounding modes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Although it is possible to emit them directly as AND/OR on brw_fs_nir, having a specific opcode makes it easier to remove duplicate settings later. v2: (Curro) - Set thread control to 'switch' when using the control register - Use a single SHADER_OPCODE_RND_MODE opcode taking an immediate with the rounding mode. - Avoid magic numbers setting rounding mode field at control register. v3: (Curro) - Remove redundant and add missing whitespace lines. - Match printing instruction to IR opcode "rnd_mode" v4: (Topi Pohjolainen) - Fix code style. Signed-off-by: Alejandro Piñeiro Signed-off-by: Jose Maria Casanova Crespo Reviewed-by: Francisco Jerez Reviewed-by: Jason Ekstrand --- src/intel/compiler/brw_eu.h | 4 +++ src/intel/compiler/brw_eu_defines.h | 16 ++++++++++++ src/intel/compiler/brw_eu_emit.c | 33 +++++++++++++++++++++++++ src/intel/compiler/brw_fs_generator.cpp | 5 ++++ src/intel/compiler/brw_shader.cpp | 4 +++ 5 files changed, 62 insertions(+) diff --git a/src/intel/compiler/brw_eu.h b/src/intel/compiler/brw_eu.h index b5a206b3f10..343dcd867db 100644 --- a/src/intel/compiler/brw_eu.h +++ b/src/intel/compiler/brw_eu.h @@ -510,6 +510,10 @@ brw_broadcast(struct brw_codegen *p, struct brw_reg src, struct brw_reg idx); +void +brw_rounding_mode(struct brw_codegen *p, + enum brw_rnd_mode mode); + /*********************************************************************** * brw_eu_util.c: */ diff --git a/src/intel/compiler/brw_eu_defines.h b/src/intel/compiler/brw_eu_defines.h index 291dd361a29..8a8f36cbc11 100644 --- a/src/intel/compiler/brw_eu_defines.h +++ b/src/intel/compiler/brw_eu_defines.h @@ -400,6 +400,8 @@ enum opcode { SHADER_OPCODE_TYPED_SURFACE_WRITE, 
SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL, + SHADER_OPCODE_RND_MODE, + SHADER_OPCODE_MEMORY_FENCE, SHADER_OPCODE_GEN4_SCRATCH_READ, @@ -1238,4 +1240,18 @@ enum brw_message_target { /* R0 */ # define GEN7_GS_PAYLOAD_INSTANCE_ID_SHIFT 27 +/* CR0.0[5:4] Floating-Point Rounding Modes + * Skylake PRM, Volume 7 Part 1, "Control Register", page 756 + */ + +#define BRW_CR0_RND_MODE_MASK 0x30 +#define BRW_CR0_RND_MODE_SHIFT 4 + +enum PACKED brw_rnd_mode { + BRW_RND_MODE_RTNE = 0, /* Round to Nearest or Even */ + BRW_RND_MODE_RU = 1, /* Round Up, toward +inf */ + BRW_RND_MODE_RD = 2, /* Round Down, toward -inf */ + BRW_RND_MODE_RTZ = 3, /* Round Toward Zero */ +}; + #endif /* BRW_EU_DEFINES_H */ diff --git a/src/intel/compiler/brw_eu_emit.c b/src/intel/compiler/brw_eu_emit.c index dc14023b484..ca97ff7325e 100644 --- a/src/intel/compiler/brw_eu_emit.c +++ b/src/intel/compiler/brw_eu_emit.c @@ -3589,3 +3589,36 @@ brw_WAIT(struct brw_codegen *p) brw_inst_set_exec_size(devinfo, insn, BRW_EXECUTE_1); brw_inst_set_mask_control(devinfo, insn, BRW_MASK_DISABLE); } + +/** + * Changes the floating point rounding mode updating the control register + * field defined at cr0.0[5:4] bits. This function supports the changes to + * RTNE (00), RU (01), RD (10) and RTZ (11) rounding using bitwise operations. + * Only RTNE and RTZ rounding are enabled at nir. + */ +void +brw_rounding_mode(struct brw_codegen *p, + enum brw_rnd_mode mode) +{ + const unsigned bits = mode << BRW_CR0_RND_MODE_SHIFT; + + if (bits != BRW_CR0_RND_MODE_MASK) { /* RTZ: the OR below sets both bits */ + brw_inst *inst = brw_AND(p, brw_cr0_reg(0), brw_cr0_reg(0), + brw_imm_ud(~BRW_CR0_RND_MODE_MASK)); + + /* From the Skylake PRM, Volume 7, page 760: + * "Implementation Restriction on Register Access: When the control + * register is used as an explicit source and/or destination, hardware + * does not ensure execution pipeline coherency. 
Software must set the + * thread control field to ‘switch’ for an instruction that uses + * control register as an explicit operand." + */ + brw_inst_set_thread_control(p->devinfo, inst, BRW_THREAD_SWITCH); + } + + if (bits) { /* RTNE: field already cleared, nothing to set */ + brw_inst *inst = brw_OR(p, brw_cr0_reg(0), brw_cr0_reg(0), + brw_imm_ud(bits)); + brw_inst_set_thread_control(p->devinfo, inst, BRW_THREAD_SWITCH); + } +} diff --git a/src/intel/compiler/brw_fs_generator.cpp b/src/intel/compiler/brw_fs_generator.cpp index 4f90ec9dfff..a5c39cc956b 100644 --- a/src/intel/compiler/brw_fs_generator.cpp +++ b/src/intel/compiler/brw_fs_generator.cpp @@ -2176,6 +2176,11 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width) brw_DIM(p, dst, retype(src[0], BRW_REGISTER_TYPE_F)); break; + case SHADER_OPCODE_RND_MODE: + assert(src[0].file == BRW_IMMEDIATE_VALUE); + brw_rounding_mode(p, (brw_rnd_mode) src[0].d); + break; + default: unreachable("Unsupported opcode"); diff --git a/src/intel/compiler/brw_shader.cpp b/src/intel/compiler/brw_shader.cpp index aa9e5f3d284..d7d7616cf4f 100644 --- a/src/intel/compiler/brw_shader.cpp +++ b/src/intel/compiler/brw_shader.cpp @@ -482,6 +482,9 @@ brw_instruction_name(const struct gen_device_info *devinfo, enum opcode op) return "tes_add_indirect_urb_offset"; case TES_OPCODE_GET_PRIMITIVE_ID: return "tes_get_primitive_id"; + + case SHADER_OPCODE_RND_MODE: + return "rnd_mode"; } unreachable("not reached"); @@ -974,6 +977,7 @@ backend_instruction::has_side_effects() const case SHADER_OPCODE_BARRIER: case TCS_OPCODE_URB_WRITE: case TCS_OPCODE_RELEASE_INPUT: + case SHADER_OPCODE_RND_MODE: return true; default: return eot;