From 1b6a319c15f3c63acb0384c47a94fb40f2aeb17d Mon Sep 17 00:00:00 2001 From: Rhys Perry Date: Fri, 15 May 2020 13:58:20 +0100 Subject: [PATCH] aco: add and set precise flag MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit No fossil-db changes. Signed-off-by: Rhys Perry Reviewed-by: Daniel Schürmann Part-of: --- src/amd/compiler/aco_builder_h.py | 8 ++++++++ src/amd/compiler/aco_instruction_selection.cpp | 7 +++++-- src/amd/compiler/aco_ir.h | 13 ++++++++++++- src/amd/compiler/aco_opt_value_numbering.cpp | 2 ++ src/amd/compiler/aco_print_ir.cpp | 2 ++ 5 files changed, 29 insertions(+), 3 deletions(-) diff --git a/src/amd/compiler/aco_builder_h.py b/src/amd/compiler/aco_builder_h.py index eb655471c90..edd5f3fda64 100644 --- a/src/amd/compiler/aco_builder_h.py +++ b/src/amd/compiler/aco_builder_h.py @@ -166,11 +166,18 @@ public: std::vector> *instructions; std::vector>::iterator it; + bool is_precise = false; Builder(Program *pgm) : program(pgm), use_iterator(false), start(false), lm(pgm->lane_mask), instructions(NULL) {} Builder(Program *pgm, Block *block) : program(pgm), use_iterator(false), start(false), lm(pgm ? pgm->lane_mask : s2), instructions(&block->instructions) {} Builder(Program *pgm, std::vector> *instrs) : program(pgm), use_iterator(false), start(false), lm(pgm ? pgm->lane_mask : s2), instructions(instrs) {} + Builder precise() const { + Builder res = *this; + res.is_precise = true; + return res; + }; + void moveEnd(Block *block) { instructions = &block->instructions; } @@ -524,6 +531,7 @@ formats = [("pseudo", [Format.PSEUDO], 'Pseudo_instruction', list(itertools.prod ${struct} *instr = create_instruction<${struct}>(opcode, (Format)(${'|'.join('(int)Format::%s' % f.name for f in formats)}), ${num_operands}, ${num_definitions}); % for i in range(num_definitions): instr->definitions[${i}] = def${i}; + instr->definitions[${i}].setPrecise(is_precise); % endfor % for i in range(num_operands): instr->operands[${i}] = op${i}.op; diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index 3c72f099411..0e9f5f0f609 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -590,6 +590,8 @@ void emit_vop2_instruction(isel_context *ctx, nir_alu_instr *instr, aco_opcode o bool commutative, bool swap_srcs=false, bool flush_denorms = false) { Builder bld(ctx->program, ctx->block); + bld.is_precise = instr->exact; + Temp src0 = get_alu_src(ctx, instr->src[swap_srcs ? 1 : 0]); Temp src1 = get_alu_src(ctx, instr->src[swap_srcs ? 0 : 1]); if (src1.type() == RegType::sgpr) { @@ -628,6 +630,7 @@ void emit_vop3a_instruction(isel_context *ctx, nir_alu_instr *instr, aco_opcode src2 = as_vgpr(ctx, src2); Builder bld(ctx->program, ctx->block); + bld.is_precise = instr->exact; if (flush_denorms && ctx->program->chip_class < GFX9) { assert(dst.size() == 1); Temp tmp = bld.vop3(op, Definition(dst), src0, src1, src2); @@ -640,6 +643,7 @@ void emit_vop3a_instruction(isel_context *ctx, nir_alu_instr *instr, aco_opcode void emit_vop1_instruction(isel_context *ctx, nir_alu_instr *instr, aco_opcode op, Temp dst) { Builder bld(ctx->program, ctx->block); + bld.is_precise = instr->exact; if (dst.type() == RegType::sgpr) bld.pseudo(aco_opcode::p_as_uniform, Definition(dst), bld.vop1(op, bld.def(RegType::vgpr, dst.size()), get_alu_src(ctx, instr->src[0]))); @@ -1041,6 +1045,7 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr) abort(); } Builder bld(ctx->program, ctx->block); + bld.is_precise = instr->exact; Temp dst = get_ssa_temp(ctx, &instr->dest.dest.ssa); switch(instr->op) { case nir_op_vec2: @@ -2703,7 +2708,6 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr) } case nir_op_unpack_half_2x16_split_x: { if (dst.regClass() == v1) { - Builder bld(ctx->program, ctx->block); bld.vop1(aco_opcode::v_cvt_f32_f16, Definition(dst), get_alu_src(ctx, instr->src[0])); } else { fprintf(stderr, "Unimplemented NIR instr bit size: "); @@ -2714,7 +2718,6 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr) } case nir_op_unpack_half_2x16_split_y: { if (dst.regClass() == v1) { - Builder bld(ctx->program, ctx->block); /* TODO: use SDWA here */ bld.vop1(aco_opcode::v_cvt_f32_f16, Definition(dst), bld.vop2(aco_opcode::v_lshrrev_b32, bld.def(v1), Operand(16u), as_vgpr(ctx, get_alu_src(ctx, instr->src[0])))); diff --git a/src/amd/compiler/aco_ir.h b/src/amd/compiler/aco_ir.h index 6ce1e8d0044..bd221ad6b61 100644 --- a/src/amd/compiler/aco_ir.h +++ b/src/amd/compiler/aco_ir.h @@ -652,7 +652,7 @@ private: class Definition final { public: - constexpr Definition() : temp(Temp(0, s1)), reg_(0), isFixed_(0), hasHint_(0), isKill_(0) {} + constexpr Definition() : temp(Temp(0, s1)), reg_(0), isFixed_(0), hasHint_(0), isKill_(0), isPrecise_(0) {} Definition(uint32_t index, RegClass type) noexcept : temp(index, type) {} explicit Definition(Temp tmp) noexcept @@ -739,6 +739,16 @@ public: return isKill_; } + constexpr void setPrecise(bool precise) noexcept + { + isPrecise_ = precise; + } + + constexpr bool isPrecise() const noexcept + { + return isPrecise_; + } + private: Temp temp = Temp(0, s1); PhysReg reg_; @@ -747,6 +757,7 @@ private: uint8_t isFixed_:1; uint8_t hasHint_:1; uint8_t isKill_:1; + uint8_t isPrecise_:1; }; /* can't initialize bit-fields in c++11, so work around using a union */ uint8_t control_ = 0; diff --git a/src/amd/compiler/aco_opt_value_numbering.cpp b/src/amd/compiler/aco_opt_value_numbering.cpp index 487d1588128..93668442d32 100644 --- a/src/amd/compiler/aco_opt_value_numbering.cpp +++ b/src/amd/compiler/aco_opt_value_numbering.cpp @@ -401,6 +401,8 @@ void process_block(vn_ctx& ctx, Block& block) assert(instr->definitions[i].regClass() == orig_instr->definitions[i].regClass()); assert(instr->definitions[i].isTemp()); ctx.renames[instr->definitions[i].tempId()] = orig_instr->definitions[i].getTemp(); + if (instr->definitions[i].isPrecise()) + orig_instr->definitions[i].setPrecise(true); } } else { ctx.expr_values.erase(res.first); diff --git a/src/amd/compiler/aco_print_ir.cpp b/src/amd/compiler/aco_print_ir.cpp index 545dc9f553c..0fb0ceb186d 100644 --- a/src/amd/compiler/aco_print_ir.cpp +++ b/src/amd/compiler/aco_print_ir.cpp @@ -174,6 +174,8 @@ static void print_operand(const Operand *operand, FILE *output) static void print_definition(const Definition *definition, FILE *output) { print_reg_class(definition->regClass(), output); + if (definition->isPrecise()) + fprintf(output, "(precise)"); fprintf(output, "%%%d", definition->tempId()); if (definition->isFixed())