From 1f0f26d60c148e360908af34130c4e00dba8f3df Mon Sep 17 00:00:00 2001 From: Matt Turner Date: Tue, 9 Apr 2013 19:22:34 -0700 Subject: [PATCH] i965/fs: Add support for bit instructions. Don't bother scalarizing ir_binop_bfm, since its results are identical for all channels. v2: Subtract result of FBH from 31 (unless an error) to convert MSB counts to LSB counts. v3: Use op0->clone() in ir_triop_bfi to prevent (var_ref channel_expressions) from appearing multiple times in the IR. Reviewed-by: Chris Forbes [v2] --- src/mesa/drivers/dri/i965/brw_fs.cpp | 7 +++ src/mesa/drivers/dri/i965/brw_fs.h | 7 +++ .../dri/i965/brw_fs_channel_expressions.cpp | 37 ++++++++++++++ src/mesa/drivers/dri/i965/brw_fs_emit.cpp | 48 +++++++++++++++++++ src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 43 +++++++++++++++++ 5 files changed, 142 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 84116752145..778a69e7091 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -173,6 +173,13 @@ ALU2(SHL) ALU2(SHR) ALU2(ASR) ALU3(LRP) +ALU1(BFREV) +ALU3(BFE) +ALU2(BFI1) +ALU3(BFI2) +ALU1(FBH) +ALU1(FBL) +ALU1(CBIT) /** Gen4 predicated IF. 
*/ fs_inst * diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index 436a97a9703..9a2bcc07685 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -276,6 +276,13 @@ public: uint32_t condition); fs_inst *LRP(fs_reg dst, fs_reg a, fs_reg y, fs_reg x); fs_inst *DEP_RESOLVE_MOV(int grf); + fs_inst *BFREV(fs_reg dst, fs_reg value); + fs_inst *BFE(fs_reg dst, fs_reg bits, fs_reg offset, fs_reg value); + fs_inst *BFI1(fs_reg dst, fs_reg bits, fs_reg offset); + fs_inst *BFI2(fs_reg dst, fs_reg bfi1_dst, fs_reg insert, fs_reg base); + fs_inst *FBH(fs_reg dst, fs_reg value); + fs_inst *FBL(fs_reg dst, fs_reg value); + fs_inst *CBIT(fs_reg dst, fs_reg value); int type_size(const struct glsl_type *type); fs_inst *get_instruction_generating_reg(fs_inst *start, diff --git a/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp b/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp index 30d8d9bf527..0f3d4abdd26 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp @@ -216,6 +216,10 @@ ir_channel_expressions_visitor::visit_leave(ir_assignment *ir) case ir_unop_cos_reduced: case ir_unop_dFdx: case ir_unop_dFdy: + case ir_unop_bitfield_reverse: + case ir_unop_bit_count: + case ir_unop_find_msb: + case ir_unop_find_lsb: for (i = 0; i < vector_elements; i++) { ir_rvalue *op0 = get_element(op_var[0], i); @@ -338,11 +342,26 @@ ir_channel_expressions_visitor::visit_leave(ir_assignment *ir) assert(!"noise should have been broken down to function call"); break; + case ir_binop_bfm: { + /* Does not need to be scalarized, since its result will be identical + * for all channels: a single expression is built from element 0 of each operand and assigned with index 0. 
+ */ + ir_rvalue *op0 = get_element(op_var[0], 0); + ir_rvalue *op1 = get_element(op_var[1], 0); + + assign(ir, 0, new(mem_ctx) ir_expression(expr->operation, + element_type, + op0, + op1)); + break; + } + case ir_binop_ubo_load: assert(!"not yet supported"); break; case ir_triop_lrp: + case ir_triop_bitfield_extract: for (i = 0; i < vector_elements; i++) { ir_rvalue *op0 = get_element(op_var[0], i); ir_rvalue *op1 = get_element(op_var[1], i); @@ -356,6 +375,23 @@ ir_channel_expressions_visitor::visit_leave(ir_assignment *ir) } break; + case ir_triop_bfi: { + /* Only a single BFM is needed for multiple BFIs: element 0 of operand 0 is fetched once, outside the loop, and a clone of it is handed to each channel's expression so that the same node is not referenced from more than one place in the IR. */ + ir_rvalue *op0 = get_element(op_var[0], 0); + + for (i = 0; i < vector_elements; i++) { + ir_rvalue *op1 = get_element(op_var[1], i); + ir_rvalue *op2 = get_element(op_var[2], i); + + assign(ir, i, new(mem_ctx) ir_expression(expr->operation, + element_type, + op0->clone(mem_ctx, NULL), + op1, + op2)); + } + break; + } + case ir_unop_pack_snorm_2x16: case ir_unop_pack_snorm_4x8: case ir_unop_pack_unorm_2x16: @@ -366,6 +402,7 @@ ir_channel_expressions_visitor::visit_leave(ir_assignment *ir) case ir_unop_unpack_unorm_2x16: case ir_unop_unpack_unorm_4x8: case ir_unop_unpack_half_2x16: + case ir_quadop_bitfield_insert: case ir_quadop_vector: assert(!"should have been lowered"); break; diff --git a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp index 5a5044eedcd..9b7e68acb8f 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp @@ -1209,6 +1209,54 @@ fs_generator::generate_code(exec_list *instructions) case BRW_OPCODE_SEL: brw_SEL(p, dst, src[0], src[1]); break; + case BRW_OPCODE_BFREV: + /* BFREV only supports UD type for src and dst. */ + brw_BFREV(p, retype(dst, BRW_REGISTER_TYPE_UD), + retype(src[0], BRW_REGISTER_TYPE_UD)); + break; + case BRW_OPCODE_FBH: + /* FBH only supports UD type for dst. 
*/ + brw_FBH(p, retype(dst, BRW_REGISTER_TYPE_UD), src[0]); + break; + case BRW_OPCODE_FBL: + /* FBL only supports UD type for dst. */ + brw_FBL(p, retype(dst, BRW_REGISTER_TYPE_UD), src[0]); + break; + case BRW_OPCODE_CBIT: + /* CBIT only supports UD type for dst. */ + brw_CBIT(p, retype(dst, BRW_REGISTER_TYPE_UD), src[0]); + break; + + case BRW_OPCODE_BFE: /* Emitted with the access mode set to Align16, and set to Align1 again afterwards. For dispatch_width 16 the instruction is emitted twice: first with compression NONE, then with 2NDHALF on sechalf() of every operand, after which compression is set to COMPRESSED. NOTE(review): presumably because this three-source instruction cannot be emitted compressed or in Align1 mode -- confirm against the hardware documentation. */ + brw_set_access_mode(p, BRW_ALIGN_16); + if (dispatch_width == 16) { + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_BFE(p, dst, src[0], src[1], src[2]); + brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); + brw_BFE(p, sechalf(dst), sechalf(src[0]), sechalf(src[1]), sechalf(src[2])); + brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); + } else { + brw_BFE(p, dst, src[0], src[1], src[2]); + } + brw_set_access_mode(p, BRW_ALIGN_1); + break; + + case BRW_OPCODE_BFI1: + brw_BFI1(p, dst, src[0], src[1]); + break; + case BRW_OPCODE_BFI2: /* Same emission sequence as BRW_OPCODE_BFE: Align16 access mode, and two separately emitted halves (NONE, then 2NDHALF with sechalf() operands) when dispatch_width is 16. */ + brw_set_access_mode(p, BRW_ALIGN_16); + if (dispatch_width == 16) { + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_BFI2(p, dst, src[0], src[1], src[2]); + brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); + brw_BFI2(p, sechalf(dst), sechalf(src[0]), sechalf(src[1]), sechalf(src[2])); + brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); + } else { + brw_BFI2(p, dst, src[0], src[1], src[2]); + } + brw_set_access_mode(p, BRW_ALIGN_1); + break; case BRW_OPCODE_IF: if (inst->src[0].file != BAD_FILE) { diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index 55ae6898866..d2bac2a3def 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -587,6 +587,49 @@ fs_visitor::visit(ir_expression *ir) emit_math(SHADER_OPCODE_POW, this->result, op[0], op[1]); break; + case ir_unop_bitfield_reverse: + emit(BFREV(this->result, op[0])); + break; + case ir_unop_bit_count: + emit(CBIT(this->result, op[0])); + break; + case 
ir_unop_find_msb: + temp = fs_reg(this, glsl_type::uint_type); + emit(FBH(temp, op[0])); + + /* FBH counts from the MSB side, while GLSL's findMSB() wants the count + * from the LSB side. If FBH didn't return an error (0xFFFFFFFF), then + * subtract the result from 31 to convert the MSB count into an LSB count. + */ + + /* FBH only supports UD type for dst, so use a MOV to convert UD to D. The CMP against -1 (condition NZ) that follows supplies the predicate for the ADD further down, so the ADD only takes effect where the result is not -1. */ + emit(MOV(this->result, temp)); + emit(CMP(reg_null_d, this->result, fs_reg(-1), BRW_CONDITIONAL_NZ)); + + temp.negate = true; /* With the source negated, the ADD below computes 31 - temp. */ + inst = emit(ADD(this->result, temp, fs_reg(31))); + inst->predicate = BRW_PREDICATE_NORMAL; + break; + case ir_unop_find_lsb: + emit(FBL(this->result, op[0])); + break; + case ir_triop_bitfield_extract: + /* Note that the instruction's argument order is reversed from GLSL + * and the IR. + */ + emit(BFE(this->result, op[2], op[1], op[0])); + break; + case ir_binop_bfm: /* op[0] and op[1] are passed as BFI1's "bits" and "offset" parameters, in IR order. */ + emit(BFI1(this->result, op[0], op[1])); + break; + case ir_triop_bfi: /* op[0], op[1] and op[2] are passed as BFI2's "bfi1_dst", "insert" and "base" parameters, in IR order. */ + emit(BFI2(this->result, op[0], op[1], op[2])); + break; + case ir_quadop_bitfield_insert: + assert(!"not reached: should be handled by " + "lower_instructions::bitfield_insert_to_bfm_bfi"); + break; + case ir_unop_bit_not: emit(NOT(this->result, op[0])); break;