i965/fs: Add support for bit instructions.

Don't bother scalarizing ir_binop_bfm, since its results are
identical for all channels.

v2: Subtract result of FBH from 31 (unless an error) to convert
    MSB counts to LSB counts.
v3: Use op0->clone() in ir_triop_bfi to prevent (var_ref
    channel_expressions) from appearing multiple times in the IR.

Reviewed-by: Chris Forbes <chrisf@ijw.co.nz> [v2]
This commit is contained in:
Matt Turner 2013-04-09 19:22:34 -07:00
parent fa958182b7
commit 1f0f26d60c
5 changed files with 142 additions and 0 deletions

View File

@ -173,6 +173,13 @@ ALU2(SHL)
ALU2(SHR)
ALU2(ASR)
ALU3(LRP)
/* Bit-manipulation instruction builders.
 *
 * NOTE(review): the ALU1/ALU2/ALU3 macro bodies are not visible in this
 * excerpt.  The digit appears to be the number of source operands, which is
 * consistent with the builder declarations of the same names (for example
 * BFREV(dst, value) and BFE(dst, bits, offset, value)) -- confirm against
 * the macro definitions.
 */
ALU1(BFREV)
ALU3(BFE)
ALU2(BFI1)
ALU3(BFI2)
ALU1(FBH)
ALU1(FBL)
ALU1(CBIT)
/** Gen4 predicated IF. */
fs_inst *

View File

@ -276,6 +276,13 @@ public:
uint32_t condition);
fs_inst *LRP(fs_reg dst, fs_reg a, fs_reg y, fs_reg x);
fs_inst *DEP_RESOLVE_MOV(int grf);
/* Builders for the bit-manipulation instructions.  Each returns the newly
 * built fs_inst; the expression visitor hands the result to emit().
 */
/** Used for ir_unop_bitfield_reverse. */
fs_inst *BFREV(fs_reg dst, fs_reg value);
/** Used for ir_triop_bitfield_extract.  Note the operand order is the
 * reverse of the GLSL/IR operand order.
 */
fs_inst *BFE(fs_reg dst, fs_reg bits, fs_reg offset, fs_reg value);
/** Used for ir_binop_bfm. */
fs_inst *BFI1(fs_reg dst, fs_reg bits, fs_reg offset);
/** Used for ir_triop_bfi; the parameter name indicates that the first source
 * is the destination of a preceding BFI1.
 */
fs_inst *BFI2(fs_reg dst, fs_reg bfi1_dst, fs_reg insert, fs_reg base);
/** Used for ir_unop_find_msb.  The visitor converts the result, which is
 * counted from the MSB side, into an LSB-relative index.
 */
fs_inst *FBH(fs_reg dst, fs_reg value);
/** Used for ir_unop_find_lsb. */
fs_inst *FBL(fs_reg dst, fs_reg value);
/** Used for ir_unop_bit_count. */
fs_inst *CBIT(fs_reg dst, fs_reg value);
int type_size(const struct glsl_type *type);
fs_inst *get_instruction_generating_reg(fs_inst *start,

View File

@ -216,6 +216,10 @@ ir_channel_expressions_visitor::visit_leave(ir_assignment *ir)
case ir_unop_cos_reduced:
case ir_unop_dFdx:
case ir_unop_dFdy:
/* The unary bit operations fall through to the same per-channel loop as the
 * unary cases listed before them: one scalar expression is built per vector
 * element from the matching element of operand 0.
 */
case ir_unop_bitfield_reverse:
case ir_unop_bit_count:
case ir_unop_find_msb:
case ir_unop_find_lsb:
for (i = 0; i < vector_elements; i++) {
ir_rvalue *op0 = get_element(op_var[0], i);
@ -338,11 +342,26 @@ ir_channel_expressions_visitor::visit_leave(ir_assignment *ir)
assert(!"noise should have been broken down to function call");
break;
case ir_binop_bfm: {
   /* BFM is not expanded per channel: its result is identical for every
    * channel, so a single expression is built from element 0 of each
    * operand and assigned to channel 0 only.
    */
   ir_rvalue *bits = get_element(op_var[0], 0);
   ir_rvalue *offset = get_element(op_var[1], 0);

   ir_expression *bfm_expr =
      new(mem_ctx) ir_expression(expr->operation, element_type, bits, offset);
   assign(ir, 0, bfm_expr);
   break;
}
case ir_binop_ubo_load:
assert(!"not yet supported");
break;
/* bitfield_extract is scalarized by the same per-channel loop as lrp: each
 * channel takes the matching element of every operand.
 */
case ir_triop_lrp:
case ir_triop_bitfield_extract:
for (i = 0; i < vector_elements; i++) {
ir_rvalue *op0 = get_element(op_var[0], i);
ir_rvalue *op1 = get_element(op_var[1], i);
@ -356,6 +375,23 @@ ir_channel_expressions_visitor::visit_leave(ir_assignment *ir)
}
break;
case ir_triop_bfi: {
   /* Only a single BFM is needed for multiple BFIs, so operand 0 is read
    * from element 0 once and shared by every channel.  Each per-channel
    * expression receives its own clone of it, so that the same IR node does
    * not appear more than once in the tree.
    */
   ir_rvalue *shared_bfm = get_element(op_var[0], 0);

   for (i = 0; i < vector_elements; i++) {
      ir_rvalue *insert = get_element(op_var[1], i);
      ir_rvalue *base = get_element(op_var[2], i);

      ir_expression *bfi_expr =
         new(mem_ctx) ir_expression(expr->operation,
                                    element_type,
                                    shared_bfm->clone(mem_ctx, NULL),
                                    insert,
                                    base);
      assign(ir, i, bfi_expr);
   }
   break;
}
case ir_unop_pack_snorm_2x16:
case ir_unop_pack_snorm_4x8:
case ir_unop_pack_unorm_2x16:
@ -366,6 +402,7 @@ ir_channel_expressions_visitor::visit_leave(ir_assignment *ir)
case ir_unop_unpack_unorm_2x16:
case ir_unop_unpack_unorm_4x8:
case ir_unop_unpack_half_2x16:
/* bitfield_insert is not expected to reach this pass.  The fs_visitor
 * assertion for the same opcode names
 * lower_instructions::bitfield_insert_to_bfm_bfi as the pass that rewrites
 * it.  NOTE(review): confirm that lowering always runs before this visitor.
 */
case ir_quadop_bitfield_insert:
case ir_quadop_vector:
assert(!"should have been lowered");
break;

View File

@ -1209,6 +1209,54 @@ fs_generator::generate_code(exec_list *instructions)
case BRW_OPCODE_SEL:
brw_SEL(p, dst, src[0], src[1]);
break;
/* Single-source bit instructions.  Each case retypes the destination to UD
 * before emitting; BFREV retypes its source as well, while the other three
 * pass src[0] through with whatever type it already has.
 */
case BRW_OPCODE_BFREV:
/* BFREV only supports UD type for src and dst. */
brw_BFREV(p, retype(dst, BRW_REGISTER_TYPE_UD),
retype(src[0], BRW_REGISTER_TYPE_UD));
break;
case BRW_OPCODE_FBH:
/* FBH only supports UD type for dst. */
brw_FBH(p, retype(dst, BRW_REGISTER_TYPE_UD), src[0]);
break;
case BRW_OPCODE_FBL:
/* FBL only supports UD type for dst. */
brw_FBL(p, retype(dst, BRW_REGISTER_TYPE_UD), src[0]);
break;
case BRW_OPCODE_CBIT:
/* CBIT only supports UD type for dst. */
brw_CBIT(p, retype(dst, BRW_REGISTER_TYPE_UD), src[0]);
break;
case BRW_OPCODE_BFE:
   /* BFE is emitted in Align16 access mode; the mode is switched back to
    * Align1 once the instruction(s) have been emitted.
    */
   brw_set_access_mode(p, BRW_ALIGN_16);
   if (dispatch_width != 16) {
      brw_BFE(p, dst, src[0], src[1], src[2]);
   } else {
      /* 16-wide dispatch: emit one uncompressed instruction for the first
       * half and one for the second half (using sechalf() of every
       * register), then put compression control back to COMPRESSED.
       */
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
      brw_BFE(p, dst, src[0], src[1], src[2]);

      brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
      brw_BFE(p, sechalf(dst),
              sechalf(src[0]), sechalf(src[1]), sechalf(src[2]));

      brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
   }
   brw_set_access_mode(p, BRW_ALIGN_1);
   break;
case BRW_OPCODE_BFI1:
/* Emitted directly: unlike BFE and BFI2, no access-mode switch and no
 * per-half split for 16-wide dispatch is performed here.
 * NOTE(review): presumably unnecessary for this two-source instruction --
 * confirm against the hardware documentation.
 */
brw_BFI1(p, dst, src[0], src[1]);
break;
case BRW_OPCODE_BFI2:
   /* BFI2 is emitted in Align16 access mode; the mode is switched back to
    * Align1 once the instruction(s) have been emitted.
    */
   brw_set_access_mode(p, BRW_ALIGN_16);
   if (dispatch_width != 16) {
      brw_BFI2(p, dst, src[0], src[1], src[2]);
   } else {
      /* 16-wide dispatch: emit one uncompressed instruction for the first
       * half and one for the second half (using sechalf() of every
       * register), then put compression control back to COMPRESSED.
       */
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
      brw_BFI2(p, dst, src[0], src[1], src[2]);

      brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
      brw_BFI2(p, sechalf(dst),
               sechalf(src[0]), sechalf(src[1]), sechalf(src[2]));

      brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
   }
   brw_set_access_mode(p, BRW_ALIGN_1);
   break;
case BRW_OPCODE_IF:
if (inst->src[0].file != BAD_FILE) {

View File

@ -587,6 +587,49 @@ fs_visitor::visit(ir_expression *ir)
emit_math(SHADER_OPCODE_POW, this->result, op[0], op[1]);
break;
case ir_unop_bitfield_reverse:
/* Maps directly onto a single BFREV of the operand. */
emit(BFREV(this->result, op[0]));
break;
case ir_unop_bit_count:
/* Maps directly onto a single CBIT of the operand. */
emit(CBIT(this->result, op[0]));
break;
case ir_unop_find_msb:
/* FBH writes into a separate UD temporary rather than into the result. */
temp = fs_reg(this, glsl_type::uint_type);
emit(FBH(temp, op[0]));
/* FBH counts from the MSB side, while GLSL's findMSB() wants the count
 * from the LSB side. If FBH didn't return an error (0xFFFFFFFF), then
 * subtract the result from 31 to convert the MSB count into an LSB count.
 */
/* FBH only supports UD type for dst, so use a MOV to convert UD to D. */
emit(MOV(this->result, temp));
/* Compare the copied result against -1 (the error value) with the NZ
 * conditional modifier; the comparison's destination is the null register,
 * so only the condition outcome is kept.
 */
emit(CMP(reg_null_d, this->result, fs_reg(-1), BRW_CONDITIONAL_NZ));
/* Negate temp only after the MOV above has been emitted, so the MOV copies
 * the raw FBH value and the ADD below computes 31 - temp.
 */
temp.negate = true;
inst = emit(ADD(this->result, temp, fs_reg(31)));
/* Predicate the ADD so it only overwrites the result where the comparison
 * above held, i.e. where FBH did not report an error.
 */
inst->predicate = BRW_PREDICATE_NORMAL;
break;
case ir_unop_find_lsb:
/* Unlike find_msb, the FBL result is written straight to the result
 * register with no follow-up conversion.
 */
emit(FBL(this->result, op[0]));
break;
case ir_triop_bitfield_extract:
/* Note that the instruction's argument order is reversed from GLSL
 * and the IR.
 */
emit(BFE(this->result, op[2], op[1], op[0]));
break;
case ir_binop_bfm:
/* bfm maps onto BFI1, with the IR operands passed in their original
 * order.
 */
emit(BFI1(this->result, op[0], op[1]));
break;
case ir_triop_bfi:
/* bfi maps onto BFI2, with the IR operands passed in their original
 * order.
 */
emit(BFI2(this->result, op[0], op[1], op[2]));
break;
case ir_quadop_bitfield_insert:
/* The four-operand form is never emitted directly; it is expected to
 * have been split into bfm + bfi by the lowering pass named below.
 */
assert(!"not reached: should be handled by "
"lower_instructions::bitfield_insert_to_bfm_bfi");
break;
case ir_unop_bit_not:
emit(NOT(this->result, op[0]));
break;