intel/eu: Switch to a logical state stack

Instead of the state stack that's based on copying a dummy instruction
around, we start using a logical stack of brw_insn_states.  This uses a
bit less memory and is way less conceptually bogus.

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
This commit is contained in:
Jason Ekstrand 2018-05-29 14:37:35 -07:00
parent db9675f5a4
commit 6a9525bf67
3 changed files with 72 additions and 126 deletions

View File

@ -129,91 +129,76 @@ brw_swizzle_immediate(enum brw_reg_type type, uint32_t x, unsigned swz)
/* Returns the default execution size taken from the codegen's current
 * default state (p->current).
 *
 * NOTE(review): this text appears to be a diff whose +/- markers were lost.
 * The first return is presumably the old read that decodes a brw_inst; the
 * second is presumably its replacement, a plain read of the exec_size field.
 * As written here, only the first return is reachable.
 */
unsigned
brw_get_default_exec_size(struct brw_codegen *p)
{
return brw_inst_exec_size(p->devinfo, p->current);
return p->current->exec_size;
}
/* Returns the default channel group taken from the codegen's current default
 * state (p->current).
 *
 * The if/else computes the group from the instruction's quarter control
 * (times 8), adding nibble control (times 4) on gen >= 7; before gen6 a
 * quarter control equal to BRW_COMPRESSION_COMPRESSED yields group 0.
 *
 * NOTE(review): this text appears to be a diff whose +/- markers were lost.
 * The if/else is presumably the old brw_inst-based computation and the final
 * return its replacement.  As written here both branches return, so the final
 * return is unreachable.
 */
unsigned
brw_get_default_group(struct brw_codegen *p)
{
if (p->devinfo->gen >= 6) {
unsigned group = brw_inst_qtr_control(p->devinfo, p->current) * 8;
if (p->devinfo->gen >= 7)
group += brw_inst_nib_control(p->devinfo, p->current) * 4;
return group;
} else {
unsigned qtr_control = brw_inst_qtr_control(p->devinfo, p->current);
if (qtr_control == BRW_COMPRESSION_COMPRESSED)
return 0;
else
return qtr_control * 8;
}
return p->current->group;
}
/* Returns the default access mode taken from the codegen's current default
 * state (p->current).
 *
 * NOTE(review): this text appears to be a diff whose +/- markers were lost.
 * The first return is presumably the old brw_inst-based read and the second
 * its replacement.  As written here, only the first return is reachable.
 */
unsigned
brw_get_default_access_mode(struct brw_codegen *p)
{
return brw_inst_access_mode(p->devinfo, p->current);
return p->current->access_mode;
}
/* Sets the default execution size in the codegen's current default state
 * (p->current) to `value`.
 *
 * NOTE(review): this text appears to be a diff whose +/- markers were lost.
 * The brw_inst_set_exec_size() call is presumably the old form and the field
 * assignment its replacement; they expect different types for p->current.
 */
void
brw_set_default_exec_size(struct brw_codegen *p, unsigned value)
{
brw_inst_set_exec_size(p->devinfo, p->current, value);
p->current->exec_size = value;
}
/* Sets the default predicate control in the codegen's current default state
 * (p->current) to `pc`.
 *
 * NOTE(review): this text appears to be a diff whose +/- markers were lost.
 * The brw_inst_set_pred_control() call is presumably the old form and the
 * assignment to the `predicate` field its replacement.
 */
void brw_set_default_predicate_control( struct brw_codegen *p, unsigned pc )
{
brw_inst_set_pred_control(p->devinfo, p->current, pc);
p->current->predicate = pc;
}
/* Sets the default predicate-inversion flag in the codegen's current default
 * state (p->current) to `predicate_inverse`.
 *
 * NOTE(review): this text appears to be a diff whose +/- markers were lost.
 * The brw_inst_set_pred_inv() call is presumably the old form and the
 * assignment to the `pred_inv` field its replacement.
 */
void brw_set_default_predicate_inverse(struct brw_codegen *p, bool predicate_inverse)
{
brw_inst_set_pred_inv(p->devinfo, p->current, predicate_inverse);
p->current->pred_inv = predicate_inverse;
}
/* Sets the default flag register and sub-register in the codegen's current
 * default state (p->current).
 *
 * The field-based form packs both values into flag_subreg as
 * reg * 2 + subreg and asserts subreg < 2 so the sub-register fits in the
 * low bit.
 *
 * NOTE(review): this text appears to be a diff whose +/- markers were lost.
 * The brw_inst_set_flag_*() calls (flag_reg_nr only on gen >= 7) are
 * presumably the old form; the assert and assignment are presumably their
 * replacement.
 */
void brw_set_default_flag_reg(struct brw_codegen *p, int reg, int subreg)
{
if (p->devinfo->gen >= 7)
brw_inst_set_flag_reg_nr(p->devinfo, p->current, reg);
brw_inst_set_flag_subreg_nr(p->devinfo, p->current, subreg);
assert(subreg < 2);
p->current->flag_subreg = reg * 2 + subreg;
}
/* Sets the default access mode in the codegen's current default state
 * (p->current) to `access_mode`.
 *
 * NOTE(review): this text appears to be a diff whose +/- markers were lost.
 * The brw_inst_set_access_mode() call is presumably the old form and the
 * field assignment its replacement.
 */
void brw_set_default_access_mode( struct brw_codegen *p, unsigned access_mode )
{
brw_inst_set_access_mode(p->devinfo, p->current, access_mode);
p->current->access_mode = access_mode;
}
/* Translates a brw_compression value into the default state held in
 * p->current.
 *
 * Field-based form visible below: BRW_COMPRESSION_NONE and
 * BRW_COMPRESSION_COMPRESSED select group 0, BRW_COMPRESSION_2NDHALF selects
 * group 8, and on gen <= 6 the `compressed` flag is set exactly when
 * compression_control == BRW_COMPRESSION_COMPRESSED.
 *
 * NOTE(review): this text appears to be a diff whose +/- markers were lost,
 * so the brw_inst_set_qtr_control()-based switch (presumably the old form)
 * is interleaved with the field-based switch (presumably its replacement).
 * The comment opened just inside the `gen >= 6` test is not closed until the
 * first closing comment marker further down, so the span is not valid C as
 * shown; read it as a diff, not as a function body.
 */
void
brw_set_default_compression_control(struct brw_codegen *p,
enum brw_compression compression_control)
{
if (p->devinfo->gen >= 6) {
/* Since we don't use the SIMD32 support in gen6, we translate
* the pre-gen6 compression control here.
switch (compression_control) {
case BRW_COMPRESSION_NONE:
/* This is the "use the first set of bits of dmask/vmask/arf
* according to execsize" option.
*/
switch (compression_control) {
case BRW_COMPRESSION_NONE:
/* This is the "use the first set of bits of dmask/vmask/arf
* according to execsize" option.
*/
brw_inst_set_qtr_control(p->devinfo, p->current, GEN6_COMPRESSION_1Q);
break;
case BRW_COMPRESSION_2NDHALF:
/* For SIMD8, this is "use the second set of 8 bits." */
brw_inst_set_qtr_control(p->devinfo, p->current, GEN6_COMPRESSION_2Q);
break;
case BRW_COMPRESSION_COMPRESSED:
/* For SIMD16 instruction compression, use the first set of 16 bits
* since we don't do SIMD32 dispatch.
*/
brw_inst_set_qtr_control(p->devinfo, p->current, GEN6_COMPRESSION_1H);
break;
default:
unreachable("not reached");
}
} else {
brw_inst_set_qtr_control(p->devinfo, p->current, compression_control);
p->current->group = 0;
break;
case BRW_COMPRESSION_2NDHALF:
/* For SIMD8, this is "use the second set of 8 bits." */
p->current->group = 8;
break;
case BRW_COMPRESSION_COMPRESSED:
/* For SIMD16 instruction compression, use the first set of 16 bits
* since we don't do SIMD32 dispatch.
*/
p->current->group = 0;
break;
default:
unreachable("not reached");
}
if (p->devinfo->gen <= 6) {
p->current->compressed =
(compression_control == BRW_COMPRESSION_COMPRESSED);
}
}
@ -246,7 +231,7 @@ brw_inst_set_compression(const struct gen_device_info *devinfo,
/* Sets the default `compressed` flag in the codegen's current default state
 * (p->current) to `on`.
 *
 * NOTE(review): this text appears to be a diff whose +/- markers were lost.
 * The brw_inst_set_compression() call is presumably the old form and the
 * field assignment its replacement.
 */
void
brw_set_default_compression(struct brw_codegen *p, bool on)
{
brw_inst_set_compression(p->devinfo, p->current, on);
p->current->compressed = on;
}
/**
@ -283,23 +268,22 @@ brw_inst_set_group(const struct gen_device_info *devinfo,
/* Sets the default channel group in the codegen's current default state
 * (p->current) to `group`.
 *
 * NOTE(review): this text appears to be a diff whose +/- markers were lost.
 * The brw_inst_set_group() call is presumably the old form and the field
 * assignment its replacement.
 */
void
brw_set_default_group(struct brw_codegen *p, unsigned group)
{
brw_inst_set_group(p->devinfo, p->current, group);
p->current->group = group;
}
/* Sets the default mask control in the codegen's current default state
 * (p->current) to `value`.
 *
 * NOTE(review): this text appears to be a diff whose +/- markers were lost.
 * The brw_inst_set_mask_control() call is presumably the old form and the
 * field assignment its replacement.
 */
void brw_set_default_mask_control( struct brw_codegen *p, unsigned value )
{
brw_inst_set_mask_control(p->devinfo, p->current, value);
p->current->mask_control = value;
}
/* Sets the default saturate flag in the codegen's current default state
 * (p->current) to `enable`.
 *
 * NOTE(review): this text appears to be a diff whose +/- markers were lost.
 * The brw_inst_set_saturate() call is presumably the old form and the field
 * assignment its replacement.
 */
void brw_set_default_saturate( struct brw_codegen *p, bool enable )
{
brw_inst_set_saturate(p->devinfo, p->current, enable);
p->current->saturate = enable;
}
/* Sets the default accumulator-write-control value in the codegen's current
 * default state (p->current) to `value`.
 *
 * NOTE(review): this text appears to be a diff whose +/- markers were lost.
 * The gen >= 6 guarded brw_inst_set_acc_wr_control() call is presumably the
 * old form and the field assignment its replacement.  With the lines merged
 * as shown, the `if` guards only the call; the assignment runs
 * unconditionally.
 */
void brw_set_default_acc_write_control(struct brw_codegen *p, unsigned value)
{
if (p->devinfo->gen >= 6)
brw_inst_set_acc_wr_control(p->devinfo, p->current, value);
p->current->acc_wr_control = value;
}
void brw_push_insn_state( struct brw_codegen *p )

View File

@ -46,6 +46,36 @@ extern "C" {
#define BRW_EU_MAX_INSN_STACK 5
/* Logical default instruction state, stored as packed bit-fields.
 *
 * Field order and widths determine the layout, so do not reorder the fields.
 */
struct brw_insn_state {
/* One of BRW_EXECUTE_* */
unsigned exec_size:3;
/* Group in units of channels */
unsigned group:5;
/* Compression control on gen4-5
 *
 * NOTE(review): brw_set_default_compression_control() writes this field
 * when gen <= 6, which also covers gen6 -- confirm which range is intended.
 */
bool compressed:1;
/* One of BRW_MASK_* */
unsigned mask_control:1;
bool saturate:1;
/* One of BRW_ALIGN_* */
unsigned access_mode:1;
/* One of BRW_PREDICATE_* */
enum brw_predicate predicate:4;
/* Predicate inversion flag */
bool pred_inv:1;
/* Flag subreg. Bottom bit is subreg, top bit is reg (reg * 2 + subreg) */
unsigned flag_subreg:2;
/* Accumulator write control flag */
bool acc_wr_control:1;
};
/* A helper for accessing the last instruction emitted. This makes it easy
* to set various bits on an instruction without having to create temporary
* variable and assign the emitted instruction to those.
@ -62,8 +92,8 @@ struct brw_codegen {
/* Allow clients to push/pop instruction state:
*/
brw_inst stack[BRW_EU_MAX_INSN_STACK];
brw_inst *current;
struct brw_insn_state stack[BRW_EU_MAX_INSN_STACK];
struct brw_insn_state *current;
/** Whether or not the user wants automatic exec sizes
*

View File

@ -621,72 +621,6 @@ gen7_set_dp_scratch_message(struct brw_codegen *p,
brw_inst_set_scratch_addr_offset(devinfo, inst, addr_offset);
}
/* Logical default instruction state, stored as packed bit-fields.
 *
 * Field order and widths determine the layout, so do not reorder the fields.
 *
 * NOTE(review): an identical definition appears in the header hunk of this
 * commit; this copy is presumably the one being removed from the .c file.
 */
struct brw_insn_state {
/* One of BRW_EXECUTE_* */
unsigned exec_size:3;
/* Group in units of channels */
unsigned group:5;
/* Compression control on gen4-5 */
bool compressed:1;
/* One of BRW_MASK_* */
unsigned mask_control:1;
bool saturate:1;
/* One of BRW_ALIGN_* */
unsigned access_mode:1;
/* One of BRW_PREDICATE_* */
enum brw_predicate predicate:4;
/* Predicate inversion flag */
bool pred_inv:1;
/* Flag subreg. Bottom bit is subreg, top bit is reg (reg * 2 + subreg) */
unsigned flag_subreg:2;
/* Accumulator write control flag */
bool acc_wr_control:1;
};
/* Decodes the default-state fields of an encoded instruction into a
 * brw_insn_state, which is returned by value.
 *
 * - group: quarter control * 8, plus nibble control * 4 on gen >= 7.  Before
 *   gen6, a quarter control equal to BRW_COMPRESSION_COMPRESSED instead
 *   yields group 0 with `compressed` set.
 * - flag_subreg: the flag sub-register number, plus flag register number * 2
 *   on gen >= 7.
 * - acc_wr_control: only read on gen >= 6; otherwise left zero.
 */
static struct brw_insn_state
brw_inst_get_state(const struct gen_device_info *devinfo,
                   const brw_inst *insn)
{
   struct brw_insn_state state = { };

   state.exec_size = brw_inst_exec_size(devinfo, insn);

   const unsigned qtr_control = brw_inst_qtr_control(devinfo, insn);
   if (devinfo->gen < 6) {
      /* Pre-gen6 folds "compressed" into the quarter-control field. */
      state.compressed = (qtr_control == BRW_COMPRESSION_COMPRESSED);
      state.group = state.compressed ? 0 : qtr_control * 8;
   } else {
      state.group = qtr_control * 8;
      if (devinfo->gen >= 7)
         state.group += brw_inst_nib_control(devinfo, insn) * 4;
   }

   state.access_mode = brw_inst_access_mode(devinfo, insn);
   state.mask_control = brw_inst_mask_control(devinfo, insn);
   state.saturate = brw_inst_saturate(devinfo, insn);
   state.predicate = brw_inst_pred_control(devinfo, insn);
   state.pred_inv = brw_inst_pred_inv(devinfo, insn);

   /* Pack the flag register into the top bit and the sub-register into the
    * bottom bit.
    */
   unsigned packed_flag = brw_inst_flag_subreg_nr(devinfo, insn);
   if (devinfo->gen >= 7)
      packed_flag += brw_inst_flag_reg_nr(devinfo, insn) * 2;
   state.flag_subreg = packed_flag;

   if (devinfo->gen >= 6)
      state.acc_wr_control = brw_inst_acc_wr_control(devinfo, insn);

   return state;
}
static void
brw_inst_set_state(const struct gen_device_info *devinfo,
brw_inst *insn,
@ -735,8 +669,7 @@ brw_next_insn(struct brw_codegen *p, unsigned opcode)
brw_inst_set_opcode(devinfo, insn, opcode);
/* Apply the default instruction state */
struct brw_insn_state current = brw_inst_get_state(devinfo, p->current);
brw_inst_set_state(devinfo, insn, &current);
brw_inst_set_state(devinfo, insn, p->current);
return insn;
}
@ -3504,9 +3437,8 @@ brw_find_live_channel(struct brw_codegen *p, struct brw_reg dst,
*/
inst = brw_FBL(p, vec1(dst), exec_mask);
} else {
const struct brw_reg flag = brw_flag_reg(
brw_inst_flag_reg_nr(devinfo, p->current),
brw_inst_flag_subreg_nr(devinfo, p->current));
const struct brw_reg flag = brw_flag_reg(p->current->flag_subreg / 2,
p->current->flag_subreg % 2);
brw_set_default_exec_size(p, BRW_EXECUTE_1);
brw_MOV(p, retype(flag, BRW_REGISTER_TYPE_UD), brw_imm_ud(0));