intel/eu: Switch to a logical state stack
Instead of the state stack that's based on copying a dummy instruction around, we start using a logical stack of brw_insn_states. This uses a bit less memory and is way less conceptually bogus. Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
This commit is contained in:
parent
db9675f5a4
commit
6a9525bf67
|
@ -129,91 +129,76 @@ brw_swizzle_immediate(enum brw_reg_type type, uint32_t x, unsigned swz)
|
|||
unsigned
|
||||
brw_get_default_exec_size(struct brw_codegen *p)
|
||||
{
|
||||
return brw_inst_exec_size(p->devinfo, p->current);
|
||||
return p->current->exec_size;
|
||||
}
|
||||
|
||||
unsigned
|
||||
brw_get_default_group(struct brw_codegen *p)
|
||||
{
|
||||
if (p->devinfo->gen >= 6) {
|
||||
unsigned group = brw_inst_qtr_control(p->devinfo, p->current) * 8;
|
||||
if (p->devinfo->gen >= 7)
|
||||
group += brw_inst_nib_control(p->devinfo, p->current) * 4;
|
||||
return group;
|
||||
} else {
|
||||
unsigned qtr_control = brw_inst_qtr_control(p->devinfo, p->current);
|
||||
if (qtr_control == BRW_COMPRESSION_COMPRESSED)
|
||||
return 0;
|
||||
else
|
||||
return qtr_control * 8;
|
||||
}
|
||||
return p->current->group;
|
||||
}
|
||||
|
||||
unsigned
|
||||
brw_get_default_access_mode(struct brw_codegen *p)
|
||||
{
|
||||
return brw_inst_access_mode(p->devinfo, p->current);
|
||||
return p->current->access_mode;
|
||||
}
|
||||
|
||||
void
|
||||
brw_set_default_exec_size(struct brw_codegen *p, unsigned value)
|
||||
{
|
||||
brw_inst_set_exec_size(p->devinfo, p->current, value);
|
||||
p->current->exec_size = value;
|
||||
}
|
||||
|
||||
void brw_set_default_predicate_control( struct brw_codegen *p, unsigned pc )
|
||||
{
|
||||
brw_inst_set_pred_control(p->devinfo, p->current, pc);
|
||||
p->current->predicate = pc;
|
||||
}
|
||||
|
||||
void brw_set_default_predicate_inverse(struct brw_codegen *p, bool predicate_inverse)
|
||||
{
|
||||
brw_inst_set_pred_inv(p->devinfo, p->current, predicate_inverse);
|
||||
p->current->pred_inv = predicate_inverse;
|
||||
}
|
||||
|
||||
void brw_set_default_flag_reg(struct brw_codegen *p, int reg, int subreg)
|
||||
{
|
||||
if (p->devinfo->gen >= 7)
|
||||
brw_inst_set_flag_reg_nr(p->devinfo, p->current, reg);
|
||||
|
||||
brw_inst_set_flag_subreg_nr(p->devinfo, p->current, subreg);
|
||||
assert(subreg < 2);
|
||||
p->current->flag_subreg = reg * 2 + subreg;
|
||||
}
|
||||
|
||||
void brw_set_default_access_mode( struct brw_codegen *p, unsigned access_mode )
|
||||
{
|
||||
brw_inst_set_access_mode(p->devinfo, p->current, access_mode);
|
||||
p->current->access_mode = access_mode;
|
||||
}
|
||||
|
||||
void
|
||||
brw_set_default_compression_control(struct brw_codegen *p,
|
||||
enum brw_compression compression_control)
|
||||
{
|
||||
if (p->devinfo->gen >= 6) {
|
||||
/* Since we don't use the SIMD32 support in gen6, we translate
|
||||
* the pre-gen6 compression control here.
|
||||
switch (compression_control) {
|
||||
case BRW_COMPRESSION_NONE:
|
||||
/* This is the "use the first set of bits of dmask/vmask/arf
|
||||
* according to execsize" option.
|
||||
*/
|
||||
switch (compression_control) {
|
||||
case BRW_COMPRESSION_NONE:
|
||||
/* This is the "use the first set of bits of dmask/vmask/arf
|
||||
* according to execsize" option.
|
||||
*/
|
||||
brw_inst_set_qtr_control(p->devinfo, p->current, GEN6_COMPRESSION_1Q);
|
||||
break;
|
||||
case BRW_COMPRESSION_2NDHALF:
|
||||
/* For SIMD8, this is "use the second set of 8 bits." */
|
||||
brw_inst_set_qtr_control(p->devinfo, p->current, GEN6_COMPRESSION_2Q);
|
||||
break;
|
||||
case BRW_COMPRESSION_COMPRESSED:
|
||||
/* For SIMD16 instruction compression, use the first set of 16 bits
|
||||
* since we don't do SIMD32 dispatch.
|
||||
*/
|
||||
brw_inst_set_qtr_control(p->devinfo, p->current, GEN6_COMPRESSION_1H);
|
||||
break;
|
||||
default:
|
||||
unreachable("not reached");
|
||||
}
|
||||
} else {
|
||||
brw_inst_set_qtr_control(p->devinfo, p->current, compression_control);
|
||||
p->current->group = 0;
|
||||
break;
|
||||
case BRW_COMPRESSION_2NDHALF:
|
||||
/* For SIMD8, this is "use the second set of 8 bits." */
|
||||
p->current->group = 8;
|
||||
break;
|
||||
case BRW_COMPRESSION_COMPRESSED:
|
||||
/* For SIMD16 instruction compression, use the first set of 16 bits
|
||||
* since we don't do SIMD32 dispatch.
|
||||
*/
|
||||
p->current->group = 0;
|
||||
break;
|
||||
default:
|
||||
unreachable("not reached");
|
||||
}
|
||||
|
||||
if (p->devinfo->gen <= 6) {
|
||||
p->current->compressed =
|
||||
(compression_control == BRW_COMPRESSION_COMPRESSED);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -246,7 +231,7 @@ brw_inst_set_compression(const struct gen_device_info *devinfo,
|
|||
void
|
||||
brw_set_default_compression(struct brw_codegen *p, bool on)
|
||||
{
|
||||
brw_inst_set_compression(p->devinfo, p->current, on);
|
||||
p->current->compressed = on;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -283,23 +268,22 @@ brw_inst_set_group(const struct gen_device_info *devinfo,
|
|||
void
|
||||
brw_set_default_group(struct brw_codegen *p, unsigned group)
|
||||
{
|
||||
brw_inst_set_group(p->devinfo, p->current, group);
|
||||
p->current->group = group;
|
||||
}
|
||||
|
||||
void brw_set_default_mask_control( struct brw_codegen *p, unsigned value )
|
||||
{
|
||||
brw_inst_set_mask_control(p->devinfo, p->current, value);
|
||||
p->current->mask_control = value;
|
||||
}
|
||||
|
||||
void brw_set_default_saturate( struct brw_codegen *p, bool enable )
|
||||
{
|
||||
brw_inst_set_saturate(p->devinfo, p->current, enable);
|
||||
p->current->saturate = enable;
|
||||
}
|
||||
|
||||
void brw_set_default_acc_write_control(struct brw_codegen *p, unsigned value)
|
||||
{
|
||||
if (p->devinfo->gen >= 6)
|
||||
brw_inst_set_acc_wr_control(p->devinfo, p->current, value);
|
||||
p->current->acc_wr_control = value;
|
||||
}
|
||||
|
||||
void brw_push_insn_state( struct brw_codegen *p )
|
||||
|
|
|
@ -46,6 +46,36 @@ extern "C" {
|
|||
|
||||
#define BRW_EU_MAX_INSN_STACK 5
|
||||
|
||||
struct brw_insn_state {
|
||||
/* One of BRW_EXECUTE_* */
|
||||
unsigned exec_size:3;
|
||||
|
||||
/* Group in units of channels */
|
||||
unsigned group:5;
|
||||
|
||||
/* Compression control on gen4-5 */
|
||||
bool compressed:1;
|
||||
|
||||
/* One of BRW_MASK_* */
|
||||
unsigned mask_control:1;
|
||||
|
||||
bool saturate:1;
|
||||
|
||||
/* One of BRW_ALIGN_* */
|
||||
unsigned access_mode:1;
|
||||
|
||||
/* One of BRW_PREDICATE_* */
|
||||
enum brw_predicate predicate:4;
|
||||
|
||||
bool pred_inv:1;
|
||||
|
||||
/* Flag subreg. Bottom bit is subreg, top bit is reg */
|
||||
unsigned flag_subreg:2;
|
||||
|
||||
bool acc_wr_control:1;
|
||||
};
|
||||
|
||||
|
||||
/* A helper for accessing the last instruction emitted. This makes it easy
|
||||
* to set various bits on an instruction without having to create temporary
|
||||
* variable and assign the emitted instruction to those.
|
||||
|
@ -62,8 +92,8 @@ struct brw_codegen {
|
|||
|
||||
/* Allow clients to push/pop instruction state:
|
||||
*/
|
||||
brw_inst stack[BRW_EU_MAX_INSN_STACK];
|
||||
brw_inst *current;
|
||||
struct brw_insn_state stack[BRW_EU_MAX_INSN_STACK];
|
||||
struct brw_insn_state *current;
|
||||
|
||||
/** Whether or not the user wants automatic exec sizes
|
||||
*
|
||||
|
|
|
@ -621,72 +621,6 @@ gen7_set_dp_scratch_message(struct brw_codegen *p,
|
|||
brw_inst_set_scratch_addr_offset(devinfo, inst, addr_offset);
|
||||
}
|
||||
|
||||
struct brw_insn_state {
|
||||
/* One of BRW_EXECUTE_* */
|
||||
unsigned exec_size:3;
|
||||
|
||||
/* Group in units of channels */
|
||||
unsigned group:5;
|
||||
|
||||
/* Compression control on gen4-5 */
|
||||
bool compressed:1;
|
||||
|
||||
/* One of BRW_MASK_* */
|
||||
unsigned mask_control:1;
|
||||
|
||||
bool saturate:1;
|
||||
|
||||
/* One of BRW_ALIGN_* */
|
||||
unsigned access_mode:1;
|
||||
|
||||
/* One of BRW_PREDICATE_* */
|
||||
enum brw_predicate predicate:4;
|
||||
|
||||
bool pred_inv:1;
|
||||
|
||||
/* Flag subreg. Bottom bit is subreg, top bit is reg */
|
||||
unsigned flag_subreg:2;
|
||||
|
||||
bool acc_wr_control:1;
|
||||
};
|
||||
|
||||
static struct brw_insn_state
|
||||
brw_inst_get_state(const struct gen_device_info *devinfo,
|
||||
const brw_inst *insn)
|
||||
{
|
||||
struct brw_insn_state state = { };
|
||||
|
||||
state.exec_size = brw_inst_exec_size(devinfo, insn);
|
||||
if (devinfo->gen >= 6) {
|
||||
state.group = brw_inst_qtr_control(devinfo, insn) * 8;
|
||||
if (devinfo->gen >= 7)
|
||||
state.group += brw_inst_nib_control(devinfo, insn) * 4;
|
||||
} else {
|
||||
unsigned qtr_control = brw_inst_qtr_control(devinfo, insn);
|
||||
if (qtr_control == BRW_COMPRESSION_COMPRESSED) {
|
||||
state.group = 0;
|
||||
state.compressed = true;
|
||||
} else {
|
||||
state.group = qtr_control * 8;
|
||||
state.compressed = false;
|
||||
}
|
||||
}
|
||||
state.access_mode = brw_inst_access_mode(devinfo, insn);
|
||||
state.mask_control = brw_inst_mask_control(devinfo, insn);
|
||||
state.saturate = brw_inst_saturate(devinfo, insn);
|
||||
state.predicate = brw_inst_pred_control(devinfo, insn);
|
||||
state.pred_inv = brw_inst_pred_inv(devinfo, insn);
|
||||
|
||||
state.flag_subreg = brw_inst_flag_subreg_nr(devinfo, insn);
|
||||
if (devinfo->gen >= 7)
|
||||
state.flag_subreg += brw_inst_flag_reg_nr(devinfo, insn) * 2;
|
||||
|
||||
if (devinfo->gen >= 6)
|
||||
state.acc_wr_control = brw_inst_acc_wr_control(devinfo, insn);
|
||||
|
||||
return state;
|
||||
}
|
||||
|
||||
static void
|
||||
brw_inst_set_state(const struct gen_device_info *devinfo,
|
||||
brw_inst *insn,
|
||||
|
@ -735,8 +669,7 @@ brw_next_insn(struct brw_codegen *p, unsigned opcode)
|
|||
brw_inst_set_opcode(devinfo, insn, opcode);
|
||||
|
||||
/* Apply the default instruction state */
|
||||
struct brw_insn_state current = brw_inst_get_state(devinfo, p->current);
|
||||
brw_inst_set_state(devinfo, insn, &current);
|
||||
brw_inst_set_state(devinfo, insn, p->current);
|
||||
|
||||
return insn;
|
||||
}
|
||||
|
@ -3504,9 +3437,8 @@ brw_find_live_channel(struct brw_codegen *p, struct brw_reg dst,
|
|||
*/
|
||||
inst = brw_FBL(p, vec1(dst), exec_mask);
|
||||
} else {
|
||||
const struct brw_reg flag = brw_flag_reg(
|
||||
brw_inst_flag_reg_nr(devinfo, p->current),
|
||||
brw_inst_flag_subreg_nr(devinfo, p->current));
|
||||
const struct brw_reg flag = brw_flag_reg(p->current->flag_subreg / 2,
|
||||
p->current->flag_subreg % 2);
|
||||
|
||||
brw_set_default_exec_size(p, BRW_EXECUTE_1);
|
||||
brw_MOV(p, retype(flag, BRW_REGISTER_TYPE_UD), brw_imm_ud(0));
|
||||
|
|
Loading…
Reference in New Issue