i965/fs: Add an exec_size field to fs_inst
This will, eventually, allow us to manage execution sizes of instructions in a much more natural way from the fs_visitor level. i965/fs: Explicitly set instruction execute size in a couple of places. i965/blorp: Explicitly set instruction execute sizes. Since blorp is all 16-wide and isn't, in general, very careful about register width, we'll just set it all explicitly. Signed-off-by: Jason Ekstrand <jason.ekstrand@intel.com> Reviewed-by: Matt Turner <mattst88@gmail.com>
This commit is contained in:
parent
fbc0a798ee
commit
071ac3a467
|
@ -96,7 +96,7 @@ brw_blorp_eu_emitter::emit_render_target_write(const struct brw_reg &src0,
|
|||
unsigned msg_length,
|
||||
bool use_header)
|
||||
{
|
||||
fs_inst *inst = new (mem_ctx) fs_inst(FS_OPCODE_BLORP_FB_WRITE);
|
||||
fs_inst *inst = new (mem_ctx) fs_inst(FS_OPCODE_BLORP_FB_WRITE, 16);
|
||||
|
||||
inst->src[0] = src0;
|
||||
inst->base_mrf = msg_reg_nr;
|
||||
|
|
|
@ -65,7 +65,7 @@ protected:
|
|||
{
|
||||
emit_cmp(op, x, y);
|
||||
|
||||
fs_inst *mv = new (mem_ctx) fs_inst(BRW_OPCODE_MOV, dst, src);
|
||||
fs_inst *mv = new (mem_ctx) fs_inst(BRW_OPCODE_MOV, 16, dst, src);
|
||||
mv->predicate = BRW_PREDICATE_NORMAL;
|
||||
insts.push_tail(mv);
|
||||
}
|
||||
|
@ -82,17 +82,17 @@ protected:
|
|||
const struct brw_reg &src3)
|
||||
{
|
||||
insts.push_tail(
|
||||
new (mem_ctx) fs_inst(BRW_OPCODE_LRP, dst, src1, src2, src3));
|
||||
new (mem_ctx) fs_inst(BRW_OPCODE_LRP, 16, dst, src1, src2, src3));
|
||||
}
|
||||
|
||||
inline void emit_mov(const struct brw_reg& dst, const struct brw_reg& src)
|
||||
{
|
||||
insts.push_tail(new (mem_ctx) fs_inst(BRW_OPCODE_MOV, dst, src));
|
||||
insts.push_tail(new (mem_ctx) fs_inst(BRW_OPCODE_MOV, 16, dst, src));
|
||||
}
|
||||
|
||||
inline void emit_mov_8(const struct brw_reg& dst, const struct brw_reg& src)
|
||||
{
|
||||
fs_inst *mv = new (mem_ctx) fs_inst(BRW_OPCODE_MOV, dst, src);
|
||||
fs_inst *mv = new (mem_ctx) fs_inst(BRW_OPCODE_MOV, 8, dst, src);
|
||||
mv->force_uncompressed = true;
|
||||
insts.push_tail(mv);
|
||||
}
|
||||
|
@ -101,21 +101,21 @@ protected:
|
|||
const struct brw_reg& src1,
|
||||
const struct brw_reg& src2)
|
||||
{
|
||||
insts.push_tail(new (mem_ctx) fs_inst(BRW_OPCODE_AND, dst, src1, src2));
|
||||
insts.push_tail(new (mem_ctx) fs_inst(BRW_OPCODE_AND, 16, dst, src1, src2));
|
||||
}
|
||||
|
||||
inline void emit_add(const struct brw_reg& dst,
|
||||
const struct brw_reg& src1,
|
||||
const struct brw_reg& src2)
|
||||
{
|
||||
insts.push_tail(new (mem_ctx) fs_inst(BRW_OPCODE_ADD, dst, src1, src2));
|
||||
insts.push_tail(new (mem_ctx) fs_inst(BRW_OPCODE_ADD, 16, dst, src1, src2));
|
||||
}
|
||||
|
||||
inline void emit_add_8(const struct brw_reg& dst,
|
||||
const struct brw_reg& src1,
|
||||
const struct brw_reg& src2)
|
||||
{
|
||||
fs_inst *add = new (mem_ctx) fs_inst(BRW_OPCODE_ADD, dst, src1, src2);
|
||||
fs_inst *add = new (mem_ctx) fs_inst(BRW_OPCODE_ADD, 8, dst, src1, src2);
|
||||
add->force_uncompressed = true;
|
||||
insts.push_tail(add);
|
||||
}
|
||||
|
@ -124,40 +124,40 @@ protected:
|
|||
const struct brw_reg& src1,
|
||||
const struct brw_reg& src2)
|
||||
{
|
||||
insts.push_tail(new (mem_ctx) fs_inst(BRW_OPCODE_MUL, dst, src1, src2));
|
||||
insts.push_tail(new (mem_ctx) fs_inst(BRW_OPCODE_MUL, 16, dst, src1, src2));
|
||||
}
|
||||
|
||||
inline void emit_shr(const struct brw_reg& dst,
|
||||
const struct brw_reg& src1,
|
||||
const struct brw_reg& src2)
|
||||
{
|
||||
insts.push_tail(new (mem_ctx) fs_inst(BRW_OPCODE_SHR, dst, src1, src2));
|
||||
insts.push_tail(new (mem_ctx) fs_inst(BRW_OPCODE_SHR, 16, dst, src1, src2));
|
||||
}
|
||||
|
||||
inline void emit_shl(const struct brw_reg& dst,
|
||||
const struct brw_reg& src1,
|
||||
const struct brw_reg& src2)
|
||||
{
|
||||
insts.push_tail(new (mem_ctx) fs_inst(BRW_OPCODE_SHL, dst, src1, src2));
|
||||
insts.push_tail(new (mem_ctx) fs_inst(BRW_OPCODE_SHL, 16, dst, src1, src2));
|
||||
}
|
||||
|
||||
inline void emit_or(const struct brw_reg& dst,
|
||||
const struct brw_reg& src1,
|
||||
const struct brw_reg& src2)
|
||||
{
|
||||
insts.push_tail(new (mem_ctx) fs_inst(BRW_OPCODE_OR, dst, src1, src2));
|
||||
insts.push_tail(new (mem_ctx) fs_inst(BRW_OPCODE_OR, 16, dst, src1, src2));
|
||||
}
|
||||
|
||||
inline void emit_frc(const struct brw_reg& dst,
|
||||
const struct brw_reg& src)
|
||||
{
|
||||
insts.push_tail(new (mem_ctx) fs_inst(BRW_OPCODE_FRC, dst, src));
|
||||
insts.push_tail(new (mem_ctx) fs_inst(BRW_OPCODE_FRC, 16, dst, src));
|
||||
}
|
||||
|
||||
inline void emit_rndd(const struct brw_reg& dst,
|
||||
const struct brw_reg& src)
|
||||
{
|
||||
insts.push_tail(new (mem_ctx) fs_inst(BRW_OPCODE_RNDD, dst, src));
|
||||
insts.push_tail(new (mem_ctx) fs_inst(BRW_OPCODE_RNDD, 16, dst, src));
|
||||
}
|
||||
|
||||
inline void emit_cmp_if(enum brw_conditional_mod op,
|
||||
|
@ -165,17 +165,17 @@ protected:
|
|||
const struct brw_reg &y)
|
||||
{
|
||||
emit_cmp(op, x, y);
|
||||
insts.push_tail(new (mem_ctx) fs_inst(BRW_OPCODE_IF));
|
||||
insts.push_tail(new (mem_ctx) fs_inst(BRW_OPCODE_IF, 16));
|
||||
}
|
||||
|
||||
inline void emit_else(void)
|
||||
{
|
||||
insts.push_tail(new (mem_ctx) fs_inst(BRW_OPCODE_ELSE));
|
||||
insts.push_tail(new (mem_ctx) fs_inst(BRW_OPCODE_ELSE, 16));
|
||||
}
|
||||
|
||||
inline void emit_endif(void)
|
||||
{
|
||||
insts.push_tail(new (mem_ctx) fs_inst(BRW_OPCODE_ENDIF));
|
||||
insts.push_tail(new (mem_ctx) fs_inst(BRW_OPCODE_ENDIF, 16));
|
||||
}
|
||||
|
||||
private:
|
||||
|
|
|
@ -53,7 +53,8 @@ extern "C" {
|
|||
#include "glsl/glsl_types.h"
|
||||
|
||||
void
|
||||
fs_inst::init(enum opcode opcode, const fs_reg &dst, fs_reg *src, int sources)
|
||||
fs_inst::init(enum opcode opcode, uint8_t exec_size, const fs_reg &dst,
|
||||
fs_reg *src, int sources)
|
||||
{
|
||||
memset(this, 0, sizeof(*this));
|
||||
|
||||
|
@ -61,6 +62,33 @@ fs_inst::init(enum opcode opcode, const fs_reg &dst, fs_reg *src, int sources)
|
|||
this->dst = dst;
|
||||
this->src = src;
|
||||
this->sources = sources;
|
||||
this->exec_size = exec_size;
|
||||
|
||||
assert(dst.file != IMM && dst.file != UNIFORM);
|
||||
|
||||
/* If exec_size == 0, try to guess it from the registers. Since all
|
||||
* manner of things may use hardware registers, we first try to guess
|
||||
* based on GRF registers. If this fails, we will go ahead and take the
|
||||
* width from the destination register.
|
||||
*/
|
||||
if (this->exec_size == 0) {
|
||||
if (dst.file == GRF) {
|
||||
this->exec_size = dst.width;
|
||||
} else {
|
||||
for (int i = 0; i < sources; ++i) {
|
||||
if (src[i].file != GRF)
|
||||
continue;
|
||||
|
||||
if (this->exec_size <= 1)
|
||||
this->exec_size = src[i].width;
|
||||
assert(src[i].width == 1 || src[i].width == this->exec_size);
|
||||
}
|
||||
}
|
||||
|
||||
if (this->exec_size == 0 && dst.file != BAD_FILE)
|
||||
this->exec_size = dst.width;
|
||||
}
|
||||
assert(this->exec_size != 0);
|
||||
|
||||
this->conditional_mod = BRW_CONDITIONAL_NONE;
|
||||
|
||||
|
@ -84,17 +112,46 @@ fs_inst::init(enum opcode opcode, const fs_reg &dst, fs_reg *src, int sources)
|
|||
this->writes_accumulator = false;
|
||||
}
|
||||
|
||||
fs_inst::fs_inst()
|
||||
{
|
||||
fs_reg *src = ralloc_array(this, fs_reg, 3);
|
||||
init(BRW_OPCODE_NOP, 8, dst, src, 0);
|
||||
}
|
||||
|
||||
fs_inst::fs_inst(enum opcode opcode, uint8_t exec_size)
|
||||
{
|
||||
fs_reg *src = ralloc_array(this, fs_reg, 3);
|
||||
init(opcode, exec_size, reg_undef, src, 0);
|
||||
}
|
||||
|
||||
fs_inst::fs_inst(enum opcode opcode, const fs_reg &dst)
|
||||
{
|
||||
fs_reg *src = ralloc_array(this, fs_reg, 3);
|
||||
init(opcode, dst, src, 0);
|
||||
init(opcode, 0, dst, src, 0);
|
||||
}
|
||||
|
||||
fs_inst::fs_inst(enum opcode opcode, uint8_t exec_size, const fs_reg &dst,
|
||||
const fs_reg &src0)
|
||||
{
|
||||
fs_reg *src = ralloc_array(this, fs_reg, 3);
|
||||
src[0] = src0;
|
||||
init(opcode, exec_size, dst, src, 1);
|
||||
}
|
||||
|
||||
fs_inst::fs_inst(enum opcode opcode, const fs_reg &dst, const fs_reg &src0)
|
||||
{
|
||||
fs_reg *src = ralloc_array(this, fs_reg, 3);
|
||||
src[0] = src0;
|
||||
init(opcode, dst, src, 1);
|
||||
init(opcode, 0, dst, src, 1);
|
||||
}
|
||||
|
||||
fs_inst::fs_inst(enum opcode opcode, uint8_t exec_size, const fs_reg &dst,
|
||||
const fs_reg &src0, const fs_reg &src1)
|
||||
{
|
||||
fs_reg *src = ralloc_array(this, fs_reg, 3);
|
||||
src[0] = src0;
|
||||
src[1] = src1;
|
||||
init(opcode, exec_size, dst, src, 2);
|
||||
}
|
||||
|
||||
fs_inst::fs_inst(enum opcode opcode, const fs_reg &dst, const fs_reg &src0,
|
||||
|
@ -103,7 +160,17 @@ fs_inst::fs_inst(enum opcode opcode, const fs_reg &dst, const fs_reg &src0,
|
|||
fs_reg *src = ralloc_array(this, fs_reg, 3);
|
||||
src[0] = src0;
|
||||
src[1] = src1;
|
||||
init(opcode, dst, src, 2);
|
||||
init(opcode, 0, dst, src, 2);
|
||||
}
|
||||
|
||||
fs_inst::fs_inst(enum opcode opcode, uint8_t exec_size, const fs_reg &dst,
|
||||
const fs_reg &src0, const fs_reg &src1, const fs_reg &src2)
|
||||
{
|
||||
fs_reg *src = ralloc_array(this, fs_reg, 3);
|
||||
src[0] = src0;
|
||||
src[1] = src1;
|
||||
src[2] = src2;
|
||||
init(opcode, exec_size, dst, src, 3);
|
||||
}
|
||||
|
||||
fs_inst::fs_inst(enum opcode opcode, const fs_reg &dst, const fs_reg &src0,
|
||||
|
@ -113,12 +180,18 @@ fs_inst::fs_inst(enum opcode opcode, const fs_reg &dst, const fs_reg &src0,
|
|||
src[0] = src0;
|
||||
src[1] = src1;
|
||||
src[2] = src2;
|
||||
init(opcode, dst, src, 3);
|
||||
init(opcode, 0, dst, src, 3);
|
||||
}
|
||||
|
||||
fs_inst::fs_inst(enum opcode opcode, const fs_reg &dst, fs_reg src[], int sources)
|
||||
{
|
||||
init(opcode, dst, src, sources);
|
||||
init(opcode, 0, dst, src, sources);
|
||||
}
|
||||
|
||||
fs_inst::fs_inst(enum opcode opcode, uint8_t exec_width, const fs_reg &dst,
|
||||
fs_reg src[], int sources)
|
||||
{
|
||||
init(opcode, exec_width, dst, src, sources);
|
||||
}
|
||||
|
||||
fs_inst::fs_inst(const fs_inst &that)
|
||||
|
@ -206,7 +279,7 @@ ALU2(MAC)
|
|||
fs_inst *
|
||||
fs_visitor::IF(enum brw_predicate predicate)
|
||||
{
|
||||
fs_inst *inst = new(mem_ctx) fs_inst(BRW_OPCODE_IF);
|
||||
fs_inst *inst = new(mem_ctx) fs_inst(BRW_OPCODE_IF, dispatch_width);
|
||||
inst->predicate = predicate;
|
||||
return inst;
|
||||
}
|
||||
|
@ -217,7 +290,7 @@ fs_visitor::IF(const fs_reg &src0, const fs_reg &src1,
|
|||
enum brw_conditional_mod condition)
|
||||
{
|
||||
assert(brw->gen == 6);
|
||||
fs_inst *inst = new(mem_ctx) fs_inst(BRW_OPCODE_IF,
|
||||
fs_inst *inst = new(mem_ctx) fs_inst(BRW_OPCODE_IF, dispatch_width,
|
||||
reg_null_d, src0, src1);
|
||||
inst->conditional_mod = condition;
|
||||
return inst;
|
||||
|
@ -358,6 +431,7 @@ fs_visitor::DEP_RESOLVE_MOV(int grf)
|
|||
/* The caller always wants uncompressed to emit the minimal extra
|
||||
* dependencies, and to avoid having to deal with aligning its regs to 2.
|
||||
*/
|
||||
inst->exec_size = 8;
|
||||
inst->force_uncompressed = true;
|
||||
|
||||
return inst;
|
||||
|
@ -380,6 +454,7 @@ fs_inst::equals(fs_inst *inst) const
|
|||
eot == inst->eot &&
|
||||
header_present == inst->header_present &&
|
||||
shadow_compare == inst->shadow_compare &&
|
||||
exec_size == inst->exec_size &&
|
||||
offset == inst->offset);
|
||||
}
|
||||
|
||||
|
@ -605,6 +680,7 @@ fs_visitor::get_timestamp()
|
|||
*/
|
||||
mov->force_writemask_all = true;
|
||||
mov->force_uncompressed = true;
|
||||
mov->exec_size = 8;
|
||||
|
||||
/* The caller wants the low 32 bits of the timestamp. Since it's running
|
||||
* at the GPU clock rate of ~1.2ghz, it will roll over every ~3 seconds,
|
||||
|
@ -760,7 +836,7 @@ fs_visitor::no16(const char *format, ...)
|
|||
fs_inst *
|
||||
fs_visitor::emit(enum opcode opcode)
|
||||
{
|
||||
return emit(new(mem_ctx) fs_inst(opcode));
|
||||
return emit(new(mem_ctx) fs_inst(opcode, dispatch_width));
|
||||
}
|
||||
|
||||
fs_inst *
|
||||
|
@ -2129,7 +2205,7 @@ fs_visitor::demote_pull_constants()
|
|||
} else {
|
||||
fs_reg offset = fs_reg((unsigned)(pull_index * 4) & ~15);
|
||||
fs_inst *pull =
|
||||
new(mem_ctx) fs_inst(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD,
|
||||
new(mem_ctx) fs_inst(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD, 8,
|
||||
dst, surf_index, offset);
|
||||
inst->insert_before(block, pull);
|
||||
inst->src[i].set_smear(pull_index & 3);
|
||||
|
@ -2840,7 +2916,7 @@ fs_visitor::lower_uniform_pull_constant_loads()
|
|||
* by live variable analysis, or register allocation will explode.
|
||||
*/
|
||||
fs_inst *setup = new(mem_ctx) fs_inst(FS_OPCODE_SET_SIMD4X2_OFFSET,
|
||||
payload, const_offset_reg);
|
||||
8, payload, const_offset_reg);
|
||||
setup->force_writemask_all = true;
|
||||
|
||||
setup->ir = inst->ir;
|
||||
|
|
|
@ -193,18 +193,29 @@ public:
|
|||
class fs_inst : public backend_instruction {
|
||||
fs_inst &operator=(const fs_inst &);
|
||||
|
||||
void init(enum opcode opcode, uint8_t exec_width, const fs_reg &dst,
|
||||
fs_reg *src, int sources);
|
||||
|
||||
public:
|
||||
DECLARE_RALLOC_CXX_OPERATORS(fs_inst)
|
||||
|
||||
void init(enum opcode opcode, const fs_reg &dst, fs_reg *src, int sources);
|
||||
|
||||
fs_inst(enum opcode opcode = BRW_OPCODE_NOP, const fs_reg &dst = reg_undef);
|
||||
fs_inst();
|
||||
fs_inst(enum opcode opcode, uint8_t exec_size);
|
||||
fs_inst(enum opcode opcode, const fs_reg &dst);
|
||||
fs_inst(enum opcode opcode, uint8_t exec_size, const fs_reg &dst,
|
||||
const fs_reg &src0);
|
||||
fs_inst(enum opcode opcode, const fs_reg &dst, const fs_reg &src0);
|
||||
fs_inst(enum opcode opcode, uint8_t exec_size, const fs_reg &dst,
|
||||
const fs_reg &src0, const fs_reg &src1);
|
||||
fs_inst(enum opcode opcode, const fs_reg &dst, const fs_reg &src0,
|
||||
const fs_reg &src1);
|
||||
fs_inst(enum opcode opcode, uint8_t exec_size, const fs_reg &dst,
|
||||
const fs_reg &src0, const fs_reg &src1, const fs_reg &src2);
|
||||
fs_inst(enum opcode opcode, const fs_reg &dst, const fs_reg &src0,
|
||||
const fs_reg &src1, const fs_reg &src2);
|
||||
fs_inst(enum opcode opcode, const fs_reg &dst, fs_reg src[], int sources);
|
||||
fs_inst(enum opcode opcode, uint8_t exec_size, const fs_reg &dst,
|
||||
fs_reg src[], int sources);
|
||||
fs_inst(const fs_inst &that);
|
||||
|
||||
void resize_sources(uint8_t num_sources);
|
||||
|
@ -224,6 +235,13 @@ public:
|
|||
|
||||
uint8_t sources; /**< Number of fs_reg sources. */
|
||||
|
||||
/**
|
||||
* Execution size of the instruction. This is used by the generator to
|
||||
* generate the correct binary for the given fs_inst. Current valid
|
||||
* values are 1, 8, 16.
|
||||
*/
|
||||
uint8_t exec_size;
|
||||
|
||||
/* Chooses which flag subregister (f0.0 or f0.1) is used for conditional
|
||||
* mod and predication.
|
||||
*/
|
||||
|
|
|
@ -944,7 +944,7 @@ fs_visitor::visit(ir_expression *ir)
|
|||
packed_consts.type = result.type;
|
||||
|
||||
fs_reg const_offset_reg = fs_reg(const_offset->value.u[0] & ~15);
|
||||
emit(new(mem_ctx) fs_inst(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD,
|
||||
emit(new(mem_ctx) fs_inst(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD, 8,
|
||||
packed_consts, surf_index, const_offset_reg));
|
||||
|
||||
for (int i = 0; i < ir->type->vector_elements; i++) {
|
||||
|
|
Loading…
Reference in New Issue