i965/fs: use SSA values directly
Before, we would use registers, but set a magical "parent_instr" field to indicate that it was actually purely an SSA value (i.e., it wasn't involved in any phi nodes). Instead, just use SSA values directly, which lets us get rid of the hack and reduces memory usage since we're not allocating a nir_register for every value. It also makes our handling of load_const more consistent compared to the other instructions. Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
This commit is contained in:
parent
2b1a1d8b12
commit
864907e2f1
|
@ -249,6 +249,8 @@ public:
|
||||||
void nir_emit_block(nir_block *block);
|
void nir_emit_block(nir_block *block);
|
||||||
void nir_emit_instr(nir_instr *instr);
|
void nir_emit_instr(nir_instr *instr);
|
||||||
void nir_emit_alu(const brw::fs_builder &bld, nir_alu_instr *instr);
|
void nir_emit_alu(const brw::fs_builder &bld, nir_alu_instr *instr);
|
||||||
|
void nir_emit_undef(const brw::fs_builder &bld,
|
||||||
|
nir_ssa_undef_instr *instr);
|
||||||
void nir_emit_intrinsic(const brw::fs_builder &bld,
|
void nir_emit_intrinsic(const brw::fs_builder &bld,
|
||||||
nir_intrinsic_instr *instr);
|
nir_intrinsic_instr *instr);
|
||||||
void nir_emit_texture(const brw::fs_builder &bld,
|
void nir_emit_texture(const brw::fs_builder &bld,
|
||||||
|
@ -345,6 +347,7 @@ public:
|
||||||
unsigned max_grf;
|
unsigned max_grf;
|
||||||
|
|
||||||
fs_reg *nir_locals;
|
fs_reg *nir_locals;
|
||||||
|
fs_reg *nir_ssa_values;
|
||||||
fs_reg *nir_globals;
|
fs_reg *nir_globals;
|
||||||
fs_reg nir_inputs;
|
fs_reg nir_inputs;
|
||||||
fs_reg nir_outputs;
|
fs_reg nir_outputs;
|
||||||
|
|
|
@ -366,6 +366,9 @@ fs_visitor::nir_emit_impl(nir_function_impl *impl)
|
||||||
nir_locals[reg->index] = bld.vgrf(BRW_REGISTER_TYPE_F, size);
|
nir_locals[reg->index] = bld.vgrf(BRW_REGISTER_TYPE_F, size);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
nir_ssa_values = reralloc(mem_ctx, nir_ssa_values, fs_reg,
|
||||||
|
impl->ssa_alloc);
|
||||||
|
|
||||||
nir_emit_cf_list(&impl->body);
|
nir_emit_cf_list(&impl->body);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -464,6 +467,10 @@ fs_visitor::nir_emit_instr(nir_instr *instr)
|
||||||
*/
|
*/
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case nir_instr_type_ssa_undef:
|
||||||
|
nir_emit_undef(abld, nir_instr_as_ssa_undef(instr));
|
||||||
|
break;
|
||||||
|
|
||||||
case nir_instr_type_jump:
|
case nir_instr_type_jump:
|
||||||
nir_emit_jump(abld, nir_instr_as_jump(instr));
|
nir_emit_jump(abld, nir_instr_as_jump(instr));
|
||||||
break;
|
break;
|
||||||
|
@ -495,17 +502,12 @@ bool
|
||||||
fs_visitor::optimize_frontfacing_ternary(nir_alu_instr *instr,
|
fs_visitor::optimize_frontfacing_ternary(nir_alu_instr *instr,
|
||||||
const fs_reg &result)
|
const fs_reg &result)
|
||||||
{
|
{
|
||||||
if (instr->src[0].src.is_ssa ||
|
if (!instr->src[0].src.is_ssa ||
|
||||||
!instr->src[0].src.reg.reg ||
|
instr->src[0].src.ssa->parent_instr->type != nir_instr_type_intrinsic)
|
||||||
!instr->src[0].src.reg.reg->parent_instr)
|
|
||||||
return false;
|
|
||||||
|
|
||||||
if (instr->src[0].src.reg.reg->parent_instr->type !=
|
|
||||||
nir_instr_type_intrinsic)
|
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
nir_intrinsic_instr *src0 =
|
nir_intrinsic_instr *src0 =
|
||||||
nir_instr_as_intrinsic(instr->src[0].src.reg.reg->parent_instr);
|
nir_instr_as_intrinsic(instr->src[0].src.ssa->parent_instr);
|
||||||
|
|
||||||
if (src0->intrinsic != nir_intrinsic_load_front_face)
|
if (src0->intrinsic != nir_intrinsic_load_front_face)
|
||||||
return false;
|
return false;
|
||||||
|
@ -1146,6 +1148,13 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
fs_visitor::nir_emit_undef(const fs_builder &bld, nir_ssa_undef_instr *instr)
|
||||||
|
{
|
||||||
|
nir_ssa_values[instr->def.index] = bld.vgrf(BRW_REGISTER_TYPE_D,
|
||||||
|
instr->def.num_components);
|
||||||
|
}
|
||||||
|
|
||||||
static fs_reg
|
static fs_reg
|
||||||
fs_reg_for_nir_reg(fs_visitor *v, nir_register *nir_reg,
|
fs_reg_for_nir_reg(fs_visitor *v, nir_register *nir_reg,
|
||||||
unsigned base_offset, nir_src *indirect)
|
unsigned base_offset, nir_src *indirect)
|
||||||
|
@ -1171,30 +1180,39 @@ fs_reg_for_nir_reg(fs_visitor *v, nir_register *nir_reg,
|
||||||
fs_reg
|
fs_reg
|
||||||
fs_visitor::get_nir_src(nir_src src)
|
fs_visitor::get_nir_src(nir_src src)
|
||||||
{
|
{
|
||||||
|
fs_reg reg;
|
||||||
if (src.is_ssa) {
|
if (src.is_ssa) {
|
||||||
assert(src.ssa->parent_instr->type == nir_instr_type_load_const);
|
if (src.ssa->parent_instr->type == nir_instr_type_load_const) {
|
||||||
nir_load_const_instr *load = nir_instr_as_load_const(src.ssa->parent_instr);
|
nir_load_const_instr *load =
|
||||||
fs_reg reg = bld.vgrf(BRW_REGISTER_TYPE_D, src.ssa->num_components);
|
nir_instr_as_load_const(src.ssa->parent_instr);
|
||||||
|
reg = bld.vgrf(BRW_REGISTER_TYPE_D, src.ssa->num_components);
|
||||||
|
|
||||||
for (unsigned i = 0; i < src.ssa->num_components; ++i)
|
for (unsigned i = 0; i < src.ssa->num_components; ++i)
|
||||||
bld.MOV(offset(reg, i), fs_reg(load->value.i[i]));
|
bld.MOV(offset(reg, i), fs_reg(load->value.i[i]));
|
||||||
|
} else {
|
||||||
return reg;
|
reg = nir_ssa_values[src.ssa->index];
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
fs_reg reg = fs_reg_for_nir_reg(this, src.reg.reg, src.reg.base_offset,
|
reg = fs_reg_for_nir_reg(this, src.reg.reg, src.reg.base_offset,
|
||||||
src.reg.indirect);
|
src.reg.indirect);
|
||||||
|
|
||||||
/* to avoid floating-point denorm flushing problems, set the type by
|
|
||||||
* default to D - instructions that need floating point semantics will set
|
|
||||||
* this to F if they need to
|
|
||||||
*/
|
|
||||||
return retype(reg, BRW_REGISTER_TYPE_D);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* to avoid floating-point denorm flushing problems, set the type by
|
||||||
|
* default to D - instructions that need floating point semantics will set
|
||||||
|
* this to F if they need to
|
||||||
|
*/
|
||||||
|
return retype(reg, BRW_REGISTER_TYPE_D);
|
||||||
}
|
}
|
||||||
|
|
||||||
fs_reg
|
fs_reg
|
||||||
fs_visitor::get_nir_dest(nir_dest dest)
|
fs_visitor::get_nir_dest(nir_dest dest)
|
||||||
{
|
{
|
||||||
|
if (dest.is_ssa) {
|
||||||
|
nir_ssa_values[dest.ssa.index] = bld.vgrf(BRW_REGISTER_TYPE_F,
|
||||||
|
dest.ssa.num_components);
|
||||||
|
return nir_ssa_values[dest.ssa.index];
|
||||||
|
}
|
||||||
|
|
||||||
return fs_reg_for_nir_reg(this, dest.reg.reg, dest.reg.base_offset,
|
return fs_reg_for_nir_reg(this, dest.reg.reg, dest.reg.base_offset,
|
||||||
dest.reg.indirect);
|
dest.reg.indirect);
|
||||||
}
|
}
|
||||||
|
|
|
@ -2026,6 +2026,7 @@ fs_visitor::fs_visitor(const struct brw_compiler *compiler, void *log_data,
|
||||||
this->no16_msg = NULL;
|
this->no16_msg = NULL;
|
||||||
|
|
||||||
this->nir_locals = NULL;
|
this->nir_locals = NULL;
|
||||||
|
this->nir_ssa_values = NULL;
|
||||||
this->nir_globals = NULL;
|
this->nir_globals = NULL;
|
||||||
|
|
||||||
memset(&this->payload, 0, sizeof(this->payload));
|
memset(&this->payload, 0, sizeof(this->payload));
|
||||||
|
|
|
@ -156,7 +156,7 @@ brw_create_nir(struct brw_context *brw,
|
||||||
nir_print_shader(nir, stderr);
|
nir_print_shader(nir, stderr);
|
||||||
}
|
}
|
||||||
|
|
||||||
nir_convert_from_ssa(nir, false);
|
nir_convert_from_ssa(nir, true);
|
||||||
nir_validate_shader(nir);
|
nir_validate_shader(nir);
|
||||||
|
|
||||||
/* This is the last pass we run before we start emitting stuff. It
|
/* This is the last pass we run before we start emitting stuff. It
|
||||||
|
|
|
@ -43,8 +43,8 @@
|
||||||
static uint8_t
|
static uint8_t
|
||||||
get_resolve_status_for_src(nir_src *src)
|
get_resolve_status_for_src(nir_src *src)
|
||||||
{
|
{
|
||||||
nir_instr *src_instr = nir_src_get_parent_instr(src);
|
if (src->is_ssa) {
|
||||||
if (src_instr) {
|
nir_instr *src_instr = src->ssa->parent_instr;
|
||||||
uint8_t resolve_status = src_instr->pass_flags & BRW_NIR_BOOLEAN_MASK;
|
uint8_t resolve_status = src_instr->pass_flags & BRW_NIR_BOOLEAN_MASK;
|
||||||
|
|
||||||
/* If the source instruction needs resolve, then from the perspective
|
/* If the source instruction needs resolve, then from the perspective
|
||||||
|
@ -66,8 +66,8 @@ get_resolve_status_for_src(nir_src *src)
|
||||||
static bool
|
static bool
|
||||||
src_mark_needs_resolve(nir_src *src, void *void_state)
|
src_mark_needs_resolve(nir_src *src, void *void_state)
|
||||||
{
|
{
|
||||||
nir_instr *src_instr = nir_src_get_parent_instr(src);
|
if (src->is_ssa) {
|
||||||
if (src_instr) {
|
nir_instr *src_instr = src->ssa->parent_instr;
|
||||||
uint8_t resolve_status = src_instr->pass_flags & BRW_NIR_BOOLEAN_MASK;
|
uint8_t resolve_status = src_instr->pass_flags & BRW_NIR_BOOLEAN_MASK;
|
||||||
|
|
||||||
/* If the source instruction is unresolved, then mark it as needing
|
/* If the source instruction is unresolved, then mark it as needing
|
||||||
|
@ -172,11 +172,11 @@ analyze_boolean_resolves_block(nir_block *block, void *void_state)
|
||||||
resolve_status = BRW_NIR_NON_BOOLEAN;
|
resolve_status = BRW_NIR_NON_BOOLEAN;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* If the destination is SSA-like, go ahead allow unresolved booleans.
|
/* If the destination is SSA, go ahead allow unresolved booleans.
|
||||||
* If the destination register doesn't have a well-defined parent_instr
|
* If the destination register doesn't have a well-defined parent_instr
|
||||||
* we need to resolve immediately.
|
* we need to resolve immediately.
|
||||||
*/
|
*/
|
||||||
if (alu->dest.dest.reg.reg->parent_instr == NULL &&
|
if (!alu->dest.dest.is_ssa &&
|
||||||
resolve_status == BRW_NIR_BOOLEAN_UNRESOLVED) {
|
resolve_status == BRW_NIR_BOOLEAN_UNRESOLVED) {
|
||||||
resolve_status = BRW_NIR_BOOLEAN_NEEDS_RESOLVE;
|
resolve_status = BRW_NIR_BOOLEAN_NEEDS_RESOLVE;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue