i965/vec4: Use NIR remapping for VS attributes

The NIR pass already handles remapping system values to attributes for
us so we delete the system value code as part of the conversion.

We also change nir_lower_vs_inputs to take an explicit inputs_read
bitmask and pass in the inputs_read from prog_data instead of pulling
it out of NIR.  This is because the version in prog_data may get
EDGEFLAG added to it on some old platforms.

Reviewed-by: Alejandro Piñeiro <apinheiro@igalia.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
This commit is contained in:
Jason Ekstrand 2017-05-03 17:40:54 -07:00
parent 80aa6e9d32
commit 0d5f89cdc3
6 changed files with 33 additions and 120 deletions

View File

@ -230,7 +230,6 @@ remap_patch_urb_offsets(nir_block *block, nir_builder *b,
void
brw_nir_lower_vs_inputs(nir_shader *nir,
bool is_scalar,
bool use_legacy_snorm_formula,
const uint8_t *vs_attrib_wa_flags)
{
@ -253,11 +252,7 @@ brw_nir_lower_vs_inputs(nir_shader *nir,
brw_nir_apply_attribute_workarounds(nir, use_legacy_snorm_formula,
vs_attrib_wa_flags);
/* The last step is to remap VERT_ATTRIB_* to actual registers and we only
* do that for scalar shaders at the moment.
*/
if (!is_scalar)
return;
/* The last step is to remap VERT_ATTRIB_* to actual registers */
/* Whether or not we have any system generated values. gl_DrawID is not
* included here as it lives in its own vec4.

View File

@ -98,7 +98,6 @@ nir_shader *brw_preprocess_nir(const struct brw_compiler *compiler,
bool brw_nir_lower_intrinsics(nir_shader *nir,
struct brw_stage_prog_data *prog_data);
void brw_nir_lower_vs_inputs(nir_shader *nir,
bool is_scalar,
bool use_legacy_snorm_formula,
const uint8_t *vs_attrib_wa_flags);
void brw_nir_lower_vue_inputs(nir_shader *nir, bool is_scalar,

View File

@ -1740,40 +1740,23 @@ vec4_visitor::lower_attributes_to_hw_regs(const int *attribute_map,
int
vec4_vs_visitor::setup_attributes(int payload_reg)
{
int nr_attributes;
int attribute_map[VERT_ATTRIB_MAX + 2];
memset(attribute_map, 0, sizeof(attribute_map));
foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
for (int i = 0; i < 3; i++) {
if (inst->src[i].file == ATTR) {
assert(inst->src[i].offset % REG_SIZE == 0);
int grf = payload_reg + inst->src[i].nr +
inst->src[i].offset / REG_SIZE;
nr_attributes = 0;
GLbitfield64 vs_inputs = vs_prog_data->inputs_read;
while (vs_inputs) {
GLuint first = ffsll(vs_inputs) - 1;
int needed_slots =
(vs_prog_data->double_inputs_read & BITFIELD64_BIT(first)) ? 2 : 1;
for (int c = 0; c < needed_slots; c++) {
attribute_map[first + c] = payload_reg + nr_attributes;
nr_attributes++;
vs_inputs &= ~BITFIELD64_BIT(first + c);
struct brw_reg reg = brw_vec8_grf(grf, 0);
reg.swizzle = inst->src[i].swizzle;
reg.type = inst->src[i].type;
reg.abs = inst->src[i].abs;
reg.negate = inst->src[i].negate;
inst->src[i] = reg;
}
}
}
/* VertexID is stored by the VF as the last vertex element, but we
* don't represent it with a flag in inputs_read, so we call it
* VERT_ATTRIB_MAX.
*/
if (vs_prog_data->uses_vertexid || vs_prog_data->uses_instanceid ||
vs_prog_data->uses_basevertex || vs_prog_data->uses_baseinstance) {
attribute_map[VERT_ATTRIB_MAX] = payload_reg + nr_attributes;
nr_attributes++;
}
if (vs_prog_data->uses_drawid) {
attribute_map[VERT_ATTRIB_MAX + 1] = payload_reg + nr_attributes;
nr_attributes++;
}
lower_attributes_to_hw_regs(attribute_map, false /* interleaved */);
return payload_reg + vs_prog_data->nr_attribute_slots;
}
@ -2771,10 +2754,6 @@ brw_compile_vs(const struct brw_compiler *compiler, void *log_data,
const bool is_scalar = compiler->scalar_stage[MESA_SHADER_VERTEX];
nir_shader *shader = nir_shader_clone(mem_ctx, src_shader);
shader = brw_nir_apply_sampler_key(shader, compiler, &key->tex, is_scalar);
brw_nir_lower_vs_inputs(shader, is_scalar,
use_legacy_snorm_formula, key->gl_attrib_wa_flags);
brw_nir_lower_vue_outputs(shader, is_scalar);
shader = brw_postprocess_nir(shader, compiler, is_scalar);
const unsigned *assembly = NULL;
@ -2783,15 +2762,24 @@ brw_compile_vs(const struct brw_compiler *compiler, void *log_data,
* the edge flag from VERT_ATTRIB_EDGEFLAG. This will be done
* automatically by brw_vec4_visitor::emit_urb_slot but we need to
* ensure that prog_data->inputs_read is accurate.
*
* In order to make late NIR passes aware of the change, we actually
* whack shader->info.inputs_read instead. This is safe because we just
* made a copy of the shader.
*/
assert(!is_scalar);
assert(key->copy_edgeflag);
prog_data->inputs_read |= VERT_BIT_EDGEFLAG;
shader->info.inputs_read |= VERT_BIT_EDGEFLAG;
}
prog_data->inputs_read = shader->info.inputs_read;
prog_data->double_inputs_read = shader->info.double_inputs_read;
brw_nir_lower_vs_inputs(shader, use_legacy_snorm_formula,
key->gl_attrib_wa_flags);
brw_nir_lower_vue_outputs(shader, is_scalar);
shader = brw_postprocess_nir(shader, compiler, is_scalar);
prog_data->base.clip_distance_mask =
((1 << shader->info.clip_distance_array_size) - 1);
prog_data->base.cull_distance_mask =

View File

@ -50,45 +50,6 @@ vec4_visitor::emit_nir_code()
void
vec4_visitor::nir_setup_system_value_intrinsic(nir_intrinsic_instr *instr)
{
dst_reg *reg;
switch (instr->intrinsic) {
case nir_intrinsic_load_vertex_id:
unreachable("should be lowered by lower_vertex_id().");
case nir_intrinsic_load_vertex_id_zero_base:
reg = &nir_system_values[SYSTEM_VALUE_VERTEX_ID_ZERO_BASE];
if (reg->file == BAD_FILE)
*reg = *make_reg_for_system_value(SYSTEM_VALUE_VERTEX_ID_ZERO_BASE);
break;
case nir_intrinsic_load_base_vertex:
reg = &nir_system_values[SYSTEM_VALUE_BASE_VERTEX];
if (reg->file == BAD_FILE)
*reg = *make_reg_for_system_value(SYSTEM_VALUE_BASE_VERTEX);
break;
case nir_intrinsic_load_instance_id:
reg = &nir_system_values[SYSTEM_VALUE_INSTANCE_ID];
if (reg->file == BAD_FILE)
*reg = *make_reg_for_system_value(SYSTEM_VALUE_INSTANCE_ID);
break;
case nir_intrinsic_load_base_instance:
reg = &nir_system_values[SYSTEM_VALUE_BASE_INSTANCE];
if (reg->file == BAD_FILE)
*reg = *make_reg_for_system_value(SYSTEM_VALUE_BASE_INSTANCE);
break;
case nir_intrinsic_load_draw_id:
reg = &nir_system_values[SYSTEM_VALUE_DRAW_ID];
if (reg->file == BAD_FILE)
*reg = *make_reg_for_system_value(SYSTEM_VALUE_DRAW_ID);
break;
default:
break;
}
}
static bool
@ -826,14 +787,8 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
case nir_intrinsic_load_instance_id:
case nir_intrinsic_load_base_instance:
case nir_intrinsic_load_draw_id:
case nir_intrinsic_load_invocation_id: {
gl_system_value sv = nir_system_value_from_intrinsic(instr->intrinsic);
src_reg val = src_reg(nir_system_values[sv]);
assert(val.file != BAD_FILE);
dest = get_nir_dest(instr->dest, val.type);
emit(MOV(dest, val));
break;
}
case nir_intrinsic_load_invocation_id:
unreachable("should be lowered by brw_nir_lower_vs_inputs()");
case nir_intrinsic_load_uniform: {
/* Offsets are in bytes but they should always be multiples of 4 */

View File

@ -1315,7 +1315,7 @@ vec4_visitor::emit_urb_slot(dst_reg reg, int varying)
if (output_reg[VARYING_SLOT_POS][0].file != BAD_FILE)
emit(MOV(reg, src_reg(output_reg[VARYING_SLOT_POS][0])));
break;
case VARYING_SLOT_EDGE:
case VARYING_SLOT_EDGE: {
/* This is present when doing unfilled polygons. We're supposed to copy
* the edge flag from the user-provided vertex array
* (glEdgeFlagPointer), or otherwise we'll copy from the current value
@ -1323,9 +1323,12 @@ vec4_visitor::emit_urb_slot(dst_reg reg, int varying)
* determine which edges should be drawn as wireframe.
*/
current_annotation = "edge flag";
emit(MOV(reg, src_reg(dst_reg(ATTR, VERT_ATTRIB_EDGEFLAG,
int edge_attr = _mesa_bitcount_64(nir->info.inputs_read &
BITFIELD64_MASK(VERT_ATTRIB_EDGEFLAG));
emit(MOV(reg, src_reg(dst_reg(ATTR, edge_attr,
glsl_type::float_type, WRITEMASK_XYZW))));
break;
}
case BRW_VARYING_SLOT_PAD:
/* No need to write to this slot */
break;

View File

@ -36,35 +36,8 @@ vec4_vs_visitor::emit_prolog()
dst_reg *
vec4_vs_visitor::make_reg_for_system_value(int location)
{
/* VertexID is stored by the VF as the last vertex element, but
* we don't represent it with a flag in inputs_read, so we call
* it VERT_ATTRIB_MAX, which setup_attributes() picks up on.
*/
dst_reg *reg = new(mem_ctx) dst_reg(ATTR, VERT_ATTRIB_MAX);
switch (location) {
case SYSTEM_VALUE_BASE_VERTEX:
reg->writemask = WRITEMASK_X;
break;
case SYSTEM_VALUE_BASE_INSTANCE:
reg->writemask = WRITEMASK_Y;
break;
case SYSTEM_VALUE_VERTEX_ID:
case SYSTEM_VALUE_VERTEX_ID_ZERO_BASE:
reg->writemask = WRITEMASK_Z;
break;
case SYSTEM_VALUE_INSTANCE_ID:
reg->writemask = WRITEMASK_W;
break;
case SYSTEM_VALUE_DRAW_ID:
reg = new(mem_ctx) dst_reg(ATTR, VERT_ATTRIB_MAX + 1);
reg->writemask = WRITEMASK_X;
break;
default:
unreachable("not reached");
}
return reg;
unreachable("not reached");
return NULL;
}