i965: Use NIR to lower legacy userclipping.

This allows us to drop legacy userclip plane handling in both the vec4
and FS backends, and simplifies a few interfaces.

v2 (Jason Ekstrand):
 - Move brw_nir_lower_legacy_clipping to brw_nir_uniforms.cpp because
   it's i965-specific.
 - Handle adding the params in brw_nir_lower_legacy_clipping
 - Call brw_nir_lower_legacy_clipping from brw_codegen_vs_prog

Co-authored-by: Jason Ekstrand <jason@jlekstrand.net>
Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
This commit is contained in:
Kenneth Graunke 2017-05-21 01:36:14 -07:00 committed by Jason Ekstrand
parent d10de25309
commit 517005b4cf
9 changed files with 64 additions and 158 deletions

View File

@ -7332,8 +7332,6 @@ fs_visitor::run_vs()
if (failed)
return false;
compute_clip_distance();
emit_urb_writes();
if (shader_time_index >= 0)

View File

@ -80,8 +80,6 @@ public:
fs_reg vgrf(const glsl_type *const type);
void import_uniforms(fs_visitor *v);
void setup_uniform_clipplane_values();
void compute_clip_distance();
void VARYING_PULL_CONSTANT_LOAD(const brw::fs_builder &bld,
const fs_reg &dst,
@ -380,7 +378,6 @@ public:
fs_reg pixel_w;
fs_reg delta_xy[BRW_BARYCENTRIC_MODE_COUNT];
fs_reg shader_start_time;
fs_reg userplane[MAX_CLIP_PLANES];
fs_reg final_gs_vertex_count;
fs_reg control_data_bits;
fs_reg invocation_id;

View File

@ -563,87 +563,6 @@ fs_visitor::emit_fb_writes()
inst->eot = true;
}
/**
 * Reserve uniform storage for the user clip planes.
 *
 * For every plane counted by the VS key's nr_userclip_plane_consts, four
 * params are appended to stage_prog_data, userplane[] is pointed at the
 * first of them, and each param is filled with the corresponding
 * BRW_PARAM_BUILTIN_CLIP_PLANE value.  Returns without doing anything when
 * no user clip planes are enabled.
 */
void
fs_visitor::setup_uniform_clipplane_values()
{
   const struct brw_vs_prog_key *vs_key =
      (const struct brw_vs_prog_key *) this->key;

   if (vs_key->nr_userclip_plane_consts == 0)
      return;

   /* The new params are appended right after the existing uniforms. */
   assert(stage_prog_data->nr_params == uniforms);
   brw_stage_prog_data_add_params(stage_prog_data,
                                  vs_key->nr_userclip_plane_consts * 4);

   /* Each plane consumes four consecutive uniform slots. */
   for (int plane = 0; plane < vs_key->nr_userclip_plane_consts;
        plane++, uniforms += 4) {
      this->userplane[plane] = fs_reg(UNIFORM, uniforms);

      for (int comp = 0; comp < 4; ++comp) {
         stage_prog_data->param[uniforms + comp] =
            BRW_PARAM_BUILTIN_CLIP_PLANE(plane, comp);
      }
   }
}
/**
* Lower legacy fixed-function and gl_ClipVertex clipping to clip distances.
*
* This does nothing if the shader uses gl_ClipDistance or user clipping is
* disabled altogether.
*/
void fs_visitor::compute_clip_distance()
{
struct brw_vue_prog_data *vue_prog_data = brw_vue_prog_data(prog_data);
const struct brw_vs_prog_key *key =
(const struct brw_vs_prog_key *) this->key;
/* Bail unless some sort of legacy clipping is enabled */
if (key->nr_userclip_plane_consts == 0)
return;
/* From the GLSL 1.30 spec, section 7.1 (Vertex Shader Special Variables):
*
* "If a linked set of shaders forming the vertex stage contains no
* static write to gl_ClipVertex or gl_ClipDistance, but the
* application has requested clipping against user clip planes through
* the API, then the coordinate written to gl_Position is used for
* comparison against the user clip planes."
*
* This function is only called if the shader didn't write to
* gl_ClipDistance. Accordingly, we use gl_ClipVertex to perform clipping
* if the user wrote to it; otherwise we use gl_Position.
*/
gl_varying_slot clip_vertex = VARYING_SLOT_CLIP_VERTEX;
if (!(vue_prog_data->vue_map.slots_valid & VARYING_BIT_CLIP_VERTEX))
clip_vertex = VARYING_SLOT_POS;
/* If the clip vertex isn't written, skip this. Typically this means
* the GS will set up clipping. */
if (outputs[clip_vertex].file == BAD_FILE)
return;
setup_uniform_clipplane_values();
const fs_builder abld = bld.annotate("user clip distances");
this->outputs[VARYING_SLOT_CLIP_DIST0] = vgrf(glsl_type::vec4_type);
this->outputs[VARYING_SLOT_CLIP_DIST1] = vgrf(glsl_type::vec4_type);
for (int i = 0; i < key->nr_userclip_plane_consts; i++) {
fs_reg u = userplane[i];
const fs_reg output = offset(outputs[VARYING_SLOT_CLIP_DIST0 + i / 4],
bld, i & 3);
abld.MUL(output, outputs[clip_vertex], u);
for (int j = 1; j < 4; j++) {
u.nr = userplane[i].nr + j;
abld.MAD(output, output, offset(outputs[clip_vertex], bld, j), u);
}
}
}
void
fs_visitor::emit_urb_writes(const fs_reg &gs_vertex_count)
{

View File

@ -102,6 +102,9 @@ brw_nir_link_shaders(const struct brw_compiler *compiler,
bool brw_nir_lower_cs_intrinsics(nir_shader *nir,
unsigned dispatch_width);
void brw_nir_lower_legacy_clipping(nir_shader *nir,
int nr_userclip_plane_consts,
struct brw_stage_prog_data *prog_data);
void brw_nir_lower_vs_inputs(nir_shader *nir,
const uint8_t *vs_attrib_wa_flags);
void brw_nir_lower_vue_inputs(nir_shader *nir,

View File

@ -109,7 +109,6 @@ public:
int *virtual_grf_start;
int *virtual_grf_end;
brw::vec4_live_variables *live_intervals;
dst_reg userplane[MAX_CLIP_PLANES];
bool need_all_constants_in_pull_buffer;

View File

@ -49,8 +49,6 @@ protected:
private:
int setup_attributes(int payload_reg);
void setup_uniform_clipplane_values();
void emit_clip_distances(dst_reg reg, int offset);
const struct brw_vs_prog_key *const key;
struct brw_vs_prog_data * const vs_prog_data;

View File

@ -86,78 +86,9 @@ vec4_vs_visitor::emit_urb_slot(dst_reg reg, int varying)
}
/**
 * Emit up to four clip distances into \p reg, one per writemask channel.
 *
 * Channel i receives the DP4 of the clip vertex with user clip plane
 * (i + offset).  Emission stops at the fourth channel or once the plane
 * index reaches key->nr_userclip_plane_consts, whichever comes first.
 *
 * \param reg     destination register; its writemask is overwritten here.
 * \param offset  index of the first user clip plane to use.
 */
void
vec4_vs_visitor::emit_clip_distances(dst_reg reg, int offset)
{
   /* From the GLSL 1.30 spec, section 7.1 (Vertex Shader Special Variables):
    *
    *     "If a linked set of shaders forming the vertex stage contains no
    *     static write to gl_ClipVertex or gl_ClipDistance, but the
    *     application has requested clipping against user clip planes through
    *     the API, then the coordinate written to gl_Position is used for
    *     comparison against the user clip planes."
    *
    * Callers only get here when the shader did not write gl_ClipDistance,
    * so clip against gl_ClipVertex when it is a valid VUE slot and fall
    * back to gl_Position otherwise.
    */
   const bool has_clip_vertex =
      (prog_data->vue_map.slots_valid & VARYING_BIT_CLIP_VERTEX) != 0;
   const gl_varying_slot clip_vertex =
      has_clip_vertex ? VARYING_SLOT_CLIP_VERTEX : VARYING_SLOT_POS;

   for (int chan = 0; chan < 4; ++chan) {
      /* Stop once we run out of enabled user clip planes. */
      if (!(chan + offset < key->nr_userclip_plane_consts))
         break;

      reg.writemask = 1 << chan;
      emit(DP4(reg,
               src_reg(output_reg[clip_vertex][0]),
               src_reg(this->userplane[chan + offset])));
   }
}
/**
 * Reserve uniform storage for the user clip planes.
 *
 * For every plane counted by key->nr_userclip_plane_consts, four params are
 * appended to stage_prog_data, userplane[] is set to a float-typed UNIFORM
 * register at the current uniform index, and the params are filled with the
 * corresponding BRW_PARAM_BUILTIN_CLIP_PLANE values.  Returns without doing
 * anything when no user clip planes are enabled.
 */
void
vec4_vs_visitor::setup_uniform_clipplane_values()
{
   if (key->nr_userclip_plane_consts == 0)
      return;

   /* this->uniforms is scaled by four to index individual params. */
   assert(stage_prog_data->nr_params == (unsigned)this->uniforms * 4);
   brw_stage_prog_data_add_params(stage_prog_data,
                                  key->nr_userclip_plane_consts * 4);

   for (int plane = 0; plane < key->nr_userclip_plane_consts;
        ++plane, ++this->uniforms) {
      this->userplane[plane] = dst_reg(UNIFORM, this->uniforms);
      this->userplane[plane].type = BRW_REGISTER_TYPE_F;

      for (int comp = 0; comp < 4; ++comp) {
         stage_prog_data->param[this->uniforms * 4 + comp] =
            BRW_PARAM_BUILTIN_CLIP_PLANE(plane, comp);
      }
   }
}
void
vec4_vs_visitor::emit_thread_end()
{
setup_uniform_clipplane_values();
/* Lower legacy ff and ClipVertex clipping to clip distances */
if (key->nr_userclip_plane_consts > 0) {
current_annotation = "user clip distances";
output_reg[VARYING_SLOT_CLIP_DIST0][0] =
dst_reg(this, glsl_type::vec4_type);
output_reg[VARYING_SLOT_CLIP_DIST1][0] =
dst_reg(this, glsl_type::vec4_type);
output_num_components[VARYING_SLOT_CLIP_DIST0][0] = 4;
output_num_components[VARYING_SLOT_CLIP_DIST1][0] = 4;
emit_clip_distances(output_reg[VARYING_SLOT_CLIP_DIST0][0], 0);
emit_clip_distances(output_reg[VARYING_SLOT_CLIP_DIST1][0], 4);
}
/* For VS, we always end the thread by emitting a single vertex.
* emit_urb_write_opcode() will take care of setting the eot flag on the
* SEND instruction.

View File

@ -395,3 +395,59 @@ brw_nir_lower_gl_images(nir_shader *shader,
}
}
}
/**
 * Lower legacy user clipping in a vertex shader using NIR.
 *
 * Runs nir_lower_clip_vs for the enabled planes, appends four params per
 * plane (BRW_PARAM_BUILTIN_CLIP_PLANE values) to \p prog_data, grows
 * nir->num_uniforms to match, and finally replaces every
 * load_user_clip_plane intrinsic in the entrypoint with a four-component
 * load_uniform from the newly added storage.
 *
 * \param nir                       shader to lower; modified in place.
 * \param nr_userclip_plane_consts  number of enabled user clip planes; the
 *                                  function is a no-op when this is zero.
 * \param prog_data                 stage data receiving the new params.
 */
void
brw_nir_lower_legacy_clipping(nir_shader *nir, int nr_userclip_plane_consts,
                              struct brw_stage_prog_data *prog_data)
{
   if (nr_userclip_plane_consts == 0)
      return;

   nir_function_impl *impl = nir_shader_get_entrypoint(nir);

   /* Enable one bit per user clip plane, then clean up after the pass. */
   nir_lower_clip_vs(nir, (1 << nr_userclip_plane_consts) - 1, true);
   nir_lower_io_to_temporaries(nir, impl, true, false);
   nir_lower_global_vars_to_local(nir);
   nir_lower_vars_to_ssa(nir);

   /* The clip planes go right after the uniforms that already exist.
    * The asserts check nir->num_uniforms against four times the param
    * count, both before and after the planes are added.
    */
   const unsigned clip_plane_base = nir->num_uniforms;

   assert(nir->num_uniforms == prog_data->nr_params * 4);
   const unsigned num_clip_floats = 4 * nr_userclip_plane_consts;
   uint32_t *clip_param =
      brw_stage_prog_data_add_params(prog_data, num_clip_floats);
   nir->num_uniforms += num_clip_floats * sizeof(float);
   assert(nir->num_uniforms == prog_data->nr_params * 4);

   for (unsigned p = 0; p < num_clip_floats; p++)
      clip_param[p] = BRW_PARAM_BUILTIN_CLIP_PLANE(p / 4, p % 4);

   nir_builder b;
   nir_builder_init(&b, impl);

   nir_foreach_block(block, impl) {
      nir_foreach_instr_safe(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *ucp_load = nir_instr_as_intrinsic(instr);
         if (ucp_load->intrinsic != nir_intrinsic_load_user_clip_plane)
            continue;

         b.cursor = nir_before_instr(instr);

         /* Build a vec4 load_uniform with a constant zero offset source;
          * the plane is selected purely through the intrinsic's base.
          */
         nir_intrinsic_instr *uniform_load =
            nir_intrinsic_instr_create(nir, nir_intrinsic_load_uniform);
         uniform_load->num_components = 4;
         uniform_load->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
         nir_ssa_dest_init(&uniform_load->instr, &uniform_load->dest,
                           4, 32, NULL);
         nir_intrinsic_set_base(uniform_load,
                                clip_plane_base + 4 * sizeof(float) *
                                nir_intrinsic_ucp_id(ucp_load));
         nir_intrinsic_set_range(uniform_load, 4 * sizeof(float));
         nir_builder_instr_insert(&b, &uniform_load->instr);

         /* Redirect all users to the new load and drop the old intrinsic. */
         nir_ssa_def_rewrite_uses(&ucp_load->dest.ssa,
                                  nir_src_for_ssa(&uniform_load->dest.ssa));
         nir_instr_remove(instr);
      }
   }
}

View File

@ -149,6 +149,11 @@ brw_codegen_vs_prog(struct brw_context *brw,
&prog_data.base.base);
}
if (key->nr_userclip_plane_consts > 0) {
brw_nir_lower_legacy_clipping(nir, key->nr_userclip_plane_consts,
&prog_data.base.base);
}
uint64_t outputs_written =
brw_vs_outputs_written(brw, key, nir->info.outputs_written);