From 517005b4cf376b292f61d786d419e2a611e4a02a Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Sun, 21 May 2017 01:36:14 -0700 Subject: [PATCH] i965: Use NIR to lower legacy userclipping. This allows us to drop legacy userclip plane handling in both the vec4 and FS backends, and simplifies a few interfaces. v2 (Jason Ekstrand): - Move brw_nir_lower_legacy_clipping to brw_nir_uniforms.cpp because it's i965-specific. - Handle adding the params in brw_nir_lower_legacy_clipping - Call brw_nir_lower_legacy_clipping from brw_codegen_vs_prog Co-authored-by: Jason Ekstrand Reviewed-by: Jason Ekstrand Reviewed-by: Kenneth Graunke --- src/intel/compiler/brw_fs.cpp | 2 - src/intel/compiler/brw_fs.h | 3 - src/intel/compiler/brw_fs_visitor.cpp | 81 ------------------- src/intel/compiler/brw_nir.h | 3 + src/intel/compiler/brw_vec4.h | 1 - src/intel/compiler/brw_vec4_vs.h | 2 - src/intel/compiler/brw_vec4_vs_visitor.cpp | 69 ---------------- .../drivers/dri/i965/brw_nir_uniforms.cpp | 56 +++++++++++++ src/mesa/drivers/dri/i965/brw_vs.c | 5 ++ 9 files changed, 64 insertions(+), 158 deletions(-) diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index b16ed3be424..9a29c3ae702 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -7332,8 +7332,6 @@ fs_visitor::run_vs() if (failed) return false; - compute_clip_distance(); - emit_urb_writes(); if (shader_time_index >= 0) diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h index 4e7b5781827..2eb49dd5ace 100644 --- a/src/intel/compiler/brw_fs.h +++ b/src/intel/compiler/brw_fs.h @@ -80,8 +80,6 @@ public: fs_reg vgrf(const glsl_type *const type); void import_uniforms(fs_visitor *v); - void setup_uniform_clipplane_values(); - void compute_clip_distance(); void VARYING_PULL_CONSTANT_LOAD(const brw::fs_builder &bld, const fs_reg &dst, @@ -380,7 +378,6 @@ public: fs_reg pixel_w; fs_reg delta_xy[BRW_BARYCENTRIC_MODE_COUNT]; fs_reg shader_start_time; - fs_reg 
userplane[MAX_CLIP_PLANES]; fs_reg final_gs_vertex_count; fs_reg control_data_bits; fs_reg invocation_id; diff --git a/src/intel/compiler/brw_fs_visitor.cpp b/src/intel/compiler/brw_fs_visitor.cpp index eb23b4087d0..869f8c9017b 100644 --- a/src/intel/compiler/brw_fs_visitor.cpp +++ b/src/intel/compiler/brw_fs_visitor.cpp @@ -563,87 +563,6 @@ fs_visitor::emit_fb_writes() inst->eot = true; } -void -fs_visitor::setup_uniform_clipplane_values() -{ - const struct brw_vs_prog_key *key = - (const struct brw_vs_prog_key *) this->key; - - if (key->nr_userclip_plane_consts == 0) - return; - - assert(stage_prog_data->nr_params == uniforms); - brw_stage_prog_data_add_params(stage_prog_data, - key->nr_userclip_plane_consts * 4); - - for (int i = 0; i < key->nr_userclip_plane_consts; i++) { - this->userplane[i] = fs_reg(UNIFORM, uniforms); - for (int j = 0; j < 4; ++j) { - stage_prog_data->param[uniforms + j] = - BRW_PARAM_BUILTIN_CLIP_PLANE(i, j); - } - uniforms += 4; - } -} - -/** - * Lower legacy fixed-function and gl_ClipVertex clipping to clip distances. - * - * This does nothing if the shader uses gl_ClipDistance or user clipping is - * disabled altogether. - */ -void fs_visitor::compute_clip_distance() -{ - struct brw_vue_prog_data *vue_prog_data = brw_vue_prog_data(prog_data); - const struct brw_vs_prog_key *key = - (const struct brw_vs_prog_key *) this->key; - - /* Bail unless some sort of legacy clipping is enabled */ - if (key->nr_userclip_plane_consts == 0) - return; - - /* From the GLSL 1.30 spec, section 7.1 (Vertex Shader Special Variables): - * - * "If a linked set of shaders forming the vertex stage contains no - * static write to gl_ClipVertex or gl_ClipDistance, but the - * application has requested clipping against user clip planes through - * the API, then the coordinate written to gl_Position is used for - * comparison against the user clip planes." - * - * This function is only called if the shader didn't write to - * gl_ClipDistance. 
Accordingly, we use gl_ClipVertex to perform clipping - * if the user wrote to it; otherwise we use gl_Position. - */ - - gl_varying_slot clip_vertex = VARYING_SLOT_CLIP_VERTEX; - if (!(vue_prog_data->vue_map.slots_valid & VARYING_BIT_CLIP_VERTEX)) - clip_vertex = VARYING_SLOT_POS; - - /* If the clip vertex isn't written, skip this. Typically this means - * the GS will set up clipping. */ - if (outputs[clip_vertex].file == BAD_FILE) - return; - - setup_uniform_clipplane_values(); - - const fs_builder abld = bld.annotate("user clip distances"); - - this->outputs[VARYING_SLOT_CLIP_DIST0] = vgrf(glsl_type::vec4_type); - this->outputs[VARYING_SLOT_CLIP_DIST1] = vgrf(glsl_type::vec4_type); - - for (int i = 0; i < key->nr_userclip_plane_consts; i++) { - fs_reg u = userplane[i]; - const fs_reg output = offset(outputs[VARYING_SLOT_CLIP_DIST0 + i / 4], - bld, i & 3); - - abld.MUL(output, outputs[clip_vertex], u); - for (int j = 1; j < 4; j++) { - u.nr = userplane[i].nr + j; - abld.MAD(output, output, offset(outputs[clip_vertex], bld, j), u); - } - } -} - void fs_visitor::emit_urb_writes(const fs_reg &gs_vertex_count) { diff --git a/src/intel/compiler/brw_nir.h b/src/intel/compiler/brw_nir.h index d3130476ab1..d8af2e66b3e 100644 --- a/src/intel/compiler/brw_nir.h +++ b/src/intel/compiler/brw_nir.h @@ -102,6 +102,9 @@ brw_nir_link_shaders(const struct brw_compiler *compiler, bool brw_nir_lower_cs_intrinsics(nir_shader *nir, unsigned dispatch_width); +void brw_nir_lower_legacy_clipping(nir_shader *nir, + int nr_userclip_plane_consts, + struct brw_stage_prog_data *prog_data); void brw_nir_lower_vs_inputs(nir_shader *nir, const uint8_t *vs_attrib_wa_flags); void brw_nir_lower_vue_inputs(nir_shader *nir, diff --git a/src/intel/compiler/brw_vec4.h b/src/intel/compiler/brw_vec4.h index a8e0049bf9c..ab2ecc47f58 100644 --- a/src/intel/compiler/brw_vec4.h +++ b/src/intel/compiler/brw_vec4.h @@ -109,7 +109,6 @@ public: int *virtual_grf_start; int *virtual_grf_end; 
brw::vec4_live_variables *live_intervals; - dst_reg userplane[MAX_CLIP_PLANES]; bool need_all_constants_in_pull_buffer; diff --git a/src/intel/compiler/brw_vec4_vs.h b/src/intel/compiler/brw_vec4_vs.h index b62e03aa8d9..7662b6a73d6 100644 --- a/src/intel/compiler/brw_vec4_vs.h +++ b/src/intel/compiler/brw_vec4_vs.h @@ -49,8 +49,6 @@ protected: private: int setup_attributes(int payload_reg); - void setup_uniform_clipplane_values(); - void emit_clip_distances(dst_reg reg, int offset); const struct brw_vs_prog_key *const key; struct brw_vs_prog_data * const vs_prog_data; diff --git a/src/intel/compiler/brw_vec4_vs_visitor.cpp b/src/intel/compiler/brw_vec4_vs_visitor.cpp index 5aa1b9ac0b2..ab351dbd39b 100644 --- a/src/intel/compiler/brw_vec4_vs_visitor.cpp +++ b/src/intel/compiler/brw_vec4_vs_visitor.cpp @@ -86,78 +86,9 @@ vec4_vs_visitor::emit_urb_slot(dst_reg reg, int varying) } -void -vec4_vs_visitor::emit_clip_distances(dst_reg reg, int offset) -{ - /* From the GLSL 1.30 spec, section 7.1 (Vertex Shader Special Variables): - * - * "If a linked set of shaders forming the vertex stage contains no - * static write to gl_ClipVertex or gl_ClipDistance, but the - * application has requested clipping against user clip planes through - * the API, then the coordinate written to gl_Position is used for - * comparison against the user clip planes." - * - * This function is only called if the shader didn't write to - * gl_ClipDistance. Accordingly, we use gl_ClipVertex to perform clipping - * if the user wrote to it; otherwise we use gl_Position. 
- */ - gl_varying_slot clip_vertex = VARYING_SLOT_CLIP_VERTEX; - if (!(prog_data->vue_map.slots_valid & VARYING_BIT_CLIP_VERTEX)) { - clip_vertex = VARYING_SLOT_POS; - } - - for (int i = 0; i + offset < key->nr_userclip_plane_consts && i < 4; - ++i) { - reg.writemask = 1 << i; - emit(DP4(reg, - src_reg(output_reg[clip_vertex][0]), - src_reg(this->userplane[i + offset]))); - } -} - - -void -vec4_vs_visitor::setup_uniform_clipplane_values() -{ - if (key->nr_userclip_plane_consts == 0) - return; - - assert(stage_prog_data->nr_params == (unsigned)this->uniforms * 4); - brw_stage_prog_data_add_params(stage_prog_data, - key->nr_userclip_plane_consts * 4); - - for (int i = 0; i < key->nr_userclip_plane_consts; ++i) { - this->userplane[i] = dst_reg(UNIFORM, this->uniforms); - this->userplane[i].type = BRW_REGISTER_TYPE_F; - for (int j = 0; j < 4; ++j) { - stage_prog_data->param[this->uniforms * 4 + j] = - BRW_PARAM_BUILTIN_CLIP_PLANE(i, j); - } - ++this->uniforms; - } -} - - void vec4_vs_visitor::emit_thread_end() { - setup_uniform_clipplane_values(); - - /* Lower legacy ff and ClipVertex clipping to clip distances */ - if (key->nr_userclip_plane_consts > 0) { - current_annotation = "user clip distances"; - - output_reg[VARYING_SLOT_CLIP_DIST0][0] = - dst_reg(this, glsl_type::vec4_type); - output_reg[VARYING_SLOT_CLIP_DIST1][0] = - dst_reg(this, glsl_type::vec4_type); - output_num_components[VARYING_SLOT_CLIP_DIST0][0] = 4; - output_num_components[VARYING_SLOT_CLIP_DIST1][0] = 4; - - emit_clip_distances(output_reg[VARYING_SLOT_CLIP_DIST0][0], 0); - emit_clip_distances(output_reg[VARYING_SLOT_CLIP_DIST1][0], 4); - } - /* For VS, we always end the thread by emitting a single vertex. * emit_urb_write_opcode() will take care of setting the eot flag on the * SEND instruction. 
diff --git a/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp b/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp index e5f97b647af..2ea316e7420 100644 --- a/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp +++ b/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp @@ -395,3 +395,59 @@ brw_nir_lower_gl_images(nir_shader *shader, } } } + /* Lower legacy user clipping in NIR: runs nir_lower_clip_vs for the enabled planes, appends four BRW_PARAM_BUILTIN_CLIP_PLANE params per plane to prog_data, and rewrites every load_user_clip_plane intrinsic in the entrypoint into a 4-component load_uniform. Does nothing when nr_userclip_plane_consts is 0. */ +void +brw_nir_lower_legacy_clipping(nir_shader *nir, int nr_userclip_plane_consts, + struct brw_stage_prog_data *prog_data) +{ + if (nr_userclip_plane_consts == 0) + return; /* no user clip planes requested: nothing to lower */ + + nir_function_impl *impl = nir_shader_get_entrypoint(nir); + + nir_lower_clip_vs(nir, (1 << nr_userclip_plane_consts) - 1, true); /* mask has the low nr_userclip_plane_consts bits set, one bit per plane */ + nir_lower_io_to_temporaries(nir, impl, true, false); + nir_lower_global_vars_to_local(nir); + nir_lower_vars_to_ssa(nir); + + const unsigned clip_plane_base = nir->num_uniforms; /* captured before the clip-plane params are appended below */ + + assert(nir->num_uniforms == prog_data->nr_params * 4); + const unsigned num_clip_floats = 4 * nr_userclip_plane_consts; + uint32_t *clip_param = + brw_stage_prog_data_add_params(prog_data, num_clip_floats); + nir->num_uniforms += num_clip_floats * sizeof(float); + assert(nir->num_uniforms == prog_data->nr_params * 4); + + for (unsigned i = 0; i < num_clip_floats; i++) + clip_param[i] = BRW_PARAM_BUILTIN_CLIP_PLANE(i / 4, i % 4); /* i / 4 selects the plane, i % 4 the component within it */ + + nir_builder b; + nir_builder_init(&b, impl); + nir_foreach_block(block, impl) { + nir_foreach_instr_safe(instr, block) { + if (instr->type != nir_instr_type_intrinsic) + continue; + + nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); + if (intrin->intrinsic != nir_intrinsic_load_user_clip_plane) + continue; + + b.cursor = nir_before_instr(instr); /* emit the replacement load right before the intrinsic it replaces */ + + nir_intrinsic_instr *load = + nir_intrinsic_instr_create(nir, nir_intrinsic_load_uniform); + load->num_components = 4; + load->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0)); + nir_ssa_dest_init(&load->instr, &load->dest, 4, 32, NULL); + nir_intrinsic_set_base(load, clip_plane_base + 4 * sizeof(float) * + nir_intrinsic_ucp_id(intrin)); /* each plane occupies 4 floats starting at clip_plane_base */ + nir_intrinsic_set_range(load, 4 *
sizeof(float)); + nir_builder_instr_insert(&b, &load->instr); + + nir_ssa_def_rewrite_uses(&intrin->dest.ssa, + nir_src_for_ssa(&load->dest.ssa)); + nir_instr_remove(instr); /* uses were redirected to the load_uniform result above, so the original intrinsic is dropped */ + } + } +} diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c index d15cd33ed3e..32b3953d537 100644 --- a/src/mesa/drivers/dri/i965/brw_vs.c +++ b/src/mesa/drivers/dri/i965/brw_vs.c @@ -149,6 +149,11 @@ brw_codegen_vs_prog(struct brw_context *brw, &prog_data.base.base); } + if (key->nr_userclip_plane_consts > 0) { + brw_nir_lower_legacy_clipping(nir, key->nr_userclip_plane_consts, + &prog_data.base.base); + } + uint64_t outputs_written = brw_vs_outputs_written(brw, key, nir->info.outputs_written);