From 64fa67dd2f11fc441173f92109a374d2c4f2cd4c Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Mon, 24 May 2021 10:11:25 +1000 Subject: [PATCH] intel/gfx6: move xfb_setup outside the gs compiler into the driver. This removes the use of a GL thing from the backend compiler. Reviewed-by: Jason Ekstrand Reviewed-by: Emma Anholt Part-of: --- src/gallium/drivers/iris/iris_program.c | 2 +- src/intel/compiler/brw_compiler.h | 1 - src/intel/compiler/brw_vec4_gs_visitor.cpp | 3 +- src/intel/compiler/gfx6_gs_visitor.cpp | 47 ++-------------------- src/intel/compiler/gfx6_gs_visitor.h | 7 +--- src/intel/vulkan/anv_pipeline.c | 2 +- src/mesa/drivers/dri/i965/brw_gs.c | 38 ++++++++++++++++- 7 files changed, 45 insertions(+), 55 deletions(-) diff --git a/src/gallium/drivers/iris/iris_program.c b/src/gallium/drivers/iris/iris_program.c index 97b505c6e14..7938824e569 100644 --- a/src/gallium/drivers/iris/iris_program.c +++ b/src/gallium/drivers/iris/iris_program.c @@ -1660,7 +1660,7 @@ iris_compile_gs(struct iris_screen *screen, char *error_str = NULL; const unsigned *program = brw_compile_gs(compiler, dbg, mem_ctx, &brw_key, gs_prog_data, - nir, NULL, -1, NULL, &error_str); + nir, -1, NULL, &error_str); if (program == NULL) { dbg_printf("Failed to compile geometry shader: %s\n", error_str); ralloc_free(mem_ctx); diff --git a/src/intel/compiler/brw_compiler.h b/src/intel/compiler/brw_compiler.h index 3d36d8948c2..8bea789e8ad 100644 --- a/src/intel/compiler/brw_compiler.h +++ b/src/intel/compiler/brw_compiler.h @@ -1562,7 +1562,6 @@ brw_compile_gs(const struct brw_compiler *compiler, void *log_data, const struct brw_gs_prog_key *key, struct brw_gs_prog_data *prog_data, nir_shader *nir, - struct gl_program *prog, int shader_time_index, struct brw_compile_stats *stats, char **error_str); diff --git a/src/intel/compiler/brw_vec4_gs_visitor.cpp b/src/intel/compiler/brw_vec4_gs_visitor.cpp index 135ad0000ab..c1dbed44e40 100644 --- a/src/intel/compiler/brw_vec4_gs_visitor.cpp +++ 
b/src/intel/compiler/brw_vec4_gs_visitor.cpp @@ -588,7 +588,6 @@ brw_compile_gs(const struct brw_compiler *compiler, void *log_data, const struct brw_gs_prog_key *key, struct brw_gs_prog_data *prog_data, nir_shader *nir, - struct gl_program *prog, int shader_time_index, struct brw_compile_stats *stats, char **error_str) @@ -932,7 +931,7 @@ brw_compile_gs(const struct brw_compiler *compiler, void *log_data, nir, mem_ctx, false /* no_spills */, shader_time_index, debug_enabled); else - gs = new brw::gfx6_gs_visitor(compiler, log_data, &c, prog_data, prog, + gs = new brw::gfx6_gs_visitor(compiler, log_data, &c, prog_data, nir, mem_ctx, false /* no_spills */, shader_time_index, debug_enabled); diff --git a/src/intel/compiler/gfx6_gs_visitor.cpp b/src/intel/compiler/gfx6_gs_visitor.cpp index 0118d20d3d0..a7f3490cf76 100644 --- a/src/intel/compiler/gfx6_gs_visitor.cpp +++ b/src/intel/compiler/gfx6_gs_visitor.cpp @@ -96,7 +96,7 @@ gfx6_gs_visitor::emit_prolog() this->prim_count = src_reg(this, glsl_type::uint_type); emit(MOV(dst_reg(this->prim_count), brw_imm_ud(0u))); - if (prog->info.has_transform_feedback_varyings) { + if (gs_prog_data->num_transform_feedback_bindings) { /* Create a virtual register to hold destination indices in SOL */ this->destination_indices = src_reg(this, glsl_type::uvec4_type); /* Create a virtual register to hold number of written primitives */ @@ -107,8 +107,6 @@ gfx6_gs_visitor::emit_prolog() this->max_svbi = src_reg(this, glsl_type::uvec4_type); emit(MOV(dst_reg(this->max_svbi), src_reg(retype(brw_vec1_grf(1, 4), BRW_REGISTER_TYPE_UD)))); - - xfb_setup(); } /* PrimitveID is delivered in r0.1 of the thread payload. 
If the program @@ -353,7 +351,7 @@ gfx6_gs_visitor::emit_thread_end() this->current_annotation = "gfx6 thread end: ff_sync"; vec4_instruction *inst = NULL; - if (prog->info.has_transform_feedback_varyings) { + if (gs_prog_data->num_transform_feedback_bindings) { src_reg sol_temp(this, glsl_type::uvec4_type); emit(GS_OPCODE_FF_SYNC_SET_PRIMITIVES, dst_reg(this->svbi), @@ -446,7 +444,7 @@ gfx6_gs_visitor::emit_thread_end() } emit(BRW_OPCODE_WHILE); - if (prog->info.has_transform_feedback_varyings) + if (gs_prog_data->num_transform_feedback_bindings) xfb_write(); } emit(BRW_OPCODE_ENDIF); @@ -468,7 +466,7 @@ gfx6_gs_visitor::emit_thread_end() */ this->current_annotation = "gfx6 thread end: EOT"; - if (prog->info.has_transform_feedback_varyings) { + if (gs_prog_data->num_transform_feedback_bindings) { /* When emitting EOT, set SONumPrimsWritten Increment Value. */ src_reg data(this, glsl_type::uint_type); emit(AND(dst_reg(data), this->sol_prim_written, brw_imm_ud(0xffffu))); @@ -521,48 +519,11 @@ gfx6_gs_visitor::setup_payload() this->first_non_payload_grf = reg; } -void -gfx6_gs_visitor::xfb_setup() -{ - static const unsigned swizzle_for_offset[4] = { - BRW_SWIZZLE4(0, 1, 2, 3), - BRW_SWIZZLE4(1, 2, 3, 3), - BRW_SWIZZLE4(2, 3, 3, 3), - BRW_SWIZZLE4(3, 3, 3, 3) - }; - - const struct gl_transform_feedback_info *linked_xfb_info = - this->prog->sh.LinkedTransformFeedback; - int i; - - /* Make sure that the VUE slots won't overflow the unsigned chars in - * prog_data->transform_feedback_bindings[]. - */ - STATIC_ASSERT(BRW_VARYING_SLOT_COUNT <= 256); - - /* Make sure that we don't need more binding table entries than we've - * set aside for use in transform feedback. (We shouldn't, since we - * set aside enough binding table entries to have one per component). 
- */ - assert(linked_xfb_info->NumOutputs <= BRW_MAX_SOL_BINDINGS); - - gs_prog_data->num_transform_feedback_bindings = linked_xfb_info->NumOutputs; - for (i = 0; i < gs_prog_data->num_transform_feedback_bindings; i++) { - gs_prog_data->transform_feedback_bindings[i] = - linked_xfb_info->Outputs[i].OutputRegister; - gs_prog_data->transform_feedback_swizzles[i] = - swizzle_for_offset[linked_xfb_info->Outputs[i].ComponentOffset]; - } -} - void gfx6_gs_visitor::xfb_write() { unsigned num_verts; - if (!gs_prog_data->num_transform_feedback_bindings) - return; - switch (gs_prog_data->output_topology) { case _3DPRIM_POINTLIST: num_verts = 1; diff --git a/src/intel/compiler/gfx6_gs_visitor.h b/src/intel/compiler/gfx6_gs_visitor.h index 38ffda0c482..99723b6f881 100644 --- a/src/intel/compiler/gfx6_gs_visitor.h +++ b/src/intel/compiler/gfx6_gs_visitor.h @@ -39,15 +39,13 @@ public: void *log_data, struct brw_gs_compile *c, struct brw_gs_prog_data *prog_data, - struct gl_program *prog, const nir_shader *shader, void *mem_ctx, bool no_spills, int shader_time_index, bool debug_enabled) : vec4_gs_visitor(comp, log_data, c, prog_data, shader, mem_ctx, no_spills, - shader_time_index, debug_enabled), - prog(prog) + shader_time_index, debug_enabled) { } @@ -66,11 +64,8 @@ protected: private: void xfb_write(); void xfb_program(unsigned vertex, unsigned num_verts); - void xfb_setup(); int get_vertex_output_offset_for_varying(int vertex, int varying); - const struct gl_program *prog; - src_reg vertex_output; src_reg vertex_output_offset; src_reg temp; diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c index 19c48934797..22b9ce0767e 100644 --- a/src/intel/vulkan/anv_pipeline.c +++ b/src/intel/vulkan/anv_pipeline.c @@ -998,7 +998,7 @@ anv_pipeline_compile_gs(const struct brw_compiler *compiler, gs_stage->code = brw_compile_gs(compiler, device, mem_ctx, &gs_stage->key.gs, &gs_stage->prog_data.gs, - gs_stage->nir, NULL, -1, + gs_stage->nir, -1, gs_stage->stats, 
NULL); } diff --git a/src/mesa/drivers/dri/i965/brw_gs.c b/src/mesa/drivers/dri/i965/brw_gs.c index a4b4fb8c74d..9c1d31ed0ec 100644 --- a/src/mesa/drivers/dri/i965/brw_gs.c +++ b/src/mesa/drivers/dri/i965/brw_gs.c @@ -49,6 +49,38 @@ assign_gs_binding_table_offsets(const struct intel_device_info *devinfo, &prog_data->base.base, reserved); } +static void +brw_gfx6_xfb_setup(const struct gl_transform_feedback_info *linked_xfb_info, + struct brw_gs_prog_data *gs_prog_data) +{ + static const unsigned swizzle_for_offset[4] = { + BRW_SWIZZLE4(0, 1, 2, 3), + BRW_SWIZZLE4(1, 2, 3, 3), + BRW_SWIZZLE4(2, 3, 3, 3), + BRW_SWIZZLE4(3, 3, 3, 3) + }; + + int i; + + /* Make sure that the VUE slots won't overflow the unsigned chars in + * prog_data->transform_feedback_bindings[]. + */ + STATIC_ASSERT(BRW_VARYING_SLOT_COUNT <= 256); + + /* Make sure that we don't need more binding table entries than we've + * set aside for use in transform feedback. (We shouldn't, since we + * set aside enough binding table entries to have one per component). 
+ */ + assert(linked_xfb_info->NumOutputs <= BRW_MAX_SOL_BINDINGS); + + gs_prog_data->num_transform_feedback_bindings = linked_xfb_info->NumOutputs; + for (i = 0; i < gs_prog_data->num_transform_feedback_bindings; i++) { + gs_prog_data->transform_feedback_bindings[i] = + linked_xfb_info->Outputs[i].OutputRegister; + gs_prog_data->transform_feedback_swizzles[i] = + swizzle_for_offset[linked_xfb_info->Outputs[i].ComponentOffset]; + } +} static bool brw_codegen_gs_prog(struct brw_context *brw, struct brw_program *gp, @@ -81,6 +113,10 @@ brw_codegen_gs_prog(struct brw_context *brw, &prog_data.base.vue_map, outputs_written, gp->program.info.separate_shader, 1); + if (devinfo->ver == 6) + brw_gfx6_xfb_setup(gp->program.sh.LinkedTransformFeedback, + &prog_data); + int st_index = -1; if (INTEL_DEBUG & DEBUG_SHADER_TIME) st_index = brw_get_shader_time_index(brw, &gp->program, ST_GS, true); @@ -93,7 +129,7 @@ brw_codegen_gs_prog(struct brw_context *brw, char *error_str; const unsigned *program = brw_compile_gs(brw->screen->compiler, brw, mem_ctx, key, - &prog_data, nir, &gp->program, st_index, + &prog_data, nir, st_index, NULL, &error_str); if (program == NULL) { ralloc_strcat(&gp->program.sh.data->InfoLog, error_str);