intel/fs: Allow multiple slots for position
Change brw_compute_vue_map() to also take the number of pos slots. If more than one slot is used, VARYING_SLOT_POS is treated as an array. When using Primitive Replication, instead of a single position, the VUE must contain an array of positions. Padding might be necessary (after the clip distances) to ensure that the rest of the attributes start aligned. v2: Add a note about the array in the commit message and assert that pos_slots >= 1 to make it clear that 0 is invalid. (Jason) Move the padding to be after the clip distance. v3: Apply the correct offset when gathering the sources from outputs. Reviewed-by: Jason Ekstrand <jason@jlekstrand.net> [v2] Reviewed-by: Kenneth Graunke <kenneth@whitecape.org> Reviewed-by: Rafael Antognolli <rafael.antognolli@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/2313>
This commit is contained in:
parent
afa5447312
commit
395de69b1f
|
@ -1097,7 +1097,7 @@ iris_compile_vs(struct iris_context *ice,
|
|||
|
||||
brw_compute_vue_map(devinfo,
|
||||
&vue_prog_data->vue_map, nir->info.outputs_written,
|
||||
nir->info.separate_shader);
|
||||
nir->info.separate_shader, /* pos_slots */ 1);
|
||||
|
||||
struct brw_vs_prog_key brw_key = iris_to_brw_vs_key(devinfo, key);
|
||||
|
||||
|
@ -1551,7 +1551,7 @@ iris_compile_gs(struct iris_context *ice,
|
|||
|
||||
brw_compute_vue_map(devinfo,
|
||||
&vue_prog_data->vue_map, nir->info.outputs_written,
|
||||
nir->info.separate_shader);
|
||||
nir->info.separate_shader, /* pos_slots */ 1);
|
||||
|
||||
struct brw_gs_prog_key brw_key = iris_to_brw_gs_key(devinfo, key);
|
||||
|
||||
|
|
|
@ -227,7 +227,8 @@ blorp_compile_vs(struct blorp_context *blorp, void *mem_ctx,
|
|||
brw_compute_vue_map(compiler->devinfo,
|
||||
&vs_prog_data->base.vue_map,
|
||||
nir->info.outputs_written,
|
||||
nir->info.separate_shader);
|
||||
nir->info.separate_shader,
|
||||
1);
|
||||
|
||||
struct brw_vs_prog_key vs_key = { 0, };
|
||||
|
||||
|
@ -285,7 +286,7 @@ blorp_ensure_sf_program(struct blorp_batch *batch,
|
|||
unsigned program_size;
|
||||
|
||||
struct brw_vue_map vue_map;
|
||||
brw_compute_vue_map(blorp->compiler->devinfo, &vue_map, slots_valid, false);
|
||||
brw_compute_vue_map(blorp->compiler->devinfo, &vue_map, slots_valid, false, 1);
|
||||
|
||||
struct brw_sf_prog_data prog_data_tmp;
|
||||
program = brw_compile_sf(blorp->compiler, mem_ctx, &key.key,
|
||||
|
|
|
@ -1045,7 +1045,8 @@ GLuint brw_varying_to_offset(const struct brw_vue_map *vue_map, GLuint varying)
|
|||
void brw_compute_vue_map(const struct gen_device_info *devinfo,
|
||||
struct brw_vue_map *vue_map,
|
||||
uint64_t slots_valid,
|
||||
bool separate_shader);
|
||||
bool separate_shader,
|
||||
uint32_t pos_slots);
|
||||
|
||||
void brw_compute_tess_vue_map(struct brw_vue_map *const vue_map,
|
||||
uint64_t slots_valid,
|
||||
|
|
|
@ -1699,7 +1699,7 @@ calculate_urb_setup(const struct gen_device_info *devinfo,
|
|||
struct brw_vue_map prev_stage_vue_map;
|
||||
brw_compute_vue_map(devinfo, &prev_stage_vue_map,
|
||||
key->input_slots_valid,
|
||||
nir->info.separate_shader);
|
||||
nir->info.separate_shader, 1);
|
||||
|
||||
int first_slot =
|
||||
brw_compute_first_urb_slot_required(nir->info.inputs_read,
|
||||
|
|
|
@ -698,8 +698,18 @@ fs_visitor::emit_urb_writes(const fs_reg &gs_vertex_count)
|
|||
sources[length++] = reg;
|
||||
}
|
||||
} else {
|
||||
for (unsigned i = 0; i < 4; i++)
|
||||
sources[length++] = offset(this->outputs[varying], bld, i);
|
||||
int slot_offset = 0;
|
||||
|
||||
/* When using Primitive Replication, there may be multiple slots
|
||||
* assigned to POS.
|
||||
*/
|
||||
if (varying == VARYING_SLOT_POS)
|
||||
slot_offset = slot - vue_map->varying_to_slot[VARYING_SLOT_POS];
|
||||
|
||||
for (unsigned i = 0; i < 4; i++) {
|
||||
sources[length++] = offset(this->outputs[varying], bld,
|
||||
i + (slot_offset * 4));
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
|
|
@ -1277,7 +1277,7 @@ brw_compile_tes(const struct brw_compiler *compiler,
|
|||
|
||||
brw_compute_vue_map(devinfo, &prog_data->base.vue_map,
|
||||
nir->info.outputs_written,
|
||||
nir->info.separate_shader);
|
||||
nir->info.separate_shader, 1);
|
||||
|
||||
unsigned output_size_bytes = prog_data->base.vue_map.num_slots * 4 * 4;
|
||||
|
||||
|
|
|
@ -638,7 +638,7 @@ brw_compile_gs(const struct brw_compiler *compiler, void *log_data,
|
|||
GLbitfield64 inputs_read = shader->info.inputs_read;
|
||||
brw_compute_vue_map(compiler->devinfo,
|
||||
&c.input_vue_map, inputs_read,
|
||||
shader->info.separate_shader);
|
||||
shader->info.separate_shader, 1);
|
||||
|
||||
brw_nir_apply_key(shader, compiler, &key->base, 8, is_scalar);
|
||||
brw_nir_lower_vue_inputs(shader, &c.input_vue_map);
|
||||
|
|
|
@ -373,7 +373,7 @@ brw_compile_tcs(const struct brw_compiler *compiler,
|
|||
|
||||
struct brw_vue_map input_vue_map;
|
||||
brw_compute_vue_map(devinfo, &input_vue_map, nir->info.inputs_read,
|
||||
nir->info.separate_shader);
|
||||
nir->info.separate_shader, 1);
|
||||
brw_compute_tess_vue_map(&vue_prog_data->vue_map,
|
||||
nir->info.outputs_written,
|
||||
nir->info.patch_outputs_written);
|
||||
|
|
|
@ -60,7 +60,8 @@ void
|
|||
brw_compute_vue_map(const struct gen_device_info *devinfo,
|
||||
struct brw_vue_map *vue_map,
|
||||
uint64_t slots_valid,
|
||||
bool separate)
|
||||
bool separate,
|
||||
uint32_t pos_slots)
|
||||
{
|
||||
/* Keep using the packed/contiguous layout on old hardware - we only need
|
||||
* the SSO layout when using geometry/tessellation shaders or 32 FS input
|
||||
|
@ -133,11 +134,27 @@ brw_compute_vue_map(const struct gen_device_info *devinfo,
|
|||
*/
|
||||
assign_vue_slot(vue_map, VARYING_SLOT_PSIZ, slot++);
|
||||
assign_vue_slot(vue_map, VARYING_SLOT_POS, slot++);
|
||||
|
||||
/* When using Primitive Replication, multiple slots are used for storing
|
||||
* positions for each view.
|
||||
*/
|
||||
assert(pos_slots >= 1);
|
||||
if (pos_slots > 1) {
|
||||
for (int i = 1; i < pos_slots; i++) {
|
||||
vue_map->slot_to_varying[slot++] = VARYING_SLOT_POS;
|
||||
}
|
||||
}
|
||||
|
||||
if (slots_valid & BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST0))
|
||||
assign_vue_slot(vue_map, VARYING_SLOT_CLIP_DIST0, slot++);
|
||||
if (slots_valid & BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST1))
|
||||
assign_vue_slot(vue_map, VARYING_SLOT_CLIP_DIST1, slot++);
|
||||
|
||||
/* Vertex URB Formats table says: "Vertex Header shall be padded at the
|
||||
* end so that the header ends on a 32-byte boundary".
|
||||
*/
|
||||
slot += slot % 2;
|
||||
|
||||
/* front and back colors need to be consecutive so that we can use
|
||||
* ATTRIBUTE_SWIZZLE_INPUTATTR_FACING to swizzle them when doing
|
||||
* two-sided color.
|
||||
|
|
|
@ -741,7 +741,7 @@ anv_pipeline_compile_vs(const struct brw_compiler *compiler,
|
|||
brw_compute_vue_map(compiler->devinfo,
|
||||
&vs_stage->prog_data.vs.base.vue_map,
|
||||
vs_stage->nir->info.outputs_written,
|
||||
vs_stage->nir->info.separate_shader);
|
||||
vs_stage->nir->info.separate_shader, 1);
|
||||
|
||||
vs_stage->num_stats = 1;
|
||||
vs_stage->code = brw_compile_vs(compiler, device, mem_ctx,
|
||||
|
@ -887,7 +887,7 @@ anv_pipeline_compile_gs(const struct brw_compiler *compiler,
|
|||
brw_compute_vue_map(compiler->devinfo,
|
||||
&gs_stage->prog_data.gs.base.vue_map,
|
||||
gs_stage->nir->info.outputs_written,
|
||||
gs_stage->nir->info.separate_shader);
|
||||
gs_stage->nir->info.separate_shader, 1);
|
||||
|
||||
gs_stage->num_stats = 1;
|
||||
gs_stage->code = brw_compile_gs(compiler, device, mem_ctx,
|
||||
|
|
|
@ -79,7 +79,7 @@ brw_codegen_gs_prog(struct brw_context *brw,
|
|||
|
||||
brw_compute_vue_map(devinfo,
|
||||
&prog_data.base.vue_map, outputs_written,
|
||||
gp->program.info.separate_shader);
|
||||
gp->program.info.separate_shader, 1);
|
||||
|
||||
int st_index = -1;
|
||||
if (INTEL_DEBUG & DEBUG_SHADER_TIME)
|
||||
|
|
|
@ -159,7 +159,7 @@ brw_codegen_vs_prog(struct brw_context *brw,
|
|||
|
||||
brw_compute_vue_map(devinfo,
|
||||
&prog_data.base.vue_map, outputs_written,
|
||||
nir->info.separate_shader);
|
||||
nir->info.separate_shader, 1);
|
||||
|
||||
if (0) {
|
||||
_mesa_fprint_program_opt(stderr, &vp->program, PROG_PRINT_DEBUG, true);
|
||||
|
|
|
@ -602,7 +602,7 @@ brw_fs_precompile(struct gl_context *ctx, struct gl_program *prog)
|
|||
if (devinfo->gen < 6) {
|
||||
brw_compute_vue_map(&brw->screen->devinfo, &vue_map,
|
||||
prog->info.inputs_read | VARYING_BIT_POS,
|
||||
false);
|
||||
false, 1);
|
||||
}
|
||||
|
||||
bool success = brw_codegen_wm_prog(brw, bfp, &key, &vue_map);
|
||||
|
|
Loading…
Reference in New Issue