i965: Avoid recalculating the normal VUE map for IO lowering.
The caller already computes it. Now that we have stage-specific functions, it's really easy to pass this in.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
This commit is contained in:
parent
15b3639bf1
commit
8151003ade
|
@ -246,9 +246,8 @@ brw_nir_lower_vs_inputs(nir_shader *nir,
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
brw_nir_lower_vue_inputs(nir_shader *nir,
|
brw_nir_lower_vue_inputs(nir_shader *nir, bool is_scalar,
|
||||||
const struct brw_device_info *devinfo,
|
const struct brw_vue_map *vue_map)
|
||||||
bool is_scalar)
|
|
||||||
{
|
{
|
||||||
if (!is_scalar && nir->stage == MESA_SHADER_GEOMETRY) {
|
if (!is_scalar && nir->stage == MESA_SHADER_GEOMETRY) {
|
||||||
foreach_list_typed(nir_variable, var, node, &nir->inputs) {
|
foreach_list_typed(nir_variable, var, node, &nir->inputs) {
|
||||||
|
@ -256,26 +255,6 @@ brw_nir_lower_vue_inputs(nir_shader *nir,
|
||||||
}
|
}
|
||||||
nir_lower_io(nir, nir_var_shader_in, type_size_vec4);
|
nir_lower_io(nir, nir_var_shader_in, type_size_vec4);
|
||||||
} else {
|
} else {
|
||||||
/* The GLSL linker will have already matched up GS inputs and
|
|
||||||
* the outputs of prior stages. The driver does extend VS outputs
|
|
||||||
* in some cases, but only for legacy OpenGL or Gen4-5 hardware,
|
|
||||||
* neither of which offer geometry shader support. So we can
|
|
||||||
* safely ignore that.
|
|
||||||
*
|
|
||||||
* For SSO pipelines, we use a fixed VUE map layout based on variable
|
|
||||||
* locations, so we can rely on rendezvous-by-location to make this
|
|
||||||
* work.
|
|
||||||
*
|
|
||||||
* However, we need to ignore VARYING_SLOT_PRIMITIVE_ID, as it's not
|
|
||||||
* written by previous stages and shows up via payload magic.
|
|
||||||
*/
|
|
||||||
struct brw_vue_map input_vue_map;
|
|
||||||
GLbitfield64 inputs_read =
|
|
||||||
nir->info.inputs_read & ~VARYING_BIT_PRIMITIVE_ID;
|
|
||||||
brw_compute_vue_map(devinfo, &input_vue_map, inputs_read,
|
|
||||||
nir->info.separate_shader ||
|
|
||||||
nir->stage == MESA_SHADER_TESS_CTRL);
|
|
||||||
|
|
||||||
foreach_list_typed(nir_variable, var, node, &nir->inputs) {
|
foreach_list_typed(nir_variable, var, node, &nir->inputs) {
|
||||||
var->data.driver_location = var->data.location;
|
var->data.driver_location = var->data.location;
|
||||||
}
|
}
|
||||||
|
@ -291,7 +270,7 @@ brw_nir_lower_vue_inputs(nir_shader *nir,
|
||||||
nir_foreach_function(nir, function) {
|
nir_foreach_function(nir, function) {
|
||||||
if (function->impl) {
|
if (function->impl) {
|
||||||
nir_foreach_block(function->impl, remap_inputs_with_vue_map,
|
nir_foreach_block(function->impl, remap_inputs_with_vue_map,
|
||||||
&input_vue_map);
|
(void *) vue_map);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -88,9 +88,8 @@ void brw_nir_lower_vs_inputs(nir_shader *nir,
|
||||||
bool is_scalar,
|
bool is_scalar,
|
||||||
bool use_legacy_snorm_formula,
|
bool use_legacy_snorm_formula,
|
||||||
const uint8_t *vs_attrib_wa_flags);
|
const uint8_t *vs_attrib_wa_flags);
|
||||||
void brw_nir_lower_vue_inputs(nir_shader *nir,
|
void brw_nir_lower_vue_inputs(nir_shader *nir, bool is_scalar,
|
||||||
const struct brw_device_info *devinfo,
|
const struct brw_vue_map *vue_map);
|
||||||
bool is_scalar);
|
|
||||||
void brw_nir_lower_tes_inputs(nir_shader *nir, const struct brw_vue_map *vue);
|
void brw_nir_lower_tes_inputs(nir_shader *nir, const struct brw_vue_map *vue);
|
||||||
void brw_nir_lower_fs_inputs(nir_shader *nir);
|
void brw_nir_lower_fs_inputs(nir_shader *nir);
|
||||||
void brw_nir_lower_vue_outputs(nir_shader *nir, bool is_scalar);
|
void brw_nir_lower_vue_outputs(nir_shader *nir, bool is_scalar);
|
||||||
|
|
|
@ -596,9 +596,27 @@ brw_compile_gs(const struct brw_compiler *compiler, void *log_data,
|
||||||
|
|
||||||
const bool is_scalar = compiler->scalar_stage[MESA_SHADER_GEOMETRY];
|
const bool is_scalar = compiler->scalar_stage[MESA_SHADER_GEOMETRY];
|
||||||
nir_shader *shader = nir_shader_clone(mem_ctx, src_shader);
|
nir_shader *shader = nir_shader_clone(mem_ctx, src_shader);
|
||||||
|
|
||||||
|
/* The GLSL linker will have already matched up GS inputs and the outputs
|
||||||
|
* of prior stages. The driver does extend VS outputs in some cases, but
|
||||||
|
* only for legacy OpenGL or Gen4-5 hardware, neither of which offer
|
||||||
|
* geometry shader support. So we can safely ignore that.
|
||||||
|
*
|
||||||
|
* For SSO pipelines, we use a fixed VUE map layout based on variable
|
||||||
|
* locations, so we can rely on rendezvous-by-location making this work.
|
||||||
|
*
|
||||||
|
* However, we need to ignore VARYING_SLOT_PRIMITIVE_ID, as it's not
|
||||||
|
* written by previous stages and shows up via payload magic.
|
||||||
|
*/
|
||||||
|
GLbitfield64 inputs_read =
|
||||||
|
shader->info.inputs_read & ~VARYING_BIT_PRIMITIVE_ID;
|
||||||
|
brw_compute_vue_map(compiler->devinfo,
|
||||||
|
&c.input_vue_map, inputs_read,
|
||||||
|
shader->info.separate_shader);
|
||||||
|
|
||||||
shader = brw_nir_apply_sampler_key(shader, compiler->devinfo, &key->tex,
|
shader = brw_nir_apply_sampler_key(shader, compiler->devinfo, &key->tex,
|
||||||
is_scalar);
|
is_scalar);
|
||||||
brw_nir_lower_vue_inputs(shader, compiler->devinfo, is_scalar);
|
brw_nir_lower_vue_inputs(shader, is_scalar, &c.input_vue_map);
|
||||||
brw_nir_lower_vue_outputs(shader, is_scalar);
|
brw_nir_lower_vue_outputs(shader, is_scalar);
|
||||||
shader = brw_postprocess_nir(shader, compiler->devinfo, is_scalar);
|
shader = brw_postprocess_nir(shader, compiler->devinfo, is_scalar);
|
||||||
|
|
||||||
|
@ -777,23 +795,6 @@ brw_compile_gs(const struct brw_compiler *compiler, void *log_data,
|
||||||
|
|
||||||
prog_data->vertices_in = shader->info.gs.vertices_in;
|
prog_data->vertices_in = shader->info.gs.vertices_in;
|
||||||
|
|
||||||
/* The GLSL linker will have already matched up GS inputs and the outputs
|
|
||||||
* of prior stages. The driver does extend VS outputs in some cases, but
|
|
||||||
* only for legacy OpenGL or Gen4-5 hardware, neither of which offer
|
|
||||||
* geometry shader support. So we can safely ignore that.
|
|
||||||
*
|
|
||||||
* For SSO pipelines, we use a fixed VUE map layout based on variable
|
|
||||||
* locations, so we can rely on rendezvous-by-location making this work.
|
|
||||||
*
|
|
||||||
* However, we need to ignore VARYING_SLOT_PRIMITIVE_ID, as it's not
|
|
||||||
* written by previous stages and shows up via payload magic.
|
|
||||||
*/
|
|
||||||
GLbitfield64 inputs_read =
|
|
||||||
shader->info.inputs_read & ~VARYING_BIT_PRIMITIVE_ID;
|
|
||||||
brw_compute_vue_map(compiler->devinfo,
|
|
||||||
&c.input_vue_map, inputs_read,
|
|
||||||
shader->info.separate_shader);
|
|
||||||
|
|
||||||
/* GS inputs are read from the VUE 256 bits (2 vec4's) at a time, so we
|
/* GS inputs are read from the VUE 256 bits (2 vec4's) at a time, so we
|
||||||
* need to program a URB read length of ceiling(num_slots / 2).
|
* need to program a URB read length of ceiling(num_slots / 2).
|
||||||
*/
|
*/
|
||||||
|
|
|
@ -516,12 +516,17 @@ brw_compile_tcs(const struct brw_compiler *compiler,
|
||||||
nir->info.outputs_written = key->outputs_written;
|
nir->info.outputs_written = key->outputs_written;
|
||||||
nir->info.patch_outputs_written = key->patch_outputs_written;
|
nir->info.patch_outputs_written = key->patch_outputs_written;
|
||||||
|
|
||||||
|
struct brw_vue_map input_vue_map;
|
||||||
|
brw_compute_vue_map(devinfo, &input_vue_map,
|
||||||
|
nir->info.inputs_read & ~VARYING_BIT_PRIMITIVE_ID,
|
||||||
|
true);
|
||||||
|
|
||||||
brw_compute_tess_vue_map(&vue_prog_data->vue_map,
|
brw_compute_tess_vue_map(&vue_prog_data->vue_map,
|
||||||
nir->info.outputs_written,
|
nir->info.outputs_written,
|
||||||
nir->info.patch_outputs_written);
|
nir->info.patch_outputs_written);
|
||||||
|
|
||||||
nir = brw_nir_apply_sampler_key(nir, devinfo, &key->tex, is_scalar);
|
nir = brw_nir_apply_sampler_key(nir, devinfo, &key->tex, is_scalar);
|
||||||
brw_nir_lower_vue_inputs(nir, compiler->devinfo, is_scalar);
|
brw_nir_lower_vue_inputs(nir, is_scalar, &input_vue_map);
|
||||||
brw_nir_lower_tcs_outputs(nir, &vue_prog_data->vue_map);
|
brw_nir_lower_tcs_outputs(nir, &vue_prog_data->vue_map);
|
||||||
nir = brw_postprocess_nir(nir, compiler->devinfo, is_scalar);
|
nir = brw_postprocess_nir(nir, compiler->devinfo, is_scalar);
|
||||||
|
|
||||||
|
@ -553,11 +558,6 @@ brw_compile_tcs(const struct brw_compiler *compiler,
|
||||||
/* URB entry sizes are stored as a multiple of 64 bytes. */
|
/* URB entry sizes are stored as a multiple of 64 bytes. */
|
||||||
vue_prog_data->urb_entry_size = ALIGN(output_size_bytes, 64) / 64;
|
vue_prog_data->urb_entry_size = ALIGN(output_size_bytes, 64) / 64;
|
||||||
|
|
||||||
struct brw_vue_map input_vue_map;
|
|
||||||
brw_compute_vue_map(devinfo, &input_vue_map,
|
|
||||||
nir->info.inputs_read & ~VARYING_BIT_PRIMITIVE_ID,
|
|
||||||
true);
|
|
||||||
|
|
||||||
/* HS does not use the usual payload pushing from URB to GRFs,
|
/* HS does not use the usual payload pushing from URB to GRFs,
|
||||||
* because we don't have enough registers for a full-size payload, and
|
* because we don't have enough registers for a full-size payload, and
|
||||||
* the hardware is broken on Haswell anyway.
|
* the hardware is broken on Haswell anyway.
|
||||||
|
|
Loading…
Reference in New Issue