i965: Fix output register sizes when multiple variables share a slot.
ARB_enhanced_layouts allows multiple output variables to share the same
location - and these variables may not have the same sizes. For example,
consider these output variables:

    // consume X/Y/Z components of 6 vectors
    layout(location = 0) out vec3 a[6];

    // consumes W component of the first vector
    layout(location = 0, component = 3) out float b;

Looking at the first declaration, we see that VARYING_SLOT_VAR0 needs
24 components worth of space (vec3 padded out to a vec4, 4 * 6 = 24).
But looking at the second declaration, we would think that
VARYING_SLOT_VAR0 needs only 4 components of space (a single float
padded out to a vec4).

nir_setup_outputs() only considered the space requirements of the first
declaration it happened to see, so if 'float b' came first, it would
underallocate the output register space, causing brw_fs_validator.cpp
to assert fail about inst->dst.offset exceeding the register size.

Fixes Piglit's
tests/spec/arb_enhanced_layouts/execution/component-layout/vs-to-fs-array-interleave-single-location.shader_test.

Thanks to Tim Arceri for finding this bug and writing a test!

Reviewed-by: Timothy Arceri <tarceri@itsqueeze.com>
This commit is contained in:
parent
2d36efdb7f
commit
6f5abf3146
|
@ -53,14 +53,27 @@ fs_visitor::nir_setup_outputs()
|
|||
if (stage == MESA_SHADER_TESS_CTRL || stage == MESA_SHADER_FRAGMENT)
|
||||
return;
|
||||
|
||||
unsigned vec4s[VARYING_SLOT_TESS_MAX] = { 0, };
|
||||
|
||||
/* Calculate the size of output registers in a separate pass, before
|
||||
* allocating them. With ARB_enhanced_layouts, multiple output variables
|
||||
* may occupy the same slot, but have different type sizes.
|
||||
*/
|
||||
nir_foreach_variable(var, &nir->outputs) {
|
||||
const unsigned vec4s =
|
||||
const int loc = var->data.driver_location;
|
||||
const unsigned var_vec4s =
|
||||
var->data.compact ? DIV_ROUND_UP(glsl_get_length(var->type), 4)
|
||||
: type_size_vec4(var->type);
|
||||
fs_reg reg = bld.vgrf(BRW_REGISTER_TYPE_F, 4 * vec4s);
|
||||
for (unsigned i = 0; i < vec4s; i++) {
|
||||
if (outputs[var->data.driver_location + i].file == BAD_FILE)
|
||||
outputs[var->data.driver_location + i] = offset(reg, bld, 4 * i);
|
||||
vec4s[loc] = MAX2(vec4s[loc], var_vec4s);
|
||||
}
|
||||
|
||||
nir_foreach_variable(var, &nir->outputs) {
|
||||
const int loc = var->data.driver_location;
|
||||
if (outputs[loc].file == BAD_FILE) {
|
||||
fs_reg reg = bld.vgrf(BRW_REGISTER_TYPE_F, 4 * vec4s[loc]);
|
||||
for (unsigned i = 0; i < vec4s[loc]; i++) {
|
||||
outputs[loc + i] = offset(reg, bld, 4 * i);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue