mesa/src/compiler/glsl/link_varyings.cpp

3364 lines
128 KiB
C++
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/*
* Copyright © 2012 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
/**
* \file link_varyings.cpp
*
* Linker functions related specifically to linking varyings between shader
* stages.
*/
#include "main/errors.h"
#include "main/consts_exts.h"
#include "main/shader_types.h"
#include "glsl_symbol_table.h"
#include "glsl_parser_extras.h"
#include "ir_optimization.h"
#include "linker.h"
#include "link_varyings.h"
#include "main/macros.h"
#include "util/hash_table.h"
#include "util/u_math.h"
#include "program.h"
/**
 * Return the type of \p var with the outermost array level removed when the
 * variable belongs to a stage whose varyings are arrays indexed by a vertex
 * number (such as geometry shader inputs).
 *
 * The array level is stripped only for non-patch variables that are either
 * tessellation control outputs, or inputs of the tessellation control,
 * tessellation evaluation or geometry stage.  Every other variable's type is
 * returned unchanged.
 */
static const glsl_type *
get_varying_type(const ir_variable *var, gl_shader_stage stage)
{
   bool per_vertex_array = false;

   if (!var->data.patch) {
      if (var->data.mode == ir_var_shader_out) {
         per_vertex_array = (stage == MESA_SHADER_TESS_CTRL);
      } else if (var->data.mode == ir_var_shader_in) {
         per_vertex_array = (stage == MESA_SHADER_TESS_CTRL ||
                             stage == MESA_SHADER_TESS_EVAL ||
                             stage == MESA_SHADER_GEOMETRY);
      }
   }

   if (!per_vertex_array)
      return var->type;

   /* Such varyings are expected to be declared as arrays. */
   assert(var->type->is_array());
   return var->type->fields.array;
}
/**
 * Tell whether \p var carries an explicit location that is at or above
 * VARYING_SLOT_VAR0.
 */
static bool
varying_has_user_specified_location(const ir_variable *var)
{
   if (!var->data.explicit_location)
      return false;

   return var->data.location >= VARYING_SLOT_VAR0;
}
static void
create_xfb_varying_names(void *mem_ctx, const glsl_type *t, char **name,
size_t name_length, unsigned *count,
const char *ifc_member_name,
const glsl_type *ifc_member_t, char ***varying_names)
{
if (t->is_interface()) {
size_t new_length = name_length;
assert(ifc_member_name && ifc_member_t);
ralloc_asprintf_rewrite_tail(name, &new_length, ".%s", ifc_member_name);
create_xfb_varying_names(mem_ctx, ifc_member_t, name, new_length, count,
NULL, NULL, varying_names);
} else if (t->is_struct()) {
for (unsigned i = 0; i < t->length; i++) {
const char *field = t->fields.structure[i].name;
size_t new_length = name_length;
ralloc_asprintf_rewrite_tail(name, &new_length, ".%s", field);
create_xfb_varying_names(mem_ctx, t->fields.structure[i].type, name,
new_length, count, NULL, NULL,
varying_names);
}
} else if (t->without_array()->is_struct() ||
t->without_array()->is_interface() ||
(t->is_array() && t->fields.array->is_array())) {
for (unsigned i = 0; i < t->length; i++) {
size_t new_length = name_length;
/* Append the subscript to the current variable name */
ralloc_asprintf_rewrite_tail(name, &new_length, "[%u]", i);
create_xfb_varying_names(mem_ctx, t->fields.array, name, new_length,
count, ifc_member_name, ifc_member_t,
varying_names);
}
} else {
(*varying_names)[(*count)++] = ralloc_strdup(mem_ctx, *name);
}
}
/**
 * Scan the outputs of \p sh for xfb_* layout qualifiers and build the list
 * of varying names selected by xfb_offset.
 *
 * \param mem_ctx              ralloc context for the name array and names.
 * \param sh                   linked shader whose IR is scanned.
 * \param prog                 program; its per-buffer strides are consulted.
 * \param num_tfeedback_decls  in/out; incremented by the varying count of
 *                             every output that has an explicit xfb_offset.
 * \param varying_names        out; allocated and filled only when
 *                             \p *num_tfeedback_decls is non-zero.
 *
 * \return true if any xfb qualifier (buffer, stride or offset) was seen or
 *         if any buffer stride of the program is non-zero.
 */
static bool
process_xfb_layout_qualifiers(void *mem_ctx, const gl_linked_shader *sh,
                              struct gl_shader_program *prog,
                              unsigned *num_tfeedback_decls,
                              char ***varying_names)
{
   bool has_xfb_qualifiers = false;

   /* Transform feedback mode must be enabled even when xfb_stride is only
    * applied to a global out.  xfb_stride is not propagated to interface
    * block members, so this test covers that case as well.
    */
   for (unsigned buf = 0;
        buf < MAX_FEEDBACK_BUFFERS && !has_xfb_qualifiers;
        buf++) {
      if (prog->TransformFeedback.BufferStride[buf])
         has_xfb_qualifiers = true;
   }

   /* First pass: detect qualifiers and count the captured varyings. */
   foreach_in_list(ir_instruction, node, sh->ir) {
      ir_variable *var = node->as_variable();
      if (var == NULL || var->data.mode != ir_var_shader_out)
         continue;

      /* From the ARB_enhanced_layouts spec:
       *
       *    "Any shader making any static use (after preprocessing) of any of
       *     these *xfb_* qualifiers will cause the shader to be in a
       *     transform feedback capturing mode and hence responsible for
       *     describing the transform feedback setup.  This mode will capture
       *     any output selected by *xfb_offset*, directly or indirectly, to
       *     a transform feedback buffer."
       */
      if (var->data.explicit_xfb_offset)
         *num_tfeedback_decls += var->type->varying_count();

      if (var->data.explicit_xfb_buffer ||
          var->data.explicit_xfb_stride ||
          var->data.explicit_xfb_offset)
         has_xfb_qualifiers = true;
   }

   if (*num_tfeedback_decls == 0)
      return has_xfb_qualifiers;

   /* Second pass: generate the names of all captured varyings. */
   unsigned next = 0;
   *varying_names = ralloc_array(mem_ctx, char *, *num_tfeedback_decls);

   foreach_in_list(ir_instruction, node, sh->ir) {
      ir_variable *var = node->as_variable();
      if (var == NULL || var->data.mode != ir_var_shader_out)
         continue;
      if (!var->data.explicit_xfb_offset)
         continue;

      const glsl_type *type = var->type;
      const glsl_type *member_type = NULL;
      const char *base_name = var->name;

      if (var->data.from_named_ifc_block) {
         type = var->get_interface_type();

         /* Find the member type before it was altered by lowering */
         const glsl_type *block = type->without_array();
         member_type =
            block->fields.structure[block->field_index(var->name)].type;
         base_name = block->name;
      }

      /* Scratch copy that the expansion may grow; released right after. */
      char *name = ralloc_strdup(NULL, base_name);
      create_xfb_varying_names(mem_ctx, type, &name, strlen(name), &next,
                               var->name, member_type, varying_names);
      ralloc_free(name);
   }

   assert(next == *num_tfeedback_decls);
   return has_xfb_qualifiers;
}
/**
* Validate the types and qualifiers of an output from one stage against the
* matching input to another stage.
*
* Mismatches are reported through linker_error(); an interpolation mismatch
* is reported through linker_warning() instead when
* consts->AllowGLSLCrossStageInterpolationMismatch is set. Every failed
* check returns immediately except the struct comparison, which reports its
* error and then continues with the qualifier checks.
*
* \param consts          read for AllowGLSLCrossStageInterpolationMismatch.
* \param prog            program being linked; its Version and IsES select
*                        which qualifier checks apply.
* \param input           input variable of the consumer stage.
* \param output          output variable of the producer stage.
* \param consumer_stage  stage that declares \p input.
* \param producer_stage  stage that declares \p output.
*/
static void
cross_validate_types_and_qualifiers(const struct gl_constants *consts,
struct gl_shader_program *prog,
const ir_variable *input,
const ir_variable *output,
gl_shader_stage consumer_stage,
gl_shader_stage producer_stage)
{
/* Check that the types match between stages.
*/
const glsl_type *type_to_match = input->type;
/* VS -> GS, VS -> TCS, VS -> TES, TES -> GS */
const bool extra_array_level = (producer_stage == MESA_SHADER_VERTEX &&
consumer_stage != MESA_SHADER_FRAGMENT) ||
consumer_stage == MESA_SHADER_GEOMETRY;
/* In those cases the input is an array; compare its element type. */
if (extra_array_level) {
assert(type_to_match->is_array());
type_to_match = type_to_match->fields.array;
}
if (type_to_match != output->type) {
if (output->type->is_struct()) {
/* Structures across shader stages can have different names
* and are considered to match in type if and only if structure
* members match in name, type, qualification, and declaration
* order. The precision doesn't need to match.
*/
if (!output->type->record_compare(type_to_match,
false, /* match_name */
true, /* match_locations */
false /* match_precision */)) {
linker_error(prog,
"%s shader output `%s' declared as struct `%s', "
"doesn't match in type with %s shader input "
"declared as struct `%s'\n",
_mesa_shader_stage_to_string(producer_stage),
output->name,
output->type->name,
_mesa_shader_stage_to_string(consumer_stage),
input->type->name);
}
} else if (!output->type->is_array() || !is_gl_identifier(output->name)) {
/* There is a bit of a special case for gl_TexCoord. This
* built-in is unsized by default. Applications that access it
* with a variable index must redeclare it with a size. There is
* some language in the GLSL spec that implies the fragment shader
* and vertex shader do not have to agree on this size. Other
* drivers behave this way, and one or two applications seem to
* rely on it.
*
* Neither declaration needs to be modified here because the array
* sizes are fixed later when update_array_sizes is called.
*
* From page 48 (page 54 of the PDF) of the GLSL 1.10 spec:
*
* "Unlike user-defined varying variables, the built-in
* varying variables don't have a strict one-to-one
* correspondence between the vertex language and the
* fragment language."
*
* Hence only non-array outputs and outputs whose name is not a gl_
* identifier are rejected here.
*/
linker_error(prog,
"%s shader output `%s' declared as type `%s', "
"but %s shader input declared as type `%s'\n",
_mesa_shader_stage_to_string(producer_stage),
output->name,
output->type->name,
_mesa_shader_stage_to_string(consumer_stage),
input->type->name);
return;
}
}
/* Check that all of the qualifiers match between stages.
*/
/* According to the OpenGL and OpenGLES GLSL specs, the centroid qualifier
* should match until OpenGL 4.3 and OpenGLES 3.1. The OpenGLES 3.0
* conformance test suite does not verify that the qualifiers must match.
* The deqp test suite expects the opposite (OpenGLES 3.1) behavior for
* OpenGLES 3.0 drivers, so we relax the checking in all cases.
*
* NOTE: the leading `false' makes the condition below constant-false, so
* the centroid error is never emitted.
*/
if (false /* always skip the centroid check */ &&
prog->data->Version < (prog->IsES ? 310 : 430) &&
input->data.centroid != output->data.centroid) {
linker_error(prog,
"%s shader output `%s' %s centroid qualifier, "
"but %s shader input %s centroid qualifier\n",
_mesa_shader_stage_to_string(producer_stage),
output->name,
(output->data.centroid) ? "has" : "lacks",
_mesa_shader_stage_to_string(consumer_stage),
(input->data.centroid) ? "has" : "lacks");
return;
}
/* The sample qualifier must agree on both sides. */
if (input->data.sample != output->data.sample) {
linker_error(prog,
"%s shader output `%s' %s sample qualifier, "
"but %s shader input %s sample qualifier\n",
_mesa_shader_stage_to_string(producer_stage),
output->name,
(output->data.sample) ? "has" : "lacks",
_mesa_shader_stage_to_string(consumer_stage),
(input->data.sample) ? "has" : "lacks");
return;
}
/* The patch qualifier must agree on both sides. */
if (input->data.patch != output->data.patch) {
linker_error(prog,
"%s shader output `%s' %s patch qualifier, "
"but %s shader input %s patch qualifier\n",
_mesa_shader_stage_to_string(producer_stage),
output->name,
(output->data.patch) ? "has" : "lacks",
_mesa_shader_stage_to_string(consumer_stage),
(input->data.patch) ? "has" : "lacks");
return;
}
/* The GLSL 4.20 and GLSL ES 3.00 specifications say:
*
* "As only outputs need be declared with invariant, an output from
* one shader stage will still match an input of a subsequent stage
* without the input being declared as invariant."
*
* while GLSL 4.10 says:
*
* "For variables leaving one shader and coming into another shader,
* the invariant keyword has to be used in both shaders, or a link
* error will result."
*
* and GLSL ES 1.00 section 4.6.4 "Invariance and Linking" says:
*
* "The invariance of varyings that are declared in both the vertex
* and fragment shaders must match."
*
* So the invariant check is applied only below GLSL 4.20 / GLSL ES 3.00.
*/
if (input->data.explicit_invariant != output->data.explicit_invariant &&
prog->data->Version < (prog->IsES ? 300 : 420)) {
linker_error(prog,
"%s shader output `%s' %s invariant qualifier, "
"but %s shader input %s invariant qualifier\n",
_mesa_shader_stage_to_string(producer_stage),
output->name,
(output->data.explicit_invariant) ? "has" : "lacks",
_mesa_shader_stage_to_string(consumer_stage),
(input->data.explicit_invariant) ? "has" : "lacks");
return;
}
/* GLSL >= 4.40 removes text requiring interpolation qualifiers
* to match cross stage, they must only match within the same stage.
*
* From page 84 (page 90 of the PDF) of the GLSL 4.40 spec:
*
* "It is a link-time error if, within the same stage, the interpolation
* qualifiers of variables of the same name do not match."
*
* Section 4.3.9 (Interpolation) of the GLSL ES 3.00 spec says:
*
* "When no interpolation qualifier is present, smooth interpolation
* is used."
*
* So we match variables where one is smooth and the other has no explicit
* qualifier.
*/
unsigned input_interpolation = input->data.interpolation;
unsigned output_interpolation = output->data.interpolation;
/* The normalized values are used for the comparison only; the messages
* below print the qualifiers as declared.
*/
if (prog->IsES) {
if (input_interpolation == INTERP_MODE_NONE)
input_interpolation = INTERP_MODE_SMOOTH;
if (output_interpolation == INTERP_MODE_NONE)
output_interpolation = INTERP_MODE_SMOOTH;
}
if (input_interpolation != output_interpolation &&
prog->data->Version < 440) {
if (!consts->AllowGLSLCrossStageInterpolationMismatch) {
linker_error(prog,
"%s shader output `%s' specifies %s "
"interpolation qualifier, "
"but %s shader input specifies %s "
"interpolation qualifier\n",
_mesa_shader_stage_to_string(producer_stage),
output->name,
interpolation_string(output->data.interpolation),
_mesa_shader_stage_to_string(consumer_stage),
interpolation_string(input->data.interpolation));
return;
} else {
/* The mismatch is tolerated by configuration: warn only. */
linker_warning(prog,
"%s shader output `%s' specifies %s "
"interpolation qualifier, "
"but %s shader input specifies %s "
"interpolation qualifier\n",
_mesa_shader_stage_to_string(producer_stage),
output->name,
interpolation_string(output->data.interpolation),
_mesa_shader_stage_to_string(consumer_stage),
interpolation_string(input->data.interpolation));
}
}
}
/**
* Validate front and back color outputs against single color input
*/
static void
cross_validate_front_and_back_color(const struct gl_constants *consts,
struct gl_shader_program *prog,
const ir_variable *input,
const ir_variable *front_color,
const ir_variable *back_color,
gl_shader_stage consumer_stage,
gl_shader_stage producer_stage)
{
if (front_color != NULL && front_color->data.assigned)
cross_validate_types_and_qualifiers(consts, prog, input, front_color,
consumer_stage, producer_stage);
if (back_color != NULL && back_color->data.assigned)
cross_validate_types_and_qualifiers(consts, prog, input, back_color,
consumer_stage, producer_stage);
}
/**
 * Convert the location of \p var into an index relative to the first slot
 * of its location space.
 *
 * The base is VARYING_SLOT_VAR0 except for vertex shader inputs
 * (VERT_ATTRIB_GENERIC0), patch variables of the tessellation stages
 * (VARYING_SLOT_PATCH0) and fragment shader outputs (FRAG_RESULT_DATA0).
 */
static unsigned
compute_variable_location_slot(ir_variable *var, gl_shader_stage stage)
{
   unsigned base = VARYING_SLOT_VAR0;

   if (stage == MESA_SHADER_VERTEX) {
      if (var->data.mode == ir_var_shader_in)
         base = VERT_ATTRIB_GENERIC0;
   } else if (stage == MESA_SHADER_TESS_CTRL ||
              stage == MESA_SHADER_TESS_EVAL) {
      if (var->data.patch)
         base = VARYING_SLOT_PATCH0;
   } else if (stage == MESA_SHADER_FRAGMENT) {
      if (var->data.mode == ir_var_shader_out)
         base = FRAG_RESULT_DATA0;
   }

   return var->data.location - base;
}
/**
* Bookkeeping for one component of one explicit location.
*
* Tables of this struct are indexed as [location][component], with four
* components per location. check_location_aliasing() fills an entry when a
* variable claims that component and compares later claimants against it.
* An entry whose \c var is NULL has not been claimed yet.
*/
struct explicit_location_info {
/** Variable that claimed this component, or NULL if unclaimed. */
ir_variable *var;
/** Whether the claimant's base type (arrays stripped) is an integer type. */
bool base_type_is_integer;
/** Bit size of the claimant's base type; 0 when the claimant is a struct. */
unsigned base_type_bit_size;
/** Interpolation qualifier recorded for the claimant. */
unsigned interpolation;
/** Auxiliary storage qualifiers recorded for the claimant. */
bool centroid;
bool sample;
bool patch;
};
/**
* Record the components used by a variable (or interface block member) in
* \p explicit_locations and check them against previously recorded users of
* the same locations.
*
* For every location in [\p location, \p location_limit) all four components
* are visited. For a component that is already claimed:
* - it is an error if either the earlier claimant or this one is a struct;
* - it is an error if the component lies in the range this variable uses;
* - otherwise the two merely share the location, and their base type kind
*   (integer or not), bit size, interpolation and auxiliary qualifiers
*   (centroid, sample, patch) must be identical.
* An unclaimed component inside this variable's range is recorded as claimed.
*
* \param explicit_locations  table indexed as [location][component].
* \param var                 variable being recorded.
* \param location            first location index to process.
* \param component           first component used within a location.
* \param location_limit      one past the last location index to process.
* \param type                type used to derive component count and base type.
* \param interpolation       interpolation qualifier to record and compare.
* \param centroid            auxiliary qualifier to record and compare.
* \param sample              auxiliary qualifier to record and compare.
* \param patch               auxiliary qualifier to record and compare.
* \param prog                program that receives linker errors.
* \param stage               stage name used in error messages.
*
* \return false if a linker error was reported, true otherwise.
*/
static bool
check_location_aliasing(struct explicit_location_info explicit_locations[][4],
ir_variable *var,
unsigned location,
unsigned component,
unsigned location_limit,
const glsl_type *type,
unsigned interpolation,
bool centroid,
bool sample,
bool patch,
gl_shader_program *prog,
gl_shader_stage stage)
{
/* One past the last component used by this variable in the current
* location; may exceed 4 for 64-bit types (handled in the loop below).
*/
unsigned last_comp;
unsigned base_type_bit_size;
const glsl_type *type_without_array = type->without_array();
const bool base_type_is_integer =
glsl_base_type_is_integer(type_without_array->base_type);
const bool is_struct = type_without_array->is_struct();
if (is_struct) {
/* structs don't have a defined underlying base type so just treat all
* component slots as used and set the bit size to 0. If there is
* location aliasing, we'll fail anyway later.
*/
last_comp = 4;
base_type_bit_size = 0;
} else {
/* 64-bit types take two components per vector element. */
unsigned dmul = type_without_array->is_64bit() ? 2 : 1;
last_comp = component + type_without_array->vector_elements * dmul;
base_type_bit_size =
glsl_base_type_get_bit_size(type_without_array->base_type);
}
while (location < location_limit) {
unsigned comp = 0;
while (comp < 4) {
struct explicit_location_info *info =
&explicit_locations[location][comp];
if (info->var) {
/* This component is already claimed by another variable. */
if (info->var->type->without_array()->is_struct() || is_struct) {
/* Structs cannot share location since they are incompatible
* with any other underlying numerical type.
*/
linker_error(prog,
"%s shader has multiple %sputs sharing the "
"same location that don't have the same "
"underlying numerical type. Struct variable '%s', "
"location %u\n",
_mesa_shader_stage_to_string(stage),
var->data.mode == ir_var_shader_in ? "in" : "out",
is_struct ? var->name : info->var->name,
location);
return false;
} else if (comp >= component && comp < last_comp) {
/* Component aliasing is not allowed */
linker_error(prog,
"%s shader has multiple %sputs explicitly "
"assigned to location %d and component %d\n",
_mesa_shader_stage_to_string(stage),
var->data.mode == ir_var_shader_in ? "in" : "out",
location, comp);
return false;
} else {
/* From the OpenGL 4.60.5 spec, section 4.4.1 Input Layout
* Qualifiers, Page 67, (Location aliasing):
*
* " Further, when location aliasing, the aliases sharing the
* location must have the same underlying numerical type
* and bit width (floating-point or integer, 32-bit versus
* 64-bit, etc.) and the same auxiliary storage and
* interpolation qualification."
*/
/* If the underlying numerical type isn't integer, implicitly
* it will be float or else we would have failed by now.
*/
if (info->base_type_is_integer != base_type_is_integer) {
linker_error(prog,
"%s shader has multiple %sputs sharing the "
"same location that don't have the same "
"underlying numerical type. Location %u "
"component %u.\n",
_mesa_shader_stage_to_string(stage),
var->data.mode == ir_var_shader_in ?
"in" : "out", location, comp);
return false;
}
if (info->base_type_bit_size != base_type_bit_size) {
linker_error(prog,
"%s shader has multiple %sputs sharing the "
"same location that don't have the same "
"underlying numerical bit size. Location %u "
"component %u.\n",
_mesa_shader_stage_to_string(stage),
var->data.mode == ir_var_shader_in ?
"in" : "out", location, comp);
return false;
}
if (info->interpolation != interpolation) {
linker_error(prog,
"%s shader has multiple %sputs sharing the "
"same location that don't have the same "
"interpolation qualification. Location %u "
"component %u.\n",
_mesa_shader_stage_to_string(stage),
var->data.mode == ir_var_shader_in ?
"in" : "out", location, comp);
return false;
}
if (info->centroid != centroid ||
info->sample != sample ||
info->patch != patch) {
linker_error(prog,
"%s shader has multiple %sputs sharing the "
"same location that don't have the same "
"auxiliary storage qualification. Location %u "
"component %u.\n",
_mesa_shader_stage_to_string(stage),
var->data.mode == ir_var_shader_in ?
"in" : "out", location, comp);
return false;
}
}
} else if (comp >= component && comp < last_comp) {
/* Unclaimed component inside our range: claim it. */
info->var = var;
info->base_type_is_integer = base_type_is_integer;
info->base_type_bit_size = base_type_bit_size;
info->interpolation = interpolation;
info->centroid = centroid;
info->sample = sample;
info->patch = patch;
}
comp++;
/* We need to do some special handling for doubles as dvec3 and
* dvec4 consume two consecutive locations. We don't need to
* worry about components beginning at anything other than 0 as
* the spec does not allow this for dvec3 and dvec4.
*/
if (comp == 4 && last_comp > 4) {
/* Carry the remaining components over to the next location. */
last_comp = last_comp - 4;
/* Bump location index and reset the component index */
location++;
comp = 0;
component = 0;
}
}
location++;
}
return true;
}
static bool
validate_explicit_variable_location(const struct gl_constants *consts,
struct explicit_location_info explicit_locations[][4],
ir_variable *var,
gl_shader_program *prog,
gl_linked_shader *sh)
{
const glsl_type *type = get_varying_type(var, sh->Stage);
unsigned num_elements = type->count_attribute_slots(false);
unsigned idx = compute_variable_location_slot(var, sh->Stage);
unsigned slot_limit = idx + num_elements;
/* Vertex shader inputs and fragment shader outputs are validated in
* assign_attribute_or_color_locations() so we should not attempt to
* validate them again here.
*/
unsigned slot_max;
if (var->data.mode == ir_var_shader_out) {
assert(sh->Stage != MESA_SHADER_FRAGMENT);
slot_max =
consts->Program[sh->Stage].MaxOutputComponents / 4;
} else {
assert(var->data.mode == ir_var_shader_in);
assert(sh->Stage != MESA_SHADER_VERTEX);
slot_max =
consts->Program[sh->Stage].MaxInputComponents / 4;
}
if (slot_limit > slot_max) {
linker_error(prog,
"Invalid location %u in %s shader\n",
idx, _mesa_shader_stage_to_string(sh->Stage));
return false;
}
const glsl_type *type_without_array = type->without_array();
if (type_without_array->is_interface()) {
for (unsigned i = 0; i < type_without_array->length; i++) {
glsl_struct_field *field = &type_without_array->fields.structure[i];
unsigned field_location = field->location -
(field->patch ? VARYING_SLOT_PATCH0 : VARYING_SLOT_VAR0);
unsigned field_slots = field->type->count_attribute_slots(false);
if (!check_location_aliasing(explicit_locations, var,
field_location,
0,
field_location + field_slots,
field->type,
field->interpolation,
field->centroid,
field->sample,
field->patch,
prog, sh->Stage)) {
return false;
}
}
} else if (!check_location_aliasing(explicit_locations, var,
idx, var->data.location_frac,
slot_limit, type,
var->data.interpolation,
var->data.centroid,
var->data.sample,
var->data.patch,
prog, sh->Stage)) {
return false;
}
return true;
}
/**
* Validate explicit locations for the inputs to the first stage and the
* outputs of the last stage in a program, if those are not the VS and FS
* shaders.
*/
void
validate_first_and_last_interface_explicit_locations(const struct gl_constants *consts,
struct gl_shader_program *prog,
gl_shader_stage first_stage,
gl_shader_stage last_stage)
{
/* VS inputs and FS outputs are validated in
* assign_attribute_or_color_locations()
*/
bool validate_first_stage = first_stage != MESA_SHADER_VERTEX;
bool validate_last_stage = last_stage != MESA_SHADER_FRAGMENT;
if (!validate_first_stage && !validate_last_stage)
return;
struct explicit_location_info explicit_locations[MAX_VARYING][4];
gl_shader_stage stages[2] = { first_stage, last_stage };
bool validate_stage[2] = { validate_first_stage, validate_last_stage };
ir_variable_mode var_direction[2] = { ir_var_shader_in, ir_var_shader_out };
for (unsigned i = 0; i < 2; i++) {
if (!validate_stage[i])
continue;
gl_shader_stage stage = stages[i];
gl_linked_shader *sh = prog->_LinkedShaders[stage];
assert(sh);
memset(explicit_locations, 0, sizeof(explicit_locations));
foreach_in_list(ir_instruction, node, sh->ir) {
ir_variable *const var = node->as_variable();
if (var == NULL ||
!var->data.explicit_location ||
var->data.location < VARYING_SLOT_VAR0 ||
var->data.mode != var_direction[i])
continue;
if (!validate_explicit_variable_location(
consts, explicit_locations, var, prog, sh)) {
return;
}
}
}
}
/**
* Check if we should force input / output matching between shader
* interfaces.
*
* Section 4.3.4 (Inputs) of the GLSL 4.10 specifications say:
*
* "Only the input variables that are actually read need to be
* written by the previous stage; it is allowed to have
* superfluous declarations of input variables."
*
* However it's not defined anywhere as to how we should handle
* inputs that are not written in the previous stage and it's not
* clear what "actually read" means.
*
* The GLSL 4.20 spec however is much clearer:
*
* "Only the input variables that are statically read need to
* be written by the previous stage; it is allowed to have
* superfluous declarations of input variables."
*
* It also has a table that states it is an error to statically
* read an input that is not defined in the previous stage. While
* it is not an error to not statically write to the output (it
* just needs to be defined to not be an error).
*
* The text in the GLSL 4.20 spec was an attempt to clarify the
* previous spec iterations. However given the difference in spec
* and that some applications seem to depend on not erroring when
* the input is not actually read in control flow we only apply
* this rule to GLSL 4.20 and higher. GLSL 4.10 shaders have been
* seen in the wild that depend on the less strict interpretation.
*/
static bool
static_input_output_matching(struct gl_shader_program *prog)
{
   /* ES programs always qualify; desktop programs need GLSL 4.20 or later. */
   const int min_version = prog->IsES ? 0 : 420;

   return prog->data->Version >= min_version;
}
/**
* Validate that outputs from one stage match inputs of another
*
* Producer outputs without a user-specified explicit location are collected
* by name in a symbol table; those with one are recorded per location and
* component. Each consumer input is then matched either by location (when it
* has a user-specified explicit location) or by name, and the matched pair
* is passed to cross_validate_types_and_qualifiers(). gl_Color and
* gl_SecondaryColor inputs are matched against the corresponding front/back
* color outputs instead.
*
* Errors are reported through linker_error(). The function returns early
* when an explicit location fails validation or falls outside MAX_VARYING.
*
* \param consts    constants forwarded to the validation helpers.
* \param prog      program being linked.
* \param producer  linked shader whose outputs are examined.
* \param consumer  linked shader whose inputs are examined.
*/
void
cross_validate_outputs_to_inputs(const struct gl_constants *consts,
struct gl_shader_program *prog,
gl_linked_shader *producer,
gl_linked_shader *consumer)
{
/* Outputs that are matched by name. */
glsl_symbol_table parameters;
/* Outputs and inputs that are matched by explicit location. */
struct explicit_location_info output_explicit_locations[MAX_VARYING][4] = {};
struct explicit_location_info input_explicit_locations[MAX_VARYING][4] = {};
/* Find all shader outputs in the "producer" stage.
*/
foreach_in_list(ir_instruction, node, producer->ir) {
ir_variable *const var = node->as_variable();
if (var == NULL || var->data.mode != ir_var_shader_out)
continue;
if (!var->data.explicit_location
|| var->data.location < VARYING_SLOT_VAR0)
parameters.add_variable(var);
else {
/* User-defined varyings with explicit locations are handled
* differently because they do not need to have matching names.
*/
if (!validate_explicit_variable_location(consts,
output_explicit_locations,
var, prog, producer)) {
return;
}
}
}
/* Find all shader inputs in the "consumer" stage. Any variables that have
* matching outputs already in the symbol table must have the same type and
* qualifiers.
*
* Exception: if the consumer is the geometry shader, then the inputs
* should be arrays and the type of the array element should match the type
* of the corresponding producer output.
*/
foreach_in_list(ir_instruction, node, consumer->ir) {
ir_variable *const input = node->as_variable();
if (input == NULL || input->data.mode != ir_var_shader_in)
continue;
/* A used gl_Color input is fed by gl_FrontColor / gl_BackColor. */
if (strcmp(input->name, "gl_Color") == 0 && input->data.used) {
const ir_variable *const front_color =
parameters.get_variable("gl_FrontColor");
const ir_variable *const back_color =
parameters.get_variable("gl_BackColor");
cross_validate_front_and_back_color(consts, prog, input,
front_color, back_color,
consumer->Stage, producer->Stage);
} else if (strcmp(input->name, "gl_SecondaryColor") == 0 && input->data.used) {
/* Likewise for the secondary color. */
const ir_variable *const front_color =
parameters.get_variable("gl_FrontSecondaryColor");
const ir_variable *const back_color =
parameters.get_variable("gl_BackSecondaryColor");
cross_validate_front_and_back_color(consts, prog, input,
front_color, back_color,
consumer->Stage, producer->Stage);
} else {
/* The rules for connecting inputs and outputs change in the presence
* of explicit locations. In this case, we no longer care about the
* names of the variables. Instead, we care only about the
* explicitly assigned location.
*/
ir_variable *output = NULL;
if (input->data.explicit_location
&& input->data.location >= VARYING_SLOT_VAR0) {
const glsl_type *type = get_varying_type(input, consumer->Stage);
unsigned num_elements = type->count_attribute_slots(false);
unsigned idx =
compute_variable_location_slot(input, consumer->Stage);
unsigned slot_limit = idx + num_elements;
if (!validate_explicit_variable_location(consts,
input_explicit_locations,
input, prog, consumer)) {
return;
}
/* Look up the output recorded at each slot the input occupies.
* After the loop, `output' holds the result of the last lookup.
*/
while (idx < slot_limit) {
if (idx >= MAX_VARYING) {
linker_error(prog,
"Invalid location %u in %s shader\n", idx,
_mesa_shader_stage_to_string(consumer->Stage));
return;
}
output = output_explicit_locations[idx][input->data.location_frac].var;
if (output == NULL) {
/* A linker failure should only happen when there is no
* output declaration and there is Static Use of the
* declared input.
*/
if (input->data.used && static_input_output_matching(prog)) {
linker_error(prog,
"%s shader input `%s' with explicit location "
"has no matching output\n",
_mesa_shader_stage_to_string(consumer->Stage),
input->name);
break;
}
} else if (input->data.location != output->data.location) {
/* The output found at this slot starts at a different location
* than the input.
*/
linker_error(prog,
"%s shader input `%s' with explicit location "
"has no matching output\n",
_mesa_shader_stage_to_string(consumer->Stage),
input->name);
break;
}
idx++;
}
} else {
/* No user-specified location: match by name. */
output = parameters.get_variable(input->name);
}
if (output != NULL) {
/* Interface blocks have their own validation elsewhere so don't
* try validating them here.
*/
if (!(input->get_interface_type() &&
output->get_interface_type()))
cross_validate_types_and_qualifiers(consts, prog, input, output,
consumer->Stage,
producer->Stage);
} else {
/* Check for input vars with unmatched output vars in prev stage
* taking into account that interface blocks could have a matching
* output but with different name, so we ignore them.
*/
assert(!input->data.assigned);
if (input->data.used && !input->get_interface_type() &&
!input->data.explicit_location &&
static_input_output_matching(prog))
linker_error(prog,
"%s shader input `%s' "
"has no matching output in the previous stage\n",
_mesa_shader_stage_to_string(consumer->Stage),
input->name);
}
}
}
}
/**
 * Demote shader inputs or outputs (selected by \p mode) that are not used
 * in other stages, then run dead code elimination until it stops making
 * progress.
 *
 * Nothing is done for separate shader objects.
 */
static void
remove_unused_shader_inputs_and_outputs(bool is_separate_shader_object,
                                        gl_linked_shader *sh,
                                        enum ir_variable_mode mode)
{
   if (is_separate_shader_object)
      return;

   foreach_in_list(ir_instruction, node, sh->ir) {
      ir_variable *const var = node->as_variable();

      if (var == NULL || var->data.mode != int(mode))
         continue;

      /* A shader 'in' or 'out' variable is only really an input or output if
       * its value is used by other shader stages.  This will cause the
       * variable to have a location assigned.  Matched variables and
       * variables kept only for transform feedback are left alone.
       */
      if (!var->data.is_unmatched_generic_inout || var->data.is_xfb_only)
         continue;

      assert(var->data.mode != ir_var_temporary);

      /* Assign zeros to demoted inputs to allow more optimizations. */
      const bool is_input = var->data.mode == ir_var_shader_in;
      if (is_input && !var->constant_value)
         var->constant_value = ir_constant::zero(var, var->type);

      var->data.mode = ir_var_auto;
   }

   /* Eliminate code that is now dead due to unused inputs/outputs being
    * demoted.
    */
   bool progress;
   do {
      progress = do_dead_code(sh->ir, false);
   } while (progress);
}
/**
 * Initialize this object based on a string that was passed to
 * glTransformFeedbackVaryings.
 *
 * With ARB_transform_feedback3, "gl_NextBuffer" and "gl_SkipComponents1" to
 * "gl_SkipComponents4" are recognized as special tokens and no variable
 * name is parsed for them.
 *
 * If the input is malformed, this call still succeeds, but it sets
 * this->var_name to a malformed input, so tfeedback_decl::find_output_var()
 * will fail to find any matching variable.
 */
void
tfeedback_decl::init(const struct gl_constants *consts,
                     const struct gl_extensions *exts,
                     const void *mem_ctx,
                     const char *input)
{
   /* We don't have to be pedantic about what is a valid GLSL variable name,
    * because any variable with an invalid name can't exist in the IR anyway.
    */
   this->location = -1;
   this->orig_name = input;
   this->lowered_builtin_array_variable = none;
   this->skip_components = 0;
   this->next_buffer_separator = false;
   this->matched_candidate = NULL;
   this->stream_id = 0;
   this->buffer = 0;
   this->offset = 0;

   if (exts->ARB_transform_feedback3) {
      /* Parse gl_NextBuffer. */
      if (strcmp(input, "gl_NextBuffer") == 0) {
         this->next_buffer_separator = true;
         return;
      }

      /* Parse gl_SkipComponents: entry n skips n + 1 components. */
      static const char *const skip_tokens[4] = {
         "gl_SkipComponents1",
         "gl_SkipComponents2",
         "gl_SkipComponents3",
         "gl_SkipComponents4",
      };
      for (unsigned n = 0; n < 4; n++) {
         if (strcmp(input, skip_tokens[n]) == 0) {
            this->skip_components = n + 1;
            return;
         }
      }
   }

   /* Parse a declaration. */
   const char *base_name_end;
   const long subscript = parse_program_resource_name(input, strlen(input),
                                                      &base_name_end);

   this->var_name = ralloc_strndup(mem_ctx, input, base_name_end - input);
   if (this->var_name == NULL) {
      _mesa_error_no_memory(__func__);
      return;
   }

   this->is_subscripted = subscript >= 0;
   if (this->is_subscripted)
      this->array_subscript = subscript;

   /* For drivers that lower gl_ClipDistance to gl_ClipDistanceMESA, this
    * class must behave specially to account for the fact that gl_ClipDistance
    * is converted from a float[8] to a vec4[2].
    */
   const bool lower_clip_cull =
      consts->ShaderCompilerOptions[MESA_SHADER_VERTEX].LowerCombinedClipCullDistance;
   if (lower_clip_cull) {
      if (strcmp(this->var_name, "gl_ClipDistance") == 0)
         this->lowered_builtin_array_variable = clip_distance;
      else if (strcmp(this->var_name, "gl_CullDistance") == 0)
         this->lowered_builtin_array_variable = cull_distance;
   }

   if (consts->LowerTessLevel) {
      if (strcmp(this->var_name, "gl_TessLevelOuter") == 0)
         this->lowered_builtin_array_variable = tess_level_outer;
      else if (strcmp(this->var_name, "gl_TessLevelInner") == 0)
         this->lowered_builtin_array_variable = tess_level_inner;
   }
}
/**
 * Report whether two tfeedback_decl objects name the same varying, including
 * the selected array element when a subscript is present.
 *
 * Both arguments must be real varyings (not skip/next-buffer markers).
 */
bool
tfeedback_decl::is_same(const tfeedback_decl &x, const tfeedback_decl &y)
{
   assert(x.is_varying() && y.is_varying());

   /* Different names, or one subscripted and the other not: never equal. */
   if (strcmp(x.var_name, y.var_name) != 0 ||
       x.is_subscripted != y.is_subscripted)
      return false;

   /* Same name and same "subscriptedness"; the index only matters when a
    * subscript is actually present.
    */
   return !x.is_subscripted || x.array_subscript == y.array_subscript;
}
/**
* Assign a location and stream ID for this tfeedback_decl object based on the
* transform feedback candidate found by find_candidate.
*
* If an error occurs, the error is reported through linker_error() and false
* is returned.
*/
bool
tfeedback_decl::assign_location(const struct gl_constants *consts,
struct gl_shader_program *prog)
{
assert(this->is_varying());
unsigned fine_location
= this->matched_candidate->toplevel_var->data.location * 4
+ this->matched_candidate->toplevel_var->data.location_frac
+ this->matched_candidate->struct_offset_floats;
const unsigned dmul =
this->matched_candidate->type->without_array()->is_64bit() ? 2 : 1;
if (this->matched_candidate->type->is_array()) {
/* Array variable */
const unsigned matrix_cols =
this->matched_candidate->type->fields.array->matrix_columns;
const unsigned vector_elements =
this->matched_candidate->type->fields.array->vector_elements;
unsigned actual_array_size;
switch (this->lowered_builtin_array_variable) {
case clip_distance:
actual_array_size = prog->last_vert_prog ?
prog->last_vert_prog->info.clip_distance_array_size : 0;
break;
case cull_distance:
actual_array_size = prog->last_vert_prog ?
prog->last_vert_prog->info.cull_distance_array_size : 0;
break;
case tess_level_outer:
actual_array_size = 4;
break;
case tess_level_inner:
actual_array_size = 2;
break;
case none:
default:
actual_array_size = this->matched_candidate->type->array_size();
break;
}
if (this->is_subscripted) {
/* Check array bounds. */
if (this->array_subscript >= actual_array_size) {
linker_error(prog, "Transform feedback varying %s has index "
"%i, but the array size is %u.",
this->orig_name, this->array_subscript,
actual_array_size);
return false;
}
unsigned array_elem_size = this->lowered_builtin_array_variable ?
1 : vector_elements * matrix_cols * dmul;
fine_location += array_elem_size * this->array_subscript;
this->size = 1;
} else {
this->size = actual_array_size;
}
this->vector_elements = vector_elements;
this->matrix_columns = matrix_cols;
if (this->lowered_builtin_array_variable)
this->type = GL_FLOAT;
else
this->type = this->matched_candidate->type->fields.array->gl_type;
} else {
/* Regular variable (scalar, vector, or matrix) */
if (this->is_subscripted) {
linker_error(prog, "Transform feedback varying %s requested, "
"but %s is not an array.",
this->orig_name, this->var_name);
return false;
}
this->size = 1;
this->vector_elements = this->matched_candidate->type->vector_elements;
this->matrix_columns = this->matched_candidate->type->matrix_columns;
this->type = this->matched_candidate->type->gl_type;
}
this->location = fine_location / 4;
this->location_frac = fine_location % 4;
/* From GL_EXT_transform_feedback:
* A program will fail to link if:
*
* * the total number of components to capture in any varying
* variable in <varyings> is greater than the constant
* MAX_TRANSFORM_FEEDBACK_SEPARATE_COMPONENTS_EXT and the
* buffer mode is SEPARATE_ATTRIBS_EXT;
*/
if (prog->TransformFeedback.BufferMode == GL_SEPARATE_ATTRIBS &&
this->num_components() >
consts->MaxTransformFeedbackSeparateComponents) {
linker_error(prog, "Transform feedback varying %s exceeds "
"MAX_TRANSFORM_FEEDBACK_SEPARATE_COMPONENTS.",
this->orig_name);
return false;
}
/* Only transform feedback varyings can be assigned to non-zero streams,
* so assign the stream id here.
*/
this->stream_id = this->matched_candidate->toplevel_var->data.stream;
unsigned array_offset = this->array_subscript * 4 * dmul;
unsigned struct_offset = this->matched_candidate->xfb_offset_floats * 4;
this->buffer = this->matched_candidate->toplevel_var->data.xfb_buffer;
this->offset = this->matched_candidate->toplevel_var->data.offset +
array_offset + struct_offset;
return true;
}
/**
 * Number of output entries this declaration will need when stored.
 *
 * Markers that are not varyings need none.  A varying with a user-specified
 * location is counted per array element and matrix column, rounded up to
 * whole slots; any other varying is counted as a run of components that
 * starts at location_frac.
 */
unsigned
tfeedback_decl::get_num_outputs() const
{
   if (!this->is_varying())
      return 0;

   if (!varying_has_user_specified_location(this->matched_candidate->toplevel_var))
      return (this->num_components() + this->location_frac + 3) / 4;

   /* 64-bit types take two components per element. */
   const unsigned component_scale = this->is_64bit() ? 2 : 1;
   const unsigned slots_per_column =
      DIV_ROUND_UP(this->vector_elements * component_scale, 4);

   return this->size * this->matrix_columns * slots_per_column;
}
/**
* Update gl_transform_feedback_info to reflect this tfeedback_decl.
*
* A gl_SkipComponents or gl_NextBuffer entry only appends a Varyings[] entry
* (a skip entry additionally grows the buffer's Stride). A real varying is
* checked against the interleaved component limit, tested for overlap with
* components already captured into the same buffer, and then split into
* Outputs[] entries, one per consumed location.
*
* \param buffer index used for info->Buffers[] and used_components[]
* \param buffer_index value recorded as the varying's BufferIndex
* \param max_outputs upper bound on info->NumOutputs; only checked by assert
* \param used_components per-buffer bitsets of components already captured;
* an entry is allocated from \c mem_ctx on first use
* \param explicit_stride may be NULL; when the entry for \c buffer is true the
* existing Stride is validated instead of recomputed
* \param max_member_alignment may be NULL; when used together with
* \c has_xfb_qualifiers the new Stride is aligned to it
* \param has_xfb_qualifiers take the capture offset from this->offset rather
* than from the buffer's running Stride
*
* If an error occurs, the error is reported through linker_error() and false
* is returned.
*/
bool
tfeedback_decl::store(const struct gl_constants *consts,
struct gl_shader_program *prog,
struct gl_transform_feedback_info *info,
unsigned buffer, unsigned buffer_index,
const unsigned max_outputs,
BITSET_WORD *used_components[MAX_FEEDBACK_BUFFERS],
bool *explicit_stride, unsigned *max_member_alignment,
bool has_xfb_qualifiers, const void* mem_ctx) const
{
/* Running capture offset. It is multiplied by 4 wherever it is stored as
* Offset or printed, and this->offset is divided by 4 to obtain it.
*/
unsigned xfb_offset = 0;
unsigned size = this->size;
/* Handle gl_SkipComponents. */
if (this->skip_components) {
info->Buffers[buffer].Stride += this->skip_components;
size = this->skip_components;
goto store_varying;
}
/* gl_NextBuffer is recorded as a zero-sized varying. */
if (this->next_buffer_separator) {
size = 0;
goto store_varying;
}
if (has_xfb_qualifiers) {
xfb_offset = this->offset / 4;
} else {
xfb_offset = info->Buffers[buffer].Stride;
}
info->Varyings[info->NumVarying].Offset = xfb_offset * 4;
/* The extra scope keeps the declarations below out of the way of the
* gotos above, which jump to store_varying.
*/
{
unsigned location = this->location;
unsigned location_frac = this->location_frac;
unsigned num_components = this->num_components();
/* From GL_EXT_transform_feedback:
*
* " A program will fail to link if:
*
* * the total number of components to capture is greater than the
* constant MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS_EXT
* and the buffer mode is INTERLEAVED_ATTRIBS_EXT."
*
* From GL_ARB_enhanced_layouts:
*
* " The resulting stride (implicit or explicit) must be less than or
* equal to the implementation-dependent constant
* gl_MaxTransformFeedbackInterleavedComponents."
*/
if ((prog->TransformFeedback.BufferMode == GL_INTERLEAVED_ATTRIBS ||
has_xfb_qualifiers) &&
xfb_offset + num_components >
consts->MaxTransformFeedbackInterleavedComponents) {
linker_error(prog,
"The MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS "
"limit has been exceeded.");
return false;
}
/* From the OpenGL 4.60.5 spec, section 4.4.2. Output Layout Qualifiers,
* Page 76, (Transform Feedback Layout Qualifiers):
*
* " No aliasing in output buffers is allowed: It is a compile-time or
* link-time error to specify variables with overlapping transform
* feedback offsets."
*/
const unsigned max_components =
consts->MaxTransformFeedbackInterleavedComponents;
const unsigned first_component = xfb_offset;
const unsigned last_component = xfb_offset + num_components - 1;
const unsigned start_word = BITSET_BITWORD(first_component);
const unsigned end_word = BITSET_BITWORD(last_component);
BITSET_WORD *used;
assert(last_component < max_components);
/* Allocate this buffer's bitset the first time the buffer is used. */
if (!used_components[buffer]) {
used_components[buffer] =
rzalloc_array(mem_ctx, BITSET_WORD, BITSET_WORDS(max_components));
}
used = used_components[buffer];
/* Walk every bitset word touched by [first_component, last_component],
* clamping the bit range in the first and last word; any bit that is
* already set means two varyings overlap.
*/
for (unsigned word = start_word; word <= end_word; word++) {
unsigned start_range = 0;
unsigned end_range = BITSET_WORDBITS - 1;
if (word == start_word)
start_range = first_component % BITSET_WORDBITS;
if (word == end_word)
end_range = last_component % BITSET_WORDBITS;
if (used[word] & BITSET_RANGE(start_range, end_range)) {
linker_error(prog,
"variable '%s', xfb_offset (%d) is causing aliasing.",
this->orig_name, xfb_offset * 4);
return false;
}
used[word] |= BITSET_RANGE(start_range, end_range);
}
/* Components in one vector of this type (doubled for 64-bit types). */
const unsigned type_num_components =
this->vector_elements * (this->is_64bit() ? 2 : 1);
unsigned current_type_components_left = type_num_components;
/* Emit one output per consumed location until everything is covered. */
while (num_components > 0) {
unsigned output_size = 0;
/* From GL_ARB_enhanced_layouts:
*
* "When an attribute variable declared using an array type is bound to
* generic attribute index <i>, the active array elements are assigned to
* consecutive generic attributes beginning with generic attribute <i>. The
* number of attributes and components assigned to each element are
* determined according to the data type of array elements and "component"
* layout qualifier (if any) specified in the declaration of the array."
*
* "When an attribute variable declared using a matrix type is bound to a
* generic attribute index <i>, its values are taken from consecutive generic
* attributes beginning with generic attribute <i>. Such matrices are
* treated as an array of column vectors with values taken from the generic
* attributes.
* This means there may be gaps in the varyings we are taking values from."
*
* Examples:
*
* | layout(location=0) dvec3[2] a; | layout(location=4) vec2[4] b; |
* | | |
* | 32b 32b 32b 32b | 32b 32b 32b 32b |
* | 0 X X Y Y | 4 X Y 0 0 |
* | 1 Z Z 0 0 | 5 X Y 0 0 |
* | 2 X X Y Y | 6 X Y 0 0 |
* | 3 Z Z 0 0 | 7 X Y 0 0 |
*
*/
if (varying_has_user_specified_location(this->matched_candidate->toplevel_var)) {
/* Never cross a vector boundary within one output; start a fresh
* vector once the current one is used up.
*/
output_size = MIN3(num_components, current_type_components_left, 4);
current_type_components_left -= output_size;
if (current_type_components_left == 0) {
current_type_components_left = type_num_components;
}
} else {
/* Take whatever is left of the current location. */
output_size = MIN2(num_components, 4 - location_frac);
}
assert((info->NumOutputs == 0 && max_outputs == 0) ||
info->NumOutputs < max_outputs);
/* From the ARB_enhanced_layouts spec:
*
* "If such a block member or variable is not written during a shader
* invocation, the buffer contents at the assigned offset will be
* undefined. Even if there are no static writes to a variable or
* member that is assigned a transform feedback offset, the space is
* still allocated in the buffer and still affects the stride."
*/
if (this->is_varying_written()) {
info->Outputs[info->NumOutputs].ComponentOffset = location_frac;
info->Outputs[info->NumOutputs].OutputRegister = location;
info->Outputs[info->NumOutputs].NumComponents = output_size;
info->Outputs[info->NumOutputs].StreamId = stream_id;
info->Outputs[info->NumOutputs].OutputBuffer = buffer;
info->Outputs[info->NumOutputs].DstOffset = xfb_offset;
++info->NumOutputs;
}
info->Buffers[buffer].Stream = this->stream_id;
/* Advance even when nothing was emitted, so the space stays reserved. */
xfb_offset += output_size;
num_components -= output_size;
location++;
location_frac = 0;
}
}
/* With an explicit stride the stride is only validated; otherwise it is
* updated to the end of this varying (aligned when alignment is tracked).
*/
if (explicit_stride && explicit_stride[buffer]) {
if (this->is_64bit() && info->Buffers[buffer].Stride % 2) {
linker_error(prog, "invalid qualifier xfb_stride=%d must be a "
"multiple of 8 as its applied to a type that is or "
"contains a double.",
info->Buffers[buffer].Stride * 4);
return false;
}
if (xfb_offset > info->Buffers[buffer].Stride) {
linker_error(prog, "xfb_offset (%d) overflows xfb_stride (%d) for "
"buffer (%d)", xfb_offset * 4,
info->Buffers[buffer].Stride * 4, buffer);
return false;
}
} else {
if (max_member_alignment && has_xfb_qualifiers) {
max_member_alignment[buffer] = MAX2(max_member_alignment[buffer],
this->is_64bit() ? 2 : 1);
info->Buffers[buffer].Stride = ALIGN(xfb_offset,
max_member_alignment[buffer]);
} else {
info->Buffers[buffer].Stride = xfb_offset;
}
}
/* Reached directly by the skip/next-buffer markers as well as by real
* varyings: append the Varyings[] entry. The name is copied into storage
* owned by prog.
*/
store_varying:
info->Varyings[info->NumVarying].name.string = ralloc_strdup(prog,
this->orig_name);
resource_name_updated(&info->Varyings[info->NumVarying].name);
info->Varyings[info->NumVarying].Type = this->type;
info->Varyings[info->NumVarying].Size = size;
info->Varyings[info->NumVarying].BufferIndex = buffer_index;
info->NumVarying++;
info->Buffers[buffer].NumVaryings++;
return true;
}
/**
 * Look this declaration up in the table of transform feedback candidates and
 * remember the result in matched_candidate.
 *
 * Built-in arrays that have been lowered are searched for under their
 * lowered "...MESA" name instead of the name the application supplied.
 *
 * \return the matched candidate, or NULL after reporting a link error if no
 *         candidate exists.
 */
const tfeedback_candidate *
tfeedback_decl::find_candidate(gl_shader_program *prog,
                               hash_table *tfeedback_candidates)
{
   /* Default to the parsed name; override it for lowered built-ins. */
   const char *key = this->var_name;
   if (this->lowered_builtin_array_variable == clip_distance)
      key = "gl_ClipDistanceMESA";
   else if (this->lowered_builtin_array_variable == cull_distance)
      key = "gl_CullDistanceMESA";
   else if (this->lowered_builtin_array_variable == tess_level_outer)
      key = "gl_TessLevelOuterMESA";
   else if (this->lowered_builtin_array_variable == tess_level_inner)
      key = "gl_TessLevelInnerMESA";

   hash_entry *found = _mesa_hash_table_search(tfeedback_candidates, key);
   if (found != NULL)
      this->matched_candidate = (const tfeedback_candidate *) found->data;
   else
      this->matched_candidate = NULL;

   if (this->matched_candidate == NULL) {
      /* From GL_EXT_transform_feedback:
       *   A program will fail to link if:
       *
       *   * any variable name specified in the <varyings> array is not
       *     declared as an output in the geometry shader (if present) or
       *     the vertex shader (if no geometry shader is present);
       */
      linker_error(prog, "Transform feedback varying %s undeclared.",
                   this->orig_name);
   }

   return this->matched_candidate;
}
/**
 * Replace the previously matched candidate with \c candidate.  This is needed
 * when a new varying has to be created to match the xfb declaration, for
 * example to fulfil an alignment criterion.
 */
void
tfeedback_decl::set_lowered_candidate(const tfeedback_candidate *candidate)
{
   /* Any subscript from the original declaration no longer applies to the
    * replacement, so drop it.
    */
   this->array_subscript = 0;
   this->is_subscripted = false;

   this->matched_candidate = candidate;
}
/**
* Parse all the transform feedback declarations that were passed to
* glTransformFeedbackVaryings() and store them in tfeedback_decl objects.
*
* If an error occurs, the error is reported through linker_error() and false
* is returned.
*/
static bool
parse_tfeedback_decls(const struct gl_constants *consts,
const struct gl_extensions *exts,
struct gl_shader_program *prog,
const void *mem_ctx, unsigned num_names,
char **varying_names, tfeedback_decl *decls)
{
for (unsigned i = 0; i < num_names; ++i) {
decls[i].init(consts, exts, mem_ctx, varying_names[i]);
if (!decls[i].is_varying())
continue;
/* From GL_EXT_transform_feedback:
* A program will fail to link if:
*
* * any two entries in the <varyings> array specify the same varying
* variable;
*
* We interpret this to mean "any two entries in the <varyings> array
* specify the same varying variable and array index", since transform
* feedback of arrays would be useless otherwise.
*/
for (unsigned j = 0; j < i; ++j) {
if (decls[j].is_varying()) {
if (tfeedback_decl::is_same(decls[i], decls[j])) {
linker_error(prog, "Transform feedback varying %s specified "
"more than once.", varying_names[i]);
return false;
}
}
}
}
return true;
}
/**
 * qsort() comparator ordering tfeedback_decl objects by buffer first and by
 * offset within a buffer second.
 *
 * The result is produced by explicit comparison rather than by subtracting
 * the two values: a difference that does not fit in an int would otherwise
 * be reported with the wrong sign.
 */
static int
cmp_xfb_offset(const void * x_generic, const void * y_generic)
{
   tfeedback_decl *x = (tfeedback_decl *) x_generic;
   tfeedback_decl *y = (tfeedback_decl *) y_generic;

   const unsigned x_buffer = x->get_buffer();
   const unsigned y_buffer = y->get_buffer();
   if (x_buffer != y_buffer)
      return x_buffer < y_buffer ? -1 : 1;

   const unsigned x_offset = x->get_offset();
   const unsigned y_offset = y->get_offset();
   if (x_offset != y_offset)
      return x_offset < y_offset ? -1 : 1;

   return 0;
}
/**
* Store transform feedback location assignments into
* prog->sh.LinkedTransformFeedback based on the data stored in
* tfeedback_decls.
*
* Nothing is done (and true is returned) when the program has no
* last_vert_prog. Otherwise a fresh gl_transform_feedback_info is allocated
* on that program, its Varyings and Outputs arrays are sized from the
* declarations, and every declaration is stored in turn.
*
* \param num_tfeedback_decls number of entries in \c tfeedback_decls
* \param tfeedback_decls declarations to store; sorted in place by buffer
* and offset when \c has_xfb_qualifiers is set
* \param has_xfb_qualifiers buffers and offsets come from the declarations
* themselves rather than from their order
* \param mem_ctx passed through to tfeedback_decl::store()
*
* If an error occurs, the error is reported through linker_error() and false
* is returned.
*/
static bool
store_tfeedback_info(const struct gl_constants *consts,
struct gl_shader_program *prog,
unsigned num_tfeedback_decls,
tfeedback_decl *tfeedback_decls, bool has_xfb_qualifiers,
const void *mem_ctx)
{
if (!prog->last_vert_prog)
return true;
/* Make sure MaxTransformFeedbackBuffers is less than 32 so the bitmask for
* tracking the number of buffers doesn't overflow.
*/
assert(consts->MaxTransformFeedbackBuffers < 32);
bool separate_attribs_mode =
prog->TransformFeedback.BufferMode == GL_SEPARATE_ATTRIBS;
struct gl_program *xfb_prog = prog->last_vert_prog;
xfb_prog->sh.LinkedTransformFeedback =
rzalloc(xfb_prog, struct gl_transform_feedback_info);
/* The xfb_offset qualifier does not have to be used in increasing order
* however some drivers expect to receive the list of transform feedback
* declarations in order so sort it now for convenience.
*/
if (has_xfb_qualifiers) {
qsort(tfeedback_decls, num_tfeedback_decls, sizeof(*tfeedback_decls),
cmp_xfb_offset);
}
/* One Varyings[] entry per declaration, markers included. */
xfb_prog->sh.LinkedTransformFeedback->Varyings =
rzalloc_array(xfb_prog, struct gl_transform_feedback_varying_info,
num_tfeedback_decls);
/* Only declarations whose varying is written contribute outputs. */
unsigned num_outputs = 0;
for (unsigned i = 0; i < num_tfeedback_decls; ++i) {
if (tfeedback_decls[i].is_varying_written())
num_outputs += tfeedback_decls[i].get_num_outputs();
}
xfb_prog->sh.LinkedTransformFeedback->Outputs =
rzalloc_array(xfb_prog, struct gl_transform_feedback_output,
num_outputs);
unsigned num_buffers = 0;
/* Bitmask of buffers that have at least one varying attached. */
unsigned buffers = 0;
BITSET_WORD *used_components[MAX_FEEDBACK_BUFFERS] = {};
if (!has_xfb_qualifiers && separate_attribs_mode) {
/* GL_SEPARATE_ATTRIBS */
/* Every declaration gets a buffer of its own. */
for (unsigned i = 0; i < num_tfeedback_decls; ++i) {
if (!tfeedback_decls[i].store(consts, prog,
xfb_prog->sh.LinkedTransformFeedback,
num_buffers, num_buffers, num_outputs,
used_components, NULL, NULL,
has_xfb_qualifiers, mem_ctx))
return false;
buffers |= 1 << num_buffers;
num_buffers++;
}
}
else {
/* GL_INTERLEAVED_ATTRIBS (this path also handles xfb qualifiers) */
/* Stream of the first varying stored in the current buffer; -1 until
* one has been seen.
*/
int buffer_stream_id = -1;
unsigned buffer =
num_tfeedback_decls ? tfeedback_decls[0].get_buffer() : 0;
bool explicit_stride[MAX_FEEDBACK_BUFFERS] = { false };
unsigned max_member_alignment[MAX_FEEDBACK_BUFFERS] = { 1, 1, 1, 1 };
/* Apply any xfb_stride global qualifiers */
if (has_xfb_qualifiers) {
for (unsigned j = 0; j < MAX_FEEDBACK_BUFFERS; j++) {
if (prog->TransformFeedback.BufferStride[j]) {
explicit_stride[j] = true;
xfb_prog->sh.LinkedTransformFeedback->Buffers[j].Stride =
prog->TransformFeedback.BufferStride[j] / 4;
}
}
}
for (unsigned i = 0; i < num_tfeedback_decls; ++i) {
if (has_xfb_qualifiers &&
buffer != tfeedback_decls[i].get_buffer()) {
/* we have moved to the next buffer so reset stream id */
buffer_stream_id = -1;
num_buffers++;
}
/* A gl_NextBuffer marker is stored against the current buffer and then
* ends it.
*/
if (tfeedback_decls[i].is_next_buffer_separator()) {
if (!tfeedback_decls[i].store(consts, prog,
xfb_prog->sh.LinkedTransformFeedback,
buffer, num_buffers, num_outputs,
used_components, explicit_stride,
max_member_alignment,
has_xfb_qualifiers,
mem_ctx))
return false;
num_buffers++;
buffer_stream_id = -1;
continue;
}
if (has_xfb_qualifiers) {
buffer = tfeedback_decls[i].get_buffer();
} else {
buffer = num_buffers;
}
if (tfeedback_decls[i].is_varying()) {
if (buffer_stream_id == -1) {
/* First varying writing to this buffer: remember its stream */
buffer_stream_id = (int) tfeedback_decls[i].get_stream_id();
/* Only mark a buffer as active when there is a varying
* attached to it. This behaviour is based on a revised version
* of section 13.2.2 of the GL 4.6 spec.
*/
buffers |= 1 << buffer;
} else if (buffer_stream_id !=
(int) tfeedback_decls[i].get_stream_id()) {
/* Varying writes to the same buffer from a different stream */
linker_error(prog,
"Transform feedback can't capture varyings belonging "
"to different vertex streams in a single buffer. "
"Varying %s writes to buffer from stream %u, other "
"varyings in the same buffer write from stream %u.",
tfeedback_decls[i].name(),
tfeedback_decls[i].get_stream_id(),
buffer_stream_id);
return false;
}
}
if (!tfeedback_decls[i].store(consts, prog,
xfb_prog->sh.LinkedTransformFeedback,
buffer, num_buffers, num_outputs,
used_components, explicit_stride,
max_member_alignment,
has_xfb_qualifiers,
mem_ctx))
return false;
}
}
/* Every output slot counted up front must have been filled. */
assert(xfb_prog->sh.LinkedTransformFeedback->NumOutputs == num_outputs);
xfb_prog->sh.LinkedTransformFeedback->ActiveBuffers = buffers;
return true;
}
namespace {
/**
* Data structure recording the relationship between outputs of one shader
* stage (the "producer") and inputs of another (the "consumer").
*
* Matches are collected with record(), given locations with
* assign_locations(), and kept in a malloc'ed array that is released by the
* destructor.
*/
class varying_matches
{
public:
/**
* The flags and stages are stored unchanged in the members of the same
* name.
*/
varying_matches(bool disable_varying_packing,
bool disable_xfb_packing,
bool xfb_enabled,
bool enhanced_layouts_enabled,
bool prefer_pot_aligned_varyings,
gl_shader_stage producer_stage,
gl_shader_stage consumer_stage);
~varying_matches();
/** Add a producer/consumer pair; either (but not both) may be NULL. */
void record(ir_variable *producer_var, ir_variable *consumer_var);
/** Sort the recorded matches and choose a location for each of them. */
unsigned assign_locations(struct gl_shader_program *prog,
uint8_t components[],
uint64_t reserved_slots);
/* NOTE(review): defined outside this section; presumably writes the
* locations chosen by assign_locations() back to the variables - confirm.
*/
void store_locations() const;
private:
/** See the definition; depends on the stages and on xfb_enabled. */
bool is_varying_packing_safe(const glsl_type *type,
const ir_variable *var) const;
/**
* If true, this driver disables varying packing, so all varyings need to
* be aligned on slot boundaries, and take up a number of slots equal to
* their number of matrix columns times their array size.
*
* Packing may also be disabled because our current packing method is not
* safe in SSO or versions of OpenGL where interpolation qualifiers are not
* guaranteed to match across stages.
*/
const bool disable_varying_packing;
/**
* If true, this driver disables packing for varyings used by transform
* feedback.
*/
const bool disable_xfb_packing;
/**
* If true, this driver has transform feedback enabled. The transform
* feedback code usually requires at least some packing be done even
* when varying packing is disabled, fortunately where transform feedback
* requires packing it's safe to override the disabled setting. See
* is_varying_packing_safe().
*/
const bool xfb_enabled;
/* NOTE(review): not read anywhere in this section; presumably whether
* ARB_enhanced_layouts is available - confirm against the users.
*/
const bool enhanced_layouts_enabled;
/**
* If true, this driver prefers varyings to be aligned to power of two
* in a slot.
*/
const bool prefer_pot_aligned_varyings;
/**
* Enum representing the order in which varyings are packed within a
* packing class.
*
* Currently we pack vec4's first, then vec2's, then scalar values, then
* vec3's. This order ensures that the only vectors that are at risk of
* having to be "double parked" (split between two adjacent varying slots)
* are the vec3's.
*/
enum packing_order_enum {
PACKING_ORDER_VEC4,
PACKING_ORDER_VEC2,
PACKING_ORDER_SCALAR,
PACKING_ORDER_VEC3,
};
static unsigned compute_packing_class(const ir_variable *var);
static packing_order_enum compute_packing_order(const ir_variable *var);
/* qsort() comparators; assign_locations() picks one depending on which
* kind of packing is disabled.
*/
static int match_comparator(const void *x_generic, const void *y_generic);
static int xfb_comparator(const void *x_generic, const void *y_generic);
static int not_xfb_comparator(const void *x_generic, const void *y_generic);
/**
* Structure recording the relationship between a single producer output
* and a single consumer input.
*/
struct match {
/**
* Packing class for this varying, computed by compute_packing_class().
*/
unsigned packing_class;
/**
* Packing order for this varying, computed by compute_packing_order().
*/
packing_order_enum packing_order;
/**
* The output variable in the producer stage.
*/
ir_variable *producer_var;
/**
* The input variable in the consumer stage.
*/
ir_variable *consumer_var;
/**
* The location which has been assigned for this varying. This is
* expressed in multiples of a float, with the first generic varying
* (i.e. the one referred to by VARYING_SLOT_VAR0) represented by the
* value 0.
*/
unsigned generic_location;
} *matches;
/**
* The number of elements in the \c matches array that are currently in
* use.
*/
unsigned num_matches;
/**
* The number of elements that were set aside for the \c matches array when
* it was allocated.
*/
unsigned matches_capacity;
/** Stage whose outputs are being matched. */
gl_shader_stage producer_stage;
/** Stage whose inputs are being matched. */
gl_shader_stage consumer_stage;
};
} /* anonymous namespace */
/**
 * Remember the packing configuration and the two stages, and set up an empty
 * match list.
 */
varying_matches::varying_matches(bool disable_varying_packing,
                                 bool disable_xfb_packing,
                                 bool xfb_enabled,
                                 bool enhanced_layouts_enabled,
                                 bool prefer_pot_aligned_varyings,
                                 gl_shader_stage producer_stage,
                                 gl_shader_stage consumer_stage)
   : disable_varying_packing(disable_varying_packing),
     disable_xfb_packing(disable_xfb_packing),
     xfb_enabled(xfb_enabled),
     enhanced_layouts_enabled(enhanced_layouts_enabled),
     prefer_pot_aligned_varyings(prefer_pot_aligned_varyings),
     producer_stage(producer_stage),
     consumer_stage(consumer_stage)
{
   /* The starting capacity is an arbitrary compromise: big enough for many
    * programs, small enough not to waste much.  varying_matches::record()
    * grows the array once more varyings than this have been recorded.
    */
   const unsigned initial_capacity = 8;

   this->num_matches = 0;
   this->matches_capacity = initial_capacity;
   this->matches = (match *) malloc(sizeof(match) * initial_capacity);
}
/** Release the match list allocated by the constructor / record(). */
varying_matches::~varying_matches()
{
   free(matches);
}
/**
 * Packing is always safe on individual arrays, structures, and matrices.  It
 * is also safe if the varying is only used for transform feedback.
 *
 * It is never reported as safe when transform feedback is disabled, or on an
 * interface that involves a tessellation stage as tested below.
 */
bool
varying_matches::is_varying_packing_safe(const glsl_type *type,
                                         const ir_variable *var) const
{
   const bool tess_interface =
      consumer_stage == MESA_SHADER_TESS_EVAL ||
      consumer_stage == MESA_SHADER_TESS_CTRL ||
      producer_stage == MESA_SHADER_TESS_CTRL;

   if (tess_interface || !xfb_enabled)
      return false;

   if (type->is_array() || type->is_struct() || type->is_matrix())
      return true;

   return var->data.is_xfb_only;
}
/**
 * Record the given producer/consumer variable pair in the list of variables
 * that should later be assigned locations.
 *
 * It is permissible for \c consumer_var to be NULL (this happens if a
 * variable is output by the producer and consumed by transform feedback, but
 * not consumed by the consumer).
 *
 * If \c producer_var has already been paired up with a consumer_var, or
 * producer_var is part of fixed pipeline functionality (and hence already has
 * a location assigned), this function has no effect.
 *
 * If the match list has to grow and the allocation fails, the failure is
 * reported through _mesa_error_no_memory(), the existing list is kept intact
 * and the pair is not recorded; neither variable is modified in that case.
 *
 * Note: as a side effect this function may change the interpolation type of
 * \c producer_var, but only when the change couldn't possibly affect
 * rendering.
 */
void
varying_matches::record(ir_variable *producer_var, ir_variable *consumer_var)
{
   assert(producer_var != NULL || consumer_var != NULL);

   if ((producer_var && (!producer_var->data.is_unmatched_generic_inout ||
                         producer_var->data.explicit_location)) ||
       (consumer_var && (!consumer_var->data.is_unmatched_generic_inout ||
                         consumer_var->data.explicit_location))) {
      /* Either a location already exists for this variable (since it is part
       * of fixed functionality), or it has already been recorded as part of a
       * previous match.
       */
      return;
   }

   /* Make room for the new entry before touching either variable.  The
    * realloc() result goes through a temporary so that a failure neither
    * leaks the current array nor leaves a NULL pointer behind, and the
    * capacity is only updated once the larger array really exists.
    */
   if (this->num_matches == this->matches_capacity) {
      const unsigned grown_capacity = this->matches_capacity * 2;
      match *grown = (match *)
         realloc(this->matches, sizeof(*this->matches) * grown_capacity);
      if (grown == NULL) {
         _mesa_error_no_memory(__func__);
         return;
      }
      this->matches = grown;
      this->matches_capacity = grown_capacity;
   }

   bool needs_flat_qualifier = consumer_var == NULL &&
      (producer_var->type->contains_integer() ||
       producer_var->type->contains_double());

   if (!disable_varying_packing &&
       (!disable_xfb_packing || producer_var == NULL || !producer_var->data.is_xfb) &&
       (needs_flat_qualifier ||
        (consumer_stage != MESA_SHADER_NONE && consumer_stage != MESA_SHADER_FRAGMENT))) {
      /* Since this varying is not being consumed by the fragment shader, its
       * interpolation type varying cannot possibly affect rendering.
       * Also, this variable is non-flat and is (or contains) an integer
       * or a double.
       * If the consumer stage is unknown, don't modify the interpolation
       * type as it could affect rendering later with separate shaders.
       *
       * lower_packed_varyings requires all integer varyings to flat,
       * regardless of where they appear.  We can trivially satisfy that
       * requirement by changing the interpolation type to flat here.
       */
      if (producer_var) {
         producer_var->data.centroid = false;
         producer_var->data.sample = false;
         producer_var->data.interpolation = INTERP_MODE_FLAT;
      }

      if (consumer_var) {
         consumer_var->data.centroid = false;
         consumer_var->data.sample = false;
         consumer_var->data.interpolation = INTERP_MODE_FLAT;
      }
   }

   /* We must use the consumer to compute the packing class because in GL4.4+
    * there is no guarantee interpolation qualifiers will match across stages.
    *
    * From Section 4.5 (Interpolation Qualifiers) of the GLSL 4.30 spec:
    *
    *    "The type and presence of interpolation qualifiers of variables with
    *    the same name declared in all linked shaders for the same cross-stage
    *    interface must match, otherwise the link command will fail.
    *
    *    When comparing an output from one stage to an input of a subsequent
    *    stage, the input and output don't match if their interpolation
    *    qualifiers (or lack thereof) are not the same."
    *
    * This text was also in at least revision 7 of the 4.40 spec but is no
    * longer in revision 9 and not in the 4.50 spec.
    */
   const ir_variable *const var = (consumer_var != NULL)
      ? consumer_var : producer_var;

   /* A consumer that must remain a real shader input forces the same on the
    * producer side.
    */
   if (producer_var && consumer_var &&
       consumer_var->data.must_be_shader_input) {
      producer_var->data.must_be_shader_input = 1;
   }

   this->matches[this->num_matches].packing_class
      = this->compute_packing_class(var);
   this->matches[this->num_matches].packing_order
      = this->compute_packing_order(var);
   this->matches[this->num_matches].producer_var = producer_var;
   this->matches[this->num_matches].consumer_var = consumer_var;
   this->num_matches++;

   /* Mark both ends as matched so a later call does not record them again. */
   if (producer_var)
      producer_var->data.is_unmatched_generic_inout = 0;
   if (consumer_var)
      consumer_var->data.is_unmatched_generic_inout = 0;
}
/**
* Choose locations for all of the variable matches that were previously
* passed to varying_matches::record().
* \param components returns array[slot] of number of components used
* per slot (1, 2, 3 or 4)
* \param reserved_slots bitmask indicating which varying slots are already
* allocated
* \return number of slots (4-element vectors) allocated
*/
unsigned
varying_matches::assign_locations(struct gl_shader_program *prog,
uint8_t components[],
uint64_t reserved_slots)
{
/* If packing has been disabled then we cannot safely sort the varyings by
* class as it may mean we are using a version of OpenGL where
* interpolation qualifiers are not guaranteed to be matching across
* shaders, sorting in this case could result in mismatching shader
* interfaces.
* When packing is disabled the sort orders varyings used by transform
* feedback first, but also depends on *undefined behaviour* of qsort to
* reverse the order of the varyings. See: xfb_comparator().
*
* If packing is only disabled for xfb varyings (mutually exclusive with
* disable_varying_packing), we then group varyings depending on if they
* are captured for transform feedback. The same *undefined behaviour* is
* taken advantage of.
*/
if (this->disable_varying_packing) {
/* Only sort varyings that are only used by transform feedback. */
qsort(this->matches, this->num_matches, sizeof(*this->matches),
&varying_matches::xfb_comparator);
} else if (this->disable_xfb_packing) {
/* Only sort varyings that are NOT used by transform feedback. */
qsort(this->matches, this->num_matches, sizeof(*this->matches),
&varying_matches::not_xfb_comparator);
} else {
/* Sort varying matches into an order that makes them easy to pack. */
qsort(this->matches, this->num_matches, sizeof(*this->matches),
&varying_matches::match_comparator);
}
unsigned generic_location = 0;
unsigned generic_patch_location = MAX_VARYING*4;
bool previous_var_xfb = false;
bool previous_var_xfb_only = false;
unsigned previous_packing_class = ~0u;
/* For tranform feedback separate mode, we know the number of attributes
* is <= the number of buffers. So packing isn't critical. In fact,
* packing vec3 attributes can cause trouble because splitting a vec3
* effectively creates an additional transform feedback output. The
* extra TFB output may exceed device driver limits.
*
* Also don't pack vec3 if the driver prefers power of two aligned
* varyings. Packing order guarantees that vec4, vec2 and vec1 will be
* pot-aligned, we only need to take care of vec3s
*/
const bool dont_pack_vec3 =
(prog->TransformFeedback.BufferMode == GL_SEPARATE_ATTRIBS &&
prog->TransformFeedback.NumVarying > 0) ||
this->prefer_pot_aligned_varyings;
for (unsigned i = 0; i < this->num_matches; i++) {
unsigned *location = &generic_location;
const ir_variable *var;
const glsl_type *type;
bool is_vertex_input = false;
if (matches[i].consumer_var) {
var = matches[i].consumer_var;
type = get_varying_type(var, consumer_stage);
if (consumer_stage == MESA_SHADER_VERTEX)
is_vertex_input = true;
} else {
var = matches[i].producer_var;
type = get_varying_type(var, producer_stage);
}
if (var->data.patch)
location = &generic_patch_location;
/* Advance to the next slot if this varying has a different packing
* class than the previous one, and we're not already on a slot
* boundary.
*
* Also advance if varying packing is disabled for transform feedback,
* and previous or current varying is used for transform feedback.
*
* Also advance to the next slot if packing is disabled. This makes sure
* we don't assign varyings the same locations which is possible
* because we still pack individual arrays, records and matrices even
* when packing is disabled. Note we don't advance to the next slot if
* we can pack varyings together that are only used for transform
* feedback.
*/
if (var->data.must_be_shader_input ||
(this->disable_xfb_packing &&
(previous_var_xfb || var->data.is_xfb)) ||
(this->disable_varying_packing &&
!(previous_var_xfb_only && var->data.is_xfb_only)) ||
(previous_packing_class != this->matches[i].packing_class) ||
(this->matches[i].packing_order == PACKING_ORDER_VEC3 &&
dont_pack_vec3)) {
*location = ALIGN(*location, 4);
}
previous_var_xfb = var->data.is_xfb;
previous_var_xfb_only = var->data.is_xfb_only;
previous_packing_class = this->matches[i].packing_class;
/* The number of components taken up by this variable. For vertex shader
* inputs, we use the number of slots * 4, as they have different
* counting rules.
*/
unsigned num_components = 0;
if (is_vertex_input) {
num_components = type->count_attribute_slots(is_vertex_input) * 4;
} else {
if ((this->disable_varying_packing &&
!is_varying_packing_safe(type, var)) ||
(this->disable_xfb_packing && var->data.is_xfb &&
!(type->is_array() || type->is_struct() || type->is_matrix())) ||
var->data.must_be_shader_input) {
num_components = type->count_attribute_slots(false) * 4;
} else {
num_components = type->component_slots_aligned(*location);
}
}
/* The last slot for this variable, inclusive. */
unsigned slot_end = *location + num_components - 1;
/* FIXME: We could be smarter in the below code and loop back over
* trying to fill any locations that we skipped because we couldn't pack
* the varying between an explicit location. For now just let the user
* hit the linking error if we run out of room and suggest they use
* explicit locations.
*/
while (slot_end < MAX_VARYING * 4u) {
const unsigned slots = (slot_end / 4u) - (*location / 4u) + 1;
const uint64_t slot_mask = ((1ull << slots) - 1) << (*location / 4u);
assert(slots > 0);
if ((reserved_slots & slot_mask) == 0) {
break;
}
*location = ALIGN(*location + 1, 4);
slot_end = *location + num_components - 1;
}
if (!var->data.patch && slot_end >= MAX_VARYING * 4u) {
linker_error(prog, "insufficient contiguous locations available for "
"%s it is possible an array or struct could not be "
"packed between varyings with explicit locations. Try "
"using an explicit location for arrays and structs.",
var->name);
}
if (slot_end < MAX_VARYINGS_INCL_PATCH * 4u) {
for (unsigned j = *location / 4u; j < slot_end / 4u; j++)
components[j] = 4;
components[slot_end / 4u] = (slot_end & 3) + 1;
}
this->matches[i].generic_location = *location;
*location = slot_end + 1;
}
return (generic_location + 3) / 4;
}
/**
* Update the producer and consumer shaders to reflect the locations
* assignments that were made by varying_matches::assign_locations().
*/
void
varying_matches::store_locations() const
{
/* Check is location needs to be packed with lower_packed_varyings() or if
* we can just use ARB_enhanced_layouts packing.
*/
bool pack_loc[MAX_VARYINGS_INCL_PATCH] = {};
const glsl_type *loc_type[MAX_VARYINGS_INCL_PATCH][4] = { {NULL, NULL} };
for (unsigned i = 0; i < this->num_matches; i++) {
ir_variable *producer_var = this->matches[i].producer_var;
ir_variable *consumer_var = this->matches[i].consumer_var;
unsigned generic_location = this->matches[i].generic_location;
unsigned slot = generic_location / 4;
unsigned offset = generic_location % 4;
if (producer_var) {
producer_var->data.location = VARYING_SLOT_VAR0 + slot;
producer_var->data.location_frac = offset;
}
if (consumer_var) {
assert(consumer_var->data.location == -1);
consumer_var->data.location = VARYING_SLOT_VAR0 + slot;
consumer_var->data.location_frac = offset;
}
/* Find locations suitable for native packing via
* ARB_enhanced_layouts.
*/
if (producer_var && consumer_var) {
if (enhanced_layouts_enabled) {
const glsl_type *type =
get_varying_type(producer_var, producer_stage);
if (type->is_array() || type->is_matrix() || type->is_struct() ||
type->is_64bit()) {
unsigned comp_slots = type->component_slots() + offset;
unsigned slots = comp_slots / 4;
if (comp_slots % 4)
slots += 1;
for (unsigned j = 0; j < slots; j++) {
pack_loc[slot + j] = true;
}
} else if (offset + type->vector_elements > 4) {
pack_loc[slot] = true;
pack_loc[slot + 1] = true;
} else {
loc_type[slot][offset] = type;
}
}
}
}
/* Attempt to use ARB_enhanced_layouts for more efficient packing if
* suitable.
*/
if (enhanced_layouts_enabled) {
for (unsigned i = 0; i < this->num_matches; i++) {
ir_variable *producer_var = this->matches[i].producer_var;
ir_variable *consumer_var = this->matches[i].consumer_var;
unsigned generic_location = this->matches[i].generic_location;
unsigned slot = generic_location / 4;
if (pack_loc[slot] || !producer_var || !consumer_var)
continue;
const glsl_type *type =
get_varying_type(producer_var, producer_stage);
bool type_match = true;
for (unsigned j = 0; j < 4; j++) {
if (loc_type[slot][j]) {
if (type->base_type != loc_type[slot][j]->base_type)
type_match = false;
}
}
if (type_match) {
producer_var->data.explicit_location = 1;
consumer_var->data.explicit_location = 1;
producer_var->data.explicit_component = 1;
consumer_var->data.explicit_component = 1;
}
}
}
}
/**
 * Compute the "packing class" of the given varying.  This is an unsigned
 * integer with the property that two variables in the same packing class can
 * be safely packed into the same vec4.
 *
 * The value is built from the interpolation mode (bits 0-2) and the
 * centroid (bit 3), sample (bit 4), patch (bit 5) and must_be_shader_input
 * (bit 6) flags of \c var.
 */
unsigned
varying_matches::compute_packing_class(const ir_variable *var)
{
   /* Without help from the back-end, there is no way to pack together
    * variables with different interpolation types, because
    * lower_packed_varyings must choose exactly one interpolation type for
    * each packed varying it creates.
    *
    * However, we can safely pack together floats, ints, and uints, because:
    *
    * - varyings of base type "int" and "uint" must use the "flat"
    *   interpolation type, which can only occur in GLSL 1.30 and above.
    *
    * - On platforms that support GLSL 1.30 and above, lower_packed_varyings
    *   can store flat floats as ints without losing any information (using
    *   the ir_unop_bitcast_* opcodes).
    *
    * Therefore, the packing class depends only on the interpolation type.
    */
   unsigned interp;
   if (var->is_interpolation_flat())
      interp = unsigned(INTERP_MODE_FLAT);
   else
      interp = var->data.interpolation;

   /* The interpolation mode has to fit below the flag bits. */
   assert(interp < (1 << 3));

   unsigned key = interp << 0;
   key |= var->data.centroid << 3;
   key |= var->data.sample << 4;
   key |= var->data.patch << 5;
   key |= var->data.must_be_shader_input << 6;

   return key;
}
/**
 * Compute the "packing order" of the given varying.  This is a sort key we
 * use to determine when to attempt to pack the given varying relative to
 * other varyings in the same packing class.
 *
 * The key is chosen from the component-slot count, modulo 4, of the
 * variable's type once all array levels have been stripped.
 */
varying_matches::packing_order_enum
varying_matches::compute_packing_order(const ir_variable *var)
{
   /* Peel off every level of array-ness to reach the element type. */
   const glsl_type *element_type = var->type->without_array();

   const unsigned leftover = element_type->component_slots() % 4;

   if (leftover == 1)
      return PACKING_ORDER_SCALAR;
   if (leftover == 2)
      return PACKING_ORDER_VEC2;
   if (leftover == 3)
      return PACKING_ORDER_VEC3;
   if (leftover == 0)
      return PACKING_ORDER_VEC4;

   assert(!"Unexpected value of vector_elements");
   return PACKING_ORDER_VEC4;
}
/**
* Comparison function passed to qsort() to sort varyings by packing_class and
* then by packing_order.
*/
int
varying_matches::match_comparator(const void *x_generic, const void *y_generic)
{
const match *x = (const match *) x_generic;
const match *y = (const match *) y_generic;
if (x->packing_class != y->packing_class)
return x->packing_class - y->packing_class;
return x->packing_order - y->packing_order;
}
/**
* Comparison function passed to qsort() to sort varyings used only by
* transform feedback when packing of other varyings is disabled.
*/
int
varying_matches::xfb_comparator(const void *x_generic, const void *y_generic)
{
const match *x = (const match *) x_generic;
if (x->producer_var != NULL && x->producer_var->data.is_xfb_only)
return match_comparator(x_generic, y_generic);
/* FIXME: When the comparator returns 0 it means the elements being
* compared are equivalent. However the qsort documentation says:
*
* "The order of equivalent elements is undefined."
*
* In practice the sort ends up reversing the order of the varyings which
* means locations are also assigned in this reversed order and happens to
* be what we want. This is also whats happening in
* varying_matches::match_comparator().
*/
return 0;
}
/**
* Comparison function passed to qsort() to sort varyings NOT used by
* transform feedback when packing of xfb varyings is disabled.
*/
int
varying_matches::not_xfb_comparator(const void *x_generic, const void *y_generic)
{
const match *x = (const match *) x_generic;
if (x->producer_var != NULL && !x->producer_var->data.is_xfb)
return match_comparator(x_generic, y_generic);
/* FIXME: When the comparator returns 0 it means the elements being
* compared are equivalent. However the qsort documentation says:
*
* "The order of equivalent elements is undefined."
*
* In practice the sort ends up reversing the order of the varyings which
* means locations are also assigned in this reversed order and happens to
* be what we want. This is also whats happening in
* varying_matches::match_comparator().
*/
return 0;
}
/**
 * Is the given variable a varying variable to be counted against the
 * limit in consts->MaxVarying?
 *
 * This includes variables such as texcoords, colors and generic
 * varyings, but excludes variables such as gl_FrontFacing and gl_FragCoord.
 *
 * \return true only for fragment shader inputs whose location is not
 *         VARYING_SLOT_POS, VARYING_SLOT_FACE or VARYING_SLOT_PNTC.
 */
static bool
var_counts_against_varying_limit(gl_shader_stage stage, const ir_variable *var)
{
   /* Only fragment shaders will take a varying variable as an input */
   if (stage != MESA_SHADER_FRAGMENT)
      return false;
   if (var->data.mode != ir_var_shader_in)
      return false;

   const int location = var->data.location;
   const bool excluded = location == VARYING_SLOT_POS ||
                         location == VARYING_SLOT_FACE ||
                         location == VARYING_SLOT_PNTC;

   return !excluded;
}
/**
* Visitor class that generates tfeedback_candidate structs describing all
* possible targets of transform feedback.
*
* tfeedback_candidate structs are stored in the hash table
* tfeedback_candidates, which is passed to the constructor. This hash table
* maps varying names to instances of the tfeedback_candidate struct.
*/
class tfeedback_candidate_generator : public program_resource_visitor
{
public:
tfeedback_candidate_generator(void *mem_ctx,
hash_table *tfeedback_candidates,
gl_shader_stage stage)
: mem_ctx(mem_ctx),
tfeedback_candidates(tfeedback_candidates),
stage(stage),
toplevel_var(NULL),
varying_floats(0),
xfb_offset_floats(0)
{
}
void process(ir_variable *var)
{
/* All named varying interface blocks should be flattened by now */
assert(!var->is_interface_instance());
assert(var->data.mode == ir_var_shader_out);
this->toplevel_var = var;
this->varying_floats = 0;
this->xfb_offset_floats = 0;
const glsl_type *t =
var->data.from_named_ifc_block ? var->get_interface_type() : var->type;
if (!var->data.patch && stage == MESA_SHADER_TESS_CTRL) {
assert(t->is_array());
t = t->fields.array;
}
program_resource_visitor::process(var, t, false);
}
private:
virtual void visit_field(const glsl_type *type, const char *name,
bool /* row_major */,
const glsl_type * /* record_type */,
const enum glsl_interface_packing,
bool /* last_field */)
{
assert(!type->without_array()->is_struct());
assert(!type->without_array()->is_interface());
tfeedback_candidate *candidate
= rzalloc(this->mem_ctx, tfeedback_candidate);
candidate->toplevel_var = this->toplevel_var;
candidate->type = type;
if (type->without_array()->is_64bit()) {
/* From ARB_gpu_shader_fp64:
*
* If any variable captured in transform feedback has double-precision
* components, the practical requirements for defined behavior are:
* ...
* (c) each double-precision variable captured must be aligned to a
* multiple of eight bytes relative to the beginning of a vertex.
*/
this->xfb_offset_floats = ALIGN(this->xfb_offset_floats, 2);
/* 64-bit members of structs are also aligned. */
this->varying_floats = ALIGN(this->varying_floats, 2);
}
candidate->xfb_offset_floats = this->xfb_offset_floats;
candidate->struct_offset_floats = this->varying_floats;
_mesa_hash_table_insert(this->tfeedback_candidates,
ralloc_strdup(this->mem_ctx, name),
candidate);
const unsigned component_slots = type->component_slots();
if (varying_has_user_specified_location(this->toplevel_var)) {
this->varying_floats += type->count_attribute_slots(false) * 4;
} else {
this->varying_floats += component_slots;
}
this->xfb_offset_floats += component_slots;
}
/**
* Memory context used to allocate hash table keys and values.
*/
void * const mem_ctx;
/**
* Hash table in which tfeedback_candidate objects should be stored.
*/
hash_table * const tfeedback_candidates;
gl_shader_stage stage;
/**
* Pointer to the toplevel variable that is being traversed.
*/
ir_variable *toplevel_var;
/**
* Total number of varying floats that have been visited so far. This is
* used to determine the offset to each varying within the toplevel
* variable.
*/
unsigned varying_floats;
/**
* Offset within the xfb. Counted in floats.
*/
unsigned xfb_offset_floats;
};
namespace linker {
/**
 * Sort the shader inputs found in \c ir into the three lookup structures
 * used to match them against producer outputs.
 *
 * Inputs with an explicit location are stored in
 * \c consumer_inputs_with_locations (indexed by data.location), inputs that
 * belong to an interface block are stored in \c consumer_interface_inputs
 * under the key "<block name>.<variable name>", and all remaining inputs
 * are stored in \c consumer_inputs under their own name.  Keys are allocated
 * out of \c mem_ctx.
 */
void
populate_consumer_input_sets(void *mem_ctx, exec_list *ir,
                             hash_table *consumer_inputs,
                             hash_table *consumer_interface_inputs,
                             ir_variable *consumer_inputs_with_locations[VARYING_SLOT_TESS_MAX])
{
   /* Start from an empty location table. */
   for (unsigned i = 0; i < VARYING_SLOT_TESS_MAX; i++)
      consumer_inputs_with_locations[i] = NULL;

   foreach_in_list(ir_instruction, node, ir) {
      ir_variable *const input_var = node->as_variable();

      if (input_var == NULL || input_var->data.mode != ir_var_shader_in)
         continue;

      /* All interface blocks should have been lowered by this point */
      assert(!input_var->type->is_interface());

      if (input_var->data.explicit_location) {
         /* assign_varying_locations only cares about finding the
          * ir_variable at the start of a contiguous location block.
          *
          *     - For !producer, consumer_inputs_with_locations isn't used.
          *
          *     - For !consumer, consumer_inputs_with_locations is empty.
          *
          * For consumer && producer, if you were trying to set some
          * ir_variable to the middle of a location block on the other side
          * of producer/consumer, cross_validate_outputs_to_inputs() should
          * be link-erroring due to either type mismatch or location
          * overlaps.  If the variables do match up, then they've got a
          * matching data.location and you only looked at
          * consumer_inputs_with_locations[var->data.location], not any
          * following entries for the array/structure.
          */
         consumer_inputs_with_locations[input_var->data.location] = input_var;
         continue;
      }

      const glsl_type *const iface = input_var->get_interface_type();
      if (iface != NULL) {
         char *const key = ralloc_asprintf(mem_ctx, "%s.%s",
                                           iface->without_array()->name,
                                           input_var->name);
         _mesa_hash_table_insert(consumer_interface_inputs, key, input_var);
      } else {
         char *const key = ralloc_strdup(mem_ctx, input_var->name);
         _mesa_hash_table_insert(consumer_inputs, key, input_var);
      }
   }
}
/**
 * Find a variable from the consumer that "matches" the specified variable
 *
 * Outputs with an explicit location are matched by location, outputs that
 * belong to an interface block by the key "<block name>.<variable name>",
 * and all other outputs by name.  There is no validation (here) that the
 * types, etc. are compatible.
 *
 * \return the matching variable if one exists and its mode is
 *         ir_var_shader_in, otherwise NULL.
 */
ir_variable *
get_matching_input(void *mem_ctx,
                   const ir_variable *output_var,
                   hash_table *consumer_inputs,
                   hash_table *consumer_interface_inputs,
                   ir_variable *consumer_inputs_with_locations[VARYING_SLOT_TESS_MAX])
{
   ir_variable *candidate = NULL;

   if (output_var->data.explicit_location) {
      candidate = consumer_inputs_with_locations[output_var->data.location];
   } else {
      hash_entry *found;
      const glsl_type *const iface = output_var->get_interface_type();

      if (iface != NULL) {
         char *const key = ralloc_asprintf(mem_ctx, "%s.%s",
                                           iface->without_array()->name,
                                           output_var->name);
         found = _mesa_hash_table_search(consumer_interface_inputs, key);
      } else {
         found = _mesa_hash_table_search(consumer_inputs, output_var->name);
      }

      if (found)
         candidate = (ir_variable *) found->data;
   }

   /* Only hand back variables that are still shader inputs. */
   if (candidate != NULL && candidate->data.mode == ir_var_shader_in)
      return candidate;

   return NULL;
}
}
/**
 * qsort() comparator over an array of ir_variable pointers that yields the
 * reverse of the canonical I/O order.
 *
 * Variables without an explicit location come before those with one.
 * Explicitly located variables are ordered by descending location, and the
 * rest by descending name (negated strcmp()).
 */
static int
io_variable_cmp(const void *_a, const void *_b)
{
   const ir_variable *const a = *(const ir_variable **) _a;
   const ir_variable *const b = *(const ir_variable **) _b;

   const bool a_explicit = a->data.explicit_location;
   const bool b_explicit = b->data.explicit_location;

   if (a_explicit != b_explicit)
      return a_explicit ? 1 : -1;

   if (a_explicit)
      return b->data.location - a->data.location;

   return -strcmp(a->name, b->name);
}
/**
 * Sort the shader IO variables into canonical order
 *
 * All variables in \c ir whose mode is \c io_mode are collected, sorted with
 * io_variable_cmp() and moved to the front of the list.  If more variables
 * are found than fit in the local table, the list is left untouched.
 */
static void
canonicalize_shader_io(exec_list *ir, enum ir_variable_mode io_mode)
{
   ir_variable *sorted[MAX_PROGRAM_OUTPUTS * 4];
   unsigned count = 0;

   foreach_in_list(ir_instruction, node, ir) {
      ir_variable *const var = node->as_variable();

      if (var != NULL && var->data.mode == io_mode) {
         /* If we have already encountered more I/O variables that could
          * successfully link, bail.
          */
         if (count == ARRAY_SIZE(sorted))
            return;

         sorted[count] = var;
         count++;
      }
   }

   if (count > 0) {
      /* Sort the list in reverse order (io_variable_cmp handles this).
       * Below we push the variables on to the IR list as a stack, so we
       * want the last variable (in canonical order) to be first in the
       * table.
       */
      qsort(sorted, count, sizeof(sorted[0]), io_variable_cmp);

      /* Remove each variable from its current location in the IR, and put
       * it at the front.
       */
      for (ir_variable **it = sorted; it != sorted + count; ++it) {
         (*it)->remove();
         ir->push_head(*it);
      }
   }
}
/**
 * Generate a bitfield map of the explicit locations for shader varyings.
 *
 * Bit N of the result is set when some variable of mode \c io_mode with an
 * explicit location at or above VARYING_SLOT_VAR0 covers generic slot N.
 * A NULL \c stage yields 0.
 *
 * Note: For Tessellation shaders we are sitting right on the limits of the
 * 64 bit map. Per-vertex and per-patch both have separate location domains
 * with a max of MAX_VARYING.
 */
static uint64_t
reserved_varying_slot(struct gl_linked_shader *stage,
                      ir_variable_mode io_mode)
{
   assert(io_mode == ir_var_shader_in || io_mode == ir_var_shader_out);
   /* Avoid an overflow of the returned value */
   assert(MAX_VARYINGS_INCL_PATCH <= 64);

   uint64_t mask = 0;

   if (!stage)
      return mask;

   foreach_in_list(ir_instruction, node, stage->ir) {
      ir_variable *const var = node->as_variable();

      if (var == NULL)
         continue;
      if (var->data.mode != io_mode || !var->data.explicit_location)
         continue;
      if (var->data.location < VARYING_SLOT_VAR0)
         continue;

      /* Vertex shader inputs are counted with the vertex-input rules. */
      const bool is_vertex_input = io_mode == ir_var_shader_in &&
                                   stage->Stage == MESA_SHADER_VERTEX;
      const unsigned slot_count =
         get_varying_type(var, stage->Stage)
            ->count_attribute_slots(is_vertex_input);

      int slot = var->data.location - VARYING_SLOT_VAR0;
      for (unsigned i = 0; i < slot_count; i++, slot++) {
         if (slot >= 0 && slot < MAX_VARYINGS_INCL_PATCH)
            mask |= UINT64_C(1) << slot;
      }
   }

   return mask;
}
/**
 * Assign locations for all variables that are produced in one pipeline stage
 * (the "producer") and consumed in the next stage (the "consumer").
 *
 * Variables produced by the producer may also be consumed by transform
 * feedback.
 *
 * \param consts driver constants; the packing related flags and the
 * per-stage compiler options are read from here.
 *
 * \param exts extension flags; EXT_transform_feedback and
 * ARB_enhanced_layouts are read from here.
 *
 * \param mem_ctx memory context used for allocations that must outlive this
 * function (hash keys, transform feedback candidates, lowered variables).
 *
 * \param prog the program being linked; link errors and warnings are
 * reported against it.
 *
 * \param producer the producing stage, or NULL.
 *
 * \param consumer the consuming stage, or NULL.
 *
 * \param num_tfeedback_decls is the number of declarations indicating
 * variables that may be consumed by transform feedback.
 *
 * \param tfeedback_decls is a pointer to an array of tfeedback_decl objects
 * representing the result of parsing the strings passed to
 * glTransformFeedbackVaryings().  assign_location() will be called for
 * each of these objects that matches one of the outputs of the
 * producer.
 *
 * \param reserved_slots bitmask that is forwarded unchanged to
 * varying_matches::assign_locations().
 *
 * When num_tfeedback_decls is nonzero, it is permissible for the consumer to
 * be NULL.  In this case, varying locations are assigned solely based on the
 * requirements of transform feedback.
 *
 * \return false if an output on a non-zero stream is linked to an input, if
 * a transform feedback declaration cannot be matched, lowered or assigned a
 * location; true otherwise.
 */
static bool
assign_varying_locations(const struct gl_constants *consts,
const struct gl_extensions *exts,
void *mem_ctx,
struct gl_shader_program *prog,
gl_linked_shader *producer,
gl_linked_shader *consumer,
unsigned num_tfeedback_decls,
tfeedback_decl *tfeedback_decls,
const uint64_t reserved_slots)
{
/* Tessellation shaders treat inputs and outputs as shared memory and can
* access inputs and outputs of other invocations.
* Therefore, they can't be lowered to temps easily (and definitely not
* efficiently).
*/
bool unpackable_tess =
(consumer && consumer->Stage == MESA_SHADER_TESS_EVAL) ||
(consumer && consumer->Stage == MESA_SHADER_TESS_CTRL) ||
(producer && producer->Stage == MESA_SHADER_TESS_CTRL);
/* Transform feedback code assumes varying arrays are packed, so if the
* driver has disabled varying packing, make sure to at least enable
* packing required by transform feedback. See below for exception.
*/
bool xfb_enabled =
exts->EXT_transform_feedback && !unpackable_tess;
/* Some drivers actually require packing to be explicitly disabled
* for varyings used by transform feedback.
*/
bool disable_xfb_packing =
consts->DisableTransformFeedbackPacking;
/* Disable packing on outward facing interfaces for SSO because in ES we
* need to retain the unpacked varying information for draw time
* validation.
*
* Packing is still enabled on individual arrays, structs, and matrices as
* these are required by the transform feedback code and it is still safe
* to do so. We also enable packing when a varying is only used for
* transform feedback and it's not an SSO.
*/
bool disable_varying_packing =
consts->DisableVaryingPacking || unpackable_tess;
if (prog->SeparateShader && (producer == NULL || consumer == NULL))
disable_varying_packing = true;
bool prefer_pot_aligned_varyings =
consts->PreferPOTAlignedVaryings;
varying_matches matches(disable_varying_packing,
disable_xfb_packing,
xfb_enabled,
exts->ARB_enhanced_layouts,
prefer_pot_aligned_varyings,
producer ? producer->Stage : MESA_SHADER_NONE,
consumer ? consumer->Stage : MESA_SHADER_NONE);
/* The three hash tables below are created in this temporary context, which
* is freed on every exit path once the tables are no longer needed.
*/
void *hash_table_ctx = ralloc_context(NULL);
hash_table *tfeedback_candidates =
_mesa_hash_table_create(hash_table_ctx, _mesa_hash_string,
_mesa_key_string_equal);
hash_table *consumer_inputs =
_mesa_hash_table_create(hash_table_ctx, _mesa_hash_string,
_mesa_key_string_equal);
hash_table *consumer_interface_inputs =
_mesa_hash_table_create(hash_table_ctx, _mesa_hash_string,
_mesa_key_string_equal);
ir_variable *consumer_inputs_with_locations[VARYING_SLOT_TESS_MAX] = {
NULL,
};
/* Only geometry shader consumers supply a vertex count; it is passed to
* lower_packed_varyings() for the consumer below.
*/
unsigned consumer_vertices = 0;
if (consumer && consumer->Stage == MESA_SHADER_GEOMETRY)
consumer_vertices = prog->Geom.VerticesIn;
/* Operate in a total of four passes.
*
* 1. Sort inputs / outputs into a canonical order. This is necessary so
* that inputs / outputs of separable shaders will be assigned
* predictable locations regardless of the order in which declarations
* appeared in the shader source.
*
* 2. Assign locations for any matching inputs and outputs.
*
* 3. Mark output variables in the producer that do not have locations as
* not being outputs. This lets the optimizer eliminate them.
*
* 4. Mark input variables in the consumer that do not have locations as
* not being inputs. This lets the optimizer eliminate them.
*/
if (consumer)
canonicalize_shader_io(consumer->ir, ir_var_shader_in);
if (producer)
canonicalize_shader_io(producer->ir, ir_var_shader_out);
if (consumer)
linker::populate_consumer_input_sets(mem_ctx, consumer->ir,
consumer_inputs,
consumer_interface_inputs,
consumer_inputs_with_locations);
if (producer) {
foreach_in_list(ir_instruction, node, producer->ir) {
ir_variable *const output_var = node->as_variable();
if (output_var == NULL || output_var->data.mode != ir_var_shader_out)
continue;
/* Only geometry shaders can use non-zero streams */
assert(output_var->data.stream == 0 ||
(output_var->data.stream < MAX_VERTEX_STREAMS &&
producer->Stage == MESA_SHADER_GEOMETRY));
if (num_tfeedback_decls > 0) {
tfeedback_candidate_generator g(mem_ctx, tfeedback_candidates, producer->Stage);
/* From OpenGL 4.6 (Core Profile) spec, section 11.1.2.1
* ("Vertex Shader Variables / Output Variables")
*
* "Each program object can specify a set of output variables from
* one shader to be recorded in transform feedback mode (see
* section 13.3). The variables that can be recorded are those
* emitted by the first active shader, in order, from the
* following list:
*
* * geometry shader
* * tessellation evaluation shader
* * tessellation control shader
* * vertex shader"
*
* But on OpenGL ES 3.2, section 11.1.2.1 ("Vertex Shader
* Variables / Output Variables") tessellation control shader is
* not included in the stages list.
*/
if (!prog->IsES || producer->Stage != MESA_SHADER_TESS_CTRL) {
g.process(output_var);
}
}
ir_variable *const input_var =
linker::get_matching_input(mem_ctx, output_var, consumer_inputs,
consumer_interface_inputs,
consumer_inputs_with_locations);
/* If a matching input variable was found, add this output (and the
* input) to the set. If this is a separable program and there is no
* consumer stage, add the output.
*
* Always add TCS outputs. They are shared by all invocations
* within a patch and can be used as shared memory.
*/
if (input_var || (prog->SeparateShader && consumer == NULL) ||
producer->Stage == MESA_SHADER_TESS_CTRL) {
matches.record(output_var, input_var);
}
/* Only stream 0 outputs can be consumed in the next stage */
if (input_var && output_var->data.stream != 0) {
linker_error(prog, "output %s is assigned to stream=%d but "
"is linked to an input, which requires stream=0",
output_var->name, output_var->data.stream);
ralloc_free(hash_table_ctx);
return false;
}
}
} else {
/* If there's no producer stage, then this must be a separable program.
* For example, we may have a program that has just a fragment shader.
* Later this program will be used with some arbitrary vertex (or
* geometry) shader program. This means that locations must be assigned
* for all the inputs.
*/
foreach_in_list(ir_instruction, node, consumer->ir) {
ir_variable *const input_var = node->as_variable();
if (input_var && input_var->data.mode == ir_var_shader_in) {
matches.record(NULL, input_var);
}
}
}
/* Resolve every transform feedback declaration that names a varying to a
* candidate, lowering it to a new output variable where required.
*/
for (unsigned i = 0; i < num_tfeedback_decls; ++i) {
if (!tfeedback_decls[i].is_varying())
continue;
const tfeedback_candidate *matched_candidate
= tfeedback_decls[i].find_candidate(prog, tfeedback_candidates);
if (matched_candidate == NULL) {
ralloc_free(hash_table_ctx);
return false;
}
/* There are two situations where a new output varying is needed:
*
* - If varying packing is disabled for xfb and the current declaration
* is subscripting an array, whether the subscript is aligned or not,
* to preserve the rest of the array for the consumer.
*
* - If a builtin variable needs to be copied to a new variable
* before its content is modified by another lowering pass (e.g.
* \c gl_Position is transformed by \c nir_lower_viewport_transform).
*/
const bool lowered =
(disable_xfb_packing && tfeedback_decls[i].subscripted()) ||
(matched_candidate->toplevel_var->data.explicit_location &&
matched_candidate->toplevel_var->data.location < VARYING_SLOT_VAR0 &&
(!consumer || consumer->Stage == MESA_SHADER_FRAGMENT) &&
(consts->ShaderCompilerOptions[producer->Stage].LowerBuiltinVariablesXfb &
BITFIELD_BIT(matched_candidate->toplevel_var->data.location)));
if (lowered) {
ir_variable *new_var;
tfeedback_candidate *new_candidate = NULL;
new_var = lower_xfb_varying(mem_ctx, producer, tfeedback_decls[i].name());
if (new_var == NULL) {
ralloc_free(hash_table_ctx);
return false;
}
/* Create new candidate and replace matched_candidate */
new_candidate = rzalloc(mem_ctx, tfeedback_candidate);
new_candidate->toplevel_var = new_var;
new_candidate->toplevel_var->data.is_unmatched_generic_inout = 1;
new_candidate->type = new_var->type;
new_candidate->struct_offset_floats = 0;
new_candidate->xfb_offset_floats = 0;
_mesa_hash_table_insert(tfeedback_candidates,
ralloc_strdup(mem_ctx, new_var->name),
new_candidate);
tfeedback_decls[i].set_lowered_candidate(new_candidate);
matched_candidate = new_candidate;
}
/* Mark as xfb varying */
matched_candidate->toplevel_var->data.is_xfb = 1;
/* Mark xfb varyings as always active */
matched_candidate->toplevel_var->data.always_active_io = 1;
/* Mark any corresponding inputs as always active also. We must do this
* because we have a NIR pass that lowers vectors to scalars and another
* that removes unused varyings.
* We don't split varyings marked as always active because there is no
* point in doing so. This means we need to mark both sides of the
* interface as always active otherwise we will have a mismatch and
* start removing things we shouldn't.
*/
ir_variable *const input_var =
linker::get_matching_input(mem_ctx, matched_candidate->toplevel_var,
consumer_inputs,
consumer_interface_inputs,
consumer_inputs_with_locations);
if (input_var) {
input_var->data.is_xfb = 1;
input_var->data.always_active_io = 1;
}
/* An output that is still flagged as unmatched at this point is recorded
* without a consumer variable and marked as used by transform feedback
* only.
*/
if (matched_candidate->toplevel_var->data.is_unmatched_generic_inout) {
matched_candidate->toplevel_var->data.is_xfb_only = 1;
matches.record(matched_candidate->toplevel_var, NULL);
}
}
/* Per-slot component counts, filled in by assign_locations() and consumed
* by lower_packed_varyings() below.
*/
uint8_t components[MAX_VARYINGS_INCL_PATCH] = {0};
const unsigned slots_used = matches.assign_locations(
prog, components, reserved_slots);
matches.store_locations();
for (unsigned i = 0; i < num_tfeedback_decls; ++i) {
if (tfeedback_decls[i].is_varying()) {
if (!tfeedback_decls[i].assign_location(consts, prog)) {
ralloc_free(hash_table_ctx);
return false;
}
}
}
/* The lookup tables are not used past this point. */
ralloc_free(hash_table_ctx);
if (consumer && producer) {
foreach_in_list(ir_instruction, node, consumer->ir) {
ir_variable *const var = node->as_variable();
if (var && var->data.mode == ir_var_shader_in &&
var->data.is_unmatched_generic_inout) {
if (!prog->IsES && prog->data->Version <= 120) {
/* On page 25 (page 31 of the PDF) of the GLSL 1.20 spec:
*
* Only those varying variables used (i.e. read) in
* the fragment shader executable must be written to
* by the vertex shader executable; declaring
* superfluous varying variables in a vertex shader is
* permissible.
*
* We interpret this text as meaning that the VS must
* write the variable for the FS to read it. See
* "glsl1-varying read but not written" in piglit.
*/
linker_error(prog, "%s shader varying %s not written "
"by %s shader\n.",
_mesa_shader_stage_to_string(consumer->Stage),
var->name,
_mesa_shader_stage_to_string(producer->Stage));
} else {
linker_warning(prog, "%s shader varying %s not written "
"by %s shader\n.",
_mesa_shader_stage_to_string(consumer->Stage),
var->name,
_mesa_shader_stage_to_string(producer->Stage));
}
}
}
/* Now that validation is done it's safe to remove unused varyings. As
* we have both a producer and consumer it's safe to remove unused
* varyings even if the program is an SSO because the stages are being
* linked together i.e. we have a multi-stage SSO.
*/
remove_unused_shader_inputs_and_outputs(false, producer,
ir_var_shader_out);
remove_unused_shader_inputs_and_outputs(false, consumer,
ir_var_shader_in);
}
if (producer) {
lower_packed_varyings(mem_ctx, slots_used, components, ir_var_shader_out,
0, producer, disable_varying_packing,
disable_xfb_packing, xfb_enabled);
}
if (consumer) {
lower_packed_varyings(mem_ctx, slots_used, components, ir_var_shader_in,
consumer_vertices, consumer, disable_varying_packing,
disable_xfb_packing, xfb_enabled);
}
return true;
}
/**
 * Check the outputs of \p producer against that stage's
 * MaxOutputComponents limit.
 *
 * The tally starts at \p num_explicit_locations and grows by the attribute
 * slot count of every shader output that has no explicit location and that
 * var_counts_against_varying_limit() accepts.  Each vector is costed as four
 * components.
 *
 * \return true when the outputs fit; otherwise a linker error is recorded on
 *         \p prog and false is returned.
 */
static bool
check_against_output_limit(const struct gl_constants *consts,
                           gl_api api,
                           struct gl_shader_program *prog,
                           gl_linked_shader *producer,
                           unsigned num_explicit_locations)
{
   unsigned output_vectors = num_explicit_locations;

   foreach_in_list(ir_instruction, node, producer->ir) {
      ir_variable *const var = node->as_variable();

      /* Skip anything that is not an implicitly located shader output. */
      if (var == NULL || var->data.explicit_location)
         continue;
      if (var->data.mode != ir_var_shader_out)
         continue;
      if (!var_counts_against_varying_limit(producer->Stage, var))
         continue;

      /* outputs for fragment shader can't be doubles */
      output_vectors += var->type->count_attribute_slots(false);
   }

   assert(producer->Stage != MESA_SHADER_FRAGMENT);

   const unsigned max_output_components =
      consts->Program[producer->Stage].MaxOutputComponents;
   const unsigned output_components = output_vectors * 4;

   if (output_components <= max_output_components)
      return true;

   const char *const stage_name =
      _mesa_shader_stage_to_string(producer->Stage);

   /* ES programs get the message in vectors, everything else in components. */
   if (api == API_OPENGLES2 || prog->IsES) {
      linker_error(prog, "%s shader uses too many output vectors "
                   "(%u > %u)\n",
                   stage_name,
                   output_vectors,
                   max_output_components / 4);
   } else {
      linker_error(prog, "%s shader uses too many output components "
                   "(%u > %u)\n",
                   stage_name,
                   output_components,
                   max_output_components);
   }

   return false;
}
/**
 * Check the inputs of \p consumer against that stage's MaxInputComponents
 * limit.
 *
 * The tally starts at \p num_explicit_locations and grows by the attribute
 * slot count of every shader input that has no explicit location and that
 * var_counts_against_varying_limit() accepts.  Each vector is costed as four
 * components.
 *
 * \return true when the inputs fit; otherwise a linker error is recorded on
 *         \p prog and false is returned.
 */
static bool
check_against_input_limit(const struct gl_constants *consts,
                          gl_api api,
                          struct gl_shader_program *prog,
                          gl_linked_shader *consumer,
                          unsigned num_explicit_locations)
{
   unsigned input_vectors = num_explicit_locations;

   foreach_in_list(ir_instruction, node, consumer->ir) {
      ir_variable *const var = node->as_variable();

      /* Skip anything that is not an implicitly located shader input. */
      if (var == NULL || var->data.explicit_location)
         continue;
      if (var->data.mode != ir_var_shader_in)
         continue;
      if (!var_counts_against_varying_limit(consumer->Stage, var))
         continue;

      /* vertex inputs aren't varying counted */
      input_vectors += var->type->count_attribute_slots(false);
   }

   assert(consumer->Stage != MESA_SHADER_VERTEX);

   const unsigned max_input_components =
      consts->Program[consumer->Stage].MaxInputComponents;
   const unsigned input_components = input_vectors * 4;

   if (input_components <= max_input_components)
      return true;

   const char *const stage_name =
      _mesa_shader_stage_to_string(consumer->Stage);

   /* ES programs get the message in vectors, everything else in components. */
   if (api == API_OPENGLES2 || prog->IsES) {
      linker_error(prog, "%s shader uses too many input vectors "
                   "(%u > %u)\n",
                   stage_name,
                   input_vectors,
                   max_input_components / 4);
   } else {
      linker_error(prog, "%s shader uses too many input components "
                   "(%u > %u)\n",
                   stage_name,
                   input_components,
                   max_input_components);
   }

   return false;
}
bool
link_varyings(struct gl_shader_program *prog, unsigned first, unsigned last,
const struct gl_constants *consts,
const struct gl_extensions *exts,
gl_api api, void *mem_ctx)
{
bool has_xfb_qualifiers = false;
unsigned num_tfeedback_decls = 0;
char **varying_names = NULL;
tfeedback_decl *tfeedback_decls = NULL;
/* From the ARB_enhanced_layouts spec:
*
* "If the shader used to record output variables for transform feedback
* varyings uses the "xfb_buffer", "xfb_offset", or "xfb_stride" layout
* qualifiers, the values specified by TransformFeedbackVaryings are
* ignored, and the set of variables captured for transform feedback is
* instead derived from the specified layout qualifiers."
*/
for (int i = MESA_SHADER_FRAGMENT - 1; i >= 0; i--) {
/* Find last stage before fragment shader */
if (prog->_LinkedShaders[i]) {
has_xfb_qualifiers =
process_xfb_layout_qualifiers(mem_ctx, prog->_LinkedShaders[i],
prog, &num_tfeedback_decls,
&varying_names);
break;
}
}
if (!has_xfb_qualifiers) {
num_tfeedback_decls = prog->TransformFeedback.NumVarying;
varying_names = prog->TransformFeedback.VaryingNames;
}
if (num_tfeedback_decls != 0) {
/* From GL_EXT_transform_feedback:
* A program will fail to link if:
*
* * the <count> specified by TransformFeedbackVaryingsEXT is
* non-zero, but the program object has no vertex or geometry
* shader;
*/
if (first >= MESA_SHADER_FRAGMENT) {
linker_error(prog, "Transform feedback varyings specified, but "
"no vertex, tessellation, or geometry shader is "
"present.\n");
return false;
}
tfeedback_decls = rzalloc_array(mem_ctx, tfeedback_decl,
num_tfeedback_decls);
if (!parse_tfeedback_decls(consts, exts,
prog, mem_ctx, num_tfeedback_decls,
varying_names, tfeedback_decls))
return false;
}
/* If there is no fragment shader we need to set transform feedback.
*
* For SSO we also need to assign output locations. We assign them here
* because we need to do it for both single stage programs and multi stage
* programs.
*/
if (last < MESA_SHADER_FRAGMENT &&
(num_tfeedback_decls != 0 || prog->SeparateShader)) {
const uint64_t reserved_out_slots =
reserved_varying_slot(prog->_LinkedShaders[last], ir_var_shader_out);
if (!assign_varying_locations(consts, exts, mem_ctx, prog,
prog->_LinkedShaders[last], NULL,
num_tfeedback_decls, tfeedback_decls,
reserved_out_slots))
return false;
}
if (last <= MESA_SHADER_FRAGMENT) {
/* Remove unused varyings from the first/last stage unless SSO */
remove_unused_shader_inputs_and_outputs(prog->SeparateShader,
prog->_LinkedShaders[first],
ir_var_shader_in);
remove_unused_shader_inputs_and_outputs(prog->SeparateShader,
prog->_LinkedShaders[last],
ir_var_shader_out);
/* If the program is made up of only a single stage */
if (first == last) {
gl_linked_shader *const sh = prog->_LinkedShaders[last];
do_dead_builtin_varyings(consts, api, NULL, sh, 0, NULL);
do_dead_builtin_varyings(consts, api, sh, NULL, num_tfeedback_decls,
tfeedback_decls);
if (prog->SeparateShader) {
const uint64_t reserved_slots =
reserved_varying_slot(sh, ir_var_shader_in);
/* Assign input locations for SSO, output locations are already
* assigned.
*/
if (!assign_varying_locations(consts, exts,
mem_ctx, prog,
NULL /* producer */,
sh /* consumer */,
0 /* num_tfeedback_decls */,
NULL /* tfeedback_decls */,
reserved_slots))
return false;
}
} else {
/* Linking the stages in the opposite order (from fragment to vertex)
* ensures that inter-shader outputs written to in an earlier stage
* are eliminated if they are (transitively) not used in a later
* stage.
*/
int next = last;
for (int i = next - 1; i >= 0; i--) {
if (prog->_LinkedShaders[i] == NULL && i != 0)
continue;
gl_linked_shader *const sh_i = prog->_LinkedShaders[i];
gl_linked_shader *const sh_next = prog->_LinkedShaders[next];
const uint64_t reserved_out_slots =
reserved_varying_slot(sh_i, ir_var_shader_out);
const uint64_t reserved_in_slots =
reserved_varying_slot(sh_next, ir_var_shader_in);
do_dead_builtin_varyings(consts, api, sh_i, sh_next,
next == MESA_SHADER_FRAGMENT ? num_tfeedback_decls : 0,
tfeedback_decls);
if (!assign_varying_locations(consts, exts,
mem_ctx, prog, sh_i, sh_next,
next == MESA_SHADER_FRAGMENT ? num_tfeedback_decls : 0,
tfeedback_decls,
reserved_out_slots | reserved_in_slots))
return false;
/* This must be done after all dead varyings are eliminated. */
if (sh_i != NULL) {
unsigned slots_used = util_bitcount64(reserved_out_slots);
if (!check_against_output_limit(consts, api, prog, sh_i, slots_used)) {
return false;
}
}
unsigned slots_used = util_bitcount64(reserved_in_slots);
if (!check_against_input_limit(consts, api, prog, sh_next, slots_used))
return false;
next = i;
}
}
}
if (!store_tfeedback_info(consts, prog,
num_tfeedback_decls, tfeedback_decls,
has_xfb_qualifiers, mem_ctx))
return false;
return true;
}