From df87722beccf0255d149668ca54a35cabf99a9c4 Mon Sep 17 00:00:00 2001
From: Paul Berry
Date: Sun, 9 Dec 2012 20:59:26 -0800
Subject: [PATCH] glsl/linker: Pack within compound varyings.

This patch implements varying packing within varyings that are
composed of multiple vectors of size less than 4 (e.g. arrays of
vec2's, or matrices with height less than 4).

Previously, such varyings used up a full 4-wide varying slot for each
constituent vector, meaning that some of the components of each
varying slot went unused. For example, a mat4x3 would be stored as
follows:

<----slot1----> <----slot2----> <----slot3----> <----slot4---->  slots
 *   *   *   *   *   *   *   *   *   *   *   *   *   *   *   *
<-column1->  x  <-column2->  x  <-column3->  x  <-column4->  x   matrix

(Each * represents a varying component, and the "x"s represent wasted
space). In addition to wasting precious varying components, this
layout complicated transform feedback, since the constituents of the
varying are expected to be output to the transform feedback buffer
contiguously (e.g. without gaps between the columns, in the case of a
matrix).

This change packs the constituents of each varying together so that
all wasted space is at the end. For the mat4x3 example, this looks
like so:

<----slot1----> <----slot2----> <----slot3----> <----slot4---->  slots
 *   *   *   *   *   *   *   *   *   *   *   *   *   *   *   *
<-column1-> <-column2-> <-column3-> <-column4->  x   x   x   x   matrix

Note that matrix columns 2 and 3 now cross a boundary between varying
slots (a characteristic I call "double parking" of a varying).

We don't bother trying to eliminate the wasted space at the end of the
varying, since the patch that follows will take care of that.

Since compiler back-ends don't (yet) support this packed layout, the
lower_packed_varyings function is used to rewrite the shader into a
form where each varying occupies a full varying slot. Later, if we
add native back-end support for varying packing, we can make this
lowering pass optional.

Reviewed-by: Eric Anholt v2: Skip varying packing if ctx->Const.DisableVaryingPacking is true. --- src/glsl/linker.cpp | 93 +++++++++++++++++++++++++++------------------ 1 file changed, 56 insertions(+), 37 deletions(-) diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp index 55d23d1c342..1ae8fad22d0 100644 --- a/src/glsl/linker.cpp +++ b/src/glsl/linker.cpp @@ -1600,6 +1600,17 @@ private: */ int location; + /** + * If non-zero, then this variable may be packed along with other variables + * into a single varying slot, so this offset should be applied when + * accessing components. For example, an offset of 1 means that the x + * component of this variable is actually stored in component y of the + * location specified by \c location. + * + * Only valid if location != -1. + */ + unsigned location_frac; + /** * If location != -1, the number of vector elements in this variable, or 1 * if this variable is a scalar. @@ -1739,6 +1750,8 @@ tfeedback_decl::assign_location(struct gl_context *ctx, /* Array variable */ const unsigned matrix_cols = output_var->type->fields.array->matrix_columns; + const unsigned vector_elements = + output_var->type->fields.array->vector_elements; unsigned actual_array_size = this->is_clip_distance_mesa ? 
prog->Vert.ClipDistanceArraySize : output_var->type->array_size(); @@ -1754,16 +1767,22 @@ tfeedback_decl::assign_location(struct gl_context *ctx, if (this->is_clip_distance_mesa) { this->location = output_var->location + this->array_subscript / 4; + this->location_frac = this->array_subscript % 4; } else { - this->location = - output_var->location + this->array_subscript * matrix_cols; + unsigned fine_location + = output_var->location * 4 + output_var->location_frac; + unsigned array_elem_size = vector_elements * matrix_cols; + fine_location += array_elem_size * this->array_subscript; + this->location = fine_location / 4; + this->location_frac = fine_location % 4; } this->size = 1; } else { this->location = output_var->location; + this->location_frac = output_var->location_frac; this->size = actual_array_size; } - this->vector_elements = output_var->type->fields.array->vector_elements; + this->vector_elements = vector_elements; this->matrix_columns = matrix_cols; if (this->is_clip_distance_mesa) this->type = GL_FLOAT; @@ -1778,6 +1797,7 @@ tfeedback_decl::assign_location(struct gl_context *ctx, return false; } this->location = output_var->location; + this->location_frac = output_var->location_frac; this->size = 1; this->vector_elements = output_var->type->vector_elements; this->matrix_columns = output_var->type->matrix_columns; @@ -1812,11 +1832,7 @@ tfeedback_decl::get_num_outputs() const return 0; } - unsigned translated_size = this->size; - if (this->is_clip_distance_mesa) - translated_size = (translated_size + 3) / 4; - - return translated_size * this->matrix_columns; + return (this->num_components() + this->location_frac + 3)/4; } @@ -1854,35 +1870,23 @@ tfeedback_decl::store(struct gl_context *ctx, struct gl_shader_program *prog, return false; } - unsigned translated_size = this->size; - if (this->is_clip_distance_mesa) - translated_size = (translated_size + 3) / 4; - unsigned components_so_far = 0; - for (unsigned index = 0; index < translated_size; 
++index) { - for (unsigned v = 0; v < this->matrix_columns; ++v) { - unsigned num_components = this->vector_elements; - assert(info->NumOutputs < max_outputs); - info->Outputs[info->NumOutputs].ComponentOffset = 0; - if (this->is_clip_distance_mesa) { - if (this->is_subscripted) { - num_components = 1; - info->Outputs[info->NumOutputs].ComponentOffset = - this->array_subscript % 4; - } else { - num_components = MIN2(4, this->size - components_so_far); - } - } - info->Outputs[info->NumOutputs].OutputRegister = - this->location + v + index * this->matrix_columns; - info->Outputs[info->NumOutputs].NumComponents = num_components; - info->Outputs[info->NumOutputs].OutputBuffer = buffer; - info->Outputs[info->NumOutputs].DstOffset = info->BufferStride[buffer]; - ++info->NumOutputs; - info->BufferStride[buffer] += num_components; - components_so_far += num_components; - } + unsigned location = this->location; + unsigned location_frac = this->location_frac; + unsigned num_components = this->num_components(); + while (num_components > 0) { + unsigned output_size = MIN2(num_components, 4 - location_frac); + assert(info->NumOutputs < max_outputs); + info->Outputs[info->NumOutputs].ComponentOffset = location_frac; + info->Outputs[info->NumOutputs].OutputRegister = location; + info->Outputs[info->NumOutputs].NumComponents = output_size; + info->Outputs[info->NumOutputs].OutputBuffer = buffer; + info->Outputs[info->NumOutputs].DstOffset = info->BufferStride[buffer]; + ++info->NumOutputs; + info->BufferStride[buffer] += output_size; + num_components -= output_size; + location++; + location_frac = 0; } - assert(components_so_far == this->num_components()); info->Varyings[info->NumVarying].Name = ralloc_strdup(prog, this->orig_name); info->Varyings[info->NumVarying].Type = this->type; @@ -2330,7 +2334,7 @@ assign_varying_locations(struct gl_context *ctx, } } - matches.assign_locations(); + const unsigned slots_used = matches.assign_locations(); 
matches.store_locations(producer_base, consumer_base); for (unsigned i = 0; i < num_tfeedback_decls; ++i) { @@ -2344,6 +2348,21 @@ assign_varying_locations(struct gl_context *ctx, return false; } + if (ctx->Const.DisableVaryingPacking) { + /* Transform feedback code assumes varyings are packed, so if the driver + * has disabled varying packing, make sure it does not support transform + * feedback. + */ + assert(!ctx->Extensions.EXT_transform_feedback); + } else { + lower_packed_varyings(ctx, producer_base, slots_used, ir_var_out, + producer); + if (consumer) { + lower_packed_varyings(ctx, consumer_base, slots_used, ir_var_in, + consumer); + } + } + unsigned varying_vectors = 0; if (consumer) {