zink: improve/expand manual xfb emission

I left this semi-unfinished back when I discovered that I could blast
out xfb values inline with variable declarations, but that approach is not
viable for all scenarios, so manual emission has to work and has to pass CTS.

Reviewed-by: Adam Jackson <ajax@redhat.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16669>
This commit is contained in:
Mike Blumenkrantz 2022-05-20 20:12:57 -04:00 committed by Marge Bot
parent 26338c4abb
commit 24725e572e
1 changed files with 178 additions and 58 deletions

View File

@ -1490,79 +1490,199 @@ emit_so_outputs(struct ntv_context *ctx,
SpvId result;
for (unsigned c = 0; c < so_output.num_components; c++) {
components[c] = so_output.start_component + c;
/* this is the second half of a 2 * vec4 array */
if (slot == VARYING_SLOT_CLIP_DIST1)
components[c] += 4;
}
/* this is the type being indexed into */
const struct glsl_type *bare_type = glsl_without_array(out_type);
if (glsl_type_is_struct_or_ifc(bare_type)) {
uint32_t base_slot = (location & ~so_output.start_component) / 4;
unsigned slot_idx = slot - base_slot;
unsigned struct_slots = glsl_count_vec4_slots(bare_type, false, false);
unsigned array_idx = slot_idx / struct_slots;
slot_idx %= glsl_count_vec4_slots(bare_type, false, false);
src = spirv_builder_emit_composite_extract(&ctx->builder, get_glsl_type(ctx, bare_type), src, &array_idx, 1);
/* need to find the vec4 that's being exported by this slot */
while (glsl_type_is_struct_or_ifc(bare_type))
bare_type = unroll_struct_type(ctx, bare_type, &slot_idx, &src, &out_type);
output_type = get_glsl_type(ctx, out_type);
if (glsl_type_is_vector_or_scalar(out_type))
src = emit_bitcast(ctx, type, src);
else if (glsl_type_is_array(out_type)) {
for (unsigned c = 0; c < so_output.num_components; c++) {
uint32_t member = so_output.start_component + c;
SpvId base_type = get_glsl_basetype(ctx, glsl_get_base_type(glsl_without_array_or_matrix(out_type)));
components[c] = spirv_builder_emit_composite_extract(&ctx->builder, base_type, src, &member, 1);
}
if (so_output.num_components > 1)
src = spirv_builder_emit_composite_construct(&ctx->builder, type, components, so_output.num_components);
else
src = components[0];
out_type = glsl_vector_type(GLSL_TYPE_UINT, so_output.num_components);
output_type = type;
/* this is the array index into matrix types */
unsigned matrix_offset = so_output.register_index;
do {
if (glsl_type_is_struct_or_ifc(bare_type)) {
uint32_t base_slot = (location & ~so_output.start_component) / 4;
/* this is the slot index into the "current" value */
unsigned slot_idx = slot - base_slot;
unsigned struct_slots = glsl_count_vec4_slots(bare_type, false, false);
unsigned array_idx = slot_idx / struct_slots;
bool first = true;
slot_idx %= glsl_count_vec4_slots(bare_type, false, false);
if (glsl_type_is_array(out_type))
src = spirv_builder_emit_composite_extract(&ctx->builder, get_glsl_type(ctx, bare_type), src, &array_idx, 1);
/* need to find the vec4 that's being exported by this slot */
while (glsl_type_is_struct_or_ifc(bare_type)) {
/* a struct may have nested arrays of structs: handle them inline here */
if (!first && glsl_type_is_array(out_type)) {
struct_slots = glsl_count_vec4_slots(bare_type, false, false);
array_idx = slot_idx / struct_slots;
src = spirv_builder_emit_composite_extract(&ctx->builder, get_glsl_type(ctx, bare_type), src, &array_idx, 1);
slot_idx -= array_idx * struct_slots;
}
/* unroll this level of struct:
* - slot_idx is incremented to reflect the current value
* - unwrap src
* - out_type is the array type if src is an array
*/
bare_type = unroll_struct_type(ctx, bare_type, &slot_idx, &src, &out_type);
first = false;
}
/* update to the matrix row index */
matrix_offset = slot_idx;
output_type = get_glsl_type(ctx, out_type);
if (glsl_type_is_vector_or_scalar(out_type)) {
/* this is a simple case: handle below */
if (glsl_get_vector_elements(out_type) * glsl_get_bit_size(out_type) == so_output.num_components * 32) {
src = emit_bitcast(ctx, type, src);
out_type = glsl_vector_type(GLSL_TYPE_UINT, so_output.num_components);
output_type = get_glsl_type(ctx, out_type);
}
} else if (glsl_type_is_array(out_type)) {
/* this should be impossible */
if (glsl_type_is_struct(bare_type))
unreachable("zink: gross nested struct array struct arrays in xfb!");
SpvId base_type = get_glsl_basetype(ctx, glsl_get_base_type(bare_type));
if (glsl_type_is_scalar(bare_type)) {
/* this wouldn't make sense */
assert(so_output.start_component == 0);
if (glsl_type_is_64bit(bare_type)) {
/* 64bit components count as 2 so outputs: bitcast to vec2 and extract */
unsigned idx = 0;
for (unsigned c = 0; idx < so_output.num_components; c++) {
uint32_t member = slot_idx + c;
SpvId conv = spirv_builder_emit_composite_extract(&ctx->builder, base_type, src, &member, 1);
SpvId val = emit_bitcast(ctx, get_uvec_type(ctx, 32, 2), conv);
unsigned v = 0;
components[idx++] = spirv_builder_emit_composite_extract(&ctx->builder, get_uvec_type(ctx, 32, 1), val, &v, 1);
v = 1;
components[idx++] = spirv_builder_emit_composite_extract(&ctx->builder, get_uvec_type(ctx, 32, 1), val, &v, 1);
}
result = spirv_builder_emit_composite_construct(&ctx->builder, type, components, so_output.num_components);
} else {
/* array of scalars: each one is its own slot */
for (unsigned c = 0; c < so_output.num_components; c++) {
uint32_t member = slot_idx + c;
components[c] = spirv_builder_emit_composite_extract(&ctx->builder, base_type, src, &member, 1);
}
}
} else if (glsl_type_is_matrix(bare_type)) {
/* nested matrix type: unwrap, update matrix offset, select a vec, handle below */
unsigned mat_slots = glsl_count_attribute_slots(bare_type, false);
array_idx = matrix_offset / mat_slots;
output_type = get_glsl_type(ctx, bare_type);
out_type = bare_type;
src = spirv_builder_emit_composite_extract(&ctx->builder, output_type, src, &array_idx, 1);
matrix_offset %= mat_slots;
unsigned real_offset = glsl_type_is_64bit(bare_type) ? matrix_offset / 2 : matrix_offset;
/* store for later */
if (glsl_type_is_64bit(bare_type))
matrix_offset %= 2;
assert(real_offset < glsl_get_matrix_columns(bare_type));
out_type = glsl_without_array_or_matrix(out_type);
output_type = get_glsl_type(ctx, out_type);
src = spirv_builder_emit_composite_extract(&ctx->builder, output_type, src, &real_offset, 1);
break;
} else {
assert(glsl_type_is_vector(bare_type));
/* just extract the right vec and let it be handled below */
unsigned vec_slots = glsl_count_attribute_slots(bare_type, false);
unsigned idx = matrix_offset / vec_slots;
matrix_offset %= vec_slots;
output_type = get_glsl_type(ctx, bare_type);
out_type = bare_type;
src = spirv_builder_emit_composite_extract(&ctx->builder, output_type, src, &idx, 1);
break;
}
if (so_output.num_components > 1)
src = spirv_builder_emit_composite_construct(&ctx->builder, type, components, so_output.num_components);
else
src = components[0];
out_type = glsl_vector_type(GLSL_TYPE_UINT, so_output.num_components);
output_type = type;
}
}
}
} while (0);
assert(!glsl_type_is_struct_or_ifc(out_type));
/* if we're emitting a scalar or the type we're emitting matches the output's original type and we're
* emitting the same number of components, then we can skip any sort of conversion here
*/
if (glsl_type_is_scalar(out_type) || (type == output_type && glsl_get_length(out_type) == so_output.num_components))
if (!glsl_type_is_64bit(out_type) &&
(glsl_type_is_scalar(out_type) ||
(type == output_type &&
(glsl_type_is_vector(out_type) && glsl_get_vector_elements(out_type) == so_output.num_components))))
/* if we're emitting a scalar or the type we're emitting matches the output's original type and we're
* emitting the same number of components, then we can skip any sort of conversion here
*/
result = src;
else {
/* OpCompositeExtract can only extract scalars for our use here */
if (so_output.num_components == 1) {
result = spirv_builder_emit_composite_extract(&ctx->builder, type, src, components, so_output.num_components);
/* OpCompositeExtract can only extract scalars for our use here,
* but not from arrays since they have different packing rules
*/
if (so_output.num_components == 1 && !glsl_type_is_array(out_type)) {
unsigned component = so_output.start_component;
result = spirv_builder_emit_composite_extract(&ctx->builder, type, src, &component, so_output.num_components);
} else if (glsl_type_is_vector(out_type)) {
/* OpVectorShuffle can select vector members into a differently-sized vector */
result = spirv_builder_emit_vector_shuffle(&ctx->builder, type,
src, src,
components, so_output.num_components);
if (glsl_type_is_64bit(out_type)) {
/* 64bit components count as 2 so outputs: bitcast to vec2 and extract */
unsigned idx = 0;
for (unsigned c = 0; idx < so_output.num_components; c++) {
uint32_t member = so_output.start_component + (matrix_offset * 2) + c;
SpvId base_type = get_glsl_basetype(ctx, glsl_get_base_type(out_type));
SpvId conv = spirv_builder_emit_composite_extract(&ctx->builder, base_type, src, &member, 1);
SpvId val = emit_bitcast(ctx, get_uvec_type(ctx, 32, 2), conv);
unsigned v = 0;
components[idx++] = spirv_builder_emit_composite_extract(&ctx->builder, get_uvec_type(ctx, 32, 1), val, &v, 1);
v = 1;
components[idx++] = spirv_builder_emit_composite_extract(&ctx->builder, get_uvec_type(ctx, 32, 1), val, &v, 1);
}
result = spirv_builder_emit_composite_construct(&ctx->builder, get_uvec_type(ctx, 32, so_output.num_components), components, so_output.num_components);
} else {
for (unsigned c = 0; c < so_output.num_components; c++) {
components[c] = so_output.start_component + c;
/* this is the second half of a 2 * vec4 array */
if (slot == VARYING_SLOT_CLIP_DIST1)
components[c] += 4;
}
/* OpVectorShuffle can select vector members into a differently-sized vector */
result = spirv_builder_emit_vector_shuffle(&ctx->builder, type,
src, src,
components, so_output.num_components);
}
} else {
assert(glsl_type_is_array_or_matrix(out_type));
ASSERTED const struct glsl_type *bare_type = glsl_without_array(out_type);
const struct glsl_type *bare_type = glsl_without_array(out_type);
assert(!glsl_type_is_struct_or_ifc(bare_type));
/* for arrays, we need to manually extract each desired member
if (glsl_type_is_matrix(out_type)) {
/* for matrices, the xfb output will never be more than one vec4 from a single row */
unsigned vec_size = glsl_get_vector_elements(out_type);
SpvId vec_type = get_fvec_type(ctx, glsl_get_bit_size(out_type), vec_size);
if (glsl_type_is_64bit(out_type) && vec_size > 2) {
/* dvec3/dvec4 uses 2 slots per row: normalize matrix offset */
matrix_offset /= 2;
}
src = spirv_builder_emit_composite_extract(&ctx->builder, vec_type, src, &matrix_offset, 1);
out_type = glsl_vector_type(glsl_get_base_type(out_type), glsl_get_vector_elements(out_type));
}
/* for arrays (or matrix rows), we need to manually extract each desired member
* and re-pack them into the desired output type
*/
for (unsigned c = 0; c < so_output.num_components; c++) {
uint32_t member[2];
unsigned member_idx = 0;
if (glsl_type_is_matrix(out_type)) {
member_idx = 1;
member[0] = so_output.register_index;
}
member[member_idx] = so_output.start_component + c;
SpvId base_type = get_glsl_basetype(ctx, glsl_get_base_type(glsl_without_array_or_matrix(out_type)));
unsigned idx = 0;
for (unsigned c = 0; idx < so_output.num_components; c++) {
uint32_t member = so_output.start_component + c;
SpvId base_type = get_glsl_basetype(ctx, glsl_get_base_type(bare_type));
if (slot == VARYING_SLOT_CLIP_DIST1)
member[member_idx] += 4;
components[c] = spirv_builder_emit_composite_extract(&ctx->builder, base_type, src, member, 1 + member_idx);
member += 4;
components[idx] = spirv_builder_emit_composite_extract(&ctx->builder, base_type, src, &member, 1);
if (glsl_type_is_64bit(bare_type)) {
/* 64bit components count as 2 so outputs: bitcast to vec2 and extract */
SpvId val = emit_bitcast(ctx, get_uvec_type(ctx, 32, 2), components[idx]);
unsigned v = 0;
components[idx++] = spirv_builder_emit_composite_extract(&ctx->builder, get_uvec_type(ctx, 32, 1), val, &v, 1);
v = 1;
components[idx++] = spirv_builder_emit_composite_extract(&ctx->builder, get_uvec_type(ctx, 32, 1), val, &v, 1);
} else {
idx++;
}
}
result = spirv_builder_emit_composite_construct(&ctx->builder, type, components, so_output.num_components);
if (so_output.num_components > 1)
result = spirv_builder_emit_composite_construct(&ctx->builder, get_uvec_type(ctx, 32, so_output.num_components), components, so_output.num_components);
else
result = components[0];
}
}