zink: improve/expand manual xfb emission

I left this semi-unfinished back when I discovered that I could blast
out xfb values inline with variable declarations, but that is not viable
for all scenarios, so manual emission has to work and has to pass CTS.

Reviewed-by: Adam Jackson <ajax@redhat.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16669>
This commit is contained in:
Mike Blumenkrantz 2022-05-20 20:12:57 -04:00 committed by Marge Bot
parent 26338c4abb
commit 24725e572e
1 changed files with 178 additions and 58 deletions

View File

@ -1490,33 +1490,105 @@ emit_so_outputs(struct ntv_context *ctx,
SpvId result; SpvId result;
for (unsigned c = 0; c < so_output.num_components; c++) { /* this is the type being indexed into */
components[c] = so_output.start_component + c;
/* this is the second half of a 2 * vec4 array */
if (slot == VARYING_SLOT_CLIP_DIST1)
components[c] += 4;
}
const struct glsl_type *bare_type = glsl_without_array(out_type); const struct glsl_type *bare_type = glsl_without_array(out_type);
/* this is the array index into matrix types */
unsigned matrix_offset = so_output.register_index;
do {
if (glsl_type_is_struct_or_ifc(bare_type)) { if (glsl_type_is_struct_or_ifc(bare_type)) {
uint32_t base_slot = (location & ~so_output.start_component) / 4; uint32_t base_slot = (location & ~so_output.start_component) / 4;
/* this is the slot index into the "current" value */
unsigned slot_idx = slot - base_slot; unsigned slot_idx = slot - base_slot;
unsigned struct_slots = glsl_count_vec4_slots(bare_type, false, false); unsigned struct_slots = glsl_count_vec4_slots(bare_type, false, false);
unsigned array_idx = slot_idx / struct_slots; unsigned array_idx = slot_idx / struct_slots;
bool first = true;
slot_idx %= glsl_count_vec4_slots(bare_type, false, false); slot_idx %= glsl_count_vec4_slots(bare_type, false, false);
if (glsl_type_is_array(out_type))
src = spirv_builder_emit_composite_extract(&ctx->builder, get_glsl_type(ctx, bare_type), src, &array_idx, 1); src = spirv_builder_emit_composite_extract(&ctx->builder, get_glsl_type(ctx, bare_type), src, &array_idx, 1);
/* need to find the vec4 that's being exported by this slot */ /* need to find the vec4 that's being exported by this slot */
while (glsl_type_is_struct_or_ifc(bare_type)) while (glsl_type_is_struct_or_ifc(bare_type)) {
/* a struct may have nested arrays of structs: handle them inline here */
if (!first && glsl_type_is_array(out_type)) {
struct_slots = glsl_count_vec4_slots(bare_type, false, false);
array_idx = slot_idx / struct_slots;
src = spirv_builder_emit_composite_extract(&ctx->builder, get_glsl_type(ctx, bare_type), src, &array_idx, 1);
slot_idx -= array_idx * struct_slots;
}
/* unroll this level of struct:
* - slot_idx is incremented to reflect the current value
* - unwrap src
* - out_type is the array type if src is an array
*/
bare_type = unroll_struct_type(ctx, bare_type, &slot_idx, &src, &out_type); bare_type = unroll_struct_type(ctx, bare_type, &slot_idx, &src, &out_type);
first = false;
}
/* update to the matrix row index */
matrix_offset = slot_idx;
output_type = get_glsl_type(ctx, out_type); output_type = get_glsl_type(ctx, out_type);
if (glsl_type_is_vector_or_scalar(out_type)) if (glsl_type_is_vector_or_scalar(out_type)) {
/* this is a simple case: handle below */
if (glsl_get_vector_elements(out_type) * glsl_get_bit_size(out_type) == so_output.num_components * 32) {
src = emit_bitcast(ctx, type, src); src = emit_bitcast(ctx, type, src);
else if (glsl_type_is_array(out_type)) { out_type = glsl_vector_type(GLSL_TYPE_UINT, so_output.num_components);
output_type = get_glsl_type(ctx, out_type);
}
} else if (glsl_type_is_array(out_type)) {
/* this should be impossible */
if (glsl_type_is_struct(bare_type))
unreachable("zink: gross nested struct array struct arrays in xfb!");
SpvId base_type = get_glsl_basetype(ctx, glsl_get_base_type(bare_type));
if (glsl_type_is_scalar(bare_type)) {
/* this wouldn't make sense */
assert(so_output.start_component == 0);
if (glsl_type_is_64bit(bare_type)) {
/* 64bit components count as 2 so outputs: bitcast to vec2 and extract */
unsigned idx = 0;
for (unsigned c = 0; idx < so_output.num_components; c++) {
uint32_t member = slot_idx + c;
SpvId conv = spirv_builder_emit_composite_extract(&ctx->builder, base_type, src, &member, 1);
SpvId val = emit_bitcast(ctx, get_uvec_type(ctx, 32, 2), conv);
unsigned v = 0;
components[idx++] = spirv_builder_emit_composite_extract(&ctx->builder, get_uvec_type(ctx, 32, 1), val, &v, 1);
v = 1;
components[idx++] = spirv_builder_emit_composite_extract(&ctx->builder, get_uvec_type(ctx, 32, 1), val, &v, 1);
}
result = spirv_builder_emit_composite_construct(&ctx->builder, type, components, so_output.num_components);
} else {
/* array of scalars: each one is its own slot */
for (unsigned c = 0; c < so_output.num_components; c++) { for (unsigned c = 0; c < so_output.num_components; c++) {
uint32_t member = so_output.start_component + c; uint32_t member = slot_idx + c;
SpvId base_type = get_glsl_basetype(ctx, glsl_get_base_type(glsl_without_array_or_matrix(out_type)));
components[c] = spirv_builder_emit_composite_extract(&ctx->builder, base_type, src, &member, 1); components[c] = spirv_builder_emit_composite_extract(&ctx->builder, base_type, src, &member, 1);
} }
}
} else if (glsl_type_is_matrix(bare_type)) {
/* nested matrix type: unwrap, update matrix offset, select a vec, handle below */
unsigned mat_slots = glsl_count_attribute_slots(bare_type, false);
array_idx = matrix_offset / mat_slots;
output_type = get_glsl_type(ctx, bare_type);
out_type = bare_type;
src = spirv_builder_emit_composite_extract(&ctx->builder, output_type, src, &array_idx, 1);
matrix_offset %= mat_slots;
unsigned real_offset = glsl_type_is_64bit(bare_type) ? matrix_offset / 2 : matrix_offset;
/* store for later */
if (glsl_type_is_64bit(bare_type))
matrix_offset %= 2;
assert(real_offset < glsl_get_matrix_columns(bare_type));
out_type = glsl_without_array_or_matrix(out_type);
output_type = get_glsl_type(ctx, out_type);
src = spirv_builder_emit_composite_extract(&ctx->builder, output_type, src, &real_offset, 1);
break;
} else {
assert(glsl_type_is_vector(bare_type));
/* just extract the right vec and let it be handled below */
unsigned vec_slots = glsl_count_attribute_slots(bare_type, false);
unsigned idx = matrix_offset / vec_slots;
matrix_offset %= vec_slots;
output_type = get_glsl_type(ctx, bare_type);
out_type = bare_type;
src = spirv_builder_emit_composite_extract(&ctx->builder, output_type, src, &idx, 1);
break;
}
if (so_output.num_components > 1) if (so_output.num_components > 1)
src = spirv_builder_emit_composite_construct(&ctx->builder, type, components, so_output.num_components); src = spirv_builder_emit_composite_construct(&ctx->builder, type, components, so_output.num_components);
else else
@ -1525,44 +1597,92 @@ emit_so_outputs(struct ntv_context *ctx,
output_type = type; output_type = type;
} }
} }
} while (0);
assert(!glsl_type_is_struct_or_ifc(out_type)); assert(!glsl_type_is_struct_or_ifc(out_type));
if (!glsl_type_is_64bit(out_type) &&
(glsl_type_is_scalar(out_type) ||
(type == output_type &&
(glsl_type_is_vector(out_type) && glsl_get_vector_elements(out_type) == so_output.num_components))))
/* if we're emitting a scalar or the type we're emitting matches the output's original type and we're /* if we're emitting a scalar or the type we're emitting matches the output's original type and we're
* emitting the same number of components, then we can skip any sort of conversion here * emitting the same number of components, then we can skip any sort of conversion here
*/ */
if (glsl_type_is_scalar(out_type) || (type == output_type && glsl_get_length(out_type) == so_output.num_components))
result = src; result = src;
else { else {
/* OpCompositeExtract can only extract scalars for our use here */ /* OpCompositeExtract can only extract scalars for our use here,
if (so_output.num_components == 1) { * but not from arrays since they have different packing rules
result = spirv_builder_emit_composite_extract(&ctx->builder, type, src, components, so_output.num_components); */
if (so_output.num_components == 1 && !glsl_type_is_array(out_type)) {
unsigned component = so_output.start_component;
result = spirv_builder_emit_composite_extract(&ctx->builder, type, src, &component, so_output.num_components);
} else if (glsl_type_is_vector(out_type)) { } else if (glsl_type_is_vector(out_type)) {
if (glsl_type_is_64bit(out_type)) {
/* 64bit components count as 2 so outputs: bitcast to vec2 and extract */
unsigned idx = 0;
for (unsigned c = 0; idx < so_output.num_components; c++) {
uint32_t member = so_output.start_component + (matrix_offset * 2) + c;
SpvId base_type = get_glsl_basetype(ctx, glsl_get_base_type(out_type));
SpvId conv = spirv_builder_emit_composite_extract(&ctx->builder, base_type, src, &member, 1);
SpvId val = emit_bitcast(ctx, get_uvec_type(ctx, 32, 2), conv);
unsigned v = 0;
components[idx++] = spirv_builder_emit_composite_extract(&ctx->builder, get_uvec_type(ctx, 32, 1), val, &v, 1);
v = 1;
components[idx++] = spirv_builder_emit_composite_extract(&ctx->builder, get_uvec_type(ctx, 32, 1), val, &v, 1);
}
result = spirv_builder_emit_composite_construct(&ctx->builder, get_uvec_type(ctx, 32, so_output.num_components), components, so_output.num_components);
} else {
for (unsigned c = 0; c < so_output.num_components; c++) {
components[c] = so_output.start_component + c;
/* this is the second half of a 2 * vec4 array */
if (slot == VARYING_SLOT_CLIP_DIST1)
components[c] += 4;
}
/* OpVectorShuffle can select vector members into a differently-sized vector */ /* OpVectorShuffle can select vector members into a differently-sized vector */
result = spirv_builder_emit_vector_shuffle(&ctx->builder, type, result = spirv_builder_emit_vector_shuffle(&ctx->builder, type,
src, src, src, src,
components, so_output.num_components); components, so_output.num_components);
}
} else { } else {
assert(glsl_type_is_array_or_matrix(out_type)); assert(glsl_type_is_array_or_matrix(out_type));
ASSERTED const struct glsl_type *bare_type = glsl_without_array(out_type); const struct glsl_type *bare_type = glsl_without_array(out_type);
assert(!glsl_type_is_struct_or_ifc(bare_type)); assert(!glsl_type_is_struct_or_ifc(bare_type));
/* for arrays, we need to manually extract each desired member if (glsl_type_is_matrix(out_type)) {
/* for matrices, the xfb output will never be more than one vec4 from a single row */
unsigned vec_size = glsl_get_vector_elements(out_type);
SpvId vec_type = get_fvec_type(ctx, glsl_get_bit_size(out_type), vec_size);
if (glsl_type_is_64bit(out_type) && vec_size > 2) {
/* dvec3/dvec4 uses 2 slots per row: normalize matrix offset */
matrix_offset /= 2;
}
src = spirv_builder_emit_composite_extract(&ctx->builder, vec_type, src, &matrix_offset, 1);
out_type = glsl_vector_type(glsl_get_base_type(out_type), glsl_get_vector_elements(out_type));
}
/* for arrays (or matrix rows), we need to manually extract each desired member
* and re-pack them into the desired output type * and re-pack them into the desired output type
*/ */
for (unsigned c = 0; c < so_output.num_components; c++) { unsigned idx = 0;
uint32_t member[2]; for (unsigned c = 0; idx < so_output.num_components; c++) {
unsigned member_idx = 0; uint32_t member = so_output.start_component + c;
if (glsl_type_is_matrix(out_type)) { SpvId base_type = get_glsl_basetype(ctx, glsl_get_base_type(bare_type));
member_idx = 1;
member[0] = so_output.register_index;
}
member[member_idx] = so_output.start_component + c;
SpvId base_type = get_glsl_basetype(ctx, glsl_get_base_type(glsl_without_array_or_matrix(out_type)));
if (slot == VARYING_SLOT_CLIP_DIST1) if (slot == VARYING_SLOT_CLIP_DIST1)
member[member_idx] += 4; member += 4;
components[c] = spirv_builder_emit_composite_extract(&ctx->builder, base_type, src, member, 1 + member_idx); components[idx] = spirv_builder_emit_composite_extract(&ctx->builder, base_type, src, &member, 1);
if (glsl_type_is_64bit(bare_type)) {
/* 64bit components count as 2 so outputs: bitcast to vec2 and extract */
SpvId val = emit_bitcast(ctx, get_uvec_type(ctx, 32, 2), components[idx]);
unsigned v = 0;
components[idx++] = spirv_builder_emit_composite_extract(&ctx->builder, get_uvec_type(ctx, 32, 1), val, &v, 1);
v = 1;
components[idx++] = spirv_builder_emit_composite_extract(&ctx->builder, get_uvec_type(ctx, 32, 1), val, &v, 1);
} else {
idx++;
} }
result = spirv_builder_emit_composite_construct(&ctx->builder, type, components, so_output.num_components); }
if (so_output.num_components > 1)
result = spirv_builder_emit_composite_construct(&ctx->builder, get_uvec_type(ctx, 32, so_output.num_components), components, so_output.num_components);
else
result = components[0];
} }
} }